@@ -0,0 +1,474 @@
+import torch
+import torch.nn as nn
+
+
+class ConvBNReLU(nn.Module):
+
+    def __init__(self, in_chan, out_chan, ks=3, stride=1, padding=1,
+                 dilation=1, groups=1, bias=False):
+        super(ConvBNReLU, self).__init__()
+        self.conv = nn.Conv2d(
+            in_chan, out_chan, kernel_size=ks, stride=stride,
+            padding=padding, dilation=dilation,
+            groups=groups, bias=bias)
+        self.bn = nn.BatchNorm2d(out_chan)
+        self.relu = nn.ReLU(inplace=True)
+
+    def forward(self, x):
+        feat = self.conv(x)
+        feat = self.bn(feat)
+        feat = self.relu(feat)
+        return feat
+
+
+class UpSample(nn.Module):
+
+    def __init__(self, n_chan, factor=2):
+        super(UpSample, self).__init__()
+        out_chan = n_chan * factor * factor
+        self.proj = nn.Conv2d(n_chan, out_chan, 1, 1, 0)
+        self.up = nn.PixelShuffle(factor)
+        self.init_weight()
+
+    def forward(self, x):
+        feat = self.proj(x)
+        feat = self.up(feat)
+        return feat
+
+    def init_weight(self):
+        nn.init.xavier_normal_(self.proj.weight, gain=1.)
+
+
+class DetailBranch(nn.Module):
+
+    def __init__(self, input_channel=3):
+        super(DetailBranch, self).__init__()
+        self.S1 = nn.Sequential(
+            ConvBNReLU(input_channel, 64, 3, stride=2),
+            ConvBNReLU(64, 64, 3, stride=1),
+        )
+        self.S2 = nn.Sequential(
+            ConvBNReLU(64, 64, 3, stride=2),
+            ConvBNReLU(64, 64, 3, stride=1),
+            ConvBNReLU(64, 64, 3, stride=1),
+        )
+        self.S3 = nn.Sequential(
+            ConvBNReLU(64, 128, 3, stride=2),
+            ConvBNReLU(128, 128, 3, stride=1),
+            ConvBNReLU(128, 128, 3, stride=1),
+        )
+
+    def forward(self, x):
+        feat = self.S1(x)
+        feat = self.S2(feat)
+        feat = self.S3(feat)
+        return feat
+
+
+class StemBlock(nn.Module):
+
+    def __init__(self, input_channel=3):
+        super(StemBlock, self).__init__()
+        self.conv = ConvBNReLU(input_channel, 16, 3, stride=2)
+        self.left = nn.Sequential(
+            ConvBNReLU(16, 8, 1, stride=1, padding=0),
+            ConvBNReLU(8, 16, 3, stride=2),
+        )
+        self.right = nn.MaxPool2d(
+            kernel_size=3, stride=2, padding=1, ceil_mode=False)
+        self.fuse = ConvBNReLU(32, 16, 3, stride=1)
+
+    def forward(self, x):
+        feat = self.conv(x)
+        feat_left = self.left(feat)
+        feat_right = self.right(feat)
+        feat = torch.cat([feat_left, feat_right], dim=1)
+        feat = self.fuse(feat)
+        return feat
+
+
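+# CEBlock: context-embedding block that ends the segment branch. It batch-norms the
+# globally average-pooled feature, projects it with a 1x1 ConvBNReLU, adds it back to
+# the input as a broadcast residual, and refines the sum with a 3x3 ConvBNReLU.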
+class CEBlock(nn.Module):
+
+    def __init__(self):
+        super(CEBlock, self).__init__()
+        self.bn = nn.BatchNorm2d(128)
+        self.conv_gap = ConvBNReLU(128, 128, 1, stride=1, padding=0)
+        # TODO: in the paper this is a plain conv2d, without bn-relu
+        self.conv_last = ConvBNReLU(128, 128, 3, stride=1)
+
+    def forward(self, x):
+        feat = torch.mean(x, dim=(2, 3), keepdim=True)
+        feat = self.bn(feat)
+        feat = self.conv_gap(feat)
+        feat = feat + x
+        feat = self.conv_last(feat)
+        return feat
+
+
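+# GELayerS1/GELayerS2: gather-and-expansion layers of the segment branch. A 3x3
+# ConvBNReLU gathers local features, a depthwise 3x3 conv (groups=in_chan) expands the
+# channels by exp_ratio, and a 1x1 projection maps back to out_chan before the residual
+# add. The S2 variant downsamples with stride 2 and adds a depthwise + 1x1 shortcut.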
+class GELayerS1(nn.Module):
+
+    def __init__(self, in_chan, out_chan, exp_ratio=6):
+        super(GELayerS1, self).__init__()
+        mid_chan = in_chan * exp_ratio
+        self.conv1 = ConvBNReLU(in_chan, in_chan, 3, stride=1)
+        self.dwconv = nn.Sequential(
+            nn.Conv2d(
+                in_chan, mid_chan, kernel_size=3, stride=1,
+                padding=1, groups=in_chan, bias=False),
+            nn.BatchNorm2d(mid_chan),
+            nn.ReLU(inplace=True),  # not shown in paper
+        )
+        self.conv2 = nn.Sequential(
+            nn.Conv2d(
+                mid_chan, out_chan, kernel_size=1, stride=1,
+                padding=0, bias=False),
+            nn.BatchNorm2d(out_chan),
+        )
+        self.conv2[1].last_bn = True
+        self.relu = nn.ReLU(inplace=True)
+
+    def forward(self, x):
+        feat = self.conv1(x)
+        feat = self.dwconv(feat)
+        feat = self.conv2(feat)
+        feat = feat + x
+        feat = self.relu(feat)
+        return feat
+
+
+class GELayerS2(nn.Module):
+
+    def __init__(self, in_chan, out_chan, exp_ratio=6):
+        super(GELayerS2, self).__init__()
+        mid_chan = in_chan * exp_ratio
+        self.conv1 = ConvBNReLU(in_chan, in_chan, 3, stride=1)
+        self.dwconv1 = nn.Sequential(
+            nn.Conv2d(
+                in_chan, mid_chan, kernel_size=3, stride=2,
+                padding=1, groups=in_chan, bias=False),
+            nn.BatchNorm2d(mid_chan),
+        )
+        self.dwconv2 = nn.Sequential(
+            nn.Conv2d(
+                mid_chan, mid_chan, kernel_size=3, stride=1,
+                padding=1, groups=mid_chan, bias=False),
+            nn.BatchNorm2d(mid_chan),
+            nn.ReLU(inplace=True),  # not shown in paper
+        )
+        self.conv2 = nn.Sequential(
+            nn.Conv2d(
+                mid_chan, out_chan, kernel_size=1, stride=1,
+                padding=0, bias=False),
+            nn.BatchNorm2d(out_chan),
+        )
+        self.conv2[1].last_bn = True
+        self.shortcut = nn.Sequential(
+            nn.Conv2d(
+                in_chan, in_chan, kernel_size=3, stride=2,
+                padding=1, groups=in_chan, bias=False),
+            nn.BatchNorm2d(in_chan),
+            nn.Conv2d(
+                in_chan, out_chan, kernel_size=1, stride=1,
+                padding=0, bias=False),
+            nn.BatchNorm2d(out_chan),
+        )
+        self.relu = nn.ReLU(inplace=True)
+
+    def forward(self, x):
+        feat = self.conv1(x)
+        feat = self.dwconv1(feat)
+        feat = self.dwconv2(feat)
+        feat = self.conv2(feat)
+        shortcut = self.shortcut(x)
+        feat = feat + shortcut
+        feat = self.relu(feat)
+        return feat
+
+
+class SegmentBranch(nn.Module):
+
+    def __init__(self, input_channel=3):
+        super(SegmentBranch, self).__init__()
+        self.S1S2 = StemBlock(input_channel)
+        self.S3 = nn.Sequential(
+            GELayerS2(16, 32),
+            GELayerS1(32, 32),
+        )
+        self.S4 = nn.Sequential(
+            GELayerS2(32, 64),
+            GELayerS1(64, 64),
+        )
+        self.S5_4 = nn.Sequential(
+            GELayerS2(64, 128),
+            GELayerS1(128, 128),
+            GELayerS1(128, 128),
+            GELayerS1(128, 128),
+        )
+        self.S5_5 = CEBlock()
+
+    def forward(self, x):
+        feat2 = self.S1S2(x)
+        feat3 = self.S3(feat2)
+        feat4 = self.S4(feat3)
+        feat5_4 = self.S5_4(feat4)
+        feat5_5 = self.S5_5(feat5_4)
+        return feat2, feat3, feat4, feat5_4, feat5_5
+
+
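+# BGALayer (bilateral guided aggregation) fuses the two branches: x_d is the detail
+# branch output (1/8 resolution, 128 channels) and x_s is the segment branch output
+# after CEBlock (1/32 resolution, 128 channels). Each branch gates the other through a
+# sigmoid attention map, and up1/up2 upsample by 4x to bridge the 1/32 -> 1/8 gap
+# before the final 3x3 ConvBNReLU.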
+class BGALayer(nn.Module):
+
+    def __init__(self):
+        super(BGALayer, self).__init__()
+        self.left1 = nn.Sequential(
+            nn.Conv2d(
+                128, 128, kernel_size=3, stride=1,
+                padding=1, groups=128, bias=False),
+            nn.BatchNorm2d(128),
+            nn.Conv2d(
+                128, 128, kernel_size=1, stride=1,
+                padding=0, bias=False),
+        )
+        self.left2 = nn.Sequential(
+            nn.Conv2d(
+                128, 128, kernel_size=3, stride=2,
+                padding=1, bias=False),
+            nn.BatchNorm2d(128),
+            nn.AvgPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=False)
+        )
+        self.right1 = nn.Sequential(
+            nn.Conv2d(
+                128, 128, kernel_size=3, stride=1,
+                padding=1, bias=False),
+            nn.BatchNorm2d(128),
+        )
+        self.right2 = nn.Sequential(
+            nn.Conv2d(
+                128, 128, kernel_size=3, stride=1,
+                padding=1, groups=128, bias=False),
+            nn.BatchNorm2d(128),
+            nn.Conv2d(
+                128, 128, kernel_size=1, stride=1,
+                padding=0, bias=False),
+        )
+        self.up1 = nn.Upsample(scale_factor=4)
+        self.up2 = nn.Upsample(scale_factor=4)
+        ## TODO: does this really have no relu?
+        self.conv = nn.Sequential(
+            nn.Conv2d(
+                128, 128, kernel_size=3, stride=1,
+                padding=1, bias=False),
+            nn.BatchNorm2d(128),
+            nn.ReLU(inplace=True),  # not shown in paper
+        )
+
+    def forward(self, x_d, x_s):
+        dsize = x_d.size()[2:]
+        left1 = self.left1(x_d)
+        left2 = self.left2(x_d)
+        right1 = self.right1(x_s)
+        right2 = self.right2(x_s)
+        right1 = self.up1(right1)
+        left = left1 * torch.sigmoid(right1)
+        right = left2 * torch.sigmoid(right2)
+        right = self.up2(right)
+        out = self.conv(left + right)
+        return out
+
+
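+# SegmentHead: a 3x3 ConvBNReLU plus dropout followed by a classifier. Auxiliary heads
+# (aux=True) first upsample 2x and project to up_factor**2 channels, then a 1x1 conv
+# produces n_classes maps and a bilinear upsample by up_factor // 2 restores the input
+# resolution; the main head (aux=False) skips that stage and upsamples by up_factor.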
+class SegmentHead(nn.Module):
+
+    def __init__(self, in_chan, mid_chan, n_classes, up_factor=8, aux=True):
+        super(SegmentHead, self).__init__()
+        self.conv = ConvBNReLU(in_chan, mid_chan, 3, stride=1)
+        self.drop = nn.Dropout(0.1)
+        self.up_factor = up_factor
+
+        out_chan = n_classes
+        mid_chan2 = up_factor * up_factor if aux else mid_chan
+        up_factor = up_factor // 2 if aux else up_factor
+        self.conv_out = nn.Sequential(
+            nn.Sequential(
+                nn.Upsample(scale_factor=2),
+                ConvBNReLU(mid_chan, mid_chan2, 3, stride=1)
+            ) if aux else nn.Identity(),
+            nn.Conv2d(mid_chan2, out_chan, 1, 1, 0, bias=True),
+            nn.Upsample(scale_factor=up_factor, mode='bilinear', align_corners=False)
+        )
+
+    def forward(self, x):
+        feat = self.conv(x)
+        feat = self.drop(feat)
+        feat = self.conv_out(feat)
+        return feat
+
+
+class BiSeNetV2(nn.Module):
+
+    def __init__(self, n_classes, input_channels=3, aux_mode='train'):
+        super(BiSeNetV2, self).__init__()
+        self.aux_mode = aux_mode
+        self.detail = DetailBranch(input_channels)
+        self.segment = SegmentBranch(input_channels)
+        self.bga = BGALayer()
+
+        ## TODO: what is the right number of mid channels?
+        self.head = SegmentHead(128, 1024, n_classes, up_factor=8, aux=False)
+        if self.aux_mode == 'train':
+            self.aux2 = SegmentHead(16, 128, n_classes, up_factor=4)
+            self.aux3 = SegmentHead(32, 128, n_classes, up_factor=8)
+            self.aux4 = SegmentHead(64, 128, n_classes, up_factor=16)
+            self.aux5_4 = SegmentHead(128, 128, n_classes, up_factor=32)
+
+        self.init_weights()
+
+    def forward(self, x):
+        size = x.size()[2:]
+
+        feat_d = self.detail(x)
+        feat2, feat3, feat4, feat5_4, feat_s = self.segment(x)
+        feat_head = self.bga(feat_d, feat_s)
+
+        logits = self.head(feat_head)
+        if self.aux_mode == 'train':
+            logits_aux2 = self.aux2(feat2)
+            logits_aux3 = self.aux3(feat3)
+            logits_aux4 = self.aux4(feat4)
+            logits_aux5_4 = self.aux5_4(feat5_4)
+            return logits, logits_aux2, logits_aux3, logits_aux4, logits_aux5_4
+        elif self.aux_mode == 'eval':
+            return logits,
+        elif self.aux_mode == 'pred':
+            pred = logits.argmax(dim=1)
+            return pred
+        else:
+            raise NotImplementedError
+
+    def init_weights(self):
+        for name, module in self.named_modules():
+            if isinstance(module, (nn.Conv2d, nn.Linear)):
+                nn.init.kaiming_normal_(module.weight, mode='fan_out')
+                if module.bias is not None:
+                    nn.init.constant_(module.bias, 0)
+            elif isinstance(module, nn.modules.batchnorm._BatchNorm):
+                if hasattr(module, 'last_bn') and module.last_bn:
+                    nn.init.zeros_(module.weight)
+                else:
+                    nn.init.ones_(module.weight)
+                nn.init.zeros_(module.bias)
+        self.load_pretrain()
+
+    def load_pretrain(self):
+        # 230423: at inference time there is no need to load pretrained weights here
+        pass
+
+    def get_params(self):
+        def add_param_to_list(mod, wd_params, nowd_params):
+            for param in mod.parameters():
+                if param.dim() == 1:
+                    nowd_params.append(param)
+                elif param.dim() == 4:
+                    wd_params.append(param)
+                else:
+                    # 'name' is the child-module name captured from the loop below
+                    print(name)
+
+        wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params = [], [], [], []
+        for name, child in self.named_children():
+            if 'head' in name or 'aux' in name:
+                add_param_to_list(child, lr_mul_wd_params, lr_mul_nowd_params)
+            else:
+                add_param_to_list(child, wd_params, nowd_params)
+        return wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params
+
+
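+# A minimal sketch (not from the original file) of how get_params() could feed an
+# optimizer: head/aux parameters go into separate groups so they can take a larger
+# learning rate, and the dim==1 (BN/bias) parameters skip weight decay. The lr,
+# momentum and weight_decay values below are placeholders, not values from this repo.
+#
+# model = BiSeNetV2(n_classes=19)
+# wd_params, nowd_params, lr_mul_wd, lr_mul_nowd = model.get_params()
+# optimizer = torch.optim.SGD(
+#     [
+#         {'params': wd_params},
+#         {'params': nowd_params, 'weight_decay': 0.0},
+#         {'params': lr_mul_wd, 'lr': 10 * 1e-2},
+#         {'params': lr_mul_nowd, 'lr': 10 * 1e-2, 'weight_decay': 0.0},
+#     ],
+#     lr=1e-2, momentum=0.9, weight_decay=5e-4)
+
+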
+class OhemCELoss(nn.Module):
+    """
+    OHEM (online hard example mining) cross-entropy loss.
+    Core idea: only pixels whose loss exceeds a threshold take part in the loss,
+    but at least n_min pixels are always kept.
+    """
+
+    def __init__(self, paramsDict, thresh, lb_ignore=255):
+        super(OhemCELoss, self).__init__()
+
+        self.paramsDict = paramsDict
+
+        device_str = self.paramsDict['params']['device_str']
+        # make sure the threshold tensor is sent to device_str
+        device = torch.device(device_str)
+
+        # self.thresh = 0.3567
+        self.thresh = -torch.log(torch.tensor(thresh, requires_grad=False, dtype=torch.float)).to(device)
+        # self.lb_ignore = 255
+        self.lb_ignore = lb_ignore
+        self.criteria = nn.CrossEntropyLoss(ignore_index=lb_ignore, reduction='none')
+
+    def forward(self, logits, labels):
+        # logits: [2,11,1088,896] batch,classNum,height,width
+        # labels: [2,1088,896] batch,height,width
+
+        # 1. compute n_min, the minimum number of pixels that must contribute:
+        #    one sixteenth of all non-ignored label pixels in the batch
+        # n_min: 121856
+        n_min = labels[labels != self.lb_ignore].numel() // 16
+        # 2. per-pixel cross-entropy loss, flattened to 1-D
+        # loss.shape = (1949696,) 1949696 = 2 * 1088 * 896
+        loss = self.criteria(logits, labels).view(-1)
+        # 3. keep only the "hard" pixels whose loss exceeds the threshold;
+        #    this avoids the full sort usually used for OHEM, which would be slow
+        # loss_hard.shape = (140232,)
+        loss_hard = loss[loss > self.thresh]
+        # 4. if fewer than n_min pixels survive, fall back to the n_min largest losses
+        if loss_hard.numel() < n_min:
+            loss_hard, _ = loss.topk(n_min)
+        # 5. otherwise, all surviving hard pixels contribute
+        # loss_hard_mean = 0.7070
+        loss_hard_mean = torch.mean(loss_hard)
+        # 6. return the mean of the hard losses
+        # 7. note: this OHEM loss is a poor estimate of the overall model loss,
+        #    since pixels below the threshold are ignored entirely
+        return loss_hard_mean
+
+
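+# A minimal sketch (not from the original file) of how the five training outputs could
+# be combined with OhemCELoss. paramsDict is a hypothetical config dict carrying
+# 'device_str'; the 0.7 threshold and equal auxiliary weights are placeholder choices.
+#
+# paramsDict = {'params': {'device_str': 'cuda'}}
+# criterion = OhemCELoss(paramsDict, thresh=0.7, lb_ignore=255)
+# logits, aux2, aux3, aux4, aux5_4 = model(images)  # aux_mode='train'
+# loss = criterion(logits, labels)
+# loss = loss + sum(criterion(lgt, labels) for lgt in (aux2, aux3, aux4, aux5_4))
+# loss.backward()
+
+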
+# if __name__ == "__main__":
+#
+#     import time
+#
+#     # ==========================================================
+#     # BiSeNetV2 with a configurable number of input channels
+#     # ==========================================================
+#
+#     input_channels = 7
+#
+#     x = torch.randn(2, input_channels, 256, 256).cuda()
+#     # x = torch.randn(2, 3, 224, 224).cuda()
+#     print("=============Input:=============")
+#     print(x.shape)
+#
+#     model = BiSeNetV2(n_classes=19, input_channels=7)
+#
+#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+#     print(device)
+#     model = model.to(device)
+#
+#     netBeforeTime = time.time()
+#     outs = model(x)
+#     netEndTime = time.time()
+#     print("Model inference time:", netEndTime - netBeforeTime)
+#     print("=============Output:=============")
+#     for out in outs:
+#         print(out.size())
+#         # print(logits.size())
+
+
+"""
|
|
|
|
|
+=============输入:=============
|
|
|
|
|
+torch.Size([2, 7, 256, 256])
|
|
|
|
|
+cuda
|
|
|
|
|
+模型推理花费时间: 0.3020000457763672
|
|
|
|
|
+=============输出:=============
|
|
|
|
|
+torch.Size([2, 19, 256, 256])
|
|
|
|
|
+torch.Size([2, 19, 256, 256])
|
|
|
|
|
+torch.Size([2, 19, 256, 256])
|
|
|
|
|
+torch.Size([2, 19, 256, 256])
|
|
|
|
|
+torch.Size([2, 19, 256, 256])
|
|
|
|
|
+
|
|
|
|
|
+进程已结束,退出代码0
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+"""
|