AnlaAnla 3 months ago
Commit
a01c02bcff

BIN
Model/back_defect.pth


BIN
Model/inner_box.pth


BIN
Model/no_reflect_front_defect.pth


BIN
Model/outer_box.pth


+ 0 - 0
Test/test01.py


+ 0 - 0
app/__init__.py


+ 0 - 0
app/api/__init__.py


+ 40 - 0
app/api/card_inference.py

@@ -0,0 +1,40 @@
+from fastapi import APIRouter, File, UploadFile, Depends, HTTPException, Path
+from fastapi.concurrency import run_in_threadpool
+from typing import Annotated
+from enum import Enum
+from ..core.config import settings
+from app.services.card_service import CardInferenceService, card_service
+
+router = APIRouter()
+
+model_names = list(settings.CARD_MODELS_CONFIG.keys())
+InferenceType = Enum("InferenceType", {name: name for name in model_names})
+
+
+@router.post("/json_result")
+async def card_json_result(
+        inference_type: InferenceType,
+        # Dependency injection stays as-is
+        service: CardInferenceService = Depends(lambda: card_service),
+        file: UploadFile = File(...)
+):
+    """
+    接收一张卡片图片,使用指定类型的模型进行推理,并返回JSON结果。
+
+    - **inference_type**: 要使用的模型类型(从下拉列表中选择)。
+    - **file**: 要上传的图片文件。
+    """
+    image_bytes = await file.read()
+
+    try:
+        # Pass the Enum member's underlying string value via .value
+        json_result = await run_in_threadpool(
+            service.predict,
+            inference_type=inference_type.value,
+            image_bytes=image_bytes
+        )
+        return json_result
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Internal server error: {e}")

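For reference, a minimal client sketch for this endpoint (assuming the server started via run.py on port 7744; "card.jpg" is a placeholder path):

    import requests

    # URL = settings.API_prefix + the route; inference_type travels as a query parameter.
    url = "http://127.0.0.1:7744/api/card_inference/json_result"
    with open("card.jpg", "rb") as f:
        resp = requests.post(
            url,
            params={"inference_type": "outer_box"},  # any key of CARD_MODELS_CONFIG
            files={"file": ("card.jpg", f, "image/jpeg")},
        )
    print(resp.status_code, resp.json())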
+ 0 - 0
app/core/__init__.py


+ 58 - 0
app/core/config.py

@@ -0,0 +1,58 @@
+from pathlib import Path
+from typing import TypedDict
+
+
+# Schema for a single model's configuration.
+# A TypedDict, so the plain dicts below actually satisfy the annotation
+class CardModelConfig(TypedDict):
+    pth_path: str
+    class_dict: dict
+    img_size: dict
+    confidence: float
+    input_channels: int
+
+
+class Settings:
+    API_prefix: str = "/api/card_inference"
+    BASE_PATH = Path(__file__).parent.parent.absolute()
+
+    # One dict manages all the card-detection models.
+    # Each key (e.g. 'outer_box') becomes the {inference_type} value in the API path
+    CARD_MODELS_CONFIG: dict[str, CardModelConfig] = {
+        "outer_box": {
+            "pth_path": "Model/outer_box.pth",
+            "class_dict": {1: 'outer_box'},
+            "img_size": {'width': 1280, 'height': 1280},
+            "confidence": 0.5,
+            "input_channels": 3,
+        },
+        "inner_box": {
+            "pth_path": "Model/inner_box.pth",
+            "class_dict": {1: 'inner_box'},
+            "img_size": {'width': 1280, 'height': 1280},
+            "confidence": 0.5,
+            "input_channels": 3,
+        },
+        "back_defect": {
+            "pth_path": "Model/back_defect.pth",
+            "class_dict": {
+                1: 'wear', 2: 'wear_and_impact', 3: 'impact',
+                4: 'damaged', 5: 'wear_and_stain',
+            },
+            "img_size": {'width': 512, 'height': 512},
+            "confidence": 0.5,
+            "input_channels": 3,
+        },
+        "no_reflect_front_defect": {
+            "pth_path": "Model/no_reflect_front_defect.pth",
+            "class_dict": {1: 'scratch',
+                           2: 'pit',
+                           3: 'stain'},
+            "img_size": {'width': 512, 'height': 512},
+            "confidence": 0.5,
+            "input_channels": 3,
+        },
+    }
+
+
+settings = Settings()
+print(settings.BASE_PATH)

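Since the pth_path values are relative, a small sanity check can catch missing weight files before startup (a sketch, assuming the process runs from the repository root so that Model/ resolves; check_model_files is a hypothetical helper):

    from pathlib import Path

    from app.core.config import settings

    def check_model_files() -> None:
        # Verify every configured checkpoint exists before the lifespan loads it.
        for name, cfg in settings.CARD_MODELS_CONFIG.items():
            pth = Path(cfg["pth_path"])
            if not pth.is_file():
                raise FileNotFoundError(f"model '{name}': missing weights at {pth.resolve()}")

    if __name__ == "__main__":
        check_model_files()
        print("all model files present")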
+ 38 - 0
app/core/model_loader.py

@@ -0,0 +1,38 @@
+from typing import Dict
+from .config import settings
+from ..utils.fry_bisenetv2_predictor_V01_250811 import FryBisenetV2Predictor
+
+# Global registry of loaded model predictors
+predictors: Dict[str, FryBisenetV2Predictor] = {}
+
+
+def load_models():
+    print("--- Loading card-recognition models ---")
+    for name, config in settings.CARD_MODELS_CONFIG.items():
+        print(f"... loading model: {name} ...")
+        try:
+            predictor = FryBisenetV2Predictor(
+                pth_path=config['pth_path'],
+                real_seg_class_dict=config['class_dict'],
+                imgSize_train_dict=config['img_size'],
+                confidence=config['confidence'],
+                input_channels=config['input_channels']
+            )
+            predictors[name] = predictor
+            print(f"--- Model '{name}' loaded successfully ---")
+        except Exception as e:
+            print(f"!!! Failed to load model '{name}': {e} !!!")
+
+
+def unload_models():
+    """Release resources on application shutdown"""
+    print("... unloading models ...")
+    predictors.clear()
+
+
+def get_predictor(name: str) -> FryBisenetV2Predictor:
+    """Return an already-loaded predictor instance"""
+    predictor = predictors.get(name)
+    if not predictor:
+        raise ValueError(f"Model '{name}' does not exist or failed to load. Available models: {list(predictors.keys())}")
+    return predictor

+ 22 - 0
app/main.py

@@ -0,0 +1,22 @@
+# app/main.py
+from fastapi import FastAPI
+from contextlib import asynccontextmanager
+from .core.model_loader import load_models, unload_models
+from .api.card_inference import router as card_inference_router
+
+from .core.config import settings
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    print("--- Application startup ---")
+    load_models()
+    yield
+
+    print("--- Application shutdown ---")
+    unload_models()
+
+
+app = FastAPI(title="Card box and defect detection service", lifespan=lifespan)
+
+app.include_router(card_inference_router, prefix=settings.API_prefix)

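The lifespan hook also makes in-process testing straightforward (a sketch, assuming FastAPI's TestClient and its httpx dependency are installed; "card.jpg" is a placeholder path):

    from fastapi.testclient import TestClient

    from app.main import app

    # Entering the context runs the lifespan startup, i.e. loads the models.
    with TestClient(app) as client:
        with open("card.jpg", "rb") as f:
            r = client.post(
                "/api/card_inference/json_result",
                params={"inference_type": "outer_box"},
                files={"file": ("card.jpg", f, "image/jpeg")},
            )
        print(r.status_code, r.json())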
+ 0 - 0
app/services/__init__.py


+ 39 - 0
app/services/card_service.py

@@ -0,0 +1,39 @@
+# app/services/card_service.py
+import cv2
+import numpy as np
+from ..core.model_loader import get_predictor
+
+
+class CardInferenceService:
+    def predict(self, inference_type: str, image_bytes: bytes) -> dict:
+        """
+        执行卡片识别推理。
+
+        Args:
+            inference_type: 模型类型 (e.g., 'outer_box').
+            image_bytes: 从API请求中获得的原始图像字节。
+
+        Returns:
+            一个包含推理结果的字典。
+        """
+        # 1. 获取对应的预测器实例
+        predictor = get_predictor(inference_type)
+
+        # 2. 将字节流解码为OpenCV图像
+        # 将字节数据转换为numpy数组
+        np_arr = np.frombuffer(image_bytes, np.uint8)
+        # 从numpy数组中解码图像
+        img_bgr = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
+
+        if img_bgr is None:
+            raise ValueError("无法解码图像,请确保上传的是有效的图片格式 (JPG, PNG, etc.)")
+
+        # 3. 调用我们新加的 predict_from_image 方法进行推理
+        result = predictor.predict_from_image(img_bgr)
+
+        # 4. 返回JSON兼容的结果
+        return result
+
+
+# Create a singleton service instance
+card_service = CardInferenceService()

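The service layer can also be exercised without HTTP (a sketch; "card.jpg" is a placeholder path):

    from app.core.model_loader import load_models
    from app.services.card_service import card_service

    # Load the models once, then call the service the same way the endpoint does.
    load_models()
    with open("card.jpg", "rb") as f:
        result = card_service.predict(inference_type="outer_box", image_bytes=f.read())
    print(result["num"], result["names"])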
+ 0 - 0
app/utils/__init__.py


+ 474 - 0
app/utils/backbone.py

@@ -0,0 +1,474 @@
+import torch
+import torch.nn as nn
+
+
+class ConvBNReLU(nn.Module):
+
+    def __init__(self, in_chan, out_chan, ks=3, stride=1, padding=1,
+                 dilation=1, groups=1, bias=False):
+        super(ConvBNReLU, self).__init__()
+        self.conv = nn.Conv2d(
+            in_chan, out_chan, kernel_size=ks, stride=stride,
+            padding=padding, dilation=dilation,
+            groups=groups, bias=bias)
+        self.bn = nn.BatchNorm2d(out_chan)
+        self.relu = nn.ReLU(inplace=True)
+
+    def forward(self, x):
+        feat = self.conv(x)
+        feat = self.bn(feat)
+        feat = self.relu(feat)
+        return feat
+
+
+class UpSample(nn.Module):
+
+    def __init__(self, n_chan, factor=2):
+        super(UpSample, self).__init__()
+        out_chan = n_chan * factor * factor
+        self.proj = nn.Conv2d(n_chan, out_chan, 1, 1, 0)
+        self.up = nn.PixelShuffle(factor)
+        self.init_weight()
+
+    def forward(self, x):
+        feat = self.proj(x)
+        feat = self.up(feat)
+        return feat
+
+    def init_weight(self):
+        nn.init.xavier_normal_(self.proj.weight, gain=1.)
+
+
+class DetailBranch(nn.Module):
+
+    def __init__(self, input_channel=3):
+        super(DetailBranch, self).__init__()
+        self.S1 = nn.Sequential(
+            ConvBNReLU(input_channel, 64, 3, stride=2),
+            ConvBNReLU(64, 64, 3, stride=1),
+        )
+        self.S2 = nn.Sequential(
+            ConvBNReLU(64, 64, 3, stride=2),
+            ConvBNReLU(64, 64, 3, stride=1),
+            ConvBNReLU(64, 64, 3, stride=1),
+        )
+        self.S3 = nn.Sequential(
+            ConvBNReLU(64, 128, 3, stride=2),
+            ConvBNReLU(128, 128, 3, stride=1),
+            ConvBNReLU(128, 128, 3, stride=1),
+        )
+
+    def forward(self, x):
+        feat = self.S1(x)
+        feat = self.S2(feat)
+        feat = self.S3(feat)
+        return feat
+
+
+class StemBlock(nn.Module):
+
+    def __init__(self, input_channel=3):
+        super(StemBlock, self).__init__()
+        self.conv = ConvBNReLU(input_channel, 16, 3, stride=2)
+        self.left = nn.Sequential(
+            ConvBNReLU(16, 8, 1, stride=1, padding=0),
+            ConvBNReLU(8, 16, 3, stride=2),
+        )
+        self.right = nn.MaxPool2d(
+            kernel_size=3, stride=2, padding=1, ceil_mode=False)
+        self.fuse = ConvBNReLU(32, 16, 3, stride=1)
+
+    def forward(self, x):
+        feat = self.conv(x)
+        feat_left = self.left(feat)
+        feat_right = self.right(feat)
+        feat = torch.cat([feat_left, feat_right], dim=1)
+        feat = self.fuse(feat)
+        return feat
+
+
+class CEBlock(nn.Module):
+
+    def __init__(self):
+        super(CEBlock, self).__init__()
+        self.bn = nn.BatchNorm2d(128)
+        self.conv_gap = ConvBNReLU(128, 128, 1, stride=1, padding=0)
+        # TODO: in paper here is naive conv2d, no bn-relu
+        self.conv_last = ConvBNReLU(128, 128, 3, stride=1)
+
+    def forward(self, x):
+        feat = torch.mean(x, dim=(2, 3), keepdim=True)
+        feat = self.bn(feat)
+        feat = self.conv_gap(feat)
+        feat = feat + x
+        feat = self.conv_last(feat)
+        return feat
+
+
+class GELayerS1(nn.Module):
+
+    def __init__(self, in_chan, out_chan, exp_ratio=6):
+        super(GELayerS1, self).__init__()
+        mid_chan = in_chan * exp_ratio
+        self.conv1 = ConvBNReLU(in_chan, in_chan, 3, stride=1)
+        self.dwconv = nn.Sequential(
+            nn.Conv2d(
+                in_chan, mid_chan, kernel_size=3, stride=1,
+                padding=1, groups=in_chan, bias=False),
+            nn.BatchNorm2d(mid_chan),
+            nn.ReLU(inplace=True),  # not shown in paper
+        )
+        self.conv2 = nn.Sequential(
+            nn.Conv2d(
+                mid_chan, out_chan, kernel_size=1, stride=1,
+                padding=0, bias=False),
+            nn.BatchNorm2d(out_chan),
+        )
+        self.conv2[1].last_bn = True
+        self.relu = nn.ReLU(inplace=True)
+
+    def forward(self, x):
+        feat = self.conv1(x)
+        feat = self.dwconv(feat)
+        feat = self.conv2(feat)
+        feat = feat + x
+        feat = self.relu(feat)
+        return feat
+
+
+class GELayerS2(nn.Module):
+
+    def __init__(self, in_chan, out_chan, exp_ratio=6):
+        super(GELayerS2, self).__init__()
+        mid_chan = in_chan * exp_ratio
+        self.conv1 = ConvBNReLU(in_chan, in_chan, 3, stride=1)
+        self.dwconv1 = nn.Sequential(
+            nn.Conv2d(
+                in_chan, mid_chan, kernel_size=3, stride=2,
+                padding=1, groups=in_chan, bias=False),
+            nn.BatchNorm2d(mid_chan),
+        )
+        self.dwconv2 = nn.Sequential(
+            nn.Conv2d(
+                mid_chan, mid_chan, kernel_size=3, stride=1,
+                padding=1, groups=mid_chan, bias=False),
+            nn.BatchNorm2d(mid_chan),
+            nn.ReLU(inplace=True),  # not shown in paper
+        )
+        self.conv2 = nn.Sequential(
+            nn.Conv2d(
+                mid_chan, out_chan, kernel_size=1, stride=1,
+                padding=0, bias=False),
+            nn.BatchNorm2d(out_chan),
+        )
+        self.conv2[1].last_bn = True
+        self.shortcut = nn.Sequential(
+            nn.Conv2d(
+                in_chan, in_chan, kernel_size=3, stride=2,
+                padding=1, groups=in_chan, bias=False),
+            nn.BatchNorm2d(in_chan),
+            nn.Conv2d(
+                in_chan, out_chan, kernel_size=1, stride=1,
+                padding=0, bias=False),
+            nn.BatchNorm2d(out_chan),
+        )
+        self.relu = nn.ReLU(inplace=True)
+
+    def forward(self, x):
+        feat = self.conv1(x)
+        feat = self.dwconv1(feat)
+        feat = self.dwconv2(feat)
+        feat = self.conv2(feat)
+        shortcut = self.shortcut(x)
+        feat = feat + shortcut
+        feat = self.relu(feat)
+        return feat
+
+
+class SegmentBranch(nn.Module):
+
+    def __init__(self, input_channel=3):
+        super(SegmentBranch, self).__init__()
+        self.S1S2 = StemBlock(input_channel)
+        self.S3 = nn.Sequential(
+            GELayerS2(16, 32),
+            GELayerS1(32, 32),
+        )
+        self.S4 = nn.Sequential(
+            GELayerS2(32, 64),
+            GELayerS1(64, 64),
+        )
+        self.S5_4 = nn.Sequential(
+            GELayerS2(64, 128),
+            GELayerS1(128, 128),
+            GELayerS1(128, 128),
+            GELayerS1(128, 128),
+        )
+        self.S5_5 = CEBlock()
+
+    def forward(self, x):
+        feat2 = self.S1S2(x)
+        feat3 = self.S3(feat2)
+        feat4 = self.S4(feat3)
+        feat5_4 = self.S5_4(feat4)
+        feat5_5 = self.S5_5(feat5_4)
+        return feat2, feat3, feat4, feat5_4, feat5_5
+
+
+class BGALayer(nn.Module):
+
+    def __init__(self):
+        super(BGALayer, self).__init__()
+        self.left1 = nn.Sequential(
+            nn.Conv2d(
+                128, 128, kernel_size=3, stride=1,
+                padding=1, groups=128, bias=False),
+            nn.BatchNorm2d(128),
+            nn.Conv2d(
+                128, 128, kernel_size=1, stride=1,
+                padding=0, bias=False),
+        )
+        self.left2 = nn.Sequential(
+            nn.Conv2d(
+                128, 128, kernel_size=3, stride=2,
+                padding=1, bias=False),
+            nn.BatchNorm2d(128),
+            nn.AvgPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=False)
+        )
+        self.right1 = nn.Sequential(
+            nn.Conv2d(
+                128, 128, kernel_size=3, stride=1,
+                padding=1, bias=False),
+            nn.BatchNorm2d(128),
+        )
+        self.right2 = nn.Sequential(
+            nn.Conv2d(
+                128, 128, kernel_size=3, stride=1,
+                padding=1, groups=128, bias=False),
+            nn.BatchNorm2d(128),
+            nn.Conv2d(
+                128, 128, kernel_size=1, stride=1,
+                padding=0, bias=False),
+        )
+        self.up1 = nn.Upsample(scale_factor=4)
+        self.up2 = nn.Upsample(scale_factor=4)
+        ## TODO: does this really have no relu?
+        self.conv = nn.Sequential(
+            nn.Conv2d(
+                128, 128, kernel_size=3, stride=1,
+                padding=1, bias=False),
+            nn.BatchNorm2d(128),
+            nn.ReLU(inplace=True),  # not shown in paper
+        )
+
+    def forward(self, x_d, x_s):
+        dsize = x_d.size()[2:]
+        left1 = self.left1(x_d)
+        left2 = self.left2(x_d)
+        right1 = self.right1(x_s)
+        right2 = self.right2(x_s)
+        right1 = self.up1(right1)
+        left = left1 * torch.sigmoid(right1)
+        right = left2 * torch.sigmoid(right2)
+        right = self.up2(right)
+        out = self.conv(left + right)
+        return out
+
+
+class SegmentHead(nn.Module):
+
+    def __init__(self, in_chan, mid_chan, n_classes, up_factor=8, aux=True):
+        super(SegmentHead, self).__init__()
+        self.conv = ConvBNReLU(in_chan, mid_chan, 3, stride=1)
+        self.drop = nn.Dropout(0.1)
+        self.up_factor = up_factor
+
+        out_chan = n_classes
+        mid_chan2 = up_factor * up_factor if aux else mid_chan
+        up_factor = up_factor // 2 if aux else up_factor
+        self.conv_out = nn.Sequential(
+            nn.Sequential(
+                nn.Upsample(scale_factor=2),
+                ConvBNReLU(mid_chan, mid_chan2, 3, stride=1)
+            ) if aux else nn.Identity(),
+            nn.Conv2d(mid_chan2, out_chan, 1, 1, 0, bias=True),
+            nn.Upsample(scale_factor=up_factor, mode='bilinear', align_corners=False)
+        )
+
+    def forward(self, x):
+        feat = self.conv(x)
+        feat = self.drop(feat)
+        feat = self.conv_out(feat)
+        return feat
+
+
+class BiSeNetV2(nn.Module):
+
+    def __init__(self, n_classes, input_channels=3, aux_mode='train'):
+        super(BiSeNetV2, self).__init__()
+        self.aux_mode = aux_mode
+        self.detail = DetailBranch(input_channels)
+        self.segment = SegmentBranch(input_channels)
+        self.bga = BGALayer()
+
+        ## TODO: what is the number of mid chan ?
+        self.head = SegmentHead(128, 1024, n_classes, up_factor=8, aux=False)
+        if self.aux_mode == 'train':
+            self.aux2 = SegmentHead(16, 128, n_classes, up_factor=4)
+            self.aux3 = SegmentHead(32, 128, n_classes, up_factor=8)
+            self.aux4 = SegmentHead(64, 128, n_classes, up_factor=16)
+            self.aux5_4 = SegmentHead(128, 128, n_classes, up_factor=32)
+
+        self.init_weights()
+
+    def forward(self, x):
+        size = x.size()[2:]
+
+        feat_d = self.detail(x)
+        feat2, feat3, feat4, feat5_4, feat_s = self.segment(x)
+        feat_head = self.bga(feat_d, feat_s)
+
+        logits = self.head(feat_head)
+        if self.aux_mode == 'train':
+            logits_aux2 = self.aux2(feat2)
+            logits_aux3 = self.aux3(feat3)
+            logits_aux4 = self.aux4(feat4)
+            logits_aux5_4 = self.aux5_4(feat5_4)
+            return logits, logits_aux2, logits_aux3, logits_aux4, logits_aux5_4
+        elif self.aux_mode == 'eval':
+            return logits,
+        elif self.aux_mode == 'pred':
+            pred = logits.argmax(dim=1)
+            return pred
+        else:
+            raise NotImplementedError
+
+    def init_weights(self):
+        for name, module in self.named_modules():
+            if isinstance(module, (nn.Conv2d, nn.Linear)):
+                nn.init.kaiming_normal_(module.weight, mode='fan_out')
+                if module.bias is not None:
+                    nn.init.constant_(module.bias, 0)
+            elif isinstance(module, nn.modules.batchnorm._BatchNorm):
+                if hasattr(module, 'last_bn') and module.last_bn:
+                    nn.init.zeros_(module.weight)
+                else:
+                    nn.init.ones_(module.weight)
+                nn.init.zeros_(module.bias)
+        self.load_pretrain()
+
+    def load_pretrain(self):
+        # 230423: at inference time there is no need to load a pretrained backbone here
+        pass
+
+    def get_params(self):
+        def add_param_to_list(mod, wd_params, nowd_params):
+            for param in mod.parameters():
+                if param.dim() == 1:
+                    nowd_params.append(param)
+                elif param.dim() == 4:
+                    wd_params.append(param)
+                else:
+                    print(name)
+
+        wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params = [], [], [], []
+        for name, child in self.named_children():
+            if 'head' in name or 'aux' in name:
+                add_param_to_list(child, lr_mul_wd_params, lr_mul_nowd_params)
+            else:
+                add_param_to_list(child, wd_params, nowd_params)
+        return wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params
+
+
+class OhemCELoss(nn.Module):
+    """
+    Core idea of OHEM: every pixel whose loss exceeds the threshold takes part
+    in the loss computation, but at least n_min pixels are always kept.
+    """
+
+    def __init__(self, paramsDict, thresh, lb_ignore=255):
+        super(OhemCELoss, self).__init__()
+
+        self.paramsDict = paramsDict
+
+        device_str = self.paramsDict['params']['device_str']
+        # Make sure the threshold tensor is created on device_str
+        device = torch.device(device_str)
+
+        # self.thresh = 0.3567
+        self.thresh = -torch.log(torch.tensor(thresh, requires_grad=False, dtype=torch.float)).to(device)
+        # self.lb_ignore = 255
+        self.lb_ignore = lb_ignore
+        self.criteria = nn.CrossEntropyLoss(ignore_index=lb_ignore, reduction='none')
+
+    def forward(self, logits, labels):
+        # logits: [2,11,1088,896]  batch, classNum, height, width
+        # labels: [2,1088,896]     batch, height, width
+
+        # 1. Compute n_min (the minimum number of pixels to keep):
+        #    one sixteenth of all labeled pixels in the batch of n h*w label maps
+        # n_min: 121856
+        n_min = labels[labels != self.lb_ignore].numel() // 16
+        # 2. Cross-entropy gives a per-pixel loss; flatten it to 1-D
+        # loss.shape = (1949696,)  1949696 = 2 * 1088 * 896
+        loss = self.criteria(logits, labels).view(-1)
+        # 3. Only losses above the threshold ("hard" pixels) take part in the loss.
+        #    Note: this avoids the full sort used in naive PyTorch OHEM, which is slow.
+        # loss_hard.shape = (140232,)
+        loss_hard = loss[loss > self.thresh]
+        # 4. If fewer than n_min survive, keep the top n_min losses instead
+        if loss_hard.numel() < n_min:
+            loss_hard, _ = loss.topk(n_min)
+        # 5. If more than n_min pixels pass the threshold, all of them participate
+        # loss_hard_mean = 0.7070
+        loss_hard_mean = torch.mean(loss_hard)
+        # 6. Return the mean of the hard losses.
+        # 7. Why the OHEM loss is a poor estimate of the overall model loss:
+        #    it only accounts for pixels above the threshold and ignores the rest.
+        return loss_hard_mean
+
+
+# if __name__ == "__main__":
+#
+#     # ==========================================================
+#     # 支持不同输入通道的bisenetv2
+#     # ==========================================================
+#
+#     input_channels = 7
+#
+#     x = torch.randn(2, input_channels, 256, 256).cuda()
+#     # x = torch.randn(2, 3, 224, 224).cuda()
+#     print("=============输入:=============")
+#     print(x.shape)
+#
+#     model = BiSeNetV2(n_classes=19,input_channels=7)
+#
+#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+#     print(device)
+#     model = model.to(device)
+#
+#     netBeforeTime = time.time()
+#     outs = model(x)
+#     netEndTime = time.time()
+#     print("模型推理花费时间:",netEndTime-netBeforeTime)
+#     print("=============输出:=============")
+#     for out in outs:
+#         print(out.size())
+#     #  print(logits.size())
+
+
+"""
+=============输入:=============
+torch.Size([2, 7, 256, 256])
+cuda
+模型推理花费时间: 0.3020000457763672
+=============输出:=============
+torch.Size([2, 19, 256, 256])
+torch.Size([2, 19, 256, 256])
+torch.Size([2, 19, 256, 256])
+torch.Size([2, 19, 256, 256])
+torch.Size([2, 19, 256, 256])
+
+进程已结束,退出代码0
+
+
+"""

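A minimal shape check of the eval-mode forward pass (a sketch on CPU; n_classes=6 mirrors the back_defect config's five classes plus background):

    import torch

    from app.utils.backbone import BiSeNetV2

    # In eval mode the forward pass returns a 1-tuple (logits,);
    # the auxiliary heads exist only in train mode.
    model = BiSeNetV2(n_classes=6, input_channels=3, aux_mode='eval')
    model.eval()

    x = torch.randn(2, 3, 512, 512)  # batch of 2, matching the 512x512 defect models
    with torch.no_grad():
        (logits,) = model(x)
    print(logits.shape)  # torch.Size([2, 6, 512, 512])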
+ 117 - 0
app/utils/create_predict_result.py

@@ -0,0 +1,117 @@
+import cv2
+import numpy as np
+
+from app.utils.data_augmentation import LetterBox
+
+
+def point_mapTo_originImg(originImgSize, imgSize_train, now_point):
+    letterBox = LetterBox(imgSize_train)
+    rect_dict = letterBox.get_offset(originImgSize)
+
+    x_ratio = originImgSize['width'] * 1.0 / rect_dict['width']
+    y_ratio = originImgSize['height'] * 1.0 / rect_dict['height']
+
+    new_y = round((now_point[1] - rect_dict['y']) * y_ratio)
+    new_x = round((now_point[0] - rect_dict['x']) * x_ratio)
+
+    new_point = [new_x, new_y]
+
+    return new_point
+
+
+def create_result_singleImg(segClassDict, now_ansImgDict, originImgSize, imgSize_train, confidence=0.5):
+    """ansImg_list.append({"ans_img":ansImg,"probs":probs,"file_name":file_name})"""
+
+    label = now_ansImgDict['ans_img']
+    probs = now_ansImgDict['probs']
+    file_name = now_ansImgDict['file_name']
+
+    # confidence = 0.5
+    assert confidence > 0, "confidence must be greater than 0"
+    # label[label < confidence] = 0
+
+    per_result = {}
+    per_result['num'] = 0
+    per_result['cls'] = []
+    per_result['names'] = []
+    per_result['conf'] = []
+    per_result['shapes'] = []
+
+    # The background class never needs extraction
+    excludeClassList = ['___background___']
+
+    # Iterate over every class
+    for key, val in segClassDict.items():
+
+        if val not in excludeClassList:
+
+            now_class_num = int(key)
+            now_prob_img = probs[now_class_num]
+
+            # 10. Paint this class's pixels white, everything else black
+            imgZero = np.zeros(label.shape, dtype=np.uint8)
+            imgZero[label == (now_class_num)] = 255
+
+            # 20. Find the external contours
+            contours, _ = cv2.findContours(imgZero, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+            # drawnContourImg = np.zeros((imgZero.shape[0], imgZero.shape[1], 3), dtype=np.uint8)
+            # Drawing the contours would visualize the outlines the label produced
+            # drawnContourImg = cv2.drawContours(drawnContourImg, contours, -1, (255, 0, 0), 2)
+
+            # 30. Extract the shape data
+            if len(contours):
+                minCntArea = 5
+
+                # Walk every contour; keep only those above the minimum area
+                for index in range(len(contours)):
+                    nowPntList = []
+                    # Contour area
+                    contourArea = cv2.contourArea(contours[index])
+                    # The area must exceed 5 pixels
+                    if contourArea > minCntArea:
+
+                        # Compute the confidence:
+                        # create an all-zero mask the same size as the image
+                        mask = np.zeros_like(imgZero)
+
+                        # Fill this contour (index) into the mask
+                        cv2.drawContours(mask, contours, index, (255), thickness=cv2.FILLED)
+
+                        # plt.imshow(now_prob_img, cmap='gray')
+                        # plt.show()
+
+                        # plt.imshow(mask, cmap='gray')
+                        # plt.show()
+
+                        # Mean probability inside the contour region
+                        mean_val = cv2.mean(now_prob_img, mask=mask)
+                        now_conf = mean_val[0]
+
+                        externalContour = contours[index]
+                        pointNum = len(externalContour)
+                        # print(pointNum)
+
+                        for i in range(pointNum):
+                            nowPoint = externalContour[i]
+                            nowPoint_list = nowPoint[0].tolist()
+                            # Map the point back onto the original image
+                            new_point = point_mapTo_originImg(originImgSize, imgSize_train, nowPoint_list)
+
+                            nowPntList.append(new_point)
+
+                        pre_defect = {}
+                        pre_defect['class_num'] = int(key)
+                        pre_defect['label'] = str(val)
+                        pre_defect['probability'] = now_conf
+                        pre_defect['points'] = nowPntList
+
+                        if now_conf >= confidence:
+                            per_result['cls'].append(int(key))
+                            per_result['names'].append(str(val))
+                            # conf is a list; append rather than overwrite
+                            per_result['conf'].append(now_conf)
+                            per_result['shapes'].append(pre_defect)
+
+    per_result['num'] = len(per_result['shapes'])
+    return per_result

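To make the coordinate mapping concrete, a small worked example (a sketch; 4096x7000 is just the default size illustrated in LetterBox.get_offset):

    from app.utils.create_predict_result import point_mapTo_originImg

    # A 4096-wide, 7000-tall image letterboxed into 1280x1280 shrinks by
    # r = 1280/7000, leaving a 749-px-wide strip with 265 px of left padding.
    origin = {'width': 4096, 'height': 7000}
    train = {'width': 1280, 'height': 1280}

    print(point_mapTo_originImg(origin, train, [265, 0]))    # [0, 0]  (content top-left)
    print(point_mapTo_originImg(origin, train, [640, 640]))  # [2051, 3500]  (~image center)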
+ 156 - 0
app/utils/data_augmentation.py

@@ -0,0 +1,156 @@
+import math
+
+import cv2
+import numpy as np
+
+class LetterBox(object):
+    def __init__(self, size={'width': 640, 'height': 640}, auto=False, stride=32, *args, **kwargs):
+        # Target size to letterbox into
+        self.size = size
+        self.h = size["height"]
+        self.w = size["width"]
+        self.auto = auto  # pass max size integer, automatically solve for short side using stride
+        self.stride = stride  # used with auto
+
+
+    def __call__(self, im_lb):
+        imgList = im_lb['imgList']
+        lb = im_lb['lb']
+        if lb is not None:
+            assert imgList[0].shape[:2] == lb.shape[:2]
+
+        ans_imgList = self.handle_imgList(imgList)
+
+        # Process the label
+        if lb is not None:
+            ans_lb = self.handle_single_label(lb)
+        else:
+            ans_lb = None
+
+        returnObj = dict(imgList=ans_imgList, lb=ans_lb)
+        return returnObj
+
+
+
+    def handle_imgList(self,imgList):
+        # Letterbox every image in the list
+        ans_imgList = []
+        for per_img in imgList:
+            ans_img = self.handle_single_img(per_img)
+            ans_imgList.append(ans_img)
+        return ans_imgList
+
+    def get_offset(self,originImgSize={'width':4096,'height':7000}):
+
+        # _240429_1543_
+        # [Note]: ResizeBeforeLetterbox overrides this logic
+
+        originH = originImgSize['height']
+        originW = originImgSize['width']
+
+        dstH = self.h
+        dstW = self.w
+
+        def fry_resize_realParams(originH, originW, dstH, dstW):
+            r = min(dstH / originH, dstW / originW)  # ratio of new/old
+            resize_h, resize_w = int(round(originH * r)), int(round(originW * r))  # resized image
+            total_pad_h = int(dstH - resize_h)
+            total_pad_w = int(dstW - resize_w)
+            assert total_pad_h >= 0, "total_pad_h must be >= 0"
+            assert total_pad_w >= 0, "total_pad_w must be >= 0"
+
+            assert total_pad_h == 0 or total_pad_w == 0, "one of total_pad_h and total_pad_w must be 0"
+
+            pad_left = int(total_pad_w // 2)
+            pad_right = total_pad_w - pad_left
+            pad_top = int(total_pad_h // 2)
+            pad_bottom = total_pad_h - pad_top
+
+            before_letterbox_dict = {}
+            before_letterbox_dict['ratio'] = r
+            before_letterbox_dict['resize_h'] = resize_h
+            before_letterbox_dict['resize_w'] = resize_w
+            before_letterbox_dict['total_pad_h'] = total_pad_h
+            before_letterbox_dict['total_pad_w'] = total_pad_w
+            before_letterbox_dict['pad_left'] = pad_left
+            before_letterbox_dict['pad_right'] = pad_right
+            before_letterbox_dict['pad_top'] = pad_top
+            before_letterbox_dict['pad_bottom'] = pad_bottom
+
+            return before_letterbox_dict
+
+
+        before_letterbox_dict = fry_resize_realParams(originH,originW,dstH,dstW)
+
+        rect_dict = {}
+        rect_dict['x'] = before_letterbox_dict['pad_left']
+        rect_dict['y'] = before_letterbox_dict['pad_top']
+        rect_dict['width'] = before_letterbox_dict['resize_w']
+        rect_dict['height'] = before_letterbox_dict['resize_h']
+        rect_dict['ratio'] = before_letterbox_dict['ratio']
+        return rect_dict
+
+
+    def handle_single_img(self, im):
+        assert len(im.shape) == 3, "im must be 3-dimensional"
+        assert (im.shape[2] == 1) or (im.shape[2] == 3), "im must have exactly 1 or 3 channels"
+
+        imh, imw = im.shape[:2]
+
+        r = min(self.h / imh, self.w / imw)  # ratio of new/old
+        h, w = round(imh * r), round(imw * r)  # resized image
+        hs, ws = (math.ceil(x / self.stride) * self.stride for x in (h, w)) if self.auto else (self.h, self.w)
+        top, left = round((hs - h) / 2 - 0.1), round((ws - w) / 2 - 0.1)
+
+        # Padding with 0 would be fine here (normalization uses mean 0, std 1),
+        # but use 114 everywhere anyway
+        if im.shape[2] == 3:
+            im_out = np.full((self.h, self.w, 3), 114, dtype=im.dtype)
+        elif im.shape[2] == 1:
+            im_out = np.full((self.h, self.w, 1), 114, dtype=im.dtype)
+        else:
+            raise ValueError("unexpected number of image channels")
+
+        if im.shape[2]==1:
+            gray_image_hw1 = im
+            gray_image_hw = np.squeeze(gray_image_hw1, axis=-1)
+            singleImg = gray_image_hw
+        else:
+            singleImg = im
+
+        originImg_resized = cv2.resize(singleImg, (w, h), interpolation=cv2.INTER_LINEAR)
+
+
+        if len(originImg_resized.shape)==2:
+            newSingleImg2D = originImg_resized
+            newSingleImg3D = np.expand_dims(newSingleImg2D, axis=-1)
+            newSingleImg = newSingleImg3D
+        else:
+            newSingleImg = originImg_resized
+
+
+        im_out[top:top + h, left:left + w] = newSingleImg
+
+        return im_out
+
+
+    def handle_single_label(self, im):
+        assert len(im.shape) == 2, "label must be 2-dimensional"
+
+        imh, imw = im.shape[:2]
+
+        r = min(self.h / imh, self.w / imw)  # ratio of new/old
+        h, w = round(imh * r), round(imw * r)  # resized image
+        hs, ws = (math.ceil(x / self.stride) * self.stride for x in (h, w)) if self.auto else (self.h, self.w)
+        top, left = round((hs - h) / 2 - 0.1), round((ws - w) / 2 - 0.1)
+
+        # Fill the padding with 0 (background); filling with 255 would exclude it from the loss instead
+        im_out = np.full((self.h, self.w), 0, dtype=im.dtype)
+        im_out[top:top + h, left:left + w] = cv2.resize(im, (w, h), interpolation=cv2.INTER_NEAREST)
+
+        return im_out

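A quick check of the letterbox geometry (a sketch with a synthetic image):

    import numpy as np

    from app.utils.data_augmentation import LetterBox

    lb = LetterBox({'width': 1280, 'height': 1280})

    # A 640x480 (h x w) dummy BGR image scales by r = 2 to 1280x960,
    # then gets 160 px of padding on the left and right.
    img = np.zeros((640, 480, 3), dtype=np.uint8)
    print(lb.handle_single_img(img).shape)               # (1280, 1280, 3)
    print(lb.get_offset({'width': 480, 'height': 640}))  # {'x': 160, 'y': 0, 'width': 960, 'height': 1280, 'ratio': 2.0}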
+ 666 - 0
app/utils/fry_bisenetv2_predictor_V01_250811.py

@@ -0,0 +1,666 @@
+import numpy as np
+import json
+import torch
+import torch.nn as nn
+import cv2
+from pathlib import Path
+import copy
+from typing import Dict, List, Tuple, Optional
+
+from app.utils.backbone import BiSeNetV2
+from app.utils.predict_preprocess import predict_preprocess
+from app.utils.create_predict_result import create_result_singleImg
+from app.utils.handle_result import process_detection_result
+
+import logging
+
+logging.basicConfig(level=logging.INFO)
+
+
+def fry_algo_print(level_str: str, info_str: str):
+    logging.info(f"[{level_str}] : {info_str}")
+
+
+def fry_cv2_imread(filename, flags=cv2.IMREAD_COLOR):
+    """Image reading that supports non-ASCII (e.g. Chinese) paths"""
+    try:
+        with open(filename, 'rb') as f:
+            chunk = f.read()
+        chunk_arr = np.frombuffer(chunk, dtype=np.uint8)
+        img = cv2.imdecode(chunk_arr, flags)
+        if img is None:
+            fry_algo_print("WARN", f"Warning: Unable to decode image: {filename}")
+        return img
+    except IOError as e:
+        fry_algo_print("ERROR", f"IOError: Unable to read file: {filename}")
+        fry_algo_print("ERROR", f"Error details: {str(e)}")
+        return None
+
+
+def fry_cv2_imwrite(filename, img, params=None):
+    """Image writing that supports non-ASCII (e.g. Chinese) paths"""
+    try:
+        ext = Path(filename).suffix.lower()
+        # cv2.imencode expects a (possibly empty) sequence for params, not None
+        result, encoded_img = cv2.imencode(ext, img, params or [])
+
+        if result:
+            with open(filename, 'wb') as f:
+                encoded_img.tofile(f)
+            return True
+        else:
+            fry_algo_print("WARN", f"Warning: Unable to encode image: {filename}")
+            return False
+    except Exception as e:
+        fry_algo_print("ERROR", f"Error: Unable to write file: {filename}")
+        fry_algo_print("ERROR", f"Error details: {str(e)}")
+        return False
+
+
+def fry_opencv_chinese_path_init():
+    """Monkey-patch OpenCV I/O so non-ASCII paths work"""
+    cv2.imread = fry_cv2_imread
+    cv2.imwrite = fry_cv2_imwrite
+
+
+# Install the OpenCV path patch once at import time
+OPENCV_IO_ALREADY_INIT = False
+if not OPENCV_IO_ALREADY_INIT:
+    fry_opencv_chinese_path_init()
+    OPENCV_IO_ALREADY_INIT = True
+
+
+class FryBisenetV2Predictor:
+    """BiSeNetV2 语义分割预测器"""
+
+    def __init__(self,
+                 pth_path: str,
+                 real_seg_class_dict: Dict[int, str],
+                 imgSize_train_dict: Dict[str, int],
+                 confidence: float = 0.5,
+                 label_colors_dict: Optional[Dict[str, Tuple[int, int, int]]] = None,
+                 input_channels: int = 3,
+                 aux_mode: str = "eval"):
+        """
+        初始化预测器
+
+        Args:
+            pth_path: 模型权重文件路径
+            real_seg_class_dict: 真实的分割类别字典,格式为 {类别id: 类别名称}
+            imgSize_train_dict: 训练时的图像尺寸,格式为 {'width': 宽度, 'height': 高度}
+            confidence: 置信度阈值
+            label_colors_dict: 类别颜色字典,格式为 {类别名称: (R, G, B)}
+            input_channels: 输入通道数
+            aux_mode: 辅助模式
+        """
+        self.pth_path = pth_path
+        self.real_seg_class_dict = real_seg_class_dict
+        self.imgSize_train_dict = imgSize_train_dict
+        self.confidence = confidence
+        self.input_channels = input_channels
+        self.aux_mode = aux_mode
+
+        # Build the full segmentation class dict (background class included)
+        self.seg_class_dict = {0: '___background___'}
+        self.seg_class_dict.update(real_seg_class_dict)
+        self.n_classes = len(self.seg_class_dict)
+
+        # Use the provided color dict, generating any missing colors
+        self.label_colors_dict = self._generate_label_colors(label_colors_dict)
+
+        # Pick the compute device
+        self.device = self._get_device()
+
+        # Initialize the model
+        self.model = self._init_model()
+
+    @staticmethod
+    def _get_device():
+        """获取计算设备"""
+        return torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+    def _generate_label_colors(self, label_colors_dict: Optional[Dict[str, Tuple[int, int, int]]]) -> Dict[
+        str, Tuple[int, int, int]]:
+        """
+        生成或补充类别颜色字典
+
+        Args:
+            label_colors_dict: 用户提供的颜色字典
+
+        Returns:
+            完整的颜色字典
+        """
+        if label_colors_dict is None:
+            label_colors_dict = {}
+
+        # 为所有类别生成颜色(除了背景)
+        np.random.seed(42)  # 设置随机种子以保证颜色一致性
+
+        for class_id, class_name in self.seg_class_dict.items():
+            if class_id == 0:  # 跳过背景类
+                continue
+
+            if class_name not in label_colors_dict:
+                # 生成随机颜色,避免太暗的颜色
+                color = tuple(np.random.randint(50, 256, 3).tolist())
+                label_colors_dict[class_name] = color
+
+        return label_colors_dict
+
+    def _load_model_weights(self, model: nn.Module, modelLoadPth: str) -> nn.Module:
+        """
+        加载模型权重
+
+        Args:
+            model: 模型对象
+            modelLoadPth: 权重文件路径
+
+        Returns:
+            加载权重后的模型
+        """
+        fry_algo_print("信息", "加载预训练参数...")
+
+        weights_dict = torch.load(modelLoadPth, map_location=self.device)
+        new_weights_dict = {}
+
+        exclude_layer_list = ["aux2", 'aux3', 'aux4', 'aux5']
+
+        all_layer_num = 0
+        ok_layer_num = 0
+
+        for k, v in weights_dict.items():
+            all_layer_num += 1
+            is_exclude = False
+
+            # 检查是否需要排除该层
+            for exclude_str in exclude_layer_list:
+                if exclude_str in k:
+                    is_exclude = True
+                    break
+
+            if not is_exclude:
+                new_weights_dict[k] = v
+                ok_layer_num += 1
+            else:
+                fry_algo_print("信息", f"被排除的层:{k}")
+
+        # 加载权重,不要求严格对等
+        model.load_state_dict(new_weights_dict, strict=False)
+        fry_algo_print("信息", f"成功加载模型层数:{ok_layer_num}/{all_layer_num}")
+
+        return model
+
+    def _init_model(self) -> nn.Module:
+        """初始化并加载模型"""
+        model = BiSeNetV2(self.n_classes, self.input_channels, self.aux_mode)
+        model = model.to(self.device)
+        model = self._load_model_weights(model, self.pth_path)
+        model.eval()
+        return model
+
+    def _predict_tensor(self, CHW: torch.Tensor) -> Dict:
+        """
+        对单个图像张量进行预测
+
+        Args:
+            CHW: 形状为 (C, H, W) 的图像张量
+
+        Returns:
+            包含预测结果的字典
+        """
+        with torch.no_grad():
+            NCHW = CHW.unsqueeze(0)
+            # 因为单张图片推理 batch norm 层会报错,所以复制一份
+            NCHW2 = torch.cat([NCHW, NCHW], dim=0)
+
+            # 模型推理
+            logits, *logits_aux = self.model(NCHW2)
+
+            # 计算概率和预测类别
+            probs = torch.softmax(logits, dim=1)
+            preds = torch.argmax(probs, dim=1)
+
+            # 转换为numpy数组
+            probs_np = probs.detach().cpu().numpy()
+            preds_np = preds.detach().cpu().numpy()
+
+            # 取第一张图片的结果
+            ansImg_needSave = preds_np[0]
+            ansProbs = probs_np[0]
+
+            return {
+                "ans_img": ansImg_needSave,
+                "probs": ansProbs,
+                "file_name": "result"
+            }
+
+    def _save_result_json(self, result: Dict, json_path: Path):
+        """
+        保存预测结果为JSON文件
+
+        Args:
+            result: 预测结果字典
+            json_path: JSON文件保存路径
+        """
+        # 将numpy数组转换为可序列化的格式
+        json_result = {}
+
+        for key, value in result.items():
+            if isinstance(value, np.ndarray):
+                json_result[key] = value.tolist()
+            elif isinstance(value, dict):
+                json_result[key] = {}
+                for k, v in value.items():
+                    if isinstance(v, np.ndarray):
+                        json_result[key][k] = v.tolist()
+                    else:
+                        json_result[key][k] = v
+            else:
+                json_result[key] = value
+
+        with open(json_path, 'w', encoding='utf-8') as f:
+            json.dump(json_result, f, ensure_ascii=False, indent=2)
+
+    def predict_from_image(self, img_bgr: np.ndarray) -> Dict:
+        """
+        直接从解码后的图像数据(numpy数组)进行预测。
+
+        Args:
+            img_bgr: BGR格式的图像,作为一个numpy数组。
+
+        Returns:
+            预测结果字典。
+        """
+        # 检查通道数是否匹配
+        shape = img_bgr.shape
+        image_channel = shape[2] if len(shape) == 3 else 1
+        if image_channel != self.input_channels:
+            raise ValueError(
+                f"输入图片的通道数和模型不匹配:image_channel:{image_channel},input_channels:{self.input_channels}")
+
+        # 获取原始图片尺寸
+        height, width = img_bgr.shape[:2]
+        originImgSize = {'width': width, 'height': height}
+
+        # 预处理
+        imgTensor_CHW_norm = predict_preprocess(img_bgr, self.imgSize_train_dict)
+
+        # 预测
+        ansImgDict = self._predict_tensor(imgTensor_CHW_norm)
+
+        # 创建结果
+        per_img_seg_result = create_result_singleImg(
+            self.seg_class_dict,
+            ansImgDict,
+            originImgSize,
+            self.imgSize_train_dict,
+            confidence=self.confidence
+        )
+
+        return per_img_seg_result
+
+    def predict_single_image(self,
+                             img_path: str,
+                             save_visualization: bool = True,
+                             save_json: bool = True,
+                             answer_json_dir: Optional[str] = None,
+                             input_channels=3
+                             ) -> Dict:
+        """
+        预测单张图片
+
+        Args:
+            img_path: 图片路径
+            save_visualization: 是否保存可视化结果
+            save_json: 是否保存JSON结果
+            answer_json_dir: JSON结果保存目录
+
+        Returns:
+            预测结果字典
+        """
+
+        img_path_obj = Path(img_path).resolve()
+        img_path_parent_obj = img_path_obj.parent
+        answer_json_dir_obj = Path(answer_json_dir).resolve()
+
+        # Read the image
+        img_bgr = cv2.imread(img_path)
+        if img_bgr is None:
+            raise ValueError(f"Could not read image: {img_path}")
+
+        shape = img_bgr.shape
+        image_channel = shape[2]
+        fry_algo_print("INFO", f"Channels required by the model: {input_channels}")
+        fry_algo_print("INFO", f"Channels in the test image: {image_channel}")
+
+        if image_channel != input_channels:
+            # if image_channel==3 and input_channels==1:
+            #     img_bgr = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
+            # elif image_channel==4 and input_channels==1:
+            #     img_bgr = cv2.cvtColor(img_bgr, cv2.COLOR_BGRA2GRAY)
+            # elif image_channel==1 and input_channels==3:
+            #     img_bgr = cv2.cvtColor(img_bgr, cv2.COLOR_GRAY2BGR)
+            # else:
+            #     raise ValueError(f"Image channel count does not match the model: image_channel:{image_channel}, input_channels:{input_channels}")
+
+            raise ValueError(
+                f"Image channel count does not match the model: image_channel:{image_channel}, input_channels:{input_channels}")
+
+        # Original image size
+        height, width = img_bgr.shape[:2]
+        originImgSize = {'width': width, 'height': height}
+
+        # Preprocess
+        imgTensor_CHW_norm = predict_preprocess(img_bgr, self.imgSize_train_dict)
+
+        # Predict
+        ansImgDict = self._predict_tensor(imgTensor_CHW_norm)
+
+        # Build the result
+        per_img_seg_result = create_result_singleImg(
+            self.seg_class_dict,
+            ansImgDict,
+            originImgSize,
+            self.imgSize_train_dict,
+            confidence=self.confidence
+        )
+
+        # Save the JSON result
+        if save_json and answer_json_dir:
+            json_dir = Path(answer_json_dir)
+            json_dir.mkdir(parents=True, exist_ok=True)
+
+            # Image file name without extension
+            img_name = Path(img_path).stem
+            json_path = json_dir / f"{img_name}.json"
+
+            self._save_result_json(per_img_seg_result, json_path)
+            fry_algo_print("OK", f"JSON result saved to: {json_path}")
+
+        # Save the visualization
+        if save_visualization:
+            result_img = process_detection_result(img_bgr, per_img_seg_result, self.label_colors_dict)
+            output_path = str(answer_json_dir_obj / f"{Path(img_path).stem}_result.jpg")
+
+            cv2.imwrite(output_path, result_img)
+            fry_algo_print("OK", f"Visualization saved to: {output_path}")
+
+        return per_img_seg_result
+
+    def predict_batch(self,
+                      img_paths: List[str],
+                      save_visualization: bool = True,
+                      save_json: bool = True,
+                      answer_json_dir: Optional[str] = None,
+                      input_channels=3
+                      ) -> List[Dict]:
+        """
+        批量预测图片
+
+        Args:
+            img_paths: 图片路径列表
+            save_visualization: 是否保存可视化结果
+            save_json: 是否保存JSON结果
+            answer_json_dir: JSON结果保存目录
+            output_dir: 可视化结果保存目录
+
+        Returns:
+            所有图片的预测结果列表
+        """
+
+        answer_json_dir_obj = Path(answer_json_dir).resolve()
+
+        results = []
+
+        Path(answer_json_dir).mkdir(parents=True, exist_ok=True)
+
+        # Process each image
+        for i, img_path in enumerate(img_paths):
+            fry_algo_print("INFO", f"Processing image {i + 1}/{len(img_paths)}: {img_path}")
+
+            try:
+                # Read the image
+                img_bgr = cv2.imread(img_path)
+                if img_bgr is None:
+                    fry_algo_print("INFO", f"Warning: could not read image {img_path}")
+                    continue
+
+                shape = img_bgr.shape
+                image_channel = shape[2]
+
+                if image_channel != input_channels:
+                    fry_algo_print("INFO", f"Channels required by the model: {input_channels}")
+                    fry_algo_print("INFO", f"Channels in the test image: {image_channel}")
+                    fry_algo_print("ERROR",
+                                   f"Image channel count does not match the model: image_channel:{image_channel}, input_channels:{input_channels}")
+                    continue
+
+                # Original image size
+                height, width = img_bgr.shape[:2]
+                originImgSize = {'width': width, 'height': height}
+
+                # Preprocess
+                imgTensor_CHW_norm = predict_preprocess(img_bgr, self.imgSize_train_dict)
+
+                # Predict
+                ansImgDict = self._predict_tensor(imgTensor_CHW_norm)
+
+                # Build the result
+                per_img_seg_result = create_result_singleImg(
+                    self.seg_class_dict,
+                    ansImgDict,
+                    originImgSize,
+                    self.imgSize_train_dict,
+                    confidence=self.confidence
+                )
+
+                # Save the JSON result
+                if save_json and answer_json_dir:
+                    json_dir = Path(answer_json_dir)
+                    json_dir.mkdir(parents=True, exist_ok=True)
+
+                    img_name = Path(img_path).stem
+                    json_path = json_dir / f"{img_name}.json"
+                    self._save_result_json(per_img_seg_result, json_path)
+
+                # Save the visualization
+                if save_visualization:
+                    result_img = process_detection_result(img_bgr, per_img_seg_result, self.label_colors_dict)
+
+                    output_path = answer_json_dir_obj / f"{Path(img_path).stem}_result.jpg"
+
+                    cv2.imwrite(str(output_path), result_img)
+
+                results.append(per_img_seg_result)
+
+            except Exception as e:
+                fry_algo_print("FAIL", f"Error while processing image {img_path}: {e}")
+                continue
+
+        fry_algo_print("OK", f"Batch finished: {len(results)}/{len(img_paths)} images processed successfully")
+        return results
+
+
+def main():
+    """使用示例"""
+    # 配置参数
+    pth_path = r"C:\Code\ML\Project\CheckCardBoxAndDefectServer\Model\outer_box.pth"
+    input_channels = 3
+
+    real_seg_class_dict = {1: 'outer_box'}
+
+    # Optional: set a specific color per class
+    label_colors_dict = {
+        'outer_box': (225, 0, 0),
+    }
+
+    imgSize_train_dict = {'width': 1280, 'height': 1280}
+    confidence = 0.5
+
+    # Create the predictor
+    predictor = FryBisenetV2Predictor(
+        pth_path=pth_path,
+        real_seg_class_dict=real_seg_class_dict,
+        imgSize_train_dict=imgSize_train_dict,
+        confidence=confidence,
+        label_colors_dict=label_colors_dict,
+        input_channels=input_channels,
+    )
+
+    # Single-image prediction
+    print("=== Single-image prediction ===")
+    now_img_path = r"C:\Code\ML\Project\CheckCardBoxAndDefectServer\temp\img.png"
+    answer_json_dir = r"C:\Code\ML\Project\CheckCardBoxAndDefectServer\temp\outer"
+
+    result = predictor.predict_single_image(
+        img_path=now_img_path,
+        save_visualization=True,
+        save_json=True,
+        answer_json_dir=answer_json_dir,
+        input_channels=input_channels,
+    )
+
+    # Batch prediction example
+    # print("\n=== Batch prediction ===")
+    # img_paths = [
+    #     r"input_output\images\coaxis_0008.jpg",
+    #     r"input_output\images\coaxis_0082.jpg",
+    #     r"input_output\images\ring_0001.jpg",
+    #     r"input_output\images\Pokemon_back_for_Edge_0001.jpg",
+    # ]
+    #
+    # results = predictor.predict_batch(
+    #     img_paths=img_paths,
+    #     save_visualization=True,
+    #     save_json=True,
+    #     answer_json_dir=answer_json_dir,
+    #     input_channels=input_channels,
+    # )
+
+
+def _test_pokemon_inner_box():
+    # Configuration
+    pth_path = r"C:\Code\ML\Project\CheckCardBoxAndDefectServer\Model\inner_box.pth"
+    input_channels = 3
+
+    real_seg_class_dict = {1: 'inner_box'}
+
+    # Optional: set a specific color per class
+    label_colors_dict = {
+        'outer_box': (255, 0, 0),
+    }
+
+    imgSize_train_dict = {'width': 1280, 'height': 1280}
+    confidence = 0.5
+
+    # Create the predictor
+    predictor = FryBisenetV2Predictor(
+        pth_path=pth_path,
+        real_seg_class_dict=real_seg_class_dict,
+        imgSize_train_dict=imgSize_train_dict,
+        confidence=confidence,
+        label_colors_dict=label_colors_dict,
+        input_channels=input_channels,
+    )
+
+    # Single-image prediction
+    print("=== Single-image prediction ===")
+    now_img_path = r"C:\Code\ML\Project\CheckCardBoxAndDefectServer\temp\img.png"
+    answer_json_dir = r"C:\Code\ML\Project\CheckCardBoxAndDefectServer\temp\inner"
+
+    result = predictor.predict_single_image(
+        img_path=now_img_path,
+        save_visualization=True,
+        save_json=True,
+        answer_json_dir=answer_json_dir
+    )
+
+
+def _test_pokemon_back_edge():
+    # Configuration
+    pth_path = r"C:\Code\ML\Project\CheckCardBoxAndDefectServer\Model\back_defect.pth"
+    input_channels = 3
+
+    real_seg_class_dict = {
+        1: 'wear',
+        2: 'wear_and_impact',
+        3: 'impact',
+        4: 'damaged',
+        5: 'wear_and_stain',
+    }
+
+    # Optional: set a specific color per class
+    # label_colors_dict = {
+    #     'outer_box': (255, 0, 0),
+    # }
+
+    imgSize_train_dict = {'width': 512, 'height': 512}
+    confidence = 0.5
+
+    # Create the predictor
+    predictor = FryBisenetV2Predictor(
+        pth_path=pth_path,
+        real_seg_class_dict=real_seg_class_dict,
+        imgSize_train_dict=imgSize_train_dict,
+        confidence=confidence,
+        input_channels=input_channels,
+    )
+
+    # Single-image prediction
+    print("=== Single-image prediction ===")
+    now_img_path = r"C:\Code\ML\Project\CheckCardBoxAndDefectServer\temp\img_2.png"
+    answer_json_dir = r"C:\Code\ML\Project\CheckCardBoxAndDefectServer\temp\defect"
+
+    result = predictor.predict_single_image(
+        img_path=now_img_path,
+        save_visualization=True,
+        save_json=True,
+        answer_json_dir=answer_json_dir,
+        input_channels=input_channels
+    )
+
+def _test_pokemon_no_reflect_front():
+    # Configuration
+    pth_path = r"C:\Code\ML\Project\CheckCardBoxAndDefectServer\Model\no_reflect_front_defect.pth"
+    input_channels = 3
+
+    real_seg_class_dict = {
+        1: 'scratch',
+        2: 'pit',
+        3: 'stain'
+    }
+
+    # Optional: set a specific color per class
+    # label_colors_dict = {
+    #     'outer_box': (255, 0, 0),
+    # }
+
+    imgSize_train_dict = {'width': 512, 'height': 512}
+    confidence = 0.5
+
+    # Create the predictor
+    predictor = FryBisenetV2Predictor(
+        pth_path=pth_path,
+        real_seg_class_dict=real_seg_class_dict,
+        imgSize_train_dict=imgSize_train_dict,
+        confidence=confidence,
+        input_channels=input_channels,
+    )
+
+    # Single-image prediction
+    print("=== Single-image prediction ===")
+    now_img_path = r"C:\Code\ML\Project\CheckCardBoxAndDefectServer\temp\img_1.png"
+    answer_json_dir = r"C:\Code\ML\Project\CheckCardBoxAndDefectServer\temp\no_reflect_front_defect"
+
+    result = predictor.predict_single_image(
+        img_path=now_img_path,
+        save_visualization=True,
+        save_json=True,
+        answer_json_dir=answer_json_dir,
+        input_channels=input_channels
+    )
+
+if __name__ == "__main__":
+    # main()
+    # _test_pokemon_inner_box()
+    # _test_pokemon_back_edge()
+    _test_pokemon_no_reflect_front()

+ 198 - 0
app/utils/handle_result.py

@@ -0,0 +1,198 @@
+import cv2
+import numpy as np
+from typing import List, Dict, Tuple, Union
+
+
+class ShapeDrawer:
+    """形状绘制工具类"""
+
+    @staticmethod
+    def points_to_contour(points: List[List[int]]) -> np.ndarray:
+        """
+        将点集转换为轮廓
+
+        Args:
+            points: 点集列表,每个点是 [x, y] 格式
+
+        Returns:
+            np.ndarray: OpenCV格式的轮廓
+        """
+        # 确保点集是整数类型的numpy数组
+        contour = np.array(points, dtype=np.int32)
+        # 重塑为OpenCV轮廓格式 (N, 1, 2)
+        return contour.reshape((-1, 1, 2))
+
+    @staticmethod
+    def contour_to_mask(contour: np.ndarray,
+                        image_shape: Tuple[int, int]) -> np.ndarray:
+        """
+        将轮廓转换为掩码
+
+        Args:
+            contour: OpenCV格式的轮廓
+            image_shape: 图像形状 (height, width)
+
+        Returns:
+            np.ndarray: 二值掩码
+        """
+        # 创建空白掩码
+        mask = np.zeros(image_shape, dtype=np.uint8)
+        # 填充轮廓
+        cv2.fillPoly(mask, [contour], 255)
+        return mask
+
+    @staticmethod
+    def apply_mask_to_image(image: np.ndarray,
+                            mask: np.ndarray,
+                            color: Tuple[int, int, int],
+                            alpha: float = 0.5) -> np.ndarray:
+        """
+        将掩码应用到图像上
+
+        Args:
+            image: 原始图像
+            mask: 二值掩码
+            color: BGR颜色
+            alpha: 透明度
+
+        Returns:
+            np.ndarray: 带有掩码的图像
+        """
+        # 创建图像副本
+        result = image.copy()
+
+        # 创建彩色掩码
+        color_mask = np.zeros_like(image)
+        # color_mask[mask > 0] = color
+
+        result[mask > 0] = color
+
+        # 混合图像
+        # cv2.addWeighted(color_mask, alpha, result, 1 - alpha, 0, result)
+
+        return result
+
+    @staticmethod
+    def draw_shape_from_points(image: np.ndarray,
+                               points: List[List[int]],
+                               color: Tuple[int, int, int] = (0, 255, 0),
+                               alpha: float = 0.5,
+                               draw_contour: bool = True) -> np.ndarray:
+        """
+        从点集绘制形状到图像上
+
+        Args:
+            image: 原始图像
+            points: 点集列表
+            color: BGR颜色
+            alpha: 透明度
+            draw_contour: 是否绘制轮廓线
+
+        Returns:
+            np.ndarray: 处理后的图像
+        """
+        # 1. 转换为轮廓
+        contour = ShapeDrawer.points_to_contour(points)
+
+        # 2. 创建掩码
+        mask = ShapeDrawer.contour_to_mask(contour, image.shape[:2])
+
+        # 3. 应用掩码
+        result = ShapeDrawer.apply_mask_to_image(image, mask, color, alpha)
+
+        # 4. 可选:绘制轮廓线
+        if draw_contour:
+            cv2.drawContours(result, [contour], -1, color, 2)
+
+        return result
+
+
+def process_detection_result(image: np.ndarray,
+                             detection: Dict,
+                             label_colors: Dict
+                             ) -> np.ndarray:
+    """
+    处理检测结果并在图像上绘制
+
+    Args:
+        image: 原始图像
+        detection: 检测结果字典
+
+    Returns:
+        np.ndarray: 处理后的图像
+    """
+    result = image.copy()
+
+
+
+    # 处理每个检测到的形状
+    for shape in detection['shapes']:
+        points = shape['points']
+        label = shape['label']
+        conf = shape['probability']
+
+        # 获取颜色(默认绿色)
+        color = label_colors.get(label, (0, 255, 0))
+
+        # 绘制形状
+        result = ShapeDrawer.draw_shape_from_points(
+            result,
+            points,
+            color=color,
+            alpha=0.3,
+            draw_contour=True
+        )
+
+        # 添加标签和置信度
+        first_point = points[0]
+        is_print_text = False
+        if is_print_text:
+            cv2.putText(
+                result,
+                f"{label}: {conf:.2f}",
+                (first_point[0], first_point[1] - 10),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.5,
+                color,
+                2
+            )
+
+    return result
+
+
+# Example usage
+if __name__ == "__main__":
+    # Load a sample image
+    image = cv2.imread("example.jpg")
+
+    # Different colors for different labels
+    label_colors = {
+        'baseboard': (0, 255, 0),
+    }
+
+    # Sample detection result
+    detection_result = {
+        'num': 1,
+        'cls': [1],
+        'names': ['baseboard'],
+        'conf': 0.9966249431977074,
+        'shapes': [{
+            'class_num': 1,
+            'label': 'baseboard',
+            'probability': 0.9966249431977074,
+            'points': [[5, 171], [4, 172], [0, 172], [0, 487], [34, 487],
+                       # ... remaining points ...
+                       [1019, 172], [1018, 171]]
+        }]
+    }
+
+    # Draw the detections
+    result = process_detection_result(image, detection_result, label_colors)
+
+    # Show the result
+    cv2.imshow("Result", result)
+    cv2.waitKey(0)
+    cv2.destroyAllWindows()
+
+    # Save the result
+    # cv2.imwrite("result.jpg", result)

+ 36 - 0
app/utils/predict_preprocess.py

@@ -0,0 +1,36 @@
+import cv2
+import numpy as np
+import torch
+import torchvision.transforms as transforms
+
+from app.utils.data_augmentation import LetterBox
+
+
+def predict_preprocess(img_bgr, imgSize_train):
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
+
+    letterBox = LetterBox(imgSize_train)
+    img_rgb_letterbox = letterBox.handle_single_img(img_rgb)
+
+    img_np = np.array(img_rgb_letterbox)
+
+    imgTensor = torch.tensor(img_np, dtype=torch.float32, device=device)
+
+    # Normalize to [0, 1] by dividing every element by 255
+    imgTensor = imgTensor * (1 / 255.0)
+
+    # Reorder from [H, W, C] to [C, H, W]
+    imgTensor_CHW = imgTensor.permute(2, 0, 1)
+
+    # mean (0,0,0) / std (1,1,1) leaves values unchanged; kept as a normalization hook
+    normaliz_operate_c3 = transforms.Compose([
+        transforms.Normalize(mean=(0, 0, 0), std=(1, 1, 1)),
+    ])
+    imgTensor_CHW_norm = normaliz_operate_c3(imgTensor_CHW)
+
+    return imgTensor_CHW_norm

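A quick shape check for the preprocessing step (a sketch with a synthetic image):

    import numpy as np

    from app.utils.predict_preprocess import predict_preprocess

    # A dummy 480x640 BGR image letterboxed to 512x512 and scaled to [0, 1].
    img = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)
    t = predict_preprocess(img, {'width': 512, 'height': 512})
    print(t.shape, t.dtype)  # torch.Size([3, 512, 512]) torch.float32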
+ 5 - 0
run.py

@@ -0,0 +1,5 @@
+import uvicorn
+
+if __name__ == "__main__":
+    print("http://127.0.0.1:7744/docs")
+    uvicorn.run("app.main:app", host="0.0.0.0", port=7744)