1 ヶ月前 · 571f2d8cd4
--- a/Test/RapidOCR_test.py
+++ b/Test/RapidOCR_test.py
@@ -2,7 +2,7 @@ from rapidocr import RapidOCR
 
				 
			
 
				 engine = RapidOCR()
			
 
				 
			
 
				-img_url = r"C:\Code\ML\Image\_TEST_DATA\Card_test\test05\945e0cc0884c8766a5883ea9593def9d.png"
			
 
				+img_url = r"C:\Code\ML\Project\CardVideoSummary\static\frames\1c4e0b13-c22a-4b24-adc9-633ae8148d2c_18047000.jpg"
			
 
				 result = engine(img_url)
			
 
				 print(result)
			
 
				 
			
--- a/Test/seg_test02.py
+++ b/Test/seg_test02.py
@@ -43,5 +43,5 @@ def show(img_path):
 
				     plt.show()
			
 
				 
			
 
				 if __name__ == '__main__':
			
 
				-    show("../static/frames/9b6704fa-2bc7-40e7-b0a5-f41fd9b8b93f_10930288.jpg")
			
 
				+    show(r"C:\Code\ML\Project\CardVideoSummary\static\frames\1c4e0b13-c22a-4b24-adc9-633ae8148d2c_18047000.jpg")
			
 
				     print()
			
--- a/app/core/config.py
+++ b/app/core/config.py
@@ -1,6 +1,7 @@
 
				 import os
			
 
				 import socket
			
 
				 
			
 
				+
			
 
				 def get_local_ip():
			
 
				     """获取本机局域网 IP"""
			
 
				     try:
			
@@ -16,7 +17,6 @@ def get_local_ip():
 
				     return ip
			
 
				 
			
 
				 
			
 
				-
			
 
				 class Settings:
			
 
				     LOCAL_IP: str = get_local_ip()
			
 
				     LOCAL_PORT: int = 7721
			
@@ -40,7 +40,8 @@ class Settings:
 
				     # ==========================================
			
 
				 
			
 
				     # HuggingFace 语义分割模型路径 (用于识别手和卡片)
			
 
				-    VIDEO_SEG_MODEL_DIR: str = r"C:\Code\ML\Model\Card_Seg\segformer_card_hand02_safetensors"
			
 
				+    # VIDEO_SEG_MODEL_DIR: str = r"C:\Code\ML\Model\Card_Seg\segformer_card_hand02_safetensors"
			
 
				+    VIDEO_SEG_MODEL_DIR: str = "/home/martin/ML/Model/card_seg/segformer_card_hand02_safetensors"
			
 
				 
			
 
				     # 目标时间戳前后的搜索范围 (毫秒) -> 决定了去目标时间戳附近多大范围内寻找最佳帧
			
 
				     VIDEO_SEARCH_BEFORE_MS: int = int(os.getenv("VIDEO_SEARCH_BEFORE_MS", "1000"))  # 往前找/毫秒
			
@@ -50,7 +51,7 @@ class Settings:
 
				     VIDEO_ANALYSIS_FPS: float = float(os.getenv("VIDEO_ANALYSIS_FPS", "5.0"))
			
 
				 
			
 
				     # 只对综合得分排名前 K 的候选帧进行 OCR 识别 (OCR 比较耗时，没必要每帧都跑)
			
 
				-    VIDEO_OCR_TOP_K: int = int(os.getenv("VIDEO_OCR_TOP_K", "5"))
			
 
				+    VIDEO_OCR_TOP_K: int = int(os.getenv("VIDEO_OCR_TOP_K", "15"))
			
 
				 
			
 
				     # 目标停留时间 (秒) -> 用来奖励那些在画面中稳定停留的帧 (排除一闪而过的残影)
			
 
				     VIDEO_DWELL_TARGET_SECONDS: float = float(os.getenv("VIDEO_DWELL_TARGET_SECONDS", "1.2"))
			
@@ -67,4 +68,4 @@ class Settings:
 
				 settings = Settings()
			
 
				 
			
 
				 # 确保图片输出目录存在，避免运行报错
			
 
				-os.makedirs(settings.FRAMES_DIR, exist_ok=True)
			
 
				+os.makedirs(settings.FRAMES_DIR, exist_ok=True)
			
--- a/app/services/video_service.py
+++ b/app/services/video_service.py
@@ -6,6 +6,8 @@ from dataclasses import dataclass
 
				 from typing import Any, Optional
			
 
				 
			
 
				 import cv2
			
 
				+import numpy as np
			
 
				+import difflib
			
 
				 
			
 
				 from app.core.config import settings
			
 
				 from app.core.logger import get_logger
			
@@ -87,7 +89,9 @@ class VideoService:
 
				         方差越大，说明边缘信息越丰富（越不模糊）。
			
 
				         """
			
 
				         gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
			
 
				-        return float(cv2.Laplacian(gray, cv2.CV_64F).var())
			
 
				+        # 增加高斯模糊，过滤掉反光产生的噪点和高频毛刺
			
 
				+        blurred = cv2.GaussianBlur(gray, (3, 3), 0)
			
 
				+        return float(cv2.Laplacian(blurred, cv2.CV_64F).var())
			
 
				 
			
 
				     def calculate_weight(self, current_time_ms: int, target_time_ms: int) -> float:
			
 
				         """利用高斯函数计算时间权重。距离 target_time_ms 越近，返回值越接近 1.0"""
			
@@ -500,17 +504,17 @@ class VideoService:
 
				         score = 0.0
			
 
				         ocr_set = set(ocr_tokens)
			
 
				         for token in expected_tokens:
			
 
				-            if token in ocr_set:
			
 
				-                score += 1.0  # 完全命中给 1 分
			
 
				-                continue
			
 
				-
			
 
				-            # 兼容：如果目标 token 是 OCR结果的子串，或者 OCR结果是 token的子串 (例如 "PIKACHU" 匹配出 "PIKACH")
			
 
				-            partial_match = any(
			
 
				-                len(other) >= 2 and (token in other or other in token)
			
 
				-                for other in ocr_set
			
 
				-            )
			
 
				-            if partial_match:
			
 
				-                score += 0.6  # 部分匹配给 0.6 分
			
 
				+            best_ratio = 0.0
			
 
				+            for other in ocr_tokens:
			
 
				+                # 计算字符串相似度 (0 到 1)
			
 
				+                ratio = difflib.SequenceMatcher(None, token, other).ratio()
			
 
				+                if ratio > best_ratio:
			
 
				+                    best_ratio = ratio
			
 
				+
			
 
				+            if best_ratio > 0.85:
			
 
				+                score += 1.0  # 相似度极高，视为完全命中
			
 
				+            elif best_ratio > 0.6:
			
 
				+                score += 0.6  # 存在一定错别字，给部分分
			
 
				 
			
 
				         return min(score / len(expected_tokens), 1.0)
			
 
				 
			
@@ -611,8 +615,12 @@ class VideoService:
 
				         if not scoring_candidates:
			
 
				             scoring_candidates = candidates
			
 
				 
			
 
				-        # 找准当前窗口期的相对最大清晰度作为归一化基准
			
 
				-        max_sharpness = max(candidate.sharpness for candidate in scoring_candidates) if scoring_candidates else 0.0
			
 
				+        # 改为使用 90 分位数，防止单帧反光噪点拉爆整个分数池
			
 
				+        if scoring_candidates:
			
 
				+            sharpnesses = [c.sharpness for c in scoring_candidates]
			
 
				+            max_sharpness = float(np.percentile(sharpnesses, 90))
			
 
				+        else:
			
 
				+            max_sharpness = 0.0
			
 
				         segmentation_used = any(candidate.segmentation_used for candidate in candidates)
			
 
				 
			
 
				         expected = self._build_expected_text(card_output)