1 tháng trước cách đây · 1c68581f14
--- a/app/services/video_service.py
+++ b/app/services/video_service.py
@@ -544,20 +544,54 @@ class VideoService:
 
				         return min(0.65 * series_score + 0.35 * number_score, 1.0)
			
 
				 
			
 
				     def _run_ocr(self, frame, bbox: Optional[tuple[int, int, int, int]]) -> str:
			
 
				-        """调用 OCR 引擎对关注区域(卡片或手)进行文本识别"""
			
 
				+        """调用 OCR 引擎对关注区域进行文本识别，加入抗弹幕干扰的分块策略"""
			
 
				         engine = self._ensure_ocr_engine()
			
 
				         if engine is None:
			
 
				             return ""
			
 
				 
			
 
				         focus_region = self._focus_region(frame, bbox)
			
 
				+        if focus_region is None or focus_region.shape[0] == 0 or focus_region.shape[1] == 0:
			
 
				+            return ""
			
 
				+
			
 
				+        texts: list[str] = []
			
 
				+
			
 
				+        # 1. 常规全图 OCR (可能被中间的弹幕压制，但能提取出分散的编号等)
			
 
				         try:
			
 
				-            result = engine(focus_region)
			
 
				-            return self._extract_ocr_text(result)
			
 
				+            result_full = engine(focus_region)
			
 
				+            texts.append(self._extract_ocr_text(result_full))
			
 
				         except Exception as exc:
			
 
				             if not self._ocr_runtime_warning_sent:
			
 
				-                logger.warning(f"OCR runtime failure, fallback enabled: {exc}")
			
 
				+                logger.warning(f"OCR full region failure: {exc}")
			
 
				                 self._ocr_runtime_warning_sent = True
			
 
				-            return ""
			
 
				+
			
 
				+        # 2. 分块特写 OCR，避开中心弹幕区，降低识别阈值
			
 
				+        h, w = focus_region.shape[:2]
			
 
				+        if h > 60 and w > 60:
			
 
				+            # A. 专门识别底部 40% (绝大多数球星卡球员名字、宝可梦卡信息在底部)
			
 
				+            try:
			
 
				+                bottom_half = focus_region[int(h * 0.6):h, :]
			
 
				+                result_bottom = engine(bottom_half)
			
 
				+                texts.append(self._extract_ocr_text(result_bottom))
			
 
				+            except Exception:
			
 
				+                pass
			
 
				+
			
 
				+            # B. 专门识别顶部 40% (通常有 Bowman 1st 标志、帕尼尼系列名、或宝可梦名字)
			
 
				+            try:
			
 
				+                top_half = focus_region[0:int(h * 0.4), :]
			
 
				+                result_top = engine(top_half)
			
 
				+                texts.append(self._extract_ocr_text(result_top))
			
 
				+            except Exception:
			
 
				+                pass
			
 
				+
			
 
				+        # 将全图、顶部、底部的识别结果合并（后续的 token_overlap_score 会自动处理去重）
			
 
				+        combined_text = " ".join(texts)
			
 
				+
			
 
				+        # 3. 正则剔除常见的直播间高频干扰词 (防止误匹配)
			
 
				+        # 这里的词汇通常是海外拆卡直播间(Whatnot/TikTok)经常出现的系统提示语
			
 
				+        ignore_words = r"(?i)\b(bought|break|hobby|jumbo|box|close|spot|nice|snack|packs)\b"
			
 
				+        combined_text = re.sub(ignore_words, " ", combined_text)
			
 
				+
			
 
				+        return combined_text
			
 
				 
			
 
				     def _score_candidates(
			
 
				             self,