|
|
@@ -544,20 +544,54 @@ class VideoService:
|
|
|
return min(0.65 * series_score + 0.35 * number_score, 1.0)
|
|
|
|
|
|
def _run_ocr(self, frame, bbox: Optional[tuple[int, int, int, int]]) -> str:
|
|
|
- """调用 OCR 引擎对关注区域(卡片或手)进行文本识别"""
|
|
|
+ """调用 OCR 引擎对关注区域进行文本识别,加入抗弹幕干扰的分块策略"""
|
|
|
engine = self._ensure_ocr_engine()
|
|
|
if engine is None:
|
|
|
return ""
|
|
|
|
|
|
focus_region = self._focus_region(frame, bbox)
|
|
|
+ if focus_region is None or focus_region.shape[0] == 0 or focus_region.shape[1] == 0:
|
|
|
+ return ""
|
|
|
+
|
|
|
+ texts: list[str] = []
|
|
|
+
|
|
|
+ # 1. 常规全图 OCR (可能被中间的弹幕压制,但能提取出分散的编号等)
|
|
|
try:
|
|
|
- result = engine(focus_region)
|
|
|
- return self._extract_ocr_text(result)
|
|
|
+ result_full = engine(focus_region)
|
|
|
+ texts.append(self._extract_ocr_text(result_full))
|
|
|
except Exception as exc:
|
|
|
if not self._ocr_runtime_warning_sent:
|
|
|
- logger.warning(f"OCR runtime failure, fallback enabled: {exc}")
|
|
|
+ logger.warning(f"OCR full region failure: {exc}")
|
|
|
self._ocr_runtime_warning_sent = True
|
|
|
- return ""
|
|
|
+
|
|
|
+ # 2. 分块特写 OCR,避开中心弹幕区,降低识别阈值
|
|
|
+ h, w = focus_region.shape[:2]
|
|
|
+ if h > 60 and w > 60:
|
|
|
+ # A. 专门识别底部 40% (绝大多数球星卡球员名字、宝可梦卡信息在底部)
|
|
|
+ try:
|
|
|
+ bottom_half = focus_region[int(h * 0.6):h, :]
|
|
|
+ result_bottom = engine(bottom_half)
|
|
|
+ texts.append(self._extract_ocr_text(result_bottom))
|
|
|
+ except Exception:
|
|
|
+ pass
|
|
|
+
|
|
|
+ # B. 专门识别顶部 40% (通常有 Bowman 1st 标志、帕尼尼系列名、或宝可梦名字)
|
|
|
+ try:
|
|
|
+ top_half = focus_region[0:int(h * 0.4), :]
|
|
|
+ result_top = engine(top_half)
|
|
|
+ texts.append(self._extract_ocr_text(result_top))
|
|
|
+ except Exception:
|
|
|
+ pass
|
|
|
+
|
|
|
+ # 将全图、顶部、底部的识别结果合并(后续的 token_overlap_score 会自动处理去重)
|
|
|
+ combined_text = " ".join(texts)
|
|
|
+
|
|
|
+ # 3. 正则剔除常见的直播间高频干扰词 (防止误匹配)
|
|
|
+ # 这里的词汇通常是海外拆卡直播间(Whatnot/TikTok)经常出现的系统提示语
|
|
|
+ ignore_words = r"(?i)\b(bought|break|hobby|jumbo|box|close|spot|nice|snack|packs)\b"
|
|
|
+ combined_text = re.sub(ignore_words, " ", combined_text)
|
|
|
+
|
|
|
+ return combined_text
|
|
|
|
|
|
def _score_candidates(
|
|
|
self,
|