Parcourir la source

fix(jhs_rpc_spider): 修复市场页抓取及日志配置问题

- 启用日志记录,调整日志格式及保存策略
- 添加 JSON 解码错误重试机制(最多重试 3 次,每次间隔 2 秒)
- 修改市场页最大抓取页面数为800
- 增加获取页面异常捕获并记录错误日志
- 启用解析异常捕获,防止单页解析失败影响整体流程
- 修正设备ID注释,明确adb连接示例
- 注释掉 schedule_task 启动时立即执行 jhs_rpc_main 的调用,调整定时任务触发时间为05:00
charley il y a 1 semaine
Parent
commit
7c5353bd90
1 fichier modifié avec 34 ajouts et 25 suppressions
  1. 34 25
      jhs_rpc_spider/jhs_rpc_spider.py

+ 34 - 25
jhs_rpc_spider/jhs_rpc_spider.py

@@ -12,10 +12,12 @@ from typing import Any, Dict
 from datetime import datetime
 from mysql_pool import MySQLConnectionPool
 from jhs_raw_codec_client import JhsRawCodecClient
-from tenacity import retry, stop_after_attempt, wait_fixed
+from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception_type
 
 # TOKEN = "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJlbnYiOiJwcm9kdWN0aW9uIiwic3ViIjoyODI3NDU4LCJpc3MiOiJodHRwOi8vYXBpLmppaHVhbnNoZS5jb20vYXBpL21hcmtldC9hdXRoL2xvZ2luLW9yLXNpZ251cCIsImlhdCI6MTc3NTYzNzQzNSwiZXhwIjoxNzgwODIxNDM1LCJuYmYiOjE3NzU2Mzc0MzUsImp0aSI6InhiT3NsdUJRTzVWeHRabHQifQ.uHz7M-U0ewPgi5Qzr5P4eJbSdIUO_i_hmVE-0jsaG2Y"
-DEVICE_ID = "25051FDD4S018P"
+# DEVICE_ID = "127.0.0.1:5557" # adb connect 127.0.0.1:5557
+DEVICE_ID = "25051FDD4S018P" # adb connect 127.0.0.1:5557
+
 CLI_TARGET_SEC = 2
 TIMEOUT_SEC = 15
 
@@ -27,10 +29,10 @@ HEADERS = {
     "x-device-id": "6efe93931488e176",
 }
 
-# logger.remove()
-# logger.add("./logs/{time:YYYYMMDD}.log", encoding='utf-8', rotation="00:00",
-#            format="[{time:YYYY-MM-DD HH:mm:ss.SSS}] {level} {message}",
-#            level="DEBUG", retention="7 day")
+logger.remove()
+logger.add("./logs/{time:YYYYMMDD}.log", encoding='utf-8', rotation="00:00",
+           format="[{time:YYYY-MM-DD HH:mm:ss.SSS}] {level} {message}",
+           level="DEBUG", retention="7 day")
 
 
 def after_log(retry_state):
@@ -71,6 +73,7 @@ def get_proxys(log):
         raise e
 
 
+@retry(stop=stop_after_attempt(3), wait=wait_fixed(2), retry=retry_if_exception_type(json.JSONDecodeError), after=after_log)
 def fetch_market_page(
         log,
         page: int,
@@ -182,34 +185,39 @@ def parse_data(resp_data, sql_pool):
             "card_number": card_number,
             "rarity": rarity,
         }
-        print(data_dict)
+        # print(data_dict)
+        # print(type(data))
         info_list.append(data_dict)
 
-    # if info_list:
-    #     sql_pool.insert_many(table="jhs_product_record", data_list=info_list, ignore=True)
+    if info_list:
+        sql_pool.insert_many(table="jhs_product_record", data_list=info_list, ignore=True)
 
 
 def get_market_list(log, token: str, sql_pool):
     page = 1
-    max_page = 1000
+    max_page = 800
 
     with JhsRawCodecClient(device_id=DEVICE_ID, cli_target_sec=CLI_TARGET_SEC) as codec_client:
         with requests.Session() as http_sess:
             while page < max_page:
-                result = fetch_market_page(
-                    log=log,
-                    page=page,
-                    token=token,
-                    client=codec_client,
-                    session=http_sess,
-                    headers=HEADERS,
-                )
-                # print(page, result["decoded"])
-
                 try:
-                    parse_data(result["decoded"], sql_pool)
+                    result = fetch_market_page(
+                        log=log,
+                        page=page,
+                        token=token,
+                        client=codec_client,
+                        session=http_sess,
+                        headers=HEADERS,
+                    )
+                    # print(page, result["decoded"])
+
+                    try:
+                        parse_data(result["decoded"], sql_pool)
+                    except Exception as e:
+                        log.error(f"Error parsing page {page}: {e}")
+
                 except Exception as e:
-                    log.error(f"Error parsing page {page}: {e}")
+                    log.error(f"Error fetching page {page}: {e}")
 
                 page += 1
 
@@ -243,13 +251,14 @@ def schedule_task():
     """
     设置定时任务
     """
-    jhs_rpc_main(log=logger)
+    # jhs_rpc_main(log=logger)
 
-    schedule.every().day.at("01:31").do(jhs_rpc_main, log=logger)
+    schedule.every().day.at("05:00").do(jhs_rpc_main, log=logger)
     while True:
         schedule.run_pending()
         time.sleep(1)
 
 
 if __name__ == "__main__":
-    schedule_task()
+    schedule_task()
+    # jhs_rpc_main(log=logger)