Ver Fonte

feat(spider): 更新宝可梦爬虫任务配置

- 启用 tcg jp/us 宝可梦爬虫立即执行功能
- 将 jp/us 爬虫定时任务调整为每周一运行
- 修改 fz 爬虫任务为每日运行
- 注释掉 jz 爬虫任务调度
- 将 jp/us 爬虫最大页数限制从 200 提高到 20000
charley há 3 semanas
pai
commit
1ab5b434fb

+ 9 - 7
pokemon_tcg_spider/start_pokemon_spider.py

@@ -40,19 +40,21 @@ def schedule_task():
     # 立即运行一次任务
     # jp_spider = JPPokemonCardSpider(log=logger)
     # run_threaded(jp_spider.run)
-    #
-    # run_threaded(us_pokemon_main(log=logger))
-    # run_threaded(fz_pokemon_main(log=logger))
+
+    # tcg jp/us pokemon 立即运行
+    run_threaded(jp_pokemon_main(log=logger))
+    run_threaded(us_pokemon_main(log=logger))
 
     # 设置定时任务
     # schedule.every().day.at("02:01").do(run_threaded, JPPokemonCardSpider(log=logger).run)
     # schedule.every().day.at("01:01").do(run_threaded, us_pokemon_main, log=logger)
 
     # 20260327重启pokemon任务, jp和us的爬虫任务, 改为从tcg网站获取
-    schedule.every().day.at("02:01").do(run_threaded, jp_pokemon_main, log=logger)
-    schedule.every().day.at("01:01").do(run_threaded, us_pokemon_main, log=logger)
-    schedule.every().tuesday.at("03:01").do(run_threaded, fz_pokemon_main, log=logger)
-    schedule.every().tuesday.at("04:01").do(run_threaded, jz_pokemon_main, log=logger)
+    # 20260409 tcg jp/us 改为每周一运行
+    schedule.every().monday.at("02:01").do(run_threaded, jp_pokemon_main, log=logger)
+    schedule.every().monday.at("01:01").do(run_threaded, us_pokemon_main, log=logger)
+    schedule.every().day.at("03:01").do(run_threaded, fz_pokemon_main, log=logger)
+    # schedule.every().day.at("04:01").do(run_threaded, jz_pokemon_main, log=logger)
 
     while True:
         schedule.run_pending()

+ 1 - 1
pokemon_tcg_spider/tcg_jp_pokemon_spider.py

@@ -199,7 +199,7 @@ def get_list_data(log, setUrlName, setName, sql_pool):
     :param sql_pool: sql_pool
     """
     page = 1
-    max_page = 200
+    max_page = 20000
     while page <= max_page:
         try:
             len_data_list = get_single_page(log, setUrlName, setName, page, sql_pool)

+ 1 - 1
pokemon_tcg_spider/tcg_us_pokemon_spider.py

@@ -201,7 +201,7 @@ def get_list_data(log, setUrlName, setName, sql_pool):
     :param sql_pool: sql_pool
     """
     page = 1
-    max_page = 200
+    max_page = 20000
     while page <= max_page:
         try:
             len_data_list = get_single_page(log, setUrlName, setName, page, sql_pool)