فهرست منبع

update wait time sleep 6.13.2

lei.chen 6 ماه پیش
والد
کامیت
fe3c2c6e66
2 فایل‌های تغییر یافته به همراه 53 افزوده شده و 53 حذف شده
  1. 51 51
      kaogujia_spider/kgj_kapai_spider.py
  2. 2 2
      kaogujia_spider/request_live_detail.py

+ 51 - 51
kaogujia_spider/kgj_kapai_spider.py

@@ -23,7 +23,7 @@ from request_live_detail import get_live_detail
 urllib3.disable_warnings()
 
 logger.remove()
-logger.add("./logs/{time:YYYYMMDD}.log", encoding='utf-8', rotation="00:00",
+logger.add("./kapai_logs/{time:YYYYMMDD}.log", encoding='utf-8', rotation="00:00",
            format="[{time:YYYY-MM-DD HH:mm:ss.SSS}] {level} {message}",
            level="DEBUG", retention="14 day")
 
@@ -485,7 +485,7 @@ def get_linked_live(log, sql_pool, product_id, headers):
         update_state(sql_pool, product_id, "live_state", 2)
 
 
-@retry(stop=stop_after_attempt(500), wait=wait_fixed(600), after=after_log)
+@retry(stop=stop_after_attempt(100), wait=wait_fixed(3600), after=after_log)
 def kgj_kapai_main(log):
     log.info("开始运行 kgj_kapai_main 爬虫任务............................................................")
     sql_pool = MySQLConnectionPool(log=log)
@@ -515,52 +515,52 @@ def kgj_kapai_main(log):
 
     try:
         keyword = "卡牌"
-        log.info("开始获取 product_list 数据............................................................")
-        # sql_product_id_list = sql_pool.select_all("SELECT DISTINCT product_id FROM kgj_kapai_product_list_record")
-        # sql_product_id_list = [item[0] for item in sql_product_id_list]
-        try:
-            get_kgj_product_list(log, keyword, sql_pool, KGJ_HEADERS)
-        except Exception as e:
-            log.error(f"Error main -> getting kgj product list: {e}")
-
-        # sql_product_id_list.clear()
-        log.info("product_list 数据获取完成............................................................")
-
-        time.sleep(5)
-
-        log.info("开始获取 product_overview 数据............................................................")
-        product_id_list_for_product = sql_pool.select_all(
-            "SELECT product_id FROM kgj_kapai_product_list_record WHERE product_state = 0")
-        product_id_list = [item[0] for item in product_id_list_for_product]
-        for product_id in product_id_list:
-            try:
-                log.info(f"开始获取 product_id: {product_id} 的 product_overview 数据............................")
-                get_kgj_product_overview(log, sql_pool, product_id, KGJ_HEADERS)
-            except Exception as e:
-                log.error(f"Error main -> getting kgj product overview: {e}")
-                time.sleep(random.randint(4, 6))
-                continue
-            time.sleep(random.randint(4, 6))
-        log.info("product_overview 数据获取完成............................................................")
-
-        time.sleep(5)
-
-        log.info("开始获取 linked_live 数据............................................................")
-        product_id_list_for_live = sql_pool.select_all(
-            "SELECT product_id FROM kgj_kapai_product_list_record WHERE live_state = 0")
-        product_id_list = [item[0] for item in product_id_list_for_live]
-        for product_id in product_id_list:
-            try:
-                log.info(f"开始获取 product_id: {product_id} 的 linked_live 数据............................")
-                get_linked_live(log, sql_pool, product_id, KGJ_HEADERS)
-            except Exception as e:
-                log.error(f"Error main -> getting kgj linked_live: {e}")
-                time.sleep(random.randint(4, 6))
-                continue
-            time.sleep(random.randint(4, 6))
-        log.info("linked_live 数据获取完成............................................................")
-
-        time.sleep(5)
+        # log.info("开始获取 product_list 数据............................................................")
+        # # sql_product_id_list = sql_pool.select_all("SELECT DISTINCT product_id FROM kgj_kapai_product_list_record")
+        # # sql_product_id_list = [item[0] for item in sql_product_id_list]
+        # try:
+        #     get_kgj_product_list(log, keyword, sql_pool, KGJ_HEADERS)
+        # except Exception as e:
+        #     log.error(f"Error main -> getting kgj product list: {e}")
+        #
+        # # sql_product_id_list.clear()
+        # log.info("product_list 数据获取完成............................................................")
+        #
+        # time.sleep(5)
+        #
+        # log.info("开始获取 product_overview 数据............................................................")
+        # product_id_list_for_product = sql_pool.select_all(
+        #     "SELECT product_id FROM kgj_kapai_product_list_record WHERE product_state = 0")
+        # product_id_list = [item[0] for item in product_id_list_for_product]
+        # for product_id in product_id_list:
+        #     try:
+        #         log.info(f"开始获取 product_id: {product_id} 的 product_overview 数据............................")
+        #         get_kgj_product_overview(log, sql_pool, product_id, KGJ_HEADERS)
+        #     except Exception as e:
+        #         log.error(f"Error main -> getting kgj product overview: {e}")
+        #         time.sleep(random.randint(4, 6))
+        #         continue
+        #     time.sleep(random.randint(4, 6))
+        # log.info("product_overview 数据获取完成............................................................")
+        #
+        # time.sleep(5)
+        #
+        # log.info("开始获取 linked_live 数据............................................................")
+        # product_id_list_for_live = sql_pool.select_all(
+        #     "SELECT product_id FROM kgj_kapai_product_list_record WHERE live_state = 0")
+        # product_id_list = [item[0] for item in product_id_list_for_live]
+        # for product_id in product_id_list:
+        #     try:
+        #         log.info(f"开始获取 product_id: {product_id} 的 linked_live 数据............................")
+        #         get_linked_live(log, sql_pool, product_id, KGJ_HEADERS)
+        #     except Exception as e:
+        #         log.error(f"Error main -> getting kgj linked_live: {e}")
+        #         time.sleep(random.randint(4, 6))
+        #         continue
+        #     time.sleep(random.randint(4, 6))
+        # log.info("linked_live 数据获取完成............................................................")
+        #
+        # time.sleep(5)
 
         log.info("开始获取 live_detail 数据............................................................")
         sql_room_id_list = sql_pool.select_all(
@@ -576,9 +576,9 @@ def kgj_kapai_main(log):
                 except Exception as e:
                     log.error(f"Error main -> getting kgj live_detail: {e}")
                     # update_linked_live_state(sql_pool, sql_info[0])
-                    time.sleep(random.randint(4, 6))
+                    time.sleep(random.uniform(1.5, 2.5))
                     continue
-                time.sleep(random.randint(4, 6))
+                time.sleep(random.uniform(1.5, 2.5))
         log.info("live_detail 数据获取完成............................................................")
 
     except Exception as e:
@@ -592,7 +592,7 @@ def schedule_task():
     设置定时任务
     """
     # 立即运行一次任务
-    # kgj_kapai_main(logger)
+    kgj_kapai_main(logger)
 
     # 设置定时任务  考古加 -> 卡牌  一周一次  卡牌类的周三跑 抓取时间比较久 和其他几个类错开时间
     schedule.every().wednesday.at("01:01").do(kgj_kapai_main, logger)

+ 2 - 2
kaogujia_spider/request_live_detail.py

@@ -167,7 +167,7 @@ def get_sales_list(log, sql_info: tuple, headers):
         try:
             log.debug(f"{inspect.currentframe().f_code.co_name}: 正在获取第 {page} 页数据")
             dec_data = get_sales_one_page(log, sql_info, page, headers)
-            time.sleep(random.randint(4, 6))
+            time.sleep(random.uniform(1.5, 2.5))
 
             items = dec_data.get('items', [])
             if not items:
@@ -184,7 +184,7 @@ def get_sales_list(log, sql_info: tuple, headers):
             page += 1
         except Exception as e:
             log.error(f"{inspect.currentframe().f_code.co_name}, Error fetching page {page}: {e}")
-            time.sleep(random.randint(4, 6))
+            time.sleep(random.uniform(1.5, 2.5))
             break  # 发生错误时退出循环
 
     parse_data_list = parse_sales_list(log, all_items)