Pārlūkot izejas kodu

update ags task 1.26.1

charley 1 nedēļu atpakaļ
vecāks
revīzija
50bfe386d7
1 mainītis faili ar 16 papildinājumiem un 14 dzēšanām
  1. 16 14
      ags_spider/ags_new_daily.py

+ 16 - 14
ags_spider/ags_new_daily.py

@@ -222,17 +222,18 @@ def process_urls(ids, mysql_pool, batch_size=1000, max_workers=5):
 
 def get_new_task(sql_pool):
     # 查询最后一条数据  并且 +2000
-    # max_cert = sql_pool.select_one("SELECT MAX(cert_id) FROM ags_record LIMIT 1")
-    # end_max_cert_num = int(max_cert[0]) + 2000
-    # end_max_cert_str = f"{end_max_cert_num:08}"  # 格式化为 8 位,不足左边补 0
+    max_cert = sql_pool.select_one("SELECT MAX(cert_id) FROM ags_record LIMIT 1")
+    end_max_cert_num = int(max_cert[0]) + 2000
+    end_max_cert_str = f"{end_max_cert_num:08}"  # 格式化为 8 位,不足左边补 0
     # logger.debug(f'查询到最新的 id 为:{end_max_cert_str[0]}, 开始生成新数据, 并添加到任务表中.........')
 
     # 查询新任务列表
     """
     每日更新任务为+2000,-1000
     """
-    # ags_id_list = sql_pool.select_all("SELECT id, cert_id FROM ags_task WHERE state != 1 AND cert_id <= %s ORDER BY id DESC LIMIT 3000")
-    ags_id_list = sql_pool.select_all("SELECT id,cert_id FROM ags_task WHERE id < 927059 AND state = 0 LIMIT 10000")
+    ags_id_list = sql_pool.select_all(
+        f"SELECT id, cert_id FROM ags_task WHERE state != 1 AND cert_id <= '{end_max_cert_str}' ORDER BY id DESC LIMIT 3000")
+    # ags_id_list = sql_pool.select_all("SELECT id,cert_id FROM ags_task WHERE id < 927059 AND state = 0 LIMIT 10000")
     ags_id_list = [i for i in ags_id_list]
     return ags_id_list
 
@@ -249,16 +250,17 @@ def main():
             logger.error("数据库连接失败")
             raise Exception("数据库连接失败")
 
-        while True:
-            new_task = get_new_task(sql_pool)
-            if not new_task:
-                logger.debug(".............................. 没有新任务,结束本轮任务 ..............................")
-                break
+        # while True:
+        new_task = get_new_task(sql_pool)
+        if not new_task:
+            logger.debug(".............................. 没有新任务,结束本轮任务 ..............................")
+            # break
+            return
 
-            try:
-                process_urls(new_task, sql_pool, batch_size=1000, max_workers=5)
-            except Exception as e:
-                logger.error('process urls: ', e)
+        try:
+            process_urls(new_task, sql_pool, batch_size=1000, max_workers=5)
+        except Exception as e:
+            logger.error('process urls: ', e)
 
     except Exception as e:
         logger.error(f'error:{e}')