Przeglądaj źródła

update 26.1.16.1

charley 2 tygodnie temu
rodzic
commit
b0d5e6b823

+ 103 - 0
gbca_spider/add_task.py

@@ -0,0 +1,103 @@
+# -*- coding: utf-8 -*-
+# Author : Charley
+# Python : 3.10.8
+# Date   : 2025/6/9 18:53
+from mysql_pool import MySQLConnectionPool
+from loguru import logger
+
+
+# logger.remove()
+# logger.add("./logs/add_{time:YYYYMMDD}.log", encoding='utf-8', rotation="00:00",
+#            format="[{time:YYYY-MM-DD HH:mm:ss.SSS}] {level} {message}",
+#            level="DEBUG", retention="15 day")
+
+
+def get_811_code_list() -> list:
+    """
+    Build the candidate rating codes for the 811 (sports) category.
+    :return: list of ints from 8110150000 to 8110172000, both inclusive
+    """
+    # list(range(...)) materialises the range directly; no per-item copy loop needed
+    return list(range(8110150000, 8110172000 + 1))
+
+
+# def get_821_code_list() -> list:
+#     """
+#     获取821类 的 code_list  影视/tcg类
+#     :return: code_list
+#     """
+#     code_list = [code for code in range(8210000000, 8210150000+1)]
+#     return code_list
+
+
+def get_851_code_list() -> list:
+    """
+    Build the candidate rating codes for the 851 (anime) category.
+    :return: list of ints from 8510000000 to 8510013072, both inclusive
+    """
+    # list(range(...)) materialises the range directly; no per-item copy loop needed
+    return list(range(8510000000, 8510013072 + 1))
+
+
+def get_831_code_list() -> list:
+    """
+    Build the candidate rating codes for the 831 category.
+    NOTE(review): the category label for 831 is not given in this file - confirm.
+    :return: list of ints from 8310005000 to 8310010000, both inclusive
+    """
+    return list(range(8310005000, 8310010000 + 1))
+
+
+sql_pool = MySQLConnectionPool(log=logger)
+# sql_no_list = sql_pool.select_all("select rating_code from gbca_record")
+# sql_no_list = [sql_no[0] for sql_no in sql_no_list]
+
+info_list = []
+# for rating_code in get_811_code_list():
+#     # if rating_code in sql_no_list:
+#     #     logger.info(f"{rating_code} 已存在")
+#     #     continue
+#     # else:
+#     data_dict = {"keyword": rating_code}
+#     print(data_dict)
+#     info_list.append(data_dict)
+# # sql_pool.insert_many("insert into gbca_task (keyword) VALUES (%s)", info_list)
+# # sql_pool.insert_many(table="gbca_task", data_list=info_list, ignore=True)
+# info_list.clear()
+# logger.info(f"811类任务添加完成")
+#
+# # ----------------------------------------------------------------------------------------------------------------------
+# for  rating_code in get_821_code_list():
+#     if rating_code in sql_no_list:
+#         logger.info(f"{rating_code} 已存在")
+#         continue
+#     else:
+#         info_list.append(rating_code)
+# sql_pool.insert_all("insert into gbca_task (keyword) VALUES (%s)", info_list)
+# info_list.clear()
+# logger.info(f"821类任务添加完成")
+#
+# # ----------------------------------------------------------------------------------------------------------------------
+for rating_code in get_851_code_list():  # queue every 851 code as a gbca_task keyword row
+    # Existence pre-check against gbca_record is disabled (kept for reference):
+    #     if rating_code in sql_no_list: log "already exists" and continue
+    # With ignore=True below, insert_many builds an INSERT IGNORE statement instead;
+    # presumably gbca_task has a unique key on keyword - TODO confirm.
+    data_dict = {"keyword": rating_code}
+    info_list.append(data_dict)
+sql_pool.insert_many(table="gbca_task", data_list=info_list, ignore=True)
+info_list.clear()
+logger.info(f"851类任务添加完成")
+
+# ----------------------------------------------------------------------------------------------------------------------
+# for rating_code in get_831_code_list():
+#     # if rating_code in sql_no_list:
+#     #     logger.info(f"{rating_code} 已存在")
+#     #     continue
+#     # else:
+#     data_dict = {"keyword": rating_code}
+#     print(data_dict)
+#     info_list.append(data_dict)
+# sql_pool.insert_many(table="gbca_task", data_list=info_list, ignore=True)
+# info_list.clear()
+# logger.info(f"831类任务添加完成")

+ 4 - 0
gbca_spider/gbca_new_daily_spider.py

@@ -13,6 +13,10 @@ from datetime import datetime
 from mysql_pool import MySQLConnectionPool
 from tenacity import retry, stop_after_attempt, wait_fixed
 
+"""
+20260116 重启
+"""
+
 logger.remove()
 logger.add("./logs/{time:YYYYMMDD}.log", encoding='utf-8', rotation="00:00",
            format="[{time:YYYY-MM-DD HH:mm:ss.SSS}] {level} {message}",

+ 60 - 12
gbca_spider/mysql_pool.py

@@ -66,7 +66,7 @@ class MySQLConnectionPool:
         except Exception as e:
             if commit:
                 conn.rollback()
-            self.log.error(f"Error executing query: {e}, Query: {query}, Args: {args}")
+            self.log.exception(f"Error executing query: {e}, Query: {query}, Args: {args}")
             raise e
 
     def select_one(self, query, args=None):
@@ -129,7 +129,7 @@ class MySQLConnectionPool:
             if conn:
                 conn.close()
 
-    def insert_one_or_dict(self, table=None, data=None, query=None, args=None, commit=True):
+    def insert_one_or_dict(self, table=None, data=None, query=None, args=None, commit=True, ignore=False):
         """
         单条插入(支持字典或原始SQL)
         :param table: 表名(字典插入时必需)
@@ -137,6 +137,7 @@ class MySQLConnectionPool:
         :param query: 直接SQL语句(与data二选一)
         :param args: SQL参数(query使用时必需)
         :param commit: 是否自动提交
+        :param ignore: 是否使用ignore
         :return: 最后插入ID
         """
         if data is not None:
@@ -145,17 +146,42 @@ class MySQLConnectionPool:
 
             keys = ', '.join([self._safe_identifier(k) for k in data.keys()])
             values = ', '.join(['%s'] * len(data))
-            query = f"INSERT INTO {self._safe_identifier(table)} ({keys}) VALUES ({values})"
+
+            # query = f"INSERT INTO {self._safe_identifier(table)} ({keys}) VALUES ({values})"
+            # 构建 INSERT IGNORE 语句
+            ignore_clause = "IGNORE" if ignore else ""
+            # insert_sql = f"INSERT {ignore_clause} INTO {table} ({columns}) VALUES ({placeholders})"
+            query = f"INSERT {ignore_clause} INTO {self._safe_identifier(table)} ({keys}) VALUES ({values})"
             args = tuple(data.values())
         elif query is None:
             raise ValueError("Either data or query must be provided")
 
-        cursor = self._execute(query, args, commit)
-        self.log.info(f"sql insert_one_or_dict, Table: {table}, Rows: {cursor.rowcount}")
-        self.log.info('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>data insert_one_or_dict 入库中>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
-        return cursor.lastrowid
+        # cursor = self._execute(query, args, commit)
+        # self.log.info(f"sql insert_one_or_dict, Table: {table}, Rows: {cursor.rowcount}")
+        # self.log.info('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>data insert_one_or_dict 入库中>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
+        # return cursor.lastrowid
+
+        try:
+            cursor = self._execute(query, args, commit)
+            self.log.info(f"sql insert_one_or_dict, Table: {table}, Rows: {cursor.rowcount}")
+            self.log.info('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>data insert_one_or_dict 入库中>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
+            return cursor.lastrowid
+        except pymysql.err.IntegrityError as e:
+            if "Duplicate entry" in str(e):
+                self.log.warning(f"插入失败:重复条目,已跳过。错误详情: {e}")
+                # print("插入失败:重复条目", e)
+                return -1  # 返回 -1 表示重复条目被跳过
+            else:
+                self.log.exception(f"数据库完整性错误: {e}")
+                # print("插入失败:完整性错误", e)
+                raise
+        except Exception as e:
+            # self.log.error(f"未知错误: {str(e)}", exc_info=True)
+            self.log.exception(f"未知错误: {e}")  # 记录完整异常信息
+            # print("插入失败:未知错误", e)
+            raise
 
-    def insert_many(self, table=None, data_list=None, query=None, args_list=None, batch_size=500, commit=True):
+    def insert_many(self, table=None, data_list=None, query=None, args_list=None, batch_size=1000, commit=True, ignore=False):
         """
         批量插入(支持字典列表或原始SQL)
         :param table: 表名(字典插入时必需)
@@ -164,6 +190,7 @@ class MySQLConnectionPool:
         :param args_list: SQL参数列表(query使用时必需)
         :param batch_size: 分批大小
         :param commit: 是否自动提交
+        :param ignore: 是否使用ignore
         :return: 影响行数
         """
         if data_list is not None:
@@ -172,7 +199,11 @@ class MySQLConnectionPool:
 
             keys = ', '.join([self._safe_identifier(k) for k in data_list[0].keys()])
             values = ', '.join(['%s'] * len(data_list[0]))
-            query = f"INSERT INTO {self._safe_identifier(table)} ({keys}) VALUES ({values})"
+
+            # 构建 INSERT IGNORE 语句
+            ignore_clause = "IGNORE" if ignore else ""
+            # insert_sql = f"INSERT {ignore_clause} INTO {table} ({columns}) VALUES ({placeholders})"
+            query = f"INSERT {ignore_clause} INTO {self._safe_identifier(table)} ({keys}) VALUES ({values})"
             args_list = [tuple(d.values()) for d in data_list]
         elif query is None:
             raise ValueError("Either data_list or query must be provided")
@@ -204,7 +235,7 @@ class MySQLConnectionPool:
                     #             self.log.error(f"插入失败: {e2}, 参数: {args}")
                     # total += rowcount
                 else:
-                    self.log.error(f"数据库错误: {e}")
+                    self.log.exception(f"数据库错误: {e}")
                     if commit:
                         conn.rollback()
                     raise e
@@ -217,10 +248,13 @@ class MySQLConnectionPool:
                 #     except Exception as e2:
                 #         self.log.error(f"Single insert failed: {e2}")
                         # continue
-        self.log.info(f"sql insert_many, Table: {table}, Total Rows: {total}")
+        if table:
+            self.log.info(f"sql insert_many, Table: {table}, Total Rows: {total}")
+        else:
+            self.log.info(f"sql insert_many, Query: {query}, Total Rows: {total}")
         return total
 
-    def insert_many_two(self, table=None, data_list=None, query=None, args_list=None, batch_size=500, commit=True):
+    def insert_many_two(self, table=None, data_list=None, query=None, args_list=None, batch_size=1000, commit=True):
         """
         批量插入(支持字典列表或原始SQL)
         :param table: 表名(字典插入时必需)
@@ -280,6 +314,7 @@ class MySQLConnectionPool:
         :param args_list: 插入参数列表
         :param batch_size: 每次插入的条数
         """
+        self.log.info(f"sql insert_too_many, Query: {query}, Total Rows: {len(args_list)}")
         for i in range(0, len(args_list), batch_size):
             batch = args_list[i:i + batch_size]
             try:
@@ -287,6 +322,7 @@ class MySQLConnectionPool:
                     with conn.cursor() as cursor:
                         cursor.executemany(query, batch)
                         conn.commit()
+                        self.log.debug(f"insert_too_many -> Total Rows: {len(batch)}")
             except Exception as e:
                 self.log.error(f"insert_too_many error. Trying single insert. Error: {e}")
                 # 当前批次降级为单条插入
@@ -529,3 +565,15 @@ class MySQLConnectionPool:
         if not re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', name):
             raise ValueError(f"Invalid SQL identifier: {name}")
         return name
+
+
+if __name__ == '__main__':  # manual smoke test: runs only when this module is executed directly
+    sql_pool = MySQLConnectionPool()  # constructor defaults are not visible in this diff
+    data_dic = {'card_type_id': 111, 'card_type_name': '补充包 继承的意志【OPC-13】', 'card_type_position': 964,
+                'card_id': 5284, 'card_name': '蒙奇·D·路飞', 'card_number': 'OP13-001', 'card_rarity': 'L',
+                'card_img': 'https://source.windoent.com/OnePiecePc/Picture/1757929283612OP13-001.png',
+                'card_life': '4', 'card_attribute': '打', 'card_power': '5000', 'card_attack': '-',
+                'card_color': '红/绿', 'subscript': 4, 'card_features': '超新星/草帽一伙',
+                'card_text_desc': '【咚!!×1】【对方的攻击时】我方处于活跃状态的咚!!不多于5张的场合,可以将我方任意张数的咚!!转为休息状态。每有1张转为休息状态的咚!!,本次战斗中,此领袖或我方最多1张拥有《草帽一伙》特征的角色力量+2000。',
+                'card_offer_type': '补充包 继承的意志【OPC-13】', 'crawler_language': '简中'}
+    sql_pool.insert_one_or_dict(table="one_piece_record", data=data_dic)  # dict insert, ignore=False; a "Duplicate entry" error is logged and returns -1