
feat(snkrdunk_spider): add snkrdunk spider code

charley 1 week ago
parent
commit
35ce4f0da6
5 changed files with 1594 additions and 0 deletions
  1. +78 −0
      snkrdunk_spider/YamlLoader.py
  2. +6 −0
      snkrdunk_spider/application.yml
  3. +628 −0
      snkrdunk_spider/mysql_pool.py
  4. +667 −0
      snkrdunk_spider/mysql_pool_async.py
  5. +215 −0
      snkrdunk_spider/snk_single_card_spider.py

+ 78 - 0
snkrdunk_spider/YamlLoader.py

@@ -0,0 +1,78 @@
+# -*- coding: utf-8 -*-
+# Author : Charley
+# Python : 3.10.8
+# Date   : 2025/12/22 10:44
+import os
+import re
+import yaml
+
+regex = re.compile(r'^\$\{(?P<ENV>[A-Z_\-]+:)?(?P<VAL>[\w.]+)}$')
+
+
+class YamlConfig:
+    def __init__(self, config):
+        self.config = config
+    
+    def get(self, key: str):
+        return YamlConfig(self.config.get(key))
+    
+    def getValueAsString(self, key: str):
+        try:
+            match = regex.match(self.config[key])
+            group = match.groupdict()
+            if group['ENV'] is not None:
+                env = group['ENV'][:-1]
+                return os.getenv(env, group['VAL'])
+            return group['VAL']
+        except (TypeError, AttributeError):
+            return self.config[key]
+    
+    def getValueAsInt(self, key: str):
+        try:
+            match = regex.match(self.config[key])
+            group = match.groupdict()
+            if group['ENV'] is not None:
+                env = group['ENV'][:-1]
+                return int(os.getenv(env, group['VAL']))
+            return int(group['VAL'])
+        except (TypeError, AttributeError):
+            return int(self.config[key])
+    
+    def getValueAsBool(self, key: str):
+        try:
+            match = regex.match(self.config[key])
+            group = match.groupdict()
+            if group['ENV'] is not None:
+                env = group['ENV'][:-1]
+                # bool() on any non-empty string is True, so compare
+                # against common truthy spellings instead.
+                return os.getenv(env, group['VAL']).lower() in ('1', 'true', 'yes', 'on')
+            return group['VAL'].lower() in ('1', 'true', 'yes', 'on')
+        except (TypeError, AttributeError):
+            return bool(self.config[key])
+
+
+def readYaml(path: str = 'application.yml', profile: str = None) -> YamlConfig:
+    conf = {}
+    if os.path.exists(path):
+        with open(path) as fd:
+            conf = yaml.load(fd, Loader=yaml.FullLoader)
+
+    if profile is not None:
+        base, ext = os.path.splitext(path)
+        profiledYaml = f'{base}-{profile}{ext}'
+        if os.path.exists(profiledYaml):
+            with open(profiledYaml) as fd:
+                conf.update(yaml.load(fd, Loader=yaml.FullLoader))
+
+    return YamlConfig(conf)
+

+ 6 - 0
snkrdunk_spider/application.yml

@@ -0,0 +1,6 @@
+mysql:
+  host: ${MYSQL_HOST:100.64.0.21}
+  port: ${MYSQL_PORT:3306}
+  username: ${MYSQL_USERNAME:crawler}
+  password: ${MYSQL_PASSWORD:Pass2022}
+  db: ${MYSQL_DATABASE:crawler}
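
For reference, a minimal usage sketch of the two files above (assumes application.yml sits in the working directory; the override value is hypothetical):

    # Sketch: how YamlLoader resolves the ${ENV:default} placeholders above.
    import os
    import YamlLoader

    os.environ['MYSQL_HOST'] = '127.0.0.1'    # hypothetical environment override
    conf = YamlLoader.readYaml('application.yml')
    mysql = conf.get('mysql')
    print(mysql.getValueAsString('host'))      # -> 127.0.0.1 (environment wins)
    print(mysql.getValueAsInt('port'))         # -> 3306 (falls back to the default)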

+ 628 - 0
snkrdunk_spider/mysql_pool.py

@@ -0,0 +1,628 @@
+# -*- coding: utf-8 -*-
+# Author : Charley
+# Python : 3.10.8
+# Date   : 2025/3/25 14:14
+import re
+import pymysql
+import YamlLoader
+from loguru import logger
+from dbutils.pooled_db import PooledDB
+
+# Load the YAML configuration
+yaml = YamlLoader.readYaml()
+mysqlYaml = yaml.get("mysql")
+sql_host = mysqlYaml.getValueAsString("host")
+sql_port = mysqlYaml.getValueAsInt("port")
+sql_user = mysqlYaml.getValueAsString("username")
+sql_password = mysqlYaml.getValueAsString("password")
+sql_db = mysqlYaml.getValueAsString("db")
+
+
+class MySQLConnectionPool:
+    """
+    MySQL connection pool
+    """
+
+    def __init__(self, mincached=1, maxcached=2, maxconnections=3, log=None):
+        """
+        Initialize the connection pool
+        :param mincached: minimum number of idle connections created at startup (0 = create none)
+        :param maxcached: maximum number of idle connections kept in the pool (0 or None = unlimited)
+        :param maxconnections: maximum number of connections allowed (0 or None = unlimited)
+        :param log: custom logger
+        """
+        # Use loguru's logger by default; a caller-supplied logger takes precedence
+        self.log = log or logger
+        self.pool = PooledDB(
+            creator=pymysql,
+            mincached=mincached,
+            maxcached=maxcached,
+            maxconnections=maxconnections,
+            blocking=True,  # block and wait when the pool is exhausted (False would raise instead)
+            host=sql_host,
+            port=sql_port,
+            user=sql_user,
+            password=sql_password,
+            database=sql_db,
+            ping=1,  # 0: never check (fastest), 1: check when a connection is taken, 2: check before every execute; guards against stale connections
+            connect_timeout=5,  # connection timeout (seconds)
+            # read_timeout=30,  # read timeout (seconds)
+            write_timeout=30  # write timeout (seconds)
+        )
+
+    def _execute(self, query, args=None, commit=False):
+        """
+        Execute a SQL statement
+        :param query: SQL statement
+        :param args: SQL parameters
+        :param commit: whether to commit the transaction
+        :return: cursor holding the query result
+        """
+        conn = None
+        try:
+            with self.pool.connection() as conn:
+                with conn.cursor() as cursor:
+                    cursor.execute(query, args)
+                    if commit:
+                        conn.commit()
+                    self.log.debug(f"sql _execute, Query: {query}, Rows: {cursor.rowcount}")
+                    return cursor
+        except Exception as e:
+            if commit and conn:
+                conn.rollback()
+            self.log.exception(f"Error executing query: {e}, Query: {query}, Args: {args}")
+            raise e
+
+    def select_one(self, query, args=None):
+        """
+        Run a query and return a single row
+        :param query: query statement
+        :param args: query parameters
+        :return: query result
+        """
+        cursor = self._execute(query, args)
+        return cursor.fetchone()
+
+    def select_all(self, query, args=None):
+        """
+        Run a query and return all rows
+        :param query: query statement
+        :param args: query parameters
+        :return: query result
+        """
+        cursor = self._execute(query, args)
+        return cursor.fetchall()
+
+    def insert_one(self, query, args):
+        """
+        Execute a single INSERT statement
+        :param query: insert statement
+        :param args: insert parameters
+        """
+        self.log.info('>>>>>>>>>> data insert_one: writing to DB >>>>>>>>>>')
+        cursor = self._execute(query, args, commit=True)
+        return cursor.lastrowid  # ID of the inserted row
+
+    def insert_all(self, query, args_list):
+        """
+        Execute a batch INSERT; falls back to row-by-row inserts on failure
+        :param query: insert statement
+        :param args_list: list of parameter tuples
+        """
+        conn = None
+        cursor = None
+        try:
+            conn = self.pool.connection()
+            cursor = conn.cursor()
+            cursor.executemany(query, args_list)
+            conn.commit()
+            self.log.debug(f"sql insert_all, SQL: {query[:100]}..., Rows: {cursor.rowcount}")
+            self.log.info('>>>>>>>>>> data insert_all: writing to DB >>>>>>>>>>')
+        except pymysql.err.IntegrityError as e:
+            if "Duplicate entry" in str(e):
+                conn.rollback()
+                self.log.warning(f"Batch insert hit duplicates, falling back to row-by-row inserts. Error: {e}")
+                rowcount = 0
+                for args in args_list:
+                    try:
+                        self.insert_one(query, args)
+                        rowcount += 1
+                    except pymysql.err.IntegrityError as e2:
+                        if "Duplicate entry" in str(e2):
+                            self.log.debug(f"Skipping duplicate entry: {e2}")
+                        else:
+                            self.log.error(f"Insert failed: {e2}")
+                    except Exception as e2:
+                        self.log.error(f"Insert failed: {e2}")
+                self.log.info(f"Row-by-row insert finished: {rowcount}/{len(args_list)} rows")
+            else:
+                conn.rollback()
+                self.log.exception(f"Database integrity error: {e}")
+                raise e
+        except Exception as e:
+            conn.rollback()
+            self.log.exception(f"Batch insert failed: {e}")
+            raise e
+        finally:
+            if cursor:
+                cursor.close()
+            if conn:
+                conn.close()
+
+    def insert_one_or_dict(self, table=None, data=None, query=None, args=None, commit=True, ignore=False):
+        """
+        Insert a single row (dictionary or raw SQL)
+        :param table: table name (required for dictionary inserts)
+        :param data: dictionary data {column: value}
+        :param query: raw SQL statement (mutually exclusive with data)
+        :param args: SQL parameters (required when query is used)
+        :param commit: whether to auto-commit
+        :param ignore: whether to use INSERT IGNORE
+        :return: last inserted ID
+        """
+        if data is not None:
+            if not isinstance(data, dict):
+                raise ValueError("Data must be a dictionary")
+
+            keys = ', '.join([self._safe_identifier(k) for k in data.keys()])
+            values = ', '.join(['%s'] * len(data))
+
+            # Build the INSERT [IGNORE] statement
+            ignore_clause = "IGNORE" if ignore else ""
+            query = f"INSERT {ignore_clause} INTO {self._safe_identifier(table)} ({keys}) VALUES ({values})"
+            args = tuple(data.values())
+        elif query is None:
+            raise ValueError("Either data or query must be provided")
+
+        try:
+            cursor = self._execute(query, args, commit)
+            self.log.info(f"sql insert_one_or_dict, Table: {table}, Rows: {cursor.rowcount}")
+            self.log.info('>>>>>>>>>> data insert_one_or_dict: writing to DB >>>>>>>>>>')
+            return cursor.lastrowid
+        except pymysql.err.IntegrityError as e:
+            if "Duplicate entry" in str(e):
+                self.log.warning(f"Insert skipped: duplicate entry. Details: {e}")
+                return -1  # -1 means a duplicate entry was skipped
+            else:
+                self.log.exception(f"Database integrity error: {e}")
+                raise
+        except Exception as e:
+            self.log.exception(f"Unexpected error: {e}")
+            raise
+
+    def insert_many(self, table=None, data_list=None, query=None, args_list=None, batch_size=1000, commit=True,
+                    ignore=False):
+        """
+        Batch insert (list of dictionaries or raw SQL)
+        :param table: table name (required for dictionary inserts)
+        :param data_list: list of dictionaries [{column: value}]
+        :param query: raw SQL statement (mutually exclusive with data_list)
+        :param args_list: list of parameter tuples (required when query is used)
+        :param batch_size: batch size
+        :param commit: whether to auto-commit
+        :param ignore: whether to use INSERT IGNORE
+        :return: number of affected rows
+        """
+        if data_list is not None:
+            if not data_list or not isinstance(data_list[0], dict):
+                raise ValueError("Data_list must be a non-empty list of dictionaries")
+
+            keys = ', '.join([self._safe_identifier(k) for k in data_list[0].keys()])
+            values = ', '.join(['%s'] * len(data_list[0]))
+
+            # Build the INSERT [IGNORE] statement
+            ignore_clause = "IGNORE" if ignore else ""
+            query = f"INSERT {ignore_clause} INTO {self._safe_identifier(table)} ({keys}) VALUES ({values})"
+            args_list = [tuple(d.values()) for d in data_list]
+        elif query is None:
+            raise ValueError("Either data_list or query must be provided")
+
+        total = 0
+        for i in range(0, len(args_list), batch_size):
+            batch = args_list[i:i + batch_size]
+            try:
+                with self.pool.connection() as conn:
+                    with conn.cursor() as cursor:
+                        cursor.executemany(query, batch)
+                        if commit:
+                            conn.commit()
+                        total += cursor.rowcount
+            except pymysql.err.IntegrityError as e:
+                # Handle unique-index conflicts
+                if "Duplicate entry" in str(e):
+                    if ignore:
+                        # With INSERT IGNORE this should not happen, but just in case
+                        self.log.warning(f"Batch insert hit a duplicate entry (ignore mode): {e}")
+                    else:
+                        # No IGNORE: fall back to row-by-row inserts
+                        self.log.warning(f"Batch insert hit duplicates, falling back to row-by-row inserts. Error: {e}")
+                        if commit:
+                            conn.rollback()
+
+                        rowcount = 0
+                        for j, args in enumerate(batch):
+                            try:
+                                if data_list:
+                                    # dictionary mode
+                                    self.insert_one_or_dict(
+                                        table=table,
+                                        data=dict(zip(data_list[0].keys(), args)),
+                                        commit=commit,
+                                        ignore=False  # duplicates are caught manually on single inserts
+                                    )
+                                else:
+                                    # raw SQL mode
+                                    self.insert_one(query, args)
+                                rowcount += 1
+                            except pymysql.err.IntegrityError as e2:
+                                if "Duplicate entry" in str(e2):
+                                    self.log.debug(f"Skipping duplicate entry [{i+j+1}]: {e2}")
+                                else:
+                                    self.log.error(f"Insert failed [{i+j+1}]: {e2}")
+                            except Exception as e2:
+                                self.log.error(f"Insert failed [{i+j+1}]: {e2}")
+                        total += rowcount
+                        self.log.info(f"Row-by-row insert for this batch finished: {rowcount}/{len(batch)} rows")
+                else:
+                    # other integrity errors
+                    self.log.exception(f"Database integrity error: {e}")
+                    if commit:
+                        conn.rollback()
+                    raise e
+            except Exception as e:
+                # other database errors
+                self.log.exception(f"Batch insert failed: {e}")
+                if commit:
+                    conn.rollback()
+                raise e
+        if table:
+            self.log.info(f"sql insert_many, Table: {table}, Total Rows: {total}")
+        else:
+            self.log.info(f"sql insert_many, Query: {query}, Total Rows: {total}")
+        return total
+
+    def insert_many_two(self, table=None, data_list=None, query=None, args_list=None, batch_size=1000, commit=True,
+                        ignore=False):
+        """
+        Batch insert (list of dictionaries or raw SQL) - alternative implementation
+        :param table: table name (required for dictionary inserts)
+        :param data_list: list of dictionaries [{column: value}]
+        :param query: raw SQL statement (mutually exclusive with data_list)
+        :param args_list: list of parameter tuples (required when query is used)
+        :param batch_size: batch size
+        :param commit: whether to auto-commit
+        :param ignore: whether to use INSERT IGNORE
+        :return: number of affected rows
+        """
+        if data_list is not None:
+            if not data_list or not isinstance(data_list[0], dict):
+                raise ValueError("Data_list must be a non-empty list of dictionaries")
+            keys = ', '.join([self._safe_identifier(k) for k in data_list[0].keys()])
+            values = ', '.join(['%s'] * len(data_list[0]))
+            ignore_clause = "IGNORE" if ignore else ""
+            query = f"INSERT {ignore_clause} INTO {self._safe_identifier(table)} ({keys}) VALUES ({values})"
+            args_list = [tuple(d.values()) for d in data_list]
+        elif query is None:
+            raise ValueError("Either data_list or query must be provided")
+
+        total = 0
+        for i in range(0, len(args_list), batch_size):
+            batch = args_list[i:i + batch_size]
+            try:
+                with self.pool.connection() as conn:
+                    with conn.cursor() as cursor:
+                        cursor.executemany(query, batch)
+                        if commit:
+                            conn.commit()
+                        total += cursor.rowcount
+            except pymysql.err.IntegrityError as e:
+                if "Duplicate entry" in str(e) and not ignore:
+                    self.log.warning(f"Batch insert hit duplicates, falling back to row-by-row inserts: {e}")
+                    if commit:
+                        conn.rollback()
+                    rowcount = 0
+                    for args in batch:
+                        try:
+                            self.insert_one(query, args)
+                            rowcount += 1
+                        except pymysql.err.IntegrityError as e2:
+                            if "Duplicate entry" in str(e2):
+                                self.log.debug(f"Skipping duplicate entry: {e2}")
+                            else:
+                                self.log.error(f"Insert failed: {e2}")
+                        except Exception as e2:
+                            self.log.error(f"Insert failed: {e2}")
+                    total += rowcount
+                else:
+                    self.log.exception(f"Database integrity error: {e}")
+                    if commit:
+                        conn.rollback()
+                    raise e
+            except Exception as e:
+                self.log.exception(f"Batch insert failed: {e}")
+                if commit:
+                    conn.rollback()
+                raise e
+        self.log.info(f"sql insert_many_two, Table: {table}, Total Rows: {total}")
+        return total
+
+    def insert_too_many(self, query, args_list, batch_size=1000):
+        """
+        Batch insert with chunked commits; useful when inserting 100k+ rows in one call.
+        Falls back to row-by-row inserts when a chunk fails.
+        :param query: insert statement
+        :param args_list: list of parameter tuples
+        :param batch_size: rows per chunk
+        """
+        self.log.info(f"sql insert_too_many, Query: {query}, Total Rows: {len(args_list)}")
+        for i in range(0, len(args_list), batch_size):
+            batch = args_list[i:i + batch_size]
+            try:
+                with self.pool.connection() as conn:
+                    with conn.cursor() as cursor:
+                        cursor.executemany(query, batch)
+                        conn.commit()
+                        self.log.debug(f"insert_too_many -> Total Rows: {len(batch)}")
+            except Exception as e:
+                self.log.error(f"insert_too_many error. Trying single insert. Error: {e}")
+                # fall back to row-by-row inserts for this chunk
+                for args in batch:
+                    self.insert_one(query, args)
+
+    def update_one(self, query, args):
+        """
+        Execute a single UPDATE statement
+        :param query: update statement
+        :param args: update parameters
+        """
+        self.log.info('>>>>>>>>>> data update_one: updating >>>>>>>>>>')
+        return self._execute(query, args, commit=True)
+
+    def update_all(self, query, args_list):
+        """
+        Execute a batch UPDATE; falls back to row-by-row updates on failure
+        :param query: update statement
+        :param args_list: list of parameter tuples
+        """
+        conn = None
+        cursor = None
+        try:
+            conn = self.pool.connection()
+            cursor = conn.cursor()
+            cursor.executemany(query, args_list)
+            conn.commit()
+            self.log.debug(f"sql update_all, SQL: {query}, Rows: {len(args_list)}")
+            self.log.info('>>>>>>>>>> data update_all: updating >>>>>>>>>>')
+        except Exception as e:
+            conn.rollback()
+            self.log.error(f"Error executing query: {e}")
+            # If the batch update failed, update row by row
+            rowcount = 0
+            for args in args_list:
+                self.update_one(query, args)
+                rowcount += 1
+            self.log.debug(f'Batch update failed. Updated {rowcount} rows individually.')
+        finally:
+            if cursor:
+                cursor.close()
+            if conn:
+                conn.close()
+
+    def update_one_or_dict(self, table=None, data=None, condition=None, query=None, args=None, commit=True):
+        """
+        Update a single row (dictionary or raw SQL)
+        :param table: table name (required in dictionary mode)
+        :param data: dictionary data {column: value} (mutually exclusive with query)
+        :param condition: update condition; supported formats:
+            - dict: {"id": 1} → "WHERE id = %s"
+            - str: "id = 1" → "WHERE id = 1" (caller must ensure it is safe)
+            - tuple: ("id = %s", [1]) → "WHERE id = %s" (parameterized)
+        :param query: raw SQL statement (mutually exclusive with data)
+        :param args: SQL parameters (required in query mode)
+        :param commit: whether to auto-commit
+        :return: number of affected rows
+        :raises: ValueError when argument validation fails
+        """
+        # Validate arguments
+        if data is not None:
+            if not isinstance(data, dict):
+                raise ValueError("Data must be a dictionary")
+            if table is None:
+                raise ValueError("Table name is required for dictionary update")
+            if condition is None:
+                raise ValueError("Condition is required for dictionary update")
+
+            # Build the SET clause
+            set_clause = ", ".join([f"{self._safe_identifier(k)} = %s" for k in data.keys()])
+            set_values = list(data.values())
+
+            # Parse the condition
+            condition_clause, condition_args = self._parse_condition(condition)
+            query = f"UPDATE {self._safe_identifier(table)} SET {set_clause} WHERE {condition_clause}"
+            args = set_values + condition_args
+
+        elif query is None:
+            raise ValueError("Either data or query must be provided")
+
+        # Run the update
+        cursor = self._execute(query, args, commit)
+        # self.log.debug(
+        #     f"Updated table={table}, rows={cursor.rowcount}, query={query[:100]}...",
+        #     extra={"table": table, "rows": cursor.rowcount}
+        # )
+        return cursor.rowcount
+
+    def _parse_condition(self, condition):
+        """
+        Parse a condition into (clause, args) form
+        :param condition: dict / str / tuple
+        :return: (str, list) SQL clause and parameter list
+        """
+        if isinstance(condition, dict):
+            clause = " AND ".join([f"{self._safe_identifier(k)} = %s" for k in condition.keys()])
+            args = list(condition.values())
+        elif isinstance(condition, str):
+            clause = condition  # note: the caller must ensure this is safe
+            args = []
+        elif isinstance(condition, (tuple, list)) and len(condition) == 2:
+            clause, args = condition[0], condition[1]
+            if not isinstance(args, (list, tuple)):
+                args = [args]
+        else:
+            raise ValueError("Condition must be dict/str/(clause, args)")
+        return clause, args
+
+    def update_many(self, table=None, data_list=None, condition_list=None, query=None, args_list=None, batch_size=500,
+                    commit=True):
+        """
+        Batch update (list of dictionaries or raw SQL)
+        :param table: table name (required for dictionary updates)
+        :param data_list: list of dictionaries [{column: value}]
+        :param condition_list: list of conditions (dicts, same length as data_list)
+        :param query: raw SQL statement (mutually exclusive with data_list)
+        :param args_list: list of parameter tuples (required when query is used)
+        :param batch_size: batch size
+        :param commit: whether to auto-commit
+        :return: number of affected rows
+        """
+        if data_list is not None:
+            if not data_list or not isinstance(data_list[0], dict):
+                raise ValueError("Data_list must be a non-empty list of dictionaries")
+            if condition_list is None or len(data_list) != len(condition_list):
+                raise ValueError("Condition_list must be provided and match the length of data_list")
+            if not all(isinstance(cond, dict) for cond in condition_list):
+                raise ValueError("All elements in condition_list must be dictionaries")
+
+            # Keys of the first data item and the first condition item
+            first_data_keys = set(data_list[0].keys())
+            first_cond_keys = set(condition_list[0].keys())
+
+            # Build the base SQL
+            set_clause = ', '.join([self._safe_identifier(k) + ' = %s' for k in data_list[0].keys()])
+            condition_clause = ' AND '.join([self._safe_identifier(k) + ' = %s' for k in condition_list[0].keys()])
+            base_query = f"UPDATE {self._safe_identifier(table)} SET {set_clause} WHERE {condition_clause}"
+            total = 0
+
+            # Process in batches
+            for i in range(0, len(data_list), batch_size):
+                batch_data = data_list[i:i + batch_size]
+                batch_conds = condition_list[i:i + batch_size]
+                batch_args = []
+
+                # Check that every row in this batch has the same structure
+                can_batch = True
+                for data, cond in zip(batch_data, batch_conds):
+                    data_keys = set(data.keys())
+                    cond_keys = set(cond.keys())
+                    if data_keys != first_data_keys or cond_keys != first_cond_keys:
+                        can_batch = False
+                        break
+                    batch_args.append(tuple(data.values()) + tuple(cond.values()))
+
+                if not can_batch:
+                    # Mixed structures: fall back to single-row updates
+                    for data, cond in zip(batch_data, batch_conds):
+                        self.update_one_or_dict(table=table, data=data, condition=cond, commit=commit)
+                        total += 1
+                    continue
+
+                # Run the batch update
+                try:
+                    with self.pool.connection() as conn:
+                        with conn.cursor() as cursor:
+                            cursor.executemany(base_query, batch_args)
+                            if commit:
+                                conn.commit()
+                            total += cursor.rowcount
+                            self.log.debug(f"Batch update succeeded. Rows: {cursor.rowcount}")
+                except Exception as e:
+                    if commit:
+                        conn.rollback()
+                    self.log.error(f"Batch update failed: {e}")
+                    # Fall back to single-row updates
+                    for args, data, cond in zip(batch_args, batch_data, batch_conds):
+                        try:
+                            self._execute(base_query, args, commit=commit)
+                            total += 1
+                        except Exception as e2:
+                            self.log.error(f"Single update failed: {e2}, Data: {data}, Condition: {cond}")
+            self.log.info(f"Total updated rows: {total}")
+            return total
+        elif query is not None:
+            # Raw SQL with a parameter list
+            if args_list is None:
+                raise ValueError("args_list must be provided when using query")
+
+            total = 0
+            for i in range(0, len(args_list), batch_size):
+                batch_args = args_list[i:i + batch_size]
+                try:
+                    with self.pool.connection() as conn:
+                        with conn.cursor() as cursor:
+                            cursor.executemany(query, batch_args)
+                            if commit:
+                                conn.commit()
+                            total += cursor.rowcount
+                            self.log.debug(f"Batch update succeeded. Rows: {cursor.rowcount}")
+                except Exception as e:
+                    if commit:
+                        conn.rollback()
+                    self.log.error(f"Batch update failed: {e}")
+                    # Fall back to single-row updates
+                    for args in batch_args:
+                        try:
+                            self._execute(query, args, commit=commit)
+                            total += 1
+                        except Exception as e2:
+                            self.log.error(f"Single update failed: {e2}, Args: {args}")
+            self.log.info(f"Total updated rows: {total}")
+            return total
+        else:
+            raise ValueError("Either data_list or query must be provided")
+
+    def check_pool_health(self):
+        """
+        Verify that the pool can hand out a working connection
+
+        # Usage example
+        # Configure the MySQL connection pool
+        sql_pool = MySQLConnectionPool(log=log)
+        if not sql_pool.check_pool_health():
+            log.error("Database connection pool is unhealthy")
+            raise RuntimeError("Database connection pool is unhealthy")
+        """
+        try:
+            with self.pool.connection() as conn:
+                conn.ping(reconnect=True)
+                return True
+        except Exception as e:
+            self.log.error(f"Connection pool health check failed: {e}")
+            return False
+
+    def close(self):
+        """
+        Close the pool and release all connections
+        """
+        try:
+            if hasattr(self, 'pool') and self.pool:
+                self.pool.close()
+                self.log.info("Database connection pool closed")
+        except Exception as e:
+            self.log.error(f"Failed to close the connection pool: {e}")
+
+    @staticmethod
+    def _safe_identifier(name):
+        """Validate a SQL identifier"""
+        if not re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', name):
+            raise ValueError(f"Invalid SQL identifier: {name}")
+        return name
+
+
+if __name__ == '__main__':
+    sql_pool = MySQLConnectionPool()
+    data_dic = {'card_type_id': 111, 'card_type_name': '补充包 继承的意志【OPC-13】', 'card_type_position': 964,
+                'card_id': 5284, 'card_name': '蒙奇·D·路飞', 'card_number': 'OP13-001', 'card_rarity': 'L',
+                'card_img': 'https://source.windoent.com/OnePiecePc/Picture/1757929283612OP13-001.png',
+                'card_life': '4', 'card_attribute': '打', 'card_power': '5000', 'card_attack': '-',
+                'card_color': '红/绿', 'subscript': 4, 'card_features': '超新星/草帽一伙',
+                'card_text_desc': '【咚!!×1】【对方的攻击时】我方处于活跃状态的咚!!不多于5张的场合,可以将我方任意张数的咚!!转为休息状态。每有1张转为休息状态的咚!!,本次战斗中,此领袖或我方最多1张拥有《草帽一伙》特征的角色力量+2000。',
+                'card_offer_type': '补充包 继承的意志【OPC-13】', 'crawler_language': '简中'}
+    sql_pool.insert_one_or_dict(table="one_piece_record", data=data_dic)
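
As a side note, a short sketch of the three condition formats update_one_or_dict accepts (the table and column names here are hypothetical):

    # Sketch only: "demo_table", "price" and "card_id" are made-up names.
    sql_pool = MySQLConnectionPool()
    data = {"price": 42}
    sql_pool.update_one_or_dict(table="demo_table", data=data, condition={"card_id": 1})         # dict -> WHERE card_id = %s
    sql_pool.update_one_or_dict(table="demo_table", data=data, condition="card_id = 1")          # raw string (caller ensures safety)
    sql_pool.update_one_or_dict(table="demo_table", data=data, condition=("card_id = %s", [1]))  # parameterized tuple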

+ 667 - 0
snkrdunk_spider/mysql_pool_async.py

@@ -0,0 +1,667 @@
+# -*- coding: utf-8 -*-
+# Author: Charley
+# Python: 3.10+
+# Date: 2026/04/16
+# Desc: async MySQL connection pool, tuned for high-concurrency crawler workloads
+import asyncio
+import re
+import aiomysql
+from loguru import logger
+
+
+class AsyncMySQLPool:
+    """
+    Async MySQL connection pool, tuned for high-concurrency crawler workloads
+
+    Advantages over the sync (pymysql) version:
+    1. Truly non-blocking async I/O: DB writes no longer stall concurrent fetching
+    2. Connection reuse through pooling, cutting connection-setup overhead
+    3. More efficient batch inserts, supporting larger batch sizes
+
+    Usage example:
+        pool = AsyncMySQLPool()
+        await pool.init()
+
+        # batch insert
+        await pool.insert_many("table_name", [{"col1": "val1"}, {"col2": "val2"}])
+
+        # or use a connection directly inside async with
+        async with await pool.get_connection() as conn:
+            async with conn.cursor() as cursor:
+                await cursor.execute("SELECT * FROM table LIMIT 10")
+                results = await cursor.fetchall()
+
+        await pool.close()
+    """
+
+    def __init__(
+        self,
+        min_size=5,
+        max_size=20,
+        pool_recycle=3600,
+        connect_timeout=10,
+        write_timeout=30,
+        log=None
+    ):
+        """
+        初始化异步连接池
+        :param min_size: 池中最小连接数
+        :param max_size: 池中最大连接数
+        :param pool_recycle: 连接回收时间(秒),防止MySQL 8小时超时
+        :param connect_timeout: 连接超时(秒)
+        :param write_timeout: 写入超时(秒)
+        :param log: 自定义日志记录器
+        """
+        self.log = log or logger
+        self.min_size = min_size
+        self.max_size = max_size
+        self.pool_recycle = pool_recycle
+        self.connect_timeout = connect_timeout
+        self.write_timeout = write_timeout
+        self._pool = None
+
+        # Read the configuration via YamlLoader
+        from YamlLoader import readYaml
+        yaml = readYaml()
+        mysql_yaml = yaml.get("mysql")
+        self.host = mysql_yaml.getValueAsString("host")
+        self.port = mysql_yaml.getValueAsInt("port")
+        self.user = mysql_yaml.getValueAsString("username")
+        self.password = mysql_yaml.getValueAsString("password")
+        self.database = mysql_yaml.getValueAsString("db")
+
+    async def init(self):
+        """Initialize the connection pool"""
+        if self._pool is not None:
+            return
+
+        try:
+            self._pool = await aiomysql.create_pool(
+                minsize=self.min_size,
+                maxsize=self.max_size,
+                host=self.host,
+                port=self.port,
+                user=self.user,
+                password=self.password,
+                db=self.database,
+                autocommit=True,  # auto-commit, so no manual commit is needed per statement
+                charset='utf8mb4',
+                connect_timeout=self.connect_timeout,
+                write_timeout=self.write_timeout,
+                pool_recycle=self.pool_recycle,
+            )
+            self.log.info(
+                f"Async MySQL pool initialized: {self.host}:{self.port}/{self.database}, "
+                f"pool size: {self.min_size}-{self.max_size}"
+            )
+        except Exception as e:
+            self.log.error(f"Failed to initialize the async MySQL pool: {e}")
+            raise
+
+    async def close(self):
+        """Close the connection pool"""
+        if self._pool:
+            self._pool.close()
+            await self._pool.wait_closed()
+            self._pool = None
+            self.log.info("Async MySQL pool closed")
+
+    async def get_connection(self):
+        """
+        Acquire a connection (recommended with async with)
+        :return: connection context manager for use in async with
+        """
+        if self._pool is None:
+            await self.init()
+        return self._pool.acquire()
+
+    async def check_pool_health(self):
+        """Check the health of the connection pool"""
+        try:
+            async with await self.get_connection() as conn:
+                async with conn.cursor() as cursor:
+                    await cursor.execute("SELECT 1")
+                    result = await cursor.fetchone()
+                    return result[0] == 1
+        except Exception as e:
+            self.log.error(f"Pool health check failed: {e}")
+            return False
+
+    @staticmethod
+    def _safe_identifier(name):
+        """Validate a SQL identifier"""
+        if not re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', name):
+            raise ValueError(f"Invalid SQL identifier: {name}")
+        return name
+
+    async def execute(self, query, args=None, commit=True):
+        """
+        Execute a SQL statement
+        :param query: SQL statement
+        :param args: SQL parameters
+        :param commit: whether to commit (ignored when autocommit=True)
+        :return: cursor
+        """
+        async with await self.get_connection() as conn:
+            async with conn.cursor(aiomysql.DictCursor) as cursor:
+                await cursor.execute(query, args)
+                if commit and not conn.get_autocommit():
+                    await conn.commit()
+                self.log.debug(f"SQL executed: {query[:80]}..., rows: {cursor.rowcount}")
+                return cursor
+
+    async def select_one(self, query, args=None):
+        """Fetch a single row"""
+        cursor = await self.execute(query, args, commit=False)
+        return await cursor.fetchone()
+
+    async def select_all(self, query, args=None):
+        """Fetch all rows"""
+        cursor = await self.execute(query, args, commit=False)
+        return await cursor.fetchall()
+
+    async def insert_one(self, query, args):
+        """Insert a single row"""
+        cursor = await self.execute(query, args)
+        return cursor.lastrowid
+
+    async def insert_one_or_dict(self, table, data, ignore=False, commit=True):
+        """
+        Insert a single row from a dictionary
+        :param table: table name
+        :param data: dictionary {column: value}
+        :param ignore: whether to use INSERT IGNORE
+        :param commit: whether to commit
+        """
+        if not isinstance(data, dict):
+            raise ValueError("data must be a dictionary")
+
+        keys = ', '.join([self._safe_identifier(k) for k in data.keys()])
+        values = ', '.join(['%s'] * len(data))
+        ignore_clause = "IGNORE" if ignore else ""
+        query = f"INSERT {ignore_clause} INTO {self._safe_identifier(table)} ({keys}) VALUES ({values})"
+        args = tuple(data.values())
+
+        try:
+            cursor = await self.execute(query, args, commit=commit)
+            return cursor.lastrowid
+        except aiomysql.IntegrityError as e:
+            if "Duplicate entry" in str(e):
+                self.log.debug(f"Skipping duplicate entry: {e}")
+                return -1
+            raise
+
+    async def insert_many(
+        self,
+        table,
+        data_list,
+        batch_size=2000,
+        ignore=False,
+        commit=True
+    ):
+        """
+        Batch insert (list of dictionaries) - high-throughput variant
+
+        Optimizations:
+        1. Batches are committed as transactions, cutting I/O round-trips
+        2. Batch size raised to 2000, reducing Python-level loop iterations
+        3. On failure, automatically degrades to row-by-row inserts (duplicates are skipped without blocking)
+
+        :param table: table name
+        :param data_list: list of dictionaries [{column: value}, ...]
+        :param batch_size: rows per batch (default 2000)
+        :param ignore: whether to use INSERT IGNORE
+        :param commit: whether to commit
+        :return: number of rows successfully inserted
+        """
+        if not data_list:
+            return 0
+
+        if not isinstance(data_list[0], dict):
+            raise ValueError("data_list must be list of dictionaries")
+
+        keys = ', '.join([self._safe_identifier(k) for k in data_list[0].keys()])
+        values_placeholder = ', '.join(['%s'] * len(data_list[0]))
+        ignore_clause = "IGNORE" if ignore else ""
+        query = f"INSERT {ignore_clause} INTO {self._safe_identifier(table)} ({keys}) VALUES ({values_placeholder})"
+
+        total_inserted = 0
+        total_skipped = 0
+
+        async with await self.get_connection() as conn:
+            async with conn.cursor() as cursor:
+                try:
+                    for i in range(0, len(data_list), batch_size):
+                        batch = data_list[i:i + batch_size]
+                        args_list = [tuple(d.values()) for d in batch]
+
+                        try:
+                            await cursor.executemany(query, args_list)
+                            if commit:
+                                await conn.commit()
+                            total_inserted += cursor.rowcount
+
+                        except aiomysql.IntegrityError as e:
+                            await conn.rollback()
+                            if "Duplicate entry" in str(e) and not ignore:
+                                # fall back to row-by-row inserts
+                                inserted, skipped = await self._insert_batch_one_by_one(
+                                    cursor, query, batch, commit
+                                )
+                                total_inserted += inserted
+                                total_skipped += skipped
+                            else:
+                                raise
+                        except Exception as e:
+                            await conn.rollback()
+                            self.log.error(f"Batch insert failed: {e}")
+                            # fall back to row-by-row inserts
+                            inserted, skipped = await self._insert_batch_one_by_one(
+                                cursor, query, batch, commit
+                            )
+                            total_inserted += inserted
+                            total_skipped += skipped
+
+                except Exception as e:
+                    self.log.exception(f"Batch insert ultimately failed: {e}")
+                    raise
+
+        if total_skipped > 0:
+            self.log.info(f"Insert finished: {total_inserted} rows inserted, {total_skipped} duplicates skipped")
+        else:
+            self.log.info(f"Insert finished: {total_inserted} rows inserted")
+
+        return total_inserted
+
+    async def _insert_batch_one_by_one(self, cursor, query, batch, commit):
+        """
+        Row-by-row insert (fallback path)
+        :return: (rows inserted, rows skipped)
+        """
+        inserted = 0
+        skipped = 0
+        for data in batch:
+            try:
+                args = tuple(data.values())
+                await cursor.execute(query, args)
+                if commit:
+                    await cursor.execute("COMMIT")
+                inserted += 1
+            except aiomysql.IntegrityError as e:
+                if "Duplicate entry" in str(e):
+                    skipped += 1
+                else:
+                    self.log.error(f"Insert failed: {e}")
+            except Exception as e:
+                self.log.error(f"Insert failed: {e}")
+        return inserted, skipped
+
+    async def insert_raw_many(self, query, args_list, batch_size=2000, commit=True):
+        """
+        Batch insert (raw SQL) - high-throughput variant
+
+        :param query: SQL statement with %s placeholders
+        :param args_list: parameter list [(val1, val2), ...]
+        :param batch_size: rows per batch
+        :param commit: whether to commit
+        :return: number of rows inserted
+        """
+        if not args_list:
+            return 0
+
+        total = 0
+        async with await self.get_connection() as conn:
+            async with conn.cursor() as cursor:
+                for i in range(0, len(args_list), batch_size):
+                    batch = args_list[i:i + batch_size]
+                    try:
+                        await cursor.executemany(query, batch)
+                        if commit:
+                            await conn.commit()
+                        total += cursor.rowcount
+                    except Exception as e:
+                        await conn.rollback()
+                        self.log.error(f"Batch insert failed [{i}]: {e}")
+                        # fall back to row-by-row inserts
+                        for args in batch:
+                            try:
+                                await cursor.execute(query, args)
+                                if commit:
+                                    await conn.commit()
+                                total += 1
+                            except Exception as e2:
+                                if "Duplicate" not in str(e2):
+                                    self.log.error(f"Single insert failed: {e2}")
+
+        self.log.debug(f"insert_raw_many finished: {total} rows")
+        return total
+
+    async def insert_or_update(self, table, data, update_columns=None, commit=True):
+        """
+        Insert or update (INSERT ... ON DUPLICATE KEY UPDATE)
+
+        :param table: table name
+        :param data: dictionary {column: value}
+        :param update_columns: columns to update on conflict; None updates all listed columns
+        :param commit: whether to commit
+        """
+        if not isinstance(data, dict):
+            raise ValueError("data must be a dictionary")
+
+        keys = ', '.join([self._safe_identifier(k) for k in data.keys()])
+        values_placeholder = ', '.join(['%s'] * len(data))
+        query = f"INSERT INTO {self._safe_identifier(table)} ({keys}) VALUES ({values_placeholder})"
+
+        if update_columns:
+            update_clause = ', '.join([f"{self._safe_identifier(col)} = VALUES({self._safe_identifier(col)})"
+                                       for col in update_columns])
+        else:
+            update_clause = ', '.join([f"{self._safe_identifier(k)} = VALUES({self._safe_identifier(k)})"
+                                       for k in data.keys()])
+
+        query += f" ON DUPLICATE KEY UPDATE {update_clause}"
+
+        cursor = await self.execute(query, tuple(data.values()), commit=commit)
+        return cursor.lastrowid
+
+    async def insert_or_update_many(
+        self,
+        table,
+        data_list,
+        update_columns=None,
+        batch_size=2000,
+        commit=True
+    ):
+        """
+        Batch insert-or-update - suited to crawler dedup scenarios
+
+        :param table: table name
+        :param data_list: list of dictionaries
+        :param update_columns: columns to update on conflict
+        :param batch_size: rows per batch
+        :param commit: whether to commit
+        """
+        if not data_list or not isinstance(data_list[0], dict):
+            raise ValueError("data_list must be non-empty list of dictionaries")
+
+        keys = ', '.join([self._safe_identifier(k) for k in data_list[0].keys()])
+        values_placeholder = ', '.join(['%s'] * len(data_list[0]))
+        query = f"INSERT INTO {self._safe_identifier(table)} ({keys}) VALUES ({values_placeholder})"
+
+        if update_columns:
+            update_clause = ', '.join([f"{self._safe_identifier(col)} = VALUES({self._safe_identifier(col)})"
+                                       for col in update_columns])
+        else:
+            update_clause = ', '.join([f"{self._safe_identifier(k)} = VALUES({self._safe_identifier(k)})"
+                                       for k in data_list[0].keys()])
+
+        query += f" ON DUPLICATE KEY UPDATE {update_clause}"
+
+        total = 0
+        async with await self.get_connection() as conn:
+            async with conn.cursor() as cursor:
+                for i in range(0, len(data_list), batch_size):
+                    batch = data_list[i:i + batch_size]
+                    args_list = [tuple(d.values()) for d in batch]
+                    try:
+                        await cursor.executemany(query, args_list)
+                        if commit:
+                            await conn.commit()
+                        total += cursor.rowcount
+                    except Exception as e:
+                        await conn.rollback()
+                        self.log.error(f"Batch insert/update failed: {e}")
+                        raise
+
+        self.log.info(f"insert_or_update_many finished: {total} rows (updates included)")
+        return total
+
+    async def update(self, query, args=None, commit=True):
+        """Execute an UPDATE statement"""
+        cursor = await self.execute(query, args, commit=commit)
+        return cursor.rowcount
+
+    async def update_one_or_dict(self, table, data, condition, commit=True):
+        """
+        Update a single row from a dictionary
+
+        :param table: table name
+        :param data: data to write {column: value}
+        :param condition: update condition, {"id": 1} or "id = 1"
+        """
+        set_clause = ', '.join([f"{self._safe_identifier(k)} = %s" for k in data.keys()])
+        query = f"UPDATE {self._safe_identifier(table)} SET {set_clause}"
+
+        if isinstance(condition, dict):
+            where_clause = ' AND '.join([f"{self._safe_identifier(k)} = %s" for k in condition.keys()])
+            query += f" WHERE {where_clause}"
+            args = list(data.values()) + list(condition.values())
+        else:
+            query += f" WHERE {condition}"
+            args = list(data.values())
+
+        cursor = await self.execute(query, args, commit=commit)
+        return cursor.rowcount
+
+
+class AsyncMySQLBatchWriter:
+    """
+    Async batch writer, built for crawlers
+
+    Usage:
+    1. Call add() from the main crawl loop to buffer rows
+    2. Buffered rows are flushed to the database in timed batches
+    3. Call flush() at shutdown so no buffered rows are lost
+
+    Advantages:
+    - Non-blocking writes that do not slow the crawler down
+    - Automatic batching, reducing database I/O
+    - Automatic dedup (keyed on the first column, assumed to be the primary key)
+    """
+
+    def __init__(
+        self,
+        pool: AsyncMySQLPool,
+        table: str,
+        batch_size: int = 2000,
+        flush_interval: float = 2.0,
+        dedup: bool = True,
+        update_columns: list = None
+    ):
+        """
+        :param pool: AsyncMySQLPool instance
+        :param table: table name
+        :param batch_size: rows per flush
+        :param flush_interval: auto-flush interval (seconds)
+        :param dedup: dedup buffered rows by key (latest row wins)
+        :param update_columns: columns to update on conflict; None updates all
+        """
+        self.pool = pool
+        self.table = table
+        self.batch_size = batch_size
+        self.flush_interval = flush_interval
+        self.dedup = dedup
+        self.update_columns = update_columns
+        self._buffer = {}  # dict, so duplicate keys overwrite older rows
+        self._keys = None
+        self._last_flush = asyncio.get_event_loop().time()
+        self._lock = asyncio.Lock()
+
+    def add(self, data: dict):
+        """
+        Add a row to the buffer
+        :param data: row as a dictionary
+        """
+        if self._keys is None:
+            self._keys = tuple(data.keys())
+
+        if self.dedup:
+            # dedup on the first field (assumed to be the primary key)
+            key = data.get(self._keys[0])
+            self._buffer[key] = data
+        else:
+            self._buffer[len(self._buffer)] = data
+
+    async def _should_flush(self) -> bool:
+        """Decide whether a flush is due"""
+        if len(self._buffer) >= self.batch_size:
+            return True
+        elapsed = asyncio.get_event_loop().time() - self._last_flush
+        if elapsed >= self.flush_interval and self._buffer:
+            return True
+        return False
+
+    async def flush(self):
+        """Force-flush the buffer"""
+        async with self._lock:
+            if not self._buffer:
+                return 0
+
+            data_list = list(self._buffer.values())
+            self._buffer.clear()
+            self._last_flush = asyncio.get_event_loop().time()
+
+            try:
+                if self.update_columns:
+                    return await self.pool.insert_or_update_many(
+                        self.table, data_list, self.update_columns
+                    )
+                else:
+                    return await self.pool.insert_many(self.table, data_list)
+            except Exception as e:
+                logger.error(f"Batch write failed: {e}")
+                raise
+
+    async def auto_flush(self):
+        """Auto-flush coroutine - run it as a background task"""
+        while True:
+            await asyncio.sleep(0.5)  # poll every 0.5 seconds
+            if await self._should_flush():
+                try:
+                    await self.flush()
+                except Exception as e:
+                    logger.error(f"Auto-flush failed: {e}")
+
+
+# ============================================================
+# Sync wrapper layer - for calling the pool from synchronous code
+# ============================================================
+import threading
+from concurrent.futures import ThreadPoolExecutor
+
+_executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="mysql_async_")
+
+
+def _run_async(coro):
+    """Run a coroutine to completion on a fresh event loop"""
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+    try:
+        return loop.run_until_complete(coro)
+    finally:
+        loop.close()
+
+
+class SyncWrapper:
+    """
+    Sync wrapper - lets the async pool be used from synchronous code
+
+    Usage example:
+        pool = AsyncMySQLPool()
+        sync_pool = SyncWrapper(pool)
+        sync_pool.init()  # synchronous initialization
+
+        # synchronous calls
+        sync_pool.insert_many("table", [{"col": "val"}])
+        sync_pool.close()
+    """
+
+    def __init__(self, async_pool: AsyncMySQLPool):
+        self.async_pool = async_pool
+        self._loop = None
+        self._thread = None
+        self._running = False
+
+    def _run_loop(self):
+        """Run the event loop inside the worker thread"""
+        asyncio.set_event_loop(self._loop)
+        self._loop.run_forever()
+
+    def init(self):
+        """Synchronous initialization"""
+        if self._running:
+            return
+
+        self._loop = asyncio.new_event_loop()
+        self._thread = threading.Thread(target=self._run_loop, daemon=True)
+        self._thread.start()
+        self._running = True
+
+        # initialize the pool on the loop thread
+        asyncio.run_coroutine_threadsafe(self.async_pool.init(), self._loop).result()
+
+    def close(self):
+        """Synchronous shutdown"""
+        if not self._running:
+            return
+
+        asyncio.run_coroutine_threadsafe(self.async_pool.close(), self._loop).result()
+
+        if self._loop:
+            self._loop.call_soon_threadsafe(self._loop.stop)
+            self._thread.join(timeout=2)
+            self._running = False
+
+    def insert_many(self, table, data_list, **kwargs):
+        """Synchronous batch insert"""
+        coro = self.async_pool.insert_many(table, data_list, **kwargs)
+        return asyncio.run_coroutine_threadsafe(coro, self._loop).result()
+
+    def insert_one_or_dict(self, table, data, **kwargs):
+        """Synchronous single-row insert"""
+        coro = self.async_pool.insert_one_or_dict(table, data, **kwargs)
+        return asyncio.run_coroutine_threadsafe(coro, self._loop).result()
+
+    def select_all(self, query, args=None):
+        """Synchronous fetch-all"""
+        coro = self.async_pool.select_all(query, args)
+        return asyncio.run_coroutine_threadsafe(coro, self._loop).result()
+
+    def select_one(self, query, args=None):
+        """Synchronous fetch-one"""
+        coro = self.async_pool.select_one(query, args)
+        return asyncio.run_coroutine_threadsafe(coro, self._loop).result()
+
+
+if __name__ == '__main__':
+
+    async def test():
+        pool = AsyncMySQLPool(min_size=5, max_size=20)
+        await pool.init()
+
+        # health check
+        health = await pool.check_pool_health()
+        print(f"Pool healthy: {health}")
+
+        # insert test
+        test_data = [
+            {"card_id": 1, "card_name": "test card 1", "card_type": "character"},
+            {"card_id": 2, "card_name": "test card 2", "card_type": "character"},
+        ]
+        result = await pool.insert_many("one_piece_record", test_data, ignore=True)
+        print(f"Inserted: {result}")
+
+        # insert-or-update test
+        await pool.insert_or_update_many(
+            "one_piece_record",
+            [{"card_id": 1, "card_name": "updated card 1"}],
+            update_columns=["card_name"]
+        )
+
+        await pool.close()
+        print("Test finished!")
+
+
+    asyncio.run(test())
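
For reference, a minimal sketch of wiring AsyncMySQLBatchWriter into a crawl loop (the table and column names are hypothetical):

    # Sketch only: run auto_flush() as a background task, add() rows as they
    # are scraped, and flush() once at shutdown. "demo_table" is made up.
    import asyncio
    from mysql_pool_async import AsyncMySQLPool, AsyncMySQLBatchWriter

    async def crawl():
        pool = AsyncMySQLPool()
        await pool.init()
        writer = AsyncMySQLBatchWriter(pool, table="demo_table", batch_size=500)
        flusher = asyncio.create_task(writer.auto_flush())  # background flusher
        for i in range(10_000):
            writer.add({"card_id": i, "card_name": f"card-{i}"})  # non-blocking
        await writer.flush()   # drain whatever is left in the buffer
        flusher.cancel()       # stop the background coroutine
        await pool.close()

    asyncio.run(crawl())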

+ 215 - 0
snkrdunk_spider/snk_single_card_spider.py

@@ -0,0 +1,215 @@
+# -*- coding: utf-8 -*-
+# Author : Charley
+# Python : 3.10.8
+# Date   : 2026/4/15 15:10
+import time
+import inspect
+import requests
+import schedule
+import user_agent
+from loguru import logger
+from mysql_pool import MySQLConnectionPool
+from tenacity import retry, stop_after_attempt, wait_fixed
+
+logger.remove()
+logger.add("./logs/{time:YYYYMMDD}.log", encoding='utf-8', rotation="00:00",
+           format="[{time:YYYY-MM-DD HH:mm:ss.SSS}] {level} {message}",
+           level="DEBUG", retention="7 day")
+
+cookie = "ENSID=MTc3NjMxMDE1MXw1LThLaEQ5OVRZTDBLbktHUzhpTG9PR01ybUQwYmxyb2dqMlJETVpUN3dCVVV3aWpwbXE0UGtDYXkwN2ZlUU9ib3I2a3ZCa21xZGdDQjc2aVBVNElHWDlXNXlPUS1YZVd816ifsPbu7_r0ouHJxFdsRYT3jsCwBFEZ6IMhzHnGcyI=; _pin_unauth=dWlkPU5qVXlOak16TlRJdE1UVmxNUzAwTUdZNExXRmpZakl0WlRJMU1EazBaVEl6WVRRMw; _tt_enable_cookie=1; _ttp=01KP7TC2C47N8816E74NZ2S87G_.tt.1; aws-waf-token=85ea0abd-c7e7-44fa-a486-721005517367:BgoAddgnTIMdAAAA:sgUJ0isHGRMm9HGMWaserzc0yH/cfmcnAJs7tApXkvxu8CkSB2W2/+vEB9V4uBUqE+8uegKHQINRE2ExEMC9XRl6QLHoC16s5mOsvrptUYDuWqAnyQJcr8a6dAlUpokqmdLFzLRoiM2digCAKXmKRM5fbEQgY56lCzRpNqolUtcS/X9zZQIfJnj2GfmAjNw=; _gcl_au=1.1.140783051.1776231515.1213660920.1776231619.1776231619; _ga_T9G4FWRKGP=GS2.1.s1776309189$o1$g0$t1776309189$j60$l0$h0; _gid=GA1.2.339334579.1776309190; __lt__cid=074b5327-9f75-4356-a201-9879abb859a5; __lt__sid=1cd194ae-b7f570f4; __rtbh.uid=%7B%22eventType%22%3A%22uid%22%2C%22id%22%3A%22undefined%22%2C%22expiryDate%22%3A%222027-04-16T03%3A13%3A11.377Z%22%7D; __rtbh.lid=%7B%22eventType%22%3A%22lid%22%2C%22id%22%3A%221CbwgL9cLbT9BLfXJtkK%22%2C%22expiryDate%22%3A%222027-04-16T03%3A13%3A11.379Z%22%7D; _ga_WLFPCJHLHL=GS2.1.s1776309194$o1$g0$t1776309194$j60$l0$h0; _ga=GA1.1.342828207.1776231516; forterToken=6f95dbdd5df5486fb9bfe93652a6b6a7_1776309188237__UDF43-m4_27ck_; ttcsid_CEM1KGBC77U8BHMFF6SG=1776309193318::MmzC2Nw5ahbgUD_ovmJa.1.1776309203360.1; _dd_s=aid=c68687bf-9ca5-4040-9266-a9c8281287b7&logs=1&id=f2443d73-de49-431a-9dba-17f30b9410ac&created=1776309188187&expire=1776310738774&rum=0; _rdt_uuid=1776231515762.27c25d2a-f2b5-4370-89ca-ba2ba6d93c35; _rdt_em=:7fa565b08bc719fc95a07f3f9cbb8cfcd715b62ce82bc26739d3074a5196870c; ttcsid_CAP79SBC77U56BB6BI50=1776309194536::zh_5-OLx-MD4DmA4jALH.4.1776310041216.1; ttcsid=1776309194523::bhCq-3lisAc3SvWnrZng.4.1776310041216.0::1.845885.846484::955506.51.1579.4652::954188.255.4300; _ga_6H1EYVVN53=GS2.1.s1776308613$o5$g1$t1776310150$j60$l0$h0; _ga_3722WCREQR=GS2.1.s1776308613$o5$g1$t1776310150$j60$l0$h0"
+
+headers = {
+    "accept": "application/json",
+    # "referer": "https://snkrdunk.com/en/trading-cards/671489?slide=right",
+    "user-agent": user_agent.generate_user_agent(),
+    "cookie": cookie
+}
+
+
+def after_log(retry_state):
+    """
+    tenacity retry callback
+    :param retry_state: RetryCallState object
+    """
+    # Use the logger passed as the wrapped function's first argument, if any
+    if retry_state.args and len(retry_state.args) > 0:
+        log = retry_state.args[0]  # logger passed to the wrapped function
+    else:
+        log = logger  # fall back to the global logger
+
+    if retry_state.outcome.failed:
+        log.warning(
+            f"Function '{retry_state.fn.__name__}', attempt {retry_state.attempt_number} failed")
+    else:
+        log.info(f"Function '{retry_state.fn.__name__}', attempt {retry_state.attempt_number} succeeded")
+
+
+@retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
+def get_proxys(log):
+    """
+    获取代理配置
+
+    :param log: 日志对象
+    :return: 代理字典
+    """
+    tunnel = "x371.kdltps.com:15818"
+    kdl_username = "t13753103189895"
+    kdl_password = "o0yefv6z"
+    try:
+        proxies = {
+            "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": kdl_username, "pwd": kdl_password, "proxy": tunnel},
+            "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": kdl_username, "pwd": kdl_password, "proxy": tunnel}
+        }
+        return proxies
+    except Exception as e:
+        log.error(f"Error getting proxy: {e}")
+        raise e
+
+
+@retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
+def get_single_page(log, page, brand):
+    """
+    获取单页数据
+    :param log: 日志对象
+    :param page: 页码
+    :param brand: 品牌
+    :return: 数据列表
+    """
+    log.info(f"获取第 {page} 页数据,品牌为 {brand}....................................................")
+    url = "https://snkrdunk.com/en/v1/trading-cards/used"
+    params = {
+        "brandId": brand,
+        # "brandId": "pokemon",
+        "categoryId": "25",
+        # "page": "29",
+        "page": page,
+        "perPage": "20",
+        "sortType": "latest",
+        "isOnlyOnSale": "false"
+    }
+    response = requests.get(url, headers=headers, params=params, proxies=get_proxys(log), timeout=22)
+    response.raise_for_status()
+
+    resp_json = response.json()
+    # print(resp_json)
+    usedTradingCards = resp_json.get("usedTradingCards", [])
+    return usedTradingCards
+
+
+def parse_data(log, resp_list, brand, sql_pool):
+    """
+    解析数据
+    :param log: 日志对象
+    :param resp_list: 数据列表
+    :param brand: 品牌
+    :param sql_pool: 数据库连接池
+    """
+    try:
+        dict_list = []
+        for data in resp_list:
+            used_id = data.get("id")
+            tradingCardId = data.get("tradingCardId")
+            card_name = data.get("tradingCardName")
+            listing_uid = data.get("listingUID")  # 进入详情页的id
+            price = data.get("price")
+            price = price.replace("US $", "").replace(",", "")
+            condition = data.get("condition")
+            front_img = data.get("thumbnailUrl")
+            # Strip the size suffix from the image URL, e.g. https://cdn.snkrdunk.com/apparel_used_listings/49c34dac-27d9-4b7b-96db-72a6c70a464c/5907002.jpeg?size=m
+            front_img = front_img.split("?")[0]
+            is_sold = data.get("isSold")
+            data_dict = {
+                "brand": brand,
+                "used_id": used_id,
+                "trading_card_id": tradingCardId,
+                "card_name": card_name,
+                "listing_uid": listing_uid,
+                "price": price,
+                "score": condition,
+                "front_img": front_img,
+                "is_sold": is_sold,
+                "category": "Trading Cards (Single Card)"
+            }
+            # print(data_dict)
+            dict_list.append(data_dict)
+        sql_pool.insert_many(table="snkrdunk_record", data_list=dict_list, ignore=True)
+    except Exception as e:
+        log.error(f"Error parsing data: {e}")
+
+
+def get_list_data(log, brand, sql_pool):
+    """
+    获取列表数据
+    :param log: 日志对象
+    :param brand: 品牌
+    :param sql_pool: 数据库连接池
+    """
+    page = 1
+    while True:
+        try:
+            data_list = get_single_page(log, page, brand)
+            if not data_list:
+                log.info(f"No more data for brand {brand}, page {page}")
+                break
+        except Exception as e:
+            log.error(f"Error getting page {page} for brand {brand}: {e}")
+            data_list = []
+
+        parse_data(log, data_list, brand, sql_pool)
+
+        if len(data_list) < 20:
+            log.info(f"No more data for brand {brand}, page {page}")
+            break
+
+        page += 1
+
+
+@retry(stop=stop_after_attempt(100), wait=wait_fixed(3600), after=after_log)
+def snk_main(log):
+    """
+    主函数
+
+    :param log: logger对象
+    """
+    log.info(
+        f'开始运行 {inspect.currentframe().f_code.co_name} 爬虫任务....................................................')
+
+    # 配置 MySQL 连接池
+    sql_pool = MySQLConnectionPool(log=log)
+    if not sql_pool.check_pool_health():
+        log.error("数据库连接池异常")
+        raise RuntimeError("数据库连接池异常")
+
+    brand_list = ["pokemon", "onepiece", "yu-gi-oh"]
+
+    try:
+        for brand in brand_list:
+            log.info(f'开始采集 {brand} 数据....................................................')
+            try:
+                get_list_data(log, brand, sql_pool)
+            except Exception as e:
+                log.error(f'采集 {brand} 数据异常: {e}')
+    except Exception as e:
+        log.error(f'{inspect.currentframe().f_code.co_name} error: {e}')
+    finally:
+        log.info(f'爬虫程序 {inspect.currentframe().f_code.co_name} 运行结束,等待下一轮的采集任务............')
+
+
+def schedule_task():
+    """
+    爬虫模块 定时任务 的启动文件
+    """
+    # 立即运行一次任务
+    snk_main(log=logger)
+
+    # 设置定时任务
+    schedule.every().day.at("00:01").do(snk_main, log=logger)
+
+    while True:
+        schedule.run_pending()
+        time.sleep(1)
+
+
+if __name__ == '__main__':
+    # schedule_task()
+    snk_main(log=logger)