
add clove spider 8.1.1

lei.chen 4 months ago
parent commit
e852f94816

+ 74 - 0
clove_spider/YamlLoader.py

@@ -0,0 +1,74 @@
+# -*- coding: utf-8 -*-
+#
+import os
+import re
+import yaml
+
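+# Matches "${VAL}" or "${ENV_VAR:default}": ENV captures the variable name
+# (including the trailing colon) and VAL the default or literal value.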
+regex = re.compile(r'^\$\{(?P<ENV>[A-Z_\-]+\:)?(?P<VAL>[\w\.]+)\}$')
+
+class YamlConfig:
+    def __init__(self, config):
+        self.config = config
+
+    def get(self, key: str):
+        return YamlConfig(self.config.get(key))
+
+    def getValueAsString(self, key: str):
+        try:
+            match = regex.match(self.config[key])
+            group = match.groupdict()
+            if group['ENV'] is not None:
+                env = group['ENV'][:-1]
+                return os.getenv(env, group['VAL'])
+            # "${VAL}" without an env prefix: return the literal value
+            return group['VAL']
+        except (TypeError, AttributeError):
+            # Not a "${...}" placeholder: return the raw config value
+            return self.config[key]
+
+    def getValueAsInt(self, key: str):
+        try:
+            match = regex.match(self.config[key])
+            group = match.groupdict()
+            if group['ENV'] is not None:
+                env = group['ENV'][:-1]
+                return int(os.getenv(env, group['VAL']))
+            return int(group['VAL'])
+        except (TypeError, AttributeError):
+            return int(self.config[key])
+
+    def getValueAsBool(self, key: str):
+        def _to_bool(value):
+            # bool("false") would be True, so compare against known true-strings
+            return str(value).strip().lower() in ('1', 'true', 'yes', 'on')
+
+        try:
+            match = regex.match(self.config[key])
+            group = match.groupdict()
+            if group['ENV'] is not None:
+                env = group['ENV'][:-1]
+                return _to_bool(os.getenv(env, group['VAL']))
+            return _to_bool(group['VAL'])
+        except (TypeError, AttributeError):
+            return _to_bool(self.config[key])
+    
+def readYaml(path: str = 'application.yml', profile: str = None) -> YamlConfig:
+    conf = {}
+    if os.path.exists(path):
+        with open(path) as fd:
+            conf = yaml.load(fd, Loader=yaml.FullLoader)
+
+    if profile is not None:
+        # Overlay application-<profile>.yml on top of the base file, if present
+        base, ext = os.path.splitext(path)
+        profiledYaml = f'{base}-{profile}{ext}'
+        if os.path.exists(profiledYaml):
+            with open(profiledYaml) as fd:
+                conf.update(yaml.load(fd, Loader=yaml.FullLoader))
+
+    return YamlConfig(conf)
+
+# Example usage:
+# conf = readYaml()
+# mysqlConf = conf.get('mysql')
+# host = mysqlConf.getValueAsString("host")
+# port = mysqlConf.getValueAsInt("port")
+# username = mysqlConf.getValueAsString("username")
+# password = mysqlConf.getValueAsString("password")
+# mysql_db = mysqlConf.getValueAsString("db")
+# print(host, port, username, password, mysql_db)
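
For reference, a minimal sketch of the profile overlay (assuming an application-dev.yml file, which is not part of this commit):

    import YamlLoader

    # Base values come from application.yml; if application-dev.yml exists,
    # its top-level keys replace the base ones (a shallow dict.update merge).
    conf = YamlLoader.readYaml('application.yml', profile='dev')
    print(conf.get('mysql').getValueAsString('host'))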

+ 6 - 0
clove_spider/application.yml

@@ -0,0 +1,6 @@
+mysql:
+  host: ${MYSQL_HOST:100.64.0.23}
+  port: ${MYSQL_PORT:3306}
+  username: ${MYSQL_USERNAME:crawler}
+  password: ${MYSQL_PASSWORD:Pass2022}
+  db: ${MYSQL_DATABASE:crawler}
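
Each value is a ${ENV_VAR:default} placeholder: the environment variable wins when set, otherwise the default after the colon is used. A minimal sketch (the 127.0.0.1 value is illustrative):

    import os
    os.environ['MYSQL_HOST'] = '127.0.0.1'  # illustrative override

    import YamlLoader
    mysql = YamlLoader.readYaml().get('mysql')
    print(mysql.getValueAsString('host'))  # -> 127.0.0.1 (env var set)
    print(mysql.getValueAsInt('port'))     # -> 3306 (MYSQL_PORT unset, default used)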

+ 327 - 0
clove_spider/clove_blind_box_spider.py

@@ -0,0 +1,327 @@
+# -*- coding: utf-8 -*-
+# Author : Charley
+# Python : 3.10.8
+# Date   : 2025/7/29 15:06
+import json
+import time
+import requests
+# from curl_cffi import requests
+import inspect
+import schedule
+from loguru import logger
+from parsel import Selector
+from tenacity import retry, stop_after_attempt, wait_fixed
+from mysql_pool import MySQLConnectionPool
+
+# logger.remove()
+# logger.add("./logs/{time:YYYYMMDD}.log", encoding='utf-8', rotation="00:00",
+#            format="[{time:YYYY-MM-DD HH:mm:ss.SSS}] {level} {message}",
+#            level="DEBUG", retention="7 day")
+
+
+def after_log(retry_state):
+    """
+    tenacity retry callback
+    :param retry_state: RetryCallState object
+    """
+    # Use the logger passed as the first positional argument, if any
+    if retry_state.args and len(retry_state.args) > 0:
+        log = retry_state.args[0]
+    else:
+        log = logger  # fall back to the global logger
+
+    if retry_state.outcome.failed:
+        log.warning(
+            f"Function '{retry_state.fn.__name__}', attempt {retry_state.attempt_number} failed")
+    else:
+        log.info(f"Function '{retry_state.fn.__name__}', attempt {retry_state.attempt_number} succeeded")
+
+
+@retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
+def get_proxys(log):
+    """
+    Build the proxy configuration for requests
+    :return: proxies dict
+    """
+    tunnel = "x371.kdltps.com:15818"
+    kdl_username = "t13753103189895"
+    kdl_password = "o0yefv6z"
+    try:
+        proxies = {
+            "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": kdl_username, "pwd": kdl_password, "proxy": tunnel},
+            "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": kdl_username, "pwd": kdl_password, "proxy": tunnel}
+        }
+        return proxies
+    except Exception as e:
+        log.error(f"Error getting proxy: {e}")
+        raise e
+
+
+@retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
+def get_blind_box_list(log):
+    log.debug(f"{inspect.currentframe().f_code.co_name}---------------------- 开始获取盲盒列表 ----------------------")
+    headers = {
+        "accept": "application/graphql-response+json, application/graphql+json, application/json, text/event-stream, multipart/mixed",
+        "content-type": "application/json",
+        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36"
+    }
+    url = "https://api.prd.oripa.clove.jp/graphql"
+    data = {
+        "operationName": "orderedOripas",
+        "query": "query orderedOripas($orderedOripasInput: OrderedOripasInput!) {\n  orderedOripas(orderedOripasInput: $orderedOripasInput) {\n    category\n    isR18\n    hasLastOne\n    id\n    isDaily\n    isAppraised\n    isUserLimited\n    requiredRankName\n    name\n    nameLogo\n    nameHidden\n    price\n    publishStatus\n    quantity\n    remaining\n    roundNumber\n    subImages\n    thumbnail\n    extraPrizeThreshold\n    video {\n      id\n      title\n      videoData\n      deleted\n      __typename\n    }\n    oripaSearchTargetPrizes {\n      searchTargetPrize {\n        titleJa\n        __typename\n      }\n      __typename\n    }\n    openAt\n    __typename\n  }\n}",
+        "variables": {
+            "orderedOripasInput": {
+                "isR18": False,
+                "sort": "LOW_REMAINING_RATE"  # 剩余数量
+            }
+        }
+    }
+    response = requests.post(url, headers=headers, json=data, timeout=60, proxies=get_proxys(log))
+    # response = requests.post(url, headers=headers, json=data, timeout=60)
+    # print(response.text)
+    response.raise_for_status()
+    return response.json()
+
+
+def _first_image(value):
+    """Return the first usable image URL from a thumbnail/subImages field.
+
+    The field comes in three shapes: a dict ({"ja": [...] or "..."}), a list, or a string.
+    """
+    if isinstance(value, dict):
+        value = value.get("ja")
+    if isinstance(value, list):
+        return value[0] if value else ''
+    if isinstance(value, str):
+        return value
+    return ''
+
+
+def parse_list(log, resp_data, sql_pool):
+    log.debug(f"{inspect.currentframe().f_code.co_name} start..........")
+    items = resp_data["data"]["orderedOripas"]
+    log.info("fetched %s items" % len(items))
+    if not items:
+        log.debug("no data")
+        return
+
+    info_list = []
+    for item in items:
+        pid = item.get("id")
+        category = item.get("category")
+        title = item.get("name")
+        price = item.get("price")
+        publishStatus = item.get("publishStatus")
+        quantity = item.get("quantity")  # total quantity
+        remaining = item.get("remaining")  # remaining quantity
+
+        # thumbnail and subImages each come in three shapes: {}, [], ''
+        image = _first_image(item.get("thumbnail"))
+        sub_image = _first_image(item.get("subImages"))
+
+        open_at = item.get("openAt")
+
+        data_dict = {
+            "pid": pid,
+            "category": category,
+            "title": title,
+            "price": price,
+            "publish_status": publishStatus,
+            "quantity": quantity,
+            "remaining": remaining,
+            "image": image,
+            "sub_image": sub_image,
+            "open_at": open_at
+        }
+        # print(data_dict)
+        info_list.append(data_dict)
+
+    # Insert into the database
+    if info_list:
+        try:
+            sql_pool.insert_many(table="clove_blind_box_list_record", data_list=info_list, ignore=True)
+        except Exception as e:
+            log.warning(f"{inspect.currentframe().f_code.co_name}, {str(e)[:500]}")
+
+
+@retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
+def get_blind_box_detail(log, pid, sql_pool):
+    headers = {
+        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
+        "referer": "https://oripa.clove.jp/oripa/All",
+        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36"
+    }
+    # url = "https://oripa.clove.jp/oripa/All/cmca1kd0i0001s601a4y07n2b"
+    url = f"https://oripa.clove.jp/oripa/All/{pid}"
+    # response = requests.get(url, headers=headers, cookies=cookies)
+    response = requests.get(url, headers=headers, timeout=60, proxies=get_proxys(log))
+    # print(response.text)
+
+    selector = Selector(text=response.text)
+
+    tag_shang = selector.xpath('//script[@id="__NEXT_DATA__"]/text()').get()
+
+    if not tag_shang:
+        log.debug(f"{inspect.currentframe().f_code.co_name} no __NEXT_DATA__ payload..............")
+        # Mark the task as state 2 (no data)
+        sql_pool.update_one_or_dict(table="clove_blind_box_task", data={"state": 2}, condition={"pid": pid})
+        return
+
+    json_data = json.loads(tag_shang)
+    if json_data:
+        displayedPrizes = json_data.get('props', {}).get('pageProps', {}).get('oripa', {}).get("displayedPrizes", [])
+        log.debug(f"{inspect.currentframe().f_code.co_name} fetched {len(displayedPrizes)} prizes..............")
+        if not displayedPrizes:
+            log.debug(f"{inspect.currentframe().f_code.co_name} no displayedPrizes data..............")
+            # Mark the task as state 2 (no data)
+            sql_pool.update_one_or_dict(table="clove_blind_box_task", data={"state": 2}, condition={"pid": pid})
+            return
+
+        info_list = []
+        for item in displayedPrizes:
+            prize_id = item.get("id")
+            prizeType = item.get("prizeType")
+            quantity = item.get("quantity")
+            mainDescription = item.get("mainDescription")
+            mainDescriptionEn = item.get("mainDescriptionEn")
+            subDescription = item.get("subDescription")
+            kataban = item.get("kataban")
+            imageUrl = item.get("imageUrl")
+            prize_condition = item.get("condition")
+            isReferencePriceTarget = item.get("isReferencePriceTarget")
+            referencePrice = item.get("referencePrice")
+            referencePriceUpdatedAt = item.get("referencePriceUpdatedAt")
+
+            data_dict = {
+                "pid": pid,
+                "prize_id": prize_id,  # 奖品id
+                "prize_type": prizeType,  # 奖品类型
+                "quantity": quantity,  # 数量
+                "main_description": mainDescription,  # 奖品描述
+                "main_description_en": mainDescriptionEn,
+                "sub_description": subDescription,  # 卡片等级
+                "kataban": kataban,  # 卡标签
+                "image_url": imageUrl,  # 图片地址
+                "prize_condition": prize_condition,  # 奖品评分, PSA10
+                "is_reference_price_target": isReferencePriceTarget,  # (疑似)是否开封状态 (1: 是, 0: 否)
+                "reference_price": referencePrice,  # 参考价格
+                "reference_price_updated_at": referencePriceUpdatedAt  # 参考价格更新时间
+            }
+            # print(data_dict)
+            info_list.append(data_dict)
+
+        if info_list:
+            try:
+                sql_pool.insert_many(table="clove_blind_box_detail_record", data_list=info_list, ignore=True)
+                # Mark the task as state 1 (done)
+                sql_pool.update_one_or_dict(table="clove_blind_box_task", data={"state": 1}, condition={"pid": pid})
+            except Exception as e:
+                log.warning(f"{inspect.currentframe().f_code.co_name}, {str(e)[:500]}")
+    else:
+        log.debug(f"{inspect.currentframe().f_code.co_name} request failed, no data..............")
+
+
+@retry(stop=stop_after_attempt(100), wait=wait_fixed(3600), after=after_log)
+def blind_main(log):
+    """
+    主函数
+    :param log: logger对象
+    """
+    log.info(
+        f'开始运行 {inspect.currentframe().f_code.co_name} 爬虫任务.................................................')
+
+    # 配置 MySQL 连接池
+    sql_pool = MySQLConnectionPool(log=log)
+    if not sql_pool.check_pool_health():
+        log.error("数据库连接池异常")
+        raise RuntimeError("数据库连接池异常")
+
+    try:
+        try:
+            resp_data = get_blind_box_list(log)
+            parse_list(log, resp_data, sql_pool)
+        except Exception as e2:
+            log.error(f"Request get_blind_box_list error: {e2}")
+
+        # Queue new work: collect today's distinct pids and upsert them into the task table
+        sql_select_pids = sql_pool.select_all(
+            "SELECT DISTINCT pid FROM clove_blind_box_list_record WHERE DATE(gmt_create_time) = CURDATE()")
+        sql_select_pids = [i[0] for i in sql_select_pids]
+        sql_pool.insert_many(
+            query="INSERT INTO clove_blind_box_task (pid) VALUES (%s) ON DUPLICATE KEY UPDATE pid=VALUES(pid)",
+            args_list=[(pid,) for pid in sql_select_pids]
+        )
+
+        time.sleep(5)
+
+        # Fetch details for pending tasks
+        sql_pids = sql_pool.select_all("SELECT pid FROM clove_blind_box_task WHERE state=0")
+        sql_pids = [i[0] for i in sql_pids]
+        for pid in sql_pids:
+            try:
+                log.debug(f"{inspect.currentframe().f_code.co_name} fetching details for pid: {pid}..............")
+                get_blind_box_detail(log, pid, sql_pool)
+            except Exception as e:
+                log.error(f"get_blind_box_detail error: {e}")
+                # Mark the task as state 3 (error)
+                sql_pool.update_one_or_dict(table="clove_blind_box_task", data={"state": 3}, condition={"pid": pid})
+
+    except Exception as e:
+        log.error(f'{inspect.currentframe().f_code.co_name} error: {e}')
+    finally:
+        log.info(f'crawler {inspect.currentframe().f_code.co_name} finished, waiting for the next run............')
+
+
+def schedule_task():
+    """
+    爬虫模块 定时任务 的启动文件
+    """
+    # 立即运行一次任务
+    # blind_main(log=logger)
+
+    # 设置定时任务
+    schedule.every().day.at("00:01").do(blind_main, log=logger)
+
+    while True:
+        schedule.run_pending()
+        time.sleep(1)
+
+
+if __name__ == '__main__':
+    # blind_main(logger)
+    schedule_task()
+    # get_blind_box_detail(logger, 'cmca1kd0i0001s601a4y07n2b')
+    # test()
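
The retry plumbing above is plain tenacity: the after callback receives a RetryCallState, and the spiders recover their per-call logger from retry_state.args. A minimal standalone sketch of the pattern (the flaky function is hypothetical):

    from tenacity import retry, stop_after_attempt, wait_fixed

    def after_log(retry_state):
        # retry_state.args holds the wrapped call's arguments
        status = "failed" if retry_state.outcome.failed else "succeeded"
        print(retry_state.fn.__name__, "attempt", retry_state.attempt_number, status)

    @retry(stop=stop_after_attempt(3), wait=wait_fixed(1), after=after_log)
    def flaky(log):
        raise RuntimeError("boom")  # always fails; after 3 attempts tenacity raises RetryError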

+ 345 - 0
clove_spider/clove_lucky_bag_spider.py

@@ -0,0 +1,345 @@
+# -*- coding: utf-8 -*-
+# Author : Charley
+# Python : 3.10.8
+# Date   : 2025/7/31 11:33
+import re
+import time
+import requests
+import inspect
+import schedule
+from loguru import logger
+from parsel import Selector
+from tenacity import retry, stop_after_attempt, wait_fixed
+from mysql_pool import MySQLConnectionPool
+from DrissionPage import ChromiumPage, ChromiumOptions
+
+logger.remove()
+logger.add("./logs/{time:YYYYMMDD}.log", encoding='utf-8', rotation="00:00",
+           format="[{time:YYYY-MM-DD HH:mm:ss.SSS}] {level} {message}",
+           level="DEBUG", retention="7 day")
+
+
+def after_log(retry_state):
+    """
+    tenacity retry callback
+    :param retry_state: RetryCallState object
+    """
+    # Use the logger passed as the first positional argument, if any
+    if retry_state.args and len(retry_state.args) > 0:
+        log = retry_state.args[0]
+    else:
+        log = logger  # fall back to the global logger
+
+    if retry_state.outcome.failed:
+        log.warning(
+            f"Function '{retry_state.fn.__name__}', attempt {retry_state.attempt_number} failed")
+    else:
+        log.info(f"Function '{retry_state.fn.__name__}', attempt {retry_state.attempt_number} succeeded")
+
+
+@retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
+def get_response(log, page_url) -> str:
+    """
+    Fetch the page source with a Chromium browser
+    :param log: logger object
+    :param page_url: page URL
+    :return: page HTML source
+    """
+    options = ChromiumOptions().set_paths(local_port=9131, user_data_path=r'D:\Drissionpage_temp\local_port_9131')
+    # options.set_argument("--disable-gpu")
+    options.set_argument("-accept-lang=en-US")
+    page = ChromiumPage(options)
+    try:
+        page.get(page_url)
+        # page.wait.load_start()  # wait for the page to start loading
+        # page_title = page.title.lower()
+        # if "just a moment" in page_title or "请稍候" in page_title:
+        #     cf_bypasser = CloudflareBypasser(page, max_retries=5, log=log)
+        #     cf_bypasser.bypass()
+        #     page.wait.load_start()
+
+        html = page.html
+        if html:
+            return html
+        else:
+            log.error('page failed to load')
+            # Raising triggers the tenacity retry above (strings are not raisable in Python 3)
+            raise RuntimeError('page failed to load, retrying........')
+    except Exception as e:
+        log.error(f'get_response error: {e}')
+        raise
+    finally:
+        page.quit()
+
+
+def get_lucky_bag_list(log, category_name, sql_pool):
+    page = 1
+    while page <= 500:
+        try:
+            log.debug(
+                f'--------------- {inspect.currentframe().f_code.co_name}, page {page}, category_name {category_name} start ---------------')
+            len_items = get_lucky_bag_single_page(log, category_name, page, sql_pool)
+        except Exception as e:
+            log.error(
+                f"{inspect.currentframe().f_code.co_name} Request get_lucky_bag_single_page for page:{page}, {e}")
+            len_items = 0
+
+        # A full page has 72 items; fewer means this was the last page
+        if len_items < 72:
+            log.debug(f'--------------- page {page} has {len_items} items, break ---------------')
+            break
+
+        page += 1
+
+        # Optional pause to avoid querying too frequently
+        # time.sleep(random.uniform(0.5, 1))
+
+
+def get_lucky_bag_single_page(log, category_name, page, sql_pool):
+    log.debug(
+        f"{inspect.currentframe().f_code.co_name} Request category_name:{category_name}, page:{page}................")
+    # url = "https://store.clove.jp/jp/categories/pokemon?page=1"
+    url = f"https://store.clove.jp/jp/categories/{category_name}?page={page}"
+    # response = requests.get(url, headers=headers)
+    response_text = get_response(log, url)
+    # print(response.text)
+    # with open("clove_list.html", "w", encoding="utf-8") as f:
+    #     f.write(response.text)
+    # print(response)
+    # selector = Selector(text=response.text)
+    # tag_li_list = selector.xpath('//div[@class="w-full"]/ul/li')
+    # # print(tag_li_list)
+    # for tag_li in tag_li_list:
+    #     print(tag_li)
+    #     # title = tag_li.xpath('./div/a/div/div[2]/div[1]/p[1]/font/font/text()').get()
+    #     title = tag_li.xpath('./div/a/div/div[2]/div[1]/p[1]//text()').get()
+    #     detail_href = tag_li.xpath('./div/a/@href').get()
+    #     detail_href = 'https://store.clove.jp' + detail_href if detail_href else ""
+    #     # 将图片链接 的 w和q 改为 1200 和75
+    #     """
+    #     src="https://store.clove.jp/_next/image?url=https%3A%2F%2Fstorage.googleapis.com%2Fclove-admin-public-resources%2Fcollectibles%2Fcm4aukaqi00k8s601pg63bli1&w=1200&q=75"
+    #     src="                      /_next/image?url=https%3A%2F%2Fstorage.googleapis.com%2Fclove-admin-public-resources%2Fcollectibles%2Fcm4aukaqi00k8s601pg63bli1&w=3840&q=50"
+    #     """
+    #     image_url = tag_li.xpath('./div/a/div/div[1]//img/@src').get()
+    #     image_url = 'https://store.clove.jp' + image_url if image_url else ""
+    #     image_url = image_url.replace("w=3840", "w=1200").replace("q=50", "q=75") if image_url else ""
+    #     card_number = tag_li.xpath('./div/a/div/div[2]/div[1]/p[1]/font/font/text()').get()
+    #     price = tag_li.xpath('./div/a/div/div[2]/div[2]/div/div/p[2]//text()').getall()
+    #     price = "".join(price).strip() if price else None
+    #
+    #     inventory = tag_li.xpath('./div/a/div/div[2]/div[2]/p/font[2]/font/text()').get()
+    #     data_dict = {
+    #         "category": category_name,
+    #         "title": title,
+    #         "detail_href": detail_href,
+    #         "image_url": image_url,
+    #         "card_number": card_number,
+    #         "price": price,
+    #         "inventory": inventory,
+    #     }
+    #     print(data_dict)
+    selector = Selector(text=response_text)
+    len_items = parse_products_from_html(log, selector, category_name, sql_pool)
+
+    return len_items
+
+
+def parse_products_from_html(log, selector, category, sql_pool):
+    """Extract product information from the HTML via XPath"""
+    # Every product card is a ProductPreview component
+    tag_div_list = selector.xpath('//div[@data-sentry-component="ProductPreview"]')
+    log.debug(f"found {len(tag_div_list)} products")
+
+    # # If no ProductPreview components are found, other selectors could serve as fallbacks
+    # if len(tag_div_list) == 0:
+    #     # Try li elements as a fallback
+    #     tag_div_list = selector.xpath('//ul[@data-testid="products-list-loader"]/li')
+    #     print(f"fallback found {len(tag_div_list)} product placeholders")
+    #
+    #     # If still nothing, the data could be parsed from the script tags instead
+    #     if len(tag_div_list) == 0:
+    #         print("no product elements found, trying script tags...")
+    #         # e.g. a parse_products_from_script helper
+    #         return
+
+    info_list = []
+    for tag_div in tag_div_list:
+        # Extract product fields
+        title = tag_div.xpath('.//p[@data-testid="product-title"]/text()').get()
+
+        # subtitle may hold the card number, e.g. "117/139"
+        subtitle = tag_div.xpath('.//p[contains(@class, "text-ui-fg-subtle")]/text()').get()
+
+        detail_href = tag_div.xpath('./a/@href').get()
+        detail_href = 'https://store.clove.jp' + detail_href if detail_href else ""
+
+        # Extract the price; several XPath paths were tried, the live one is below
+
+        # price_elements = tag_div.xpath('.//p[contains(text(), "¥")]/following-sibling::p[1]//text()').getall()
+        # price_elements = tag_div.xpath('.//p[contains(@class, "font-bold") and contains(@class, "text-base")]//text()').getall()
+
+        price_elements = tag_div.xpath(
+            './/div[@data-sentry-component="PreviewPrice"]//p//text() | .//a//div[@data-sentry-component="PreviewPrice"]//text()'
+        ).getall()
+        # print(price_elements)
+
+        price = ''.join(price_elements).strip() if price_elements else None
+        # Clean the price: strip the currency symbol and thousands separators
+        if price:
+            price = re.sub(r'[¥,]', '', price).strip()
+
+        # Extract the inventory, tolerating different text-node layouts
+        inventory_text = tag_div.xpath('.//p[contains(text(), "在庫数")]//text()').getall()
+        inventory = None
+        if inventory_text and len(inventory_text) > 1:
+            inventory = inventory_text[1].strip()
+        elif inventory_text:
+            # Only one text node found: pull the number out of it
+            inventory_full_text = ''.join(inventory_text)
+            inventory_match = re.search(r'\d+', inventory_full_text)
+            if inventory_match:
+                inventory = inventory_match.group()
+
+        # Extract the image URL
+        image_url = tag_div.xpath('.//div[@data-sentry-component="ImageOrPlaceholder"]//img/@src').get()
+        image_url = 'https://store.clove.jp' + image_url if image_url else ""
+        image_url = image_url.replace("w=3840", "w=1200").replace("q=50", "q=75") if image_url else ""
+
+        data_dict = {
+            "title": title,
+            "subtitle": subtitle,  # card number info such as "117/139"
+            "detail_href": detail_href,
+            "image_url": image_url,
+            "price": price,
+            "inventory": inventory,  # stock
+            "category": category
+        }
+        # print(data_dict)
+        info_list.append(data_dict)
+
+    # Insert into the database
+    if info_list:
+        try:
+            sql_pool.insert_many(table="clove_lucky_bag_list_record", data_list=info_list, ignore=True)
+        except Exception as e:
+            log.warning(f"{inspect.currentframe().f_code.co_name}, {str(e)[:500]}")
+
+    return len(tag_div_list)
+
+
+def get_detail(log, sql_id, detail_url, sql_pool):
+    headers = {
+        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
+        "referer": "https://store.clove.jp/jp/categories/lorcana",
+        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36"
+    }
+    # url = "https://store.clove.jp/jp/products/cm8cv711v00b4s60197g7tikc"
+    response = requests.get(detail_url, headers=headers, timeout=60)
+    # print(response.text)
+    # print(response.status_code)
+    selector = Selector(response.text)
+    # tag_div_list = selector.xpath('//div[@class="grid gap-y-3"]/div')
+    # for tag_div in tag_div_list:
+    first_inventory = selector.xpath('//div[@class="grid gap-y-3"]/div[1]/div[1]/p/text()').getall()
+    first_inventory = ''.join(first_inventory).strip() if first_inventory else None
+    first_inventory = first_inventory.replace('在庫: ', '').replace('点', '') if first_inventory else None
+    first_price = selector.xpath('//div[@class="grid gap-y-3"]/div[1]//span[@data-testid="product-price"]/text()').get()
+
+    second_inventory = selector.xpath('//div[@class="grid gap-y-3"]/div[2]/div[1]/p/text()').getall()
+    second_inventory = ''.join(second_inventory).strip() if second_inventory else None
+    second_inventory = second_inventory.replace('在庫: ', '').replace('点', '') if second_inventory else None
+    second_price = selector.xpath(
+        '//div[@class="grid gap-y-3"]/div[2]//span[@data-testid="product-price"]/text()').get()
+
+    third_inventory = selector.xpath('//div[@class="grid gap-y-3"]/div[3]/div[1]/p/text()').getall()
+    third_inventory = ''.join(third_inventory).strip() if third_inventory else None
+    third_inventory = third_inventory.replace('在庫: ', '').replace('点', '') if third_inventory else None
+    third_price = selector.xpath('//div[@class="grid gap-y-3"]/div[3]//span[@data-testid="product-price"]/text()').get()
+
+    data_dict = {
+        "first_inventory": first_inventory,
+        "first_price": first_price,
+        "second_inventory": second_inventory,
+        "second_price": second_price,
+        "third_inventory": third_inventory,
+        "third_price": third_price
+    }
+    # print(data_dict)
+    try:
+        sql_pool.update_one_or_dict(table="clove_lucky_bag_list_record", data=data_dict, condition={"id": sql_id})
+        # Mark the row as done (state=1); update_one takes parameterized args
+        sql_pool.update_one("update clove_lucky_bag_list_record set state=1 where id=%s", (sql_id,))
+    except Exception as e:
+        log.warning(f"{inspect.currentframe().f_code.co_name}, {str(e)[:500]}")
+        # Mark the row as failed (state=3)
+        sql_pool.update_one("update clove_lucky_bag_list_record set state=3 where id=%s", (sql_id,))
+
+
+@retry(stop=stop_after_attempt(100), wait=wait_fixed(3600), after=after_log)
+def lucky_main(log):
+    """
+    主函数
+    :param log: logger对象
+    """
+    log.info(
+        f'开始运行 {inspect.currentframe().f_code.co_name} 爬虫任务.................................................')
+
+    # 配置 MySQL 连接池
+    sql_pool = MySQLConnectionPool(log=log)
+    if not sql_pool.check_pool_health():
+        log.error("数据库连接池异常")
+        raise RuntimeError("数据库连接池异常")
+
+    try:
+        category_list = ["pokemon", "onepiece", "duel-masters", "lorcana", "fab"]
+        for category in category_list:
+            try:
+                get_lucky_bag_list(log, category, sql_pool)
+            except Exception as e2:
+                log.error(f"Request get_lucky_bag_list error: {e2}")
+
+        # Fetch detail pages for pending rows
+        sql_result = sql_pool.select_all("select id, detail_href from clove_lucky_bag_list_record where state=0")
+        if sql_result:
+            for row in sql_result:
+                try:
+                    pid, detail_href = row
+                    log.debug(f"{inspect.currentframe().f_code.co_name} fetching details for pid: {pid}..............")
+                    get_detail(log, pid, detail_href, sql_pool)
+                except Exception as e:
+                    log.error(f"Request get_detail error: {e}")
+    except Exception as e:
+        log.error(f'{inspect.currentframe().f_code.co_name} error: {e}')
+    finally:
+        log.info(f'crawler {inspect.currentframe().f_code.co_name} finished, waiting for the next run............')
+
+
+def schedule_task():
+    """
+    爬虫模块 定时任务 的启动文件
+    """
+    # 立即运行一次任务
+    lucky_main(log=logger)
+
+    # 设置定时任务
+    schedule.every().day.at("00:01").do(lucky_main, log=logger)
+
+    while True:
+        schedule.run_pending()
+        time.sleep(1)
+
+
+if __name__ == '__main__':
+    # get_lucky_bag_single_page(logger, 'pokemon')
+    # lucky_main(log=logger)
+    # get_detail(log=logger)
+    schedule_task()
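
get_response drives a real Chromium instance through DrissionPage rather than plain requests, which is what lets this spider render the JS-heavy store pages. A minimal sketch of the same calls (local_port, user_data_path, and the URL are illustrative):

    from DrissionPage import ChromiumPage, ChromiumOptions

    options = ChromiumOptions().set_paths(local_port=9222, user_data_path='/tmp/dp_profile')
    page = ChromiumPage(options)
    try:
        page.get('https://example.com')
        html = page.html  # rendered page source
    finally:
        page.quit()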

+ 155 - 0
clove_spider/clove_recycle_spider.py

@@ -0,0 +1,155 @@
+# -*- coding: utf-8 -*-
+# Author : Charley
+# Python : 3.10.8
+# Date   : 2025/8/1 13:46
+import time
+import requests
+import inspect
+import schedule
+from loguru import logger
+from parsel import Selector
+from tenacity import retry, stop_after_attempt, wait_fixed
+from mysql_pool import MySQLConnectionPool
+
+"""
+https://store.clove.jp/jp/buying/pokemon/featured
+"""
+
+# logger.remove()
+# logger.add("./logs/{time:YYYYMMDD}.log", encoding='utf-8', rotation="00:00",
+#            format="[{time:YYYY-MM-DD HH:mm:ss.SSS}] {level} {message}",
+#            level="DEBUG", retention="7 day")
+
+
+def after_log(retry_state):
+    """
+    tenacity retry callback
+    :param retry_state: RetryCallState object
+    """
+    # Use the logger passed as the first positional argument, if any
+    if retry_state.args and len(retry_state.args) > 0:
+        log = retry_state.args[0]
+    else:
+        log = logger  # fall back to the global logger
+
+    if retry_state.outcome.failed:
+        log.warning(
+            f"Function '{retry_state.fn.__name__}', attempt {retry_state.attempt_number} failed")
+    else:
+        log.info(f"Function '{retry_state.fn.__name__}', attempt {retry_state.attempt_number} succeeded")
+
+
+@retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
+def get_proxys(log):
+    """
+    Build the proxy configuration for requests
+    :return: proxies dict
+    """
+    tunnel = "x371.kdltps.com:15818"
+    kdl_username = "t13753103189895"
+    kdl_password = "o0yefv6z"
+    try:
+        proxies = {
+            "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": kdl_username, "pwd": kdl_password, "proxy": tunnel},
+            "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": kdl_username, "pwd": kdl_password, "proxy": tunnel}
+        }
+        return proxies
+    except Exception as e:
+        log.error(f"Error getting proxy: {e}")
+        raise e
+
+
+@retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
+def get_recycle_list(log, category, sql_pool):
+    log.debug(f'{inspect.currentframe().f_code.co_name} start, category:{category}....................')
+    headers = {
+        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
+        "referer": "https://store.clove.jp/jp/buying/pokemon",
+        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36"
+    }
+    # url = "https://store.clove.jp/jp/buying/pokemon/featured"
+    url = f"https://store.clove.jp/jp/buying/{category}/featured"
+    response = requests.get(url, headers=headers, timeout=22)
+    # response = requests.get(url, headers=headers, proxies=get_proxys(log), timeout=22)
+    # print(response.text)
+    response.raise_for_status()
+
+    selector = Selector(text=response.text)
+    tag_li_list = selector.xpath('//div[@class="w-full"]/ul/li')
+    if not tag_li_list:
+        log.warning(f"{inspect.currentframe().f_code.co_name}, failed to fetch list, category:{category}..........")
+        return
+
+    info_list = []
+    for tag_li in tag_li_list:
+        image_url = tag_li.xpath('./div/button//img/@src').get()
+        image_url = 'https://store.clove.jp/' + image_url if image_url else None
+        title = tag_li.xpath('./div/div/p[1]/text()').get()
+        subtitle = tag_li.xpath('./div/div/p[2]/text()').get()
+        price = tag_li.xpath('./div/div/div/p[2]/text()').get()
+        price = price.replace(',', '') if price else None
+
+        data_dict = {
+            "title": title,
+            "subtitle": subtitle,
+            "price": price,
+            "image_url": image_url,
+            "category": category
+        }
+        # print(data_dict)
+        info_list.append(data_dict)
+
+    if info_list:
+        try:
+            sql_pool.insert_many(table="clove_recycle_record", data_list=info_list)
+        except Exception as e:
+            log.warning(f"{inspect.currentframe().f_code.co_name}, {str(e)[:500]}")
+
+
+@retry(stop=stop_after_attempt(100), wait=wait_fixed(3600), after=after_log)
+def recycle_main(log):
+    """
+    主函数
+    :param log: logger对象
+    """
+    log.info(
+        f'开始运行 {inspect.currentframe().f_code.co_name} 爬虫任务.................................................')
+
+    # 配置 MySQL 连接池
+    sql_pool = MySQLConnectionPool(log=log)
+    if not sql_pool.check_pool_health():
+        log.error("数据库连接池异常")
+        raise RuntimeError("数据库连接池异常")
+
+    try:
+        category_list = ["pokemon", "onepiece", "duel-masters", "lorcana", "fab"]
+        for category in category_list:
+            try:
+                get_recycle_list(log, category, sql_pool)
+            except Exception as e2:
+                log.error(f"Request get_recycle_list error: {e2}")
+
+    except Exception as e:
+        log.error(f'{inspect.currentframe().f_code.co_name} error: {e}')
+    finally:
+        log.info(f'crawler {inspect.currentframe().f_code.co_name} finished, waiting for the next run............')
+
+
+def schedule_task():
+    """
+    爬虫模块 定时任务 的启动文件
+    """
+    # 立即运行一次任务
+    recycle_main(log=logger)
+
+    # 设置定时任务
+    schedule.every().day.at("00:01").do(recycle_main, log=logger)
+
+    while True:
+        schedule.run_pending()
+        time.sleep(1)
+
+
+if __name__ == '__main__':
+    # get_recycle_list(logger)
+    schedule_task()
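
The list parsing here is straight parsel: select the li nodes, then read text nodes with relative XPath. A minimal self-contained sketch with the same shape of expressions (the HTML snippet is illustrative):

    from parsel import Selector

    html = ('<div class="w-full"><ul><li><div><button><img src="/img/a.png"/></button>'
            '<div><p>Card A</p><p>SR</p><div><p>¥</p><p>1,200</p></div></div></div></li></ul></div>')
    sel = Selector(text=html)
    for li in sel.xpath('//div[@class="w-full"]/ul/li'):
        title = li.xpath('./div/div/p[1]/text()').get()
        price = li.xpath('./div/div/div/p[2]/text()').get()
        print(title, (price or '').replace(',', ''))  # -> Card A 1200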

+ 569 - 0
clove_spider/mysql_pool.py

@@ -0,0 +1,569 @@
+# -*- coding: utf-8 -*-
+# Author : Charley
+# Python : 3.10.8
+# Date   : 2025/3/25 14:14
+import re
+import pymysql
+import YamlLoader
+from loguru import logger
+from dbutils.pooled_db import PooledDB
+
+# Read the yaml configuration
+yaml = YamlLoader.readYaml()
+mysqlYaml = yaml.get("mysql")
+sql_host = mysqlYaml.getValueAsString("host")
+sql_port = mysqlYaml.getValueAsInt("port")
+sql_user = mysqlYaml.getValueAsString("username")
+sql_password = mysqlYaml.getValueAsString("password")
+sql_db = mysqlYaml.getValueAsString("db")
+
+
+class MySQLConnectionPool:
+    """
+    MySQL connection pool
+    """
+
+    def __init__(self, mincached=4, maxcached=5, maxconnections=10, log=None):
+        """
+        Initialize the connection pool
+        :param mincached: number of idle connections opened at startup (0 = none)
+        :param maxcached: maximum number of idle connections kept in the pool (0 or None = unlimited)
+        :param maxconnections: maximum number of connections allowed (0 or None = unlimited)
+        :param log: custom logger
+        """
+        # Use the loguru logger unless another logger is passed in
+        self.log = log or logger
+        self.pool = PooledDB(
+            creator=pymysql,
+            mincached=mincached,
+            maxcached=maxcached,
+            maxconnections=maxconnections,
+            blocking=True,  # block and wait when the pool is exhausted instead of raising
+            host=sql_host,
+            port=sql_port,
+            user=sql_user,
+            password=sql_password,
+            database=sql_db,
+            ping=0  # liveness checks (0 = never, 1 = when fetched from the pool, 2 = when a cursor is created, 4 = when a query is executed)
+        )
+
+    def _execute(self, query, args=None, commit=False):
+        """
+        Execute a SQL statement
+        :param query: SQL statement
+        :param args: SQL parameters
+        :param commit: whether to commit the transaction
+        :return: the cursor (pymysql buffers result rows client-side)
+        """
+        with self.pool.connection() as conn:
+            with conn.cursor() as cursor:
+                try:
+                    cursor.execute(query, args)
+                    if commit:
+                        conn.commit()
+                except Exception as e:
+                    # Roll back while the connection is still checked out
+                    if commit:
+                        conn.rollback()
+                    self.log.error(f"Error executing query: {e}, Query: {query}, Args: {args}")
+                    raise
+                self.log.debug(f"sql _execute, Query: {query}, Rows: {cursor.rowcount}")
+                return cursor
+
+    def select_one(self, query, args=None):
+        """
+        Run a query and return a single row
+        :param query: query statement
+        :param args: query parameters
+        :return: one row, or None
+        """
+        cursor = self._execute(query, args)
+        return cursor.fetchone()
+
+    def select_all(self, query, args=None):
+        """
+        Run a query and return all rows
+        :param query: query statement
+        :param args: query parameters
+        :return: all rows
+        """
+        cursor = self._execute(query, args)
+        return cursor.fetchall()
+
+    def insert_one(self, query, args):
+        """
+        Execute a single insert statement
+        :param query: insert statement
+        :param args: insert parameters
+        """
+        self.log.info('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>data insert_one inserting>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
+        cursor = self._execute(query, args, commit=True)
+        return cursor.lastrowid  # id of the inserted row
+
+    def insert_all(self, query, args_list):
+        """
+        Execute a batch insert; fall back to row-by-row inserts on failure
+        :param query: insert statement
+        :param args_list: list of parameter tuples
+        """
+        conn = None
+        cursor = None
+        try:
+            conn = self.pool.connection()
+            cursor = conn.cursor()
+            cursor.executemany(query, args_list)
+            conn.commit()
+            self.log.debug(f"sql insert_all, SQL: {query}, Rows: {len(args_list)}")
+            self.log.info('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>data insert_all inserting>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
+        except Exception as e:
+            if conn:
+                conn.rollback()
+            self.log.error(f"Batch insertion failed. Trying single inserts. Error: {e}")
+            # Fall back to inserting one row at a time
+            rowcount = 0
+            for args in args_list:
+                self.insert_one(query, args)
+                rowcount += 1
+            self.log.debug(f"Batch insertion failed. Inserted {rowcount} rows individually.")
+        finally:
+            if cursor:
+                cursor.close()
+            if conn:
+                conn.close()
+
+    def insert_one_or_dict(self, table=None, data=None, query=None, args=None, commit=True, ignore=False):
+        """
+        Single insert (accepts a dict or a raw SQL statement)
+        :param table: table name (required for dict inserts)
+        :param data: dict of {column: value}
+        :param query: raw SQL statement (mutually exclusive with data)
+        :param args: SQL parameters (required with query)
+        :param commit: whether to auto-commit
+        :param ignore: whether to use INSERT IGNORE
+        :return: last insert id, or -1 if a duplicate entry was skipped
+        """
+        if data is not None:
+            if not isinstance(data, dict):
+                raise ValueError("Data must be a dictionary")
+
+            keys = ', '.join([self._safe_identifier(k) for k in data.keys()])
+            values = ', '.join(['%s'] * len(data))
+
+            # Build an INSERT [IGNORE] statement
+            ignore_clause = "IGNORE" if ignore else ""
+            query = f"INSERT {ignore_clause} INTO {self._safe_identifier(table)} ({keys}) VALUES ({values})"
+            args = tuple(data.values())
+        elif query is None:
+            raise ValueError("Either data or query must be provided")
+
+        try:
+            cursor = self._execute(query, args, commit)
+            self.log.info(f"sql insert_one_or_dict, Table: {table}, Rows: {cursor.rowcount}")
+            self.log.info('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>data insert_one_or_dict inserting>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
+            return cursor.lastrowid
+        except pymysql.err.IntegrityError as e:
+            if "Duplicate entry" in str(e):
+                self.log.warning(f"Insert skipped: duplicate entry. Details: {e}")
+                return -1  # -1 signals a skipped duplicate
+            else:
+                self.log.error(f"Database integrity error: {e}")
+                raise
+        except Exception as e:
+            # loguru has no exc_info kwarg; exception() records the traceback
+            self.log.exception(f"Unexpected error: {e}")
+            raise
+
+    def insert_many(self, table=None, data_list=None, query=None, args_list=None, batch_size=500, commit=True, ignore=False):
+        """
+        Batch insert (accepts a list of dicts or a raw SQL statement)
+        :param table: table name (required for dict inserts)
+        :param data_list: list of dicts [{column: value}]
+        :param query: raw SQL statement (mutually exclusive with data_list)
+        :param args_list: list of parameter tuples (required with query)
+        :param batch_size: batch size
+        :param commit: whether to auto-commit
+        :param ignore: whether to use INSERT IGNORE
+        :return: number of affected rows
+        """
+        if data_list is not None:
+            if not data_list or not isinstance(data_list[0], dict):
+                raise ValueError("Data_list must be a non-empty list of dictionaries")
+
+            keys = ', '.join([self._safe_identifier(k) for k in data_list[0].keys()])
+            values = ', '.join(['%s'] * len(data_list[0]))
+
+            # Build an INSERT [IGNORE] statement
+            ignore_clause = "IGNORE" if ignore else ""
+            query = f"INSERT {ignore_clause} INTO {self._safe_identifier(table)} ({keys}) VALUES ({values})"
+            args_list = [tuple(d.values()) for d in data_list]
+        elif query is None:
+            raise ValueError("Either data_list or query must be provided")
+
+        total = 0
+        for i in range(0, len(args_list), batch_size):
+            batch = args_list[i:i + batch_size]
+            conn = None
+            try:
+                conn = self.pool.connection()
+                with conn.cursor() as cursor:
+                    cursor.executemany(query, batch)
+                    if commit:
+                        conn.commit()
+                    total += cursor.rowcount
+            except Exception as e:
+                if conn and commit:
+                    try:
+                        conn.rollback()
+                    except Exception:
+                        pass
+                # Duplicate-entry errors are re-raised quietly for the caller to handle
+                if not (isinstance(e, pymysql.Error) and "Duplicate entry" in str(e)):
+                    self.log.error(f"Database error: {e}")
+                raise
+            finally:
+                if conn:
+                    try:
+                        conn.close()
+                    except Exception:
+                        pass
+        self.log.info(f"sql insert_many, Table: {table}, Total Rows: {total}")
+        return total
+
+    def insert_many_two(self, table=None, data_list=None, query=None, args_list=None, batch_size=500, commit=True):
+        """
+        Batch insert (accepts a list of dicts or a raw SQL statement);
+        falls back to row-by-row inserts when a batch fails
+        :param table: table name (required for dict inserts)
+        :param data_list: list of dicts [{column: value}]
+        :param query: raw SQL statement (mutually exclusive with data_list)
+        :param args_list: list of parameter tuples (required with query)
+        :param batch_size: batch size
+        :param commit: whether to auto-commit
+        :return: number of affected rows
+        """
+        if data_list is not None:
+            if not data_list or not isinstance(data_list[0], dict):
+                raise ValueError("Data_list must be a non-empty list of dictionaries")
+            keys = ', '.join([self._safe_identifier(k) for k in data_list[0].keys()])
+            values = ', '.join(['%s'] * len(data_list[0]))
+            query = f"INSERT INTO {self._safe_identifier(table)} ({keys}) VALUES ({values})"
+            args_list = [tuple(d.values()) for d in data_list]
+        elif query is None:
+            raise ValueError("Either data_list or query must be provided")
+
+        total = 0
+        for i in range(0, len(args_list), batch_size):
+            batch = args_list[i:i + batch_size]
+            try:
+                with self.pool.connection() as conn:
+                    with conn.cursor() as cursor:
+                        cursor.executemany(query, batch)
+                        if commit:
+                            conn.commit()
+                        total += cursor.rowcount
+            except Exception as e:
+                self.log.exception(f"Batch insert failed: {e}")
+                self.log.error(f"Failed SQL: {query}, Args count: {len(batch)}")
+                # Fall back to row-by-row inserts, logging each failure
+                # (the pooled connection was already returned when the with-block exited)
+                rowcount = 0
+                for args in batch:
+                    try:
+                        self.insert_one(query, args)
+                        rowcount += 1
+                    except Exception as e2:
+                        self.log.error(f"Single insert failed: {e2}, Args: {args}")
+                total += rowcount
+                self.log.debug(f"Inserted {rowcount}/{len(batch)} rows individually.")
+        self.log.info(f"sql insert_many_two, Table: {table}, Total Rows: {total}")
+        return total
+
+    def insert_too_many(self, query, args_list, batch_size=1000):
+        """
+        Batch insert committed in slices; useful for 100k+ rows.
+        A failed batch falls back to row-by-row inserts.
+        :param query: insert statement
+        :param args_list: list of parameter tuples
+        :param batch_size: rows per batch
+        """
+        for i in range(0, len(args_list), batch_size):
+            batch = args_list[i:i + batch_size]
+            try:
+                with self.pool.connection() as conn:
+                    with conn.cursor() as cursor:
+                        cursor.executemany(query, batch)
+                        conn.commit()
+            except Exception as e:
+                self.log.error(f"insert_too_many error. Trying single inserts. Error: {e}")
+                # Fall back to single inserts for this batch only
+                for args in batch:
+                    self.insert_one(query, args)
+
+    def update_one(self, query, args=None):
+        """
+        Execute a single update statement
+        :param query: update statement
+        :param args: update parameters
+        """
+        self.log.info('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>data update_one updating>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
+        return self._execute(query, args, commit=True)
+
+    def update_all(self, query, args_list):
+        """
+        Execute a batch update; fall back to row-by-row updates on failure
+        :param query: update statement
+        :param args_list: list of parameter tuples
+        """
+        conn = None
+        cursor = None
+        try:
+            conn = self.pool.connection()
+            cursor = conn.cursor()
+            cursor.executemany(query, args_list)
+            conn.commit()
+            self.log.debug(f"sql update_all, SQL: {query}, Rows: {len(args_list)}")
+            self.log.info('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>data update_all updating>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
+        except Exception as e:
+            if conn:
+                conn.rollback()
+            self.log.error(f"Error executing query: {e}")
+            # Fall back to updating one row at a time
+            rowcount = 0
+            for args in args_list:
+                self.update_one(query, args)
+                rowcount += 1
+            self.log.debug(f'Batch update failed. Updated {rowcount} rows individually.')
+        finally:
+            if cursor:
+                cursor.close()
+            if conn:
+                conn.close()
+
+    def update_one_or_dict(self, table=None, data=None, condition=None, query=None, args=None, commit=True):
+        """
+        Single update (accepts a dict or a raw SQL statement)
+        :param table: table name (required in dict mode)
+        :param data: dict of {column: value} (mutually exclusive with query)
+        :param condition: update condition; accepted forms:
+            - dict: {"id": 1} -> "WHERE id = %s"
+            - string: "id = 1" -> "WHERE id = 1" (caller must ensure it is safe)
+            - tuple: ("id = %s", [1]) -> "WHERE id = %s" (parameterized)
+        :param query: raw SQL statement (mutually exclusive with data)
+        :param args: SQL parameters (required in query mode)
+        :param commit: whether to auto-commit
+        :return: number of affected rows
+        :raises: ValueError on invalid arguments
+        """
+        # Validate arguments
+        if data is not None:
+            if not isinstance(data, dict):
+                raise ValueError("Data must be a dictionary")
+            if table is None:
+                raise ValueError("Table name is required for dictionary update")
+            if condition is None:
+                raise ValueError("Condition is required for dictionary update")
+
+            # Build the SET clause
+            set_clause = ", ".join([f"{self._safe_identifier(k)} = %s" for k in data.keys()])
+            set_values = list(data.values())
+
+            # Parse the condition
+            condition_clause, condition_args = self._parse_condition(condition)
+            query = f"UPDATE {self._safe_identifier(table)} SET {set_clause} WHERE {condition_clause}"
+            args = set_values + condition_args
+
+        elif query is None:
+            raise ValueError("Either data or query must be provided")
+
+        # Run the update
+        cursor = self._execute(query, args, commit)
+        return cursor.rowcount
+
+    def _parse_condition(self, condition):
+        """
+        Normalize a condition into (clause, args)
+        :param condition: dict / string / tuple
+        :return: (str, list) SQL clause and parameter list
+        """
+        if isinstance(condition, dict):
+            clause = " AND ".join([f"{self._safe_identifier(k)} = %s" for k in condition.keys()])
+            args = list(condition.values())
+        elif isinstance(condition, str):
+            clause = condition  # note: the caller must ensure this is safe
+            args = []
+        elif isinstance(condition, (tuple, list)) and len(condition) == 2:
+            clause, args = condition[0], condition[1]
+            if not isinstance(args, (list, tuple)):
+                args = [args]
+        else:
+            raise ValueError("Condition must be dict/str/(clause, args)")
+        return clause, args
+
+    def update_many(self, table=None, data_list=None, condition_list=None, query=None, args_list=None, batch_size=500,
+                    commit=True):
+        """
+        Batch update (accepts a list of dicts or a raw SQL statement)
+        :param table: table name (required for dict updates)
+        :param data_list: list of dicts [{column: value}]
+        :param condition_list: list of condition dicts, same length as data_list
+        :param query: raw SQL statement (mutually exclusive with data_list)
+        :param args_list: list of parameter tuples (required with query)
+        :param batch_size: batch size
+        :param commit: whether to auto-commit
+        :return: number of affected rows
+        """
+        if data_list is not None:
+            if not data_list or not isinstance(data_list[0], dict):
+                raise ValueError("Data_list must be a non-empty list of dictionaries")
+            if condition_list is None or len(data_list) != len(condition_list):
+                raise ValueError("Condition_list must be provided and match the length of data_list")
+            if not all(isinstance(cond, dict) for cond in condition_list):
+                raise ValueError("All elements in condition_list must be dictionaries")
+
+            # Keys of the first data item and condition item define the batch shape
+            first_data_keys = set(data_list[0].keys())
+            first_cond_keys = set(condition_list[0].keys())
+
+            # Build the base SQL
+            set_clause = ', '.join([self._safe_identifier(k) + ' = %s' for k in data_list[0].keys()])
+            condition_clause = ' AND '.join([self._safe_identifier(k) + ' = %s' for k in condition_list[0].keys()])
+            base_query = f"UPDATE {self._safe_identifier(table)} SET {set_clause} WHERE {condition_clause}"
+            total = 0
+
+            # Process in batches
+            for i in range(0, len(data_list), batch_size):
+                batch_data = data_list[i:i + batch_size]
+                batch_conds = condition_list[i:i + batch_size]
+                batch_args = []
+
+                # Check that every row in this batch has the same key structure
+                can_batch = True
+                for data, cond in zip(batch_data, batch_conds):
+                    data_keys = set(data.keys())
+                    cond_keys = set(cond.keys())
+                    if data_keys != first_data_keys or cond_keys != first_cond_keys:
+                        can_batch = False
+                        break
+                    batch_args.append(tuple(data.values()) + tuple(cond.values()))
+
+                if not can_batch:
+                    # Mixed structures: fall back to single updates
+                    for data, cond in zip(batch_data, batch_conds):
+                        self.update_one_or_dict(table=table, data=data, condition=cond, commit=commit)
+                        total += 1
+                    continue
+
+                # Run the batch update
+                try:
+                    with self.pool.connection() as conn:
+                        with conn.cursor() as cursor:
+                            cursor.executemany(base_query, batch_args)
+                            if commit:
+                                conn.commit()
+                            total += cursor.rowcount
+                            self.log.debug(f"Batch update succeeded. Rows: {cursor.rowcount}")
+                except Exception as e:
+                    self.log.error(f"Batch update failed: {e}")
+                    # Fall back to single updates (the pooled connection was already
+                    # returned when the with-block exited, so no explicit rollback here)
+                    for args, data, cond in zip(batch_args, batch_data, batch_conds):
+                        try:
+                            self._execute(base_query, args, commit=commit)
+                            total += 1
+                        except Exception as e2:
+                            self.log.error(f"Single update failed: {e2}, Data: {data}, Condition: {cond}")
+            self.log.info(f"Total updated rows: {total}")
+            return total
+        elif query is not None:
+            # Raw SQL with a parameter list
+            if args_list is None:
+                raise ValueError("args_list must be provided when using query")
+
+            total = 0
+            for i in range(0, len(args_list), batch_size):
+                batch_args = args_list[i:i + batch_size]
+                try:
+                    with self.pool.connection() as conn:
+                        with conn.cursor() as cursor:
+                            cursor.executemany(query, batch_args)
+                            if commit:
+                                conn.commit()
+                            total += cursor.rowcount
+                            self.log.debug(f"Batch update succeeded. Rows: {cursor.rowcount}")
+                except Exception as e:
+                    self.log.error(f"Batch update failed: {e}")
+                    # Fall back to single updates
+                    for args in batch_args:
+                        try:
+                            self._execute(query, args, commit=commit)
+                            total += 1
+                        except Exception as e2:
+                            self.log.error(f"Single update failed: {e2}, Args: {args}")
+            self.log.info(f"Total updated rows: {total}")
+            return total
+        else:
+            raise ValueError("Either data_list or query must be provided")
+
+    def check_pool_health(self):
+        """
+        Check that the pool can hand out a working connection
+
+        # Usage:
+        # sql_pool = MySQLConnectionPool(log=log)
+        # if not sql_pool.check_pool_health():
+        #     log.error("database connection pool unhealthy")
+        #     raise RuntimeError("database connection pool unhealthy")
+        """
+        try:
+            with self.pool.connection() as conn:
+                conn.ping(reconnect=True)
+                return True
+        except Exception as e:
+            self.log.error(f"Connection pool health check failed: {e}")
+            return False
+
+    @staticmethod
+    def _safe_identifier(name):
+        """SQL标识符安全校验"""
+        if not re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', name):
+            raise ValueError(f"Invalid SQL identifier: {name}")
+        return name
+

+ 10 - 0
clove_spider/requirements.txt

@@ -0,0 +1,10 @@
+-i https://mirrors.aliyun.com/pypi/simple/
+curl_cffi==0.11.1
+DBUtils==3.1.0
+DrissionPage  # used by clove_lucky_bag_spider.py; pin a version as appropriate
+loguru==0.7.3
+parsel==1.10.0
+PyMySQL==1.1.1
+PyYAML==6.0.2
+requests==2.32.4
+schedule==1.2.2
+tenacity==9.0.0

+ 48 - 0
clove_spider/start_clove_spider.py

@@ -0,0 +1,48 @@
+# -*- coding: utf-8 -*-
+# Author : Charley
+# Python : 3.10.8
+# Date   : 2025/8/1 18:03
+import time
+import schedule
+import threading
+from loguru import logger
+from clove_blind_box_spider import blind_main
+from clove_recycle_spider import recycle_main
+
+logger.remove()
+logger.add("./logs/{time:YYYYMMDD}.log", encoding='utf-8', rotation="00:00",
+           format="[{time:YYYY-MM-DD HH:mm:ss.SSS}] {level} {message}",
+           level="DEBUG", retention="7 day")
+
+
+def run_threaded(job_func, *args, **kwargs):
+    """
+    在新线程中运行给定的函数,并传递参数。
+
+    :param job_func: 要运行的目标函数
+    :param args: 位置参数
+    :param kwargs: 关键字参数
+    """
+    job_thread = threading.Thread(target=job_func, args=args, kwargs=kwargs)
+    job_thread.start()
+
+
+def schedule_task():
+    """
+    爬虫模块的启动文件
+    """
+    # 立即运行一次任务
+    # run_threaded(blind_main, log=logger)
+    # run_threaded(recycle_main, log=logger)
+
+    # 设置定时任务
+    schedule.every().day.at("00:06").do(run_threaded, blind_main, log=logger)
+    schedule.every().friday.at("00:01").do(run_threaded, recycle_main, log=logger)
+
+    while True:
+        schedule.run_pending()
+        time.sleep(1)
+
+
+if __name__ == '__main__':
+    schedule_task()
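
schedule runs jobs synchronously inside run_pending(), so a long blind_main run would hold up recycle_main; wrapping each job in run_threaded keeps the scheduler loop responsive. A minimal sketch of the difference (slow_job is hypothetical):

    import time
    import threading
    import schedule

    def slow_job(name):
        time.sleep(10)  # stands in for a long crawl
        print(name, "done")

    # Blocking: run_pending() would not return until slow_job finishes
    # schedule.every().day.at("00:01").do(slow_job, name="blind")

    # Non-blocking: the job only spawns a thread, so other jobs start on time
    schedule.every().day.at("00:01").do(
        lambda: threading.Thread(target=slow_job, args=("blind",)).start())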