Преглед изворни кода

feat(spider): 新增90sAuctions增量爬虫及数据库连接池实现

- 添加auctions90s_core模块,实现拍卖页面的HTTP请求、列表分页和详情解析功能
- 实现ASP.NET __doPostBack机制切换拍卖会,支持单页及全页数据抓取
- 集成代理获取及失败重试,增强抓取稳定性
- 新增auctions90s_spider模块,完成增量爬取调度与差集判断
- 通过半月定时任务执行爬虫,自动补抓详情页未完成数据
- 添加mysql_pool模块,封装MySQL连接池管理和高效批量操作
- 完善日志记录和异常处理,提升监控和错误定位能力
charley пре 1 недеља
родитељ
комит
37660ea8f2

+ 98 - 0
auctions90s_spider/YamlLoader.py

@@ -0,0 +1,98 @@
+# -*- coding: utf-8 -*-
+# Author : Charley
+# Python : 3.10.8
+# Date   : 2025/12/22 10:44
+import os, re
+import yaml
+
+regex = re.compile(r'^\$\{(?P<ENV>[A-Z_\-]+:)?(?P<VAL>[\w.]+)}$')
+
+
+class YamlConfig:
+    def __init__(self, config):
+        self.config = config
+
+    def get(self, key: str):
+        return YamlConfig(self.config.get(key))
+
+    def getValueAsString(self, key: str):
+        try:
+            match = regex.match(self.config[key])
+            group = match.groupdict()
+            if group['ENV'] is not None:
+                env = group['ENV'][:-1]
+                return os.getenv(env, group['VAL'])
+            return None
+        except:
+            return self.config[key]
+
+    def getValueAsInt(self, key: str):
+        try:
+            match = regex.match(self.config[key])
+            group = match.groupdict()
+            if group['ENV'] is not None:
+                env = group['ENV'][:-1]
+                return int(os.getenv(env, group['VAL']))
+            return 0
+        except:
+            return int(self.config[key])
+
+    def getValueAsBool(self, key: str):
+        try:
+            match = regex.match(self.config[key])
+            group = match.groupdict()
+            if group['ENV'] is not None:
+                env = group['ENV'][:-1]
+                return bool(os.getenv(env, group['VAL']))
+            return False
+        except:
+            return bool(self.config[key])
+
+
+def _resolve_path(path: str) -> str:
+    """
+    解析 yaml 文件路径,按优先级查找:
+      1) 绝对路径或 cwd 下存在 → 直接用(保留旧行为,向后兼容)
+      2) 调用方主脚本所在目录 → 兜底,方便打包后从任意 cwd 启动
+    :param path: (str) 用户传入的路径,默认 'application.yml'
+    :return: (str) 实际可读取的完整路径;找不到则返回原 path 让 open() 抛错
+    """
+    # 1) 旧行为:cwd 或绝对路径
+    if os.path.exists(path):
+        return path
+
+    # 2) 主脚本目录(__main__.__file__)
+    try:
+        import __main__
+        main_file = getattr(__main__, '__file__', None)
+        if main_file:
+            candidate = os.path.join(os.path.dirname(os.path.abspath(main_file)), path)
+            if os.path.exists(candidate):
+                return candidate
+    except Exception:
+        pass
+
+    return path
+
+
+def readYaml(path: str = 'application.yml', profile: str = None) -> YamlConfig:
+    """
+    读取 yaml 配置。
+    :param path: (str) yaml 文件路径,默认 'application.yml'。
+                       优先 cwd / 绝对路径(保留旧行为),找不到再 fallback 到主脚本所在目录。
+    :param profile: (str) 可选环境后缀,如 'dev' 会额外加载 'application-dev.yml' 并 update
+    :return: (YamlConfig) 配置访问对象
+    :raises FileNotFoundError: cwd 和主脚本目录都找不到时抛出
+    """
+    real_path = _resolve_path(path)
+    with open(real_path, encoding='utf-8') as fd:
+        conf = yaml.load(fd, Loader=yaml.FullLoader)
+
+    if profile is not None:
+        result = real_path.rsplit('.', 1)
+        profiledYaml = f'{result[0]}-{profile}.{result[1]}'
+        if os.path.exists(profiledYaml):
+            with open(profiledYaml, encoding='utf-8') as fd:
+                conf.update(yaml.load(fd, Loader=yaml.FullLoader))
+
+    return YamlConfig(conf)

+ 6 - 0
auctions90s_spider/application.yml

@@ -0,0 +1,6 @@
+# Each value is ${ENV_NAME:default}: the environment variable wins when set,
+# otherwise the default after the colon is used.
+# NOTE(review): the defaults below include a database password; consider supplying
+# it only through MYSQL_PASSWORD in deployed environments.
+mysql:
+  host: ${MYSQL_HOST:100.64.0.21}
+  port: ${MYSQL_PORT:3306}
+  username: ${MYSQL_USERNAME:crawler}
+  password: ${MYSQL_PASSWORD:Pass2022}
+  db: ${MYSQL_DATABASE:crawler}

+ 365 - 0
auctions90s_spider/auctions90s_core.py

@@ -0,0 +1,365 @@
+# -*- coding: utf-8 -*-
+# Author : Charley
+# Python : 3.12.10
+# Date   : 2026/5/28
+"""
+90sAuctions 公用模块:HTTP 配置、ASP.NET postback 切换 auction、单页解析、详情解析。
+被 auctions90s_history.py / auctions90s_spider.py / auctions90s_retry.py 复用。
+"""
+import random
+import re
+from curl_cffi import requests
+import user_agent
+from loguru import logger
+from parsel import Selector
+from curl_cffi.requests import BrowserType
+from tenacity import retry, stop_after_attempt, wait_fixed
+
+# Site constants
+SITE_ORIGIN = "https://90sauctions.com"
+GALLERY_URL = f"{SITE_ORIGIN}/Lots/Gallery"
+
+# Database table name (schema copied from lelands_record)
+TABLE_NAME = "auctions90s_record"
+
+# Use every browser fingerprint that curl_cffi ships with
+client_identifier_list = [b.value for b in BrowserType]
+
+# Default request headers; the user-agent is generated once, at import time
+headers = {
+    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
+    "user-agent": user_agent.generate_user_agent()
+}
+
+# Price prefix on list pages: "CURRENT BID $..." while running, "SOLD FOR $..." once ended
+PRICE_PREFIX_RE = re.compile(r'^\s*(?:SOLD\s+FOR|CURRENT\s+BID)\s*\$', re.IGNORECASE)
+
+
+def after_log(retry_state):
+    """tenacity retry 回调"""
+    if retry_state.args and len(retry_state.args) > 0:
+        log = retry_state.args[0]
+    else:
+        log = logger
+
+    if retry_state.outcome.failed:
+        log.warning(f"Function '{retry_state.fn.__name__}', Attempt {retry_state.attempt_number} Times")
+    else:
+        log.info(f"Function '{retry_state.fn.__name__}', Attempt {retry_state.attempt_number} succeeded")
+
+
+@retry(stop=stop_after_attempt(5), wait=wait_fixed(2), after=after_log)
+def get_proxys(log):
+    """
+    获取代理。
+
+    :param log: (loguru.Logger) 日志对象,用于记录代理获取异常
+    :return: (dict) 形如 {"http": "...", "https": "..."} 的代理字典
+    :raises Exception: 当代理服务不可达时由 tenacity 触发重试,最终抛出
+    """
+    http_proxy = "http://u1952150085001297:sJMHl4qc4bM0@proxy.123proxy.cn:36931"
+    https_proxy = "http://u1952150085001297:sJMHl4qc4bM0@proxy.123proxy.cn:36931"
+    try:
+        return {"http": http_proxy, "https": https_proxy}
+    except Exception as e:
+        log.error(f"Error getting proxy: {e}")
+        raise e
+
+
+def extract_auction(description, log=logger):
+    """
+    从详情页 lblOldAuction 的文本列表中提取拍卖会名称(双引号内的字符串)。
+
+    :param description: (list[str]) selector.getall() 返回的字符串列表,
+                        典型形如 ['Item was in Auction "Inaugural Auction",...']
+    :param log: (loguru.Logger) 日志对象
+    :return: (str | None) 提取到的 auction 名,失败或空时返回 None
+    """
+    try:
+        if not description or not isinstance(description, list):
+            return None
+        for item in description:
+            if not item or not isinstance(item, str):
+                continue
+            text = item.strip()
+            if not text:
+                continue
+            match = re.search(r'"(.+?)"', text)
+            if match:
+                auction = match.group(1).strip()
+                return auction if auction else None
+        return None
+    except Exception as e:
+        log.error(f"extract_auction error: {e}")
+        return None
+
+
+def _pick_hidden(selector, field_id):
+    """
+    从 ASP.NET 页面中提取隐藏字段值(__VIEWSTATE / __EVENTVALIDATION 等)。
+
+    :param selector: (parsel.Selector) 已解析的页面 Selector
+    :param field_id: (str) 隐藏字段的 id,如 "__VIEWSTATE"
+    :return: (str) 字段值,未找到时返回空字符串
+    """
+    return selector.xpath(f'//input[@id="{field_id}"]/@value').get() or ''
+
+
+def parse_auction_list(selector):
+    """
+    解析 Gallery 页面侧边栏 Auction 下拉框中的全部选项。
+
+    :param selector: (parsel.Selector) Gallery 首页 Selector
+    :return: (list[dict]) [{"id": "-1", "name": "All Auctions"}, {"id": "12", "name": "June 2026"}, ...]
+    """
+    options = selector.xpath('//select[@id="Auction"]/option')
+    result = []
+    for opt in options:
+        aid = opt.xpath('./@value').get()
+        name = opt.xpath('./text()').get()
+        if aid is None:
+            continue
+        result.append({"id": aid.strip(), "name": (name or '').strip()})
+    return result
+
+
+@retry(stop=stop_after_attempt(3), wait=wait_fixed(2), after=after_log)
+def get_auction_list(log, session, impersonate):
+    """
+    GET Gallery 首页并解析出全部具体拍卖会(排除 -1 All Auctions)。
+
+    :param log: (loguru.Logger) 日志对象
+    :param session: (curl_cffi.requests.Session) 复用的会话对象
+    :param impersonate: (str) curl_cffi 浏览器指纹标识
+    :return: (list[dict]) [{"id": "12", "name": "June 2026"}, ...]
+    :raises requests.HTTPError: 首页请求非 2xx 时抛出
+    """
+    log.info("获取全部拍卖会列表")
+    resp = session.get(GALLERY_URL, headers=headers, impersonate=impersonate,
+                       proxies=get_proxys(log), timeout=15)
+    resp.raise_for_status()
+    sel = Selector(resp.text)
+    all_opts = parse_auction_list(sel)
+    # 过滤掉 All Auctions(-1),只保留具体拍卖会
+    real = [o for o in all_opts if o["id"] != "-1"]
+    log.info(f"共解析到 {len(real)} 个拍卖会:{[(o['id'], o['name']) for o in real]}")
+    return real
+
+
+@retry(stop=stop_after_attempt(5), wait=wait_fixed(2), after=after_log)
+def setup_auction_session(log, session, impersonate, auction_id):
+    """
+    Switch the Gallery's Auction filter to ``auction_id`` via an ASP.NET __doPostBack form post.
+    The intent is that the server-side session remembers the selection, so later
+    GET /Lots/Gallery?page=N requests return that auction's data.
+
+    :param log: (loguru.Logger) logger
+    :param session: (curl_cffi.requests.Session) shared session
+    :param impersonate: (str) curl_cffi browser fingerprint identifier
+    :param auction_id: (str) "-1" (All Auctions) or a concrete id such as "12"
+    :return: None
+    :raises RuntimeError: when the selected option in the response differs from auction_id
+    """
+    log.info(f"切换 Auction -> {auction_id}")
+    proxies = get_proxys(log)
+
+    # 1) Initial GET to obtain the ViewState and related hidden fields
+    resp = session.get(GALLERY_URL, headers=headers, impersonate=impersonate,
+                       proxies=proxies, timeout=15)
+    resp.raise_for_status()
+    sel = Selector(resp.text)
+
+    # 2) Build the postback form. All control names carry the ctl00$ prefix
+    form_data = {
+        '__EVENTTARGET': 'ctl00$Auction',
+        '__EVENTARGUMENT': '',
+        '__LASTFOCUS': '',
+        '__VIEWSTATE': _pick_hidden(sel, '__VIEWSTATE'),
+        '__VIEWSTATEGENERATOR': _pick_hidden(sel, '__VIEWSTATEGENERATOR'),
+        '__EVENTVALIDATION': _pick_hidden(sel, '__EVENTVALIDATION'),
+        'ctl00$SearchIn': 'title',
+        'ctl00$SearchText': '',
+        'ctl00$BrowseBy': 'gallery',
+        'ctl00$Auction': str(auction_id),
+    }
+
+    # Form-post headers layered on top of the shared defaults
+    post_headers = {
+        **headers,
+        'Content-Type': 'application/x-www-form-urlencoded',
+        'Referer': GALLERY_URL,
+        'Origin': SITE_ORIGIN,
+    }
+
+    resp = session.post(GALLERY_URL, headers=post_headers, data=form_data,
+                        impersonate=impersonate, proxies=proxies, timeout=20)
+    resp.raise_for_status()
+
+    # Verify the switch took effect: the selected option must equal the requested id
+    sel2 = Selector(resp.text)
+    selected_val = sel2.xpath('//select[@id="Auction"]/option[@selected]/@value').get()
+    log.info(f"切换后 Auction 选中值: {selected_val}")
+    if selected_val != str(auction_id):
+        raise RuntimeError(f"切换 Auction 失败,预期 {auction_id} 实际 {selected_val}")
+
+
+def _clean_price(raw):
+    """
+    清洗列表页的成交/当前价文本,去掉前缀和千分位逗号。
+
+    :param raw: (str | None) 形如 "SOLD FOR $1,850" / "CURRENT BID $325"
+    :return: (str | None) 纯数字字符串(如 "1850" / "325"),输入为空时返回 None
+    """
+    if not raw:
+        return None
+    price = PRICE_PREFIX_RE.sub('', raw)
+    return price.replace(',', '').strip() or None
+
+
+@retry(stop=stop_after_attempt(5), wait=wait_fixed(2), after=after_log)
+def get_single_page(log, page, sql_pool, session, impersonate,
+                    auction_id=None, auction_name=None):
+    """
+    抓取并落库 Gallery 的单页数据。
+
+    :param log: (loguru.Logger) 日志对象
+    :param page: (int) 页码(从 1 开始)
+    :param sql_pool: (MySQLConnectionPool) 数据库连接池,None 时不落库(调试用)
+    :param session: (curl_cffi.requests.Session) 复用的会话对象(需已 setup_auction_session)
+    :param impersonate: (str) curl_cffi 浏览器指纹标识
+    :param auction_id: (str | None) 当前 session 切换到的 auction id,写入 auctions90s_record.auction_id
+    :param auction_name: (str | None) 同上,写入 auctions90s_record.auction_name
+    :return: (int) 本页解析到并落库的条数;无数据时返回 0
+    """
+    log.info(f">>>>>>>>>>>>>> 正在爬取 auction={auction_id}({auction_name}) 第 {page} 页 <<<<<<<<<<<<<<")
+
+    response = session.get(GALLERY_URL, impersonate=impersonate, headers=headers,
+                           params={"page": f"{page}"},
+                           proxies=get_proxys(log), timeout=10, allow_redirects=False)
+    response.raise_for_status()
+
+    selector = Selector(response.text)
+    tag_div_list = selector.xpath(
+        '//div[@class="items"]/div/div[@class="row"]//div[@class="col-lg-3 col-md-4 col-sm-6"]')
+
+    if not tag_div_list or len(tag_div_list) == 0:
+        log.warning(f"--------------- 第 {page} 页无数据 ---------------")
+        return 0
+
+    info_list = []
+    for tag_div in tag_div_list:
+        # 商品标题与详情页绝对地址(列表里 a 标签已是完整 url,无需拼接)
+        title = tag_div.xpath('.//p/a/text()').get()
+        detail_url = tag_div.xpath('.//p/a/@href').get()
+
+        # Bids / Opening Bid / Status 三个 strong 顺序固定
+        tag_div_p = tag_div.xpath('.//div/p[2]/strong/text()').getall()
+        bids = tag_div_p[0] if tag_div_p else None
+        opening_bid = tag_div_p[1] if len(tag_div_p) > 1 else None
+        opening_bid = opening_bid.replace('$', '').replace(',', '').strip() if opening_bid else None
+        status = tag_div_p[2] if len(tag_div_p) > 2 else None
+
+        # 价格:列表卡片底部 a 文本,进行中为 "CURRENT BID $...",结束后为 "SOLD FOR $..."
+        price = tag_div.xpath('.//div[@class="item-price"]/a/text()').get()
+        price = _clean_price(price)
+
+        data_dict = {
+            "title": title,
+            "detail_url": detail_url,
+            "bids": bids,
+            "opening_bid": opening_bid,
+            "status": status,
+            "price": price,
+            "auction_id": int(auction_id) if auction_id is not None else None,
+            "auction_name": auction_name,
+        }
+        info_list.append(data_dict)
+
+    if info_list and sql_pool is not None:
+        sql_pool.insert_many(table=TABLE_NAME, data_list=info_list, ignore=True)
+    return len(info_list)
+
+
+def crawl_one_auction(log, sql_pool, session, impersonate,
+                      auction_id, auction_name, max_page=460):
+    """
+    抓取单个拍卖会的全部页(switch 到该 auction → 翻页直到无数据)。
+
+    :param log: (loguru.Logger) 日志对象
+    :param sql_pool: (MySQLConnectionPool) 数据库连接池
+    :param session: (curl_cffi.requests.Session) 复用的会话对象
+    :param impersonate: (str) curl_cffi 浏览器指纹标识
+    :param auction_id: (str) 当前 session 切换到的 auction id
+    :param auction_name: (str) 拍卖会名,写入数据库
+    :param max_page: (int) 最大页码上限,作为兜底保护
+    :return: (int) 该 auction 抓到的总条数
+    """
+    setup_auction_session(log, session, impersonate, auction_id)
+
+    page = 1
+    total = 0
+    while page <= max_page:
+        try:
+            n = get_single_page(log, page, sql_pool, session, impersonate,
+                                auction_id=auction_id, auction_name=auction_name)
+        except Exception as e:
+            log.error(f"auction={auction_id} page={page} 抓取失败: {e}")
+            break
+        if n == 0:
+            log.info(f"auction={auction_id} 翻到第 {page} 页无数据,结束")
+            break
+        total += n
+        page += 1
+    log.info(f"auction={auction_id}({auction_name}) 共抓取 {total} 条")
+    return total
+
+
+@retry(stop=stop_after_attempt(5), wait=wait_fixed(2), after=after_log)
+def get_details(log, url, sql_pool, sql_id):
+    """
+    获取详情页:分类、图片列表,写回数据库并把 state 置 1。
+
+    :param log: (loguru.Logger) 日志对象
+    :param url: (str) 详情页 URL(来自列表页 detail_url 字段)
+    :param sql_pool: (MySQLConnectionPool) 数据库连接池
+    :param sql_id: (int) 数据库记录主键 id
+    :return: None
+    :raises requests.HTTPError: 详情页非 2xx 时由 tenacity 重试,超限后抛出
+    """
+    log.info(f">>>>>>>>>>>>>> 正在爬取详情数据URL: {url} <<<<<<<<<<<<<<")
+    response = requests.get(url, headers=headers,
+                            impersonate=random.choice(client_identifier_list),
+                            timeout=10, proxies=get_proxys(log))
+    response.raise_for_status()
+    selector = Selector(response.text)
+    category = selector.xpath('//a[@id="MainContent_hCategory"]/text()').get()
+    # 右侧主图 + 缩略图区。缩略图列表里包含主图链接,因此需要去重保序
+    imgs = selector.xpath('//div[@class="col-md-5 col-sm-5"]//a[not(@id="Zoomer")]/@href').getall()
+    imgs = list(dict.fromkeys(imgs)) if imgs else []
+    imgs_str = ','.join(imgs) if imgs else None
+
+    sql_pool.update_one_or_dict(
+        table=TABLE_NAME,
+        data={"category": category, "imgs": imgs_str, "state": 1},
+        condition={"id": sql_id}
+    )
+
+
+def update_details_for_pending(log, sql_pool):
+    """
+    扫描库里 state != 1 的记录,逐条抓详情;详情失败置 state=2。
+
+    :param log: (loguru.Logger) 日志对象
+    :param sql_pool: (MySQLConnectionPool) 数据库连接池
+    :return: None
+    """
+    log.debug('Updating detail pages ...........................')
+    sql_result = sql_pool.select_all(f'select id, detail_url from {TABLE_NAME} where state != 1')
+    for row in sql_result:
+        sql_id, detail_url = row[0], row[1]
+        try:
+            get_details(log, detail_url, sql_pool, sql_id)
+        except Exception as e:
+            log.error(f'Error getting details for {detail_url}: {e}')
+            sql_pool.update_one_or_dict(
+                table=TABLE_NAME,
+                data={"state": 2},
+                condition={"id": sql_id}
+            )

+ 158 - 0
auctions90s_spider/auctions90s_spider.py

@@ -0,0 +1,158 @@
+# -*- coding: utf-8 -*-
+# Author : Charley
+# Python : 3.12.10
+# Date   : 2026/5/28
+"""
+90sAuctions 增量爬虫(半月调度)
+逻辑:
+  1. GET 首页解析当前网站全部 auction id
+  2. 查库 select distinct auction_id from auctions90s_record,得到已爬过的 auction
+  3. 差集 = 新增 auction
+  4. 没有新增 → 本轮无数据可抓,结束
+  5. 对每个新增 auction:postback 切换 → 翻页 → 写库
+  6. 补抓 state != 1 的详情页
+"""
+import time
+import random
+import inspect
+import schedule
+from curl_cffi import requests
+from loguru import logger
+from tenacity import retry, stop_after_attempt, wait_fixed
+
+from mysql_pool import MySQLConnectionPool
+from auctions90s_core import (
+    TABLE_NAME,
+    client_identifier_list,
+    crawl_one_auction,
+    get_auction_list,
+    update_details_for_pending,
+    after_log,
+)
+
+# Drop loguru's default sink and log only to a file under ./logs, named by date,
+# rotated at midnight and retained for 7 days.
+logger.remove()
+logger.add("./logs/{time:YYYYMMDD}.log", encoding='utf-8', rotation="00:00",
+           format="[{time:YYYY-MM-DD HH:mm:ss.SSS}] {level} {message}",
+           level="DEBUG", retention="7 day")
+
+
+def get_existing_auction_ids(log, sql_pool):
+    """
+    查库返回已爬过的 auction_id 集合。
+
+    :param log: (loguru.Logger) 日志对象
+    :param sql_pool: (MySQLConnectionPool) 数据库连接池
+    :return: (set[str]) 已存在的 auction_id 集合(字符串形式,与首页解析值对齐)
+    """
+    rows = sql_pool.select_all(
+        f"select distinct auction_id from {TABLE_NAME} where auction_id is not null"
+    )
+    ids = {str(r[0]) for r in rows} if rows else set()
+    log.info(f"库中已存在 {len(ids)} 个 auction_id: {sorted(ids)}")
+    return ids
+
+
+def diff_new_auctions(log, all_auctions, existing_ids):
+    """
+    从首页解析的全部 auctions 中筛出库里没有的。
+
+    :param log: (loguru.Logger) 日志对象
+    :param all_auctions: (list[dict]) get_auction_list 返回的全部拍卖会列表
+    :param existing_ids: (set[str]) 已存在的 auction_id 集合
+    :return: (list[dict]) 新增待抓的 auction 列表
+    """
+    new_list = [a for a in all_auctions if a["id"] not in existing_ids]
+    log.info(f"新增待抓取 auction 数: {len(new_list)} -> {[(a['id'], a['name']) for a in new_list]}")
+    return new_list
+
+
+def run_incremental(log, sql_pool):
+    """
+    增量抓取主流程:拉首页 → 差集 → 逐个抓新增 auction。
+
+    :param log: (loguru.Logger) 日志对象
+    :param sql_pool: (MySQLConnectionPool) 数据库连接池
+    :return: None
+    """
+    impersonate = random.choice(client_identifier_list)
+    with requests.Session() as session:
+        try:
+            all_auctions = get_auction_list(log, session, impersonate)
+        except Exception as e:
+            log.error(f"获取拍卖会列表失败: {e}")
+            return
+
+        existing_ids = get_existing_auction_ids(log, sql_pool)
+        new_auctions = diff_new_auctions(log, all_auctions, existing_ids)
+
+        if not new_auctions:
+            log.info("本轮无新增 auction,跳过 list 抓取")
+            return
+
+        for idx, auc in enumerate(new_auctions, 1):
+            aid, name = auc["id"], auc["name"]
+            log.info(f"========== [{idx}/{len(new_auctions)}] 开始抓 auction={aid} ({name}) ==========")
+            try:
+                crawl_one_auction(log, sql_pool, session, impersonate,
+                                  auction_id=aid, auction_name=name)
+            except Exception as e:
+                log.error(f"auction={aid} 抓取异常: {e}")
+                continue
+
+
+@retry(stop=stop_after_attempt(100), wait=wait_fixed(3600), after=after_log)
+def nineties_main(log):
+    """
+    日调度主函数:增量 list + 补详情。失败时按小时级重试(最多 100 次)。
+
+    :param log: (loguru.Logger) 日志对象
+    :return: None
+    :raises Exception: MySQL 连接失败时抛出,由 tenacity 触发整轮重试
+    """
+    log.info(f'开始运行 {inspect.currentframe().f_code.co_name} 增量爬虫任务 ...')
+
+    sql_pool = MySQLConnectionPool(log=log)
+    if not sql_pool:
+        log.error("MySQL数据库连接失败")
+        raise Exception("MySQL数据库连接失败")
+
+    try:
+        try:
+            run_incremental(log, sql_pool)
+        except Exception as e:
+            log.error(f'增量抓取失败: {e}')
+
+        try:
+            update_details_for_pending(log, sql_pool)
+        except Exception as e:
+            log.error(f'详情补抓失败: {e}')
+
+    except Exception as e:
+        log.error(f'{inspect.currentframe().f_code.co_name} error: {e}')
+    finally:
+        log.info(f'爬虫程序 {inspect.currentframe().f_code.co_name} 运行结束,等待下一轮采集 ...')
+
+
+def schedule_task():
+    """
+    Start the semi-monthly schedule: run once immediately, then at 05:00 on the
+    1st and the 15th of every month.
+
+    :return: None (never returns; loops forever)
+    """
+    nineties_main(log=logger)
+
+    def run_semimonthly():
+        # The job fires daily; only act on the 1st and 15th (twice a month)
+        from datetime import date
+        if date.today().day in (1, 15):
+            nineties_main(log=logger)
+
+    schedule.every().day.at("05:00").do(run_semimonthly)
+    # Poll the scheduler once per second
+    while True:
+        schedule.run_pending()
+        time.sleep(1)
+
+
+if __name__ == '__main__':
+    # nineties_main(log=logger)
+    schedule_task()

+ 671 - 0
auctions90s_spider/mysql_pool.py

@@ -0,0 +1,671 @@
+# -*- coding: utf-8 -*-
+# Author : Charley
+# Python : 3.10.8
+# Date   : 2025/3/25 14:14
+import re
+import pymysql
+import YamlLoader
+from loguru import logger
+from dbutils.pooled_db import PooledDB
+
+# Load the YAML configuration at import time and read the MySQL connection settings
+# from its "mysql" section.
+yaml = YamlLoader.readYaml()
+mysqlYaml = yaml.get("mysql")
+sql_host = mysqlYaml.getValueAsString("host")
+sql_port = mysqlYaml.getValueAsInt("port")
+sql_user = mysqlYaml.getValueAsString("username")
+sql_password = mysqlYaml.getValueAsString("password")
+sql_db = mysqlYaml.getValueAsString("db")
+
+
+class MySQLConnectionPool:
+    """
+    MySQL连接池
+    """
+
+    def __init__(self, mincached=1, maxcached=2, maxconnections=3, log=None):
+        """
+        Create the connection pool.
+        :param mincached: number of connections opened when the pool is created (0 = none)
+        :param maxcached: maximum number of idle connections kept in the pool (0 or None = unlimited)
+        :param maxconnections: maximum number of connections allowed (0 or None = unlimited)
+        :param log: custom logger; the module-level loguru logger is used when omitted
+        """
+        # Use the logger passed in, falling back to loguru's logger
+        self.log = log or logger
+        self.pool = PooledDB(
+            creator=pymysql,
+            mincached=mincached,
+            maxcached=maxcached,
+            maxconnections=maxconnections,
+            blocking=True,  # when no connection is free: True = block and wait, False = raise an error
+            host=sql_host,
+            port=sql_port,
+            user=sql_user,
+            password=sql_password,
+            database=sql_db,
+            ping=2,  # DBUtils ping flag: 2 = check the connection when a cursor is created, to avoid using a dropped connection
+            connect_timeout=5,  # connect timeout (seconds)
+            # read_timeout=30,  # read timeout (seconds)
+            write_timeout=30  # write timeout (seconds)
+        )
+
+    # def _execute(self, query, args=None, commit=False):
+    #     """
+    #     执行SQL
+    #     :param query: SQL语句
+    #     :param args: SQL参数
+    #     :param commit: 是否提交事务
+    #     :return: 查询结果
+    #     """
+    #     try:
+    #         with self.pool.connection() as conn:
+    #             with conn.cursor() as cursor:
+    #                 cursor.execute(query, args)
+    #                 if commit:
+    #                     conn.commit()
+    #                 self.log.debug(f"sql _execute, Query: {query}, Rows: {cursor.rowcount}")
+    #                 return cursor
+    #     except Exception as e:
+    #         if commit and conn:
+    #             conn.rollback()
+    #         self.log.exception(f"Error executing query: {e}, Query: {query}, Args: {args}")
+    #         raise e
+
+    def _execute(self, query, args=None, commit=False):
+        """
+        Execute one SQL statement, retrying once when the connection has dropped.
+        :param query: SQL statement
+        :param args: parameters bound to the statement
+        :param commit: whether to commit after executing
+        :return: the cursor the statement was executed on
+        :raises pymysql.err.InterfaceError: when the retry fails as well
+        :raises pymysql.err.IntegrityError: re-raised without logging, for the caller to handle
+        :raises Exception: any other error, after it has been logged
+        """
+        conn = None
+        for attempt in range(2):  # at most one retry
+            try:
+                with self.pool.connection() as conn:
+                    with conn.cursor() as cursor:
+                        cursor.execute(query, args)
+                        if commit:
+                            conn.commit()
+                        self.log.debug(f"sql _execute, Query: {query}, Rows: {cursor.rowcount}")
+                        # NOTE(review): the cursor is returned from inside both ``with`` blocks, so
+                        # callers receive it after those context managers have exited; confirm that
+                        # fetch*/rowcount/lastrowid stay usable at that point.
+                        return cursor
+            except pymysql.err.InterfaceError as e:
+                # Connection dropped: retry once
+                if attempt == 0:
+                    self.log.warning(f"数据库连接断开,正在重试... Error: {e}")
+                    continue
+                self.log.error(f"重试后仍失败: {e}, Query: {query}")
+                raise e
+            except pymysql.err.IntegrityError:
+                # Integrity errors (e.g. duplicate entries) are left to the caller; no stack
+                # trace is logged here to keep the log clean
+                if commit and conn:
+                    try:
+                        conn.rollback()
+                    except Exception:
+                        pass
+                raise
+            except Exception as e:
+                # Best-effort rollback, then log with traceback and re-raise
+                if commit and conn:
+                    try:
+                        conn.rollback()
+                    except Exception:
+                        pass
+                self.log.exception(f"Error executing query: {e}, Query: {query}, Args: {args}")
+                raise e
+
+    def select_one(self, query, args=None):
+        """
+        执行查询,返回单个结果
+        :param query: 查询语句
+        :param args: 查询参数
+        :return: 查询结果
+        """
+        cursor = self._execute(query, args)
+        return cursor.fetchone()
+
+    def select_all(self, query, args=None):
+        """
+        执行查询,返回所有结果
+        :param query: 查询语句
+        :param args: 查询参数
+        :return: 查询结果
+        """
+        cursor = self._execute(query, args)
+        return cursor.fetchall()
+
+    def insert_one(self, query, args):
+        """
+        执行单条插入语句
+        :param query: 插入语句
+        :param args: 插入参数
+        """
+        self.log.info('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>data insert_one 入库中>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
+        cursor = self._execute(query, args, commit=True)
+        return cursor.lastrowid  # 返回插入的ID
+
+    def insert_all(self, query, args_list):
+        """
+        执行批量插入语句,如果失败则逐条插入
+        :param query: 插入语句
+        :param args_list: 插入参数列表
+        """
+        conn = None
+        cursor = None
+        try:
+            conn = self.pool.connection()
+            cursor = conn.cursor()
+            cursor.executemany(query, args_list)
+            conn.commit()
+            self.log.debug(f"sql insert_all, SQL: {query[:100]}..., Rows: {cursor.rowcount}")
+            self.log.info('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>data insert_all 入库中>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
+        except pymysql.err.IntegrityError as e:
+            if "Duplicate entry" in str(e):
+                conn.rollback()
+                self.log.warning(f"批量插入遇到重复,开始逐条插入。错误: {e}")
+                rowcount = 0
+                for args in args_list:
+                    try:
+                        self.insert_one(query, args)
+                        rowcount += 1
+                    except pymysql.err.IntegrityError as e2:
+                        if "Duplicate entry" in str(e2):
+                            self.log.debug(f"跳过重复条目: {e2}")
+                        else:
+                            self.log.error(f"插入失败: {e2}")
+                    except Exception as e2:
+                        self.log.error(f"插入失败: {e2}")
+                self.log.info(f"逐条插入完成: {rowcount}/{len(args_list)}条")
+            else:
+                conn.rollback()
+                self.log.exception(f"数据库完整性错误: {e}")
+                raise e
+        except Exception as e:
+            conn.rollback()
+            self.log.exception(f"批量插入失败: {e}")
+            raise e
+        finally:
+            if cursor:
+                cursor.close()
+            if conn:
+                conn.close()
+
+    def insert_one_or_dict(self, table=None, data=None, query=None, args=None, commit=True, ignore=False):
+        """
+        单条插入(支持字典或原始SQL)
+        :param table: 表名(字典插入时必需)
+        :param data: 字典数据 {列名: 值}
+        :param query: 直接SQL语句(与data二选一)
+        :param args: SQL参数(query使用时必需)
+        :param commit: 是否自动提交
+        :param ignore: 是否使用ignore
+        :return: 最后插入ID
+        """
+        if data is not None:
+            if not isinstance(data, dict):
+                raise ValueError("Data must be a dictionary")
+
+            keys = ', '.join([self._safe_identifier(k) for k in data.keys()])
+            values = ', '.join(['%s'] * len(data))
+
+            # 构建 INSERT IGNORE 语句
+            ignore_clause = "IGNORE" if ignore else ""
+            query = f"INSERT {ignore_clause} INTO {self._safe_identifier(table)} ({keys}) VALUES ({values})"
+            args = tuple(data.values())
+        elif query is None:
+            raise ValueError("Either data or query must be provided")
+
+        try:
+            cursor = self._execute(query, args, commit)
+            self.log.info(f"sql insert_one_or_dict, Table: {table}, Rows: {cursor.rowcount}")
+            self.log.info('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>data insert_one_or_dict 入库中>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
+            return cursor.lastrowid
+        except pymysql.err.IntegrityError as e:
+            if "Duplicate entry" in str(e):
+                # 重复条目用 warning 简短输出,不打印堆栈
+                self.log.warning(f"插入跳过-重复条目 Table: {table}, {e.args[1] if len(e.args) > 1 else e}")
+                return -1  # 返回 -1 表示重复条目被跳过
+            else:
+                self.log.error(f"数据库完整性错误 Table: {table}, Error: {e}")
+                raise
+        except Exception as e:
+            self.log.error(f"insert_one_or_dict 失败 Table: {table}, Error: {e}")
+            raise
+
+    def insert_many(self, table=None, data_list=None, query=None, args_list=None, batch_size=1000, commit=True,
+                    ignore=False):
+        """
+        Insert many rows in batches, from a list of dicts or from a raw SQL statement.
+
+        Every batch is sent with ``executemany`` on its own pooled connection. When a
+        batch fails with a "Duplicate entry" integrity error and ``ignore`` is False,
+        the batch is retried row by row so the non-duplicate rows are still stored;
+        rows that fail individually are logged and skipped.
+
+        :param table: table name (required in dict mode)
+        :param data_list: list of dicts [{column: value}]; the first dict defines the columns
+        :param query: raw SQL statement (alternative to data_list)
+        :param args_list: list of parameter tuples (required together with query)
+        :param batch_size: number of rows per executemany call
+        :param commit: whether to commit after each batch
+        :param ignore: whether to build an INSERT IGNORE statement (dict mode only)
+        :return: sum of cursor.rowcount over successful batches plus the rows stored by the fallback
+        :raises ValueError: if neither data_list nor query (with args_list) is usable
+        :raises KeyError: if a later dict lacks a column present in the first dict
+        """
+        columns = None
+        if data_list is not None:
+            if not data_list or not isinstance(data_list[0], dict):
+                raise ValueError("Data_list must be a non-empty list of dictionaries")
+
+            # The first dict fixes the column order for the whole statement.
+            columns = list(data_list[0].keys())
+            keys = ', '.join([self._safe_identifier(k) for k in columns])
+            values = ', '.join(['%s'] * len(columns))
+
+            # Build the INSERT [IGNORE] statement.
+            ignore_clause = "IGNORE" if ignore else ""
+            query = f"INSERT {ignore_clause} INTO {self._safe_identifier(table)} ({keys}) VALUES ({values})"
+            # Look values up by column name: d.values() would silently put values into
+            # the wrong columns when a dict lists the same keys in a different order.
+            args_list = [tuple(d[c] for c in columns) for d in data_list]
+        elif query is None:
+            raise ValueError("Either data_list or query must be provided")
+        elif args_list is None:
+            raise ValueError("args_list must be provided when using query")
+
+        total = 0
+        for i in range(0, len(args_list), batch_size):
+            batch = args_list[i:i + batch_size]
+            try:
+                with self.pool.connection() as conn:
+                    try:
+                        with conn.cursor() as cursor:
+                            cursor.executemany(query, batch)
+                            affected = cursor.rowcount
+                        if commit:
+                            conn.commit()
+                    except Exception:
+                        # Roll back while the connection is still checked out; once the
+                        # with-block exits it has been released and can no longer be used.
+                        if commit:
+                            conn.rollback()
+                        raise
+                total += affected
+            except pymysql.err.IntegrityError as e:
+                if "Duplicate entry" not in str(e):
+                    # Any other integrity violation is not recoverable here.
+                    self.log.exception(f"数据库完整性错误: {e}")
+                    raise
+                if ignore:
+                    # INSERT IGNORE should not get here; log and move on just in case.
+                    self.log.warning(f"批量插入遇到重复条目(ignore模式): {e}")
+                    continue
+
+                # Unique-key conflict without IGNORE: retry this batch row by row.
+                self.log.warning(f"批量插入遇到重复条目,开始逐条插入。错误: {e}")
+                rowcount = 0
+                for j, args in enumerate(batch):
+                    try:
+                        if columns is not None:
+                            # Dict mode
+                            inserted = self.insert_one_or_dict(
+                                table=table,
+                                data=dict(zip(columns, args)),
+                                commit=commit,
+                                ignore=False
+                            )
+                            # insert_one_or_dict reports a skipped duplicate by returning -1
+                            # instead of raising, so such rows must not be counted.
+                            if inserted == -1:
+                                continue
+                        else:
+                            # Raw SQL mode
+                            self.insert_one(query, args)
+                        rowcount += 1
+                    except pymysql.err.IntegrityError as e2:
+                        if "Duplicate entry" in str(e2):
+                            self.log.debug(f"跳过重复条目[{i+j+1}]: {e2}")
+                        else:
+                            self.log.error(f"插入失败[{i+j+1}]: {e2}")
+                    except Exception as e2:
+                        self.log.error(f"插入失败[{i+j+1}]: {e2}")
+                total += rowcount
+                self.log.info(f"批次逐条插入完成: 成功{rowcount}/{len(batch)}条")
+            except Exception as e:
+                # Any other database error aborts the whole call.
+                self.log.exception(f"批量插入失败: {e}")
+                raise
+        if table:
+            self.log.info(f"sql insert_many, Table: {table}, Total Rows: {total}")
+        else:
+            self.log.info(f"sql insert_many, Query: {query}, Total Rows: {total}")
+        return total
+
+    def insert_many_two(self, table=None, data_list=None, query=None, args_list=None, batch_size=1000, commit=True,
+                        ignore=False):
+        """
+        Batch insert from a list of dicts or a raw SQL statement (alternative implementation).
+
+        Each batch runs through ``executemany``. A "Duplicate entry" failure with
+        ``ignore`` False downgrades that batch to row-by-row ``insert_one`` calls;
+        every other integrity or database error is logged and re-raised.
+
+        :param table: table name (required in dict mode)
+        :param data_list: list of dicts [{column: value}]; the first dict defines the columns
+        :param query: raw SQL statement (alternative to data_list)
+        :param args_list: list of parameter tuples (required together with query)
+        :param batch_size: number of rows per executemany call
+        :param commit: whether to commit after each batch
+        :param ignore: whether to build an INSERT IGNORE statement (dict mode only)
+        :return: sum of cursor.rowcount over successful batches plus the rows stored by the fallback
+        :raises ValueError: if neither data_list nor query (with args_list) is usable
+        :raises KeyError: if a later dict lacks a column present in the first dict
+        """
+        if data_list is not None:
+            if not data_list or not isinstance(data_list[0], dict):
+                raise ValueError("Data_list must be a non-empty list of dictionaries")
+            # The first dict fixes the column order for the whole statement.
+            columns = list(data_list[0].keys())
+            keys = ', '.join([self._safe_identifier(k) for k in columns])
+            values = ', '.join(['%s'] * len(columns))
+            ignore_clause = "IGNORE" if ignore else ""
+            query = f"INSERT {ignore_clause} INTO {self._safe_identifier(table)} ({keys}) VALUES ({values})"
+            # Read values by column name so dicts with a different key order stay aligned.
+            args_list = [tuple(d[c] for c in columns) for d in data_list]
+        elif query is None:
+            raise ValueError("Either data_list or query must be provided")
+        elif args_list is None:
+            raise ValueError("args_list must be provided when using query")
+
+        total = 0
+        for i in range(0, len(args_list), batch_size):
+            batch = args_list[i:i + batch_size]
+            try:
+                with self.pool.connection() as conn:
+                    try:
+                        with conn.cursor() as cursor:
+                            cursor.executemany(query, batch)
+                            affected = cursor.rowcount
+                        if commit:
+                            conn.commit()
+                    except Exception:
+                        # Roll back while the connection is still checked out; after the
+                        # with-block exits it has been released and must not be touched.
+                        if commit:
+                            conn.rollback()
+                        raise
+                total += affected
+            except pymysql.err.IntegrityError as e:
+                if "Duplicate entry" in str(e) and not ignore:
+                    self.log.warning(f"批量插入遇到重复,降级为逐条插入: {e}")
+                    rowcount = 0
+                    for args in batch:
+                        try:
+                            self.insert_one(query, args)
+                            rowcount += 1
+                        except pymysql.err.IntegrityError as e2:
+                            if "Duplicate entry" in str(e2):
+                                self.log.debug(f"跳过重复条目: {e2}")
+                            else:
+                                self.log.error(f"插入失败: {e2}")
+                        except Exception as e2:
+                            self.log.error(f"插入失败: {e2}")
+                    total += rowcount
+                else:
+                    self.log.exception(f"数据库完整性错误: {e}")
+                    raise
+            except Exception as e:
+                self.log.exception(f"批量插入失败: {e}")
+                raise
+        self.log.info(f"sql insert_many_two, Table: {table}, Total Rows: {total}")
+        return total
+
+    def insert_too_many(self, query, args_list, batch_size=1000):
+        """
+        Insert a very large parameter list in committed slices.
+
+        Intended for single calls with more than ~100k rows. Each slice is sent
+        with ``executemany`` and committed on its own; a slice that fails is
+        replayed one row at a time through ``insert_one``.
+
+        :param query: INSERT statement
+        :param args_list: list of parameter tuples
+        :param batch_size: number of rows per slice
+        """
+        self.log.info(f"sql insert_too_many, Query: {query}, Total Rows: {len(args_list)}")
+        for start in range(0, len(args_list), batch_size):
+            chunk = args_list[start:start + batch_size]
+            try:
+                with self.pool.connection() as conn, conn.cursor() as cursor:
+                    cursor.executemany(query, chunk)
+                    conn.commit()
+                    self.log.debug(f"insert_too_many -> Total Rows: {len(chunk)}")
+            except Exception as e:
+                self.log.error(f"insert_too_many error. Trying single insert. Error: {e}")
+                # Replay only the failed slice, row by row.
+                for row_args in chunk:
+                    self.insert_one(query, row_args)
+
+    def update_one(self, query, args):
+        """
+        Run a single UPDATE statement and commit it.
+
+        :param query: UPDATE statement
+        :param args: parameters for the statement
+        :return: whatever ``_execute`` returns for the statement
+        """
+        self.log.info('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>data update_one 更新中>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
+        outcome = self._execute(query, args, commit=True)
+        return outcome
+
+    def update_all(self, query, args_list):
+        """
+        Run a batch UPDATE with ``executemany``; if that fails, update row by row.
+
+        :param query: UPDATE statement
+        :param args_list: list of parameter tuples, one per row
+        """
+        conn = None
+        cursor = None
+        try:
+            conn = self.pool.connection()
+            cursor = conn.cursor()
+            cursor.executemany(query, args_list)
+            conn.commit()
+            self.log.debug(f"sql update_all, SQL: {query}, Rows: {len(args_list)}")
+            self.log.info('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>data update_all 更新中>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
+        except Exception as e:
+            # conn is still None when the pool itself could not hand out a connection;
+            # rolling back unconditionally would replace the real error with an
+            # AttributeError and skip the fallback below.
+            if conn is not None:
+                conn.rollback()
+            self.log.error(f"Error executing query: {e}")
+            # The batch failed: fall back to one update_one call per row.
+            rowcount = 0
+            for args in args_list:
+                self.update_one(query, args)
+                rowcount += 1
+            self.log.debug(f'Batch update failed. Updated {rowcount} rows individually.')
+        finally:
+            if cursor:
+                cursor.close()
+            if conn:
+                conn.close()
+
+    def update_one_or_dict(self, table=None, data=None, condition=None, query=None, args=None, commit=True):
+        """
+        Update with a single statement, built from a dict or given as raw SQL.
+
+        :param table: table name (required in dict mode)
+        :param data: dict {column: value} of new values (alternative to query)
+        :param condition: WHERE condition, in one of these forms:
+            - dict: {"id": 1} -> "WHERE id = %s"
+            - str: "id = 1" -> "WHERE id = 1" (caller is responsible for its safety)
+            - tuple: ("id = %s", [1]) -> "WHERE id = %s" (parameterized)
+        :param query: raw SQL statement (alternative to data)
+        :param args: parameters for the raw SQL statement
+        :param commit: whether to commit automatically
+        :return: number of affected rows (cursor.rowcount)
+        :raises ValueError: if the arguments fail validation
+        """
+        # Argument validation
+        if data is not None:
+            if not isinstance(data, dict):
+                raise ValueError("Data must be a dictionary")
+            if not data:
+                # An empty dict would produce "UPDATE t SET  WHERE ...", which is invalid SQL.
+                raise ValueError("Data must be a non-empty dictionary")
+            if table is None:
+                raise ValueError("Table name is required for dictionary update")
+            if condition is None:
+                raise ValueError("Condition is required for dictionary update")
+
+            # Build the SET clause
+            set_clause = ", ".join([f"{self._safe_identifier(k)} = %s" for k in data.keys()])
+            set_values = list(data.values())
+
+            # Resolve the condition into a clause and its parameters
+            condition_clause, condition_args = self._parse_condition(condition)
+            query = f"UPDATE {self._safe_identifier(table)} SET {set_clause} WHERE {condition_clause}"
+            # The condition parameters may arrive as a tuple; list + tuple raises
+            # TypeError, so normalise before concatenating.
+            args = set_values + list(condition_args)
+
+        elif query is None:
+            raise ValueError("Either data or query must be provided")
+
+        # Execute the update
+        cursor = self._execute(query, args, commit)
+        return cursor.rowcount
+
+    def _parse_condition(self, condition):
+        """
+        Normalise a condition into a (clause, args) pair.
+
+        :param condition: dict {column: value}, a raw SQL string, or a 2-item (clause, args) sequence
+        :return: (str, list) SQL clause and its parameter list
+        :raises ValueError: if the condition has none of the supported shapes
+        """
+        if isinstance(condition, dict):
+            clause = " AND ".join([f"{self._safe_identifier(k)} = %s" for k in condition.keys()])
+            args = list(condition.values())
+        elif isinstance(condition, str):
+            clause = condition  # NOTE: the caller is responsible for the safety of raw SQL
+            args = []
+        elif isinstance(condition, (tuple, list)) and len(condition) == 2:
+            clause, args = condition[0], condition[1]
+            if isinstance(args, (list, tuple)):
+                # Always hand back a list, as documented: callers concatenate it with
+                # other lists, which fails for a tuple.
+                args = list(args)
+            else:
+                args = [args]
+        else:
+            raise ValueError("Condition must be dict/str/(clause, args)")
+        return clause, args
+
+    def update_many(self, table=None, data_list=None, condition_list=None, query=None, args_list=None, batch_size=500,
+                    commit=True):
+        """
+        Batch update from a list of dicts or from a raw SQL statement.
+
+        Batches are sent with ``executemany``. A batch that fails is replayed row by
+        row through ``_execute``; rows that fail individually are logged and skipped.
+        In dict mode, a batch whose dicts do not all share the key sets of the first
+        data/condition dict is sent through ``update_one_or_dict`` instead.
+
+        :param table: table name (required in dict mode)
+        :param data_list: list of dicts [{column: value}] with the new values
+        :param condition_list: list of condition dicts, same length as data_list
+        :param query: raw SQL statement (alternative to data_list)
+        :param args_list: list of parameter tuples (required together with query)
+        :param batch_size: number of rows per executemany call
+        :param commit: whether to commit after each batch
+        :return: cursor.rowcount of successful batches plus one per row updated individually
+        :raises ValueError: if the arguments fail validation
+        """
+        if data_list is not None:
+            if not data_list or not isinstance(data_list[0], dict):
+                raise ValueError("Data_list must be a non-empty list of dictionaries")
+            if condition_list is None or len(data_list) != len(condition_list):
+                raise ValueError("Condition_list must be provided and match the length of data_list")
+            if not all(isinstance(cond, dict) for cond in condition_list):
+                raise ValueError("All elements in condition_list must be dictionaries")
+
+            # The first data/condition dicts fix the column order of the statement.
+            data_cols = list(data_list[0].keys())
+            cond_cols = list(condition_list[0].keys())
+            first_data_keys = set(data_cols)
+            first_cond_keys = set(cond_cols)
+
+            # Build the shared statement
+            set_clause = ', '.join([self._safe_identifier(k) + ' = %s' for k in data_cols])
+            condition_clause = ' AND '.join([self._safe_identifier(k) + ' = %s' for k in cond_cols])
+            base_query = f"UPDATE {self._safe_identifier(table)} SET {set_clause} WHERE {condition_clause}"
+            total = 0
+
+            # Process in batches
+            for i in range(0, len(data_list), batch_size):
+                batch_data = data_list[i:i + batch_size]
+                batch_conds = condition_list[i:i + batch_size]
+
+                # A batch can share one statement only if every row has the same columns.
+                can_batch = all(
+                    set(data.keys()) == first_data_keys and set(cond.keys()) == first_cond_keys
+                    for data, cond in zip(batch_data, batch_conds)
+                )
+
+                if not can_batch:
+                    # Mixed structure: update each row with its own statement.
+                    for data, cond in zip(batch_data, batch_conds):
+                        self.update_one_or_dict(table=table, data=data, condition=cond, commit=commit)
+                        total += 1
+                    continue
+
+                # Read values by column name: only the key *sets* were compared, so
+                # values() order could differ from the statement's column order.
+                batch_args = [
+                    tuple(data[k] for k in data_cols) + tuple(cond[k] for k in cond_cols)
+                    for data, cond in zip(batch_data, batch_conds)
+                ]
+
+                # Run the batch
+                try:
+                    with self.pool.connection() as conn:
+                        try:
+                            with conn.cursor() as cursor:
+                                cursor.executemany(base_query, batch_args)
+                                affected = cursor.rowcount
+                            if commit:
+                                conn.commit()
+                        except Exception:
+                            # Roll back while the connection is still checked out; after
+                            # the with-block exits it has been released.
+                            if commit:
+                                conn.rollback()
+                            raise
+                    total += affected
+                    self.log.debug(f"Batch update succeeded. Rows: {affected}")
+                except Exception as e:
+                    self.log.error(f"Batch update failed: {e}")
+                    # Fall back to one statement per row
+                    for args, data, cond in zip(batch_args, batch_data, batch_conds):
+                        try:
+                            self._execute(base_query, args, commit=commit)
+                            total += 1
+                        except Exception as e2:
+                            self.log.error(f"Single update failed: {e2}, Data: {data}, Condition: {cond}")
+            self.log.info(f"Total updated rows: {total}")
+            return total
+        elif query is not None:
+            # Raw SQL with a parameter list
+            if args_list is None:
+                raise ValueError("args_list must be provided when using query")
+
+            total = 0
+            for i in range(0, len(args_list), batch_size):
+                batch_args = args_list[i:i + batch_size]
+                try:
+                    with self.pool.connection() as conn:
+                        try:
+                            with conn.cursor() as cursor:
+                                cursor.executemany(query, batch_args)
+                                affected = cursor.rowcount
+                            if commit:
+                                conn.commit()
+                        except Exception:
+                            # Same as above: roll back before the connection is released.
+                            if commit:
+                                conn.rollback()
+                            raise
+                    total += affected
+                    self.log.debug(f"Batch update succeeded. Rows: {affected}")
+                except Exception as e:
+                    self.log.error(f"Batch update failed: {e}")
+                    # Fall back to one statement per row
+                    for args in batch_args:
+                        try:
+                            self._execute(query, args, commit=commit)
+                            total += 1
+                        except Exception as e2:
+                            self.log.error(f"Single update failed: {e2}, Args: {args}")
+            self.log.info(f"Total updated rows: {total}")
+            return total
+        else:
+            raise ValueError("Either data_list or query must be provided")
+
+    def check_pool_health(self):
+        """
+        Check that the pool can hand out a connection that answers a ping.
+
+        Usage example::
+
+            sql_pool = MySQLConnectionPool(log=log)
+            if not sql_pool.check_pool_health():
+                log.error("数据库连接池异常")
+                raise RuntimeError("数据库连接池异常")
+
+        :return: True if a connection was obtained and pinged, False on any error
+        """
+        healthy = False
+        try:
+            with self.pool.connection() as conn:
+                conn.ping(reconnect=True)
+                healthy = True
+        except Exception as e:
+            self.log.error(f"Connection pool health check failed: {e}")
+            healthy = False
+        return healthy
+
+    def close(self):
+        """
+        Close the connection pool and release all of its connections.
+
+        Does nothing when the instance has no (truthy) ``pool`` attribute; any
+        error raised while closing is logged instead of propagated.
+        """
+        try:
+            active_pool = getattr(self, 'pool', None)
+            if not active_pool:
+                return
+            active_pool.close()
+            self.log.info("数据库连接池已关闭")
+        except Exception as e:
+            self.log.error(f"关闭连接池失败: {e}")
+
+    @staticmethod
+    def _safe_identifier(name):
+        """
+        Validate a SQL identifier (table or column name) before it is interpolated into SQL.
+
+        Only ASCII letters, digits and underscores are accepted, and the first
+        character must not be a digit.
+
+        :param name: identifier to check
+        :return: the identifier unchanged
+        :raises ValueError: if name is not a string or contains any other character
+        """
+        # fullmatch instead of match(...$): "$" also matches just before a trailing
+        # newline, which would let "name\n" through the check.
+        if not isinstance(name, str) or not re.fullmatch(r'[a-zA-Z_][a-zA-Z0-9_]*', name):
+            raise ValueError(f"Invalid SQL identifier: {name}")
+        return name
+
+
+if __name__ == '__main__':
+    # Manual smoke test: insert one sample card record through the pool.
+    demo_pool = MySQLConnectionPool()
+    sample_card = {
+        'card_type_id': 111,
+        'card_type_name': '补充包 继承的意志【OPC-13】',
+        'card_type_position': 964,
+        'card_id': 5284,
+        'card_name': '蒙奇·D·路飞',
+        'card_number': 'OP13-001',
+        'card_rarity': 'L',
+        'card_img': 'https://source.windoent.com/OnePiecePc/Picture/1757929283612OP13-001.png',
+        'card_life': '4',
+        'card_attribute': '打',
+        'card_power': '5000',
+        'card_attack': '-',
+        'card_color': '红/绿',
+        'subscript': 4,
+        'card_features': '超新星/草帽一伙',
+        'card_text_desc': '【咚!!×1】【对方的攻击时】我方处于活跃状态的咚!!不多于5张的场合,可以将我方任意张数的咚!!转为休息状态。每有1张转为休息状态的咚!!,本次战斗中,此领袖或我方最多1张拥有《草帽一伙》特征的角色力量+2000。',
+        'card_offer_type': '补充包 继承的意志【OPC-13】',
+        'crawler_language': '简中',
+    }
+    demo_pool.insert_one_or_dict(table="one_piece_record", data=sample_card)

+ 10 - 0
auctions90s_spider/requirements.txt

@@ -0,0 +1,10 @@
+-i https://mirrors.aliyun.com/pypi/simple/
+curl_cffi==0.15.1b1
+DBUtils==3.1.2
+loguru==0.7.3
+parsel==1.11.0
+PyMySQL==1.1.2
+PyYAML==6.0.3
+schedule==1.2.2
+tenacity==9.1.4
+user_agent==0.1.14