lei.chen 6 сар өмнө
parent
commit
74aa24860b

+ 74 - 0
leka_spider/YamlLoader.py

@@ -0,0 +1,74 @@
+# -*- coding: utf-8 -*-
+#
+import os, re
+import yaml
+
# Matches placeholders like "${ENV_VAR:default}" or "${literal}".
regex = re.compile(r'^\$\{(?P<ENV>[A-Z_\-]+\:)?(?P<VAL>[\w\.]+)\}$')

# String values considered True by getValueAsBool (case-insensitive).
_TRUTHY = {'true', '1', 'yes', 'on'}


class YamlConfig:
    """Thin wrapper around a parsed YAML mapping with ${ENV:default} resolution."""

    def __init__(self, config):
        # config: mapping produced by yaml parsing (may be None for missing keys).
        self.config = config

    def get(self, key: str):
        """Return the sub-section under *key*, wrapped in a new YamlConfig."""
        return YamlConfig(self.config.get(key))

    def _resolve(self, key: str):
        """Resolve the raw value stored under *key*.

        "${ENV:default}" -> os.getenv("ENV", "default")
        "${literal}"     -> "literal"  (bug fix: the original returned None here)
        anything else    -> the raw value unchanged
        Raises KeyError when *key* is absent (same as the original).
        """
        value = self.config[key]
        if isinstance(value, str):
            match = regex.match(value)
            if match:
                group = match.groupdict()
                if group['ENV'] is not None:
                    # Strip the trailing ':' captured together with the env-var name.
                    return os.getenv(group['ENV'][:-1], group['VAL'])
                return group['VAL']
        return value

    def getValueAsString(self, key: str):
        """Return the resolved value for *key* (string for placeholder values)."""
        return self._resolve(key)

    def getValueAsInt(self, key: str):
        """Return the resolved value for *key* coerced to int."""
        return int(self._resolve(key))

    def getValueAsBool(self, key: str, env: str = None):
        """Return the resolved value for *key* as a bool.

        Bug fix: the original applied bool() to the resolved string, so any
        non-empty value -- including "false" -- came back True.  String
        values are now compared against a small truthy-word set.

        :param env: unused; kept for backward compatibility with callers.
        """
        value = self._resolve(key)
        if isinstance(value, str):
            return value.strip().lower() in _TRUTHY
        return bool(value)
+    
def readYaml(path: str = 'application.yml', profile: str = None) -> YamlConfig:
    """Load a YAML config file, optionally overlaid with a profile variant.

    For path "application.yml" and profile "dev", keys from
    "application-dev.yml" (when present) override the base file.

    Bug fixes vs. the original:
    - ``conf`` was unbound (NameError) when *path* did not exist; it now
      defaults to an empty dict.
    - the profile filename was built with ``path.split('.')``, which breaks
      for paths whose directories contain dots; ``os.path.splitext`` is
      used instead.

    :param path: base YAML file path
    :param profile: optional profile suffix
    :return: YamlConfig wrapping the merged mapping (empty if nothing loaded)
    """
    conf = {}
    if os.path.exists(path):
        with open(path) as fd:
            conf = yaml.load(fd, Loader=yaml.FullLoader) or {}

    if profile is not None:
        root, ext = os.path.splitext(path)
        profiledYaml = f'{root}-{profile}{ext}'
        if os.path.exists(profiledYaml):
            with open(profiledYaml) as fd:
                conf.update(yaml.load(fd, Loader=yaml.FullLoader) or {})

    return YamlConfig(conf)
+
+# res = readYaml()
+# mysqlConf = res.get('mysql')
+# print(mysqlConf)
+
+# print(res.getValueAsString("host"))
+# mysqlYaml = mysqlConf.getValueAsString("host")
+# print(mysqlYaml)
+# host = mysqlYaml.get("host").split(':')[-1][:-1]
+# port = mysqlYaml.get("port").split(':')[-1][:-1]
+# username = mysqlYaml.get("username").split(':')[-1][:-1]
+# password = mysqlYaml.get("password").split(':')[-1][:-1]
+# mysql_db = mysqlYaml.get("db").split(':')[-1][:-1]
+# print(host,port,username,password)

+ 6 - 0
leka_spider/application.yml

@@ -0,0 +1,6 @@
+mysql:
+  host: ${MYSQL_HOST:100.64.0.25}
+  port: ${MYSQL_PORT:3306}
+  username: ${MYSQL_USERNAME:crawler}
+  password: ${MYSQL_PASSWORD:Pass2022}
+  db: ${MYSQL_DATABASE:crawler}

+ 128 - 0
leka_spider/leka_history_spider.py

@@ -0,0 +1,128 @@
+# -*- coding: utf-8 -*-
+# Author : Charley
+# Python : 3.10.8
+# Date   : 2025/3/25 18:47
+import time
+
+from mysql_pool import MySQLConnectionPool
+from settings import *
+
+
@retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
def get_history_sold_one_page(log, shop_id, sql_pool, page, token):
    """
    Fetch one page of a merchant's already-sold products and persist new ones.

    :param log: logger object
    :param shop_id: merchant id
    :param sql_pool: MySQL connection-pool object
    :param page: current page number
    :param token: auth token
    :return: totalPage (total number of pages); 1 on an empty response;
             -1 as a sentinel meaning "every product on this page is already
             in the database" (only emitted for pages < 5), used by the
             caller as a resume/checkpoint shortcut
    """
    url = "https://api.luckycards.com.cn/api/front/c/product/merchantProductShowList"
    data = {
        # "merchantCode": "81366056",
        "merchantCode": shop_id,
        "page": page,
        "saleStatus": 2
    }
    response = make_request(log, "POST", url, data=data, token=token)

    if not response:
        log.warning(f" get_history_sold_one_page for {shop_id}: Empty response")
        # NOTE(review): returning 1 makes the caller treat an empty response
        # as a single-page result and stop paginating -- confirm intended.
        return 1
    resp_data_ = response.get("data", {})
    totalPage = resp_data_.get("totalPage", 1)
    resp_data_list = resp_data_.get("list", [])
    if not resp_data_list:
        log.warning(f" get_history_sold_one_page for {shop_id}: Empty response")
    else:
        # Stays True only if every product on this page is already recorded.
        all_in_db = True
        for resp_data in resp_data_list:
            product_id = resp_data.get("code")
            if not product_id:
                log.warning(f"Warning {inspect.currentframe().f_code.co_name}: No product_id found")
                continue

            # Check whether this product id is already in the database.
            sql_exists_flag = """SELECT EXISTS (SELECT 1 FROM leka_product_record WHERE product_id = %s) AS exists_flag"""
            exists_flag = sql_pool.select_one(sql_exists_flag, (product_id,))
            exists_flag = exists_flag[0]
            if exists_flag == 1:
                log.info(
                    f"----------------- The product_id {product_id} is already in the database, Not need save -----------------")
            else:
                all_in_db = False
                try:
                    get_product_details(log, product_id, sql_pool, token)
                except Exception as e:
                    log.error(f"Error fetching product {product_id}: {e}")
                    continue
        if page < 5 and all_in_db:
            # if page == 1 and all_in_db:
            return -1  # sentinel: the whole page is already in the database
    return totalPage
+
+
def get_history_all_sold(log, sql_pool, shop_id, token):
    """
    Walk every page of one shop's historical sold listings.

    Stops when the -1 sentinel is returned (page already fully stored,
    resume shortcut) or when the last reported page has been fetched.

    :param log: logger object
    :param sql_pool: MySQL connection-pool object
    :param shop_id: merchant id
    :param token: auth token
    """
    current = 1
    keep_going = True
    while keep_going:
        log.info(f"----------------- The shop_id: {shop_id}, page: {current} is start -----------------")
        total_pages = get_history_sold_one_page(log, shop_id, sql_pool, current, token)

        if total_pages == -1:  # resume sentinel: page data already stored
            log.info(f"----------------- The shop_id: {shop_id}, 第一页数据全在数据库中,跳过后续页 -----------------")
            keep_going = False
        elif current >= total_pages:
            keep_going = False
        else:
            current += 1
+
+
@retry(stop=stop_after_attempt(50), wait=wait_fixed(1800), after=after_log)
def leka_history_main(log):
    """
    Entry point: crawl historical sold data for every known shop, then
    refresh player lists and unpack reports.

    NOTE(review): the broad try/except below swallows every exception, so
    the @retry decorator can only re-run this function when the pool
    health check raises -- confirm that is intended.

    :param log: logger object
    """
    log.info(
        f'开始运行 {inspect.currentframe().f_code.co_name} 爬虫任务....................................................')

    # Set up the MySQL connection pool.
    sql_pool = MySQLConnectionPool(log=log)
    if not sql_pool.check_pool_health():
        log.error("数据库连接池异常")
        raise RuntimeError("数据库连接池异常")

    try:
        # Single-row token table; token[0] raises TypeError if it is empty.
        token = sql_pool.select_one("SELECT token FROM leka_token")
        token = token[0]
        shop_id_list = sql_pool.select_all("SELECT DISTINCT shop_id FROM leka_shop_record")
        shop_id_list = [pid[0] for pid in shop_id_list]
        for shop_id in shop_id_list:
            try:
                get_history_all_sold(log, sql_pool, shop_id, token)
            except Exception as e:
                log.error(f"Error fetching shop_id {shop_id}: {e}")
                continue
        time.sleep(60)
        # time.sleep(60)
        get_players(log, sql_pool, token)
        time.sleep(60)
        get_reports(log, sql_pool, token)
    except Exception as e:
        log.error(f'{inspect.currentframe().f_code.co_name} error: {e}')
    finally:
        log.info(f'爬虫程序 {inspect.currentframe().f_code.co_name} 运行结束,等待下一轮的采集任务............')
+
+
if __name__ == '__main__':
    # Run one history-crawl cycle directly (retries handled by @retry).
    leka_history_main(logger)
    # sql_pool_ = MySQLConnectionPool(log=logger)
    # get_history_sold_one_page(logger, "1896238", sql_pool_, 1)

+ 263 - 0
leka_spider/leka_new_daily_spider.py

@@ -0,0 +1,263 @@
+# -*- coding: utf-8 -*-
+# Author : Charley
+# Python : 3.10.8
+# Date   : 2025/3/24 14:02
+import random
+import time
+from mysql_pool import MySQLConnectionPool
+from settings import *
+
+
@retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
def get_all_sold_one_page(log, page: int, last_id, lastSalePrice):
    """
    Fetch one page of sold listings across all categories.

    :param log: logger object
    :param page: page number
    :param last_id: id of the last item on the previous page (cursor)
    :param lastSalePrice: price of the last item on the previous page (cursor)
    :return: (items, totalPage, total) -- ([], 0, 0) on an empty response
    """
    log.info(f"Starting < get_all_sold_one_page > to fetch page {page}")
    url = "https://api.luckycards.com.cn/api/front/c/product/productShowList"
    payload = {
        "lastId": last_id,
        "lastSalePrice": lastSalePrice,
        "limit": 20,
        "openMode": "",
        "page": page,
        "saleStatus": "2",
        "sort": "0"
    }

    try:
        response = make_request(log, 'POST', url, data=payload)
        # print(response)
        if not response:
            return [], 0, 0
        body = response["data"]
        page_items = body["list"]
        log.info(f"Successfully fetched page {page}: {len(page_items)} items")
        return page_items, body["totalPage"], body["total"]
    except requests.exceptions.RequestException as e:
        log.error(f"Error fetching page {page}: {e}")
        raise e
    except ValueError as e:
        log.error(f"Error parsing JSON for page {page}: {e}")
        raise e
+
+
def get_shop_detail(log, shop_id):
    """
    Fetch one shop's detail record.

    :param log: logger object
    :param shop_id: merchant code
    :return: (fans_num, group_num, create_time), each None on failure
    """
    log.info(f"Start fetching shop {shop_id}")
    url = f"https://api.luckycards.com.cn/api/front/c/merchant/{shop_id}"
    try:
        response = make_request(log, 'GET', url)
        if not response:
            return None, None, None
        detail = response["data"]
        log.info(f"Successfully fetched shop {shop_id}")
        return (
            detail.get("fansNum"),
            detail.get("salesQuantity"),
            detail.get("createTime"),
        )
    except Exception as e:
        log.error(f"Error fetching shop {shop_id}: {e}")
        return None, None, None
+
+
def parse_shop_items(log, items, sql_pool, sql_shop_list):
    """Record any shops seen in *items* that are not yet in the database.

    *sql_shop_list* is an in-memory cache of known shop ids and is mutated
    in place as new shops are inserted.
    """
    if not items:
        log.warning(f"Warning {inspect.currentframe().f_code.co_name}: No items found")
        return

    for entry in items:
        merchant_code = entry.get("merchantCode")

        if merchant_code in sql_shop_list:
            # Already recorded -- nothing to do for this shop.
            log.debug(
                f"----------------- The shop_id {merchant_code} is already in the database, Not need save -----------------")
            continue

        # New shop: pull its detail record, persist it, and cache the id.
        fans_num, group_num, create_time = get_shop_detail(log, merchant_code)
        sql_pool.insert_one_or_dict("leka_shop_record", {
            "shop_id": merchant_code,
            "shop_name": entry.get("merchantName"),
            "fans_num": fans_num,
            "group_num": group_num,
            "create_time": create_time,
        })
        sql_shop_list.append(merchant_code)
+
+
def get_product(log, items, sql_pool, last_product_id, sql_product_id_list):
    """Record unseen product ids from *items*.

    Returns False when *last_product_id* (the newest id saved by the
    previous run) is encountered, signalling the caller to stop
    paginating; True otherwise.  Returns None when *items* is empty
    (the caller pre-checks for that case).
    """
    if not items:
        log.warning(f"Warning {inspect.currentframe().f_code.co_name}: No items found")
        return

    keep_paging = True
    for entry in items:
        code = entry.get("code")
        if not code:
            log.warning(f"Warning {inspect.currentframe().f_code.co_name}: No product_id found")
            continue

        if code in sql_product_id_list:
            log.debug(
                f"----------------- The product_id {code} is already in the database, Not need save -----------------")
        else:
            sql_pool.insert_one_or_dict("leka_product_record", {"product_id": code})
            sql_product_id_list.append(code)

        # Reached the last id saved by the previous run: stop after this page.
        if code == last_product_id:
            log.info(
                f"----------------- The product_id {code} is the last product_id:{last_product_id}, stop fetching -----------------")
            keep_paging = False

    return keep_paging
+
+
def get_all_sold_data(log, sql_pool, last_product_id, sql_shop_list, sql_product_id_list):
    """
    Crawl sold listings across all categories, page by page.

    :param log: logger object
    :param sql_pool: MySQL connection-pool object
    :param last_product_id: newest product id from the previous run;
        pagination stops once it is seen again
    :param sql_shop_list: in-memory cache of known shop ids (mutated in place)
    :param sql_product_id_list: in-memory cache of known product ids (mutated in place)

    Bug fix: the "Finished fetching" summary log sat inside the while loop
    and fired on every page; it now runs once, after pagination ends.
    """
    page = 1
    # page = 246
    max_page = 200  # safety cap on pagination
    last_id = 0
    lastSalePrice = ''
    total_page = 0
    total = 0
    while page <= max_page:
        items, total_page, total = get_all_sold_one_page(log, page, last_id, lastSalePrice)
        if not items:
            break

        # Record any new shops seen on this page.
        parse_shop_items(log, items, sql_pool, sql_shop_list)

        # Record new products; False means the previous run's last id was seen.
        stop_page = get_product(log, items, sql_pool, last_product_id, sql_product_id_list)
        if not stop_page:
            break

        # Advance the cursor to the last item of this page.
        last_id = items[-1].get("id")
        lastSalePrice = items[-1].get("unitPriceStr")
        if not last_id:
            log.error("API response missing userId in last item, cannot paginate")
            break
        if not lastSalePrice:
            log.error("API response missing lastSalePrice in last item, cannot paginate")
            break

        page += 1
        # time.sleep(random.uniform(1, 3))

    log.info(f"Finished fetching all data. Total pages: {total_page}, total items: {total}")
+
+
@retry(stop=stop_after_attempt(100), wait=wait_fixed(3600), after=after_log)
def leka_main(log):
    """
    Entry point for the daily crawl: discover new shops/products, then
    refresh product details, players and unpack reports.

    NOTE(review): the outer try/except swallows all exceptions, so the
    @retry decorator only ever re-runs on the pool health-check failure --
    confirm that is intended.

    :param log: logger object
    """
    log.info(
        f'开始运行 {inspect.currentframe().f_code.co_name} 爬虫任务....................................................')

    # Set up the MySQL connection pool.
    sql_pool = MySQLConnectionPool(log=log)
    if not sql_pool.check_pool_health():
        log.error("数据库连接池异常")
        raise RuntimeError("数据库连接池异常")

    try:
        token = sql_pool.select_one("SELECT token FROM leka_token")
        token = token[0]
        try:
            # Newest product id from the previous run (pagination stop marker).
            last_product_id = sql_pool.select_one(
                "SELECT product_id FROM leka_product_record ORDER BY finish_time DESC LIMIT 1")
            last_product_id = last_product_id[0]

            # In-memory cache of known shop ids.
            sql_shop_list = sql_pool.select_all("SELECT shop_id FROM leka_shop_record")
            sql_shop_list = [item[0] for item in sql_shop_list]

            # In-memory cache of known product ids.
            sql_product_id_list = sql_pool.select_all("SELECT product_id FROM leka_product_record")
            sql_product_id_list = [item[0] for item in sql_product_id_list]

            get_all_sold_data(log, sql_pool, last_product_id, sql_shop_list, sql_product_id_list)
            sql_shop_list.clear()
            sql_product_id_list.clear()
        except Exception as e:
            log.error(f"Error fetching last_product_id: {e}")
        time.sleep(5)

        # Fetch product details.
        try:
            get_product_detail_list(log, sql_pool,token)
        except Exception as e:
            log.error(f"Error fetching product_detail_list: {e}")
        time.sleep(5)

        # Fetch product players.
        try:
            get_players(log, sql_pool,token)
        except Exception as e:
            log.error(f"Error fetching players: {e}")
        time.sleep(5)

        # Fetch unpack reports.
        try:
            get_reports(log, sql_pool,token)
        except Exception as e:
            log.error(f"Error fetching reports: {e}")
    except Exception as e:
        log.error(f'{inspect.currentframe().f_code.co_name} error: {e}')
    finally:
        log.info(f'爬虫程序 {inspect.currentframe().f_code.co_name} 运行结束,等待下一轮的采集任务............')
+
+
if __name__ == '__main__':
    # Run one daily crawl cycle directly (retries handled by @retry).
    leka_main(logger)
    # get_all_sold_one_page(logger, 1,  0, '')

+ 69 - 0
leka_spider/leke_login.py

@@ -0,0 +1,69 @@
+# -*- coding: utf-8 -*-
+# Author : Charley
+# Python : 3.10.8
+# Date   : 2025/3/24 17:53
+import requests
+# from loguru import logger
+from mysql_pool import MySQLConnectionPool
+from settings import *
+
+# logger.remove()
+# logger.add("./logs/login_{time:YYYYMMDD}.log", encoding='utf-8', rotation="00:00",
+#            format="[{time:YYYY-MM-DD HH:mm:ss.SSS}] {level} {message}",
+#            level="DEBUG", retention="7 day")
+
+
def get_token(log):
    """Log in with the configured account and return a fresh API token.

    :param log: logger object
    :return: token string on success, None on failure
    """
    log.debug("Start fetching token...........")
    headers = {
        "User-Agent": "Dart/3.5 (dart:io)",
        "Accept-Encoding": "gzip",
        "Content-Type": "application/json",
        "deviceid": "763f77b1-cc16-4369-ac39-a03206ecfb48",
        "brand": "Redmi",
        "os": "android",
        "content-type": "application/json; charset=utf-8",
        "authori-zation": "",
        "systemversion": "32",
        "lang": "zh",
        "verse-ua": "12fe307ef8850bdd64f6dcf1986944ae",
        "version": "1.3.0",
        "isphysicaldevice": "true",
        "cid": ""
    }
    url = "https://api.luckycards.com.cn/api/front/login/pwd"
    # NOTE(review): credentials are hard-coded here -- consider moving them
    # to configuration.
    data = {
        "phone": "19521500850",
        "pwd": "Pass2025",
        "spreaderInviteCode": ""
    }
    # print(response.text)
    resp_json = requests.post(url, headers=headers, json=data).json()
    if resp_json["code"] != 200:
        log.error(f"Fetch token failed! Msg:{resp_json['message']}")
        return None
    log.debug("Fetch token success!")
    return resp_json["data"]["token"]
+
@retry(stop=stop_after_attempt(100), wait=wait_fixed(3600), after=after_log)
def login_main():
    """Refresh the stored API token: log in, then write it to leka_token."""
    # Set up the MySQL connection pool.
    sql_pool = MySQLConnectionPool(log=logger)
    if not sql_pool.check_pool_health():
        logger.error("数据库连接池异常")
        raise RuntimeError("数据库连接池异常")

    token = get_token(logger)
    if token is None:
        logger.error("获取token失败")
        raise RuntimeError("获取token失败")

    sql_pool.update_one_or_dict(table="leka_token", data={"token": token}, condition={"id": 1})
    logger.debug("------------------ 更新token成功 ------------------")
+
+
if __name__ == '__main__':
    # Refresh the stored token once (retries handled by @retry).
    login_main()

+ 461 - 0
leka_spider/mysql_pool.py

@@ -0,0 +1,461 @@
+# -*- coding: utf-8 -*-
+# Author : Charley
+# Python : 3.10.8
+# Date   : 2025/3/25 14:14
+import re
+import pymysql
+import YamlLoader
+from loguru import logger
+from dbutils.pooled_db import PooledDB
+
# Load connection settings from application.yml; values may be
# ${ENV:default} placeholders resolved by YamlLoader.
yaml = YamlLoader.readYaml()
mysqlYaml = yaml.get("mysql")
sql_host = mysqlYaml.getValueAsString("host")
sql_port = mysqlYaml.getValueAsInt("port")
sql_user = mysqlYaml.getValueAsString("username")
sql_password = mysqlYaml.getValueAsString("password")
sql_db = mysqlYaml.getValueAsString("db")
+
+
class MySQLConnectionPool:
    """
    MySQL connection pool (wraps DBUtils PooledDB over pymysql).
    """

    def __init__(self, mincached=4, maxcached=5, maxconnections=10, log=None):
        """
        Initialise the pool.
        :param mincached: connections created up-front (0 = none)
        :param maxcached: max idle connections kept (0/None = unlimited)
        :param maxconnections: max total connections (0/None = unlimited)
        :param log: custom logger; defaults to the module-level loguru logger
        """
        self.log = log or logger
        self.pool = PooledDB(
            creator=pymysql,
            mincached=mincached,
            maxcached=maxcached,
            maxconnections=maxconnections,
            blocking=True,  # block and wait when exhausted instead of raising
            host=sql_host,
            port=sql_port,
            user=sql_user,
            password=sql_password,
            database=sql_db,
            ping=0  # liveness-check mode (0 = never check the connection)
        )
+
+    def _execute(self, query, args=None, commit=False):
+        """
+        执行SQL
+        :param query: SQL语句
+        :param args: SQL参数
+        :param commit: 是否提交事务
+        :return: 查询结果
+        """
+        try:
+            with self.pool.connection() as conn:
+                with conn.cursor() as cursor:
+                    cursor.execute(query, args)
+                    if commit:
+                        conn.commit()
+                    self.log.debug(f"sql _execute, Query: {query}, Rows: {cursor.rowcount}")
+                    return cursor
+        except Exception as e:
+            if commit:
+                conn.rollback()
+            self.log.error(f"Error executing query: {e}, Query: {query}, Args: {args}")
+            raise e
+
+    def select_one(self, query, args=None):
+        """
+        执行查询,返回单个结果
+        :param query: 查询语句
+        :param args: 查询参数
+        :return: 查询结果
+        """
+        cursor = self._execute(query, args)
+        return cursor.fetchone()
+
+    def select_all(self, query, args=None):
+        """
+        执行查询,返回所有结果
+        :param query: 查询语句
+        :param args: 查询参数
+        :return: 查询结果
+        """
+        cursor = self._execute(query, args)
+        return cursor.fetchall()
+
+    def insert_one(self, query, args):
+        """
+        执行单条插入语句
+        :param query: 插入语句
+        :param args: 插入参数
+        """
+        self.log.info('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>data insert_one 入库中>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
+        cursor = self._execute(query, args, commit=True)
+        return cursor.lastrowid  # 返回插入的ID
+
+    def insert_all(self, query, args_list):
+        """
+        执行批量插入语句,如果失败则逐条插入
+        :param query: 插入语句
+        :param args_list: 插入参数列表
+        """
+        conn = None
+        cursor = None
+        try:
+            conn = self.pool.connection()
+            cursor = conn.cursor()
+            cursor.executemany(query, args_list)
+            conn.commit()
+            self.log.debug(f"sql insert_all, SQL: {query}, Rows: {len(args_list)}")
+            self.log.info('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>data insert_all 入库中>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
+        except Exception as e:
+            conn.rollback()
+            self.log.error(f"Batch insertion failed after 5 attempts. Trying single inserts. Error: {e}")
+            # 如果批量插入失败,则逐条插入
+            rowcount = 0
+            for args in args_list:
+                self.insert_one(query, args)
+                rowcount += 1
+            self.log.debug(f"Batch insertion failed. Inserted {rowcount} rows individually.")
+        finally:
+            if cursor:
+                cursor.close()
+            if conn:
+                conn.close()
+
+    def insert_one_or_dict(self, table=None, data=None, query=None, args=None, commit=True):
+        """
+        单条插入(支持字典或原始SQL)
+        :param table: 表名(字典插入时必需)
+        :param data: 字典数据 {列名: 值}
+        :param query: 直接SQL语句(与data二选一)
+        :param args: SQL参数(query使用时必需)
+        :param commit: 是否自动提交
+        :return: 最后插入ID
+        """
+        if data is not None:
+            if not isinstance(data, dict):
+                raise ValueError("Data must be a dictionary")
+
+            keys = ', '.join([self._safe_identifier(k) for k in data.keys()])
+            values = ', '.join(['%s'] * len(data))
+            query = f"INSERT INTO {self._safe_identifier(table)} ({keys}) VALUES ({values})"
+            args = tuple(data.values())
+        elif query is None:
+            raise ValueError("Either data or query must be provided")
+
+        cursor = self._execute(query, args, commit)
+        self.log.info(f"sql insert_one_or_dict, Table: {table}, Rows: {cursor.rowcount}")
+        self.log.info('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>data insert_one_or_dict 入库中>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
+        return cursor.lastrowid
+
+    def insert_many(self, table=None, data_list=None, query=None, args_list=None, batch_size=500, commit=True):
+        """
+        批量插入(支持字典列表或原始SQL)
+        :param table: 表名(字典插入时必需)
+        :param data_list: 字典列表 [{列名: 值}]
+        :param query: 直接SQL语句(与data_list二选一)
+        :param args_list: SQL参数列表(query使用时必需)
+        :param batch_size: 分批大小
+        :param commit: 是否自动提交
+        :return: 影响行数
+        """
+        if data_list is not None:
+            if not data_list or not isinstance(data_list[0], dict):
+                raise ValueError("Data_list must be a non-empty list of dictionaries")
+
+            keys = ', '.join([self._safe_identifier(k) for k in data_list[0].keys()])
+            values = ', '.join(['%s'] * len(data_list[0]))
+            query = f"INSERT INTO {self._safe_identifier(table)} ({keys}) VALUES ({values})"
+            args_list = [tuple(d.values()) for d in data_list]
+        elif query is None:
+            raise ValueError("Either data_list or query must be provided")
+
+        total = 0
+        for i in range(0, len(args_list), batch_size):
+            batch = args_list[i:i + batch_size]
+            try:
+                with self.pool.connection() as conn:
+                    with conn.cursor() as cursor:
+                        cursor.executemany(query, batch)
+                        if commit:
+                            conn.commit()
+                        total += cursor.rowcount
+                        # self.log.debug(f"sql insert_many_or_dict, SQL: {query}, Rows: {cursor.rowcount}")
+            except Exception as e:
+                if commit:
+                    conn.rollback()
+                self.log.error(f"Batch insert failed: {e}")
+                # 降级为单条插入
+                for args in batch:
+                    try:
+                        self.insert_one_or_dict(table=None, query=query, args=args, commit=commit)
+                        total += 1
+                    except Exception as e2:
+                        self.log.error(f"Single insert failed: {e2}")
+                        continue
+        self.log.info(f"sql insert_many_or_dict, Table: {table}, Total Rows: {total}")
+        return total
+
+    def insert_too_many(self, query, args_list, batch_size=1000):
+        """
+        执行批量插入语句,分片提交, 单次插入大于十万+时可用, 如果失败则降级为逐条插入
+        :param query: 插入语句
+        :param args_list: 插入参数列表
+        :param batch_size: 每次插入的条数
+        """
+        for i in range(0, len(args_list), batch_size):
+            batch = args_list[i:i + batch_size]
+            try:
+                with self.pool.connection() as conn:
+                    with conn.cursor() as cursor:
+                        cursor.executemany(query, batch)
+                        conn.commit()
+            except Exception as e:
+                self.log.error(f"insert_too_many error. Trying single insert. Error: {e}")
+                # 当前批次降级为单条插入
+                for args in batch:
+                    self.insert_one(query, args)
+
    def update_one(self, query, args):
        """
        Execute a single UPDATE statement (auto-committed).
        :param query: UPDATE statement
        :param args: statement parameters
        :return: the cursor from the executed statement
        """
        self.log.info('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>data update_one 更新中>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
        return self._execute(query, args, commit=True)
+
+    def update_all(self, query, args_list):
+        """
+        执行批量更新语句,如果失败则逐条更新
+        :param query: 更新语句
+        :param args_list: 更新参数列表
+        """
+        conn = None
+        cursor = None
+        try:
+            conn = self.pool.connection()
+            cursor = conn.cursor()
+            cursor.executemany(query, args_list)
+            conn.commit()
+            self.log.debug(f"sql update_all, SQL: {query}, Rows: {len(args_list)}")
+            self.log.info('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>data update_all 更新中>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
+        except Exception as e:
+            conn.rollback()
+            self.log.error(f"Error executing query: {e}")
+            # 如果批量更新失败,则逐条更新
+            rowcount = 0
+            for args in args_list:
+                self.update_one(query, args)
+                rowcount += 1
+            self.log.debug(f'Batch update failed. Updated {rowcount} rows individually.')
+        finally:
+            if cursor:
+                cursor.close()
+            if conn:
+                conn.close()
+
    def update_one_or_dict(self, table=None, data=None, condition=None, query=None, args=None, commit=True):
        """
        Update one row (dict mode or raw SQL).
        :param table: table name (required in dict mode)
        :param data: dict {column: value} (mutually exclusive with query)
        :param condition: update condition; supported forms:
            - dict:   {"id": 1}         -> "WHERE id = %s"
            - string: "id = 1"          -> "WHERE id = 1" (caller must ensure safety)
            - tuple:  ("id = %s", [1])  -> "WHERE id = %s" (parameterised)
        :param query: raw SQL (mutually exclusive with data)
        :param args: SQL parameters (raw-SQL mode)
        :param commit: auto-commit
        :return: affected row count
        :raises: ValueError when argument validation fails
        """
        # Validate arguments.
        if data is not None:
            if not isinstance(data, dict):
                raise ValueError("Data must be a dictionary")
            if table is None:
                raise ValueError("Table name is required for dictionary update")
            if condition is None:
                raise ValueError("Condition is required for dictionary update")

            # Build the SET clause.
            set_clause = ", ".join([f"{self._safe_identifier(k)} = %s" for k in data.keys()])
            set_values = list(data.values())

            # Normalise the condition into clause + args.
            condition_clause, condition_args = self._parse_condition(condition)
            query = f"UPDATE {self._safe_identifier(table)} SET {set_clause} WHERE {condition_clause}"
            args = set_values + condition_args

        elif query is None:
            raise ValueError("Either data or query must be provided")

        # Execute the update.
        cursor = self._execute(query, args, commit)
        self.log.debug(
            f"Updated table={table}, rows={cursor.rowcount}, query={query[:100]}...",
            extra={"table": table, "rows": cursor.rowcount}
        )
        return cursor.rowcount
+
+    def _parse_condition(self, condition):
+        """
+        解析条件为 (clause, args) 格式
+        :param condition: 字典/字符串/元组
+        :return: (str, list) SQL 子句和参数列表
+        """
+        if isinstance(condition, dict):
+            clause = " AND ".join([f"{self._safe_identifier(k)} = %s" for k in condition.keys()])
+            args = list(condition.values())
+        elif isinstance(condition, str):
+            clause = condition  # 注意:需调用方确保安全
+            args = []
+        elif isinstance(condition, (tuple, list)) and len(condition) == 2:
+            clause, args = condition[0], condition[1]
+            if not isinstance(args, (list, tuple)):
+                args = [args]
+        else:
+            raise ValueError("Condition must be dict/str/(clause, args)")
+        return clause, args
+
+    def update_many(self, table=None, data_list=None, condition_list=None, query=None, args_list=None, batch_size=500,
+                    commit=True):
+        """
+        批量更新(支持字典列表或原始SQL)
+        :param table: 表名(字典插入时必需)
+        :param data_list: 字典列表 [{列名: 值}]
+        :param condition_list: 条件列表(必须为字典,与data_list等长)
+        :param query: 直接SQL语句(与data_list二选一)
+        :param args_list: SQL参数列表(query使用时必需)
+        :param batch_size: 分批大小
+        :param commit: 是否自动提交
+        :return: 影响行数
+        """
+        if data_list is not None:
+            if not data_list or not isinstance(data_list[0], dict):
+                raise ValueError("Data_list must be a non-empty list of dictionaries")
+            if condition_list is None or len(data_list) != len(condition_list):
+                raise ValueError("Condition_list must be provided and match the length of data_list")
+            if not all(isinstance(cond, dict) for cond in condition_list):
+                raise ValueError("All elements in condition_list must be dictionaries")
+
+            # 获取第一个数据项和条件项的键
+            first_data_keys = set(data_list[0].keys())
+            first_cond_keys = set(condition_list[0].keys())
+
+            # 构造基础SQL
+            set_clause = ', '.join([self._safe_identifier(k) + ' = %s' for k in data_list[0].keys()])
+            condition_clause = ' AND '.join([self._safe_identifier(k) + ' = %s' for k in condition_list[0].keys()])
+            base_query = f"UPDATE {self._safe_identifier(table)} SET {set_clause} WHERE {condition_clause}"
+            total = 0
+
+            # 分批次处理
+            for i in range(0, len(data_list), batch_size):
+                batch_data = data_list[i:i + batch_size]
+                batch_conds = condition_list[i:i + batch_size]
+                batch_args = []
+
+                # 检查当前批次的结构是否一致
+                can_batch = True
+                for data, cond in zip(batch_data, batch_conds):
+                    data_keys = set(data.keys())
+                    cond_keys = set(cond.keys())
+                    if data_keys != first_data_keys or cond_keys != first_cond_keys:
+                        can_batch = False
+                        break
+                    batch_args.append(tuple(data.values()) + tuple(cond.values()))
+
+                if not can_batch:
+                    # 结构不一致,转为单条更新
+                    for data, cond in zip(batch_data, batch_conds):
+                        self.update_one_or_dict(table=table, data=data, condition=cond, commit=commit)
+                        total += 1
+                    continue
+
+                # 执行批量更新
+                try:
+                    with self.pool.connection() as conn:
+                        with conn.cursor() as cursor:
+                            cursor.executemany(base_query, batch_args)
+                            if commit:
+                                conn.commit()
+                            total += cursor.rowcount
+                            self.log.debug(f"Batch update succeeded. Rows: {cursor.rowcount}")
+                except Exception as e:
+                    if commit:
+                        conn.rollback()
+                    self.log.error(f"Batch update failed: {e}")
+                    # 降级为单条更新
+                    for args, data, cond in zip(batch_args, batch_data, batch_conds):
+                        try:
+                            self._execute(base_query, args, commit=commit)
+                            total += 1
+                        except Exception as e2:
+                            self.log.error(f"Single update failed: {e2}, Data: {data}, Condition: {cond}")
+            self.log.info(f"Total updated rows: {total}")
+            return total
+        elif query is not None:
+            # 处理原始SQL和参数列表
+            if args_list is None:
+                raise ValueError("args_list must be provided when using query")
+
+            total = 0
+            for i in range(0, len(args_list), batch_size):
+                batch_args = args_list[i:i + batch_size]
+                try:
+                    with self.pool.connection() as conn:
+                        with conn.cursor() as cursor:
+                            cursor.executemany(query, batch_args)
+                            if commit:
+                                conn.commit()
+                            total += cursor.rowcount
+                            self.log.debug(f"Batch update succeeded. Rows: {cursor.rowcount}")
+                except Exception as e:
+                    if commit:
+                        conn.rollback()
+                    self.log.error(f"Batch update failed: {e}")
+                    # 降级为单条更新
+                    for args in batch_args:
+                        try:
+                            self._execute(query, args, commit=commit)
+                            total += 1
+                        except Exception as e2:
+                            self.log.error(f"Single update failed: {e2}, Args: {args}")
+            self.log.info(f"Total updated rows: {total}")
+            return total
+        else:
+            raise ValueError("Either data_list or query must be provided")
+
+    def check_pool_health(self):
+        """
+        检查连接池中有效连接数
+
+        # 使用示例
+        # 配置 MySQL 连接池
+        sql_pool = MySQLConnectionPool(log=log)
+        if not sql_pool.check_pool_health():
+            log.error("数据库连接池异常")
+            raise RuntimeError("数据库连接池异常")
+        """
+        try:
+            with self.pool.connection() as conn:
+                conn.ping(reconnect=True)
+                return True
+        except Exception as e:
+            self.log.error(f"Connection pool health check failed: {e}")
+            return False
+
+    @staticmethod
+    def _safe_identifier(name):
+        """SQL标识符安全校验"""
+        if not re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', name):
+            raise ValueError(f"Invalid SQL identifier: {name}")
+        return name

+ 9 - 0
leka_spider/requirements.txt

@@ -0,0 +1,9 @@
+-i https://mirrors.aliyun.com/pypi/simple/
+bs4==0.0.2
+DBUtils==3.1.0
+loguru==0.7.3
+PyMySQL==1.1.1
+PyYAML==6.0.2
+requests==2.32.3
+schedule==1.2.2
+tenacity==9.0.0

+ 600 - 0
leka_spider/settings.py

@@ -0,0 +1,600 @@
+# -*- coding: utf-8 -*-
+# Author : Charley
+# Python : 3.10.8
+# Date   : 2025/3/24 15:05
+import inspect
+import requests
+from loguru import logger
+from bs4 import BeautifulSoup
+from tenacity import retry, stop_after_attempt, wait_fixed
+
# Configure loguru: one daily-rotated file sink under ./logs, kept 7 days,
# DEBUG level and up.
logger.remove()
logger.add("./logs/{time:YYYYMMDD}.log", encoding='utf-8', rotation="00:00",
           format="[{time:YYYY-MM-DD HH:mm:ss.SSS}] {level} {message}",
           level="DEBUG", retention="7 day")

# Default HTTP headers imitating the Android app client.
# "authori-zation" is left empty here and is filled in by make_request.
# NOTE(review): "sk"/"sktime" look like a captured request signature --
# presumably they can expire; confirm against the app's signing scheme.
HEADERS = {
    "User-Agent": "Dart/3.5 (dart:io)",
    "Accept-Encoding": "gzip",
    "Content-Type": "application/json",
    "deviceid": "763f77b1-cc16-4369-ac39-a03206ecfb48",
    "brand": "Redmi",
    "os": "android",
    "content-type": "application/json; charset=utf-8",
    "authori-zation": "",
    "systemversion": "32",
    "theme": "dark",
    "lang": "zh",
    "verse-ua": "d7b3b338008806f1b20427173b983e29",
    "version": "1.3.0",
    "isphysicaldevice": "true",
    "cid": "02931506",
    "sktime": "1746343207832",
    "sk": "fe8a84f5e1ff81813d9a998d72d1cd99"
}
+# headers = {
+#     "User-Agent": "Dart/3.5 (dart:io)",
+#     "Accept-Encoding": "gzip",
+#     "Content-Type": "application/json",
+#     "deviceid": "763f77b1-cc16-4369-ac39-a03206ecfb48",
+#     "brand": "Redmi",
+#     "os": "android",
+#     "content-type": "application/json; charset=utf-8",
+#     "authori-zation": "a-22695f440cc94df28b39f3e804696112",
+#     "systemversion": "32",
+#     "theme": "dark",
+#     "lang": "zh",
+#     "verse-ua": "d7b3b338008806f1b20427173b983e29",
+#     "version": "1.3.0",
+#     "isphysicaldevice": "true",
+#     "sktime": "1746343207832",
+#     "cid": "02931506",
+#     "sk": "fe8a84f5e1ff81813d9a998d72d1cd99"
+# }
+
+
def after_log(retry_state):
    """
    tenacity ``after`` callback: log the outcome of each retry attempt.

    :param retry_state: tenacity RetryCallState object
    """
    # Prefer the logger passed as the wrapped function's first positional
    # argument; otherwise fall back to the module-level logger.
    log = retry_state.args[0] if retry_state.args else logger

    fn_name = retry_state.fn.__name__
    attempt = retry_state.attempt_number
    if retry_state.outcome.failed:
        log.warning(f"Function '{fn_name}', Attempt {attempt} Times")
    else:
        log.info(f"Function '{fn_name}', Attempt {attempt} succeeded")
+
+
@retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
def get_proxys(log):
    """
    Build the Kuaidaili tunnel-proxy mapping for requests.

    :param log: logger object (consumed by the retry's after_log callback)
    :return: dict usable directly as the ``proxies=`` argument of requests
    """
    # SECURITY(review): credentials are hard-coded; move to config/env vars.
    tunnel = "x371.kdltps.com:15818"
    kdl_username = "t13753103189895"
    kdl_password = "o0yefv6z"
    # The original wrapped this pure dict construction in try/except, but
    # nothing here can raise; keep it straight-line and use f-strings.
    proxy_url = f"http://{kdl_username}:{kdl_password}@{tunnel}/"
    return {"http": proxy_url, "https": proxy_url}
+
+
+# def save_shop_list(sql_pool, shop_list):
+#     """
+#     保存店铺数据
+#     :param sql_pool:
+#     :param shop_list:
+#     """
+#     sql = "INSERT INTO leka_shop_record (shop_id, shop_name, fans_num, group_num, create_time) VALUES (%s, %s, %s, %s, %s)"
+#     sql_pool.insert_all(sql, shop_list)
+
+
+# def save_product_list(sql_pool, product_list):
+#     """
+#     保存商品数据
+#     :param sql_pool:
+#     :param product_list:
+#     """
+#     sql = "INSERT INTO leka_product_record (product_id, no, create_time, title, img, price_sale, total_price, sale_num, spec_config, sort, state, shop_id, shop_name, category, on_sale_time, end_time, finish_time, video_url) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
+#     sql_pool.insert_one(sql, product_list)
+
+
@retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
def make_request(log, method, url, params=None, data=None, headers=None, proxies=None, timeout=5, token=None):
    """
    Generic request helper.

    :param log: logger object
    :param method: HTTP method ('GET' or 'POST')
    :param url: target URL
    :param params: query parameters for GET requests
    :param data: JSON body for POST requests
    :param headers: request headers (defaults to a copy of HEADERS)
    :param proxies: proxy mapping (defaults to get_proxys(log))
    :param timeout: request timeout in seconds
    :param token: auth token placed in the "authori-zation" header
    :return: parsed JSON dict when the API code is 200, otherwise None
    :raises requests.exceptions.RequestException: on transport failure (retried)
    :raises ValueError: when the response body is not valid JSON (retried)
    """
    if headers is None:
        # BUGFIX: copy so the shared module-level HEADERS dict is never mutated.
        headers = dict(HEADERS)
        # The original condition `'getHitCardReport' or 'getCardPublicly' in url`
        # is always truthy, so in practice EVERY default-header request is
        # authenticated; keep that effective behavior but remove the dead-code
        # condition. NOTE(review): if only those two endpoints were meant,
        # change this to `'getHitCardReport' in url or 'getCardPublicly' in url`.
        if not token:
            token = "a-22695f440cc94df28b39f3e804696112"
        headers["authori-zation"] = token

    if proxies is None:
        proxies = get_proxys(log)

    try:
        with requests.Session() as session:
            # get_proxys always returns a mapping, so proxies is never None here
            # (the original re-checked it in dead branches).
            if method.upper() == 'GET':
                response = session.get(url, headers=headers, params=params, proxies=proxies, timeout=timeout)
            elif method.upper() == 'POST':
                response = session.post(url, headers=headers, json=data, proxies=proxies, timeout=timeout)
            else:
                log.error(f"Unsupported request method: {method}")
                return None

            response.raise_for_status()
            payload = response.json()  # renamed: the original shadowed the `data` parameter
            if payload["code"] == 200:
                log.info(f"Successfully fetched {method} request to {url}")
                return payload
            log.warning(f"Warning {inspect.currentframe().f_code.co_name}: {payload['message']}")
            return None
    except requests.exceptions.RequestException as e:
        log.error(f"Error making {method} request to {url}: {e}")
        raise e
    except ValueError as e:
        log.error(f"Error parsing JSON for {method} request to {url}: {e}")
        raise e
    except Exception as e:
        log.error(f"Error making {method} request to {url}: {e}")
        raise e
+
+
def get_play_back(log, product_id, token):
    """
    Fetch the video playback URL of a product.

    :param log: logger object
    :param product_id: product code
    :param token: auth token
    :return: playback URL, or None when unavailable or on error
    """
    log.info(f"Starting to fetch playback for product_id {product_id}")
    url = "https://api.luckycards.com.cn/api/front/c/product/productDetailDynamics"
    try:
        response = make_request(log, 'GET', url, params={"code": product_id}, token=token)
        if not response:
            return None
        # Drill into data -> normalLiving -> playback, tolerating missing keys.
        return response.get("data", {}).get("normalLiving", {}).get("playback")
    except Exception as e:
        log.error(f"Error fetching playback {product_id}: {e}")
        return None
+
+
def clean_texts(html_text):
    """
    Strip HTML markup with BeautifulSoup and return plain text.

    :param html_text: HTML-formatted string (may be None or empty)
    :return: plain text with non-breaking spaces normalized to plain spaces
    """
    if not html_text:
        return ""
    soup = BeautifulSoup(html_text, 'html.parser')
    # clean_text = soup.get_text(separator=' ', strip=True)
    clean_text = soup.get_text(strip=True)
    # BUGFIX: get_text() already decodes the &nbsp; entity into '\xa0', so
    # replacing the literal '&nbsp;' never matched; normalize the decoded
    # character too (the literal replace is kept for raw-text inputs).
    clean_text = clean_text.replace('&nbsp;', ' ').replace('\xa0', ' ')
    return clean_text
+
+
def parse_product_items(log, items, sql_pool, product_id, token):
    """
    Parse a product-detail payload and update its row in leka_product_record.

    :param log: logger object
    :param items: "data" dict from the productDetail API response
    :param sql_pool: MySQL connection-pool object
    :param product_id: product code (used as the UPDATE condition)
    :param token: auth token, forwarded when fetching the playback URL
    """
    if not items:
        log.warning(f"Warning {inspect.currentframe().f_code.co_name}: No items found")
        return
    no = items.get("id")
    create_time = items.get("publishTime")
    title = items.get("productName")
    img = items.get("productImageIndex")
    price_sale = items.get("unitPriceStr")
    total_price = items.get("totalSalePrice")
    sale_num = items.get("saleCount")  # units sold
    spec_config = items.get("hitCardStandard")  # specification
    sort = items.get("series")  # category: 0 all, 1 original box, 2 lucky box, 3 fortune box? (unconfirmed)
    state = items.get("status")
    shop_id = items.get("merchantCode")
    shop_name = items.get("merchantName")
    category = items.get("brandId")
    on_sale_time = items.get("onlineTime")
    end_time = items.get("endTime")
    finish_time = items.get("finishTime")
    # content = items.get("purchaseNotes")
    # if content:
    #     content = content.replace("<p>", "").replace("</p>", "")
    # brief = items.get("brief")
    product_detail = items.get("productDetail")
    if product_detail:
        # Strip HTML markup down to plain text before storing.
        product_detail = clean_texts(product_detail)
        # print('product_detail:',product_detail)

    # Playback URL comes from a separate endpoint.
    video_url = get_play_back(log, product_id, token)

    hit_card_desc = items.get("hitCardDesc")  # gift description
    open_mode = items.get("openMode")  # "random team" mode flag
    open_mode_comment = items.get("openModeComment")  # "random team" mode description

    random_mode = items.get("randomMode")  # "draw immediately on purchase" flag
    random_mode_comment = items.get("randomModeComment")  # its description

    # Column-name -> value mapping for the UPDATE.
    info_dict = {
        "no": no,
        "create_time": create_time,
        "title": title,
        "img": img,
        "price_sale": price_sale,
        "total_price": total_price,
        "sale_num": sale_num,
        "spec_config": spec_config,
        "sort": sort,
        "state": state,
        "shop_id": shop_id,
        "shop_name": shop_name,
        "category": category,
        "on_sale_time": on_sale_time,
        "end_time": end_time,
        "finish_time": finish_time,
        "product_detail": product_detail,
        "video_url": video_url,
        "hit_card_desc": hit_card_desc,
        "open_mode": open_mode,
        "open_mode_comment": open_mode_comment,
        "random_mode": random_mode,
        "random_mode_comment": random_mode_comment,
    }
    # print(info_dict)
    # sql_pool.insert_one_or_dict(table="leka_product_record", data=info_dict)
    sql_pool.update_one_or_dict(table="leka_product_record", data=info_dict, condition={"product_id": product_id})
+
+
@retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
def get_product_details(log, product_id, sql_pool, token):
    """
    Fetch and persist the detail record of a single product.

    :param log: logger object
    :param product_id: product code
    :param sql_pool: MySQL connection-pool object
    :param token: auth token
    """
    log.debug(f"Getting product details for {product_id}")
    url = "https://api.luckycards.com.cn/api/front/c/product/productDetail"
    params = {
        # "code": "LCS1254079"
        "code": product_id
    }
    try:
        response = make_request(log, 'GET', url, params=params, token=token)
        if response:
            parse_product_items(log, response.get("data"), sql_pool, product_id, token)
        else:
            # BUGFIX: make_request returns None on failure, so the original
            # `response.get('msg')` raised AttributeError here.
            log.error(f"Error getting product details for {product_id}: empty response")
    except Exception as e:
        log.error(f"Error getting product details for {product_id}: {e}")
+
+
def get_product_detail_list(log, sql_pool, token):
    """
    Fetch details for every product whose `no` column is still NULL.

    :param log: logger object
    :param sql_pool: MySQL connection-pool object
    :param token: auth token
    """
    rows = sql_pool.select_all("SELECT product_id FROM leka_product_record WHERE no IS NULL")
    for product_id in (row[0] for row in rows):
        try:
            get_product_details(log, product_id, sql_pool, token)
        except Exception as e:
            # One failed product must not stop the whole sweep.
            log.error(f"Error get_product_detail_list fetching product {product_id}: {e}")
+
+
def parse_player_items(log, items, sql_pool, product_id):
    """
    Parse a card-publicity (player) payload and persist it.

    :param log: logger object
    :param items: list of player dicts from the API response
    :param sql_pool: MySQL connection-pool object
    :param product_id: product code
    """
    if not items:
        log.warning(f"Warning {inspect.currentframe().f_code.co_name}: No items found")
        return
    player_list = [
        {
            "product_id": product_id,
            "user_code": item.get("userCode"),
            "num": item.get("cardCount"),
            "user_id": item.get("userId"),
            "user_name": item.get("nickName"),
        }
        for item in items
    ]
    sql_pool.insert_many(table='leka_player_record', data_list=player_list)
    # Mark the product's card-publicity data as fetched.
    sql_pool.update_one("update leka_product_record set km_state = 1 where product_id = %s", (product_id,))
+
+
@retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
def get_player_list(log, product_id, sql_pool, token):
    """
    Crawl the card-publicity (player) list of one product, page by page.

    :param log: logger object
    :param product_id: product code
    :param sql_pool: MySQL connection-pool object
    :param token: auth token
    """
    log.debug(f"Getting player list for {product_id}")
    url = "https://api.luckycards.com.cn/api/front/c/card/getCardPublicly"
    last_id = 0  # pagination cursor; 0 requests the first page
    total_players = 0

    while True:
        data = {
            "keyword": "",
            "lastUserId": last_id,
            "productCode": product_id,
            "publiclyType": 2,  # 1: by gift, 2: by player
        }
        # print(data)

        try:
            response = make_request(log, 'POST', url, data=data, token=token)
            if not response:
                log.error(f"Error getting player list for {product_id}: Empty response")
                break

            items = response.get("data", [])
            if not items:
                # Empty page: mark the product so it can be re-checked later.
                log.info(f"No more players found for product {product_id}")
                sql_pool.update_one("update leka_product_record set km_state = 3 where product_id = %s", (product_id,))
                break

            # Persist the current page.
            parse_player_items(log, items, sql_pool, product_id)
            total_players += len(items)

            # More than 50 players collected: treat as fully fetched and stop.
            if total_players > 50:
                log.debug(f"Total players found for product {product_id}: {total_players}")
                break

            # Fewer than 20 items means this was the last page.
            if len(items) < 20:
                log.info(f"Last page detected for product {product_id} (got {len(items)} items)")
                break

            # Advance the cursor to the last item's userId.
            last_id = items[-1].get("userId")
            # print(last_id)
            if not last_id:
                log.error("API response missing userId in last item, cannot paginate")
                break

            # Optional throttle between pages.
            # time.sleep(0.5)

        except Exception as e:
            log.error(f"Error getting player list for {product_id} at lastId {last_id}: {e}")
            break

    log.info(f"Finished fetching players for product {product_id}, total: {total_players}")
+
+
def get_players(log, sql_pool, token):
    """
    Crawl card-publicity (player) data for all pending products.

    :param log: logger object
    :param sql_pool: MySQL connection-pool object
    :param token: auth token
    """
    # km_state 0 = never fetched, 3 = previously came back empty.
    rows = sql_pool.select_all("SELECT product_id FROM leka_product_record WHERE km_state IN (0, 3)")
    product_list = [row[0] for row in rows]

    if not product_list:
        log.warning(f"Warning {inspect.currentframe().f_code.co_name}: No product_id found")
        return

    log.info(f"Start fetching players data. Total products: {len(product_list)}")
    for product_id in product_list:
        try:
            get_player_list(log, product_id, sql_pool, token)
        except Exception as e:
            # One failed product must not stop the whole sweep.
            log.error(f"Error fetching product {product_id}: {e}")
+
+
@retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
def get_report_one_page(log, sql_pool, productCode, page, last_id, token):
    """
    Fetch a single page of hit-card report data and persist it.

    :param log: logger object
    :param sql_pool: MySQL connection-pool object
    :param productCode: product code
    :param page: page number
    :param last_id: pagination cursor
    :param token: auth token
    :return: (last userCode of the page, item count); (0, 0) when the page is
             empty or the request returned no payload
    :raises Exception: re-raised on unexpected failures so @retry can kick in
    """
    url = "https://api.luckycards.com.cn/api/front/c/card/getHitCardReport"
    data = {
        "keyword": "",
        "page": page,
        "lastId": last_id,
        # "productCode": "LCS1254213"
        "productCode": productCode
    }
    log.info(f"Getting report data for: {productCode}, Page: {page}")

    try:
        response = make_request(log, 'POST', url, data=data, token=token)
        if not response:
            # BUGFIX: make_request returns None on failure, so the original
            # `response.get('msg')` raised AttributeError here; additionally
            # it returned a bare 0, which broke the caller's 2-value unpack.
            log.error("Error getting report data: empty response")
            return 0, 0
        items = response.get("data", [])
        if not items:
            log.warning(f"Warning {inspect.currentframe().f_code.co_name}: No items found")
            # Mark the product so it can be re-checked later.
            sql_pool.update_one("update leka_product_record set report_state = 3 where product_id = %s",
                                (productCode,))
            return 0, 0
        info_list = [
            {
                "product_id": productCode,
                "card_id": item.get("orderNo"),
                "card_name": item.get("cardSecret"),
                "create_time": item.get("drawTime"),
                "imgs": item.get("hitPic"),
                "user_id": item.get("userCode"),
                "user_name": item.get("nickName"),
                "shop_id": item.get("merchantCode"),
                "shop_name": item.get("merchantName"),
                "card_desc": item.get("hitCardDesc"),
            }
            for item in items
        ]
        sql_pool.insert_many(table='leka_report_record', data_list=info_list)
        log.info(f"Successfully saved {len(items)} report items")
        return items[-1].get("userCode"), len(items)
    except Exception as e:
        log.error(f"Error getting report data: {e}")
        raise e
+
+
def get_report_list(log, sql_pool, product_id, token):
    """
    Fetch the hit-card report for one product.

    Only the first page is fetched: the API has not been observed to return
    a second page, so the pagination loop was retired.

    :param log: logger object
    :param sql_pool: MySQL connection-pool object
    :param product_id: product code
    :param token: auth token
    """
    try:
        _, item_count = get_report_one_page(log, sql_pool, product_id, 1, 0, token)
        log.info(f"Finished fetching report data for product {product_id}, total: {item_count}")
        # Mark the product's report data as fetched.
        sql_pool.update_one("update leka_product_record set report_state = 1 where product_id = %s", (product_id,))
    except Exception as e:
        log.error(f"Error getting report data: {e}")
+        # break
+
+
def get_reports(log, sql_pool, token):
    """
    Crawl hit-card report data for all pending products.

    :param log: logger object
    :param sql_pool: MySQL connection-pool object
    :param token: auth token
    """
    # report_state 0 = never fetched, 3 = previously came back empty.
    rows = sql_pool.select_all("SELECT product_id FROM leka_product_record WHERE report_state IN (0, 3)")
    product_list = [row[0] for row in rows]

    if not product_list:
        log.warning(f"Warning {inspect.currentframe().f_code.co_name}: No product_id found")
        return

    log.info(f"Start fetching report data. Total products: {len(product_list)}")
    for product_id in product_list:
        try:
            get_report_list(log, sql_pool, product_id, token)
        except Exception as e:
            # One failed product must not stop the whole sweep.
            log.error(f"Error fetching product {product_id}: {e}")
+
+
if __name__ == '__main__':
    # Intentionally a no-op when run directly; this module is imported by
    # start_leka_spider. The snippets below are kept for manual debugging.
    pass

    # pid = 'LCS1254213'
    # pid = 'LCS1253418'
    # pid = 'LCS1256332'
    # from mysql_pool import MySQLConnectionPool
    # sql_pool_ = MySQLConnectionPool(log=logger)

    # get_reports(logger, None)
    # get_player_list(logger, pid, None)
    # get_product_details(logger, 'LCS1255968', sql_pool_)

+ 52 - 0
leka_spider/start_leka_spider.py

@@ -0,0 +1,52 @@
+# -*- coding: utf-8 -*-
+# Author : Charley
+# Python : 3.10.8
+# Date   : 2025/3/25 18:44
+import time
+import schedule
+import threading
+
+from leke_login import login_main
+from settings import *
+from leka_new_daily_spider import leka_main
+from leka_history_spider import leka_history_main
+
+
def run_threaded(job_func, *args, **kwargs):
    """
    Run *job_func* in a new thread, forwarding its arguments.

    :param job_func: target callable
    :param args: positional arguments for the callable
    :param kwargs: keyword arguments for the callable
    :return: the started threading.Thread (new, backward-compatible: lets
             callers join or inspect the worker; existing callers that
             discarded the previous None return are unaffected)
    """
    job_thread = threading.Thread(target=job_func, args=args, kwargs=kwargs)
    job_thread.start()
    return job_thread
+
+
def schedule_task():
    """
    Entry point of the spider module: register scheduled jobs, then loop
    forever running whatever is pending (this function never returns).
    """
    # Run once immediately on startup (currently disabled).
    # login_main()
    # time.sleep(5)

    # run_threaded(leka_main, log=logger)
    # run_threaded(leka_history_main, log=logger)

    # Refresh the login token. NOTE(review): the original comment said
    # "every Monday", but the job actually runs daily at 00:01 -- confirm.
    # schedule.every(3).days.at("00:06").do(run_threaded, login_main)
    schedule.every().day.at("00:01").do(run_threaded, login_main)

    # Daily crawl job, started a few minutes after the token refresh.
    schedule.every().day.at("00:06").do(run_threaded, leka_main, log=logger)
    # schedule.every().friday.at("08:00").do(run_threaded, leka_history_main, log=logger)

    while True:
        schedule.run_pending()
        time.sleep(1)
+
+
if __name__ == '__main__':
    # Start the scheduler loop; blocks forever.
    schedule_task()