浏览代码

add 6.12.1

lei.chen 6 月之前
父节点
当前提交
f13b40268a

+ 74 - 0
kaogujia_spider/YamlLoader.py

@@ -0,0 +1,74 @@
# -*- coding: utf-8 -*-
#
import os, re
import yaml

# Matches Spring-style placeholders: ${ENV_NAME:default} or ${value}.
# Group ENV (optional) captures 'ENV_NAME:' including the trailing colon;
# group VAL captures the default/literal value (word chars and dots only).
regex = re.compile(r'^\$\{(?P<ENV>[A-Z_\-]+\:)?(?P<VAL>[\w\.]+)\}$')
+
class YamlConfig:
    """Thin wrapper around a parsed YAML mapping with Spring-style
    ``${ENV_VAR:default}`` placeholder resolution.

    ``${NAME:value}`` reads environment variable ``NAME`` falling back to
    ``value``; ``${value}`` (no env part) resolves to ``value`` itself;
    any other entry is returned unchanged.
    """

    # Compiled placeholder pattern kept on the class so the resolution logic
    # is self-contained (equivalent to the module-level ``regex``).
    _PLACEHOLDER = re.compile(r'^\$\{(?P<ENV>[A-Z_\-]+\:)?(?P<VAL>[\w\.]+)\}$')

    def __init__(self, config):
        # config: the underlying dict (or scalar) parsed from YAML
        self.config = config

    def get(self, key: str):
        """Return the sub-section at *key* wrapped in a new YamlConfig."""
        return YamlConfig(self.config.get(key))

    def _resolve(self, key: str):
        """Resolve the raw config entry at *key*.

        :return: the resolved string for ``${...}`` placeholders (env var
            value or default), otherwise the raw entry unchanged.
        """
        raw = self.config[key]
        match = self._PLACEHOLDER.match(raw) if isinstance(raw, str) else None
        if match is None:
            # Not a placeholder (or not a string at all): return as-is.
            return raw
        group = match.groupdict()
        if group['ENV'] is not None:
            env = group['ENV'][:-1]  # strip the trailing ':' captured by the regex
            return os.getenv(env, group['VAL'])
        # BUGFIX: a placeholder without an env part (``${value}``) used to
        # resolve to None; return the literal value instead.
        return group['VAL']

    def getValueAsString(self, key: str):
        """Return the value at *key*, resolving placeholders."""
        return self._resolve(key)

    def getValueAsInt(self, key: str):
        """Return the value at *key* as an int, resolving placeholders."""
        return int(self._resolve(key))

    def getValueAsBool(self, key: str, env: str = None):
        """Return the value at *key* as a bool, resolving placeholders.

        BUGFIX: ``bool("false")`` is True, so string values are now compared
        against common truthy spellings instead of being passed to ``bool``.
        The unused *env* parameter is kept for backward compatibility.
        """
        value = self._resolve(key)
        if isinstance(value, str):
            return value.strip().lower() in ('1', 'true', 'yes', 'on')
        return bool(value)
+    
def readYaml(path: str = 'application.yml', profile: str = None) -> YamlConfig:
    """Load a YAML config file, optionally overlaying a profile-specific one.

    :param path: base config file, e.g. ``application.yml``
    :param profile: optional profile name; ``application-<profile>.yml`` is
        merged on top of the base config when it exists
    :return: the merged configuration wrapped in a YamlConfig
    """
    # BUGFIX: start from an empty mapping so a missing base file no longer
    # raises NameError when `conf` is used below.
    conf = {}
    if os.path.exists(path):
        with open(path) as fd:
            conf = yaml.load(fd, Loader=yaml.FullLoader) or {}

    if profile is not None:
        # Derive e.g. application-dev.yml from application.yml.
        # rpartition (instead of split('.')) keeps directory names that
        # contain dots intact.
        stem, _, ext = path.rpartition('.')
        profiledYaml = f'{stem}-{profile}.{ext}'
        if os.path.exists(profiledYaml):
            with open(profiledYaml) as fd:
                overlay = yaml.load(fd, Loader=yaml.FullLoader)
                if overlay:
                    conf.update(overlay)

    return YamlConfig(conf)
+
+# res = readYaml()
+# mysqlConf = res.get('mysql')
+# print(mysqlConf)
+
+# print(res.getValueAsString("host"))
+# mysqlYaml = mysqlConf.getValueAsString("host")
+# print(mysqlYaml)
+# host = mysqlYaml.get("host").split(':')[-1][:-1]
+# port = mysqlYaml.get("port").split(':')[-1][:-1]
+# username = mysqlYaml.get("username").split(':')[-1][:-1]
+# password = mysqlYaml.get("password").split(':')[-1][:-1]
+# mysql_db = mysqlYaml.get("db").split(':')[-1][:-1]
+# print(host,port,username,password)

+ 6 - 0
kaogujia_spider/application.yml

@@ -0,0 +1,6 @@
+mysql:
+  host: ${MYSQL_HOST:100.64.0.25}
+  port: ${MYSQL_PORT:3306}
+  username: ${MYSQL_USERNAME:crawler}
+  password: ${MYSQL_PASSWORD:Pass2022}
+  db: ${MYSQL_DATABASE:crawler}

+ 605 - 0
kaogujia_spider/kgj_kapai_spider.py

@@ -0,0 +1,605 @@
+# -*- coding: utf-8 -*-
+# Author : Charley
+# Python : 3.10.8
+# Date   : 2025/2/8 13:11
+import json
+import random
+import time
+import base64
+import schedule
+import urllib3
+import requests
+from typing import Dict
+from loguru import logger
+from urllib.parse import quote
+from Crypto.Cipher import AES
+from Crypto.Util.Padding import unpad
+from datetime import datetime, timedelta
+from mysq_pool import MySQLConnectionPool
+from tenacity import retry, stop_after_attempt, wait_fixed
+
+from request_live_detail import get_live_detail
+
# Silence urllib3 warnings (e.g. InsecureRequestWarning) for this process.
urllib3.disable_warnings()

# File logging: one file per day under ./kapai_logs, rotated at midnight,
# kept for 14 days; loguru's default stderr sink is removed first.
logger.remove()
logger.add("./kapai_logs/{time:YYYYMMDD}.log", encoding='utf-8', rotation="00:00",
           format="[{time:YYYY-MM-DD HH:mm:ss.SSS}] {level} {message}",
           level="DEBUG", retention="14 day")

# Hard upper bound on search-result pages fetched per keyword.
MAX_PAGE = 100
+
+
+# HEADERS = settings.KGJ_HEADERS
+
+
def after_log(retry_state):
    """Tenacity ``after`` hook: log the outcome of each retry attempt.

    :param retry_state: tenacity RetryCallState for the call just finished
    """
    # Every retried function in this module takes the logger as its first
    # positional argument, so it can be recovered from the call args.
    log = retry_state.args[0]
    name = retry_state.fn.__name__
    attempt = retry_state.attempt_number
    if not retry_state.outcome.failed:
        log.info(f"Function '{name}', Attempt {attempt} succeeded")
    else:
        log.warning(f"Function '{name}', Attempt {attempt} Times")
+
+
@retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
def get_proxys(log):
    """Build the KDL tunnel proxy mapping for ``requests``.

    :param log: logger (also consumed by the after_log retry callback)
    :return: dict with identical 'http' and 'https' proxy URLs
    """
    tunnel = "x371.kdltps.com:15818"
    kdl_username = "t13753103189895"
    kdl_password = "o0yefv6z"
    try:
        # Same credentialed URL is used for both schemes.
        proxy_url = "http://%(user)s:%(pwd)s@%(proxy)s/" % {
            "user": kdl_username, "pwd": kdl_password, "proxy": tunnel}
        return {"http": proxy_url, "https": proxy_url}
    except Exception as e:
        log.error(f"Error getting proxy: {e}")
        raise e
+
+
def decimal_to_percent(decimal_value):
    """Format a fraction (e.g. 0.25) as a percent string (e.g. '25%').

    Falsy input (None, 0) yields '0%'.  The ':g' format drops any
    insignificant trailing zeros automatically.
    """
    if decimal_value:
        return f"{decimal_value * 100:g}%"
    return "0%"
+
+
def decimal_to_percent_with_dist(decimal_value):
    """Format a fraction as a percent string with at most two decimals.

    Falsy input yields '0%'.  Trailing zeros and a dangling decimal point
    are stripped, so 0.125 -> '12.5%' and 0.5 -> '50%'.
    """
    if not decimal_value:
        return "0%"
    text = f"{decimal_value * 100:.2f}"
    trimmed = text.rstrip('0').rstrip('.')
    return trimmed + "%"
+
+
def convert_seconds_to_hours_minutes(total_seconds: int):
    """Render a duration in seconds as 'X小时Y分', 'Y分', or '0分'.

    :param total_seconds: non-negative duration in seconds
    """
    hours, remainder = divmod(total_seconds, 3600)
    minutes = remainder // 60
    if hours > 0:
        return f"{hours}小时{minutes}分"
    if minutes > 0:
        return f"{minutes}分"
    return "0分"
+
+
def get_date(offset):
    """Return the date *offset* days from today, formatted YYYYMMDD.

    :param offset: 0 = today, -1 = yesterday, -7 = a week ago, etc.
    :return: date string such as '20250208'
    """
    return (datetime.today() + timedelta(days=offset)).strftime('%Y%m%d')
+
+
def decrypt_data(log, par_url, encrypted_data) -> Dict[str, str]:
    """Decrypt an encrypted API response payload.

    The AES key/IV are derived from the request path the same way the
    site's JavaScript does: percent-encode the path, base64-encode it,
    repeat the result three times, then slice a 16-char key ([:16]) and a
    16-char IV ([12:28]) out of it.

    :param log: logger
    :param par_url: request path used for key derivation, e.g. '/api/sku/search'
    :param encrypted_data: base64-encoded AES-CBC ciphertext from the API
    :return: the decrypted payload parsed from JSON (an empty dict when
        par_url is not a string)
    """
    log.info("开始解密数据 ->->->->->->->->->->->->->->->->->->->->->")
    if not isinstance(par_url, str):
        return {}

    # Mirrors the `str` helper in the site's original JS: URL-quote, then base64.
    def transform_str(input_str):
        encoded = quote(input_str)
        return base64.b64encode(encoded.encode()).decode()

    # Tripling guarantees the string is long enough for the [12:28] IV slice.
    str_result = transform_str(par_url) * 3
    org_key = str_result[:16]
    org_iv = str_result[12:28]

    # Encode key and IV to bytes for the Crypto library.
    ikey = org_key.encode('utf-8')
    iiv = org_iv.encode('utf-8')

    # AES-CBC decrypt, strip PKCS#7 padding, then parse the JSON payload.
    cipher = AES.new(ikey, AES.MODE_CBC, iiv)
    decrypted_text = unpad(cipher.decrypt(base64.b64decode(encrypted_data)), AES.block_size).decode('utf-8')
    decrypted_text = json.loads(decrypted_text)
    # print(decrypted_text)

    return decrypted_text
+
+
def save_product_list(sql_pool, info_list):
    """Batch-insert product rows into kgj_kapai_product_list_record.

    :param sql_pool: MySQLConnectionPool instance
    :param info_list: list of 16-value tuples matching the column order below
    """
    insert_sql = """
        INSERT INTO kgj_kapai_product_list_record (product_id, title, price, price_str, cos_ratio, sales, live_sales, video_sales, other_sales, live_ratio, video_ratio, other_ratio, shop_id, shop_name, shop_cover, keyword)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """
    sql_pool.insert_all(insert_sql, info_list)
+
+
def save_live_detail(sql_pool, info_list):
    """Batch-insert live-detail rows into kgj_kapai_live_detail_record.

    :param sql_pool: MySQLConnectionPool instance
    :param info_list: list of 29-value tuples matching the column order below
    """
    insert_sql = """
        INSERT INTO kgj_kapai_live_detail_record (room_id, live_create_time, live_finish_time, duration, watch_users, avg_users, peak_users, through, exposed_num, stay_duration, new_fans_count, inc_fans_clubs, turn_ratio, interaction_ratio, gmv, sales, atv, explain_duration, sku_count, uv, cvr, rpm, promotion_id, product_id, product_title, product_cover, product_sales, product_putaway_time, product_sold_out_time)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """
    sql_pool.insert_all(insert_sql, info_list)
+
+
def update_linked_live_state(sql_pool, sql_id):
    """Flag one kgj_kapai_linked_live_record row as detail-fetched (state 1).

    :param sql_pool: MySQLConnectionPool instance
    :param sql_id: primary key of the row to flag
    """
    update_sql = "UPDATE kgj_kapai_linked_live_record SET live_detail_state = 1 WHERE id = %s"
    sql_pool.update_one(update_sql, (sql_id,))
+
+
def parse_product_list(log, resp_json: dict, sql_pool, keyword, stop_paging):
    """Parse one decrypted page of product-search results and persist it.

    :param log: logger
    :param resp_json: decrypted response payload; products live under 'items'
    :param sql_pool: MySQLConnectionPool instance
    :param keyword: search keyword the page was fetched with (stored per row)
    :param stop_paging: single-element list used as an out-flag; element 0 is
        set to True when a zero-sales item is seen so the caller stops paging
    """
    log.info("开始解析 product_list 数据......................")
    items = resp_json.get("items", [])

    # Titles containing any of these markers are accessories ("周边"), not cards.
    peripheral_markers = ("卡夹", "卡砖", "卡膜", "鼠标垫")

    info_list = []
    for item in items:
        stat = item.get("stat", {})
        sales = stat.get("sales")
        # Results are sorted by sales, so the first zero-sales item means
        # everything after it is zero too: stop paging entirely.
        if sales == 0 or sales == '0':
            stop_paging[0] = True
            log.info("已到达销量为0的页码,停止当前产品列表的翻页..............")
            break

        product_id = item.get("product_id")

        # 20250523 删除根据 product_id 去重
        title = item.get("title")
        # BUGFIX: classify each product independently.  The loop previously
        # overwrote the `keyword` parameter itself, so once one title matched
        # a marker, every later product on the page was also tagged "周边".
        if any(marker in title for marker in peripheral_markers):
            log.debug(f"{title} 包含关键字,为周边产品..............")
            item_keyword = "周边"
        else:
            item_keyword = keyword

        price = item.get("price")
        price_str = item.get("price_str")
        cos_ratio = item.get("cos_ratio")

        live_sales = stat.get("live_sales")
        video_sales = stat.get("video_sales")
        other_sales = stat.get("other_sales")

        market_type = item.get("market_type", {})
        # decimal_to_percent already maps falsy values to "0%".
        live_ratio = decimal_to_percent(market_type.get("live_ratio"))
        video_ratio = decimal_to_percent(market_type.get("video_ratio"))
        other_ratio = decimal_to_percent(market_type.get("other_ratio"))

        shop_id = item.get("shop_id")
        shop_name = item.get("shop_name")
        shop_cover = item.get("shop_cover")

        info_list.append((product_id, title, price, price_str, cos_ratio, sales, live_sales,
                          video_sales, other_sales, live_ratio, video_ratio, other_ratio,
                          shop_id, shop_name, shop_cover, item_keyword))
    if info_list:
        log.info(f"解析到 {len(info_list)} 条数据......................")
        save_product_list(sql_pool, info_list)
    else:
        log.info("没有解析到数据......................")
+
+
@retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
def get_kgj_product_one_page(log, page, keyword, sql_pool, stop_paging, headers):
    """Fetch, decrypt and persist one page of product search results.

    :param log: logger
    :param page: 1-based page number
    :param keyword: search keyword
    :param sql_pool: MySQLConnectionPool instance
    :param stop_paging: shared one-element flag list (see parse_product_list)
    :param headers: authenticated request headers
    """
    url = "https://service.kaogujia.com/api/sku/search"
    params = {
        "limit": "50",
        "page": str(page),
        "sort_field": "sales",
        "sort": "0"
    }
    payload = {"period": 7, "keyword": keyword}
    response = requests.post(url, headers=headers, params=params, json=payload)
    response.raise_for_status()
    resp_json = response.json()
    if not resp_json:
        log.warning(f"Error resp_json")
        return
    if resp_json.get("code") != 200:
        log.warning(f"Error get_kgj_product_one_page: {resp_json.get('message')}")
        return
    # Payload is AES-encrypted with a key derived from the request path.
    dec_data = decrypt_data(log, '/api/sku/search', resp_json.get("data"))
    parse_product_list(log, dec_data, sql_pool, keyword, stop_paging)
+
+
def get_kgj_product_list(log, keyword, sql_pool, headers):
    """Page through product search results for *keyword* until exhausted.

    Stops after MAX_PAGE pages, or earlier once a page reports zero sales
    (flag raised by parse_product_list through the shared list).
    """
    stop_paging = [False]  # shared out-flag, mutated by parse_product_list
    page = 1
    while page <= MAX_PAGE:
        try:
            log.info(f"Getting kgj product list page {page}, keyword:{keyword}")
            get_kgj_product_one_page(log, page, keyword, sql_pool, stop_paging, headers)
        except Exception as e:
            # Log and move on to the next page after a short pause.
            log.error(f"Error getting kgj product list: {e}")
            time.sleep(random.randint(4, 6))
            page += 1
            continue

        time.sleep(random.randint(4, 6))

        if stop_paging[0]:
            log.info("停止翻页,因为 sales 为 0 或 '0'")
            break
        page += 1
+
+
def save_product_overview(sql_pool, info_list):
    """Batch-insert daily-trend rows into kgj_kapai_product_overview_record.

    :param sql_pool: MySQLConnectionPool instance
    :param info_list: list of 12-value tuples matching the column order below
    """
    insert_sql = """
        INSERT INTO kgj_kapai_product_overview_record (product_id, date_all, sales_str, live_sales_str, video_sales_str, other_sales_str, users_str, lives_str, videos_str, live_ratio, video_ratio, other_ratio)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """
    sql_pool.insert_all(insert_sql, info_list)
+
+
def update_state(sql_pool, product_id, state_str, state_int):
    """Set a state column on kgj_kapai_product_list_record for one product.

    :param sql_pool: MySQLConnectionPool instance
    :param product_id: product to update
    :param state_str: state column name; interpolated into the SQL text, so
        it must only ever come from trusted internal callers
    :param state_int: new state value (callers use 1 on success, 2 on failure)
    """
    statement = f"UPDATE kgj_kapai_product_list_record SET {state_str} = %s WHERE product_id = %s"
    sql_pool.update_one(statement, (state_int, product_id))
+
+
def get_product_overview_percent(log, product_id, headers):
    """Fetch the sales-channel distribution for a product over the last week.

    :param log: logger
    :param product_id: product to query
    :param headers: authenticated request headers
    :return: (live_ratio, video_ratio, other_ratio) percent strings, e.g.
        ('62.5%', '30%', '7.5%'); '0%' placeholders on any failure
    """
    log.info(f"Getting kgj product overview percent, product_id:{product_id}")
    url = "https://service.kaogujia.com/api/sku/overview/dist"
    data = {
        "product_id": product_id,
        "from_dt": get_date(-7),
        "to_dt": get_date(-1)
    }
    response = requests.post(url, headers=headers, json=data)
    response.raise_for_status()
    resp_json = response.json()

    if not resp_json:
        log.warning(f"Error get_kgj_product_overview resp_json")
        return "0%", "0%", "0%"
    if resp_json.get("code") != 200:
        log.warning(f"Error get_kgj_product_overview: {resp_json.get('message')}")
        return "0%", "0%", "0%"

    dec_data = decrypt_data(log, '/api/sku/overview/dist', resp_json.get("data"))
    live_ratio = "0%"
    video_ratio = "0%"
    other_ratio = "0%"
    # Map each named sales channel to its percent share; missing or falsy
    # percent values render as '0%'.  `or []` guards a null "sales" field,
    # which previously raised TypeError in the for-loop.
    for sales in dec_data.get("sales") or []:
        percent = sales.get("percent")
        formatted = decimal_to_percent_with_dist(percent) if percent else "0%"
        name = sales.get("name")
        if name == "直播带货":
            live_ratio = formatted
        elif name == "视频带货":
            video_ratio = formatted
        elif name == "商品卡":
            # BUGFIX: the fallback here was '1%', inconsistent with the '0%'
            # default used for every other channel and for error paths.
            other_ratio = formatted
    return live_ratio, video_ratio, other_ratio
+
+
def parse_product_overview(log, sql_pool, resp_json: dict, product_id, headers):
    """Parse the daily trend payload for one product and persist it.

    Sets the product's product_state to 1 on success or 2 when the trend
    list is empty.
    """
    log.info("开始解析 product_overview 数据......................")
    trend_list = resp_json.get("trend_list", [])
    if not trend_list:
        log.debug(f"parse_product_overview trend_list is empty")
        update_state(sql_pool, product_id, "product_state", 2)
        return

    # The channel distribution is fetched once and repeated on every daily row.
    live_ratio, video_ratio, other_ratio = get_product_overview_percent(log, product_id, headers)
    rows = [
        (
            product_id,
            trend.get("date_all"),
            trend.get("sales_str"),
            trend.get("live_sales_str"),
            trend.get("video_sales_str"),
            trend.get("other_sales_str"),
            trend.get("users_str"),
            trend.get("lives_str"),
            trend.get("videos_str"),
            live_ratio,
            video_ratio,
            other_ratio,
        )
        for trend in trend_list
    ]
    save_product_overview(sql_pool, rows)
    update_state(sql_pool, product_id, "product_state", 1)
+
+
def get_kgj_product_overview(log, sql_pool, product_id, headers):
    """Fetch the 7-day daily trend for one product, decrypt and persist it.

    On API error the product's product_state is set to 2 so the next
    `WHERE product_state = 0` selection skips it.
    """
    url = f"https://service.kaogujia.com/api/sku/trend/{product_id}"
    params = {
        "begin": get_date(-7),
        "end": get_date(-1)
    }
    response = requests.get(url, headers=headers, params=params)
    response.raise_for_status()
    resp_json = response.json()

    if not resp_json:
        log.warning(f"Error get_kgj_product_overview resp_json")
        update_state(sql_pool, product_id, "product_state", 2)
        return
    if resp_json.get("code") != 200:
        log.warning(f"Error get_kgj_product_overview: {resp_json.get('message')}")
        update_state(sql_pool, product_id, "product_state", 2)
        return
    dec_data = decrypt_data(log, f'/api/sku/trend/{product_id}', resp_json.get("data"))
    parse_product_overview(log, sql_pool, dec_data, product_id, headers)
+
+
def save_linded_live(sql_pool, info_list):
    """Batch-insert linked-live rows into kgj_kapai_linked_live_record.

    NOTE: 'linded' is a historical typo kept so existing callers keep working.

    :param sql_pool: MySQLConnectionPool instance
    :param info_list: list of 14-value tuples matching the column order below
    """
    insert_sql = """
        INSERT INTO kgj_kapai_linked_live_record (product_id, title, duration_str, pub_time_str, uid, nick_name, fans_count, price, price_str, sales, gmv, pi, room_id, date_code)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """
    sql_pool.insert_all(insert_sql, info_list)
+
+
def parse_linked_live(log, dec_data, sql_pool, product_id):
    """Parse live sessions linked to a product and persist them.

    Sets the product's live_state to 1 on success or 2 when no sessions
    are returned.
    """
    log.info("开始解析 linked_live 数据......................")
    items = dec_data.get("items", [])
    if not items:
        log.debug(f"parse_linked_live items is empty")
        update_state(sql_pool, product_id, "live_state", 2)
        return

    rows = []
    for item in items:
        pub_time = item.get("pub_time")
        # Epoch seconds -> 'YYYY-MM-DD HH:MM:SS'; empty string when missing.
        pub_time_str = datetime.fromtimestamp(pub_time).strftime('%Y-%m-%d %H:%M:%S') if pub_time else ""
        rows.append((
            product_id,
            item.get("title"),
            convert_seconds_to_hours_minutes(item.get("duration")),
            pub_time_str,
            item.get("uid"),
            item.get("nick_name"),
            item.get("fans_count"),
            item.get("price"),
            item.get("price_str"),
            item.get("sales"),
            item.get("gmv"),
            item.get("pi"),
            item.get("room_id"),
            item.get("date_code"),
        ))
    save_linded_live(sql_pool, rows)
    update_state(sql_pool, product_id, "live_state", 1)
+
+
def get_linked_live(log, sql_pool, product_id, headers):
    """Fetch the top-10 lives (by gmv, last 7 days) linked to a product.

    On API error the product's live_state is set to 2 so the next
    `WHERE live_state = 0` selection skips it.
    """
    url = "https://service.kaogujia.com/api/sku/live/list"
    params = {
        "limit": "10",
        "page": "1",
        "sort_field": "gmv",
        "sort": "0"
    }
    payload = {
        "keyword": "",
        "min_time": get_date(-6),
        "max_time": get_date(0),
        "product_id": product_id
    }
    response = requests.post(url, headers=headers, params=params, json=payload)
    response.raise_for_status()
    resp_json = response.json()
    if not resp_json:
        log.warning(f"Error get_linked_live resp_json")
        update_state(sql_pool, product_id, "live_state", 2)
        return
    if resp_json.get("code") != 200:
        log.warning(f"Error get_linked_live: {resp_json.get('message')}")
        update_state(sql_pool, product_id, "live_state", 2)
        return
    dec_data = decrypt_data(log, '/api/sku/live/list', resp_json.get("data"))
    parse_linked_live(log, dec_data, sql_pool, product_id)
+
+
@retry(stop=stop_after_attempt(500), wait=wait_fixed(600), after=after_log)
def kgj_kapai_main(log):
    """Full crawl pipeline for the kaogujia '卡牌' (trading card) category.

    Stages, each throttled with 4-6 second random sleeps per item:
      1. product list      -> kgj_kapai_product_list_record
      2. product overview  -> for each product with product_state = 0
      3. linked lives      -> for each product with live_state = 0
      4. live details      -> for each linked-live row with live_detail_state = 0

    Retried up to 500 times with a 600s wait between attempts (tenacity).
    :param log: logger (also read by the after_log retry callback)
    """
    log.info("开始运行 kgj_kapai_main 爬虫任务............................................................")
    sql_pool = MySQLConnectionPool(log=log)
    # NOTE(review): MySQLConnectionPool() always returns a truthy instance, so
    # this guard cannot trip; pool failures surface as exceptions instead.
    if not sql_pool:
        log.error("数据库连接失败")
        raise Exception("数据库连接失败")

    # Auth token is maintained externally in the kgj_token table and re-read
    # on every run, so a refreshed token is picked up without a restart.
    kgj_token = sql_pool.select_one("SELECT token FROM kgj_token")
    # Headers for the "195" account.
    KGJ_HEADERS = {
        "accept": "*/*",
        "accept-language": "en,zh-CN;q=0.9,zh;q=0.8",
        "authorization": kgj_token[0],
        "content-type": "application/json",
        "origin": "https://www.kaogujia.com",
        "priority": "u=1, i",
        "referer": "https://www.kaogujia.com/",
        "sec-ch-ua": "\"Not(A:Brand\";v=\"99\", \"Google Chrome\";v=\"133\", \"Chromium\";v=\"133\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"Windows\"",
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-site",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
        "version_code": "3.1"
    }

    try:
        keyword = "卡牌"
        # Stage 1: search results for the keyword.
        log.info("开始获取 product_list 数据............................................................")
        # sql_product_id_list = sql_pool.select_all("SELECT DISTINCT product_id FROM kgj_kapai_product_list_record")
        # sql_product_id_list = [item[0] for item in sql_product_id_list]
        try:
            get_kgj_product_list(log, keyword, sql_pool, KGJ_HEADERS)
        except Exception as e:
            log.error(f"Error main -> getting kgj product list: {e}")

        # sql_product_id_list.clear()
        log.info("product_list 数据获取完成............................................................")

        time.sleep(5)

        # Stage 2: daily overview for every not-yet-processed product.
        log.info("开始获取 product_overview 数据............................................................")
        product_id_list_for_product = sql_pool.select_all(
            "SELECT product_id FROM kgj_kapai_product_list_record WHERE product_state = 0")
        product_id_list = [item[0] for item in product_id_list_for_product]
        for product_id in product_id_list:
            try:
                log.info(f"开始获取 product_id: {product_id} 的 product_overview 数据............................")
                get_kgj_product_overview(log, sql_pool, product_id, KGJ_HEADERS)
            except Exception as e:
                log.error(f"Error main -> getting kgj product overview: {e}")
                time.sleep(random.randint(4, 6))
                continue
            time.sleep(random.randint(4, 6))
        log.info("product_overview 数据获取完成............................................................")

        time.sleep(5)

        # Stage 3: lives linked to every not-yet-processed product.
        log.info("开始获取 linked_live 数据............................................................")
        product_id_list_for_live = sql_pool.select_all(
            "SELECT product_id FROM kgj_kapai_product_list_record WHERE live_state = 0")
        product_id_list = [item[0] for item in product_id_list_for_live]
        for product_id in product_id_list:
            try:
                log.info(f"开始获取 product_id: {product_id} 的 linked_live 数据............................")
                get_linked_live(log, sql_pool, product_id, KGJ_HEADERS)
            except Exception as e:
                log.error(f"Error main -> getting kgj linked_live: {e}")
                time.sleep(random.randint(4, 6))
                continue
            time.sleep(random.randint(4, 6))
        log.info("linked_live 数据获取完成............................................................")

        time.sleep(5)

        # Stage 4: per-room live detail for every unprocessed linked-live row.
        log.info("开始获取 live_detail 数据............................................................")
        sql_room_id_list = sql_pool.select_all(
            "SELECT id, uid, room_id, date_code FROM kgj_kapai_linked_live_record WHERE live_detail_state = 0 and uid is not null and room_id is not null and date_code is not null")
        sql_room_id_list = [item for item in sql_room_id_list]
        if sql_room_id_list:
            for sql_info in sql_room_id_list:
                try:
                    log.info(f"开始获取 room_id: {sql_info[2]} 的 live_detail 数据............................")
                    live_detail_info_list = get_live_detail(log, sql_info, KGJ_HEADERS)
                    save_live_detail(sql_pool, live_detail_info_list)
                    update_linked_live_state(sql_pool, sql_info[0])
                except Exception as e:
                    log.error(f"Error main -> getting kgj live_detail: {e}")
                    # update_linked_live_state(sql_pool, sql_info[0])
                    time.sleep(random.randint(4, 6))
                    continue
                time.sleep(random.randint(4, 6))
        log.info("live_detail 数据获取完成............................................................")

    except Exception as e:
        log.error(e)
    finally:
        log.info("爬虫程序 kgj_kapai_main 运行结束,等待下一轮的采集任务.............")
+
+
def schedule_task():
    """Register the weekly crawl job and block forever running the scheduler."""
    # Run once immediately (disabled):
    # kgj_kapai_main(logger)

    # Weekly job: the kaogujia '卡牌' (card) crawl takes long, so it runs
    # Wednesdays at 01:01, staggered away from the other category jobs.
    schedule.every().wednesday.at("01:01").do(kgj_kapai_main, logger)
    while True:
        schedule.run_pending()
        time.sleep(1)


# Script entry point: start the blocking scheduler loop.
if __name__ == '__main__':
    schedule_task()

文件差异内容过多而无法显示
+ 601 - 0
kaogujia_spider/kgj_spider.py


+ 191 - 0
kaogujia_spider/mysq_pool.py

@@ -0,0 +1,191 @@
+# -*- coding: utf-8 -*-
+# Author  : Charley
+# Python  : 3.8.10
+# Date: 2024-08-05 19:42
+import pymysql
+import YamlLoader
+from loguru import logger
+from retrying import retry
+from dbutils.pooled_db import PooledDB
+
# Load MySQL connection settings from application.yml; ${ENV_VAR:default}
# placeholders are resolved by YamlLoader (env var wins over the default).
yaml = YamlLoader.readYaml()
mysqlYaml = yaml.get("mysql")
sql_host = mysqlYaml.getValueAsString("host")
sql_port = mysqlYaml.getValueAsInt("port")
sql_user = mysqlYaml.getValueAsString("username")
sql_password = mysqlYaml.getValueAsString("password")
sql_db = mysqlYaml.getValueAsString("db")
+
+
class MySQLConnectionPool:
    """MySQL connection pool built on DBUtils' PooledDB and pymysql.

    Connection settings come from the module-level values loaded from
    application.yml.  Each operation borrows a pooled connection, executes,
    and releases it back to the pool (closing a pooled connection returns it
    to the pool rather than destroying it).
    """

    def __init__(self, mincached=4, maxcached=5, maxconnections=10, log=None):
        """Initialize the pool.

        :param mincached: connections created up-front (0 = none)
        :param maxcached: max idle connections kept (0/None = unlimited)
        :param maxconnections: max total connections (0/None = unlimited)
        :param log: logger to use; defaults to the module-level loguru logger
        """
        self.log = log or logger
        self.pool = PooledDB(
            creator=pymysql,
            mincached=mincached,
            maxcached=maxcached,
            maxconnections=maxconnections,
            blocking=True,  # wait for a free connection instead of raising
            host=sql_host,
            port=sql_port,
            user=sql_user,
            password=sql_password,
            database=sql_db
        )

    @retry(stop_max_attempt_number=100, wait_fixed=600000)
    def _get_connection(self):
        """Borrow a connection from the pool.

        Retried every 10 minutes, up to 100 attempts, when the pool is
        unavailable.
        :return: a pooled connection
        """
        try:
            return self.pool.connection()
        except Exception as e:
            self.log.error(f"Failed to get connection from pool: {e}, wait 10 mins retry")
            raise e

    @staticmethod
    def _close_connection(conn):
        """Release *conn* back to the pool, if one was actually obtained."""
        if conn:
            conn.close()

    @retry(stop_max_attempt_number=5, wait_fixed=1000)
    def _execute(self, query, args=None, commit=False):
        """Run one statement on a fresh pooled connection.

        :param query: SQL text with %s placeholders
        :param args: parameter tuple, or None
        :param commit: commit after executing (INSERT/UPDATE/DELETE)
        :return: the cursor, already closed.  NOTE(review): callers fetch
            from it afterwards; this relies on pymysql's default cursor
            buffering all rows client-side before close — confirm if the
            cursorclass is ever changed to an unbuffered one.
        """
        conn = None
        cursor = None
        try:
            conn = self._get_connection()
            cursor = conn.cursor()
            cursor.execute(query, args)
            if commit:
                conn.commit()
            self.log.debug(f"sql _execute , Query: {query}, Rows: {cursor.rowcount}")
            return cursor
        except Exception as e:
            # BUGFIX: previously only non-commit statements were rolled back,
            # so a failed commit left the transaction open on the pooled
            # connection.  Roll back whenever a connection is held.
            if conn:
                conn.rollback()
            self.log.error(f"Error executing query: {e}")
            raise e
        finally:
            if cursor:
                cursor.close()
            self._close_connection(conn)

    def select_one(self, query, args=None):
        """Execute a query and return the first row (or None).

        :param query: query text
        :param args: query parameters
        """
        cursor = self._execute(query, args)
        return cursor.fetchone()

    def select_all(self, query, args=None):
        """Execute a query and return all rows.

        :param query: query text
        :param args: query parameters
        """
        cursor = self._execute(query, args)
        return cursor.fetchall()

    def insert_one(self, query, args):
        """Execute a single INSERT statement (committed).

        :param query: insert statement
        :param args: insert parameters
        """
        self.log.info('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>data insert_one 入库中>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
        return self._execute(query, args, commit=True)

    def insert_all(self, query, args_list):
        """Batch-insert; on failure fall back to row-by-row inserts.

        :param query: insert statement with %s placeholders
        :param args_list: list of parameter tuples
        """
        conn = None
        cursor = None
        try:
            conn = self._get_connection()
            cursor = conn.cursor()
            cursor.executemany(query, args_list)
            conn.commit()
            self.log.debug(f"sql insert_all , SQL: {query}, Rows: {cursor.rowcount}")
            self.log.info('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>data insert_all 入库中>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
        except Exception as e:
            # BUGFIX: conn may still be None if _get_connection failed; guard
            # before rollback so the original error is not masked by an
            # AttributeError.
            if conn:
                conn.rollback()
            self.log.error(f"Batch insertion failed after 5 attempts. Trying single inserts. Error: {e}")
            # Fall back to per-row inserts so one bad row doesn't drop the batch.
            rowcount = 0
            for args in args_list:
                self.insert_one(query, args)
                rowcount += 1
            self.log.debug(f"Batch insertion failed. Inserted {rowcount} rows individually.")
        finally:
            # BUGFIX: cursor may be None when connection acquisition failed.
            if cursor:
                cursor.close()
            self._close_connection(conn)

    def update_one(self, query, args):
        """Execute a single UPDATE statement (committed).

        :param query: update statement
        :param args: update parameters
        """
        self.log.info('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>data update_one 更新中>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
        return self._execute(query, args, commit=True)

    def update_all(self, query, args_list):
        """Batch-update; on failure fall back to row-by-row updates.

        :param query: update statement with %s placeholders
        :param args_list: list of parameter tuples
        """
        conn = None
        cursor = None
        try:
            conn = self._get_connection()
            cursor = conn.cursor()
            cursor.executemany(query, args_list)
            conn.commit()
            self.log.debug(f"sql update_all , SQL: {query}, Rows: {cursor.rowcount}")
            self.log.info('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>data update_all 更新中>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
        except Exception as e:
            # BUGFIX: guard conn before rollback (see insert_all).
            if conn:
                conn.rollback()
            self.log.error(f"Error executing query: {e}")
            # Fall back to per-row updates so one bad row doesn't drop the batch.
            rowcount = 0
            for args in args_list:
                self.update_one(query, args)
                rowcount += 1
            self.log.debug(f'Batch update failed. Updated {rowcount} rows individually.')

        finally:
            # BUGFIX: cursor may be None when connection acquisition failed.
            if cursor:
                cursor.close()
            self._close_connection(conn)

+ 327 - 0
kaogujia_spider/request_live_detail.py

@@ -0,0 +1,327 @@
+# -*- coding: utf-8 -*-
+# Author : Charley
+# Python : 3.10.8
+# Date   : 2025/2/13 11:26
+import base64
+import inspect
+import json
+import random
+import time
+import requests
+from typing import Dict
+from loguru import logger
+from datetime import datetime
+from Crypto.Cipher import AES
+from urllib.parse import quote
+from Crypto.Util.Padding import unpad
+from tenacity import retry, stop_after_attempt, wait_fixed
+
+base_live_url = "https://service.kaogujia.com"
+
+
def after_log(retry_state):
    """
    Tenacity ``after`` callback: report the outcome of each retry attempt.

    The decorated functions all take the logger as their first positional
    argument, so it is recovered from ``retry_state.args[0]``.

    :param retry_state: tenacity RetryCallState for the finished attempt
    """
    log = retry_state.args[0]
    fn_name = retry_state.fn.__name__
    attempt = retry_state.attempt_number
    if retry_state.outcome.failed:
        log.warning(f"Function '{fn_name}', Attempt {attempt} Times")
    else:
        log.info(f"Function '{fn_name}', Attempt {attempt} succeeded")
+
+
def decrypt_data(log, par_url, encrypted_data) -> Dict[str, str]:
    """
    Decrypt an AES-CBC payload returned by the kaogujia API.

    The key and IV are derived from the request path: the path is
    percent-encoded, base64-encoded, repeated three times, and sliced —
    key = chars [0:16], IV = chars [12:28] of that string.

    :param log: logger
    :param par_url: request path the payload came from (drives key derivation)
    :param encrypted_data: base64-encoded ciphertext
    :return: decrypted JSON payload as a dict ({} when par_url is not a str)
    """
    log.info("开始解密数据 ->->->->->->->->->->->->->->->->->->->->->")
    if not isinstance(par_url, str):
        return {}

    # Mirror of the site's JS `str` helper: percent-encode, then base64.
    encoded_path = base64.b64encode(quote(par_url).encode()).decode()

    # Repeat so the string is guaranteed long enough for both slices.
    material = encoded_path * 3
    key_bytes = material[:16].encode('utf-8')
    iv_bytes = material[12:28].encode('utf-8')

    cipher = AES.new(key_bytes, AES.MODE_CBC, iv_bytes)
    padded = cipher.decrypt(base64.b64decode(encrypted_data))
    plain_text = unpad(padded, AES.block_size).decode('utf-8')
    return json.loads(plain_text)
+
+
@retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
def get_sales_one_page(log, sql_info: tuple, page, headers):
    """
    Fetch one page of a live room's product-sales (sku) list.

    :param headers: request headers (must carry a valid authorization token)
    :param log: logger
    :param sql_info: row tuple (id, uid, room_id, date_code)
    :param page: 1-based page number
    :return: decrypted payload dict; {} when the API reports an error
    """
    uid = sql_info[1]
    room_id = sql_info[2]
    date_code = sql_info[3]
    par_url = '/api/live/skulist'
    url = f"{base_live_url}{par_url}"
    params = {
        "limit": "10",
        "page": page,
        "sort_field": "sales",
        "sort": "0"
    }
    data = {
        "room_id": room_id,
        "uid": uid,
        "date_code": date_code
    }
    response = requests.post(url, headers=headers, params=params, json=data, timeout=10)
    # Raising on HTTP errors lets the @retry decorator re-attempt the call.
    response.raise_for_status()
    resp_json = response.json()
    if resp_json:
        if resp_json.get("code") == 200:
            enc_data = resp_json.get("data")
            return decrypt_data(log, par_url, enc_data)
        log.warning(f"{inspect.currentframe().f_code.co_name}: {resp_json.get('message')}")
    else:
        log.warning(f"{inspect.currentframe().f_code.co_name} get resp_json")
    # Previously fell through returning None, which crashed callers on
    # dec_data.get(...); an empty dict keeps the pagination loop safe.
    return {}
+
+
def parse_sales_list(log, all_items: list) -> list:
    """
    Normalize raw sku items into flat dicts ready for DB insertion.

    Epoch-second timestamps (putaway / sold_out) are rendered as local
    '%Y-%m-%d %H:%M:%S' strings; missing or zero timestamps become "".

    :param log: logger
    :param all_items: raw item dicts from the skulist endpoint
    :return: list of normalized dicts ([] on any parsing error)
    """
    def _fmt_ts(ts):
        # 0/None -> "" ; otherwise local-time formatted string.
        return datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S') if ts else ""

    try:
        return [
            {
                "promotion_id": item.get("promotion_id"),
                "product_id": item.get("product_id"),
                "product_title": item.get("title"),
                "product_cover": item.get("cover"),
                "product_sales": item.get("sales"),
                "product_putaway_time": _fmt_ts(item.get("putaway")),
                "product_sold_out_time": _fmt_ts(item.get("sold_out")),
            }
            for item in all_items
        ]
    except Exception as e:
        log.error(f"{inspect.currentframe().f_code.co_name} Error: {e}")
        return []
+
+
def get_sales_list(log, sql_info: tuple, headers):
    """
    Page through the full product-sales list for one live room.

    Stops when a page comes back empty, when the reported total_count has
    been collected, or on the first fetch/parsing error (partial data is
    still parsed and returned in that case).

    :param headers: request headers
    :param log: logger
    :param sql_info: row tuple (id, uid, room_id, date_code)
    :return: parsed list of product dicts
    """
    # co_name resolves to this function's name, same as the original per-call lookups.
    fn_name = inspect.currentframe().f_code.co_name
    collected = []
    current_page = 1

    while True:
        try:
            log.debug(f"{fn_name}: 正在获取第 {current_page} 页数据")
            payload = get_sales_one_page(log, sql_info, current_page, headers)
            time.sleep(random.randint(4, 6))

            page_items = payload.get('items', [])
            if not page_items:
                log.debug(f"{fn_name}: 没有更多数据")
                break

            collected.extend(page_items)
            total = payload.get('pagination', {}).get('total_count', 0)
            if len(collected) >= total:
                log.debug(f"{fn_name}: 已获取所有数据")
                break

            current_page += 1
        except Exception as e:
            log.error(f"{fn_name}, Error fetching page {current_page}: {e}")
            time.sleep(random.randint(4, 6))
            break

    return parse_sales_list(log, collected)
+
+
def parse_live_detail(log, dec_data, sql_info: tuple, headers) -> list:
    """
    Parse a finished live room's detail payload into DB-ready row tuples.

    Only rooms that have ended (is_live == 0) are parsed; a room that is
    still live (or in an unknown state) yields an empty list, so callers can
    always iterate the result.

    :param headers: request headers (forwarded to the sku-list fetch)
    :param log: logger
    :param dec_data: decrypted /api/live/detail payload
    :param sql_info: row tuple (id, uid, room_id, date_code)
    :return: list of row tuples, one per promoted product; [] otherwise
    """
    try:
        log.info("开始解析 live_detail 数据......................")
        is_live = dec_data.get("is_live")
        if is_live == 0:
            live_create_time = dec_data.get("create_time")  # stream start (epoch s)
            live_create_time = datetime.fromtimestamp(live_create_time).strftime(
                '%Y-%m-%d %H:%M:%S') if live_create_time else ""
            live_finish_time = dec_data.get("finish_time")  # stream end (epoch s)
            live_finish_time = datetime.fromtimestamp(live_finish_time).strftime(
                '%Y-%m-%d %H:%M:%S') if live_finish_time else ""
            duration = dec_data.get("duration")  # stream length in seconds

            # Traffic metrics (hoisted lookup; a missing "flow" section raises
            # here and is reported by the outer except, same as before).
            flow = dec_data.get("flow")
            watch_users = flow.get("watch_users")          # total view count
            avg_users = flow.get("avg_users")              # average concurrent viewers
            peak_users = flow.get("peak_users")            # peak concurrent viewers
            through = flow.get("through")                  # penetration rate
            exposed_num = flow.get("exposed_num")          # impressions
            stay_duration = dec_data.get("stay_duration")  # average stay, seconds
            new_fans_count = flow.get("new_fans_count")    # new followers
            inc_fans_clubs = flow.get("inc_fans_clubs")    # new fan-club members
            turn_ratio = flow.get("turn_ratio")            # follow conversion rate
            interaction_ratio = flow.get("ratio")          # interaction rate

            # Sales metrics.
            volume = dec_data.get("volume")
            gmv = volume.get("gmv")                        # gross sales value
            sales = volume.get("sales")                    # units sold
            atv = volume.get("atv")                        # average price per unit
            explain_duration = volume.get("explain_duration")  # pitch time, seconds
            sku_count = volume.get("sku_count")            # products promoted
            uv = volume.get("uv")                          # UV value
            cvr = volume.get("cvr")                        # conversion rate
            rpm = volume.get("rpm")                        # RPM

            response_sales_list = get_sales_list(log, sql_info, headers)

            info_list = []
            for item in response_sales_list:
                info_list.append((
                    sql_info[2], live_create_time, live_finish_time, duration, watch_users, avg_users, peak_users,
                    through, exposed_num, stay_duration, new_fans_count, inc_fans_clubs, turn_ratio, interaction_ratio,
                    gmv, sales, atv, explain_duration, sku_count, uv, cvr, rpm, item["promotion_id"],
                    item["product_id"], item["product_title"], item["product_cover"], item["product_sales"],
                    item["product_putaway_time"], item["product_sold_out_time"]
                ))
            return info_list
        elif is_live == 1:
            log.info("直播间开播中, 等待后续抓取...............")
        else:
            log.info("直播间状态is_live其他情况...............")
        # Previously these branches fell through returning None although the
        # signature promises a list; return [] so callers can iterate safely.
        return []
    except Exception as e:
        log.warning(f"{inspect.currentframe().f_code.co_name} error: {e}")
        return []
+
+
def get_live_detail(log, sql_info: tuple, headers):
    """
    Fetch, decrypt, and parse one live room's detail data.

    :param headers: request headers (must carry a valid authorization token)
    :param log: logger object
    :param sql_info: tuple like (1, uid, room_id, date_code),
                     e.g. (1, "ZB8m8eWLDjG", "kL5wMRBEVWboo", "20250206")
    :return: list of row tuples from parse_live_detail; [] when the API
             reports an error or returns an empty body
    :raises requests.HTTPError: on a non-2xx HTTP status
    """
    uid = sql_info[1]
    room_id = sql_info[2]
    date_code = sql_info[3]
    par_url = f"/api/live/detail/{uid}/{date_code}/{room_id}"
    url = f'{base_live_url}{par_url}'
    log.info("开始抓取 live_detail 数据......................")
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    resp_json = response.json()
    if resp_json:
        if resp_json.get("code") == 200:
            enc_data = resp_json.get("data")
            # par_url also seeds the AES key/IV derivation in decrypt_data.
            dec_data = decrypt_data(log, par_url, enc_data)
            return parse_live_detail(log, dec_data, sql_info, headers)
        log.warning(f"{inspect.currentframe().f_code.co_name}: {resp_json.get('message')}")
    else:
        log.warning(f"{inspect.currentframe().f_code.co_name} get resp_json")
    # Previously returned None implicitly on failure; [] is iteration-safe.
    return []
+
+
if __name__ == '__main__':
    # Ad-hoc manual test entry point for a single live room.
    # NOTE(review): the "authorization" value below is a hard-coded, short-lived
    # JWT committed to source control — it expires (see its exp claim) and is a
    # credential leak; move it to configuration/secrets and rotate the token.
    KGJ_HEADERS = {
        "accept": "*/*",
        "accept-language": "en,zh-CN;q=0.9,zh;q=0.8",
        "authorization": "Bearer eyJhbGciOiJIUzUxMiJ9.eyJhdWQiOiIxMDAwIiwiaXNzIjoia2FvZ3VqaWEuY29tIiwianRpIjoiNDI4OWQ1ZTdhODY4NDBjMmFiMTBiZGE3OTY1YTRhZDYiLCJzaWQiOjU2OTY1ODQsImlhdCI6MTc0MDAzODQ4NCwiZXhwIjoxNzQwNjQzMjg0LCJid2UiOjEsInR5cCI6MSwicF9id2UiOjB9.uGe1TroAEJ6VohgtOgNwf_V3pbtNUOv8ZA9R9r99TAF-Gblw8YcMp9kddrKs1CKrhe8amhVd3EYHiC6stI0YWw",
        "content-type": "application/json",
        "origin": "https://www.kaogujia.com",
        "priority": "u=1, i",
        "referer": "https://www.kaogujia.com/",
        "sec-ch-ua": "\"Not(A:Brand\";v=\"99\", \"Google Chrome\";v=\"133\", \"Chromium\";v=\"133\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"Windows\"",
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-site",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
        "version_code": "3.1"
    }
    # Sample row shaped like the DB tuple: (id, uid, room_id, date_code).
    s_info = (1, "ZB8m8eWLDjG", "kL5wMRBEVWboo", "20250206")
    get_live_detail(logger, s_info, KGJ_HEADERS)
    # get_sales_one_page(logger, None, None,1)
    # get_sales_list(logger, None)

+ 23 - 0
kaogujia_spider/settings.py

@@ -0,0 +1,23 @@
+# -*- coding: utf-8 -*-
+# Author : Charley
+# Python : 3.10.8
+# Date   : 2025/2/14 17:03
+
# Account "195" credentials: request headers shared by the kaogujia spiders.
# NOTE(review): the "authorization" value is a hard-coded, short-lived JWT
# committed to source control — it expires (see its exp claim) and is a
# credential leak; load it from configuration/secrets instead.
KGJ_HEADERS = {
    "accept": "*/*",
    "accept-language": "en,zh-CN;q=0.9,zh;q=0.8",
    "authorization": "Bearer eyJhbGciOiJIUzUxMiJ9.eyJhdWQiOiIxMDAwIiwiaXNzIjoia2FvZ3VqaWEuY29tIiwianRpIjoiNDE2NWYwZTIwYzQwNGVhYThmMjU5MTY4YThhNmJkNTgiLCJzaWQiOjU2OTY1ODQsImlhdCI6MTczOTMzMTg0MCwiZXhwIjoxNzM5OTM2NjQwLCJid2UiOjEsInR5cCI6MSwicF9id2UiOjB9.42Rnbx_fSVuEdinT-zABcclkyd-NRh6zJ1kzuZX080TI6S66KzdfcgPjFQDUel6Pdx-PHvihAtraEHq3Rudw5A",
    "content-type": "application/json",
    "origin": "https://www.kaogujia.com",
    "priority": "u=1, i",
    "referer": "https://www.kaogujia.com/",
    "sec-ch-ua": "\"Not(A:Brand\";v=\"99\", \"Google Chrome\";v=\"133\", \"Chromium\";v=\"133\"",
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": "\"Windows\"",
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-site",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
    "version_code": "3.1"
}

部分文件因为文件数量过多而无法显示