# -*- coding: utf-8 -*-
# Author : Charley
# Python : 3.10.8
# Date : 2026/4/15 15:10
"""Daily crawler for snkrdunk.com used trading-card listings.

Walks the paginated "used" listing API for each configured brand through a
rotating KDL tunnel proxy and bulk-inserts the normalized rows into the
``snkrdunk_record`` MySQL table. Scheduled to run once a day at 00:01.
"""
import time
import inspect

import requests
import schedule
import user_agent
from loguru import logger
from tenacity import retry, stop_after_attempt, wait_fixed

from mysql_pool import MySQLConnectionPool

# One log file per day: rotate at midnight, keep a week of history.
# ("7 days" is the plural duration form documented by loguru's retention parser.)
logger.remove()
logger.add("./logs/{time:YYYYMMDD}.log",
           encoding='utf-8',
           rotation="00:00",
           format="[{time:YYYY-MM-DD HH:mm:ss.SSS}] {level} {message}",
           level="DEBUG",
           retention="7 days")

# NOTE(review): session cookie is hard-coded and will expire; move it to
# configuration (or refresh it programmatically) instead of editing source.
cookie = "ENSID=MTc3NjMxMDE1MXw1LThLaEQ5OVRZTDBLbktHUzhpTG9PR01ybUQwYmxyb2dqMlJETVpUN3dCVVV3aWpwbXE0UGtDYXkwN2ZlUU9ib3I2a3ZCa21xZGdDQjc2aVBVNElHWDlXNXlPUS1YZVd816ifsPbu7_r0ouHJxFdsRYT3jsCwBFEZ6IMhzHnGcyI=; _pin_unauth=dWlkPU5qVXlOak16TlRJdE1UVmxNUzAwTUdZNExXRmpZakl0WlRJMU1EazBaVEl6WVRRMw; _tt_enable_cookie=1; _ttp=01KP7TC2C47N8816E74NZ2S87G_.tt.1; aws-waf-token=85ea0abd-c7e7-44fa-a486-721005517367:BgoAddgnTIMdAAAA:sgUJ0isHGRMm9HGMWaserzc0yH/cfmcnAJs7tApXkvxu8CkSB2W2/+vEB9V4uBUqE+8uegKHQINRE2ExEMC9XRl6QLHoC16s5mOsvrptUYDuWqAnyQJcr8a6dAlUpokqmdLFzLRoiM2digCAKXmKRM5fbEQgY56lCzRpNqolUtcS/X9zZQIfJnj2GfmAjNw=; _gcl_au=1.1.140783051.1776231515.1213660920.1776231619.1776231619; _ga_T9G4FWRKGP=GS2.1.s1776309189$o1$g0$t1776309189$j60$l0$h0; _gid=GA1.2.339334579.1776309190; __lt__cid=074b5327-9f75-4356-a201-9879abb859a5; __lt__sid=1cd194ae-b7f570f4; __rtbh.uid=%7B%22eventType%22%3A%22uid%22%2C%22id%22%3A%22undefined%22%2C%22expiryDate%22%3A%222027-04-16T03%3A13%3A11.377Z%22%7D; __rtbh.lid=%7B%22eventType%22%3A%22lid%22%2C%22id%22%3A%221CbwgL9cLbT9BLfXJtkK%22%2C%22expiryDate%22%3A%222027-04-16T03%3A13%3A11.379Z%22%7D; _ga_WLFPCJHLHL=GS2.1.s1776309194$o1$g0$t1776309194$j60$l0$h0; _ga=GA1.1.342828207.1776231516; forterToken=6f95dbdd5df5486fb9bfe93652a6b6a7_1776309188237__UDF43-m4_27ck_; ttcsid_CEM1KGBC77U8BHMFF6SG=1776309193318::MmzC2Nw5ahbgUD_ovmJa.1.1776309203360.1; _dd_s=aid=c68687bf-9ca5-4040-9266-a9c8281287b7&logs=1&id=f2443d73-de49-431a-9dba-17f30b9410ac&created=1776309188187&expire=1776310738774&rum=0; _rdt_uuid=1776231515762.27c25d2a-f2b5-4370-89ca-ba2ba6d93c35; _rdt_em=:7fa565b08bc719fc95a07f3f9cbb8cfcd715b62ce82bc26739d3074a5196870c; ttcsid_CAP79SBC77U56BB6BI50=1776309194536::zh_5-OLx-MD4DmA4jALH.4.1776310041216.1; ttcsid=1776309194523::bhCq-3lisAc3SvWnrZng.4.1776310041216.0::1.845885.846484::955506.51.1579.4652::954188.255.4300; _ga_6H1EYVVN53=GS2.1.s1776308613$o5$g1$t1776310150$j60$l0$h0; _ga_3722WCREQR=GS2.1.s1776308613$o5$g1$t1776310150$j60$l0$h0"

headers = {
    "accept": "application/json",
    # "referer": "https://snkrdunk.com/en/trading-cards/671489?slide=right",
    "user-agent": user_agent.generate_user_agent(),
    "cookie": cookie
}


def after_log(retry_state):
    """tenacity ``after=`` callback: log the outcome of each attempt.

    :param retry_state: tenacity ``RetryCallState`` for the finished attempt
    """
    # Every retried function here takes its logger as the first positional
    # argument; fall back to the module-level logger otherwise.
    log = retry_state.args[0] if retry_state.args else logger
    if retry_state.outcome.failed:
        log.warning(
            f"Function '{retry_state.fn.__name__}', Attempt {retry_state.attempt_number} Times")
    else:
        log.info(f"Function '{retry_state.fn.__name__}', Attempt {retry_state.attempt_number} succeeded")


@retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
def get_proxys(log):
    """Build the ``proxies`` mapping for requests (KDL tunnel proxy).

    :param log: logger (first positional arg, consumed by :func:`after_log`)
    :return: dict with identical ``http`` and ``https`` proxy URLs
    """
    # NOTE(review): proxy credentials are hard-coded -- move to env/config.
    tunnel = "x371.kdltps.com:15818"
    kdl_username = "t13753103189895"
    kdl_password = "o0yefv6z"
    # Building a dict cannot raise, so the original try/except here was dead code.
    proxy_url = f"http://{kdl_username}:{kdl_password}@{tunnel}/"
    return {"http": proxy_url, "https": proxy_url}


@retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
def get_single_page(log, page, brand):
    """Fetch one page of used-listing data for a brand.

    :param log: logger
    :param page: 1-based page number
    :param brand: brand id, e.g. "pokemon"
    :return: list of listing dicts (``usedTradingCards``), possibly empty
    :raises requests.HTTPError: on non-2xx responses (triggers the retry)
    """
    log.info(f"获取第 {page} 页数据,品牌为 {brand}....................................................")
    url = "https://snkrdunk.com/en/v1/trading-cards/used"
    params = {
        "brandId": brand,
        "categoryId": "25",
        "page": page,
        "perPage": "20",
        "sortType": "latest",
        "isOnlyOnSale": "false"
    }
    response = requests.get(url, headers=headers, params=params,
                            proxies=get_proxys(log), timeout=22)
    response.raise_for_status()
    resp_json = response.json()
    return resp_json.get("usedTradingCards", [])


def parse_data(log, resp_list, brand, sql_pool):
    """Normalize raw listing records and bulk-insert them.

    A malformed record is logged and skipped instead of aborting the whole
    batch (previously one bad row -- e.g. a ``None`` price -- dropped every
    row of the page because a single try/except wrapped the entire loop).

    :param log: logger
    :param resp_list: raw listing dicts from :func:`get_single_page`
    :param brand: brand id the records belong to
    :param sql_pool: MySQLConnectionPool used for the insert
    """
    dict_list = []
    for data in resp_list:
        try:
            # "US $1,234" -> "1234"; guard against a missing/None price.
            price = (data.get("price") or "").replace("US $", "").replace(",", "")
            # Strip the size suffix from the image URL,
            # e.g. .../5907002.jpeg?size=m -> .../5907002.jpeg
            front_img = (data.get("thumbnailUrl") or "").split("?")[0]
            dict_list.append({
                "brand": brand,
                "used_id": data.get("id"),
                "trading_card_id": data.get("tradingCardId"),
                "card_name": data.get("tradingCardName"),
                "listing_uid": data.get("listingUID"),  # id used to open the detail page
                "price": price,
                "score": data.get("condition"),
                "front_img": front_img,
                "is_sold": data.get("isSold"),
                "category": "Trading Cards (Single Card)"
            })
        except Exception as e:
            log.error(f"Error parsing data: {e}")
    if not dict_list:
        return
    try:
        sql_pool.insert_many(table="snkrdunk_record", data_list=dict_list, ignore=True)
    except Exception as e:
        log.error(f"Error parsing data: {e}")


def get_list_data(log, brand, sql_pool):
    """Walk every listing page for one brand until it is exhausted.

    :param log: logger
    :param brand: brand id
    :param sql_pool: MySQLConnectionPool passed through to :func:`parse_data`
    """
    page = 1
    while True:
        try:
            data_list = get_single_page(log, page, brand)
        except Exception as e:
            # All retries exhausted: stop this brand right away instead of
            # parsing an empty page and logging a misleading "No more data".
            log.error(f"Error getting page {page} for brand {brand}: {e}")
            break
        if not data_list:
            log.info(f"No more data for brand {brand}, page {page}")
            break
        parse_data(log, data_list, brand, sql_pool)
        # A short page (fewer than the requested 20 rows) is the last one.
        if len(data_list) < 20:
            log.info(f"No more data for brand {brand}, page {page}")
            break
        page += 1


@retry(stop=stop_after_attempt(100), wait=wait_fixed(3600), after=after_log)
def snk_main(log):
    """Crawl every configured brand once.

    Raises (and therefore retries hourly, up to 100 times) when the MySQL
    connection pool is unhealthy; per-brand failures are logged and skipped.

    :param log: logger
    :raises RuntimeError: when the connection-pool health check fails
    """
    log.info(
        f'开始运行 {inspect.currentframe().f_code.co_name} 爬虫任务....................................................')
    # Configure the MySQL connection pool and verify it before crawling.
    sql_pool = MySQLConnectionPool(log=log)
    if not sql_pool.check_pool_health():
        log.error("数据库连接池异常")
        raise RuntimeError("数据库连接池异常")
    brand_list = ["pokemon", "onepiece", "yu-gi-oh"]
    try:
        for brand in brand_list:
            log.info(f'开始采集 {brand} 数据....................................................')
            try:
                get_list_data(log, brand, sql_pool)
            except Exception as e:
                # One broken brand must not stop the remaining brands.
                log.error(f'采集 {brand} 数据异常: {e}')
    except Exception as e:
        log.error(f'{inspect.currentframe().f_code.co_name} error: {e}')
    finally:
        log.info(f'爬虫程序 {inspect.currentframe().f_code.co_name} 运行结束,等待下一轮的采集任务............')


def schedule_task():
    """Entry point: run :func:`snk_main` every day at 00:01."""
    # snk_main(log=logger)  # uncomment to run one crawl immediately
    schedule.every().day.at("00:01").do(snk_main, log=logger)
    while True:
        schedule.run_pending()
        time.sleep(1)


if __name__ == '__main__':
    schedule_task()