super_vault_on_sale_spider.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. # -*- coding: utf-8 -*-
  2. # Author : Charley
  3. # Python : 3.10.8
  4. # Date : 2026/1/28 11:12
  5. import inspect
  6. import requests
  7. from loguru import logger
  8. from datetime import datetime
  9. from mysql_pool import MySQLConnectionPool
  10. from tenacity import retry, stop_after_attempt, wait_fixed
  11. """
  12. SuperVault
  13. """
  14. # logger.remove()
  15. # logger.add("./logs/{time:YYYYMMDD}.log", encoding='utf-8', rotation="00:00",
  16. # format="[{time:YYYY-MM-DD HH:mm:ss.SSS}] {level} {message}",
  17. # level="DEBUG", retention="7 day")
# HTTP headers passed to requests.post() by get_vod_single_page.
# NOTE(review): the okhttp User-Agent presumably mimics the Android app client - confirm.
# NOTE(review): "Authorization" is sent empty; presumably the endpoint allows
# anonymous access - confirm before relying on it.
HEADERS = {
    "User-Agent": "okhttp/4.9.0",
    # "Connection": "Keep-Alive",
    # "Accept-Encoding": "gzip",
    "Authorization": "",
    "CXX-APP-API-VERSION": "V2",  # must be included
    # "deviceType": "2",
    # "udid": "20f902c10f6163a19bf137d801731d9f",
    # "time": str(int(time.time() * 1000)),
    "Content-Type": "application/json; charset=UTF-8"
}
  29. def after_log(retry_state):
  30. """
  31. retry 回调
  32. :param retry_state: RetryCallState 对象
  33. """
  34. # 检查 args 是否存在且不为空
  35. if retry_state.args and len(retry_state.args) > 0:
  36. log = retry_state.args[0] # 获取传入的 logger
  37. else:
  38. log = logger # 使用全局 logger
  39. if retry_state.outcome.failed:
  40. log.warning(
  41. f"Function '{retry_state.fn.__name__}', Attempt {retry_state.attempt_number} Times")
  42. else:
  43. log.info(f"Function '{retry_state.fn.__name__}', Attempt {retry_state.attempt_number} succeeded")
  44. @retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
  45. def get_vod_single_page(log, page_num=1):
  46. """
  47. 获取单页数据
  48. :param log: logger对象
  49. :param page_num: 页码
  50. :return: 数据
  51. """
  52. url = "https://cxx.cardsvault.net/app/teamup/list"
  53. data = {
  54. "pageSize": 20,
  55. "pageNum": page_num
  56. }
  57. response = requests.post(url, headers=HEADERS, json=data, timeout=22)
  58. response.raise_for_status()
  59. result = response.json()
  60. # print(result)
  61. if result.get("status") == 200:
  62. data = result.get("data", {})
  63. total = data.get("total", 0)
  64. current_page = data.get("pageNum", 1)
  65. items = data.get("data", [])
  66. log.info(f"当前查询的是 ->->-> 第 {current_page} 页,共 {total} 条记录")
  67. log.debug(f"当前页数据数量: {len(items)}")
  68. return {
  69. "total": total,
  70. "current_page": current_page,
  71. "items": items,
  72. }
  73. else:
  74. log.error(f"API 返回错误: {result.get('msg', '未知错误')}")
  75. return None
  76. def parse_list_items(log, items):
  77. """
  78. 解析列表项
  79. :param log: logger对象
  80. :param items: 列表项
  81. :return: 解析后的列表项
  82. """
  83. parsed_items = []
  84. log.debug(f"正在解析列表项.................")
  85. for item in items:
  86. pid = item.get("id")
  87. serial = item.get("serial") # 编号
  88. title = item.get("title")
  89. type_name = item.get("typeName") # 随机卡种
  90. isPre = item.get("isPre")
  91. count = item.get("count")
  92. totalPrice = item.get("totalPrice")
  93. totalPrice = totalPrice / 100 if totalPrice else 0
  94. signPrice = item.get("signPrice")
  95. signPrice = signPrice / 100 if signPrice else 0
  96. sellTime = item.get("sellTime")
  97. sellDays = item.get("sellDays")
  98. status = item.get("status") # 9:完成 8:待发货
  99. statusName = item.get("statusName")
  100. description = item.get("description")
  101. createTime = item.get("createTime")
  102. cover_url = item.get("cover", {}).get("url") # 封面图
  103. anchor_id = item.get("anchor", {}).get("id")
  104. anchor_userName = item.get("anchor", {}).get("userName")
  105. soldCount = item.get("soldCount")
  106. detailUrl = item.get("detailUrl")
  107. goodsUrl = item.get("goodsUrl")
  108. standardName = item.get("standardName") # 规格
  109. crawl_date = datetime.now().strftime("%Y-%m-%d")
  110. parsed_item = {
  111. "pid": pid,
  112. "title": title,
  113. "serial": serial,
  114. "type_name": type_name,
  115. "is_pre": isPre,
  116. "count": count,
  117. "total_price": totalPrice,
  118. "sign_price": signPrice,
  119. "sell_time": sellTime,
  120. "sell_days": sellDays,
  121. "status": status,
  122. "status_name": statusName,
  123. "description": description,
  124. "create_time": createTime,
  125. "cover_url": cover_url,
  126. "anchor_id": anchor_id,
  127. "anchor_username": anchor_userName,
  128. "sold_count": soldCount,
  129. "detail_url": detailUrl,
  130. "goods_url": goodsUrl,
  131. "standard_name": standardName,
  132. "crawl_date": crawl_date
  133. }
  134. # print(parsed_item)
  135. parsed_items.append(parsed_item)
  136. return parsed_items
  137. def get_vod_list(log, sql_pool):
  138. """
  139. 获取列表数据
  140. :param log: logger对象
  141. :param sql_pool: 数据库连接池
  142. """
  143. page_num = 1
  144. total_pages = 9
  145. items_per_page = 20 # pageSize
  146. while page_num <= total_pages:
  147. log.debug(f"正在获取第 {page_num} 页的数据.................")
  148. page_result = get_vod_single_page(log, page_num)
  149. if not page_result:
  150. log.error(f"获取第 {page_num} 页失败 !!!")
  151. break
  152. # 第一次请求时更新真实的总页数
  153. if page_num == 1:
  154. total_count = page_result["total"]
  155. total_pages = (total_count + items_per_page - 1) // items_per_page
  156. log.info(f"总共 {total_pages} 页")
  157. # 每页获取后立即解析
  158. items = parse_list_items(log, page_result["items"])
  159. sql_pool.insert_many(table="super_vault_on_sale_record", data_list=items, ignore=True)
  160. page_num += 1
  161. @retry(stop=stop_after_attempt(100), wait=wait_fixed(3600), after=after_log)
  162. def cxx_sale_main(log):
  163. """
  164. 主函数
  165. :param log: logger对象
  166. """
  167. log.info(
  168. f'开始运行 {inspect.currentframe().f_code.co_name} 爬虫任务....................................................')
  169. # 配置 MySQL 连接池
  170. sql_pool = MySQLConnectionPool(log=log)
  171. if not sql_pool.check_pool_health():
  172. log.error("数据库连接池异常")
  173. raise RuntimeError("数据库连接池异常")
  174. try:
  175. # 获取所有 pid
  176. try:
  177. get_vod_list(log, sql_pool)
  178. except Exception as e:
  179. log.error(f"Error fetching last_product_id: {e}")
  180. except Exception as e:
  181. log.error(f'{inspect.currentframe().f_code.co_name} error: {e}')
  182. finally:
  183. log.info(f'爬虫程序 {inspect.currentframe().f_code.co_name} 运行结束,等待下一轮的采集任务............')
# Script entry point: when run directly, call cxx_sale_main with the
# module-level loguru logger.
if __name__ == '__main__':
    # NOTE(review): the commented-out calls below look like manual debugging
    # entry points; schedule_task is not defined in the visible source.
    # get_vod_list(logger, None)
    # get_vod_single_page(logger, 1)
    cxx_sale_main(logger)
    # schedule_task()