# yk_new_daily_spider.py

# -*- coding: utf-8 -*-
# Author : Charley
# Python : 3.10.8
# Date : 2025/12/22 10:44
import inspect
import time

import requests

from mysql_pool import MySQLConnectionPool
from settings import *
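
# The star import above is expected to supply (among others): make_request,
# logger, the retry / stop_after_attempt / wait_fixed helpers (tenacity-style),
# the after_log callback used in the decorators below, and the downstream
# collectors get_product_detail_list, get_players and get_reports. None of
# these names are defined in this file.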


@retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
def get_all_sold_one_page(log, page: int, last_id, lastSalePrice):
    """
    Fetch one page of sold-item data.
    :param log: logger object
    :param page: page number
    :param last_id: "id" of the last item on the previous page (0 for the first page)
    :param lastSalePrice: "unitPriceStr" of the last item on the previous page ('' for the first page)
    :return: items of this page, totalPage, total
    """
    log.info(f"Starting < get_all_sold_one_page > to fetch page {page}")
    url = "https://api.joycard.xyz/api/front/c/product/productShowList"
    data = {
        "lastId": last_id,
        "lastSalePrice": lastSalePrice,
        "limit": 20,
        "openMode": "",
        "page": page,
        "saleStatus": "2",  # "2" = sold items (a commented-out sample payload in the original used "1")
        "sort": "0"
    }
    try:
        response = make_request(log, 'POST', url, data=data)
        if response:
            items = response["data"]["list"]
            total_page = response["data"]["totalPage"]
            total = response["data"]["total"]
            log.info(f"Successfully fetched page {page}: {len(items)} items")
            return items, total_page, total
        else:
            return [], 0, 0
    except requests.exceptions.RequestException as e:
        log.error(f"Error fetching page {page}: {e}")
        raise
    except ValueError as e:
        log.error(f"Error parsing JSON for page {page}: {e}")
        raise
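
# The functions in this module only rely on the following response fields;
# this shape is inferred from the keys the code accesses, not from any API
# documentation:
# {
#     "data": {
#         "list": [
#             {"id": ..., "code": ..., "merchantCode": ..., "merchantName": ...,
#              "unitPriceStr": ...},
#             ...
#         ],
#         "totalPage": ...,
#         "total": ...
#     }
# }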


def get_shop_detail(log, shop_id):
    """
    Fetch the detail information of a shop (merchant).
    :param log: logger object
    :param shop_id: merchant code
    :return: (fans_num, group_num, create_time), or (None, None, None) on failure
    """
    log.info(f"Start fetching shop {shop_id}")
    url = f"https://api.joycard.xyz/api/front/c/merchant/{shop_id}"
    try:
        response = make_request(log, 'GET', url)
        if response:
            item = response["data"]
            fans_num = item.get("fansNum")
            group_num = item.get("salesQuantity")
            create_time = item.get("createTime")
            log.info(f"Successfully fetched shop {shop_id}")
            return fans_num, group_num, create_time
        else:
            return None, None, None
    except Exception as e:
        log.error(f"Error fetching shop {shop_id}: {e}")
        return None, None, None


def parse_shop_items(log, items, sql_pool):
    if not items:
        log.warning(f"Warning {inspect.currentframe().f_code.co_name}: No items found")
        return
    info_list = []
    for item in items:
        shop_id = item.get("merchantCode")
        fans_num, group_num, create_time = get_shop_detail(log, shop_id)
        shop_name = item.get("merchantName")
        shop_info_dict = {
            "shop_id": shop_id,
            "shop_name": shop_name,
            "fans_num": fans_num,
            "group_num": group_num,
            "create_time": create_time
        }
        info_list.append(shop_info_dict)
    # Insert the collected shop records
    if info_list:
        sql_pool.insert_many(table="yueka_shop_record", data_list=info_list, ignore=True)


def get_product(log, items, sql_pool, last_product_id):
    if not items:
        log.warning(f"Warning {inspect.currentframe().f_code.co_name}: No items found")
        return True
    should_stop = False
    info_list = []
    for item in items:
        product_id = item.get("code")
        if not product_id:
            log.warning(f"Warning {inspect.currentframe().f_code.co_name}: No product_id found")
            continue
        info_list.append(
            {
                "product_id": product_id,
            }
        )
        # If this is yesterday's last product_id, stop paging.
        # The check only applies when last_product_id is not None.
        if last_product_id and product_id == last_product_id:
            log.info(
                f"----------------- The product_id {product_id} is the last product_id:{last_product_id}, stop fetching -----------------")
            should_stop = True
    # Insert the collected product records
    if info_list:
        sql_pool.insert_many(table="yueka_product_record", data_list=info_list, ignore=True)
    # Fewer than 20 items means we have reached the last page
    if len(items) < 20:
        log.info(
            f"----------------- {len(items)} items found, less than 20, stop fetching -----------------")
        should_stop = True
    return should_stop
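
# Note on yueka_product_record: only product_id is written here, but yueka_main()
# later runs "ORDER BY finish_time DESC" against this table, so the table is
# assumed to also have a finish_time column (e.g. a timestamp defaulting to the
# insert time); that schema detail is not visible in this file.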


def get_all_sold_data(log, sql_pool, last_product_id):
    """
    Fetch the sold data for all categories.
    :param sql_pool: MySQL connection pool object
    :param log: logger object
    :param last_product_id: last collected product_id; if None, collect from the beginning
    """
    page = 1
    max_page = 500
    last_id = 0
    lastSalePrice = ''
    while page <= max_page:
        items, total_page, total = get_all_sold_one_page(log, page, last_id, lastSalePrice)
        if not items:
            break
        # Process the items of this page
        parse_shop_items(log, items, sql_pool)
        stop_page = get_product(log, items, sql_pool, last_product_id)
        if stop_page:
            log.info(
                f"----------------- The product_id {last_product_id} is the last product_id, stop fetching -----------------")
            break
        # Advance the cursor: lastId / lastSalePrice come from the last item of this page
        last_id = items[-1].get("id")
        lastSalePrice = items[-1].get("unitPriceStr")
        if not last_id:
            log.error("API response missing id in last item, cannot paginate")
            break
        if not lastSalePrice:
            log.error("API response missing unitPriceStr in last item, cannot paginate")
            break
        page += 1
    log.info(f"Finished fetching all data. Total pages: {total_page}, total items: {total}")
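

# yueka_main below is wrapped in a retry that waits 3600 s between attempts, i.e.
# if a run raises, the whole task is re-attempted roughly once an hour, up to
# 100 times.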
@retry(stop=stop_after_attempt(100), wait=wait_fixed(3600), after=after_log)
def yueka_main(log):
    """
    Main entry point.
    :param log: logger object
    """
    log.info(
        f'Starting the {inspect.currentframe().f_code.co_name} spider task....................................................')
    # Configure the MySQL connection pool
    sql_pool = MySQLConnectionPool(log=log)
    if not sql_pool.check_pool_health():
        log.error("Database connection pool is unhealthy")
        raise RuntimeError("Database connection pool is unhealthy")
    try:
        token = sql_pool.select_one("SELECT token FROM yueka_token")
        token = token[0]
        try:
            # Fetch the most recently finished product_id
            last_product_id_result = sql_pool.select_one(
                "SELECT product_id FROM yueka_product_record ORDER BY finish_time DESC LIMIT 1")
            # If the table is empty, last_product_id_result is None
            last_product_id = last_product_id_result[0] if last_product_id_result else None
            get_all_sold_data(log, sql_pool, last_product_id)
        except Exception as e:
            log.error(f"Error fetching last_product_id: {e}")
        time.sleep(5)
        # Fetch product details
        try:
            get_product_detail_list(log, sql_pool, token)
        except Exception as e:
            log.error(f"Error fetching product_detail_list: {e}")
        time.sleep(5)
        # Fetch product players
        try:
            get_players(log, sql_pool, token)
        except Exception as e:
            log.error(f"Error fetching players: {e}")
        time.sleep(5)
        # Fetch card-opening reports
        try:
            get_reports(log, sql_pool, token)
        except Exception as e:
            log.error(f"Error fetching reports: {e}")
    except Exception as e:
        log.error(f'{inspect.currentframe().f_code.co_name} error: {e}')
    finally:
        log.info(f'Spider {inspect.currentframe().f_code.co_name} finished, waiting for the next collection round............')


if __name__ == '__main__':
    yueka_main(logger)
    # get_all_sold_one_page(logger, 1, 0, '')
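

# ---------------------------------------------------------------------------
# Note on make_request: it comes from the star import of settings and its real
# implementation is not part of this file. The functions above assume it sends
# a GET, or a POST with `data` as a JSON body, returns the parsed JSON dict on
# success, and lets requests / JSON-decoding errors propagate to the caller.
# A minimal sketch under those assumptions (illustrative only, not the
# project's actual helper) could look like:
#
# def make_request(log, method, url, data=None, timeout=15):
#     log.info(f"{method} {url}")
#     resp = requests.request(method, url, json=data, timeout=timeout)
#     resp.raise_for_status()   # network / HTTP errors propagate to the caller
#     return resp.json()        # ValueError propagates if the body is not JSON
# ---------------------------------------------------------------------------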