weika_bidding_spider.py

# -*- coding: utf-8 -*-
# Author : Charley
# Python : 3.8.10
# Date: 2024-09-11 14:17
import random
import time
from datetime import datetime

import requests
from retrying import retry

from mysq_pool import MySQLConnectionPool
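
# Third-party dependencies (assumed): pip install requests retrying loguru
# (loguru is only used by the __main__ demo at the bottom). mysq_pool is a
# local helper module, not on PyPI; a reference sketch of its assumed
# interface is appended at the end of this file.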


def save_data(sql_pool, info):
    """
    Save a batch of scraped rows.
    :param sql_pool: MySQL connection pool
    :param info: list of row tuples matching the column order below
    """
    sql = ("INSERT INTO weikajia_bidding (cabinetId, imgs, title, price, lastBidPrice, "
           "auctionItemId, auctionStart, auctionEnd, currBidIndex) "
           "VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)")
    sql_pool.insert_all(sql, info)
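
# A plausible DDL for the target table, inferred from the INSERT above.
# Column names come from the query; the types are assumptions, not from the
# original source:
#
#   CREATE TABLE IF NOT EXISTS weikajia_bidding (
#       id            INT AUTO_INCREMENT PRIMARY KEY,
#       cabinetId     VARCHAR(64),
#       imgs          TEXT,
#       title         VARCHAR(255),
#       price         DECIMAL(10, 2),
#       lastBidPrice  DECIMAL(10, 2),
#       auctionItemId VARCHAR(64),
#       auctionStart  DATETIME,
#       auctionEnd    DATETIME,
#       currBidIndex  INT
#   ) DEFAULT CHARSET = utf8mb4;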


def transform(timestamp):
    # Convert the Unix timestamp to a datetime object
    dt_object = datetime.fromtimestamp(int(timestamp))
    # Format the time as 'YYYY-MM-DD HH:MM:SS'
    formatted_time = dt_object.strftime('%Y-%m-%d %H:%M:%S')
    return formatted_time
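
# Example (output depends on the machine's local timezone; on a UTC+8 host):
#   transform(1700000000)  ->  '2023-11-15 06:13:20'
# Note: this assumes seconds-resolution timestamps; if the API returned
# milliseconds, the value would first need to be divided by 1000.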


@retry(stop_max_attempt_number=3, wait_fixed=10000)
def get_list_page(headers, logger):
    """Fetch the first page to learn the total number of auction items."""
    url = "https://api.weikajia.com/search/searchAuctionItem"
    data = {
        "page": 1,
        "pageSize": 10,
        "hideLoading": True,
        "ascSort": "desc",
        "sortType": "auction_start",
        "orderStatus": "1"
    }
    response = requests.post(url, headers=headers, json=data, timeout=5)
    res = response.json()
    # print(f'get_list_page: {res}')
    if res["resultCode"] != 200:
        logger.debug("get_list_page resultCode request failed, retrying...........")
        raise Exception("Request failed")
    total = res.get('data', {}).get('total')
    if total:
        return total
    else:
        logger.debug("get_list_page total request failed, retrying...........")
        raise Exception("get_list_page request failed, retrying...........")
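
# Response shape assumed by get_list_page above and get_list below (inferred
# from the field accesses in this file, not an official schema):
# {
#     "resultCode": 200,
#     "data": {
#         "total": 123,
#         "cardCabinet": [
#             {"cabinetId": ..., "imgs": ..., "title": ..., "price": ...,
#              "lastBidPrice": ..., "auctionItemId": ..., "auctionStart": ...,
#              "auctionEnd": ..., "currBidIndex": ...}
#         ]
#     }
# }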


@retry(stop_max_attempt_number=3, wait_fixed=1000)
def get_list(sql_pool, pp, headers, logger):
    """
    Fetch one page of listing data and save it.
    :param logger:
    :param sql_pool:
    :param pp: page number
    :param headers:
    :return:
    """
    url = "https://api.weikajia.com/search/searchAuctionItem"
    # Alternative payload (left commented out in the original):
    # soonest-ending auctions first.
    # data = {
    #     "page": int(pp),
    #     "pageSize": 10,
    #     "hideLoading": True,
    #     "ascSort": "asc",
    #     "sortType": "auction_end",
    #     "orderStatus": "1"
    # }
    data = {
        "page": int(pp),
        "pageSize": 10,
        "hideLoading": True,
        "ascSort": "desc",
        "sortType": "auction_start",
        "orderStatus": "1"
    }
    response = requests.post(url, headers=headers, json=data, timeout=5)
    res = response.json()
    # print(f'get_list: {res}')
    if res["resultCode"] != 200:
        logger.debug("Request failed, retrying...........")
        raise Exception("Request failed")
    logger.debug(f'Page {pp} fetched successfully..............')
    cardCabinet = res.get('data', {}).get('cardCabinet', [])
    if cardCabinet:
        info_list = []
        for item in cardCabinet:
            cabinetId = item.get("cabinetId")
            imgs = item.get("imgs")
            title = item.get("title")
            price = item.get("price")
            lastBidPrice = item.get("lastBidPrice")
            auctionItemId = item.get("auctionItemId")
            auctionStart = transform(item.get("auctionStart"))
            auctionEnd = transform(item.get("auctionEnd"))
            currBidIndex = item.get("currBidIndex")
            info = (cabinetId, imgs, title, price, lastBidPrice, auctionItemId,
                    auctionStart, auctionEnd, currBidIndex)
            # print(info)
            info_list.append(info)
        save_data(sql_pool, info_list)
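

# Note: the @retry below re-runs bidding_main up to 100 times with a one-hour
# wait, but only when an exception escapes it. Since bidding_main catches
# Exception itself, in practice it runs once per invocation.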
@retry(stop_max_attempt_number=100, wait_fixed=3600000)
def bidding_main(log):
    try:
        log.info("Starting the bidding_spider crawl task............................................................")
        sql_pool = MySQLConnectionPool(log=log)
        if not sql_pool:
            log.error("Database connection failed")
            raise Exception("Database connection failed")
        # token = sql_pool.select_one("select token from wkj_token")
        headers = {
            "appVersion": "1.6.5",
            "osVersion": "9",
            "deviceModel": "M2007J22C",
            "appVersionCode": "168",
            "deviceBrand": "xiaomi",
            "platform": "android",
            # "token": token[0],
            "user-agent": "Mozilla/5.0 (Linux; Android 9; M2007J22C Build/QP1A.190711.020; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/92.0.4515.131 Mobile Safari/537.36",
            "Content-Type": "application/json",
            "Connection": "Keep-Alive"
        }
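        # The headers above mimic the official Android app client. The
        # commented-out "token" header would be read from the wkj_token table
        # via the select_one() call above; the author leaves it disabled here.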
        total = get_list_page(headers, log)
        pages = (total + 9) // 10  # ceiling division: number of 10-item pages
        log.info(
            f'----------------------------------------{total} records in total, {pages} pages----------------------------------------')
        for i in range(1, pages + 1):
            log.debug(f'Crawling page {i}..............')
            try:
                get_list(sql_pool, i, headers, log)
                time.sleep(random.randint(3, 5))  # polite random pause between pages
            except Exception as e:
                log.error(f'Page {i} failed: {e}')
    except Exception as e:
        log.error(e)
    finally:
        log.info("Crawler run finished, waiting for the next collection round.............")


if __name__ == '__main__':
    from loguru import logger

    bidding_main(logger)
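

# ---------------------------------------------------------------------------
# Reference only: a minimal sketch of the imported mysq_pool module. The real
# implementation is not part of this file; the interface (insert_all /
# select_one) is inferred from the calls above, the pooling below assumes
# pymysql + DBUtils, and all connection parameters are placeholders.
#
#   # mysq_pool.py
#   import pymysql
#   from dbutils.pooled_db import PooledDB
#
#   class MySQLConnectionPool:
#       def __init__(self, log=None):
#           self.log = log
#           self.pool = PooledDB(creator=pymysql, maxconnections=5,
#                                host="127.0.0.1", port=3306, user="root",
#                                password="******", database="spider",
#                                charset="utf8mb4")
#
#       def insert_all(self, sql, rows):
#           # Batch-insert a list of row tuples.
#           conn = self.pool.connection()
#           try:
#               with conn.cursor() as cur:
#                   cur.executemany(sql, rows)
#               conn.commit()
#           except Exception as e:
#               conn.rollback()
#               if self.log:
#                   self.log.error(e)
#           finally:
#               conn.close()
#
#       def select_one(self, sql):
#           # Return the first row of a query.
#           conn = self.pool.connection()
#           try:
#               with conn.cursor() as cur:
#                   cur.execute(sql)
#                   return cur.fetchone()
#           finally:
#               conn.close()
# ---------------------------------------------------------------------------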