# rhyf_spider.py
  1. # -*- coding: utf-8 -*-
  2. # Author : Charley
  3. # Python : 3.10.8
  4. # Date : 2025/12/8 13:44
  5. import time
  6. import random
  7. import inspect
  8. import requests
  9. import user_agent
  10. from loguru import logger
  11. from mysql_pool import MySQLConnectionPool
  12. from tenacity import retry, stop_after_attempt, wait_fixed
  13. from utils import create_request_data
# --- Logging setup --------------------------------------------------------
# Drop loguru's default stderr sink, then log everything (DEBUG+) to a
# per-day file that rotates at midnight and is kept for a week.
logger.remove()
logger.add("./logs/{time:YYYYMMDD}.log", encoding='utf-8', rotation="00:00",
           format="[{time:YYYY-MM-DD HH:mm:ss.SSS}] {level} {message}",
           level="DEBUG", retention="7 day")  # NOTE(review): loguru usually spells this "7 days" — confirm "7 day" parses
# Static API authorization token for wechatapp.ichibankuji.cn.
# NOTE(review): hard-coded credential — consider loading from env/config.
auth = "266577ZDRB9Vb65wzF4G6!1xyVJv7BGKi7fcI757ILRQW!1sI9jAV5AOWQhXJjA!2oIrMsaLY!2WU7AnCtCshdk4EC3dvjQ=="
# Shared request headers for every API call (WeChat mini-program client).
headers = {
    'authority': 'wechatapp.ichibankuji.cn',
    'content-type': 'application/json',
    'referer': 'https://servicewechat.com/wxd21e3190b2a44f73/21/page-frame.html',
    "terminalos": "YFSXZF",
    'user-agent': user_agent.generate_user_agent(),  # random UA chosen once at import time
    'authorization': auth
}
  27. def after_log(retry_state):
  28. """
  29. retry 回调
  30. :param retry_state: RetryCallState 对象
  31. """
  32. # 检查 args 是否存在且不为空
  33. if retry_state.args and len(retry_state.args) > 0:
  34. log = retry_state.args[0] # 获取传入的 logger
  35. else:
  36. log = logger # 使用全局 logger
  37. if retry_state.outcome.failed:
  38. log.warning(
  39. f"Function '{retry_state.fn.__name__}', Attempt {retry_state.attempt_number} Times")
  40. else:
  41. log.info(f"Function '{retry_state.fn.__name__}', Attempt {retry_state.attempt_number} succeeded")
  42. @retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
  43. def get_proxys(log):
  44. """
  45. 获取代理
  46. :return: 代理
  47. """
  48. tunnel = "x371.kdltps.com:15818"
  49. kdl_username = "t13753103189895"
  50. kdl_password = "o0yefv6z"
  51. try:
  52. proxies = {
  53. "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": kdl_username, "pwd": kdl_password, "proxy": tunnel},
  54. "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": kdl_username, "pwd": kdl_password, "proxy": tunnel}
  55. }
  56. return proxies
  57. except Exception as e:
  58. log.error(f"Error getting proxy: {e}")
  59. raise e
  60. @retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
  61. def get_set_single_page(log, page=1, sql_pool=None):
  62. """
  63. 获取单个页面数据
  64. :param log: 日志对象
  65. :param page: 页码
  66. :param sql_pool: 数据库连接池对象
  67. :return: 数据长度
  68. """
  69. log.debug(f"{inspect.currentframe().f_code.co_name}, 正在获取第{page}页数据........")
  70. original_data = {
  71. "orderType": 0,
  72. "pageNum": page,
  73. "pageSize": 10,
  74. "status": 0,
  75. "type": 1
  76. }
  77. url = "https://wechatapp.ichibankuji.cn/wechat/yfs/getSetList"
  78. request_data = create_request_data(original_data)
  79. payload = {
  80. "da": request_data
  81. }
  82. response = requests.post(
  83. url,
  84. headers=headers,
  85. json=payload,
  86. timeout=22
  87. )
  88. # print("响应内容:", response.text)
  89. response.raise_for_status()
  90. resp_json = response.json().get("data", {}).get("list", [])
  91. if not resp_json:
  92. log.debug("Not resp_json, 没有数据........")
  93. return 0
  94. info_list = []
  95. for item in resp_json:
  96. box_id = item.get("id")
  97. name = item.get("name")
  98. price = item.get("price")
  99. onsaleTime = item.get("onsaleTime")
  100. headUrl = item.get("headUrl")
  101. if headUrl:
  102. headUrl = f"https://static.ichibankuji.cn{headUrl}"
  103. scoresDeduct = item.get("scoresDeduct") # 积分抵扣
  104. status = item.get("status")
  105. themeId = item.get("themeId")
  106. total = item.get("total")
  107. box_type = item.get("type")
  108. data_dict = {
  109. "set_id": box_id,
  110. "set_name": name,
  111. "price": price,
  112. "onsale_time": onsaleTime,
  113. "head_url": headUrl,
  114. "scores_deduct": scoresDeduct,
  115. "status": status,
  116. "theme_id": themeId,
  117. "set_total": total,
  118. "set_type": box_type
  119. }
  120. # print(data_dict)
  121. info_list.append(data_dict)
  122. if info_list:
  123. sql_pool.insert_many(table="rhyf_product_list_record", data_list=info_list, ignore=True)
  124. return len(info_list)
  125. def get_setlist_all_page(log, sql_pool=None):
  126. """
  127. 获取所有页面数据
  128. :param log: 日志对象
  129. :param sql_pool: 数据库连接池对象
  130. :return:
  131. """
  132. page = 1
  133. while True:
  134. try:
  135. len_data = get_set_single_page(log, page, sql_pool)
  136. if len_data < 10:
  137. log.debug(f"当前页为{page}, 数据长度为{len_data} ,数据已经获取完毕........")
  138. break
  139. page += 1
  140. except Exception as e:
  141. log.error(f"Error getting set single page: {e}")
  142. break
  143. # ----------------------------------------------------------------------------------------------------------------------
  144. @retry(stop=stop_after_attempt(5), wait=wait_fixed(5), after=after_log)
  145. def get_box_detail(log, set_id: int, sql_pool=None):
  146. """
  147. 获取箱子详情
  148. :param log: 日志对象
  149. :param set_id: 箱子ID
  150. :param sql_pool: 数据库连接池对象
  151. :return: 数据长度
  152. """
  153. log.debug(f"{inspect.currentframe().f_code.co_name}, set_id:{set_id}, 获取 <详情> 数据........")
  154. url = "https://wechatapp.ichibankuji.cn/wechat/yfs/firstBoxDetail"
  155. original_data = {
  156. "queueMode": 2,
  157. "setId": f"{set_id}"
  158. }
  159. request_data = create_request_data(original_data)
  160. payload = {
  161. "da": request_data
  162. }
  163. response = requests.post(url, headers=headers, json=payload, timeout=22)
  164. # print(response.text)
  165. response.raise_for_status()
  166. resp_json = response.json().get("data", {})
  167. if not resp_json:
  168. log.debug("Not resp_json, 没有数据........")
  169. return 0
  170. boxNo = resp_json.get("boxNo") # 箱号
  171. box_id = resp_json.get("id")
  172. orderNo = resp_json.get("orderNo")
  173. # price = resp_json.get("price")
  174. # status = resp_json.get("status")
  175. stock = resp_json.get("stock") # 库存
  176. total = resp_json.get("total") # 总抽数
  177. data_dict = {
  178. "set_id": set_id,
  179. "box_no": boxNo,
  180. "box_id": box_id,
  181. "order_no": orderNo,
  182. "stock": stock,
  183. "total": total
  184. }
  185. # print(data_dict)
  186. sql_pool.insert_one_or_dict(table="rhyf_product_detail_record", data=data_dict)
  187. @retry(stop=stop_after_attempt(5), wait=wait_fixed(5), after=after_log)
  188. def get_draw_single_page(log, box_id: int, page=1, sql_pool=None):
  189. """
  190. 获取箱子抽奖数据
  191. :param log: 日志对象
  192. :param box_id: 箱子ID
  193. :param page: 页码
  194. :param sql_pool: 数据库连接池对象
  195. :return: 数据长度
  196. """
  197. log.debug(f"{inspect.currentframe().f_code.co_name}, box_id:{box_id}, 获取 <抽奖> 数据........")
  198. url = "https://wechatapp.ichibankuji.cn/wechat/yfs/getBoxDrawLog"
  199. original_data = {
  200. # "boxId": 84633,
  201. "boxId": box_id,
  202. "pageSize": 30,
  203. "isMine": 0,
  204. "pageNum": page
  205. }
  206. request_data = create_request_data(original_data)
  207. payload = {
  208. "da": request_data
  209. }
  210. response = requests.post(url, headers=headers, json=payload, timeout=22)
  211. # print(response.text)
  212. response.raise_for_status()
  213. resp_json = response.json().get("data", {}).get("list", [])
  214. if not resp_json:
  215. log.debug("Not resp_json, 没有数据........")
  216. return 0
  217. info_list = []
  218. for item in resp_json:
  219. set_id = item.get("setId")
  220. draw_id = item.get("id")
  221. nickName = item.get("nickName")
  222. createTime = item.get("createTime")
  223. awardInfo = item.get("awardInfo", [])
  224. if not awardInfo:
  225. log.debug(f"box_id:{box_id}, Not awardInfo List, 没有数据........")
  226. continue
  227. for award in awardInfo:
  228. award_type = award.get("typeStr") # 奖品等级
  229. award_total = award.get("total") # 奖品数量
  230. data_dict = {
  231. "set_id": set_id,
  232. "box_id": box_id,
  233. "draw_id": draw_id,
  234. "nick_name": nickName,
  235. "create_time": createTime,
  236. "award_type": award_type,
  237. "award_total": award_total
  238. }
  239. # print(data_dict)
  240. info_list.append(data_dict)
  241. # 写入数据库
  242. if info_list:
  243. sql_pool.insert_many(table="rhyf_product_draw_record", data_list=info_list, ignore=True)
  244. return len(info_list)
  245. def get_draw_list(log, box_id: int, sql_pool=None):
  246. """
  247. 获取箱子所有抽奖数据
  248. :param log: 日志对象
  249. :param box_id: 箱子ID
  250. :param sql_pool: 数据库连接池对象
  251. :return:
  252. """
  253. page = 1
  254. while True:
  255. try:
  256. len_data = get_draw_single_page(log, box_id, page, sql_pool)
  257. if len_data < 30:
  258. log.debug(f"当前页为{page}, 数据长度为{len_data} ,数据已经获取完毕........")
  259. break
  260. page += 1
  261. except Exception as e:
  262. log.error(f"Error getting draw single page: {e}")
  263. break
  264. # ----------------------------------------------------------------------------------------------------------------------
  265. @retry(stop=stop_after_attempt(100), wait=wait_fixed(3600), after=after_log)
  266. def rhyf_main(log):
  267. """
  268. 主函数 自动售货机
  269. :param log: logger对象
  270. """
  271. log.info(
  272. f'开始运行 {inspect.currentframe().f_code.co_name} 爬虫任务....................................................')
  273. # 配置 MySQL 连接池
  274. sql_pool = MySQLConnectionPool(log=log)
  275. if not sql_pool.check_pool_health():
  276. log.error("数据库连接池异常")
  277. raise RuntimeError("数据库连接池异常")
  278. try:
  279. try:
  280. log.debug('------------------- 获取所有页码商品 -------------------')
  281. get_setlist_all_page(log, sql_pool)
  282. except Exception as e:
  283. log.error(f'get_all_page error: {e}')
  284. # --------------------------------------------------------------------
  285. try:
  286. log.debug('------------------- 获取所有商品详情 -------------------')
  287. sql_set_id_list = sql_pool.select_all("select set_id from rhyf_product_list_record where detail_state = 0")
  288. sql_set_id_list = [item[0] for item in sql_set_id_list]
  289. for set_id in sql_set_id_list:
  290. try:
  291. get_box_detail(log, set_id, sql_pool)
  292. # 更改状态
  293. sql_pool.update_one_or_dict(
  294. table="rhyf_product_list_record",
  295. data={"detail_state": 1},
  296. condition={"set_id": set_id}
  297. )
  298. except Exception as e:
  299. log.error(f'get_box_detail error: {e}')
  300. sql_pool.update_one_or_dict(
  301. table="rhyf_product_list_record",
  302. data={"detail_state": 2},
  303. condition={"set_id": set_id}
  304. )
  305. except Exception as e:
  306. log.error(f'get_box_detail error: {e}')
  307. # ---------------------------------------------------------------------
  308. try:
  309. log.debug('------------------- 获取所有商品抽奖数据 -------------------')
  310. sql_box_id_list = sql_pool.select_all("select box_id from rhyf_product_detail_record where draw_state = 0")
  311. sql_box_id_list = [item[0] for item in sql_box_id_list]
  312. for box_id in sql_box_id_list:
  313. try:
  314. get_draw_list(log, box_id, sql_pool)
  315. # 更改状态
  316. sql_pool.update_one_or_dict(
  317. table="rhyf_product_detail_record",
  318. data={"draw_state": 1},
  319. condition={"box_id": box_id}
  320. )
  321. except Exception as e:
  322. log.error(f'get_draw_list error: {e}')
  323. sql_pool.update_one_or_dict(
  324. table="rhyf_product_detail_record",
  325. data={"draw_state": 2},
  326. condition={"box_id": box_id}
  327. )
  328. except Exception as e:
  329. log.error(f'get_draw_list error: {e}')
  330. except Exception as e:
  331. log.error(f'{inspect.currentframe().f_code.co_name} error: {e}')
  332. finally:
  333. log.info(f'爬虫程序 {inspect.currentframe().f_code.co_name} 运行结束,等待下一轮的采集任务............')
  334. if __name__ == '__main__':
  335. # get_set_single_page(logger)
  336. # get_box_detail(logger, 9936)
  337. # get_draw_single_page(logger, 84633)
  338. rhyf_main(logger)