# fsz_reward_spider.py (17 KB)
  1. # -*- coding: utf-8 -*-
  2. # Author : Charley
  3. # Python : 3.10.8
  4. # Date : 2025/11/11 11:54
  5. import time
  6. import random
  7. import inspect
  8. import requests
  9. from loguru import logger
  10. from datetime import datetime
  11. from mysql_pool import MySQLConnectionPool
  12. from tenacity import retry, stop_after_attempt, wait_fixed
# Remove the handlers loguru currently has registered so that records only go
# to the file sink configured below.
logger.remove()
# File sink under ./logs named by date; rotation="00:00" starts a new file at
# midnight and retention="7 day" bounds how long old files are kept.
logger.add("./logs/{time:YYYYMMDD}.log", encoding='utf-8', rotation="00:00",
           format="[{time:YYYY-MM-DD HH:mm:ss.SSS}] {level} {message}",
           level="DEBUG", retention="7 day")

# HTTP headers sent with every API request made by this module.
headers = {
    "authority": "kurabu.feishezhang.com",
    "accept": "*/*",
    "accept-language": "zh-CN,zh;q=0.9",
    "content-type": "application/json",
    "referer": "https://servicewechat.com/wxa5880b2d8e8a0f37/17/page-frame.html",
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "cross-site",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 MicroMessenger/7.0.20.1781(0x6700143B) NetType/WIFI MiniProgramEnv/Windows WindowsWechat/WMPF WindowsWechat(0x63090c33) XWEB/9129",
    "xweb_xhr": "1"
}

# API token passed as the "token" query parameter of every request.
# NOTE(review): this credential is committed in source — consider loading it
# from configuration or the environment instead.
token = "26eed48c80bad053717687daef3a8668"
# Value stored in, and filtered on, the crawl_category column of the records
# this module writes.
crawl_category = "一番赏"
  31. def after_log(retry_state):
  32. """
  33. retry 回调
  34. :param retry_state: RetryCallState 对象
  35. """
  36. # 检查 args 是否存在且不为空
  37. if retry_state.args and len(retry_state.args) > 0:
  38. log = retry_state.args[0] # 获取传入的 logger
  39. else:
  40. log = logger # 使用全局 logger
  41. if retry_state.outcome.failed:
  42. log.warning(
  43. f"Function '{retry_state.fn.__name__}', Attempt {retry_state.attempt_number} Times")
  44. else:
  45. log.info(f"Function '{retry_state.fn.__name__}', Attempt {retry_state.attempt_number} succeeded")
  46. @retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
  47. def get_proxys(log):
  48. """
  49. 获取代理
  50. :return: 代理
  51. """
  52. tunnel = "x371.kdltps.com:15818"
  53. kdl_username = "t13753103189895"
  54. kdl_password = "o0yefv6z"
  55. try:
  56. proxies = {
  57. "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": kdl_username, "pwd": kdl_password, "proxy": tunnel},
  58. "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": kdl_username, "pwd": kdl_password, "proxy": tunnel}
  59. }
  60. return proxies
  61. except Exception as e:
  62. log.error(f"Error getting proxy: {e}")
  63. raise e
  64. @retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
  65. def get_reward_single_page(log, page, sql_pool, sql_reward_list):
  66. """
  67. 获取 赏池 单个页面数据
  68. :param log: 日志对象
  69. :param page: 页码
  70. :param sql_pool: 数据库连接池对象
  71. :param sql_reward_list: sql_reward_list
  72. :return: len(data_list) -> 获取数据长度
  73. """
  74. log.debug(f"{inspect.currentframe().f_code.co_name} 开始获取第{page}页数据")
  75. url = "https://kurabu.feishezhang.com/api.php"
  76. params = {
  77. "s": "reward/index",
  78. "system_type": "default",
  79. "application": "app",
  80. "application_client_type": "weixin",
  81. "token": token,
  82. "uuid": "227de1d4-c429-4e73-b972-42c26560ed83",
  83. "ajax": "ajax"
  84. }
  85. data = {
  86. # "page": 1,
  87. "page": page,
  88. "type": 0
  89. }
  90. # data = json.dumps(data, separators=(',', ':'))
  91. response = requests.post(url, headers=headers, params=params, json=data)
  92. response.raise_for_status()
  93. resp_json = response.json()
  94. if resp_json['code'] == 0:
  95. data_list = resp_json.get('data', {}).get('data', [])
  96. info_list = []
  97. for cdata in data_list:
  98. reward_pool_id = cdata.get('id') # 赏池id
  99. title = cdata.get('title')
  100. simple_desc = cdata.get('simple_desc')
  101. show_image = cdata.get('show_image')
  102. total_sessions = cdata.get('total_sessions') # 总箱数
  103. price = cdata.get('price_per_draw')
  104. status = cdata.get('status')
  105. reward_type = cdata.get('type')
  106. sales_count = cdata.get('sales_count')
  107. # 判断是否在库中
  108. if reward_pool_id in sql_reward_list:
  109. log.debug(f"{inspect.currentframe().f_code.co_name} 赏池ID: {reward_pool_id} 已存在")
  110. sql_pool.update_one_or_dict(table="feishezhang_reward_list_record",
  111. data={"total_sessions": total_sessions, "price": price,
  112. "sales_count": sales_count},
  113. condition={"reward_pool_id": reward_pool_id})
  114. continue
  115. sale_time_int = cdata.get('sale_time') # 开售时间
  116. sale_time = datetime.fromtimestamp(sale_time_int).strftime('%Y-%m-%d %H:%M:%S') if sale_time_int != 0 else 0
  117. order_time_int = cdata.get('order_time') # 下单时间
  118. order_time = datetime.fromtimestamp(order_time_int).strftime(
  119. '%Y-%m-%d %H:%M:%S') if order_time_int != 0 else 0
  120. data_dict = {
  121. "reward_pool_id": reward_pool_id,
  122. "title": title,
  123. "simple_desc": simple_desc,
  124. "show_image": show_image,
  125. "total_sessions": total_sessions,
  126. "price": price,
  127. "status": status,
  128. "reward_type": reward_type,
  129. "sales_count": sales_count,
  130. "sale_time": sale_time,
  131. "order_time": order_time,
  132. "crawl_category": crawl_category
  133. }
  134. # print(data_dict)
  135. info_list.append(data_dict)
  136. # 保存数据
  137. if info_list:
  138. sql_pool.insert_many(table="feishezhang_reward_list_record", data_list=info_list, ignore=True)
  139. return len(data_list)
  140. else:
  141. log.warning(resp_json['msg'])
  142. return 0
  143. def get_reward_list(log, sql_pool, sql_reward_list):
  144. """
  145. 获取 赏池 列表
  146. :param log: 日志对象
  147. :param sql_pool: 数据库连接池对象
  148. :param sql_reward_list: sql_reward_list
  149. """
  150. log.debug(f"{inspect.currentframe().f_code.co_name} 开始获取列表数据")
  151. page = 1
  152. max_page = 1000
  153. while page <= max_page:
  154. len_data_list = get_reward_single_page(log, page, sql_pool, sql_reward_list)
  155. if len_data_list < 15:
  156. log.debug(f"当前页数据不足15条,已结束获取数据,当前页数:{page}")
  157. break
  158. page += 1
  159. time.sleep(random.uniform(1, 2))
  160. @retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
  161. def get_reward_detail(log, reward_pool_id, sql_pool):
  162. """
  163. 获取 赏池 详情
  164. :param log: 日志对象
  165. :param reward_pool_id: 赏池id
  166. :param sql_pool: 数据库连接池对象
  167. """
  168. log.debug(f"{inspect.currentframe().f_code.co_name} 开始获取详情数据")
  169. url = "https://kurabu.feishezhang.com/api.php"
  170. params = {
  171. "s": "reward/GetRandomAvailableSession",
  172. "system_type": "default",
  173. "application": "app",
  174. "application_client_type": "weixin",
  175. "token": token,
  176. "uuid": "227de1d4-c429-4e73-b972-42c26560ed83",
  177. "ajax": "ajax"
  178. }
  179. data = {
  180. # "reward_pool_id": "3cd3fa34-b0b7-4b1b-a593-411191fc745f",
  181. "reward_pool_id": reward_pool_id,
  182. "session_id": ""
  183. }
  184. # data = json.dumps(data, separators=(',', ':'))
  185. response = requests.post(url, headers=headers, params=params, json=data)
  186. response.raise_for_status()
  187. resp_json = response.json()
  188. # print(resp_json)
  189. if resp_json['code'] == 0:
  190. session_data = resp_json.get('data', {}).get('session', {})
  191. session_id = session_data.get('id')
  192. prize_items = resp_json.get('data', {}).get('prize_items', [])
  193. info_list = []
  194. for item in prize_items:
  195. prize_id = item.get('id')
  196. prize_name = item.get('name')
  197. level_nickname = item.get('level_nickname')
  198. parameter = item.get('parameter')
  199. prize_image = item.get('image')
  200. prize_num = item.get('num')
  201. goods_ids = item.get('goods_ids', [])
  202. goods_ids = ','.join(map(str, goods_ids)) if goods_ids else None
  203. prize_price = item.get('reference_price')
  204. prize_inventory = item.get('inventory') # 剩余库存
  205. data_dict = {
  206. "reward_pool_id": reward_pool_id,
  207. "session_id": session_id, # int
  208. "prize_id": prize_id,
  209. "prize_name": prize_name,
  210. "level_nickname": level_nickname,
  211. "parameter": parameter,
  212. "prize_image": prize_image,
  213. "prize_num": prize_num,
  214. "goods_ids": goods_ids,
  215. "prize_price": prize_price,
  216. "prize_inventory": prize_inventory,
  217. "crawl_category": crawl_category
  218. }
  219. # print(data_dict)
  220. info_list.append(data_dict)
  221. sql_pool.insert_many(table="feishezhang_reward_detail_record", data_list=info_list, ignore=True)
  222. else:
  223. log.warning(resp_json['msg'])
  224. @retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
  225. def get_reward_order_single_page(log, reward_pool_id, session_id, page, sql_pool):
  226. """
  227. 获取 赏池 订单列表
  228. :param log: 日志对象
  229. :param reward_pool_id: 赏池id
  230. :param session_id: 订单id
  231. :param page: 页码
  232. :param sql_pool: 数据库连接池对象
  233. :return: len(data_list) -> 获取数据长度
  234. """
  235. log.debug(f"{inspect.currentframe().f_code.co_name} 开始获取第{page}页数据, session_id:{session_id}")
  236. url = "https://kurabu.feishezhang.com/api.php"
  237. params = {
  238. "s": "reward/MiniappGetCurrentSessionDrawRecords",
  239. "system_type": "default",
  240. "application": "app",
  241. "application_client_type": "weixin",
  242. "token": token,
  243. "uuid": "227de1d4-c429-4e73-b972-42c26560ed83",
  244. "ajax": "ajax",
  245. # "session_id": "3065",
  246. "session_id": session_id,
  247. "record_page": str(page)
  248. # "record_page": "1"
  249. }
  250. response = requests.get(url, headers=headers, params=params)
  251. response.raise_for_status()
  252. resp_json = response.json()
  253. if resp_json['code'] == 0:
  254. data_list = resp_json.get('data', [])
  255. if not data_list:
  256. log.debug(f"当前页数据为空,已结束获取数据,当前页数:{page}, session_id:{session_id}")
  257. return 0
  258. info_list = []
  259. for cdata in data_list:
  260. order_id = cdata.get('id')
  261. goods_id = cdata.get('goods_id')
  262. draw_time = cdata.get('draw_time')
  263. level_nickname = cdata.get('level_nickname')
  264. user_id = cdata.get('user_id')
  265. images = cdata.get('images')
  266. title = cdata.get('title')
  267. nickname = cdata.get('nickname')
  268. data_dict = {
  269. "reward_pool_id": reward_pool_id,
  270. "session_id": session_id,
  271. "order_id": order_id,
  272. "goods_id": goods_id,
  273. "draw_time": draw_time,
  274. "level_nickname": level_nickname,
  275. "user_id": user_id,
  276. "images": images,
  277. "title": title,
  278. "nickname": nickname,
  279. "crawl_category": crawl_category
  280. }
  281. # print(data_dict)
  282. info_list.append(data_dict)
  283. sql_pool.insert_many(table="feishezhang_reward_order_record", data_list=info_list, ignore=True)
  284. return len(data_list)
  285. else:
  286. log.warning(resp_json['msg'])
  287. return 0
  288. def get_reward_order_list(log, reward_pool_id, total_sessions, sql_pool):
  289. """
  290. 获取 赏池 订单列表
  291. :param log: 日志对象
  292. :param reward_pool_id: 赏池id
  293. :param total_sessions: 订单总箱数
  294. :param sql_pool: 数据库连接池对象
  295. """
  296. log.debug(f"{inspect.currentframe().f_code.co_name} 开始获取数据")
  297. session_id_list = get_all_boxes(log, reward_pool_id, total_sessions)
  298. for session_id in session_id_list:
  299. log.debug(f"{inspect.currentframe().f_code.co_name} 获取第{session_id}的订单数据")
  300. page = 1
  301. max_page = 1000
  302. while page <= max_page:
  303. try:
  304. len_data_list = get_reward_order_single_page(log, reward_pool_id, session_id, page, sql_pool)
  305. except Exception as e:
  306. log.error(e)
  307. len_data_list = 0
  308. if len_data_list < 20:
  309. log.debug(f"当前页数据不足20条,已结束获取数据,当前页数:{page}")
  310. break
  311. page += 1
  312. # time.sleep(random.uniform(1, 2))
  313. def get_all_boxes(log, reward_pool_id, total_sessions):
  314. url = "https://kurabu.feishezhang.com/api.php"
  315. params = {
  316. "s": "reward/MiniappGetPreviousAvailableSession",
  317. "system_type": "default",
  318. "application": "app",
  319. "application_client_type": "weixin",
  320. # "token": "26eed48c80bad053717687daef3a8668",
  321. "token": token,
  322. "uuid": "0b2f5448-a8ca-4dbe-9bac-0c49a10d9873",
  323. "ajax": "ajax"
  324. }
  325. session_id_list = []
  326. for i in range(1, total_sessions + 1):
  327. log.debug(f"{inspect.currentframe().f_code.co_name} 获取reward_pool_id:{reward_pool_id}, 第{i}箱赏")
  328. data = {
  329. "reward_pool_id": reward_pool_id,
  330. # "reward_pool_id": "65dc0347-63b5-41de-8bb8-811958a3b6c8",
  331. # "current_session_num": 2
  332. "current_session_num": i
  333. }
  334. response = requests.post(url, headers=headers, params=params, json=data)
  335. response.raise_for_status()
  336. resp_json = response.json()
  337. if resp_json['code'] == 0:
  338. session_id = resp_json.get('data', {}).get('id')
  339. session_id_list.append(session_id)
  340. else:
  341. log.warning(f"{resp_json['msg']}")
  342. session_id_list = list(set(session_id_list))
  343. # print(session_id_list)
  344. return session_id_list
  345. @retry(stop=stop_after_attempt(100), wait=wait_fixed(3600), after=after_log)
  346. def fs_yifan_main(log):
  347. """
  348. 主函数
  349. :param log: logger对象
  350. """
  351. log.info(
  352. f'开始运行 {inspect.currentframe().f_code.co_name} 爬虫任务....................................................')
  353. # 配置 MySQL 连接池
  354. sql_pool = MySQLConnectionPool(log=log)
  355. if not sql_pool.check_pool_health():
  356. log.error("数据库连接池异常")
  357. raise RuntimeError("数据库连接池异常")
  358. try:
  359. # 获取赏池列表数据
  360. try:
  361. sql_reward_list = sql_pool.select_all(
  362. f"select distinct reward_pool_id from feishezhang_reward_list_record where crawl_category = '{crawl_category}'")
  363. sql_reward_list = [item[0] for item in sql_reward_list]
  364. get_reward_list(log, sql_pool, sql_reward_list)
  365. except Exception as e:
  366. log.error(f'get_reward_list -> error: {e}')
  367. # # 获取赏列表数据
  368. # sql_detail_list = sql_pool.select_all(
  369. # f"select id, reward_pool_id from feishezhang_reward_list_record where detail_state = 0 and crawl_category = '{crawl_category}'")
  370. # for sql_detail in sql_detail_list:
  371. # sql_id = sql_detail[0]
  372. # reward_pool_id = sql_detail[1]
  373. # log.info(f"开始处理数据:{reward_pool_id}")
  374. # try:
  375. # get_reward_detail(log, reward_pool_id, sql_pool)
  376. # sql_pool.update_one(
  377. # "update feishezhang_reward_list_record set detail_state = 1 where id = %s", (sql_id,))
  378. # except Exception as e:
  379. # log.error(f'get_reward_detail -> error: {e}')
  380. # sql_pool.update_one(
  381. # "update feishezhang_reward_list_record set detail_state = 2 where id = %s", (sql_id,))
  382. #
  383. # time.sleep(random.uniform(0.1, 1))
  384. # 获取订单列表数据
  385. sql_order_list = sql_pool.select_all(
  386. f"select reward_pool_id, total_sessions from feishezhang_reward_list_record where yi_order_state = 0 and crawl_category = '{crawl_category}'")
  387. for sql_order in sql_order_list:
  388. reward_pool_id = sql_order[0]
  389. total_sessions = sql_order[1]
  390. log.info(f"开始处理数据, reward_pool_id:{reward_pool_id}, total_sessions:{total_sessions}")
  391. try:
  392. get_reward_order_list(log, reward_pool_id, total_sessions, sql_pool)
  393. sql_pool.update_one(
  394. "update feishezhang_reward_list_record set yi_order_state = 1 where reward_pool_id = %s",
  395. (reward_pool_id,)
  396. )
  397. except Exception as e:
  398. log.error(f'get_reward_order_list -> error: {e}')
  399. sql_pool.update_one(
  400. "update feishezhang_reward_list_record set yi_order_state = 2 where reward_pool_id = %s",
  401. (reward_pool_id,)
  402. )
  403. except Exception as e:
  404. log.error(f'{inspect.currentframe().f_code.co_name} error: {e}')
  405. finally:
  406. log.info(f'爬虫程序 {inspect.currentframe().f_code.co_name} 运行结束,等待下一轮的采集任务............')
if __name__ == '__main__':
    # Ad-hoc debugging calls, disabled.
    # NOTE(review): some of them no longer match the current function
    # signatures (e.g. get_reward_list takes three arguments) — confirm before
    # re-enabling.
    # get_login()
    # get_reward_list(logger)
    # get_reward_detail(logger, "3cd3fa34-b0b7-4b1b-a593-411191fc745f", None)
    # get_reward_order_list(logger, "3cd3fa34-b0b7-4b1b-a593-411191fc745f", "3065", None)
    # get_all_boxes(logger, "c058b102-8ea6-48c2-9195-1216b010f579", 3)

    # Run the crawl with the module-level loguru logger.
    fs_yifan_main(logger)