# fsz_gu_spider.py (~11 KB)
# -*- coding: utf-8 -*-
# Author : Charley
# Python : 3.10.8
# Date : 2025/11/11 11:54
import time
import random
import inspect
import requests
from loguru import logger
from datetime import datetime
from mysql_pool import MySQLConnectionPool
from tenacity import retry, stop_after_attempt, wait_fixed

"""
谷赏 只有 feishezhang_reward_list_record 和 feishezhang_reward_order_record 两个表
"""
# (Module note above: the "谷赏" source only feeds the two tables
#  feishezhang_reward_list_record and feishezhang_reward_order_record.)

# Configure loguru: one file per day, rotated at midnight, kept 7 days,
# DEBUG level and a fixed timestamped line format.
logger.remove()
logger.add("./logs/{time:YYYYMMDD}.log", encoding='utf-8', rotation="00:00",
           format="[{time:YYYY-MM-DD HH:mm:ss.SSS}] {level} {message}",
           level="DEBUG", retention="7 day")

# Static request headers imitating the WeChat mini-program WebView client.
headers = {
    "authority": "kurabu.feishezhang.com",
    "accept": "*/*",
    "accept-language": "zh-CN,zh;q=0.9",
    "content-type": "application/json",
    "referer": "https://servicewechat.com/wxa5880b2d8e8a0f37/17/page-frame.html",
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "cross-site",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 MicroMessenger/7.0.20.1781(0x6700143B) NetType/WIFI MiniProgramEnv/Windows WindowsWechat/WMPF WindowsWechat(0x63090c33) XWEB/9129",
    "xweb_xhr": "1"
}
# NOTE(review): hard-coded session token — presumably expires; confirm how it
# is refreshed. crawl_category tags every DB row written by this spider.
token = "fb899abaca52caa9932dc4d7efc1dec8"
crawl_category = "谷赏"
  34. def after_log(retry_state):
  35. """
  36. retry 回调
  37. :param retry_state: RetryCallState 对象
  38. """
  39. # 检查 args 是否存在且不为空
  40. if retry_state.args and len(retry_state.args) > 0:
  41. log = retry_state.args[0] # 获取传入的 logger
  42. else:
  43. log = logger # 使用全局 logger
  44. if retry_state.outcome.failed:
  45. log.warning(
  46. f"Function '{retry_state.fn.__name__}', Attempt {retry_state.attempt_number} Times")
  47. else:
  48. log.info(f"Function '{retry_state.fn.__name__}', Attempt {retry_state.attempt_number} succeeded")
  49. @retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
  50. def get_proxys(log):
  51. """
  52. 获取代理
  53. :return: 代理
  54. """
  55. tunnel = "x371.kdltps.com:15818"
  56. kdl_username = "t13753103189895"
  57. kdl_password = "o0yefv6z"
  58. try:
  59. proxies = {
  60. "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": kdl_username, "pwd": kdl_password, "proxy": tunnel},
  61. "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": kdl_username, "pwd": kdl_password, "proxy": tunnel}
  62. }
  63. return proxies
  64. except Exception as e:
  65. log.error(f"Error getting proxy: {e}")
  66. raise e
  67. @retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
  68. def get_reward_single_page(log, page, sql_pool):
  69. """
  70. 获取 赏品 单个页面数据
  71. :param log: 日志对象
  72. :param page: 页码
  73. :param sql_pool: 数据库连接池对象
  74. :return: len(data_list) -> 获取数据长度
  75. """
  76. log.debug(f"{inspect.currentframe().f_code.co_name} 开始获取第{page}页数据")
  77. url = "https://kurabu.feishezhang.com/api.php"
  78. params = {
  79. "s": "prize/search",
  80. "system_type": "default",
  81. "application": "app",
  82. "application_client_type": "weixin",
  83. "token": token,
  84. "uuid": "7601421a-dc03-4370-b7c7-c17ac84d72eb",
  85. "ajax": "ajax"
  86. }
  87. data = {
  88. "categoryIds": "",
  89. "keyword": None,
  90. "page": page
  91. }
  92. # data = json.dumps(data, separators=(',', ':'))
  93. response = requests.post(url, headers=headers, params=params, json=data)
  94. response.raise_for_status()
  95. resp_json = response.json()
  96. if resp_json['code'] == 0:
  97. data_list = resp_json.get('data', {}).get('data', [])
  98. info_list = []
  99. for cdata in data_list:
  100. reward_pool_id = cdata.get('id') # 赏池id
  101. title = cdata.get('title')
  102. simple_desc = cdata.get('simple_desc')
  103. show_image = cdata.get('show_image')
  104. total_sessions = cdata.get('spec') # 总箱数
  105. price = cdata.get('price')
  106. # status = cdata.get('status')
  107. # reward_type = cdata.get('type')
  108. sales_count = cdata.get('sales_count')
  109. sale_time_int = cdata.get('add_time') # 开售时间
  110. sale_time = datetime.fromtimestamp(sale_time_int).strftime('%Y-%m-%d %H:%M:%S') if sale_time_int != 0 else 0
  111. order_time_int = cdata.get('upd_time') # 下单时间
  112. order_time = datetime.fromtimestamp(order_time_int).strftime(
  113. '%Y-%m-%d %H:%M:%S') if order_time_int != 0 else 0
  114. category_id = cdata.get('category_id')
  115. data_dict = {
  116. "reward_pool_id": reward_pool_id,
  117. "title": title,
  118. "simple_desc": simple_desc,
  119. "show_image": show_image,
  120. "total_sessions": total_sessions,
  121. "price": price,
  122. # "status": status,
  123. # "reward_type": reward_type,
  124. "sales_count": sales_count,
  125. "sale_time": sale_time,
  126. "order_time": order_time,
  127. "category_id": category_id,
  128. "crawl_category": crawl_category
  129. }
  130. # print(data_dict)
  131. info_list.append(data_dict)
  132. sql_pool.insert_many(table="feishezhang_reward_list_record", data_list=info_list, ignore=True)
  133. return len(data_list)
  134. else:
  135. log.warning(resp_json['msg'])
  136. return 0
  137. def get_reward_list(log, sql_pool):
  138. """
  139. 获取 赏品 列表
  140. :param log: 日志对象
  141. :param sql_pool: 数据库连接池对象
  142. """
  143. log.debug(f"{inspect.currentframe().f_code.co_name} 开始获取列表数据")
  144. page = 1
  145. max_page = 1000
  146. while page <= max_page:
  147. len_data_list = get_reward_single_page(log, page, sql_pool)
  148. if len_data_list < 20:
  149. log.debug(f"当前页数据不足20条,已结束获取数据,当前页数:{page}")
  150. break
  151. page += 1
  152. time.sleep(random.uniform(1, 2))
  153. @retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
  154. def get_reward_order_single_page(log, reward_pool_id, sql_pool, page=1):
  155. """
  156. 获取 赏池 订单列表
  157. :param log: 日志对象
  158. :param reward_pool_id: 赏池id
  159. :param page: 页码
  160. :param sql_pool: 数据库连接池对象
  161. :return: len(data_list) -> 获取数据长度
  162. """
  163. log.debug(f"{inspect.currentframe().f_code.co_name} 开始获取第 1 页数据")
  164. url = "https://kurabu.feishezhang.com/api.php"
  165. params = {
  166. "s": "prize/GetLotteryRecord",
  167. "system_type": "default",
  168. "application": "app",
  169. "application_client_type": "weixin",
  170. "token": token,
  171. "uuid": "7601421a-dc03-4370-b7c7-c17ac84d72eb",
  172. "ajax": "ajax",
  173. "page": str(page),
  174. # "page": '1',
  175. "prizePoolId": reward_pool_id
  176. }
  177. response = requests.get(url, headers=headers, params=params)
  178. response.raise_for_status()
  179. resp_json = response.json()
  180. if resp_json['code'] == 0:
  181. data_list = resp_json.get('data', {}).get('data', [])
  182. # print(data_list)
  183. if not data_list:
  184. log.debug(f"当前页数据为空,已结束获取数据,当前页数:{page}")
  185. return 0
  186. info_list = []
  187. for cdata in data_list:
  188. # order_id = cdata.get('id')
  189. order_id = cdata.get('order_id')
  190. goods_id = cdata.get('goods_id')
  191. draw_time = cdata.get('add_time')
  192. # level_nickname = cdata.get('level_nickname')
  193. user_id = cdata.get('user_id')
  194. images = cdata.get('images')
  195. title = cdata.get('title')
  196. nickname = cdata.get('nickname')
  197. price = cdata.get('price')
  198. buy_number = cdata.get('buy_number')
  199. data_dict = {
  200. "reward_pool_id": reward_pool_id,
  201. "order_id": order_id,
  202. "goods_id": goods_id,
  203. "draw_time": draw_time,
  204. # "level_nickname": level_nickname,
  205. "user_id": user_id,
  206. "images": images,
  207. "title": title,
  208. "nickname": nickname,
  209. "price": price,
  210. "buy_number": buy_number,
  211. "crawl_category": crawl_category
  212. }
  213. # print(data_dict)
  214. info_list.append(data_dict)
  215. sql_pool.insert_many(table="feishezhang_reward_order_record", data_list=info_list, ignore=True)
  216. return len(data_list)
  217. else:
  218. log.warning(resp_json['msg'])
  219. return 0
@retry(stop=stop_after_attempt(100), wait=wait_fixed(3600), after=after_log)
def fsz_gu_main(log):
    """
    Main crawl task: read pending reward pools from MySQL and fetch their
    order records, marking each row done (1) or failed (2).

    NOTE(review): tenacity re-runs this function only when an exception
    escapes it; the broad try/except below swallows most errors, so the
    hourly (3600s) re-run fires only on failures raised before the try
    (e.g. pool-health failure) — confirm this matches the intended
    "wait for the next round" scheduling.

    :param log: logger object
    """
    log.info(
        f'开始运行 {inspect.currentframe().f_code.co_name} 爬虫任务....................................................')
    # Set up the MySQL connection pool and verify it before doing any work.
    sql_pool = MySQLConnectionPool(log=log)
    if not sql_pool.check_pool_health():
        log.error("数据库连接池异常")
        raise RuntimeError("数据库连接池异常")
    try:
        # Reward-pool list crawl — currently disabled.
        # try:
        #     get_reward_list(log, sql_pool)
        # except Exception as e:
        #     log.error(f'get_reward_list -> error: {e}')
        # Select every pool of this category not yet marked done (order_state != 1).
        sql_order_list = sql_pool.select_all(
            f"select id, reward_pool_id from feishezhang_reward_list_record where order_state != 1 and crawl_category = '{crawl_category}'")
        # f"select id, reward_pool_id from feishezhang_reward_list_record where order_state = 0 and crawl_category = '{crawl_category}'")
        for sql_order in sql_order_list:
            sql_id = sql_order[0]          # primary key of the list-table row
            reward_pool_id = sql_order[1]  # remote reward-pool id
            log.info(f"开始处理数据:{reward_pool_id}")
            try:
                get_reward_order_single_page(log, reward_pool_id, sql_pool)
                # Success: mark the row done so it is skipped next run.
                sql_pool.update_one(
                    "update feishezhang_reward_list_record set order_state = 1 where id = %s", (sql_id,)
                )
            except Exception as e:
                log.error(f'get_reward_order_single_page -> error: {e}')
                # Failure: mark order_state = 2; still != 1, so it will be
                # picked up again on the next round.
                sql_pool.update_one(
                    "update feishezhang_reward_list_record set order_state = 2 where id = %s", (sql_id,)
                )
    except Exception as e:
        log.error(f'{inspect.currentframe().f_code.co_name} error: {e}')
    finally:
        log.info(f'爬虫程序 {inspect.currentframe().f_code.co_name} 运行结束,等待下一轮的采集任务............')
if __name__ == '__main__':
    # Ad-hoc manual test entry points (kept for reference):
    # get_reward_list(logger, None)
    # get_reward_order_list(logger, "2876", None)
    # get_reward_order_single_page(logger, "2373", None)
    fsz_gu_main(logger)