xc_spider.py 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793
  1. # -*- coding: utf-8 -*-
  2. # Author : Charley
  3. # Python : 3.10.8
  4. # Date : 2025/7/14 15:16
  5. import datetime
  6. import random
  7. import time
  8. import requests
  9. from get_sign import get_sign
  10. import inspect
  11. import schedule
  12. from loguru import logger
  13. from tenacity import retry, stop_after_attempt, wait_fixed
  14. from mysql_pool import MySQLConnectionPool
  15. logger.remove()
  16. logger.add("./logs/{time:YYYYMMDD}.log", encoding='utf-8', rotation="00:00",
  17. format="[{time:YYYY-MM-DD HH:mm:ss.SSS}] {level} {message}",
  18. level="DEBUG", retention="7 day")
  19. HEADERS = {
  20. "User-Agent": "Mozilla/5.0 (Linux; Android 11; Pixel 5 Build/RQ3A.211001.001; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/83.0.4103.106 Mobile Safari/537.36 uni-app Html5Plus/1.0 (Immersed/52.727272)",
  21. "Connection": "Keep-Alive",
  22. "Accept": "application/json",
  23. "Accept-Encoding": "gzip",
  24. "Content-Type": "application/json",
  25. "Cache-Control": "no-cache",
  26. # "sign": "3c5028da758dc416455f575334cadaab",
  27. # "x-token": "782a2174df1a19aa26904dad1d347c97",
  28. "client": "yingyongbao",
  29. "appversion": "2.1.10",
  30. # "nonce": "507060933e1e0585",
  31. "deviceid": "null",
  32. "jrd": "100d85590861f713a85",
  33. # "timestamp": f"{ts}"
  34. }
  35. def after_log(retry_state):
  36. """
  37. retry 回调
  38. :param retry_state: RetryCallState 对象
  39. """
  40. # 检查 args 是否存在且不为空
  41. if retry_state.args and len(retry_state.args) > 0:
  42. log = retry_state.args[0] # 获取传入的 logger
  43. else:
  44. log = logger # 使用全局 logger
  45. if retry_state.outcome.failed:
  46. log.warning(
  47. f"Function '{retry_state.fn.__name__}', Attempt {retry_state.attempt_number} Times")
  48. else:
  49. log.info(f"Function '{retry_state.fn.__name__}', Attempt {retry_state.attempt_number} succeeded")
  50. @retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
  51. def get_proxys(log):
  52. """
  53. 获取代理
  54. :return: 代理
  55. """
  56. tunnel = "x371.kdltps.com:15818"
  57. kdl_username = "t13753103189895"
  58. kdl_password = "o0yefv6z"
  59. try:
  60. proxies = {
  61. "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": kdl_username, "pwd": kdl_password, "proxy": tunnel},
  62. "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": kdl_username, "pwd": kdl_password, "proxy": tunnel}
  63. }
  64. return proxies
  65. except Exception as e:
  66. log.error(f"Error getting proxy: {e}")
  67. raise e
  68. def transfer_ts(timestamp):
  69. """
  70. 将10位时间戳转换为指定格式的时间字符串
  71. :param timestamp 1730389419
  72. :return formatted_time
  73. """
  74. dt_object = datetime.datetime.fromtimestamp(timestamp)
  75. formatted_time = dt_object.strftime('%Y-%m-%d %H:%M:%S')
  76. return formatted_time
  77. def get_product_list(log, sql_pool, sql_shop_id_list):
  78. page = 1
  79. while page <= 100:
  80. try:
  81. log.debug(f'--------------- page {page} start ---------------')
  82. len_items, total_page = get_product_single_page(log, page, sql_pool, sql_shop_id_list)
  83. except Exception as e:
  84. log.error(f"{inspect.currentframe().f_code.co_name} Request get_product_single_page for page:{page}, {e}")
  85. len_items = 0
  86. total_page = 0
  87. if len_items < 10:
  88. log.debug(f'--------------- page {page} has {len_items} items, break ---------------')
  89. break
  90. if total_page == page:
  91. log.debug(f'--------------- page {page} has {total_page} pages, break ---------------')
  92. break
  93. page += 1
  94. # 设置等待时间 避免查询太频繁
  95. time.sleep(random.uniform(0.5, 1))
  96. @retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
  97. def get_product_single_page(log, page: int, sql_pool, sql_shop_id_list):
  98. url = "https://api.xingchao6.com/AppClient/v1.1/product/list/page"
  99. ts = int((time.time()) * 1000)
  100. data = {
  101. # "page": 1,
  102. "page": page,
  103. "page_size": 10,
  104. "timestamp": ts
  105. }
  106. sign, nonce = get_sign(data, ts)
  107. headers = HEADERS.copy()
  108. headers["sign"] = sign
  109. headers["nonce"] = nonce
  110. headers["timestamp"] = str(ts)
  111. # response = requests.post(url, headers=headers, json=data)
  112. response = requests.post(url, headers=headers, json=data, proxies=get_proxys(log), timeout=10)
  113. # print(response.text)
  114. response.raise_for_status()
  115. if response.json().get("code") == 200:
  116. total_page = response.json().get("data", {}).get("page", {}).get("total_page", 0)
  117. log.debug(f'--------------- page {page} has {total_page} items ---------------')
  118. if total_page > 0:
  119. items = response.json().get("data", {}).get("list", [])
  120. if not items:
  121. log.debug(f'--------------- page {page} has no items ---------------')
  122. return
  123. info_list = []
  124. for item in items:
  125. # log.debug(f'--------------- item: {item} ---------------')
  126. shop_data = item.get("shop", {})
  127. shop_id = shop_data.get("id")
  128. if shop_id and shop_id in sql_shop_id_list:
  129. log.debug(f'--------------- shop_id: {shop_id} is exist, skip ---------------')
  130. continue
  131. shop_name = shop_data.get("name")
  132. data_dict = {
  133. "shop_id": shop_id,
  134. "shop_name": shop_name
  135. }
  136. # print(data_dict)
  137. info_list.append(data_dict)
  138. sql_shop_id_list.append(shop_id)
  139. # 插入数据库
  140. if info_list:
  141. try:
  142. sql_pool.insert_many(table="xingchao_shop_record", data_list=info_list)
  143. except Exception as e:
  144. log.warning(f"{inspect.currentframe().f_code.co_name}, {e[:500]}")
  145. return len(items), total_page
  146. return 0, total_page
  147. else:
  148. log.debug(f'--------------- page {page} error, {response.json().get("message")} ---------------')
  149. return 0, 0
  150. # ----------------------------------------------------------------------------------------------------------------------
  151. def get_shop_product_sold_list(log, shop_id, sql_pool, sql_product_id_list, x_token):
  152. page = 1
  153. while page <= 100:
  154. try:
  155. log.debug(f'--------------- page {page}, shop_id {shop_id} start ---------------')
  156. len_items, total_page = get_shop_product_sold_single_page(log, page, sql_pool, shop_id, sql_product_id_list,
  157. x_token)
  158. except Exception as e:
  159. log.error(
  160. f"{inspect.currentframe().f_code.co_name} Request get_shop_product_sold_single_page for page:{page}, {e}")
  161. len_items = 0
  162. total_page = 0
  163. if len_items < 20:
  164. log.debug(f'--------------- page {page} has {len_items} items, break ---------------')
  165. break
  166. if total_page == page:
  167. log.debug(f'--------------- page {page} has {total_page} pages, break ---------------')
  168. break
  169. page += 1
  170. # 设置等待时间 避免查询太频繁
  171. time.sleep(random.uniform(0.5, 1))
  172. @retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
  173. def get_shop_product_sold_single_page(log, page: int, sql_pool, shop_id, sql_product_id_list, x_token):
  174. url = "https://api.xingchao6.com/AppClient/v1.1/product/list/page"
  175. ts = int((time.time()) * 1000)
  176. data = {
  177. "shop_id": shop_id,
  178. # "shop_id": 53,
  179. "state": 2,
  180. "page": page,
  181. # "page": 1,
  182. "ob_k": "show_time",
  183. "ob_v": 1,
  184. "timestamp": f"{ts}"
  185. }
  186. sign, nonce = get_sign(data, ts)
  187. headers = HEADERS.copy()
  188. headers["sign"] = sign
  189. headers["nonce"] = nonce
  190. headers["timestamp"] = str(ts)
  191. # response = requests.post(url, headers=headers, json=data)
  192. response = requests.post(url, headers=headers, json=data, proxies=get_proxys(log), timeout=10)
  193. # print(response.text)
  194. response.raise_for_status()
  195. if response.json().get("code") == 200:
  196. total_page = response.json().get("data", {}).get("page", {}).get("total_page", 0)
  197. log.debug(f'--------------- page {page} has {total_page} items ---------------')
  198. if total_page > 0:
  199. items = response.json().get("data", {}).get("list", [])
  200. if not items:
  201. log.debug(f'--------------- page {page} has no items ---------------')
  202. return
  203. info_list = []
  204. for item in items:
  205. product_id = item.get("id")
  206. no = item.get("no")
  207. if product_id and product_id in sql_product_id_list:
  208. log.debug(f'--------------- product_id {product_id} has been crawled ---------------')
  209. continue
  210. cate_id = item.get("cate_id")
  211. create_time = item.get("create_time")
  212. title = item.get("title")
  213. subtitle = item.get("subtitle")
  214. spec_config = item.get("spec_config")
  215. spec_total = item.get("spec_total")
  216. sort = item.get("sort")
  217. is_rnd_show = item.get("is_rnd_show")
  218. tag_type = item.get("tag_type")
  219. tag_type_cn = item.get("tag_type_cn")
  220. state = item.get("state")
  221. state_cn = item.get("state_cn")
  222. shop_id = item.get("shop", {}).get("id")
  223. shop_name = item.get("shop", {}).get("name")
  224. is_purchase_limit = item.get("is_purchase_limit")
  225. on_sale_time = item.get("on_sale_time")
  226. if on_sale_time:
  227. on_sale_time = transfer_ts(on_sale_time)
  228. discount_config = item.get("discount_config")
  229. try:
  230. product_detail = get_sold_detail(log, no, x_token)
  231. except Exception as e:
  232. log.error(f"获取商品详情失败: {e}")
  233. product_detail = {}
  234. data_dict = {
  235. "product_id": product_id,
  236. "no": no,
  237. "cate_id": cate_id,
  238. "create_time": create_time,
  239. "title": title,
  240. "subtitle": subtitle,
  241. "img": product_detail["img"],
  242. "price_sale": product_detail["price_sale"],
  243. "total_price": product_detail["total_price"],
  244. "sale_num": product_detail["sale_num"],
  245. "spec_config": spec_config,
  246. "spec_total": spec_total,
  247. "sort": sort,
  248. "is_rnd_show": is_rnd_show,
  249. "tag_type": tag_type,
  250. "tag_type_cn": tag_type_cn,
  251. "state": state,
  252. "state_cn": state_cn,
  253. "shop_id": shop_id,
  254. "shop_name": shop_name,
  255. "is_purchase_limit": is_purchase_limit,
  256. "category": product_detail["category"],
  257. "on_sale_time": on_sale_time,
  258. "end_time": product_detail["end_time"],
  259. "finish_time": product_detail["finish_time"],
  260. "discount_config": str(discount_config),
  261. "spec_config_desc": product_detail["spec_config_desc"],
  262. "content": product_detail["content"],
  263. "brief": product_detail["brief"],
  264. "video_url": product_detail["video_url"]
  265. }
  266. # print(data_dict)
  267. info_list.append(data_dict)
  268. sql_product_id_list.append(product_id)
  269. if info_list:
  270. sql_pool.insert_many(table="xingchao_product_record", data_list=info_list)
  271. return len(items), total_page
  272. return 0, total_page
  273. else:
  274. log.debug(f'--------------- page {page} error, {response.json().get("message")} ---------------')
  275. return 0, 0
  276. @retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
  277. def get_sold_detail(log, no, x_token):
  278. log.debug(f'--------------- get_sold_detail:{no} ---------------')
  279. url = "https://api.xingchao6.com/AppClient/v2/product/detail"
  280. ts = int((time.time()) * 1000)
  281. params = {
  282. # "no": "SN10763635",
  283. "no": no,
  284. "timestamp": f"{ts}"
  285. }
  286. sign, nonce = get_sign(params, ts)
  287. headers = HEADERS.copy()
  288. headers["sign"] = sign
  289. headers["nonce"] = nonce
  290. headers["timestamp"] = str(ts)
  291. # response = requests.get(url, headers=headers, params=params)
  292. response = requests.get(url, headers=headers, params=params, proxies=get_proxys(log), timeout=10)
  293. # print(response.text)
  294. response.raise_for_status()
  295. if response.json().get("code") == 200:
  296. item = response.json().get("data", {})
  297. if not item:
  298. return {}
  299. price_sale = item.get("price_sale")
  300. sale_num = item.get("sale_num")
  301. if price_sale:
  302. results = sale_num * float(price_sale)
  303. total_price = round(results, 2)
  304. else:
  305. total_price = None
  306. defined_config = item.get("defined_config", [])
  307. if defined_config:
  308. brief = defined_config[0].get("brief") # 活动
  309. else:
  310. brief = None
  311. content = item.get("content", "")
  312. # content = html.escape(item.get("content", ""))
  313. # print("content:", content)
  314. try:
  315. video_url = get_video_info(log, no, x_token)
  316. except Exception as e:
  317. log.debug(e)
  318. video_url = None
  319. detail_dict = {
  320. "price_sale": price_sale,
  321. "sale_num": sale_num,
  322. "total_price": str(total_price),
  323. "img": ','.join([url for url_list in item['img'].values() for url in url_list]),
  324. "category": item.get("category").get("name"),
  325. "end_time": transfer_ts(item.get("end_time")),
  326. "finish_time": item.get("finish_time"),
  327. "spec_config_desc": item.get("template", {}).get("spec_config_desc"),
  328. "content": str(content),
  329. "brief": brief,
  330. "video_url": video_url
  331. }
  332. # print('detail_dict:', detail_dict)
  333. return detail_dict
  334. else:
  335. log.debug(
  336. f'-------------{inspect.currentframe().f_code.co_name}-- no {no} error, {response.json().get("message")} ---------------')
  337. return {}
  338. @retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
  339. def get_video_info(log, no, x_token):
  340. log.debug(f'-------------{inspect.currentframe().f_code.co_name}-- no {no} ---------------')
  341. url = "https://api.xingchao6.com/AppClient/v1.1/live/watch"
  342. ts = int((time.time()) * 1000)
  343. params = {
  344. # "product_no": "SN10763635",
  345. "product_no": no,
  346. "timestamp": str(ts)
  347. }
  348. sign, nonce = get_sign(params, ts)
  349. headers = HEADERS.copy()
  350. headers["sign"] = sign
  351. headers["nonce"] = nonce
  352. headers["timestamp"] = str(ts)
  353. headers["x-token"] = x_token
  354. # response = requests.get(url, headers=headers, params=params)
  355. response = requests.get(url, headers=headers, params=params, proxies=get_proxys(log), timeout=10)
  356. # print(response.text)
  357. response.raise_for_status()
  358. if response.json().get("code") == 200:
  359. item = response.json().get("data", {})
  360. if not item:
  361. return None
  362. video_url = item.get("url")
  363. return video_url
  364. else:
  365. log.debug(
  366. f'-------------{inspect.currentframe().f_code.co_name}-- no {no} error, {response.json().get("message")} ---------------')
  367. return None
  368. @retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
  369. def get_player_list(log, no, sql_pool, x_token):
  370. """
  371. 获取玩家列表 type:1 -> 福运签列表 / type:2 -> 用户列表
  372. :param log:
  373. :param sql_pool:
  374. :param no:
  375. :param x_token:
  376. :return: True表示成功获取并保存数据,False表示商品不存在或数据为空(状态已内部处理)
  377. """
  378. log.debug(f'-------------{inspect.currentframe().f_code.co_name}-- get {no} ---------------')
  379. url = "https://api.xingchao6.com/AppClient/v1/product/order/km/list/page"
  380. next_id = None
  381. while True:
  382. ts = int((time.time()) * 1000)
  383. params = {
  384. "type": "2",
  385. # "no": "SN10763635",
  386. "no": no,
  387. "timestamp": str(ts)
  388. }
  389. if next_id:
  390. params["next_id"] = next_id
  391. sign, nonce = get_sign(params, ts)
  392. headers = HEADERS.copy()
  393. headers["sign"] = sign
  394. headers["nonce"] = nonce
  395. headers["timestamp"] = str(ts)
  396. headers["x-token"] = x_token
  397. # response = requests.get(url, headers=headers, params=params)
  398. response = requests.get(url, headers=headers, params=params, proxies=get_proxys(log), timeout=10)
  399. # print('get_player_list:', response.json())
  400. response.raise_for_status()
  401. resp_json = response.json()
  402. # print('get_player_list:',resp_json)
  403. if response.json().get("code") == 500:
  404. log.error(f"{no}商品不存在, 更改状态为2..........")
  405. sql_pool.update_one("UPDATE xingchao_product_record SET player_stats = 2 WHERE no = %s", (no,))
  406. return False
  407. data_list = resp_json.get("data", {}).get("list", [])
  408. if not data_list:
  409. logger.debug("data_list is empty, get_player_list end")
  410. # data_list为空 更改状态为3 未上传
  411. sql_pool.update_one("UPDATE xingchao_product_record SET player_stats = 3 WHERE no = %s", (no,))
  412. return False
  413. player_data_list = []
  414. for item in data_list:
  415. card_id = item.get("card_id")
  416. card_name = item.get("card_name")
  417. card_set = item.get("card_set")
  418. create_time = item.get("create_time")
  419. km_id = item.get("id")
  420. num = item.get("num")
  421. # ori_id = item.get("ori_id")
  422. ori_id = item.get("ori_template_id")
  423. player = item.get("player")
  424. player_cn = item.get("player_cn")
  425. seq = item.get("seq")
  426. team = item.get("team")
  427. team_cn = item.get("team_cn")
  428. user_id = item.get("user_id")
  429. user_name = item.get("user_name")
  430. # player_data = (
  431. # no, card_id, card_name, card_set, create_time, km_id, num, ori_id, player, player_cn, seq, team,
  432. # team_cn, user_id, user_name
  433. # )
  434. player_dict = {
  435. "no": no,
  436. "card_id": card_id,
  437. "card_name": card_name,
  438. "card_set": card_set,
  439. "create_time": create_time,
  440. "km_id": km_id,
  441. "num": num,
  442. "ori_id": ori_id,
  443. "player": player,
  444. "player_cn": player_cn,
  445. "seq": seq,
  446. "team": team,
  447. "team_cn": team_cn,
  448. "user_id": user_id,
  449. "user_name": user_name
  450. }
  451. # print('player_data:', player_data)
  452. player_data_list.append(player_dict)
  453. try:
  454. sql_pool.insert_many(table="xingchao_player_record", data_list=player_data_list)
  455. except Exception as e:
  456. log.warning(f"保存数据失败:{e[:500]}")
  457. # next_id = data_list[-1].get("id") if data_list else None
  458. next_id = data_list[-1].get("id") if len(data_list) == 20 else None
  459. if not next_id:
  460. logger.debug("next_id is not, get_player_list end")
  461. return True
  462. # 设置等待时间 避免查询太频繁
  463. time.sleep(random.uniform(0.5, 1))
  464. @retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
  465. def get_good_report_list():
  466. """
  467. 获得本期好卡
  468. """
  469. url = "https://api.xingchao6.com/AppClient/v1/product/unpackage/list/page"
  470. ts = int((time.time()) * 1000)
  471. params = {
  472. "get_type": "0",
  473. "type": "2",
  474. "no": "SN10763635",
  475. "page_size": "10",
  476. "timestamp": "1752560144500"
  477. }
  478. sign, nonce = get_sign(params, ts)
  479. headers = HEADERS.copy()
  480. headers["sign"] = sign
  481. headers["nonce"] = nonce
  482. headers["timestamp"] = str(ts)
  483. response = requests.get(url, headers=headers, params=params)
  484. print(response.text)
  485. print(response)
  486. @retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
  487. def get_others_report_list(log, no, sql_pool):
  488. """
  489. 其他赠品
  490. """
  491. try:
  492. next_id = None
  493. all_data = []
  494. while True:
  495. url = "https://api.xingchao6.com/AppClient/v1/product/unpackage/list/page"
  496. ts = int((time.time()) * 1000)
  497. params = {
  498. "get_type": "0",
  499. "type": "3",
  500. # "no": "SN10763635",
  501. "no": no,
  502. "page_size": "100",
  503. "timestamp": f"{ts}"
  504. }
  505. if next_id:
  506. params["next_id"] = next_id
  507. sign, nonce = get_sign(params, ts)
  508. headers = HEADERS.copy()
  509. headers["sign"] = sign
  510. headers["nonce"] = nonce
  511. headers["timestamp"] = str(ts)
  512. # response = requests.get(url, headers=headers, params=params)
  513. response = requests.get(url, headers=headers, params=params, proxies=get_proxys(log), timeout=10)
  514. # print(response.text)
  515. response.raise_for_status()
  516. if response.json().get("code") == 500:
  517. log.error(f"{no}商品不存在, 更改状态为2..........")
  518. sql_pool.update_one("UPDATE xingchao_product_record SET report_stats = 2 WHERE no = %s", (no,))
  519. break
  520. data = response.json().get("data", {}).get("data", [])
  521. if not data:
  522. log.debug(f"{no}response.json() data 为空, 更改状态为3.........")
  523. sql_pool.update_one("UPDATE xingchao_product_record SET report_stats = 3 WHERE no = %s", (no,))
  524. break
  525. all_data.extend(data)
  526. # 获取下一页的标识符
  527. next_id = data[-1].get("id")
  528. if len(data) < 100:
  529. log.debug(f"{no} -> len data 长度小于100, 停止查询.........")
  530. break
  531. # 设置等待时间 避免查询太频繁
  532. time.sleep(random.uniform(0.5, 1))
  533. info_list = []
  534. if all_data:
  535. for item in all_data:
  536. card_id = item.get("card_id")
  537. card_name = item.get("card_name")
  538. card_set = item.get("card_set")
  539. create_time = item.get("create_time")
  540. report_id = item.get("id")
  541. imgs = item.get("img")
  542. is_good = item.get("is_good")
  543. lottery_num = item.get("lottery_num")
  544. player = item.get("player")
  545. player_cn = item.get("player_cn")
  546. status = item.get("status")
  547. status_cn = item.get("status_cn")
  548. team = item.get("team")
  549. team_cn = item.get("team_cn")
  550. user_name = item.get("user_name")
  551. # info = (
  552. # no, card_id, card_name, card_set, create_time, report_id, imgs, is_good, lottery_num, player,
  553. # player_cn, status, status_cn, team, team_cn, user_name)
  554. data_dict = {
  555. "no": no,
  556. "card_id": card_id,
  557. "card_name": card_name,
  558. "card_set": card_set,
  559. "create_time": create_time,
  560. "report_id": report_id,
  561. "imgs": imgs,
  562. "is_good": is_good,
  563. "lottery_num": lottery_num,
  564. "player": player,
  565. "player_cn": player_cn,
  566. "status": status,
  567. "status_cn": status_cn,
  568. "team": team,
  569. "team_cn": team_cn,
  570. "user_name": user_name
  571. }
  572. # print(data_dict)
  573. info_list.append(data_dict)
  574. if info_list:
  575. sql_pool.insert_many(table="xingchao_report_record", data_list=info_list)
  576. log.info(f"{no}商品数据采集完成, 更改状态为1..........")
  577. sql_pool.update_one("UPDATE xingchao_product_record SET report_stats = 1 WHERE no = %s", (no,))
  578. except Exception as e:
  579. log.error(e)
  580. @retry(stop=stop_after_attempt(100), wait=wait_fixed(3600), after=after_log)
  581. def xc_main(log):
  582. """
  583. 主函数
  584. :param log: logger对象
  585. """
  586. log.info(
  587. f'开始运行 {inspect.currentframe().f_code.co_name} 爬虫任务....................................................')
  588. # 配置 MySQL 连接池
  589. sql_pool = MySQLConnectionPool(log=log)
  590. if not sql_pool.check_pool_health():
  591. log.error("数据库连接池异常")
  592. raise RuntimeError("数据库连接池异常")
  593. try:
  594. token = sql_pool.select_one("select token from xingchao_token")
  595. x_token = token[0]
  596. logger.debug(f"x_token:{x_token}")
  597. try:
  598. sql_shop_id_list = sql_pool.select_all("select shop_id from xingchao_shop_record")
  599. sql_shop_id_list = [i[0] for i in sql_shop_id_list]
  600. get_product_list(log, sql_pool, sql_shop_id_list)
  601. except Exception as e:
  602. log.error(f"Request shop id error: {e}")
  603. time.sleep(10)
  604. try:
  605. sql_product_id_list = sql_pool.select_all("select product_id from xingchao_product_record")
  606. sql_product_id_list = [i[0] for i in sql_product_id_list]
  607. sql_query_shop_id_list = sql_pool.select_all("select shop_id from xingchao_shop_record")
  608. sql_query_shop_id_list = [i[0] for i in sql_query_shop_id_list]
  609. for shop_id in sql_query_shop_id_list:
  610. try:
  611. get_shop_product_sold_list(log, shop_id, sql_pool, sql_product_id_list, x_token)
  612. except Exception as e:
  613. log.error(e)
  614. except Exception as e:
  615. log.error(f"Request get_shop_product_sold_list error: {e}")
  616. time.sleep(10)
  617. try:
  618. sql_player_list = sql_pool.select_all("select no from xingchao_product_record where player_stats = 0")
  619. sql_player_list = [i[0] for i in sql_player_list]
  620. for noid in sql_player_list:
  621. try:
  622. success = get_player_list(log, noid, sql_pool, x_token)
  623. # 只有成功获取并保存数据时才更改状态为1
  624. if success:
  625. sql_pool.update_one("UPDATE xingchao_product_record SET player_stats = 1 WHERE no = %s", (noid,))
  626. except Exception as e:
  627. log.error(f"Request get_player_list error: {e}")
  628. # 异常情况(包括token过期等)改为状态2
  629. sql_pool.update_one("UPDATE xingchao_product_record SET player_stats = 2 WHERE no = %s", (noid,))
  630. except Exception as e:
  631. log.error(f"Request player list error: {e}")
  632. time.sleep(10)
  633. try:
  634. sql_report_list = sql_pool.select_all("select no from xingchao_product_record where report_stats = 0")
  635. sql_report_list = [i[0] for i in sql_report_list]
  636. for noid in sql_report_list:
  637. try:
  638. get_others_report_list(log, noid, sql_pool)
  639. except Exception as e:
  640. log.error(f"Request get_report_list error: {e}")
  641. except Exception as e:
  642. log.error(f"Request get_shop_data_list error: {e}")
  643. except Exception as e:
  644. log.error(f'{inspect.currentframe().f_code.co_name} error: {e}')
  645. finally:
  646. log.info(f'爬虫程序 {inspect.currentframe().f_code.co_name} 运行结束,等待下一轮的采集任务............')
  647. def schedule_task():
  648. """
  649. 爬虫模块 定时任务 的启动文件
  650. """
  651. # 立即运行一次任务
  652. xc_main(log=logger)
  653. # 设置定时任务
  654. schedule.every().day.at("00:01").do(xc_main, log=logger)
  655. while True:
  656. schedule.run_pending()
  657. time.sleep(1)
  658. if __name__ == '__main__':
  659. # get_product_list()
  660. # get_product_single_page(logger, 1, None)
  661. # get_shop_product_sold()
  662. # get_sold_detail()
  663. # get_player_list(logger,'SN10763635',None,'782a2174df1a19aa26904dad1d347c97')
  664. # get_good_report_list()
  665. # get_others_report_list(logger, 'SN10763635')
  666. # get_video()
  667. # xc_main(logger)
  668. schedule_task()