# xc_spider.py
  1. # -*- coding: utf-8 -*-
  2. # Author : Charley
  3. # Python : 3.10.8
  4. # Date : 2025/7/14 15:16
  5. import datetime
  6. import random
  7. import time
  8. import requests
  9. from get_sign import get_sign
  10. import inspect
  11. import schedule
  12. from loguru import logger
  13. from tenacity import retry, stop_after_attempt, wait_fixed
  14. from mysql_pool import MySQLConnectionPool
  15. logger.remove()
  16. logger.add("./logs/{time:YYYYMMDD}.log", encoding='utf-8', rotation="00:00",
  17. format="[{time:YYYY-MM-DD HH:mm:ss.SSS}] {level} {message}",
  18. level="DEBUG", retention="7 day")
  19. HEADERS = {
  20. "User-Agent": "Mozilla/5.0 (Linux; Android 11; Pixel 5 Build/RQ3A.211001.001; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/83.0.4103.106 Mobile Safari/537.36 uni-app Html5Plus/1.0 (Immersed/52.727272)",
  21. "Connection": "Keep-Alive",
  22. "Accept": "application/json",
  23. "Accept-Encoding": "gzip",
  24. "Content-Type": "application/json",
  25. "Cache-Control": "no-cache",
  26. # "sign": "3c5028da758dc416455f575334cadaab",
  27. # "x-token": "782a2174df1a19aa26904dad1d347c97",
  28. "client": "yingyongbao",
  29. "appversion": "2.1.10",
  30. # "nonce": "507060933e1e0585",
  31. "deviceid": "null",
  32. "jrd": "100d85590861f713a85",
  33. # "timestamp": f"{ts}"
  34. }
  35. def after_log(retry_state):
  36. """
  37. retry 回调
  38. :param retry_state: RetryCallState 对象
  39. """
  40. # 检查 args 是否存在且不为空
  41. if retry_state.args and len(retry_state.args) > 0:
  42. log = retry_state.args[0] # 获取传入的 logger
  43. else:
  44. log = logger # 使用全局 logger
  45. if retry_state.outcome.failed:
  46. log.warning(
  47. f"Function '{retry_state.fn.__name__}', Attempt {retry_state.attempt_number} Times")
  48. else:
  49. log.info(f"Function '{retry_state.fn.__name__}', Attempt {retry_state.attempt_number} succeeded")
  50. @retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
  51. def get_proxys(log):
  52. """
  53. 获取代理
  54. :return: 代理
  55. """
  56. tunnel = "x371.kdltps.com:15818"
  57. kdl_username = "t13753103189895"
  58. kdl_password = "o0yefv6z"
  59. try:
  60. proxies = {
  61. "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": kdl_username, "pwd": kdl_password, "proxy": tunnel},
  62. "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": kdl_username, "pwd": kdl_password, "proxy": tunnel}
  63. }
  64. return proxies
  65. except Exception as e:
  66. log.error(f"Error getting proxy: {e}")
  67. raise e
  68. def transfer_ts(timestamp):
  69. """
  70. 将10位时间戳转换为指定格式的时间字符串
  71. :param timestamp 1730389419
  72. :return formatted_time
  73. """
  74. dt_object = datetime.datetime.fromtimestamp(timestamp)
  75. formatted_time = dt_object.strftime('%Y-%m-%d %H:%M:%S')
  76. return formatted_time
  77. def get_product_list(log, sql_pool, sql_shop_id_list):
  78. page = 1
  79. while page <= 100:
  80. try:
  81. log.debug(f'--------------- page {page} start ---------------')
  82. len_items, total_page = get_product_single_page(log, page, sql_pool, sql_shop_id_list)
  83. except Exception as e:
  84. log.error(f"{inspect.currentframe().f_code.co_name} Request get_product_single_page for page:{page}, {e}")
  85. len_items = 0
  86. total_page = 0
  87. if len_items < 10:
  88. log.debug(f'--------------- page {page} has {len_items} items, break ---------------')
  89. break
  90. if total_page == page:
  91. log.debug(f'--------------- page {page} has {total_page} pages, break ---------------')
  92. break
  93. page += 1
  94. # 设置等待时间 避免查询太频繁
  95. time.sleep(random.uniform(0.5, 1))
  96. @retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
  97. def get_product_single_page(log, page: int, sql_pool, sql_shop_id_list):
  98. url = "https://api.xingchao6.com/AppClient/v1.1/product/list/page"
  99. ts = int((time.time()) * 1000)
  100. data = {
  101. # "page": 1,
  102. "page": page,
  103. "page_size": 10,
  104. "timestamp": ts
  105. }
  106. sign, nonce = get_sign(data, ts)
  107. headers = HEADERS.copy()
  108. headers["sign"] = sign
  109. headers["nonce"] = nonce
  110. headers["timestamp"] = str(ts)
  111. # response = requests.post(url, headers=headers, json=data)
  112. response = requests.post(url, headers=headers, json=data, proxies=get_proxys(log), timeout=10)
  113. # print(response.text)
  114. response.raise_for_status()
  115. if response.json().get("code") == 200:
  116. total_page = response.json().get("data", {}).get("page", {}).get("total_page", 0)
  117. log.debug(f'--------------- page {page} has {total_page} items ---------------')
  118. if total_page > 0:
  119. items = response.json().get("data", {}).get("list", [])
  120. if not items:
  121. log.debug(f'--------------- page {page} has no items ---------------')
  122. return
  123. info_list = []
  124. for item in items:
  125. # log.debug(f'--------------- item: {item} ---------------')
  126. shop_data = item.get("shop", {})
  127. shop_id = shop_data.get("id")
  128. if shop_id and shop_id in sql_shop_id_list:
  129. log.debug(f'--------------- shop_id: {shop_id} is exist, skip ---------------')
  130. continue
  131. shop_name = shop_data.get("name")
  132. data_dict = {
  133. "shop_id": shop_id,
  134. "shop_name": shop_name
  135. }
  136. # print(data_dict)
  137. info_list.append(data_dict)
  138. sql_shop_id_list.append(shop_id)
  139. # 插入数据库
  140. if info_list:
  141. try:
  142. sql_pool.insert_many(table="xingchao_shop_record", data_list=info_list)
  143. except Exception as e:
  144. log.warning(f"{inspect.currentframe().f_code.co_name}, {e[:500]}")
  145. return len(items), total_page
  146. return 0, total_page
  147. else:
  148. log.debug(f'--------------- page {page} error, {response.json().get("message")} ---------------')
  149. return 0, 0
  150. # ----------------------------------------------------------------------------------------------------------------------
  151. def get_shop_product_sold_list(log, shop_id, sql_pool, sql_product_id_list, x_token):
  152. page = 1
  153. while page <= 100:
  154. try:
  155. log.debug(f'--------------- page {page}, shop_id {shop_id} start ---------------')
  156. len_items, total_page = get_shop_product_sold_single_page(log, page, sql_pool, shop_id, sql_product_id_list,
  157. x_token)
  158. except Exception as e:
  159. log.error(
  160. f"{inspect.currentframe().f_code.co_name} Request get_shop_product_sold_single_page for page:{page}, {e}")
  161. len_items = 0
  162. total_page = 0
  163. if len_items < 20:
  164. log.debug(f'--------------- page {page} has {len_items} items, break ---------------')
  165. break
  166. if total_page == page:
  167. log.debug(f'--------------- page {page} has {total_page} pages, break ---------------')
  168. break
  169. page += 1
  170. # 设置等待时间 避免查询太频繁
  171. time.sleep(random.uniform(0.5, 1))
  172. @retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
  173. def get_shop_product_sold_single_page(log, page: int, sql_pool, shop_id, sql_product_id_list, x_token):
  174. url = "https://api.xingchao6.com/AppClient/v1.1/product/list/page"
  175. ts = int((time.time()) * 1000)
  176. data = {
  177. "shop_id": shop_id,
  178. # "shop_id": 53,
  179. "state": 2,
  180. "page": page,
  181. # "page": 1,
  182. "ob_k": "show_time",
  183. "ob_v": 1,
  184. "timestamp": f"{ts}"
  185. }
  186. sign, nonce = get_sign(data, ts)
  187. headers = HEADERS.copy()
  188. headers["sign"] = sign
  189. headers["nonce"] = nonce
  190. headers["timestamp"] = str(ts)
  191. # response = requests.post(url, headers=headers, json=data)
  192. response = requests.post(url, headers=headers, json=data, proxies=get_proxys(log), timeout=10)
  193. # print(response.text)
  194. response.raise_for_status()
  195. if response.json().get("code") == 200:
  196. total_page = response.json().get("data", {}).get("page", {}).get("total_page", 0)
  197. log.debug(f'--------------- page {page} has {total_page} items ---------------')
  198. if total_page > 0:
  199. items = response.json().get("data", {}).get("list", [])
  200. if not items:
  201. log.debug(f'--------------- page {page} has no items ---------------')
  202. return
  203. info_list = []
  204. for item in items:
  205. product_id = item.get("id")
  206. no = item.get("no")
  207. if product_id and product_id in sql_product_id_list:
  208. log.debug(f'--------------- product_id {product_id} has been crawled ---------------')
  209. continue
  210. cate_id = item.get("cate_id")
  211. create_time = item.get("create_time")
  212. title = item.get("title")
  213. subtitle = item.get("subtitle")
  214. spec_config = item.get("spec_config")
  215. spec_total = item.get("spec_total")
  216. sort = item.get("sort")
  217. is_rnd_show = item.get("is_rnd_show")
  218. tag_type = item.get("tag_type")
  219. tag_type_cn = item.get("tag_type_cn")
  220. state = item.get("state")
  221. state_cn = item.get("state_cn")
  222. shop_id = item.get("shop", {}).get("id")
  223. shop_name = item.get("shop", {}).get("name")
  224. is_purchase_limit = item.get("is_purchase_limit")
  225. on_sale_time = item.get("on_sale_time")
  226. if on_sale_time:
  227. on_sale_time = transfer_ts(on_sale_time)
  228. discount_config = item.get("discount_config")
  229. try:
  230. product_detail = get_sold_detail(log, no, x_token)
  231. except Exception as e:
  232. log.error(f"获取商品详情失败: {e}")
  233. product_detail = {}
  234. data_dict = {
  235. "product_id": product_id,
  236. "no": no,
  237. "cate_id": cate_id,
  238. "create_time": create_time,
  239. "title": title,
  240. "subtitle": subtitle,
  241. "img": product_detail["img"],
  242. "price_sale": product_detail["price_sale"],
  243. "total_price": product_detail["total_price"],
  244. "sale_num": product_detail["sale_num"],
  245. "spec_config": spec_config,
  246. "spec_total": spec_total,
  247. "sort": sort,
  248. "is_rnd_show": is_rnd_show,
  249. "tag_type": tag_type,
  250. "tag_type_cn": tag_type_cn,
  251. "state": state,
  252. "state_cn": state_cn,
  253. "shop_id": shop_id,
  254. "shop_name": shop_name,
  255. "is_purchase_limit": is_purchase_limit,
  256. "category": product_detail["category"],
  257. "on_sale_time": on_sale_time,
  258. "end_time": product_detail["end_time"],
  259. "finish_time": product_detail["finish_time"],
  260. "discount_config": str(discount_config),
  261. "spec_config_desc": product_detail["spec_config_desc"],
  262. "content": product_detail["content"],
  263. "brief": product_detail["brief"],
  264. "video_url": product_detail["video_url"]
  265. }
  266. # print(data_dict)
  267. info_list.append(data_dict)
  268. sql_product_id_list.append(product_id)
  269. if info_list:
  270. sql_pool.insert_many(table="xingchao_product_record", data_list=info_list)
  271. return len(items), total_page
  272. return 0, total_page
  273. else:
  274. log.debug(f'--------------- page {page} error, {response.json().get("message")} ---------------')
  275. return 0, 0
  276. @retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
  277. def get_sold_detail(log, no, x_token):
  278. log.debug(f'--------------- get_sold_detail:{no} ---------------')
  279. url = "https://api.xingchao6.com/AppClient/v2/product/detail"
  280. ts = int((time.time()) * 1000)
  281. params = {
  282. # "no": "SN10763635",
  283. "no": no,
  284. "timestamp": f"{ts}"
  285. }
  286. sign, nonce = get_sign(params, ts)
  287. headers = HEADERS.copy()
  288. headers["sign"] = sign
  289. headers["nonce"] = nonce
  290. headers["timestamp"] = str(ts)
  291. # response = requests.get(url, headers=headers, params=params)
  292. response = requests.get(url, headers=headers, params=params, proxies=get_proxys(log), timeout=10)
  293. # print(response.text)
  294. response.raise_for_status()
  295. if response.json().get("code") == 200:
  296. item = response.json().get("data", {})
  297. if not item:
  298. return {}
  299. price_sale = item.get("price_sale")
  300. sale_num = item.get("sale_num")
  301. if price_sale:
  302. results = sale_num * float(price_sale)
  303. total_price = round(results, 2)
  304. else:
  305. total_price = None
  306. defined_config = item.get("defined_config", [])
  307. if defined_config:
  308. brief = defined_config[0].get("brief") # 活动
  309. else:
  310. brief = None
  311. content = item.get("content", "")
  312. # content = html.escape(item.get("content", ""))
  313. # print("content:", content)
  314. try:
  315. video_url = get_video_info(log, no, x_token)
  316. except Exception as e:
  317. log.debug(e)
  318. video_url = None
  319. detail_dict = {
  320. "price_sale": price_sale,
  321. "sale_num": sale_num,
  322. "total_price": str(total_price),
  323. "img": ','.join([url for url_list in item['img'].values() for url in url_list]),
  324. "category": item.get("category").get("name"),
  325. "end_time": transfer_ts(item.get("end_time")),
  326. "finish_time": item.get("finish_time"),
  327. "spec_config_desc": item.get("template", {}).get("spec_config_desc"),
  328. "content": str(content),
  329. "brief": brief,
  330. "video_url": video_url
  331. }
  332. # print('detail_dict:', detail_dict)
  333. return detail_dict
  334. else:
  335. log.debug(
  336. f'-------------{inspect.currentframe().f_code.co_name}-- no {no} error, {response.json().get("message")} ---------------')
  337. return {}
  338. @retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
  339. def get_video_info(log, no, x_token):
  340. log.debug(f'-------------{inspect.currentframe().f_code.co_name}-- no {no} ---------------')
  341. url = "https://api.xingchao6.com/AppClient/v1.1/live/watch"
  342. ts = int((time.time()) * 1000)
  343. params = {
  344. # "product_no": "SN10763635",
  345. "product_no": no,
  346. "timestamp": str(ts)
  347. }
  348. sign, nonce = get_sign(params, ts)
  349. headers = HEADERS.copy()
  350. headers["sign"] = sign
  351. headers["nonce"] = nonce
  352. headers["timestamp"] = str(ts)
  353. headers["x-token"] = x_token
  354. # response = requests.get(url, headers=headers, params=params)
  355. response = requests.get(url, headers=headers, params=params, proxies=get_proxys(log), timeout=10)
  356. # print(response.text)
  357. response.raise_for_status()
  358. if response.json().get("code") == 200:
  359. item = response.json().get("data", {})
  360. if not item:
  361. return None
  362. video_url = item.get("url")
  363. return video_url
  364. else:
  365. log.debug(
  366. f'-------------{inspect.currentframe().f_code.co_name}-- no {no} error, {response.json().get("message")} ---------------')
  367. return None
  368. @retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
  369. def get_player_list(log, no, sql_pool, x_token):
  370. """
  371. 获取玩家列表 type:1 -> 福运签列表 / type:2 -> 用户列表
  372. :param log:
  373. :param sql_pool:
  374. :param no:
  375. :param x_token:
  376. """
  377. log.debug(f'-------------{inspect.currentframe().f_code.co_name}-- get {no} ---------------')
  378. url = "https://api.xingchao6.com/AppClient/v1/product/order/km/list/page"
  379. next_id = None
  380. while True:
  381. ts = int((time.time()) * 1000)
  382. params = {
  383. "type": "2",
  384. # "no": "SN10763635",
  385. "no": no,
  386. "timestamp": str(ts)
  387. }
  388. if next_id:
  389. params["next_id"] = next_id
  390. sign, nonce = get_sign(params, ts)
  391. headers = HEADERS.copy()
  392. headers["sign"] = sign
  393. headers["nonce"] = nonce
  394. headers["timestamp"] = str(ts)
  395. headers["x-token"] = x_token
  396. # response = requests.get(url, headers=headers, params=params)
  397. response = requests.get(url, headers=headers, params=params, proxies=get_proxys(log), timeout=10)
  398. # print('get_player_list:', response.json())
  399. response.raise_for_status()
  400. resp_json = response.json()
  401. # print('get_player_list:',resp_json)
  402. if response.json().get("code") == 500:
  403. log.error(f"{no}商品不存在, 更改状态为2..........")
  404. sql_pool.update_one("UPDATE xingchao_product_record SET player_stats = 2 WHERE no = %s", (no,))
  405. break
  406. data_list = resp_json.get("data", {}).get("list", [])
  407. if not data_list:
  408. logger.debug("data_list is empty, get_player_list end")
  409. # data_list为空 更改状态为3 未上传
  410. sql_pool.update_one("UPDATE xingchao_product_record SET player_stats = 3 WHERE no = %s", (no,))
  411. break
  412. player_data_list = []
  413. for item in data_list:
  414. card_id = item.get("card_id")
  415. card_name = item.get("card_name")
  416. card_set = item.get("card_set")
  417. create_time = item.get("create_time")
  418. km_id = item.get("id")
  419. num = item.get("num")
  420. # ori_id = item.get("ori_id")
  421. ori_id = item.get("ori_template_id")
  422. player = item.get("player")
  423. player_cn = item.get("player_cn")
  424. seq = item.get("seq")
  425. team = item.get("team")
  426. team_cn = item.get("team_cn")
  427. user_id = item.get("user_id")
  428. user_name = item.get("user_name")
  429. # player_data = (
  430. # no, card_id, card_name, card_set, create_time, km_id, num, ori_id, player, player_cn, seq, team,
  431. # team_cn, user_id, user_name
  432. # )
  433. player_dict = {
  434. "no": no,
  435. "card_id": card_id,
  436. "card_name": card_name,
  437. "card_set": card_set,
  438. "create_time": create_time,
  439. "km_id": km_id,
  440. "num": num,
  441. "ori_id": ori_id,
  442. "player": player,
  443. "player_cn": player_cn,
  444. "seq": seq,
  445. "team": team,
  446. "team_cn": team_cn,
  447. "user_id": user_id,
  448. "user_name": user_name
  449. }
  450. # print('player_data:', player_data)
  451. player_data_list.append(player_dict)
  452. try:
  453. sql_pool.insert_many(table="xingchao_player_record", data_list=player_data_list)
  454. except Exception as e:
  455. log.warning(f"保存数据失败:{e[:500]}")
  456. # next_id = data_list[-1].get("id") if data_list else None
  457. next_id = data_list[-1].get("id") if len(data_list) == 20 else None
  458. if not next_id:
  459. logger.debug("next_id is not, get_player_list end")
  460. break
  461. # 设置等待时间 避免查询太频繁
  462. time.sleep(random.uniform(0.5, 1))
  463. @retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
  464. def get_good_report_list():
  465. """
  466. 获得本期好卡
  467. """
  468. url = "https://api.xingchao6.com/AppClient/v1/product/unpackage/list/page"
  469. ts = int((time.time()) * 1000)
  470. params = {
  471. "get_type": "0",
  472. "type": "2",
  473. "no": "SN10763635",
  474. "page_size": "10",
  475. "timestamp": "1752560144500"
  476. }
  477. sign, nonce = get_sign(params, ts)
  478. headers = HEADERS.copy()
  479. headers["sign"] = sign
  480. headers["nonce"] = nonce
  481. headers["timestamp"] = str(ts)
  482. response = requests.get(url, headers=headers, params=params)
  483. print(response.text)
  484. print(response)
  485. @retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
  486. def get_others_report_list(log, no, sql_pool):
  487. """
  488. 其他赠品
  489. """
  490. try:
  491. next_id = None
  492. all_data = []
  493. while True:
  494. url = "https://api.xingchao6.com/AppClient/v1/product/unpackage/list/page"
  495. ts = int((time.time()) * 1000)
  496. params = {
  497. "get_type": "0",
  498. "type": "3",
  499. # "no": "SN10763635",
  500. "no": no,
  501. "page_size": "100",
  502. "timestamp": f"{ts}"
  503. }
  504. if next_id:
  505. params["next_id"] = next_id
  506. sign, nonce = get_sign(params, ts)
  507. headers = HEADERS.copy()
  508. headers["sign"] = sign
  509. headers["nonce"] = nonce
  510. headers["timestamp"] = str(ts)
  511. # response = requests.get(url, headers=headers, params=params)
  512. response = requests.get(url, headers=headers, params=params, proxies=get_proxys(log), timeout=10)
  513. # print(response.text)
  514. response.raise_for_status()
  515. if response.json().get("code") == 500:
  516. log.error(f"{no}商品不存在, 更改状态为2..........")
  517. sql_pool.update_one("UPDATE xingchao_product_record SET report_stats = 2 WHERE no = %s", (no,))
  518. break
  519. data = response.json().get("data", {}).get("data", [])
  520. if not data:
  521. log.debug(f"{no}response.json() data 为空, 更改状态为3.........")
  522. sql_pool.update_one("UPDATE xingchao_product_record SET report_stats = 3 WHERE no = %s", (no,))
  523. break
  524. all_data.extend(data)
  525. # 获取下一页的标识符
  526. next_id = data[-1].get("id")
  527. if len(data) < 100:
  528. log.debug(f"{no} -> len data 长度小于100, 停止查询.........")
  529. break
  530. # 设置等待时间 避免查询太频繁
  531. time.sleep(random.uniform(0.5, 1))
  532. info_list = []
  533. if all_data:
  534. for item in all_data:
  535. card_id = item.get("card_id")
  536. card_name = item.get("card_name")
  537. card_set = item.get("card_set")
  538. create_time = item.get("create_time")
  539. report_id = item.get("id")
  540. imgs = item.get("img")
  541. is_good = item.get("is_good")
  542. lottery_num = item.get("lottery_num")
  543. player = item.get("player")
  544. player_cn = item.get("player_cn")
  545. status = item.get("status")
  546. status_cn = item.get("status_cn")
  547. team = item.get("team")
  548. team_cn = item.get("team_cn")
  549. user_name = item.get("user_name")
  550. # info = (
  551. # no, card_id, card_name, card_set, create_time, report_id, imgs, is_good, lottery_num, player,
  552. # player_cn, status, status_cn, team, team_cn, user_name)
  553. data_dict = {
  554. "no": no,
  555. "card_id": card_id,
  556. "card_name": card_name,
  557. "card_set": card_set,
  558. "create_time": create_time,
  559. "report_id": report_id,
  560. "imgs": imgs,
  561. "is_good": is_good,
  562. "lottery_num": lottery_num,
  563. "player": player,
  564. "player_cn": player_cn,
  565. "status": status,
  566. "status_cn": status_cn,
  567. "team": team,
  568. "team_cn": team_cn,
  569. "user_name": user_name
  570. }
  571. # print(data_dict)
  572. info_list.append(data_dict)
  573. if info_list:
  574. sql_pool.insert_many(table="xingchao_report_record", data_list=info_list)
  575. log.info(f"{no}商品数据采集完成, 更改状态为1..........")
  576. sql_pool.update_one("UPDATE xingchao_product_record SET report_stats = 1 WHERE no = %s", (no,))
  577. except Exception as e:
  578. log.error(e)
  579. @retry(stop=stop_after_attempt(100), wait=wait_fixed(3600), after=after_log)
  580. def xc_main(log):
  581. """
  582. 主函数
  583. :param log: logger对象
  584. """
  585. log.info(
  586. f'开始运行 {inspect.currentframe().f_code.co_name} 爬虫任务....................................................')
  587. # 配置 MySQL 连接池
  588. sql_pool = MySQLConnectionPool(log=log)
  589. if not sql_pool.check_pool_health():
  590. log.error("数据库连接池异常")
  591. raise RuntimeError("数据库连接池异常")
  592. try:
  593. token = sql_pool.select_one("select token from xingchao_token")
  594. x_token = token[0]
  595. logger.debug(f"x_token:{x_token}")
  596. try:
  597. sql_shop_id_list = sql_pool.select_all("select shop_id from xingchao_shop_record")
  598. sql_shop_id_list = [i[0] for i in sql_shop_id_list]
  599. get_product_list(log, sql_pool, sql_shop_id_list)
  600. except Exception as e:
  601. log.error(f"Request shop id error: {e}")
  602. time.sleep(10)
  603. try:
  604. sql_product_id_list = sql_pool.select_all("select product_id from xingchao_product_record")
  605. sql_product_id_list = [i[0] for i in sql_product_id_list]
  606. sql_query_shop_id_list = sql_pool.select_all("select shop_id from xingchao_shop_record")
  607. sql_query_shop_id_list = [i[0] for i in sql_query_shop_id_list]
  608. for shop_id in sql_query_shop_id_list:
  609. try:
  610. get_shop_product_sold_list(log, shop_id, sql_pool, sql_product_id_list, x_token)
  611. except Exception as e:
  612. log.error(e)
  613. except Exception as e:
  614. log.error(f"Request get_shop_product_sold_list error: {e}")
  615. time.sleep(10)
  616. try:
  617. sql_player_list = sql_pool.select_all("select no from xingchao_product_record where player_stats = 0")
  618. sql_player_list = [i[0] for i in sql_player_list]
  619. for noid in sql_player_list:
  620. try:
  621. get_player_list(log, noid, sql_pool, x_token)
  622. # 保存完成 更改状态为1
  623. sql_pool.update_one("UPDATE xingchao_product_record SET player_stats = 1 WHERE no = %s", (noid,))
  624. except Exception as e:
  625. log.error(f"Request get_player_list error: {e}")
  626. sql_pool.update_one("UPDATE xingchao_product_record SET player_stats = 2 WHERE no = %s", (noid,))
  627. except Exception as e:
  628. log.error(f"Request player list error: {e}")
  629. time.sleep(10)
  630. try:
  631. sql_report_list = sql_pool.select_all("select no from xingchao_product_record where report_stats = 0")
  632. sql_report_list = [i[0] for i in sql_report_list]
  633. for noid in sql_report_list:
  634. try:
  635. get_others_report_list(log, noid, sql_pool)
  636. except Exception as e:
  637. log.error(f"Request get_report_list error: {e}")
  638. except Exception as e:
  639. log.error(f"Request get_shop_data_list error: {e}")
  640. except Exception as e:
  641. log.error(f'{inspect.currentframe().f_code.co_name} error: {e}')
  642. finally:
  643. log.info(f'爬虫程序 {inspect.currentframe().f_code.co_name} 运行结束,等待下一轮的采集任务............')
  644. def schedule_task():
  645. """
  646. 爬虫模块 定时任务 的启动文件
  647. """
  648. # 立即运行一次任务
  649. # xc_main(log=logger)
  650. # 设置定时任务
  651. schedule.every().day.at("00:01").do(xc_main, log=logger)
  652. while True:
  653. schedule.run_pending()
  654. time.sleep(1)
  655. if __name__ == '__main__':
  656. # get_product_list()
  657. # get_product_single_page(logger, 1, None)
  658. # get_shop_product_sold()
  659. # get_sold_detail()
  660. # get_player_list(logger,'SN10763635',None,'782a2174df1a19aa26904dad1d347c97')
  661. # get_good_report_list()
  662. # get_others_report_list(logger, 'SN10763635')
  663. # get_video()
  664. # xc_main(logger)
  665. schedule_task()