jian_pokemon_card_spider.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656
  1. # -*- coding: utf-8 -*-
  2. # Author : Charley
  3. # Python : 3.10.8
  4. # Date : 2025/8/26 10:47
  5. import datetime
  6. import inspect
  7. import random
  8. import time
  9. import requests
  10. import user_agent
  11. from loguru import logger
  12. from parsel import Selector
  13. from urllib.parse import quote
  14. from tenacity import retry, stop_after_attempt, wait_fixed
  15. from mysql_pool import MySQLConnectionPool
  16. from wx_pokemon_aes_tool import pokemon_aes_encrypt, pokemon_aes_decrypt, api_sign
# Upper bound on the page number requested by every pagination loop in this file.
max_page = 1000
# Language tag written into the crawler_language column of each stored row
# and used as a filter when reading categories back.
crawler_language = "简中"

# Drop loguru's default sink and log to a file instead: the file rotates at
# 00:00 and files are retained for 7 days.
logger.remove()
logger.add("./logs/jian_{time:YYYYMMDD}.log", encoding='utf-8', rotation="00:00",
           format="[{time:YYYY-MM-DD HH:mm:ss.SSS}] {level} {message}",
           level="DEBUG", retention="7 day")
  23. def after_log(retry_state):
  24. """
  25. retry 回调
  26. :param retry_state: RetryCallState 对象
  27. """
  28. # 检查 args 是否存在且不为空
  29. if retry_state.args and len(retry_state.args) > 0:
  30. log = retry_state.args[0] # 获取传入的 logger
  31. else:
  32. log = logger # 使用全局 logger
  33. if retry_state.outcome.failed:
  34. log.warning(
  35. f"Function '{retry_state.fn.__name__}', Attempt {retry_state.attempt_number} Times")
  36. else:
  37. log.info(f"Function '{retry_state.fn.__name__}', Attempt {retry_state.attempt_number} succeeded")
  38. @retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
  39. def get_proxys(log):
  40. """
  41. 获取代理
  42. :return: 代理
  43. """
  44. tunnel = "x371.kdltps.com:15818"
  45. kdl_username = "t13753103189895"
  46. kdl_password = "o0yefv6z"
  47. try:
  48. proxies = {
  49. "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": kdl_username, "pwd": kdl_password, "proxy": tunnel},
  50. "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": kdl_username, "pwd": kdl_password, "proxy": tunnel}
  51. }
  52. return proxies
  53. except Exception as e:
  54. log.error(f"Error getting proxy: {e}")
  55. raise e
  56. @retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
  57. def get_parent_single_page(log, page, sql_pool):
  58. """
  59. 单页请求
  60. :param log:
  61. :param page:
  62. :param sql_pool:
  63. :return: decrypted_res -> 解密后的数据
  64. """
  65. log.debug(f'Request {inspect.currentframe().f_code.co_name} for page: {page} .................')
  66. par = {"pageNum": str(page), "pageSize": "20"}
  67. sign_result = api_sign(
  68. timeout=1221,
  69. user_token="",
  70. params=par
  71. )
  72. req_data = pokemon_aes_encrypt(sign_result["secretJsonParams"])
  73. headers = {
  74. "Content-Type": "application/json",
  75. "User-Agent": user_agent.generate_user_agent(),
  76. "Api-Access-Token": "",
  77. "Nonce": str(sign_result["nonce"]),
  78. "Signature": sign_result["signature"],
  79. "Timestamp": sign_result["timestamp"],
  80. }
  81. url = "https://app-api.pokemon-tcg.cn/app-api/v1/app/commodity/queryParent"
  82. data = {
  83. "encryptionBodyParams": req_data
  84. }
  85. response = requests.post(url, headers=headers, json=data)
  86. # print(response.text)
  87. response.raise_for_status()
  88. decrypted_res = pokemon_aes_decrypt(response.text)
  89. # print(decrypted_res)
  90. return decrypted_res
  91. def parse_parent_list(log, list_data, sql_pool):
  92. """
  93. 解析 parent 数据
  94. :param log:
  95. :param list_data:
  96. :param sql_pool:
  97. :return:
  98. """
  99. if not list_data:
  100. log.error(f"{inspect.currentframe().f_code.co_name} list_data is None")
  101. return
  102. # info_list = []
  103. for item in list_data:
  104. parent_id = item.get("id")
  105. expansion_img = item.get("imageUrl")
  106. expansion_series = item.get("name")
  107. # 20260327 增加系列过滤 '周边'
  108. if "周边" in expansion_series:
  109. log.debug(f"{inspect.currentframe().f_code.co_name}过滤系列: {expansion_series}")
  110. continue
  111. # data_dict = {
  112. # "parent_id": parent_id,
  113. # "expansion_series": expansion_series,
  114. # "expansion_img": expansion_img,
  115. # # "crawler_language": crawler_language
  116. # }
  117. # print(data_dict)
  118. # info_list.append(data_dict)
  119. try:
  120. log.debug(f"{inspect.currentframe().f_code.co_name} Request -> get_child_list, parent_id: {parent_id}")
  121. get_child_list(log, sql_pool, parent_id, expansion_series, expansion_img)
  122. except Exception as e:
  123. log.error(f"Error parsing child list: {e}")
  124. def get_parent_list(log, sql_pool):
  125. """
  126. 分页获取所有数据,支持多种停止条件
  127. :param log:
  128. :param sql_pool:
  129. """
  130. all_data = []
  131. page = 1
  132. total_fetched = 0 # 已获取的总记录数
  133. total_expected = None # 从第一次响应中获取 total
  134. while page <= max_page:
  135. try:
  136. result = get_parent_single_page(log, page, sql_pool)
  137. if result.get("code") != 0:
  138. log.error(f"请求失败: {result.get('message')}")
  139. break
  140. data = result.get("data", {})
  141. list_data = data.get("list", [])
  142. # 解析数据
  143. parse_parent_list(log, list_data, sql_pool)
  144. total = data.get("total")
  145. pages = data.get("pages")
  146. # has_next_page = data.get("hasNextPage", True) # 默认为 True
  147. # is_last_page = data.get("isLastPage", False) # 接口中该字段有问题
  148. # 记录总条数(首次获取)
  149. if total_expected is None:
  150. total_expected = total
  151. log.info(f"总条数: {total_expected}")
  152. # 停止条件判断
  153. if (
  154. len(list_data) < 20 or # 没有数据
  155. # is_last_page or # API 明确表示是最后一页
  156. (pages and page >= pages) or # 当前页 >= 总页数
  157. (total_expected and total_fetched + len(list_data) >= total_expected) # 已取完所有数据
  158. ):
  159. log.info(f"停止翻页,当前页: {page}, 已获取: {total_fetched + len(list_data)} 条")
  160. break
  161. # 添加到结果
  162. all_data.extend(list_data)
  163. total_fetched += len(list_data)
  164. log.info(f"第 {page} 页获取 {len(list_data)} 条,累计 {total_fetched} 条")
  165. page += 1
  166. except Exception as e:
  167. log.error(f"第 {page} 页请求异常: {e}")
  168. break
  169. log.info(f"共获取 {len(all_data)} 条数据")
  170. # return all_data
  171. # ---------------------------------------------------------------------------------------------------------------------
  172. @retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
  173. def get_child_single_page(log, page, parentId):
  174. """
  175. 获取获取 每个父系列的 子系列 单页数据
  176. :param log:
  177. :param page:
  178. :param parentId: 查询的父系列id
  179. :return:
  180. """
  181. log.debug(f'Request {inspect.currentframe().f_code.co_name} for page: {page} .................')
  182. par = {'pageNum': str(page), 'pageSize': '20', 'parentId': str(parentId)}
  183. sign_result = api_sign(
  184. timeout=1221,
  185. user_token="",
  186. params=par
  187. )
  188. req_data = pokemon_aes_encrypt(sign_result["secretJsonParams"])
  189. headers = {
  190. "Content-Type": "application/json",
  191. "User-Agent": user_agent.generate_user_agent(),
  192. "Api-Access-Token": "",
  193. "Nonce": str(sign_result["nonce"]),
  194. "Signature": sign_result["signature"],
  195. "Timestamp": sign_result["timestamp"],
  196. }
  197. url = "https://app-api.pokemon-tcg.cn/app-api/v1/app/commodity/queryByParentId"
  198. data = {
  199. "encryptionBodyParams": req_data
  200. }
  201. response = requests.post(url, headers=headers, json=data)
  202. # print(response.text)
  203. response.raise_for_status()
  204. decrypted_res = pokemon_aes_decrypt(response.text)
  205. # print(decrypted_res)
  206. return decrypted_res
  207. def parse_child_list(log, list_data, sql_pool, parentId, expansion_series, expansion_img):
  208. """
  209. 解析 child 数据
  210. :param log:
  211. :param list_data:
  212. :param sql_pool:
  213. :param parentId:
  214. :param expansion_series:
  215. :param expansion_img:
  216. :return:
  217. """
  218. if not list_data:
  219. log.error(f"{inspect.currentframe().f_code.co_name} list_data is None")
  220. return
  221. info_list = []
  222. for item in list_data:
  223. child_id = item.get("id")
  224. commodityCode = item.get("commodityCode") # 商品编码
  225. # series = item.get("series")
  226. imageUrl = item.get("imageUrl")
  227. child_name = item.get("name")
  228. salesDate = item.get("salesDate")
  229. description_html = item.get("description")
  230. selector = Selector(description_html)
  231. description_list = selector.xpath('//p/text()').getall()
  232. description = '\n'.join(description_list)
  233. data_dict = {
  234. "parent_id": parentId,
  235. "expansion_series": expansion_series,
  236. "expansion_img": expansion_img,
  237. "child_id": child_id,
  238. "child_name": child_name,
  239. "commodity_code": commodityCode,
  240. # "series": series,
  241. "child_image_url": imageUrl,
  242. "sales_date": salesDate,
  243. "description": description,
  244. "crawler_language": crawler_language
  245. }
  246. # print(data_dict)
  247. info_list.append(data_dict)
  248. if info_list:
  249. sql_pool.insert_many(table="pokemon_jianz_category", data_list=info_list, ignore=True)
  250. def get_child_list(log, sql_pool, parent_id, expansion_series, expansion_img):
  251. """
  252. 获取 每个父系列的 子系列 列表
  253. :param log:
  254. :param sql_pool:
  255. :param parent_id:
  256. :param expansion_series:
  257. :param expansion_img:
  258. :return:
  259. """
  260. all_data = []
  261. page = 1
  262. total_fetched = 0
  263. total_expected = None
  264. while page <= max_page:
  265. try:
  266. result = get_child_single_page(log, page, parent_id)
  267. if result.get("code") != 0:
  268. log.error(f"请求失败: {result.get('message')}")
  269. break
  270. data = result.get("data", {})
  271. list_data = data.get("list", [])
  272. parse_child_list(log, list_data, sql_pool, parent_id, expansion_series, expansion_img)
  273. total = data.get("total")
  274. pages = data.get("pages")
  275. # has_next_page = data.get("hasNextPage", True)
  276. # is_last_page = data.get("isLastPage", False)
  277. # 记录总条数(首次获取)
  278. if total_expected is None:
  279. total_expected = total
  280. log.info(f"总条数: {total_expected}")
  281. if (
  282. len(list_data) < 20 or
  283. # is_last_page or
  284. (pages and page >= pages) or
  285. (total_expected and total_fetched + len(list_data) >= total_expected)
  286. ):
  287. log.info(f"停止翻页,当前页: {page}, 已获取: {total_fetched + len(list_data)} 条")
  288. break
  289. all_data.extend(list_data)
  290. total_fetched += len(list_data)
  291. log.info(f"第 {page} 页获取 {len(list_data)} 条,累计 {total_fetched} 条")
  292. page += 1
  293. except Exception as e:
  294. log.error(f"第 {page} 页请求异常: {e}")
  295. break
  296. log.info(f"共获取 {len(all_data)} 条数据")
  297. # ---------------------------------------------------------------------------------------------------------------------
  298. @retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
  299. def get_series_single_page(log, item_tuple, page):
  300. """
  301. 获取 每个子系列的 列表页 单页请求
  302. :param log:
  303. :param item_tuple:
  304. :param page:
  305. :return:
  306. """
  307. child_id = item_tuple[0]
  308. child_name = item_tuple[1]
  309. commodity_code = item_tuple[2]
  310. sales_date = item_tuple[3]
  311. # print(child_id, child_name, commodity_code, sales_date)
  312. log.debug(f'Request {inspect.currentframe().f_code.co_name} for ID: {child_id}, page: {page} .................')
  313. par = {"banCardFlag": "0", "commodityIds": str(child_id), "commoditySelectedList": [
  314. {"id": str(child_id), "commodityName": child_name, "commodityCode": commodity_code, "salesDate": sales_date}],
  315. "pageNum": str(page), "pageSize": "50"}
  316. # print( par)
  317. # test_string = '{"banCardFlag":"0","commodityIds":"279","commoditySelectedList":[{"id":"279","commodityName":"收集啦151 惊","commodityCode":"151C3","salesDate":"2025-07-18"}],"pageNum":"8","pageSize":"50"}9124711756448767584fWS21MVyxkYwEoCIAHieg7Tqn0jPl3GzQvRsDJcb'
  318. # 字符串:{"banCardFlag":"0","commodityIds":"279","commoditySelectedList":[{"id":"279","commodityName":"收集啦151 惊","commodityCode":"151C3","salesDate":"2025-07-18"}],"pageNum":"8","pageSize":"50"}9124711756448767584fWS21MVyxkYwEoCIAHieg7Tqn0jPl3GzQvRsDJcb
  319. sign_result = api_sign(
  320. timeout=1221,
  321. user_token="",
  322. params=par,
  323. need_md5=True
  324. )
  325. # print(sign_result)
  326. # print(type(sign_result))
  327. # req_data = pokemon_aes_encrypt(sign_result["secretJsonParams"])
  328. req_data = pokemon_aes_encrypt(par)
  329. # print(req_data)
  330. headers = {
  331. "Content-Type": "application/json",
  332. "User-Agent": user_agent.generate_user_agent(),
  333. "Api-Access-Token": "",
  334. "Nonce": str(sign_result["nonce"]),
  335. "Signature": sign_result["signature"],
  336. "Timestamp": sign_result["timestamp"],
  337. }
  338. url = "https://app-api.pokemon-tcg.cn/app-api/v1/app/card/query"
  339. data = {
  340. "encryptionBodyParams": req_data
  341. }
  342. response = requests.post(url, headers=headers, json=data)
  343. # print(response.text)
  344. response.raise_for_status()
  345. decrypted_res = pokemon_aes_decrypt(response.text)
  346. # print(decrypted_res)
  347. return decrypted_res
  348. def parse_series(log, list_data, sql_pool, item_tuple):
  349. """
  350. 解析 子系列 详情 数据
  351. :param log:
  352. :param list_data:
  353. :param sql_pool:
  354. :param item_tuple:
  355. :return:
  356. """
  357. if not list_data:
  358. log.error(f"{inspect.currentframe().f_code.co_name} list_data is None")
  359. return
  360. # info_list = []
  361. for item in list_data:
  362. detail_id = item.get("id") # int
  363. # yoren_code = item.get("yorenCode") # 洋文代码
  364. # cdnImgUrl = item.get("cdnImgUrl")
  365. # cardType = item.get("cardType")
  366. # nameSamePokemonId = item.get("nameSamePokemonId") # 口袋妖怪id int
  367. # data_dict = {
  368. # "child_id": child_id,
  369. # "card_id": detail_id,
  370. # "crawler_language": crawler_language
  371. #
  372. # }
  373. # print(data_dict)
  374. # info_list.append(data_dict)
  375. try:
  376. get_details_page(log, detail_id, item_tuple, sql_pool)
  377. except Exception as e:
  378. log.error(f"{inspect.currentframe().f_code.co_name} {detail_id} 请求异常: {e}")
  379. time.sleep(random.randint(1, 5) / 10)
  380. # if info_list:
  381. # sql_pool.insert_many(table="pokemon_card_child", data_list=info_list, ignore=True)
  382. def get_series_list(log, item_tuple, sql_pool):
  383. """
  384. 获取 每个子系列的 列表页
  385. :param log:
  386. :param item_tuple:
  387. :param sql_pool:
  388. :return:
  389. """
  390. all_data = []
  391. page = 1
  392. total_fetched = 0
  393. total_expected = None
  394. while page <= max_page:
  395. try:
  396. result = get_series_single_page(log, item_tuple, page)
  397. if result.get("code") != 0:
  398. log.error(f"请求失败: {result.get('message')}")
  399. break
  400. data = result.get("data", {})
  401. list_data = data.get("list", [])
  402. parse_series(log, list_data, sql_pool, item_tuple)
  403. total = data.get("total")
  404. pages = data.get("pages")
  405. # 记录总条数(首次获取)
  406. if total_expected is None:
  407. total_expected = total
  408. log.info(f"总条数: {total_expected}")
  409. # 停止条件判断
  410. if (
  411. len(list_data) < 20 or # 没有数据
  412. (pages and page >= pages) or # 当前页 >= 总页数
  413. (total_expected and total_fetched + len(list_data) >= total_expected) # 已取完所有数据
  414. ):
  415. log.info(f"停止翻页,当前页: {page}, 已获取: {total_fetched + len(list_data)} 条")
  416. break
  417. # 添加到结果
  418. all_data.extend(list_data)
  419. total_fetched += len(list_data)
  420. log.info(f"第 {page} 页获取 {len(list_data)} 条,累计 {total_fetched} 条")
  421. page += 1
  422. except Exception as e:
  423. log.error(f"第 {page} 页请求异常: {e}")
  424. break
  425. # ---------------------------------------------------------------------------------------------------------------------
  426. @retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
  427. def get_details_page(log, card_id, item_tuple, sql_pool):
  428. """
  429. 获取 详情页 数据
  430. :param log:
  431. :param card_id:
  432. :param item_tuple:
  433. :param sql_pool:
  434. """
  435. log.debug(
  436. f'Request {inspect.currentframe().f_code.co_name} for card_id: {card_id} .................')
  437. par = {"id": str(card_id)}
  438. sign_result = api_sign(
  439. timeout=1221,
  440. user_token="",
  441. params=par,
  442. # need_md5=True
  443. )
  444. # print(f"解密结果: {sign_result}")
  445. # print(type(sign_result))
  446. # req_data = pokemon_aes_encrypt(sign_result["secretJsonParams"])
  447. req_data = pokemon_aes_encrypt(par)
  448. # E = this.encryptionEnable ? "{}" == A ? {} : n({}, "GET" == o ? "encryptionUrlParams" : "encryptionBodyParams", "GET" == o ? encodeURIComponent(P) : P) : i,
  449. # Python 的 quote() 函数默认会将空格编码为 + 为了模拟 encodeURIComponent() 将空格编码为 %20 的行为,必须将 safe 参数设置为空字符串 ''
  450. req_data = quote(req_data, safe='')
  451. # print(req_data)
  452. headers = {
  453. "Content-Type": "application/json",
  454. "User-Agent": user_agent.generate_user_agent(),
  455. "Api-Access-Token": "",
  456. "Nonce": str(sign_result["nonce"]),
  457. "Signature": sign_result["signature"],
  458. "Timestamp": sign_result["timestamp"]
  459. }
  460. url = "https://app-api.pokemon-tcg.cn/app-api/v1/app/card/get"
  461. params = {
  462. # "encryptionUrlParams": "%2BsGoBpMkJzVjwAgE2Gca2Q%3D%3D"
  463. "encryptionUrlParams": req_data
  464. }
  465. response = requests.get(url, headers=headers, params=params)
  466. # print('response.text:', response.text)
  467. response.raise_for_status()
  468. decrypted_res = pokemon_aes_decrypt(response.text)
  469. # print('decrypted_res:', decrypted_res)
  470. try:
  471. parse_details(log, decrypted_res, sql_pool, card_id, item_tuple)
  472. except Exception as e:
  473. log.error(f"解析详情页数据异常: {e}")
  474. def parse_details(log, list_data, sql_pool, card_id, item_tuple):
  475. """
  476. 解析详情页数据
  477. :param log:
  478. :param list_data:
  479. :param sql_pool:
  480. :param card_id:
  481. :param item_tuple:
  482. :return:
  483. """
  484. child_id = item_tuple[0]
  485. child_name = item_tuple[1]
  486. commodity_code = item_tuple[2]
  487. expansion_series = item_tuple[4]
  488. """
  489. pg_value -> commodity_code
  490. pg_label -> child_name
  491. major_category_name -> expansion_series
  492. """
  493. if not list_data:
  494. log.error(f"{inspect.currentframe().f_code.co_name} list_data is None")
  495. return
  496. item = list_data.get("data")
  497. if not item:
  498. log.error(f"{inspect.currentframe().f_code.co_name} item is None")
  499. return
  500. # pg_value
  501. # major_category_name
  502. img = item.get("imgUrl")
  503. # evolveText = item.get("evolveText")
  504. card_name = item.get("cardName")
  505. # regulationMarkText = item.get("regulationMarkText")
  506. card_no = item.get("collectionNumber")
  507. rarity = item.get("rarityText")
  508. data_dict = {
  509. "child_id": child_id,
  510. "major_category_name": expansion_series,
  511. "pg_value": commodity_code,
  512. "pg_label": child_name,
  513. "card_id": card_id,
  514. "card_name": card_name,
  515. "card_no": card_no,
  516. "rarity": rarity,
  517. # "regulationMarkText": regulationMarkText,
  518. "img": img,
  519. "crawler_language": crawler_language
  520. }
  521. # print(data_dict)
  522. sql_pool.insert_one_or_dict(table="pokemon_card_record", data=data_dict, ignore=True)
  523. @retry(stop=stop_after_attempt(100), wait=wait_fixed(3600), after=after_log)
  524. def jz_pokemon_main(log):
  525. """
  526. 主函数
  527. """
  528. log.info(f'开始运行 {inspect.currentframe().f_code.co_name} 爬虫任务.............................................')
  529. # 配置 MySQL 连接池
  530. sql_pool = MySQLConnectionPool(log=log)
  531. if not sql_pool.check_pool_health():
  532. log.error("数据库连接池异常")
  533. raise RuntimeError("数据库连接池异常")
  534. try:
  535. # 获取分类列表
  536. # log.debug(".......... 获取分类列表 ..........")
  537. # try:
  538. # get_parent_list(log, sql_pool)
  539. # except Exception as e:
  540. # log.error(f"{inspect.currentframe().f_code.co_name} Request get_category_list error: {e}")
  541. # 获取商品详情
  542. log.debug(f"........... 获取商品详情 ..........")
  543. # par = {"banCardFlag": "0", "commodityIds": "279", "commoditySelectedList": [
  544. # {"id": "279", "commodityName": "收集啦151 惊", "commodityCode": "151C3", "salesDate": "2025-07-18"}],
  545. # "pageNum": str(page), "pageSize": "50"}
  546. # 2026-03-27 18:11:00 gmt_create_time字段 查询今天的数据
  547. sql_ietm_id_list = sql_pool.select_all(
  548. f"SELECT DISTINCT child_id,child_name,commodity_code,sales_date,expansion_series FROM pokemon_jianz_category WHERE crawler_language='{crawler_language}' AND gmt_create_time >= '{datetime.datetime.now().strftime('%Y-%m-%d 00:00:00')}'")
  549. # sql_ietm_id_list = [item_id[0] for item_id in sql_ietm_id_list]
  550. log.debug(f"获取商品详情长度为: {len(sql_ietm_id_list)}")
  551. for item_tuple in sql_ietm_id_list:
  552. try:
  553. get_series_list(log, item_tuple, sql_pool)
  554. # get_details_page(log, item_id, sql_pool)
  555. except Exception as e:
  556. log.error(f"Request get_details error: {e}")
  557. except Exception as e:
  558. log.error(f'{inspect.currentframe().f_code.co_name} error: {e}')
  559. finally:
  560. log.info(f'爬虫程序 {inspect.currentframe().f_code.co_name} 运行结束,等待下一轮的采集任务............')
if __name__ == '__main__':
    # Disabled manual entry points for debugging a single stage:
    # get_parent_list(logger, None)
    # get_child_list(logger, None)
    # get_series_list(logger, None)
    # get_details_page(logger, 11364, None)
    # Run the full crawl with the module-level logger.
    jz_pokemon_main(logger)