# fanz_update.py
  1. # -*- coding: utf-8 -*-
  2. # Author : Charley
  3. # Python : 3.10.8
  4. # Date : 2026/1/4 18:55
  5. import inspect
  6. from datetime import datetime
  7. import requests
  8. import user_agent
  9. from loguru import logger
  10. from parsel import Selector
  11. from tenacity import retry, stop_after_attempt, wait_fixed
  12. from mysql_pool import MySQLConnectionPool
  13. headers = {
  14. # "referer": "https://asia.pokemon-card.com/tw/card-search/list/",
  15. "user-agent": user_agent.generate_user_agent()
  16. }
  17. crawler_language = "繁中"
  18. def get_details(log, sql_id_detail_url: tuple, sql_pool):
  19. log.debug(f'Request get_details for sql_id_detail_url: {sql_id_detail_url}')
  20. # url = "https://asia.pokemon-card.com/tw/card-search/detail/13958/"
  21. url = sql_id_detail_url[1]
  22. # response = requests.get(url, headers=headers, timeout=10, proxies=get_proxys(log))
  23. response = requests.get(url, headers=headers, timeout=10)
  24. # print(response.text)
  25. response.raise_for_status()
  26. selector = Selector(response.text)
  27. pg_label = selector.xpath('//section[@class="expansionLinkColumn"]/a/text()').get()
  28. pg_label = pg_label.strip() if pg_label else None
  29. # expansion_release_time 为'03-28-2025'格式 转换为正常的 年月日
  30. sql_expansion_release_time = sql_pool.select_one(
  31. f"select expansion_series, expansion_release_time from pokemon_fanz_category where expansion_title = '{pg_label}' and crawler_language = '{crawler_language}'")
  32. date_obj = datetime.strptime(sql_expansion_release_time[1], '%m-%d-%Y')
  33. expansion_release_time = date_obj.strftime('%Y-%m-%d')
  34. major_category_name = sql_expansion_release_time[0]
  35. data_dict = {
  36. "major_category_name": major_category_name,
  37. "pg_label": pg_label,
  38. "sales_date": expansion_release_time
  39. }
  40. log.debug(f'data_dict -> {data_dict}')
  41. sql_pool.update_one_or_dict(
  42. table="pokemon_card_record",
  43. data=data_dict,
  44. condition={"id": sql_id_detail_url[0]}
  45. )
  46. def fz_pokemon_main(log):
  47. """
  48. 主函数
  49. """
  50. log.info(f'开始运行 {inspect.currentframe().f_code.co_name} 爬虫任务.............................................')
  51. # 配置 MySQL 连接池
  52. sql_pool = MySQLConnectionPool(log=log)
  53. if not sql_pool.check_pool_health():
  54. log.error("数据库连接池异常")
  55. raise RuntimeError("数据库连接池异常")
  56. try:
  57. # 获取商品详情
  58. log.debug(f"........... 获取商品详情 ..........")
  59. sql_ietm_id_list = sql_pool.select_all(
  60. f"SELECT id, detail_url FROM pokemon_card_record WHERE pg_label IS NULL AND crawler_language='{crawler_language}'")
  61. for item_id in sql_ietm_id_list:
  62. try:
  63. get_details(log, item_id, sql_pool)
  64. except Exception as e:
  65. log.error(f"Request get_details error: {e}")
  66. except Exception as e:
  67. log.error(f'{inspect.currentframe().f_code.co_name} error: {e}')
  68. finally:
  69. log.info(f'爬虫程序 {inspect.currentframe().f_code.co_name} 运行结束,等待下一轮的采集任务............')
  70. if __name__ == '__main__':
  71. fz_pokemon_main(logger)