| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687 |
- # -*- coding: utf-8 -*-
- # Author : Charley
- # Python : 3.10.8
- # Date : 2026/1/4 18:55
- import inspect
- from datetime import datetime
- import requests
- import user_agent
- from loguru import logger
- from parsel import Selector
- from tenacity import retry, stop_after_attempt, wait_fixed
- from mysql_pool import MySQLConnectionPool
# Language tag used to filter and label rows belonging to this crawler
# (Traditional Chinese site).
crawler_language = "繁中"

# Default HTTP headers sent with every detail-page request; the user agent is
# generated once, when the module is loaded.
# A "referer" of https://asia.pokemon-card.com/tw/card-search/list/ was used
# previously and is currently disabled.
headers = {"user-agent": user_agent.generate_user_agent()}
def get_details(log, sql_id_detail_url: tuple, sql_pool):
    """Fetch one card detail page and back-fill its expansion information.

    The detail page is downloaded, the expansion label is read from the
    ``expansionLinkColumn`` section, the matching row is looked up in
    ``pokemon_fanz_category`` and the resulting ``major_category_name``,
    ``pg_label`` and ``sales_date`` are written to the ``pokemon_card_record``
    row identified by the given id.

    Args:
        log: Logger object exposing ``debug``.
        sql_id_detail_url: Row of the form ``(id, detail_url)``.
        sql_pool: Database helper providing ``select_one`` and
            ``update_one_or_dict``.

    Raises:
        requests.HTTPError: If the detail page answers with an error status.
        ValueError: If the page has no expansion label, no category row
            matches the label, or the stored release time is empty or not in
            ``MM-DD-YYYY`` format.
    """
    log.debug(f'Request get_details for sql_id_detail_url: {sql_id_detail_url}')
    record_id = sql_id_detail_url[0]
    url = sql_id_detail_url[1]

    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()

    selector = Selector(response.text)
    pg_label = selector.xpath('//section[@class="expansionLinkColumn"]/a/text()').get()
    pg_label = pg_label.strip() if pg_label else None
    if not pg_label:
        # Without a label the category lookup cannot match anything; fail
        # with a clear message instead of querying for the title 'None'.
        raise ValueError(f'no expansion label found on detail page: {url}')

    # The label is scraped from a remote page, so it is passed as a query
    # parameter rather than interpolated into the SQL text.
    # NOTE(review): assumes select_one accepts DB-API style parameters as its
    # second argument -- confirm against mysql_pool.MySQLConnectionPool.
    category_row = sql_pool.select_one(
        "select expansion_series, expansion_release_time from pokemon_fanz_category "
        "where expansion_title = %s and crawler_language = %s",
        (pg_label, crawler_language),
    )
    if not category_row:
        raise ValueError(f'no pokemon_fanz_category row for expansion_title={pg_label!r}')

    major_category_name = category_row[0]
    raw_release_time = category_row[1]
    if not raw_release_time:
        raise ValueError(f'empty expansion_release_time for expansion_title={pg_label!r}')

    # expansion_release_time is stored like '03-28-2025' (MM-DD-YYYY);
    # convert it to the regular YYYY-MM-DD form.
    date_obj = datetime.strptime(raw_release_time, '%m-%d-%Y')
    expansion_release_time = date_obj.strftime('%Y-%m-%d')

    data_dict = {
        "major_category_name": major_category_name,
        "pg_label": pg_label,
        "sales_date": expansion_release_time,
    }
    log.debug(f'data_dict -> {data_dict}')

    sql_pool.update_one_or_dict(
        table="pokemon_card_record",
        data=data_dict,
        condition={"id": record_id},
    )
def fz_pokemon_main(log):
    """
    Main entry of the crawl task.

    Creates the MySQL connection pool, selects every ``pokemon_card_record``
    row of the current crawler language whose ``pg_label`` is still NULL and
    calls ``get_details`` for each of them. A failure on a single row is
    logged and does not stop the remaining rows.

    :param log: logger exposing ``info``, ``debug`` and ``error``
    :raises RuntimeError: when the connection pool health check fails
    """
    task_name = inspect.currentframe().f_code.co_name
    log.info(f'开始运行 {task_name} 爬虫任务.............................................')

    # Set up the MySQL connection pool and refuse to run on an unhealthy one.
    sql_pool = MySQLConnectionPool(log=log)
    pool_is_healthy = sql_pool.check_pool_health()
    if not pool_is_healthy:
        log.error("数据库连接池异常")
        raise RuntimeError("数据库连接池异常")

    try:
        # Collect the rows that still need their detail page processed.
        log.debug("........... 获取商品详情 ..........")
        pending_query = f"SELECT id, detail_url FROM pokemon_card_record WHERE pg_label IS NULL AND crawler_language='{crawler_language}'"
        pending_rows = sql_pool.select_all(pending_query)

        for pending_row in pending_rows:
            try:
                get_details(log, pending_row, sql_pool)
            except Exception as row_error:
                # Keep going: one broken page must not abort the whole run.
                log.error(f"Request get_details error: {row_error}")
    except Exception as task_error:
        log.error(f'{task_name} error: {task_error}')
    finally:
        log.info(f'爬虫程序 {task_name} 运行结束,等待下一轮的采集任务............')
if __name__ == "__main__":
    # Run the crawl once with the loguru logger when executed as a script.
    fz_pokemon_main(log=logger)
|