# -*- coding: utf-8 -*-
# Author : Charley
# Python : 3.10.8
# Date : 2026/1/4 18:55
import inspect
from datetime import datetime

import requests
import user_agent
from loguru import logger
from parsel import Selector
from tenacity import retry, stop_after_attempt, wait_fixed

from mysql_pool import MySQLConnectionPool

# Request headers shared by every detail-page request; the user agent is
# generated once, at import time.
headers = {
    # "referer": "https://asia.pokemon-card.com/tw/card-search/list/",
    "user-agent": user_agent.generate_user_agent()
}

# Language tag used to filter rows in both tables queried by this module.
crawler_language = "繁中"


def get_details(log, sql_id_detail_url: tuple, sql_pool) -> None:
    """Fetch one card detail page and back-fill its expansion data.

    The expansion label is scraped from the detail page, looked up in
    ``pokemon_fanz_category`` and the resulting series name, label and release
    date are written to the matching ``pokemon_card_record`` row.

    Args:
        log: Logger object exposing ``debug``.
        sql_id_detail_url: ``(id, detail_url)`` row of ``pokemon_card_record``,
            e.g. a detail_url like
            "https://asia.pokemon-card.com/tw/card-search/detail/13958/".
        sql_pool: Database helper exposing ``select_one`` and
            ``update_one_or_dict``.

    Raises:
        requests.HTTPError: The detail page answered with an error status.
        ValueError: No expansion label was found on the page, or the stored
            release date is not in ``MM-DD-YYYY`` form.
        LookupError: The label has no matching category row.
    """
    log.debug(f'Request get_details for sql_id_detail_url: {sql_id_detail_url}')

    record_id = sql_id_detail_url[0]
    url = sql_id_detail_url[1]

    # Proxies are currently not used for this request.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()

    selector = Selector(response.text)
    pg_label = selector.xpath('//section[@class="expansionLinkColumn"]/a/text()').get()
    pg_label = pg_label.strip() if pg_label else None
    if not pg_label:
        # Without a label the category lookup below can never match; fail with
        # a clear message instead of querying for the literal string 'None'.
        raise ValueError(f"expansion label not found on detail page: {url}")

    # NOTE(review): pg_label is scraped text interpolated straight into SQL; a
    # label containing a quote breaks the statement. Switch to a parameterized
    # query if MySQLConnectionPool.select_one supports bound arguments.
    category_row = sql_pool.select_one(
        f"select expansion_series, expansion_release_time from pokemon_fanz_category "
        f"where expansion_title = '{pg_label}' and crawler_language = '{crawler_language}'")
    if not category_row:
        # Previously surfaced as "'NoneType' object is not subscriptable".
        raise LookupError(
            f"no pokemon_fanz_category row for expansion_title={pg_label!r} "
            f"and crawler_language={crawler_language!r}")

    # expansion_release_time is stored like '03-28-2025'; convert it to
    # year-month-day order.
    release_date = datetime.strptime(category_row[1], '%m-%d-%Y')
    expansion_release_time = release_date.strftime('%Y-%m-%d')
    major_category_name = category_row[0]

    data_dict = {
        "major_category_name": major_category_name,
        "pg_label": pg_label,
        "sales_date": expansion_release_time
    }
    log.debug(f'data_dict -> {data_dict}')

    sql_pool.update_one_or_dict(
        table="pokemon_card_record",
        data=data_dict,
        condition={"id": record_id}
    )


def fz_pokemon_main(log) -> None:
    """Run one crawl pass over every card record that still lacks ``pg_label``.

    A failure on a single record is logged and does not stop the pass; a
    failure outside the per-record loop is logged as well. The closing log line
    is always emitted once the pool has been created successfully.

    Args:
        log: Logger object exposing ``info``, ``debug`` and ``error``.

    Raises:
        RuntimeError: The connection pool health check failed.
    """
    # Resolved once; equals this function's own name.
    task_name = inspect.currentframe().f_code.co_name
    log.info(f'开始运行 {task_name} 爬虫任务.............................................')

    # Set up the MySQL connection pool and refuse to run on an unhealthy pool.
    sql_pool = MySQLConnectionPool(log=log)
    if not sql_pool.check_pool_health():
        log.error("数据库连接池异常")
        raise RuntimeError("数据库连接池异常")

    try:
        # Fetch the card details.
        log.debug("........... 获取商品详情 ..........")
        # "or []" guards against the helper returning None when nothing is
        # pending (its return value for an empty result is not visible here).
        pending_records = sql_pool.select_all(
            f"SELECT id, detail_url FROM pokemon_card_record WHERE pg_label IS NULL AND crawler_language='{crawler_language}'") or []
        for record in pending_records:
            try:
                get_details(log, record, sql_pool)
            except Exception as e:
                # Best effort: one bad record must not abort the whole pass.
                log.error(f"Request get_details error: {e}")
    except Exception as e:
        log.error(f'{task_name} error: {e}')
    finally:
        log.info(f'爬虫程序 {task_name} 运行结束,等待下一轮的采集任务............')


if __name__ == '__main__':
    fz_pokemon_main(logger)