# -*- coding: utf-8 -*-
# Author : Charley
# Python : 3.8.10
# Date: 2024-09-11 14:17
import random
import time
from datetime import datetime

import requests
from retrying import retry

from mysq_pool import MySQLConnectionPool


def save_data(sql_pool, info):
    """
    Persist a batch of auction records.
    :param sql_pool: MySQL connection pool wrapper
    :param info: list of row tuples to insert
    """
    sql = ("INSERT INTO weikajia_bidding "
           "(cabinetId, imgs, title, price, lastBidPrice, auctionItemId, "
           "auctionStart, auctionEnd, currBidIndex) "
           "VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)")
    sql_pool.insert_all(sql, info)


def transform(timestamp):
    """Convert a Unix timestamp into a 'YYYY-MM-DD HH:MM:SS' string."""
    dt_object = datetime.fromtimestamp(int(timestamp))
    formatted_time = dt_object.strftime('%Y-%m-%d %H:%M:%S')
    return formatted_time


@retry(stop_max_attempt_number=3, wait_fixed=10000)
def get_list_page(headers, logger):
    """Request the first listing page and return the total number of items."""
    url = "https://api.weikajia.com/search/searchAuctionItem"
    data = {
        "page": 1,
        "pageSize": 10,
        "hideLoading": True,
        "ascSort": "desc",
        "sortType": "auction_start",
        "orderStatus": "1"
    }
    response = requests.post(url, headers=headers, json=data, timeout=5)
    resp_json = response.json()
    if resp_json["resultCode"] != 200:
        logger.debug("get_list_page resultCode indicates failure, retrying...")
        raise Exception("request failed")
    total = resp_json.get('data', {}).get('total')
    if total:
        return total
    else:
        logger.debug("get_list_page returned no total, retrying...")
        raise Exception("get_list_page request failed, retrying")


@retry(stop_max_attempt_number=3, wait_fixed=1000)
def get_list(sql_pool, pp, headers, logger):
    """
    Fetch one listing page and save its auction items.
    :param sql_pool: MySQL connection pool wrapper
    :param pp: page number to fetch
    :param headers: request headers mimicking the app client
    :param logger: logger instance
    """
    url = "https://api.weikajia.com/search/searchAuctionItem"
    # Alternative query (earliest-ending auctions first): same fields with
    # "ascSort": "asc", "sortType": "auction_end"
    data = {
        "page": int(pp),
        "pageSize": 10,
        "hideLoading": True,
        "ascSort": "desc",
        "sortType": "auction_start",
        "orderStatus": "1"
    }
    response = requests.post(url, headers=headers, json=data, timeout=5)
    resp_json = response.json()
    if resp_json["resultCode"] != 200:
        logger.debug("request failed, retrying...")
        raise Exception("request failed")
    logger.debug(f'Page {pp} fetched successfully')
    cardCabinet = resp_json.get('data', {}).get('cardCabinet', [])
    if cardCabinet:
        info_list = []
        for item in cardCabinet:
            cabinetId = item.get("cabinetId")
            imgs = item.get("imgs")
            title = item.get("title")
            price = item.get("price")
            lastBidPrice = item.get("lastBidPrice")
            auctionItemId = item.get("auctionItemId")
            auctionStart = transform(item.get("auctionStart"))
            auctionEnd = transform(item.get("auctionEnd"))
            currBidIndex = item.get("currBidIndex")
            info = (cabinetId, imgs, title, price, lastBidPrice, auctionItemId,
                    auctionStart, auctionEnd, currBidIndex)
            info_list.append(info)
        save_data(sql_pool, info_list)


# Re-run the whole task on an uncaught error, up to 100 attempts, one hour apart.
@retry(stop_max_attempt_number=100, wait_fixed=3600000)
def bidding_main(log):
    try:
        log.info("Starting the bidding_spider crawl task............................................................")
        sql_pool = MySQLConnectionPool(log=log)
        if not sql_pool:
            log.error("Database connection failed")
            raise Exception("database connection failed")
        # token = sql_pool.select_one("select token from wkj_token")
        headers = {
            "appVersion": "1.6.5",
            "osVersion": "9",
            "deviceModel": "M2007J22C",
            "appVersionCode": "168",
            "deviceBrand": "xiaomi",
            "platform": "android",
            # "token": token[0],
            "user-agent": "Mozilla/5.0 (Linux; Android 9; M2007J22C Build/QP1A.190711.020; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/92.0.4515.131 Mobile Safari/537.36",
            "Content-Type": "application/json",
            "Connection": "Keep-Alive"
        }
        total = get_list_page(headers, log)
        pages = (total + 9) // 10  # ceiling division by the page size of 10
        log.info(f'---------------- {total} items in total, {pages} pages ----------------')
        for i in range(1, pages + 1):
            log.debug(f'Crawling page {i}...')
            try:
                get_list(sql_pool, i, headers, log)
                time.sleep(random.randint(3, 5))
            except Exception as e:
                log.error(f'Error on page {i}: {e}')
    except Exception as e:
        log.error(e)
    finally:
        log.info("Crawler run finished, waiting for the next collection round...")


if __name__ == '__main__':
    from loguru import logger

    bidding_main(logger)