```python
# -*- coding: utf-8 -*-
# Author : Charley
# Python : 3.8.10
# Date: 2024-09-11 14:17
import random
import time
from datetime import datetime

import requests
from retrying import retry

from mysq_pool import MySQLConnectionPool  # the author's local connection-pool helper (not shown here)


def save_data(sql_pool, info):
    """
    Save one batch of scraped auction records to the database.
    :param sql_pool: MySQL connection pool wrapper
    :param info: list of row tuples to insert
    """
    sql = "INSERT INTO weikajia_bidding (cabinetId, imgs, title, price, lastBidPrice, auctionItemId, auctionStart, auctionEnd, currBidIndex) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    sql_pool.insert_all(sql, info)


def transform(timestamp):
    # Convert the Unix timestamp into a datetime object
    dt_object = datetime.fromtimestamp(int(timestamp))
    # Format it as 'YYYY-MM-DD HH:MM:SS'
    formatted_time = dt_object.strftime('%Y-%m-%d %H:%M:%S')
    return formatted_time


@retry(stop_max_attempt_number=3, wait_fixed=10000)
def get_list_page(headers, logger):
    """Query the first page to get the total number of auction items."""
    url = "https://api.weikajia.com/search/searchAuctionItem"
    data = {
        "page": 1,
        "pageSize": 10,
        "hideLoading": True,
        "ascSort": "desc",
        "sortType": "auction_start",
        "orderStatus": "1"
    }
    response = requests.post(url, headers=headers, json=data, timeout=5)
    # print(f'get_list_page: {response.json()}')
    if response.json()["resultCode"] != 200:
        logger.debug("get_list_page resultCode request failed, retrying...........")
        raise Exception("Request failed")
    total = response.json().get('data').get('total')
    if total:
        return total
    else:
        logger.debug("get_list_page total missing from response, retrying...........")
        raise Exception("get_list_page request failed, retrying...........")


@retry(stop_max_attempt_number=3, wait_fixed=1000)
def get_list(sql_pool, pp, headers, logger):
    """
    Fetch one page of the auction list and store its items.
    :param logger:
    :param sql_pool:
    :param pp: page number to request
    :param headers:
    :return:
    """
    url = "https://api.weikajia.com/search/searchAuctionItem"
    # data = {
    #     "page": int(pp),
    #     "pageSize": 10,
    #     "hideLoading": True,
    #     "ascSort": "asc",
    #     "sortType": "auction_end",
    #     "orderStatus": "1"
    # }
    data = {
        "page": int(pp),
        "pageSize": 10,
        "hideLoading": True,
        "ascSort": "desc",
        "sortType": "auction_start",
        "orderStatus": "1"
    }
    response = requests.post(url, headers=headers, json=data, timeout=5)
    # print(f'get_list: {response.json()}')
    if response.json()["resultCode"] != 200:
        logger.debug("Request failed, retrying...........")
        raise Exception("Request failed")
    logger.debug(f'Page {pp} fetched successfully..............')
    cardCabinet = response.json().get('data', {}).get('cardCabinet', [])
    if cardCabinet:
        info_list = []
        for item in cardCabinet:
            cabinetId = item.get("cabinetId")
            imgs = item.get("imgs")
            title = item.get("title")
            price = item.get("price")
            lastBidPrice = item.get("lastBidPrice")
            auctionItemId = item.get("auctionItemId")
            auctionStart_ = item.get("auctionStart")
            auctionStart = transform(auctionStart_)
            auctionEnd_ = item.get("auctionEnd")
            auctionEnd = transform(auctionEnd_)
            currBidIndex = item.get("currBidIndex")
            info = (cabinetId, imgs, title, price, lastBidPrice, auctionItemId, auctionStart, auctionEnd, currBidIndex)
            # print(info)
            info_list.append(info)
        save_data(sql_pool, info_list)


# Re-run up to 100 times, waiting one hour between attempts, if an exception escapes the handler below
@retry(stop_max_attempt_number=100, wait_fixed=3600000)
def bidding_main(log):
    try:
        log.info("Starting the bidding_spider crawler task............................................................")
        sql_pool = MySQLConnectionPool(log=log)
        if not sql_pool:
            log.error("Database connection failed")
            raise Exception("Database connection failed")
        # token = sql_pool.select_one("select token from wkj_token")
        headers = {
            "appVersion": "1.6.5",
            "osVersion": "9",
            "deviceModel": "M2007J22C",
            "appVersionCode": "168",
            "deviceBrand": "xiaomi",
            "platform": "android",
            # "token": token[0],
            "user-agent": "Mozilla/5.0 (Linux; Android 9; M2007J22C Build/QP1A.190711.020; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/92.0.4515.131 Mobile Safari/537.36",
            "Content-Type": "application/json",
            "Connection": "Keep-Alive"
        }
        total = get_list_page(headers, log)
        pages = (total + 9) // 10  # calculate the number of pages (rounded up)
        log.info(
            f'----------------------------------------Total items: {total}, total pages: {pages}----------------------------------------')
        for i in range(1, pages + 1):
            log.debug(f'Crawling page {i}..............')
            try:
                get_list(sql_pool, i, headers, log)
                time.sleep(random.randint(3, 5))
            except Exception as e:
                log.error(f'Error on page {i}: {e}')
    except Exception as e:
        log.error(e)
    finally:
        log.info("Crawler run finished, waiting for the next collection round.............")


if __name__ == '__main__':
    from loguru import logger

    bidding_main(logger)
```
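
The script imports `MySQLConnectionPool` from a local `mysq_pool` module that is not included in the listing. Below is a minimal sketch of the interface the crawler relies on (a `log=` constructor argument, `insert_all()`, and the commented-out `select_one()`), assuming `pymysql` and `DBUtils`; the connection settings, pool size, and class internals are illustrative placeholders, not the author's actual module.

```python
# -*- coding: utf-8 -*-
# Hypothetical sketch of mysq_pool.MySQLConnectionPool — placeholder credentials, not the original code.
import pymysql
from dbutils.pooled_db import PooledDB


class MySQLConnectionPool:
    def __init__(self, log, host="127.0.0.1", port=3306, user="root",
                 password="password", database="spider"):
        self.log = log
        # Small pool of reusable connections; all settings here are assumptions.
        self.pool = PooledDB(creator=pymysql, maxconnections=5, host=host, port=port,
                             user=user, password=password, database=database,
                             charset="utf8mb4")

    def insert_all(self, sql, rows):
        """Bulk-insert a list of row tuples, as save_data() expects."""
        conn = self.pool.connection()
        cursor = conn.cursor()
        try:
            cursor.executemany(sql, rows)
            conn.commit()
            self.log.debug(f"Inserted {len(rows)} rows")
        except Exception as e:
            conn.rollback()
            self.log.error(f"insert_all failed: {e}")
        finally:
            cursor.close()
            conn.close()

    def select_one(self, sql):
        """Return a single row tuple, as the commented-out token lookup expects."""
        conn = self.pool.connection()
        cursor = conn.cursor()
        try:
            cursor.execute(sql)
            return cursor.fetchone()
        finally:
            cursor.close()
            conn.close()
```

Any wrapper exposing those three entry points would work here; the pool only matters so that `insert_all()` can be called once per page without opening a fresh connection each time.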