|
|
@@ -3,19 +3,28 @@
|
|
|
# Python : 3.10.8
|
|
|
# Date : 2025/6/9 15:56
|
|
|
import time
|
|
|
+import json
|
|
|
import inspect
|
|
|
import requests
|
|
|
import schedule
|
|
|
+import hashlib
|
|
|
+import base64
|
|
|
import user_agent
|
|
|
from loguru import logger
|
|
|
-from tenacity import retry, stop_after_attempt, wait_fixed
|
|
|
+from datetime import datetime
|
|
|
+from Crypto.Cipher import AES
|
|
|
+from Crypto.Util.Padding import unpad
|
|
|
from mysql_pool import MySQLConnectionPool
|
|
|
+from tenacity import retry, stop_after_attempt, wait_fixed
|
|
|
|
|
|
logger.remove()
|
|
|
logger.add("./logs/{time:YYYYMMDD}.log", encoding='utf-8', rotation="00:00",
|
|
|
format="[{time:YYYY-MM-DD HH:mm:ss.SSS}] {level} {message}",
|
|
|
level="DEBUG", retention="7 day")
|
|
|
|
|
|
+RESPONSE_KEY = b"3bd48ea5e910b195843941351be7cbae"  # 32-byte AES key (AES-256): the 32 ASCII characters are used as raw bytes, not hex-decoded
|
|
|
+REQUEST_KEY = "1ba48ea2e910b666843941351be7cbad"
|
|
|
+
|
|
|
|
|
|
def after_log(retry_state):
|
|
|
"""
|
|
|
@@ -55,21 +64,43 @@ def get_proxys(log):
|
|
|
raise e
|
|
|
|
|
|
|
|
|
def make_sign():
    """Build the request signature together with the time string it was derived from.

    The current local time is formatted to whole seconds, and that exact
    string is parsed back into a Unix timestamp, so the signed value always
    agrees with the string returned to the caller. This mirrors the site's
    JS, which computes ``new Date(o).getTime() / 1e3`` from the formatted
    string.

    :return: tuple ``(sign, dt_str)`` where ``sign`` is the hex MD5 digest of
        ``REQUEST_KEY`` concatenated with the second-level timestamp, and
        ``dt_str`` is the local time formatted as ``%Y-%m-%d %H:%M:%S``.
    """
    time_format = "%Y-%m-%d %H:%M:%S"
    dt_str = datetime.now().strftime(time_format)
    # Re-parse the formatted string (rather than reusing "now") so that
    # sub-second precision is dropped exactly the way the string dropped it.
    epoch_seconds = int(datetime.strptime(dt_str, time_format).timestamp())
    digest = hashlib.md5(f"{REQUEST_KEY}{epoch_seconds}".encode())
    return digest.hexdigest(), dt_str
|
|
|
+
|
|
|
+
|
|
|
def decrypt_response(data_b64, iv_hex):
    """Decrypt an AES-CBC encrypted response payload and parse it as JSON.

    :param data_b64: Base64-encoded ciphertext.
    :param iv_hex: initialization vector as text. Despite the name it is NOT
        hex-decoded: its UTF-8 bytes are used directly as the IV, matching
        the site's JS ``CryptoJS.enc.Utf8.parse(iv)``.
    :return: the object produced by ``json.loads`` on the decrypted UTF-8 text.
    """
    iv_bytes = iv_hex.encode("utf-8")
    decryptor = AES.new(RESPONSE_KEY, AES.MODE_CBC, iv_bytes)
    encrypted = base64.b64decode(data_b64)
    padded_plaintext = decryptor.decrypt(encrypted)
    # Strip the block padding before decoding; the decrypted length is always
    # a multiple of the AES block size.
    payload = unpad(padded_plaintext, AES.block_size)
    return json.loads(payload.decode("utf-8"))
|
|
|
+
|
|
|
+
|
|
|
@retry(stop=stop_after_attempt(5), wait=wait_fixed(1), after=after_log)
|
|
|
def get_request_one_page(log, rating_no) -> dict:
|
|
|
+ """
|
|
|
+ 获取单页数据
|
|
|
+ :param log: logger
|
|
|
+ :param rating_no: 证书编号
|
|
|
+ :return: dict
|
|
|
+ """
|
|
|
+ sign, dt_str = make_sign()
|
|
|
+
|
|
|
headers = {
|
|
|
"accept": "*/*",
|
|
|
"accept-language": "en,zh-CN;q=0.9,zh;q=0.8",
|
|
|
"content-type": "application/json;charset=UTF-8",
|
|
|
+ "datetime": dt_str,
|
|
|
"origin": "https://www.zhongjianjiantong.com",
|
|
|
- "priority": "u=1, i",
|
|
|
"referer": "https://www.zhongjianjiantong.com/web/index.html",
|
|
|
- "sec-ch-ua": "\"Not(A:Brand\";v=\"99\", \"Google Chrome\";v=\"133\", \"Chromium\";v=\"133\"",
|
|
|
- "sec-ch-ua-mobile": "?1",
|
|
|
- "sec-ch-ua-platform": "\"Android\"",
|
|
|
- "sec-fetch-dest": "empty",
|
|
|
- "sec-fetch-mode": "cors",
|
|
|
- "sec-fetch-site": "same-origin",
|
|
|
+ "sign": sign,
|
|
|
"user-agent": user_agent.generate_user_agent()
|
|
|
}
|
|
|
url = "https://www.zhongjianjiantong.com/Api/OrderRatingGoods/detail"
|
|
|
@@ -81,36 +112,46 @@ def get_request_one_page(log, rating_no) -> dict:
|
|
|
response = session.post(url, headers=headers, json=data, proxies=get_proxys(log), timeout=5)
|
|
|
# print(response.text)
|
|
|
response.raise_for_status()
|
|
|
- return response.json()
|
|
|
+ result = response.json()
|
|
|
+ if result["code"] == 200 and result.get("iv"):
|
|
|
+ decrypted = decrypt_response(result["data"], result["iv"])
|
|
|
+ return decrypted
|
|
|
+ else:
|
|
|
+ return result
|
|
|
|
|
|
|
|
|
def parse_data(resp_json, sql_pool):
|
|
|
- card_id = resp_json.get('data', {}).get('obj_order_rating_goods', {}).get('id')
|
|
|
- order_no = resp_json.get('data', {}).get('obj_order_rating_goods', {}).get('order_no')
|
|
|
- tag_no = resp_json.get('data', {}).get('obj_order_rating_goods', {}).get('tag_no') # 标签号/查询的号码
|
|
|
-
|
|
|
- images = resp_json.get('data', {}).get('obj_order_rating_goods', []).get('images')
|
|
|
- card_create_time = resp_json.get('data', {}).get('obj_order_rating_goods', {}).get('create_time')
|
|
|
- card_update_time = resp_json.get('data', {}).get('obj_order_rating_goods', {}).get('update_time')
|
|
|
- score = resp_json.get('data', {}).get('obj_order_rating_goods', {}).get('score') # 中检评分
|
|
|
- corners = resp_json.get('data', {}).get('obj_order_rating_goods', {}).get('obj_detail', {}).get('corners') # 四角
|
|
|
- eoges = resp_json.get('data', {}).get('obj_order_rating_goods', {}).get('obj_detail', {}).get('eoges') # 边缘
|
|
|
- surface = resp_json.get('data', {}).get('obj_order_rating_goods', {}).get('obj_detail', {}).get('surface') # 表面
|
|
|
- centering = resp_json.get('data', {}).get('obj_order_rating_goods', {}).get('obj_detail', {}).get('centering') # 居中
|
|
|
- colour = resp_json.get('data', {}).get('obj_order_rating_goods', {}).get('obj_detail', {}).get('colour') # 颜色
|
|
|
- repair = resp_json.get('data', {}).get('obj_order_rating_goods', {}).get('obj_detail', {}).get('repair') # 修复
|
|
|
-
|
|
|
- rating_no = resp_json.get('data', {}).get('obj_order_rating_goods', {}).get('rating_no') # 证书编号
|
|
|
- obj_brand_title = resp_json.get('data', {}).get('obj_order_rating_goods', {}).get('obj_brand', {}).get(
|
|
|
+ """
|
|
|
+ 解析数据
|
|
|
+ :param resp_json: 响应数据
|
|
|
+ :param sql_pool: 数据库连接池
|
|
|
+ """
|
|
|
+ card_id = resp_json.get('obj_order_rating_goods', {}).get('id')
|
|
|
+ order_no = resp_json.get('obj_order_rating_goods', {}).get('order_no')
|
|
|
+ tag_no = resp_json.get('obj_order_rating_goods', {}).get('tag_no') # 标签号/查询的号码
|
|
|
+
|
|
|
+ images = resp_json.get('obj_order_rating_goods', []).get('images')
|
|
|
+ card_create_time = resp_json.get('obj_order_rating_goods', {}).get('create_time')
|
|
|
+ card_update_time = resp_json.get('obj_order_rating_goods', {}).get('update_time')
|
|
|
+ score = resp_json.get('obj_order_rating_goods', {}).get('score') # 中检评分
|
|
|
+ corners = resp_json.get('obj_order_rating_goods', {}).get('obj_detail', {}).get('corners') # 四角
|
|
|
+ eoges = resp_json.get('obj_order_rating_goods', {}).get('obj_detail', {}).get('eoges') # 边缘
|
|
|
+ surface = resp_json.get('obj_order_rating_goods', {}).get('obj_detail', {}).get('surface') # 表面
|
|
|
+ centering = resp_json.get('obj_order_rating_goods', {}).get('obj_detail', {}).get('centering') # 居中
|
|
|
+ colour = resp_json.get('obj_order_rating_goods', {}).get('obj_detail', {}).get('colour') # 颜色
|
|
|
+ repair = resp_json.get('obj_order_rating_goods', {}).get('obj_detail', {}).get('repair') # 修复
|
|
|
+
|
|
|
+ rating_no = resp_json.get('obj_order_rating_goods', {}).get('rating_no') # 证书编号
|
|
|
+ obj_brand_title = resp_json.get('obj_order_rating_goods', {}).get('obj_brand', {}).get(
|
|
|
'title') # 商品品牌
|
|
|
- obj_detail_spxl = resp_json.get('data', {}).get('obj_order_rating_goods', {}).get('obj_detail', {}).get(
|
|
|
+ obj_detail_spxl = resp_json.get('obj_order_rating_goods', {}).get('obj_detail', {}).get(
|
|
|
'spxl') # 商品系列
|
|
|
- obj_detail_spmc = resp_json.get('data', {}).get('obj_order_rating_goods', {}).get('obj_detail', {}).get(
|
|
|
+ obj_detail_spmc = resp_json.get('obj_order_rating_goods', {}).get('obj_detail', {}).get(
|
|
|
'spmc') # 商品名称
|
|
|
- obj_detail_fxnf = resp_json.get('data', {}).get('obj_order_rating_goods', {}).get('obj_detail', {}).get(
|
|
|
+ obj_detail_fxnf = resp_json.get('obj_order_rating_goods', {}).get('obj_detail', {}).get(
|
|
|
'fxnf') # 发行年份
|
|
|
- obj_detail_yy = resp_json.get('data', {}).get('obj_order_rating_goods', {}).get('obj_detail', {}).get('yy') # 语言
|
|
|
- obj_detail_spbh = resp_json.get('data', {}).get('obj_order_rating_goods', {}).get('obj_detail', {}).get(
|
|
|
+ obj_detail_yy = resp_json.get('obj_order_rating_goods', {}).get('obj_detail', {}).get('yy') # 语言
|
|
|
+ obj_detail_spbh = resp_json.get('obj_order_rating_goods', {}).get('obj_detail', {}).get(
|
|
|
'spbh') # 商品编号
|
|
|
|
|
|
info = (
|
|
|
@@ -118,6 +159,7 @@ def parse_data(resp_json, sql_pool):
|
|
|
centering,
|
|
|
colour, repair, rating_no, obj_brand_title, obj_detail_spxl, obj_detail_spmc, obj_detail_fxnf, obj_detail_yy,
|
|
|
obj_detail_spbh)
|
|
|
+ # print(info)
|
|
|
sql = """
|
|
|
INSERT INTO zhongjian_record (card_id, order_no, tag_no, images, card_create_time, card_update_time, score, corners, eoges, surface, centering, colour, repair, rating_no, obj_brand_title, obj_detail_spxl, obj_detail_spmc, obj_detail_fxnf, obj_detail_yy, obj_detail_spbh)
|
|
|
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
|
|
|
@@ -126,6 +168,12 @@ def parse_data(resp_json, sql_pool):
|
|
|
|
|
|
|
|
|
def loop_rating_no(log, sql_pool, sql_ra_no_list):
|
|
|
+ """
|
|
|
+ 循环处理每个证书编号
|
|
|
+ :param log: logger
|
|
|
+ :param sql_pool: 数据库连接池
|
|
|
+ :param sql_ra_no_list: 证书编号列表
|
|
|
+ """
|
|
|
# sql_ra_no_list = sql_pool.select_all('select tag_no from zhongjian_task where state = 0 limit 10000')
|
|
|
# sql_ra_no_list = [i[0] for i in sql_ra_no_list]
|
|
|
for rating_no_ in sql_ra_no_list:
|
|
|
@@ -166,7 +214,10 @@ def zhongjian_main(log):
|
|
|
|
|
|
try:
|
|
|
# while True:
|
|
|
- sql_ra_no_list = sql_pool.select_all('select tag_no from zhongjian_task where state = 0 limit 10000')
|
|
|
+ # sql_ra_no_list = sql_pool.select_all('select tag_no from zhongjian_task where state = 0 limit 10000')
|
|
|
+ sql_ra_no_list = sql_pool.select_all(
|
|
|
+ "select tag_no from zhongjian_task where tag_no like '529%' and state = 0 limit 50000")
|
|
|
+ # sql_ra_no_list = sql_pool.select_all("select tag_no from zhongjian_task where tag_no > '519354131' and state != 1 limit 10000")
|
|
|
sql_ra_no_list = [i[0] for i in sql_ra_no_list]
|
|
|
if not sql_ra_no_list:
|
|
|
log.info(f'没有需要处理的数据,等待下一轮处理........................................................')
|
|
|
@@ -185,7 +236,7 @@ def zhongjian_main(log):
|
|
|
|
|
|
def schedule_task():
|
|
|
"""
|
|
|
- 爬虫模块的启动文件
|
|
|
+ 爬虫模块 定时任务 的启动文件
|
|
|
"""
|
|
|
# 立即运行一次任务
|
|
|
zhongjian_main(log=logger)
|