# -*- coding: utf-8 -*-
# Author : Charley
# Python : 3.12.10
# Date   : 2026/5/21
"""
Lelands 公用模块：HTTP 配置、ASP.NET postback 切换 auction、单页解析、详情解析。
被 lelands_history.py / lelands_spider.py 复用。
"""
import os
import random
import re

import user_agent
from curl_cffi import requests
from curl_cffi.requests import BrowserType
from loguru import logger
from parsel import Selector
from tenacity import retry, stop_after_attempt, wait_fixed

# Gallery listing page. It is fetched with GET for listings and is also the
# target of the POST that switches the selected auction.
GALLERY_URL = "https://auction.lelands.com/lots/gallery/"

# Use every browser fingerprint value that the library ships in BrowserType.
client_identifier_list = [b.value for b in BrowserType]

# Request headers shared by the HTTP calls in this module.
# The user-agent string is generated once, when the module is imported.
headers = {
    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "user-agent": user_agent.generate_user_agent()
}


def after_log(retry_state):
    """
    tenacity ``after`` callback: log the result of the attempt that just ran.

    When the wrapped call received positional arguments, the first one is used
    as the logger; otherwise the module-level ``logger`` is used.

    :param retry_state: tenacity retry state for the call being retried
    """
    positional = retry_state.args
    log = positional[0] if positional else logger

    # Success path first; everything below handles a failed attempt.
    if not retry_state.outcome.failed:
        log.info(f"Function '{retry_state.fn.__name__}', Attempt {retry_state.attempt_number} succeeded")
        return

    log.warning(f"Function '{retry_state.fn.__name__}', Attempt {retry_state.attempt_number} Times")


@retry(stop=stop_after_attempt(5), wait=wait_fixed(2), after=after_log)
def get_proxys(log):
    """
    Build the proxy mapping passed to outbound HTTP requests.

    The proxy URL is taken from the ``LELANDS_PROXY_URL`` environment variable
    when it is set to a non-empty value. Otherwise the built-in default is
    used, so existing deployments behave exactly as before.

    :param log: logger object; not used by the body, but kept as the first
                positional argument because the ``after_log`` retry callback
                reads the logger from there
    :return: dict with ``"http"`` and ``"https"`` keys, both set to the same
             proxy URL
    """
    # SECURITY NOTE(review): this default embeds proxy credentials in source
    # control. Prefer supplying LELANDS_PROXY_URL from the environment and
    # rotating the credentials below.
    default_proxy = "http://u1952150085001297:sJMHl4qc4bM0@proxy.123proxy.cn:36931"
    proxy_url = os.environ.get("LELANDS_PROXY_URL") or default_proxy
    # The same HTTP proxy endpoint serves both schemes.
    return {"http": proxy_url, "https": proxy_url}


def extract_auction(description, log=logger):
    """
    Pull the Auction name (the text inside the first pair of double quotes)
    out of a list of description strings.

    Items are scanned in order. The first item containing a quoted span
    decides the result: its content is returned stripped, or None when the
    stripped content is empty.

    :param description: list of strings, e.g. the result of selector.getall()
    :param log: logger object
    :return: the extracted auction string, or None when nothing usable is found
    """
    try:
        if not description or not isinstance(description, list):
            return None

        for raw in description:
            # Skip anything that is not a non-empty string.
            if not (raw and isinstance(raw, str)):
                continue
            found = re.search(r'"(.+?)"', raw.strip())
            if found is None:
                continue
            # A quoted span that is only whitespace yields None.
            return found.group(1).strip() or None

        return None
    except Exception as e:
        log.error(f"extract_auction error: {e}")
        return None


def _pick_hidden(selector, field_id):
    """
    Read the value of an ASP.NET hidden input (such as __VIEWSTATE) from a page.

    :param selector: parsel.Selector object for the page
    :param field_id: id of the hidden input, e.g. __VIEWSTATE
    :return: the input's value, or an empty string when it is missing or empty
    """
    value = selector.xpath(f'//input[@id="{field_id}"]/@value').get()
    return value if value else ''


def parse_auction_list(selector):
    """
    Collect every entry of the Auction drop-down on a gallery page.

    Options without a ``value`` attribute are skipped. Ids and names are
    stripped, and a missing name becomes an empty string.

    :param selector: parsel.Selector object for the gallery page
    :return: [{"id": "-1", "name": "All Auctions"}, {"id": "1005", "name": "2026 Spring Classic"}, ...]
    """
    parsed = []
    for option in selector.xpath('//select[@id="Auction"]/option'):
        raw_id = option.xpath('./@value').get()
        raw_name = option.xpath('./text()').get()
        if raw_id is not None:
            label = raw_name.strip() if raw_name else ''
            parsed.append({"id": raw_id.strip(), "name": label})
    return parsed


@retry(stop=stop_after_attempt(3), wait=wait_fixed(2), after=after_log)
def get_auction_list(log, session, impersonate):
    """
    GET the gallery page and return the concrete auctions from its drop-down,
    leaving out the "-1" (All Auctions) entry.

    :param log: logger object
    :param session: requests.Session object
    :param impersonate: browser fingerprint identifier (same one used for setup)
    :return: [{"id": "1005", "name": "2026 Spring Classic"}, ...]
    """
    log.info("获取全部拍卖会列表")
    resp = session.get(
        GALLERY_URL,
        headers=headers,
        impersonate=impersonate,
        proxies=get_proxys(log),
        timeout=15,
    )
    resp.raise_for_status()

    # Keep only real auctions; "-1" is the "All Auctions" pseudo entry.
    real = []
    for option in parse_auction_list(Selector(resp.text)):
        if option["id"] == "-1":
            continue
        real.append(option)

    log.info(f"共解析到 {len(real)} 个拍卖会：{[(o['id'], o['name']) for o in real]}")
    return real


@retry(stop=stop_after_attempt(5), wait=wait_fixed(2), after=after_log)
def setup_auction_session(log, session, impersonate, auction_id):
    """
    Switch the gallery's Auction filter to ``auction_id`` by replaying the
    ASP.NET __doPostBack form submission.

    Per the original author's note, the server-side session then remembers the
    selection, so later GET /lots/gallery?page=N requests return that auction's
    lots.

    :param log: logger object
    :param session: requests.Session object
    :param impersonate: browser fingerprint identifier (same one used for setup)
    :param auction_id: '-1' (All Auctions) or a concrete id such as '1005'
    :raises RuntimeError: when the page returned by the POST does not show
                          ``auction_id`` as the selected option
    """
    log.info(f"切换 Auction -> {auction_id}")
    proxies = get_proxys(log)
    target = str(auction_id)

    # Step 1: GET the page to obtain the current ViewState fields.
    first = session.get(GALLERY_URL, headers=headers, impersonate=impersonate,
                        proxies=proxies, timeout=15)
    first.raise_for_status()
    page = Selector(first.text)

    # Step 2: assemble the postback form. Insertion order matches the original
    # field order.
    form_data = {
        '__EVENTTARGET': 'ctl00$Auction',
        '__EVENTARGUMENT': '',
        '__LASTFOCUS': '',
    }
    for hidden_id in ('__VIEWSTATE', '__VIEWSTATEGENERATOR', '__EVENTVALIDATION'):
        form_data[hidden_id] = _pick_hidden(page, hidden_id)
    form_data.update({
        'ctl00$SearchIn': 'title',
        'ctl00$SearchText': '',
        'ctl00$BrowseBy': 'gallery',
        'ctl00$Auction': target,
    })

    post_headers = dict(headers)
    post_headers.update({
        'Content-Type': 'application/x-www-form-urlencoded',
        'Referer': GALLERY_URL,
        'Origin': 'https://auction.lelands.com',
    })

    second = session.post(GALLERY_URL, headers=post_headers, data=form_data,
                          impersonate=impersonate, proxies=proxies, timeout=20)
    second.raise_for_status()

    # Step 3: confirm the switch by reading back the selected option.
    selected_val = Selector(second.text).xpath(
        '//select[@id="Auction"]/option[@selected]/@value').get()
    log.info(f"切换后 Auction 选中值: {selected_val}")
    if selected_val != target:
        raise RuntimeError(f"切换 Auction 失败，预期 {auction_id} 实际 {selected_val}")


@retry(stop=stop_after_attempt(5), wait=wait_fixed(2), after=after_log)
def get_single_page(log, page, sql_pool, session, impersonate,
                    auction_id=None, auction_name=None):
    """
    Fetch one gallery page, parse its lot cards and store them in
    ``lelands_record``.

    The request does not follow redirects. When no lot cards are found, a
    warning is logged and 0 is returned.

    :param log: logger object
    :param page: page number
    :param sql_pool: mysql connection pool; when None, nothing is inserted
    :param session: requests.Session object
    :param impersonate: browser fingerprint identifier (same one used for setup)
    :param auction_id: auction id the session was switched to; stored as
                       lelands_record.auction_id (converted with int())
    :param auction_name: auction name, stored as lelands_record.auction_name
    :return: number of items parsed from the page
    """
    log.info(f">>>>>>>>>>>>>> 正在爬取 auction={auction_id}({auction_name}) 第 {page} 页 <<<<<<<<<<<<<<")

    response = session.get(
        GALLERY_URL,
        impersonate=impersonate,
        headers=headers,
        params={"page": f"{page}"},
        proxies=get_proxys(log),
        timeout=10,
        allow_redirects=False,
    )
    response.raise_for_status()

    cards = Selector(response.text).xpath(
        '//div[@class="items"]/div/div[@class="row"]//div[@class="col-lg-3 col-md-4 col-sm-6"]')
    if not cards:
        log.warning(f"--------------- 第 {page} 页无数据 ---------------")
        return 0

    def _clean_amount(raw, prefix):
        # Remove the prefix and thousands separators; missing/empty text -> None.
        return raw.replace(prefix, '').replace(',', '').strip() if raw else None

    info_list = []
    for card in cards:
        # The <strong> texts hold, in order: bids, opening bid, status.
        # Pad with None so missing trailing entries unpack cleanly.
        strong_texts = card.xpath('.//div/p[2]/strong/text()').getall()
        bids, opening_bid_raw, status = (strong_texts + [None, None, None])[:3]
        price_raw = card.xpath('.//div[@class="item-price"]/a/text()').get()

        info_list.append({
            "title": card.xpath('.//p/a/text()').get(),
            "detail_url": card.xpath('.//p/a/@href').get(),
            "bids": bids,
            "opening_bid": _clean_amount(opening_bid_raw, '$'),
            "status": status,
            "price": _clean_amount(price_raw, 'SOLD FOR $'),
            "auction_id": int(auction_id) if auction_id is not None else None,
            "auction_name": auction_name,
        })

    if info_list and sql_pool is not None:
        sql_pool.insert_many(table="lelands_record", data_list=info_list, ignore=True)
    return len(info_list)


def crawl_one_auction(log, sql_pool, session, impersonate,
                      auction_id, auction_name, max_page=460):
    """
    Crawl every page of one auction: switch the session to the auction, then
    walk the pages until one has no data, one fails, or ``max_page`` is passed.

    Exceptions from the initial auction switch propagate to the caller. An
    exception while fetching a page is logged and ends the crawl, and the
    count gathered so far is returned.

    :param log: logger object
    :param sql_pool: mysql connection pool
    :param session: requests.Session object
    :param impersonate: browser fingerprint identifier (same one used for setup)
    :param auction_id: auction id to switch to; stored as lelands_record.auction_id
    :param auction_name: auction name, stored as lelands_record.auction_name
    :param max_page: highest page number to request
    :return: total number of items fetched for this auction
    """
    setup_auction_session(log, session, impersonate, auction_id)

    total, page = 0, 1
    while page <= max_page:
        try:
            n = get_single_page(log, page, sql_pool, session, impersonate,
                                auction_id=auction_id, auction_name=auction_name)
        except Exception as e:
            log.error(f"auction={auction_id} page={page} 抓取失败: {e}")
            break
        else:
            if n == 0:
                # An empty page marks the end of this auction's listing.
                log.info(f"auction={auction_id} 翻到第 {page} 页无数据，结束")
                break
            total, page = total + n, page + 1

    log.info(f"auction={auction_id}({auction_name}) 共抓取 {total} 条")
    return total


@retry(stop=stop_after_attempt(5), wait=wait_fixed(2), after=after_log)
def get_details(log, url, sql_pool, sql_id):
    """
    Fetch a lot's detail page and write its category and image links back to
    the matching ``lelands_record`` row, setting ``state`` to 1.

    Each call picks a random browser fingerprint from
    ``client_identifier_list``. Extracting the auction name from the detail
    page (via ``extract_auction``) is currently disabled.

    :param log: logger object
    :param url: detail page URL
    :param sql_pool: mysql connection pool
    :param sql_id: id of the database record to update
    """
    log.info(f">>>>>>>>>>>>>> 正在爬取详情数据URL: {url} <<<<<<<<<<<<<<")

    fingerprint = random.choice(client_identifier_list)
    response = requests.get(url, headers=headers, impersonate=fingerprint,
                            timeout=10, proxies=get_proxys(log))
    response.raise_for_status()

    page = Selector(response.text)
    # Image links exclude the anchor whose id is "Zoomer".
    image_links = page.xpath(
        '//div[@class="col-md-5 col-sm-5"]//a[not(@id="Zoomer")]/@href').getall()

    record = {
        "category": page.xpath('//a[@id="MainContent_hCategory"]/text()').get(),
        # Stored as one comma-separated string; None when no links were found.
        "imgs": ','.join(image_links) if image_links else None,
        "state": 1,
    }
    sql_pool.update_one_or_dict(
        table="lelands_record",
        data=record,
        condition={"id": sql_id},
    )


def update_details_for_pending(log, sql_pool):
    """
    Go through every ``lelands_record`` row whose ``state`` is not 1 and fetch
    its detail page.

    When fetching details for a row raises, the error is logged and that row's
    ``state`` is set to 2. Processing then continues with the next row.

    :param log: logger object
    :param sql_pool: mysql connection pool
    """
    log.debug('Updating detail pages ...........................')
    pending = sql_pool.select_all('select id, detail_url from lelands_record where state != 1')

    for row in pending:
        sql_id = row[0]
        detail_url = row[1]
        try:
            get_details(log, detail_url, sql_pool, sql_id)
        except Exception as e:
            log.error(f'Error getting details for {detail_url}: {e}')
            # Mark the row as failed.
            failed_marker = {"state": 2}
            sql_pool.update_one_or_dict(
                table="lelands_record",
                data=failed_marker,
                condition={"id": sql_id},
            )