"""
Carwow reader.

Reads the logged in Carwow dealer stock list at dealers.carwow.co.uk and turns
each card into a Car. Carwow shows more on the card than Motorway: registration,
make, model and derivative, year, mileage, fuel, transmission, service history,
reserve, CAP value, condition grade and distance, plus the listing state.

Auction only. Steven never buys a buy it now listing, so any card whose state is
not an auction state is discarded here, on both platforms.

Nothing here bids. It reads the screen only.
"""

import re
import json
import html as htmllib
from ..pricing import Car
from .. import browser

# Plate shape: two capitals, two digits, an optional space, three capitals.
PLATE = re.compile(r"\b[A-Z]{2}\d{2}\s?[A-Z]{3}\b")
# A line that is nothing but a four digit year from 1980 to 2049.
YEAR = re.compile(r"^(19[89]\d|20[0-4]\d)$")
# A whole line such as "12 miles away" or "1 mile away", any letter case.
# Group 1 is the number.
DISTANCE = re.compile(r"^(\d+)\s*miles? away$", re.I)
# A pound sign, optional whitespace, then digits and commas (group 1).
MONEY = re.compile(r"£\s*([\d,]+)")
# A bare number line: comma grouped thousands, or three to six plain digits.
PURE_NUM = re.compile(r"^\d{1,3}(?:,\d{3})+$|^\d{3,6}$")
# Lower case fuel labels. A card line counts as the fuel when its lower cased
# text equals one of these exactly.
FUELS = {"petrol", "diesel", "hybrid", "electric", "petrol hybrid", "plug-in hybrid", "phev"}


def _num(s):
    if s is None:
        return None
    s = str(s).replace(",", "").strip()
    return int(s) if s.isdigit() else None


def _is_auction(state):
    # Auction listings carry a state mentioning auction, for example
    # waiting_for_auction or in_auction. Buy it now states do not.
    return "auction" in (state or "").lower()


def _service_history(lines_lower):
    text = " ".join(lines_lower)
    if "no sh" in text or "no service history" in text:
        return "none"
    if "full" in text and "sh" in text or "fsh" in text:
        return "full"
    if "part" in text and ("sh" in text or "service" in text):
        return "partial"
    return ""


def _card_blocks(page_html):
    blocks = re.split(r'(?=data-listing-id=")', page_html)
    return [b for b in blocks if b.startswith('data-listing-id=')]


# "CAP" standing as a word of its own. A bare substring test also fires on
# "Captur", "capacity" and the like, and because the CAP search stops at the
# first hit, such a line used to end the search before the real label.
_CAP_LABEL = re.compile(r"\bcap\b", re.I)

# Lower case transmission labels a card line is compared against.
_TRANSMISSIONS = ("manual", "automatic", "auto", "cvt", "semi-automatic", "semi automatic")


def _first_photo_url(block):
    """Return the url of the first entry in the lazy photos JSON, or ""."""
    found = re.search(r'lazy-photos-value="([^"]+)"', block)
    if not found:
        return ""
    try:
        photos = json.loads(htmllib.unescape(found.group(1)))
    except (ValueError, RecursionError):
        # Best effort: a card with unreadable photo JSON just has no photo.
        return ""
    if isinstance(photos, list) and photos and isinstance(photos[0], dict):
        return photos[0].get("url") or ""
    return ""


def _visible_lines(block):
    """Return the non empty text lines of a card chunk with markup removed."""
    # A chunk starts inside the card's opening tag (at its data-listing-id
    # attribute) and ends inside the next card's opening tag. Neither partial
    # tag is visible text, so cut the head up to the first ">" and the
    # unclosed tag at the tail.
    head_end = block.find(">")
    body = block[head_end + 1:] if head_end >= 0 else ""
    body = re.sub(r"<svg[^>]*>.*?</svg>", " ", body, flags=re.S)
    body = re.sub(r"<script[^>]*>.*?</script>", " ", body, flags=re.S)
    body = re.sub(r"<[^>]*\Z", " ", body)
    # Strip tags first and unescape afterwards, so that escaped text such as
    # &lt; or &quot; cannot turn into markup or break a tag apart.
    text = htmllib.unescape(re.sub(r"<[^>]+>", "\n", body))
    return [piece.strip() for piece in text.split("\n") if piece.strip()]


def _read_reg(lines):
    """Return the first registration found, without inner whitespace, or ""."""
    for line in lines:
        hit = PLATE.search(line)
        if hit:
            return "".join(hit.group(0).split())
    return ""


def _read_distance(lines):
    """Return the number from the first "N miles away" line, or None."""
    for line in lines:
        hit = DISTANCE.match(line)
        if hit:
            return _num(hit.group(1))
    return None


def _read_grade(lines):
    """Return the number on the line after the first "Grade" label, or None."""
    for idx, line in enumerate(lines[:-1]):
        if line.lower() == "grade":
            return _num(lines[idx + 1])
    return None


def _read_reserve(lines):
    """Return the first pound amount on or within two lines after the first
    "reserve price" line, or None."""
    for idx, line in enumerate(lines):
        if "reserve price" in line.lower():
            for nearby in lines[idx:idx + 3]:
                hit = MONEY.search(nearby)
                if hit:
                    return _num(hit.group(1))
            return None
    return None


def _read_cap(lines):
    """Return the pound amount on, or on the line after, the first line that
    carries the word CAP. None when that line says "no CAP" or no amount is
    there."""
    for idx, line in enumerate(lines):
        if not _CAP_LABEL.search(line):
            continue
        if "no cap" in line.lower():
            return None
        hit = MONEY.search(line)
        if hit is None and idx + 1 < len(lines):
            hit = MONEY.search(lines[idx + 1])
        return _num(hit.group(1)) if hit else None
    return None


def _read_fuel(lines):
    """Return the first line that is a known fuel label, as shown, or ""."""
    return next((line for line in lines if line.lower() in FUELS), "")


def _read_transmission(lines):
    """Return the first transmission label, with "auto" spelt out, or ""."""
    for line in lines:
        low = line.lower()
        if low in _TRANSMISSIONS:
            return "Automatic" if low == "auto" else line
    return ""


def _read_year(lines):
    """Return (year, index) for the first line that is only a year, or
    (None, None)."""
    for idx, line in enumerate(lines):
        if YEAR.match(line):
            return _num(line), idx
    return None, None


def _read_mileage(lines, year, year_idx):
    """Return the first bare number of at least 100 that is neither the year
    line nor equal to the year, or None."""
    for idx, line in enumerate(lines):
        if idx == year_idx or not PURE_NUM.match(line):
            continue
        value = _num(line)
        if value and value != year and value >= 100:
            return value
    return None


def parse_listing(page_html):
    """Turn the stock list page HTML into a list of Car objects.

    Each chunk from _card_blocks is one card. Cards whose listing state is not
    an auction state are skipped. For the rest, the fields are read from the
    card's visible text lines; a field that cannot be found is left as "" or
    None. Every Car also gets _listing_id, _state and _fuel set on it.
    """
    cars = []
    for block in _card_blocks(page_html):
        id_match = re.match(r'data-listing-id="(\d+)"', block)
        listing_id = id_match.group(1) if id_match else ""
        # NOTE(review): this takes the first state attribute after the id, so
        # it assumes the state sits after the id inside the card's own tag.
        state_match = re.search(r'data-listing-state="([^"]+)"', block)
        state = state_match.group(1) if state_match else ""

        # Auction only.
        if not _is_auction(state):
            continue

        lines = _visible_lines(block)

        # Year, then the lines just before it give make, model and derivative.
        year, year_idx = _read_year(lines)
        make = model = derivative = ""
        if year_idx is not None and year_idx >= 2:
            derivative = lines[year_idx - 1]
            words = lines[year_idx - 2].split()
            if words:
                make = words[0]
                model = " ".join(words[1:])

        fuel = _read_fuel(lines)

        car = Car(
            reg=_read_reg(lines),
            make=make,
            model=model,
            derivative=derivative,
            year=year,
            mileage=_read_mileage(lines, year, year_idx),
            grade=_read_grade(lines),
            reserve=_read_reserve(lines),
            cap_clean=_read_cap(lines),
            distance_miles=_read_distance(lines),
            service_history=_service_history([line.lower() for line in lines]),
            engine=(derivative + (" " + fuel if fuel else "")).strip(),
            fuel=fuel,
            transmission=_read_transmission(lines),
            photo_url=_first_photo_url(block),
            listing_url=browser.SITES["carwow"]["base_url"] + "/dealers/listings/" + listing_id,
            source="Carwow",
        )
        car._listing_id = listing_id
        car._state = state
        car._fuel = fuel
        cars.append(car)

    return cars


def enrich_from_detail(page, car):
    """Open a Carwow car's own page and read what the list card does not show:
    previous owners (Carwow calls them former keepers) and VAT qualifying.

    page is driven through goto, wait_for_load_state and inner_text. car is
    updated in place and returned. A car without a listing_url is returned
    untouched without loading anything.

    Reads the rendered page, leaving fields unset if they cannot be read so the
    brain can hold the car back rather than guess: owners is set only for a
    plain whole number, and vat_qualifying only for an explicit yes or no.
    """
    if not car.listing_url:
        return car
    page.goto(car.listing_url, wait_until="domcontentloaded", timeout=45000)
    try:
        page.wait_for_load_state("networkidle", timeout=10000)
    except Exception:
        # Best effort: if the page never goes quiet, read what has rendered.
        pass
    lines = [piece.strip() for piece in page.inner_text("body").split("\n") if piece.strip()]

    def value_after(label):
        # The line that follows the first line equal to the label, any case.
        wanted = label.lower()
        for idx, line in enumerate(lines[:-1]):
            if line.lower() == wanted:
                return lines[idx + 1].strip()
        return None

    owners = value_after("Former keepers")
    # isdecimal rather than isdigit, so that int() cannot be handed a
    # character such as "²" that it rejects.
    if owners and owners.isdecimal():
        car.owners = int(owners)

    vat = value_after("VAT qualifying")
    if vat is not None:
        answer = vat.lower()
        if answer in ("yes", "y", "true"):
            car.vat_qualifying = True
        elif answer in ("no", "n", "false"):
            car.vat_qualifying = False
        # Anything else is not an answer (most likely the next label on the
        # page), so the field stays unset instead of being recorded as False.

    return car


def read_live(playwright, limit=None, max_pages=25):
    """Walk the filtered Carwow stock list page by page and collect its cars.

    Pages are requested by adding page=N to the stock url, from 1 up to
    max_pages. Reading stops early when a page shows no listing cards in time,
    when a page brings no listing id that was not already seen, or when limit
    cars have been collected. Each listing id is kept once.

    Auction only: parse_listing drops non auction states (for example second
    chance quotes), so outside the auction window the result can legitimately
    be empty. Returns the collected cars, cut to limit when one is given.
    """
    stock_url = browser.SITES["carwow"]["stock_url"]
    joiner = "?" if "?" not in stock_url else "&"
    kept = []
    kept_ids = set()
    page_ids_so_far = set()
    context = browser.open_reader_context(playwright, "carwow", headless=True)
    try:
        if context.pages:
            tab = context.pages[0]
        else:
            tab = context.new_page()
        for page_no in range(1, max_pages + 1):
            tab.goto(f"{stock_url}{joiner}page={page_no}", wait_until="domcontentloaded", timeout=45000)
            # The first page gets longer to render than the ones after it.
            wait_ms = 20000 if page_no == 1 else 8000
            try:
                tab.wait_for_selector('[data-listing-id]', timeout=wait_ms)
            except Exception:
                # No listings on this page: the end of the list.
                break
            markup = tab.content()
            ids_here = set(re.findall(r'data-listing-id="(\d+)"', markup))
            if ids_here <= page_ids_so_far:
                # Nothing new: the site has run out of pages and is repeating.
                break
            page_ids_so_far.update(ids_here)
            for car in parse_listing(markup):
                listing_id = getattr(car, "_listing_id", "")
                if not listing_id or listing_id in kept_ids:
                    continue
                kept_ids.add(listing_id)
                kept.append(car)
            if limit and len(kept) >= limit:
                break
    finally:
        context.close()

    if limit:
        return kept[:limit]
    return kept
