"""
Motorway reader.

Reads the logged in dealer stock list at pro.motorway.co.uk/vehicles and turns
each card into a Car. The list card carries registration, make, model and
derivative, year, mileage, fuel, transmission, distance, condition grade,
reserve price, the photo and the listing link.

Some fields are not on the list card and live on each car's own page: CAP Clean,
previous owners, service history and VAT status, and the exact engine. Those are
fetched by visiting the car's page, but only for cars worth the look, so the run
stays quick. See enrich_from_detail below.

Nothing here bids. It reads the screen only.
"""

import os
import re
import html as htmllib
from ..pricing import Car
from .. import browser

# Registration shaped as two capitals, two digits, an optional space and three
# capitals, for example "AB12 CDE". Upper case only.
PLATE = re.compile(r"\b[A-Z]{2}\d{2}\s?[A-Z]{3}\b")
# A standalone four digit year from 1980 to 2049.
YEAR = re.compile(r"\b(19[89]\d|20[0-4]\d)\b")
# A whole line that is just a mileage, such as "45,123 mi". Case sensitive.
MILEAGE = re.compile(r"^([\d,]+)\s*mi$")
# A whole line giving the distance, such as "12 mi away", in any case.
DISTANCE = re.compile(r"^(\d+)\s*mi away$", re.I)
# A pound amount such as "£12,500". Group 1 holds the digits and commas.
MONEY = re.compile(r"£\s*([\d,]+)")
# Fuel labels, lower case, matched against a whole card line.
FUELS = {"petrol", "diesel", "hybrid", "electric", "plug-in hybrid", "petrol hybrid"}


def _num(s):
    if s is None:
        return None
    s = str(s).replace(",", "").strip()
    return int(s) if s.isdigit() else None


# Each card is wrapped by its own anchor, for example
#   <a class="..." id="vehicle_card_20811275" href="/vehicles/20811275">
# There is exactly one such anchor per card and its href is the card's true
# link, so use these anchors as the card boundaries.
_ANCHOR_RE = re.compile(r'<a\b[^>]*?href="(/vehicles/\d+)"[^>]*?>', re.I)


def _card_blocks(page_html):
    """Yield (listing_path, block_html) for each vehicle card, the link taken
    from that card's own wrapping anchor so a card is never paired with the
    next card's link."""
    anchors = list(_ANCHOR_RE.finditer(page_html))
    out = []
    for i, m in enumerate(anchors):
        start = m.end()
        end = anchors[i + 1].start() if i + 1 < len(anchors) else len(page_html)
        out.append((m.group(1), page_html[start:end]))
    return out


def _text_lines(block):
    text = re.sub(r"<[^>]+>", "\n", block)
    text = htmllib.unescape(text)
    return [l.strip() for l in text.split("\n") if l.strip()]


def parse_listing(page_html):
    """Turn the saved or live stock page HTML into a list of Car objects.

    Only the fields the list card provides are filled in: registration, make,
    model, derivative, year, mileage, grade, reserve, distance, photo and
    listing link. Detail only fields are not set here. Each Car also gets two
    private attributes for the detail step: ``_vid``, the id at the end of the
    listing path, and ``_fuel``, the fuel line found on the card or "".

    Returns a list, empty when the page holds no vehicle card anchors.
    """
    cars = []
    for listing_path, block in _card_blocks(page_html):
        # Listing link and vehicle id, taken from the card's own anchor.
        if not listing_path:
            continue
        vid = listing_path.rsplit("/", 1)[-1]

        # Photo and the alt text that holds make, model, derivative and year.
        alt = re.search(r'<img[^>]*\balt="([^"]+)"', block)
        src = re.search(r'<img[^>]*\bsrc="([^"]+)"', block)
        alt_text = htmllib.unescape(alt.group(1)) if alt else ""
        # Attribute values are HTML escaped in the page source, so decode the
        # link the same way as the alt text. Otherwise a query string keeps
        # "&amp;" where the real link has "&".
        photo_url = htmllib.unescape(src.group(1)) if src else ""

        lines = _text_lines(block)

        # First line that carries a plate; the space inside it is dropped.
        reg = ""
        for line in lines:
            plate = PLATE.search(line)
            if plate:
                reg = plate.group(0).replace(" ", "")
                break

        # First mileage line and first distance line win.
        mileage = None
        distance = None
        for line in lines:
            mi = MILEAGE.match(line)
            if mi and mileage is None:
                mileage = _num(mi.group(1))
            di = DISTANCE.match(line)
            if di and distance is None:
                distance = _num(di.group(1))

        # Grade sits as its own number right after a "Grade" label line.
        grade = None
        for i, line in enumerate(lines):
            if line.lower() == "grade" and i + 1 < len(lines):
                grade = _num(lines[i + 1])
                break

        # Reserve is the money amount on the "Reserve price" label line or one
        # of the two lines after it.
        reserve = None
        for i, line in enumerate(lines):
            if "reserve price" in line.lower():
                for nearby in lines[i:i + 3]:
                    amount = MONEY.search(nearby)
                    if amount:
                        reserve = _num(amount.group(1))
                        break
                break

        # Make, model, derivative and year come from the alt text, which reads
        # like "Nissan Qashqai Tekna DCI 2017".
        make = model = derivative = ""
        year = None
        if alt_text:
            # The year trails the description, so take the LAST year shaped
            # number. Taking the first would read a model name such as the
            # "2008" in "Peugeot 2008 Allure 2019" as the year and cut the
            # model and derivative off.
            year_hits = list(YEAR.finditer(alt_text))
            if year_hits:
                ym = year_hits[-1]
                year = _num(ym.group(0))
                description = alt_text[: ym.start()].strip()
            else:
                description = alt_text
            toks = description.split()
            # NOTE(review): the make is taken as the first word only, so a two
            # word make such as "Land Rover" lands as make "Land", model "Rover".
            if toks:
                make = toks[0]
                if len(toks) > 1:
                    model = toks[1]
                if len(toks) > 2:
                    derivative = " ".join(toks[2:])

        # First line that is exactly one of the known fuel labels.
        fuel = next((line for line in lines if line.lower() in FUELS), "")

        car = Car(
            reg=reg,
            make=make,
            model=model,
            derivative=derivative,
            year=year,
            mileage=mileage,
            grade=grade,
            reserve=reserve,
            distance_miles=distance,
            photo_url=photo_url,
            listing_url=browser.SITES["motorway"]["base_url"] + listing_path,
            source="Motorway",
            engine=derivative,  # placeholder, refined from detail page
        )
        # Stash the id and fuel for the detail step.
        car._vid = vid
        car._fuel = fuel
        cars.append(car)

    return cars


def _find_first(obj, key):
    """Find the first value for a key anywhere in a nested dict or list."""
    if isinstance(obj, dict):
        if key in obj:
            return obj[key]
        for v in obj.values():
            found = _find_first(v, key)
            if found is not None:
                return found
    elif isinstance(obj, list):
        for v in obj:
            found = _find_first(v, key)
            if found is not None:
                return found
    return None


def enrich_from_detail(page, car):
    """Open a car's own page and read the fields the list card does not show:
    previous owners, exact engine size and fuel, service history and CAP Clean.

    Reads the JSON held in the page's ``#__NEXT_DATA__`` script tag, not guessed
    HTML. Leaves a field as is if it cannot be read, so the brain can hold the
    car back rather than guess.

    page: a browser page offering goto, wait_for_selector and eval_on_selector
        (presumably a Playwright sync page; confirm against the caller).
    car: the Car to fill in. It is changed in place and also returned.

    If the data script never attaches, or its content is not valid JSON, the
    car is returned untouched. A failure of the navigation itself is not caught
    here and reaches the caller.
    """
    import json as _json
    url = car.listing_url or (
        browser.SITES["motorway"]["base_url"] + "/vehicles/" + getattr(car, "_vid", "")
    )
    page.goto(url, wait_until="domcontentloaded", timeout=45000)
    try:
        # A script tag is never visible, so wait for it to be attached, not visible.
        page.wait_for_selector("#__NEXT_DATA__", state="attached", timeout=15000)
    except Exception:
        return car
    blob = page.eval_on_selector("#__NEXT_DATA__", "el => el.textContent")
    try:
        data = _json.loads(blob)
    except Exception:
        return car

    owners = _find_first(data, "previousOwners")
    if isinstance(owners, dict) and isinstance(owners.get("count"), int):
        car.owners = owners["count"]

    eng = _find_first(data, "engineSize")
    fuel = _find_first(data, "fuel")
    if eng or fuel:
        # Keep the trim wording from the list and add the litres and fuel, so
        # the ban check has the fullest engine description we can give it.
        litres = ""
        try:
            # The size is divided by 1000 to give litres to one decimal place.
            litres = f"{round(int(eng) / 1000, 1)}" if eng else ""
        except Exception:
            # A size that will not read as a whole number is left out.
            litres = ""
        car.engine = " ".join(x for x in [car.derivative, litres, str(fuel or "")] if x).strip()
        # Read the stashed fuel defensively, as is done for _vid above: a Car
        # that did not come from this module's readers has no _fuel attribute.
        car._fuel = str(fuel or getattr(car, "_fuel", ""))

    sh = _find_first(data, "serviceHistory")
    if isinstance(sh, str) and sh:
        car.service_history = sh

    # CAP Clean. Motorway holds it as a price entry whose source is CAP. The
    # value may sit just before the source marker or inside the same object
    # after it, so try both layouts against the raw text.
    m = re.search(r'"value":(\d+)\}\s*,\s*"priceSource":"CAP"', blob)
    if not m:
        m = re.search(r'"priceSource":"CAP"[^}]*?"value":(\d+)', blob)
    if m:
        car.cap_clean = int(m.group(1))

    return car


def _money(s):
    if not s:
        return None
    m = re.search(r"[\d,]+", str(s))
    if not m:
        return None
    n = m.group(0).replace(",", "")
    return int(n) if n.isdigit() else None


def _engine_from_csv(model, engine_size, fuel):
    """Build the fullest engine description for the ban check: the litres from
    the engine size, the model and trim text (which carries family names like
    TSI, TFSI, EcoBoost, PureTech, DCI), and the fuel."""
    litres = ""
    cc = re.sub(r"\D", "", str(engine_size or ""))
    if cc.isdigit() and int(cc) > 0:
        litres = f"{round(int(cc) / 1000, 1)}"
    return " ".join(x for x in [litres, str(model or ""), str(fuel or "")] if x).strip()


def read_export(playwright, headless=True):
    """Download the filtered stock as a CSV and turn its auction rows into Cars.

    The CSV is the report Motorway provides. It covers the whole brief matching
    list in one go and carries owners, engine size, CAP, service history and
    the link, so no per car detail page is needed. Only rows whose buying type
    mentions "live sale" are kept, since that is the auction buying type.

    playwright: passed straight to browser.open_reader_context.
    headless: whether the browser runs without a window.

    The file is saved as bidbrain_mw_export.csv in the system temp folder.
    Raises RuntimeError if the download button is missing, the CSV has no rows,
    or none of its rows are auction cars. Browser errors are not caught.
    """
    import csv
    import tempfile

    csv_path = os.path.join(tempfile.gettempdir(), "bidbrain_mw_export.csv")
    context = browser.open_reader_context(playwright, "motorway", headless=headless)
    try:
        if context.pages:
            page = context.pages[0]
        else:
            page = context.new_page()
        page.goto(browser.SITES["motorway"]["stock_url"], wait_until="domcontentloaded", timeout=45000)
        page.wait_for_selector('[data-testid="vehicleCardLink"]', timeout=30000)
        # Open the download dialog and give it a moment to appear.
        page.click('button:has-text("Download")', timeout=8000)
        page.wait_for_timeout(700)
        # Choose the filtered list if that option is offered; carry on if not.
        try:
            page.click('text=Filtered vehicles', timeout=4000)
            page.wait_for_timeout(400)
        except Exception:
            pass
        download_buttons = page.query_selector_all('button:has-text("Download")')
        if not download_buttons:
            raise RuntimeError("Motorway download button not found. The page may have changed.")
        # The last matching button is the one clicked to start the download.
        with page.expect_download(timeout=90000) as pending:
            download_buttons[-1].click()
        pending.value.save_as(csv_path)
    finally:
        context.close()

    # utf-8-sig drops a byte order mark if the file starts with one.
    with open(csv_path, encoding="utf-8-sig") as handle:
        rows = list(csv.DictReader(handle))
    if not rows:
        raise RuntimeError("Motorway CSV downloaded but had no rows. Stopping loudly.")

    def text(row, column):
        # A missing or empty column reads as "", otherwise the trimmed text.
        return (row.get(column) or "").strip()

    cars = []
    for row in rows:
        # Auction only. Live sale is the auction buying type.
        buying_type = str(row.get("Buying type", "")).lower()
        if "live sale" not in buying_type:
            continue
        fuel = text(row, "Fuel")
        extras = (row.get("Equipment") or "") + " " + (row.get("Additional specifications") or "")
        car = Car(
            reg=text(row, "VRM"),
            make=text(row, "Make"),
            model=text(row, "Model"),
            year=_num(row.get("Year")),
            mileage=_num(row.get("Mileage")),
            owners=_num(row.get("Number of owners")),
            grade=_num(row.get("Exterior grade")),
            reserve=_money(row.get("Reserve price")),
            cap_clean=_money(row.get("CAP clean value")),
            service_history=text(row, "Service history"),
            engine=_engine_from_csv(row.get("Model"), row.get("Engine size"), row.get("Fuel")),
            transmission=text(row, "Transmission"),
            fuel=fuel,
            body_type=text(row, "Body type"),
            equipment=extras.strip(),
            location=text(row, "Location"),
            listing_url=text(row, "Motorway vehicle link"),
            source="Motorway",
        )
        car._fuel = fuel
        cars.append(car)
    if not cars:
        raise RuntimeError("Motorway CSV had rows but no auction cars. Stopping loudly.")
    return cars


def read_live(playwright, limit=None):
    """Open the logged in stock list in a headless browser and read the cards.

    Fails loudly if the page does not look like the stock list, so a changed
    page never passes quietly wrong data through.

    playwright: passed straight to browser.open_reader_context.
    limit: if truthy, only the first ``limit`` cars are returned; None or 0
        returns them all.

    Raises RuntimeError if no vehicle card shows up within the wait, or if the
    page was read but no car could be parsed from it.
    """
    context = browser.open_reader_context(playwright, "motorway", headless=True)
    try:
        if context.pages:
            page = context.pages[0]
        else:
            page = context.new_page()
        page.goto(browser.SITES["motorway"]["stock_url"], wait_until="domcontentloaded", timeout=45000)
        # Wait for the cards to render.
        try:
            page.wait_for_selector('[data-testid="vehicleCardLink"]', timeout=30000)
        except Exception:
            no_cards = (
                "Motorway stock list did not show any vehicle cards. The page may "
                "have changed, or the login may have expired, or it is before the "
                "4:30pm stock time. Stopping rather than guessing."
            )
            raise RuntimeError(no_cards)
        markup = page.content()
    finally:
        # Close the browser whether or not the read worked.
        context.close()

    found = parse_listing(markup)
    if not found:
        raise RuntimeError("Read the Motorway page but found no cars. Stopping loudly.")
    if limit:
        return found[:limit]
    return found
