"""
Motorway reader.

Reads the logged in dealer stock list at pro.motorway.co.uk/vehicles and turns
each card into a Car. The list card carries registration, make, model and
derivative, year, mileage, fuel, transmission, distance, condition grade,
reserve price, the photo and the listing link.

Some fields are not on the list card and live on each car's own page: CAP
Clean, previous owners, service history and VAT status, and the exact engine.
Those are fetched by visiting the car's page, but only for cars worth the
look, so the run stays quick. See read_live below.

Nothing here bids. It reads the screen only.
"""
import os
import re
import html as htmllib

from ..pricing import Car
from .. import browser

# Patterns for the text fragments found on a list card.
# Registration plate: two letters, two digits, optional space, three letters.
PLATE = re.compile(r"\b[A-Z]{2}\d{2}\s?[A-Z]{3}\b")
# Four digit year from 1980 up to 2049.
YEAR = re.compile(r"\b(19[89]\d|20[0-4]\d)\b")
# A whole line such as "45,123 mi".
MILEAGE = re.compile(r"^([\d,]+)\s*mi$")
# A whole line such as "12 mi away" (any letter case).
DISTANCE = re.compile(r"^(\d+)\s*mi away$", re.I)
# A pound amount such as "£7,250".
MONEY = re.compile(r"£\s*([\d,]+)")
# Lower-cased card lines that are taken to be the fuel type.
FUELS = {"petrol", "diesel", "hybrid", "electric", "plug-in hybrid", "petrol hybrid"}


def _num(s):
    """Return s as an int, or None when it is missing or not a whole number.

    Commas (thousands separators) and surrounding whitespace are dropped
    before the check, so "45,123" reads as 45123. Anything that is not made
    of digits only after that (decimals, signs, words) gives None.
    """
    if s is None:
        return None
    digits = str(s).replace(",", "").strip()
    if not digits.isdigit():
        return None
    return int(digits)


# Each card is wrapped by its own anchor, for example
#   <a data-testid="vehicleCardLink" href="/vehicles/123456"> ... </a>
# There is exactly one such anchor per card and its href is the card's true
# link, so use these anchors as the card boundaries.
# Opening tag of a card's wrapping anchor; group 1 is the listing path.
_ANCHOR_RE = re.compile(r'<a\b[^>]*?href="(/vehicles/\d+)"[^>]*?>', re.I)
# The card photo's alt text and source, read from the <img> tag only so that
# an alt or src on some other element is never picked up by mistake.
_IMG_ALT_RE = re.compile(r'<img\b[^>]*\balt="([^"]+)"')
_IMG_SRC_RE = re.compile(r'<img\b[^>]*\bsrc="([^"]+)"')


def _card_blocks(page_html):
    """Return a list of (listing_path, block_html), one per vehicle card.

    The link is taken from the card's own wrapping anchor, and the block is
    the HTML from the end of that anchor's opening tag up to the start of the
    next card's anchor (or the end of the page), so a card is never paired
    with the next card's link.
    """
    anchors = list(_ANCHOR_RE.finditer(page_html))
    blocks = []
    for i, anchor in enumerate(anchors):
        if i + 1 < len(anchors):
            end = anchors[i + 1].start()
        else:
            end = len(page_html)
        blocks.append((anchor.group(1), page_html[anchor.end():end]))
    return blocks


def _text_lines(block):
    """Return the visible text of an HTML fragment as non-empty, stripped lines.

    Every tag is replaced by a line break, so each text node ends up on its
    own line, and HTML entities are decoded.
    """
    text = re.sub(r"<[^>]+>", "\n", block)
    text = htmllib.unescape(text)
    return [line.strip() for line in text.split("\n") if line.strip()]


def _split_title(alt_text):
    """Split photo alt text into (make, model, derivative, year).

    The alt text reads like "Nissan Qashqai Tekna DCI 2017". The year is the
    LAST year-like number, because some model names are themselves year-like
    ("Peugeot 2008 Allure 2017"); taking the first one would read the model
    as the year and throw the model and trim away. Everything before the
    year is split on whitespace: first word make, second word model, the rest
    derivative. Missing parts come back as "" (or None for the year).
    """
    year = None
    title = alt_text
    year_hits = list(YEAR.finditer(alt_text))
    if year_hits:
        last = year_hits[-1]
        year = _num(last.group(0))
        title = alt_text[: last.start()].strip()
    toks = title.split()
    make = toks[0] if toks else ""
    model = toks[1] if len(toks) > 1 else ""
    derivative = " ".join(toks[2:])
    return make, model, derivative, year


def parse_listing(page_html):
    """Turn the saved or live stock page HTML into a list of Car objects.

    Only the fields the list card provides are filled in. Detail only fields
    stay at whatever Car defaults them to for now. Each returned car also
    carries two private attributes for the detail step: ``_vid`` (the vehicle
    id from the listing link) and ``_fuel`` (the fuel line from the card, or
    "" when none was found).
    """
    cars = []
    for listing_path, block in _card_blocks(page_html):
        # Vehicle id is the last path segment of the card's own link.
        vid = listing_path.rsplit("/", 1)[-1]

        # Photo and the alt text that holds make, model, derivative and year.
        alt = _IMG_ALT_RE.search(block)
        src = _IMG_SRC_RE.search(block)
        alt_text = htmllib.unescape(alt.group(1)) if alt else ""

        lines = _text_lines(block)

        # Registration: the first plate-shaped text on the card, spaces removed.
        reg = ""
        for line in lines:
            plate = PLATE.search(line)
            if plate:
                reg = plate.group(0).replace(" ", "")
                break

        # Mileage and distance: the first line of each shape wins.
        mileage = None
        distance = None
        for line in lines:
            mi = MILEAGE.match(line)
            if mi and mileage is None:
                mileage = _num(mi.group(1))
            di = DISTANCE.match(line)
            if di and distance is None:
                distance = _num(di.group(1))

        # Grade sits as its own number right after a "Grade" label line.
        grade = None
        for i, line in enumerate(lines):
            if line.lower() == "grade" and i + 1 < len(lines):
                grade = _num(lines[i + 1])
                break

        # Reserve is the money amount right after the "Reserve price" label:
        # look on the label line itself and the two lines after it.
        reserve = None
        for i, line in enumerate(lines):
            if "reserve price" in line.lower():
                for candidate in lines[i:i + 3]:
                    amount = MONEY.search(candidate)
                    if amount:
                        reserve = _num(amount.group(1))
                        break
                break

        make, model, derivative, year = _split_title(alt_text)

        # Fuel is a line that is exactly one of the known fuel names.
        fuel = ""
        for line in lines:
            if line.lower() in FUELS:
                fuel = line
                break

        car = Car(
            reg=reg,
            make=make,
            model=model,
            derivative=derivative,
            year=year,
            mileage=mileage,
            grade=grade,
            reserve=reserve,
            distance_miles=distance,
            photo_url=(src.group(1) if src else ""),
            listing_url=browser.SITES["motorway"]["base_url"] + listing_path,
            source="Motorway",
            engine=derivative,  # placeholder, refined from detail page
        )
        # Stash the id and fuel for the detail step.
        car._vid = vid
        car._fuel = fuel
        cars.append(car)
    return cars


def _find_first(obj, key):
    """Find the first value for a key anywhere in a nested dict or list.

    Searches depth first in iteration order. A dict that has the key returns
    its value straight away, even when that value is None. Returns None when
    the key is not found.
    """
    if isinstance(obj, dict):
        if key in obj:
            return obj[key]
        children = obj.values()
    elif isinstance(obj, list):
        children = obj
    else:
        return None
    for child in children:
        found = _find_first(child, key)
        if found is not None:
            return found
    return None


def enrich_from_detail(page, car):
    """Open a car's own page and read the fields the list card does not show.

    Reads previous owners, engine size and fuel, service history and CAP
    Clean from the page's embedded data (the #__NEXT_DATA__ script), not
    guessed HTML. Leaves a field as is if it cannot be read, so the brain can
    hold the car back rather than guess.

    page is a browser page object offering goto, wait_for_selector and
    eval_on_selector. car is updated in place and also returned.
    """
    import json as _json

    url = car.listing_url or (
        browser.SITES["motorway"]["base_url"] + "/vehicles/" + getattr(car, "_vid", "")
    )
    page.goto(url, wait_until="domcontentloaded", timeout=45000)
    try:
        # A script tag is never visible, so wait for it to be attached, not visible.
        page.wait_for_selector("#__NEXT_DATA__", state="attached", timeout=15000)
    except Exception:
        # Best effort: no embedded data means nothing to add.
        return car
    blob = page.eval_on_selector("#__NEXT_DATA__", "el => el.textContent")
    try:
        data = _json.loads(blob)
    except (TypeError, ValueError):
        # Empty or malformed data: leave the car as the list card had it.
        return car

    owners = _find_first(data, "previousOwners")
    if isinstance(owners, dict) and isinstance(owners.get("count"), int):
        car.owners = owners["count"]

    eng = _find_first(data, "engineSize")
    fuel = _find_first(data, "fuel")
    if eng or fuel:
        # Keep the trim wording from the list and add the litres and fuel, so
        # the ban check has the fullest engine description we can give it.
        litres = ""
        if eng:
            try:
                # engineSize is treated as cubic centimetres here.
                litres = f"{round(int(eng) / 1000, 1)}"
            except (TypeError, ValueError, OverflowError):
                litres = ""
        car.engine = " ".join(
            x for x in [car.derivative, litres, str(fuel or "")] if x
        ).strip()
        # A car that did not come from parse_listing has no _fuel yet.
        car._fuel = str(fuel or getattr(car, "_fuel", ""))

    sh = _find_first(data, "serviceHistory")
    if isinstance(sh, str) and sh:
        car.service_history = sh

    # CAP Clean. Motorway holds it as a price entry whose source is CAP. The
    # value may sit just before or just after the source in the raw text.
    m = re.search(r'"value":(\d+)\}\s*,\s*"priceSource":"CAP"', blob)
    if not m:
        m = re.search(r'"priceSource":"CAP"[^}]*?"value":(\d+)', blob)
    if m:
        car.cap_clean = int(m.group(1))
    return car


def _money(s):
    """Return the first run of digits and commas in s as an int, else None.

    "£7,250" gives 7250. Empty or missing input, or text with no digits,
    gives None.
    """
    if not s:
        return None
    m = re.search(r"[\d,]+", str(s))
    if not m:
        return None
    digits = m.group(0).replace(",", "")
    return int(digits) if digits.isdigit() else None


def _engine_from_csv(model, engine_size, fuel):
    """Build the fullest engine description for the ban check.

    Joins the litres worked out from the engine size, the model and trim text
    (which carries family names like TSI, TFSI, EcoBoost, PureTech, DCI), and
    the fuel, skipping any part that is empty.
    """
    litres = ""
    # Keep only the digits and read them as cubic centimetres.
    # NOTE(review): a value already given in litres ("1.6") would be read as
    # 16 cc here; confirm the CSV always gives cc.
    cc = re.sub(r"\D", "", str(engine_size or ""))
    if cc.isdigit() and int(cc) > 0:
        litres = f"{round(int(cc) / 1000, 1)}"
    return " ".join(x for x in [litres, str(model or ""), str(fuel or "")] if x).strip()


def read_export(playwright, headless=True):
    """Download the filtered stock as a CSV and turn every auction row into a Car.

    This reads the whole brief matching list in one go (the report Motorway
    provides) and carries owners, engine size, CAP, service history and the
    link, so no per car detail page is needed. Only rows whose "Buying type"
    contains "live sale" are kept.

    Raises RuntimeError if the download button is missing, the CSV has no
    rows, or no row is an auction car. Other download or parse errors are
    left to propagate. The reader context is always closed.
    """
    import csv
    import tempfile

    out = os.path.join(tempfile.gettempdir(), "bidbrain_mw_export.csv")
    ctx = browser.open_reader_context(playwright, "motorway", headless=headless)
    try:
        page = ctx.pages[0] if ctx.pages else ctx.new_page()
        page.goto(
            browser.SITES["motorway"]["stock_url"],
            wait_until="domcontentloaded",
            timeout=45000,
        )
        page.wait_for_selector('[data-testid="vehicleCardLink"]', timeout=30000)
        page.click('button:has-text("Download")', timeout=8000)
        page.wait_for_timeout(700)
        try:
            page.click('text=Filtered vehicles', timeout=4000)
            page.wait_for_timeout(400)
        except Exception:
            # Deliberate best effort: carry on if the option cannot be clicked.
            pass
        buttons = page.query_selector_all('button:has-text("Download")')
        if not buttons:
            raise RuntimeError(
                "Motorway download button not found. The page may have changed."
            )
        # The last matching button is the one clicked to start the download.
        with page.expect_download(timeout=90000) as dl_info:
            buttons[-1].click()
        dl_info.value.save_as(out)
    finally:
        ctx.close()

    # utf-8-sig drops a leading byte order mark if the file has one.
    with open(out, encoding="utf-8-sig") as f:
        rows = list(csv.DictReader(f))
    if not rows:
        raise RuntimeError("Motorway CSV downloaded but had no rows. Stopping loudly.")

    def text(row, column):
        """Return the stripped text of a CSV cell, or "" when it is missing."""
        return (row.get(column) or "").strip()

    cars = []
    for r in rows:
        # Auction only. Live sale is the auction buying type.
        if "live sale" not in str(r.get("Buying type", "")).lower():
            continue
        car = Car(
            reg=text(r, "VRM"),
            make=text(r, "Make"),
            model=text(r, "Model"),
            year=_num(r.get("Year")),
            mileage=_num(r.get("Mileage")),
            owners=_num(r.get("Number of owners")),
            grade=_num(r.get("Exterior grade")),
            reserve=_money(r.get("Reserve price")),
            cap_clean=_money(r.get("CAP clean value")),
            service_history=text(r, "Service history"),
            engine=_engine_from_csv(r.get("Model"), r.get("Engine size"), r.get("Fuel")),
            transmission=text(r, "Transmission"),
            fuel=text(r, "Fuel"),
            body_type=text(r, "Body type"),
            equipment=(
                (r.get("Equipment") or "")
                + " "
                + (r.get("Additional specifications") or "")
            ).strip(),
            location=text(r, "Location"),
            listing_url=text(r, "Motorway vehicle link"),
            source="Motorway",
        )
        car._fuel = text(r, "Fuel")
        cars.append(car)
    if not cars:
        raise RuntimeError("Motorway CSV had rows but no auction cars. Stopping loudly.")
    return cars


def read_live(playwright, limit=None):
    """Open the logged in stock list and read the cards.

    Returns the cars parsed from the page, cut to the first ``limit`` when a
    non-zero limit is given. Raises RuntimeError if no vehicle card shows up
    in time or the page yields no cars, so a changed page never passes
    quietly wrong data through. The reader context is always closed.
    """
    ctx = browser.open_reader_context(playwright, "motorway", headless=True)
    try:
        page = ctx.pages[0] if ctx.pages else ctx.new_page()
        page.goto(
            browser.SITES["motorway"]["stock_url"],
            wait_until="domcontentloaded",
            timeout=45000,
        )
        # Wait for the cards to render.
        try:
            page.wait_for_selector('[data-testid="vehicleCardLink"]', timeout=30000)
        except Exception as exc:
            # Keep the underlying error attached so the real cause is visible.
            raise RuntimeError(
                "Motorway stock list did not show any vehicle cards. The page may "
                "have changed, or the login may have expired, or it is before the "
                "4:30pm stock time. Stopping rather than guessing."
            ) from exc
        page_html = page.content()
    finally:
        ctx.close()

    cars = parse_listing(page_html)
    if not cars:
        raise RuntimeError("Read the Motorway page but found no cars. Stopping loudly.")
    return cars[:limit] if limit else cars