Source code for app.propertyme.cache

"""PropertyMe cache loading and privacy-safe mapping.

This module turns the local privacy-shaped PropertyMe seed file into
``PropertyRecord`` objects used by Alba Core. It is the boundary between source
data and matching data.

Do not add raw owner, tenant, or OAuth fields to the mapped record. If future
PropertyMe sync work needs raw exports, keep them outside Git and map only the
fields needed for matching.
"""

from __future__ import annotations

import html
import json
import re
from datetime import date
from pathlib import Path
from typing import Any

from app.alba_core.models import PropertyRecord


def load_property_cache(path: str | Path) -> list[PropertyRecord]:
    """Read the on-disk PropertyMe cache and map it into matcher records.

    A missing or unusable cache is treated as an error on purpose: tests and
    demos are expected to run against real cached records rather than quietly
    falling back to made-up properties.

    Args:
        path: Location of the JSON cache file.

    Returns:
        One ``PropertyRecord`` per JSON object found in the cache.

    Raises:
        FileNotFoundError: The cache file does not exist.
        ValueError: The cache holds no item list, an empty list, or a list
            without any JSON objects in it.
    """
    source = Path(path)
    if not source.exists():
        raise FileNotFoundError(f"Property cache not found: {source}")

    decoded = json.loads(source.read_text(encoding="utf-8"))

    # The cache is either a bare list of items or an object that wraps the
    # list under the "items" key.
    if isinstance(decoded, dict):
        entries = decoded.get("items")
    else:
        entries = decoded

    if not (isinstance(entries, list) and entries):
        raise ValueError(f"Property cache has no usable items: {source}")

    # Entries that are not JSON objects are skipped rather than mapped.
    mapped = []
    for entry in entries:
        if isinstance(entry, dict):
            mapped.append(_map_item(entry))

    if not mapped:
        raise ValueError(f"Property cache did not contain valid property records: {source}")
    return mapped
def _map_item(item: dict[str, Any]) -> PropertyRecord:
    """Map one cached JSON item into Alba Core's matching model.

    Only the keys read below are copied onto the record; nothing else from
    the source item is carried across.

    Args:
        item: One JSON object from the property cache.

    Returns:
        A ``PropertyRecord`` populated from the item's matching fields.
    """
    address = _address_from(item)
    return PropertyRecord(
        # Fall back to the display address when the item carries no id.
        property_id=str(item.get("property_id") or item.get("id") or address),
        address=address,
        suburb=_text(item.get("suburb")),
        city=_text(item.get("city")),
        region=_text(item.get("state_region") or item.get("state")),
        postcode=_text(item.get("zip_code") or item.get("zip")),
        property_type=_text(item.get("property_type")),
        # Rent keeps the truthiness chain: a numeric zero falls through to the
        # alternate keys instead of being recorded as a rent of 0.
        rent_pw=_float(item.get("rent_pw") or item.get("Rent_PW") or item.get("RentAmount")),
        # Room counts use _first_present so a numeric 0 is kept as 0 rather
        # than being discarded by an `or` chain.
        bedrooms=_int(_first_present(item, "bedrooms", "Bedrooms")),
        bathrooms=_int(_first_present(item, "bathrooms", "Bathrooms")),
        parking_spaces=_parking_spaces(item),
        availability=_text(item.get("availability_today")),
        available_from=_date(item.get("available_from")),
        furnishing=_text(item.get("furnishing")),
        pets=_text(item.get("pets")),
        smoker=_text(item.get("smoker")),
        pool=_text(item.get("pool")),
        spa=_text(item.get("spa")),
        view_type=_text(item.get("view_type")),
        description=_clean_description(item.get("generated_description") or item.get("description")),
        listing_url=_text(item.get("listing_url")),
    )


def _first_present(item: dict[str, Any], *keys: str) -> Any:
    """Return the first value among ``keys`` that is neither None nor ""."""
    for key in keys:
        value = item.get(key)
        if value is not None and value != "":
            return value
    return None


def _address_from(item: dict[str, Any]) -> str:
    """Build a display address from safe address fields.

    Parts are stripped before blanks are dropped, so a whitespace-only field
    does not leave an empty ", " segment in the result. When no address part
    is usable, the item's ``property_id`` (or "Unknown address") is returned.
    """
    parts = (
        item.get("apartment_number"),
        item.get("street_number"),
        item.get("street_address"),
        item.get("suburb"),
        item.get("city"),
    )
    cleaned = (_text(part) for part in parts)
    address = ", ".join(part for part in cleaned if part is not None)
    return address or str(item.get("property_id") or "Unknown address")


def _parking_spaces(item: dict[str, Any]) -> int | None:
    """Combine garage and other parking counts into one matcher field.

    Returns None only when neither count is available; a known zero is
    reported as 0.
    """
    garages = _int(item.get("garages"))
    other = _int(item.get("other_parking_spots"))
    if garages is None and other is None:
        return None
    return (garages or 0) + (other or 0)


def _text(value: Any) -> str | None:
    """Normalise blank-ish source values to None.

    Non-string values are converted with ``str`` before stripping.
    """
    if value is None:
        return None
    text = str(value).strip()
    return text or None


def _float(value: Any) -> float | None:
    """Coerce source numeric values without raising on bad data.

    Returns None for None, "", and anything ``float`` cannot convert,
    including integers too large to represent as a float.
    """
    if value is None or value == "":
        return None
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        return None


def _int(value: Any) -> int | None:
    """Coerce source numeric values into an int when possible.

    The value is parsed as a float first and then truncated. NaN and
    infinite values cannot be converted to int and yield None instead of
    raising.
    """
    number = _float(value)
    if number is None:
        return None
    try:
        return int(number)
    except (ValueError, OverflowError):
        # int() raises ValueError for NaN and OverflowError for +/-infinity.
        return None


def _date(value: Any) -> date | None:
    """Parse an ISO-style date from PropertyMe/cache data.

    Only the first ten characters are parsed, so any trailing time component
    is ignored. Unparseable values yield None.
    """
    text = _text(value)
    if not text:
        return None
    try:
        return date.fromisoformat(text[:10])
    except ValueError:
        return None


# Compiled once at import time; used only by _clean_description.
_TAG_RE = re.compile(r"<[^>]+>")
_WHITESPACE_RE = re.compile(r"\s+")


def _clean_description(value: Any) -> str | None:
    """Strip HTML and collapse whitespace in source descriptions.

    Tags are replaced by spaces before entities are unescaped, so escaped
    markup such as ``&lt;b&gt;`` survives as literal text. Returns None when
    nothing is left after cleaning.
    """
    text = _text(value)
    if not text:
        return None
    text = _TAG_RE.sub(" ", text)
    text = html.unescape(text)
    text = _WHITESPACE_RE.sub(" ", text)
    return text.strip() or None