Kingsolima · Kingsolima · May 30, 2026 · May 30, 2026 · May 30, 2026
diff --git a/backend/calculators/__init__.py b/backend/calculators/__init__.py
diff --git a/backend/calculators/traffic.py b/backend/calculators/traffic.py
@@ -0,0 +1,141 @@
+"""
+ITE-based daily trip calculator with TTC transit proximity modifier.
+
+Replaces the XGBoost traffic model. The ITE formula is deterministic and
+more accurate for new buildings than fitting XGBoost on ITE-derived labels.
+Reference: ITE Trip Generation Manual, 11th edition.
+"""
+
+from __future__ import annotations
+from pathlib import Path
+
+DATA_DIR = Path(__file__).parent.parent.parent / "data"  # <repo>/data: three directories up from this file
+
+# ITE Trip Generation Manual 11th edition rates, keyed by lower-cased building type.
+# "per": "unit"      → rate × total_dwelling_units  (floors × units_per_floor)
+# "per": "1000sqft"  → rate × (total_sqft / 1000)
+_ITE_RATES: dict[str, dict] = {
+    # Residential — ITE 220/221/222. High-rise has lower car ownership → fewer trips/unit.
+    "residential":             {"rate": 6.65, "per": "unit"},   # unqualified residential uses the ITE 221 mid-rise rate
+    "residential (mid-rise)":  {"rate": 6.65, "per": "unit"},   # ITE 221
+    "residential (high-rise)": {"rate": 4.20, "per": "unit"},   # ITE 222 urban high-rise
+    # Mixed-use: residential tower above ground-floor retail.
+    # Blended ~80% residential / 20% retail with 15% internal-capture discount.
+    "mixed-use":               {"rate": 12.0, "per": "1000sqft"},
+    # Commercial
+    "commercial office":       {"rate": 11.03, "per": "1000sqft"},  # ITE 710
+    # Retail
+    "retail / podium":         {"rate": 42.70, "per": "1000sqft"},  # ITE 820
+    "retail":                  {"rate": 42.70, "per": "1000sqft"},  # same rate as "retail / podium"
+    # Industrial
+    "industrial":              {"rate": 6.97,  "per": "1000sqft"},  # ITE 110
+    # Institutional — separate from office (ITE 610 hospital, ITE 520 school)
+    "institutional":           {"rate": 13.22, "per": "1000sqft"},  # generic institutional reuses the hospital rate
+    "hospital":                {"rate": 13.22, "per": "1000sqft"},  # ITE 610
+    "school":                  {"rate": 14.30, "per": "1000sqft"},  # ITE 520
+    # Assembly
+    "assembly":                {"rate": 9.11,  "per": "1000sqft"},  # ITE 560 place of worship
+    "place of worship":        {"rate": 9.11,  "per": "1000sqft"},  # ITE 560
+}
+_DEFAULT_RATE = {"rate": 11.03, "per": "1000sqft"}  # unrecognised types fall back to the office rate
+
+# Fractional reduction applied to base ITE trips, keyed by transit tier.
+# Every TTC stop counts the same; the nearer tier earns the larger discount.
+_TRANSIT_DISCOUNTS: dict[str, float] = {
+    "transit_within_400m": 0.30,  # nearest stop is closer than 400 m
+    "transit_within_800m": 0.15,  # nearest stop is closer than 800 m
+    "none":                0.00,  # no stop in range, no coordinates, or the lookup failed
+}
+
+_ttc_stops = None  # lazily loaded GeoDataFrame; stays None while the parquet is missing or unreadable
+
+
+def _load_ttc() -> None:
+    """Load TTC stops into ``_ttc_stops`` on first use; no-op if loaded or file absent."""
+    global _ttc_stops
+    path = DATA_DIR / "ttc_stops.parquet"
+    if _ttc_stops is not None or not path.exists():
+        return
+    try:
+        import geopandas as gpd
+        # Project to UTM 17N (metres) once here, not on every distance lookup.
+        _ttc_stops = gpd.read_parquet(path).to_crs("EPSG:26917")
+    except Exception as exc:
+        print(f"[traffic] WARNING: could not load ttc_stops: {exc}")
+
+
+def _transit_tier(lat: float, lng: float) -> str:
+    """Map a lat/lng point to a transit tier by distance to the nearest TTC stop."""
+    _load_ttc()
+    if _ttc_stops is None:
+        return "none"
+    try:
+        import geopandas as gpd
+        from shapely.geometry import Point
+        # Distances are only meaningful in a metric CRS: UTM zone 17N (metres).
+        site = gpd.GeoSeries([Point(lng, lat)], crs="EPSG:4326")
+        site_m = site.to_crs("EPSG:26917").iloc[0]
+        stops_m = _ttc_stops.to_crs("EPSG:26917")
+        nearest_m = float(stops_m.distance(site_m).min())
+    except Exception as exc:
+        print(f"[traffic] transit tier error: {exc}")
+        return "none"
+    if nearest_m < 400:
+        return "transit_within_400m"
+    return "transit_within_800m" if nearest_m < 800 else "none"
+
+
+def estimate_daily_trips(building: dict) -> dict | None:
+    """
+    Estimate daily vehicle trips for a proposed building.
+
+    Missing/zero floors, footprint_m2, units_per_floor default to 10, 2000, 10.
+    Returns dict(score 0-100, daily_trips, daily_trips_base, transit_tier, description); None on error.
+    """
+    try:
+        btype = (building.get("type") or "residential").lower().strip()
+        ite = _ITE_RATES.get(btype, _DEFAULT_RATE)
+
+        floors        = building.get("floors")        or 10
+        footprint_m2  = building.get("footprint_m2")  or 2000
+        units_per_floor = building.get("units_per_floor") or 10
+
+        if ite["per"] == "unit":
+            size = floors * units_per_floor
+        else:
+            size = footprint_m2 * floors * 10.764 / 1000  # m² → thousands of sqft
+
+        # Floor at zero so negative dimensions cannot produce negative trips/score.
+        base_trips = max(0.0, ite["rate"] * size)
+
+        lat, lng = building.get("lat"), building.get("lng")
+        # A coordinate of 0.0 is valid, so test for None rather than truthiness.
+        tier = "none" if lat is None or lng is None else _transit_tier(lat, lng)
+        discount = _TRANSIT_DISCOUNTS[tier]
+        trips = base_trips * (1 - discount)
+
+        # Impact score: 0 = minimal, 100 = extreme (2 000 trips → 100)
+        score = min(100, int(trips / 20))
+
+        transit_note = (
+            f" TTC access within {'400' if '400' in tier else '800'}m"
+            f" reduces vehicle trips by {int(discount * 100)}%."
+            if tier != "none" else ""
+        )
+        severity = "significant" if trips > 500 else "moderate" if trips > 200 else "low"
+
+        return {
+            "score": score,
+            "daily_trips": round(trips),
+            "daily_trips_base": round(base_trips),
+            "transit_tier": tier,
+            "description": (
+                f"Estimated {trips:.0f} daily vehicle trips "
+                f"(ITE {ite['rate']}/{'unit' if ite['per'] == 'unit' else '1,000 sqft'})."
+                f"{transit_note} "
+                f"Peak-hour intersection impact: {severity}."
+            ),
+        }
+    except Exception as exc:
+        print(f"[traffic] estimate error: {exc}")
+        return None
diff --git a/backend/xgb_models.py b/backend/xgb_models.py
@@ -6,6 +6,7 @@
 """
 
 import json
+import joblib
 import numpy as np
 from pathlib import Path
 
@@ -44,11 +45,23 @@
     "repair": 4, "renovation": 5, "other": 6,
 }
 
+# GFA column → Ontario EWRB building type name used during training.
+# These are PrimPropTypCalc values from the Ontario public EWRB release.
+_GFA_TO_EWRB = {
+    "RESIDENTIAL":                    "Multifamily Housing",
+    "BUSINESS_AND_PERSONAL_SERVICES":  "Office",
+    "MERCANTILE":                      "Retail Store",
+    "INDUSTRIAL":                      "Distribution Center",
+    "ASSEMBLY":                        "Other",   # no dedicated class here; shares "Other" with INSTITUTIONAL
+    "INSTITUTIONAL":                   "Other",   # no dedicated class here; shares "Other" with ASSEMBLY
+}
+
 
 class _Models:
-    energy:   "xgb.XGBRegressor | None" = None
-    traffic:  "xgb.XGBRegressor | None" = None
-    economic: "xgb.XGBRegressor | None" = None
+    energy:         "xgb.XGBRegressor | None" = None
+    energy_gas:     "xgb.XGBRegressor | None" = None
+    economic:       "xgb.XGBRegressor | None" = None
+    energy_encoder: object = None   # sklearn LabelEncoder loaded from .pkl
     meta: dict = {}
 
 
@@ -58,7 +71,8 @@ class _Models:
 def load_models():
     if not _XGB_AVAILABLE:
         return
-    for name in ("energy_model", "traffic_model", "economic_model"):
+    # Traffic is handled by the ITE calculator — no model file needed.
+    for name in ("energy_model", "economic_model"):
         path = MODEL_DIR / f"{name}.json"
         meta_path = MODEL_DIR / f"{name}_meta.json"
         if not path.exists():
@@ -73,6 +87,28 @@ def load_models():
         except Exception as e:
             print(f"[xgb] WARNING: could not load {name}: {e}")
 
+    # Gas model (same feature schema as electricity model)
+    gas_path = MODEL_DIR / "energy_gas_model.json"
+    if gas_path.exists():
+        try:
+            model = xgb.XGBRegressor()
+            model.load_model(str(gas_path))
+            _m.energy_gas = model
+            gas_meta_path = MODEL_DIR / "energy_gas_model_meta.json"
+            if gas_meta_path.exists():
+                _m.meta["energy_gas_model"] = json.loads(gas_meta_path.read_text())
+            print("[xgb] Loaded energy_gas_model.json")
+        except Exception as e:
+            print(f"[xgb] WARNING: could not load energy_gas_model: {e}")
+
+    enc_path = MODEL_DIR / "energy_building_type_encoder.pkl"
+    if enc_path.exists():
+        try:
+            _m.energy_encoder = joblib.load(str(enc_path))
+            print(f"[xgb] Loaded energy_building_type_encoder.pkl")
+        except Exception as e:
+            print(f"[xgb] WARNING: could not load energy encoder: {e}")
+
 
 def _build_feature_row(building: dict) -> dict:
     """Convert a building spec dict → feature dict matching training columns."""
@@ -109,38 +145,74 @@ def _row_to_array(row: dict, feature_list: list) -> np.ndarray:
 
 
 def predict_energy(building: dict) -> dict | None:
-    """Returns predicted annual kWh from real EWRB data and a 0-100 environmental score."""
+    """
+    Predicts electricity and gas intensity (kWh/m²) then scales by GFA.
+    Trained on Ontario EWRB (private buildings) + Toronto EWRB (municipal).
+    Score: 0 = low energy use (good), 100 = high energy use (bad).
+    """
     if _m.energy is None:
         return None
     try:
         meta = _m.meta.get("energy_model", {})
         row  = _build_feature_row(building)
+        gfa  = row["total_gfa_m2"]
 
-        # Energy model uses only floor_area_m2 + building_type_enc
-        btype = building.get("type", "residential").lower()
+        btype        = building.get("type", "residential").lower()
         dominant_col = TYPE_TO_GFA.get(btype, "RESIDENTIAL")
-        type_enc_map = {"RESIDENTIAL": 0, "BUSINESS_AND_PERSONAL_SERVICES": 1,
-                        "MERCANTILE": 2, "INDUSTRIAL": 3, "ASSEMBLY": 4, "INSTITUTIONAL": 5}
-        feature_row = {
-            "floor_area_m2":     row["total_gfa_m2"],
-            "building_type_enc": float(type_enc_map.get(dominant_col, 0)),
-        }
-        features = meta.get("features", ["floor_area_m2", "building_type_enc"])
+        ewrb_type    = _GFA_TO_EWRB.get(dominant_col, "Other")
+
+        if _m.energy_encoder is not None:
+            enc      = _m.energy_encoder
+            classes  = list(enc.classes_)
+            target   = ewrb_type if ewrb_type in classes else "Office"
+            type_enc = float(enc.transform([target])[0])
+        else:
+            classes  = meta.get("building_type_classes", [])
+            target   = ewrb_type if ewrb_type in classes else (classes[0] if classes else "Other")
+            type_enc = float(classes.index(target)) if target in classes else 0.0
+
+        # Model predicts log1p(kWh/m²); multiply intensity × GFA for annual total
+        feature_row = {"building_type_enc": type_enc}
+        features    = meta.get("features", ["building_type_enc"])
         X = _row_to_array(feature_row, features)
 
-        log_kwh = float(_m.energy.predict(X)[0])
-        kwh = np.expm1(log_kwh)
-        gfa = row["total_gfa_m2"]
-        intensity = kwh / max(gfa, 1)
-        score = min(100, int(intensity / 3))   # ~300 kWh/m² → score 100
+        elec_intensity = float(np.expm1(float(_m.energy.predict(X)[0])))  # kWh/m²
+        kwh = elec_intensity * gfa
+
+        # Gas intensity prediction (same feature schema, no sanity gate needed)
+        gas_kwh_eq = 0.0
+        if _m.energy_gas is not None:
+            try:
+                gas_meta      = _m.meta.get("energy_gas_model", {})
+                Xg            = _row_to_array(feature_row, gas_meta.get("features", list(feature_row.keys())))
+                gas_intensity = float(np.expm1(float(_m.energy_gas.predict(Xg)[0])))  # kWh/m²
+                gas_kwh_eq    = gas_intensity * gfa
+            except Exception:
+                pass
+
+        total_kwh       = kwh + gas_kwh_eq
+        total_intensity = total_kwh / max(gfa, 1)
+        gas_gj          = gas_kwh_eq / 277.78 if gas_kwh_eq > 0 else None
+
+        # Score: ~800 kWh/m² total → 100 (high-energy industrial); typical office ~400 → 50
+        environmental_impact_score = min(100, int(total_intensity / 8))
+
+        gas_note = (
+            f" + {gas_gj:,.0f} GJ gas ({gas_kwh_eq / 1_000:.0f} MWh equiv.)"
+            if gas_gj else ""
+        )
         return {
-            "score": score,
+            "score": environmental_impact_score,
+            "score_meaning": "0 = low energy use (good), 100 = high energy use (bad)",
             "annual_kwh": round(kwh),
-            "intensity_kwh_per_m2": round(intensity, 1),
+            "annual_gas_gj": round(gas_gj) if gas_gj else None,
+            "total_energy_kwh": round(total_kwh),
+            "intensity_kwh_per_m2": round(total_intensity, 1),
             "description": (
-                f"Predicted annual electricity: {kwh/1000:.0f} MWh "
-                f"({intensity:.0f} kWh/m²) — trained on {meta.get('source', 'Toronto EWRB')} data. "
-                f"{'Above' if intensity > 200 else 'Within'} Toronto benchmark for this building type."
+                f"Predicted annual electricity: {kwh / 1_000:.0f} MWh{gas_note}. "
+                f"Total energy intensity: {total_intensity:.0f} kWh/m² "
+                f"({'above' if total_intensity > 300 else 'within'} typical Toronto benchmark). "
+                f"Environmental impact: {environmental_impact_score}/100 — higher means greater energy use."
             ),
         }
     except Exception as e:
@@ -149,27 +221,9 @@ def predict_energy(building: dict) -> dict | None:
 
 
 def predict_traffic(building: dict) -> dict | None:
-    """Returns predicted daily vehicle trips and a 0-100 traffic impact score."""
-    if _m.traffic is None:
-        return None
-    try:
-        meta  = _m.meta.get("traffic_model", {})
-        row   = _build_feature_row(building)
-        X     = _row_to_array(row, meta.get("features", list(row.keys())))
-        trips = max(0.0, float(_m.traffic.predict(X)[0]))
-        score = min(100, int(trips / 20))   # 2000 trips → score 100
-        return {
-            "score": score,
-            "daily_trips": round(trips),
-            "description": (
-                f"Estimated +{trips:.0f} daily vehicle trips generated. "
-                f"Peak-hour impact on surrounding intersections: "
-                f"{'significant' if trips > 500 else 'moderate' if trips > 200 else 'low'}."
-            ),
-        }
-    except Exception as e:
-        print(f"[xgb] traffic predict error: {e}")
-        return None
+    """Return ITE-estimated daily vehicle trips and a 0-100 traffic impact score, or None if the calculator fails."""
+    from calculators.traffic import estimate_daily_trips  # deferred to call time; resolves when backend/ is on sys.path
+    return estimate_daily_trips(building)
 
 
 def predict_economic(building: dict) -> dict | None:

diff --git a/data/test.py b/data/test.py
@@ -0,0 +1,41 @@
+"""Quick smoke-test for all three inference paths. Run from project root:
+    python data/test.py
+"""
+import sys
+sys.path.insert(0, "backend")
+
+import xgb_models as xm
+from calculators.traffic import estimate_daily_trips
+
+xm.load_models()  # populate xm._m explicitly; nothing below loads the models or the encoder
+if xm._m.energy is None or xm._m.energy_encoder is None:
+    sys.exit("energy model/encoder not loaded - check the model files and the xgboost install")
+print("=== Fix 1: LabelEncoder encoding per building type ===")
+buildings = [
+    {"type": "commercial office",       "footprint_m2": 2000, "floors": 10},
+    {"type": "retail",                   "footprint_m2": 2000, "floors": 10},
+    {"type": "residential (high-rise)",  "footprint_m2": 2000, "floors": 10},
+]
+for b in buildings:
+    col    = xm.TYPE_TO_GFA.get(b["type"].lower(), "RESIDENTIAL")
+    ewrb   = xm._GFA_TO_EWRB.get(col, "Other")
+    enc    = int(xm._m.energy_encoder.transform([ewrb])[0])
+    result = xm.predict_energy(b)
+    print(f"  {b['type']:30s}  ewrb={ewrb:40s}  enc={enc:2d}  kwh={result['annual_kwh']:>12,}  score={result['score']}")
+
+print("\n=== Fix 2: ITE calculator — transit discount + correct residential unit ===")
+cases = [
+    ("downtown residential", {"type": "residential (high-rise)", "floors": 30, "footprint_m2": 1200, "units_per_floor": 8,  "lat": 43.6532, "lng": -79.3832}),
+    ("suburban residential", {"type": "residential (high-rise)", "floors": 30, "footprint_m2": 1200, "units_per_floor": 8,  "lat": 43.780,  "lng": -79.560}),
+    ("suburban retail",      {"type": "retail",                  "floors": 1,  "footprint_m2": 5000, "units_per_floor": 0,  "lat": 43.780,  "lng": -79.560}),
+]
+for label, b in cases:
+    r = estimate_daily_trips(b)
+    print(f"  {label:25s}  tier={r['transit_tier']:25s}  base={r['daily_trips_base']:5d}  final={r['daily_trips']:5d}  score={r['score']}")
+
+print("\n=== Integration: all three models together ===")
+b = {"type": "residential (high-rise)", "floors": 30, "footprint_m2": 1200,
+     "units_per_floor": 8, "lat": 43.6532, "lng": -79.3832}
+print("  Energy  :", xm.predict_energy(b))
+print("  Traffic :", xm.predict_traffic(b))
+print("  Economic:", xm.predict_economic(b))