Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added backend/calculators/__init__.py
Empty file.
141 changes: 141 additions & 0 deletions backend/calculators/traffic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
"""
ITE-based daily trip calculator with TTC transit proximity modifier.

Replaces the XGBoost traffic model. The ITE formula is deterministic and
more accurate for new buildings than fitting XGBoost on ITE-derived labels.
Reference: ITE Trip Generation Manual, 11th edition.
"""

from __future__ import annotations
from pathlib import Path

DATA_DIR = Path(__file__).parent.parent.parent / "data"

# Daily trip-generation rates, cited in this module as coming from the
# ITE Trip Generation Manual, 11th edition.
# Each row is (building type, rate, basis). The basis names what the rate
# is multiplied by:
#   "unit"     → rate × total_dwelling_units (floors × units_per_floor)
#   "1000sqft" → rate × (total_sqft / 1000)
# NOTE(review): several figures (e.g. 6.65, 11.03, 42.70) look like values
# published in an earlier edition of the manual — confirm against the 11th
# edition before relying on the citation.
_ITE_RATE_ROWS = (
    # Residential — ITE 220/221/222. High-rise has lower car ownership → fewer trips/unit.
    ("residential", 6.65, "unit"),               # ITE 221 mid-rise
    ("residential (mid-rise)", 6.65, "unit"),    # ITE 221
    ("residential (high-rise)", 4.20, "unit"),   # ITE 222 urban high-rise
    # Mixed-use: residential tower above ground-floor retail.
    # Blended ~80% residential / 20% retail with 15% internal-capture discount.
    ("mixed-use", 12.0, "1000sqft"),
    # Commercial
    ("commercial office", 11.03, "1000sqft"),    # ITE 710
    # Retail
    ("retail / podium", 42.70, "1000sqft"),      # ITE 820
    ("retail", 42.70, "1000sqft"),
    # Industrial
    ("industrial", 6.97, "1000sqft"),            # ITE 110
    # Institutional — separate from office (ITE 610 hospital, ITE 520 school)
    ("institutional", 13.22, "1000sqft"),
    ("hospital", 13.22, "1000sqft"),             # ITE 610
    ("school", 14.30, "1000sqft"),               # ITE 520
    # Assembly
    ("assembly", 9.11, "1000sqft"),              # ITE 560 place of worship
    ("place of worship", 9.11, "1000sqft"),      # ITE 560
)

# Lookup table keyed by lower-case building type; row order is preserved.
_ITE_RATES: dict[str, dict] = {
    btype: {"rate": rate, "per": basis} for btype, rate, basis in _ITE_RATE_ROWS
}

# Building types missing from the table use the office rate as a fallback.
_DEFAULT_RATE = {"rate": 11.03, "per": "1000sqft"}

# Fraction of the base ITE trips removed for each transit-proximity tier.
# Every TTC stop counts the same; the closer tier earns the larger discount.
_TRANSIT_DISCOUNTS: dict[str, float] = dict(
    transit_within_400m=0.30,
    transit_within_800m=0.15,
    none=0.00,
)

# Cache for the TTC stops GeoDataFrame. It is filled lazily and stays None
# when the parquet file is missing.
_ttc_stops = None


def _load_ttc() -> None:
    """Fill the module-level ``_ttc_stops`` cache on first use.

    Does nothing when the stops are already cached or when
    ``ttc_stops.parquet`` is absent from ``DATA_DIR``. A failure while
    importing geopandas or reading the file is printed as a warning and
    leaves the cache unset, so the next call tries again.
    """
    global _ttc_stops
    if _ttc_stops is not None:
        return
    parquet_file = DATA_DIR / "ttc_stops.parquet"
    if parquet_file.exists():
        try:
            # Imported here so the module itself loads without geopandas.
            import geopandas as gpd
            _ttc_stops = gpd.read_parquet(parquet_file)
        except Exception as exc:
            print(f"[traffic] WARNING: could not load ttc_stops: {exc}")


def _transit_tier(lat: float, lng: float) -> str:
    """Classify a lat/lng coordinate by distance to the nearest TTC stop.

    Returns "transit_within_400m" or "transit_within_800m" when the nearest
    stop is closer than that many metres, otherwise "none". "none" is also
    returned when the stops data is unavailable or the lookup raises (the
    error is printed).
    """
    _load_ttc()
    stops = _ttc_stops
    if stops is None:
        return "none"
    try:
        import geopandas as gpd
        from shapely.geometry import Point

        # Distances are measured in UTM zone 17N, whose units are metres.
        metric_crs = "EPSG:26917"
        query = gpd.GeoSeries([Point(lng, lat)], crs="EPSG:4326")
        query_m = query.to_crs(metric_crs).iloc[0]
        # The whole stops layer is reprojected on every call.
        distances_m = stops.to_crs(metric_crs).distance(query_m)
        nearest_m = float(distances_m.min())
    except Exception as exc:
        print(f"[traffic] transit tier error: {exc}")
        return "none"

    # Tiers are checked nearest-first so the closest match wins.
    for limit_m, tier in (
        (400, "transit_within_400m"),
        (800, "transit_within_800m"),
    ):
        if nearest_m < limit_m:
            return tier
    return "none"


def estimate_daily_trips(building: dict) -> dict | None:
    """
    Estimate daily vehicle trips for a proposed building.

    The base estimate is the ITE rate for the building type multiplied by
    the building size: dwelling units for per-unit rates, thousands of
    square feet otherwise. When both coordinates are supplied, a
    transit-proximity discount is applied to the base estimate.

    Args:
        building: Building spec. Keys read: "type" (default "residential"),
            "floors" (default 10), "footprint_m2" (default 2000),
            "units_per_floor" (default 10), and optional "lat" / "lng".
            For the first four, a missing, None or zero value falls back
            to the default.

    Returns:
        A dict with keys: score (0-100), daily_trips, daily_trips_base,
        transit_tier, description. Returns None on unexpected error.
    """
    try:
        btype = (building.get("type") or "residential").lower().strip()
        ite = _ITE_RATES.get(btype, _DEFAULT_RATE)

        floors = building.get("floors") or 10
        footprint_m2 = building.get("footprint_m2") or 2000
        units_per_floor = building.get("units_per_floor") or 10

        if ite["per"] == "unit":
            size = floors * units_per_floor
        else:
            total_sqft = footprint_m2 * floors * 10.764  # m² → sqft
            size = total_sqft / 1000

        base_trips = ite["rate"] * size

        lat = building.get("lat")
        lng = building.get("lng")
        # Compare against None rather than truthiness: 0.0 is a valid
        # coordinate and must not be mistaken for "no location given".
        has_location = lat is not None and lng is not None
        tier = _transit_tier(lat, lng) if has_location else "none"
        discount = _TRANSIT_DISCOUNTS[tier]
        trips = base_trips * (1 - discount)

        # Impact score: 0 = minimal, 100 = extreme (2 000 trips → 100).
        # Clamped at both ends so the documented 0-100 range always holds.
        score = max(0, min(100, int(trips / 20)))

        transit_note = (
            f" TTC access within {'400' if '400' in tier else '800'}m"
            f" reduces vehicle trips by {int(discount * 100)}%."
            if tier != "none" else ""
        )
        severity = "significant" if trips > 500 else "moderate" if trips > 200 else "low"

        return {
            "score": score,
            "daily_trips": round(trips),
            "daily_trips_base": round(base_trips),
            "transit_tier": tier,
            "description": (
                f"Estimated {trips:.0f} daily vehicle trips "
                f"(ITE {ite['rate']}/{'unit' if ite['per'] == 'unit' else '1,000 sqft'})."
                f"{transit_note} "
                f"Peak-hour intersection impact: {severity}."
            ),
        }
    except Exception as exc:
        # Best-effort contract: callers receive None instead of an exception.
        print(f"[traffic] estimate error: {exc}")
        return None
144 changes: 99 additions & 45 deletions backend/xgb_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"""

import json
import joblib
import numpy as np
from pathlib import Path

Expand Down Expand Up @@ -44,11 +45,23 @@
"repair": 4, "renovation": 5, "other": 6,
}

# Maps a GFA column name to the Ontario EWRB building-type name used during
# training. The names are PrimPropTypCalc values from the Ontario public
# EWRB release.
_GFA_TO_EWRB = dict(
    RESIDENTIAL="Multifamily Housing",
    BUSINESS_AND_PERSONAL_SERVICES="Office",
    MERCANTILE="Retail Store",
    INDUSTRIAL="Distribution Center",
    ASSEMBLY="Other",
    INSTITUTIONAL="Other",
)


class _Models:
energy: "xgb.XGBRegressor | None" = None
traffic: "xgb.XGBRegressor | None" = None
economic: "xgb.XGBRegressor | None" = None
energy: "xgb.XGBRegressor | None" = None
energy_gas: "xgb.XGBRegressor | None" = None
economic: "xgb.XGBRegressor | None" = None
energy_encoder: object = None # sklearn LabelEncoder loaded from .pkl
meta: dict = {}


Expand All @@ -58,7 +71,8 @@ class _Models:
def load_models():
if not _XGB_AVAILABLE:
return
for name in ("energy_model", "traffic_model", "economic_model"):
# Traffic is handled by the ITE calculator — no model file needed.
for name in ("energy_model", "economic_model"):
path = MODEL_DIR / f"{name}.json"
meta_path = MODEL_DIR / f"{name}_meta.json"
if not path.exists():
Expand All @@ -73,6 +87,28 @@ def load_models():
except Exception as e:
print(f"[xgb] WARNING: could not load {name}: {e}")

# Gas model (same feature schema as electricity model)
gas_path = MODEL_DIR / "energy_gas_model.json"
if gas_path.exists():
try:
model = xgb.XGBRegressor()
model.load_model(str(gas_path))
_m.energy_gas = model
gas_meta_path = MODEL_DIR / "energy_gas_model_meta.json"
if gas_meta_path.exists():
_m.meta["energy_gas_model"] = json.loads(gas_meta_path.read_text())
print("[xgb] Loaded energy_gas_model.json")
except Exception as e:
print(f"[xgb] WARNING: could not load energy_gas_model: {e}")

enc_path = MODEL_DIR / "energy_building_type_encoder.pkl"
if enc_path.exists():
try:
_m.energy_encoder = joblib.load(str(enc_path))
print(f"[xgb] Loaded energy_building_type_encoder.pkl")
except Exception as e:
print(f"[xgb] WARNING: could not load energy encoder: {e}")


def _build_feature_row(building: dict) -> dict:
"""Convert a building spec dict → feature dict matching training columns."""
Expand Down Expand Up @@ -109,38 +145,74 @@ def _row_to_array(row: dict, feature_list: list) -> np.ndarray:


def predict_energy(building: dict) -> dict | None:
"""Returns predicted annual kWh from real EWRB data and a 0-100 environmental score."""
"""
Predicts electricity and gas intensity (kWh/m²) then scales by GFA.
Trained on Ontario EWRB (private buildings) + Toronto EWRB (municipal).
Score: 0 = low energy use (good), 100 = high energy use (bad).
"""
if _m.energy is None:
return None
try:
meta = _m.meta.get("energy_model", {})
row = _build_feature_row(building)
gfa = row["total_gfa_m2"]

# Energy model uses only floor_area_m2 + building_type_enc
btype = building.get("type", "residential").lower()
btype = building.get("type", "residential").lower()
dominant_col = TYPE_TO_GFA.get(btype, "RESIDENTIAL")
type_enc_map = {"RESIDENTIAL": 0, "BUSINESS_AND_PERSONAL_SERVICES": 1,
"MERCANTILE": 2, "INDUSTRIAL": 3, "ASSEMBLY": 4, "INSTITUTIONAL": 5}
feature_row = {
"floor_area_m2": row["total_gfa_m2"],
"building_type_enc": float(type_enc_map.get(dominant_col, 0)),
}
features = meta.get("features", ["floor_area_m2", "building_type_enc"])
ewrb_type = _GFA_TO_EWRB.get(dominant_col, "Other")

if _m.energy_encoder is not None:
enc = _m.energy_encoder
classes = list(enc.classes_)
target = ewrb_type if ewrb_type in classes else "Office"
type_enc = float(enc.transform([target])[0])
else:
classes = meta.get("building_type_classes", [])
target = ewrb_type if ewrb_type in classes else (classes[0] if classes else "Other")
type_enc = float(classes.index(target)) if target in classes else 0.0

# Model predicts log1p(kWh/m²); multiply intensity × GFA for annual total
feature_row = {"building_type_enc": type_enc}
features = meta.get("features", ["building_type_enc"])
X = _row_to_array(feature_row, features)

log_kwh = float(_m.energy.predict(X)[0])
kwh = np.expm1(log_kwh)
gfa = row["total_gfa_m2"]
intensity = kwh / max(gfa, 1)
score = min(100, int(intensity / 3)) # ~300 kWh/m² → score 100
elec_intensity = float(np.expm1(float(_m.energy.predict(X)[0]))) # kWh/m²
kwh = elec_intensity * gfa

# Gas intensity prediction (same feature schema, no sanity gate needed)
gas_kwh_eq = 0.0
if _m.energy_gas is not None:
try:
gas_meta = _m.meta.get("energy_gas_model", {})
Xg = _row_to_array(feature_row, gas_meta.get("features", list(feature_row.keys())))
gas_intensity = float(np.expm1(float(_m.energy_gas.predict(Xg)[0]))) # kWh/m²
gas_kwh_eq = gas_intensity * gfa
except Exception:
pass

total_kwh = kwh + gas_kwh_eq
total_intensity = total_kwh / max(gfa, 1)
gas_gj = gas_kwh_eq / 277.78 if gas_kwh_eq > 0 else None

# Score: ~800 kWh/m² total → 100 (high-energy industrial); typical office ~400 → 50
environmental_impact_score = min(100, int(total_intensity / 8))

gas_note = (
f" + {gas_gj:,.0f} GJ gas ({gas_kwh_eq / 1_000:.0f} MWh equiv.)"
if gas_gj else ""
)
return {
"score": score,
"score": environmental_impact_score,
"score_meaning": "0 = low energy use (good), 100 = high energy use (bad)",
"annual_kwh": round(kwh),
"intensity_kwh_per_m2": round(intensity, 1),
"annual_gas_gj": round(gas_gj) if gas_gj else None,
"total_energy_kwh": round(total_kwh),
"intensity_kwh_per_m2": round(total_intensity, 1),
"description": (
f"Predicted annual electricity: {kwh/1000:.0f} MWh "
f"({intensity:.0f} kWh/m²) — trained on {meta.get('source', 'Toronto EWRB')} data. "
f"{'Above' if intensity > 200 else 'Within'} Toronto benchmark for this building type."
f"Predicted annual electricity: {kwh / 1_000:.0f} MWh{gas_note}. "
f"Total energy intensity: {total_intensity:.0f} kWh/m² "
f"({'above' if total_intensity > 300 else 'within'} typical Toronto benchmark). "
f"Environmental impact: {environmental_impact_score}/100 — higher means greater energy use."
),
}
except Exception as e:
Expand All @@ -149,27 +221,9 @@ def predict_energy(building: dict) -> dict | None:


def predict_traffic(building: dict) -> dict | None:
"""Returns predicted daily vehicle trips and a 0-100 traffic impact score."""
if _m.traffic is None:
return None
try:
meta = _m.meta.get("traffic_model", {})
row = _build_feature_row(building)
X = _row_to_array(row, meta.get("features", list(row.keys())))
trips = max(0.0, float(_m.traffic.predict(X)[0]))
score = min(100, int(trips / 20)) # 2000 trips → score 100
return {
"score": score,
"daily_trips": round(trips),
"description": (
f"Estimated +{trips:.0f} daily vehicle trips generated. "
f"Peak-hour impact on surrounding intersections: "
f"{'significant' if trips > 500 else 'moderate' if trips > 200 else 'low'}."
),
}
except Exception as e:
print(f"[xgb] traffic predict error: {e}")
return None
"""Returns ITE-estimated daily vehicle trips and a 0-100 traffic impact score."""
from calculators.traffic import estimate_daily_trips
return estimate_daily_trips(building)


def predict_economic(building: dict) -> dict | None:
Expand Down
41 changes: 41 additions & 0 deletions data/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""Quick smoke-test for all three inference paths. Run from project root:
python data/test.py
"""
import sys
from pathlib import Path

# Locate backend/ relative to this file (data/test.py → project root → backend)
# so the script does not depend on the current working directory.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "backend"))

import xgb_models as xm
from calculators.traffic import estimate_daily_trips


def _check_energy_encoding() -> int:
    """Print encoder class and energy prediction per building type.

    Returns the number of cases that could not be evaluated.
    """
    print("=== Fix 1: LabelEncoder encoding per building type ===")
    buildings = [
        {"type": "commercial office", "footprint_m2": 2000, "floors": 10},
        {"type": "retail", "footprint_m2": 2000, "floors": 10},
        {"type": "residential (high-rise)", "footprint_m2": 2000, "floors": 10},
    ]
    encoder = xm._m.energy_encoder
    failures = 0
    for b in buildings:
        btype = b["type"].lower()
        col = xm.TYPE_TO_GFA.get(btype, "RESIDENTIAL")
        ewrb = xm._GFA_TO_EWRB.get(col, "Other")
        result = xm.predict_energy(b)
        # predict_energy returns None when the model is not loaded or fails,
        # and the encoder is None when its .pkl was not loaded; report that
        # instead of crashing on the subscript / attribute access.
        if encoder is None or result is None:
            print(f" {b['type']:30s} ewrb={ewrb:40s} FAILED (energy model or encoder unavailable)")
            failures += 1
            continue
        enc = int(encoder.transform([ewrb])[0])
        print(f" {b['type']:30s} ewrb={ewrb:40s} enc={enc:2d} kwh={result['annual_kwh']:>12,} score={result['score']}")
    return failures


def _check_traffic() -> int:
    """Print ITE trip estimates for three sample sites.

    Returns the number of cases for which no estimate was produced.
    """
    print("=== Fix 2: ITE calculator — transit discount + correct residential unit ===")
    cases = [
        ("downtown residential", {"type": "residential (high-rise)", "floors": 30, "footprint_m2": 1200, "units_per_floor": 8, "lat": 43.6532, "lng": -79.3832}),
        ("suburban residential", {"type": "residential (high-rise)", "floors": 30, "footprint_m2": 1200, "units_per_floor": 8, "lat": 43.780, "lng": -79.560}),
        ("suburban retail", {"type": "retail", "floors": 1, "footprint_m2": 5000, "units_per_floor": 0, "lat": 43.780, "lng": -79.560}),
    ]
    failures = 0
    for label, b in cases:
        r = estimate_daily_trips(b)
        # estimate_daily_trips returns None on unexpected error.
        if r is None:
            print(f" {label:25s} FAILED (estimate_daily_trips returned None)")
            failures += 1
            continue
        print(f" {label:25s} tier={r['transit_tier']:25s} base={r['daily_trips_base']:5d} final={r['daily_trips']:5d} score={r['score']}")
    return failures


def _check_integration() -> None:
    """Print the raw result of each predictor for one downtown high-rise."""
    print("=== Integration: all three models together ===")
    b = {"type": "residential (high-rise)", "floors": 30, "footprint_m2": 1200,
         "units_per_floor": 8, "lat": 43.6532, "lng": -79.3832}
    print(" Energy :", xm.predict_energy(b))
    print(" Traffic :", xm.predict_traffic(b))
    print(" Economic:", xm.predict_economic(b))


def main() -> int:
    """Run every section and return the process exit status (0 = all passed)."""
    # NOTE(review): this assumes load_models() is safe to call when the module
    # has not already loaded its models at import time — confirm against the
    # rest of xgb_models.
    if xm._m.energy is None:
        xm.load_models()

    failures = _check_energy_encoding()
    print()
    failures += _check_traffic()
    print()
    _check_integration()
    return 1 if failures else 0


if __name__ == "__main__":
    sys.exit(main())
Loading
Loading