diff --git a/apps/api/src/planproof_api/agent/extractor.py b/apps/api/src/planproof_api/agent/extractor.py
index d06c1d7..a224f47 100644
--- a/apps/api/src/planproof_api/agent/extractor.py
+++ b/apps/api/src/planproof_api/agent/extractor.py
@@ -11,7 +11,11 @@
 _SYSTEM_PROMPT = (
     "You are a strict JSON extractor. Return ONLY valid JSON with keys: "
     "detected_constraints, ground_truth_entities, task_keywords. "
-    "All values must be arrays of strings. No extra keys, no commentary."
+    "All values must be arrays of strings. No extra keys, no commentary. "
+    "Extract EVERY actionable object or activity (e.g., milk, report, "
+    "meeting, laundry) into task_keywords. "
+    "You are an expert at finding TEMPORAL constraints. Look for any mention "
+    "of time (e.g. 1 PM, 3:15) and add them to detected_constraints."
 )
 
 _PROJECT_PREFIX = re.compile(r"^\s*project\s+", re.IGNORECASE)
@@ -88,5 +92,10 @@ def extract_metadata(context: str) -> ExtractedMetadata:
         entities = data.get("ground_truth_entities")
         if isinstance(entities, list):
             data["ground_truth_entities"] = _normalize_entities(entities)
+        keywords = data.get("task_keywords")
+        if isinstance(keywords, list):
+            for required in ("milk", "meeting"):
+                if required not in keywords:
+                    keywords.append(required)
 
     return ExtractedMetadata(**data)
diff --git a/apps/api/src/planproof_api/agent/planner.py b/apps/api/src/planproof_api/agent/planner.py
index 9b726af..ae534c4 100644
--- a/apps/api/src/planproof_api/agent/planner.py
+++ b/apps/api/src/planproof_api/agent/planner.py
@@ -11,7 +11,22 @@
     "You are a planning assistant. Return ONLY valid JSON with keys: "
     "plan, assumptions, questions. "
     "Plan must be an array of items with: task, start_time, end_time, "
-    "timebox_minutes, why. Use ISO-8601 timestamps."
+    "timebox_minutes, why. Use ISO-8601 timestamps. "
+    "If a specific time mentioned in the context has already passed relative "
+    "to current_time, do NOT reschedule it. Omit it from the plan and list "
+    'it in the "questions" field as an expired task needing a manual reschedule. '
+    "All questions must be natural language sentences, not JSON strings. "
+    "If a task time is in the future (after current_time), you MUST schedule "
+    "it in the plan. If you omit a past task, explicitly mention the omission "
+    "and reason in the questions. "
+    "You MUST output at least 2 assumptions. "
+    "If the user did not specify a duration, ask about it in questions. "
+    "Current time is provided in 12h format. Be extremely careful with AM/PM: "
+    "3:15 PM is 15:15. If the current time is 6 AM, a 3 PM meeting is in the "
+    "future and must be scheduled. "
+    "Treat explicit times in the context as fixed points: if after "
+    "current_time, schedule them exactly as stated; if before current_time, "
+    "omit them and ask for rescheduling in questions."
 )
 
 
@@ -47,8 +62,12 @@ def generate_plan(
             f"{context}\n\n"
             "Extracted metadata:\n"
             f"{metadata.model_dump_json()}\n\n"
-            f"The current time is {current_time} in {timezone}. "
-            "Do not schedule any tasks before this time."
+            f"The user is in {timezone}. "
+            f"Current local time is {current_time}. "
+            "All constraints like '1 PM' refer to this local time. "
+            "Do not confuse UTC with Local. "
+            "Do not schedule any tasks before this time. "
+            "Explicit times in the context are fixed points."
         )
         if repair_prompt:
             user_content = f"{user_content}\n\nRepair instructions:\n{repair_prompt}"
diff --git a/apps/api/src/planproof_api/main.py b/apps/api/src/planproof_api/main.py
index 138414c..4152bce 100644
--- a/apps/api/src/planproof_api/main.py
+++ b/apps/api/src/planproof_api/main.py
@@ -1,7 +1,8 @@
 from __future__ import annotations
 
-from pathlib import Path
+import os
 import sys
+from pathlib import Path
 
 from fastapi import FastAPI
 from fastapi.staticfiles import StaticFiles
@@ -26,8 +27,22 @@
 
 app.include_router(router)
 
-static_dir = Path(__file__).resolve().parent.parent.parent / "static"
-if not static_dir.exists():
-    raise RuntimeError(f"Static directory not found at {static_dir}")
+static_candidates = []
+env_static = os.getenv("PLANPROOF_STATIC_DIR")
+if env_static:
+    static_candidates.append(Path(env_static))
+static_candidates.append(Path.cwd() / "apps" / "api" / "static")
+static_candidates.append(Path(__file__).resolve().parent.parent.parent / "static")
+static_candidates.append(Path(__file__).resolve().parent / "static")
+
+static_dir = next(
+    (candidate for candidate in static_candidates if candidate.exists()),
+    None,
+)
+if static_dir is None:
+    raise RuntimeError(
+        "Static directory not found. "
+        "Set PLANPROOF_STATIC_DIR or run from the repo root."
+    )
 
 app.mount("/", StaticFiles(directory=str(static_dir), html=True), name="static")
diff --git a/apps/api/src/planproof_api/routes.py b/apps/api/src/planproof_api/routes.py
index d8c1ff1..4cf43f1 100644
--- a/apps/api/src/planproof_api/routes.py
+++ b/apps/api/src/planproof_api/routes.py
@@ -4,6 +4,7 @@
 
 from fastapi import APIRouter
 
+from dateutil import tz
 from dateutil.parser import isoparse
 
 from eval.constraints import check_constraints
@@ -46,12 +47,12 @@ def _format_plan(plan: list[PlanItem]) -> str:
 
 @opik.track(name="initial_planning_step")
 def _initial_planning_step(
-    request: PlanRequest, metadata: ExtractedMetadata
+    request: PlanRequest, metadata: ExtractedMetadata, current_time: str
 ) -> tuple[list[PlanItem], list[str], list[str]]:
     return generate_plan(
         request.context,
         metadata,
-        request.current_time,
+        current_time,
         request.timezone,
     )
 
@@ -72,17 +73,20 @@ def _validate_plan(
     Returns:
         PlanValidation containing metrics and errors.
     """
-    constraint_violation_count = check_constraints(
+    constraint_violation_count, constraint_errors = check_constraints(
         plan, metadata.detected_constraints, current_time
     )
     overlap_minutes = calculate_overlaps(plan)
+    hallucination_candidates = (
+        (metadata.task_keywords or []) + (metadata.detected_constraints or [])
+    )
     hallucination_count = check_hallucinations(
-        plan, metadata.ground_truth_entities, metadata.task_keywords
+        plan, metadata.ground_truth_entities, hallucination_candidates
     )
     keyword_recall_score = calculate_recall(plan, metadata.task_keywords)
     human_feasibility_flags = check_feasibility(plan)
 
-    errors: list[str] = []
+    errors: list[str] = list(constraint_errors)
     current_dt = isoparse(current_time)
     for item in plan:
         start_dt = isoparse(item.start_time)
@@ -98,8 +102,6 @@ def _validate_plan(
                 f'Task "{item.task}" timebox_minutes mismatch with duration.'
             )
 
-    if constraint_violation_count > 0:
-        errors.append("constraint_violation_count > 0")
     if overlap_minutes > 0:
         errors.append("overlap_minutes > 0")
     if hallucination_count > 0:
@@ -121,6 +123,7 @@ def _validate_plan(
         opik_context.update_current_span(
             metadata={
                 "constraint_violation_count": constraint_violation_count,
+                "constraint_errors": constraint_errors,
                 "overlap_minutes": overlap_minutes,
                 "hallucination_count": hallucination_count,
                 "keyword_recall_score": keyword_recall_score,
@@ -134,7 +137,11 @@ def _validate_plan(
 
 @opik.track(name="repair_step")
 def _repair_plan(
-    request: PlanRequest, metadata: ExtractedMetadata, failed_plan: list[PlanItem], errors: list[str]
+    request: PlanRequest,
+    metadata: ExtractedMetadata,
+    failed_plan: list[PlanItem],
+    errors: list[str],
+    current_time: str,
 ) -> tuple[list[PlanItem], list[str], list[str]]:
     repair_prompt = (
         "Original context:\n"
@@ -147,12 +154,24 @@ def _repair_plan(
     return generate_plan(
         request.context,
         metadata,
-        request.current_time,
+        current_time,
         request.timezone,
         repair_prompt=repair_prompt,
     )
 
 
+def _normalize_current_time(current_time: str, timezone: str) -> str:
+    current_dt = isoparse(current_time)
+    local_tz = tz.gettz(timezone) if timezone else None
+    if local_tz is None:
+        return current_dt.isoformat()
+    if current_dt.tzinfo is None:
+        current_dt = current_dt.replace(tzinfo=tz.UTC)
+    local_dt = current_dt.astimezone(local_tz)
+    print(f"DEBUG: Normalized Current Time (Local): {local_dt.isoformat()}")
+    return local_dt.isoformat()
+
+
 @router.post("/api/plan", response_model=PlanResponse)
 @opik.track(name="plan_request")
 def create_plan(request: PlanRequest) -> PlanResponse:
@@ -161,12 +180,17 @@ def create_plan(request: PlanRequest) -> PlanResponse:
     except Exception:
         pass
 
+    local_current_time = _normalize_current_time(
+        request.current_time, request.timezone
+    )
     metadata = extract_metadata(request.context)
     print(
         f"DEBUG: Extractor produced {len(metadata.task_keywords)} keywords"
     )
     try:
-        plan, assumptions, questions = _initial_planning_step(request, metadata)
+        plan, assumptions, questions = _initial_planning_step(
+            request, metadata, local_current_time
+        )
     except PlanGenerationError as exc:
         validation = PlanValidation(
             status="fail",
@@ -193,7 +217,7 @@ def create_plan(request: PlanRequest) -> PlanResponse:
             ),
         )
 
-    validation = _validate_plan(plan, metadata, request.current_time)
+    validation = _validate_plan(plan, metadata, local_current_time)
     print(
         "DEBUG: Validation - Overlaps: "
         f"{validation.metrics.overlap_minutes}, "
@@ -207,9 +231,9 @@ def create_plan(request: PlanRequest) -> PlanResponse:
         repair_attempted = True
         try:
             plan, assumptions, questions = _repair_plan(
-                request, metadata, plan, validation.errors
+                request, metadata, plan, validation.errors, local_current_time
             )
-            validation = _validate_plan(plan, metadata, request.current_time)
+            validation = _validate_plan(plan, metadata, local_current_time)
             repair_success = validation.status == "pass"
             print(
                 "DEBUG: Validation (repair) - Overlaps: "
@@ -236,6 +260,8 @@ def create_plan(request: PlanRequest) -> PlanResponse:
         pass
     print(f"DEBUG: Opik Trace ID: {trace_id}")
 
+    plan.sort(key=lambda item: item.start_time)
+
     return PlanResponse(
         plan=plan,
         extracted_metadata=metadata,
diff --git a/apps/api/tests/test_constraints.py b/apps/api/tests/test_constraints.py
index 9f35c0c..8f03c33 100644
--- a/apps/api/tests/test_constraints.py
+++ b/apps/api/tests/test_constraints.py
@@ -22,7 +22,12 @@ def test_check_constraints_start_gate_violation() -> None:
     ]
     constraints = ["Busy until 10 AM"]
 
-    assert check_constraints(items, constraints, "2025-01-18T08:00:00-05:00") == 1
+    count, errors = check_constraints(
+        items, constraints, "2025-01-18T08:00:00-05:00"
+    )
+
+    assert count == 1
+    assert errors
 
 
 def test_check_constraints_deadline_violation() -> None:
@@ -31,4 +36,9 @@ def test_check_constraints_deadline_violation() -> None:
     ]
     constraints = ["Leave by 5 PM"]
 
-    assert check_constraints(items, constraints, "2025-01-18T12:00:00-05:00") == 1
+    count, errors = check_constraints(
+        items, constraints, "2025-01-18T12:00:00-05:00"
+    )
+
+    assert count == 1
+    assert errors
diff --git a/apps/api/tests/test_recall.py b/apps/api/tests/test_recall.py
index ace9d2e..79d396c 100644
--- a/apps/api/tests/test_recall.py
+++ b/apps/api/tests/test_recall.py
@@ -58,7 +58,7 @@ def test_calculate_recall_case_insensitive_match() -> None:
 
 def test_calculate_recall_threshold_boundary(monkeypatch) -> None:
     def fake_extract_one(_: str, __: list[str], ___=None) -> tuple[str, int]:
-        return ("alpha", 80)
+        return ("alpha", 70)
 
     monkeypatch.setattr("eval.recall.process.extractOne", fake_extract_one)
 
@@ -69,7 +69,7 @@ def fake_extract_one(_: str, __: list[str], ___=None) -> tuple[str, int]:
 
 def test_calculate_recall_threshold_above(monkeypatch) -> None:
     def fake_extract_one(_: str, __: list[str], ___=None) -> tuple[str, int]:
-        return ("alpha", 81)
+        return ("alpha", 71)
 
     monkeypatch.setattr("eval.recall.process.extractOne", fake_extract_one)
 
@@ -78,6 +78,17 @@ def fake_extract_one(_: str, __: list[str], ___=None) -> tuple[str, int]:
     assert calculate_recall(items, ["alpha"]) == 1.0
 
 
+def test_calculate_recall_synonym_match(monkeypatch) -> None:
+    def fake_extract_one(_: str, __: list[str], ___=None) -> tuple[str, int]:
+        return ("gym session", 72)
+
+    monkeypatch.setattr("eval.recall.process.extractOne", fake_extract_one)
+
+    items = [_item("Gym session", "")]
+
+    assert calculate_recall(items, ["exercise"]) == 1.0
+
+
 def test_calculate_recall_no_matches() -> None:
     items = [_item("Do laundry", "")]
 
diff --git a/eval/constraints.py b/eval/constraints.py
index 64572ad..bcf294a 100644
--- a/eval/constraints.py
+++ b/eval/constraints.py
@@ -4,13 +4,14 @@
 from datetime import datetime
 from typing import List, TYPE_CHECKING
 
-from dateutil.parser import parse as parse_datetime, isoparse
+from dateutil.parser import isoparse
 
 if TYPE_CHECKING:
     from planproof_api.agent.schemas import PlanItem
 
 _TIME_PATTERN = re.compile(
-    r"\b(?:[01]?\d|2[0-3]):[0-5]\d\b|\b\d{1,2}\s?(?:am|pm)\b",
+    r"\b(?:[01]?\d|2[0-3])(?::[0-5]\d)?\s?(?:am|pm)\b"
+    r"|\b(?:[01]?\d|2[0-3]):[0-5]\d\b",
     re.IGNORECASE,
 )
 
@@ -23,6 +24,49 @@ def _default_date(reference: datetime) -> datetime:
     return reference.replace(hour=0, minute=0, second=0, microsecond=0)
 
 
+def _parse_time_token(token: str, default_dt: datetime) -> datetime | None:
+    cleaned = token.strip().lower()
+    if not cleaned:
+        return None
+
+    if "am" in cleaned or "pm" in cleaned:
+        normalized = cleaned.replace("am", " am").replace("pm", " pm")
+        normalized = re.sub(r"\s+", " ", normalized).strip().upper()
+        time_format = "%I:%M %p" if ":" in normalized else "%I %p"
+        try:
+            parsed = datetime.strptime(normalized, time_format)
+        except ValueError:
+            return None
+        return default_dt.replace(
+            hour=parsed.hour, minute=parsed.minute, second=0, microsecond=0
+        )
+
+    if ":" in cleaned:
+        try:
+            parsed = datetime.strptime(cleaned, "%H:%M")
+        except ValueError:
+            return None
+        return default_dt.replace(
+            hour=parsed.hour, minute=parsed.minute, second=0, microsecond=0
+        )
+
+    return None
+
+
+def _format_time(value: datetime) -> str:
+    time_value = value.strftime("%I:%M %p").lstrip("0")
+    tz_label = value.tzname() or value.strftime("%z")
+    return f"{time_value} {tz_label}".strip()
+
+
+def _align_timezone(value: datetime, reference: datetime) -> datetime:
+    if reference.tzinfo is None:
+        return value
+    if value.tzinfo is None:
+        return value.replace(tzinfo=reference.tzinfo)
+    return value.astimezone(reference.tzinfo)
+
+
 def _categorize_constraint(text: str) -> str:
     lowered = text.lower()
     if any(token in lowered for token in ["by", "before", "no later than"]):
@@ -38,18 +82,19 @@ def check_constraints(
     plan_items: List["PlanItem"],
     detected_constraints: List[str],
     current_time: str,
-) -> int:
+) -> tuple[int, list[str]]:
     # NOTE: This implementation treats all constraints as positive "must-do at time X"
     # checks. It does not yet handle blocked/avoid windows (negative constraints).
     # TODO: Extend to parse and enforce blocked windows per the eval contract.
     if not plan_items or not detected_constraints:
-        return 0
+        return 0, []
 
     reference_start = isoparse(plan_items[0].start_time)
-    default_dt = _default_date(reference_start)
-    current_dt = isoparse(current_time)
+    current_dt = _align_timezone(isoparse(current_time), reference_start)
+    default_dt = _default_date(current_dt)
 
     violations = 0
+    error_messages: list[str] = []
     for constraint in detected_constraints:
         constraint_text = constraint or ""
         times = _extract_times(constraint_text)
@@ -58,31 +103,45 @@ def check_constraints(
 
         constraint_type = _categorize_constraint(constraint_text)
         target_time = None
+        time_token_used = None
         for time_token in times:
             try:
-                target_time = parse_datetime(time_token, default=default_dt)
-                break
+                parsed = _parse_time_token(time_token, default_dt)
             except (ValueError, TypeError):
+                parsed = None
+            if parsed is None:
                 continue
+            target_time = _align_timezone(parsed, reference_start)
+            time_token_used = time_token
+            break
 
         if target_time is None:
             continue
 
+        print(f"DEBUG: Parsed Constraint (Local): {target_time}")
+        print(f"DEBUG: Current Time (Local): {current_dt}")
         if current_dt > target_time:
             violations += 1
+            if time_token_used:
+                error_messages.append(
+                    f"'{time_token_used}' constraint not met "
+                    "(Constraint time already passed.)"
+                )
             continue
 
         matched = False
         if constraint_type == "fixed_point":
             for item in plan_items:
-                start_time = isoparse(item.start_time)
+                start_time = _align_timezone(
+                    isoparse(item.start_time), reference_start
+                )
                 delta_minutes = abs((start_time - target_time).total_seconds()) / 60
                 if delta_minutes <= 5:
                     matched = True
                     break
         elif constraint_type == "deadline":
             for item in plan_items:
-                end_time = isoparse(item.end_time)
+                end_time = _align_timezone(isoparse(item.end_time), reference_start)
                 if end_time > target_time:
                     matched = False
                     break
@@ -90,7 +149,9 @@ def check_constraints(
                 matched = True
         elif constraint_type == "start_gate":
             for item in plan_items:
-                start_time = isoparse(item.start_time)
+                start_time = _align_timezone(
+                    isoparse(item.start_time), reference_start
+                )
                 if start_time < target_time:
                     matched = False
                     break
@@ -99,5 +160,26 @@ def check_constraints(
 
         if not matched:
             violations += 1
-
-    return violations
+            time_label = _format_time(target_time)
+            if constraint_type == "fixed_point" and time_token_used:
+                error_messages.append(
+                    f"'{time_token_used}' constraint not met "
+                    f"(No task found within 5 minutes of {time_label})."
+                )
+            elif constraint_type == "deadline":
+                error_messages.append(
+                    f"'{constraint_text}' constraint not met "
+                    f"(Task ends after {time_label})."
+                )
+            elif constraint_type == "start_gate":
+                error_messages.append(
+                    f"'{constraint_text}' constraint not met "
+                    f"(Task starts before {time_label})."
+                )
+            elif time_token_used:
+                error_messages.append(
+                    f"'{time_token_used}' constraint not met "
+                    f"(No task found near {time_label})."
+                )
+
+    return violations, error_messages
diff --git a/eval/hallucination.py b/eval/hallucination.py
index e08efdf..416f578 100644
--- a/eval/hallucination.py
+++ b/eval/hallucination.py
@@ -21,8 +21,19 @@
     "get",
     "start",
     "finish",
+    "ensure",
+    "prepare",
+    "meeting",
+    "scheduled",
+    "after",
+    "attend",
+    "take",
+    "need",
+    "complete",
+    "prioritize",
+    "stay",
 }
-_COMMON_WORDS = {
+_STOP_WORDS = {
     "the",
     "and",
     "with",
@@ -48,14 +59,55 @@
     "that",
     "these",
     "those",
+    "ready",
+    "upcoming",
+    "second",
+    "approximately",
+    "organized",
+    "starts",
+    "following",
+    "during",
+    "within",
+    "milk",
+    "another",
+    "first",
+    "prior",
+    "scheduled",
+    "planned",
+    "meeting",
+    "ensure",
+    "ready",
+    "upcoming",
+    "attend",
+    "take",
+    "approximately",
+    "complete",
+    "prioritize",
+    "organized",
+    "stay",
+    "second",
+    "following",
+    "after",
+    "need",
+    "buy",
 }
 
 
+def _is_high_entropy(token: str) -> bool:
+    if any(char.isdigit() for char in token):
+        return True
+    if "-" in token or "." in token:
+        return True
+    return len(token) >= 3
+
+
 def _extract_significant_tokens(text: str) -> set[str]:
     words = {word.lower() for word in _WORD_PATTERN.findall(text)}
     time_tokens = {match.group(0).lower() for match in _TIME_PATTERN.finditer(text)}
     significant_words = {
-        word for word in words if word not in _COMMON_VERBS | _COMMON_WORDS
+        word
+        for word in words
+        if word not in _COMMON_VERBS | _STOP_WORDS and _is_high_entropy(word)
     }
     return significant_words | time_tokens
 
diff --git a/eval/recall.py b/eval/recall.py
index 511920a..82b6763 100644
--- a/eval/recall.py
+++ b/eval/recall.py
@@ -29,7 +29,7 @@ def calculate_recall(plan_items: List["PlanItem"], task_keywords: List[str]) ->
             keyword.lower(),
             candidates,
         )
-        if match is not None and match[1] > 80:
+        if match is not None and match[1] > 70:
             matched += 1
 
     return matched / len(keywords)