silviudr · silviudr · Jan 22, 2026 · Jan 21, 2026 · Jan 21, 2026 · Copilot
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -198,8 +198,8 @@ PlanProof values correctness over persuasion.
 - [X] PR 2.4: Validator Unit Tests
 - [X] PR 3.1: Metadata Extractor
 - [x] PR 3.2: Validation Wiring
-- [ ] PR 3.3: 1-Shot Repair Loop
-- [ ] PR 4.1: Opik Trace Scaffolding
+- [X] PR 3.3: 1-Shot Repair Loop
+- [X] PR 4.1: Opik Trace Scaffolding
 - [ ] PR 4.2: Opik Metrics Integration
 
 ## Infrastructure

diff --git a/apps/api/src/planproof_api/agent/extractor.py b/apps/api/src/planproof_api/agent/extractor.py
@@ -70,7 +70,7 @@ def _normalize_entities(entities: list[str]) -> list[str]:
     return normalized
 
 
-@opik.track(name="extract_metadata")
+@opik.track(name="extraction_step")
 def extract_metadata(context: str) -> ExtractedMetadata:
     client = OpenAI()
     response = client.chat.completions.create(

diff --git a/apps/api/src/planproof_api/agent/planner.py b/apps/api/src/planproof_api/agent/planner.py
@@ -5,7 +5,6 @@
 from openai import OpenAI
 
 from planproof_api.agent.schemas import ExtractedMetadata, PlanItem
-from planproof_api.observability.opik import opik
 
 _SYSTEM_PROMPT = (
     "You are a planning assistant. Return ONLY valid JSON with keys: "
@@ -19,7 +18,6 @@ class PlanGenerationError(RuntimeError):
     pass
 
 
-@opik.track(name="generate_plan")
 def generate_plan(
     context: str,
     metadata: ExtractedMetadata,

diff --git a/apps/api/src/planproof_api/observability/opik.py b/apps/api/src/planproof_api/observability/opik.py
@@ -1,12 +1,30 @@
 import os
+import socket
+import sys
 
 if "OPIK_PROJECT_NAME" not in os.environ:
-    os.environ["OPIK_PROJECT_NAME"] = "Hackaton"
+    os.environ["OPIK_PROJECT_NAME"] = "PlanProof"
+
+
+def _warn(message: str) -> None:
+    print(f"OPIK WARNING: {message}", file=sys.stderr)
+
+
+def _network_available() -> bool:
+    try:
+        socket.getaddrinfo("www.comet.com", 443)
+        return True
+    except OSError:
+        return False
+
 
 try:
     import opik as _opik  # type: ignore
-except Exception:  # pragma: no cover - optional tracing dependency
+    from opik import opik_context as _opik_context  # type: ignore
+except Exception as exc:  # pragma: no cover - optional tracing dependency
+    _warn(f"Opik import failed; tracing disabled. ({exc})")
     _opik = None
+    _opik_context = None
 
 
 class _NoOpOpik:
@@ -18,4 +36,24 @@ def decorator(func):
         return decorator
 
 
-opik = _opik if _opik is not None else _NoOpOpik()
+class _NoOpOpikContext:
+    @staticmethod
+    def update_current_span(*_args, **_kwargs) -> None:
+        return None
+
+    @staticmethod
+    def update_current_trace(*_args, **_kwargs) -> None:
+        return None
+
+
+_opik_enabled = bool(_opik and _opik_context)
+if not os.environ.get("OPIK_API_KEY"):
+    _warn("OPIK_API_KEY not set; tracing disabled.")
+    _opik_enabled = False
+
+if _opik_enabled and not _network_available():
+    _warn("Network unavailable; tracing disabled.")
+    _opik_enabled = False
+
+opik = _opik if _opik_enabled else _NoOpOpik()
+opik_context = _opik_context if _opik_enabled else _NoOpOpikContext()
diff --git a/apps/api/src/planproof_api/routes.py b/apps/api/src/planproof_api/routes.py
@@ -22,7 +22,7 @@
     PlanValidation,
     ValidationMetrics,
 )
-from planproof_api.observability.opik import opik
+from planproof_api.observability.opik import opik, opik_context
 
 router = APIRouter()
 
@@ -43,7 +43,19 @@ def _format_plan(plan: list[PlanItem]) -> str:
     return json.dumps([item.model_dump() for item in plan], indent=2)
 
 
-@opik.track(name="validate_plan")
+@opik.track(name="initial_planning_step")
+def _initial_planning_step(
+    request: PlanRequest, metadata: ExtractedMetadata
+) -> tuple[list[PlanItem], list[str], list[str]]:
+    return generate_plan(
+        request.context,
+        metadata,
+        request.current_time,
+        request.timezone,
+    )
+
+
+@opik.track(name="validation_step")
 def _validate_plan(
     plan: list[PlanItem], metadata: ExtractedMetadata, current_time: str
 ) -> PlanValidation:
@@ -104,10 +116,22 @@ def _validate_plan(
         keyword_recall_score=keyword_recall_score,
         human_feasibility_flags=human_feasibility_flags,
     )
+    try:
+        opik_context.update_current_span(
+            metadata={
+                "constraint_violation_count": constraint_violation_count,
+                "overlap_minutes": overlap_minutes,
+                "hallucination_count": hallucination_count,
+                "keyword_recall_score": keyword_recall_score,
+                "human_feasibility_flags": human_feasibility_flags,
+            }
+        )
+    except Exception:
+        pass
     return PlanValidation(status=status, metrics=metrics, errors=errors)
 
 
-@opik.track(name="repair_plan")
+@opik.track(name="repair_step")
 def _repair_plan(
     request: PlanRequest, metadata: ExtractedMetadata, failed_plan: list[PlanItem], errors: list[str]
 ) -> tuple[list[PlanItem], list[str], list[str]]:
@@ -129,12 +153,17 @@ def _repair_plan(
 
 
 @router.post("/api/plan", response_model=PlanResponse)
+@opik.track(name="plan_request")
 def create_plan(request: PlanRequest) -> PlanResponse:
+    try:
+        opik_context.update_current_trace(metadata={"variant": request.variant})
+    except Exception:
+        # Telemetry/tracing must never break request handling; ignore failures here.
+        pass
+
     metadata = extract_metadata(request.context)
     try:
-        plan, assumptions, questions = generate_plan(
-            request.context, metadata, request.current_time, request.timezone
-        )
+        plan, assumptions, questions = _initial_planning_step(request, metadata)
     except PlanGenerationError as exc:
         validation = PlanValidation(
             status="fail",

diff --git a/docs/assistant_prompts/codex_tasks.md b/docs/assistant_prompts/codex_tasks.md
@@ -42,18 +42,18 @@ The implementation must conform to:
 
 - [X] **PR 2.1:** Implement `eval/time_math.py` to detect overlaps between `start_time` and `end_time`.
 - [X] **PR 2.2:** Implement `eval/hallucination.py` for Proper Noun matching between context and plan.
-- [ ] **PR 2.3:** Implement `eval/recall.py` for Keyword Recall score calculation (deterministic string match).
+- [X] **PR 2.3:** Implement `eval/recall.py` for Keyword Recall score calculation (deterministic string match).
 - [X] **PR 2.4:** Add unit tests for all validators in `apps/api/tests/`.
 
 ## Phase 3 — Agent: The Sandwich Pipeline
 
 - [X] **PR 3.1:** Implement the "Extractor" logic (LLM call to parse constraints and keywords).
 - [X] **PR 3.2:** Wire the Validator to run after the Planner and populate `validation.status` and `errors`.
-- [ ] **PR 3.3:** Implement the 1-shot "Repair Attempt" logic (if FAIL, retry once with errors in prompt).
+- [X] **PR 3.3:** Implement the 1-shot "Repair Attempt" logic (if FAIL, retry once with errors in prompt).
 
 ## Phase 4 — Observability
 
-- [ ] **PR 4.1:** Integrate Opik tracing hooks for each step (Extract -> Plan -> Validate -> Repair).
+- [X] **PR 4.1:** Integrate Opik tracing hooks for each step (Extract -> Plan -> Validate -> Repair).
 - [ ] **PR 4.2:** Ensure `validation.metrics` are logged as properties in the Opik trace.
 
 ---