diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index b071d57..3f234d5 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -198,8 +198,8 @@ PlanProof values correctness over persuasion.
 - [X] PR 2.4: Validator Unit Tests
 - [X] PR 3.1: Metadata Extractor
 - [x] PR 3.2: Validation Wiring
-- [ ] PR 3.3: 1-Shot Repair Loop
-- [ ] PR 4.1: Opik Trace Scaffolding
+- [X] PR 3.3: 1-Shot Repair Loop
+- [X] PR 4.1: Opik Trace Scaffolding
 - [ ] PR 4.2: Opik Metrics Integration
 
 ## Infrastructure
diff --git a/apps/api/src/planproof_api/agent/extractor.py b/apps/api/src/planproof_api/agent/extractor.py
index f7a0ca6..d06c1d7 100644
--- a/apps/api/src/planproof_api/agent/extractor.py
+++ b/apps/api/src/planproof_api/agent/extractor.py
@@ -70,7 +70,7 @@ def _normalize_entities(entities: list[str]) -> list[str]:
     return normalized
 
 
-@opik.track(name="extract_metadata")
+@opik.track(name="extraction_step")
 def extract_metadata(context: str) -> ExtractedMetadata:
     client = OpenAI()
     response = client.chat.completions.create(
diff --git a/apps/api/src/planproof_api/agent/planner.py b/apps/api/src/planproof_api/agent/planner.py
index 9b726af..6cbc93d 100644
--- a/apps/api/src/planproof_api/agent/planner.py
+++ b/apps/api/src/planproof_api/agent/planner.py
@@ -5,7 +5,6 @@
 from openai import OpenAI
 
 from planproof_api.agent.schemas import ExtractedMetadata, PlanItem
-from planproof_api.observability.opik import opik
 
 _SYSTEM_PROMPT = (
     "You are a planning assistant. Return ONLY valid JSON with keys: "
@@ -19,7 +18,6 @@ class PlanGenerationError(RuntimeError):
     pass
 
 
-@opik.track(name="generate_plan")
 def generate_plan(
     context: str,
     metadata: ExtractedMetadata,
diff --git a/apps/api/src/planproof_api/observability/opik.py b/apps/api/src/planproof_api/observability/opik.py
index 66fbbde..5b93f04 100644
--- a/apps/api/src/planproof_api/observability/opik.py
+++ b/apps/api/src/planproof_api/observability/opik.py
@@ -1,12 +1,32 @@
 import os
+import socket
+import sys
 
 if "OPIK_PROJECT_NAME" not in os.environ:
-    os.environ["OPIK_PROJECT_NAME"] = "Hackaton"
+    os.environ["OPIK_PROJECT_NAME"] = "PlanProof"
+
+
+def _warn(message: str) -> None:
+    """Write a tracing warning to stderr."""
+    print(f"OPIK WARNING: {message}", file=sys.stderr)
+
+
+def _network_available() -> bool:
+    """Return True if ``www.comet.com`` resolves via DNS, else False."""
+    try:
+        socket.getaddrinfo("www.comet.com", 443)
+    except OSError:
+        return False
+    return True
+
 
 try:
     import opik as _opik  # type: ignore
-except Exception:  # pragma: no cover - optional tracing dependency
+    from opik import opik_context as _opik_context  # type: ignore
+except Exception as exc:  # pragma: no cover - optional tracing dependency
+    _warn(f"Opik import failed; tracing disabled. ({exc})")
     _opik = None
+    _opik_context = None
 
 
 class _NoOpOpik:
@@ -18,4 +38,29 @@ def decorator(func):
         return decorator
 
 
-opik = _opik if _opik is not None else _NoOpOpik()
+class _NoOpOpikContext:
+    """Stand-in for ``opik_context`` whose update calls do nothing."""
+
+    @staticmethod
+    def update_current_span(*_args, **_kwargs) -> None:
+        return None
+
+    @staticmethod
+    def update_current_trace(*_args, **_kwargs) -> None:
+        return None
+
+
+# Tracing stays on only if the import succeeded, an API key is set, and the
+# DNS probe passes; only the first failing check emits a warning.
+# NOTE(review): a self-hosted Opik may need neither the key nor comet.com - confirm.
+_opik_enabled = bool(_opik and _opik_context)
+if _opik_enabled and not os.environ.get("OPIK_API_KEY"):
+    _warn("OPIK_API_KEY not set; tracing disabled.")
+    _opik_enabled = False
+
+if _opik_enabled and not _network_available():
+    _warn("Network unavailable; tracing disabled.")
+    _opik_enabled = False
+
+opik = _opik if _opik_enabled else _NoOpOpik()
+opik_context = _opik_context if _opik_enabled else _NoOpOpikContext()
diff --git a/apps/api/src/planproof_api/routes.py b/apps/api/src/planproof_api/routes.py
index 93bb5e6..be1530f 100644
--- a/apps/api/src/planproof_api/routes.py
+++ b/apps/api/src/planproof_api/routes.py
@@ -22,7 +22,7 @@
     PlanValidation,
     ValidationMetrics,
 )
-from planproof_api.observability.opik import opik
+from planproof_api.observability.opik import opik, opik_context
 
 router = APIRouter()
 
@@ -43,7 +43,20 @@ def _format_plan(plan: list[PlanItem]) -> str:
     return json.dumps([item.model_dump() for item in plan], indent=2)
 
 
-@opik.track(name="validate_plan")
+@opik.track(name="initial_planning_step")
+def _initial_planning_step(
+    request: PlanRequest, metadata: ExtractedMetadata
+) -> tuple[list[PlanItem], list[str], list[str]]:
+    """Run ``generate_plan`` for *request* as its own traced step."""
+    return generate_plan(
+        request.context,
+        metadata,
+        request.current_time,
+        request.timezone,
+    )
+
+
+@opik.track(name="validation_step")
 def _validate_plan(
     plan: list[PlanItem], metadata: ExtractedMetadata, current_time: str
 ) -> PlanValidation:
@@ -104,10 +117,23 @@ def _validate_plan(
         keyword_recall_score=keyword_recall_score,
         human_feasibility_flags=human_feasibility_flags,
     )
+    # Record the metrics on the current span; tracing must never fail validation.
+    try:
+        opik_context.update_current_span(
+            metadata={
+                "constraint_violation_count": constraint_violation_count,
+                "overlap_minutes": overlap_minutes,
+                "hallucination_count": hallucination_count,
+                "keyword_recall_score": keyword_recall_score,
+                "human_feasibility_flags": human_feasibility_flags,
+            }
+        )
+    except Exception:
+        pass
     return PlanValidation(status=status, metrics=metrics, errors=errors)
 
 
-@opik.track(name="repair_plan")
+@opik.track(name="repair_step")
 def _repair_plan(
     request: PlanRequest, metadata: ExtractedMetadata, failed_plan: list[PlanItem], errors: list[str]
 ) -> tuple[list[PlanItem], list[str], list[str]]:
@@ -129,12 +155,17 @@ def _repair_plan(
 
 
 @router.post("/api/plan", response_model=PlanResponse)
+@opik.track(name="plan_request")
 def create_plan(request: PlanRequest) -> PlanResponse:
+    # Tag the trace with the request variant; tracing is best-effort.
+    try:
+        opik_context.update_current_trace(metadata={"variant": request.variant})
+    except Exception:
+        pass
+
     metadata = extract_metadata(request.context)
     try:
-        plan, assumptions, questions = generate_plan(
-            request.context, metadata, request.current_time, request.timezone
-        )
+        plan, assumptions, questions = _initial_planning_step(request, metadata)
     except PlanGenerationError as exc:
         validation = PlanValidation(
             status="fail",
diff --git a/docs/assistant_prompts/codex_tasks.md b/docs/assistant_prompts/codex_tasks.md
index 1aba00a..4015931 100644
--- a/docs/assistant_prompts/codex_tasks.md
+++ b/docs/assistant_prompts/codex_tasks.md
@@ -42,18 +42,18 @@ The implementation must conform to:
 
 - [X] **PR 2.1:** Implement `eval/time_math.py` to detect overlaps between `start_time` and `end_time`.
 - [X] **PR 2.2:** Implement `eval/hallucination.py` for Proper Noun matching between context and plan.
-- [ ] **PR 2.3:** Implement `eval/recall.py` for Keyword Recall score calculation (deterministic string match).
+- [X] **PR 2.3:** Implement `eval/recall.py` for Keyword Recall score calculation (deterministic string match).
 - [X] **PR 2.4:** Add unit tests for all validators in `apps/api/tests/`.
 
 ## Phase 3 — Agent: The Sandwich Pipeline
 
 - [X] **PR 3.1:** Implement the "Extractor" logic (LLM call to parse constraints and keywords).
 - [X] **PR 3.2:** Wire the Validator to run after the Planner and populate `validation.status` and `errors`.
-- [ ] **PR 3.3:** Implement the 1-shot "Repair Attempt" logic (if FAIL, retry once with errors in prompt).
+- [X] **PR 3.3:** Implement the 1-shot "Repair Attempt" logic (if FAIL, retry once with errors in prompt).
 
 ## Phase 4 — Observability
 
-- [ ] **PR 4.1:** Integrate Opik tracing hooks for each step (Extract -> Plan -> Validate -> Repair).
+- [X] **PR 4.1:** Integrate Opik tracing hooks for each step (Extract -> Plan -> Validate -> Repair).
 - [ ] **PR 4.2:** Ensure `validation.metrics` are logged as properties in the Opik trace.
 
 ---