-
Notifications
You must be signed in to change notification settings - Fork 1
feat: add opik tracing wiring #34
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -22,7 +22,7 @@ | |||||||
| PlanValidation, | ||||||||
| ValidationMetrics, | ||||||||
| ) | ||||||||
| from planproof_api.observability.opik import opik | ||||||||
| from planproof_api.observability.opik import opik, opik_context | ||||||||
|
|
||||||||
| router = APIRouter() | ||||||||
|
|
||||||||
|
|
@@ -43,7 +43,19 @@ def _format_plan(plan: list[PlanItem]) -> str: | |||||||
| return json.dumps([item.model_dump() for item in plan], indent=2) | ||||||||
|
|
||||||||
|
|
||||||||
| @opik.track(name="validate_plan") | ||||||||
| @opik.track(name="initial_planning_step") | ||||||||
| def _initial_planning_step( | ||||||||
| request: PlanRequest, metadata: ExtractedMetadata | ||||||||
| ) -> tuple[list[PlanItem], list[str], list[str]]: | ||||||||
| return generate_plan( | ||||||||
| request.context, | ||||||||
| metadata, | ||||||||
| request.current_time, | ||||||||
| request.timezone, | ||||||||
| ) | ||||||||
|
|
||||||||
|
|
||||||||
| @opik.track(name="validation_step") | ||||||||
| def _validate_plan( | ||||||||
| plan: list[PlanItem], metadata: ExtractedMetadata, current_time: str | ||||||||
| ) -> PlanValidation: | ||||||||
|
|
@@ -104,10 +116,22 @@ def _validate_plan( | |||||||
| keyword_recall_score=keyword_recall_score, | ||||||||
| human_feasibility_flags=human_feasibility_flags, | ||||||||
| ) | ||||||||
| try: | ||||||||
| opik_context.update_current_span( | ||||||||
| metadata={ | ||||||||
| "constraint_violation_count": constraint_violation_count, | ||||||||
| "overlap_minutes": overlap_minutes, | ||||||||
| "hallucination_count": hallucination_count, | ||||||||
| "keyword_recall_score": keyword_recall_score, | ||||||||
| "human_feasibility_flags": human_feasibility_flags, | ||||||||
| } | ||||||||
| ) | ||||||||
| except Exception: | ||||||||
|
||||||||
| pass | ||||||||
| return PlanValidation(status=status, metrics=metrics, errors=errors) | ||||||||
|
|
||||||||
|
|
||||||||
| @opik.track(name="repair_plan") | ||||||||
| @opik.track(name="repair_step") | ||||||||
| def _repair_plan( | ||||||||
| request: PlanRequest, metadata: ExtractedMetadata, failed_plan: list[PlanItem], errors: list[str] | ||||||||
| ) -> tuple[list[PlanItem], list[str], list[str]]: | ||||||||
|
|
@@ -129,12 +153,16 @@ def _repair_plan( | |||||||
|
|
||||||||
|
|
||||||||
| @router.post("/api/plan", response_model=PlanResponse) | ||||||||
| @opik.track(name="plan_request") | ||||||||
| def create_plan(request: PlanRequest) -> PlanResponse: | ||||||||
| try: | ||||||||
| opik_context.update_current_trace(metadata={"variant": request.variant}) | ||||||||
| except Exception: | ||||||||
|
||||||||
| except Exception: | |
| except Exception: | |
| # Telemetry/tracing must never break request handling; ignore failures here. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -42,18 +42,18 @@ The implementation must conform to: | |
|
|
||
| - [X] **PR 2.1:** Implement `eval/time_math.py` to detect overlaps between `start_time` and `end_time`. | ||
| - [X] **PR 2.2:** Implement `eval/hallucination.py` for Proper Noun matching between context and plan. | ||
| - [ ] **PR 2.3:** Implement `eval/recall.py` for Keyword Recall score calculation (deterministic string match). | ||
| - [X] **PR 2.3:** Implement `eval/recall.py` for Keyword Recall score calculation (deterministic string match). | ||
| - [X] **PR 2.4:** Add unit tests for all validators in `apps/api/tests/`. | ||
|
|
||
| ## Phase 3 — Agent: The Sandwich Pipeline | ||
|
|
||
| - [X] **PR 3.1:** Implement the "Extractor" logic (LLM call to parse constraints and keywords). | ||
| - [X] **PR 3.2:** Wire the Validator to run after the Planner and populate `validation.status` and `errors`. | ||
| - [ ] **PR 3.3:** Implement the 1-shot "Repair Attempt" logic (if FAIL, retry once with errors in prompt). | ||
| - [X] **PR 3.3:** Implement the 1-shot "Repair Attempt" logic (if FAIL, retry once with errors in prompt). | ||
|
|
||
| ## Phase 4 — Observability | ||
|
|
||
| - [ ] **PR 4.1:** Integrate Opik tracing hooks for each step (Extract -> Plan -> Validate -> Repair). | ||
| - [X] **PR 4.1:** Integrate Opik tracing hooks for each step (Extract -> Plan -> Validate -> Repair). | ||
|
Comment on lines
+45
to
+56
|
||
| - [ ] **PR 4.2:** Ensure `validation.metrics` are logged as properties in the Opik trace. | ||
|
|
||
| --- | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This PR marks multiple tasks as complete (PR 2.3, PR 3.3, and PR 4.1), but according to the Micro-PR Mandate, "One Checkbox = One PR." If PR 2.3 or PR 3.3 was completed in a previous PR, it should have been marked complete in that PR. If either was never completed before, this PR should not include its implementation. This PR should only mark PR 4.1 as complete, as it implements the Opik tracing scaffolding.