Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions dhee/adapters/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@ def checkpoint(
user_id=uid, task_type=task_type or "general",
what_worked=what_worked, what_failed=what_failed,
key_decision=key_decision,
outcome_score=score,
)
result["insights_created"] = len(insights)

Expand Down
34 changes: 29 additions & 5 deletions dhee/core/buddhi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1002,13 +1002,17 @@ def reflect(
what_worked: Optional[str] = None,
what_failed: Optional[str] = None,
key_decision: Optional[str] = None,
outcome_score: Optional[float] = None,
) -> List[Insight]:
"""Agent-triggered reflection. Synthesizes insights from experience.

Called when an agent completes a task or wants to record learnings.
This is the explicit version of DGM-H's persistent memory —
the agent tells Dhee what it learned, and Dhee stores it as
transferable insight.

If outcome_score is provided, policy utility is updated using the
performance delta between the moving-average baseline and actual score.
"""
new_insights = []

Expand Down Expand Up @@ -1076,16 +1080,31 @@ def reflect(
except Exception:
pass

# Phase 3: Extract policy from task outcomes
# Phase 3: Extract policy from task outcomes, with utility deltas
# Compute baseline from moving average for utility scoring (D2Skill)
baseline_score = None
if outcome_score is not None:
try:
key = f"{user_id}:{task_type}"
records = self._performance.get(key, [])
if len(records) >= 2:
recent = records[-min(10, len(records)):]
baseline_score = sum(r["score"] for r in recent) / len(recent)
except Exception:
pass

if what_worked:
try:
p_store = self._get_policy_store()
# Record success for any matching active policies
matched = p_store.match_policies(user_id, task_type, f"{task_type} task")
for policy in matched:
p_store.record_outcome(policy.id, success=True)
p_store.record_outcome(
policy.id,
success=True,
baseline_score=baseline_score,
actual_score=outcome_score,
)

# If we have enough task history, try to extract a new policy
ts_store = self._get_task_state_store()
completed = ts_store.get_tasks_by_type(user_id, task_type, limit=10)
if len(completed) >= 3:
Expand All @@ -1099,7 +1118,12 @@ def reflect(
p_store = self._get_policy_store()
matched = p_store.match_policies(user_id, task_type, f"{task_type} task")
for policy in matched:
p_store.record_outcome(policy.id, success=False)
p_store.record_outcome(
policy.id,
success=False,
baseline_score=baseline_score,
actual_score=outcome_score,
)
except Exception:
pass

Expand Down
Loading
Loading