Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions dhee/adapters/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@ def checkpoint(
user_id=uid, task_type=task_type or "general",
what_worked=what_worked, what_failed=what_failed,
key_decision=key_decision,
outcome_score=score,
)
result["insights_created"] = len(insights)

Expand Down
34 changes: 29 additions & 5 deletions dhee/core/buddhi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1002,13 +1002,17 @@ def reflect(
what_worked: Optional[str] = None,
what_failed: Optional[str] = None,
key_decision: Optional[str] = None,
outcome_score: Optional[float] = None,
) -> List[Insight]:
"""Agent-triggered reflection. Synthesizes insights from experience.

Called when an agent completes a task or wants to record learnings.
This is the explicit version of DGM-H's persistent memory —
the agent tells Dhee what it learned, and Dhee stores it as
transferable insight.

If outcome_score is provided, policy utility is updated using the
performance delta between the moving-average baseline and actual score.
"""
new_insights = []

Expand Down Expand Up @@ -1076,16 +1080,31 @@ def reflect(
except Exception:
pass

# Phase 3: Extract policy from task outcomes
# Phase 3: Extract policy from task outcomes, with utility deltas
# Compute baseline from moving average for utility scoring (D2Skill)
baseline_score = None
if outcome_score is not None:
try:
key = f"{user_id}:{task_type}"
records = self._performance.get(key, [])
if len(records) >= 2:
recent = records[-min(10, len(records)):]
baseline_score = sum(r["score"] for r in recent) / len(recent)
except Exception:
pass

if what_worked:
try:
p_store = self._get_policy_store()
# Record success for any matching active policies
matched = p_store.match_policies(user_id, task_type, f"{task_type} task")
for policy in matched:
p_store.record_outcome(policy.id, success=True)
p_store.record_outcome(
policy.id,
success=True,
baseline_score=baseline_score,
actual_score=outcome_score,
)

# If we have enough task history, try to extract a new policy
ts_store = self._get_task_state_store()
completed = ts_store.get_tasks_by_type(user_id, task_type, limit=10)
if len(completed) >= 3:
Expand All @@ -1099,7 +1118,12 @@ def reflect(
p_store = self._get_policy_store()
matched = p_store.match_policies(user_id, task_type, f"{task_type} task")
for policy in matched:
p_store.record_outcome(policy.id, success=False)
p_store.record_outcome(
policy.id,
success=False,
baseline_score=baseline_score,
actual_score=outcome_score,
)
except Exception:
pass

Expand Down
Loading
Loading