diff --git a/dhee/adapters/base.py b/dhee/adapters/base.py
index a1f7644..c695095 100644
--- a/dhee/adapters/base.py
+++ b/dhee/adapters/base.py
@@ -195,8 +195,13 @@ def context(
         self,
         task_description: Optional[str] = None,
         user_id: Optional[str] = None,
+        operational: bool = False,
     ) -> Dict[str, Any]:
-        """HyperAgent session bootstrap. Returns everything the agent needs."""
+        """HyperAgent session bootstrap. Returns everything the agent needs.
+
+        Args:
+            operational: If True, return compact actionable-only format.
+        """
         uid = user_id or self._user_id
         self._tracker.on_context(task_description)
         hyper_ctx = self._buddhi.get_hyper_context(
@@ -204,6 +209,8 @@ def context(
             task_description=task_description,
             memory=self._engram._memory,
         )
+        if operational:
+            return hyper_ctx.to_operational_dict()
         return hyper_ctx.to_dict()
 
     # ------------------------------------------------------------------
diff --git a/dhee/core/buddhi.py b/dhee/core/buddhi.py
index 02ca3c3..cef2380 100644
--- a/dhee/core/buddhi.py
+++ b/dhee/core/buddhi.py
@@ -30,6 +30,7 @@
 
 import json
 import logging
+import math
 import os
 import re
 import time
@@ -66,12 +67,45 @@ class Insight:
     invalidation_count: int         # how many times contradicted
     tags: List[str]
 
+    # Utility tracking (EMA of measured performance deltas)
+    utility: float = 0.0
+    apply_count: int = 0
+    _UTILITY_ALPHA: float = 0.3
+
+    def record_outcome(
+        self,
+        success: bool,
+        baseline_score: Optional[float] = None,
+        actual_score: Optional[float] = None,
+    ) -> float:
+        """Record outcome with optional measured delta."""
+        self.apply_count += 1
+        if success:
+            self.validation_count += 1
+            self.confidence = min(1.0, self.confidence + 0.05)
+        else:
+            self.invalidation_count += 1
+            self.confidence = max(0.0, self.confidence - 0.1)
+        self.last_validated = datetime.now(timezone.utc).isoformat()
+
+        delta = 0.0
+        if baseline_score is not None and actual_score is not None:
+            delta = actual_score - baseline_score
+            self.utility = (
+                self._UTILITY_ALPHA * delta
+                + (1 - self._UTILITY_ALPHA) * self.utility
+            )
+        return delta
+
     def strength(self) -> float:
-        """Net strength: validated - invalidated, normalized."""
+        """Net strength: validated - invalidated, normalized, utility-weighted."""
         total = self.validation_count + self.invalidation_count
         if total == 0:
             return self.confidence
-        return self.confidence * (self.validation_count / total)
+        base = self.confidence * (self.validation_count / total)
+        # Incorporate utility via sigmoid
+        utility_factor = 0.5 + 0.5 * (1.0 / (1.0 + math.exp(-3.0 * self.utility)))
+        return base * utility_factor
 
     def to_dict(self) -> Dict[str, Any]:
         return {
@@ -83,6 +117,8 @@ def to_dict(self) -> Dict[str, Any]:
             "source_task_types": self.source_task_types,
             "validations": self.validation_count,
             "tags": self.tags,
+            "utility": round(self.utility, 3),
+            "apply_count": self.apply_count,
         }
 
 
@@ -152,6 +188,13 @@ class HyperContext:
     policies: List[Dict[str, Any]] = field(default_factory=list)
     beliefs: List[Dict[str, Any]] = field(default_factory=list)
 
+    # Phase 4: operational cognition packet
+    active_step: Optional[Dict[str, Any]] = None
+    step_policies: List[Dict[str, Any]] = field(default_factory=list)
+    critical_blockers: List[str] = field(default_factory=list)
+    contradictions: List[Dict[str, Any]] = field(default_factory=list)
+    action_items: List[str] = field(default_factory=list)
+
     def to_dict(self) -> Dict[str, Any]:
         return {
             "user_id": self.user_id,
@@ -173,6 +216,11 @@ def to_dict(self) -> Dict[str, Any]:
                  "strength": m.get("strength", 1.0)}
                 for m in self.memories[:10]
             ],
+            "active_step": self.active_step,
+            "step_policies": self.step_policies[:5],
+            "critical_blockers": self.critical_blockers[:5],
+            "contradictions": self.contradictions[:5],
+            "action_items": self.action_items[:10],
             "meta": {
                 "n_insights": len(self.insights),
                 "n_active_intentions": len(self.intentions),
@@ -183,10 +231,34 @@ def to_dict(self) -> Dict[str, Any]:
                 "n_task_states": len(self.task_states),
                 "n_policies": len(self.policies),
                 "n_beliefs": len(self.beliefs),
+                "has_active_step": self.active_step is not None,
+                "n_action_items": len(self.action_items),
+                "n_critical_blockers": len(self.critical_blockers),
                 "performance_tracked": len(self.performance) > 0,
             },
         }
 
+    def to_operational_dict(self) -> Dict[str, Any]:
+        """Compact operational format for per-turn agent consumption.
+
+        Contains ONLY actionable items -- not full history.
+        Use to_dict() for deep context.
+        """
+        result: Dict[str, Any] = {}
+        if self.active_step:
+            result["current_step"] = self.active_step
+        if self.step_policies:
+            result["step_policies"] = self.step_policies[:3]
+        if self.action_items:
+            result["action_items"] = self.action_items[:5]
+        if self.critical_blockers:
+            result["critical_blockers"] = self.critical_blockers[:3]
+        if self.warnings:
+            result["warnings"] = self.warnings[:3]
+        if self.contradictions:
+            result["contradictions"] = self.contradictions[:3]
+        return result
+
 
 # ---------------------------------------------------------------------------
 # Buddhi — the proactive cognition layer
@@ -379,6 +451,49 @@ def get_hyper_context(
         beliefs = cog_state.get("beliefs", [])
         warnings.extend(cog_state.get("belief_warnings", []))
 
+        # 15. Operational cognition packet (Phase 4)
+        active_step_desc = cog_state.get("active_step")
+        active_step = {"description": active_step_desc} if active_step_desc else None
+        step_policies_list = cog_state.get("step_policies", [])
+
+        # Critical blockers from active task states
+        critical_blockers = []
+        for ts in task_states:
+            if ts.get("status") in ("in_progress", "blocked") and ts.get("blockers"):
+                critical_blockers.extend(ts["blockers"])
+
+        # Contradictions from belief store
+        contradictions_list = []
+        try:
+            contradiction_pairs = self._kernel.beliefs.get_contradictions(user_id)
+            for b1, b2 in contradiction_pairs[:5]:
+                severity = abs(b1.confidence - b2.confidence)
+                contradictions_list.append({
+                    "belief_a": b1.claim[:100],
+                    "belief_b": b2.claim[:100],
+                    "confidence_a": round(b1.confidence, 2),
+                    "confidence_b": round(b2.confidence, 2),
+                    "severity": round(1.0 - severity, 2),
+                })
+        except Exception:
+            pass
+
+        # Build prioritized action items
+        action_items = []
+        for intention in triggered:
+            action_items.append(f"[INTENTION] {intention.action_payload}")
+        if active_step_desc:
+            action_items.append(f"[NEXT STEP] {active_step_desc}")
+        for sp in step_policies_list[:3]:
+            action_items.append(f"[CORRECTION] {sp.get('do', '')[:100]}")
+            avoid = sp.get("avoid")
+            if avoid:
+                avoids = avoid if isinstance(avoid, list) else [avoid]
+                for a in avoids[:2]:
+                    action_items.append(f"[AVOID] {a[:100]}")
+        for blocker in critical_blockers[:3]:
+            action_items.append(f"[BLOCKER] Resolve: {blocker}")
+
         return HyperContext(
             user_id=user_id,
             session_id=str(uuid.uuid4()),
@@ -395,6 +510,11 @@ def get_hyper_context(
             task_states=task_states,
             policies=policies,
             beliefs=beliefs,
+            active_step=active_step,
+            step_policies=step_policies_list,
+            critical_blockers=critical_blockers,
+            contradictions=contradictions_list,
+            action_items=action_items,
         )
 
     # ------------------------------------------------------------------
@@ -871,6 +991,19 @@ def reflect(
             except Exception:
                 pass
 
+        # Compute baseline from moving average for utility scoring (D2Skill)
+        # Moved before heuristic/policy blocks so all can use it
+        baseline_score = None
+        if outcome_score is not None:
+            try:
+                key = f"{user_id}:{task_type}"
+                records = self._performance.get(key, [])
+                if len(records) >= 2:
+                    recent = records[-min(10, len(records)):]
+                    baseline_score = sum(r["score"] for r in recent) / len(recent)
+            except Exception:
+                pass
+
         # Phase 2: Distill heuristic from what_worked
         if what_worked:
             try:
@@ -882,22 +1015,12 @@ def reflect(
                     what_failed=what_failed,
                     user_id=user_id,
                 )
-                # Close the heuristic validation loop: validate any previously
-                # retrieved heuristics that were used for this task type
-                self._validate_used_heuristics(user_id, task_type, what_worked is not None)
-            except Exception:
-                pass
-
-        # Phase 3: Extract policy from task outcomes, with utility deltas
-        # Compute baseline from moving average for utility scoring (D2Skill)
-        baseline_score = None
-        if outcome_score is not None:
-            try:
-                key = f"{user_id}:{task_type}"
-                records = self._performance.get(key, [])
-                if len(records) >= 2:
-                    recent = records[-min(10, len(records)):]
-                    baseline_score = sum(r["score"] for r in recent) / len(recent)
+                # Close the heuristic validation loop with measured deltas
+                self._validate_used_heuristics(
+                    user_id, task_type, what_worked is not None,
+                    baseline_score=baseline_score,
+                    actual_score=outcome_score,
+                )
             except Exception:
                 pass
 
@@ -922,6 +1045,10 @@ def reflect(
                     self._kernel.policies.extract_from_tasks(
                         user_id, task_dicts, task_type,
                     )
+                    # Step-level policy extraction from failure patterns
+                    self._kernel.policies.extract_step_policies(
+                        user_id, task_dicts, task_type,
+                    )
             except Exception:
                 pass
 
@@ -940,6 +1067,19 @@ def reflect(
             except Exception:
                 pass
 
+            # Extract step policies from failure patterns
+            try:
+                completed = self._kernel.tasks.get_tasks_by_type(
+                    user_id, task_type, limit=10,
+                )
+                if len(completed) >= 3:
+                    task_dicts = [t.to_dict() for t in completed]
+                    self._kernel.policies.extract_step_policies(
+                        user_id, task_dicts, task_type,
+                    )
+            except Exception:
+                pass
+
         # Update beliefs based on outcomes (via kernel)
         if what_worked:
             try:
@@ -965,16 +1105,122 @@ def reflect(
             except Exception:
                 pass
 
+            # Cross-structure: when beliefs are challenged, check dependent policies
+            try:
+                for belief in relevant:
+                    if belief.confidence < 0.3:
+                        claim_words = set(belief.claim.lower().split()[:5])
+                        for policy in self._kernel.policies._policies.values():
+                            if policy.user_id != user_id:
+                                continue
+                            approach_words = set(policy.action.approach.lower().split())
+                            if len(claim_words & approach_words) >= 2:
+                                policy.utility *= 0.8
+                                policy.updated_at = time.time()
+            except Exception:
+                pass
+
+        # Record outcomes on matched insights (utility tracking)
+        if what_worked:
+            try:
+                matched_insights = self._get_relevant_insights(
+                    user_id, f"{task_type} task",
+                )
+                for insight in matched_insights[:5]:
+                    insight.record_outcome(
+                        success=True,
+                        baseline_score=baseline_score,
+                        actual_score=outcome_score,
+                    )
+                self._save_insights()
+            except Exception:
+                pass
+
+        if what_failed:
+            try:
+                matched_insights = self._get_relevant_insights(
+                    user_id, f"{task_type} task",
+                )
+                for insight in matched_insights[:5]:
+                    insight.record_outcome(
+                        success=False,
+                        baseline_score=baseline_score,
+                        actual_score=outcome_score,
+                    )
+                self._save_insights()
+            except Exception:
+                pass
+
+        # Record outcomes on matched contrastive pairs
+        try:
+            store = self._get_contrastive()
+            matched_pairs = store.retrieve_contrasts(
+                f"{task_type} task", user_id=user_id, limit=5,
+            )
+            task_succeeded = what_worked is not None
+            for pair in matched_pairs:
+                pair.record_outcome(
+                    success=task_succeeded,
+                    baseline_score=baseline_score,
+                    actual_score=outcome_score,
+                )
+            store._save_all()
+        except Exception:
+            pass
+
+        # Cross-structure: positive policy delta -> reinforce related heuristics
+        if what_worked:
+            try:
+                matched_policies = self._kernel.policies.match_policies(
+                    user_id, task_type, f"{task_type} task",
+                )
+                for policy in matched_policies:
+                    if policy.last_delta > 0:
+                        distiller = self._get_heuristic_distiller()
+                        related = distiller.retrieve_relevant(
+                            policy.action.approach, user_id=user_id, limit=3,
+                        )
+                        for h in related:
+                            h.record_outcome(
+                                success=True,
+                                baseline_score=baseline_score,
+                                actual_score=outcome_score,
+                            )
+                        distiller._save_all()
+            except Exception:
+                pass
+
+        # Record outcomes on recently triggered intentions
+        try:
+            triggered = [
+                i for i in self._kernel.intentions._intentions.values()
+                if i.user_id == user_id
+                and i.status == "triggered"
+                and i.was_useful is None
+            ]
+            task_succeeded = what_worked is not None and (
+                outcome_score is None or outcome_score >= 0.5
+            )
+            for intention in triggered:
+                self._kernel.intentions.record_outcome(
+                    intention.id,
+                    useful=task_succeeded,
+                    outcome_score=outcome_score,
+                )
+        except Exception:
+            pass
+
         return new_insights
 
     def _validate_used_heuristics(
         self, user_id: str, task_type: str, success: bool,
+        baseline_score: Optional[float] = None,
+        actual_score: Optional[float] = None,
     ) -> None:
-        """Close the heuristic validation loop.
+        """Close the heuristic validation loop with measured utility deltas.
 
-        When a task completes, validate heuristics that were retrieved for
-        this task type. This is the missing feedback loop that turns
-        scaffolding into real self-improvement.
+        When a task completes, update heuristics that were retrieved for
+        this task type with EMA utility tracking (not just boolean validate).
         """
         try:
             distiller = self._get_heuristic_distiller()
@@ -984,7 +1230,12 @@ def _validate_used_heuristics(
                 limit=5,
             )
             for h in relevant:
-                distiller.validate(h.id, validated=success)
+                h.record_outcome(
+                    success=success,
+                    baseline_score=baseline_score,
+                    actual_score=actual_score,
+                )
+            distiller._save_all()
         except Exception:
             pass
 
@@ -1009,6 +1260,8 @@ def _save_insights(self) -> None:
                         "validation_count": insight.validation_count,
                         "invalidation_count": insight.invalidation_count,
                         "tags": insight.tags,
+                        "utility": insight.utility,
+                        "apply_count": insight.apply_count,
                     }
                     f.write(json.dumps(row, ensure_ascii=False) + "\n")
         except OSError as e:
@@ -1050,6 +1303,8 @@ def _load_state(self) -> None:
                             validation_count=row.get("validation_count", 0),
                             invalidation_count=row.get("invalidation_count", 0),
                             tags=row.get("tags", []),
+                            utility=row.get("utility", 0.0),
+                            apply_count=row.get("apply_count", 0),
                         )
                         self._insights[insight.id] = insight
             except (OSError, json.JSONDecodeError) as e:
diff --git a/dhee/core/cognition_kernel.py b/dhee/core/cognition_kernel.py
index aea8821..b99a7d8 100644
--- a/dhee/core/cognition_kernel.py
+++ b/dhee/core/cognition_kernel.py
@@ -88,12 +88,18 @@ def get_cognitive_state(
         except Exception:
             result["episodes"] = []
 
-        # Task states
+        # Task states + active step context
+        step_context = ""
+        active_task_type = task_description or "general"
         try:
             task_states = []
             active = self.tasks.get_active_task(user_id)
             if active:
                 task_states.append(active.to_compact())
+                current = active.current_step
+                if current:
+                    step_context = current.description
+                active_task_type = active.task_type or active_task_type
             recent_tasks = self.tasks.get_recent_tasks(user_id, limit=3)
             for t in recent_tasks:
                 c = t.to_compact()
@@ -103,18 +109,37 @@ def get_cognitive_state(
         except Exception:
             result["task_states"] = []
 
-        # Policies
+        result["active_step"] = step_context if step_context else None
+
+        # Policies (pass step_context for better matching)
         try:
             matched = self.policies.match_policies(
                 user_id=user_id,
-                task_type=task_description or "general",
+                task_type=active_task_type,
                 task_description=task_description or "",
+                step_context=step_context,
                 limit=3,
             )
             result["policies"] = [p.to_compact() for p in matched]
         except Exception:
             result["policies"] = []
 
+        # Step policies (separate, for operational context)
+        try:
+            if step_context:
+                step_matched = self.policies.match_step_policies(
+                    user_id=user_id,
+                    task_type=active_task_type,
+                    task_description=task_description or "",
+                    step_context=step_context,
+                    limit=3,
+                )
+                result["step_policies"] = [p.to_compact() for p in step_matched]
+            else:
+                result["step_policies"] = []
+        except Exception:
+            result["step_policies"] = []
+
         # Beliefs
         belief_warnings: List[str] = []
         try:
@@ -169,6 +194,25 @@ def record_checkpoint_event(
                 content=summary[:500],
                 metadata={"status": status, "outcome_score": outcome_score},
             )
+
+            # Wire episode.connection_count for cross-primitive links
+            try:
+                open_eps = getattr(self.episodes, '_open_episodes', {})
+                ep_id = open_eps.get(user_id)
+                if ep_id:
+                    eps_dict = getattr(self.episodes, '_episodes', {})
+                    ep = eps_dict.get(ep_id)
+                    if ep:
+                        active_task = self.tasks.get_active_task(user_id)
+                        if active_task:
+                            ep.connection_count += 1
+                        matched_policies = self.policies.match_policies(
+                            user_id, summary[:50], summary[:200], limit=3,
+                        )
+                        ep.connection_count += len(matched_policies)
+            except Exception:
+                pass
+
             if status == "completed":
                 episode = self.episodes.end_episode(
                     user_id, outcome_score, summary
@@ -231,10 +275,56 @@ def update_task_on_checkpoint(
                     result["task_completed"] = active_task.id
 
                 self.tasks.update_task(active_task)
+
+                # Record outcomes on STEP policies for completed/failed steps
+                if status == "completed" and active_task.plan:
+                    for step in active_task.plan:
+                        if step.status.value == "completed":
+                            self.record_step_outcome(
+                                user_id, task_type, step.description,
+                                success=True, actual_score=outcome_score,
+                            )
+                        elif step.status.value == "failed":
+                            self.record_step_outcome(
+                                user_id, task_type, step.description,
+                                success=False, actual_score=outcome_score,
+                            )
         except Exception:
             pass
         return result
 
+    def record_step_outcome(
+        self,
+        user_id: str,
+        task_type: str,
+        step_description: str,
+        success: bool,
+        baseline_score: Optional[float] = None,
+        actual_score: Optional[float] = None,
+    ) -> None:
+        """Record outcome on STEP policies matching a completed/failed step.
+
+        Finds matching STEP policies and records their outcomes.
+        Zero LLM calls.
+        """
+        try:
+            matched = self.policies.match_step_policies(
+                user_id=user_id,
+                task_type=task_type,
+                task_description=f"{task_type} task",
+                step_context=step_description,
+                limit=5,
+            )
+            for policy in matched:
+                self.policies.record_outcome(
+                    policy.id,
+                    success=success,
+                    baseline_score=baseline_score,
+                    actual_score=actual_score,
+                )
+        except Exception:
+            pass
+
     def selective_forget(
         self,
         user_id: str,
diff --git a/dhee/core/contrastive.py b/dhee/core/contrastive.py
index ec443dd..f257047 100644
--- a/dhee/core/contrastive.py
+++ b/dhee/core/contrastive.py
@@ -43,6 +43,30 @@ class ContrastivePair:
     tags: List[str] = field(default_factory=list)
     validation_count: int = 0       # times this contrast proved useful
 
+    # Utility tracking (EMA of measured performance deltas)
+    utility: float = 0.0
+    apply_count: int = 0
+    _UTILITY_ALPHA: float = 0.3
+
+    def record_outcome(
+        self,
+        success: bool,
+        baseline_score: Optional[float] = None,
+        actual_score: Optional[float] = None,
+    ) -> float:
+        """Record outcome with optional measured delta."""
+        self.apply_count += 1
+        if success:
+            self.validation_count += 1
+        delta = 0.0
+        if baseline_score is not None and actual_score is not None:
+            delta = actual_score - baseline_score
+            self.utility = (
+                self._UTILITY_ALPHA * delta
+                + (1 - self._UTILITY_ALPHA) * self.utility
+            )
+        return delta
+
     def to_dict(self) -> Dict[str, Any]:
         return {
             "id": self.id,
@@ -55,6 +79,8 @@ def to_dict(self) -> Dict[str, Any]:
             "user_id": self.user_id,
             "tags": self.tags,
             "validation_count": self.validation_count,
+            "utility": self.utility,
+            "apply_count": self.apply_count,
         }
 
     @classmethod
@@ -70,6 +96,8 @@ def from_dict(cls, d: Dict[str, Any]) -> ContrastivePair:
             user_id=d.get("user_id", "default"),
             tags=d.get("tags", []),
             validation_count=d.get("validation_count", 0),
+            utility=d.get("utility", 0.0),
+            apply_count=d.get("apply_count", 0),
         )
 
     def to_compact(self) -> Dict[str, str]:
@@ -79,6 +107,7 @@ def to_compact(self) -> Dict[str, str]:
             "do": self.success_approach[:300],
             "avoid": self.failure_approach[:300],
             "confidence": round(min(1.0, 0.5 + 0.1 * self.validation_count), 2),
+            "utility": round(self.utility, 3),
         }
 
 
diff --git a/dhee/core/heuristic.py b/dhee/core/heuristic.py
index 89767e6..188caa4 100644
--- a/dhee/core/heuristic.py
+++ b/dhee/core/heuristic.py
@@ -22,6 +22,7 @@
 
 import json
 import logging
+import math
 import os
 import time
 import uuid
@@ -46,11 +47,47 @@ class Heuristic:
     invalidation_count: int = 0
     tags: List[str] = field(default_factory=list)
 
+    # Utility tracking (EMA of measured performance deltas)
+    utility: float = 0.0
+    last_delta: float = 0.0
+    apply_count: int = 0
+
+    # EMA smoothing factor (class constant, not serialized)
+    _UTILITY_ALPHA: float = 0.3
+
+    def record_outcome(
+        self,
+        success: bool,
+        baseline_score: Optional[float] = None,
+        actual_score: Optional[float] = None,
+    ) -> float:
+        """Record outcome with optional measured delta. Mirrors PolicyCase."""
+        self.apply_count += 1
+        if success:
+            self.validation_count += 1
+            self.confidence = min(1.0, self.confidence + 0.05)
+        else:
+            self.invalidation_count += 1
+            self.confidence = max(0.0, self.confidence - 0.1)
+
+        delta = 0.0
+        if baseline_score is not None and actual_score is not None:
+            delta = actual_score - baseline_score
+            self.last_delta = delta
+            self.utility = (
+                self._UTILITY_ALPHA * delta
+                + (1 - self._UTILITY_ALPHA) * self.utility
+            )
+        return delta
+
     def strength(self) -> float:
         total = self.validation_count + self.invalidation_count
         if total == 0:
             return self.confidence
-        return self.confidence * (self.validation_count / total)
+        base = self.confidence * (self.validation_count / total)
+        # Incorporate utility via sigmoid: maps utility to [0.5, 1.0] range
+        utility_factor = 0.5 + 0.5 * (1.0 / (1.0 + math.exp(-3.0 * self.utility)))
+        return base * utility_factor
 
     def to_dict(self) -> Dict[str, Any]:
         return {
@@ -65,6 +102,9 @@ def to_dict(self) -> Dict[str, Any]:
             "validation_count": self.validation_count,
             "invalidation_count": self.invalidation_count,
             "tags": self.tags,
+            "utility": self.utility,
+            "last_delta": self.last_delta,
+            "apply_count": self.apply_count,
         }
 
     @classmethod
@@ -80,6 +120,9 @@ def from_dict(cls, d: Dict[str, Any]) -> Heuristic:
             validation_count=d.get("validation_count", 0),
             invalidation_count=d.get("invalidation_count", 0),
             tags=d.get("tags", []),
+            utility=d.get("utility", 0.0),
+            last_delta=d.get("last_delta", 0.0),
+            apply_count=d.get("apply_count", 0),
         )
 
     def to_compact(self) -> Dict[str, Any]:
@@ -88,6 +131,7 @@ def to_compact(self) -> Dict[str, Any]:
             "heuristic": self.content[:300],
             "level": self.abstraction_level,
             "confidence": round(self.strength(), 2),
+            "utility": round(self.utility, 3),
             "applies_to": self.source_task_types[:3],
         }
 
diff --git a/dhee/core/intention.py b/dhee/core/intention.py
index cc0773b..82a1f98 100644
--- a/dhee/core/intention.py
+++ b/dhee/core/intention.py
@@ -44,6 +44,10 @@ class Intention:
     created_at: str
     triggered_at: Optional[str]
 
+    # Outcome tracking
+    outcome_score: Optional[float] = None
+    was_useful: Optional[bool] = None
+
     def to_dict(self) -> Dict[str, Any]:
         return {
             "id": self.id,
@@ -53,6 +57,8 @@ def to_dict(self) -> Dict[str, Any]:
             "status": self.status,
             "trigger_keywords": self.trigger_keywords,
             "trigger_after": self.trigger_after,
+            "outcome_score": self.outcome_score,
+            "was_useful": self.was_useful,
         }
 
 
@@ -198,6 +204,20 @@ def get_active(self, user_id: str) -> List[Intention]:
             if i.user_id == user_id and i.status == "active"
         ]
 
+    def record_outcome(
+        self,
+        intention_id: str,
+        useful: bool,
+        outcome_score: Optional[float] = None,
+    ) -> None:
+        """Record whether a triggered intention was useful."""
+        intention = self._intentions.get(intention_id)
+        if not intention:
+            return
+        intention.was_useful = useful
+        intention.outcome_score = outcome_score
+        self._save()
+
     def get_stats(self, user_id: Optional[str] = None) -> Dict[str, Any]:
         """Stats for health checks."""
         intentions = list(self._intentions.values())
@@ -229,6 +249,8 @@ def _save(self) -> None:
                         "status": intention.status,
                         "created_at": intention.created_at,
                         "triggered_at": intention.triggered_at,
+                        "outcome_score": intention.outcome_score,
+                        "was_useful": intention.was_useful,
                     }
                     f.write(json.dumps(row, ensure_ascii=False) + "\n")
         except OSError as e:
@@ -256,6 +278,8 @@ def _load(self) -> None:
                         status=row.get("status", "active"),
                         created_at=row.get("created_at", ""),
                         triggered_at=row.get("triggered_at"),
+                        outcome_score=row.get("outcome_score"),
+                        was_useful=row.get("was_useful"),
                     )
                     self._intentions[intention.id] = intention
         except (OSError, json.JSONDecodeError) as e:
diff --git a/dhee/core/policy.py b/dhee/core/policy.py
index d52c8d4..a4a3327 100644
--- a/dhee/core/policy.py
+++ b/dhee/core/policy.py
@@ -535,6 +535,99 @@ def extract_from_tasks(
             source_task_ids=[t.get("id", "") for t in successful[:5]],
         )
 
+    def extract_step_policies(
+        self,
+        user_id: str,
+        completed_tasks: List[Dict[str, Any]],
+        task_type: str,
+    ) -> List[PolicyCase]:
+        """Extract STEP-granularity policies from repeated failure patterns.
+
+        Analyzes failed steps across completed tasks of the same type.
+        When the same step fails >=2 times at the same position AND a
+        different approach succeeded at that position in other tasks,
+        creates a STEP correction policy.
+
+        Pure structural analysis, zero LLM calls.
+        """
+        # Collect failed steps grouped by (position, normalized description)
+        failure_counts: Dict[tuple, List[str]] = {}  # (pos, desc) -> [task_ids]
+        success_at_pos: Dict[int, List[str]] = {}    # pos -> [successful step descriptions]
+
+        for task in completed_tasks:
+            plan = task.get("plan", [])
+            task_id = task.get("id", "")
+            task_score = task.get("outcome_score", 0)
+
+            for idx, step in enumerate(plan):
+                status = step.get("status", "pending")
+                desc = step.get("description", "").lower().strip()
+                if not desc:
+                    continue
+
+                if status == "failed":
+                    key = (idx, desc)
+                    if key not in failure_counts:
+                        failure_counts[key] = []
+                    failure_counts[key].append(task_id)
+                elif status == "completed" and task_score >= 0.6:
+                    if idx not in success_at_pos:
+                        success_at_pos[idx] = []
+                    success_at_pos[idx].append(desc)
+
+        # Filter to steps that fail >=2 times
+        new_policies: List[PolicyCase] = []
+        for (pos, failed_desc), task_ids in failure_counts.items():
+            if len(task_ids) < 2:
+                continue
+
+            # Find a successful alternative at the same position
+            alternatives = success_at_pos.get(pos, [])
+            if not alternatives:
+                continue
+
+            # Pick the most common successful alternative
+            alt_freq: Dict[str, int] = {}
+            for alt in alternatives:
+                if alt != failed_desc:  # Must be different from the failing step
+                    alt_freq[alt] = alt_freq.get(alt, 0) + 1
+            if not alt_freq:
+                continue
+            best_alt = max(alt_freq, key=alt_freq.get)
+
+            # Extract keywords from failed step as step_patterns
+            stop = {"the", "a", "an", "to", "of", "in", "for", "on", "and", "or", "is", "it", "with"}
+            step_patterns = [
+                w for w in failed_desc.split()
+                if len(w) > 2 and w not in stop
+            ][:5]
+
+            if not step_patterns:
+                continue
+
+            # Deduplicate against existing STEP policies
+            existing = self._find_similar_step_policy(user_id, task_type, step_patterns)
+            if existing:
+                existing.success_count += 1
+                existing.apply_count += 1
+                existing.updated_at = time.time()
+                self._save_policy(existing)
+                new_policies.append(existing)
+                continue
+
+            policy = self.create_step_policy(
+                user_id=user_id,
+                name=f"{task_type}_step_fix_v{len(self._policies) + 1}",
+                task_types=[task_type],
+                step_patterns=step_patterns,
+                approach=best_alt,
+                avoid=[failed_desc],
+                source_task_ids=task_ids[:5],
+            )
+            new_policies.append(policy)
+
+        return new_policies
+
     def match_policies(
         self,
         user_id: str,
@@ -741,6 +834,31 @@ def _find_similar_policy(
 
         return None
 
+    def _find_similar_step_policy(
+        self, user_id: str, task_type: str, step_patterns: List[str],
+    ) -> Optional[PolicyCase]:
+        """Find an existing STEP policy with similar step_patterns."""
+        pattern_words = set(w.lower() for w in step_patterns)
+        if not pattern_words:
+            return None
+
+        for policy in self._policies.values():
+            if policy.user_id != user_id:
+                continue
+            if policy.granularity != PolicyGranularity.STEP:
+                continue
+            if task_type not in policy.condition.task_types:
+                continue
+
+            existing_words = set(w.lower() for w in policy.condition.step_patterns)
+            if not existing_words:
+                continue
+            overlap = len(pattern_words & existing_words) / len(pattern_words | existing_words)
+            if overlap > self.SIMILARITY_THRESHOLD:
+                return policy
+
+        return None
+
     def _delete_policy(self, policy_id: str) -> None:
         self._policies.pop(policy_id, None)
         path = os.path.join(self._dir, f"{policy_id}.json")
diff --git a/dhee/simple.py b/dhee/simple.py
index 26c29ec..2142acc 100644
--- a/dhee/simple.py
+++ b/dhee/simple.py
@@ -528,6 +528,7 @@ def context(
         self,
         task_description: Optional[str] = None,
         user_id: Optional[str] = None,
+        operational: bool = False,
     ) -> Dict[str, Any]:
         """HyperAgent session bootstrap. Call once at conversation start.
 
@@ -538,10 +539,11 @@ def context(
         Args:
             task_description: What you're about to work on.
             user_id: Override default user_id.
+            operational: If True, return compact actionable-only format
+                for per-turn consumption instead of full context.
 
         Returns:
-            HyperContext dict with keys: warnings, insights, intentions,
-            performance, memories, last_session, meta.
+            HyperContext dict (full or operational).
         """
         uid = user_id or self._user_id
         self._tracker.on_context(task_description)
@@ -550,6 +552,8 @@ def context(
             task_description=task_description,
             memory=self._engram._memory,
         )
+        if operational:
+            return hyper_ctx.to_operational_dict()
         return hyper_ctx.to_dict()
 
     # ------------------------------------------------------------------
diff --git a/tests/test_cognition_kernel.py b/tests/test_cognition_kernel.py
index eebc5a9..6002f85 100644
--- a/tests/test_cognition_kernel.py
+++ b/tests/test_cognition_kernel.py
@@ -227,3 +227,317 @@ def test_kernel_stores_accessible(self, tmp_path):
         assert d.kernel.episodes is not None
         assert d.kernel.policies is not None
         assert d.kernel.intentions is not None
+
+
+# ── Step-Level Policy Extraction (Phase 2) ────────────────────────
+
+
+class TestStepPolicyExtraction:
+    @pytest.fixture
+    def store(self, tmp_path):
+        from dhee.core.policy import PolicyStore
+        return PolicyStore(data_dir=str(tmp_path / "policies"))
+
+    def _make_task(self, task_id, task_type, plan_steps, outcome_score):
+        """Helper: build a task dict with plan steps."""
+        plan = []
+        for desc, status in plan_steps:
+            plan.append({
+                "id": f"step-{desc[:8]}",
+                "description": desc,
+                "status": status,
+            })
+        return {
+            "id": task_id,
+            "task_type": task_type,
+            "outcome_score": outcome_score,
+            "plan": plan,
+        }
+
+    def test_extract_step_policies_from_failures(self, store):
+        """When the same step fails >=2 times and another succeeds, extract STEP policy."""
+        tasks = [
+            self._make_task("t1", "bug_fix", [
+                ("reproduce bug", "completed"),
+                ("check imports", "failed"),
+                ("write test", "completed"),
+            ], 0.3),
+            self._make_task("t2", "bug_fix", [
+                ("reproduce bug", "completed"),
+                ("check imports", "failed"),
+                ("write test", "completed"),
+            ], 0.4),
+            self._make_task("t3", "bug_fix", [
+                ("reproduce bug", "completed"),
+                ("trace call stack", "completed"),
+                ("write test", "completed"),
+            ], 0.9),
+        ]
+
+        policies = store.extract_step_policies("u", tasks, "bug_fix")
+        assert len(policies) >= 1
+        step_policy = policies[0]
+        assert step_policy.granularity.value == "step"
+        assert "trace call stack" in step_policy.action.approach
+        assert "check imports" in step_policy.action.avoid
+
+    def test_no_step_policy_below_threshold(self, store):
+        """Need >=2 failures of same step to extract."""
+        tasks = [
+            self._make_task("t1", "bug_fix", [
+                ("reproduce bug", "completed"),
+                ("check imports", "failed"),
+            ], 0.3),
+            self._make_task("t2", "bug_fix", [
+                ("reproduce bug", "completed"),
+                ("trace call stack", "completed"),
+            ], 0.9),
+        ]
+        policies = store.extract_step_policies("u", tasks, "bug_fix")
+        assert len(policies) == 0
+
+    def test_dedup_step_policies(self, store):
+        """Similar step policies boost existing rather than duplicate."""
+        tasks = [
+            self._make_task("t1", "bug_fix", [("check imports", "failed")], 0.3),
+            self._make_task("t2", "bug_fix", [("check imports", "failed")], 0.3),
+            self._make_task("t3", "bug_fix", [("trace call stack", "completed")], 0.9),
+        ]
+        p1 = store.extract_step_policies("u", tasks, "bug_fix")
+        assert len(p1) == 1
+        count_after_first = p1[0].apply_count
+
+        # Extract again — should boost, not duplicate
+        p2 = store.extract_step_policies("u", tasks, "bug_fix")
+        assert len(p2) == 1
+        assert p2[0].id == p1[0].id
+        assert p2[0].apply_count > count_after_first
+
+    def test_step_context_in_cognitive_state(self, tmp_path):
+        """get_cognitive_state includes active_step and step_policies."""
+        from dhee.core.cognition_kernel import CognitionKernel
+        kernel = CognitionKernel(data_dir=str(tmp_path / "kernel"))
+        # Create active task with current step
+        task = kernel.tasks.create_task("u", "Fix auth", "bug_fix", plan=["step 1", "step 2"])
+        task.start()  # starts first step
+        kernel.tasks.update_task(task)
+
+        state = kernel.get_cognitive_state("u", "bug_fix")
+        assert "active_step" in state
+        assert state["active_step"] == "step 1"
+        assert "step_policies" in state
+
+    def test_record_step_outcome(self, tmp_path):
+        """record_step_outcome finds and updates matching STEP policies."""
+        from dhee.core.cognition_kernel import CognitionKernel
+        kernel = CognitionKernel(data_dir=str(tmp_path / "kernel"))
+
+        # Create a STEP policy
+        kernel.policies.create_step_policy(
+            user_id="u",
+            name="check_imports_fix",
+            task_types=["bug_fix"],
+            step_patterns=["check", "imports"],
+            approach="trace call stack instead",
+        )
+
+        # Record step outcome
+        kernel.record_step_outcome(
+            "u", "bug_fix", "check imports first",
+            success=True, actual_score=0.8,
+        )
+
+        # Verify policy was updated
+        policies = list(kernel.policies._policies.values())
+        step_policy = [p for p in policies if p.granularity.value == "step"][0]
+        assert step_policy.apply_count == 1
+        assert step_policy.success_count == 1
+
+
+# ── Utility Tracking (Phase 3) ───────────────────────────────────
+
+
+class TestUtilityTracking:
+    def test_heuristic_record_outcome(self, tmp_path):
+        """Heuristic.record_outcome updates utility via EMA."""
+        from dhee.core.heuristic import Heuristic
+        h = Heuristic(
+            id="h1", content="test heuristic", abstraction_level="domain",
+            source_task_types=["bug_fix"], confidence=0.6, created_at=0.0,
+        )
+        delta = h.record_outcome(success=True, baseline_score=0.5, actual_score=0.8)
+        assert delta == pytest.approx(0.3)
+        assert h.utility > 0
+        assert h.apply_count == 1
+        assert h.validation_count == 1
+
+    def test_heuristic_strength_includes_utility(self, tmp_path):
+        """strength() incorporates utility factor."""
+        from dhee.core.heuristic import Heuristic
+        h1 = Heuristic(
+            id="h1", content="test", abstraction_level="domain",
+            source_task_types=["t"], confidence=0.8, created_at=0.0,
+            validation_count=3, invalidation_count=1,
+        )
+        base_strength = h1.strength()
+
+        h2 = Heuristic(
+            id="h2", content="test", abstraction_level="domain",
+            source_task_types=["t"], confidence=0.8, created_at=0.0,
+            validation_count=3, invalidation_count=1,
+            utility=0.5,  # positive utility
+        )
+        boosted_strength = h2.strength()
+        assert boosted_strength > base_strength
+
+    def test_insight_record_outcome(self):
+        """Insight.record_outcome updates utility via EMA."""
+        from dhee.core.buddhi import Insight
+        i = Insight(
+            id="i1", user_id="u", content="test insight",
+            insight_type="strategy", source_task_types=["bug_fix"],
+            confidence=0.7, created_at="2026-01-01", last_validated="2026-01-01",
+            validation_count=0, invalidation_count=0, tags=["test"],
+        )
+        delta = i.record_outcome(success=True, baseline_score=0.4, actual_score=0.9)
+        assert delta == pytest.approx(0.5)
+        assert i.utility > 0
+        assert i.apply_count == 1
+
+    def test_intention_record_outcome(self, tmp_path):
+        """IntentionStore.record_outcome marks intention usefulness."""
+        from dhee.core.intention import IntentionStore
+        store = IntentionStore(data_dir=str(tmp_path / "intentions"))
+        i = store.store("u", "run tests after deploy", trigger_keywords=["deploy"])
+        # Simulate trigger
+        i.status = "triggered"
+        store.record_outcome(i.id, useful=True, outcome_score=0.9)
+        assert i.was_useful is True
+        assert i.outcome_score == 0.9
+
+    def test_contrastive_record_outcome(self):
+        """ContrastivePair.record_outcome updates utility."""
+        from dhee.core.contrastive import ContrastivePair
+        pair = ContrastivePair(
+            id="c1", task_description="test task", task_type="bug_fix",
+            success_approach="do X", failure_approach="do Y",
+            outcome_delta=0.5, created_at=0.0,
+        )
+        delta = pair.record_outcome(success=True, baseline_score=0.5, actual_score=0.8)
+        assert delta == pytest.approx(0.3)
+        assert pair.utility > 0
+        assert pair.apply_count == 1
+        assert pair.validation_count == 1
+
+    def test_episode_connection_count_incremented(self, tmp_path):
+        """connection_count increases on checkpoint when task exists."""
+        from dhee.core.cognition_kernel import CognitionKernel
+        kernel = CognitionKernel(data_dir=str(tmp_path / "kernel"))
+        # Create a task and start an episode
+        kernel.tasks.create_task("u", "fix auth", "bug_fix")
+        kernel.episodes.begin_episode("u", "working on auth", "bug_fix")
+
+        kernel.record_checkpoint_event("u", "progress on auth", "paused")
+
+        # Check episode connection_count was incremented
+        open_eps = getattr(kernel.episodes, '_open_episodes', {})
+        ep_id = open_eps.get("u")
+        if ep_id:
+            ep = kernel.episodes._episodes.get(ep_id)
+            assert ep.connection_count >= 1
+
+
+# ── Operational Context (Phase 4) ────────────────────────────────
+
+
+class TestOperationalContext:
+    def test_hyper_context_has_operational_fields(self, tmp_path):
+        """HyperContext includes active_step, step_policies, action_items."""
+        from dhee.core.buddhi import Buddhi
+        from dhee.core.cognition_kernel import CognitionKernel
+        kernel = CognitionKernel(data_dir=str(tmp_path / "buddhi"))
+        buddhi = Buddhi(data_dir=str(tmp_path / "buddhi"), kernel=kernel)
+
+        # Create active task with current step
+        task = kernel.tasks.create_task("u", "Fix auth", "bug_fix", plan=["step 1", "step 2"])
+        task.start()
+        kernel.tasks.update_task(task)
+
+        ctx = buddhi.get_hyper_context(user_id="u", task_description="bug_fix")
+        d = ctx.to_dict()
+        assert "active_step" in d
+        assert d["active_step"] == {"description": "step 1"}
+        assert "step_policies" in d
+        assert "action_items" in d
+        assert "critical_blockers" in d
+        assert "contradictions" in d
+        assert any("[NEXT STEP]" in item for item in d["action_items"])
+
+    def test_to_operational_dict_compact(self, tmp_path):
+        """to_operational_dict() returns only actionable items."""
+        from dhee.core.buddhi import Buddhi
+        from dhee.core.cognition_kernel import CognitionKernel
+        kernel = CognitionKernel(data_dir=str(tmp_path / "buddhi"))
+        buddhi = Buddhi(data_dir=str(tmp_path / "buddhi"), kernel=kernel)
+
+        task = kernel.tasks.create_task("u", "Fix auth", "bug_fix", plan=["step 1"])
+        task.start()
+        kernel.tasks.update_task(task)
+
+        ctx = buddhi.get_hyper_context(user_id="u", task_description="bug_fix")
+        op = ctx.to_operational_dict()
+
+        # Should have current_step and action_items
+        assert "current_step" in op
+        assert "action_items" in op
+        # Should NOT have full history fields
+        assert "insights" not in op
+        assert "performance" not in op
+        assert "memories" not in op
+
+    def test_action_items_priority_order(self, tmp_path):
+        """Intentions come before steps in action_items."""
+        from dhee.core.buddhi import Buddhi
+        from dhee.core.cognition_kernel import CognitionKernel
+        kernel = CognitionKernel(data_dir=str(tmp_path / "buddhi"))
+        buddhi = Buddhi(data_dir=str(tmp_path / "buddhi"), kernel=kernel)
+
+        # Create intention + active step
+        kernel.intentions.store("u", "run tests after deploy", trigger_keywords=["bug_fix"])
+        task = kernel.tasks.create_task("u", "Fix auth", "bug_fix", plan=["debug code"])
+        task.start()
+        kernel.tasks.update_task(task)
+
+        ctx = buddhi.get_hyper_context(user_id="u", task_description="bug_fix")
+        items = ctx.action_items
+
+        intention_idx = next((i for i, x in enumerate(items) if "[INTENTION]" in x), None)
+        step_idx = next((i for i, x in enumerate(items) if "[NEXT STEP]" in x), None)
+        if intention_idx is not None and step_idx is not None:
+            assert intention_idx < step_idx
+
+    def test_context_operational_flag(self, tmp_path):
+        """context(operational=True) returns compact format."""
+        from dhee.simple import Dhee
+        d = Dhee(in_memory=True, data_dir=str(tmp_path))
+        full = d.context("test task")
+        op = d.context("test task", operational=True)
+        # Full has many keys, operational is a subset
+        assert "user_id" in full
+        assert "user_id" not in op
+
+    def test_critical_blockers_surfaced(self, tmp_path):
+        """Blockers from active task appear in critical_blockers."""
+        from dhee.core.buddhi import Buddhi
+        from dhee.core.cognition_kernel import CognitionKernel
+        kernel = CognitionKernel(data_dir=str(tmp_path / "buddhi"))
+        buddhi = Buddhi(data_dir=str(tmp_path / "buddhi"), kernel=kernel)
+
+        task = kernel.tasks.create_task("u", "Fix auth", "bug_fix", plan=["step 1"])
+        task.start()
+        task.add_blocker("missing API key", severity="hard")
+        kernel.tasks.update_task(task)
+
+        ctx = buddhi.get_hyper_context(user_id="u", task_description="bug_fix")
+        assert len(ctx.critical_blockers) >= 1
+        assert any("API key" in b for b in ctx.critical_blockers)