Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
894e399
chore: track new harness/instance modules at current state
codejunkie99 Apr 25, 2026
df388c7
fix(install): parse flags before assigning ADAPTER/TARGET
codejunkie99 Apr 25, 2026
ca84c99
fix(onboard): support `--` separator and whitelist known flags
codejunkie99 Apr 25, 2026
9c4f2d7
fix(runtime): clear active_instance when worker marks itself stopped
codejunkie99 Apr 25, 2026
133aa11
fix(pre_tool_call): enforce schema blocked/approval patterns for shell
codejunkie99 Apr 25, 2026
f494db9
ci: add GH Actions workflow gating verifiers + installer smoke
codejunkie99 Apr 25, 2026
7be3dc8
fix(install.ps1): add `pi` adapter parity with install.sh
codejunkie99 Apr 25, 2026
512829b
test(formula): cover documented no-path form and full .agent tree
codejunkie99 Apr 25, 2026
be1fa2b
fix(skill_loader): contain skill names and precondition paths
codejunkie99 Apr 25, 2026
795b4be
fix(instances): serialize worker spawn with per-instance flock
codejunkie99 Apr 25, 2026
3e8fdf5
fix(context_budget): enforce budget across all sections
codejunkie99 Apr 25, 2026
215130c
fix(review_state): validate candidate_id and contain lifecycle paths
codejunkie99 Apr 25, 2026
ded112f
fix(graduate): validate candidate_id at CLI entry and on path build
codejunkie99 Apr 25, 2026
c6322f4
fix(lesson_store): make provisional lessons visible to dedup
codejunkie99 Apr 26, 2026
9c51e08
fix(auto_dream): atomic rewrite + .bak snapshot for episodic log
codejunkie99 Apr 26, 2026
5639da1
fix(learn): merge prior decisions on re-stage instead of overwriting
codejunkie99 Apr 26, 2026
eca0d57
fix(promote): atomic temp+rename for staged candidate JSON writes
codejunkie99 Apr 26, 2026
43c42d1
fix(render_lessons): read JSONL through the locked file pointer
codejunkie99 Apr 26, 2026
9808226
fix(hooks): lock episodic JSONL appends via shared helper
codejunkie99 Apr 26, 2026
bece775
fix(control_plane): quarantine corrupt queued jobs instead of deleting
codejunkie99 Apr 26, 2026
4e3ad62
docs: add trust console tui design
ftrtyfytfjh May 5, 2026
a5daba1
feat: add trust console tui
ftrtyfytfjh May 5, 2026
93956ee
fix(trust-console): apply codex review followups
ftrtyfytfjh May 5, 2026
bddc63b
Merge origin/master into feature/trust-console-tui
ftrtyfytfjh May 5, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
246 changes: 221 additions & 25 deletions .agent/harness/context_budget.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,57 +132,253 @@ def _top_lessons(query, lessons_md, char_budget=8000):
return _lines_up_to_budget([l for _, _, l in relevant], char_budget)


# Appended to a required section's body when it had to be cut to fit the budget.
_TRUNC_MARKER = "\n\n[truncated to fit budget]"
# Emitted in place of optional content that was dropped entirely; filled in
# via str.format(n=<count of omitted items>).
_OMIT_MARKER_FMT = "[{n} items omitted: budget exceeded]"


class _UsedTokens(int):
"""int subclass that carries an `overflow` flag.

Existing callers do `ctx, used = build_context(...)` and treat `used` as an
int — they still see the correct number. New callers can read
`used.overflow` to learn whether enforcement had to drop or truncate
content. This keeps the public 2-tuple signature compatible.
"""

# int subclasses can't accept __slots__ for instance attrs (variable-size
# base type), so we override __new__ to stash overflow on the instance dict
# via plain assignment after relying on the default dict.
def __new__(cls, value, overflow=False):
obj = super().__new__(cls, value)
obj.overflow = overflow
return obj


def _truncate_to_tokens(text, max_tokens):
    """Cut *text* down so its token estimate fits within *max_tokens*.

    Shares the 4-chars-per-token heuristic with ``_token_estimate`` and
    reserves space for ``_TRUNC_MARKER`` up front, so the post-truncation
    estimate still fits the budget the caller passed in.
    """
    if max_tokens <= 0:
        return ""
    if _token_estimate(text) > max_tokens:
        # Character budget left for the body once the marker is paid for.
        body_chars = max(0, (max_tokens - _token_estimate(_TRUNC_MARKER)) * 4)
        if body_chars > 0:
            return text[:body_chars] + _TRUNC_MARKER
        # Not even room for one body character: keep the bare marker so a
        # required section still signals its presence in the output.
        return _TRUNC_MARKER.lstrip()
    return text


def build_context(user_input: str, budget: int = 88000):
    """Returns (context_string, used_tokens). Lean, query-aware, budget-enforced.

    Budget enforcement (P1 fix):
      * Required sections (AGENTS map, active workspace, permissions) are
        always present in the output. If they would overflow the budget,
        their content is truncated to fit and tagged with a
        `[truncated to fit budget]` marker — never dropped silently.
      * Optional sections (lessons, episodes, matched skills) are skipped
        entirely with an `[N items omitted: budget exceeded]` marker when
        they would overflow.
      * Every assembled context ends with a `[budget: used X / Y tokens]`
        summary so callers can see the final accounting.

    Return shape is preserved: a 2-tuple `(context_string, used_tokens)`.
    `used_tokens` is an int subclass that exposes an `overflow: bool`
    attribute for new callers; existing callers that treat it as a plain
    int are unaffected.
    """
    parts, used = [], 0
    overflow = False

    # Each appended block costs its own tokens *plus* the `\n\n---\n\n`
    # separator that join() will add between it and the next block. We track
    # separator overhead explicitly so the budget check matches what the
    # caller actually receives.
    SEPARATOR_TOKENS = _token_estimate("\n\n---\n\n")  # 9 chars → 2 tokens
    # Reserve room for the final `[budget: used X / Y tokens]` summary line
    # plus its leading separator. Width here is a conservative upper bound.
    SUMMARY_RESERVE_TOKENS = _token_estimate("[budget: used 99999999 / 99999999 tokens]") + SEPARATOR_TOKENS
    # Per-block `len(s)//4` truncation undercounts vs the post-join estimate.
    # Reserve small headroom so the final joined string still fits the budget.
    DRIFT_HEADROOM_TOKENS = 4

    # Required sections are *mandatory* — their headers + omission markers
    # are floor-cost overhead. To keep the joined output within budget, we
    # pre-reserve that floor so early required sections don't eat budget
    # that later required sections need just for their stub.
    required_files = (
        "AGENTS.md",
        "memory/personal/PREFERENCES.md",
        "memory/working/WORKSPACE.md",
        "memory/working/REVIEW_QUEUE.md",
        "memory/semantic/DECISIONS.md",
    )
    perms_path = "protocols/permissions.md"

    def _stub_cost(rel_or_label):
        """Token cost of the minimum stub (header + omission marker + sep)."""
        if rel_or_label == perms_path:
            header = "# PERMISSIONS\n"
        else:
            header = f"# {rel_or_label}\n"
        stub = header + _OMIT_MARKER_FMT.format(n=1)
        return _token_estimate(stub) + SEPARATOR_TOKENS

    # Floor = stub cost for every required file that exists on disk + perms.
    required_floor = 0
    for rel in required_files:
        if _read(rel):
            required_floor += _stub_cost(rel)
    if _read(perms_path):
        required_floor += _stub_cost(perms_path)

    def _append(block):
        """Append a block, charging both its tokens and the join separator."""
        nonlocal used
        parts.append(block)
        # First block has no preceding separator; subsequent blocks do.
        sep_cost = SEPARATOR_TOKENS if len(parts) > 1 else 0
        used += _token_estimate(block) + sep_cost

    def _block_cost(block):
        """Token cost of appending `block` (block + separator if not first)."""
        sep_cost = SEPARATOR_TOKENS if len(parts) >= 1 else 0
        return _token_estimate(block) + sep_cost

    # Track how much of `required_floor` we've already paid; the remainder
    # is reserved out of `_room()` so we don't overspend on early sections.
    paid_floor = 0

    def _room():
        # Remaining required_floor we haven't paid yet stays reserved.
        remaining_floor = max(0, required_floor - paid_floor)
        return budget - used - SUMMARY_RESERVE_TOKENS - DRIFT_HEADROOM_TOKENS - remaining_floor

    # ------------------------------------------------------------------
    # Required sections — must appear in output. Truncate if oversized.
    # Preserves the original load order: AGENTS map first, then personal
    # preferences, live workspace, review queue, semantic decisions. These
    # are the sections agentic-stack treats as always-on context.
    # ------------------------------------------------------------------
    for rel in required_files:
        text = _read(rel)
        if not text:
            continue
        header = f"# {rel}\n"
        # Pay this section's floor first so _room() releases its reservation.
        paid_floor += _stub_cost(rel)
        # Room for the *body*, after subtracting header and separator overhead.
        sep_cost = SEPARATOR_TOKENS if parts else 0
        body_room = _room() - _token_estimate(header) - sep_cost
        body_tokens = _token_estimate(text)
        if body_room <= 0:
            # No room left at all. Emit header + omission marker so the
            # caller still sees the section name in the assembled context.
            block = header + _OMIT_MARKER_FMT.format(n=1)
            _append(block)
            overflow = True
            continue
        if body_tokens > body_room:
            text = _truncate_to_tokens(text, body_room)
            overflow = True
        _append(header + text)

    # ------------------------------------------------------------------
    # Optional: query-aware lessons. Skip with marker on overflow.
    # ------------------------------------------------------------------
    lessons_raw = _read("memory/semantic/LESSONS.md")
    if lessons_raw:
        lessons = _top_lessons(user_input, lessons_raw, char_budget=8000)
        if lessons:
            header = "# LESSONS (query-relevant)\n"
            block = header + lessons
            if _block_cost(block) <= _room():
                _append(block)
            else:
                n = sum(1 for ln in lessons.splitlines() if ln.strip().startswith("- "))
                marker_block = header + _OMIT_MARKER_FMT.format(n=max(n, 1))
                if _block_cost(marker_block) <= _room():
                    _append(marker_block)
                overflow = True

    # ------------------------------------------------------------------
    # Optional: query-aware top episodes. Skip with marker on overflow.
    # ------------------------------------------------------------------
    episodes = _top_episodes(user_input, k=5)
    if episodes:
        header = "# RECENT EPISODES (salience x relevance)\n"
        block = header + episodes
        if _block_cost(block) <= _room():
            _append(block)
        else:
            n = sum(1 for ln in episodes.splitlines() if ln.strip().startswith("- "))
            marker_block = header + _OMIT_MARKER_FMT.format(n=max(n, 1))
            if _block_cost(marker_block) <= _room():
                _append(marker_block)
            overflow = True

    # ------------------------------------------------------------------
    # Optional: matched skills (progressive_load is already input-matched).
    # Lazy import so a missing skill_loader doesn't kill context assembly.
    # ------------------------------------------------------------------
    try:
        from skill_loader import progressive_load
        skills = progressive_load(user_input)
    except Exception:
        skills = []
    skipped_skills = 0
    for s in skills:
        block = f"## Skill: {s['name']}\n{s['content']}"
        if _block_cost(block) <= _room():
            _append(block)
        else:
            skipped_skills += 1
            overflow = True
    if skipped_skills:
        marker_block = _OMIT_MARKER_FMT.format(n=skipped_skills) + " (skills)"
        if _block_cost(marker_block) <= _room():
            _append(marker_block)

    # ------------------------------------------------------------------
    # Required: permissions. Last and safety-critical — must appear,
    # truncated if oversized.
    # ------------------------------------------------------------------
    perms = _read(perms_path)
    if perms:
        header = "# PERMISSIONS\n"
        # Pay the perms floor so _room() releases its reservation.
        paid_floor += _stub_cost(perms_path)
        sep_cost = SEPARATOR_TOKENS if parts else 0
        body_room = _room() - _token_estimate(header) - sep_cost
        body_tokens = _token_estimate(perms)
        if body_room <= 0:
            block = header + _OMIT_MARKER_FMT.format(n=1)
            _append(block)
            overflow = True
        else:
            if body_tokens > body_room:
                perms = _truncate_to_tokens(perms, body_room)
                overflow = True
            _append(header + perms)

    # ------------------------------------------------------------------
    # Final summary line. Always appended so callers can audit the
    # assembled context's accounting at a glance. NOTE(review): the summary
    # prints the pre-reconciliation running tally, which may differ slightly
    # from the returned post-join estimate below — confirm that is intended.
    # ------------------------------------------------------------------
    summary = f"[budget: used {used} / {budget} tokens]"
    _append(summary)

    # Reconcile the running tally against the actually joined string. Per-block
    # `len(s) // 4` integer truncation undercounts vs the concatenated whole,
    # so prefer the post-join estimate as the authoritative number returned.
    final = "\n\n---\n\n".join(parts)
    final_tokens = _token_estimate(final)
    if final_tokens > budget:
        overflow = True
    return final, _UsedTokens(final_tokens, overflow)
Loading
Loading