From 4e7958eb6513c80f3f25b901216368da16a7626c Mon Sep 17 00:00:00 2001 From: Nathan Schram <5553883+nathanschram@users.noreply.github.com> Date: Mon, 18 May 2026 19:50:44 +1000 Subject: [PATCH] fix(rc20): #524 outbox + #526 watchdog approval-pending follow-ups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both issues shipped rc19 fixes (PR #555) but /monitor audits on 2026-05-18 showed each regression still firing in production because the rc19 patch landed in only one of two code paths. #524 — outbox silently drops directory entries rc19 surfaced 📎 Outbox skipped notices on the normal-completion path in handle_message but missed two adjacent paths: the pre-auto-continue delivery (subprocess 1 stuck-after-tool-result recovery) and the run_ok=False failed-run branch. Both still silently dropped the agent's intended deliverable. This commit extracts the surfacing logic into _surface_outbox_skipped in runner_bridge.py and wires it into both gap paths. On a failed run the code still skips the actual file send (preserving the original gating) but does a cheap scan_outbox() to collect skipped items and surface them, so the user always learns what the agent intended to ship. Honours the existing outbox_notify_skipped config flag and filters the "..." overflow pseudo-entry from the user-facing block. #526 — approval-pending stalls misclassified rc19 demoted the bridge-side WARN (progress_edits.stall_detected) to a paced INFO (subprocess.approval_pending) when _has_pending_approval() returned true. The watchdog-side detector in runner.py (which emits subprocess.liveness_stall and is the actual signal untether-issue-watcher auto-files on) was untouched, so the daemon kept filing GitHub issues on routine approval-pending sessions and the nsd audit (2026-05-18) showed a user cancelling a productive 15-minute investigation because the chat-side reassurance came too late (1800s threshold). This commit: - Adds _recent_event_is_control_request helper in runner.py — uses the stream's recent_events ring buffer as the approval-pending signal, consistent with the bridge's inline-keyboard predicate but accessible to runner-scope code. - Plumbs the predicate into _watchdog_loop: when the last JSONL event is control_request, emit subprocess.approval_pending INFO instead of liveness_stall WARN. Skip the auto-kill branch entirely. Pace INFO emission to once per 30 min via the shared _APPROVAL_PENDING_REFIRE_S constant (now defined once in runner.py and imported by the bridge). - Splits _STALL_THRESHOLD_APPROVAL into _STALL_THRESHOLD_APPROVAL_FIRST (600s) and the existing 1800s refire so the user gets a reassuring "tap a button above" chat message at 10 min on first occurrence, matching the watchdog's liveness threshold and avoiding the nsd-style early cancellation. - Rewords the chat-side approval reminder copy to make the "tap a button above to proceed (no action needed otherwise)" affordance explicit, directly quoting the audit's recommended text. Tests cover both code paths: - tests/test_outbox_delivery.py (existing) — format helper + settings default unchanged; no new file-level tests needed. - tests/test_exec_bridge.py — failed-run surfacing, notify_skipped=false suppression, only-overflow filter, two-tier first-reminder threshold, reworded copy. - tests/test_exec_runner.py — predicate truth-table coverage, watchdog demotion via integration with a fake codex script emitting control _request, watchdog WARN still fires when no control_request is recent. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/how-to/file-transfer.md | 10 +- pyproject.toml | 2 +- src/untether/runner.py | 140 ++++++++++++++++++------- src/untether/runner_bridge.py | 190 ++++++++++++++++++++++++---------- tests/test_exec_bridge.py | 190 ++++++++++++++++++++++++++++++++++ tests/test_exec_runner.py | 172 ++++++++++++++++++++++++++++++ uv.lock | 2 +- 7 files changed, 607 insertions(+), 99 deletions(-) diff --git a/docs/how-to/file-transfer.md b/docs/how-to/file-transfer.md index aa15895d..59dd844f 100644 --- a/docs/how-to/file-transfer.md +++ b/docs/how-to/file-transfer.md @@ -139,12 +139,12 @@ Files are sent in alphabetical order, one at a time, immediately after the agent Outbox delivery reuses the same security rules as `/file get`: -- **Deny globs** — files matching `.git/**`, `.env`, `.envrc`, `**/*.pem`, `**/.ssh/**` (and any custom deny globs) are silently skipped -- **Size limit** — files larger than 50 MB are skipped +- **Deny globs** — files matching `.git/**`, `.env`, `.envrc`, `**/*.pem`, `**/.ssh/**` (and any custom deny globs) are surfaced to the user as a `📎 Outbox skipped` notice rather than silently dropped (#524) +- **Size limit** — files larger than 50 MB are skipped (and surfaced via the same notice) - **Path traversal** — symlinks pointing outside the project root are rejected - **File count** — capped at `outbox_max_files` per run (default 10) - **Auto-cleanup** — sent files are deleted after delivery by default, preventing sensitive data accumulation -- **Successful runs only** — outbox is not scanned on errored or cancelled runs +- **Failed/auto-continued runs** — actual file delivery is still gated on a successful run, but skipped items (directories, deny-globbed files, oversized files) are surfaced even when the run fails or auto-continues, so you always learn what the agent intended to send. Opt out via `outbox_notify_skipped = false`. ### Engine compatibility @@ -164,8 +164,8 @@ All engines support outbox delivery — any agent that can write files to disk c ### Limitations -- **Flat scan only** — only files directly in `.untether-outbox/` are sent; subdirectories are skipped. Agents can zip nested structures if needed. -- **Successful runs only** — if the agent errors or is cancelled, the outbox is not scanned. +- **Flat scan only** — only files directly in `.untether-outbox/` are sent; subdirectories are surfaced as `📎 Outbox skipped` (#524) but not delivered. Agents can zip nested structures if needed. +- **Failed runs deliver no files** — the actual file send is still gated on a successful run, but the skipped-items notice fires either way so the user always learns what the agent intended to ship. - **No real-time delivery** — files are sent after the run completes, not during. diff --git a/pyproject.toml b/pyproject.toml index 68dbc197..f5064dcf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ name = "untether" authors = [{name = "Little Bear Apps", email = "hello@littlebearapps.com"}] maintainers = [{name = "Little Bear Apps", email = "hello@littlebearapps.com"}] -version = "0.35.3rc19" +version = "0.35.3rc20" keywords = ["telegram", "claude-code", "codex", "opencode", "pi", "gemini-cli", "amp", "ai-agents", "coding-assistant", "remote-control", "cli-bridge"] description = "Run AI coding agents from your phone. Bridges Claude Code, Codex, OpenCode, Pi, Gemini CLI, and Amp to Telegram with interactive permissions, voice input, cost tracking, and live progress." readme = {file = "README.md", content-type = "text/markdown"} diff --git a/src/untether/runner.py b/src/untether/runner.py index 54144f82..7133b585 100644 --- a/src/untether/runner.py +++ b/src/untether/runner.py @@ -153,6 +153,33 @@ def _rc_label(rc: int) -> str: # the parser saw. recent_events still records them for diagnostics. _CONTROL_CHANNEL_EVENT_TYPES = frozenset({"control_request", "control_response"}) +# #526 rc20 follow-up: shared with runner_bridge.py for paced +# ``subprocess.approval_pending`` INFO emission. The user-side stall +# detector (bridge) and the watchdog-side liveness detector (here) +# both honour the same 30-min refire window so operators see at most +# one INFO per session per 30 min of an approval-waiting state. +_APPROVAL_PENDING_REFIRE_S = 1800.0 + + +def _recent_event_is_control_request(stream: JsonlStreamState) -> bool: + """True if the most recent JSONL event in the ring buffer is a + Claude ``control_request`` frame — i.e. the session is awaiting an + approval response on the control channel. + + Used by ``_watchdog_loop`` to demote ``subprocess.liveness_stall`` + WARN → ``subprocess.approval_pending`` INFO, mirroring the bridge-side + behaviour added in rc19. The bridge-side predicate inspects the + inline-keyboard payload of the most recent action; the watchdog has + no access to bridge state, so it consults the JSONL event stream + directly. Both signals agree in the common case where Claude emitted + a ``control_request`` and we're waiting for the user to click a + button (or otherwise resolve the approval). + """ + if not stream.recent_events: + return False + _, label = stream.recent_events[-1] + return label == "control_request" + def _classify_jsonl_event(raw: Any) -> str: """Return "tool_result" | "assistant" | "other" for a decoded JSONL event. @@ -1062,6 +1089,11 @@ async def _subprocess_watchdog( liveness_warned = False prev_diag = None + # #526 rc20 follow-up: pace ``subprocess.approval_pending`` INFO + # so the watchdog emits at most once per 30 min while the user + # deliberates. Tracked as a local rather than on the stream so + # the lifetime matches the watchdog loop (per-subprocess). + last_approval_pending_emit_at: float = 0.0 # Poll until the process is dead or the reader finishes. while not reader_done.is_set(): @@ -1092,46 +1124,84 @@ async def _subprocess_watchdog( ): idle = time.monotonic() - stream.last_stdout_at if idle >= self._LIVENESS_TIMEOUT_SECONDS: - liveness_warned = True - stream.liveness_stalls += 1 - diag = collect_proc_diag(pid) - cpu_active = is_cpu_active(prev_diag, diag) - recent = list(stream.recent_events)[-5:] - logger.warning( - "subprocess.liveness_stall", - pid=pid, - idle_seconds=round(idle, 1), - event_count=stream.event_count, - last_event_type=stream.last_event_type, - tcp_established=diag.tcp_established if diag else None, - rss_kb=diag.rss_kb if diag else None, - cpu_active=cpu_active, - recent_events=[(round(t, 1), lbl) for t, lbl in recent], - ) - # Auto-kill: config enabled + zero TCP + CPU NOT active - if ( - self._stall_auto_kill - and diag is not None - and diag.tcp_established == 0 - and diag.alive - and cpu_active is not True - ): + # #526 rc20 follow-up: when the most recent JSONL + # event is a ``control_request``, the subprocess + # is awaiting a user approval — emit a paced + # ``subprocess.approval_pending`` INFO instead of + # the ``subprocess.liveness_stall`` WARN. Skip the + # auto-kill branch entirely (approval-waiting is + # by definition not a hang). Without latching + # ``liveness_warned`` so a later genuine hang + # (post-approval) can still fire the WARN. + if _recent_event_is_control_request(stream): + now = time.monotonic() + if ( + last_approval_pending_emit_at == 0.0 + or now - last_approval_pending_emit_at + >= _APPROVAL_PENDING_REFIRE_S + ): + last_approval_pending_emit_at = now + diag = collect_proc_diag(pid) + cpu_active = is_cpu_active(prev_diag, diag) + recent = list(stream.recent_events)[-5:] + logger.info( + "subprocess.approval_pending", + pid=pid, + idle_seconds=round(idle, 1), + event_count=stream.event_count, + last_event_type=stream.last_event_type, + cpu_active=cpu_active, + recent_events=[(round(t, 1), lbl) for t, lbl in recent], + approval_pending=True, + source="watchdog", + ) + prev_diag = diag + else: + liveness_warned = True + stream.liveness_stalls += 1 + diag = collect_proc_diag(pid) + cpu_active = is_cpu_active(prev_diag, diag) + recent = list(stream.recent_events)[-5:] logger.warning( - "subprocess.liveness_kill", + "subprocess.liveness_stall", pid=pid, - reason="zero_tcp_zero_cpu", + idle_seconds=round(idle, 1), + event_count=stream.event_count, + last_event_type=stream.last_event_type, + tcp_established=diag.tcp_established if diag else None, + rss_kb=diag.rss_kb if diag else None, + cpu_active=cpu_active, + recent_events=[(round(t, 1), lbl) for t, lbl in recent], + approval_pending=False, ) - try: - _os.killpg(pid, signal.SIGKILL) - except (ProcessLookupError, PermissionError, OSError) as e: - logger.debug( - "subprocess.watchdog.suppressed", + # Auto-kill: config enabled + zero TCP + CPU NOT active + if ( + self._stall_auto_kill + and diag is not None + and diag.tcp_established == 0 + and diag.alive + and cpu_active is not True + ): + logger.warning( + "subprocess.liveness_kill", pid=pid, - error=str(e), - error_type=e.__class__.__name__, - context="liveness_kill", + reason="zero_tcp_zero_cpu", ) - prev_diag = diag + try: + _os.killpg(pid, signal.SIGKILL) + except ( + ProcessLookupError, + PermissionError, + OSError, + ) as e: + logger.debug( + "subprocess.watchdog.suppressed", + pid=pid, + error=str(e), + error_type=e.__class__.__name__, + context="liveness_kill", + ) + prev_diag = diag await anyio.sleep(self._WATCHDOG_POLL_SECONDS) if stream.did_emit_completed or reader_done.is_set(): diff --git a/src/untether/runner_bridge.py b/src/untether/runner_bridge.py index 39fead04..29a1f13d 100644 --- a/src/untether/runner_bridge.py +++ b/src/untether/runner_bridge.py @@ -18,7 +18,7 @@ from .model import ActionEvent, CompletedEvent, ResumeToken, StartedEvent, UntetherEvent from .presenter import Presenter from .progress import ProgressTracker -from .runner import Runner +from .runner import _APPROVAL_PENDING_REFIRE_S, Runner from .transport import ( ChannelId, MessageId, @@ -307,6 +307,45 @@ def _format_outbox_skipped_notice(skipped: list[tuple[str, str]]) -> str: return "\n".join(lines) +async def _surface_outbox_skipped( + cfg: ExecBridgeConfig, + incoming: IncomingMessage, + user_ref: MessageRef, + skipped: list[tuple[str, str]], + outbox_config: Any, +) -> None: + """#524 rc20 follow-up: send the 📎 Outbox skipped notice as a follow-up + Telegram message. Extracted so the same surface fires from both the + normal-completion and pre-auto-continue paths in handle_message, and + from the run_ok=False branch where outbox delivery itself is skipped + but the user still needs to know what the agent intended to send. + + The "..." pseudo-entry is the max-files-exceeded notice which we keep + in logs but skip from the user-facing block (the per-file reason there + isn't actionable). + """ + if not skipped: + return + if not getattr(outbox_config, "outbox_notify_skipped", True): + return + notable = [(name, reason) for (name, reason) in skipped if name != "..."] + if not notable: + return + text = _format_outbox_skipped_notice(notable) + try: + await cfg.transport.send( + channel_id=incoming.channel_id, + message=RenderedMessage(text=text, extra={}), + options=SendOptions( + reply_to=user_ref, + notify=False, + thread_id=incoming.thread_id, + ), + ) + except Exception: # noqa: BLE001 + logger.warning("outbox.skipped_notice_failed", exc_info=True) + + def _format_auto_continue_notice(auto_continued_count: int) -> str: """#551 Tier 1: build the Telegram notice text shown when auto-continue fires. The 🔁 prefix distinguishes auto-resume from a fresh start so @@ -1157,7 +1196,14 @@ async def _stall_monitor(self) -> None: # tool, or when child processes are active (Agent subagents). mcp_server = self._has_running_mcp_tool() if self._has_pending_approval(): - threshold = self._STALL_THRESHOLD_APPROVAL + # #526 rc20 follow-up: first reminder at 600 s (so users + # get a visible "no action needed" message in the same + # window as a normal stall), subsequent reminders gated + # by the 1800 s refire threshold. + if self._last_approval_pending_emit_at == 0.0: + threshold = self._STALL_THRESHOLD_APPROVAL_FIRST + else: + threshold = self._STALL_THRESHOLD_APPROVAL threshold_reason = "pending_approval" elif mcp_server is not None: threshold = self._STALL_THRESHOLD_MCP_TOOL @@ -1266,7 +1312,6 @@ async def _stall_monitor(self) -> None: # to treat WARNs as auto-fileable, so this also stops # spurious GitHub issue creation (closes #533). if threshold_reason == "pending_approval": - _APPROVAL_PENDING_REFIRE_S = 1800.0 if ( self._last_approval_pending_emit_at == 0.0 or now - self._last_approval_pending_emit_at @@ -1282,6 +1327,7 @@ async def _stall_monitor(self) -> None: last_action=last_action, recent_events=[(round(t, 1), lbl) for t, lbl in recent[-3:]], approval_pending=True, + source="bridge", ) else: logger.warning( @@ -1644,7 +1690,14 @@ async def _stall_monitor(self) -> None: # warning is expected, not a sign the agent has frozen. # Distinguish from genuine "no progress" copy so the user # realises the buttons above are theirs to action. - parts = [f"⏳ Awaiting your approval ({mins} min)"] + # #526 rc20 follow-up: nsd evidence (2026-05-18) showed + # users cancelling at ~13 min because the original copy + # didn't make the "tap a button" affordance explicit + # enough — they assumed the session had hung. + parts = [ + f"⏳ Awaiting your approval ({mins} min) — tap a " + "button above to proceed (no action needed otherwise)" + ] elif mcp_server is not None: parts = [f"⏳ MCP tool running: {mcp_server} ({mins} min)"] elif threshold_reason == "active_children": @@ -2401,7 +2454,15 @@ async def _delete_outlines( _STALL_THRESHOLD_TOOL: float = 600.0 # 10 minutes when a tool is actively running _STALL_THRESHOLD_MCP_TOOL: float = 900.0 # 15 min for MCP tools (network-bound) _STALL_THRESHOLD_SUBAGENT: float = 900.0 # 15 min for child process / subagent work - _STALL_THRESHOLD_APPROVAL: float = 1800.0 # 30 minutes when waiting for approval + # #526 rc20 follow-up: two-tier threshold for approval-pending stalls. + # First reminder fires at 600 s so users get a reassuring "tap a button + # above" message within the same window as a normal-tool stall (10 min) + # — without it, nsd evidence (2026-05-18) showed users ``/cancel``-ing + # productive sessions after ~13 min of silence. Subsequent reminders + # fall back to 1800 s (30 min) so the chat doesn't get noisy on long + # deliberations. + _STALL_THRESHOLD_APPROVAL_FIRST: float = 600.0 + _STALL_THRESHOLD_APPROVAL: float = 1800.0 # refire threshold after first _STALL_MAX_WARNINGS: int = 10 # absolute cap _STALL_MAX_WARNINGS_NO_PID: int = 3 # aggressive cap when pid=None + no events _TCP_ACTIVE_THRESHOLD: int = 20 # TCP connections above this suggest active work @@ -3139,6 +3200,20 @@ async def run_edits() -> None: skipped=len(result.skipped), cleaned=result.cleaned, ) + # #524 rc20 follow-up: surface skipped items from the + # pre-auto-continue scan too. Without this, agents that + # write a directory (e.g. ``guides/``) and then hit the + # stuck-after-tool-results recovery never tell the user + # the deliverable existed — the directory is left in + # place for subprocess 2 to re-find, but the user sees + # nothing in chat about the first attempt. + await _surface_outbox_skipped( + cfg, + incoming, + user_ref, + result.skipped, + _oc, + ) except Exception: # noqa: BLE001 logger.warning( "outbox.auto_continue_delivery_failed", exc_info=True @@ -3372,61 +3447,62 @@ async def run_edits() -> None: session_key = f"{incoming.channel_id}:{progress_ref.message_id}" unregister_progress(_PROGRESS_PERSISTENCE_PATH, session_key) - # Deliver outbox files (agent-initiated file delivery) - if ( - cfg.send_file is not None - and cfg.outbox_config is not None - and run_ok is not False - ): - from .telegram.outbox_delivery import deliver_outbox_files + # Deliver outbox files (agent-initiated file delivery). + # #524 rc20 follow-up: surface skipped items even when run_ok is False. + # Delivery of *sent* files still requires a successful run (failures + # may leave the outbox in a partially-written state), but the user + # should always learn what the agent intended to send. + if cfg.send_file is not None and cfg.outbox_config is not None: + from .telegram.outbox_delivery import ( + OutboxResult, + deliver_outbox_files, + scan_outbox, + ) from .utils.paths import get_run_base_dir _run_root = get_run_base_dir() if _run_root is not None: _oc = cfg.outbox_config - try: - _outbox_result = await deliver_outbox_files( - send_file=cfg.send_file, - channel_id=incoming.channel_id, - thread_id=incoming.thread_id, - reply_to_msg_id=user_ref.message_id, - run_root=_run_root, - outbox_dir=_oc.outbox_dir, - deny_globs=_oc.deny_globs, - max_download_bytes=_oc.max_download_bytes, - max_files=_oc.outbox_max_files, - cleanup=_oc.outbox_cleanup, + _outbox_result: OutboxResult | None = None + if run_ok is not False: + try: + _outbox_result = await deliver_outbox_files( + send_file=cfg.send_file, + channel_id=incoming.channel_id, + thread_id=incoming.thread_id, + reply_to_msg_id=user_ref.message_id, + run_root=_run_root, + outbox_dir=_oc.outbox_dir, + deny_globs=_oc.deny_globs, + max_download_bytes=_oc.max_download_bytes, + max_files=_oc.outbox_max_files, + cleanup=_oc.outbox_cleanup, + ) + except Exception: # noqa: BLE001 + logger.warning("outbox.delivery_failed", exc_info=True) + _outbox_result = None + else: + # Failed run: skip file delivery but still scan so the user + # gets the 📎 Outbox skipped notice for any directory or + # blocked entry the agent left behind. + try: + _, _failed_skipped = scan_outbox( + _run_root, + outbox_dir=_oc.outbox_dir, + deny_globs=_oc.deny_globs, + max_download_bytes=_oc.max_download_bytes, + max_files=_oc.outbox_max_files, + ) + except Exception: # noqa: BLE001 + logger.debug("outbox.failed_run_scan_error", exc_info=True) + _failed_skipped = [] + _outbox_result = OutboxResult(skipped=_failed_skipped) + + if _outbox_result is not None: + await _surface_outbox_skipped( + cfg, + incoming, + user_ref, + _outbox_result.skipped, + _oc, ) - except Exception: # noqa: BLE001 - logger.warning("outbox.delivery_failed", exc_info=True) - _outbox_result = None - - # #524: surface skipped items so the agent's "I prepared the - # guides folder for you" final message doesn't become a silent - # lie. The "..." pseudo-entry is the max-files-exceeded notice - # which we keep in logs but skip from the user-facing block - # (the per-file reason there isn't actionable). - if ( - _outbox_result is not None - and _outbox_result.skipped - and getattr(_oc, "outbox_notify_skipped", True) - ): - notable_skipped = [ - (name, reason) - for (name, reason) in _outbox_result.skipped - if name != "..." - ] - if notable_skipped: - skipped_text = _format_outbox_skipped_notice(notable_skipped) - try: - await cfg.transport.send( - channel_id=incoming.channel_id, - message=RenderedMessage(text=skipped_text, extra={}), - options=SendOptions( - reply_to=user_ref, - notify=False, - thread_id=incoming.thread_id, - ), - ) - except Exception: # noqa: BLE001 - logger.warning("outbox.skipped_notice_failed", exc_info=True) diff --git a/tests/test_exec_bridge.py b/tests/test_exec_bridge.py index 30cf0c5b..45e3db3b 100644 --- a/tests/test_exec_bridge.py +++ b/tests/test_exec_bridge.py @@ -2389,6 +2389,10 @@ async def test_stall_fires_after_approval_threshold() -> None: edits = _make_edits(transport, presenter, clock=clock) edits._stall_check_interval = 0.01 edits._STALL_THRESHOLD_SECONDS = 0.05 + # #526 rc20 follow-up: approval-pending now uses a two-tier threshold + # (FIRST then refire). Override both so the test still exercises the + # first-reminder path. + edits._STALL_THRESHOLD_APPROVAL_FIRST = 0.1 edits._STALL_THRESHOLD_APPROVAL = 0.1 # short for test from untether.model import Action, ActionEvent @@ -2448,6 +2452,8 @@ async def test_stall_approval_pending_demotes_warn_to_info(monkeypatch) -> None: edits = _make_edits(transport, presenter, clock=clock) edits._stall_check_interval = 0.01 edits._STALL_THRESHOLD_SECONDS = 0.05 + # #526 rc20 follow-up: two-tier approval threshold (FIRST + refire). + edits._STALL_THRESHOLD_APPROVAL_FIRST = 0.1 edits._STALL_THRESHOLD_APPROVAL = 0.1 from untether.model import Action, ActionEvent @@ -2509,6 +2515,8 @@ async def test_stall_approval_pending_info_event_paced_to_30_min( edits = _make_edits(transport, presenter, clock=clock) edits._stall_check_interval = 0.01 edits._STALL_THRESHOLD_SECONDS = 0.05 + # #526 rc20 follow-up: two-tier approval threshold (FIRST + refire). + edits._STALL_THRESHOLD_APPROVAL_FIRST = 0.05 edits._STALL_THRESHOLD_APPROVAL = 0.05 edits._stall_repeat_seconds = 0.0 # bypass the per-tick repeat guard @@ -2554,6 +2562,65 @@ async def drive() -> None: ) +@pytest.mark.anyio +async def test_first_approval_reminder_uses_lower_threshold() -> None: + """#526 rc20 follow-up: the FIRST chat-side reminder for an + approval-pending session fires at ``_STALL_THRESHOLD_APPROVAL_FIRST`` + (default 600 s — same as the tool stall) rather than the 1800 s + refire threshold. Without this fix, nsd evidence (2026-05-18) + showed users cancelling productive sessions after ~13 min of + silence because no chat-side reassurance had been emitted yet. + Subsequent reminders fall back to the 1800 s refire threshold. + """ + transport = FakeTransport() + presenter = _KeyboardPresenter() + clock = _FakeClock(start=100.0) + edits = _make_edits(transport, presenter, clock=clock) + edits._stall_check_interval = 0.01 + edits._STALL_THRESHOLD_SECONDS = 100.0 # normal: very long, shouldn't match + edits._STALL_THRESHOLD_APPROVAL_FIRST = 0.1 # first: short + edits._STALL_THRESHOLD_APPROVAL = 100.0 # refire: very long + + from untether.model import Action, ActionEvent + + evt = ActionEvent( + engine="claude", + action=Action( + id="ctrl.1", + kind="warning", + title="Permission Request [CanUseTool] - tool: ExitPlanMode", + detail={"inline_keyboard": {"buttons": [[{"text": "Approve"}]]}}, + ), + phase="started", + ) + await edits.on_event(evt) + clock.set(100.0) + + async with anyio.create_task_group() as tg: + + async def drive() -> None: + clock.set(100.2) # past FIRST (0.1) but not refire (100) + await anyio.sleep(0.05) + edits.signal_send.close() + + tg.start_soon(edits.run) + tg.start_soon(drive) + + # The chat-side reminder fired with the reworded copy quoted from + # the audit's recommended text (covers the "tap a button above" + # affordance + the "no action needed otherwise" reassurance). + approval_msgs = [ + c for c in transport.send_calls if "Awaiting your approval" in c["message"].text + ] + assert len(approval_msgs) >= 1, ( + f"Expected reworded approval reminder, saw: " + f"{[c['message'].text[:80] for c in transport.send_calls]}" + ) + msg_text = approval_msgs[0]["message"].text + assert "tap a button above" in msg_text + assert "no action needed" in msg_text + + @pytest.mark.anyio async def test_stall_normal_threshold_without_approval() -> None: """Stall monitor uses normal threshold when no pending approval.""" @@ -4745,6 +4812,129 @@ async def test_outbox_not_scanned_on_error(tmp_path) -> None: send_file.assert_not_called() +@pytest.mark.anyio +async def test_outbox_skipped_surfaced_on_failed_run(tmp_path) -> None: + """#524 rc20 follow-up: when a run fails (run_ok=False) but the outbox + contains a directory or other blocked entry, the user should still get + the ``📎 Outbox skipped`` follow-up message. Without this fix, failed + runs silently lose all evidence of intended deliveries.""" + from unittest.mock import AsyncMock + + from untether.settings import TelegramFilesSettings + from untether.utils.paths import reset_run_base_dir, set_run_base_dir + + outbox = tmp_path / ".untether-outbox" + outbox.mkdir() + # Directory entry — always "skipped" by scan_outbox + (outbox / "guides").mkdir() + + send_file = AsyncMock() + files_cfg = TelegramFilesSettings(enabled=True) + transport = FakeTransport() + runner = ScriptRunner([ErrorReturn(error="failed")], engine=CODEX_ENGINE) + cfg = ExecBridgeConfig( + transport=transport, + presenter=MarkdownPresenter(), + final_notify=False, + send_file=send_file, + outbox_config=files_cfg, + ) + incoming = IncomingMessage(channel_id=1, message_id=1, text="test") + token = set_run_base_dir(tmp_path) + try: + await handle_message(cfg, runner=runner, incoming=incoming, resume_token=None) + finally: + reset_run_base_dir(token) + + # No actual file delivery on a failed run. + send_file.assert_not_called() + # But the skipped notice IS sent — the user learns that ``guides/`` was + # left behind. + skipped_notices = [ + c + for c in transport.send_calls + if "Outbox skipped" in c["message"].text and "guides" in c["message"].text + ] + assert len(skipped_notices) == 1, ( + f"Expected exactly one Outbox skipped notice on failed run, " + f"saw {len(skipped_notices)}: " + f"{[c['message'].text[:80] for c in transport.send_calls]}" + ) + + +@pytest.mark.anyio +async def test_outbox_skipped_surfaced_when_notify_disabled_stays_silent( + tmp_path, +) -> None: + """The ``outbox_notify_skipped`` config flag opts the user out of + skipped-item surfacing entirely — verify it suppresses the failed-run + path too (not just the normal-completion path tested in rc19).""" + from unittest.mock import AsyncMock + + from untether.settings import TelegramFilesSettings + from untether.utils.paths import reset_run_base_dir, set_run_base_dir + + outbox = tmp_path / ".untether-outbox" + outbox.mkdir() + (outbox / "guides").mkdir() + + send_file = AsyncMock() + files_cfg = TelegramFilesSettings(enabled=True, outbox_notify_skipped=False) + transport = FakeTransport() + runner = ScriptRunner([ErrorReturn(error="failed")], engine=CODEX_ENGINE) + cfg = ExecBridgeConfig( + transport=transport, + presenter=MarkdownPresenter(), + final_notify=False, + send_file=send_file, + outbox_config=files_cfg, + ) + incoming = IncomingMessage(channel_id=1, message_id=1, text="test") + token = set_run_base_dir(tmp_path) + try: + await handle_message(cfg, runner=runner, incoming=incoming, resume_token=None) + finally: + reset_run_base_dir(token) + + skipped_notices = [ + c for c in transport.send_calls if "Outbox skipped" in c["message"].text + ] + assert skipped_notices == [] + + +@pytest.mark.anyio +async def test_surface_outbox_skipped_helper_only_overflow_entries_silent( + tmp_path, +) -> None: + """#524 rc20 follow-up: the ``...`` pseudo-entry from max_files + overflow is filtered out of the user-facing notice. If the only + skipped item is the overflow rollup, no message is sent at all.""" + from untether.runner_bridge import _surface_outbox_skipped + from untether.settings import TelegramFilesSettings + from untether.transport import MessageRef + + files_cfg = TelegramFilesSettings(enabled=True) + transport = FakeTransport() + cfg = ExecBridgeConfig( + transport=transport, + presenter=MarkdownPresenter(), + final_notify=False, + outbox_config=files_cfg, + ) + incoming = IncomingMessage(channel_id=1, message_id=1, text="test") + user_ref = MessageRef(channel_id=1, message_id=1) + + await _surface_outbox_skipped( + cfg, + incoming, + user_ref, + [("...", "3 more files exceeded max_files=10")], + files_cfg, + ) + + assert transport.send_calls == [] + + # ── _should_auto_continue detection (#34142/#30333) ── diff --git a/tests/test_exec_runner.py b/tests/test_exec_runner.py index 3fe8387d..b7716cca 100644 --- a/tests/test_exec_runner.py +++ b/tests/test_exec_runner.py @@ -761,6 +761,178 @@ def test_jsonl_stream_state_recent_events_ring_buffer() -> None: assert stream.recent_events[0] == (5.0, "type_5") +# =========================================================================== +# #526 rc20 follow-up — watchdog approval-pending awareness +# =========================================================================== + + +def test_recent_event_is_control_request_true_when_last_label_matches() -> None: + """#526 rc20: the watchdog uses ``recent_events[-1] == 'control_request'`` + as its approval-pending signal so a session waiting on an + ExitPlanMode/CanUseTool/AskUserQuestion approval doesn't flood the + operator dashboard with ``subprocess.liveness_stall`` WARNs. + """ + from untether.runner import JsonlStreamState, _recent_event_is_control_request + + stream = JsonlStreamState(expected_session=None) + stream.recent_events.append((1.0, "assistant")) + stream.recent_events.append((2.0, "control_request")) + + assert _recent_event_is_control_request(stream) is True + + +def test_recent_event_is_control_request_false_when_resolved() -> None: + """Once the approval resolves and Claude emits a ``control_response`` + (followed by assistant work), the predicate must report False — the + session is no longer awaiting user input and a subsequent stall + SHOULD escalate to the normal WARN path.""" + from untether.runner import JsonlStreamState, _recent_event_is_control_request + + stream = JsonlStreamState(expected_session=None) + stream.recent_events.append((1.0, "control_request")) + stream.recent_events.append((2.0, "control_response")) + stream.recent_events.append((3.0, "assistant")) + + assert _recent_event_is_control_request(stream) is False + + +def test_recent_event_is_control_request_false_when_buffer_empty() -> None: + """A fresh subprocess with no JSONL events yet is not approval-pending + — return False rather than raising IndexError.""" + from untether.runner import JsonlStreamState, _recent_event_is_control_request + + stream = JsonlStreamState(expected_session=None) + assert _recent_event_is_control_request(stream) is False + + +def test_approval_pending_refire_constant_is_30_min() -> None: + """rc19 picked a 30-minute pacing window for the bridge-side INFO. The + watchdog (rc20 follow-up) reuses the SAME constant so both detectors + agree on the heartbeat cadence — operators don't see one INFO per + detector per session per stall window.""" + from untether.runner import _APPROVAL_PENDING_REFIRE_S + + assert _APPROVAL_PENDING_REFIRE_S == 1800.0 + + +@pytest.mark.anyio +async def test_watchdog_demotes_to_approval_pending_when_control_request_recent( + tmp_path, +) -> None: + """#526 rc20 follow-up: when the most recent JSONL event in + ``stream.recent_events`` is ``control_request`` (Claude awaiting an + approval), the watchdog must emit ``subprocess.approval_pending`` + INFO instead of the ``subprocess.liveness_stall`` WARN. This is what + stops ``untether-issue-watcher`` from auto-filing GitHub issues on + routine approval-pending sessions. + """ + from structlog.testing import capture_logs + + thread_id = "019b73c4-0c3f-7701-a0bb-aac6b4d8a3bc" + codex_path = tmp_path / "codex" + # Emit the codex thread.started event (so the runner stays alive past + # the schema bootstrap) and then a ``control_request``-typed line so + # recent_events[-1] is ``"control_request"`` when the watchdog fires. + # Then sleep past the liveness threshold so the watchdog fires. + codex_path.write_text( + "#!/usr/bin/env python3\n" + "import json\n" + "import sys\n" + "import time\n" + "\n" + "sys.stdin.read()\n" + f"print(json.dumps({{'type': 'thread.started', 'thread_id': '{thread_id}'}}), flush=True)\n" + "print(json.dumps({'type': 'control_request', 'request_id': 'req_1'}), flush=True)\n" + "time.sleep(1.0)\n", + encoding="utf-8", + ) + codex_path.chmod(0o755) + + runner = CodexRunner(codex_cmd=str(codex_path), extra_args=[]) + runner._LIVENESS_TIMEOUT_SECONDS = 0.2 + runner._WATCHDOG_POLL_SECONDS = 0.05 + runner._WATCHDOG_GRACE_SECONDS = 0.5 + + with capture_logs() as logs: + with anyio.fail_after(5): + _ = [evt async for evt in runner.run("hi", None)] + + stream = runner.current_stream + assert stream is not None + + # The WARN must NOT have been emitted. + liveness_warns = [r for r in logs if r.get("event") == "subprocess.liveness_stall"] + assert liveness_warns == [], ( + f"Watchdog must demote WARN to INFO when control_request is most " + f"recent, got: {liveness_warns}" + ) + + # The INFO replacement MUST have been emitted exactly once. + approval_infos = [ + r for r in logs if r.get("event") == "subprocess.approval_pending" + ] + assert len(approval_infos) == 1, ( + f"Expected exactly 1 subprocess.approval_pending INFO, " + f"got {len(approval_infos)}: {approval_infos!r}" + ) + assert approval_infos[0].get("approval_pending") is True + assert approval_infos[0].get("source") == "watchdog" + # The latch (liveness_stalls counter) must NOT have been bumped — that + # field is reserved for the WARN path so session.summary still reflects + # approval-pending separately from actual liveness fires. + assert stream.liveness_stalls == 0 + + +@pytest.mark.anyio +async def test_watchdog_warn_still_fires_when_no_control_request(tmp_path) -> None: + """The rc20 follow-up must NOT silence the WARN for genuinely-hung + sessions. When the most recent event is a plain ``assistant`` (or + anything that's not ``control_request``), keep the existing WARN + + liveness_stalls counter behaviour.""" + from structlog.testing import capture_logs + + thread_id = "019b73c4-0c3f-7701-a0bb-aac6b4d8a3bd" + codex_path = tmp_path / "codex" + # No control_request — last recent_event will be the thread.started. + codex_path.write_text( + "#!/usr/bin/env python3\n" + "import json\n" + "import sys\n" + "import time\n" + "\n" + "sys.stdin.read()\n" + f"print(json.dumps({{'type': 'thread.started', 'thread_id': '{thread_id}'}}), flush=True)\n" + "time.sleep(1.0)\n", + encoding="utf-8", + ) + codex_path.chmod(0o755) + + runner = CodexRunner(codex_cmd=str(codex_path), extra_args=[]) + runner._LIVENESS_TIMEOUT_SECONDS = 0.2 + runner._WATCHDOG_POLL_SECONDS = 0.05 + runner._WATCHDOG_GRACE_SECONDS = 0.5 + + with capture_logs() as logs: + with anyio.fail_after(5): + _ = [evt async for evt in runner.run("hi", None)] + + stream = runner.current_stream + assert stream is not None + assert stream.liveness_stalls == 1 + + # WARN fired exactly once with approval_pending=False as the new + # disambiguating field. + liveness_warns = [r for r in logs if r.get("event") == "subprocess.liveness_stall"] + assert len(liveness_warns) == 1 + assert liveness_warns[0].get("approval_pending") is False + + # No approval-pending INFO. + approval_infos = [ + r for r in logs if r.get("event") == "subprocess.approval_pending" + ] + assert approval_infos == [] + + # =========================================================================== # Phase 2e: _ResumeLineProxy.current_stream forwarding (#98) # =========================================================================== diff --git a/uv.lock b/uv.lock index 5d8df42d..ddc91783 100644 --- a/uv.lock +++ b/uv.lock @@ -2069,7 +2069,7 @@ wheels = [ [[package]] name = "untether" -version = "0.35.3rc19" +version = "0.35.3rc20" source = { editable = "." } dependencies = [ { name = "aiohttp" },