From ddba754d1efb7bd99c9dfa48e4906b01d5cbd106 Mon Sep 17 00:00:00 2001 From: Nathan Schram <5553883+nathanschram@users.noreply.github.com> Date: Sun, 17 May 2026 14:47:44 +1000 Subject: [PATCH] =?UTF-8?q?fix(claude):=20#551=20=E2=80=94=20auto-continue?= =?UTF-8?q?=20outbox=20+=20UX=20recovery=20signal=20+=20chore:=20staging?= =?UTF-8?q?=200.35.3rc18=20+=20Task=204b?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three independent rc18 changes shipped together. The two #333 / #550 fixes are on separate PRs (fix/333-post-result-hang, fix/550-ask-question-keyboard-clear). - **Tier 0 — pre-swap outbox delivery (functional, ~3.6 % silent loss fix):** At the auto-continue trigger site (runner_bridge.py:~2935), call deliver_outbox_files BEFORE subprocess 2 spawns. Without this, files written by subprocess 1 during the stuck-after-tool-results window were orphaned (subprocess 2 starts fresh, never scans the outbox). Delivery is best-effort — a failure logs outbox.auto_continue_delivery_failed and does NOT block auto-continue. - **Tier 1 — reworded notice (UX):** changed the chat-side text from "⚠️ Auto-continuing — Claude stopped before processing tool results" to "🔁 Auto-resuming session after upstream Claude Code event". The 🔁 prefix signals recovery rather than failure and discourages /cancel-ing the salvage. Extracted into a small _format_auto_continue_notice() helper for testability. Task 4b — stall-suppression counter: - JsonlStreamState.stall_suppression_counts: dict[str, int]. - _bump_stall_suppression(reason) helper increments at three suppression sites: expected_wait (auto-cancel suppression), post_result (notification suppression), children_active (sleeping-main + active children). - session.summary now includes stall_suppressions=expected_wait:N,post_result:N,children_active:N so log audits can see suppression cascades without parsing nested JSON. 
chore: version bump 0.35.3rc17 → 0.35.3rc18 in pyproject.toml; uv.lock synced. CHANGELOG.md entry for v0.35.3rc18 covers #333 + #550 + #551 (the other two PRs reference the same entry). Tests (4 new): - test_exec_bridge.py: - test_551_auto_continue_notice_first_attempt - test_551_auto_continue_notice_repeat_attempt - test_4b_bump_stall_suppression_records_counts - test_4b_stall_suppression_count_bumped_on_post_result Full suite: 2675 passed, 2 skipped. Stretch tiers (#551 Tier 2/3/4 — catalog-staleness suppression window, rate-limit-aware deferral, registry preservation) deferred per scope decision in the rc18 plan. Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 4 ++ pyproject.toml | 2 +- src/untether/runner.py | 7 +-- src/untether/runner_bridge.py | 89 ++++++++++++++++++++++++++++++--- tests/test_exec_bridge.py | 94 +++++++++++++++++++++++++++++++++++ uv.lock | 2 +- 6 files changed, 187 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ea2b6b21..31f9ecda 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,10 @@ ### fixes +- **fix:** rc18 — `_post_result_idle_watchdog` post-result hang root cause + AskUserQuestion final-keyboard clear + auto-continue outbox+UX. Three independent rc18 fixes shipped together. + - **#333 — post-result hang fix (Tier 1+2+3 + Task 4a):** rc17 (#549) added entry/exit/tick instrumentation to the watchdog; that instrumentation caught the limbo on channelo session `8876c902` (2026-05-17, 26.6 min wasted). Root cause: when Claude Code v2.1.143 closes stdout while keeping the subprocess alive, the watchdog exited early via `task_exited reason=reader_done`, bypassing the 600 s countdown — and stall-detector suppression cascades (post_result + MCP-heartbeat-driven children-active) hid the limbo from auto-cancel indefinitely. 
**Tier 1 (`claude.py`):** when `reader_done` fires while `proc.returncode is None`, the new `_post_result_subcountdown` re-arms a stdout-closed countdown, defers on pending control_request / ask_question, then SIGTERMs the process group after `timeout_s`, 5 s grace, SIGKILL if still alive. New `task_exited` reasons: `reader_done_but_alive_timeout`, `subprocess_exited_during_subcountdown`. **Tier 2 (`runner_bridge.py`):** new `_POST_RESULT_LIMBO_THRESHOLD_S = 660.0` class const + `_post_result_idle_age_seconds()` helper; when post-result idle age exceeds the threshold AND no other expected-wait flag is set, the stall detector stops suppressing auto-cancel. One-shot `progress_edits.post_result_limbo_detected` warning. **Tier 3 (`claude.py`):** new `runner.limbo_detected` warning fired 30 s into the subcountdown when the subprocess is still alive — picked up automatically by `untether-issue-watcher` for `auto:error-report` filing on future regressions. **Task 4a (`runner.py` + `claude.py`):** `JsonlStreamState.lifecycle_state` + `_transition_lifecycle()` helper emits `subprocess.state.` info logs at every transition (`reader_eof`, `subcountdown`, `limbo`, `sigterm_sent`, `sigkill_sent`, `exited`). Permanent canary for future hang-class issues. 7 new tests (4 in `tests/test_claude_runner.py`, 3 in `tests/test_exec_bridge.py`) [#333](https://github.com/littlebearapps/untether/issues/333) + - **#550 — AskUserQuestion final-keyboard clear:** after the user answers the last question in a multi-question `AskUserQuestion` flow, the inline keyboard on the question message is now stripped via `ctx.executor.edit` (Approach A from the rc18 handover). Previously the buttons stayed clickable and fired `ask_question.flow_missing` warnings since the flow state was already cleaned up. 
Failure modes preserved: `answer_ask_question_with_options` returning `False` leaves the buttons in place (so the user can retry); `ctx.executor.edit` raising logs `ask_question.keyboard_clear_failed` but does NOT block the answer-sent return. 4 new tests in `tests/test_ask_user_question.py` [#550](https://github.com/littlebearapps/untether/issues/550) + - **#551 — auto-continue outbox + UX (Tier 0 + Tier 1):** **Tier 0:** outbox files written by subprocess 1 during the stuck-after-tool-results window are now delivered BEFORE subprocess 2 spawns, eliminating the ~3.6% silent loss observed on lba-1. The pre-swap call mirrors the existing `deliver_outbox_files` plumbing at the final-message site (cleanup=True so subprocess 2 starts fresh). Failure to deliver does NOT block auto-continue — the recovery is more important than any single batch of files; new `outbox.delivered_pre_auto_continue` info + `outbox.auto_continue_delivery_failed` warning logs. **Tier 1:** the auto-continue Telegram notice text changed from `⚠️ Auto-continuing — Claude stopped before processing tool results` to `🔁 Auto-resuming session after upstream Claude Code event`. The 🔁 prefix signals recovery rather than failure and discourages users from `/cancel`-ing the salvage. **Task 4b (`runner.py` + `runner_bridge.py`):** `JsonlStreamState.stall_suppression_counts: dict[str, int]` + `_bump_stall_suppression()` helper increments per-suppression-reason counters at three sites (`expected_wait`, `post_result`, `children_active`). `session.summary` now includes a stable `stall_suppressions=expected_wait:N,post_result:N,children_active:N` summary line so log audits can spot suppression cascades without parsing nested JSON. 
Stretch tiers (#551 Tier 2/3/4 — catalog-staleness suppression window, rate-limit-aware deferral, registry preservation) deferred to a future patch [#551](https://github.com/littlebearapps/untether/issues/551) - **fix:** rc17 — `_post_result_idle_watchdog` entry/exit/tick instrumentation (#333) + `last_bg_bash_launched_at` scalar (latent #347 sibling defect). Channelo VPS on rc16 (which already shipped the #544 ScheduleWakeup arm-delay scalar) hit a 43+ min post-result hang on session `b5c1c3e0-…` with `pending_wakeup=False` — i.e. NO `ScheduleWakeup` involved, so the #544 fix didn't apply. Logs showed `post_result=True` (so `state.result_received_at` IS set), `[watchdog]` config used the default `post_result_idle_enabled=true`, and the subprocess + children stayed alive (so `reader_done` was NOT set) — yet **zero** `claude.post_result_idle.closing_stdin` / `…deferred` log lines existed despite elapsed ≫ 600 s. Three of the four #333 candidates ruled out via logs + live `py-spy dump`; the remaining "task crashed silently / never started" candidate cannot be discriminated without entry/exit instrumentation. The CHANGELOG line in rc16 deferred #333 to v0.35.4 pending instrumentation — rc17 lands the instrumentation now and overrides that deferral. **Instrumentation:** `_post_result_idle_watchdog` now emits `claude.post_result_idle.task_started` (session_id, timeout_s, poll_interval_s) at entry; `claude.post_result_idle.tick` every iteration (armed, elapsed_s, effective_timeout_s, dead_wakeup, pending_requests, pending_asks, would_close, last_bg_bash_launched_at_age_s, last_schedule_wakeup_arm_delay); `claude.post_result_idle.tick_error` (warning + exc_info) on transient per-tick failures with one-interval backoff; and `claude.post_result_idle.task_exited` (reason ∈ `reader_done` | `stdin_closed` | `cancelled` | `loop_exited`) in a guaranteed `finally`. 
Per-tick `try/except` (not loop-wide) mirrors `_subprocess_watchdog` / `_drain_catalog_refresh` conventions so a transient error never cancels the sibling `_iter_jsonl_events` task in the task group. Verbose by design — at 30 s poll × hours of session = O(120) lines, trivial; rate-limiting now would create ambiguity in the next reproduction. **`last_bg_bash_launched_at` scalar:** `_clear_background_handle` (claude.py:550) pops `live_bg_bashes` on tool_result mirroring the original #507 ScheduleWakeup defect that #544 fixed via a scalar high-water-mark; new `ClaudeStreamState.last_bg_bash_launched_at: float | None` is set in `_register_background_handle` at the `Bash + run_in_background` branch, NOT cleared in `_clear_background_handle`, and reset on the same fresh-user-prompt path that resets `last_schedule_wakeup_arm_delay`. Critically a LAUNCH tracker, not a LIFETIME tracker — bg-bashes can outlive multiple user turns (long `npm install`, `tail -f`) so per-turn reset is correct. **Observability-only today**; the bridge's existing `_has_fresh_bash_output` / `_has_recent_bash_action` (runner_bridge.py:1738, 1753) remain the higher-fidelity bash-liveness proxies and the new scalar deliberately does NOT replace them in any suppression path. 7 new tests in `tests/test_claude_runner.py` (5 scalar lifecycle + 2 watchdog instrumentation covering `task_started`/`tick`/`task_exited` ordering and the `reader_done` exit path). The actual fix for whatever the new instrumentation reveals lands in a follow-up rc — rc17 is the diagnostic [#333](https://github.com/littlebearapps/untether/issues/333) (cross-ref [#544](https://github.com/littlebearapps/untether/issues/544), [#347](https://github.com/littlebearapps/untether/issues/347), [#374](https://github.com/littlebearapps/untether/issues/374)) - **fix:** rc16 — `ScheduleWakeup` post-result hold-open redux. 
The rc11 #507 fix added a `state.live_wakeups_arm_delay: dict[str, float]` populated in `_register_background_handle` and read in `_post_result_idle_watchdog` to shorten the 600 s timeout to `max_armed_delay + 60 s` when /loop is OFF. But the dict was wiped by `_clear_background_handle` on the ScheduleWakeup tool_result — which is the schedule-confirmation, not a terminal signal — so by the time the watchdog ticked (after the `result` event, which lands AFTER tool_result) the dict was empty and the dead-wakeup shortcut never engaged. Live impact: channelo VPS auditor-toolkit session `d11739ee-…` on rc15, 24+ min hold-open with `pending_wakeup=False` despite `last_action='tool:ScheduleWakeup (done)'`. Replaced the per-tool_id dict with `ClaudeStreamState.last_schedule_wakeup_arm_delay: float | None` — a per-turn scalar high-water-mark (`max` semantics for multi-wakeup turns) that survives `_clear_background_handle` and resets on each fresh user prompt (`StreamUserMessage` with non-tool_result content; mixed batches preserve the scalar). 4 new tests in `tests/test_claude_runner.py` cover the full tool_use → tool_result → result lifecycle (the #507 unit tests bypassed `_clear_background_handle`, which is why this slipped through), multi-wakeup max selection, new-turn reset, and the mixed-batch edge case. The two existing #507 tests now seed the scalar instead of the dict. The broader background-task-lifecycle refactor (terminal-vs-arm signal per primitive + deadline-expiry sweeps) tracked in [#374](https://github.com/littlebearapps/untether/issues/374) stays in v0.35.4; the sibling defect where the 600 s safety-net watchdog silently doesn't fire stays in [#333](https://github.com/littlebearapps/untether/issues/333) for v0.35.4 pending entry/exit instrumentation [#544](https://github.com/littlebearapps/untether/issues/544) - **fix:** rc14 — `claude.rate_limit_event` logs no longer drop `retry_after_s` on subscription-cap (reset-window) throttles. 
The Claude CLI emits two shapes of `rate_limit_event`: a full form carrying `retry_after_ms` (already covered) and a bare/reset-window form that carries `requests_reset` / `tokens_reset` ISO timestamps but no `retry_after_ms`. Untether's translate path only consumed `retry_after_ms`, so reset-window events fell into the "no retry hint" branch — `retry_after_s` stayed `None`, `ClaudeStreamState.rate_limit_total_s` never accumulated, and the chat surfaced the generic "⏳ Rate limited — waiting to retry" with no actionable wait time. The rc13 audit observed this firing across a 5-event burst on the `bip` chat that preceded a subscription-cap exhaustion across 3 chats — every event logged `retry_after_s=None cumulative_s=0.0` despite the upstream payload containing actionable wait info. New `_derive_retry_after_s(info)` helper in `runners/claude.py` picks the EARLIER of `requests_reset` / `tokens_reset` (the rate limit lifts as soon as either budget refills), clamps ≥ 0, tolerates both `Z` and `+00:00` ISO suffixes, and returns `None` for unparseable / missing timestamps. The translate path now falls back to the derived value when `retry_after_ms` is `None` and tracks which path fed the field via a new `retry_after_source=retry_after_ms|reset_ts` log key. The structured `claude.rate_limit_event` is also enriched to include every present `RateLimitInfo` field under `info=...` (`requests_limit`, `requests_remaining`, `requests_reset`, `tokens_limit`, `tokens_remaining`, `tokens_reset`, `retry_after_ms`) so future audits can see what upstream actually sent. The two subscription-error message variants observed in the audit ("out of extra usage", "hit your limit") already map to the same friendly hint via `error_hints.py:52-60`, so no work is needed there. Pre-emptive 75/90% budget warnings are out of scope for this fix — deferred as a discrete feature. 
4 new tests in `tests/test_claude_runner.py` (`test_translate_rate_limit_event_derives_retry_after_from_reset_ts`, `test_translate_rate_limit_event_prefers_earlier_reset_when_both_present`, `test_translate_rate_limit_event_retry_after_ms_takes_precedence`, `test_translate_rate_limit_event_handles_unparseable_reset_ts`); all four existing tests still pass [#518](https://github.com/littlebearapps/untether/issues/518) diff --git a/pyproject.toml b/pyproject.toml index 067643e1..d56958a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ name = "untether" authors = [{name = "Little Bear Apps", email = "hello@littlebearapps.com"}] maintainers = [{name = "Little Bear Apps", email = "hello@littlebearapps.com"}] -version = "0.35.3rc17" +version = "0.35.3rc18" keywords = ["telegram", "claude-code", "codex", "opencode", "pi", "gemini-cli", "amp", "ai-agents", "coding-assistant", "remote-control", "cli-bridge"] description = "Run AI coding agents from your phone. Bridges Claude Code, Codex, OpenCode, Pi, Gemini CLI, and Amp to Telegram with interactive permissions, voice input, cost tracking, and live progress." readme = {file = "README.md", content-type = "text/markdown"} diff --git a/src/untether/runner.py b/src/untether/runner.py index 51379df1..54144f82 100644 --- a/src/untether/runner.py +++ b/src/untether/runner.py @@ -346,9 +346,10 @@ class JsonlStreamState: lifecycle_state_entered_at: float = 0.0 # #333 Task 4b: per-suppression-reason counter, summarised in # ``session.summary``. Bumped by the bridge stall detector each - # tick a suppression branch fires (post_result, children_active, - # expected_wait). Plain dict (not defaultdict) so the slots-dataclass - # encoding stays trivial; bump via ``counts.get(k, 0) + 1``. + # tick a suppression branch fires (``expected_wait``, + # ``post_result``, ``children_active``). Plain dict so the + # slots-dataclass encoding stays trivial; bump via + # ``counts[k] = counts.get(k, 0) + 1`` from the call site. 
stall_suppression_counts: dict[str, int] = field(default_factory=dict) diff --git a/src/untether/runner_bridge.py b/src/untether/runner_bridge.py index f775ca3d..95313b06 100644 --- a/src/untether/runner_bridge.py +++ b/src/untether/runner_bridge.py @@ -288,6 +288,18 @@ def _should_auto_continue( return auto_continued_count < max_retries +def _format_auto_continue_notice(auto_continued_count: int) -> str: + """#551 Tier 1: build the Telegram notice text shown when auto-continue + fires. The 🔁 prefix distinguishes auto-resume from a fresh start so + users don't ``/cancel`` the salvage. Appends an attempt suffix once we + are past the first retry. + """ + notice = "\U0001f501 Auto-resuming session after upstream Claude Code event" + if auto_continued_count > 0: + notice += f" (attempt {auto_continued_count + 1})" + return notice + + _DEFAULT_PREAMBLE = ( "[Untether] You are running via Untether, a Telegram bridge for coding agents. " "The user is interacting through Telegram on a mobile device.\n\n" @@ -1254,6 +1266,7 @@ async def _stall_monitor(self) -> None: # whether the wait state still holds; once Claude resumes # emitting events, _stall_warned resets via _last_event_at # and the warn_count effectively rolls back. + self._bump_stall_suppression("expected_wait") logger.info( "progress_edits.stall_auto_cancel_suppressed_expected_wait", channel_id=self.channel_id, @@ -1382,6 +1395,7 @@ async def _stall_monitor(self) -> None: # heartbeat bump keeps the elapsed-time tail current # without resetting stall counters. if not frozen_escalate and _post_result_idle: + self._bump_stall_suppression("post_result") logger.info( "progress_edits.stall_post_result_suppressed", channel_id=self.channel_id, @@ -1491,6 +1505,7 @@ async def _stall_monitor(self) -> None: # already sent, suppress repeats. Similar to tool-active # suppression but triggered by tree CPU (child processes) # instead of tracked tool state. 
+ self._bump_stall_suppression("children_active") logger.info( "progress_edits.stall_children_active_suppressed", channel_id=self.channel_id, @@ -1657,6 +1672,22 @@ def _bump_heartbeat(self) -> None: ): self.signal_send.send_nowait(None) + def _bump_stall_suppression(self, reason: str) -> None: + """#333 Task 4b: count a suppression event for ``session.summary``. + + ``reason`` is a stable snake_case label (e.g. ``"post_result"``, + ``"children_active"``, ``"expected_wait"``). Stored on the + stream's ``stall_suppression_counts`` dict so the summary line + in ``session.summary`` (emitted from ``run_runner_with_cancel``) + can render ``stall_suppressions=expected_wait:N,post_result:N``. + """ + if self.stream is None: + return + counts = getattr(self.stream, "stall_suppression_counts", None) + if counts is None: + return + counts[reason] = counts.get(reason, 0) + 1 + def _is_post_result_idle(self) -> bool: """#470: suppression — Claude session is past its `result` event. @@ -2632,6 +2663,13 @@ async def thread_pid() -> None: # Session completion summary duration = time.monotonic() - start_time event_count = edits.stream.event_count if edits.stream else 0 + # #333 Task 4b: render the per-reason suppression counter as a stable + # comma-separated string (e.g. ``expected_wait:4,post_result:3``) so + # log audits can grep without parsing nested JSON. 
+ suppression_counts = getattr(edits.stream, "stall_suppression_counts", None) or {} + suppression_summary = ",".join( + f"{k}:{v}" for k, v in sorted(suppression_counts.items()) + ) logger.info( "session.summary", session_id=outcome.resume.value if outcome.resume else None, @@ -2645,6 +2683,7 @@ async def thread_pid() -> None: last_event_type=edits.stream.last_event_type if edits.stream else None, cancelled=outcome.cancelled, ok=outcome.completed.ok if outcome.completed else None, + stall_suppressions=suppression_summary, ) if event_count == 0 and not outcome.cancelled: logger.warning( @@ -2999,12 +3038,50 @@ async def run_edits() -> None: attempt=_auto_continued_count + 1, max_retries=ac_settings.max_retries, ) - notice = ( - "\u26a0\ufe0f Auto-continuing \u2014 " - "Claude stopped before processing tool results" - ) - if _auto_continued_count > 0: - notice += f" (attempt {_auto_continued_count + 1})" + + # #551 Tier 0: deliver outbox files from subprocess 1 BEFORE + # subprocess 2 spawns. Without this, any files the agent wrote + # to ``.untether-outbox/`` during the stuck-after-tool-results + # window are orphaned (subprocess 2 starts fresh and the + # original outbox is never scanned). ~3.6% silent loss observed + # on lba-1 before this fix. Failure to deliver must NOT block + # auto-continue itself — the recovery is more important than + # any single batch of files. 
+ if cfg.send_file is not None and cfg.outbox_config is not None: + from .telegram.outbox_delivery import deliver_outbox_files + from .utils.paths import get_run_base_dir + + _run_root = get_run_base_dir() + if _run_root is not None: + _oc = cfg.outbox_config + try: + result = await deliver_outbox_files( + send_file=cfg.send_file, + channel_id=incoming.channel_id, + thread_id=incoming.thread_id, + reply_to_msg_id=user_ref.message_id, + run_root=_run_root, + outbox_dir=_oc.outbox_dir, + deny_globs=_oc.deny_globs, + max_download_bytes=_oc.max_download_bytes, + max_files=_oc.outbox_max_files, + cleanup=True, # subprocess 2 starts fresh + ) + logger.info( + "outbox.delivered_pre_auto_continue", + sent=len(result.sent), + skipped=len(result.skipped), + cleaned=result.cleaned, + ) + except Exception: # noqa: BLE001 + logger.warning( + "outbox.auto_continue_delivery_failed", exc_info=True + ) + + # #551 Tier 1: reworded notice signals recovery, not failure. + # The 🔁 prefix distinguishes auto-resume from a fresh start + # and discourages users from /cancel-ing the salvage. + notice = _format_auto_continue_notice(_auto_continued_count) notice_msg = RenderedMessage(text=notice, extra={}) await cfg.transport.send( channel_id=incoming.channel_id, diff --git a/tests/test_exec_bridge.py b/tests/test_exec_bridge.py index 57d42b09..735e29fe 100644 --- a/tests/test_exec_bridge.py +++ b/tests/test_exec_bridge.py @@ -5746,3 +5746,97 @@ async def drive() -> None: # _real_pending was True (wakeup), so _expected_wait stays True even # though _post_result_limbo also went True. Auto-cancel does NOT fire. 
assert not cancel_event.is_set() + + +# --------------------------------------------------------------------------- +# #333 Task 4b — stall-suppression counter + session.summary integration +# --------------------------------------------------------------------------- + + +@pytest.mark.anyio +async def test_4b_bump_stall_suppression_records_counts() -> None: + """Task 4b: _bump_stall_suppression increments per-reason counters + on JsonlStreamState. Stream missing or counter dict missing must be + no-ops (defensive — the stall detector should never break on bookkeeping).""" + from untether.runner import JsonlStreamState + + transport = FakeTransport() + presenter = _KeyboardPresenter() + edits = _make_edits(transport, presenter, clock=_FakeClock(start=0.0)) + + # Stream is None initially -> no-op + edits.stream = None + edits._bump_stall_suppression("post_result") # must not raise + + # With a real stream, counts accumulate. + stream = JsonlStreamState(expected_session=None) + edits.stream = stream + edits._bump_stall_suppression("post_result") + edits._bump_stall_suppression("post_result") + edits._bump_stall_suppression("expected_wait") + edits._bump_stall_suppression("children_active") + + assert stream.stall_suppression_counts == { + "post_result": 2, + "expected_wait": 1, + "children_active": 1, + } + + +def test_551_auto_continue_notice_first_attempt() -> None: + """#551 Tier 1: first auto-continue (count=0) notice has 🔁 prefix and + no attempt suffix.""" + from untether.runner_bridge import _format_auto_continue_notice + + text = _format_auto_continue_notice(0) + assert text.startswith("\U0001f501 ") + assert "Auto-resuming" in text + assert "attempt" not in text # no suffix on first attempt + + +def test_551_auto_continue_notice_repeat_attempt() -> None: + """#551 Tier 1: repeat auto-continue (count=1+) shows attempt N+1.""" + from untether.runner_bridge import _format_auto_continue_notice + + text = _format_auto_continue_notice(1) + assert 
text.startswith("\U0001f501 ") + assert "(attempt 2)" in text + + +@pytest.mark.anyio +async def test_4b_stall_suppression_count_bumped_on_post_result() -> None: + """Task 4b: when the bridge stall detector takes the post-result + suppression branch, ``stall_suppression_counts['post_result']`` bumps.""" + from untether.runner import JsonlStreamState + + transport = FakeTransport() + presenter = _KeyboardPresenter() + clock = _FakeClock(start=1000.0) + edits = _make_edits(transport, presenter, clock=clock) + edits._stall_check_interval = 0.01 + edits._STALL_THRESHOLD_SECONDS = 0.05 + edits._STALL_THRESHOLD_TOOL = 0.05 + edits._STALL_THRESHOLD_APPROVAL = 10.0 + edits._stall_repeat_seconds = 1000.0 + edits._STALL_MAX_WARNINGS = 5 + edits._POST_RESULT_LIMBO_THRESHOLD_S = 600.0 + cancel_event = anyio.Event() + edits.cancel_event = cancel_event + + # post-result armed only 5 s ago — well within the limbo threshold. + stream = JsonlStreamState(expected_session=None) + stream.last_event_type = "result" + stream.engine_state = _make_engine_state(result_received_at=995.0) + edits.stream = stream + + async with anyio.create_task_group() as tg: + + async def drive() -> None: + clock.set(1005.0) + await anyio.sleep(0.2) + edits.signal_send.close() + + tg.start_soon(edits.run) + tg.start_soon(drive) + + assert stream.stall_suppression_counts.get("post_result", 0) >= 1 diff --git a/uv.lock b/uv.lock index 0bee39e7..99bb1742 100644 --- a/uv.lock +++ b/uv.lock @@ -2069,7 +2069,7 @@ wheels = [ [[package]] name = "untether" -version = "0.35.3rc17" +version = "0.35.3rc18" source = { editable = "." } dependencies = [ { name = "aiohttp" },