Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .serena/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
/cache
/project.local.yml
154 changes: 154 additions & 0 deletions .serena/project.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
# the name by which the project can be referenced within Serena
project_name: "twd-cli"


# list of languages for which language servers are started; choose from:
# al bash clojure cpp csharp
# csharp_omnisharp dart elixir elm erlang
# fortran fsharp go groovy haskell
# haxe java julia kotlin lua
# markdown
# matlab nix pascal perl php
# php_phpactor powershell python python_jedi r
# rego ruby ruby_solargraph rust scala
# swift terraform toml typescript typescript_vts
# vue yaml zig
# (This list may be outdated. For the current list, see values of Language enum here:
# https://github.com/oraios/serena/blob/main/src/solidlsp/ls_config.py
# For some languages, there are alternative language servers, e.g. csharp_omnisharp, ruby_solargraph.)
# Note:
# - For C, use cpp
# - For JavaScript, use typescript
# - For Free Pascal/Lazarus, use pascal
# Special requirements:
# Some languages require additional setup/installations.
# See here for details: https://oraios.github.io/serena/01-about/020_programming-languages.html#language-servers
# When using multiple languages, the first language server that supports a given file will be used for that file.
# The first language is the default language and the respective language server will be used as a fallback.
# Note that when using the JetBrains backend, language servers are not used and this list is correspondingly ignored.
languages:
- typescript

# the encoding used by text files in the project
# For a list of possible encodings, see https://docs.python.org/3.11/library/codecs.html#standard-encodings
encoding: "utf-8"

# line ending convention to use when writing source files.
# Possible values: unset (use global setting), "lf", "crlf", or "native" (platform default)
# This does not affect Serena's own files (e.g. memories and configuration files), which always use native line endings.
line_ending:

# The language backend to use for this project.
# If not set, the global setting from serena_config.yml is used.
# Valid values: LSP, JetBrains
# Note: the backend is fixed at startup. If a project with a different backend
# is activated post-init, an error will be returned.
language_backend:

# whether to use project's .gitignore files to ignore files
ignore_all_files_in_gitignore: true

# advanced configuration option allowing to configure language server-specific options.
# Maps the language key to the options.
# Have a look at the docstring of the constructors of the LS implementations within solidlsp (e.g., for C# or PHP) to see which options are available.
# No documentation on options means no options are available.
ls_specific_settings: {}

# list of additional paths to ignore in this project.
# Same syntax as gitignore, so you can use * and **.
# Note: global ignored_paths from serena_config.yml are also applied additively.
ignored_paths: []

# whether the project is in read-only mode
# If set to true, all editing tools will be disabled and attempts to use them will result in an error
# Added on 2025-04-18
read_only: false

# list of tool names to exclude.
# This extends the existing exclusions (e.g. from the global configuration)
#
# Below is the complete list of tools for convenience.
# To make sure you have the latest list of tools, and to view their descriptions,
# execute `uv run scripts/print_tool_overview.py`.
#
# * `activate_project`: Activates a project based on the project name or path.
# * `check_onboarding_performed`: Checks whether project onboarding was already performed.
# * `create_text_file`: Creates/overwrites a file in the project directory.
# * `delete_memory`: Delete a memory file. Should only happen if a user asks for it explicitly,
# for example by saying that the information retrieved from a memory file is no longer correct
# or no longer relevant for the project.
# * `edit_memory`: Replaces content matching a regular expression in a memory.
# * `execute_shell_command`: Executes a shell command.
# * `find_file`: Finds files in the given relative paths
# * `find_referencing_symbols`: Finds symbols that reference the given symbol using the language server backend
# * `find_symbol`: Performs a global (or local) search using the language server backend.
# * `get_current_config`: Prints the current configuration of the agent, including the active and available projects, tools, contexts, and modes.
# * `get_symbols_overview`: Gets an overview of the top-level symbols defined in a given file.
# * `initial_instructions`: Provides instructions Serena usage (i.e. the 'Serena Instructions Manual')
# for clients that do not read the initial instructions when the MCP server is connected.
# * `insert_after_symbol`: Inserts content after the end of the definition of a given symbol.
# * `insert_before_symbol`: Inserts content before the beginning of the definition of a given symbol.
# * `list_dir`: Lists files and directories in the given directory (optionally with recursion).
# * `list_memories`: List available memories. Any memory can be read using the `read_memory` tool.
# * `onboarding`: Performs onboarding (identifying the project structure and essential tasks, e.g. for testing or building).
# * `read_file`: Reads a file within the project directory.
# * `read_memory`: Read the content of a memory file. This tool should only be used if the information
# is relevant to the current task. You can infer whether the information
# is relevant from the memory file name.
# You should not read the same memory file multiple times in the same conversation.
# * `rename_memory`: Renames or moves a memory. Moving between project and global scope is supported
# (e.g., renaming "global/foo" to "bar" moves it from global to project scope).
# * `rename_symbol`: Renames a symbol throughout the codebase using language server refactoring capabilities.
# For JB, we use a separate tool.
# * `replace_content`: Replaces content in a file (optionally using regular expressions).
# * `replace_symbol_body`: Replaces the full definition of a symbol using the language server backend.
# * `safe_delete_symbol`:
# * `search_for_pattern`: Performs a search for a pattern in the project.
# * `write_memory`: Write some information (utf-8-encoded) about this project that can be useful for future tasks to a memory in md format.
# The memory name should be meaningful.
excluded_tools: []

# list of tools to include that would otherwise be disabled (particularly optional tools that are disabled by default).
# This extends the existing inclusions (e.g. from the global configuration).
included_optional_tools: []

# fixed set of tools to use as the base tool set (if non-empty), replacing Serena's default set of tools.
# This cannot be combined with non-empty excluded_tools or included_optional_tools.
fixed_tools: []

# list of mode names to that are always to be included in the set of active modes
# The full set of modes to be activated is base_modes + default_modes.
# If the setting is undefined, the base_modes from the global configuration (serena_config.yml) apply.
# Otherwise, this setting overrides the global configuration.
# Set this to [] to disable base modes for this project.
# Set this to a list of mode names to always include the respective modes for this project.
base_modes:

# list of mode names that are to be activated by default.
# The full set of modes to be activated is base_modes + default_modes.
# If the setting is undefined, the default_modes from the global configuration (serena_config.yml) apply.
# Otherwise, this overrides the setting from the global configuration (serena_config.yml).
# This setting can, in turn, be overridden by CLI parameters (--mode).
default_modes:

# initial prompt for the project. It will always be given to the LLM upon activating the project
# (contrary to the memories, which are loaded on demand).
initial_prompt: ""

# time budget (seconds) per tool call for the retrieval of additional symbol information
# such as docstrings or parameter information.
# This overrides the corresponding setting in the global configuration; see the documentation there.
# If null or missing, use the setting from the global configuration.
symbol_info_budget:

# list of regex patterns which, when matched, mark a memory entry as read‑only.
# Extends the list from the global configuration, merging the two lists.
read_only_memory_patterns: []

# list of regex patterns for memories to completely ignore.
# Matching memories will not appear in list_memories or activate_project output
# and cannot be accessed via read_memory or write_memory.
# To access ignored memory files, use the read_file tool on the raw file path.
# Extends the list from the global configuration, merging the two lists.
# Example: ["_archive/.*", "_episodes/.*"]
ignored_memory_patterns: []
4 changes: 3 additions & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ The codebase is a small ESM-only Node.js CLI with two core source files:

**`bin/twd-cli.js`** — CLI entry point. Parses `process.argv` for the `run` command, calls `runTests()`, and exits with code 0 (pass) or 1 (failure).

**`src/config.js`** — `loadConfig()` reads `twd.config.json` from `process.cwd()`, merges it with defaults (url, timeout, coverage, headless, puppeteerArgs), and returns the merged config. Falls back to defaults if the file is missing or unparseable.
**`src/config.js`** — `loadConfig()` reads `twd.config.json` from `process.cwd()`, merges it with defaults (url, timeout, coverage, headless, puppeteerArgs, retryCount, protocolTimeout), and returns the merged config. Falls back to defaults if the file is missing or unparseable.

`protocolTimeout` (default `300000`, 5 min) is passed to `puppeteer.launch` and bounds Puppeteer's CDP commands. It matters because the entire suite runs inside a single `page.evaluate` (`Runtime.callFunctionOn`), so Puppeteer's implicit 180000ms ceiling would abort long-but-passing suites with no per-test output. Raise it for slow CI; `0` means no timeout.

**`src/index.js`** — `runTests()` is the main orchestrator:
1. Loads config via `loadConfig()`
Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ Create a `twd.config.json` file in your project root:
"nycOutputDir": "./.nyc_output",
"headless": true,
"puppeteerArgs": ["--no-sandbox", "--disable-setuid-sandbox"],
"retryCount": 2
"retryCount": 2,
"protocolTimeout": 300000
}
```

Expand All @@ -53,6 +54,7 @@ Create a `twd.config.json` file in your project root:
| `headless` | boolean | `true` | Run browser in headless mode |
| `puppeteerArgs` | string[] | `["--no-sandbox", "--disable-setuid-sandbox"]` | Additional Puppeteer launch arguments |
| `retryCount` | number | `2` | Number of attempts per test before reporting failure. Set to `1` to disable retries |
| `protocolTimeout` | number | `300000` | Puppeteer CDP `protocolTimeout` in ms (5 min). The whole suite runs inside one `page.evaluate`, so this bounds the **entire run** — raise it (e.g. `600000`) for slow CI; `0` means no timeout. Defaults above Puppeteer's implicit 180000ms ceiling |
| `contracts` | array | — | OpenAPI contract validation specs (see [Contract Validation](#contract-validation)) |
| `contractReportPath` | string | — | Path to write a markdown report for CI/PR integration |

Expand Down
94 changes: 94 additions & 0 deletions docs/spec-protocol-timeout.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# Spec: Stop the full suite from dying at puppeteer's 180s `protocolTimeout`

## Problem

`twd-cli` runs the **entire** test suite inside a single `page.evaluate(...)` call
(`src/index.js:58`). Puppeteer maps every `page.evaluate` to one
`Runtime.callFunctionOn` CDP command, and that command is bound by
`protocolTimeout`, which defaults to **180000 ms (180s)**.

When the whole suite takes longer than 180s — common on slow CI runners even
when it passes locally — puppeteer aborts with:

```
Error running tests: ProtocolError: Runtime.callFunctionOn timed out.
Increase the 'protocolTimeout' setting in launch/connect calls for a higher timeout if needed.
```

This is a hard failure with **no per-test output** (the whole batch is lost), so
it looks like a crash rather than a slow suite. Observed in
holafly/web-checkout CI run 26503182725: page loaded at 09:40:46, timed out at
09:43:46 — exactly 180s — while the same suite finishes in ~130s locally.

The current `timeout` config field (default 10000) is **only** passed to
`page.waitForSelector` (`src/index.js:54`), not to puppeteer's protocol layer,
so there is no way to raise the ceiling today.

## Goal

Make the suite robust against the 180s protocol ceiling, and give a clearer
failure mode when a run is genuinely too slow.

## Scope

Two changes, independently shippable. Phase 1 unblocks immediately; Phase 2 is
the durable fix.

### Phase 1 — Configurable `protocolTimeout` (quick, low-risk)

1. Add `protocolTimeout` to `DEFAULT_CONFIG` in `src/config.js`. Default to a
safe value above the current implicit one, e.g. `300000` (5 min). Document
that `0` means "no timeout" (puppeteer treats `0` as unlimited).
2. Pass it through to `puppeteer.launch(...)` in `src/index.js:27`:
```js
browser = await puppeteer.launch({
headless: config.headless,
args: config.puppeteerArgs,
protocolTimeout: config.protocolTimeout,
});
```
3. Update `twd.config.example.json`, `README.md`, and `CLAUDE.md` to document
the new field and explain *why* it exists (whole-suite-in-one-evaluate).

**Acceptance:** a consumer can set `"protocolTimeout": 600000` in
`twd.config.json` and a 200s suite passes in CI.

### Phase 2 — Run tests incrementally, not in one `page.evaluate` (durable)

The deeper issue is that one slow run blows the whole batch and emits no
streamed output. Drive the in-browser `TestRunner` test-by-test (or in bounded
batches) from Node, so each `page.evaluate` is short and bounded by the
per-test `timeout`, not the suite total.

Sketch:
- Expose the test list from the browser first:
`const tests = await page.evaluate(() => window.__testRunner.list())`
(add a `list()`/`getTests()` accessor to the runner in `twd-js` if absent).
- Loop in Node, evaluating one test (or a batch of N) per call, collecting
`testStatus` incrementally and printing results as they complete (matches the
streamed output `twd-relay` already gives).
- Each call is now bounded by per-test time, so `protocolTimeout` only has to
cover the slowest single test/batch, not the whole suite.

Benefits: no whole-suite loss on timeout, live progress in CI logs, natural
place to enforce a real per-test timeout and to short-circuit on first failure
if desired.

**Acceptance:** CI logs show per-test pass/fail as the run progresses; a single
hanging test fails only that test (or that batch) instead of the entire run.

## Out of scope

- Parallelizing across multiple pages/browsers.
- Changing `retryCount` semantics.

## Notes for the implementer

- `src/index.js:58-82` is the single `page.evaluate` to refactor for Phase 2.
- `src/config.js` `DEFAULT_CONFIG` (lines 4-13) is where the new field lands.
- Keep Phase 1 even after Phase 2 ships — a generous `protocolTimeout` is still
a sane safety net for the per-batch calls.
- Tests for this repo live under `tests/`; add config-merge coverage for the new
field and, for Phase 2, a fixture app with an artificially slow test.
</content>
</invoke>
1 change: 1 addition & 0 deletions src/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ const DEFAULT_CONFIG = {
headless: true,
puppeteerArgs: ['--no-sandbox', '--disable-setuid-sandbox'],
retryCount: 2,
protocolTimeout: 300000,
};

export function loadConfig() {
Expand Down
16 changes: 16 additions & 0 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@ import { generateContractMarkdown } from './contractMarkdown.js';
import { buildTestPath } from './buildTestPath.js';
import { formatTestSummary, formatFailedTestsBlock } from './testSummary.js';

function isProtocolTimeout(error) {
const message = error && error.message ? error.message : '';
return (
(error && error.name === 'ProtocolError' && /timed out/i.test(message)) ||
/protocolTimeout/i.test(message)
);
}

export async function runTests() {
let browser;
try {
Expand All @@ -27,6 +35,7 @@ export async function runTests() {
browser = await puppeteer.launch({
headless: config.headless,
args: config.puppeteerArgs,
protocolTimeout: config.protocolTimeout,
});

const page = await browser.newPage();
Expand Down Expand Up @@ -173,6 +182,13 @@ export async function runTests() {

} catch (error) {
console.error('Error running tests:', error);
if (isProtocolTimeout(error)) {
console.error(
'\nThis looks like a Puppeteer protocolTimeout. The whole test suite runs in a single\n' +
'page.evaluate call, so the run aborts if it takes longer than "protocolTimeout" (ms).\n' +
'Raise it in twd.config.json, e.g. { "protocolTimeout": 600000 } (0 = no timeout).'
);
}
if (browser) await browser.close();
throw error;
}
Expand Down
13 changes: 13 additions & 0 deletions tests/config.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ describe('loadConfig', () => {
headless: true,
puppeteerArgs: ['--no-sandbox', '--disable-setuid-sandbox'],
retryCount: 2,
protocolTimeout: 300000,
});
expect(fs.existsSync).toHaveBeenCalledWith(path.resolve(mockCwd, 'twd.config.json'));
});
Expand All @@ -56,6 +57,7 @@ describe('loadConfig', () => {
headless: true,
puppeteerArgs: ['--no-sandbox', '--disable-setuid-sandbox'],
retryCount: 2,
protocolTimeout: 300000,
});
expect(fs.readFileSync).toHaveBeenCalledWith(
path.resolve(mockCwd, 'twd.config.json'),
Expand All @@ -73,6 +75,7 @@ describe('loadConfig', () => {
headless: false,
puppeteerArgs: ['--disable-dev-shm-usage'],
retryCount: 3,
protocolTimeout: 600000,
};

vi.mocked(fs.existsSync).mockReturnValue(true);
Expand Down Expand Up @@ -100,6 +103,7 @@ describe('loadConfig', () => {
headless: true,
puppeteerArgs: ['--no-sandbox', '--disable-setuid-sandbox'],
retryCount: 2,
protocolTimeout: 300000,
});
expect(consoleWarnSpy).toHaveBeenCalledWith(
expect.stringContaining('Warning: Could not parse twd.config.json'),
Expand All @@ -123,6 +127,15 @@ describe('loadConfig', () => {
expect(config.url).toBe('http://localhost:5173');
});

it('should default protocolTimeout and allow user override', () => {
vi.mocked(fs.existsSync).mockReturnValue(false);
expect(loadConfig().protocolTimeout).toBe(300000);

vi.mocked(fs.existsSync).mockReturnValue(true);
vi.mocked(fs.readFileSync).mockReturnValue(JSON.stringify({ protocolTimeout: 0 }));
expect(loadConfig().protocolTimeout).toBe(0);
});

it('should handle partial user config correctly', () => {
const userConfig = {
headless: false,
Expand Down
Loading
Loading