From 0cbfac01102425c9070c08f321e0850bf9973e32 Mon Sep 17 00:00:00 2001 From: chr1syy Date: Fri, 26 Jun 2026 16:04:10 +0200 Subject: [PATCH 1/4] feat(kernel): render markdown tables as aligned fenced ASCII MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Agent responses frequently contain GitHub-flavored markdown tables, which no chat platform renders — Discord has no table syntax and Slack mrkdwn has none, so they arrive as raw `| a | b |` pipes. Normalize them in one provider-agnostic place in the kernel: - `src/core/renderTables.ts`: detect GFM table blocks and re-emit each as a width-aligned ASCII table wrapped in a code fence (every target platform renders fences in fixed-width font, so columns line up). Honors `:--`/`--:`/ `:--:` alignment, normalizes ragged rows, unescapes `\|`, truncates cells past MAX_TABLE_WIDTH with an ellipsis, and leaves already-fenced tables untouched. - `src/core/splitMessage.ts`: make splitting fence-aware so a code block split across the length limit is closed on one chunk and re-opened on the next, preserving the original fence-free behavior exactly. - Wire `renderTables` into the queue just before split/send, so Discord, Slack, and every future provider benefit without touching adapter code. Docs: document the behavior in architecture.md (Output rendering) with one-line pointers from discord.md and slack.md. Tests: 10 new renderTables cases plus fence-aware splitMessage cases; full suite 245 passing. Co-Authored-By: Claude Opus 4.8 --- docs/architecture.md | 26 +++- docs/discord.md | 1 + docs/slack.md | 1 + src/__tests__/renderTables.test.ts | 101 +++++++++++++++ src/__tests__/splitMessage.test.ts | 23 ++++ src/core/queue.ts | 3 +- src/core/renderTables.ts | 200 +++++++++++++++++++++++++++++ src/core/splitMessage.ts | 55 +++++++- 8 files changed, 405 insertions(+), 5 deletions(-) create mode 100644 src/__tests__/renderTables.test.ts create mode 100644 src/core/renderTables.ts diff --git a/docs/architecture.md b/docs/architecture.md index 243cee9..5483b86 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -29,7 +29,7 @@ The kernel speaks only in `IncomingMessage` / `OutgoingMessage` / `ChannelTarget - Resolves the conversation via `provider.resolveConversation` (returns `{agentId, sessionId, readOnly, persistSession}`) - Downloads any attachments to the agent's `cwd` - Calls `maestro.send(agentId, content, sessionId, readOnly)` - - Splits the response and calls `provider.send(target, {text})` for each part + - Normalizes markdown tables in the response (see **Output rendering** below), splits it, and calls `provider.send(target, {text})` for each part - Posts a usage footer: tokens, cost, context % - Persists the maestro session id on the first response via `conv.persistSession` 5. Errors are logged to `logs/errors.log` and surfaced as a `⚠️` reply in the channel. @@ -42,6 +42,28 @@ Each thread is bound to the user who created it (via mention or `/session new`). - Messages from other users are silently ignored — no error reply, no forwarding. - This prevents cross-talk and keeps each conversation scoped to one user. +## Output rendering + +Agent responses often contain GitHub-flavored markdown tables, which no chat +platform renders (Discord has no table syntax; Slack's *mrkdwn* has none) — they +arrive as raw `| a | b |` pipes. The kernel normalizes them in one +provider-agnostic place so every provider benefits: + +- `src/core/renderTables.ts` (`renderTables`) detects GFM table blocks and + re-emits each as a width-aligned ASCII table wrapped in a ``` code fence, + which every target platform renders in a fixed-width font so columns line up. + Column alignment markers (`:--`, `--:`, `:--:`) are honored. Tables wider than + `MAX_TABLE_WIDTH` have their widest columns truncated with an ellipsis (`…`). + Tables already inside a code fence are left untouched. +- `src/core/splitMessage.ts` is fence-aware: when a long response is split across + the per-message length limit, an open code fence is closed on one chunk and + re-opened on the next, so a rendered table never loses its monospace block. + +Both run in the queue just before `provider.send`, so providers receive +already-normalized text. Caveat: inline markdown inside a cell (bold, links) +renders literally, since the cell now lives inside a code fence — alignment is +the deliberate tradeoff. + ## Read-only mode `/agents readonly on` puts an agent channel into read-only mode. In this mode the bridge relays messages from the agent (via the HTTP API) but does **not** forward user messages to the agent. Use `/agents readonly off` to resume normal two-way messaging. @@ -68,6 +90,8 @@ The schema upgrades on first start: legacy `agent_channels` (single-PK `channel_ | `src/core/maestro.ts` | `maestro-cli` wrapper | | `src/core/transcription.ts` | ffmpeg + whisper pipeline | | `src/core/attachments.ts` | Provider-agnostic attachment download | +| `src/core/renderTables.ts` | GFM tables → fenced, aligned ASCII tables | +| `src/core/splitMessage.ts` | Fence-aware message splitting to the length limit | | `src/core/errors.ts` | Typed errors (`RateLimitError`, `AgentNotFoundError`) | | `src/providers/discord/adapter.ts` | DiscordProvider implementing BridgeProvider | | `src/providers/discord/messageCreate.ts` | Discord message → IncomingMessage | diff --git a/docs/discord.md b/docs/discord.md index eef0a1b..b94faf6 100644 --- a/docs/discord.md +++ b/docs/discord.md @@ -85,6 +85,7 @@ npm run deploy-commands - **Read-only mode** via `/agents readonly on` lets the bridge POST agent updates to the channel (via the HTTP API) without forwarding user messages back. Toggle off with `/agents readonly off`. - **Reactions**: `⏳` while a message is queued, `🎧` while a voice message is being transcribed. - **Usage stats** are appended below each agent reply (tokens, cost, context %). +- **Markdown tables** in agent replies are rendered as aligned, fenced ASCII tables so they display correctly (Discord has no native table syntax). See [architecture.md → Output rendering](architecture.md#output-rendering). ## Security diff --git a/docs/slack.md b/docs/slack.md index 31978c4..6abcc26 100644 --- a/docs/slack.md +++ b/docs/slack.md @@ -69,6 +69,7 @@ The Slack provider deliberately ships a smaller command surface than Discord — - **Reactions**: `⏳` (`hourglass_flowing_sand`) while a message is queued. The Slack API requires emoji *names*, not Unicode characters; the adapter maps `⏳ 🎧 ✅ ❌` to the corresponding Slack names — pass any of them to `provider.react()` and the mapping happens automatically. - **Typing indicator**: not exposed by Slack's Web API; `sendTyping` is a no-op on this provider. - **Usage stats** are appended below each agent reply (tokens, cost, context %). +- **Markdown tables** in agent replies are rendered as aligned, fenced ASCII tables so they display correctly (Slack mrkdwn has no native table syntax). See [architecture.md → Output rendering](architecture.md#output-rendering). - **Channel naming**: agent channels are named `maestro--`, where `id-prefix` is the first 8 alphanumeric characters of the agent ID. The agent ID makes the name unique even when two different agents normalize to the same display name. The whole result is capped at 80 characters. Both `/agents new` and the HTTP-API auto-create path (`POST /api/send`) use the same helper. If the channel already exists but is archived, the adapter unarchives it; if unarchive fails, it falls back to creating a fresh channel with a `-` suffix appended to the base name. ## Storage diff --git a/src/__tests__/renderTables.test.ts b/src/__tests__/renderTables.test.ts new file mode 100644 index 0000000..a74d642 --- /dev/null +++ b/src/__tests__/renderTables.test.ts @@ -0,0 +1,101 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import { renderTables, MAX_TABLE_WIDTH } from '../core/renderTables'; + +test('renderTables converts a basic table to a fenced ASCII table', () => { + const input = ['| Name | Qty |', '| --- | --- |', '| widget | 12 |', '| gadget | 3 |'].join( + '\n', + ); + const out = renderTables(input); + + assert.ok(out.startsWith('```\n'), 'wrapped in an opening fence'); + assert.ok(out.endsWith('\n```'), 'wrapped in a closing fence'); + assert.ok(out.includes('+--------+-----+'), 'has aligned rule lines'); + assert.ok(out.includes('| Name | Qty |'), 'header is padded to column width'); + assert.ok(out.includes('| widget | 12 |'), 'body cells are padded to column width'); +}); + +test('renderTables honors column alignment markers', () => { + const input = ['| L | R | C |', '| :-- | --: | :-: |', '| a | b | c |'].join('\n'); + const out = renderTables(input); + // Column widths are 1; with a width-1 column alignment is a no-op, so widen. + const wide = ['| Left | Right | Mid |', '| :-- | --: | :-: |', '| a | b | c |'].join('\n'); + const w = renderTables(wide); + + assert.ok(out.includes('| a | b | c |')); + assert.ok( + w.includes('| a | b | c |'), + 'left col left-aligned, right col right-aligned, middle centered', + ); +}); + +test('renderTables normalizes ragged rows to the header column count', () => { + const input = ['| A | B | C |', '| - | - | - |', '| 1 |', '| 1 | 2 | 3 | 4 |'].join('\n'); + const out = renderTables(input); + + // Short row is padded with empty cells; long row is truncated to 3 columns. + assert.ok(out.includes('| 1 | | |')); + assert.ok(out.includes('| 1 | 2 | 3 |')); + assert.ok(!out.includes('| 4 |')); +}); + +test('renderTables unescapes \\| inside cells', () => { + const input = ['| Expr |', '| --- |', '| a \\| b |'].join('\n'); + const out = renderTables(input); + assert.ok(out.includes('a | b')); +}); + +test('renderTables handles tables without outer pipes', () => { + const input = ['Name | Qty', '--- | ---', 'widget | 12'].join('\n'); + const out = renderTables(input); + assert.ok(out.includes('| widget |')); + assert.ok(out.includes('| Name | Qty |')); +}); + +test('renderTables leaves non-table text untouched', () => { + const input = 'Just a sentence.\nAnother line with a | pipe but no table.'; + assert.equal(renderTables(input), input); +}); + +test('renderTables preserves surrounding prose', () => { + const input = ['Here are results:', '', '| A | B |', '| - | - |', '| 1 | 2 |', '', 'Done.'].join( + '\n', + ); + const out = renderTables(input); + assert.ok(out.startsWith('Here are results:\n\n```')); + assert.ok(out.endsWith('```\n\nDone.')); +}); + +test('renderTables does not touch a table already inside a code fence', () => { + const input = ['```', '| A | B |', '| - | - |', '| 1 | 2 |', '```'].join('\n'); + assert.equal(renderTables(input), input); +}); + +test('renderTables converts multiple tables in one message', () => { + const input = [ + '| A | B |', + '| - | - |', + '| 1 | 2 |', + '', + 'between', + '', + '| C | D |', + '| - | - |', + '| 3 | 4 |', + ].join('\n'); + const out = renderTables(input); + const fences = out.split('```').length - 1; + assert.equal(fences, 4, 'two tables → two open + two close fences'); + assert.ok(out.includes('between')); +}); + +test('renderTables truncates wide cells with an ellipsis under the width cap', () => { + const long = 'x'.repeat(120); + const input = ['| Col |', '| --- |', `| ${long} |`].join('\n'); + const out = renderTables(input); + + assert.ok(out.includes('…'), 'truncation marker present'); + // No rendered content row should exceed the cap budget plus borders/padding. + const longestRow = Math.max(...out.split('\n').map((l) => l.length)); + assert.ok(longestRow <= MAX_TABLE_WIDTH + 4, `row width ${longestRow} within cap`); +}); diff --git a/src/__tests__/splitMessage.test.ts b/src/__tests__/splitMessage.test.ts index ff228b1..5b47faf 100644 --- a/src/__tests__/splitMessage.test.ts +++ b/src/__tests__/splitMessage.test.ts @@ -30,3 +30,26 @@ test('splitMessage hard-splits and trims leading whitespace', () => { assert.equal(parts[1], 'y'); assert.ok(parts.every((part) => part.length <= MAX_LENGTH)); }); + +test('splitMessage re-fences a code block split across a boundary', () => { + const before = 'p'.repeat(1500); + const code = 'c'.repeat(1500); + const input = `${before}\n\`\`\`\n${code}\n\`\`\``; + const parts = splitMessage(input); + + assert.ok(parts.length >= 2, 'message was split'); + // Every part must contain a balanced number of fence delimiters. + for (const part of parts) { + const fences = part.split('```').length - 1; + assert.equal(fences % 2, 0, `part has balanced fences: ${fences}`); + } + // The fenced content survives across the boundary. + assert.ok(parts.some((p) => p.includes(code.slice(0, 100)))); +}); + +test('splitMessage leaves fence-free splits unchanged', () => { + const left = 'a'.repeat(1000); + const right = 'b'.repeat(1200); + const parts = splitMessage(`${left}\n${right}`); + assert.deepEqual(parts, [left, right]); +}); diff --git a/src/core/queue.ts b/src/core/queue.ts index 50adc51..0003991 100644 --- a/src/core/queue.ts +++ b/src/core/queue.ts @@ -7,6 +7,7 @@ import type { ReactionHandle, } from './types'; import { splitMessage as defaultSplitMessage } from './splitMessage'; +import { renderTables } from './renderTables'; import { downloadAttachments as defaultDownload, formatAttachmentRefs } from './attachments'; interface QueueEntry { @@ -191,7 +192,7 @@ export function createQueue(deps: QueueDeps) { `agent=${conv.agentId} session=${conv.sessionId ?? 'new'} channel=${message.channelId} error=${result.error}`, ); } - const parts = split(result.response); + const parts = split(renderTables(result.response)); for (const part of parts) { await provider.send(target, { text: part }); } diff --git a/src/core/renderTables.ts b/src/core/renderTables.ts new file mode 100644 index 0000000..e3cc98b --- /dev/null +++ b/src/core/renderTables.ts @@ -0,0 +1,200 @@ +/** + * Provider-agnostic GitHub-flavored-markdown table normalization. + * + * Chat platforms (Discord, Slack mrkdwn, ...) don't render markdown tables — + * they show the raw `| a | b |` pipes. Every platform we target does render + * triple-backtick code blocks in a fixed-width font, so we detect GFM tables + * and re-emit them as width-aligned ASCII tables wrapped in a code fence. The + * columns then line up on every client. + * + * This module is pure and provider-free by design (see CLAUDE.md): adapters + * and the kernel call `renderTables` on outbound agent text; nothing here + * imports a chat SDK. + */ + +/** Max combined content width (sum of column widths) before we truncate cells. */ +export const MAX_TABLE_WIDTH = 56; + +/** Minimum width a column may be shrunk to (room for one char + the ellipsis). */ +const MIN_COL_WIDTH = 3; + +type Align = 'left' | 'right' | 'center'; + +/** A fenced-code delimiter line: ``` or ~~~ (optionally indented, optional info string). */ +function isFenceDelimiter(line: string): boolean { + return /^\s*(```|~~~)/.test(line); +} + +/** + * A GFM separator row, e.g. `|---|:--:|` or `:-- | --:`. + * A pipe is required: without one, `---` is a thematic break, not a table. + */ +function isSeparator(line: string): boolean { + const s = line.trim(); + if (!s.includes('|') || !s.includes('-')) return false; + return /^\|?\s*:?-+:?\s*(\|\s*:?-+:?\s*)*\|?$/.test(s); +} + +/** A candidate table row: contains a pipe and isn't a fence delimiter. */ +function isTableRow(line: string): boolean { + return line.includes('|') && line.trim().length > 0 && !isFenceDelimiter(line); +} + +/** Split a markdown table row into trimmed cells, honoring `\|` escapes and optional outer pipes. */ +function splitCells(row: string): string[] { + const s = row.trim(); + const cells: string[] = []; + let cur = ''; + for (let i = 0; i < s.length; i++) { + const ch = s[i]; + if (ch === '\\' && s[i + 1] === '|') { + cur += '|'; + i++; + continue; + } + if (ch === '|') { + cells.push(cur); + cur = ''; + continue; + } + cur += ch; + } + cells.push(cur); + // Outer pipes produce empty leading/trailing cells — drop a single one of each. + if (cells.length > 1 && cells[0].trim() === '') cells.shift(); + if (cells.length > 1 && cells[cells.length - 1].trim() === '') cells.pop(); + return cells.map((c) => c.trim()); +} + +function parseAligns(separator: string, columns: number): Align[] { + const cells = splitCells(separator); + const aligns: Align[] = []; + for (let i = 0; i < columns; i++) { + const spec = cells[i] ?? ''; + const left = spec.startsWith(':'); + const right = spec.endsWith(':'); + if (left && right) aligns.push('center'); + else if (right) aligns.push('right'); + else aligns.push('left'); + } + return aligns; +} + +/** Shrink the widest columns until the total content width fits the cap. */ +function capWidths(widths: number[], cap: number): number[] { + const w = [...widths]; + const sum = () => w.reduce((a, b) => a + b, 0); + while (sum() > cap) { + let idx = 0; + for (let i = 1; i < w.length; i++) if (w[i] > w[idx]) idx = i; + if (w[idx] <= MIN_COL_WIDTH) break; // can't shrink further without losing all signal + w[idx]--; + } + return w; +} + +/** Truncate a cell to `width`, marking the cut with an ellipsis. */ +function fit(s: string, width: number): string { + if (s.length <= width) return s; + if (width <= 1) return s.slice(0, width); + return s.slice(0, width - 1) + '…'; +} + +function pad(s: string, width: number, align: Align): string { + const space = width - s.length; + if (space <= 0) return s; + if (align === 'right') return ' '.repeat(space) + s; + if (align === 'center') { + const l = Math.floor(space / 2); + return ' '.repeat(l) + s + ' '.repeat(space - l); + } + return s + ' '.repeat(space); +} + +function rule(widths: number[]): string { + return '+' + widths.map((w) => '-'.repeat(w + 2)).join('+') + '+'; +} + +function renderRow(cells: string[], widths: number[], aligns: Align[]): string { + return ( + '|' + + widths + .map((w, i) => ' ' + pad(fit(cells[i] ?? '', w), w, aligns[i]) + ' ') + .join('|') + + '|' + ); +} + +/** Render one parsed table (header + body rows) as a fenced ASCII table. */ +function renderTable(headerLine: string, separatorLine: string, bodyLines: string[]): string { + const header = splitCells(headerLine); + const columns = header.length; + const aligns = parseAligns(separatorLine, columns); + const body = bodyLines.map((l) => { + const cells = splitCells(l); + // Normalize each row to the header's column count. + return Array.from({ length: columns }, (_, i) => cells[i] ?? ''); + }); + + let widths = Array.from({ length: columns }, (_, i) => + Math.max(header[i].length, ...body.map((r) => r[i].length), 1), + ); + widths = capWidths(widths, MAX_TABLE_WIDTH); + + const lines: string[] = []; + lines.push(rule(widths)); + lines.push(renderRow(header, widths, aligns)); + lines.push(rule(widths)); + for (const row of body) lines.push(renderRow(row, widths, aligns)); + lines.push(rule(widths)); + + return '```\n' + lines.join('\n') + '\n```'; +} + +/** + * Replace every GFM table block in `text` with a fenced ASCII rendering. + * Non-table text is returned unchanged; tables already inside a code fence + * are left alone (the agent fenced them deliberately). + */ +export function renderTables(text: string): string { + if (!text.includes('|')) return text; // fast path: no pipes, no tables + const lines = text.split('\n'); + const out: string[] = []; + let inFence = false; + let i = 0; + + while (i < lines.length) { + const line = lines[i]; + + if (isFenceDelimiter(line)) { + inFence = !inFence; + out.push(line); + i++; + continue; + } + + if ( + !inFence && + isTableRow(line) && + i + 1 < lines.length && + isSeparator(lines[i + 1]) + ) { + const header = line; + const separator = lines[i + 1]; + const body: string[] = []; + let j = i + 2; + while (j < lines.length && isTableRow(lines[j]) && !isSeparator(lines[j])) { + body.push(lines[j]); + j++; + } + out.push(renderTable(header, separator, body)); + i = j; + continue; + } + + out.push(line); + i++; + } + + return out.join('\n'); +} diff --git a/src/core/splitMessage.ts b/src/core/splitMessage.ts index 6d6cba4..a8ae5f4 100644 --- a/src/core/splitMessage.ts +++ b/src/core/splitMessage.ts @@ -1,10 +1,10 @@ export const DEFAULT_MAX_LENGTH = 1990; /** - * Split a string into chunks that fit within `maxLength`. - * Tries to split on newlines when possible to preserve formatting. + * Greedy newline-preserving split (the original behavior). Splits on the last + * newline under `maxLength`, hard-splitting only when a single line is too long. */ -export function splitMessage(text: string, maxLength: number = DEFAULT_MAX_LENGTH): string[] { +function rawSplit(text: string, maxLength: number): string[] { if (text.length <= maxLength) return [text]; const parts: string[] = []; @@ -21,3 +21,52 @@ export function splitMessage(text: string, maxLength: number = DEFAULT_MAX_LENGT if (remaining.length > 0) parts.push(remaining); return parts; } + +/** The open fence marker (``` or ~~~) left dangling at the end of `chunk`, or null if balanced. */ +function danglingFence(chunk: string): string | null { + let open: string | null = null; + for (const line of chunk.split('\n')) { + const m = line.match(/^\s*(`{3,}|~{3,})/); + if (!m) continue; + const marker = m[1][0].repeat(3); // normalize to a 3-char marker + if (open === null) open = marker; + else if (open[0] === marker[0]) open = null; // same fence char closes the block + } + return open; +} + +/** + * Re-balance code fences across chunk boundaries: when a chunk ends inside a + * fenced block, close the fence on that chunk and re-open it on the next, so a + * code block (e.g. a rendered table) never loses its monospace rendering when + * a long message is split. Adds at most a 4-char fence line per affected chunk, + * which stays within the 2000-char platform ceiling given DEFAULT_MAX_LENGTH. + */ +function repairFences(parts: string[]): string[] { + const out: string[] = []; + let carry: string | null = null; // fence to re-open at the start of the next chunk + + for (let part of parts) { + if (carry) part = carry + '\n' + part; + const open = danglingFence(part); + if (open) { + part = part + '\n' + open; // close the still-open fence + carry = open; + } else { + carry = null; + } + out.push(part); + } + return out; +} + +/** + * Split a string into chunks that fit within `maxLength`. + * Splits on newlines when possible to preserve formatting, and keeps fenced + * code blocks intact by re-fencing across chunk boundaries. + */ +export function splitMessage(text: string, maxLength: number = DEFAULT_MAX_LENGTH): string[] { + const parts = rawSplit(text, maxLength); + if (parts.length <= 1) return parts; + return repairFences(parts); +} From 3a81e57b3b244ee27278a6cecf368a22144232c3 Mon Sep 17 00:00:00 2001 From: chr1syy Date: Fri, 26 Jun 2026 16:24:09 +0200 Subject: [PATCH 2/4] fix(kernel): address Codex review on table rendering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolves the two blocking findings plus the non-blocking/nit items from the Discord Maestro Codex review of PR #52: - renderTables: a body row that looks like a separator (e.g. `| --- |`) no longer terminates the table — it is valid table data. The body scan now ends only at the first non-row line, fixing silent data loss where trailing rows leaked out as raw markdown. - Fence handling is now CommonMark-correct via a new shared `src/core/fences.ts` (parseFenceLine/closesFence/danglingFence): fences longer than three chars are tracked by exact char+length, so a ``` line inside a ```` block is treated as content, not a close. Both renderTables and splitMessage use it. - renderTables now requires the separator's column count to match the header's, avoiding false-positive conversion of table-ish prose. - splitMessage reserves a small budget when fences are present so re-fencing never exceeds a caller-supplied maxLength; preserves the original fence marker length and info string (language) when re-opening a split block. Tests: +5 cases (dash-row data-loss regression, inner-``` inside ```` block, column-count mismatch, custom maxLength, long-fence split). Full suite 250 passing; build clean. Co-Authored-By: Claude Opus 4.8 --- .gitignore | 3 ++ src/__tests__/renderTables.test.ts | 24 ++++++++++++ src/__tests__/splitMessage.test.ts | 27 ++++++++++++++ src/core/fences.ts | 59 ++++++++++++++++++++++++++++++ src/core/renderTables.ts | 32 ++++++++++------ src/core/splitMessage.ts | 56 ++++++++++++++++------------ 6 files changed, 166 insertions(+), 35 deletions(-) create mode 100644 src/core/fences.ts diff --git a/.gitignore b/.gitignore index b46f021..80b25c5 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,6 @@ logs/ Auto Run Docs/ models .maestro/ + +# local PR-review worktree (not part of the project) +.pr52-review/ diff --git a/src/__tests__/renderTables.test.ts b/src/__tests__/renderTables.test.ts index a74d642..26d7da8 100644 --- a/src/__tests__/renderTables.test.ts +++ b/src/__tests__/renderTables.test.ts @@ -89,6 +89,30 @@ test('renderTables converts multiple tables in one message', () => { assert.ok(out.includes('between')); }); +test('renderTables keeps body rows that look like a separator (regression: data loss)', () => { + // A dash-only data row must not terminate the table early. + const input = ['| A | B |', '| --- | --- |', '| --- | --- |', '| 1 | 2 |'].join('\n'); + const out = renderTables(input); + + // All three body rows survive inside one fenced table; nothing leaks as raw markdown. + assert.equal(out.split('```').length - 1, 2, 'exactly one fenced block'); + assert.ok(out.includes('| --- | --- |'), 'dash row rendered as data'); + assert.ok(out.includes('| 1 | 2 |'), 'trailing row survives'); +}); + +test('renderTables ignores a table-like block inside a longer (4-backtick) fence', () => { + const input = ['````', '| A | B |', '| - | - |', '| 1 | 2 |', '```', 'still inside', '````'].join( + '\n', + ); + // The inner ``` does not close the ```` block, so the table stays untouched. + assert.equal(renderTables(input), input); +}); + +test('renderTables does not convert header/separator column-count mismatches', () => { + const input = ['a | b | c', '--- | ---', '1 | 2'].join('\n'); + assert.equal(renderTables(input), input); +}); + test('renderTables truncates wide cells with an ellipsis under the width cap', () => { const long = 'x'.repeat(120); const input = ['| Col |', '| --- |', `| ${long} |`].join('\n'); diff --git a/src/__tests__/splitMessage.test.ts b/src/__tests__/splitMessage.test.ts index 5b47faf..a0373b5 100644 --- a/src/__tests__/splitMessage.test.ts +++ b/src/__tests__/splitMessage.test.ts @@ -53,3 +53,30 @@ test('splitMessage leaves fence-free splits unchanged', () => { const parts = splitMessage(`${left}\n${right}`); assert.deepEqual(parts, [left, right]); }); + +test('splitMessage treats an inner ``` inside a 4-backtick block as content', () => { + const before = 'p'.repeat(1500); + const inner = 'c'.repeat(800) + '\n```\n' + 'd'.repeat(800); // a literal ``` line inside + const input = `${before}\n\`\`\`\`\n${inner}\n\`\`\`\``; + const parts = splitMessage(input); + + assert.ok(parts.length >= 2, 'message was split'); + // Each chunk must have balanced 4-backtick fences; the inner ``` must not be + // mistaken for a close/open, which would corrupt the block. + for (const part of parts) { + const four = part.split('````').length - 1; + assert.equal(four % 2, 0, `balanced 4-backtick fences: ${four}`); + } +}); + +test('splitMessage honors a custom maxLength even when re-fencing', () => { + const code = 'c'.repeat(120); + const input = `\`\`\`\n${code}\n\`\`\``; + const max = 40; + const parts = splitMessage(input, max); + + assert.ok(parts.length > 1, 'split into multiple chunks'); + for (const part of parts) { + assert.ok(part.length <= max, `chunk length ${part.length} <= ${max}`); + } +}); diff --git a/src/core/fences.ts b/src/core/fences.ts new file mode 100644 index 0000000..36e48c6 --- /dev/null +++ b/src/core/fences.ts @@ -0,0 +1,59 @@ +/** + * Minimal, CommonMark-aware fenced-code-block tracking, shared by + * `renderTables` (skip tables inside fences) and `splitMessage` (re-fence + * across chunk boundaries). + * + * The important subtlety: fences may be longer than three characters (e.g. a + * ```` ```` block deliberately wraps content that itself contains ```), and a + * closing fence must use the *same character*, be *at least as long* as the + * opener, and carry *no info string*. Collapsing every marker to a 3-char token + * would mistake an inner ``` line for a close. + */ + +export interface Fence { + char: '`' | '~'; + len: number; + /** The info string (e.g. language) that follows an opening fence; '' for a bare/closing fence. */ + info: string; +} + +/** Parse a line as a fenced-code delimiter, or return null if it isn't one. */ +export function parseFenceLine(line: string): Fence | null { + const m = line.match(/^\s*(`{3,}|~{3,})\s*(.*)$/); + if (!m) return null; + return { char: m[1][0] as '`' | '~', len: m[1].length, info: m[2].trim() }; +} + +/** Whether `fence` can close an open block started by `open`. */ +export function closesFence(open: Fence, fence: Fence): boolean { + return fence.char === open.char && fence.len >= open.len && fence.info === ''; +} + +/** The literal line that re-opens a block, preserving the original info string. */ +export function openLine(f: Fence): string { + return f.char.repeat(f.len) + (f.info ? f.info : ''); +} + +/** The literal line that closes a block (bare marker, no info string). */ +export function closeLine(f: Fence): string { + return f.char.repeat(f.len); +} + +/** + * Scan `text` and return the fence left open at the end, or null if balanced. + * A fence-looking line that can't close the current block is treated as content. + */ +export function danglingFence(text: string): Fence | null { + let open: Fence | null = null; + for (const line of text.split('\n')) { + const fence = parseFenceLine(line); + if (!fence) continue; + if (open) { + if (closesFence(open, fence)) open = null; + // else: shorter/different/info-bearing fence inside the block → content + } else { + open = fence; + } + } + return open; +} diff --git a/src/core/renderTables.ts b/src/core/renderTables.ts index e3cc98b..d7be754 100644 --- a/src/core/renderTables.ts +++ b/src/core/renderTables.ts @@ -12,6 +12,8 @@ * imports a chat SDK. */ +import { parseFenceLine, closesFence, type Fence } from './fences'; + /** Max combined content width (sum of column widths) before we truncate cells. */ export const MAX_TABLE_WIDTH = 56; @@ -20,11 +22,6 @@ const MIN_COL_WIDTH = 3; type Align = 'left' | 'right' | 'center'; -/** A fenced-code delimiter line: ``` or ~~~ (optionally indented, optional info string). */ -function isFenceDelimiter(line: string): boolean { - return /^\s*(```|~~~)/.test(line); -} - /** * A GFM separator row, e.g. `|---|:--:|` or `:-- | --:`. * A pipe is required: without one, `---` is a thematic break, not a table. @@ -37,7 +34,7 @@ function isSeparator(line: string): boolean { /** A candidate table row: contains a pipe and isn't a fence delimiter. */ function isTableRow(line: string): boolean { - return line.includes('|') && line.trim().length > 0 && !isFenceDelimiter(line); + return line.includes('|') && line.trim().length > 0 && parseFenceLine(line) === null; } /** Split a markdown table row into trimmed cells, honoring `\|` escapes and optional outer pipes. */ @@ -160,30 +157,41 @@ export function renderTables(text: string): string { if (!text.includes('|')) return text; // fast path: no pipes, no tables const lines = text.split('\n'); const out: string[] = []; - let inFence = false; + let open: Fence | null = null; // current open code fence, or null let i = 0; while (i < lines.length) { const line = lines[i]; - if (isFenceDelimiter(line)) { - inFence = !inFence; + const fence = parseFenceLine(line); + if (fence) { + if (open) { + if (closesFence(open, fence)) open = null; + } else { + open = fence; + } out.push(line); i++; continue; } if ( - !inFence && + open === null && isTableRow(line) && i + 1 < lines.length && - isSeparator(lines[i + 1]) + isSeparator(lines[i + 1]) && + // A real GFM table's separator has the same column count as its header; + // requiring this avoids rewriting prose that merely looks table-ish. + splitCells(lines[i + 1]).length === splitCells(line).length ) { const header = line; const separator = lines[i + 1]; const body: string[] = []; let j = i + 2; - while (j < lines.length && isTableRow(lines[j]) && !isSeparator(lines[j])) { + // Collect every subsequent table row. A row of dashes (e.g. `| --- |`) + // is valid table data, so the separator check is NOT a terminator here — + // GFM ends a table at the first non-row line (typically a blank line). + while (j < lines.length && isTableRow(lines[j])) { body.push(lines[j]); j++; } diff --git a/src/core/splitMessage.ts b/src/core/splitMessage.ts index a8ae5f4..8a0ca8c 100644 --- a/src/core/splitMessage.ts +++ b/src/core/splitMessage.ts @@ -1,5 +1,20 @@ +import { + type Fence, + danglingFence, + openLine, + closeLine, + parseFenceLine, +} from './fences'; + export const DEFAULT_MAX_LENGTH = 1990; +/** + * Headroom reserved from `maxLength` when the text contains code fences, so the + * extra fence line that re-fencing prepends/appends to a chunk cannot push it + * past `maxLength`. Covers a long fence marker plus its newline and info string. + */ +const FENCE_RESERVE = 16; + /** * Greedy newline-preserving split (the original behavior). Splits on the last * newline under `maxLength`, hard-splitting only when a single line is too long. @@ -22,35 +37,22 @@ function rawSplit(text: string, maxLength: number): string[] { return parts; } -/** The open fence marker (``` or ~~~) left dangling at the end of `chunk`, or null if balanced. */ -function danglingFence(chunk: string): string | null { - let open: string | null = null; - for (const line of chunk.split('\n')) { - const m = line.match(/^\s*(`{3,}|~{3,})/); - if (!m) continue; - const marker = m[1][0].repeat(3); // normalize to a 3-char marker - if (open === null) open = marker; - else if (open[0] === marker[0]) open = null; // same fence char closes the block - } - return open; -} - /** * Re-balance code fences across chunk boundaries: when a chunk ends inside a - * fenced block, close the fence on that chunk and re-open it on the next, so a - * code block (e.g. a rendered table) never loses its monospace rendering when - * a long message is split. Adds at most a 4-char fence line per affected chunk, - * which stays within the 2000-char platform ceiling given DEFAULT_MAX_LENGTH. + * fenced block, close the fence on that chunk and re-open it (preserving the + * original marker length and info string) on the next, so a code block — e.g. a + * rendered table — never loses its monospace rendering when a long message is + * split. */ function repairFences(parts: string[]): string[] { const out: string[] = []; - let carry: string | null = null; // fence to re-open at the start of the next chunk + let carry: Fence | null = null; // fence to re-open at the start of the next chunk for (let part of parts) { - if (carry) part = carry + '\n' + part; + if (carry) part = openLine(carry) + '\n' + part; const open = danglingFence(part); if (open) { - part = part + '\n' + open; // close the still-open fence + part = part + '\n' + closeLine(open); carry = open; } else { carry = null; @@ -60,13 +62,21 @@ function repairFences(parts: string[]): string[] { return out; } +/** Does the text contain at least one fenced-code delimiter line? */ +function hasFence(text: string): boolean { + return text.split('\n').some((line) => parseFenceLine(line) !== null); +} + /** * Split a string into chunks that fit within `maxLength`. * Splits on newlines when possible to preserve formatting, and keeps fenced - * code blocks intact by re-fencing across chunk boundaries. + * code blocks intact by re-fencing across chunk boundaries. When fences are + * present a small budget is reserved so re-fencing never exceeds `maxLength`. */ export function splitMessage(text: string, maxLength: number = DEFAULT_MAX_LENGTH): string[] { - const parts = rawSplit(text, maxLength); + const fenced = hasFence(text); + const budget = fenced ? Math.max(1, maxLength - FENCE_RESERVE) : maxLength; + const parts = rawSplit(text, budget); if (parts.length <= 1) return parts; - return repairFences(parts); + return fenced ? repairFences(parts) : parts; } From 600415bc7429d852f6b5bc630003f3b9b87b0873 Mon Sep 17 00:00:00 2001 From: chr1syy Date: Fri, 26 Jun 2026 17:13:40 +0200 Subject: [PATCH 3/4] fix(kernel): address second Codex review pass on table rendering Two valid P2 findings from the GitHub Codex review of 3a81e57: - splitMessage: the fixed 16-char fence reserve was smaller than the repair text for fences with language labels (e.g. re-opening ```typescript is 14 chars + a 4-char close = 18), so chunks could land at 1992 > maxLength. The reserve is now computed from the actual fences in the text (longest re-open + longest close), so it covers any fence length or info string; fence-free splitting stays byte-for-byte unchanged. - fences: parseFenceLine accepted any indentation via \s*, so an over-indented ``` line (4+ spaces = indented code per CommonMark, not a fence) wrongly opened a fence and suppressed table conversion for everything after it. Restricted the indent to at most three spaces. Tests: +2 cases (language-label reserve, table after an over-indented line). Full suite 252 passing; build clean. Declined (with reason): the no-pipe continuation-row finding. cmark-gfm does continue a table across a pipe-less line, but in a chat bridge requiring a pipe per row avoids the far more common failure of swallowing a trailing sentence (written without a blank line) into the table. No-pipe ragged rows are vanishingly rare in agent output; the trade-off favors not eating prose. Co-Authored-By: Claude Opus 4.8 --- src/__tests__/renderTables.test.ts | 11 ++++++++++ src/__tests__/splitMessage.test.ts | 14 ++++++++++++ src/core/fences.ts | 8 +++++-- src/core/splitMessage.ts | 35 +++++++++++++++++++----------- 4 files changed, 53 insertions(+), 15 deletions(-) diff --git a/src/__tests__/renderTables.test.ts b/src/__tests__/renderTables.test.ts index 26d7da8..0cd097a 100644 --- a/src/__tests__/renderTables.test.ts +++ b/src/__tests__/renderTables.test.ts @@ -113,6 +113,17 @@ test('renderTables does not convert header/separator column-count mismatches', ( assert.equal(renderTables(input), input); }); +test('renderTables still converts a table after an over-indented (non-)fence line', () => { + // 4+ leading spaces is indented code, not a fence, so it must NOT suppress + // table detection for the table that follows. + const input = [' ```', 'indented sample', '', '| A | B |', '| - | - |', '| 1 | 2 |'].join( + '\n', + ); + const out = renderTables(input); + assert.ok(out.includes('+---+---+'), 'table after indented line is still rendered'); + assert.ok(out.startsWith(' ```'), 'the indented line is left as-is'); +}); + test('renderTables truncates wide cells with an ellipsis under the width cap', () => { const long = 'x'.repeat(120); const input = ['| Col |', '| --- |', `| ${long} |`].join('\n'); diff --git a/src/__tests__/splitMessage.test.ts b/src/__tests__/splitMessage.test.ts index a0373b5..69d1e07 100644 --- a/src/__tests__/splitMessage.test.ts +++ b/src/__tests__/splitMessage.test.ts @@ -69,6 +69,20 @@ test('splitMessage treats an inner ``` inside a 4-backtick block as content', () } }); +test('splitMessage reserves for fence info strings (language labels)', () => { + // A re-opened ```typescript line is longer than a bare ``` — the reserve must + // be sized from the actual fence, not a fixed constant. + const input = '```typescript\n' + 'x'.repeat(5000) + '\n```'; + const parts = splitMessage(input); + + assert.ok(parts.length > 1, 'split into multiple chunks'); + for (const part of parts) { + assert.ok(part.length <= MAX_LENGTH, `chunk length ${part.length} <= ${MAX_LENGTH}`); + } + // The language label is preserved on every re-opened chunk. + assert.ok(parts.slice(1, -1).every((p) => p.startsWith('```typescript'))); +}); + test('splitMessage honors a custom maxLength even when re-fencing', () => { const code = 'c'.repeat(120); const input = `\`\`\`\n${code}\n\`\`\``; diff --git a/src/core/fences.ts b/src/core/fences.ts index 36e48c6..8cda7dd 100644 --- a/src/core/fences.ts +++ b/src/core/fences.ts @@ -17,9 +17,13 @@ export interface Fence { info: string; } -/** Parse a line as a fenced-code delimiter, or return null if it isn't one. */ +/** + * Parse a line as a fenced-code delimiter, or return null if it isn't one. + * Per CommonMark a fence may be indented at most three spaces; four or more + * makes it an indented-code line, not a fence, so we must not treat it as one. + */ export function parseFenceLine(line: string): Fence | null { - const m = line.match(/^\s*(`{3,}|~{3,})\s*(.*)$/); + const m = line.match(/^ {0,3}(`{3,}|~{3,})\s*(.*)$/); if (!m) return null; return { char: m[1][0] as '`' | '~', len: m[1].length, info: m[2].trim() }; } diff --git a/src/core/splitMessage.ts b/src/core/splitMessage.ts index 8a0ca8c..f42ecdc 100644 --- a/src/core/splitMessage.ts +++ b/src/core/splitMessage.ts @@ -9,11 +9,24 @@ import { export const DEFAULT_MAX_LENGTH = 1990; /** - * Headroom reserved from `maxLength` when the text contains code fences, so the - * extra fence line that re-fencing prepends/appends to a chunk cannot push it - * past `maxLength`. Covers a long fence marker plus its newline and info string. + * Worst-case headroom that re-fencing can add to a single chunk: a continuation + * chunk may be prepended with the re-opened fence line (marker + info + newline) + * AND appended with a closing fence line (marker + newline). Sized from the + * actual fences in the text so it covers any fence length or language label — a + * fixed reserve cannot, since info strings are unbounded. Returns 0 when there + * are no fences, leaving fence-free splitting byte-for-byte unchanged. */ -const FENCE_RESERVE = 16; +function fenceReserve(text: string): number { + let maxPrepend = 0; + let maxAppend = 0; + for (const line of text.split('\n')) { + const f = parseFenceLine(line); + if (!f) continue; + maxPrepend = Math.max(maxPrepend, openLine(f).length + 1); // re-opened line + '\n' + maxAppend = Math.max(maxAppend, closeLine(f).length + 1); // closing line + '\n' + } + return maxPrepend + maxAppend; +} /** * Greedy newline-preserving split (the original behavior). Splits on the last @@ -62,21 +75,17 @@ function repairFences(parts: string[]): string[] { return out; } -/** Does the text contain at least one fenced-code delimiter line? */ -function hasFence(text: string): boolean { - return text.split('\n').some((line) => parseFenceLine(line) !== null); -} - /** * Split a string into chunks that fit within `maxLength`. * Splits on newlines when possible to preserve formatting, and keeps fenced * code blocks intact by re-fencing across chunk boundaries. When fences are - * present a small budget is reserved so re-fencing never exceeds `maxLength`. + * present, budget is reserved up front (sized from the actual fences) so + * re-fencing never pushes a chunk past `maxLength`. */ export function splitMessage(text: string, maxLength: number = DEFAULT_MAX_LENGTH): string[] { - const fenced = hasFence(text); - const budget = fenced ? Math.max(1, maxLength - FENCE_RESERVE) : maxLength; + const reserve = fenceReserve(text); + const budget = reserve > 0 ? Math.max(1, maxLength - reserve) : maxLength; const parts = rawSplit(text, budget); if (parts.length <= 1) return parts; - return fenced ? repairFences(parts) : parts; + return reserve > 0 ? repairFences(parts) : parts; } From f829d593bb60816c21948a0054d835bcb3733dc5 Mon Sep 17 00:00:00 2001 From: chr1syy Date: Fri, 26 Jun 2026 17:32:27 +0200 Subject: [PATCH 4/4] fix(kernel): address third review pass (CodeRabbit + Codex) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CodeRabbit (on 600415b): - fences: reject backtick fence openers whose info string contains a backtick (invalid per CommonMark), so such lines aren't misread as fences. - test: assert the language label on every reopened chunk via slice(1) (the prior slice(1,-1) was vacuous for two-part splits). Codex (on 600415b): - renderTables: end the table body at the start of another block (list, blockquote, heading) even when that line contains a pipe, instead of folding it into the ASCII table. - renderTables: skip table detection for 4+ space indented lines — that is an indented code block, not a table. - renderTables: clamp each column width to the cap up front so capWidths' loop is bounded by the cap, not by cell length (a pasted 100k-char cell no longer stalls the event loop: ~22ms for a 200k cell). - splitMessage: restore the `text.length <= maxLength` fast path so a fenced message that already fits isn't needlessly split by the fence reserve. Deferred (P3, with reason): measuring cell display width (CJK/emoji). Proper wcwidth is non-trivial without a dep, and Discord/Slack don't render emoji at a stable cell width inside code fences anyway, so the math wouldn't reliably fix alignment. Can add a CJK-focused width helper if it proves needed. Tests: +3 (block-start termination, indented-code table, fits-in-one-chunk). Full suite 256 passing; build clean. Co-Authored-By: Claude Opus 4.8 --- src/__tests__/renderTables.test.ts | 29 ++++++++++++++++++++++++++++ src/__tests__/splitMessage.test.ts | 13 +++++++++++-- src/core/fences.ts | 7 ++++++- src/core/renderTables.ts | 31 +++++++++++++++++++++++++----- src/core/splitMessage.ts | 3 +++ 5 files changed, 75 insertions(+), 8 deletions(-) diff --git a/src/__tests__/renderTables.test.ts b/src/__tests__/renderTables.test.ts index 0cd097a..ba75a68 100644 --- a/src/__tests__/renderTables.test.ts +++ b/src/__tests__/renderTables.test.ts @@ -113,6 +113,35 @@ test('renderTables does not convert header/separator column-count mismatches', ( assert.equal(renderTables(input), input); }); +test('renderTables does not treat a backtick line with backticks in its info as a fence', () => { + // Per CommonMark a backtick fence opener may not have backticks in its info + // string, so this line is plain text and must not suppress the table below. + const input = ['```js `inline`', 'just prose', '', '| A | B |', '| - | - |', '| 1 | 2 |'].join( + '\n', + ); + const out = renderTables(input); + assert.ok(out.includes('+---+---+'), 'the following table is still converted'); +}); + +test('renderTables stops the table at a following list/blockquote even with a pipe', () => { + const input = ['| A | B |', '| - | - |', '| 1 | 2 |', '- next | note', '> quote | here'].join( + '\n', + ); + const out = renderTables(input); + + // The table ends before the list item; list and quote stay as raw markdown. + assert.ok(out.includes('| 1 | 2 |'), 'table body rendered'); + assert.ok(out.includes('\n- next | note'), 'list item left intact outside the table'); + assert.ok(out.includes('\n> quote | here'), 'blockquote left intact outside the table'); + assert.equal(out.split('```').length - 1, 2, 'exactly one fenced block'); +}); + +test('renderTables does not convert a 4-space-indented (indented-code) table', () => { + const input = [' | A | B |', ' | - | - |', ' | 1 | 2 |'].join('\n'); + // Indented 4+ spaces is a code block, not a table — leave it verbatim. + assert.equal(renderTables(input), input); +}); + test('renderTables still converts a table after an over-indented (non-)fence line', () => { // 4+ leading spaces is indented code, not a fence, so it must NOT suppress // table detection for the table that follows. diff --git a/src/__tests__/splitMessage.test.ts b/src/__tests__/splitMessage.test.ts index 69d1e07..e7ae962 100644 --- a/src/__tests__/splitMessage.test.ts +++ b/src/__tests__/splitMessage.test.ts @@ -69,6 +69,14 @@ test('splitMessage treats an inner ``` inside a 4-backtick block as content', () } }); +test('splitMessage does not split a fenced message that already fits', () => { + // Length between (maxLength - reserve) and maxLength must still be one chunk. + const body = 'x'.repeat(DEFAULT_MAX_LENGTH - 10); + const input = '```\n' + body + '\n```'; + assert.ok(input.length <= DEFAULT_MAX_LENGTH, 'precondition: fits in one message'); + assert.deepEqual(splitMessage(input), [input]); +}); + test('splitMessage reserves for fence info strings (language labels)', () => { // A re-opened ```typescript line is longer than a bare ``` — the reserve must // be sized from the actual fence, not a fixed constant. @@ -79,8 +87,9 @@ test('splitMessage reserves for fence info strings (language labels)', () => { for (const part of parts) { assert.ok(part.length <= MAX_LENGTH, `chunk length ${part.length} <= ${MAX_LENGTH}`); } - // The language label is preserved on every re-opened chunk. - assert.ok(parts.slice(1, -1).every((p) => p.startsWith('```typescript'))); + // The language label is preserved on every re-opened chunk (slice(1) so the + // assertion is non-vacuous even for a two-part split). + assert.ok(parts.slice(1).every((p) => p.startsWith('```typescript'))); }); test('splitMessage honors a custom maxLength even when re-fencing', () => { diff --git a/src/core/fences.ts b/src/core/fences.ts index 8cda7dd..4296f68 100644 --- a/src/core/fences.ts +++ b/src/core/fences.ts @@ -25,7 +25,12 @@ export interface Fence { export function parseFenceLine(line: string): Fence | null { const m = line.match(/^ {0,3}(`{3,}|~{3,})\s*(.*)$/); if (!m) return null; - return { char: m[1][0] as '`' | '~', len: m[1].length, info: m[2].trim() }; + const char = m[1][0] as '`' | '~'; + const info = m[2].trim(); + // CommonMark: a backtick fence's info string may not contain a backtick + // (it would be ambiguous with inline code), so such a line is not a fence. + if (char === '`' && info.includes('`')) return null; + return { char, len: m[1].length, info }; } /** Whether `fence` can close an open block started by `open`. */ diff --git a/src/core/renderTables.ts b/src/core/renderTables.ts index d7be754..b49a9b2 100644 --- a/src/core/renderTables.ts +++ b/src/core/renderTables.ts @@ -37,6 +37,21 @@ function isTableRow(line: string): boolean { return line.includes('|') && line.trim().length > 0 && parseFenceLine(line) === null; } +/** Number of leading spaces (CommonMark: 4+ means the line is indented code, not a table). */ +function leadingSpaces(line: string): number { + const m = line.match(/^ */); + return m ? m[0].length : 0; +} + +/** + * A line that begins a different block-level structure (list item, blockquote, + * or ATX heading). GFM ends a table at such a line even when it contains a pipe, + * so the body scan must stop here rather than fold it into the table. + */ +function isBlockStart(line: string): boolean { + return /^ {0,3}([-*+]\s|\d+[.)]\s|>|#{1,6}\s)/.test(line); +} + /** Split a markdown table row into trimmed cells, honoring `\|` escapes and optional outer pipes. */ function splitCells(row: string): string[] { const s = row.trim(); @@ -133,8 +148,11 @@ function renderTable(headerLine: string, separatorLine: string, bodyLines: strin return Array.from({ length: columns }, (_, i) => cells[i] ?? ''); }); + // Clamp each column to the cap up front: no single column can exceed the + // total budget, and this keeps capWidths' per-character loop bounded by the + // cap (not by cell length) so a pasted 100k-char cell can't stall the loop. let widths = Array.from({ length: columns }, (_, i) => - Math.max(header[i].length, ...body.map((r) => r[i].length), 1), + Math.min(Math.max(header[i].length, ...body.map((r) => r[i].length), 1), MAX_TABLE_WIDTH), ); widths = capWidths(widths, MAX_TABLE_WIDTH); @@ -178,6 +196,8 @@ export function renderTables(text: string): string { if ( open === null && isTableRow(line) && + // 4+ leading spaces is an indented code block, not a table (CommonMark). + leadingSpaces(line) <= 3 && i + 1 < lines.length && isSeparator(lines[i + 1]) && // A real GFM table's separator has the same column count as its header; @@ -188,10 +208,11 @@ export function renderTables(text: string): string { const separator = lines[i + 1]; const body: string[] = []; let j = i + 2; - // Collect every subsequent table row. A row of dashes (e.g. `| --- |`) - // is valid table data, so the separator check is NOT a terminator here — - // GFM ends a table at the first non-row line (typically a blank line). - while (j < lines.length && isTableRow(lines[j])) { + // Collect subsequent table rows. A row of dashes (e.g. `| --- |`) is valid + // table data, so the separator check is NOT a terminator here. GFM ends a + // table at the first non-row line or the start of another block (list, + // blockquote, heading) — even one that happens to contain a pipe. + while (j < lines.length && isTableRow(lines[j]) && !isBlockStart(lines[j])) { body.push(lines[j]); j++; } diff --git a/src/core/splitMessage.ts b/src/core/splitMessage.ts index f42ecdc..e4f372e 100644 --- a/src/core/splitMessage.ts +++ b/src/core/splitMessage.ts @@ -83,6 +83,9 @@ function repairFences(parts: string[]): string[] { * re-fencing never pushes a chunk past `maxLength`. */ export function splitMessage(text: string, maxLength: number = DEFAULT_MAX_LENGTH): string[] { + // A message that already fits is sent as-is — the fence reserve only matters + // once a split is actually required, so don't let it force an unneeded split. + if (text.length <= maxLength) return [text]; const reserve = fenceReserve(text); const budget = reserve > 0 ? Math.max(1, maxLength - reserve) : maxLength; const parts = rawSplit(text, budget);