diff --git a/.claude/agents/context/20260325-pr-critical-reviewer-CONTEXT.md b/.claude/agents/context/20260325-pr-critical-reviewer-CONTEXT.md new file mode 100644 index 00000000..eb1978c2 --- /dev/null +++ b/.claude/agents/context/20260325-pr-critical-reviewer-CONTEXT.md @@ -0,0 +1,129 @@ +--- +agent: pr-critical-reviewer +timestamp: 2026-03-25T00:00:00Z +session_id: issue-166-heading-loss-fix +prior_context: [] +next_agents: [] +--- + +# Agent Context: PR Critical Reviewer + +## Mission Summary +**PR Reviewed:** Fix for heading loss tolerance in isSuspiciouslyIncompleteTranslation (issue-166) +**Review Status:** Approved +**Critical Issues:** 0 + +## Key Findings from Prior Context +**Expected from Spec:** N/A — no prior spec context files found +**Expected from Plan:** N/A — no prior implementation planner context found +**Actual vs Expected:** Fix matches the reviewer's stated intent + +## Analysis Results +**Code Changes Reviewed:** +- Files changed: 1 +- Lines changed: 1 (single operator fix) +- Complexity assessment: Low + +**File reviewed:** +`/home/luandro/Dev/digidem/comapeo-docs/.worktrees/issue-166/scripts/notion-translate/translateFrontMatter.ts` +Lines 598-640 (isSuspiciouslyIncompleteTranslation function) + +**Test file reviewed:** +`/home/luandro/Dev/digidem/comapeo-docs/.worktrees/issue-166/scripts/notion-translate/translateFrontMatter.test.ts` + +## Fix Details + +**Before:** +```typescript +const headingLoss = + sourceMetrics.headingCount > 0 && + translatedMetrics.headingCount < sourceMetrics.headingCount - 1; +``` + +**After:** +```typescript +const headingLoss = + sourceMetrics.headingCount > 0 && + translatedMetrics.headingCount < sourceMetrics.headingCount; +``` + +## Analysis Results + +**Critical Issues Identified:** None + +**Severity Breakdown:** +| Type | Count | Severity | +|------|-------|----------| +| Bugs | 0 | - | +| Security | 0 | - | +| Performance | 0 | - | +| Correctness | 0 | - | + +## Edge Case Analysis + +**headingCount = 
0:** +Safe. The `sourceMetrics.headingCount > 0` guard on line 612 short-circuits +the entire headingLoss sub-expression to false. Zero-heading documents are +unaffected. + +**headingCount = 1:** +Correct. If the source has one heading and the translation has zero, headingLoss +is now true and a retry fires. This is the right behavior — dropping the only +heading is a genuine structural loss. + +**Legitimate LLM heading merging (false-positive risk):** +Low risk. If an LLM merges two headings into one, the new strict check will +trigger a retry. The retry path (lines 995-1016) uses isCritical: false and +retries with smaller chunks up to TRANSLATION_COMPLETENESS_MAX_RETRIES times. +The cost is extra API calls only; the final translation is not broken. Silent +heading loss is a more severe failure mode than a spurious retry, so the +trade-off is acceptable and consistent with the design intent for all other +structural checks in the same function. + +**Consistency with sibling checks:** +fencedBlockLoss, admonitionLoss, and tableLoss all use strict zero-tolerance +comparisons. The old headingLoss tolerance of -1 was the only outlier. The fix +makes headingLoss consistent with the rest of the function. + +## Test Coverage Assessment + +The test "retries with smaller chunks when a valid response omits a section" +(translateFrontMatter.test.ts line 126) is the primary coverage for headingLoss. +It uses a 4-heading source and a 2-heading response. + +- Old condition: 2 < (4 - 1) = 2 < 3 = true — test passed before the fix +- New condition: 2 < 4 = true — test still passes after the fix + +The test remains valid. No test updates are required. + +**Coverage gap (non-blocking):** There is no test that specifically covers the +boundary case the fix addresses — source with N headings, translation returning +exactly N-1 headings. This gap existed before and still exists. It is not a +blocker since the existing test exercises the core path correctly. 
+ +## Actions Taken +**Review Process:** +- Read full translateFrontMatter.ts (1044 lines) +- Read full translateFrontMatter.test.ts (572 lines) +- Analyzed isSuspiciouslyIncompleteTranslation logic and all sibling checks +- Analyzed retry/recovery path in translateText +- Verified edge cases for headingCount 0, 1, and N +- Verified test coverage adequacy + +**Sub-Agents Spawned:** None — fix is approved, no fixer needed + +## Recommendations + +**Before Merge:** +- No blocking items + +**Optional follow-up (not blocking):** +- Add a targeted unit test for the N-1 heading boundary case that was the + subject of the fix, to prevent future regressions on the exact threshold + +## Handoff Notes + +**For Developer:** +- Fix is correct and complete as written +- No test changes required +- Re-review not required diff --git a/.output2.txt b/.output2.txt new file mode 100644 index 00000000..bff54e94 --- /dev/null +++ b/.output2.txt @@ -0,0 +1,470 @@ +diff --git a/i18n/pt/code.json b/i18n/pt/code.json +index 7c22c3c..c3984d3 100644 +--- a/i18n/pt/code.json ++++ b/i18n/pt/code.json +@@ -1,201 +1,264 @@ + { +- "theme.TOC.title": { +- "message": "Nesta página", +- "description": "Title for the table of contents section" +- }, +- "Introduction": { +- "message": "Introdução" +- }, +- "Preparing to Use CoMapeo": { +- "message": "Preparando para usar do CoMapeo (Mobile)" +- }, +- "Understanding CoMapeo's Core Concepts and Functions": { +- "message": "Nova Página" +- }, +- "Getting Started Essentials": { +- "message": "Novo título da seção" +- }, +- "Gathering the Right Equipment for CoMapeo": { +- "message": "Reunindo o Equipamento Certo para o CoMapeo" +- }, +- "Device Setup and Maintenance for CoMapeo": { +- "message": "Nova Página" +- }, +- "Installing CoMapeo & Onboarding": { +- "message": "Nova Página" +- }, +- "Initial Use and CoMapeo Settings": { +- "message": "Nova Página" +- }, +- "Uninstalling CoMapeo": { +- "message": "Nova Página" +- }, +- "Customizing CoMapeo": { +- 
"message": "Novo Alternar" +- }, +- "Organizing Key Materials for Projects": { +- "message": "Nova Página" +- }, +- "Building a Custom Categories Set": { +- "message": "Nova Página" +- }, +- "Building Custom Background Maps": { +- "message": "Nova Página" +- }, +- "Observations & Tracks": { +- "message": "Novo título da seção" +- }, +- "Gathering Observations & Tracks": { +- "message": "Coletando Observações" +- }, +- "Creating a New Observation": { +- "message": "Nova Página" +- }, +- "Creating a New Track": { +- "message": "Nova Página" +- }, +- "Reviewing Observations": { +- "message": "Revisando Observações" +- }, +- "Exploring the Observations List": { +- "message": "Nova Página" +- }, +- "Reviewing an Observation": { +- "message": "Nova Página" +- }, +- "Editing Observations": { +- "message": "Nova Página" +- }, +- "Data Privacy & Security": { +- "message": "Novo título da seção" +- }, +- "Encryption and Security": { +- "message": "Nova Página" +- }, +- "Managing Data Privacy & Security": { +- "message": "Gerenciamento de dados e privacidade" +- }, +- "Using an App Passcode for Security": { +- "message": "Nova Página" +- }, +- "Adjusting Data Sharing and Privacy": { +- "message": "Nova Página" +- }, +- "Mapping with Collaborators": { +- "message": "Nova Página" +- }, +- "Managing Projects": { +- "message": "Gerenciando Projetos" +- }, +- "Understanding Projects": { +- "message": "Nova Página" +- }, +- "Creating a New Project": { +- "message": "Nova Página" +- }, +- "Changing Categories Set": { +- "message": "Nova Página" +- }, +- "Managing a Team": { +- "message": "Nova Página" +- }, +- "Inviting Collaborators": { +- "message": "Nova Página" +- }, +- "Ending a Project": { +- "message": "Nova Página" +- }, +- "Exchanging Project Data": { +- "message": "Troca de Dados do Projeto" +- }, +- "Understanding How Exchange Works": { +- "message": "Nova Página A" +- }, +- "Using Exchange Offline": { +- "message": "Nova Página" +- }, +- "Using a Remote Archive": { +- 
"message": "Nova Página" +- }, +- "Moving Observations & Tracks Outside of CoMapeo": { +- "message": "Compartilhando observações fora do CoMapeo" +- }, +- "Sharing a Single Observation and Metadata": { +- "message": "Nova Página" +- }, +- "Exporting all Observations": { +- "message": "Nova Página" +- }, +- "Using Observations outside of CoMapeo": { +- "message": "Nova Página" +- }, +- "Miscellaneous": { +- "message": "Variado" +- }, +- "FAQ": { +- "message": "Perguntas frequentes" +- }, +- "Glossary": { +- "message": "Glossário" +- }, +- "Troubleshooting": { +- "message": "Resolução de Problemas" +- }, +- "Common Solutions": { +- "message": "Nova Página" +- }, +- "Troubleshooting: Setup and Customization": { +- "message": "Nova Página" +- }, +- "Troubleshooting: Observations and Tracks": { +- "message": "Nova Página" +- }, +- "Troubleshooting: Data Privacy and Security": { +- "message": "Nova Página" +- }, +- "Troubleshooting: Mapping with Collaborators": { +- "message": "Nova Página" +- }, +- "Troubleshooting: Moving Observations and Tracks outside of CoMapeo": { +- "message": "Nova Página" +- }, +- "Elementos de Conteúdo de Teste": { +- "message": "Elementos de Conteúdo de Teste" +- }, +- "Testing links": { +- "message": "Nova Página" +- }, +- "Understanding CoMapeo's Core Concepts and Functions": { +- "message": "Nova Página" +- }, +- "Installing CoMapeo and Onboarding": { +- "message": "Nova Página" +- }, +- "Planning and Preparing for a Project": { +- "message": "Nova Página" +- }, +- "Observations and Tracks": { +- "message": "Novo título da seção" +- }, +- "Gathering Observations and Tracks": { +- "message": "Coletando Observações" +- }, +- "Data Privacy and Security": { +- "message": "Novo título da seção" +- }, +- "Managing Data Privacy and Security": { +- "message": "Gerenciamento de dados e privacidade" +- }, +- "Moving Observations and Tracks Outside of CoMapeo": { +- "message": "Compartilhando observações fora do CoMapeo" +- }, +- "Developer Tools": { 
+- "message": "Ferramentas de desenvolvedor" +- }, +- "API Reference": { +- "message": "Referência de API" +- }, +- "CLI Reference": { +- "message": "Referência de CLI" +- } +-} ++ "theme.TOC.title": { ++ "message": "Nesta página", ++ "description": "Title for the table of contents section" ++ }, ++ "Introduction": { ++ "message": "Introdução" ++ }, ++ "Preparing to Use CoMapeo": { ++ "message": "Preparação para usar CoMapeo" ++ }, ++ "Understanding CoMapeo's Core Concepts and Functions": { ++ "message": "Nova Página" ++ }, ++ "Getting Started Essentials": { ++ "message": "Novo título da seção" ++ }, ++ "Gathering the Right Equipment for CoMapeo": { ++ "message": "Reunindo o Equipamento Adequado para CoMapeo" ++ }, ++ "Device Setup and Maintenance for CoMapeo": { ++ "message": "Nova Página" ++ }, ++ "Installing CoMapeo & Onboarding": { ++ "message": "Instalando o CoMapeo e Integração" ++ }, ++ "Initial Use and CoMapeo Settings": { ++ "message": "Nova Página" ++ }, ++ "Uninstalling CoMapeo": { ++ "message": "Desinstalando o CoMapeo" ++ }, ++ "Customizing CoMapeo": { ++ "message": "Personalizando CoMapeo" ++ }, ++ "Organizing Key Materials for Projects": { ++ "message": "Nova Página" ++ }, ++ "Building a Custom Categories Set": { ++ "message": "Nova Página" ++ }, ++ "Building Custom Background Maps": { ++ "message": "Nova Página" ++ }, ++ "Observations & Tracks": { ++ "message": "Observações e Trilhas" ++ }, ++ "Gathering Observations & Tracks": { ++ "message": "Coletando Observações e Trilhas" ++ }, ++ "Creating a New Observation": { ++ "message": "Criando uma Nova Observação" ++ }, ++ "Creating a New Track": { ++ "message": "Criando uma Nova Trilha" ++ }, ++ "Reviewing Observations": { ++ "message": "Revisando Observações" ++ }, ++ "Exploring the Observations List": { ++ "message": "Explorando a Lista de Observações" ++ }, ++ "Reviewing an Observation": { ++ "message": "Revisando uma observação" ++ }, ++ "Editing Observations": { ++ "message": "Editando 
observações" ++ }, ++ "Data Privacy & Security": { ++ "message": "Privacidade e segurança de dados" ++ }, ++ "Encryption and Security": { ++ "message": "Nova Página" ++ }, ++ "Managing Data Privacy & Security": { ++ "message": "Gestão de Privacidade de Dados e Segurança" ++ }, ++ "Using an App Passcode for Security": { ++ "message": "Utilize uma senha para o CoMapeo por motivos de segurança" ++ }, ++ "Adjusting Data Sharing and Privacy": { ++ "message": "Nova Página" ++ }, ++ "Mapping with Collaborators": { ++ "message": "Mapeamento com Colaboradores" ++ }, ++ "Managing Projects": { ++ "message": "Gerenciando Projetos" ++ }, ++ "Understanding Projects": { ++ "message": "Entenda os Fundamentos de Projetos" ++ }, ++ "Creating a New Project": { ++ "message": "Criar um novo projeto" ++ }, ++ "Changing Categories Set": { ++ "message": "Alterando o Conjunto de Categorias" ++ }, ++ "Managing a Team": { ++ "message": "Nova Página" ++ }, ++ "Inviting Collaborators": { ++ "message": "Convidar colaboradores" ++ }, ++ "Ending a Project": { ++ "message": "Nova Página" ++ }, ++ "Exchanging Project Data": { ++ "message": "Troca de Dados do Projeto" ++ }, ++ "Understanding How Exchange Works": { ++ "message": "Entendendo Como a Troca Funciona" ++ }, ++ "Using Exchange Offline": { ++ "message": "Trocar informações sem conexão com a internet" ++ }, ++ "Using a Remote Archive": { ++ "message": "Usar um arquivo remoto" ++ }, ++ "Moving Observations & Tracks Outside of CoMapeo": { ++ "message": "Compartilhando observações fora do CoMapeo" ++ }, ++ "Sharing a Single Observation and Metadata": { ++ "message": "Nova Página" ++ }, ++ "Exporting all Observations": { ++ "message": "Exportar todas as observações" ++ }, ++ "Using Observations outside of CoMapeo": { ++ "message": "Utilizando observações fora do CoMapeo" ++ }, ++ "Miscellaneous": { ++ "message": "Variado" ++ }, ++ "FAQ": { ++ "message": "Perguntas frequentes" ++ }, ++ "Glossary": { ++ "message": "Glossário" ++ }, ++ 
"Troubleshooting": { ++ "message": "Solução de problemas" ++ }, ++ "Common Solutions": { ++ "message": "Soluções Comuns" ++ }, ++ "Troubleshooting: Setup and Customization": { ++ "message": "Nova Página" ++ }, ++ "Troubleshooting: Observations and Tracks": { ++ "message": "Nova Página" ++ }, ++ "Troubleshooting: Data Privacy and Security": { ++ "message": "Nova Página" ++ }, ++ "Troubleshooting: Mapping with Collaborators": { ++ "message": "Nova Página" ++ }, ++ "Troubleshooting: Moving Observations and Tracks outside of CoMapeo": { ++ "message": "Nova Página" ++ }, ++ "Elementos de Conteúdo de Teste": { ++ "message": "Elementos de Conteúdo de Teste" ++ }, ++ "Testing links": { ++ "message": "Nova Página" ++ }, ++ "Installing CoMapeo and Onboarding": { ++ "message": "Nova Página" ++ }, ++ "Planning and Preparing for a Project": { ++ "message": "Nova Página" ++ }, ++ "Observations and Tracks": { ++ "message": "Novo título da seção" ++ }, ++ "Gathering Observations and Tracks": { ++ "message": "Coletando Observações" ++ }, ++ "Data Privacy and Security": { ++ "message": "Novo título da seção" ++ }, ++ "Managing Data Privacy and Security": { ++ "message": "Gerenciamento de dados e privacidade" ++ }, ++ "Moving Observations and Tracks Outside of CoMapeo": { ++ "message": "Compartilhando observações fora do CoMapeo" ++ }, ++ "Developer Tools": { ++ "message": "Ferramentas de desenvolvedor" ++ }, ++ "API Reference": { ++ "message": "Referência de API" ++ }, ++ "CLI Reference": { ++ "message": "Referência de CLI" ++ }, ++ "Understanding CoMapeo’s Core Concepts & Functions": { ++ "message": "Entendendo os Conceitos e Funções Principais do CoMapeo" ++ }, ++ "Getting Started - Essentials": { ++ "message": "Introdução - Noções básicas" ++ }, ++ "Device Setup & Maintenance for CoMapeo": { ++ "message": "Configuração e manutenção do dispositivo para o CoMapeo" ++ }, ++ "Initial Use & CoMapeo Settings": { ++ "message": "Uso inicial e Configurações do CoMapeo" ++ }, ++ "Planning 
& Preparing for a Project": { ++ "message": "Planejamento e Preparação para um Projeto" ++ }, ++ "Creating a Custom Categories Set": { ++ "message": "Construindo um Conjunto de Categorias Personalizado" ++ }, ++ "Creating Custom Background Maps": { ++ "message": "Criando mapas de fundo personalizados" ++ }, ++ "Reviewing and Editing Tracks": { ++ "message": "Revisão e Edição de Trilha" ++ }, ++ "Encryption & Security": { ++ "message": "Criptografia e Segurança" ++ }, ++ "Adjusting Data Sharing & Privacy": { ++ "message": "Ajuste o compartilhamento e a privacidade dos dados" ++ }, ++ "Selecting Device Roles & Teams": { ++ "message": "Seleção de funções e equipes de dispositivos" ++ }, ++ "Leave a project": { ++ "message": "Abandonar um projeto" ++ }, ++ "Removing a device from a Project": { ++ "message": "Remover um dispositivo de um projeto" ++ }, ++ "Completing or Ending a Project": { ++ "message": "Concluir um projeto" ++ }, ++ "Exchanging Observations": { ++ "message": "Tracar Observao…" ++ }, ++ "Sharing Background Map": { ++ "message": "Compartilhe o mapa de fundo" ++ }, ++ "Sharing a Single Observation & Metadata": { ++ "message": "Compartilhe uma única observação e metadados." ++ }, ++ "Site Map": { ++ "message": "Mapa do site" ++ }, ++ "Troubleshooting: Setup & Customization": { ++ "message": "Solução de Problemas: Configuração e Personalização" ++ }, ++ "CoMapeo Data & Privacy (translating for public page)": { ++ "message": "Nova Página" ++ }, ++ "[TEST] Installation Guide": { ++ "message": "[TESTE] Guia de Instalação" ++ }, ++ "Changing Backgroud Maps": { ++ "message": "Alterando mapas de fundo" ++ } ++} +\ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..491840e0 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,17 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
+ +## [Unreleased] + +### Added + +- **Pre-Release Safety:** Added validation checks to ensure all translations (locales) are complete. + +### Fixed + +- **Translation Completeness:** Fixed several issues with how the system measures if a page is fully translated. +- **Long-form Content Translation:** Prevented issues where content could be lost when translating very long pages. +- **Build Scripts:** Resolved bugs in the TypeScript compilation and Markdown parsing scripts. diff --git a/plans/2026-03-19-PLAN-v1.md b/plans/2026-03-19-PLAN-v1.md new file mode 100644 index 00000000..e69e81ec --- /dev/null +++ b/plans/2026-03-19-PLAN-v1.md @@ -0,0 +1,160 @@ +# Long-Form Translation Reliability Plan + +## Objective + +Improve automatic Notion translation reliability for long-form documentation by proactively chunking markdown before unsafe request sizes, detecting structurally incomplete model responses, retrying with smaller chunks, and ensuring failures prevent both localized markdown writes and Notion-side translated page creation. 
+ +## Scope + +### In Scope + +- Localized markdown generation under `i18n/...` via `scripts/notion-translate/translateFrontMatter.ts` and the save path in `scripts/notion-translate/index.ts` +- Notion-side translated page creation in `scripts/notion-translate/index.ts` and `scripts/notion-translate/translateBlocks.ts`, specifically the workflow gating that must prevent page creation when full-page markdown translation is incomplete +- Focused unit and workflow tests in `scripts/notion-translate/translateFrontMatter.test.ts`, `scripts/notion-translate/index.test.ts`, and `scripts/notion-translate/translateBlocks.test.ts` + +### Out of Scope + +- Redesigning `scripts/notion-translate/translateBlocks.ts` into a new translation architecture +- Unifying markdown and block translation into a single pipeline +- Changes outside the translation scripts and their focused tests + +## Current Workflow Summary + +- `bun run notion:translate` creates translation pages in Notion, updates `code.json`, translates theme strings, and saves localized markdown. +- For each non-title page, `processSinglePageTranslation()` first translates full markdown with `translateText()`, then builds Notion-side translated blocks with `translateNotionBlocksDirectly()`, then creates or updates the Notion page with `createNotionPageWithBlocks()`, then writes translated markdown with `saveTranslatedContentToDisk()`. +- `translateNotionBlocksDirectly()` is not independent of the markdown reliability work: `translateRichTextArray()` also routes rich-text translation through `translateText()`. Chunking, completeness validation, and retry behavior therefore affect both localized markdown and block translation behavior. +- Because both outputs are produced in the same workflow, an incomplete full-page translation must fail the page before either output is persisted. No partial success is acceptable where only one output is written. 
+ +## Problem Statement + +Long-form markdown can remain on a single large model call, and the pipeline currently accepts structurally partial but schema-valid responses. That allows missing sections to propagate silently into generated locale files and into the translated Notion page created for the same source page. + +## Concrete Decisions + +- Proactive chunk cap: `120_000` total request characters per markdown translation call, regardless of larger model context windows. +- Retry floor: `8_000` total request characters. +- Completeness retry depth: `4` retries maximum, halving the chunk limit on each retry until the floor is reached. +- Title handling: the first chunk owns the translated title; later chunks send an empty title. +- Error classification: persistent incompleteness surfaces as a non-critical translation failure (`isCritical: false`) after completeness retries are exhausted. +- Workflow continuation semantics: the run continues processing remaining pages and languages after a page-level incompleteness failure, records the failure in the summary, and exits non-zero at the end if any document translation failed. +- Completeness validation signals: + - heading count loss + - fenced code block count loss + - admonition count loss + - table disappearance + - complete loss of bullet list items when the source has at least 3 bullet items + - complete loss of numbered list items when the source has at least 3 numbered items + - severe length shrinkage when source content length is at least `4_000` characters and translated/source ratio is below `0.55` +- Placeholder and image-path integrity checks remain mandatory and must continue to run alongside completeness validation. + +## Requirements + +### Functional Requirements + +- Add a reliability-oriented markdown chunk cap independent of model-advertised context limits. +- Validate translated markdown structure before accepting single-call responses and after chunk reassembly. 
+- Retry suspiciously incomplete translations with smaller chunks before surfacing failure. +- Treat persistent incompleteness as a document translation failure that: + - prevents create/update of the translated Notion page + - prevents writing localized markdown to disk + - increments `failedTranslations` for the language + - appears in `failures` and `TRANSLATION_SUMMARY` + - records `language`, `title`, `pageId`, `error`, and `isCritical` in the failure entry + - causes `bun run notion:translate` to exit non-zero per the workflow contract +- Apply workflow-level failure handling only after completeness retries are exhausted inside `translateFrontMatter.ts`. +- Continue processing remaining pages and languages after a page-level incompleteness failure, then fail the overall run from the final summary if any document translation failed. +- Preserve existing Notion block behavior for URL sanitization, image mapping, inline-image path consumption, recursive child block translation, and metadata stripping. +- Preserve existing behavior for short pages, title handling, placeholder integrity, and post-translation image validation. +- Do not add new dependencies. + +### Non-Functional Requirements + +- Keep changes localized to `scripts/constants.ts`, `scripts/notion-translate/translateFrontMatter.ts`, `scripts/notion-translate/index.ts`, and focused tests. +- Prefer deterministic structural checks over semantic or language-specific heuristics. +- Preserve the current block-translation architecture; this iteration adds workflow gating for both outputs, not a block-translator redesign. + +## Implementation Plan + +1. Add translation reliability constants in `scripts/constants.ts`. + - `TRANSLATION_CHUNK_MAX_CHARS = 120_000` + - `TRANSLATION_MIN_CHUNK_MAX_CHARS = 8_000` + - `TRANSLATION_COMPLETENESS_MAX_RETRIES = 4` + - Keep existing model-context helpers, but stop using them as the only practical chunk ceiling for long-form markdown. + +2. 
Update `translateText()` in `scripts/notion-translate/translateFrontMatter.ts`. + - Compute an effective chunk limit from the new cap. + - Use the fast path only when the full request fits within that limit. + - Reuse the existing section, paragraph, and line splitters for proactive chunking. + +3. Add completeness detection in `scripts/notion-translate/translateFrontMatter.ts`. + - Collect structural metrics on source and translated markdown. + - Ignore YAML frontmatter content when counting list and structure markers. + - Ignore marker-like text inside fenced code blocks. + - Validate both single-call responses and reassembled chunked responses. + +4. Add recoverable completeness retries in `scripts/notion-translate/translateFrontMatter.ts`. + - Classify incompleteness as a non-critical retryable translation failure. + - Halve the chunk limit on each retry until the `8_000`-character floor. + - Stop retrying after 4 completeness retries and rethrow the failure as a non-critical document translation error. + +5. Ensure page-level workflow gating in `scripts/notion-translate/index.ts`. + - Fail the page before `createNotionPageWithBlocks()` and `saveTranslatedContentToDisk()` if full-page markdown translation fails completeness checks. + - Keep localized markdown and Notion-side page creation behavior consistent for the same source page. + - Continue processing remaining pages and languages, then fail the overall run from the summary when `failedTranslations > 0`. + +6. Add focused unit coverage in `scripts/notion-translate/translateFrontMatter.test.ts`. + - Proactive chunking for long-form content below model-derived maxima + - Retry when a valid response omits a middle section + - Failure after repeated incomplete responses + - Severe length shrinkage detection + - Frontmatter and fenced-code false-positive guards + - No regression for placeholder integrity and token-overflow fallback + +7. 
Add block-translation regression coverage in `scripts/notion-translate/translateBlocks.test.ts`. + - Keep existing coverage for URL sanitization, image mapping, inline-image path consumption, metadata stripping, and recursive child-block handling. + - Add one regression test proving the new completeness logic does not falsely reject normal short rich-text block translations routed through `translateText()`. + +8. Add workflow coverage in `scripts/notion-translate/index.test.ts`. + - Add a dedicated incompleteness test instead of relying only on the generic translation API error case. + - Mock `translateText()` to fail only after completeness retries are exhausted. + - Verify `failedTranslations` increments for each affected language and `failures.length` matches. + - Verify each failure entry includes `language`, source page `title`, `pageId`, the incompleteness error text, and `isCritical: false`. + - Verify `TRANSLATION_SUMMARY` is still emitted on failure. + - Verify `main()` rejects so the CLI exits non-zero. + - Verify Notion page creation/update and localized markdown save are not executed for the failed page. + +9. Run targeted validation on touched files only. 
+ - `bunx eslint scripts/constants.ts scripts/notion-translate/translateFrontMatter.ts scripts/notion-translate/index.ts scripts/notion-translate/translateFrontMatter.test.ts scripts/notion-translate/index.test.ts scripts/notion-translate/translateBlocks.test.ts --fix` + - `bunx prettier --write scripts/constants.ts scripts/notion-translate/translateFrontMatter.ts scripts/notion-translate/index.ts scripts/notion-translate/translateFrontMatter.test.ts scripts/notion-translate/index.test.ts scripts/notion-translate/translateBlocks.test.ts` + - `bunx vitest run scripts/notion-translate/translateFrontMatter.test.ts scripts/notion-translate/index.test.ts scripts/notion-translate/translateBlocks.test.ts` + +## Verification Criteria + +- Long-form markdown that previously fit the theoretical model limit is chunked once it exceeds the `120_000`-character cap. +- A structurally partial but schema-valid translation is retried instead of silently accepted. +- Persistent incompleteness surfaces as a page failure in `TRANSLATION_SUMMARY` and causes non-zero exit. +- A page that fails completeness checks produces neither a translated Notion page nor a saved localized markdown file. +- The workflow continues processing other pages and languages after the failed page, but the final run still exits non-zero when any document translation failed. +- Existing Notion block behavior remains intact, and short rich-text block translations do not regress under the shared `translateText()` reliability changes. +- Existing placeholder, image integrity, and overflow fallback behavior still pass after the change. + +## Risks and Mitigations + +1. **Risk: More aggressive chunking may reduce cross-section consistency.** + Mitigation: Keep structure-aware chunking and source the translated title from the first chunk only. + +2. 
**Risk: Structural checks may reject valid translations.** + Mitigation: Use conservative, deterministic signals and add false-positive regression tests for frontmatter and fenced code blocks. + +3. **Risk: Retry logic increases runtime and API cost.** + Mitigation: Only retry on completeness failures and keep the short-page fast path unchanged. + +4. **Risk: The root cause may later prove to exist in the block-translator path itself.** + Mitigation: This iteration gates both outputs on the validated full-page translation. A follow-up can add block-level completeness checks if evidence warrants it. + +## Ready-to-Implement Definition + +- Scope explicitly covers both localized markdown and Notion-side translated pages. +- Constants, heuristics, retry behavior, and failure semantics are fixed in the plan. +- The test matrix includes `translateFrontMatter.ts`, `translateBlocks.ts`, and `index.ts` coverage. +- No open design questions remain for the first implementation pass. diff --git a/plans/2026-03-25-fix-output-truncation-token-overflow-v1.md b/plans/2026-03-25-fix-output-truncation-token-overflow-v1.md new file mode 100644 index 00000000..97391366 --- /dev/null +++ b/plans/2026-03-25-fix-output-truncation-token-overflow-v1.md @@ -0,0 +1,48 @@ +# Fix: Output Truncation Classified as Non-Retryable Error + +## Objective + +When the OpenAI API returns a response with `finish_reason: "length"` (output token limit hit), the current code passes the truncated string to `JSON.parse`, which throws and is caught as a critical `schema_invalid` error — permanently killing translation for that page with no retry. The fix intercepts `finish_reason: "length"` before parsing and re-classifies it as a non-critical `token_overflow` error, so the existing overflow retry machinery in `translateChunkWithOverflowFallback` can re-attempt with a smaller chunk automatically. + +## Implementation Plan + +- [ ] Task 1. 
**Check `finish_reason` before calling `parseTranslationPayload` in `translateTextSingleCall`** + + In `scripts/notion-translate/translateFrontMatter.ts`, inside `translateTextSingleCall`, after the `response.choices[0]?.message?.content` read (currently around line 762), add a check for `finish_reason` on the same choice object. If it equals `"length"`, throw a `TranslationError` with code `"token_overflow"` and `isCritical: false`. The `token_overflow` code is the correct signal here: the existing handler in `translateChunkWithOverflowFallback` already detects this code and triggers a recursive split-and-retry. No new retry path needs to be written — the fix is purely a re-classification. + + The check must be placed **before** the `if (!content)` guard and the `parseTranslationPayload` call, so that a truncated-but-non-empty response is caught before `JSON.parse` sees it. + + Error message should be descriptive: `"OpenAI output was truncated (finish_reason: length) — chunk too large for model output budget"`. + +- [ ] Task 2. **Add two tests in `translateFrontMatter.test.ts` covering the new behaviour** + + **Test A — classification:** Mock `openai.chat.completions.create` to return an HTTP-200 response where `choices[0].finish_reason` is `"length"` and `choices[0].message.content` is a truncated JSON string (e.g. `'{"markdown":"partial content'`). Assert that `translateText` rejects with a `TranslationError` whose `code` is `"token_overflow"` and `isCritical` is `false`. This mirrors the pattern used in the existing `"continues to classify token overflow errors..."` test at line 338. + + **Test B — retry integration:** Mock the first call to return `finish_reason: "length"`, then let subsequent calls succeed (using `installStructuredTranslationMock` or a similar inline mock). Assert that `mockOpenAIChatCompletionCreate` is called more than once and that the final result contains the expected translated content. 
This mirrors the existing `"retries the fast path with adaptive splitting on token overflow"` test at line 413. + + Both tests go in the existing `describe("notion-translate translateFrontMatter", ...)` block, alongside the other classification and retry tests. + +- [ ] Task 3. **Run the test file and typecheck** + + Execute `bunx vitest run scripts/notion-translate/translateFrontMatter.test.ts` and confirm all tests pass, including the two new ones. Then run `bun run typecheck --noEmit` scoped to the changed files to confirm no TypeScript regressions. + +## Verification Criteria + +- `finish_reason: "length"` responses produce a `TranslationError` with `code: "token_overflow"` and `isCritical: false` — not `schema_invalid`. +- A subsequent retry is triggered automatically (call count > 1) when the first call returns `finish_reason: "length"`, without any changes to the retry orchestration logic. +- All existing tests in `translateFrontMatter.test.ts` continue to pass. +- TypeScript compilation produces no errors. + +## Potential Risks and Mitigations + +1. **Mock shape divergence**: The existing test mocks omit `finish_reason` (the field is `undefined`). The new check must only fire when `finish_reason === "length"` exactly, not when it is `undefined` or `"stop"`. A strict equality check (`=== "length"`) ensures backward compatibility with all existing mock responses. + Mitigation: Use strict equality; verify existing tests still pass after the change. + +2. **Infinite retry loop if chunk floor is already reached**: If `effectiveChunkLimit` is already at `TRANSLATION_MIN_CHUNK_MAX_CHARS` (8,000 chars) and the model still truncates output, the overflow fallback in `translateChunkWithOverflowFallback` detects that the chunk cannot be halved further and re-throws. This existing guard already handles the edge case correctly — no additional logic needed. 
+ Mitigation: Confirm by reading `translateChunkWithOverflowFallback` lines 844–858, which already enforce the floor before re-throwing. + +## Alternative Approaches + +1. **Pass `max_tokens` explicitly in the API call**: Setting a large `max_tokens` (e.g., 32,768) would prevent truncation at the API level rather than handling it after the fact. This is complementary but not a substitute — the `finish_reason` check is still needed for robustness against future model changes or misconfiguration, and adding `max_tokens` to `getModelParams` is a separate, independent concern that would affect all models and require its own testing. + +2. **Attempt JSON repair before throwing**: Libraries like `jsonrepair` can reconstruct truncated JSON. This would avoid a retry API call entirely but introduces a dependency, silently accepts partial translations (content after the truncation point is lost), and masks the real problem rather than triggering the retry+completeness-validation path that already exists. diff --git a/plans/2026-03-25-robust-translation-chunking-v1.md b/plans/2026-03-25-robust-translation-chunking-v1.md new file mode 100644 index 00000000..f7fa7aff --- /dev/null +++ b/plans/2026-03-25-robust-translation-chunking-v1.md @@ -0,0 +1,424 @@ +# Robust Translation Chunking & Completion-Budget Hardening + +## Objective + +Harden markdown translation against output truncation, unsafe chunk sizing, and configuration drift by making completion-budget control explicit, tightening chunk-size derivation, handling frontmatter without breaking chunk 0 budgeting, and adding runtime validation that matches the repo's parity requirements. + +This revision intentionally narrows scope to changes that are safe to implement on top of the current translation loop. Speculative mechanisms that would require a larger refactor are deferred. 
+ +## Status: Revised Before Implementation + +**Already completed in prior work:** +- Proactive chunking lowered from 500K to 120K chars +- Structural completeness validation (heading, code block, table, list, admonition checks) +- Completeness retry with halving chunk limits (up to 4 retries, 8K floor) +- `finish_reason: "length"` detection reclassified as `token_overflow` + +**In scope for this revision:** +- Explicit completion-budget configuration and request parameters +- Safer chunk-size derivation backed by small verified defaults plus env overrides +- Frontmatter-aware budgeting that does not break chunk 0 +- End-to-end propagation of `TRANSLATION_*` config through CLI, CI, and API job execution +- Stronger validation using targeted translation tests plus parity checks on real EN/PT/ES content + +**Deferred from v1:** +- Cross-chunk context injection +- Ratio-based targeted retry of individual chunks + +## Evidence Status + +There is no standalone saved research document for this plan. + +This revision is based on: +- current implementation review in `scripts/notion-translate/translateFrontMatter.ts` +- existing translation parity tracker and research map under `context/development/` +- current runtime/config paths in the API server and GitHub workflows +- current official model docs for the provider defaults this repo relies on + +Any mechanism not supported by those sources is treated as exploratory and kept out of the critical path. + +## Scope Boundaries + +**This plan hardens the markdown translation path only.** + +It does **not** claim full translation-parity closure across the entire pipeline. 
Full parity still depends on the broader backlog tracked in `context/development/translation-improvements-progress.md`, including: +- deterministic parity checker script +- broader `markdownToNotion` hardening +- broader `scripts/notion-fetch/*` locale-consistency work +- regression-gate coverage + +--- + +## Configuration Strategy + +### Authoritative Runtime Controls + +Use explicit runtime overrides first, then small verified model defaults, then conservative fallback with warning. + +| Variable | Default | Purpose | +|---|---|---| +| `OPENAI_MODEL` | current repo runtime default | Model selection only | +| `OPENAI_BASE_URL` | unset | Distinguishes OpenAI default API from custom/provider-compatible endpoints | +| `TRANSLATION_MAX_COMPLETION_TOKENS` | model-derived if verified, else conservative fallback | First-class output budget control | +| `TRANSLATION_CONTEXT_LIMIT` | model-derived if verified, else conservative fallback | Optional input-context override | +| `TRANSLATION_CHUNK_MAX_CHARS` | derived | Optional hard ceiling override | +| `TRANSLATION_MIN_CHUNK_CHARS` | `8000` | Retry floor for chunk halving | +| `TRANSLATION_COMPLETENESS_MAX_RETRIES` | `4` | Max completeness retry rounds | +| `TRANSLATION_JSON_ESCAPE_OVERHEAD` | `0.5` | Estimated JSON/escaping overhead | +| `TRANSLATION_CHARS_PER_TOKEN` | `3.5` | Conservative chars-per-token estimate | + +### Defaulting Policy + +1. `TRANSLATION_MAX_COMPLETION_TOKENS` is the primary control. +2. For the repo's known default models, maintain a **small verified table** in code. +3. For custom/self-hosted/OpenAI-compatible providers, require explicit overrides when the model is not in the verified table. +4. When falling back to a guessed/conservative default, log a warning to stderr so operators know the budget is not authoritative. 
+ +### Concrete Conservative Fallbacks + +Use these exact values when no verified model default or explicit override exists: + +- `conservativeCompletionFallback = 8192` +- `conservativeContextFallback = 128000` + +Rationale: +- `8192` is conservative relative to the repo's current OpenAI defaults, but still large enough to avoid absurdly small proactive chunks. With the plan's default `3.5` chars/token and `0.5` escape overhead, it yields about `14,336` safe output chars before the context guardrail applies. +- `128000` matches the repo's current conservative unknown-model context fallback and common modern model context windows, while still being secondary to the output-budget cap for chunk sizing. + +Required warning rules: +- If either fallback is used, log the active model name and the fallback values being applied. +- If `OPENAI_BASE_URL` is set and either fallback is used, log a stronger warning that custom/provider-compatible deployments should set `TRANSLATION_MAX_COMPLETION_TOKENS` and `TRANSLATION_CONTEXT_LIMIT` explicitly. +- Do not silently treat fallback-derived limits as verified provider capabilities. + +### Minimal Verified Model Defaults + +Do not ship a large speculative table. + +Keep the built-in defaults limited to models the repo actively documents or defaults to, and annotate each entry as verified. Everything else must rely on explicit overrides. + +### Budget Derivation + +When `TRANSLATION_CHUNK_MAX_CHARS` is not set explicitly: + +```ts +completionCap = env.TRANSLATION_MAX_COMPLETION_TOKENS + ?? getVerifiedModelCompletionCap(model) + ?? conservativeCompletionFallback; + +contextLimit = env.TRANSLATION_CONTEXT_LIMIT + ?? getVerifiedModelContextLimit(model) + ?? conservativeContextFallback; + +charsPerToken = env.TRANSLATION_CHARS_PER_TOKEN ?? 3.5; +escapeOverhead = env.TRANSLATION_JSON_ESCAPE_OVERHEAD ?? 
0.5; + +safeOutputChars = Math.floor( + completionCap * charsPerToken * (1 - escapeOverhead) +); + +inputBudget = Math.floor(contextLimit * charsPerToken / 2); + +chunkMaxChars = Math.min(safeOutputChars, inputBudget); +``` + +This derived limit must then still be capped by any explicit `TRANSLATION_CHUNK_MAX_CHARS` override. + +--- + +## Implementation Plan + +### Phase 0: Default-Model Decision & Operator Clarity + +- [ ] **0.1** Choose one authoritative runtime default model and document it consistently + + The code currently defaults to `gpt-5-mini`, but repo docs disagree in multiple places. Before relying on model-derived defaults, update all operator-facing references so the default model is unambiguous. + +- [ ] **0.2** Add an evidence note to the implementation PR/task summary + + State explicitly that this revision is based on code inspection, current provider docs, and parity requirements, not on a saved standalone research memo. + +### Phase 1: Explicit Completion Budget & Provider-Aware Request Params + +- [ ] **1.1** Add completion-budget helpers in `scripts/constants.ts` + + Replace the planned large `MODEL_OUTPUT_LIMITS` table with: + - a small verified completion-cap lookup for the repo's documented/default models + - a context-limit lookup + - env-first helpers for `TRANSLATION_MAX_COMPLETION_TOKENS` and `TRANSLATION_CONTEXT_LIMIT` + - warning-backed conservative fallbacks for unknown/custom models + +- [ ] **1.2** Pass explicit completion-budget params in the OpenAI request path + + In `scripts/notion-translate/translateFrontMatter.ts`, update the request builder so the API call sets an explicit output cap: + - OpenAI default API: `max_completion_tokens` + - custom/OpenAI-compatible providers: provider-compatible equivalent where supported (`max_tokens` if that is the documented parameter for the target path) + + This must be wired from the same effective completion-budget helper used by chunk-size derivation. 
+ +- [ ] **1.3** Rewrite `getMaxChunkChars` around completion cap, not context alone + + The current formula is context-heavy and does not reflect response-size risk. Replace it with the completion-aware derivation above. + +- [ ] **1.4** Validate env parsing and fallback behavior + + Parse all numeric env vars with validation: + - positive integers for token/char/retry limits + - 0-1 range for `TRANSLATION_JSON_ESCAPE_OVERHEAD` + - positive float for `TRANSLATION_CHARS_PER_TOKEN` + + Invalid values should warn and fall back safely. + +- [ ] **1.5** Add/update tests for completion-budget behavior + + Add focused tests that prove: + - env overrides win over model defaults + - unknown/custom models warn and use conservative fallbacks + - `getMaxChunkChars` is derived from completion budget, not context alone + - request params include explicit completion-budget controls + +### Phase 2: Frontmatter-Aware Budgeting Without Changing Translation Semantics + +- [ ] **2.1** Replace destructive stripping with explicit extraction + + Add an `extractYamlFrontmatter()` helper that returns: + + ```ts + { frontmatter: string; body: string } + ``` + + Do not reuse `stripYamlFrontmatter()` for this purpose, because it discards the data needed for reconstruction. + +- [ ] **2.2** Reserve frontmatter budget before body splitting + + Keep frontmatter in the first translation request, but subtract its size from chunk 0's content budget **before** splitting the body. 
+ + Required behavior: + - body-only content is split using chunk budgets that account for chunk 0 frontmatter overhead + - chunk 0 is reconstructed as `frontmatter + firstBodyChunk` + - later chunks remain body-only + - completeness validation continues to evaluate body content, not frontmatter noise + +- [ ] **2.3** Define the oversize-frontmatter edge case explicitly + + If frontmatter alone consumes nearly all of chunk 0's budget: + - log a warning + - bypass frontmatter-aware proactive splitting for that document + - rely on the existing overflow/completeness fallback rather than introducing a second special-case splitter + +- [ ] **2.4** Add focused tests for frontmatter-aware budgeting + + Verify: + - frontmatter appears only in the first API request + - chunk 0 stays within the intended total request budget + - the final output retains translated frontmatter and complete body content + - frontmatter-only validation noise does not trigger false incompleteness + +### Phase 3: Runtime Propagation of Translation Tuning + +**Supported execution paths for this revision:** +- local CLI translation runs +- GitHub Actions translation workflow +- API-triggered translation jobs launched from this repo + +**Out of scope for this revision:** +- deployed API-service runtime translation on Fly/production infrastructure + +That deployed runtime can be revisited later, but it should not block this markdown-translation hardening pass. + +- [ ] **3.1** Propagate `TRANSLATION_*` vars through API-triggered translation jobs + + Update `api-server/job-executor.ts` so child process env whitelisting includes the new translation-tuning vars. + +- [ ] **3.2** Add API env propagation tests + + Update `api-server/job-executor-env.test.ts` to verify the new vars are preserved for child jobs. 
+ +- [ ] **3.3** Support translation overrides in GitHub Actions via non-secret workflow config + + Update `.github/workflows/translate-docs.yml` so `TRANSLATION_*` tuning values can be supplied from workflow/repository variables or workflow env. These values are configuration, not secrets. + +- [ ] **3.4** Explicitly exclude deployed API-service runtime translation from this plan + + Update docs/scope notes so this revision guarantees `TRANSLATION_*` support for CLI, GitHub Actions, and repo-local API jobs only. Do not expand Fly/deployed runtime secret propagation in this change set. + +### Phase 4: Diagnostics Instead of New Chunk-Control Heuristics + +- [ ] **4.1** Add per-chunk ratio telemetry only + + Record per-chunk input/output ratios for diagnostics, but do **not** add ratio-based control flow in v1. + + This telemetry can be logged in a debug-friendly structure for failed/incomplete translations and used to inform future research. + +- [ ] **4.2** Keep recovery behavior unchanged + + Continue using the existing whole-document completeness retry with smaller chunk limits. Do not add targeted retry of individual chunks in this revision. + +- [ ] **4.3** Explicitly defer cross-chunk context injection + + Do not prepend synthetic context to chunks in v1. That would require a cleaner separation between: + - content to translate + - context supplied to the model + - placeholder integrity checks + - overflow fallback splitting + + That refactor is larger than this plan. 
+ +### Phase 5: Validation That Matches the Repo's Parity Contract + +- [ ] **5.1** Extend targeted translation tests + + Add/update focused tests in `scripts/notion-translate/translateFrontMatter.test.ts` for: + - explicit completion-budget request parameters + - env override precedence and invalid-value fallback + - frontmatter-aware budgeting + - no regression in overflow fallback and completeness retry + +- [ ] **5.2** Add an executable parity checker path + + The repo already has parity logic in `scripts/locale-parity.test.ts`, but it is trapped inside test-only fixtures. Extract or wrap the parity collector into an executable checker that can run against real repo content. + +- [ ] **5.3** Run one targeted parity validation on real content + + Validation must include: + - one targeted family known to have failed before + - one sampled family that currently succeeds + - frontmatter parity enabled if frontmatter handling changes + + Record results in `context/development/translation-improvements-progress.md`. + +- [ ] **5.4** Keep success claims narrow + + This plan is complete when markdown translation hardening is validated. Do not claim full pipeline parity closure unless the remaining backlog in `translation-improvements-progress.md` is also addressed. + +### Phase 6: Documentation Sweep + +- [ ] **6.1** Update `env-file` with an OpenAI/translation section + + `env-file` currently has no OpenAI section. Add one and include commented `TRANSLATION_*` examples in the same operator-facing location as `OPENAI_*`. + +- [ ] **6.2** Update operator docs + + Update at least: + - `SETUP.md` + - `context/workflows/translation-process.md` + - `context/api-server/reference.md` + + Update deployment docs as well if API-runtime translation remains in scope. + +- [ ] **6.3** Reconcile all default-model references + + After Phase 0's decision, update every operator-facing mention so docs stop disagreeing about the default model. 
+ +### Phase 7: Verification Commands + +- [ ] **7.1** Run targeted checks on touched files + + Execute only the checks relevant to the files changed by implementation, for example: + + ```bash + bunx vitest run scripts/notion-translate/translateFrontMatter.test.ts + bunx vitest run scripts/constants.test.ts + bunx vitest run api-server/job-executor-env.test.ts + bunx vitest run scripts/locale-parity.test.ts + bun run typecheck --noEmit + bunx eslint --fix + bunx prettier --write + ``` + +--- + +## Verification Criteria + +The revised implementation is acceptable only if all of the following are true: + +- Translation requests use an explicit completion-budget parameter appropriate for the active provider path. +- Chunk sizing is derived from effective completion budget plus context guardrails, not from context size alone. +- Unknown/custom models do not silently rely on speculative limits; they warn and use explicit override paths. +- Frontmatter-aware chunking keeps frontmatter in the first request without allowing chunk 0 to exceed the intended budget. +- Existing overflow fallback and completeness retry continue to work. +- `TRANSLATION_*` overrides work in local CLI runs, the GitHub Actions translation workflow, and API-triggered translation jobs launched from this repo. +- Targeted translation tests pass. +- Parity validation is run on real EN/PT/ES content and recorded in `translation-improvements-progress.md`. +- Success claims remain limited to markdown translation hardening unless broader parity backlog items are also completed. + +--- + +## Risks and Mitigations + +1. **Provider parameter differences** + Mitigation: keep request-param handling explicit and provider-aware rather than assuming one universal output-token field. + +2. **Model tables become stale** + Mitigation: keep the built-in table intentionally small, verified, and override-friendly. + +3. 
**Frontmatter changes still create edge-case budget pressure** + Mitigation: reserve chunk 0 budget before splitting and define a fallback path for oversize frontmatter instead of inventing a second complex splitter. + +4. **Parity claims outpace actual pipeline coverage** + Mitigation: require one real parity run and record it, but keep the scope statement narrow. + +--- + +## Deferred Follow-Ups + +These are intentionally **not** part of this implementation: + +1. **Cross-chunk context injection** + Requires a cleaner translation contract so synthetic context is not treated as chunk content by placeholder validation, overflow fallback, or completeness checks. + +2. **Ratio-based targeted retry** + Requires a chunk-manifest model and replace-in-place reassembly logic that does not exist today. + +3. **Full pipeline parity closure** + Still depends on backlog work outside `translateFrontMatter.ts`. + +--- + +## File Change Map + +| File | Changes | +|---|---| +| `scripts/constants.ts` | Add effective completion/context budget helpers and env parsing | +| `scripts/constants.test.ts` | Add tests for env overrides, fallbacks, and completion-aware sizing | +| `scripts/notion-translate/translateFrontMatter.ts` | Add explicit completion-budget params, frontmatter-aware budgeting, ratio telemetry only | +| `scripts/notion-translate/translateFrontMatter.test.ts` | Add tests for provider-budget params and frontmatter-aware budgeting | +| `api-server/job-executor.ts` | Propagate `TRANSLATION_*` env vars to child jobs | +| `api-server/job-executor-env.test.ts` | Verify env propagation for translation tuning | +| `scripts/locale-parity.test.ts` or extracted shared module | Reuse parity logic through an executable path | +| `env-file` | Add OpenAI/translation env examples | +| `SETUP.md` | Update translation model/default and `TRANSLATION_*` docs | +| `context/workflows/translation-process.md` | Document translation tuning and validation expectations | +| 
`context/api-server/reference.md` | Document translation-related child-env/runtime config | + +--- + +## Progress Tracking + +| Phase | Task | Status | +|---|---|---| +| **0 — Defaults** | 0.1 Authoritative default model decision | Not Started | +| | 0.2 Evidence note in implementation summary | Not Started | +| **1 — Budget** | 1.1 Completion-budget helpers | Not Started | +| | 1.2 Explicit request completion params | Not Started | +| | 1.3 Rewrite `getMaxChunkChars` | Not Started | +| | 1.4 Env validation | Not Started | +| | 1.5 Budget tests | Not Started | +| **2 — Frontmatter** | 2.1 Extract helper | Not Started | +| | 2.2 Frontmatter-aware budgeting | Not Started | +| | 2.3 Oversize-frontmatter fallback | Not Started | +| | 2.4 Frontmatter tests | Not Started | +| **3 — Runtime** | 3.1 API env propagation | Not Started | +| | 3.2 API env tests | Not Started | +| | 3.3 CI override decision | Not Started | +| | 3.4 API-runtime scope decision | Not Started | +| **4 — Diagnostics** | 4.1 Ratio telemetry | Not Started | +| | 4.2 Keep existing recovery path | Not Started | +| | 4.3 Defer context injection | Not Started | +| **5 — Validation** | 5.1 Targeted translation tests | Not Started | +| | 5.2 Executable parity checker path | Not Started | +| | 5.3 Real parity run + tracker update | Not Started | +| | 5.4 Keep success claims narrow | Not Started | +| **6 — Docs** | 6.1 Update `env-file` | Not Started | +| | 6.2 Update operator docs | Not Started | +| | 6.3 Reconcile default-model references | Not Started | +| **7 — Checks** | 7.1 Targeted verification commands | Not Started | diff --git a/scripts/constants.ts b/scripts/constants.ts index 020f4c9c..33c40e3e 100644 --- a/scripts/constants.ts +++ b/scripts/constants.ts @@ -182,10 +182,22 @@ export const ENGLISH_DIR_SAVE_ERROR = // Translation retry configuration export const TRANSLATION_MAX_RETRIES = 3; export const TRANSLATION_RETRY_BASE_DELAY_MS = 750; -/** Max characters per translation chunk. 
- * Targets ~143K tokens (500K chars / 3.5 chars per token). - * Leaves generous buffer within OpenAI's 272K structured-output limit. */ -export const TRANSLATION_CHUNK_MAX_CHARS = 500_000; +/** + * Reliability-oriented cap for proactive markdown translation chunking. + * This keeps long-form docs away from the model's theoretical context ceiling, + * even when the model advertises a much larger maximum context window. + */ +export const TRANSLATION_CHUNK_MAX_CHARS = 120_000; +/** Smallest total-budget chunk size used when retrying incomplete translations. */ +export const TRANSLATION_MIN_CHUNK_MAX_CHARS = 8_000; +/** + * Maximum times to retry with smaller chunks after completeness checks fail. + * Each retry halves the chunk limit. Starting from 120 K chars: + * 120k → 60k → 30k → 15k → 8k (floor) + * Four halvings are needed to descend from the default cap to the 8k floor, + * so this must be at least 4. + */ +export const TRANSLATION_COMPLETENESS_MAX_RETRIES = 4; // URL handling export const INVALID_URL_PLACEHOLDER = diff --git a/scripts/notion-translate/index.test.ts b/scripts/notion-translate/index.test.ts index 4cdcad63..ef66d2ae 100644 --- a/scripts/notion-translate/index.test.ts +++ b/scripts/notion-translate/index.test.ts @@ -1,3 +1,4 @@ +import path from "path"; import { describe, it, expect, beforeEach, afterEach, vi } from "vitest"; import { createMockNotionPage, installTestNotionEnv } from "../test-utils"; @@ -685,8 +686,16 @@ describe("notion-translate index", () => { it("exits with failure on partial doc translation failures and reports counts", async () => { mockTranslateText.mockImplementation( async (_markdown: string, _title: string, targetLanguage: string) => { - if (targetLanguage === "es") { - throw new Error("es translation failed"); + if (targetLanguage === "pt-BR") { + throw Object.assign( + new Error( + "Translated markdown appears incomplete after chunk reassembly" + ), + { + code: "unexpected_error", + isCritical: false, + } + ); } 
return { markdown: "# translated", @@ -708,10 +717,37 @@ describe("notion-translate index", () => { totalEnglishPages: 1, processedLanguages: 2, failedTranslations: 1, + newTranslations: 1, + updatedTranslations: 0, + skippedTranslations: 0, codeJsonFailures: 0, themeFailures: 0, }); expect(loggedSummary.failures).toHaveLength(1); + expect(loggedSummary.failures[0]).toMatchObject({ + language: "pt-BR", + title: "Hello World", + pageId: "english-page-1", + error: "Translated markdown appears incomplete after chunk reassembly", + isCritical: false, + }); + + const failedDocPath = path.join( + "i18n", + "pt", + "docusaurus-plugin-content-docs", + "current", + "hello-world-englishpage1.md" + ); + expect( + mockNotionPagesCreate.mock.calls.length + + mockNotionPagesUpdate.mock.calls.length + ).toBe(1); + expect( + mockWriteFile.mock.calls.some( + ([filePath]) => String(filePath) === failedDocPath + ) + ).toBe(false); }); it("does not block translation for generic signed amazonaws links outside Notion image URL families", async () => { diff --git a/scripts/notion-translate/translateBlocks.test.ts b/scripts/notion-translate/translateBlocks.test.ts index 0dd0c957..322cb175 100644 --- a/scripts/notion-translate/translateBlocks.test.ts +++ b/scripts/notion-translate/translateBlocks.test.ts @@ -206,6 +206,51 @@ describe("translateNotionBlocksDirectly", () => { expect(callout.rich_text[0].text.content).toBe("static/images/block.png"); }); + it("keeps short rich-text paragraph translation intact", async () => { + mockBlocksChildrenList.mockResolvedValue( + blocksResponse([ + { + id: "b7", + type: "paragraph", + paragraph: { + rich_text: [ + { + type: "text", + text: { content: "Short paragraph content" }, + plain_text: "Short paragraph content", + }, + ], + }, + has_children: false, + }, + ]) + ); + + mockTranslateText.mockResolvedValue({ + markdown: "Parágrafo curto traduzido", + title: "", + }); + + const { translateNotionBlocksDirectly } = await import("./translateBlocks"); 
+ const result = await translateNotionBlocksDirectly("page-id", "pt-BR"); + + const block = result[0] as Record<string, unknown>; + expect(block.type).toBe("paragraph"); + const paragraph = block.paragraph as { + rich_text: Array<{ text: { content: string }; plain_text: string }>; + }; + expect(paragraph.rich_text[0].text.content).toBe( + "Parágrafo curto traduzido" + ); + expect(paragraph.rich_text[0].plain_text).toBe("Parágrafo curto traduzido"); + expect(mockTranslateText).toHaveBeenCalledTimes(1); + expect(mockTranslateText).toHaveBeenCalledWith( + "Short paragraph content", + "", + "pt-BR" + ); + }); + it("strips Notion-internal metadata fields from output blocks", async () => { + mockBlocksChildrenList.mockResolvedValue( + blocksResponse([ diff --git a/scripts/notion-translate/translateFrontMatter.test.ts b/scripts/notion-translate/translateFrontMatter.test.ts index f1351017..3b577570 100644 --- a/scripts/notion-translate/translateFrontMatter.test.ts +++ b/scripts/notion-translate/translateFrontMatter.test.ts @@ -5,6 +5,58 @@ import { } from "./test-openai-mock"; import { installTestNotionEnv } from "../test-utils"; +type MockOpenAIRequest = { + messages?: Array<{ role: string; content: string }>; +}; + +function extractPromptMarkdown(request: MockOpenAIRequest): { + title: string; + markdown: string; +} { + const userPrompt = + request.messages?.find((message) => message.role === "user")?.content ?? ""; + const titleMatch = userPrompt.match(/^title:\s*(.*)$/m); + const markdownMarker = "\nmarkdown: "; + const markdownIndex = userPrompt.indexOf(markdownMarker); + + return { + title: titleMatch?.[1] ?? "", + markdown: + markdownIndex >= 0 + ? 
userPrompt.slice(markdownIndex + markdownMarker.length) + : "", + }; +} + +function installStructuredTranslationMock( + mapResponse?: (payload: { title: string; markdown: string }) => { + title: string; + markdown: string; + } +) { + mockOpenAIChatCompletionCreate.mockImplementation( + async (request: MockOpenAIRequest) => { + const payload = extractPromptMarkdown(request); + const translated = mapResponse + ? mapResponse(payload) + : { + title: payload.title ? `Translated ${payload.title}` : "", + markdown: payload.markdown, + }; + + return { + choices: [ + { + message: { + content: JSON.stringify(translated), + }, + }, + ], + }; + } + ); +} + describe("notion-translate translateFrontMatter", () => { let restoreEnv: () => void; @@ -55,7 +107,390 @@ describe("notion-translate translateFrontMatter", () => { ); }); - it("classifies token overflow errors as non-critical token_overflow code", async () => { + it("chunks long-form content proactively below model-derived maximums", async () => { + const { translateText } = await import("./translateFrontMatter"); + installStructuredTranslationMock(); + + const largeContent = + "# Section One\n\n" + + "word ".repeat(14_000) + + "\n# Section Two\n\n" + + "word ".repeat(14_000); + + const result = await translateText(largeContent, "Large Page", "pt-BR"); + + expect(mockOpenAIChatCompletionCreate.mock.calls.length).toBeGreaterThan(1); + expect(result.markdown).toContain("# Section Two"); + }); + + it("retries with smaller chunks when a valid response omits a section", async () => { + const { translateText } = await import("./translateFrontMatter"); + + const source = + "# Section One\n\n" + + "Alpha paragraph.\n\n" + + "# Section Two\n\n" + + "Beta paragraph.\n\n" + + "# Section Three\n\n" + + "Gamma paragraph.\n\n" + + "# Section Four\n\n" + + "Delta paragraph."; + + mockOpenAIChatCompletionCreate + .mockResolvedValueOnce({ + choices: [ + { + message: { + content: JSON.stringify({ + markdown: + "# Seção Um\n\nParágrafo 
alfa.\n\n# Seção Quatro\n\nParágrafo delta.", + title: "Título Traduzido", + }), + }, + }, + ], + }) + .mockResolvedValue({ + choices: [ + { + message: { + content: JSON.stringify({ + markdown: + "# Seção Um\n\nParágrafo alfa.\n\n# Seção Dois\n\nParágrafo beta.\n\n# Seção Três\n\nParágrafo gama.\n\n# Seção Quatro\n\nParágrafo delta.", + title: "Título Traduzido", + }), + }, + }, + ], + }); + + const result = await translateText(source, "Original Title", "pt-BR", { + chunkLimit: 8_500, + }); + + expect(mockOpenAIChatCompletionCreate).toHaveBeenCalledTimes(2); + expect(result.markdown).toContain("# Seção Dois"); + expect(result.title).toBe("Título Traduzido"); + }); + + it("fails when repeated completeness retries still return incomplete content", async () => { + const { translateText } = await import("./translateFrontMatter"); + + const source = + "# Section One\n\n" + + "Alpha paragraph.\n\n" + + "# Section Two\n\n" + + "Beta paragraph.\n\n" + + "# Section Three\n\n" + + "Gamma paragraph.\n\n" + + "# Section Four\n\n" + + "Delta paragraph."; + + mockOpenAIChatCompletionCreate.mockImplementation(async () => ({ + choices: [ + { + message: { + content: JSON.stringify({ + markdown: + "# Seção Um\n\nParágrafo alfa.\n\n# Seção Quatro\n\nParágrafo delta.", + title: "Título Traduzido", + }), + }, + }, + ], + })); + + await expect( + translateText(source, "Original Title", "pt-BR", { + chunkLimit: 8_500, + }) + ).rejects.toEqual( + expect.objectContaining({ + code: "unexpected_error", + isCritical: false, + }) + ); + expect(mockOpenAIChatCompletionCreate.mock.calls.length).toBeGreaterThan(1); + }); + + it("does not count bullet lists inside YAML frontmatter towards structure validation", async () => { + const { translateText } = await import("./translateFrontMatter"); + + const source = + "---\n" + + "title: Page\n" + + "keywords:\n" + + " - one\n" + + " - two\n" + + " - three\n" + + " - four\n" + + "---\n\n" + + "# Section One\n\n" + + "Body paragraph."; + + // The 
translated version turns the keywords list into an inline array + mockOpenAIChatCompletionCreate.mockResolvedValueOnce({ + choices: [ + { + message: { + content: JSON.stringify({ + markdown: + "---\n" + + "title: Page\n" + + "keywords: [one, two, three, four]\n" + + "---\n\n" + + "# Seção Um\n\n" + + "Parágrafo do corpo.", + title: "Página", + }), + }, + }, + ], + }); + + const result = await translateText(source, "Original Title", "pt-BR"); + + expect(mockOpenAIChatCompletionCreate).toHaveBeenCalledTimes(1); + expect(result.markdown).toContain("Seção Um"); + }); + + it("treats heavy structural shrinkage as incomplete long-form translation", async () => { + const { translateText } = await import("./translateFrontMatter"); + + const source = + "# Long Section\n\n" + + Array.from( + { length: 160 }, + (_, index) => `Paragraph ${index} with repeated explanatory content.` + ).join("\n\n"); + + mockOpenAIChatCompletionCreate + .mockResolvedValueOnce({ + choices: [ + { + message: { + content: JSON.stringify({ + markdown: "# Seção Longa\n\nResumo curto.", + title: "Título Traduzido", + }), + }, + }, + ], + }) + .mockImplementation(async (request: MockOpenAIRequest) => { + const payload = extractPromptMarkdown(request); + return { + choices: [ + { + message: { + content: JSON.stringify({ + markdown: payload.markdown.replace(/Paragraph/g, "Parágrafo"), + title: "Título Traduzido", + }), + }, + }, + ], + }; + }); + + const result = await translateText(source, "Original Title", "pt-BR", { + chunkLimit: 25_000, + }); + + expect(mockOpenAIChatCompletionCreate).toHaveBeenCalledTimes(2); + expect(result.markdown.length).toBeGreaterThan(4_000); + }); + + it("does not count marker-like text inside fenced code blocks toward completeness checks", async () => { + const { translateText } = await import("./translateFrontMatter"); + + const source = + "# Section One\n\n" + + "```md\n" + + "# not a real heading\n" + + "- fake bullet\n" + + "1. 
fake number\n" + + ":::note\n" + + "table | row\n" + + "```\n\n" + + "Plain paragraph."; + + mockOpenAIChatCompletionCreate.mockResolvedValueOnce({ + choices: [ + { + message: { + content: JSON.stringify({ + markdown: + "# Seção Um\n\n```md\n" + + "not a real heading\n" + + "fake bullet\n" + + "fake number\n" + + ":::note\n" + + "table | row\n" + + "```\n\n" + + "Parágrafo simples.", + title: "Título Traduzido", + }), + }, + }, + ], + }); + + const result = await translateText(source, "Original Title", "pt-BR"); + + expect(mockOpenAIChatCompletionCreate).toHaveBeenCalledTimes(1); + expect(result.markdown).toContain("Parágrafo simples."); + expect(result.markdown).toContain("not a real heading"); + }); + + it("retries when an indented fenced block is dropped during translation", async () => { + const { translateText } = await import("./translateFrontMatter"); + + const source = + "# Section One\n\n" + + "- Item one\n\n" + + " ```js\n" + + " console.log('keep me');\n" + + " ```\n\n" + + "Plain paragraph."; + + mockOpenAIChatCompletionCreate + .mockResolvedValueOnce({ + choices: [ + { + message: { + content: JSON.stringify({ + markdown: + "# Seção Um\n\n" + "- Item um\n\n" + "Plain paragraph.", + title: "Título Traduzido", + }), + }, + }, + ], + }) + .mockResolvedValueOnce({ + choices: [ + { + message: { + content: JSON.stringify({ + markdown: + "# Seção Um\n\n" + + "- Item um\n\n" + + " ```js\n" + + " console.log('keep me');\n" + + " ```\n\n" + + "Parágrafo simples.", + title: "Título Traduzido", + }), + }, + }, + ], + }); + + const result = await translateText(source, "Original Title", "pt-BR"); + + expect(mockOpenAIChatCompletionCreate).toHaveBeenCalledTimes(2); + expect(result.markdown).toContain("console.log('keep me');"); + expect(result.markdown).toContain("Parágrafo simples."); + }); + + it("retries chunked translations when the reassembled markdown is structurally incomplete", async () => { + const { translateText } = await import("./translateFrontMatter"); + 
+ const source = + "# Section One\n\n" + + "- Item one A\n" + + "- Item one B\n\n" + + "Alpha ".repeat(500) + + "\n\n# Section Two\n\n" + + "- Item two A\n" + + "- Item two B\n\n" + + "Beta ".repeat(500); + + let callCount = 0; + mockOpenAIChatCompletionCreate.mockImplementation( + async (request: MockOpenAIRequest) => { + callCount++; + const payload = extractPromptMarkdown(request); + const translated = + callCount <= 2 + ? { + title: "Título Traduzido", + markdown: payload.markdown + .replace("# Section One", "# Seção Um") + .replace("# Section Two", "# Seção Dois") + .replace(/^- /gm, "") + .replace(/Alpha/g, "Alfa") + .replace(/Beta/g, "Beta") + .replace(/Gamma/g, "Gama"), + } + : { + title: "Título Traduzido", + markdown: payload.markdown + .replace("# Section One", "# Seção Um") + .replace("# Section Two", "# Seção Dois") + .replace(/Alpha/g, "Alfa") + .replace(/Beta/g, "Beta") + .replace(/Gamma/g, "Gama"), + }; + + return { + choices: [ + { + message: { + content: JSON.stringify(translated), + }, + }, + ], + }; + } + ); + + const result = await translateText(source, "Original Title", "pt-BR", { + chunkLimit: 8_500, + }); + + expect(callCount).toBeGreaterThan(2); + expect(result.markdown).toContain("Item one A"); + expect(result.markdown).toContain("Item two B"); + expect(result.markdown).toContain("# Seção Dois"); + }); + + it("preserves complete heading structures when chunking by sections", async () => { + const { translateText } = await import("./translateFrontMatter"); + installStructuredTranslationMock(({ title, markdown }) => ({ + title: title ? 
`Translated ${title}` : "", + markdown: markdown + .replace("# Section One", "# Seção Um") + .replace("# Section Two", "# Seção Dois") + .replace("# Section Three", "# Seção Três") + .replace(/Alpha/g, "Alfa") + .replace(/Gamma/g, "Gama"), + })); + + const source = + "# Section One\n\n" + + "Alpha ".repeat(60) + + "\n\n# Section Two\n\n" + + "Beta ".repeat(60) + + "\n\n# Section Three\n\n" + + "Gamma ".repeat(60); + + // chunkLimit is the *total* request budget (prompt overhead + markdown). + // Prompt overhead is ~2.6 K chars; a 3_200 limit leaves ~587 chars of + // markdown per chunk, which fits one 375-char section but not two — so + // the three sections produce exactly three API calls. + const result = await translateText(source, "Original Title", "pt-BR", { + chunkLimit: 3_200, + }); + + expect(mockOpenAIChatCompletionCreate).toHaveBeenCalledTimes(3); + expect(result.markdown).toContain("# Seção Um"); + expect(result.markdown).toContain("# Seção Dois"); + expect(result.markdown).toContain("# Seção Três"); + }); + + it("continues to classify token overflow errors as non-critical token_overflow code", async () => { const { translateText } = await import("./translateFrontMatter"); mockOpenAIChatCompletionCreate.mockRejectedValue({ @@ -89,8 +524,73 @@ describe("notion-translate translateFrontMatter", () => { ); }); + it("classifies finish_reason:length as non-critical token_overflow", async () => { + const { translateText } = await import("./translateFrontMatter"); + + mockOpenAIChatCompletionCreate.mockResolvedValue({ + choices: [ + { + finish_reason: "length", + message: { + content: '{"markdown":"partial content', + }, + }, + ], + }); + + await expect(translateText("# Body", "Title", "pt-BR")).rejects.toEqual( + expect.objectContaining({ + code: "token_overflow", + isCritical: false, + }) + ); + }); + + it("retries with smaller chunks when finish_reason:length is returned", async () => { + const { translateText } = await import("./translateFrontMatter"); + + 
mockOpenAIChatCompletionCreate + .mockResolvedValueOnce({ + choices: [ + { + finish_reason: "length", + message: { + content: '{"markdown":"partial content', + }, + }, + ], + }) + .mockImplementation(async (request: MockOpenAIRequest) => { + const payload = extractPromptMarkdown(request); + return { + choices: [ + { + finish_reason: "stop", + message: { + content: JSON.stringify({ + markdown: payload.markdown, + title: "Translated Title", + }), + }, + }, + ], + }; + }); + + const result = await translateText( + "# Small page\n\nJust a paragraph.", + "Small", + "pt-BR" + ); + + expect(mockOpenAIChatCompletionCreate.mock.calls.length).toBeGreaterThan(1); + expect(result.title).toBe("Translated Title"); + expect(result.markdown).toContain("Just a paragraph."); + }); + it("takes the single-call fast path for small content", async () => { const { translateText } = await import("./translateFrontMatter"); + installStructuredTranslationMock(); const result = await translateText( "# Small page\n\nJust a paragraph.", @@ -99,14 +599,15 @@ describe("notion-translate translateFrontMatter", () => { ); expect(mockOpenAIChatCompletionCreate).toHaveBeenCalledTimes(1); - expect(result.title).toBe("Mock Title"); - expect(result.markdown).toBe("# translated\n\nMock content"); + expect(result.title).toBe("Translated Small"); + expect(result.markdown).toBe("# Small page\n\nJust a paragraph."); }); it("chunks large content and calls the API once per chunk", async () => { const { translateText, splitMarkdownIntoChunks } = await import( "./translateFrontMatter" ); + installStructuredTranslationMock(); // Build content that is larger than the chunk threshold const bigSection1 = "# Section One\n\n" + "word ".repeat(100_000); @@ -123,8 +624,8 @@ describe("notion-translate translateFrontMatter", () => { expect( mockOpenAIChatCompletionCreate.mock.calls.length ).toBeGreaterThanOrEqual(2); - expect(result.title).toBe("Mock Title"); // taken from first chunk - expect(typeof 
result.markdown).toBe("string"); + expect(result.title).toBe("Translated Big Page"); + expect(result.markdown).toContain("# Section Two"); expect(result.markdown.length).toBeGreaterThan(0); }); @@ -137,17 +638,20 @@ describe("notion-translate translateFrontMatter", () => { message: "This model's maximum context length is 131072 tokens. However, you requested 211603 tokens (211603 in the messages, 0 in the completion).", }) - .mockResolvedValue({ - choices: [ - { - message: { - content: JSON.stringify({ - markdown: "translated chunk", - title: "Translated Title", - }), + .mockImplementation(async (request: MockOpenAIRequest) => { + const payload = extractPromptMarkdown(request); + return { + choices: [ + { + message: { + content: JSON.stringify({ + markdown: payload.markdown, + title: "Translated Title", + }), + }, }, - }, - ], + ], + }; }); const result = await translateText( @@ -158,7 +662,7 @@ describe("notion-translate translateFrontMatter", () => { expect(mockOpenAIChatCompletionCreate.mock.calls.length).toBeGreaterThan(1); expect(result.title).toBe("Translated Title"); - expect(result.markdown.length).toBeGreaterThan(0); + expect(result.markdown).toContain("Just a paragraph."); }); it("masks and restores data URL images during translation", async () => { @@ -230,6 +734,43 @@ describe("notion-translate translateFrontMatter", () => { expect(result.markdown).toContain(dataUrl); }); + it("retries when a canonical /images path is rewritten", async () => { + const { translateText } = await import("./translateFrontMatter"); + const canonicalImagePath = "/images/example.png"; + + mockOpenAIChatCompletionCreate + .mockResolvedValueOnce({ + choices: [ + { + message: { + content: JSON.stringify({ + markdown: "![image](/images/changed-path.png)\n\nTranslated", + title: "Translated Title", + }), + }, + }, + ], + }) + .mockResolvedValueOnce({ + choices: [ + { + message: { + content: JSON.stringify({ + markdown: `![image](${canonicalImagePath})\n\nTranslated`, + title: 
"Translated Title", + }), + }, + }, + ], + }); + + const source = `![image](${canonicalImagePath})\n\nBody text`; + const result = await translateText(source, "Title", "pt-BR"); + + expect(mockOpenAIChatCompletionCreate).toHaveBeenCalledTimes(2); + expect(result.markdown).toContain(canonicalImagePath); + }); + it("splitMarkdownIntoChunks does not split on headings inside fenced code blocks", async () => { const { splitMarkdownIntoChunks } = await import("./translateFrontMatter"); @@ -247,6 +788,27 @@ describe("notion-translate translateFrontMatter", () => { expect(fenceChunk).toContain("# not a heading"); }); + it("splitMarkdownIntoChunks does not split on headings inside indented fenced code blocks", async () => { + const { splitMarkdownIntoChunks } = await import("./translateFrontMatter"); + + const content = + "# Real Heading\n\n" + + "- Item one\n\n" + + " ```\n" + + " # not a heading\n" + + " ```\n\n" + + "# Another Heading\n\n" + + "text\n"; + + const chunks = splitMarkdownIntoChunks(content, 55); + + const joined = chunks.join(""); + expect(joined).toBe(content); + const fenceChunk = chunks.find((c) => c.includes(" ```")); + expect(fenceChunk).toBeDefined(); + expect(fenceChunk).toContain("# not a heading"); + }); + it("splitMarkdownIntoChunks reassembly is lossless", async () => { const { splitMarkdownIntoChunks } = await import("./translateFrontMatter"); @@ -284,4 +846,255 @@ describe("notion-translate translateFrontMatter", () => { } expect(chunks.join("")).toBe(longLine); }); + + // parseFrontmatterKeys unit tests + + it("parseFrontmatterKeys returns empty array when no frontmatter is present", async () => { + const { parseFrontmatterKeys } = await import("./translateFrontMatter"); + expect(parseFrontmatterKeys("# Heading\n\nBody.")).toEqual([]); + }); + + it("parseFrontmatterKeys extracts top-level keys from frontmatter", async () => { + const { parseFrontmatterKeys } = await import("./translateFrontMatter"); + const md = + "---\n" + + "title: My 
Page\n" + + "slug: /my-page\n" + + "sidebar_position: 2\n" + + "---\n\n" + + "# Body"; + expect(parseFrontmatterKeys(md)).toEqual([ + "title", + "slug", + "sidebar_position", + ]); + }); + + it("parseFrontmatterKeys ignores indented lines (nested values)", async () => { + const { parseFrontmatterKeys } = await import("./translateFrontMatter"); + const md = + "---\n" + + "title: My Page\n" + + "keywords:\n" + + " - one\n" + + " - two\n" + + "---\n\n" + + "# Body"; + expect(parseFrontmatterKeys(md)).toEqual(["title", "keywords"]); + }); + + it("parseFrontmatterKeys returns empty array when frontmatter closing marker is missing", async () => { + const { parseFrontmatterKeys } = await import("./translateFrontMatter"); + const md = "---\ntitle: My Page\n# Body"; + expect(parseFrontmatterKeys(md)).toEqual([]); + }); + + // Frontmatter integrity integration tests + + it("fails when a critical frontmatter field is dropped by translation", async () => { + const { translateText } = await import("./translateFrontMatter"); + + const source = + "---\n" + + "title: My Page\n" + + "slug: /my-page\n" + + "sidebar_position: 2\n" + + "---\n\n" + + "# Body\n\nSome content."; + + // Translation drops slug from the frontmatter + mockOpenAIChatCompletionCreate.mockResolvedValue({ + choices: [ + { + message: { + content: JSON.stringify({ + markdown: + "---\n" + + "title: Minha Página\n" + + "sidebar_position: 2\n" + + "---\n\n" + + "# Corpo\n\nAlgum conteúdo.", + title: "Minha Página", + }), + }, + }, + ], + }); + + await expect( + translateText(source, "My Page", "pt-BR") + ).rejects.toMatchObject({ + code: "schema_invalid", + isCritical: false, + message: expect.stringContaining("slug"), + }); + }); + + it("fails when a non-critical frontmatter key is dropped by translation", async () => { + const { translateText } = await import("./translateFrontMatter"); + + const source = + "---\n" + + "title: My Page\n" + + "description: A description\n" + + "---\n\n" + + "# Body\n\nSome content."; 
+ + // Translation drops description + mockOpenAIChatCompletionCreate.mockResolvedValue({ + choices: [ + { + message: { + content: JSON.stringify({ + markdown: + "---\n" + + "title: Minha Página\n" + + "---\n\n" + + "# Corpo\n\nAlgum conteúdo.", + title: "Minha Página", + }), + }, + }, + ], + }); + + await expect( + translateText(source, "My Page", "pt-BR") + ).rejects.toMatchObject({ + code: "schema_invalid", + isCritical: false, + message: expect.stringContaining("description"), + }); + }); + + it("fails when translation adds an unexpected critical frontmatter field", async () => { + const { translateText } = await import("./translateFrontMatter"); + + const source = "---\ntitle: My Page\n---\n\n# Body\n\nSome content."; + + // Translation invents a slug field + mockOpenAIChatCompletionCreate.mockResolvedValue({ + choices: [ + { + message: { + content: JSON.stringify({ + markdown: + "---\n" + + "title: Minha Página\n" + + "slug: /invented\n" + + "---\n\n" + + "# Corpo\n\nAlgum conteúdo.", + title: "Minha Página", + }), + }, + }, + ], + }); + + await expect( + translateText(source, "My Page", "pt-BR") + ).rejects.toMatchObject({ + code: "schema_invalid", + isCritical: false, + message: expect.stringContaining("slug"), + }); + }); + + it("retries and succeeds when frontmatter integrity fails on first attempt but passes on retry", async () => { + const { translateText } = await import("./translateFrontMatter"); + + const source = + "---\n" + + "title: My Page\n" + + "slug: /my-page\n" + + "---\n\n" + + "# Body\n\nSome content."; + + // First call drops slug (integrity failure); second call preserves it. 
+ mockOpenAIChatCompletionCreate + .mockResolvedValueOnce({ + choices: [ + { + message: { + content: JSON.stringify({ + markdown: + "---\n" + + "title: Minha Página\n" + + "---\n\n" + + "# Corpo\n\nAlgum conteúdo.", + title: "Minha Página", + }), + }, + }, + ], + }) + .mockResolvedValue({ + choices: [ + { + message: { + content: JSON.stringify({ + markdown: + "---\n" + + "title: Minha Página\n" + + "slug: /my-page\n" + + "---\n\n" + + "# Corpo\n\nAlgum conteúdo.", + title: "Minha Página", + }), + }, + }, + ], + }); + + const result = await translateText(source, "My Page", "pt-BR"); + expect(result.markdown).toContain("slug: /my-page"); + expect(result.markdown).toContain("title: Minha Página"); + }); + + it("passes when all frontmatter keys are preserved in translation", async () => { + const { translateText } = await import("./translateFrontMatter"); + + const source = + "---\n" + + "title: My Page\n" + + "slug: /my-page\n" + + "sidebar_position: 2\n" + + "---\n\n" + + "# Body\n\nSome content."; + + mockOpenAIChatCompletionCreate.mockResolvedValue({ + choices: [ + { + message: { + content: JSON.stringify({ + markdown: + "---\n" + + "title: Minha Página\n" + + "slug: /my-page\n" + + "sidebar_position: 2\n" + + "---\n\n" + + "# Corpo\n\nAlgum conteúdo.", + title: "Minha Página", + }), + }, + }, + ], + }); + + const result = await translateText(source, "My Page", "pt-BR"); + expect(result.markdown).toContain("slug: /my-page"); + expect(result.markdown).toContain("sidebar_position: 2"); + }); + + it("passes when markdown has no frontmatter and translation has none either", async () => { + const { translateText } = await import("./translateFrontMatter"); + installStructuredTranslationMock(); + + const result = await translateText( + "# No Frontmatter\n\nJust body.", + "Title", + "pt-BR" + ); + expect(result).toBeDefined(); + }); }); diff --git a/scripts/notion-translate/translateFrontMatter.ts b/scripts/notion-translate/translateFrontMatter.ts index 4f69d9a7..2638d4d9 
100644 --- a/scripts/notion-translate/translateFrontMatter.ts +++ b/scripts/notion-translate/translateFrontMatter.ts @@ -12,6 +12,9 @@ import { OPENAI_BASE_URL, IS_CUSTOM_OPENAI_API, getMaxChunkChars, + TRANSLATION_CHUNK_MAX_CHARS, + TRANSLATION_MIN_CHUNK_MAX_CHARS, + TRANSLATION_COMPLETENESS_MAX_RETRIES, } from "../constants.js"; // Load environment variables @@ -28,7 +31,11 @@ const MAX_RETRIES = TRANSLATION_MAX_RETRIES; const RETRY_BASE_DELAY_MS = TRANSLATION_RETRY_BASE_DELAY_MS; const DATA_URL_PLACEHOLDER_REGEX = /\/images\/__data_url_placeholder_\d+__\.png/g; +const CANONICAL_IMAGE_PATH_REGEX = /\/images\/[^\s)"'<>]+/g; const MAX_PLACEHOLDER_INTEGRITY_RETRIES = 2; + +const isDataUrlPlaceholderPath = (path: string): boolean => + /\/images\/__data_url_placeholder_\d+__\.png/.test(path); // Translation prompt template const TRANSLATION_PROMPT = ` # Role: Translation Assistant @@ -290,15 +297,33 @@ function splitBySections(markdown: string): string[] { const lastIdx = lines.length - 1; let current = ""; let inFence = false; + let fenceChar = ""; + let fenceLen = 0; for (const [idx, line] of lines.entries()) { // Reconstruct original text: all lines except the last trailing empty get "\n" appended const lineWithNewline = idx < lastIdx ? line + "\n" : line.length > 0 ? line : ""; - // Toggle fence state on ``` or ~~~ lines - if (/^(`{3,}|~{3,})/.test(line)) { - inFence = !inFence; + // Track fence state per CommonMark spec: a fence of N backticks/tildes is closed + // only by a closing fence of >= N of the same character (and no info string on close). 
+ const fenceMatch = line.match(/^[ \t]{0,3}(`{3,}|~{3,})/); + if (fenceMatch) { + const ch = fenceMatch[1][0]; + const len = fenceMatch[1].length; + if (!inFence) { + inFence = true; + fenceChar = ch; + fenceLen = len; + } else if (ch === fenceChar && len >= fenceLen) { + // Closing fence: same character, at least as long, no info string + const afterFence = line.trimStart().slice(len); + if (/^\s*$/.test(afterFence)) { + inFence = false; + fenceChar = ""; + fenceLen = 0; + } + } } // Start a new section before any ATX heading (outside fences) if (!inFence && /^#{1,6}\s/.test(line) && current.length > 0) { @@ -454,22 +479,312 @@ function extractDataUrlPlaceholders(text: string): string[] { return Array.from(new Set(matches)); } -function getMissingPlaceholders( +function extractCanonicalImagePaths(text: string): string[] { + const matches = text.match(CANONICAL_IMAGE_PATH_REGEX) ?? []; + + return Array.from( + new Set(matches.filter((match) => !isDataUrlPlaceholderPath(match))) + ); +} + +function getMissingProtectedPaths( text: string, - requiredPlaceholders: string[] + requiredPaths: string[] ): string[] { - return requiredPlaceholders.filter( - (placeholder) => !text.includes(placeholder) - ); + return requiredPaths.filter((requiredPath) => !text.includes(requiredPath)); } -function isPlaceholderIntegrityError( +function isProtectedPathIntegrityError( error: unknown ): error is TranslationError { return ( error instanceof TranslationError && error.code === "schema_invalid" && - /Data URL placeholder integrity check failed/.test(error.message) + /(Data URL placeholder|Canonical image path) integrity check failed/.test( + error.message + ) + ); +} + +const CRITICAL_FRONTMATTER_FIELDS = new Set([ + "slug", + "sidebar_position", + "sidebar_label", + "id", + "title", +]); + +/** + * Extracts the top-level YAML keys from a frontmatter block. 
+ * Only recognises simple `key:` entries (no nested parsing) — enough to + * detect dropped or added keys without pulling in a YAML parser dependency. + * @internal exported for testing + */ +export function parseFrontmatterKeys(markdown: string): string[] { + if (!markdown.startsWith("---\n") && !markdown.startsWith("---\r\n")) { + return []; + } + const endFrontmatterIndex = markdown.indexOf("\n---", 3); + if (endFrontmatterIndex === -1) { + return []; + } + const frontmatterBody = markdown.slice(4, endFrontmatterIndex); + const keys: string[] = []; + for (const line of frontmatterBody.split("\n")) { + // Top-level keys: start at column 0, followed by optional spaces and ":" + const match = line.match(/^([A-Za-z_][\w-]*)[\s]*:/); + if (match) { + keys.push(match[1]); + } + } + return keys; +} + +/** + * Checks that the translated markdown preserves all frontmatter keys that + * were present in the source, and that no critical routing/sidebar fields + * have been added or removed. + * + * Throws a non-critical `TranslationError` when an integrity violation is + * detected so the caller can retry (same pattern as completeness checks). + */ +function assertFrontmatterIntegrity( + sourceMarkdown: string, + translatedMarkdown: string +): void { + const sourceKeys = parseFrontmatterKeys(sourceMarkdown); + if (sourceKeys.length === 0) { + // No frontmatter in source — nothing to validate. + return; + } + + const translatedKeys = new Set(parseFrontmatterKeys(translatedMarkdown)); + + const missingKeys = sourceKeys.filter((key) => !translatedKeys.has(key)); + if (missingKeys.length > 0) { + const criticalMissing = missingKeys.filter((key) => + CRITICAL_FRONTMATTER_FIELDS.has(key) + ); + const label = + criticalMissing.length > 0 + ? 
`critical frontmatter key(s) missing: ${criticalMissing.join(", ")}` + : `frontmatter key(s) missing: ${missingKeys.join(", ")}`; + throw new TranslationError( + `Frontmatter integrity check failed — ${label}`, + "schema_invalid", + false + ); + } + + // Also flag if the translation invented new critical keys not in the source + const sourceKeySet = new Set(sourceKeys); + const addedCriticalKeys = [...translatedKeys].filter( + (key) => CRITICAL_FRONTMATTER_FIELDS.has(key) && !sourceKeySet.has(key) + ); + if (addedCriticalKeys.length > 0) { + throw new TranslationError( + `Frontmatter integrity check failed — unexpected critical key(s) added: ${addedCriticalKeys.join(", ")}`, + "schema_invalid", + false + ); + } +} + +type MarkdownStructureMetrics = { + headingCount: number; + fencedCodeBlockCount: number; + admonitionCount: number; + bulletListCount: number; + numberedListCount: number; + tableCount: number; + contentLength: number; +}; + +/** + * Returns a copy of the markdown with the *content* of fenced code blocks + * removed (the opening/closing fence markers are kept so that fenced block + * counts remain accurate). This prevents structural markers inside code + * samples — headings, list items, table rows, etc. — from inflating counts. 
+ */ +function stripFencedCodeContent(markdown: string): string { + const lines = markdown.split("\n"); + const result: string[] = []; + let inFence = false; + let fenceChar = ""; + let fenceLen = 0; + let fenceBuffer: string[] = []; + + for (const line of lines) { + if (!inFence) { + const match = line.match(/^[ \t]{0,3}(`{3,}|~{3,})/); + if (match) { + inFence = true; + fenceChar = match[1][0]; + fenceLen = match[1].length; + result.push(line); // keep opening marker + fenceBuffer = []; + } else { + result.push(line); + } + } else { + // Closing fence per CommonMark spec: same character, >= opening length, no info string + const closeMatch = line.match(/^[ \t]{0,3}(`{3,}|~{3,})/); + if ( + closeMatch && + closeMatch[1][0] === fenceChar && + closeMatch[1].length >= fenceLen && + /^\s*$/.test(line.trimStart().slice(closeMatch[1].length)) + ) { + inFence = false; + fenceChar = ""; + fenceLen = 0; + result.push(line); // keep closing marker + fenceBuffer = []; + } else { + fenceBuffer.push(line); + } + } + } + + // Failsafe: restore lines if the block was never closed + if (inFence && fenceBuffer.length > 0) { + result.push(...fenceBuffer); + } + + return result.join("\n"); +} + +function stripYamlFrontmatter(markdown: string): string { + if (markdown.startsWith("---\n") || markdown.startsWith("---\r\n")) { + const endFrontmatterIndex = markdown.indexOf("\n---", 3); + if (endFrontmatterIndex !== -1) { + const endOfLineIndex = markdown.indexOf("\n", endFrontmatterIndex + 1); + if (endOfLineIndex !== -1) { + return markdown.substring(endOfLineIndex + 1); + } + return ""; + } + } + return markdown; +} + +function collectMarkdownStructureMetrics( + markdown: string +): MarkdownStructureMetrics { + // Remove frontmatter before stripping fenced code content + const withoutFrontmatter = stripYamlFrontmatter(markdown); + + // Fenced code blocks must be counted on raw markdown (before stripping). + // Allow up to 3 leading spaces so the metric matches CommonMark fence rules. 
+ const fencedCodeMatches = + withoutFrontmatter.match(/^[ \t]{0,3}(`{3,}|~{3,})/gm) ?? []; + + // All other structural markers are measured on the stripped version so that + // examples inside code blocks do not inflate the counts. + const stripped = stripFencedCodeContent(withoutFrontmatter); + + // ATX headings: "# Heading" + const atxHeadingMatches = stripped.match(/^#{1,6}\s.+$/gm) ?? []; + // Setext H1 headings ("===" underline): unambiguous — "=" has no other + // CommonMark meaning, so these can never be confused with thematic breaks. + const setextH1Matches = stripped.match(/^.+\n=+\s*$/gm) ?? []; + // Setext H2 headings ("---" underline): a thematic break uses the same + // syntax, but only when the preceding line is a block-level marker (list + // item, blockquote, ATX heading, etc.). A setext H2 content line is a + // plain paragraph — so we exclude lines starting with list/block markers. + const setextH2Matches = + stripped.match(/^(?![ \t]*(?:[-*+]|\d+\.)\s|[ \t]*[>#]).+\n-{2,}\s*$/gm) ?? + []; + // Docusaurus / MDX admonition markers (:::type … :::) + const admonitionMatches = stripped.match(/^:::/gm) ?? []; + const bulletListMatches = stripped.match(/^\s*[-*+]\s+/gm) ?? []; + const numberedListMatches = stripped.match(/^\s*\d+\.\s+/gm) ?? []; + // GFM table separator rows (---|---|---) are the unambiguous indicator of a + // table and work regardless of whether the model uses outer pipes or not. + // A separator line contains only "-", ":", "|", space, and tab characters, + // and must include both a "|" (distinguishes from thematic break) and a "-". + const tableMatches = ( + (stripped.match(/^[ \t:|-]+\s*$/gm) ?? 
[]) as string[] + ).filter((line) => line.includes("|") && line.includes("-")); + + return { + headingCount: + atxHeadingMatches.length + + setextH1Matches.length + + setextH2Matches.length, + fencedCodeBlockCount: Math.floor(fencedCodeMatches.length / 2), + admonitionCount: Math.floor(admonitionMatches.length / 2), + bulletListCount: bulletListMatches.length, + numberedListCount: numberedListMatches.length, + tableCount: tableMatches.length, + contentLength: withoutFrontmatter.trim().length, + }; +} + +function isSuspiciouslyIncompleteTranslation( + sourceMarkdown: string, + translatedMarkdown: string +): boolean { + const sourceMetrics = collectMarkdownStructureMetrics(sourceMarkdown); + const translatedMetrics = collectMarkdownStructureMetrics(translatedMarkdown); + + if (sourceMetrics.contentLength === 0) { + return false; + } + + const lengthRatio = + translatedMetrics.contentLength / Math.max(sourceMetrics.contentLength, 1); + const headingLoss = + sourceMetrics.headingCount > 0 && + translatedMetrics.headingCount < sourceMetrics.headingCount; + const fencedBlockLoss = + sourceMetrics.fencedCodeBlockCount > 0 && + translatedMetrics.fencedCodeBlockCount < sourceMetrics.fencedCodeBlockCount; + const admonitionLoss = + sourceMetrics.admonitionCount > 0 && + translatedMetrics.admonitionCount < sourceMetrics.admonitionCount; + const bulletListLoss = + sourceMetrics.bulletListCount >= 3 && + translatedMetrics.bulletListCount === 0; + const numberedListLoss = + sourceMetrics.numberedListCount >= 3 && + translatedMetrics.numberedListCount === 0; + const tableLoss = + sourceMetrics.tableCount >= 1 && translatedMetrics.tableCount === 0; + const severeLengthShrinkage = + sourceMetrics.contentLength >= 4_000 && lengthRatio < 0.55; + + return ( + headingLoss || + fencedBlockLoss || + admonitionLoss || + bulletListLoss || + numberedListLoss || + tableLoss || + severeLengthShrinkage + ); +} + +function getProactiveChunkCharLimit(modelName: string): number { + return 
Math.min(getMaxChunkChars(modelName), TRANSLATION_CHUNK_MAX_CHARS); +} + +function getChunkContentBudget(totalChunkLimit: number, title: string): number { + // Subtract prompt overhead so the *total* request stays within totalChunkLimit. + // The minimum content budget is 1; the retry-level floor (TRANSLATION_MIN_CHUNK_MAX_CHARS) + // is enforced as a total-request budget by the caller, not as a markdown payload floor. + const overhead = TRANSLATION_PROMPT.length + title.length + 20; + return Math.max(totalChunkLimit - overhead, 1); +} + +function splitMarkdownForTranslation( + markdown: string, + title: string, + totalChunkLimit: number +): string[] { + return splitMarkdownIntoChunks( + markdown, + getChunkContentBudget(totalChunkLimit, title) ); } @@ -522,12 +837,12 @@ async function translateTextSingleCall( text: string, title: string, targetLanguage: string, - requiredPlaceholders: string[] = [], + requiredProtectedPaths: string[] = [], strictPlaceholderGuard = false ): Promise<{ markdown: string; title: string }> { const placeholderGuard = - requiredPlaceholders.length > 0 - ? `\n\n${strictPlaceholderGuard ? "CRITICAL REQUIREMENT" : "Placeholder paths to preserve exactly"}:\n${requiredPlaceholders.map((placeholder) => `- ${placeholder}`).join("\n")}\n` + requiredProtectedPaths.length > 0 + ? `\n\n${strictPlaceholderGuard ? 
"CRITICAL REQUIREMENT" : "Image paths to preserve exactly"}:\n${requiredProtectedPaths.map((requiredPath) => `- ${requiredPath}`).join("\n")}\n` : ""; const textWithTitle = `title: ${title}\n${placeholderGuard}\nmarkdown: ${text}`; @@ -570,7 +885,17 @@ async function translateTextSingleCall( ...modelParams, }); - const content = response.choices[0]?.message?.content; + const choice = response.choices[0]; + const finishReason = choice?.finish_reason; + if (finishReason === "length") { + throw new TranslationError( + "OpenAI output was truncated (finish_reason: length) — chunk too large for model output budget", + "token_overflow", + false + ); + } + + const content = choice?.message?.content; if (!content) { throw new TranslationError( "OpenAI returned an empty translation response", @@ -581,14 +906,29 @@ async function translateTextSingleCall( const parsed = parseTranslationPayload(content); - if (requiredPlaceholders.length > 0) { - const missingPlaceholders = getMissingPlaceholders( + if (requiredProtectedPaths.length > 0) { + const missingProtectedPaths = getMissingProtectedPaths( parsed.markdown, - requiredPlaceholders + requiredProtectedPaths ); - if (missingPlaceholders.length > 0) { + if (missingProtectedPaths.length > 0) { + const missingPlaceholderPaths = missingProtectedPaths.filter( + isDataUrlPlaceholderPath + ); + const missingCanonicalImagePaths = missingProtectedPaths.filter( + (path) => !isDataUrlPlaceholderPath(path) + ); + + if (missingPlaceholderPaths.length > 0) { + throw new TranslationError( + `Data URL placeholder integrity check failed: missing ${missingPlaceholderPaths.length} placeholder(s): ${missingPlaceholderPaths.slice(0, 3).join(", ")}`, + "schema_invalid", + true + ); + } + throw new TranslationError( - `Data URL placeholder integrity check failed: missing ${missingPlaceholders.length} placeholder(s): ${missingPlaceholders.slice(0, 3).join(", ")}`, + `Canonical image path integrity check failed: missing 
${missingCanonicalImagePaths.length} path(s): ${missingCanonicalImagePaths.slice(0, 3).join(", ")}`, "schema_invalid", true ); @@ -622,21 +962,27 @@ async function translateChunkWithOverflowFallback( text: string, title: string, targetLanguage: string, - placeholderGuardAttempt = 0 + placeholderGuardAttempt = 0, + chunkBudgetForRetry = getProactiveChunkCharLimit(model) ): Promise<{ markdown: string; title: string }> { - const requiredPlaceholders = extractDataUrlPlaceholders(text); + const requiredProtectedPaths = Array.from( + new Set([ + ...extractDataUrlPlaceholders(text), + ...extractCanonicalImagePaths(text), + ]) + ); try { return await translateTextSingleCall( text, title, targetLanguage, - requiredPlaceholders, + requiredProtectedPaths, placeholderGuardAttempt > 0 ); } catch (err) { if ( - isPlaceholderIntegrityError(err) && + isProtectedPathIntegrityError(err) && placeholderGuardAttempt < MAX_PLACEHOLDER_INTEGRITY_RETRIES ) { return translateChunkWithOverflowFallback( @@ -655,8 +1001,11 @@ async function translateChunkWithOverflowFallback( throw err; } - const splitTarget = Math.max(Math.floor(text.length / 2), 1); - let subChunks = splitMarkdownIntoChunks(text, splitTarget); + const splitTarget = Math.max( + Math.floor(Math.min(text.length, chunkBudgetForRetry) / 2), + TRANSLATION_MIN_CHUNK_MAX_CHARS + ); + let subChunks = splitMarkdownForTranslation(text, title, splitTarget); if (subChunks.length <= 1) { const midpoint = Math.floor(text.length / 2); if (midpoint < 1 || midpoint >= text.length) { @@ -688,6 +1037,11 @@ async function translateChunkWithOverflowFallback( } } +type TranslateTextOptions = { + chunkLimit?: number; + completenessRetryDepth?: number; +}; + /** * Translates text using OpenAI * @param text Text to translate @@ -698,7 +1052,8 @@ async function translateChunkWithOverflowFallback( export async function translateText( text: string, title: string, - targetLanguage: string + targetLanguage: string, + options: TranslateTextOptions = {} ): 
Promise<{ markdown: string; title: string }> { const safeText = typeof text === "string" && text.length > 0 @@ -706,59 +1061,122 @@ export async function translateText( : "# Empty Content\n\nThis page has no content to translate."; const { maskedText, placeholders } = maskDataUrlImages(safeText); - // Get model-specific chunk size - const maxChunkChars = getMaxChunkChars(model); + const effectiveChunkLimit = + options.chunkLimit ?? getProactiveChunkCharLimit(model); + const completenessRetryDepth = options.completenessRetryDepth ?? 0; + + const translateAndValidate = async ( + sourceMarkdown: string, + translatedChunk: Promise<{ markdown: string; title: string }> + ) => { + const translated = await translatedChunk; + if ( + isSuspiciouslyIncompleteTranslation(sourceMarkdown, translated.markdown) + ) { + throw new TranslationError( + "Translated markdown appears incomplete compared to source structure", + "unexpected_error", + false + ); + } + assertFrontmatterIntegrity(sourceMarkdown, translated.markdown); + return translated; + }; // Include system prompt overhead (~1800 chars) + title prefix + "markdown: " prefix const estimatedTotalChars = TRANSLATION_PROMPT.length + title.length + 20 + maskedText.length; - if (estimatedTotalChars <= maxChunkChars) { - // Fast path: content fits in a single call - const translated = await translateChunkWithOverflowFallback( + try { + if (estimatedTotalChars <= effectiveChunkLimit) { + // Fast path: content fits in a single call + const translated = await translateAndValidate( + maskedText, + translateChunkWithOverflowFallback( + maskedText, + title, + targetLanguage, + 0, + effectiveChunkLimit + ) + ); + return { + markdown: restoreDataUrlPlaceholders(translated.markdown, placeholders), + title: restoreDataUrlPlaceholders(translated.title, placeholders), + }; + } + + // Slow path: content too large — split into chunks + const chunks = splitMarkdownForTranslation( maskedText, title, - targetLanguage + effectiveChunkLimit ); - 
return { - markdown: restoreDataUrlPlaceholders(translated.markdown, placeholders), - title: restoreDataUrlPlaceholders(translated.title, placeholders), - }; - } - // Slow path: content too large — split into chunks - const contentBudget = - maxChunkChars - TRANSLATION_PROMPT.length - title.length - 20; - const chunks = splitMarkdownIntoChunks( - maskedText, - Math.max(contentBudget, 50_000) - ); + let translatedTitle = title; + const translatedChunks: string[] = []; - let translatedTitle = title; - const translatedChunks: string[] = []; + for (const [i, chunk] of chunks.entries()) { + const chunkTitle = i === 0 ? title : ""; + const result = await translateAndValidate( + chunk, + translateChunkWithOverflowFallback( + chunk, + chunkTitle, + targetLanguage, + 0, + effectiveChunkLimit + ) + ); - for (const [i, chunk] of chunks.entries()) { - const chunkTitle = i === 0 ? title : ""; - const result = await translateChunkWithOverflowFallback( - chunk, - chunkTitle, - targetLanguage - ); + if (i === 0) { + translatedTitle = result.title; + } + translatedChunks.push(result.markdown); + } - if (i === 0) { - translatedTitle = result.title; + const joinedMarkdown = translatedChunks.join(""); + if (isSuspiciouslyIncompleteTranslation(maskedText, joinedMarkdown)) { + throw new TranslationError( + "Translated markdown appears incomplete after chunk reassembly", + "unexpected_error", + false + ); } - translatedChunks.push(result.markdown); - } - // Sections already end with "\n"; join with "" to avoid extra blank lines - return { - markdown: restoreDataUrlPlaceholders( - translatedChunks.join(""), - placeholders - ), - title: restoreDataUrlPlaceholders(translatedTitle, placeholders), - }; + // Sections already end with "\n"; join with "" to avoid extra blank lines + return { + markdown: restoreDataUrlPlaceholders(joinedMarkdown, placeholders), + title: restoreDataUrlPlaceholders(translatedTitle, placeholders), + }; + } catch (error) { + const isRecoverableCompletenessFailure = + 
error instanceof TranslationError && + error.isCritical === false && + ((error.code === "unexpected_error" && + /incomplete/.test(error.message)) || + (error.code === "schema_invalid" && + /Frontmatter integrity check failed/.test(error.message))); + + if ( + isRecoverableCompletenessFailure && + completenessRetryDepth < TRANSLATION_COMPLETENESS_MAX_RETRIES + ) { + const nextChunkLimit = Math.max( + Math.floor(effectiveChunkLimit / 2), + TRANSLATION_MIN_CHUNK_MAX_CHARS + ); + + if (nextChunkLimit < effectiveChunkLimit) { + return translateText(text, title, targetLanguage, { + chunkLimit: nextChunkLimit, + completenessRetryDepth: completenessRetryDepth + 1, + }); + } + } + + throw error; + } } /**