digidem · luandro · Mar 19, 2026 · Mar 19, 2026 · Mar 19, 2026 · Mar 19, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -0,0 +1,17 @@
+# Changelog - PR 170
+
+## ✨ New Features
+
+- **Slug Normalization**: Accented slugs are now normalized and locale-prefixed link references are supported.
+
+## 🐛 Fixes
+
+- **Doc Paths**: Flattened nested document paths.
+- **Link Normalization**: Links inside code blocks and indented code fences are now properly skipped during link normalization.
+- **Heading IDs**: Explicit heading IDs and empty filenames are handled correctly to prevent heading ID collisions.
+- **Slug Generation**: Slug generation now preserves CJK and other Unicode letters.
+- **Code Fences**: Aligned the code-fence regex with the CommonMark specification.
+
+## 🧪 Testing
+
+- **Normalization**: Aligned tests with the new normalization expectations.
diff --git a/bun-tests/vitest-bridge.test.ts b/bun-tests/vitest-bridge.test.ts
@@ -24,5 +24,7 @@ test(
   () => {
     runVitest();
   },
-  { timeout: 120_000 }
+  // The full Vitest suite can take just under two minutes on this repo, and
+  // Bun's own test harness adds enough overhead that 120s is too tight.
+  { timeout: 300_000 }
 );
diff --git a/eslint.config.mjs b/eslint.config.mjs
@@ -32,7 +32,7 @@ const eslintConfig = [
   // Docusaurus specific configurations
   {
     files: ["**/*.{js,mjs,cjs,ts,jsx,tsx}"],
-    ignores: ["scripts/**", "api-server/**"], // Ignore scripts and api-server directories for docusaurus rules
+    ignores: ["scripts/**", "api-server/**", "bun-tests/**"], // Ignore non-Docusaurus runtime directories for docusaurus/react rules
     plugins: {
       "@docusaurus": docusaurusPlugin,
       react: pluginReact,
@@ -74,7 +74,11 @@ const eslintConfig = [
 
   // Scripts and API server specific configurations
   {
-    files: ["scripts/**/*.{js,mjs,cjs,ts}", "api-server/**/*.{js,mjs,cjs,ts}"],
+    files: [
+      "scripts/**/*.{js,mjs,cjs,ts}",
+      "api-server/**/*.{js,mjs,cjs,ts}",
+      "bun-tests/**/*.{js,mjs,cjs,ts}",
+    ],
     plugins: {
       import: importPlugin,
       promise: promisePlugin,

diff --git a/scripts/notion-fetch/__tests__/retry-loop-behavior.test.ts b/scripts/notion-fetch/__tests__/retry-loop-behavior.test.ts
@@ -93,6 +93,7 @@ vi.mock("../imageProcessor", () => ({
 
 vi.mock("../utils", () => ({
   sanitizeMarkdownContent: vi.fn((content) => content),
+  injectExplicitHeadingIds: vi.fn((content) => content),
   compressImageToFileWithFallback: vi.fn().mockResolvedValue({
     finalSize: 512,
     usedFallback: false,

diff --git a/scripts/notion-fetch/contentSanitizer.test.ts b/scripts/notion-fetch/contentSanitizer.test.ts
@@ -50,6 +50,18 @@ describe("contentSanitizer", () => {
       expect(result).toBe(input); // Should remain unchanged
     });
 
+    it("should preserve tilde fenced code blocks", () => {
+      const input = "~~~md\nconst obj = { key: 'value' };\n~~~";
+      const result = scriptModule.sanitizeMarkdownContent(input);
+      expect(result).toBe(input); // the braces inside the ~~~ fence must come back untouched
+    });
+
+    it("should preserve multi-backtick inline code spans", () => {
+      const input = "Use ``<link to section.>`` and ``{foo}``.";
+      const result = scriptModule.sanitizeMarkdownContent(input);
+      expect(result).toBe(input); // neither the tag-like text nor {foo} inside double backticks may be rewritten
+    });
+
     it("should fix malformed <link to section.> patterns", () => {
       const input = "Check <link to section.> for details.";
       const result = scriptModule.sanitizeMarkdownContent(input);
@@ -226,4 +238,80 @@ echo "# Not a heading"
       });
     });
   });
+
+  describe("injectExplicitHeadingIds", () => {
+    it("should normalize accented headings and append stable duplicate suffixes", () => {
+      const input = [
+        "# Título Único",
+        "## Título Único",
+        "### Niño & Acción",
+      ].join("\n");
+      // Both "Título Único" headings slugify to "titulo-unico"; the second one must get a "-1" suffix.
+      const result = scriptModule.injectExplicitHeadingIds(input);
+
+      expect(result).toContain("# Título Único {#titulo-unico}");
+      expect(result).toContain("## Título Único {#titulo-unico-1}");
+      expect(result).toContain("### Niño & Acción {#nino-accion}");
+    });
+
+    it("should preserve existing explicit heading ids and code fences", () => {
+      const input = [
+        "# Encabezado {#custom-id}",
+        "```md",
+        "## Código Único",
+        "```",
+        "## Otro Título",
+      ].join("\n");
+      // The "## Código Único" line sits inside a backtick fence, so it must come back without an ID.
+      const result = scriptModule.injectExplicitHeadingIds(input);
+
+      expect(result).toContain("# Encabezado {#custom-id}");
+      expect(result).toContain("```md\n## Código Único\n```");
+      expect(result).toContain("## Otro Título {#otro-titulo}");
+      expect(result).not.toContain("## Código Único {#codigo-unico}");
+    });
+
+    it("should preserve headings inside tilde fenced code blocks", () => {
+      const input = ["~~~md", "## Código Único", "~~~", "## Otro Título"].join(
+        "\n"
+      );
+      // Same scenario as the backtick-fence case, using a ~~~ fence instead.
+      const result = scriptModule.injectExplicitHeadingIds(input);
+
+      expect(result).toContain("~~~md\n## Código Único\n~~~");
+      expect(result).toContain("## Otro Título {#otro-titulo}");
+      expect(result).not.toContain("## Código Único {#codigo-unico}");
+    });
+
+    it("should avoid collisions between auto-incremented and explicit IDs", () => {
+      const input = ["## Título", "## Heading {#titulo-1}", "## Título"].join(
+        "\n"
+      );
+      // "titulo-1" is taken by an explicit ID, so the duplicate "Título" has to skip it.
+      const result = scriptModule.injectExplicitHeadingIds(input);
+
+      expect(result).toContain("## Título {#titulo}");
+      expect(result).toContain("## Heading {#titulo-1}");
+      // The second "Título" must NOT get titulo-1 (already claimed by the explicit ID); it should get titulo-2.
+      expect(result).toContain("## Título {#titulo-2}");
+    });
+
+    it("should reserve later explicit ids before assigning earlier auto-generated headings", () => {
+      const input = ["## My Id", "## Custom {#my-id}"].join("\n");
+      // The explicit {#my-id} appears after "## My Id" yet still wins; the earlier heading gets the suffix.
+      const result = scriptModule.injectExplicitHeadingIds(input);
+
+      expect(result).toContain("## My Id {#my-id-1}");
+      expect(result).toContain("## Custom {#my-id}");
+    });
+
+    it("should not reserve a natural slug when a later explicit id is custom", () => {
+      const input = ["## My Id", "## My Id {#custom}"].join("\n");
+      // The later "My Id" heading uses a custom ID, so the plain "my-id" slug stays available.
+      const result = scriptModule.injectExplicitHeadingIds(input);
+
+      expect(result).toContain("## My Id {#my-id}");
+      expect(result).toContain("## My Id {#custom}");
+    });
+  });
 });
diff --git a/scripts/notion-fetch/contentSanitizer.ts b/scripts/notion-fetch/contentSanitizer.ts
@@ -3,6 +3,13 @@
  * that cause MDX compilation errors in Docusaurus.
  */
 
+import { createSafeSlug } from "./slugUtils";
+import {
+  maskFencedCodeBlocks,
+  maskInlineCodeSpans,
+  restoreCodeMasks,
+} from "./markdownUtils";
+
 const EMOJI_STYLE_MARKERS = ["display:", "height:", "margin:"];
 
 const isEmojiStyleObject = (snippet: string): boolean =>
@@ -68,6 +75,83 @@ function fixHeadingHierarchy(
   return fixedLines.join("\n");
 }
 
+export function injectExplicitHeadingIds(content: string): string { // Appends " {#id}" to ATX headings that do not already carry an explicit ID.
+  if (!content) {
+    return content; // empty input: nothing to annotate
+  }
+  // maskFencedCodeBlocks is expected to swap each fenced block for a placeholder so its lines are never rewritten.
+  const {
+    content: maskedContent,
+    codeBlocks,
+    placeholders,
+  } = maskFencedCodeBlocks(content);
+  const reservedIds = new Set<string>(); // IDs claimed by explicit {#...} markers found in pass 1
+  const headingCounts = new Map<string, number>(); // base slug -> next suffix to try; its keys also include every generated ID
+  // Pass 1: collect explicit IDs from the whole document, so an ID declared later still blocks earlier headings.
+  const lines = maskedContent.split("\n");
+  for (const line of lines) {
+    if (placeholders.some((placeholder) => line.includes(placeholder))) {
+      continue; // line holds a masked code block placeholder
+    }
+    // A heading that already ends with {#id}: reserve that ID.
+    const fullMatch = line.match(
+      /^(\s{0,3})(#{1,6})\s+(.+?)\s*\{#([^}]+)\}\s*$/
+    );
+    if (fullMatch) {
+      const [, , , , explicitId] = fullMatch; // capture group 4: the ID inside {#...}
+      if (explicitId) {
+        reservedIds.add(explicitId);
+      }
+      continue;
+    }
+    // Fallback: any other line ending in " {#id}" (whitespace is required before "{") reserves its ID too.
+    const explicitIdMatch = line.match(/\s\{#([^}]+)\}\s*$/);
+    if (explicitIdMatch) {
+      const explicitId = explicitIdMatch[1];
+      reservedIds.add(explicitId); // NOTE(review): this also fires on non-heading lines; confirm that is intended
+    }
+  }
+  // Pass 2: rewrite the headings that have no explicit ID.
+  const updatedLines = lines.map((line) => {
+    if (placeholders.some((placeholder) => line.includes(placeholder))) {
+      return line; // masked code block: leave as is
+    }
+    // Headings that already carry an explicit ID are returned untouched.
+    const explicitHeadingMatch = line.match(
+      /^(\s{0,3})(#{1,6})\s+(.+?)\s*\{#([^}]+)\}\s*$/
+    );
+    if (explicitHeadingMatch) {
+      return line;
+    }
+    // Only ATX headings are rewritten: up to 3 leading whitespace chars, 1-6 "#", whitespace, then text.
+    const headingMatch = line.match(/^(\s{0,3})(#{1,6})\s+(.+?)\s*$/);
+    if (!headingMatch) {
+      return line;
+    }
+
+    const [, leadingWhitespace, hashes, headingText] = headingMatch;
+    const baseId = createSafeSlug(headingText);
+    if (!baseId) {
+      return line; // createSafeSlug gave nothing usable: keep the heading without an ID
+    }
+    // Start at the next suffix recorded for this slug, then skip candidates that are reserved or already generated.
+    let counter = headingCounts.get(baseId) ?? 0;
+    let headingId = counter === 0 ? baseId : `${baseId}-${counter}`; // suffix 0 means the bare slug
+    while (reservedIds.has(headingId) || headingCounts.has(headingId)) {
+      counter++;
+      headingId = `${baseId}-${counter}`;
+    }
+    headingCounts.set(baseId, counter + 1); // the next duplicate of this slug starts after the suffix just used
+    if (headingId !== baseId) {
+      headingCounts.set(headingId, 1); // block the suffixed ID so a heading whose own slug equals it gets a further suffix
+    }
+    // The heading is rebuilt with a single space after the hashes and without trailing whitespace.
+    return `${leadingWhitespace}${hashes} ${headingText} {#${headingId}}`;
+  });
+  // Put the fenced blocks back; no inline code spans were masked in this function, hence the empty list.
+  return restoreCodeMasks(updatedLines.join("\n"), codeBlocks, []);
+}
+
 /**
  * Sanitizes markdown content to fix malformed HTML/JSX tags that cause MDX compilation errors
  * @param content - The markdown content string
@@ -77,23 +161,17 @@ export function sanitizeMarkdownContent(content: string): string {
   // Fix specific malformed patterns that cause MDX errors
 
   // 0. Mask code fences (```...```) and inline code (`...`) to avoid altering them
-  const codeBlocks: string[] = [];
-  const codeSpans: string[] = [];
-  const codeBlockPlaceholders: string[] = [];
-
-  content = content.replace(/```[\s\S]*?```/g, (m) => {
-    codeBlocks.push(m);
-    const placeholder = `__CODEBLOCK_${codeBlocks.length - 1}__`;
-    codeBlockPlaceholders.push(placeholder);
-    return placeholder;
-  });
-  content = content.replace(/`[^`\n]*`/g, (m) => {
-    codeSpans.push(m);
-    return `__CODESPAN_${codeSpans.length - 1}__`;
-  });
+  const {
+    content: maskedContent,
+    codeBlocks,
+    placeholders,
+  } = maskFencedCodeBlocks(content);
+  const { content: maskedWithCodeSpans, codeSpans } =
+    maskInlineCodeSpans(maskedContent);
+  content = maskedWithCodeSpans;
 
   // 1. Fix heading hierarchy for proper TOC generation (after masking code blocks)
-  content = fixHeadingHierarchy(content, codeBlockPlaceholders);
+  content = fixHeadingHierarchy(content, placeholders);
 
   // 2. Aggressively strip all curly-brace expressions by unwrapping to inner text
   // BUT preserve JSX style objects for emoji images
@@ -157,14 +235,7 @@ export function sanitizeMarkdownContent(content: string): string {
   }
 
   // 9. Restore masked code blocks and inline code
-  content = content.replace(
-    /__CODEBLOCK_(\d+)__/g,
-    (_m, i) => codeBlocks[Number(i)]
-  );
-  content = content.replace(
-    /__CODESPAN_(\d+)__/g,
-    (_m, i) => codeSpans[Number(i)]
-  );
+  content = restoreCodeMasks(content, codeBlocks, codeSpans);
 
   return content;
 }