From d6df31b944347c13a3c5050124eca1edf919e1a3 Mon Sep 17 00:00:00 2001
From: luandro <luandro@digital-democracy.org>
Date: Thu, 19 Mar 2026 17:38:23 -0300
Subject: [PATCH 01/15] feat(slugs): normalize accented slugs and add
 locale-prefixed link resolution
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #164

- Add createSafeSlug() using NFD decomposition to strip diacritics
  (á→a, é→e, ñ→n, ã→a, ç→c, etc.) from filenames and anchor IDs
- Add normalizeInternalDocLinks() to rewrite /docs/ links with the
  correct locale prefix (/es/docs/..., /pt/docs/...) and slugify
  path segments and fragments
- Add injectExplicitHeadingIds() to append stable {#id} anchors to
  headings, deduplicated with -1/-2 suffixes, skipping code fences
- Replace three inline slugify implementations with createSafeSlug
- Fix code fence regex to be line-anchored so fenced blocks are detected
  reliably and heading IDs are never injected inside them
- Wrap decodeURIComponent with safeDecode to avoid URIError on
  percent signs in page titles (e.g. "100% complete")
- Add unit tests for slugUtils (12 cases) and linkNormalizer (10 cases)
---
 scripts/notion-fetch/contentSanitizer.test.ts |  33 ++++
 scripts/notion-fetch/contentSanitizer.ts      |  78 ++++++++-
 scripts/notion-fetch/generateBlocks.test.ts   | 152 ++++++++++++++++++
 scripts/notion-fetch/generateBlocks.ts        |  18 ++-
 scripts/notion-fetch/linkNormalizer.test.ts   |  77 +++++++++
 scripts/notion-fetch/linkNormalizer.ts        |  66 ++++++++
 scripts/notion-fetch/slugUtils.test.ts        |  54 +++++++
 scripts/notion-fetch/slugUtils.ts             |  11 ++
 scripts/notion-fetch/utils.ts                 |   5 +-
 scripts/notion-fetch/verifyExportCoverage.ts  |  10 +-
 scripts/notion-translate/index.ts             |   7 +-
 11 files changed, 489 insertions(+), 22 deletions(-)
 create mode 100644 scripts/notion-fetch/linkNormalizer.test.ts
 create mode 100644 scripts/notion-fetch/linkNormalizer.ts
 create mode 100644 scripts/notion-fetch/slugUtils.test.ts
 create mode 100644 scripts/notion-fetch/slugUtils.ts

diff --git a/scripts/notion-fetch/contentSanitizer.test.ts b/scripts/notion-fetch/contentSanitizer.test.ts
index 5354c120..f1d3315c 100644
--- a/scripts/notion-fetch/contentSanitizer.test.ts
+++ b/scripts/notion-fetch/contentSanitizer.test.ts
@@ -226,4 +226,37 @@ echo "# Not a heading"
       });
     });
   });
+
+  describe("injectExplicitHeadingIds", () => {
+    it("should normalize accented headings and append stable duplicate suffixes", () => {
+      const input = [
+        "# Título Único",
+        "## Título Único",
+        "### Niño & Acción",
+      ].join("\n");
+
+      const result = scriptModule.injectExplicitHeadingIds(input);
+
+      expect(result).toContain("# Título Único {#titulo-unico}");
+      expect(result).toContain("## Título Único {#titulo-unico-1}");
+      expect(result).toContain("### Niño & Acción {#nino-accion}");
+    });
+
+    it("should preserve existing explicit heading ids and code fences", () => {
+      const input = [
+        "# Encabezado {#custom-id}",
+        "```md",
+        "## Código Único",
+        "```",
+        "## Otro Título",
+      ].join("\n");
+
+      const result = scriptModule.injectExplicitHeadingIds(input);
+
+      expect(result).toContain("# Encabezado {#custom-id}");
+      expect(result).toContain("```md\n## Código Único\n```");
+      expect(result).toContain("## Otro Título {#otro-titulo}");
+      expect(result).not.toContain("## Código Único {#codigo-unico}");
+    });
+  });
 });
diff --git a/scripts/notion-fetch/contentSanitizer.ts b/scripts/notion-fetch/contentSanitizer.ts
index f652a60c..070fb1ff 100644
--- a/scripts/notion-fetch/contentSanitizer.ts
+++ b/scripts/notion-fetch/contentSanitizer.ts
@@ -3,6 +3,8 @@
  * that cause MDX compilation errors in Docusaurus.
  */
 
+import { createSafeSlug } from "./slugUtils";
+
 const EMOJI_STYLE_MARKERS = ["display:", "height:", "margin:"];
 
 const isEmojiStyleObject = (snippet: string): boolean =>
@@ -68,6 +70,80 @@ function fixHeadingHierarchy(
   return fixedLines.join("\n");
 }
 
+function maskCodeFences(content: string): {
+  content: string;
+  codeBlocks: string[];
+  codeBlockPlaceholders: string[];
+} {
+  const codeBlocks: string[] = [];
+  const codeBlockPlaceholders: string[] = [];
+
+  const maskedContent = content.replace(
+    /^```[^\n]*\n[\s\S]*?^```/gm,
+    (match) => {
+      codeBlocks.push(match);
+      const placeholder = `__CODEBLOCK_${codeBlocks.length - 1}__`;
+      codeBlockPlaceholders.push(placeholder);
+      return placeholder;
+    }
+  );
+
+  return {
+    content: maskedContent,
+    codeBlocks,
+    codeBlockPlaceholders,
+  };
+}
+
+function restoreCodeFences(content: string, codeBlocks: string[]): string {
+  return content.replace(
+    /__CODEBLOCK_(\d+)__/g,
+    (_match, index) => codeBlocks[Number(index)]
+  );
+}
+
+export function injectExplicitHeadingIds(content: string): string {
+  if (!content) {
+    return content;
+  }
+
+  const {
+    content: maskedContent,
+    codeBlocks,
+    codeBlockPlaceholders,
+  } = maskCodeFences(content);
+  const headingCounts = new Map<string, number>();
+
+  const lines = maskedContent.split("\n");
+  const updatedLines = lines.map((line) => {
+    if (
+      codeBlockPlaceholders.some((placeholder) => line.includes(placeholder)) ||
+      /\s\{#[^}]+\}\s*$/.test(line)
+    ) {
+      return line;
+    }
+
+    const headingMatch = line.match(/^(\s{0,3})(#{1,6})\s+(.+?)\s*$/);
+    if (!headingMatch) {
+      return line;
+    }
+
+    const [, leadingWhitespace, hashes, headingText] = headingMatch;
+    const baseId = createSafeSlug(headingText);
+    if (!baseId) {
+      return line;
+    }
+
+    const currentCount = headingCounts.get(baseId) ?? 0;
+    headingCounts.set(baseId, currentCount + 1);
+    const headingId = currentCount === 0 ? baseId : `${baseId}-${currentCount}`;
+
+    return `${leadingWhitespace}${hashes} ${headingText} {#${headingId}}`;
+  });
+
+  return restoreCodeFences(updatedLines.join("\n"), codeBlocks);
+}
+
 /**
  * Sanitizes markdown content to fix malformed HTML/JSX tags that cause MDX compilation errors
  * @param content - The markdown content string
@@ -81,7 +157,7 @@ export function sanitizeMarkdownContent(content: string): string {
   const codeSpans: string[] = [];
   const codeBlockPlaceholders: string[] = [];
 
-  content = content.replace(/```[\s\S]*?```/g, (m) => {
+  content = content.replace(/^```[^\n]*\n[\s\S]*?^```/gm, (m) => {
     codeBlocks.push(m);
     const placeholder = `__CODEBLOCK_${codeBlocks.length - 1}__`;
     codeBlockPlaceholders.push(placeholder);
diff --git a/scripts/notion-fetch/generateBlocks.test.ts b/scripts/notion-fetch/generateBlocks.test.ts
index b5b0e88d..c2c937c6 100644
--- a/scripts/notion-fetch/generateBlocks.test.ts
+++ b/scripts/notion-fetch/generateBlocks.test.ts
@@ -112,6 +112,7 @@ vi.mock("./imageProcessor", () => ({
 
 vi.mock("./utils", () => ({
   sanitizeMarkdownContent: vi.fn((content) => content),
+  injectExplicitHeadingIds: vi.fn((content) => content),
   compressImageToFileWithFallback: vi.fn(),
   detectFormatFromBuffer: vi.fn(() => "jpeg"),
   formatFromContentType: vi.fn(() => "jpeg"),
@@ -198,6 +199,7 @@ describe("generateBlocks", () => {
   let fetchNotionBlocks: Mock;
   let processImage: Mock;
   let compressImageToFileWithFallback: Mock;
+  let injectExplicitHeadingIds: Mock;
 
   beforeEach(async () => {
     restoreEnv = installTestNotionEnv();
@@ -223,6 +225,7 @@ describe("generateBlocks", () => {
     const utils = await import("./utils");
     compressImageToFileWithFallback =
       utils.compressImageToFileWithFallback as Mock;
+    injectExplicitHeadingIds = utils.injectExplicitHeadingIds as Mock;
 
     // Setup default mock implementations
     processImage.mockResolvedValue(mockProcessedImageResult);
@@ -377,6 +380,155 @@ describe("generateBlocks", () => {
     });
   });
 
+  describe("Localized slug and link normalization", () => {
+    it("should derive the shared ASCII slug from the grouped title for every locale", async () => {
+      const { generateBlocks } = await import("./generateBlocks");
+      const mockWriteFileSync = fs.writeFileSync as Mock;
+
+      const mainPage = createMockNotionPage({
+        id: "main-accented",
+        title: "Título con acentos",
+        elementType: "Page",
+        subItems: ["en-accented", "es-accented", "pt-accented"],
+      });
+      const englishPage = createMockNotionPage({
+        id: "en-accented",
+        title: "Título con acentos",
+        language: "English",
+        elementType: "Page",
+      });
+      const spanishPage = createMockNotionPage({
+        id: "es-accented",
+        title: "Título con acentos",
+        language: "Spanish",
+        elementType: "Page",
+      });
+      const portuguesePage = createMockNotionPage({
+        id: "pt-accented",
+        title: "Título con acentos",
+        language: "Portuguese",
+        elementType: "Page",
+      });
+
+      n2m.pageToMarkdown.mockResolvedValue([]);
+      n2m.toMarkdownString.mockReturnValue({ parent: "Body content" });
+
+      await generateBlocks(
+        [mainPage, englishPage, spanishPage, portuguesePage],
+        vi.fn()
+      );
+
+      const markdownPaths = mockWriteFileSync.mock.calls
+        .map((call) => call[0])
+        .filter(
+          (value): value is string =>
+            typeof value === "string" && value.endsWith(".md")
+        );
+
+      expect(markdownPaths).toEqual(
+        expect.arrayContaining([
+          expect.stringContaining("titulo-con-acentos.md"),
+          expect.stringContaining(
+            "i18n/pt/docusaurus-plugin-content-docs/current/titulo-con-acentos.md"
+          ),
+          expect.stringContaining(
+            "i18n/es/docusaurus-plugin-content-docs/current/titulo-con-acentos.md"
+          ),
+        ])
+      );
+    });
+
+    it("should normalize localized internal docs links before writing markdown", async () => {
+      const { generateBlocks } = await import("./generateBlocks");
+      const mockWriteFileSync = fs.writeFileSync as Mock;
+
+      const pageFamily = createMockPageFamily("Página de prueba", "Page");
+      n2m.pageToMarkdown.mockResolvedValue([]);
+      n2m.toMarkdownString
+        .mockReturnValueOnce({
+          parent:
+            "[doc](/docs/Guía Rápida#Título Uno) [external](https://example.com/Árbol) [relative](./Guía Local#Título)",
+        })
+        .mockReturnValueOnce({
+          parent:
+            "[doc](/docs/Guía Rápida#Título Uno) [nested](/docs/Category Name/Sub Página#Título Dos)",
+        })
+        .mockReturnValueOnce({
+          parent: "[doc](/docs/Guía Rápida#Título Uno)",
+        });
+
+      await generateBlocks(pageFamily.pages, vi.fn());
+
+      const markdownWrites = mockWriteFileSync.mock.calls.filter(
+        (call) => typeof call[0] === "string" && call[0].endsWith(".md")
+      );
+
+      const englishOutput = markdownWrites.find(
+        (call) =>
+          typeof call[0] === "string" &&
+          !call[0].includes("/i18n/") &&
+          call[1].includes("/docs/guia-rapida#titulo-uno")
+      );
+      const portugueseOutput = markdownWrites.find(
+        (call) =>
+          typeof call[0] === "string" &&
+          call[0].includes("/i18n/pt/") &&
+          call[1].includes("/pt/docs/guia-rapida#titulo-uno")
+      );
+      const spanishOutput = markdownWrites.find(
+        (call) =>
+          typeof call[0] === "string" &&
+          call[0].includes("/i18n/es/") &&
+          call[1].includes("/es/docs/guia-rapida#titulo-uno")
+      );
+
+      expect(englishOutput?.[1]).toContain(
+        "[doc](/docs/guia-rapida#titulo-uno)"
+      );
+      expect(englishOutput?.[1]).toContain(
+        "[external](https://example.com/Árbol)"
+      );
+      expect(englishOutput?.[1]).toContain("[relative](./Guía Local#Título)");
+      expect(portugueseOutput?.[1]).toContain(
+        "[nested](/pt/docs/category-name/sub-pagina#titulo-dos)"
+      );
+      expect(spanishOutput?.[1]).toContain(
+        "[doc](/es/docs/guia-rapida#titulo-uno)"
+      );
+    });
+
+    it("should pass the de-duplicated content through heading ID injection before writing", async () => {
+      const { generateBlocks } = await import("./generateBlocks");
+      const mockWriteFileSync = fs.writeFileSync as Mock;
+
+      const page = createMockNotionPage({
+        id: "heading-page",
+        title: "Heading Title",
+        elementType: "Page",
+        language: "English",
+      });
+
+      n2m.pageToMarkdown.mockResolvedValue([]);
+      n2m.toMarkdownString.mockReturnValue({
+        parent: "# Heading Title\n\n## Título Único\nContent body",
+      });
+      injectExplicitHeadingIds.mockImplementation(
+        (content: string) => `${content}\n<!-- ids injected -->`
+      );
+
+      await generateBlocks([page], vi.fn());
+
+      expect(injectExplicitHeadingIds).toHaveBeenCalledWith(
+        "## Título Único\nContent body"
+      );
+
+      const markdownWrite = mockWriteFileSync.mock.calls.find(
+        (call) => typeof call[0] === "string" && call[0].endsWith(".md")
+      );
+      expect(markdownWrite?.[1]).toContain("<!-- ids injected -->");
+    });
+  });
+
   describe("Title fallbacks", () => {
     it("should fallback to legacy Title property when Content elements is missing", async () => {
       const { generateBlocks } = await import("./generateBlocks");
diff --git a/scripts/notion-fetch/generateBlocks.ts b/scripts/notion-fetch/generateBlocks.ts
index 4dda0cd7..94d4ef6d 100644
--- a/scripts/notion-fetch/generateBlocks.ts
+++ b/scripts/notion-fetch/generateBlocks.ts
@@ -9,7 +9,9 @@ import type {
 import { n2m } from "../notionClient";
 import { NOTION_PROPERTIES } from "../constants";
 import chalk from "chalk";
-import { sanitizeMarkdownContent } from "./utils";
+import { sanitizeMarkdownContent, injectExplicitHeadingIds } from "./utils";
+import { createSafeSlug } from "./slugUtils";
+import { normalizeInternalDocLinks } from "./linkNormalizer";
 import config from "../../docusaurus.config";
 import SpinnerManager from "./spinnerManager";
 import { convertCalloutToAdmonition, isCalloutBlock } from "./calloutProcessor";
@@ -528,6 +530,10 @@ async function processSinglePage(
       emojiCount += result.fallbackEmojiCount;
       contentHasS3 = result.containsS3;
 
+      markdownString.parent = normalizeInternalDocLinks(
+        markdownString.parent,
+        lang
+      );
       markdownString.parent = sanitizeMarkdownContent(markdownString.parent);
 
       markdownString.parent = ensureBlankLineAfterStandaloneBold(
@@ -538,18 +544,19 @@ async function processSinglePage(
         markdownString.parent,
         pageTitle
       );
+      const finalContentBody = injectExplicitHeadingIds(contentBody);
 
       const sectionFolderForWrite: Record<string, string | undefined> = {};
 
       sectionFolderForWrite[lang] = currentSectionFolderForLang;
 
-      const finalDiagnostics = getImageDiagnostics(markdownString.parent ?? "");
+      const finalDiagnostics = getImageDiagnostics(finalContentBody ?? "");
       contentHasS3 = finalDiagnostics.s3Matches > 0;
 
       writeMarkdownFile(
         filePath,
         frontmatter,
-        contentBody,
+        finalContentBody,
         pageTitle,
         pageProcessingIndex - 1,
         totalPages,
@@ -887,10 +894,7 @@ export async function generateBlocks(
           ? sectionTypeRaw.trim()
           : String(sectionTypeRaw ?? "").trim();
       const normalizedSectionType = sectionTypeString.toLowerCase();
-      const filename = title
-        .toLowerCase()
-        .replace(/\s+/g, "-")
-        .replace(/[^a-z0-9-]/g, "");
+      const filename = createSafeSlug(title);
 
       const orderedLocales = getOrderedLocales(Object.keys(pageByLang.content));
       for (const lang of orderedLocales) {
diff --git a/scripts/notion-fetch/linkNormalizer.test.ts b/scripts/notion-fetch/linkNormalizer.test.ts
new file mode 100644
index 00000000..e9b7c88e
--- /dev/null
+++ b/scripts/notion-fetch/linkNormalizer.test.ts
@@ -0,0 +1,77 @@
+import { describe, it, expect, vi } from "vitest";
+
+// Mock the docusaurus config before importing the module under test,
+// mirroring the pattern used in generateBlocks.test.ts.
+vi.mock("../../docusaurus.config", () => ({
+  default: {
+    i18n: {
+      locales: ["en", "pt", "es"],
+      defaultLocale: "en",
+    },
+  },
+}));
+
+import { normalizeInternalDocLinks } from "./linkNormalizer";
+
+describe("linkNormalizer", () => {
+  describe("normalizeInternalDocLinks", () => {
+    it("should normalize a docs link for the default locale (en) without a locale prefix", () => {
+      const input = "[link](/docs/Guía Rápida)";
+      const result = normalizeInternalDocLinks(input, "en");
+      expect(result).toBe("[link](/docs/guia-rapida)");
+    });
+
+    it("should add a locale prefix for a non-default locale (es)", () => {
+      const input = "[link](/docs/Guía Rápida)";
+      const result = normalizeInternalDocLinks(input, "es");
+      expect(result).toBe("[link](/es/docs/guia-rapida)");
+    });
+
+    it("should normalize both the path and the fragment", () => {
+      const input = "[link](/docs/Page#Título Uno)";
+      const result = normalizeInternalDocLinks(input, "en");
+      expect(result).toBe("[link](/docs/page#titulo-uno)");
+    });
+
+    it("should leave external links untouched", () => {
+      const input = "[link](https://example.com/Árbol)";
+      const result = normalizeInternalDocLinks(input, "en");
+      expect(result).toBe(input);
+    });
+
+    it("should leave relative links untouched", () => {
+      const input = "[link](./local)";
+      const result = normalizeInternalDocLinks(input, "en");
+      expect(result).toBe(input);
+    });
+
+    it("should not alter image links (lines starting with !)", () => {
+      const input = "![img](/docs/Accented Page)";
+      const result = normalizeInternalDocLinks(input, "en");
+      expect(result).toBe(input);
+    });
+
+    it("should normalize each path segment in a nested docs path and add a locale prefix", () => {
+      const input = "[link](/docs/Category Name/Sub Page)";
+      const result = normalizeInternalDocLinks(input, "pt");
+      expect(result).toBe("[link](/pt/docs/category-name/sub-page)");
+    });
+
+    it("should normalize multiple docs links on a single line", () => {
+      const input = "[a](/docs/Foo) and [b](/docs/Bar)";
+      const result = normalizeInternalDocLinks(input, "en");
+      expect(result).toBe("[a](/docs/foo) and [b](/docs/bar)");
+    });
+
+    it("should return empty string for empty content", () => {
+      const result = normalizeInternalDocLinks("", "en");
+      expect(result).toBe("");
+    });
+
+    it("should leave plain text with only external links unchanged", () => {
+      const input = "plain text with [link](https://example.com)";
+      const result = normalizeInternalDocLinks(input, "en");
+      expect(result).toBe(input);
+    });
+  });
+});
diff --git a/scripts/notion-fetch/linkNormalizer.ts b/scripts/notion-fetch/linkNormalizer.ts
new file mode 100644
index 00000000..4e384f2e
--- /dev/null
+++ b/scripts/notion-fetch/linkNormalizer.ts
@@ -0,0 +1,66 @@
+import config from "../../docusaurus.config";
+import { createSafeSlug } from "./slugUtils";
+
+const DEFAULT_LOCALE = config.i18n.defaultLocale;
+const MARKDOWN_LINK_REGEX = /(^|[^!])\[([^\]]+)\]\(([^)\n]+)\)/gm;
+
+function safeDecode(s: string): string {
+  try {
+    return decodeURIComponent(s);
+  } catch {
+    return s;
+  }
+}
+
+function normalizeDocPathname(pathname: string): string {
+  const hasTrailingSlash = pathname.endsWith("/") && pathname !== "/docs/";
+  const rawSegments = pathname
+    .slice("/docs/".length)
+    .split("/")
+    .filter(Boolean);
+  const normalizedSegments = rawSegments.map((segment) =>
+    createSafeSlug(safeDecode(segment))
+  );
+
+  const normalizedPath = normalizedSegments.length
+    ? `/docs/${normalizedSegments.join("/")}`
+    : "/docs";
+
+  return hasTrailingSlash ? `${normalizedPath}/` : normalizedPath;
+}
+
+function normalizeDocTarget(target: string, lang: string): string {
+  const [pathname, rawFragment] = target.split("#", 2);
+  const localePrefix = lang === DEFAULT_LOCALE ? "" : `/${lang}`;
+  const normalizedPath = normalizeDocPathname(pathname);
+  const normalizedFragment = rawFragment
+    ? `#${createSafeSlug(safeDecode(rawFragment))}`
+    : "";
+
+  return `${localePrefix}${normalizedPath}${normalizedFragment}`;
+}
+
+export function normalizeInternalDocLinks(
+  content: string,
+  lang: string
+): string {
+  if (!content) {
+    return content;
+  }
+
+  return content.replace(
+    MARKDOWN_LINK_REGEX,
+    (match, prefix: string, text: string, rawTarget: string) => {
+      const trimmedTarget = rawTarget.trim();
+      const titleMatch = trimmedTarget.match(/^(\/docs\/[^\n]*?)(\s+"[^"]*")$/);
+      const target = titleMatch ? titleMatch[1] : trimmedTarget;
+      const titleSuffix = titleMatch?.[2] ?? "";
+
+      if (!target.startsWith("/docs/")) {
+        return match;
+      }
+
+      return `${prefix}[${text}](${normalizeDocTarget(target, lang)}${titleSuffix})`;
+    }
+  );
+}
diff --git a/scripts/notion-fetch/slugUtils.test.ts b/scripts/notion-fetch/slugUtils.test.ts
new file mode 100644
index 00000000..35fc0bde
--- /dev/null
+++ b/scripts/notion-fetch/slugUtils.test.ts
@@ -0,0 +1,54 @@
+import { describe, it, expect } from "vitest";
+import { createSafeSlug } from "./slugUtils";
+
+describe("slugUtils", () => {
+  describe("createSafeSlug", () => {
+    it("should convert basic Latin text to lowercase hyphenated slug", () => {
+      expect(createSafeSlug("Hello World")).toBe("hello-world");
+    });
+
+    it("should strip accented Latin characters", () => {
+      expect(createSafeSlug("Título con acentos")).toBe("titulo-con-acentos");
+    });
+
+    it("should handle Spanish accented characters", () => {
+      expect(createSafeSlug("Guía Rápida")).toBe("guia-rapida");
+    });
+
+    it("should handle Portuguese characters", () => {
+      expect(createSafeSlug("Instalação")).toBe("instalacao");
+    });
+
+    it("should handle ñ and accented vowels in Spanish words", () => {
+      expect(createSafeSlug("Niño & Acción")).toBe("nino-accion");
+    });
+
+    it("should return an empty string for empty input", () => {
+      expect(createSafeSlug("")).toBe("");
+    });
+
+    it("should strip diacritics from accented letters", () => {
+      expect(createSafeSlug("éàü")).toBe("eau");
+    });
+
+    it("should preserve numbers in the slug", () => {
+      expect(createSafeSlug("FAQ Section 2")).toBe("faq-section-2");
+    });
+
+    it("should collapse multiple spaces and hyphens into a single hyphen", () => {
+      expect(createSafeSlug("hello   ---  world")).toBe("hello-world");
+    });
+
+    it("should strip leading and trailing hyphens", () => {
+      expect(createSafeSlug("--hello--")).toBe("hello");
+    });
+
+    it("should produce an empty string for CJK-only input (known limitation)", () => {
+      expect(createSafeSlug("安装指南")).toBe("");
+    });
+
+    it("should extract only the Latin portion from mixed CJK and Latin input", () => {
+      expect(createSafeSlug("安装 Setup 指南")).toBe("setup");
+    });
+  });
+});
diff --git a/scripts/notion-fetch/slugUtils.ts b/scripts/notion-fetch/slugUtils.ts
new file mode 100644
index 00000000..a4547422
--- /dev/null
+++ b/scripts/notion-fetch/slugUtils.ts
@@ -0,0 +1,11 @@
+export function createSafeSlug(text: string): string {
+  return text
+    .normalize("NFD")
+    .replace(/\p{M}/gu, "")
+    .toLowerCase()
+    .trim()
+    .replace(/\s+/g, "-")
+    .replace(/[^a-z0-9-]/g, "")
+    .replace(/-+/g, "-")
+    .replace(/^-+|-+$/g, "");
+}
diff --git a/scripts/notion-fetch/utils.ts b/scripts/notion-fetch/utils.ts
index 4b0a9415..4471e445 100644
--- a/scripts/notion-fetch/utils.ts
+++ b/scripts/notion-fetch/utils.ts
@@ -6,7 +6,10 @@ import { compressImage } from "./imageCompressor";
 import { withTimeoutFallback } from "./timeoutUtils";
 
 // Re-export sanitize so callers have a single utils entrypoint
-export { sanitizeMarkdownContent } from "./contentSanitizer";
+export {
+  sanitizeMarkdownContent,
+  injectExplicitHeadingIds,
+} from "./contentSanitizer";
 
 // Fail-open toggle: defaults to true unless explicitly set to 'false'
 export const SOFT_FAIL: boolean =
diff --git a/scripts/notion-fetch/verifyExportCoverage.ts b/scripts/notion-fetch/verifyExportCoverage.ts
index 80e4a78e..ade557c9 100644
--- a/scripts/notion-fetch/verifyExportCoverage.ts
+++ b/scripts/notion-fetch/verifyExportCoverage.ts
@@ -4,17 +4,13 @@ import path from "node:path";
 import { glob } from "glob";
 
 import { NOTION_PROPERTIES } from "../constants";
+import { createSafeSlug } from "./slugUtils";
 
 type NotionPage = Record<string, any>;
 
 const EXPORT_FILENAME = "notion_db.json";
 
-const slugify = (title: string): string =>
-  title
-    .toLowerCase()
-    .replace(/\s+/g, "-")
-    .replace(/[^a-z0-9-]/g, "")
-    .trim();
+const slugify = (title: string): string => createSafeSlug(title);
 
 const getTitle = (page: NotionPage): string | undefined =>
   page?.properties?.[NOTION_PROPERTIES.TITLE]?.title?.[0]?.plain_text;
@@ -50,14 +46,12 @@ export interface VerificationResult {
 export function verifyExportCoverage(
   exportPath: string = path.resolve(process.cwd(), EXPORT_FILENAME)
 ): VerificationResult {
-  // eslint-disable-next-line security/detect-non-literal-fs-filename
   if (!fs.existsSync(exportPath)) {
     throw new Error(
       `Notion export file not found at ${exportPath}. Run bun notion:export first.`
     );
   }
 
-  // eslint-disable-next-line security/detect-non-literal-fs-filename
   const payload = JSON.parse(fs.readFileSync(exportPath, "utf8"));
   const results: NotionPage[] = payload.results ?? [];
   const readyPages = results.filter(isReadyToPublish);
diff --git a/scripts/notion-translate/index.ts b/scripts/notion-translate/index.ts
index 03773771..c2c2c4c2 100644
--- a/scripts/notion-translate/index.ts
+++ b/scripts/notion-translate/index.ts
@@ -38,6 +38,7 @@ import {
   validateAndFixRemainingImages,
   extractImageMatches,
 } from "../notion-fetch/imageReplacer.js";
+import { createSafeSlug } from "../notion-fetch/slugUtils.js";
 
 const LEGACY_SECTION_PROPERTY = "Section";
 const PARENT_ITEM_PROPERTY = "Parent item";
@@ -584,11 +585,7 @@ const NOTION_IMAGE_URL_FAMILY_REGEX = new RegExp(
  * image filenames remain consistent with markdown filenames.
  */
 function generateSafeFilename(title: string, pageId: string): string {
-  const baseSlug = title
-    .toLowerCase()
-    .replace(/\s+/g, "-")
-    .replace(/[^a-z0-9-]/g, "")
-    .substring(0, MAX_SLUG_LENGTH);
+  const baseSlug = createSafeSlug(title).substring(0, MAX_SLUG_LENGTH);
   const stablePageId = pageId.toLowerCase().replace(/[^a-z0-9]/g, "");
   const deterministicBase = baseSlug || "untitled";
   return `${deterministicBase}-${stablePageId}`;

From 37a372e611575ebc414fcff8caaffcadbbb3cdd8 Mon Sep 17 00:00:00 2001
From: luandro <luandro@digital-democracy.org>
Date: Thu, 19 Mar 2026 17:52:29 -0300
Subject: [PATCH 02/15] fix(slugs): flatten nested doc paths and skip links in
 code blocks

- normalizeDocPathname now uses only the last path segment, matching
  the flat slug shape that buildFrontmatter() generates (slug: /${safeSlug}).
  Multi-segment paths like /docs/Category/Page previously resolved to
  /docs/category/page which does not exist, causing 404s.
- normalizeInternalDocLinks now masks fenced code blocks and inline
  code before rewriting links, so Markdown link examples inside code
  fences are no longer altered.
- Update test for nested path to expect flat slug output.
- Add tests for code-fence and inline-code protection.
---
 scripts/notion-fetch/linkNormalizer.test.ts | 18 +++++++++--
 scripts/notion-fetch/linkNormalizer.ts      | 34 ++++++++++++++++-----
 2 files changed, 43 insertions(+), 9 deletions(-)

diff --git a/scripts/notion-fetch/linkNormalizer.test.ts b/scripts/notion-fetch/linkNormalizer.test.ts
index e9b7c88e..855a6a98 100644
--- a/scripts/notion-fetch/linkNormalizer.test.ts
+++ b/scripts/notion-fetch/linkNormalizer.test.ts
@@ -51,10 +51,24 @@ describe("linkNormalizer", () => {
       expect(result).toBe(input);
     });
 
-    it("should normalize each path segment in a nested docs path and add a locale prefix", () => {
+    it("should flatten a nested docs path to only the last segment (slug shape)", () => {
       const input = "[link](/docs/Category Name/Sub Page)";
       const result = normalizeInternalDocLinks(input, "pt");
-      expect(result).toBe("[link](/pt/docs/category-name/sub-page)");
+      // buildFrontmatter() writes slug: /${safeSlug} (single level), so the
+      // public URL is /pt/docs/sub-page, not /pt/docs/category-name/sub-page.
+      expect(result).toBe("[link](/pt/docs/sub-page)");
+    });
+
+    it("should not rewrite links inside a fenced code block", () => {
+      const input = "```\n[example](/docs/Guía Rápida)\n```";
+      const result = normalizeInternalDocLinks(input, "en");
+      expect(result).toBe(input);
+    });
+
+    it("should not rewrite links inside inline code", () => {
+      const input = "Use `[link](/docs/Guía Rápida)` as an example.";
+      const result = normalizeInternalDocLinks(input, "en");
+      expect(result).toBe(input);
     });
 
     it("should normalize multiple docs links on a single line", () => {
diff --git a/scripts/notion-fetch/linkNormalizer.ts b/scripts/notion-fetch/linkNormalizer.ts
index 4e384f2e..96348c0f 100644
--- a/scripts/notion-fetch/linkNormalizer.ts
+++ b/scripts/notion-fetch/linkNormalizer.ts
@@ -18,14 +18,17 @@ function normalizeDocPathname(pathname: string): string {
     .slice("/docs/".length)
     .split("/")
     .filter(Boolean);
-  const normalizedSegments = rawSegments.map((segment) =>
-    createSafeSlug(safeDecode(segment))
-  );
 
-  const normalizedPath = normalizedSegments.length
-    ? `/docs/${normalizedSegments.join("/")}`
-    : "/docs";
+  // buildFrontmatter() always writes slug: /${safeSlug} (single level), so
+  // parent folder segments do not appear in the public URL. Only the last
+  // segment is the actual page slug; preserving parent segments produces a
+  // path that does not exist and results in a 404.
+  const lastSegment = rawSegments[rawSegments.length - 1];
+  if (!lastSegment) {
+    return "/docs";
+  }
 
+  const normalizedPath = `/docs/${createSafeSlug(safeDecode(lastSegment))}`;
   return hasTrailingSlash ? `${normalizedPath}/` : normalizedPath;
 }
 
@@ -48,7 +51,20 @@ export function normalizeInternalDocLinks(
     return content;
   }
 
-  return content.replace(
+  // Mask code fences and inline code so links inside literal examples are not
+  // rewritten. Uses the same placeholder strategy as sanitizeMarkdownContent.
+  const codeBlocks: string[] = [];
+  const codeSpans: string[] = [];
+  let masked = content.replace(/^```[^\n]*\n[\s\S]*?^```/gm, (m) => {
+    codeBlocks.push(m);
+    return `__CODEBLOCK_${codeBlocks.length - 1}__`;
+  });
+  masked = masked.replace(/`[^`\n]*`/g, (m) => {
+    codeSpans.push(m);
+    return `__CODESPAN_${codeSpans.length - 1}__`;
+  });
+
+  const result = masked.replace(
     MARKDOWN_LINK_REGEX,
     (match, prefix: string, text: string, rawTarget: string) => {
       const trimmedTarget = rawTarget.trim();
@@ -63,4 +79,8 @@ export function normalizeInternalDocLinks(
       return `${prefix}[${text}](${normalizeDocTarget(target, lang)}${titleSuffix})`;
     }
   );
+
+  return result
+    .replace(/__CODESPAN_(\d+)__/g, (_, i) => codeSpans[parseInt(i, 10)])
+    .replace(/__CODEBLOCK_(\d+)__/g, (_, i) => codeBlocks[parseInt(i, 10)]);
 }

From d85d885f06c990ad57799f904154edb43e8b3eac Mon Sep 17 00:00:00 2001
From: luandro <luandro@digital-democracy.org>
Date: Thu, 19 Mar 2026 17:57:09 -0300
Subject: [PATCH 03/15] fix(slugs): flatten nested doc paths and skip links in
 code blocks

- normalizeDocPathname now uses only the last path segment, matching
  the flat slug shape buildFrontmatter() generates (slug: /${safeSlug}).
  Multi-segment paths like /docs/Category/Page previously resolved to
  /docs/category/page which does not exist, causing 404s.
- normalizeInternalDocLinks now masks fenced code blocks and inline
  code before rewriting links, so Markdown link examples inside code
  fences are no longer altered.
- Refactor mask/restore logic into dedicated maskCode/restoreCode helpers.
- Update test for nested path to expect flat slug output.
- Add tests for code-fence and inline-code protection.
---
 scripts/notion-fetch/linkNormalizer.ts | 57 ++++++++++++++++++--------
 1 file changed, 41 insertions(+), 16 deletions(-)

diff --git a/scripts/notion-fetch/linkNormalizer.ts b/scripts/notion-fetch/linkNormalizer.ts
index 96348c0f..648c5e94 100644
--- a/scripts/notion-fetch/linkNormalizer.ts
+++ b/scripts/notion-fetch/linkNormalizer.ts
@@ -12,6 +12,44 @@ function safeDecode(s: string): string {
   }
 }
 
+function maskCode(content: string): {
+  maskedContent: string;
+  codeBlocks: string[];
+  codeSpans: string[];
+} {
+  const codeBlocks: string[] = [];
+  const codeSpans: string[] = [];
+
+  const maskedBlocks = content.replace(
+    /^```[^\n]*\n[\s\S]*?^```/gm,
+    (match) => {
+      codeBlocks.push(match);
+      return `__LINK_NORMALIZER_CODEBLOCK_${codeBlocks.length - 1}__`;
+    }
+  );
+
+  const maskedContent = maskedBlocks.replace(/`[^`\n]*`/g, (match) => {
+    codeSpans.push(match);
+    return `__LINK_NORMALIZER_CODESPAN_${codeSpans.length - 1}__`;
+  });
+
+  return { maskedContent, codeBlocks, codeSpans };
+}
+
+function restoreCode(
+  content: string,
+  codeBlocks: string[],
+  codeSpans: string[]
+): string {
+  return content
+    .replace(/__LINK_NORMALIZER_CODESPAN_(\d+)__/g, (_match, index) => {
+      return codeSpans[Number(index)];
+    })
+    .replace(/__LINK_NORMALIZER_CODEBLOCK_(\d+)__/g, (_match, index) => {
+      return codeBlocks[Number(index)];
+    });
+}
+
 function normalizeDocPathname(pathname: string): string {
   const hasTrailingSlash = pathname.endsWith("/") && pathname !== "/docs/";
   const rawSegments = pathname
@@ -51,20 +89,9 @@ export function normalizeInternalDocLinks(
     return content;
   }
 
-  // Mask code fences and inline code so links inside literal examples are not
-  // rewritten. Uses the same placeholder strategy as sanitizeMarkdownContent.
-  const codeBlocks: string[] = [];
-  const codeSpans: string[] = [];
-  let masked = content.replace(/^```[^\n]*\n[\s\S]*?^```/gm, (m) => {
-    codeBlocks.push(m);
-    return `__CODEBLOCK_${codeBlocks.length - 1}__`;
-  });
-  masked = masked.replace(/`[^`\n]*`/g, (m) => {
-    codeSpans.push(m);
-    return `__CODESPAN_${codeSpans.length - 1}__`;
-  });
+  const { maskedContent, codeBlocks, codeSpans } = maskCode(content);
 
-  const result = masked.replace(
+  const normalizedContent = maskedContent.replace(
     MARKDOWN_LINK_REGEX,
     (match, prefix: string, text: string, rawTarget: string) => {
       const trimmedTarget = rawTarget.trim();
@@ -80,7 +107,5 @@ export function normalizeInternalDocLinks(
     }
   );
 
-  return result
-    .replace(/__CODESPAN_(\d+)__/g, (_, i) => codeSpans[parseInt(i, 10)])
-    .replace(/__CODEBLOCK_(\d+)__/g, (_, i) => codeBlocks[parseInt(i, 10)]);
+  return restoreCode(normalizedContent, codeBlocks, codeSpans);
 }

From c2f22f1e6665c674892110b4f2cb3b861fcdcc7b Mon Sep 17 00:00:00 2001
From: luandro <luandro@digital-democracy.org>
Date: Thu, 19 Mar 2026 18:05:21 -0300
Subject: [PATCH 04/15] fix(slugs): mask indented code fences before link
 normalization and heading injection

Fix code-fence masking regex in linkNormalizer and contentSanitizer to
allow leading whitespace (^[ \t]*```) so indented fences (e.g. inside
list items or admonitions) are also protected before link normalization
and heading ID injection.
Add test for indented code fence protection.
---
 scripts/notion-fetch/contentSanitizer.ts    | 4 ++--
 scripts/notion-fetch/linkNormalizer.test.ts | 6 ++++++
 scripts/notion-fetch/linkNormalizer.ts      | 2 +-
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/scripts/notion-fetch/contentSanitizer.ts b/scripts/notion-fetch/contentSanitizer.ts
index 070fb1ff..bd783e84 100644
--- a/scripts/notion-fetch/contentSanitizer.ts
+++ b/scripts/notion-fetch/contentSanitizer.ts
@@ -79,7 +79,7 @@ function maskCodeFences(content: string): {
   const codeBlockPlaceholders: string[] = [];
 
   const maskedContent = content.replace(
-    /^```[^\n]*\n[\s\S]*?^```/gm,
+    /^[ \t]*```[^\n]*\n[\s\S]*?^[ \t]*```/gm,
     (match) => {
       codeBlocks.push(match);
       const placeholder = `__CODEBLOCK_${codeBlocks.length - 1}__`;
@@ -157,7 +157,7 @@ export function sanitizeMarkdownContent(content: string): string {
   const codeSpans: string[] = [];
   const codeBlockPlaceholders: string[] = [];
 
-  content = content.replace(/^```[^\n]*\n[\s\S]*?^```/gm, (m) => {
+  content = content.replace(/^[ \t]*```[^\n]*\n[\s\S]*?^[ \t]*```/gm, (m) => {
     codeBlocks.push(m);
     const placeholder = `__CODEBLOCK_${codeBlocks.length - 1}__`;
     codeBlockPlaceholders.push(placeholder);
diff --git a/scripts/notion-fetch/linkNormalizer.test.ts b/scripts/notion-fetch/linkNormalizer.test.ts
index 855a6a98..ff93150a 100644
--- a/scripts/notion-fetch/linkNormalizer.test.ts
+++ b/scripts/notion-fetch/linkNormalizer.test.ts
@@ -65,6 +65,12 @@ describe("linkNormalizer", () => {
       expect(result).toBe(input);
     });
 
+    it("should not rewrite links inside an indented fenced code block", () => {
+      const input = "  ```\n  [example](/docs/Guía Rápida)\n  ```";
+      const result = normalizeInternalDocLinks(input, "en");
+      expect(result).toBe(input);
+    });
+
     it("should not rewrite links inside inline code", () => {
       const input = "Use `[link](/docs/Guía Rápida)` as an example.";
       const result = normalizeInternalDocLinks(input, "en");
diff --git a/scripts/notion-fetch/linkNormalizer.ts b/scripts/notion-fetch/linkNormalizer.ts
index 648c5e94..87303207 100644
--- a/scripts/notion-fetch/linkNormalizer.ts
+++ b/scripts/notion-fetch/linkNormalizer.ts
@@ -21,7 +21,7 @@ function maskCode(content: string): {
   const codeSpans: string[] = [];
 
   const maskedBlocks = content.replace(
-    /^```[^\n]*\n[\s\S]*?^```/gm,
+    /^[ \t]*```[^\n]*\n[\s\S]*?^[ \t]*```/gm,
     (match) => {
       codeBlocks.push(match);
       return `__LINK_NORMALIZER_CODEBLOCK_${codeBlocks.length - 1}__`;

From c6ddcaa00c63bfa67d0cc97bd85e1926218ee37d Mon Sep 17 00:00:00 2001
From: CoMapeo Content Bot <content-bot@example.com>
Date: Thu, 19 Mar 2026 18:37:06 -0300
Subject: [PATCH 05/15] test(notion): align normalization expectations

---
 bun-tests/vitest-bridge.test.ts                            | 4 +++-
 eslint.config.mjs                                          | 2 +-
 scripts/notion-fetch/__tests__/retry-loop-behavior.test.ts | 1 +
 scripts/notion-fetch/generateBlocks.test.ts                | 2 +-
 scripts/notion-fetch/page-ordering.test.ts                 | 1 +
 scripts/notion-translate/imageStabilization.test.ts        | 2 +-
 6 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/bun-tests/vitest-bridge.test.ts b/bun-tests/vitest-bridge.test.ts
index a096b2fb..8e57ab74 100644
--- a/bun-tests/vitest-bridge.test.ts
+++ b/bun-tests/vitest-bridge.test.ts
@@ -24,5 +24,7 @@ test(
   () => {
     runVitest();
   },
-  { timeout: 120_000 }
+  // The full Vitest suite can take just under two minutes on this repo, and
+  // Bun's own test harness adds enough overhead that 120s is too tight.
+  { timeout: 300_000 }
 );
diff --git a/eslint.config.mjs b/eslint.config.mjs
index d03fda21..51a77904 100644
--- a/eslint.config.mjs
+++ b/eslint.config.mjs
@@ -32,7 +32,7 @@ const eslintConfig = [
   // Docusaurus specific configurations
   {
     files: ["**/*.{js,mjs,cjs,ts,jsx,tsx}"],
-    ignores: ["scripts/**", "api-server/**"], // Ignore scripts and api-server directories for docusaurus rules
+    ignores: ["scripts/**", "api-server/**", "bun-tests/**"], // Ignore non-Docusaurus runtime directories for docusaurus/react rules
     plugins: {
       "@docusaurus": docusaurusPlugin,
       react: pluginReact,
diff --git a/scripts/notion-fetch/__tests__/retry-loop-behavior.test.ts b/scripts/notion-fetch/__tests__/retry-loop-behavior.test.ts
index 61cdb372..622c6456 100644
--- a/scripts/notion-fetch/__tests__/retry-loop-behavior.test.ts
+++ b/scripts/notion-fetch/__tests__/retry-loop-behavior.test.ts
@@ -93,6 +93,7 @@ vi.mock("../imageProcessor", () => ({
 
 vi.mock("../utils", () => ({
   sanitizeMarkdownContent: vi.fn((content) => content),
+  injectExplicitHeadingIds: vi.fn((content) => content),
   compressImageToFileWithFallback: vi.fn().mockResolvedValue({
     finalSize: 512,
     usedFallback: false,
diff --git a/scripts/notion-fetch/generateBlocks.test.ts b/scripts/notion-fetch/generateBlocks.test.ts
index c2c937c6..5d131524 100644
--- a/scripts/notion-fetch/generateBlocks.test.ts
+++ b/scripts/notion-fetch/generateBlocks.test.ts
@@ -490,7 +490,7 @@ describe("generateBlocks", () => {
       );
       expect(englishOutput?.[1]).toContain("[relative](./Guía Local#Título)");
       expect(portugueseOutput?.[1]).toContain(
-        "[nested](/pt/docs/category-name/sub-pagina#titulo-dos)"
+        "[nested](/pt/docs/sub-pagina#titulo-dos)"
       );
       expect(spanishOutput?.[1]).toContain(
         "[doc](/es/docs/guia-rapida#titulo-uno)"
diff --git a/scripts/notion-fetch/page-ordering.test.ts b/scripts/notion-fetch/page-ordering.test.ts
index 5bb96c83..4d0fa461 100644
--- a/scripts/notion-fetch/page-ordering.test.ts
+++ b/scripts/notion-fetch/page-ordering.test.ts
@@ -103,6 +103,7 @@ vi.mock("./imageProcessor", () => ({
 
 vi.mock("./utils", () => ({
   sanitizeMarkdownContent: vi.fn((content) => content),
+  injectExplicitHeadingIds: vi.fn((content) => content),
   compressImageToFileWithFallback: vi.fn(),
   detectFormatFromBuffer: vi.fn(() => "jpeg"),
   formatFromContentType: vi.fn(() => "jpeg"),
diff --git a/scripts/notion-translate/imageStabilization.test.ts b/scripts/notion-translate/imageStabilization.test.ts
index 514a5d92..946c9194 100644
--- a/scripts/notion-translate/imageStabilization.test.ts
+++ b/scripts/notion-translate/imageStabilization.test.ts
@@ -906,7 +906,7 @@ describe("image stabilization in translation pipeline", () => {
 
       expect(mockProcessAndReplaceImages).toHaveBeenCalledWith(
         expect.any(String),
-        "hllo-wrld-pageid1"
+        "hello-world-pageid1"
       );
     });
 

From 62724316e34d713cd8b22fa991b0259560aee104 Mon Sep 17 00:00:00 2001
From: luandro <luandro@digital-democracy.org>
Date: Thu, 19 Mar 2026 19:02:20 -0300
Subject: [PATCH 06/15] fix(notion-fetch): handle explicit heading IDs and
 empty filenames

Co-authored-by: Junie <junie@jetbrains.com>
---
 scripts/notion-fetch/contentSanitizer.ts | 10 ++++++++--
 scripts/notion-fetch/generateBlocks.ts   |  2 +-
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/scripts/notion-fetch/contentSanitizer.ts b/scripts/notion-fetch/contentSanitizer.ts
index bd783e84..d772fa3b 100644
--- a/scripts/notion-fetch/contentSanitizer.ts
+++ b/scripts/notion-fetch/contentSanitizer.ts
@@ -117,12 +117,18 @@ export function injectExplicitHeadingIds(content: string): string {
   const lines = maskedContent.split("\n");
   const updatedLines = lines.map((line) => {
     if (
-      codeBlockPlaceholders.some((placeholder) => line.includes(placeholder)) ||
-      /\s\{#[^}]+\}\s*$/.test(line)
+      codeBlockPlaceholders.some((placeholder) => line.includes(placeholder))
     ) {
       return line;
     }
 
+    const explicitIdMatch = line.match(/\s\{#([^}]+)\}\s*$/);
+    if (explicitIdMatch) {
+      const explicitId = explicitIdMatch[1];
+      headingCounts.set(explicitId, (headingCounts.get(explicitId) ?? 0) + 1);
+      return line;
+    }
+
     const headingMatch = line.match(/^(\s{0,3})(#{1,6})\s+(.+?)\s*$/);
     if (!headingMatch) {
       return line;
diff --git a/scripts/notion-fetch/generateBlocks.ts b/scripts/notion-fetch/generateBlocks.ts
index 94d4ef6d..e514676f 100644
--- a/scripts/notion-fetch/generateBlocks.ts
+++ b/scripts/notion-fetch/generateBlocks.ts
@@ -894,7 +894,7 @@ export async function generateBlocks(
           ? sectionTypeRaw.trim()
           : String(sectionTypeRaw ?? "").trim();
       const normalizedSectionType = sectionTypeString.toLowerCase();
-      const filename = createSafeSlug(title);
+      const filename = createSafeSlug(title) || "untitled";
 
       const orderedLocales = getOrderedLocales(Object.keys(pageByLang.content));
       for (const lang of orderedLocales) {

From ef855789ff28c4a257dde261d5d6ce1b72c82bf0 Mon Sep 17 00:00:00 2001
From: luandro <luandro@digital-democracy.org>
Date: Thu, 19 Mar 2026 21:38:17 -0300
Subject: [PATCH 07/15] fix(slugs): preserve CJK and Unicode letters in slug
 generation

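Replace ASCII-only regex with Unicode property escapes (\p{L}\p{N})
so CJK and other non-ASCII letters and digits are retained in slugs
instead of being stripped. Accented Latin letters are still reduced to
their base letter by the existing NFD normalization step.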
Update tests to reflect corrected behavior. Extend ESLint config to cover bun-tests/.
---
 eslint.config.mjs                      | 6 +++++-
 scripts/notion-fetch/slugUtils.test.ts | 8 ++++----
 scripts/notion-fetch/slugUtils.ts      | 2 +-
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/eslint.config.mjs b/eslint.config.mjs
index 51a77904..83430e4d 100644
--- a/eslint.config.mjs
+++ b/eslint.config.mjs
@@ -74,7 +74,11 @@ const eslintConfig = [
 
   // Scripts and API server specific configurations
   {
-    files: ["scripts/**/*.{js,mjs,cjs,ts}", "api-server/**/*.{js,mjs,cjs,ts}"],
+    files: [
+      "scripts/**/*.{js,mjs,cjs,ts}",
+      "api-server/**/*.{js,mjs,cjs,ts}",
+      "bun-tests/**/*.{js,mjs,cjs,ts}",
+    ],
     plugins: {
       import: importPlugin,
       promise: promisePlugin,
diff --git a/scripts/notion-fetch/slugUtils.test.ts b/scripts/notion-fetch/slugUtils.test.ts
index 35fc0bde..813b3478 100644
--- a/scripts/notion-fetch/slugUtils.test.ts
+++ b/scripts/notion-fetch/slugUtils.test.ts
@@ -43,12 +43,12 @@ describe("slugUtils", () => {
       expect(createSafeSlug("--hello--")).toBe("hello");
     });
 
-    it("should produce an empty string for CJK-only input (known limitation)", () => {
-      expect(createSafeSlug("安装指南")).toBe("");
+    it("should preserve CJK input", () => {
+      expect(createSafeSlug("安装指南")).toBe("安装指南");
     });
 
-    it("should extract only the Latin portion from mixed CJK and Latin input", () => {
-      expect(createSafeSlug("安装 Setup 指南")).toBe("setup");
+    it("should extract both CJK and Latin from mixed input", () => {
+      expect(createSafeSlug("安装 Setup 指南")).toBe("安装-setup-指南");
     });
   });
 });
diff --git a/scripts/notion-fetch/slugUtils.ts b/scripts/notion-fetch/slugUtils.ts
index a4547422..d0f473ab 100644
--- a/scripts/notion-fetch/slugUtils.ts
+++ b/scripts/notion-fetch/slugUtils.ts
@@ -5,7 +5,7 @@ export function createSafeSlug(text: string): string {
     .toLowerCase()
     .trim()
     .replace(/\s+/g, "-")
-    .replace(/[^a-z0-9-]/g, "")
+    .replace(/[^\p{L}\p{N}-]/gu, "")
     .replace(/-+/g, "-")
     .replace(/^-+|-+$/g, "");
 }

From 8f9ea2790e5828ff6d63d823960e853016e2a93c Mon Sep 17 00:00:00 2001
From: luandro <luandro@digital-democracy.org>
Date: Fri, 20 Mar 2026 08:27:39 -0300
Subject: [PATCH 08/15] fix(notion-fetch): align code-fence regex with
 CommonMark and fix heading counter for explicit IDs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Replace `[ \t]*` with ` {0,3}` in code-fence masks across contentSanitizer and linkNormalizer, matching CommonMark's 0–3 space rule for fenced blocks
- Register the text-derived baseId in headingCounts when a heading already carries an explicit {#id}, preventing incorrect -0 suffixes on subsequent duplicate headings
- Suppress security/detect-non-literal-fs-filename ESLint warnings in verifyExportCoverage where the path parameter is already validated
---
 scripts/notion-fetch/contentSanitizer.ts | 19 +++++++++++++++++--
 scripts/notion-fetch/linkNormalizer.ts   |  2 +-
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/scripts/notion-fetch/contentSanitizer.ts b/scripts/notion-fetch/contentSanitizer.ts
index d772fa3b..5efb11e0 100644
--- a/scripts/notion-fetch/contentSanitizer.ts
+++ b/scripts/notion-fetch/contentSanitizer.ts
@@ -79,7 +79,7 @@ function maskCodeFences(content: string): {
   const codeBlockPlaceholders: string[] = [];
 
   const maskedContent = content.replace(
-    /^[ \t]*```[^\n]*\n[\s\S]*?^[ \t]*```/gm,
+    /^ {0,3}```[^\n]*\n[\s\S]*?^ {0,3}```/gm,
     (match) => {
       codeBlocks.push(match);
       const placeholder = `__CODEBLOCK_${codeBlocks.length - 1}__`;
@@ -122,6 +122,21 @@ export function injectExplicitHeadingIds(content: string): string {
       return line;
     }
 
+    const fullMatch = line.match(
+      /^(\s{0,3})(#{1,6})\s+(.+?)\s*\{#([^}]+)\}\s*$/
+    );
+    if (fullMatch) {
+      const [, , , headingText, explicitId] = fullMatch;
+      const baseId = createSafeSlug(headingText);
+      if (baseId) {
+        headingCounts.set(baseId, (headingCounts.get(baseId) ?? 0) + 1);
+      }
+      if (explicitId !== baseId) {
+        headingCounts.set(explicitId, (headingCounts.get(explicitId) ?? 0) + 1);
+      }
+      return line;
+    }
+
     const explicitIdMatch = line.match(/\s\{#([^}]+)\}\s*$/);
     if (explicitIdMatch) {
       const explicitId = explicitIdMatch[1];
@@ -163,7 +178,7 @@ export function sanitizeMarkdownContent(content: string): string {
   const codeSpans: string[] = [];
   const codeBlockPlaceholders: string[] = [];
 
-  content = content.replace(/^[ \t]*```[^\n]*\n[\s\S]*?^[ \t]*```/gm, (m) => {
+  content = content.replace(/^ {0,3}```[^\n]*\n[\s\S]*?^ {0,3}```/gm, (m) => {
     codeBlocks.push(m);
     const placeholder = `__CODEBLOCK_${codeBlocks.length - 1}__`;
     codeBlockPlaceholders.push(placeholder);
diff --git a/scripts/notion-fetch/linkNormalizer.ts b/scripts/notion-fetch/linkNormalizer.ts
index 87303207..0d2a864c 100644
--- a/scripts/notion-fetch/linkNormalizer.ts
+++ b/scripts/notion-fetch/linkNormalizer.ts
@@ -21,7 +21,7 @@ function maskCode(content: string): {
   const codeSpans: string[] = [];
 
   const maskedBlocks = content.replace(
-    /^[ \t]*```[^\n]*\n[\s\S]*?^[ \t]*```/gm,
+    /^ {0,3}```[^\n]*\n[\s\S]*?^ {0,3}```/gm,
     (match) => {
       codeBlocks.push(match);
       return `__LINK_NORMALIZER_CODEBLOCK_${codeBlocks.length - 1}__`;

From 6c80e00b080e87220ad49a84d757a890686289ea Mon Sep 17 00:00:00 2001
From: luandro <luandro@digital-democracy.org>
Date: Fri, 20 Mar 2026 08:48:58 -0300
Subject: [PATCH 09/15] fix(notion-fetch): avoid heading ID collisions with
 explicit or natural slugs

Heading ID generator now skips IDs already claimed by explicit headings
or naturally-occurring slugs, preventing duplicate anchors.
---
 scripts/notion-fetch/contentSanitizer.test.ts | 13 +++++++++++++
 scripts/notion-fetch/contentSanitizer.ts      | 15 ++++++++++++---
 2 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/scripts/notion-fetch/contentSanitizer.test.ts b/scripts/notion-fetch/contentSanitizer.test.ts
index f1d3315c..cd52bc9d 100644
--- a/scripts/notion-fetch/contentSanitizer.test.ts
+++ b/scripts/notion-fetch/contentSanitizer.test.ts
@@ -258,5 +258,18 @@ echo "# Not a heading"
       expect(result).toContain("## Otro Título {#otro-titulo}");
       expect(result).not.toContain("## Código Único {#codigo-unico}");
     });
+
+    it("should avoid collisions between auto-incremented and explicit IDs", () => {
+      const input = ["## Título", "## Heading {#titulo-1}", "## Título"].join(
+        "\n"
+      );
+
+      const result = scriptModule.injectExplicitHeadingIds(input);
+
+      expect(result).toContain("## Título {#titulo}");
+      expect(result).toContain("## Heading {#titulo-1}");
+      // The second "Título" must NOT get titulo-1 (already claimed), should get titulo-2
+      expect(result).toContain("## Título {#titulo-2}");
+    });
   });
 });
diff --git a/scripts/notion-fetch/contentSanitizer.ts b/scripts/notion-fetch/contentSanitizer.ts
index 5efb11e0..b51d0bcf 100644
--- a/scripts/notion-fetch/contentSanitizer.ts
+++ b/scripts/notion-fetch/contentSanitizer.ts
@@ -155,9 +155,18 @@ export function injectExplicitHeadingIds(content: string): string {
       return line;
     }
 
-    const currentCount = headingCounts.get(baseId) ?? 0;
-    headingCounts.set(baseId, currentCount + 1);
-    const headingId = currentCount === 0 ? baseId : `${baseId}-${currentCount}`;
+    let counter = headingCounts.get(baseId) ?? 0;
+    let headingId = counter === 0 ? baseId : `${baseId}-${counter}`;
+    // Skip IDs already claimed by explicit headings or natural slugs
+    while (counter > 0 && headingCounts.has(headingId)) {
+      counter++;
+      headingId = `${baseId}-${counter}`;
+    }
+    headingCounts.set(baseId, counter + 1);
+    // Also register the generated ID so future headings won't collide with it
+    if (headingId !== baseId) {
+      headingCounts.set(headingId, (headingCounts.get(headingId) ?? 0) + 1);
+    }
 
     return `${leadingWhitespace}${hashes} ${headingText} {#${headingId}}`;
   });

From ebb61734a6c19fcdb5feba8d6f7f7a0d1de68b8f Mon Sep 17 00:00:00 2001
From: luandro <luandro@digital-democracy.org>
Date: Fri, 20 Mar 2026 09:06:27 -0300
Subject: [PATCH 10/15] docs: add CHANGELOG.md for PR 170

Co-authored-by: Junie <junie@jetbrains.com>
---
 CHANGELOG.md | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 CHANGELOG.md

diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 00000000..5aa7d68c
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,17 @@
+# Changelog - PR 170
+
+## ✨ New Features
+
+- **Slug Normalization**: Accented slugs are now normalized and locale-prefixed link references are supported.
+
+## 🐛 Fixes
+
+- **Doc Paths**: Flattened nested document paths.
+- **Link Normalization**: Links inside code blocks and indented code fences are now properly skipped during link normalization.
+- **Heading IDs**: Explicit heading IDs are now reserved so auto-generated IDs no longer collide with them, and pages whose title produces an empty slug fall back to the `untitled` filename.
+- **Slug Generation**: Preserved CJK and Unicode letters in slug generation.
+- **Code Fences**: Aligned code-fence regex with CommonMark standard.
+
+## 🧪 Testing
+
+- **Normalization**: Aligned tests with new normalization expectations.

From 9145ce4a878f877fc47ea9dbb3ea1f123aed52b0 Mon Sep 17 00:00:00 2001
From: luandro <luandro@digital-democracy.org>
Date: Thu, 26 Mar 2026 07:25:33 -0300
Subject: [PATCH 11/15] fix(notion-fetch): harden link normalization masking

---
 scripts/notion-fetch/linkNormalizer.test.ts |  18 +++
 scripts/notion-fetch/linkNormalizer.ts      | 124 +++++++++++++++++---
 2 files changed, 126 insertions(+), 16 deletions(-)

diff --git a/scripts/notion-fetch/linkNormalizer.test.ts b/scripts/notion-fetch/linkNormalizer.test.ts
index ff93150a..75b54d4e 100644
--- a/scripts/notion-fetch/linkNormalizer.test.ts
+++ b/scripts/notion-fetch/linkNormalizer.test.ts
@@ -71,12 +71,30 @@ describe("linkNormalizer", () => {
       expect(result).toBe(input);
     });
 
+    it("should not rewrite links inside a tilde fenced code block", () => {
+      const input = "~~~\n[example](/docs/Guía Rápida)\n~~~";
+      const result = normalizeInternalDocLinks(input, "en");
+      expect(result).toBe(input);
+    });
+
+    it("should not rewrite links inside an indented tilde fenced code block", () => {
+      const input = "  ~~~\n  [example](/docs/Guía Rápida)\n  ~~~";
+      const result = normalizeInternalDocLinks(input, "en");
+      expect(result).toBe(input);
+    });
+
     it("should not rewrite links inside inline code", () => {
       const input = "Use `[link](/docs/Guía Rápida)` as an example.";
       const result = normalizeInternalDocLinks(input, "en");
       expect(result).toBe(input);
     });
 
+    it("should not rewrite links inside multi-backtick inline code", () => {
+      const input = "Use ``[link](/docs/Guía Rápida)`` as an example.";
+      const result = normalizeInternalDocLinks(input, "en");
+      expect(result).toBe(input);
+    });
+
     it("should normalize multiple docs links on a single line", () => {
       const input = "[a](/docs/Foo) and [b](/docs/Bar)";
       const result = normalizeInternalDocLinks(input, "en");
diff --git a/scripts/notion-fetch/linkNormalizer.ts b/scripts/notion-fetch/linkNormalizer.ts
index 0d2a864c..0ee01c08 100644
--- a/scripts/notion-fetch/linkNormalizer.ts
+++ b/scripts/notion-fetch/linkNormalizer.ts
@@ -12,28 +12,113 @@ function safeDecode(s: string): string {
   }
 }
 
-function maskCode(content: string): {
+function maskFencedCodeBlocks(content: string): {
   maskedContent: string;
   codeBlocks: string[];
-  codeSpans: string[];
 } {
   const codeBlocks: string[] = [];
+  const lines = content.split("\n");
+  const output: string[] = [];
+
+  let inFence = false;
+  let fenceChar = "";
+  let fenceLength = 0;
+  let fencedBlock: string[] = [];
+
+  for (const line of lines) {
+    if (!inFence) {
+      const openMatch = /^ {0,3}(`{3,}|~{3,})(.*)$/.exec(line);
+      if (openMatch) {
+        inFence = true;
+        fenceChar = openMatch[1][0];
+        fenceLength = openMatch[1].length;
+        fencedBlock = [line];
+        continue;
+      }
+
+      output.push(line);
+      continue;
+    }
+
+    fencedBlock.push(line);
+
+    const closeMatch = /^ {0,3}([`~]{3,})\s*$/.exec(line);
+    if (
+      closeMatch &&
+      closeMatch[1][0] === fenceChar &&
+      closeMatch[1].length >= fenceLength
+    ) {
+      codeBlocks.push(fencedBlock.join("\n"));
+      output.push(`__LINK_NORMALIZER_CODEBLOCK_${codeBlocks.length - 1}__`);
+      inFence = false;
+      fenceChar = "";
+      fenceLength = 0;
+      fencedBlock = [];
+    }
+  }
+
+  if (inFence) {
+    output.push(fencedBlock.join("\n"));
+  }
+
+  return { maskedContent: output.join("\n"), codeBlocks };
+}
+
+function maskInlineCode(content: string): {
+  maskedContent: string;
+  codeSpans: string[];
+} {
   const codeSpans: string[] = [];
+  const output: string[] = [];
+
+  let index = 0;
 
-  const maskedBlocks = content.replace(
-    /^ {0,3}```[^\n]*\n[\s\S]*?^ {0,3}```/gm,
-    (match) => {
-      codeBlocks.push(match);
-      return `__LINK_NORMALIZER_CODEBLOCK_${codeBlocks.length - 1}__`;
+  while (index < content.length) {
+    const char = content.charAt(index);
+    if (char !== "`") {
+      output.push(char);
+      index++;
+      continue;
     }
-  );
 
-  const maskedContent = maskedBlocks.replace(/`[^`\n]*`/g, (match) => {
-    codeSpans.push(match);
-    return `__LINK_NORMALIZER_CODESPAN_${codeSpans.length - 1}__`;
-  });
+    let openerLength = 1;
+    while (content.charAt(index + openerLength) === "`") {
+      openerLength++;
+    }
+
+    let cursor = index + openerLength;
+    let closingIndex = -1;
+    while (cursor < content.length) {
+      if (content.charAt(cursor) !== "`") {
+        cursor++;
+        continue;
+      }
+
+      let runLength = 1;
+      while (content.charAt(cursor + runLength) === "`") {
+        runLength++;
+      }
+
+      if (runLength === openerLength) {
+        closingIndex = cursor;
+        break;
+      }
 
-  return { maskedContent, codeBlocks, codeSpans };
+      cursor += runLength;
+    }
+
+    if (closingIndex === -1) {
+      output.push(content.slice(index));
+      break;
+    }
+
+    const codeSpan = content.slice(index, closingIndex + openerLength);
+    codeSpans.push(codeSpan);
+    output.push(`__LINK_NORMALIZER_CODESPAN_${codeSpans.length - 1}__`);
+    index = closingIndex + openerLength;
+  }
+
+  return { maskedContent: output.join(""), codeSpans };
 }
 
 function restoreCode(
@@ -41,12 +126,17 @@ function restoreCode(
   codeBlocks: string[],
   codeSpans: string[]
 ): string {
+  const restoreByIndex = (values: string[], rawIndex: string) => {
+    const index = Number(rawIndex);
+    return Number.isInteger(index) ? (values.at(index) ?? "") : "";
+  };
+
   return content
     .replace(/__LINK_NORMALIZER_CODESPAN_(\d+)__/g, (_match, index) => {
-      return codeSpans[Number(index)];
+      return restoreByIndex(codeSpans, index);
     })
     .replace(/__LINK_NORMALIZER_CODEBLOCK_(\d+)__/g, (_match, index) => {
-      return codeBlocks[Number(index)];
+      return restoreByIndex(codeBlocks, index);
     });
 }
 
@@ -89,7 +179,9 @@ export function normalizeInternalDocLinks(
     return content;
   }
 
-  const { maskedContent, codeBlocks, codeSpans } = maskCode(content);
+  const { maskedContent: maskedBlocks, codeBlocks } =
+    maskFencedCodeBlocks(content);
+  const { maskedContent, codeSpans } = maskInlineCode(maskedBlocks);
 
   const normalizedContent = maskedContent.replace(
     MARKDOWN_LINK_REGEX,

From 513c1a538538a56ce9ce74250d4b81afc52a16b6 Mon Sep 17 00:00:00 2001
From: luandro <luandro@digital-democracy.org>
Date: Thu, 26 Mar 2026 07:29:32 -0300
Subject: [PATCH 12/15] fix(notion-fetch): harden content sanitization masking

---
 scripts/notion-fetch/contentSanitizer.test.ts |  33 +++
 scripts/notion-fetch/contentSanitizer.ts      | 244 ++++++++++++++----
 2 files changed, 232 insertions(+), 45 deletions(-)

diff --git a/scripts/notion-fetch/contentSanitizer.test.ts b/scripts/notion-fetch/contentSanitizer.test.ts
index cd52bc9d..cba54ea7 100644
--- a/scripts/notion-fetch/contentSanitizer.test.ts
+++ b/scripts/notion-fetch/contentSanitizer.test.ts
@@ -50,6 +50,18 @@ describe("contentSanitizer", () => {
       expect(result).toBe(input); // Should remain unchanged
     });
 
+    it("should preserve tilde fenced code blocks", () => {
+      const input = "~~~md\nconst obj = { key: 'value' };\n~~~";
+      const result = scriptModule.sanitizeMarkdownContent(input);
+      expect(result).toBe(input);
+    });
+
+    it("should preserve multi-backtick inline code spans", () => {
+      const input = "Use ``<link to section.>`` and ``{foo}``.";
+      const result = scriptModule.sanitizeMarkdownContent(input);
+      expect(result).toBe(input);
+    });
+
     it("should fix malformed <link to section.> patterns", () => {
       const input = "Check <link to section.> for details.";
       const result = scriptModule.sanitizeMarkdownContent(input);
@@ -259,6 +271,18 @@ echo "# Not a heading"
       expect(result).not.toContain("## Código Único {#codigo-unico}");
     });
 
+    it("should preserve headings inside tilde fenced code blocks", () => {
+      const input = ["~~~md", "## Código Único", "~~~", "## Otro Título"].join(
+        "\n"
+      );
+
+      const result = scriptModule.injectExplicitHeadingIds(input);
+
+      expect(result).toContain("~~~md\n## Código Único\n~~~");
+      expect(result).toContain("## Otro Título {#otro-titulo}");
+      expect(result).not.toContain("## Código Único {#codigo-unico}");
+    });
+
     it("should avoid collisions between auto-incremented and explicit IDs", () => {
       const input = ["## Título", "## Heading {#titulo-1}", "## Título"].join(
         "\n"
@@ -271,5 +295,14 @@ echo "# Not a heading"
       // The second "Título" must NOT get titulo-1 (already claimed), should get titulo-2
       expect(result).toContain("## Título {#titulo-2}");
     });
+
+    it("should reserve later explicit ids before assigning earlier auto-generated headings", () => {
+      const input = ["## My Id", "## Custom {#my-id}"].join("\n");
+
+      const result = scriptModule.injectExplicitHeadingIds(input);
+
+      expect(result).toContain("## My Id {#my-id-1}");
+      expect(result).toContain("## Custom {#my-id}");
+    });
   });
 });
diff --git a/scripts/notion-fetch/contentSanitizer.ts b/scripts/notion-fetch/contentSanitizer.ts
index b51d0bcf..e5857970 100644
--- a/scripts/notion-fetch/contentSanitizer.ts
+++ b/scripts/notion-fetch/contentSanitizer.ts
@@ -70,36 +70,182 @@ function fixHeadingHierarchy(
   return fixedLines.join("\n");
 }
 
-function maskCodeFences(content: string): {
+function createCodeBlockMasker(content: string): {
   content: string;
   codeBlocks: string[];
   codeBlockPlaceholders: string[];
 } {
+  const lines = content.split("\n");
   const codeBlocks: string[] = [];
   const codeBlockPlaceholders: string[] = [];
+  const maskedLines: string[] = [];
+
+  let inFence = false;
+  let fenceChar = "";
+  let fenceLength = 0;
+  let blockLines: string[] = [];
+
+  for (const line of lines) {
+    if (!inFence) {
+      const openingMatch = line.match(/^ {0,3}(`{3,}|~{3,})(.*)$/);
 
-  const maskedContent = content.replace(
-    /^ {0,3}```[^\n]*\n[\s\S]*?^ {0,3}```/gm,
-    (match) => {
-      codeBlocks.push(match);
+      if (!openingMatch) {
+        maskedLines.push(line);
+        continue;
+      }
+
+      const fence = openingMatch[1];
+      fenceChar = fence[0];
+      fenceLength = fence.length;
+      blockLines = [line];
+      inFence = true;
+      continue;
+    }
+
+    blockLines.push(line);
+
+    if (isClosingFenceLine(line, fenceChar, fenceLength)) {
+      codeBlocks.push(blockLines.join("\n"));
       const placeholder = `__CODEBLOCK_${codeBlocks.length - 1}__`;
       codeBlockPlaceholders.push(placeholder);
-      return placeholder;
+      maskedLines.push(placeholder);
+      inFence = false;
+      blockLines = [];
     }
-  );
+  }
+
+  if (inFence) {
+    codeBlocks.push(blockLines.join("\n"));
+    const placeholder = `__CODEBLOCK_${codeBlocks.length - 1}__`;
+    codeBlockPlaceholders.push(placeholder);
+    maskedLines.push(placeholder);
+  }
 
   return {
-    content: maskedContent,
+    content: maskedLines.join("\n"),
     codeBlocks,
     codeBlockPlaceholders,
   };
 }
 
+function isClosingFenceLine(
+  line: string,
+  fenceChar: string,
+  fenceLength: number
+): boolean {
+  let i = 0;
+
+  while (i < line.length && line.charAt(i) === " ") {
+    i++;
+  }
+
+  if (i > 3) {
+    return false;
+  }
+
+  let fenceCount = 0;
+  while (i < line.length && line.charAt(i) === fenceChar) {
+    fenceCount++;
+    i++;
+  }
+
+  if (fenceCount < fenceLength) {
+    return false;
+  }
+
+  while (i < line.length) {
+    if (line.charAt(i) !== " " && line.charAt(i) !== "\t") {
+      return false;
+    }
+    i++;
+  }
+
+  return true;
+}
+
+function maskCodeFences(content: string): {
+  content: string;
+  codeBlocks: string[];
+  codeBlockPlaceholders: string[];
+} {
+  return createCodeBlockMasker(content);
+}
+
 function restoreCodeFences(content: string, codeBlocks: string[]): string {
-  return content.replace(
-    /__CODEBLOCK_(\d+)__/g,
-    (_match, index) => codeBlocks[Number(index)]
-  );
+  let restoredContent = content;
+  for (const [index, codeBlock] of codeBlocks.entries()) {
+    restoredContent = restoredContent.replaceAll(
+      `__CODEBLOCK_${index}__`,
+      codeBlock
+    );
+  }
+  return restoredContent;
+}
+
+function maskInlineCodeSpans(content: string): {
+  content: string;
+  codeSpans: string[];
+} {
+  const codeSpans: string[] = [];
+  const output: string[] = [];
+
+  let i = 0;
+  while (i < content.length) {
+    const currentChar = content.charAt(i);
+    if (currentChar !== "`") {
+      output.push(currentChar);
+      i++;
+      continue;
+    }
+
+    let openingLength = 0;
+    while (
+      i + openingLength < content.length &&
+      content.charAt(i + openingLength) === "`"
+    ) {
+      openingLength++;
+    }
+
+    let scanIndex = i + openingLength;
+    let closingIndex = -1;
+    while (scanIndex < content.length) {
+      const nextBacktick = content.indexOf("`", scanIndex);
+      if (nextBacktick === -1) {
+        break;
+      }
+
+      let closingLength = 0;
+      while (
+        nextBacktick + closingLength < content.length &&
+        content.charAt(nextBacktick + closingLength) === "`"
+      ) {
+        closingLength++;
+      }
+
+      if (closingLength === openingLength) {
+        closingIndex = nextBacktick;
+        break;
+      }
+
+      scanIndex = nextBacktick + closingLength;
+    }
+
+    if (closingIndex === -1) {
+      output.push(content.slice(i, i + openingLength));
+      i += openingLength;
+      continue;
+    }
+
+    const codeSpan = content.slice(i, closingIndex + openingLength);
+    codeSpans.push(codeSpan);
+    output.push(`__CODESPAN_${codeSpans.length - 1}__`);
+    i = closingIndex + openingLength;
+  }
+
+  return {
+    content: output.join(""),
+    codeSpans,
+  };
 }
 
 export function injectExplicitHeadingIds(content: string): string {
@@ -112,14 +258,15 @@ export function injectExplicitHeadingIds(content: string): string {
     codeBlocks,
     codeBlockPlaceholders,
   } = maskCodeFences(content);
+  const reservedIds = new Set<string>();
   const headingCounts = new Map<string, number>();
 
   const lines = maskedContent.split("\n");
-  const updatedLines = lines.map((line) => {
+  for (const line of lines) {
     if (
       codeBlockPlaceholders.some((placeholder) => line.includes(placeholder))
     ) {
-      return line;
+      continue;
     }
 
     const fullMatch = line.match(
@@ -129,18 +276,32 @@ export function injectExplicitHeadingIds(content: string): string {
       const [, , , headingText, explicitId] = fullMatch;
       const baseId = createSafeSlug(headingText);
       if (baseId) {
-        headingCounts.set(baseId, (headingCounts.get(baseId) ?? 0) + 1);
+        reservedIds.add(baseId);
       }
-      if (explicitId !== baseId) {
-        headingCounts.set(explicitId, (headingCounts.get(explicitId) ?? 0) + 1);
+      if (explicitId) {
+        reservedIds.add(explicitId);
       }
-      return line;
+      continue;
     }
 
     const explicitIdMatch = line.match(/\s\{#([^}]+)\}\s*$/);
     if (explicitIdMatch) {
       const explicitId = explicitIdMatch[1];
-      headingCounts.set(explicitId, (headingCounts.get(explicitId) ?? 0) + 1);
+      reservedIds.add(explicitId);
+    }
+  }
+
+  const updatedLines = lines.map((line) => {
+    if (
+      codeBlockPlaceholders.some((placeholder) => line.includes(placeholder))
+    ) {
+      return line;
+    }
+
+    const explicitHeadingMatch = line.match(
+      /^(\s{0,3})(#{1,6})\s+(.+?)\s*\{#([^}]+)\}\s*$/
+    );
+    if (explicitHeadingMatch) {
       return line;
     }
 
@@ -157,15 +318,13 @@ export function injectExplicitHeadingIds(content: string): string {
 
     let counter = headingCounts.get(baseId) ?? 0;
     let headingId = counter === 0 ? baseId : `${baseId}-${counter}`;
-    // Skip IDs already claimed by explicit headings or natural slugs
-    while (counter > 0 && headingCounts.has(headingId)) {
+    while (reservedIds.has(headingId) || headingCounts.has(headingId)) {
       counter++;
       headingId = `${baseId}-${counter}`;
     }
     headingCounts.set(baseId, counter + 1);
-    // Also register the generated ID so future headings won't collide with it
     if (headingId !== baseId) {
-      headingCounts.set(headingId, (headingCounts.get(headingId) ?? 0) + 1);
+      headingCounts.set(headingId, 1);
     }
 
     return `${leadingWhitespace}${hashes} ${headingText} {#${headingId}}`;
@@ -183,20 +342,14 @@ export function sanitizeMarkdownContent(content: string): string {
   // Fix specific malformed patterns that cause MDX errors
 
   // 0. Mask code fences (```...```) and inline code (`...`) to avoid altering them
-  const codeBlocks: string[] = [];
-  const codeSpans: string[] = [];
-  const codeBlockPlaceholders: string[] = [];
-
-  content = content.replace(/^ {0,3}```[^\n]*\n[\s\S]*?^ {0,3}```/gm, (m) => {
-    codeBlocks.push(m);
-    const placeholder = `__CODEBLOCK_${codeBlocks.length - 1}__`;
-    codeBlockPlaceholders.push(placeholder);
-    return placeholder;
-  });
-  content = content.replace(/`[^`\n]*`/g, (m) => {
-    codeSpans.push(m);
-    return `__CODESPAN_${codeSpans.length - 1}__`;
-  });
+  const {
+    content: maskedContent,
+    codeBlocks,
+    codeBlockPlaceholders,
+  } = maskCodeFences(content);
+  const { content: maskedWithCodeSpans, codeSpans } =
+    maskInlineCodeSpans(maskedContent);
+  content = maskedWithCodeSpans;
 
   // 1. Fix heading hierarchy for proper TOC generation (after masking code blocks)
   content = fixHeadingHierarchy(content, codeBlockPlaceholders);
@@ -263,14 +416,15 @@ export function sanitizeMarkdownContent(content: string): string {
   }
 
   // 9. Restore masked code blocks and inline code
-  content = content.replace(
-    /__CODEBLOCK_(\d+)__/g,
-    (_m, i) => codeBlocks[Number(i)]
-  );
-  content = content.replace(
-    /__CODESPAN_(\d+)__/g,
-    (_m, i) => codeSpans[Number(i)]
-  );
+  content = restoreCodeFences(content, codeBlocks);
+  let restoredContent = content;
+  for (const [index, codeSpan] of codeSpans.entries()) {
+    restoredContent = restoredContent.replaceAll(
+      `__CODESPAN_${index}__`,
+      codeSpan
+    );
+  }
+  content = restoredContent;
 
   return content;
 }

From 89414b7fa2f7c85f6dac9ca958ac7c38560b7cd5 Mon Sep 17 00:00:00 2001
From: luandro <luandro@digital-democracy.org>
Date: Thu, 26 Mar 2026 07:35:53 -0300
Subject: [PATCH 13/15] fix(notion-fetch): close remaining masking edge cases

---
 scripts/notion-fetch/contentSanitizer.test.ts | 9 +++++++++
 scripts/notion-fetch/contentSanitizer.ts      | 6 +-----
 scripts/notion-fetch/linkNormalizer.test.ts   | 6 ++++++
 scripts/notion-fetch/linkNormalizer.ts        | 3 ++-
 4 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/scripts/notion-fetch/contentSanitizer.test.ts b/scripts/notion-fetch/contentSanitizer.test.ts
index cba54ea7..9b5d3e1c 100644
--- a/scripts/notion-fetch/contentSanitizer.test.ts
+++ b/scripts/notion-fetch/contentSanitizer.test.ts
@@ -304,5 +304,14 @@ echo "# Not a heading"
       expect(result).toContain("## My Id {#my-id-1}");
       expect(result).toContain("## Custom {#my-id}");
     });
+
+    it("should not reserve a natural slug when a later explicit id is custom", () => {
+      const input = ["## My Id", "## My Id {#custom}"].join("\n");
+
+      const result = scriptModule.injectExplicitHeadingIds(input);
+
+      expect(result).toContain("## My Id {#my-id}");
+      expect(result).toContain("## My Id {#custom}");
+    });
   });
 });
diff --git a/scripts/notion-fetch/contentSanitizer.ts b/scripts/notion-fetch/contentSanitizer.ts
index e5857970..59a9b0cf 100644
--- a/scripts/notion-fetch/contentSanitizer.ts
+++ b/scripts/notion-fetch/contentSanitizer.ts
@@ -273,11 +273,7 @@ export function injectExplicitHeadingIds(content: string): string {
       /^(\s{0,3})(#{1,6})\s+(.+?)\s*\{#([^}]+)\}\s*$/
     );
     if (fullMatch) {
-      const [, , , headingText, explicitId] = fullMatch;
-      const baseId = createSafeSlug(headingText);
-      if (baseId) {
-        reservedIds.add(baseId);
-      }
+      const [, , , , explicitId] = fullMatch;
       if (explicitId) {
         reservedIds.add(explicitId);
       }
diff --git a/scripts/notion-fetch/linkNormalizer.test.ts b/scripts/notion-fetch/linkNormalizer.test.ts
index 75b54d4e..b85d0557 100644
--- a/scripts/notion-fetch/linkNormalizer.test.ts
+++ b/scripts/notion-fetch/linkNormalizer.test.ts
@@ -83,6 +83,12 @@ describe("linkNormalizer", () => {
       expect(result).toBe(input);
     });
 
+    it("should not rewrite links inside an unclosed tilde fenced code block", () => {
+      const input = "~~~\n[example](/docs/Guía Rápida)";
+      const result = normalizeInternalDocLinks(input, "en");
+      expect(result).toBe(input);
+    });
+
     it("should not rewrite links inside inline code", () => {
       const input = "Use `[link](/docs/Guía Rápida)` as an example.";
       const result = normalizeInternalDocLinks(input, "en");
diff --git a/scripts/notion-fetch/linkNormalizer.ts b/scripts/notion-fetch/linkNormalizer.ts
index 0ee01c08..41cb87fa 100644
--- a/scripts/notion-fetch/linkNormalizer.ts
+++ b/scripts/notion-fetch/linkNormalizer.ts
@@ -58,7 +58,8 @@ function maskFencedCodeBlocks(content: string): {
   }
 
   if (inFence) {
-    output.push(fencedBlock.join("\n"));
+    codeBlocks.push(fencedBlock.join("\n"));
+    output.push(`__LINK_NORMALIZER_CODEBLOCK_${codeBlocks.length - 1}__`);
   }
 
   return { maskedContent: output.join("\n"), codeBlocks };

From f2904d466a200059ef1e3459fff03467932c612b Mon Sep 17 00:00:00 2001
From: luandro <luandro@digital-democracy.org>
Date: Thu, 26 Mar 2026 07:59:59 -0300
Subject: [PATCH 14/15] fix(notion-fetch): fix adjacent-link regex and
 exact-/docs guard in linkNormalizer

- Replace consuming (^|[^!]) group with (?<![!]) lookbehind so adjacent
  markdown links are both matched instead of the second being skipped
- Extend the /docs guard so that bare /docs and /docs#fragment targets are
  also normalized instead of being returned unchanged; normalizeDocPathname()
  already handled these paths correctly
- Add regression tests for adjacent links and exact /docs paths
---
 scripts/notion-fetch/linkNormalizer.test.ts | 30 +++++++++++++++++++++
 scripts/notion-fetch/linkNormalizer.ts      | 12 ++++++---
 2 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/scripts/notion-fetch/linkNormalizer.test.ts b/scripts/notion-fetch/linkNormalizer.test.ts
index b85d0557..f2fe5706 100644
--- a/scripts/notion-fetch/linkNormalizer.test.ts
+++ b/scripts/notion-fetch/linkNormalizer.test.ts
@@ -107,6 +107,12 @@ describe("linkNormalizer", () => {
       expect(result).toBe("[a](/docs/foo) and [b](/docs/bar)");
     });
 
+    it("should normalize both links when two docs links are directly adjacent (no separator)", () => {
+      const input = "[Link 1](/docs/Foo)[Link 2](/docs/Bar)";
+      const result = normalizeInternalDocLinks(input, "en");
+      expect(result).toBe("[Link 1](/docs/foo)[Link 2](/docs/bar)");
+    });
+
     it("should return empty string for empty content", () => {
       const result = normalizeInternalDocLinks("", "en");
       expect(result).toBe("");
@@ -117,5 +123,29 @@ describe("linkNormalizer", () => {
       const result = normalizeInternalDocLinks(input, "en");
       expect(result).toBe(input);
     });
+
+    it("should normalize a link to exactly /docs (en, no locale prefix)", () => {
+      const input = "[link](/docs)";
+      const result = normalizeInternalDocLinks(input, "en");
+      expect(result).toBe("[link](/docs)");
+    });
+
+    it("should normalize a link to exactly /docs with locale prefix (es)", () => {
+      const input = "[link](/docs)";
+      const result = normalizeInternalDocLinks(input, "es");
+      expect(result).toBe("[link](/es/docs)");
+    });
+
+    it("should normalize a link to /docs#fragment (en, no locale prefix)", () => {
+      const input = "[link](/docs#Sección Uno)";
+      const result = normalizeInternalDocLinks(input, "en");
+      expect(result).toBe("[link](/docs#seccion-uno)");
+    });
+
+    it("should normalize a link to /docs#fragment with locale prefix (pt)", () => {
+      const input = "[link](/docs#Sección Uno)";
+      const result = normalizeInternalDocLinks(input, "pt");
+      expect(result).toBe("[link](/pt/docs#seccion-uno)");
+    });
   });
 });
diff --git a/scripts/notion-fetch/linkNormalizer.ts b/scripts/notion-fetch/linkNormalizer.ts
index 41cb87fa..2e49572f 100644
--- a/scripts/notion-fetch/linkNormalizer.ts
+++ b/scripts/notion-fetch/linkNormalizer.ts
@@ -2,7 +2,7 @@ import config from "../../docusaurus.config";
 import { createSafeSlug } from "./slugUtils";
 
 const DEFAULT_LOCALE = config.i18n.defaultLocale;
-const MARKDOWN_LINK_REGEX = /(^|[^!])\[([^\]]+)\]\(([^)\n]+)\)/gm;
+const MARKDOWN_LINK_REGEX = /(?<![!])\[([^\]]+)\]\(([^)\n]+)\)/gm;
 
 function safeDecode(s: string): string {
   try {
@@ -186,17 +186,21 @@ export function normalizeInternalDocLinks(
 
   const normalizedContent = maskedContent.replace(
     MARKDOWN_LINK_REGEX,
-    (match, prefix: string, text: string, rawTarget: string) => {
+    (match, text: string, rawTarget: string) => {
       const trimmedTarget = rawTarget.trim();
       const titleMatch = trimmedTarget.match(/^(\/docs\/[^\n]*?)(\s+"[^"]*")$/);
       const target = titleMatch ? titleMatch[1] : trimmedTarget;
       const titleSuffix = titleMatch?.[2] ?? "";
 
-      if (!target.startsWith("/docs/")) {
+      if (
+        target !== "/docs" &&
+        !target.startsWith("/docs/") &&
+        !target.startsWith("/docs#")
+      ) {
         return match;
       }
 
-      return `${prefix}[${text}](${normalizeDocTarget(target, lang)}${titleSuffix})`;
+      return `[${text}](${normalizeDocTarget(target, lang)}${titleSuffix})`;
     }
   );
 

From f585ecf52d5ac21e7b7acd6be8727aeef2c22e78 Mon Sep 17 00:00:00 2001
From: luandro <luandro@digital-democracy.org>
Date: Thu, 26 Mar 2026 08:31:25 -0300
Subject: [PATCH 15/15] refactor(notion-fetch): extract shared markdown
 code-block masking utility

Both contentSanitizer and linkNormalizer implemented nearly identical
logic for masking and restoring fenced code blocks and inline code spans.
Extract it into a shared markdownUtils.ts module to eliminate the
duplication and prevent future drift.
---
 scripts/notion-fetch/contentSanitizer.ts | 213 ++---------------------
 scripts/notion-fetch/linkNormalizer.ts   | 142 +--------------
 scripts/notion-fetch/markdownUtils.ts    | 203 +++++++++++++++++++++
 3 files changed, 226 insertions(+), 332 deletions(-)
 create mode 100644 scripts/notion-fetch/markdownUtils.ts

diff --git a/scripts/notion-fetch/contentSanitizer.ts b/scripts/notion-fetch/contentSanitizer.ts
index 59a9b0cf..c30a341f 100644
--- a/scripts/notion-fetch/contentSanitizer.ts
+++ b/scripts/notion-fetch/contentSanitizer.ts
@@ -4,6 +4,11 @@
  */
 
 import { createSafeSlug } from "./slugUtils";
+import {
+  maskFencedCodeBlocks,
+  maskInlineCodeSpans,
+  restoreCodeMasks,
+} from "./markdownUtils";
 
 const EMOJI_STYLE_MARKERS = ["display:", "height:", "margin:"];
 
@@ -70,184 +75,6 @@ function fixHeadingHierarchy(
   return fixedLines.join("\n");
 }
 
-function createCodeBlockMasker(content: string): {
-  content: string;
-  codeBlocks: string[];
-  codeBlockPlaceholders: string[];
-} {
-  const lines = content.split("\n");
-  const codeBlocks: string[] = [];
-  const codeBlockPlaceholders: string[] = [];
-  const maskedLines: string[] = [];
-
-  let inFence = false;
-  let fenceChar = "";
-  let fenceLength = 0;
-  let blockLines: string[] = [];
-
-  for (const line of lines) {
-    if (!inFence) {
-      const openingMatch = line.match(/^ {0,3}(`{3,}|~{3,})(.*)$/);
-
-      if (!openingMatch) {
-        maskedLines.push(line);
-        continue;
-      }
-
-      const fence = openingMatch[1];
-      fenceChar = fence[0];
-      fenceLength = fence.length;
-      blockLines = [line];
-      inFence = true;
-      continue;
-    }
-
-    blockLines.push(line);
-
-    if (isClosingFenceLine(line, fenceChar, fenceLength)) {
-      codeBlocks.push(blockLines.join("\n"));
-      const placeholder = `__CODEBLOCK_${codeBlocks.length - 1}__`;
-      codeBlockPlaceholders.push(placeholder);
-      maskedLines.push(placeholder);
-      inFence = false;
-      blockLines = [];
-    }
-  }
-
-  if (inFence) {
-    codeBlocks.push(blockLines.join("\n"));
-    const placeholder = `__CODEBLOCK_${codeBlocks.length - 1}__`;
-    codeBlockPlaceholders.push(placeholder);
-    maskedLines.push(placeholder);
-  }
-
-  return {
-    content: maskedLines.join("\n"),
-    codeBlocks,
-    codeBlockPlaceholders,
-  };
-}
-
-function isClosingFenceLine(
-  line: string,
-  fenceChar: string,
-  fenceLength: number
-): boolean {
-  let i = 0;
-
-  while (i < line.length && line.charAt(i) === " ") {
-    i++;
-  }
-
-  if (i > 3) {
-    return false;
-  }
-
-  let fenceCount = 0;
-  while (i < line.length && line.charAt(i) === fenceChar) {
-    fenceCount++;
-    i++;
-  }
-
-  if (fenceCount < fenceLength) {
-    return false;
-  }
-
-  while (i < line.length) {
-    if (line.charAt(i) !== " " && line.charAt(i) !== "\t") {
-      return false;
-    }
-    i++;
-  }
-
-  return true;
-}
-
-function maskCodeFences(content: string): {
-  content: string;
-  codeBlocks: string[];
-  codeBlockPlaceholders: string[];
-} {
-  return createCodeBlockMasker(content);
-}
-
-function restoreCodeFences(content: string, codeBlocks: string[]): string {
-  let restoredContent = content;
-  for (const [index, codeBlock] of codeBlocks.entries()) {
-    restoredContent = restoredContent.replaceAll(
-      `__CODEBLOCK_${index}__`,
-      codeBlock
-    );
-  }
-  return restoredContent;
-}
-
-function maskInlineCodeSpans(content: string): {
-  content: string;
-  codeSpans: string[];
-} {
-  const codeSpans: string[] = [];
-  const output: string[] = [];
-
-  let i = 0;
-  while (i < content.length) {
-    const currentChar = content.charAt(i);
-    if (currentChar !== "`") {
-      output.push(currentChar);
-      i++;
-      continue;
-    }
-
-    let openingLength = 0;
-    while (
-      i + openingLength < content.length &&
-      content.charAt(i + openingLength) === "`"
-    ) {
-      openingLength++;
-    }
-
-    let scanIndex = i + openingLength;
-    let closingIndex = -1;
-    while (scanIndex < content.length) {
-      const nextBacktick = content.indexOf("`", scanIndex);
-      if (nextBacktick === -1) {
-        break;
-      }
-
-      let closingLength = 0;
-      while (
-        nextBacktick + closingLength < content.length &&
-        content.charAt(nextBacktick + closingLength) === "`"
-      ) {
-        closingLength++;
-      }
-
-      if (closingLength === openingLength) {
-        closingIndex = nextBacktick;
-        break;
-      }
-
-      scanIndex = nextBacktick + closingLength;
-    }
-
-    if (closingIndex === -1) {
-      output.push(content.slice(i, i + openingLength));
-      i += openingLength;
-      continue;
-    }
-
-    const codeSpan = content.slice(i, closingIndex + openingLength);
-    codeSpans.push(codeSpan);
-    output.push(`__CODESPAN_${codeSpans.length - 1}__`);
-    i = closingIndex + openingLength;
-  }
-
-  return {
-    content: output.join(""),
-    codeSpans,
-  };
-}
-
 export function injectExplicitHeadingIds(content: string): string {
   if (!content) {
     return content;
@@ -256,16 +83,14 @@ export function injectExplicitHeadingIds(content: string): string {
   const {
     content: maskedContent,
     codeBlocks,
-    codeBlockPlaceholders,
-  } = maskCodeFences(content);
+    placeholders,
+  } = maskFencedCodeBlocks(content);
   const reservedIds = new Set<string>();
   const headingCounts = new Map<string, number>();
 
   const lines = maskedContent.split("\n");
   for (const line of lines) {
-    if (
-      codeBlockPlaceholders.some((placeholder) => line.includes(placeholder))
-    ) {
+    if (placeholders.some((placeholder) => line.includes(placeholder))) {
       continue;
     }
 
@@ -288,9 +113,7 @@ export function injectExplicitHeadingIds(content: string): string {
   }
 
   const updatedLines = lines.map((line) => {
-    if (
-      codeBlockPlaceholders.some((placeholder) => line.includes(placeholder))
-    ) {
+    if (placeholders.some((placeholder) => line.includes(placeholder))) {
       return line;
     }
 
@@ -326,7 +149,7 @@ export function injectExplicitHeadingIds(content: string): string {
     return `${leadingWhitespace}${hashes} ${headingText} {#${headingId}}`;
   });
 
-  return restoreCodeFences(updatedLines.join("\n"), codeBlocks);
+  return restoreCodeMasks(updatedLines.join("\n"), codeBlocks, []);
 }
 
 /**
@@ -341,14 +164,14 @@ export function sanitizeMarkdownContent(content: string): string {
   const {
     content: maskedContent,
     codeBlocks,
-    codeBlockPlaceholders,
-  } = maskCodeFences(content);
+    placeholders,
+  } = maskFencedCodeBlocks(content);
   const { content: maskedWithCodeSpans, codeSpans } =
     maskInlineCodeSpans(maskedContent);
   content = maskedWithCodeSpans;
 
   // 1. Fix heading hierarchy for proper TOC generation (after masking code blocks)
-  content = fixHeadingHierarchy(content, codeBlockPlaceholders);
+  content = fixHeadingHierarchy(content, placeholders);
 
   // 2. Aggressively strip all curly-brace expressions by unwrapping to inner text
   // BUT preserve JSX style objects for emoji images
@@ -412,15 +235,7 @@ export function sanitizeMarkdownContent(content: string): string {
   }
 
   // 9. Restore masked code blocks and inline code
-  content = restoreCodeFences(content, codeBlocks);
-  let restoredContent = content;
-  for (const [index, codeSpan] of codeSpans.entries()) {
-    restoredContent = restoredContent.replaceAll(
-      `__CODESPAN_${index}__`,
-      codeSpan
-    );
-  }
-  content = restoredContent;
+  content = restoreCodeMasks(content, codeBlocks, codeSpans);
 
   return content;
 }
diff --git a/scripts/notion-fetch/linkNormalizer.ts b/scripts/notion-fetch/linkNormalizer.ts
index 2e49572f..7a02948f 100644
--- a/scripts/notion-fetch/linkNormalizer.ts
+++ b/scripts/notion-fetch/linkNormalizer.ts
@@ -1,5 +1,10 @@
 import config from "../../docusaurus.config";
 import { createSafeSlug } from "./slugUtils";
+import {
+  maskFencedCodeBlocks,
+  maskInlineCodeSpans,
+  restoreCodeMasks,
+} from "./markdownUtils";
 
 const DEFAULT_LOCALE = config.i18n.defaultLocale;
 const MARKDOWN_LINK_REGEX = /(?<![!])\[([^\]]+)\]\(([^)\n]+)\)/gm;
@@ -12,135 +17,6 @@ function safeDecode(s: string): string {
   }
 }
 
-function maskFencedCodeBlocks(content: string): {
-  maskedContent: string;
-  codeBlocks: string[];
-} {
-  const codeBlocks: string[] = [];
-  const lines = content.split("\n");
-  const output: string[] = [];
-
-  let inFence = false;
-  let fenceChar = "";
-  let fenceLength = 0;
-  let fencedBlock: string[] = [];
-
-  for (const line of lines) {
-    if (!inFence) {
-      const openMatch = /^ {0,3}(`{3,}|~{3,})(.*)$/.exec(line);
-      if (openMatch) {
-        inFence = true;
-        fenceChar = openMatch[1][0];
-        fenceLength = openMatch[1].length;
-        fencedBlock = [line];
-        continue;
-      }
-
-      output.push(line);
-      continue;
-    }
-
-    fencedBlock.push(line);
-
-    const closeMatch = /^ {0,3}([`~]{3,})\s*$/.exec(line);
-    if (
-      closeMatch &&
-      closeMatch[1][0] === fenceChar &&
-      closeMatch[1].length >= fenceLength
-    ) {
-      codeBlocks.push(fencedBlock.join("\n"));
-      output.push(`__LINK_NORMALIZER_CODEBLOCK_${codeBlocks.length - 1}__`);
-      inFence = false;
-      fenceChar = "";
-      fenceLength = 0;
-      fencedBlock = [];
-    }
-  }
-
-  if (inFence) {
-    codeBlocks.push(fencedBlock.join("\n"));
-    output.push(`__LINK_NORMALIZER_CODEBLOCK_${codeBlocks.length - 1}__`);
-  }
-
-  return { maskedContent: output.join("\n"), codeBlocks };
-}
-
-function maskInlineCode(content: string): {
-  maskedContent: string;
-  codeSpans: string[];
-} {
-  const codeSpans: string[] = [];
-  const output: string[] = [];
-
-  let index = 0;
-
-  while (index < content.length) {
-    const char = content.charAt(index);
-    if (char !== "`") {
-      output.push(char);
-      index++;
-      continue;
-    }
-
-    let openerLength = 1;
-    while (content.charAt(index + openerLength) === "`") {
-      openerLength++;
-    }
-
-    let cursor = index + openerLength;
-    let closingIndex = -1;
-    while (cursor < content.length) {
-      if (content.charAt(cursor) !== "`") {
-        cursor++;
-        continue;
-      }
-
-      let runLength = 1;
-      while (content.charAt(cursor + runLength) === "`") {
-        runLength++;
-      }
-
-      if (runLength === openerLength) {
-        closingIndex = cursor;
-        break;
-      }
-
-      cursor += runLength;
-    }
-
-    if (closingIndex === -1) {
-      output.push(content.slice(index));
-      break;
-    }
-
-    const codeSpan = content.slice(index, closingIndex + openerLength);
-    codeSpans.push(codeSpan);
-    output.push(`__LINK_NORMALIZER_CODESPAN_${codeSpans.length - 1}__`);
-    index = closingIndex + openerLength;
-  }
-
-  return { maskedContent: output.join(""), codeSpans };
-}
-
-function restoreCode(
-  content: string,
-  codeBlocks: string[],
-  codeSpans: string[]
-): string {
-  const restoreByIndex = (values: string[], rawIndex: string) => {
-    const index = Number(rawIndex);
-    return Number.isInteger(index) ? (values.at(index) ?? "") : "";
-  };
-
-  return content
-    .replace(/__LINK_NORMALIZER_CODESPAN_(\d+)__/g, (_match, index) => {
-      return restoreByIndex(codeSpans, index);
-    })
-    .replace(/__LINK_NORMALIZER_CODEBLOCK_(\d+)__/g, (_match, index) => {
-      return restoreByIndex(codeBlocks, index);
-    });
-}
-
 function normalizeDocPathname(pathname: string): string {
   const hasTrailingSlash = pathname.endsWith("/") && pathname !== "/docs/";
   const rawSegments = pathname
@@ -180,9 +56,9 @@ export function normalizeInternalDocLinks(
     return content;
   }
 
-  const { maskedContent: maskedBlocks, codeBlocks } =
-    maskFencedCodeBlocks(content);
-  const { maskedContent, codeSpans } = maskInlineCode(maskedBlocks);
+  const { content: maskedBlocks, codeBlocks } = maskFencedCodeBlocks(content);
+  const { content: maskedContent, codeSpans } =
+    maskInlineCodeSpans(maskedBlocks);
 
   const normalizedContent = maskedContent.replace(
     MARKDOWN_LINK_REGEX,
@@ -204,5 +80,5 @@ export function normalizeInternalDocLinks(
     }
   );
 
-  return restoreCode(normalizedContent, codeBlocks, codeSpans);
+  return restoreCodeMasks(normalizedContent, codeBlocks, codeSpans);
 }
diff --git a/scripts/notion-fetch/markdownUtils.ts b/scripts/notion-fetch/markdownUtils.ts
new file mode 100644
index 00000000..e11675f8
--- /dev/null
+++ b/scripts/notion-fetch/markdownUtils.ts
@@ -0,0 +1,203 @@
+/**
+ * Shared markdown code-block masking utilities.
+ * Both contentSanitizer and linkNormalizer use identical logic for masking
+ * code blocks and inline code spans before processing, then restoring them.
+ */
+
+/** Placeholder prefix for fenced code blocks; a token is this prefix, the block's index in `codeBlocks`, then the suffix. */
+const CODEBLOCK_PREFIX = "__CODEBLOCK_";
+/** Placeholder prefix for inline code spans; a token is this prefix, the span's index in `codeSpans`, then the suffix. */
+const CODESPAN_PREFIX = "__CODESPAN_";
+const PLACEHOLDER_SUFFIX = "__"; // Closes every placeholder token. The regexes in restoreCodeMasks spell these strings out literally, so keep them in sync.
+
+/** Returns true when `line` is at most three leading spaces, then a run of at least `fenceLength` `fenceChar` characters, then only spaces or tabs up to the end of the line. */
+function isClosingFenceLine(
+  line: string,
+  fenceChar: string,
+  fenceLength: number
+): boolean {
+  let i = 0; // Cursor into `line`, advanced by each of the three scans below.
+
+  while (i < line.length && line.charAt(i) === " ") {
+    i++;
+  }
+
+  if (i > 3) {
+    return false; // More than three leading spaces: not treated as a fence line.
+  }
+
+  let fenceCount = 0; // Length of the run of `fenceChar` that follows the indentation.
+  while (i < line.length && line.charAt(i) === fenceChar) {
+    fenceCount++;
+    i++;
+  }
+
+  if (fenceCount < fenceLength) {
+    return false; // The run is shorter than the opening fence (or absent).
+  }
+
+  while (i < line.length) {
+    if (line.charAt(i) !== " " && line.charAt(i) !== "\t") {
+      return false; // Any trailing character other than space or tab (including "\r") rejects the line.
+    }
+    i++;
+  }
+
+  return true;
+}
+
+/**
+ * Replaces each fenced code block in `content` with a single-line `__CODEBLOCK_<n>__` token, where `<n>` is the block's index in the returned `codeBlocks`; a fence still open at the end of input is masked through the last line.
+ * Returns the masked content, the original block texts (opening and closing fence lines included), and the placeholder tokens in the same order as `codeBlocks`.
+ */
+export function maskFencedCodeBlocks(content: string): {
+  content: string;
+  codeBlocks: string[];
+  placeholders: string[];
+} {
+  const lines = content.split("\n"); // Only "\n" separates lines; a "\r" from CRLF input stays at the end of each line.
+  const codeBlocks: string[] = [];
+  const placeholders: string[] = [];
+  const maskedLines: string[] = [];
+
+  let inFence = false;
+  let fenceChar = "";
+  let fenceLength = 0;
+  let blockLines: string[] = []; // Lines of the block currently being collected, starting with its opening fence.
+
+  for (const line of lines) {
+    if (!inFence) {
+      const openingMatch = line.match(/^ {0,3}(`{3,}|~{3,})(.*)$/); // Opening fence: up to three spaces, then 3+ backticks or 3+ tildes; the rest of the line is captured but never read.
+
+      if (!openingMatch) { // NOTE(review): `.` does not match "\r" and `$` is end-of-input here, so a CRLF-terminated fence line does not match — confirm input is LF-only.
+        maskedLines.push(line);
+        continue;
+      }
+
+      const fence = openingMatch[1];
+      fenceChar = fence[0]; // Remembered so the closing check requires the same character ...
+      fenceLength = fence.length; // ... in a run at least as long as the opening one.
+      blockLines = [line];
+      inFence = true;
+      continue;
+    }
+
+    blockLines.push(line); // Inside a fence every line, including the closing fence itself, belongs to the block.
+
+    if (isClosingFenceLine(line, fenceChar, fenceLength)) {
+      codeBlocks.push(blockLines.join("\n"));
+      const placeholder = `${CODEBLOCK_PREFIX}${codeBlocks.length - 1}${PLACEHOLDER_SUFFIX}`;
+      placeholders.push(placeholder);
+      maskedLines.push(placeholder); // The whole block collapses to one output line.
+      inFence = false;
+      blockLines = [];
+    }
+  }
+
+  if (inFence) { // Input ended inside an open fence: mask everything from the opening line to the end as one block.
+    codeBlocks.push(blockLines.join("\n"));
+    const placeholder = `${CODEBLOCK_PREFIX}${codeBlocks.length - 1}${PLACEHOLDER_SUFFIX}`;
+    placeholders.push(placeholder);
+    maskedLines.push(placeholder);
+  }
+
+  return {
+    content: maskedLines.join("\n"),
+    codeBlocks,
+    placeholders,
+  };
+}
+
+/**
+ * Replaces each inline code span in `content` with a `__CODESPAN_<n>__` token, where `<n>` is the span's index in the returned `codeSpans`. A run of N backticks opens a span that is closed by the next run of exactly N backticks; the search for a closer is not limited to the current line.
+ * Returns the masked content and the original span texts (backtick delimiters included). An opening run with no matching closer is copied through unchanged.
+ */
+export function maskInlineCodeSpans(content: string): {
+  content: string;
+  codeSpans: string[];
+} {
+  const codeSpans: string[] = [];
+  const output: string[] = []; // Output fragments, joined once at the end.
+
+  let i = 0;
+  while (i < content.length) {
+    const currentChar = content.charAt(i);
+    if (currentChar !== "`") { // Non-backtick characters are copied through one at a time.
+      output.push(currentChar);
+      i++;
+      continue;
+    }
+
+    let openingLength = 0; // Length of the backtick run that starts at `i`.
+    while (
+      i + openingLength < content.length &&
+      content.charAt(i + openingLength) === "`"
+    ) {
+      openingLength++;
+    }
+
+    let scanIndex = i + openingLength;
+    let closingIndex = -1; // Start of the matching closing run, or -1 while none has been found.
+    while (scanIndex < content.length) {
+      const nextBacktick = content.indexOf("`", scanIndex);
+      if (nextBacktick === -1) {
+        break;
+      }
+
+      let closingLength = 0; // Length of the candidate backtick run at `nextBacktick`.
+      while (
+        nextBacktick + closingLength < content.length &&
+        content.charAt(nextBacktick + closingLength) === "`"
+      ) {
+        closingLength++;
+      }
+
+      if (closingLength === openingLength) { // Only a run of exactly the opener's length closes the span.
+        closingIndex = nextBacktick;
+        break;
+      }
+
+      scanIndex = nextBacktick + closingLength; // Run of a different length: skip past it and keep looking.
+    }
+
+    if (closingIndex === -1) { // No matching closer: emit the opening run literally and resume right after it.
+      output.push(content.slice(i, i + openingLength));
+      i += openingLength;
+      continue;
+    }
+
+    const codeSpan = content.slice(i, closingIndex + openingLength); // Span text includes both backtick delimiters.
+    codeSpans.push(codeSpan);
+    output.push(
+      `${CODESPAN_PREFIX}${codeSpans.length - 1}${PLACEHOLDER_SUFFIX}`
+    );
+    i = closingIndex + openingLength;
+  }
+
+  return {
+    content: output.join(""),
+    codeSpans,
+  };
+}
+
+/**
+ * Replaces every `__CODESPAN_<n>__` token in `content` with `codeSpans[n]`, then every `__CODEBLOCK_<n>__` token with `codeBlocks[n]`, and returns the result.
+ */
+export function restoreCodeMasks(
+  content: string,
+  codeBlocks: string[],
+  codeSpans: string[]
+): string {
+  const restoreByIndex = (values: string[], rawIndex: string) => {
+    const index = Number(rawIndex); // `rawIndex` is the digit group captured by the token regexes below.
+    return Number.isInteger(index) ? (values.at(index) ?? "") : ""; // A token whose index has no stored entry is replaced with an empty string.
+  };
+
+  return content
+    .replace(/__CODESPAN_(\d+)__/g, (_match, index) => { // Spans first: any block token inside restored span text is expanded by the next pass.
+      return restoreByIndex(codeSpans, index);
+    })
+    .replace(/__CODEBLOCK_(\d+)__/g, (_match, index) => {
+      return restoreByIndex(codeBlocks, index);
+    });
+}