diff --git a/scripts/notion-fetch/__tests__/introductionMarkdown.test.ts b/scripts/notion-fetch/__tests__/introductionMarkdown.test.ts new file mode 100644 index 00000000..ff293a1a --- /dev/null +++ b/scripts/notion-fetch/__tests__/introductionMarkdown.test.ts @@ -0,0 +1,48 @@ +import { describe, it, expect, beforeAll, vi } from "vitest"; + +process.env.NOTION_API_KEY ??= "test-notion-api-key"; +process.env.DATABASE_ID ??= "test-database-id"; + +vi.mock("../../notionClient", () => ({ + n2m: {}, +})); + +let ensureBlankLineAfterStandaloneBold: (content: string) => string; + +beforeAll(async () => { + ({ ensureBlankLineAfterStandaloneBold } = await import("../generateBlocks")); +}); + +describe("ensureBlankLineAfterStandaloneBold", () => { + it("inserts a blank line after standalone bold headings", () => { + const input = [ + "**Collected Data**", + "This section provides overviews and walkthroughs of features.", + "", + "**Another Section**", + "", + "Already spaced content.", + ].join("\n"); + + const output = ensureBlankLineAfterStandaloneBold(input); + + expect(output).toBe( + [ + "**Collected Data**", + "", + "This section provides overviews and walkthroughs of features.", + "", + "**Another Section**", + "", + "Already spaced content.", + ].join("\n") + ); + }); + + it("ignores bold text that is not standalone", () => { + const input = "Some **inline bold** content."; + const output = ensureBlankLineAfterStandaloneBold(input); + + expect(output).toBe(input); + }); +}); diff --git a/scripts/notion-fetch/exportDatabase.test.ts b/scripts/notion-fetch/exportDatabase.test.ts index c434c918..96bee8b5 100644 --- a/scripts/notion-fetch/exportDatabase.test.ts +++ b/scripts/notion-fetch/exportDatabase.test.ts @@ -208,4 +208,300 @@ describe("exportNotionDatabase", () => { expect.stringContaining("Failed to fetch blocks for page page-2") ); }); + + it("preserves line breaks in paragraph rich_text", async () => { + const blocksWithLineBreaks = [ + { + id: "block-1", + type: "paragraph", + paragraph: { + rich_text: [ + { + plain_text: "First line\nSecond line", + }, + ], + }, + has_children: false, + archived: false, + created_time: "2024-01-02T12:34:56.000Z", + last_edited_time: "2024-01-02T12:34:56.000Z", + }, + ]; + + fetchNotionBlocksMock.mockResolvedValue(blocksWithLineBreaks); + + const { exportNotionDatabase } = await import("./exportDatabase"); + + await exportNotionDatabase({ + verbose: false, + quick: false, + includeRawData: true, + }); + + const completePayload = writeFileMock.mock.calls[0][1] as string; + const completeJson = JSON.parse(completePayload); + + const blockAnalysis = completeJson.pages[0].blocks[0]; + expect(blockAnalysis.textContent).toBe("First line
\nSecond line"); + }); + + it("preserves Unicode line separators (U+2028)", async () => { + const blocksWithUnicodeSeparator = [ + { + id: "block-1", + type: "paragraph", + paragraph: { + rich_text: [ + { + plain_text: "First line\u2028Second line", + }, + ], + }, + has_children: false, + archived: false, + created_time: "2024-01-02T12:34:56.000Z", + last_edited_time: "2024-01-02T12:34:56.000Z", + }, + ]; + + fetchNotionBlocksMock.mockResolvedValue(blocksWithUnicodeSeparator); + + const { exportNotionDatabase } = await import("./exportDatabase"); + + await exportNotionDatabase({ + verbose: false, + quick: false, + includeRawData: true, + }); + + const completePayload = writeFileMock.mock.calls[0][1] as string; + const completeJson = JSON.parse(completePayload); + + const blockAnalysis = completeJson.pages[0].blocks[0]; + expect(blockAnalysis.textContent).toBe("First line
\nSecond line"); + }); + + it("preserves Unicode paragraph separators (U+2029)", async () => { + const blocksWithUnicodeParagraph = [ + { + id: "block-1", + type: "paragraph", + paragraph: { + rich_text: [ + { + plain_text: "First line\u2029Second line", + }, + ], + }, + has_children: false, + archived: false, + created_time: "2024-01-02T12:34:56.000Z", + last_edited_time: "2024-01-02T12:34:56.000Z", + }, + ]; + + fetchNotionBlocksMock.mockResolvedValue(blocksWithUnicodeParagraph); + + const { exportNotionDatabase } = await import("./exportDatabase"); + + await exportNotionDatabase({ + verbose: false, + quick: false, + includeRawData: true, + }); + + const completePayload = writeFileMock.mock.calls[0][1] as string; + const completeJson = JSON.parse(completePayload); + + const blockAnalysis = completeJson.pages[0].blocks[0]; + expect(blockAnalysis.textContent).toBe("First line
\nSecond line"); + }); + + it("preserves Windows-style line breaks (\\r\\n) in rich_text", async () => { + const blocksWithWindowsBreaks = [ + { + id: "block-1", + type: "paragraph", + paragraph: { + rich_text: [ + { + plain_text: "First line\r\nSecond line", + }, + ], + }, + has_children: false, + archived: false, + created_time: "2024-01-02T12:34:56.000Z", + last_edited_time: "2024-01-02T12:34:56.000Z", + }, + ]; + + fetchNotionBlocksMock.mockResolvedValue(blocksWithWindowsBreaks); + + const { exportNotionDatabase } = await import("./exportDatabase"); + + await exportNotionDatabase({ + verbose: false, + quick: false, + includeRawData: true, + }); + + const completePayload = writeFileMock.mock.calls[0][1] as string; + const completeJson = JSON.parse(completePayload); + + const blockAnalysis = completeJson.pages[0].blocks[0]; + expect(blockAnalysis.textContent).toBe("First line
\nSecond line"); + }); + + it("preserves standalone carriage returns (\\r) in rich_text", async () => { + const blocksWithCarriageReturns = [ + { + id: "block-1", + type: "paragraph", + paragraph: { + rich_text: [ + { + plain_text: "First line\rSecond line", + }, + ], + }, + has_children: false, + archived: false, + created_time: "2024-01-02T12:34:56.000Z", + last_edited_time: "2024-01-02T12:34:56.000Z", + }, + ]; + + fetchNotionBlocksMock.mockResolvedValue(blocksWithCarriageReturns); + + const { exportNotionDatabase } = await import("./exportDatabase"); + + await exportNotionDatabase({ + verbose: false, + quick: false, + includeRawData: true, + }); + + const completePayload = writeFileMock.mock.calls[0][1] as string; + const completeJson = JSON.parse(completePayload); + + const blockAnalysis = completeJson.pages[0].blocks[0]; + expect(blockAnalysis.textContent).toBe("First line
\nSecond line"); + }); + + it("preserves line breaks in image captions", async () => { + const imageBlockWithLineBreaks = [ + { + id: "block-1", + type: "image", + image: { + caption: [ + { + plain_text: "Caption line 1\nCaption line 2", + }, + ], + external: { url: "https://example.com/image.png" }, + }, + has_children: false, + archived: false, + created_time: "2024-01-02T12:34:56.000Z", + last_edited_time: "2024-01-02T12:34:56.000Z", + }, + ]; + + fetchNotionBlocksMock.mockResolvedValue(imageBlockWithLineBreaks); + + const { exportNotionDatabase } = await import("./exportDatabase"); + + await exportNotionDatabase({ + verbose: false, + quick: false, + includeRawData: true, + }); + + const completePayload = writeFileMock.mock.calls[0][1] as string; + const completeJson = JSON.parse(completePayload); + + const blockAnalysis = completeJson.pages[0].blocks[0]; + expect(blockAnalysis.textContent).toBe( + "Caption line 1
\nCaption line 2" + ); + }); + + it("preserves line breaks in code blocks", async () => { + const codeBlockWithLineBreaks = [ + { + id: "block-1", + type: "code", + code: { + rich_text: [ + { + plain_text: "function test() {\n return true;\n}", + }, + ], + language: "javascript", + }, + has_children: false, + archived: false, + created_time: "2024-01-02T12:34:56.000Z", + last_edited_time: "2024-01-02T12:34:56.000Z", + }, + ]; + + fetchNotionBlocksMock.mockResolvedValue(codeBlockWithLineBreaks); + + const { exportNotionDatabase } = await import("./exportDatabase"); + + await exportNotionDatabase({ + verbose: false, + quick: false, + includeRawData: true, + }); + + const completePayload = writeFileMock.mock.calls[0][1] as string; + const completeJson = JSON.parse(completePayload); + + const blockAnalysis = completeJson.pages[0].blocks[0]; + expect(blockAnalysis.textContent).toBe( + "function test() {
\n return true;
\n}" + ); + }); + + it("preserves multiple consecutive line breaks", async () => { + const blocksWithMultipleLineBreaks = [ + { + id: "block-1", + type: "paragraph", + paragraph: { + rich_text: [ + { + plain_text: "Line 1\n\nLine 2\n\n\nLine 3", + }, + ], + }, + has_children: false, + archived: false, + created_time: "2024-01-02T12:34:56.000Z", + last_edited_time: "2024-01-02T12:34:56.000Z", + }, + ]; + + fetchNotionBlocksMock.mockResolvedValue(blocksWithMultipleLineBreaks); + + const { exportNotionDatabase } = await import("./exportDatabase"); + + await exportNotionDatabase({ + verbose: false, + quick: false, + includeRawData: true, + }); + + const completePayload = writeFileMock.mock.calls[0][1] as string; + const completeJson = JSON.parse(completePayload); + + const blockAnalysis = completeJson.pages[0].blocks[0]; + expect(blockAnalysis.textContent).toBe( + "Line 1
\n
\nLine 2
\n
\n
\nLine 3" + ); + }); }); diff --git a/scripts/notion-fetch/exportDatabase.ts b/scripts/notion-fetch/exportDatabase.ts index dc8969f5..69d4a68e 100644 --- a/scripts/notion-fetch/exportDatabase.ts +++ b/scripts/notion-fetch/exportDatabase.ts @@ -14,6 +14,9 @@ dotenv.config(); const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); +const LINE_BREAK_REGEX = /(?:\r\n|[\r\n\u2028\u2029])/g; +const HTML_LINE_BREAK = "
\n"; + // CLI Options Interface interface ExportOptions { verbose: boolean; @@ -213,17 +216,21 @@ function extractTextFromBlock(block: Record): string { const blockContent = block[blockType]; // Handle rich text arrays (most common case) + // Preserve manual line breaks (including Windows \r\n) by converting them to HTML
tags if (blockContent.rich_text && Array.isArray(blockContent.rich_text)) { return blockContent.rich_text .map((textObj: any) => textObj.plain_text || textObj.text?.content || "") - .join(""); + .join("") + .replace(LINE_BREAK_REGEX, HTML_LINE_BREAK); } // Handle title blocks + // Preserve manual line breaks (including Windows \r\n) by converting them to HTML
tags if (blockContent.title && Array.isArray(blockContent.title)) { return blockContent.title .map((textObj: any) => textObj.plain_text || textObj.text?.content || "") - .join(""); + .join("") + .replace(LINE_BREAK_REGEX, HTML_LINE_BREAK); } // Handle text property directly @@ -237,21 +244,28 @@ function extractTextFromBlock(block: Record): string { } // Handle specific block types + // Preserve manual line breaks in captions and code blocks switch (blockType) { case "image": return ( - blockContent.caption?.map((c: any) => c.plain_text || "").join("") || - "[Image]" + blockContent.caption + ?.map((c: any) => c.plain_text || "") + .join("") + .replace(LINE_BREAK_REGEX, HTML_LINE_BREAK) || "[Image]" ); case "video": return ( - blockContent.caption?.map((c: any) => c.plain_text || "").join("") || - "[Video]" + blockContent.caption + ?.map((c: any) => c.plain_text || "") + .join("") + .replace(LINE_BREAK_REGEX, HTML_LINE_BREAK) || "[Video]" ); case "file": return ( - blockContent.caption?.map((c: any) => c.plain_text || "").join("") || - "[File]" + blockContent.caption + ?.map((c: any) => c.plain_text || "") + .join("") + .replace(LINE_BREAK_REGEX, HTML_LINE_BREAK) || "[File]" ); case "bookmark": return blockContent.url || "[Bookmark]"; @@ -259,8 +273,10 @@ function extractTextFromBlock(block: Record): string { return blockContent.expression || "[Equation]"; case "code": return ( - blockContent.rich_text?.map((t: any) => t.plain_text || "").join("") || - "[Code]" + blockContent.rich_text + ?.map((t: any) => t.plain_text || "") + .join("") + .replace(LINE_BREAK_REGEX, HTML_LINE_BREAK) || "[Code]" ); case "divider": return "[Divider]"; diff --git a/scripts/notion-fetch/generateBlocks.ts b/scripts/notion-fetch/generateBlocks.ts index 0fa9456c..d6ac7aaf 100644 --- a/scripts/notion-fetch/generateBlocks.ts +++ b/scripts/notion-fetch/generateBlocks.ts @@ -286,6 +286,34 @@ function sanitizeMarkdownImages(content: string): string { return sanitized; } +/** + * Ensure standalone bold lines (`**Heading**`) are treated as their own paragraphs + * by inserting a blank line when missing. This preserves Notion formatting where + * bold text represents a section title followed by descriptive copy. + */ +export function ensureBlankLineAfterStandaloneBold(content: string): string { + if (!content) return content; + + const lines = content.split("\n"); + const result: string[] = []; + + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + result.push(line); + + const nextLine = lines[i + 1]; + const isStandaloneBold = /^\s*\*\*[^*].*\*\*\s*$/.test(line.trim()); + const nextLineHasContent = + nextLine !== undefined && nextLine.trim().length > 0; + + if (isStandaloneBold && nextLineHasContent) { + result.push(""); + } + } + + return result.join("\n"); +} + /** * Image cache system to prevent re-downloading and provide recovery options */ @@ -1804,6 +1832,10 @@ export async function generateBlocks(pages, progressCallback) { markdownString.parent = sanitizeMarkdownContent( markdownString.parent ); + + markdownString.parent = ensureBlankLineAfterStandaloneBold( + markdownString.parent + ); // Remove duplicate title heading if it exists // The first H1 heading often duplicates the title in Notion exports let contentBody = markdownString.parent; diff --git a/src/css/custom.css b/src/css/custom.css index 4e0ec226..03fa97ac 100644 --- a/src/css/custom.css +++ b/src/css/custom.css @@ -297,6 +297,38 @@ aside[class*="sidebar"] .menu__list-item-collapsible .menu__link { display: none !important; } +/* + * Paragraph and list spacing adjustments + * Issue #48: Match spacing with Notion layout + * + * Reduces vertical spacing between paragraphs and lists to align with + * Notion's visual rhythm. Scoped to .theme-doc-markdown to avoid affecting + * other components like landing pages, admonitions, and tables. + */ + +/* Reduce paragraph spacing from default ~1rem to ~0.5rem */ +.theme-doc-markdown p { + margin-bottom: 0.5rem; +} + +/* Reduce list spacing for better visual rhythm */ +.theme-doc-markdown ul, +.theme-doc-markdown ol { + margin-bottom: 0.75rem; +} + +/* Reduce spacing between list items */ +.theme-doc-markdown li { + margin-bottom: 0.25rem; +} + +/* Ensure nested lists maintain proper spacing */ +.theme-doc-markdown li > ul, +.theme-doc-markdown li > ol { + margin-top: 0.25rem; + margin-bottom: 0.25rem; +} + /* * Details/Toggle Styling * Issue #56: Match toggle background to warm light gray (same as callouts)