diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..3b6732b --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,72 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +Pantsdown is a Markdown to HTML converter that renders markdown similar to GitHub's styling. It was built specifically for [github-preview.nvim](https://github.com/wallpants/github-preview.nvim). Based on [Marked](https://github.com/markedjs/marked). + +## Commands + +```bash +# Type checking +bun run typecheck + +# Linting +bun run lint + +# Both typecheck and lint +bun run check + +# Run tests (uses bun:test with happy-dom) +bun test + +# Run a single test file +bun test tests/parse.test.ts + +# Update test snapshots +bun test --update-snapshots + +# Format code +bun run format + +# Build docs +bun run docs:build +``` + +## Architecture + +The parsing pipeline follows a classic compiler pattern: + +1. **Lexer** (`src/lexer.ts`) - Tokenizes markdown source into an array of tokens + - Uses `Tokenizer` for the actual token creation + - Processes block-level tokens first, then inline tokens + - Tracks source maps for line number references + - Collects footnotes separately + +2. **Tokenizer** (`src/tokenizer.ts`) - Creates tokens from markdown patterns + - Uses regex rules from `src/rules/block.ts` and `src/rules/inline.ts` + +3. **Parser** (`src/parser.ts`) - Converts tokens to HTML by dispatching to the Renderer + - Recursively processes nested tokens + +4. **Renderer** (`src/renderer.ts`) - Produces HTML output for each token type + - Uses highlight.js for syntax highlighting + - Uses github-slugger for heading anchors + - Handles special cases like mermaid diagrams and alerts + +Entry point: `Pantsdown` class in `src/pantsdown.ts` coordinates the pipeline. + +## Key Types + +All token types are defined in `src/types.ts`. The `Token` union type covers all possible markdown elements (headings, code blocks, lists, tables, footnotes, alerts, etc.). + +## Output + +`pantsdown.parse(markdown)` returns `{ html, javascript }`: +- `html`: The rendered HTML string +- `javascript`: A script for interactive features (task list checkboxes, copy buttons) + +## Styling + +CSS is in `src/css/styles.css`. Requires a parent element with classes `pantsdown light` or `pantsdown dark` (optionally with `high-contrast`). diff --git a/bun.lockb b/bun.lockb index 312154f..e59bd8a 100755 Binary files a/bun.lockb and b/bun.lockb differ diff --git a/package.json b/package.json index a3549c4..99cc625 100644 --- a/package.json +++ b/package.json @@ -51,21 +51,23 @@ }, "dependencies": { "github-slugger": "^2.0.0", - "highlight.js": "^11.11.1" + "highlight.js": "^11.11.1", + "katex": "^0.16.28" }, "devDependencies": { - "@commitlint/config-conventional": "^19.6.0", - "@commitlint/cz-commitlint": "^19.6.1", - "@happy-dom/global-registrator": "^16.3.0", - "@types/bun": "^1.1.14", - "@typescript-eslint/eslint-plugin": "^8.19.0", - "@typescript-eslint/parser": "^8.19.0", + "@commitlint/config-conventional": "^19.8.1", + "@commitlint/cz-commitlint": "^19.8.1", + "@happy-dom/global-registrator": "^16.8.1", + "@types/bun": "^1.3.7", + "@types/katex": "^0.16.8", + "@typescript-eslint/eslint-plugin": "^8.54.0", + "@typescript-eslint/parser": "^8.54.0", "commitizen": "^4.3.1", - "commitlint": "^19.6.1", + "commitlint": "^19.8.1", "eslint": "^8.57.1", - "prettier": "^3.4.2", - "prettier-plugin-organize-imports": "^4.1.0", - "semantic-release": "^24.2.1", + "prettier": "^3.8.1", + "prettier-plugin-organize-imports": "^4.3.0", + "semantic-release": "^24.2.9", "typescript": "5.7.2" } } diff --git a/src/css/styles.css b/src/css/styles.css index 2bc2086..fd36c28 100644 --- a/src/css/styles.css +++ b/src/css/styles.css @@ -113,8 +113,9 @@ margin: 0; color: var(--color-fg-default); background-color: var(--color-canvas-default); - font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", "Noto Sans", Helvetica, Arial, - sans-serif, "Apple Color Emoji", "Segoe UI Emoji"; + font-family: + -apple-system, BlinkMacSystemFont, "Segoe UI", "Noto Sans", Helvetica, Arial, sans-serif, + "Apple Color Emoji", "Segoe UI Emoji"; font-size: 16px; line-height: 1.5; word-wrap: break-word; @@ -1323,3 +1324,36 @@ /* .pantsdown.dark .hljs-tag { */ /* /1* purposely ignored *1/ */ /* } */ + +/* +* * * * * * * * * * * * * * * * * * * * * * * * * * * * * +* * +* KaTeX / LaTeX * +* * +* * * * * * * * * * * * * * * * * * * * * * * * * * * * * +*/ + +.pantsdown .katex-block { + display: block; + margin: 16px 0; + text-align: center; + overflow-x: auto; + overflow-y: hidden; +} + +.pantsdown .katex-inline { + display: inline; +} + +.pantsdown .katex-error { + color: var(--color-danger-fg); + background-color: var(--color-canvas-subtle); + padding: 0.2em 0.4em; + border-radius: 6px; +} + +.pantsdown .katex-error code { + color: inherit; + background-color: transparent; + padding: 0; +} diff --git a/src/lexer.ts b/src/lexer.ts index dce4c76..fdd1348 100644 --- a/src/lexer.ts +++ b/src/lexer.ts @@ -111,6 +111,13 @@ export class Lexer { continue; } + // latexBlock + if ((token = this.tokenizer.latexBlock(src))) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; + } + // fences if ((token = this.tokenizer.fences(src))) { src = src.substring(token.raw.length); @@ -169,8 +176,8 @@ export class Lexer { lastToken.text += "\n" + token.raw; const lastInline = this.inlineQueue[this.inlineQueue.length - 1]; if (lastInline) lastInline.src = lastToken.text; - } else if (!this.links[token.tag]) { - this.links[token.tag] = { + } else { + this.links[token.tag] ??= { href: token.href, title: token.title, }; @@ -215,7 +222,7 @@ export class Lexer { if ((token = this.tokenizer.text(src))) { src = src.substring(token.raw.length); lastToken = tokens[tokens.length - 1]; - if (lastToken && lastToken.type === "text") { + if (lastToken?.type === "text") { lastToken.raw += "\n" + token.raw; lastToken.text += "\n" + token.text; this.inlineQueue.pop(); @@ -291,6 +298,13 @@ export class Lexer { } keepPrevChar = false; + // latexInline (before escape to handle \(...\) syntax) + if ((token = this.tokenizer.latexInline(src))) { + src = src.substring(token.raw.length); + tokens.push(token); + continue; + } + // escape if ((token = this.tokenizer.escape(src))) { src = src.substring(token.raw.length); @@ -391,7 +405,7 @@ export class Lexer { } keepPrevChar = true; lastToken = tokens[tokens.length - 1]; - if (lastToken && lastToken.type === "text") { + if (lastToken?.type === "text") { lastToken.raw += token.raw; lastToken.text += token.text; } else { diff --git a/src/parser.ts b/src/parser.ts index 0acdca9..5c40ca7 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -184,6 +184,10 @@ export class Parser { out += top ? this.renderer.paragraph(body, textToken.sourceMap) : body; continue; } + case "latexBlock": { + out += this.renderer.latexBlock(token.text, token.sourceMap); + continue; + } default: { const errMsg = 'Token with "' + token.type + '" type was not found.'; @@ -253,6 +257,10 @@ export class Parser { out += this.renderer.text(token.text); break; } + case "latexInline": { + out += this.renderer.latexInline(token.text); + break; + } default: { const errMsg = 'Token with "' + token.type + '" type was not found.'; throw new Error(errMsg); diff --git a/src/renderer.ts b/src/renderer.ts index 74c3c31..ad39b86 100644 --- a/src/renderer.ts +++ b/src/renderer.ts @@ -1,5 +1,6 @@ import GithubSlugger from "github-slugger"; import hljs from "highlight.js"; +import katex from "katex"; import { type Pantsdown } from "./pantsdown.ts"; import { inline } from "./rules/inline.ts"; import { type HTMLAttrs, type SourceMap, type Tokens } from "./types.ts"; @@ -212,4 +213,40 @@ export class Renderer { text(text: string): string { return text; } + + latexBlock(latex: string, sourceMap: SourceMap): string { + try { + const rendered = katex.renderToString(latex, { + displayMode: true, + throwOnError: false, + output: "html", + trust: false, + }); + return injectHtmlAttributes( + `
${rendered}
\n`, + [], + sourceMap, + ); + } catch { + return injectHtmlAttributes( + `
${escape(latex)}
\n`, + [], + sourceMap, + ); + } + } + + latexInline(latex: string): string { + try { + const rendered = katex.renderToString(latex, { + displayMode: false, + throwOnError: false, + output: "html", + trust: false, + }); + return `${rendered}`; + } catch { + return `${escape(latex)}`; + } + } } diff --git a/src/rules/block.ts b/src/rules/block.ts index 55bc33c..056d0c0 100644 --- a/src/rules/block.ts +++ b/src/rules/block.ts @@ -16,7 +16,8 @@ type BlockRuleNames = | "bullet" | "listItemStart" | "footnote" - | "paragraph"; + | "paragraph" + | "latexBlock"; export const label = /(?!\s*\])(?:\\.|[^\[\]\\])+/; @@ -136,4 +137,5 @@ export const block: Record = { paragraph: block_paragraph, footnote: /^\[\^([^\]\n]+)\]:(?:[ \t]+|[\n]*?|$)([^\n]*?(?:\n|$)(?:[\n]*?[ ]{4,}[^\n]*)*)/, text: /^[^\n]+/, + latexBlock: /^(?:\$\$([^$]*(?:\$(?!\$)[^$]*)*)\$\$|\\\[([\s\S]*?)\\\])/, }; diff --git a/src/rules/inline.ts b/src/rules/inline.ts index 06aa1ac..d4ddde1 100644 --- a/src/rules/inline.ts +++ b/src/rules/inline.ts @@ -19,7 +19,8 @@ type InlineRuleNames = | "punctuation" | "blockSkip" | "footnoteRef" - | "backpedal"; + | "backpedal" + | "latexInline"; // list of unicode punctuation marks, plus any missing characters from CommonMark spec const punctuation = "\\p{P}\\p{S}"; @@ -112,7 +113,7 @@ const inline_backpedal = const inline_del = /^(~~?)(?=[^\s~])([\s\S]*?[^\s~])\1(?=[^~]|$)/; const inline_text = - /^([`~]+|[^`~])(?:(?= {2,}\n)|(?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)|[\s\S]*?(?:(?=[\\, "emStrong"> & { blockSkip: inline_blockSkip, backpedal: inline_backpedal, footnoteRef: /^\[\^([^\]\n]+)\]/, + latexInline: /^(?:\$(?!\$)([^\s$](?:[^$\n]*[^\s$])?)\$(?!\$)|\\\((.+?)\\\))/, }; diff --git a/src/tokenizer.ts b/src/tokenizer.ts index e04475c..1944384 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -716,6 +716,7 @@ export class Tokenizer { if (!nextChar || !prevChar || inline.punctuation.exec(prevChar)) { // unicode Regex counts emoji as 1 char; spread into array for proper count (used multiple times below) + // eslint-disable-next-line @typescript-eslint/no-misused-spread const lLength = [...match[0]].length - 1; let rDelim, rLength, @@ -736,6 +737,7 @@ export class Tokenizer { if (!rDelim) continue; // skip single * in __abc*abc__ + // eslint-disable-next-line @typescript-eslint/no-misused-spread rLength = [...rDelim].length; if (match[3] || match[4]) { @@ -757,6 +759,7 @@ export class Tokenizer { // Remove extra characters. *a*** -> *a* rLength = Math.min(rLength, rLength + delimTotal + midDelimTotal); // char length can be >1 for unicode characters; + // eslint-disable-next-line @typescript-eslint/no-misused-spread const lastCharLength = [...match[0]][0]!.length; const raw = src.slice(0, lLength + match.index + lastCharLength + rLength); @@ -920,4 +923,33 @@ export class Tokenizer { text, }; } + + latexBlock(src: string): Tokens["LatexBlock"] | undefined { + const cap = block.latexBlock.exec(src); + if (!cap) return undefined; + + // cap[1] is from $$...$$ syntax, cap[2] is from \[...\] syntax + const text = cap[1] ?? cap[2] ?? ""; + + return { + type: "latexBlock", + raw: cap[0], + text: text.trim(), + sourceMap: this.lexer.getSourceMap(cap[0]), + }; + } + + latexInline(src: string): Tokens["LatexInline"] | undefined { + const cap = inline.latexInline.exec(src); + if (!cap) return undefined; + + // cap[1] is from $...$ syntax, cap[2] is from \(...\) syntax + const text = cap[1] ?? cap[2] ?? ""; + + return { + type: "latexInline", + raw: cap[0], + text, + }; + } } diff --git a/src/types.ts b/src/types.ts index e558c9e..7931664 100644 --- a/src/types.ts +++ b/src/types.ts @@ -186,6 +186,17 @@ export interface Tokens extends Record { raw: string; items: Tokens["Footnote"][]; }; + LatexBlock: { + type: "latexBlock"; + raw: string; + text: string; + sourceMap: SourceMap; + }; + LatexInline: { + type: "latexInline"; + raw: string; + text: string; + }; } export type Token = @@ -214,7 +225,9 @@ export type Token = | Tokens["Alert"] | Tokens["Footnote"] | Tokens["FootnoteRef"] - | Tokens["Footnotes"]; + | Tokens["Footnotes"] + | Tokens["LatexBlock"] + | Tokens["LatexInline"]; export type Links = Record; diff --git a/tests/__snapshots__/parse.test.ts.snap b/tests/__snapshots__/parse.test.ts.snap index eea0fea..ac3287f 100644 --- a/tests/__snapshots__/parse.test.ts.snap +++ b/tests/__snapshots__/parse.test.ts.snap @@ -1,4 +1,4 @@ -// Bun Snapshot v1, https://goo.gl/fbAQLP +// Bun Snapshot v1, https://bun.sh/docs/test/snapshots exports[`pantsdown.parse(test.md) 1`] = ` { @@ -589,6 +589,68 @@ Underscores

Underscores


+

LaTeX / Math

+

Inline LaTeX

+

Inline math using dollar signs: and .

+

Inline math using parentheses: and .

+

Mixed with text: The quadratic formula is which solves .

+

Block LaTeX

+

Block math using double dollar signs:

+
+

Block math using square brackets:

+
+

A more complex equation:

+
+

Code vs LaTeX

+

Code spans should not be affected: $not latex$ and \\(also not latex\\).

+

A regular dollar amount like $100 should not be parsed as LaTeX because there's no closing $.

+
  1. diff --git a/tests/test.md b/tests/test.md index e863186..63574fc 100644 --- a/tests/test.md +++ b/tests/test.md @@ -619,3 +619,44 @@ Asterisks Underscores --- + +# LaTeX / Math + +## Inline LaTeX + +Inline math using dollar signs: $E = mc^2$ and $a^2 + b^2 = c^2$. + +Inline math using parentheses: \(F = ma\) and \(\sum\_{i=1}^n i = \frac{n(n+1)}{2}\). + +Mixed with text: The quadratic formula is $x = \frac{-b \pm \sqrt{b^2-4ac}}{2a}$ which solves $ax^2 + bx + c = 0$. + +## Block LaTeX + +Block math using double dollar signs: + +$$ +\int_0^\infty e^{-x^2} dx = \frac{\sqrt{\pi}}{2} +$$ + +Block math using square brackets: + +\[ +\nabla \times \vec{E} = -\frac{\partial \vec{B}}{\partial t} +\] + +A more complex equation: + +$$ +\begin{aligned} +\nabla \cdot \vec{E} &= \frac{\rho}{\epsilon_0} \\ +\nabla \cdot \vec{B} &= 0 +\end{aligned} +$$ + +## Code vs LaTeX + +Code spans should not be affected: `$not latex$` and `\(also not latex\)`. + +A regular dollar amount like $100 should not be parsed as LaTeX because there's no closing $. + +---