Skip to content
Merged

Dev #276

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 72 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Project Overview

Pantsdown is a Markdown-to-HTML converter that renders Markdown in a style similar to GitHub's. It was built specifically for [github-preview.nvim](https://github.com/wallpants/github-preview.nvim) and is based on [Marked](https://github.com/markedjs/marked).

## Commands

```bash
# Type checking
bun run typecheck

# Linting
bun run lint

# Both typecheck and lint
bun run check

# Run tests (uses bun:test with happy-dom)
bun test

# Run a single test file
bun test tests/parse.test.ts

# Update test snapshots
bun test --update-snapshots

# Format code
bun run format

# Build docs
bun run docs:build
```

## Architecture

The parsing pipeline follows a classic compiler pattern:

1. **Lexer** (`src/lexer.ts`) - Tokenizes markdown source into an array of tokens
- Uses `Tokenizer` for the actual token creation
- Processes block-level tokens first, then inline tokens
- Tracks source maps for line number references
- Collects footnotes separately

2. **Tokenizer** (`src/tokenizer.ts`) - Creates tokens from markdown patterns
- Uses regex rules from `src/rules/block.ts` and `src/rules/inline.ts`

3. **Parser** (`src/parser.ts`) - Converts tokens to HTML by dispatching to the Renderer
- Recursively processes nested tokens

4. **Renderer** (`src/renderer.ts`) - Produces HTML output for each token type
- Uses highlight.js for syntax highlighting
- Uses github-slugger for heading anchors
- Handles special cases like mermaid diagrams and alerts

Entry point: `Pantsdown` class in `src/pantsdown.ts` coordinates the pipeline.

## Key Types

All token types are defined in `src/types.ts`. The `Token` union type covers all possible markdown elements (headings, code blocks, lists, tables, footnotes, alerts, etc.).

## Output

`pantsdown.parse(markdown)` returns `{ html, javascript }`:
- `html`: The rendered HTML string
- `javascript`: A script for interactive features (task list checkboxes, copy buttons)

## Styling

CSS is in `src/css/styles.css`. The styles require a parent element with the classes `pantsdown light` or `pantsdown dark` (optionally with `high-contrast` as well).
Binary file modified bun.lockb
Binary file not shown.
24 changes: 13 additions & 11 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,21 +51,23 @@
},
"dependencies": {
"github-slugger": "^2.0.0",
"highlight.js": "^11.11.1"
"highlight.js": "^11.11.1",
"katex": "^0.16.28"
},
"devDependencies": {
"@commitlint/config-conventional": "^19.6.0",
"@commitlint/cz-commitlint": "^19.6.1",
"@happy-dom/global-registrator": "^16.3.0",
"@types/bun": "^1.1.14",
"@typescript-eslint/eslint-plugin": "^8.19.0",
"@typescript-eslint/parser": "^8.19.0",
"@commitlint/config-conventional": "^19.8.1",
"@commitlint/cz-commitlint": "^19.8.1",
"@happy-dom/global-registrator": "^16.8.1",
"@types/bun": "^1.3.7",
"@types/katex": "^0.16.8",
"@typescript-eslint/eslint-plugin": "^8.54.0",
"@typescript-eslint/parser": "^8.54.0",
"commitizen": "^4.3.1",
"commitlint": "^19.6.1",
"commitlint": "^19.8.1",
"eslint": "^8.57.1",
"prettier": "^3.4.2",
"prettier-plugin-organize-imports": "^4.1.0",
"semantic-release": "^24.2.1",
"prettier": "^3.8.1",
"prettier-plugin-organize-imports": "^4.3.0",
"semantic-release": "^24.2.9",
"typescript": "5.7.2"
}
}
38 changes: 36 additions & 2 deletions src/css/styles.css
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,9 @@
margin: 0;
color: var(--color-fg-default);
background-color: var(--color-canvas-default);
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", "Noto Sans", Helvetica, Arial,
sans-serif, "Apple Color Emoji", "Segoe UI Emoji";
font-family:
-apple-system, BlinkMacSystemFont, "Segoe UI", "Noto Sans", Helvetica, Arial, sans-serif,
"Apple Color Emoji", "Segoe UI Emoji";
font-size: 16px;
line-height: 1.5;
word-wrap: break-word;
Expand Down Expand Up @@ -1323,3 +1324,36 @@
/* .pantsdown.dark .hljs-tag { */
/* /1* purposely ignored *1/ */
/* } */

/*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* *
* KaTeX / LaTeX *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*/

/* Display-math container: laid out as its own centered block with vertical
 * spacing; content that is too wide scrolls horizontally, and vertical
 * overflow is hidden. */
.pantsdown .katex-block {
display: block;
margin: 16px 0;
text-align: center;
overflow-x: auto;
overflow-y: hidden;
}

/* Inline-math wrapper: keeps the formula in the flow of the surrounding text. */
.pantsdown .katex-inline {
display: inline;
}

/* Error state for a math wrapper: danger-colored text on a subtle background
 * with small padding and rounded corners. */
.pantsdown .katex-error {
color: var(--color-danger-fg);
background-color: var(--color-canvas-subtle);
padding: 0.2em 0.4em;
border-radius: 6px;
}

/* A <code> element inside an error wrapper takes its color from the wrapper
 * and drops its own background and padding, so only the wrapper's error
 * styling is visible. */
.pantsdown .katex-error code {
color: inherit;
background-color: transparent;
padding: 0;
}
22 changes: 18 additions & 4 deletions src/lexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,13 @@ export class Lexer {
continue;
}

// latexBlock
if ((token = this.tokenizer.latexBlock(src))) {
src = src.substring(token.raw.length);
tokens.push(token);
continue;
}

// fences
if ((token = this.tokenizer.fences(src))) {
src = src.substring(token.raw.length);
Expand Down Expand Up @@ -169,8 +176,8 @@ export class Lexer {
lastToken.text += "\n" + token.raw;
const lastInline = this.inlineQueue[this.inlineQueue.length - 1];
if (lastInline) lastInline.src = lastToken.text;
} else if (!this.links[token.tag]) {
this.links[token.tag] = {
} else {
this.links[token.tag] ??= {
href: token.href,
title: token.title,
};
Expand Down Expand Up @@ -215,7 +222,7 @@ export class Lexer {
if ((token = this.tokenizer.text(src))) {
src = src.substring(token.raw.length);
lastToken = tokens[tokens.length - 1];
if (lastToken && lastToken.type === "text") {
if (lastToken?.type === "text") {
lastToken.raw += "\n" + token.raw;
lastToken.text += "\n" + token.text;
this.inlineQueue.pop();
Expand Down Expand Up @@ -291,6 +298,13 @@ export class Lexer {
}
keepPrevChar = false;

// latexInline (before escape to handle \(...\) syntax)
if ((token = this.tokenizer.latexInline(src))) {
src = src.substring(token.raw.length);
tokens.push(token);
continue;
}

// escape
if ((token = this.tokenizer.escape(src))) {
src = src.substring(token.raw.length);
Expand Down Expand Up @@ -391,7 +405,7 @@ export class Lexer {
}
keepPrevChar = true;
lastToken = tokens[tokens.length - 1];
if (lastToken && lastToken.type === "text") {
if (lastToken?.type === "text") {
lastToken.raw += token.raw;
lastToken.text += token.text;
} else {
Expand Down
8 changes: 8 additions & 0 deletions src/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,10 @@ export class Parser {
out += top ? this.renderer.paragraph(body, textToken.sourceMap) : body;
continue;
}
case "latexBlock": {
out += this.renderer.latexBlock(token.text, token.sourceMap);
continue;
}

default: {
const errMsg = 'Token with "' + token.type + '" type was not found.';
Expand Down Expand Up @@ -253,6 +257,10 @@ export class Parser {
out += this.renderer.text(token.text);
break;
}
case "latexInline": {
out += this.renderer.latexInline(token.text);
break;
}
default: {
const errMsg = 'Token with "' + token.type + '" type was not found.';
throw new Error(errMsg);
Expand Down
37 changes: 37 additions & 0 deletions src/renderer.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import GithubSlugger from "github-slugger";
import hljs from "highlight.js";
import katex from "katex";
import { type Pantsdown } from "./pantsdown.ts";
import { inline } from "./rules/inline.ts";
import { type HTMLAttrs, type SourceMap, type Tokens } from "./types.ts";
Expand Down Expand Up @@ -212,4 +213,40 @@ export class Renderer {
/**
 * Renders a text token.
 *
 * @param text - The text to output.
 * @returns The given `text`, unchanged.
 */
text(text: string): string {
return text;
}

/**
 * Renders a display-mode LaTeX expression to an HTML string using KaTeX.
 *
 * The KaTeX output is wrapped in a `<div class="katex-block">` and passed
 * through `injectHtmlAttributes` together with an empty attribute list and
 * the given source map.
 *
 * If anything inside the `try` throws, the escaped LaTeX source is emitted
 * in a `<code>` element and the wrapper also gets the `katex-error` class.
 * NOTE(review): with `throwOnError: false` KaTeX is expected to render parse
 * errors itself instead of throwing, so this fallback presumably only covers
 * other failures; confirm against the KaTeX options documentation.
 *
 * @param latex - LaTeX source of the expression, without its delimiters.
 * @param sourceMap - Source map forwarded to `injectHtmlAttributes`.
 * @returns The HTML for the block, ending with a newline.
 */
latexBlock(latex: string, sourceMap: SourceMap): string {
    try {
        const mathHtml = katex.renderToString(latex, {
            displayMode: true,
            throwOnError: false,
            output: "html",
            trust: false,
        });
        const wrapped = `<div class="katex-block">${mathHtml}</div>\n`;
        return injectHtmlAttributes(wrapped, [], sourceMap);
    } catch {
        // Fall back to showing the raw (escaped) source inside the same wrapper.
        const fallback = `<div class="katex-block katex-error"><code>${escape(latex)}</code></div>\n`;
        return injectHtmlAttributes(fallback, [], sourceMap);
    }
}

/**
 * Renders an inline LaTeX expression to an HTML string using KaTeX.
 *
 * The KaTeX output is wrapped in a `<span class="katex-inline">`. If
 * rendering throws, the escaped LaTeX source is emitted in a `<code>`
 * element and the wrapper also gets the `katex-error` class.
 *
 * @param latex - LaTeX source of the expression, without its delimiters.
 * @returns The HTML for the inline expression.
 */
latexInline(latex: string): string {
    let mathHtml: string;
    try {
        mathHtml = katex.renderToString(latex, {
            displayMode: false,
            throwOnError: false,
            output: "html",
            trust: false,
        });
    } catch {
        // Fall back to showing the raw (escaped) source.
        return `<span class="katex-inline katex-error"><code>${escape(latex)}</code></span>`;
    }
    return `<span class="katex-inline">${mathHtml}</span>`;
}
}
4 changes: 3 additions & 1 deletion src/rules/block.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ type BlockRuleNames =
| "bullet"
| "listItemStart"
| "footnote"
| "paragraph";
| "paragraph"
| "latexBlock";

export const label = /(?!\s*\])(?:\\.|[^\[\]\\])+/;

Expand Down Expand Up @@ -136,4 +137,5 @@ export const block: Record<BlockRuleNames, RegExp> = {
paragraph: block_paragraph,
footnote: /^\[\^([^\]\n]+)\]:(?:[ \t]+|[\n]*?|$)([^\n]*?(?:\n|$)(?:[\n]*?[ ]{4,}[^\n]*)*)/,
text: /^[^\n]+/,
latexBlock: /^(?:\$\$([^$]*(?:\$(?!\$)[^$]*)*)\$\$|\\\[([\s\S]*?)\\\])/,
};
6 changes: 4 additions & 2 deletions src/rules/inline.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ type InlineRuleNames =
| "punctuation"
| "blockSkip"
| "footnoteRef"
| "backpedal";
| "backpedal"
| "latexInline";

// list of unicode punctuation marks, plus any missing characters from CommonMark spec
const punctuation = "\\p{P}\\p{S}";
Expand Down Expand Up @@ -112,7 +113,7 @@ const inline_backpedal =
const inline_del = /^(~~?)(?=[^\s~])([\s\S]*?[^\s~])\1(?=[^~]|$)/;

const inline_text =
/^([`~]+|[^`~])(?:(?= {2,}\n)|(?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)|[\s\S]*?(?:(?=[\\<!\[`*~_]|\b_|https?:\/\/|ftp:\/\/|www\.|$)|[^ ](?= {2,}\n)|[^a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-](?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)))/;
/^([`~]+|[^`~])(?:(?= {2,}\n)|(?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)|[\s\S]*?(?:(?=[\\<!\[`*~_$]|\b_|https?:\/\/|ftp:\/\/|www\.|$)|[^ ](?= {2,}\n)|[^a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-](?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)))/;

const inline_url = edit(/^((?:ftp|https?):\/\/|www\.)(?:[a-zA-Z0-9\-]+\.?)+[^\s<]*|^email/, "i")
.replace("email", extended_email)
Expand All @@ -139,4 +140,5 @@ export const inline: Omit<Record<InlineRuleNames, RegExp>, "emStrong"> & {
blockSkip: inline_blockSkip,
backpedal: inline_backpedal,
footnoteRef: /^\[\^([^\]\n]+)\]/,
latexInline: /^(?:\$(?!\$)([^\s$](?:[^$\n]*[^\s$])?)\$(?!\$)|\\\((.+?)\\\))/,
};
32 changes: 32 additions & 0 deletions src/tokenizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,7 @@ export class Tokenizer {

if (!nextChar || !prevChar || inline.punctuation.exec(prevChar)) {
// unicode Regex counts emoji as 1 char; spread into array for proper count (used multiple times below)
// eslint-disable-next-line @typescript-eslint/no-misused-spread
const lLength = [...match[0]].length - 1;
let rDelim,
rLength,
Expand All @@ -736,6 +737,7 @@ export class Tokenizer {

if (!rDelim) continue; // skip single * in __abc*abc__

// eslint-disable-next-line @typescript-eslint/no-misused-spread
rLength = [...rDelim].length;

if (match[3] || match[4]) {
Expand All @@ -757,6 +759,7 @@ export class Tokenizer {
// Remove extra characters. *a*** -> *a*
rLength = Math.min(rLength, rLength + delimTotal + midDelimTotal);
// char length can be >1 for unicode characters;
// eslint-disable-next-line @typescript-eslint/no-misused-spread
const lastCharLength = [...match[0]][0]!.length;
const raw = src.slice(0, lLength + match.index + lastCharLength + rLength);

Expand Down Expand Up @@ -920,4 +923,33 @@ export class Tokenizer {
text,
};
}

/**
 * Tries to read a display-math block from the start of `src` using the
 * `block.latexBlock` rule.
 *
 * @param src - Remaining markdown source.
 * @returns A `latexBlock` token whose `raw` is the full match, whose `text`
 *   is the trimmed expression body, and whose `sourceMap` comes from the
 *   lexer; `undefined` when the rule does not match.
 */
latexBlock(src: string): Tokens["LatexBlock"] | undefined {
    const match = block.latexBlock.exec(src);
    if (match === null) {
        return undefined;
    }

    const raw = match[0];
    // Group 1 holds the body of `$$...$$`, group 2 the body of `\[...\]`.
    const body = match[1] ?? match[2] ?? "";
    const text = body.trim();

    return {
        type: "latexBlock",
        raw,
        text,
        sourceMap: this.lexer.getSourceMap(raw),
    };
}

/**
 * Tries to read an inline math expression from the start of `src` using the
 * `inline.latexInline` rule.
 *
 * @param src - Remaining inline source.
 * @returns A `latexInline` token whose `raw` is the full match and whose
 *   `text` is the expression body (not trimmed); `undefined` when the rule
 *   does not match.
 */
latexInline(src: string): Tokens["LatexInline"] | undefined {
    const match = inline.latexInline.exec(src);
    if (match === null) {
        return undefined;
    }

    // Group 1 holds the body of `$...$`, group 2 the body of `\(...\)`.
    const body = match[1] ?? match[2] ?? "";

    return { type: "latexInline", raw: match[0], text: body };
}
}
Loading