Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 0 additions & 20 deletions app/src/lib/aisreClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ import {
import {
ParserService,
type Notebook,
type SerializeRequestOptions,
type DeserializeRequestOptions,
} from "@buf/stateful_runme.bufbuild_es/runme/parser/v1/parser_pb.js";
import { timestampDate } from "@bufbuild/protobuf/wkt";
Expand Down Expand Up @@ -185,25 +184,6 @@ export class AisreClient {
return response.notebook!;
}

/**
 * Asks the parser service to serialize a notebook and resolves with the raw
 * bytes the backend produced (e.g. Markdown content for an index file).
 *
 * @param notebook - Notebook sent as the request's `notebook` field.
 * @param serializeOptions - Optional value forwarded as the request's `options`.
 * @param requestOptions - Optional per-call overrides, passed through
 *   `mergeCallOptions` before the RPC is issued.
 * @returns The `result` field of the serialize response.
 */
async serializeNotebook(
  notebook: Notebook,
  serializeOptions?: SerializeRequestOptions,
  requestOptions?: RequestOptions,
): Promise<Uint8Array> {
  const callOptions = this.mergeCallOptions(requestOptions);
  const { result } = await this.parserClient.serialize(
    { notebook, options: serializeOptions },
    callOptions,
  );
  return result;
}

private mergeCallOptions(
overrides?: RequestOptions,
): CallOptions | undefined {
Expand Down
134 changes: 134 additions & 0 deletions app/src/lib/markdown/serializeNotebookToMarkdown.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import { create } from '@bufbuild/protobuf'
import { describe, expect, it } from 'vitest'

import { MimeType, parser_pb } from '../../runme/client'
import { serializeNotebookToMarkdown } from './serializeNotebookToMarkdown'

const textEncoder = new TextEncoder()

describe('serializeNotebookToMarkdown', () => {
  /** Wraps raw bytes in an output item; every fixture uses the `Buffer` type. */
  const bytesItem = (mime: string, data: Uint8Array) =>
    create(parser_pb.CellOutputItemSchema, { mime, type: 'Buffer', data })

  /** Same as `bytesItem`, but UTF-8 encodes `text` first. */
  const textItem = (mime: string, text: string) =>
    bytesItem(mime, textEncoder.encode(text))

  /** Builds a notebook out of already-created cells. */
  const notebookOf = (...cells: parser_pb.Cell[]) =>
    create(parser_pb.NotebookSchema, { cells })

  /** Joins expected Markdown rows with newlines. */
  const lines = (...rows: string[]) => rows.join('\n')

  it('renders markdown cells, code cells, and text outputs', () => {
    const prose = create(parser_pb.CellSchema, {
      kind: parser_pb.CellKind.MARKUP,
      languageId: 'markdown',
      value: '# Title\n\nSome notes.',
    })
    const code = create(parser_pb.CellSchema, {
      kind: parser_pb.CellKind.CODE,
      languageId: 'python',
      value: 'print("hello")',
      outputs: [
        create(parser_pb.CellOutputSchema, {
          items: [
            textItem(MimeType.VSCodeNotebookStdOut, 'hello\n'),
            textItem('application/json', '{"ok":true}'),
          ],
        }),
      ],
    })

    const markdown = serializeNotebookToMarkdown(notebookOf(prose, code))

    expect(markdown).toBe(
      lines(
        '# Title',
        '',
        'Some notes.',
        '',
        '```python',
        'print("hello")',
        '```',
        '',
        '```stdout',
        'hello',
        '```',
        '',
        '```json',
        '{"ok":true}',
        '```',
        ''
      )
    )
  })

  it('treats code cells tagged as markdown as prose', () => {
    // The cell kind says CODE, but the language id marks it as Markdown.
    const taggedAsMarkdown = create(parser_pb.CellSchema, {
      kind: parser_pb.CellKind.CODE,
      languageId: 'markdown',
      value: 'A paragraph with **bold** text.',
    })

    const markdown = serializeNotebookToMarkdown(notebookOf(taggedAsMarkdown))

    expect(markdown).toBe('A paragraph with **bold** text.\n')
  })

  it('skips binary and internal output payloads', () => {
    const shellCell = create(parser_pb.CellSchema, {
      kind: parser_pb.CellKind.CODE,
      languageId: 'bash',
      value: 'echo hi',
      outputs: [
        create(parser_pb.CellOutputSchema, {
          items: [
            // Internal terminal payload: expected to be dropped.
            textItem(MimeType.StatefulRunmeTerminal, 'ignored'),
            // Binary payload: expected to be dropped.
            bytesItem('image/png', new Uint8Array([0, 1, 2, 3])),
            // Plain stderr text: the only item expected in the output.
            textItem(MimeType.VSCodeNotebookStdErr, 'warn\n'),
          ],
        }),
      ],
    })

    const markdown = serializeNotebookToMarkdown(notebookOf(shellCell))

    expect(markdown).toBe(
      lines('```bash', 'echo hi', '```', '', '```stderr', 'warn', '```', '')
    )
  })

  it('uses a longer fence when content already contains triple backticks', () => {
    const backtickCell = create(parser_pb.CellSchema, {
      kind: parser_pb.CellKind.CODE,
      languageId: 'javascript',
      value: 'console.log("```inside```")',
    })

    const markdown = serializeNotebookToMarkdown(notebookOf(backtickCell))

    expect(markdown).toBe(
      lines('````javascript', 'console.log("```inside```")', '````', '')
    )
  })
})
168 changes: 168 additions & 0 deletions app/src/lib/markdown/serializeNotebookToMarkdown.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
import { MimeType, parser_pb } from '../../runme/client'

const IOPUB_MIME_TYPE = 'application/vnd.jupyter.iopub+json'

const outputTextDecoder = new TextDecoder()

const MARKDOWN_LANGUAGES = new Set(['markdown', 'md'])
const INTERNAL_SKIP_MIMES = new Set<string>([
MimeType.StatefulRunmeOutputItems,
MimeType.StatefulRunmeTerminal,
])

/**
 * Renders a notebook as a Markdown document.
 *
 * Every cell is serialized independently. Cells whose rendering is empty or
 * whitespace-only are dropped; the remaining chunks are separated by a blank
 * line and a newline is appended after the last one.
 *
 * @param notebook - Notebook whose `cells` are rendered in order.
 * @returns The Markdown text, or the empty string when no cell produced
 *   any content.
 */
export function serializeNotebookToMarkdown(
  notebook: parser_pb.Notebook
): string {
  const chunks: string[] = []
  for (const cell of notebook.cells) {
    const chunk = serializeCell(cell)
    if (chunk.trim().length > 0) {
      chunks.push(chunk)
    }
  }

  return chunks.length === 0 ? '' : `${chunks.join('\n\n')}\n`
}

/**
 * Renders a single cell followed by its printable outputs.
 *
 * Markup cells contribute their text with trailing whitespace removed; any
 * other cell becomes a fenced block tagged with its normalized language id.
 * The body and the rendered outputs are joined by a blank line, and whichever
 * of the two is empty is left out.
 */
function serializeCell(cell: parser_pb.Cell): string {
  let body: string
  if (isMarkupCell(cell)) {
    body = normalizeMarkupCell(cell.value)
  } else {
    const language = normalizeCodeFenceLanguage(cell.languageId)
    body = renderFencedBlock(cell.value, language)
  }

  const renderedOutputs = serializeCellOutputs(cell.outputs ?? [])

  const sections: string[] = []
  if (body) {
    sections.push(body)
  }
  if (renderedOutputs) {
    sections.push(renderedOutputs)
  }
  return sections.join('\n\n')
}

/**
 * Decides whether a cell should be emitted as prose instead of a fenced block.
 *
 * A cell qualifies when its kind is MARKUP, or when its language id (trimmed,
 * case-insensitive) is one of the known Markdown language ids.
 */
function isMarkupCell(cell: parser_pb.Cell): boolean {
  return (
    cell.kind === parser_pb.CellKind.MARKUP ||
    MARKDOWN_LANGUAGES.has(cell.languageId.trim().toLowerCase())
  )
}

/**
 * Strips trailing whitespace (including line terminators) from a markup
 * cell's text; leading and interior whitespace is left untouched.
 *
 * `trimEnd()` removes the same character set as the `\s` regex class, but in
 * a single linear pass. An unanchored `/\s+$/` pattern retries from every
 * position inside a whitespace run that is not at the very end of the string,
 * which is quadratic in the length of that run.
 *
 * @param value - Raw text of the markup cell.
 * @returns `value` without trailing whitespace.
 */
function normalizeMarkupCell(value: string): string {
  return value.trimEnd()
}

/**
 * Produces the info string used after an opening code fence: the language id
 * with surrounding whitespace removed, converted to lower case.
 */
function normalizeCodeFenceLanguage(languageId: string): string {
  const trimmed = languageId.trim()
  return trimmed.toLowerCase()
}

/**
 * Renders every printable item of every output, in order, separated by blank
 * lines. Items that render to nothing are skipped, so the result is the empty
 * string when no item is printable.
 */
function serializeCellOutputs(outputs: parser_pb.CellOutput[]): string {
  const blocks: string[] = []
  for (const output of outputs) {
    for (const item of output.items ?? []) {
      const block = serializeOutputItem(item)
      if (block) {
        blocks.push(block)
      }
    }
  }
  return blocks.join('\n\n')
}

/**
 * Renders one output item as a fenced block, or returns `null` when the item
 * should not appear in the Markdown.
 *
 * An item is skipped when its MIME type is missing or blank, is one of the
 * internal MIME types, is not text-like, or when its payload decodes to an
 * empty string.
 */
function serializeOutputItem(item: parser_pb.CellOutputItem): string | null {
  const mime = (item.mime ?? '').trim()

  const printable =
    mime !== '' && !INTERNAL_SKIP_MIMES.has(mime) && isTextLikeMime(mime)
  if (!printable) {
    return null
  }

  const text = decodeOutputText(item.data ?? new Uint8Array())
  return text ? renderFencedBlock(text, languageForOutputMime(mime)) : null
}

/**
 * Reports whether a MIME type denotes a payload that can be shown as text.
 *
 * Accepted: the notebook stdout/stderr types, the Jupyter IOPub type, any
 * `text/*` type, JSON, JavaScript, XML, SQL and YAML application types, and
 * any type with a `+json` or `+xml` suffix. The comparison is exact, so the
 * caller is expected to pass an already-trimmed value.
 */
function isTextLikeMime(mime: string): boolean {
  switch (mime) {
    case MimeType.VSCodeNotebookStdOut:
    case MimeType.VSCodeNotebookStdErr:
    case IOPUB_MIME_TYPE:
    case 'application/json':
    case 'application/javascript':
    case 'application/x-javascript':
    case 'application/xml':
    case 'application/sql':
    case 'application/yaml':
    case 'application/x-yaml':
      return true
    default:
      // Families recognised by prefix or structured-syntax suffix.
      return (
        mime.startsWith('text/') ||
        mime.endsWith('+json') ||
        mime.endsWith('+xml')
      )
  }
}

/**
 * Maps an output MIME type to the info string placed after the opening fence.
 *
 * Exact matches are checked first; after that, `text/<subtype>` yields
 * `<subtype>` and the `+json` / `+xml` suffixes yield `json` / `xml`. Any
 * other MIME type yields the empty string.
 */
function languageForOutputMime(mime: string): string {
  if (mime === MimeType.VSCodeNotebookStdOut) {
    return 'stdout'
  }
  if (mime === MimeType.VSCodeNotebookStdErr) {
    return 'stderr'
  }
  if (mime === IOPUB_MIME_TYPE || mime === 'application/json') {
    return 'json'
  }
  if (mime === 'text/html') {
    return 'html'
  }
  if (
    mime === 'application/javascript' ||
    mime === 'application/x-javascript'
  ) {
    return 'javascript'
  }
  if (mime === 'application/xml') {
    return 'xml'
  }
  if (mime === 'application/sql') {
    return 'sql'
  }
  if (mime === 'application/yaml' || mime === 'application/x-yaml') {
    return 'yaml'
  }

  // No exact match: fall back to the subtype or the structured-syntax suffix.
  if (mime.startsWith('text/')) {
    return mime.slice('text/'.length)
  }
  if (mime.endsWith('+json')) {
    return 'json'
  }
  if (mime.endsWith('+xml')) {
    return 'xml'
  }
  return ''
}

/**
 * Decodes an output payload to text and removes trailing whitespace.
 *
 * Trailing whitespace is removed with `trimEnd()`, which strips the same
 * characters as a `/\s+$/` replace but in one linear pass; the regex form
 * backtracks quadratically on long whitespace runs inside large outputs.
 *
 * @param data - Raw bytes of the output item.
 * @returns The decoded text without trailing whitespace, or the empty string
 *   when `data` is not a `Uint8Array`, is empty, or decoding throws.
 */
function decodeOutputText(data: Uint8Array): string {
  // Runtime guard: the static type promises a Uint8Array, but the value
  // originates from deserialized notebook data.
  if (!(data instanceof Uint8Array) || data.length === 0) {
    return ''
  }
  try {
    return outputTextDecoder.decode(data).trimEnd()
  } catch {
    // Deliberate best effort: an undecodable payload is omitted from the
    // Markdown instead of failing the whole serialization.
    return ''
  }
}

/**
 * Wraps `content` in a Markdown code fence.
 *
 * The fence is chosen from the content itself; `language`, when non-empty,
 * is appended directly to the opening fence as its info string. The content
 * is emitted verbatim between the two fence lines.
 */
function renderFencedBlock(content: string, language = ''): string {
  const fence = pickFence(content)
  const opening = language ? fence + language : fence
  return [opening, content, fence].join('\n')
}

/**
 * Chooses a backtick fence for `content`: one backtick longer than the
 * longest run of backticks found in the content, and never shorter than
 * three.
 */
function pickFence(content: string): string {
  let longestRun = 0
  let currentRun = 0
  for (let index = 0; index < content.length; index += 1) {
    if (content.charAt(index) === '`') {
      currentRun += 1
      if (currentRun > longestRun) {
        longestRun = currentRun
      }
    } else {
      currentRun = 0
    }
  }
  return '`'.repeat(Math.max(3, longestRun + 1))
}
Loading
Loading