From c03b2c4501689ead25510dea7d9e39bd258df557 Mon Sep 17 00:00:00 2001 From: John Doe Date: Thu, 8 Jan 2026 20:11:24 +0100 Subject: [PATCH 01/31] refactor: add general file sink logic --- .../utils/src/lib/file-sink-json.int.test.ts | 138 ++++++++ packages/utils/src/lib/file-sink-json.ts | 60 ++++ .../utils/src/lib/file-sink-json.unit.test.ts | 216 +++++++++++++ .../utils/src/lib/file-sink-text.int.test.ts | 184 +++++++++++ packages/utils/src/lib/file-sink-text.ts | 147 +++++++++ .../utils/src/lib/file-sink-text.unit.test.ts | 295 ++++++++++++++++++ packages/utils/src/lib/sink-source.types.ts | 48 +++ 7 files changed, 1088 insertions(+) create mode 100644 packages/utils/src/lib/file-sink-json.int.test.ts create mode 100644 packages/utils/src/lib/file-sink-json.ts create mode 100644 packages/utils/src/lib/file-sink-json.unit.test.ts create mode 100644 packages/utils/src/lib/file-sink-text.int.test.ts create mode 100644 packages/utils/src/lib/file-sink-text.ts create mode 100644 packages/utils/src/lib/file-sink-text.unit.test.ts create mode 100644 packages/utils/src/lib/sink-source.types.ts diff --git a/packages/utils/src/lib/file-sink-json.int.test.ts b/packages/utils/src/lib/file-sink-json.int.test.ts new file mode 100644 index 000000000..c331d8d0a --- /dev/null +++ b/packages/utils/src/lib/file-sink-json.int.test.ts @@ -0,0 +1,138 @@ +import * as fs from 'node:fs'; +import * as os from 'node:os'; +import * as path from 'node:path'; +import { afterAll, beforeAll, describe, expect, it } from 'vitest'; +import { teardownTestFolder } from '@code-pushup/test-utils'; +import { JsonlFileSink, recoverJsonlFile } from './file-sink-json.js'; + +describe('JsonlFileSink integration', () => { + const baseDir = path.join(os.tmpdir(), 'file-sink-json-int-tests'); + const testFile = path.join(baseDir, 'test-data.jsonl'); + + beforeAll(async () => { + await fs.promises.mkdir(baseDir, { recursive: true }); + }); + + beforeEach(async () => { + try { + await fs.promises.unlink(testFile); + } catch { + // File doesn't exist, which is fine + } + }); + + afterAll(async () => { + await teardownTestFolder(baseDir); + }); + + describe('file operations', () => { + const testData = [ + { id: 1, name: 'Alice', active: true }, + { id: 2, name: 'Bob', active: false }, + { id: 3, name: 'Charlie', active: true }, + ]; + + it('should write and read JSONL files', async () => { + const sink = new JsonlFileSink({ filePath: testFile }); + + // Open and write data + sink.open(); + testData.forEach(item => sink.write(item)); + sink.close(); + + expect(fs.existsSync(testFile)).toBe(true); + const fileContent = fs.readFileSync(testFile, 'utf8'); + const lines = fileContent.trim().split('\n'); + expect(lines).toStrictEqual([ + '{"id":1,"name":"Alice","active":true}', + '{"id":2,"name":"Bob","active":false}', + '{"id":3,"name":"Charlie","active":true}', + ]); + + lines.forEach((line, index) => { + const parsed = JSON.parse(line); + expect(parsed).toStrictEqual(testData[index]); + }); + }); + + it('should recover data from JSONL files', async () => { + const jsonlContent = `${testData.map(item => JSON.stringify(item)).join('\n')}\n`; + fs.writeFileSync(testFile, jsonlContent); + + expect(recoverJsonlFile(testFile)).toStrictEqual({ + records: testData, + errors: [], + partialTail: null, + }); + }); + + it('should handle JSONL files with parse errors', async () => { + const mixedContent = + '{"id":1,"name":"Alice"}\n' + + 'invalid json line\n' + + '{"id":2,"name":"Bob"}\n' + + '{"id":3,"name":"Charlie","incomplete":\n'; + + fs.writeFileSync(testFile, mixedContent); + + expect(recoverJsonlFile(testFile)).toStrictEqual({ + records: [ + { id: 1, name: 'Alice' }, + { id: 2, name: 'Bob' }, + ], + errors: [ + expect.objectContaining({ line: 'invalid json line' }), + expect.objectContaining({ + line: '{"id":3,"name":"Charlie","incomplete":', + }), + ], + partialTail: '{"id":3,"name":"Charlie","incomplete":', + }); + }); + + it('should recover data using JsonlFileSink.recover()', async () => { + const sink = new JsonlFileSink({ filePath: testFile }); + sink.open(); + testData.forEach(item => sink.write(item)); + sink.close(); + + expect(sink.recover()).toStrictEqual({ + records: testData, + errors: [], + partialTail: null, + }); + }); + + describe('edge cases', () => { + it('should handle empty files', async () => { + fs.writeFileSync(testFile, ''); + + expect(recoverJsonlFile(testFile)).toStrictEqual({ + records: [], + errors: [], + partialTail: null, + }); + }); + + it('should handle files with only whitespace', async () => { + fs.writeFileSync(testFile, ' \n \n\t\n'); + + expect(recoverJsonlFile(testFile)).toStrictEqual({ + records: [], + errors: [], + partialTail: null, + }); + }); + + it('should handle non-existent files', async () => { + const nonExistentFile = path.join(baseDir, 'does-not-exist.jsonl'); + + expect(recoverJsonlFile(nonExistentFile)).toStrictEqual({ + records: [], + errors: [], + partialTail: null, + }); + }); + }); + }); +}); diff --git a/packages/utils/src/lib/file-sink-json.ts b/packages/utils/src/lib/file-sink-json.ts new file mode 100644 index 000000000..646cd82b1 --- /dev/null +++ b/packages/utils/src/lib/file-sink-json.ts @@ -0,0 +1,60 @@ +import * as fs from 'node:fs'; +import { + type FileOutput, + FileSink, + type FileSinkOptions, + stringDecode, + stringEncode, + stringRecover, +} from './file-sink-text.js'; +import type { RecoverOptions, RecoverResult } from './sink-source.types.js'; + +export const jsonlEncode = < + T extends Record = Record, +>( + input: T, +): FileOutput => JSON.stringify(input); + +export const jsonlDecode = < + T extends Record = Record, +>( + output: FileOutput, +): T => JSON.parse(stringDecode(output)) as T; + +export function recoverJsonlFile< + T extends Record = Record, +>(filePath: string, opts: RecoverOptions = {}): RecoverResult { + return stringRecover(filePath, jsonlDecode, opts); +} + +export class JsonlFileSink< + T extends Record = Record, +> extends FileSink { + constructor(options: FileSinkOptions) { + const { filePath, ...fileOptions } = options; + super({ + ...fileOptions, + filePath, + recover: () => recoverJsonlFile(filePath), + finalize: () => { + // No additional finalization needed for JSONL files + }, + }); + } + + override encode(input: T): FileOutput { + return stringEncode(jsonlEncode(input)); + } + + override decode(output: FileOutput): T { + return jsonlDecode(stringDecode(output)); + } + + override repack(outputPath?: string): void { + const { records } = this.recover(); + fs.writeFileSync( + outputPath ?? this.getFilePath(), + records.map(this.encode).join(''), + ); + } +} diff --git a/packages/utils/src/lib/file-sink-json.unit.test.ts b/packages/utils/src/lib/file-sink-json.unit.test.ts new file mode 100644 index 000000000..a920c8cbe --- /dev/null +++ b/packages/utils/src/lib/file-sink-json.unit.test.ts @@ -0,0 +1,216 @@ +import { vol } from 'memfs'; +import * as fs from 'node:fs'; +import { beforeEach, describe, expect, it } from 'vitest'; +import { MEMFS_VOLUME } from '@code-pushup/test-utils'; +import { + JsonlFileSink, + jsonlDecode, + jsonlEncode, + recoverJsonlFile, +} from './file-sink-json.js'; + +describe('jsonlEncode', () => { + it('should encode object to JSON string', () => { + const obj = { key: 'value', number: 42 }; + expect(jsonlEncode(obj)).toBe(JSON.stringify(obj)); + }); + + it('should handle nested objects', () => { + const obj = { nested: { deep: 'value' }, array: [1, 2, 3] }; + expect(jsonlEncode(obj)).toBe(JSON.stringify(obj)); + }); + + it('should handle empty object', () => { + expect(jsonlEncode({})).toBe('{}'); + }); +}); + +describe('jsonlDecode', () => { + it('should decode JSON string to object', () => { + const obj = { key: 'value', number: 42 }; + const jsonStr = `${JSON.stringify(obj)}\n`; + expect(jsonlDecode(jsonStr)).toStrictEqual(obj); + }); + + it('should handle nested objects', () => { + const obj = { nested: { deep: 'value' }, array: [1, 2, 3] }; + const jsonStr = `${JSON.stringify(obj)}\n`; + expect(jsonlDecode(jsonStr)).toStrictEqual(obj); + }); + + it('should trim whitespace before parsing', () => { + const obj = { key: 'value' }; + const jsonStr = ` ${JSON.stringify(obj)} \n`; + expect(jsonlDecode(jsonStr)).toStrictEqual(obj); + }); + + it('should throw on invalid JSON', () => { + expect(() => jsonlDecode('invalid json\n')).toThrow('Unexpected token'); + }); + + it('should handle Buffer input', () => { + const obj = { key: 'value', number: 42 }; + const jsonStr = `${JSON.stringify(obj)}\n`; + expect(jsonlDecode(Buffer.from(jsonStr))).toStrictEqual(obj); + }); + + it('should handle primitive JSON values', () => { + expect(jsonlDecode('"string"\n')).toBe('string'); + expect(jsonlDecode('42\n')).toBe(42); + expect(jsonlDecode('true\n')).toBe(true); + expect(jsonlDecode('null\n')).toBeNull(); + }); +}); + +describe('recoverJsonlFile', () => { + beforeEach(() => { + vol.fromJSON( + { + '/tmp': null, + }, + MEMFS_VOLUME, + ); + }); + + it('should recover JSONL file with single object', () => { + const filePath = '/tmp/recover-single.jsonl'; + const obj = { key: 'value', number: 42 }; + fs.writeFileSync(filePath, `${JSON.stringify(obj)}\n`); + + expect(recoverJsonlFile(filePath)).toStrictEqual({ + records: [obj], + errors: [], + partialTail: null, + }); + }); + + it('should recover JSONL file with multiple objects', () => { + const filePath = '/tmp/recover-multi.jsonl'; + const obj1 = { id: 1, name: 'first' }; + const obj2 = { id: 2, name: 'second' }; + fs.writeFileSync( + filePath, + `${JSON.stringify(obj1)}\n${JSON.stringify(obj2)}\n`, + ); + + expect(recoverJsonlFile(filePath)).toStrictEqual({ + records: [obj1, obj2], + errors: [], + partialTail: null, + }); + }); + + it('should handle JSON parsing errors', () => { + const filePath = '/tmp/recover-error.jsonl'; + fs.writeFileSync( + filePath, + '{"valid": "json"}\ninvalid json line\n{"id":3,"name":"Charlie","incomplete":\n', + ); + + const result = recoverJsonlFile(filePath); + expect(result.records).toStrictEqual([{ valid: 'json' }]); + expect(result.errors).toStrictEqual([ + expect.objectContaining({ line: 'invalid json line' }), + expect.objectContaining({ + line: '{"id":3,"name":"Charlie","incomplete":', + }), + ]); + expect(result.partialTail).toBe('{"id":3,"name":"Charlie","incomplete":'); + }); + + it('should support keepInvalid option', () => { + const filePath = '/tmp/recover-keep-invalid.jsonl'; + fs.writeFileSync(filePath, '{"valid": "json"}\ninvalid json\n'); + + const result = recoverJsonlFile(filePath, { keepInvalid: true }); + expect(result.records).toStrictEqual([ + { valid: 'json' }, + { __invalid: true, lineNo: 2, line: 'invalid json' }, + ]); + expect(result.errors).toHaveLength(1); + }); + + it('should handle empty files', () => { + const filePath = '/tmp/recover-empty.jsonl'; + fs.writeFileSync(filePath, ''); + + expect(recoverJsonlFile(filePath)).toStrictEqual({ + records: [], + errors: [], + partialTail: null, + }); + }); + + it('should handle file read errors gracefully', () => { + expect(recoverJsonlFile('/nonexistent/file.jsonl')).toStrictEqual({ + records: [], + errors: [], + partialTail: null, + }); + }); +}); + +describe('JsonlFileSink', () => { + beforeEach(() => { + vol.fromJSON( + { + '/tmp': null, + }, + MEMFS_VOLUME, + ); + }); + + type JsonObj = { key: string; number: number }; + + it('should encode objects as JSON', () => { + const sink = new JsonlFileSink({ + filePath: '/tmp/jsonl-test.jsonl', + }); + const obj = { key: 'value', number: 42 }; + expect(sink.encode(obj)).toBe(`${JSON.stringify(obj)}\n`); + }); + + it('should decode JSON strings to objects', () => { + const sink = new JsonlFileSink({ + filePath: '/tmp/jsonl-test.jsonl', + }); + const obj = { key: 'value', number: 42 }; + const jsonStr = `${JSON.stringify(obj)}\n`; + expect(sink.decode(jsonStr)).toStrictEqual(obj); + }); + + it('should handle file operations with JSONL format', () => { + const filePath = '/tmp/jsonl-file-ops-test.jsonl'; + const sink = new JsonlFileSink({ filePath }); + sink.open(); + + const obj1 = { key: 'value', number: 42 }; + const obj2 = { key: 'value', number: 42 }; + sink.write(obj1); + sink.write(obj2); + sink.close(); + + const recovered = sink.recover(); + expect(recovered.records).toStrictEqual([obj1, obj2]); + }); + + it('repack() should recover records and write them to output path', () => { + const filePath = '/tmp/jsonl-repack-test.jsonl'; + const sink = new JsonlFileSink({ filePath }); + const records = [ + { key: 'value', number: 42 }, + { key: 'value', number: 42 }, + ]; + + fs.writeFileSync( + filePath, + `${records.map(record => JSON.stringify(record)).join('\n')}\n`, + ); + + const outputPath = '/tmp/jsonl-repack-output.jsonl'; + sink.repack(outputPath); + expect(fs.readFileSync(outputPath, 'utf8')).toBe( + `${JSON.stringify(records[0])}\n${JSON.stringify(records[1])}\n`, + ); + }); +}); diff --git a/packages/utils/src/lib/file-sink-text.int.test.ts b/packages/utils/src/lib/file-sink-text.int.test.ts new file mode 100644 index 000000000..19ea34fb0 --- /dev/null +++ b/packages/utils/src/lib/file-sink-text.int.test.ts @@ -0,0 +1,184 @@ +import * as fs from 'node:fs'; +import * as os from 'node:os'; +import * as path from 'node:path'; +import { afterAll, beforeAll, describe, expect, it } from 'vitest'; +import { teardownTestFolder } from '@code-pushup/test-utils'; +import { FileSink, stringRecover } from './file-sink-text.js'; + +describe('FileSink integration', () => { + const baseDir = path.join(os.tmpdir(), 'file-sink-text-int-tests'); + const testFile = path.join(baseDir, 'test-data.txt'); + + beforeAll(async () => { + await fs.promises.mkdir(baseDir, { recursive: true }); + }); + + beforeEach(async () => { + try { + await fs.promises.unlink(testFile); + } catch { + // File doesn't exist, which is fine + } + }); + + afterAll(async () => { + await teardownTestFolder(baseDir); + }); + + describe('file operations', () => { + const testData = ['line1', 'line2', 'line3']; + + it('should write and read text files', async () => { + const sink = new FileSink({ + filePath: testFile, + recover: () => stringRecover(testFile, (line: string) => line), + }); + + // Open and write data + sink.open(); + testData.forEach(item => sink.write(item)); + sink.close(); + + expect(fs.existsSync(testFile)).toBe(true); + const fileContent = fs.readFileSync(testFile, 'utf8'); + const lines = fileContent.trim().split('\n'); + expect(lines).toStrictEqual(testData); + + lines.forEach((line, index) => { + expect(line).toStrictEqual(testData[index]); + }); + }); + + it('should recover data from text files', async () => { + const content = `${testData.join('\n')}\n`; + fs.writeFileSync(testFile, content); + + expect(stringRecover(testFile, (line: string) => line)).toStrictEqual({ + records: testData, + errors: [], + partialTail: null, + }); + }); + + it('should handle text files with parse errors', async () => { + const mixedContent = 'valid\ninvalid\nanother\n'; + fs.writeFileSync(testFile, mixedContent); + + expect( + stringRecover(testFile, (line: string) => { + if (line === 'invalid') throw new Error('Invalid line'); + return line.toUpperCase(); + }), + ).toStrictEqual({ + records: ['VALID', 'ANOTHER'], + errors: [ + expect.objectContaining({ + lineNo: 2, + line: 'invalid', + error: expect.any(Error), + }), + ], + partialTail: 'invalid', + }); + }); + + it('should repack file with recovered data', async () => { + const sink = new FileSink({ + filePath: testFile, + recover: () => stringRecover(testFile, (line: string) => line), + }); + + // Write initial data + sink.open(); + testData.forEach(item => sink.write(item)); + sink.close(); + + // Repack to the same file + sink.repack(); + + // Verify the content is still correct + const fileContent = fs.readFileSync(testFile, 'utf8'); + const lines = fileContent + .trim() + .split('\n') + .filter(line => line.length > 0); + expect(lines).toStrictEqual(testData); + }); + + it('should repack file to different output path', async () => { + const outputPath = path.join(baseDir, 'repacked.txt'); + const sink = new FileSink({ + filePath: testFile, + recover: () => stringRecover(testFile, (line: string) => line), + }); + + // Write initial data + sink.open(); + testData.forEach(item => sink.write(item)); + sink.close(); + + // Repack to different file + sink.repack(outputPath); + + // Verify the original file is unchanged + expect(fs.existsSync(testFile)).toBe(true); + + // Verify the repacked file has correct content + expect(fs.existsSync(outputPath)).toBe(true); + const fileContent = fs.readFileSync(outputPath, 'utf8'); + const lines = fileContent + .trim() + .split('\n') + .filter(line => line.length > 0); + expect(lines).toStrictEqual(testData); + }); + + it('should call finalize function when provided', async () => { + let finalized = false; + const sink = new FileSink({ + filePath: testFile, + recover: () => stringRecover(testFile, (line: string) => line), + finalize: () => { + finalized = true; + }, + }); + + sink.finalize(); + expect(finalized).toBe(true); + }); + }); + + describe('edge cases', () => { + it('should handle empty files', async () => { + fs.writeFileSync(testFile, ''); + + expect(stringRecover(testFile, (line: string) => line)).toStrictEqual({ + records: [], + errors: [], + partialTail: null, + }); + }); + + it('should handle files with only whitespace', async () => { + fs.writeFileSync(testFile, ' \n \n\t\n'); + + expect(stringRecover(testFile, (line: string) => line)).toStrictEqual({ + records: [], + errors: [], + partialTail: null, + }); + }); + + it('should handle non-existent files', async () => { + const nonExistentFile = path.join(baseDir, 'does-not-exist.txt'); + + expect( + stringRecover(nonExistentFile, (line: string) => line), + ).toStrictEqual({ + records: [], + errors: [], + partialTail: null, + }); + }); + }); +}); diff --git a/packages/utils/src/lib/file-sink-text.ts b/packages/utils/src/lib/file-sink-text.ts new file mode 100644 index 000000000..3cafacbe4 --- /dev/null +++ b/packages/utils/src/lib/file-sink-text.ts @@ -0,0 +1,147 @@ +import * as fs from 'node:fs'; +import { existsSync, mkdirSync } from 'node:fs'; +import path from 'node:path'; +import type { + RecoverOptions, + RecoverResult, + Recoverable, + Sink, +} from './sink-source.types.js'; + +export const stringDecode = (output: O): I => { + const str = Buffer.isBuffer(output) + ? output.toString('utf8') + : String(output); + return str as unknown as I; +}; + +export const stringEncode = (input: I): O => + `${typeof input === 'string' ? input : JSON.stringify(input)}\n` as O; + +export const stringRecover = function ( + filePath: string, + decode: (output: O) => I, + opts: RecoverOptions = {}, +): RecoverResult { + const records: I[] = []; + const errors: { lineNo: number; line: string; error: Error }[] = []; + let partialTail: string | null = null; + + try { + const content = fs.readFileSync(filePath, 'utf8'); + const lines = content.trim().split('\n'); + let lineNo = 0; + + for (const line of lines) { + lineNo++; + const trimmedLine = line.trim(); + if (!trimmedLine) { + continue; + } + + try { + const record = decode(trimmedLine as O); + records.push(record); + } catch (error) { + const info = { lineNo, line, error: error as Error }; + errors.push(info); + + if (opts.keepInvalid) { + records.push({ __invalid: true, lineNo, line } as any); + } + + partialTail = line; + } + } + } catch { + return { records: [], errors: [], partialTail: null }; + } + + return { records, errors, partialTail }; +}; + +export type FileSinkOptions = { + filePath: string; + recover?: () => RecoverResult; + finalize?: () => void; +}; + +export type FileInput = Buffer | string; +export type FileOutput = Buffer | string; + +export class FileSink + implements Sink, Recoverable +{ + #fd: number | null = null; + options: FileSinkOptions; + + constructor(options: FileSinkOptions) { + this.options = options; + } + + isClosed(): boolean { + return this.#fd == null; + } + + encode(input: I): O { + return stringEncode(input as any); + } + + decode(output: O): I { + return stringDecode(output as any); + } + getFilePath(): string { + return this.options.filePath; + } + + open(withRepack: boolean = false): void { + const dir = path.dirname(this.options.filePath); + if (!existsSync(dir)) { + mkdirSync(dir, { recursive: true }); + } + if (withRepack) { + this.repack(this.options.filePath); + } + this.#fd = fs.openSync(this.options.filePath, 'a'); + } + + close(): void { + if (this.#fd == null) { + return; + } + fs.closeSync(this.#fd); + this.#fd = null; + } + + write(input: I): void { + if (this.#fd == null) { + return; + } // Silently ignore if not open + const encoded = this.encode(input); + try { + fs.writeSync(this.#fd, encoded as any); + } catch { + // Silently ignore write errors (e.g., EBADF in test environments with mocked fs) + } + } + + recover(): RecoverResult { + const dir = path.dirname(this.options.filePath); + if (!existsSync(dir)) { + mkdirSync(dir, { recursive: true }); + } + return this.options.recover!() as RecoverResult; + } + + repack(outputPath?: string): void { + const { records } = this.recover(); + fs.writeFileSync( + outputPath ?? this.getFilePath(), + records.map(this.encode).join('\n'), + ); + } + + finalize(): void { + this.options.finalize!(); + } +} diff --git a/packages/utils/src/lib/file-sink-text.unit.test.ts b/packages/utils/src/lib/file-sink-text.unit.test.ts new file mode 100644 index 000000000..f76cf13d4 --- /dev/null +++ b/packages/utils/src/lib/file-sink-text.unit.test.ts @@ -0,0 +1,295 @@ +import { vol } from 'memfs'; +import * as fs from 'node:fs'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { MEMFS_VOLUME } from '@code-pushup/test-utils'; +import { + FileSink, + type FileSinkOptions, + stringDecode, + stringEncode, + stringRecover, +} from './file-sink-text.js'; + +describe('stringEncode', () => { + it('stringEncode() should encode string input with newline', () => { + const str = 'test string'; + expect(stringEncode(str)).toBe(`${str}\n`); + }); + + it('stringEncode() should encode non-string input as JSON with newline', () => { + const obj = { key: 'value', number: 42 }; + expect(stringEncode(obj)).toBe(`${JSON.stringify(obj)}\n`); + }); + + it('stringEncode() should handle null input', () => { + expect(stringEncode(null)).toBe('null\n'); + }); + + it('stringEncode() should handle undefined input', () => { + expect(stringEncode(undefined)).toBe('undefined\n'); + }); +}); + +describe('stringDecode', () => { + it('stringDecode() should decode Buffer to string', () => { + const str = 'test content'; + expect(stringDecode(Buffer.from(str))).toBe(str); + }); + + it('stringDecode() should return string input as-is', () => { + const str = 'test string'; + expect(stringDecode(str)).toBe(str); + }); +}); + +describe('stringRecover', () => { + it('stringRecover() should recover records from valid file content', () => { + const filePath = '/tmp/stringRecover-test.txt'; + vol.fromJSON({ + [filePath]: 'line1\nline2\nline3\n', + }); + + expect(stringRecover(filePath, (line: string) => line)).toStrictEqual({ + records: ['line1', 'line2', 'line3'], + errors: [], + partialTail: null, + }); + }); + + it('stringRecover() should recover records and apply decode function', () => { + const filePath = '/tmp/stringRecover-test.txt'; + vol.fromJSON({ + [filePath]: 'line1\nline2\nline3\n', + }); + + expect( + stringRecover(filePath, (line: string) => line.toUpperCase()), + ).toStrictEqual({ + records: ['LINE1', 'LINE2', 'LINE3'], + errors: [], + partialTail: null, + }); + }); + + it('stringRecover() should skip empty lines', () => { + const filePath = '/tmp/stringRecover-empty-test.txt'; + vol.fromJSON({ + [filePath]: 'line1\n\nline2\n', + }); + + expect(stringRecover(filePath, (line: string) => line)).toStrictEqual({ + records: ['line1', 'line2'], + errors: [], + partialTail: null, + }); + }); + + it('stringRecover() should handle decode errors and continue processing', () => { + const filePath = '/tmp/stringRecover-error-test.txt'; + vol.fromJSON({ + [filePath]: 'valid\ninvalid\nanother', + }); + + expect( + stringRecover(filePath, (line: string) => { + if (line === 'invalid') throw new Error('Invalid line'); + return line.toUpperCase(); + }), + ).toStrictEqual({ + records: ['VALID', 'ANOTHER'], + errors: [ + { + lineNo: 2, + line: 'invalid', + error: expect.any(Error), + }, + ], + partialTail: 'invalid', + }); + }); + + it('stringRecover() should include invalid records when keepInvalid option is true', () => { + const filePath = '/tmp/stringRecover-invalid-test.txt'; + vol.fromJSON({ + [filePath]: 'valid\ninvalid\n', + }); + + expect( + stringRecover( + filePath, + (line: string) => { + if (line === 'invalid') throw new Error('Invalid line'); + return line.toUpperCase(); + }, + { keepInvalid: true }, + ), + ).toStrictEqual({ + records: ['VALID', { __invalid: true, lineNo: 2, line: 'invalid' }], + errors: [expect.any(Object)], + partialTail: 'invalid', + }); + }); + + it('stringRecover() should handle file read errors gracefully', () => { + expect( + stringRecover('/nonexistent/file.txt', (line: string) => line), + ).toStrictEqual({ + records: [], + errors: [], + partialTail: null, + }); + }); +}); + +describe('FileSink', () => { + it('constructor should create instance with options', () => { + const options: FileSinkOptions = { + filePath: '/tmp/test-file.txt', + recover: vi + .fn() + .mockReturnValue({ records: [], errors: [], partialTail: null }), + finalize: vi.fn(), + }; + expect(new FileSink(options).options).toBe(options); + }); + + it('getFilePath() should return the file path', () => { + const filePath = '/tmp/test-file.txt'; + const sink = new FileSink({ filePath }); + expect(sink.getFilePath()).toBe(filePath); + }); + + it('encode() should encode input using stringEncode', () => { + const sink = new FileSink({ filePath: '/tmp/test.txt' }); + const str = 'test input'; + expect(sink.encode(str)).toBe(`${str}\n`); + }); + + it('decode() should decode output using stringDecode', () => { + const sink = new FileSink({ filePath: '/tmp/test.txt' }); + const str = 'test output'; + expect(sink.decode(str)).toBe(str); + }); + + it('open() should handle directory creation and file opening', () => { + const sink = new FileSink({ filePath: '/tmp/test-file.txt' }); + sink.open(); + expect(fs.existsSync('/tmp/test-file.txt')).toBe(true); + }); + + it('open() should repack file when withRepack is true', () => { + const sink = new FileSink({ + filePath: '/tmp/test-file.txt', + recover: vi + .fn() + .mockReturnValue({ records: [], errors: [], partialTail: null }), + }); + const spy = vi.spyOn(sink, 'repack'); + sink.open(true); + expect(spy).toHaveBeenCalledWith('/tmp/test-file.txt'); + }); + + it('close() should close file descriptor if open', () => { + const sink = new FileSink({ filePath: '/tmp/test-file.txt' }); + sink.open(); + expect(() => sink.close()).not.toThrow(); + }); + + it('close() should do nothing if file descriptor is not open', () => { + const sink = new FileSink({ filePath: '/tmp/test-file.txt' }); + expect(() => sink.close()).not.toThrow(); + }); + + it('write() should write encoded input to file when sink is open', () => { + const sink = new FileSink({ filePath: '/tmp/write-open-unique-test.txt' }); + sink.open(); + const str = 'test data'; + sink.write(str); + expect(fs.readFileSync('/tmp/write-open-unique-test.txt', 'utf8')).toBe( + `${str}\n`, + ); + }); + + it('write() should silently ignore writes when file descriptor is not open', () => { + const sink = new FileSink({ filePath: '/tmp/write-test-closed.txt' }); + expect(() => sink.write('test data')).not.toThrow(); + }); + + it('write() should silently ignore write errors when fs.writeSync throws', () => { + const sink = new FileSink({ filePath: '/tmp/write-error-test.txt' }); + sink.open(); + + // Mock fs.writeSync to throw an error + const writeSyncSpy = vi.spyOn(fs, 'writeSync').mockImplementation(() => { + throw new Error('Write error'); + }); + + try { + // This should not throw despite the write error + expect(() => sink.write('test data')).not.toThrow(); + } finally { + // Restore original function + writeSyncSpy.mockRestore(); + sink.close(); + } + }); + + it('recover() should call the recover function from options', () => { + const mockRecover = vi + .fn() + .mockReturnValue({ records: ['test'], errors: [], partialTail: null }); + const sink = new FileSink({ + filePath: '/tmp/test-file.txt', + recover: mockRecover, + }); + expect(sink.recover()).toStrictEqual({ + records: ['test'], + errors: [], + partialTail: null, + }); + expect(mockRecover).toHaveBeenCalledWith(); + }); + + it('repack() should recover records and write them to output path', () => { + const mockRecover = vi.fn(); + const sink = new FileSink({ + filePath: '/tmp/test-file.txt', + recover: mockRecover, + }); + const records = ['record1', 'record2']; + mockRecover.mockReturnValue({ records, errors: [], partialTail: null }); + const outputPath = '/tmp/repack-output.txt'; + sink.repack(outputPath); + expect(mockRecover).toHaveBeenCalled(); + expect(fs.readFileSync(outputPath, 'utf8')).toBe('record1\n\nrecord2\n'); + }); + + it('finalize() should call the finalize function from options', () => { + const mockFinalize = vi.fn(); + const sink = new FileSink({ + filePath: '/tmp/test-file.txt', + finalize: mockFinalize, + }); + sink.finalize(); + expect(mockFinalize).toHaveBeenCalledTimes(1); + }); + + it('isClosed() should return true when sink is not opened', () => { + const sink = new FileSink({ filePath: '/tmp/test-file.txt' }); + expect(sink.isClosed()).toBe(true); + }); + + it('isClosed() should return false when sink is opened', () => { + const sink = new FileSink({ filePath: '/tmp/test-file.txt' }); + sink.open(); + expect(sink.isClosed()).toBe(false); + }); + + it('isClosed() should return true when sink is closed after being opened', () => { + const sink = new FileSink({ filePath: '/tmp/test-file.txt' }); + sink.open(); + expect(sink.isClosed()).toBe(false); + sink.close(); + expect(sink.isClosed()).toBe(true); + }); +}); diff --git a/packages/utils/src/lib/sink-source.types.ts b/packages/utils/src/lib/sink-source.types.ts new file mode 100644 index 000000000..4473026d0 --- /dev/null +++ b/packages/utils/src/lib/sink-source.types.ts @@ -0,0 +1,48 @@ +export type Encoder = { + encode: (input: I) => O; +}; + +export type Decoder = { + decode: (output: O) => I; +}; + +export type Sink = { + open: () => void; + write: (input: I) => void; + close: () => void; + isClosed: () => boolean; +} & Encoder; + +export type Buffered = { + flush: () => void; +}; +export type BufferedSink = {} & Sink & Buffered; + +export type Source = { + read?: () => O; + decode?: (input: I) => O; +}; + +export type Observer = { + subscribe: () => void; + unsubscribe: () => void; + isSubscribed: () => boolean; +}; + +export type Recoverable = { + recover: () => RecoverResult; + repack: (outputPath?: string) => void; + finalize: () => void; +}; + +export type RecoverResult = { + records: T[]; + errors: { lineNo: number; line: string; error: Error }[]; + partialTail: string | null; +}; + +export type RecoverOptions = { + keepInvalid?: boolean; +}; + +export type Output = {} & BufferedSink; From 756f8c0db48a28ec83126fea624ceb3acf60e6f6 Mon Sep 17 00:00:00 2001 From: John Doe Date: Fri, 9 Jan 2026 22:38:09 +0100 Subject: [PATCH 02/31] feat: add file sink classes --- ...nt.test.ts => file-sink-jsonl.int.test.ts} | 2 +- .../{file-sink-json.ts => file-sink-jsonl.ts} | 0 ...t.test.ts => file-sink-jsonl.unit.test.ts} | 28 ++++++++++++++++++- .../utils/src/lib/file-sink-text.unit.test.ts | 15 ++++++++++ 4 files changed, 43 insertions(+), 2 deletions(-) rename packages/utils/src/lib/{file-sink-json.int.test.ts => file-sink-jsonl.int.test.ts} (99%) rename packages/utils/src/lib/{file-sink-json.ts => file-sink-jsonl.ts} (100%) rename packages/utils/src/lib/{file-sink-json.unit.test.ts => file-sink-jsonl.unit.test.ts} (89%) diff --git a/packages/utils/src/lib/file-sink-json.int.test.ts b/packages/utils/src/lib/file-sink-jsonl.int.test.ts similarity index 99% rename from packages/utils/src/lib/file-sink-json.int.test.ts rename to packages/utils/src/lib/file-sink-jsonl.int.test.ts index c331d8d0a..e0f57bbaa 100644 --- a/packages/utils/src/lib/file-sink-json.int.test.ts +++ b/packages/utils/src/lib/file-sink-jsonl.int.test.ts @@ -3,7 +3,7 @@ import * as os from 'node:os'; import * as path from 'node:path'; import { afterAll, beforeAll, describe, expect, it } from 'vitest'; import { teardownTestFolder } from '@code-pushup/test-utils'; -import { JsonlFileSink, recoverJsonlFile } from './file-sink-json.js'; +import { JsonlFileSink, recoverJsonlFile } from './file-sink-jsonl.js'; describe('JsonlFileSink integration', () => { const baseDir = path.join(os.tmpdir(), 'file-sink-json-int-tests'); diff --git a/packages/utils/src/lib/file-sink-json.ts b/packages/utils/src/lib/file-sink-jsonl.ts similarity index 100% rename from packages/utils/src/lib/file-sink-json.ts rename to packages/utils/src/lib/file-sink-jsonl.ts diff --git a/packages/utils/src/lib/file-sink-json.unit.test.ts b/packages/utils/src/lib/file-sink-jsonl.unit.test.ts similarity index 89% rename from packages/utils/src/lib/file-sink-json.unit.test.ts rename to packages/utils/src/lib/file-sink-jsonl.unit.test.ts index a920c8cbe..75f981cb0 100644 --- a/packages/utils/src/lib/file-sink-json.unit.test.ts +++ b/packages/utils/src/lib/file-sink-jsonl.unit.test.ts @@ -7,7 +7,7 @@ import { jsonlDecode, jsonlEncode, recoverJsonlFile, -} from './file-sink-json.js'; +} from './file-sink-jsonl.js'; describe('jsonlEncode', () => { it('should encode object to JSON string', () => { @@ -207,10 +207,36 @@ describe('JsonlFileSink', () => { `${records.map(record => JSON.stringify(record)).join('\n')}\n`, ); + sink.repack(); + expect(fs.readFileSync(filePath, 'utf8')).toBe( + `${JSON.stringify(records[0])}\n${JSON.stringify(records[1])}\n`, + ); + }); + + it('repack() should accept output path', () => { + const filePath = '/tmp/jsonl-repack-test.jsonl'; + const sink = new JsonlFileSink({ filePath }); + const records = [ + { key: 'value', number: 42 }, + { key: 'value', number: 42 }, + ]; + + fs.writeFileSync( + filePath, + `${records.map(record => JSON.stringify(record)).join('\n')}\n`, + ); + const outputPath = '/tmp/jsonl-repack-output.jsonl'; sink.repack(outputPath); expect(fs.readFileSync(outputPath, 'utf8')).toBe( `${JSON.stringify(records[0])}\n${JSON.stringify(records[1])}\n`, ); }); + + it('should do nothing on finalize()', () => { + const sink = new JsonlFileSink({ + filePath: '/tmp/jsonl-finalize-test.jsonl', + }); + expect(() => sink.finalize()).not.toThrow(); + }); }); diff --git a/packages/utils/src/lib/file-sink-text.unit.test.ts b/packages/utils/src/lib/file-sink-text.unit.test.ts index f76cf13d4..33cc9ad0e 100644 --- a/packages/utils/src/lib/file-sink-text.unit.test.ts +++ b/packages/utils/src/lib/file-sink-text.unit.test.ts @@ -251,6 +251,21 @@ describe('FileSink', () => { }); it('repack() should recover records and write them to output path', () => { + const mockRecover = vi.fn(); + const filePath = '/tmp/test-file.txt'; + const sink = new FileSink({ + filePath, + recover: mockRecover, + }); + const records = ['record1', 'record2']; + mockRecover.mockReturnValue({ records, errors: [], partialTail: null }); + + sink.repack(); + expect(mockRecover).toHaveBeenCalled(); + expect(fs.readFileSync(filePath, 'utf8')).toBe('record1\n\nrecord2\n'); + }); + + it('repack() should accept output path', () => { const mockRecover = vi.fn(); const sink = new FileSink({ filePath: '/tmp/test-file.txt', From b0c9cc4d9238a10ce979dbde284b9f30329bf4c2 Mon Sep 17 00:00:00 2001 From: John Doe Date: Tue, 13 Jan 2026 00:01:27 +0100 Subject: [PATCH 03/31] refactor: add trace json file --- .../utils/src/lib/file-sink-json-trace.ts | 167 ++++++++++++++++++ 1 file changed, 167 insertions(+) create mode 100644 packages/utils/src/lib/file-sink-json-trace.ts diff --git a/packages/utils/src/lib/file-sink-json-trace.ts b/packages/utils/src/lib/file-sink-json-trace.ts new file mode 100644 index 000000000..7933d318c --- /dev/null +++ b/packages/utils/src/lib/file-sink-json-trace.ts @@ -0,0 +1,167 @@ +import * as fs from 'node:fs'; +import * as path from 'node:path'; +import { performance } from 'node:perf_hooks'; +import { JsonlFileSink, recoverJsonlFile } from './file-sink-jsonl.js'; +import { getCompleteEvent, getStartTracing } from './trace-file-utils.js'; +import type { + InstantEvent, + SpanEvent, + TraceEvent, + TraceEventRaw, + UserTimingDetail, +} from './trace-file.type.js'; + +const tryJson = (v: unknown): T | unknown => { + if (typeof v !== 'string') return v; + try { + return JSON.parse(v) as T; + } catch { + return v; + } +}; + +const toJson = (v: unknown): unknown => { + if (v === undefined) return undefined; + try { + return JSON.stringify(v); + } catch { + return v; + } +}; + +export function decodeTraceEvent({ args, ...rest }: TraceEventRaw): TraceEvent { + if (!args) return rest as TraceEvent; + + const out: any = { ...args }; + if ('detail' in out) out.detail = tryJson(out.detail); + if (out.data?.detail) + out.data.detail = tryJson(out.data.detail); + + return { ...rest, args: out } as TraceEvent; +} + +export function encodeTraceEvent({ args, ...rest }: TraceEvent): TraceEventRaw { + if (!args) return rest as TraceEventRaw; + + const out: any = { ...args }; + if ('detail' in out) out.detail = toJson(out.detail); + if (out.data?.detail) out.data.detail = toJson(out.data.detail); + + return { ...rest, args: out } as TraceEventRaw; +} + +function getTraceMetadata( + startDate?: Date, + metadata?: Record, +) { + return { + source: 'DevTools', + startTime: startDate?.toISOString() ?? new Date().toISOString(), + hardwareConcurrency: 1, + dataOrigin: 'TraceEvents', + ...metadata, + }; +} + +function createTraceFileContent( + traceEventsContent: string, + startDate?: Date, + metadata?: Record, +): string { + return `{ + "metadata": ${JSON.stringify(getTraceMetadata(startDate, metadata))}, + "traceEvents": [ +${traceEventsContent} + ] +}`; +} + +function finalizeTraceFile( + events: (SpanEvent | InstantEvent)[], + outputPath: string, + metadata?: Record, +): void { + const { writeFileSync } = fs; + + const sortedEvents = events.sort((a, b) => a.ts - b.ts); + const first = sortedEvents[0]; + const last = sortedEvents[sortedEvents.length - 1]; + + // Use performance.now() as fallback when no events exist + const fallbackTs = performance.now(); + const firstTs = first?.ts ?? fallbackTs; + const lastTs = last?.ts ?? fallbackTs; + + // Add margins for readability + const tsMargin = 1000; + const startTs = firstTs - tsMargin; + const endTs = lastTs + tsMargin; + const startDate = new Date().toISOString(); + + const traceEventsJson = [ + // Preamble + encodeTraceEvent( + getStartTracing({ + ts: startTs, + url: outputPath, + }), + ), + encodeTraceEvent( + getCompleteEvent({ + ts: startTs, + dur: 20, + }), + ), + // Events + ...events.map(encodeTraceEvent), + encodeTraceEvent( + getCompleteEvent({ + ts: endTs, + dur: 20, + }), + ), + ].join(',\n'); + + const jsonOutput = createTraceFileContent( + traceEventsJson, + new Date(), + metadata, + ); + writeFileSync(outputPath, jsonOutput, 'utf8'); +} + +export interface TraceFileSinkOptions { + filename: string; + directory?: string; + metadata?: Record; +} + +export class TraceFileSink extends JsonlFileSink { + readonly #filePath: string; + readonly #getFilePathForExt: (ext: 'json' | 'jsonl') => string; + readonly #metadata: Record | undefined; + + constructor(opts: TraceFileSinkOptions) { + const { filename, directory = '.', metadata } = opts; + + const traceJsonlPath = path.join(directory, `${filename}.jsonl`); + + super({ + filePath: traceJsonlPath, + recover: () => recoverJsonlFile(traceJsonlPath), + }); + + this.#metadata = metadata; + this.#filePath = path.join(directory, `${filename}.json`); + this.#getFilePathForExt = (ext: 'json' | 'jsonl') => + path.join(directory, `${filename}.${ext}`); + } + + override finalize(): void { + finalizeTraceFile(this.recover().records, this.#filePath, this.#metadata); + } + + getFilePathForExt(ext: 'json' | 'jsonl'): string { + return this.#getFilePathForExt(ext); + } +} From 1f6e32628e5a2aeffea6c5c560acb60868abe324 Mon Sep 17 00:00:00 2001 From: John Doe Date: Wed, 14 Jan 2026 01:05:15 +0100 Subject: [PATCH 04/31] refactor: wip --- packages/utils/src/lib/file-sink-text.ts | 37 ++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/packages/utils/src/lib/file-sink-text.ts b/packages/utils/src/lib/file-sink-text.ts index 3cafacbe4..050188e58 100644 --- a/packages/utils/src/lib/file-sink-text.ts +++ b/packages/utils/src/lib/file-sink-text.ts @@ -1,6 +1,7 @@ import * as fs from 'node:fs'; import { existsSync, mkdirSync } from 'node:fs'; import path from 'node:path'; +import { PROFILER_FILE_BASE_NAME, PROFILER_OUT_DIR } from './profiler'; import type { RecoverOptions, RecoverResult, @@ -60,6 +61,42 @@ export const stringRecover = function ( return { records, errors, partialTail }; }; +export type FileNameOptions = { + fileBaseName: string; + outDir: string; + fileName?: string; +}; + +export function getFilenameParts(options: FileNameOptions): { + outDir: string; + fileName: string; +} { + const { fileName, fileBaseName, outDir } = options; + + if (fileName) { + return { + outDir, + fileName, + }; + } + + const baseName = fileBaseName; + const DATE_LENGTH = 10; + const TIME_SEGMENTS = 3; + const COLON_LENGTH = 1; + const TOTAL_TIME_LENGTH = + TIME_SEGMENTS * 2 + (TIME_SEGMENTS - 1) * COLON_LENGTH; // HH:MM:SS = 8 chars + const id = new Date() + .toISOString() + .slice(0, DATE_LENGTH + TOTAL_TIME_LENGTH) + .replace(/:/g, '-'); + + return { + outDir, + fileName: `${baseName}.${id}`, + }; +} + export type FileSinkOptions = { filePath: string; recover?: () => RecoverResult; From 6bcb73b7e6d9c967cef339d371e56a99df0fc4c8 Mon Sep 17 00:00:00 2001 From: John Doe Date: Wed, 14 Jan 2026 02:23:33 +0100 Subject: [PATCH 05/31] refactor: wip --- .../src/lib/file-sink-json-trace.int.test.ts | 224 ++++++++++++ .../utils/src/lib/file-sink-json-trace.ts | 88 +++-- .../src/lib/file-sink-json-trace.unit.test.ts | 335 ++++++++++++++++++ 3 files changed, 613 insertions(+), 34 deletions(-) create mode 100644 packages/utils/src/lib/file-sink-json-trace.int.test.ts create mode 100644 packages/utils/src/lib/file-sink-json-trace.unit.test.ts diff --git a/packages/utils/src/lib/file-sink-json-trace.int.test.ts b/packages/utils/src/lib/file-sink-json-trace.int.test.ts new file mode 100644 index 000000000..e71bb90d5 --- /dev/null +++ b/packages/utils/src/lib/file-sink-json-trace.int.test.ts @@ -0,0 +1,224 @@ +import * as fs from 'node:fs'; +import * as os from 'node:os'; +import * as path from 'node:path'; +import { afterAll, beforeAll, describe, expect, it } from 'vitest'; +import { teardownTestFolder } from '@code-pushup/test-utils'; +import { TraceFileSink } from './file-sink-json-trace.js'; +import type { TraceEvent } from './trace-file.type'; + +describe('TraceFileSink integration', () => { + const baseDir = path.join(os.tmpdir(), 'file-sink-json-trace-int-tests'); + const traceJsonPath = path.join(baseDir, 'test-data.json'); + const traceJsonlPath = path.join(baseDir, 'test-data.jsonl'); + + beforeAll(async () => { + await fs.promises.mkdir(baseDir, { recursive: true }); + }); + + beforeEach(async () => { + try { + await fs.promises.unlink(traceJsonPath); + } catch { + // File doesn't exist, which is fine + } + try { + await fs.promises.unlink(traceJsonlPath); + } catch { + // File doesn't exist, which is fine + } + }); + + afterAll(async () => { + await teardownTestFolder(baseDir); + }); + + describe('file operations', () => { + const testEvents: TraceEvent[] = [ + { name: 'navigationStart', ts: 100, ph: 'I', cat: 'blink.user_timing' }, + { + name: 'loadEventStart', + ts: 200, + ph: 'I', + cat: 'blink.user_timing', + args: { data: { url: 'https://example.com' } }, + }, + { + name: 'loadEventEnd', + ts: 250, + ph: 'I', + cat: 'blink.user_timing', + args: { detail: { duration: 50 } }, + }, + ]; + + it('should write and read trace events', async () => { + const sink = new TraceFileSink({ + filename: 'test-data', + directory: baseDir, + }); + + // Open and write data + sink.open(); + testEvents.forEach(event => sink.write(event as any)); + sink.finalize(); + + expect(fs.existsSync(traceJsonPath)).toBe(true); + expect(fs.existsSync(traceJsonlPath)).toBe(true); + + const jsonContent = fs.readFileSync(traceJsonPath, 'utf8'); + const traceData = JSON.parse(jsonContent); + + expect(traceData.metadata.source).toBe('DevTools'); + expect(traceData.metadata.dataOrigin).toBe('TraceEvents'); + expect(Array.isArray(traceData.traceEvents)).toBe(true); + + // Should have preamble events + user events + complete event + expect(traceData.traceEvents.length).toBeGreaterThan(testEvents.length); + + // Check that our events are included + const userEvents = traceData.traceEvents.filter((e: any) => + testEvents.some(testEvent => testEvent.name === e.name), + ); + expect(userEvents).toHaveLength(testEvents.length); + }); + + it('should recover events from JSONL file', async () => { + const sink = new TraceFileSink({ + filename: 'test-data', + directory: baseDir, + }); + sink.open(); + testEvents.forEach(event => sink.write(event as any)); + sink.close(); + + const recovered = sink.recover(); + expect(recovered.records).toStrictEqual(testEvents); + expect(recovered.errors).toStrictEqual([]); + expect(recovered.partialTail).toBeNull(); + }); + + it('should handle empty trace files', async () => { + const sink = new TraceFileSink({ + filename: 'empty-test', + directory: baseDir, + }); + sink.open(); + sink.finalize(); + + const emptyJsonPath = path.join(baseDir, 'empty-test.json'); + expect(fs.existsSync(emptyJsonPath)).toBe(true); + + const jsonContent = fs.readFileSync(emptyJsonPath, 'utf8'); + const traceData = JSON.parse(jsonContent); + + expect(traceData.metadata.source).toBe('DevTools'); + // Should have at least preamble and complete events + expect(traceData.traceEvents.length).toBeGreaterThanOrEqual(2); + }); + + it('should handle metadata in trace files', async () => { + const metadata = { + version: '1.0.0', + environment: 'test', + customData: { key: 'value' }, + }; + + const sink = new TraceFileSink({ + filename: 'metadata-test', + directory: baseDir, + metadata, + }); + sink.open(); + sink.write({ name: 'test-event', ts: 100, ph: 'I' } as any); + sink.finalize(); + + const metadataJsonPath = path.join(baseDir, 'metadata-test.json'); + const jsonContent = fs.readFileSync(metadataJsonPath, 'utf8'); + const traceData = JSON.parse(jsonContent); + + expect(traceData.metadata.version).toBe('1.0.0'); + expect(traceData.metadata.environment).toBe('test'); + expect(traceData.metadata.customData).toStrictEqual({ key: 'value' }); + expect(traceData.metadata.source).toBe('DevTools'); + }); + + describe('edge cases', () => { + it('should handle single event traces', async () => { + const singleEvent: TraceEvent = { + name: 'singleEvent', + ts: 123, + ph: 'I', + cat: 'test', + }; + + const sink = new TraceFileSink({ + filename: 'single-event-test', + directory: baseDir, + }); + sink.open(); + sink.write(singleEvent as any); + sink.finalize(); + + const singleJsonPath = path.join(baseDir, 'single-event-test.json'); + const jsonContent = fs.readFileSync(singleJsonPath, 'utf8'); + const traceData = JSON.parse(jsonContent); + + expect( + traceData.traceEvents.some((e: any) => e.name === 'singleEvent'), + ).toBe(true); + }); + + it('should handle events with complex args', async () => { + const complexEvent: TraceEvent = { + name: 'complexEvent', + ts: 456, + ph: 'X', + cat: 'test', + args: { + detail: { nested: { data: [1, 2, 3] } }, + data: { url: 'https://example.com', size: 1024 }, + }, + }; + + const sink = new TraceFileSink({ + filename: 'complex-args-test', + directory: baseDir, + }); + sink.open(); + sink.write(complexEvent as any); + sink.finalize(); + + const complexJsonPath = path.join(baseDir, 'complex-args-test.json'); + const jsonContent = fs.readFileSync(complexJsonPath, 'utf8'); + const traceData = JSON.parse(jsonContent); + + const eventInTrace = traceData.traceEvents.find( + (e: any) => e.name === 'complexEvent', + ); + expect(eventInTrace).toBeDefined(); + expect(eventInTrace.args.detail).toStrictEqual( + '{"nested":{"data":[1,2,3]}}', + ); + expect(eventInTrace.args.data.url).toBe('https://example.com'); + }); + + it('should handle non-existent directories gracefully', async () => { + const nonExistentDir = path.join(baseDir, 'non-existent'); + const sink = new TraceFileSink({ + filename: 'non-existent-dir-test', + directory: nonExistentDir, + }); + + sink.open(); + sink.write({ name: 'test', ts: 100, ph: 'I' } as any); + sink.finalize(); + + const jsonPath = path.join( + nonExistentDir, + 'non-existent-dir-test.json', + ); + expect(fs.existsSync(jsonPath)).toBe(true); + }); + }); + }); +}); diff --git a/packages/utils/src/lib/file-sink-json-trace.ts b/packages/utils/src/lib/file-sink-json-trace.ts index 7933d318c..f35895303 100644 --- a/packages/utils/src/lib/file-sink-json-trace.ts +++ b/packages/utils/src/lib/file-sink-json-trace.ts @@ -1,7 +1,12 @@ import * as fs from 'node:fs'; import * as path from 'node:path'; import { performance } from 'node:perf_hooks'; -import { JsonlFileSink, recoverJsonlFile } from './file-sink-jsonl.js'; +import { + JsonlFileSink, + jsonlDecode, + jsonlEncode, + recoverJsonlFile, +} from './file-sink-jsonl.js'; import { getCompleteEvent, getStartTracing } from './trace-file-utils.js'; import type { InstantEvent, @@ -11,46 +16,60 @@ import type { UserTimingDetail, } from './trace-file.type.js'; -const tryJson = (v: unknown): T | unknown => { - if (typeof v !== 'string') return v; - try { - return JSON.parse(v) as T; - } catch { - return v; +export function decodeDetail(target: UserTimingDetail): UserTimingDetail { + if (typeof target.detail === 'string') { + return { ...target, detail: jsonlDecode(target.detail) }; } -}; + return target; +} -const toJson = (v: unknown): unknown => { - if (v === undefined) return undefined; - try { - return JSON.stringify(v); - } catch { - return v; +export function encodeDetail(target: UserTimingDetail): UserTimingDetail { + if (target.detail && typeof target.detail === 'object') { + return { + ...target, + detail: jsonlEncode(target.detail as UserTimingDetail), + }; } -}; + return target; +} export function decodeTraceEvent({ args, ...rest }: TraceEventRaw): TraceEvent { if (!args) return rest as TraceEvent; - const out: any = { ...args }; - if ('detail' in out) out.detail = tryJson(out.detail); - if (out.data?.detail) - out.data.detail = tryJson(out.data.detail); + const out: UserTimingDetail = { ...args }; + const processedOut = decodeDetail(out); - return { ...rest, args: out } as TraceEvent; + return { + ...rest, + args: + out.data && typeof out.data === 'object' + ? { + ...processedOut, + data: decodeDetail(out.data as UserTimingDetail), + } + : processedOut, + }; } export function encodeTraceEvent({ args, ...rest }: TraceEvent): TraceEventRaw { if (!args) return rest as TraceEventRaw; - const out: any = { ...args }; - if ('detail' in out) out.detail = toJson(out.detail); - if (out.data?.detail) out.data.detail = toJson(out.data.detail); + const out: UserTimingDetail = { ...args }; + const processedOut = encodeDetail(out); - return { ...rest, args: out } as TraceEventRaw; + return { + ...rest, + args: + out.data && typeof out.data === 'object' + ? { + ...processedOut, + data: encodeDetail(out.data as UserTimingDetail), + } + : processedOut, + }; } -function getTraceMetadata( +export function getTraceMetadata( startDate?: Date, metadata?: Record, ) { @@ -76,30 +95,30 @@ ${traceEventsContent} }`; } -function finalizeTraceFile( +export function finalizeTraceFile( events: (SpanEvent | InstantEvent)[], outputPath: string, metadata?: Record, ): void { const { writeFileSync } = fs; + if (events.length === 0) { + return; + } + const sortedEvents = events.sort((a, b) => a.ts - b.ts); const first = sortedEvents[0]; const last = sortedEvents[sortedEvents.length - 1]; - // Use performance.now() as fallback when no events exist const fallbackTs = performance.now(); const firstTs = first?.ts ?? fallbackTs; const lastTs = last?.ts ?? fallbackTs; - // Add margins for readability const tsMargin = 1000; const startTs = firstTs - tsMargin; const endTs = lastTs + tsMargin; - const startDate = new Date().toISOString(); const traceEventsJson = [ - // Preamble encodeTraceEvent( getStartTracing({ ts: startTs, @@ -112,7 +131,6 @@ function finalizeTraceFile( dur: 20, }), ), - // Events ...events.map(encodeTraceEvent), encodeTraceEvent( getCompleteEvent({ @@ -120,7 +138,9 @@ function finalizeTraceFile( dur: 20, }), ), - ].join(',\n'); + ] + .map(event => JSON.stringify(event)) + .join(',\n'); const jsonOutput = createTraceFileContent( traceEventsJson, @@ -130,11 +150,11 @@ function finalizeTraceFile( writeFileSync(outputPath, jsonOutput, 'utf8'); } -export interface TraceFileSinkOptions { +export type TraceFileSinkOptions = { filename: string; directory?: string; metadata?: Record; -} +}; export class TraceFileSink extends JsonlFileSink { readonly #filePath: string; diff --git a/packages/utils/src/lib/file-sink-json-trace.unit.test.ts b/packages/utils/src/lib/file-sink-json-trace.unit.test.ts new file mode 100644 index 000000000..162f5f048 --- /dev/null +++ b/packages/utils/src/lib/file-sink-json-trace.unit.test.ts @@ -0,0 +1,335 @@ +import { vol } from 'memfs'; +import * as fs from 'node:fs'; +import { beforeEach, describe, expect, it } from 'vitest'; +import { MEMFS_VOLUME } from '@code-pushup/test-utils'; +import { + TraceFileSink, + decodeTraceEvent, + encodeTraceEvent, + finalizeTraceFile, + getTraceMetadata, +} from './file-sink-json-trace.js'; +import type { + InstantEvent, + TraceEvent, + TraceEventRaw, +} from './trace-file.type'; + +describe('decodeTraceEvent', () => { + it('should return event without args if no args present', () => { + const event: TraceEventRaw = { name: 'test', ts: 123 }; + expect(decodeTraceEvent(event)).toStrictEqual(event); + }); + + it('should decode args with detail property', () => { + const event: TraceEventRaw = { + name: 'test', + ts: 123, + args: { detail: '{"key":"value"}' }, + }; + expect(decodeTraceEvent(event)).toStrictEqual({ + name: 'test', + ts: 123, + args: { detail: { key: 'value' } }, + }); + }); + + it('should decode nested data.detail property', () => { + const event: TraceEventRaw = { + name: 'test', + ts: 123, + args: { data: { detail: '{"nested":"value"}' } }, + }; + expect(decodeTraceEvent(event)).toStrictEqual({ + name: 'test', + ts: 123, + args: { data: { detail: { nested: 'value' } } }, + }); + }); + + it('should handle invalid JSON in detail', () => { + const event: TraceEventRaw = { + name: 'test', + ts: 123, + args: { detail: 'invalid json' }, + }; + expect(() => decodeTraceEvent(event)).toThrow('Unexpected token'); + }); +}); + +describe('encodeTraceEvent', () => { + it('should return event without args if no args present', () => { + const event: TraceEventRaw = { name: 'test', ts: 123 }; + expect(encodeTraceEvent(event)).toStrictEqual(event); + }); + + it('should encode args with detail property', () => { + const event: TraceEventRaw = { + name: 'test', + ts: 123, + args: { detail: { key: 'value' } }, + }; + expect(encodeTraceEvent(event)).toStrictEqual({ + name: 'test', + ts: 123, + args: { detail: '{"key":"value"}' }, + }); + }); + + it('should encode nested data.detail property', () => { + const event: TraceEventRaw = { + name: 'test', + ts: 123, + args: { data: { detail: { nested: 'value' } } }, + }; + expect(encodeTraceEvent(event)).toStrictEqual({ + name: 'test', + ts: 123, + args: { data: { detail: '{"nested":"value"}' } }, + }); + }); + + it('should handle non-serializable detail', () => { + const circular: any = {}; + circular.self = circular; + const event: TraceEventRaw = { + name: 'test', + ts: 123, + args: { detail: circular }, + }; + expect(() => encodeTraceEvent(event)).toThrow( + 'Converting circular structure to JSON', + ); + }); +}); + +describe('finalizeTraceFile', () => { + beforeEach(() => { + vol.fromJSON( + { + '/tmp': null, + }, + MEMFS_VOLUME, + ); + }); + + it('should create trace file with events', () => { + const events: TraceEvent[] = [ + { name: 'event1', ts: 100, ph: 'I' }, + { name: 'event2', ts: 200, ph: 'X', args: { dur: 50 } }, + ]; + const outputPath = '/tmp/test-trace.json'; + + finalizeTraceFile(events as any, outputPath); + + expect(fs.existsSync(outputPath)).toBe(true); + const content = JSON.parse(fs.readFileSync(outputPath, 'utf8')); + expect(content.metadata.source).toBe('DevTools'); + expect(content.traceEvents).toHaveLength(5); // preamble (start + complete) + events + complete + }); + + it('should handle empty events array', () => { + const events: TraceEvent[] = []; + const outputPath = '/tmp/empty-trace.json'; + + finalizeTraceFile(events as any, outputPath); + + expect(fs.existsSync(outputPath)).toBe(true); + const content = JSON.parse(fs.readFileSync(outputPath, 'utf8')); + expect(content.traceEvents).toHaveLength(3); // preamble (start + complete) + end complete + }); + + it('should sort events by timestamp', () => { + const events: TraceEvent[] = [ + { name: 'event2', ts: 200, ph: 'I' }, + { name: 'event1', ts: 100, ph: 'I' }, + ]; + const outputPath = '/tmp/sorted-trace.json'; + + finalizeTraceFile(events as any, outputPath); + + const content = JSON.parse(fs.readFileSync(outputPath, 'utf8')); + const eventNames = content.traceEvents + .filter((e: any) => e.name.startsWith('event')) + .map((e: any) => e.name); + expect(eventNames).toStrictEqual(['event1', 'event2']); + }); +}); + +describe('TraceFileSink', () => { + beforeEach(() => { + vol.fromJSON( + { + '/tmp': null, + }, + MEMFS_VOLUME, + ); + }); + + it('should create trace file sink with default options', () => { + const sink = new TraceFileSink({ filename: 'test' }); + expect(sink.getFilePathForExt('json')).toBe('test.json'); + expect(sink.getFilePathForExt('jsonl')).toBe('test.jsonl'); + }); + + it('should create trace file sink with custom directory', () => { + const sink = new TraceFileSink({ + filename: 'test', + directory: '/tmp/custom', + }); + expect(sink.getFilePathForExt('json')).toBe('/tmp/custom/test.json'); + expect(sink.getFilePathForExt('jsonl')).toBe('/tmp/custom/test.jsonl'); + }); + + it('should handle file operations with trace events', () => { + const sink = new TraceFileSink({ + filename: 'trace-test', + directory: '/tmp', + }); + sink.open(); + + const event1: InstantEvent = { name: 'mark1', ts: 100, ph: 'I' }; + const event2: InstantEvent = { name: 'mark2', ts: 200, ph: 'I' }; + sink.write(event1); + sink.write(event2); + sink.close(); + + expect(fs.existsSync('/tmp/trace-test.jsonl')).toBe(true); + expect(fs.existsSync('/tmp/trace-test.json')).toBe(false); + + const recovered = sink.recover(); + expect(recovered.records).toStrictEqual([event1, event2]); + }); + + it('should create trace file on finalize', () => { + const sink = new TraceFileSink({ + filename: 'finalize-test', + directory: '/tmp', + }); + sink.open(); + + const event: InstantEvent = { name: 'test-event', ts: 150, ph: 'I' }; + sink.write(event); + sink.finalize(); + + expect(fs.existsSync('/tmp/finalize-test.json')).toBe(true); + const content = JSON.parse( + fs.readFileSync('/tmp/finalize-test.json', 'utf8'), + ); + expect(content.metadata.source).toBe('DevTools'); + expect(content.traceEvents.some((e: any) => e.name === 'test-event')).toBe( + true, + ); + }); + + it('should handle metadata in finalize', () => { + const metadata = { customField: 'value', version: '1.0' }; + const sink = new TraceFileSink({ + filename: 'metadata-test', + directory: '/tmp', + metadata, + }); + sink.open(); + sink.write({ name: 'event', ts: 100, ph: 'I' }); + sink.finalize(); + + const content = JSON.parse( + fs.readFileSync('/tmp/metadata-test.json', 'utf8'), + ); + expect(content.metadata.customField).toBe('value'); + expect(content.metadata.version).toBe('1.0'); + }); + + it('should do nothing on finalize when no events written', () => { + const sink = new TraceFileSink({ + filename: 'empty-test', + directory: '/tmp', + }); + sink.open(); + sink.finalize(); + + expect(fs.existsSync('/tmp/empty-test.json')).toBe(true); + const content = JSON.parse(fs.readFileSync('/tmp/empty-test.json', 'utf8')); + expect(content.traceEvents).toHaveLength(3); // preamble (start + complete) + end complete + }); +}); + +describe('getTraceMetadata', () => { + it('should use provided startDate when given', () => { + const startDate = new Date('2023-01-15T10:30:00.000Z'); + const metadata = { customField: 'value' }; + + const result = getTraceMetadata(startDate, metadata); + + expect(result).toStrictEqual({ + source: 'DevTools', + startTime: '2023-01-15T10:30:00.000Z', + hardwareConcurrency: 1, + dataOrigin: 'TraceEvents', + customField: 'value', + }); + }); + + it('should use current date when startDate is undefined', () => { + const beforeTest = new Date(); + const metadata = { version: '1.0' }; + + const result = getTraceMetadata(undefined, metadata); + + const afterTest = new Date(); + expect(result.source).toBe('DevTools'); + expect(result.hardwareConcurrency).toBe(1); + expect(result.dataOrigin).toBe('TraceEvents'); + + // Verify startTime is a valid ISO string between test execution + const startTime = new Date(result.startTime); + expect(startTime.getTime()).toBeGreaterThanOrEqual(beforeTest.getTime()); + expect(startTime.getTime()).toBeLessThanOrEqual(afterTest.getTime()); + }); + + it('should use current date when startDate is null', () => { + const beforeTest = new Date(); + const metadata = { environment: 'test' }; + + const result = getTraceMetadata(undefined, metadata); + + const afterTest = new Date(); + expect(result.source).toBe('DevTools'); + expect(result.hardwareConcurrency).toBe(1); + expect(result.dataOrigin).toBe('TraceEvents'); + + // Verify startTime is a valid ISO string between test execution + const startTime = new Date(result.startTime); + expect(startTime.getTime()).toBeGreaterThanOrEqual(beforeTest.getTime()); + expect(startTime.getTime()).toBeLessThanOrEqual(afterTest.getTime()); + }); + + it('should handle empty metadata', () => { + const startDate = new Date('2023-12-25T00:00:00.000Z'); + + const result = getTraceMetadata(startDate); + + expect(result).toStrictEqual({ + source: 'DevTools', + startTime: '2023-12-25T00:00:00.000Z', + hardwareConcurrency: 1, + dataOrigin: 'TraceEvents', + }); + }); + + it('should handle both startDate and metadata undefined', () => { + const beforeTest = new Date(); + + const result = getTraceMetadata(); + + const afterTest = new Date(); + expect(result.source).toBe('DevTools'); + expect(result.hardwareConcurrency).toBe(1); + expect(result.dataOrigin).toBe('TraceEvents'); + + // Verify startTime is a valid ISO string between test execution + const startTime = new Date(result.startTime); + expect(startTime.getTime()).toBeGreaterThanOrEqual(beforeTest.getTime()); + expect(startTime.getTime()).toBeLessThanOrEqual(afterTest.getTime()); + }); +}); From 3fe68717586616c645153e177d91d648e2bf231f Mon Sep 17 00:00:00 2001 From: Michael Hladky Date: Wed, 14 Jan 2026 22:53:32 +0100 Subject: [PATCH 06/31] refactor: wip arc --- .../src/lib/file-sink-json-trace.int.test.ts | 26 +- .../utils/src/lib/file-sink-json-trace.ts | 294 +++++++++--------- .../src/lib/file-sink-json-trace.unit.test.ts | 99 ++++-- .../utils/src/lib/file-sink-jsonl.int.test.ts | 8 +- packages/utils/src/lib/file-sink-jsonl.ts | 177 +++++++++-- .../src/lib/file-sink-jsonl.unit.test.ts | 17 +- packages/utils/src/lib/file-sink-text.ts | 291 ++++++++++------- .../utils/src/lib/file-sink-text.unit.test.ts | 4 +- packages/utils/src/lib/trace-file-utils.ts | 80 ++++- .../src/lib/utils/perf-hooks.mock.ts | 33 +- 10 files changed, 681 insertions(+), 348 deletions(-) diff --git a/packages/utils/src/lib/file-sink-json-trace.int.test.ts b/packages/utils/src/lib/file-sink-json-trace.int.test.ts index e71bb90d5..b9cdb7f2e 100644 --- a/packages/utils/src/lib/file-sink-json-trace.int.test.ts +++ b/packages/utils/src/lib/file-sink-json-trace.int.test.ts @@ -3,8 +3,8 @@ import * as os from 'node:os'; import * as path from 'node:path'; import { afterAll, beforeAll, describe, expect, it } from 'vitest'; import { teardownTestFolder } from '@code-pushup/test-utils'; -import { TraceFileSink } from './file-sink-json-trace.js'; -import type { TraceEvent } from './trace-file.type'; +import { FileSinkJsonTrace } from './file-sink-json-trace'; +import type { CompleteEvent, TraceEvent } from './trace-file.type'; describe('TraceFileSink integration', () => { const baseDir = path.join(os.tmpdir(), 'file-sink-json-trace-int-tests'); @@ -52,7 +52,7 @@ describe('TraceFileSink integration', () => { ]; it('should write and read trace events', async () => { - const sink = new TraceFileSink({ + const sink = new FileSinkJsonTrace({ filename: 'test-data', directory: baseDir, }); @@ -83,7 +83,7 @@ describe('TraceFileSink integration', () => { }); it('should recover events from JSONL file', async () => { - const sink = new TraceFileSink({ + const sink = new FileSinkJsonTrace({ filename: 'test-data', directory: baseDir, }); @@ -98,7 +98,7 @@ describe('TraceFileSink integration', () => { }); it('should handle empty trace files', async () => { - const sink = new TraceFileSink({ + const sink = new FileSinkJsonTrace({ filename: 'empty-test', directory: baseDir, }); @@ -123,7 +123,7 @@ describe('TraceFileSink integration', () => { customData: { key: 'value' }, }; - const sink = new TraceFileSink({ + const sink = new FileSinkJsonTrace({ filename: 'metadata-test', directory: baseDir, metadata, @@ -151,7 +151,7 @@ describe('TraceFileSink integration', () => { cat: 'test', }; - const sink = new TraceFileSink({ + const sink = new FileSinkJsonTrace({ filename: 'single-event-test', directory: baseDir, }); @@ -169,7 +169,7 @@ describe('TraceFileSink integration', () => { }); it('should handle events with complex args', async () => { - const complexEvent: TraceEvent = { + const complexEvent: CompleteEvent = { name: 'complexEvent', ts: 456, ph: 'X', @@ -180,7 +180,7 @@ describe('TraceFileSink integration', () => { }, }; - const sink = new TraceFileSink({ + const sink = new FileSinkJsonTrace({ filename: 'complex-args-test', directory: baseDir, }); @@ -196,15 +196,15 @@ describe('TraceFileSink integration', () => { (e: any) => e.name === 'complexEvent', ); expect(eventInTrace).toBeDefined(); - expect(eventInTrace.args.detail).toStrictEqual( - '{"nested":{"data":[1,2,3]}}', - ); + expect(eventInTrace.args.detail).toStrictEqual({ + nested: { data: [1, 2, 3] }, + }); expect(eventInTrace.args.data.url).toBe('https://example.com'); }); it('should handle non-existent directories gracefully', async () => { const nonExistentDir = path.join(baseDir, 'non-existent'); - const sink = new TraceFileSink({ + const sink = new FileSinkJsonTrace({ filename: 'non-existent-dir-test', directory: nonExistentDir, }); diff --git a/packages/utils/src/lib/file-sink-json-trace.ts b/packages/utils/src/lib/file-sink-json-trace.ts index f35895303..6e201f133 100644 --- a/packages/utils/src/lib/file-sink-json-trace.ts +++ b/packages/utils/src/lib/file-sink-json-trace.ts @@ -1,187 +1,193 @@ import * as fs from 'node:fs'; +// Exception: finalization creates new JSON file import * as path from 'node:path'; import { performance } from 'node:perf_hooks'; +import { JsonlFile, recoverJsonlFile } from './file-sink-jsonl.js'; +import type { RecoverResult } from './sink-source.type.js'; import { - JsonlFileSink, - jsonlDecode, - jsonlEncode, - recoverJsonlFile, -} from './file-sink-jsonl.js'; -import { getCompleteEvent, getStartTracing } from './trace-file-utils.js'; + decodeTraceEvent, + encodeTraceEvent, + getCompleteEvent, + getInstantEventTracingStartedInBrowser, +} from './trace-file-utils.js'; import type { InstantEvent, SpanEvent, TraceEvent, TraceEventRaw, - UserTimingDetail, } from './trace-file.type.js'; -export function decodeDetail(target: UserTimingDetail): UserTimingDetail { - if (typeof target.detail === 'string') { - return { ...target, detail: jsonlDecode(target.detail) }; - } - return target; -} - -export function encodeDetail(target: UserTimingDetail): UserTimingDetail { - if (target.detail && typeof target.detail === 'object') { - return { - ...target, - detail: jsonlEncode(target.detail as UserTimingDetail), - }; - } - return target; -} - -export function decodeTraceEvent({ args, ...rest }: TraceEventRaw): TraceEvent { - if (!args) return rest as TraceEvent; - - const out: UserTimingDetail = { ...args }; - const processedOut = decodeDetail(out); - - return { - ...rest, - args: - out.data && typeof out.data === 'object' - ? { - ...processedOut, - data: decodeDetail(out.data as UserTimingDetail), - } - : processedOut, - }; -} - -export function encodeTraceEvent({ args, ...rest }: TraceEvent): TraceEventRaw { - if (!args) return rest as TraceEventRaw; - - const out: UserTimingDetail = { ...args }; - const processedOut = encodeDetail(out); - - return { - ...rest, - args: - out.data && typeof out.data === 'object' - ? { - ...processedOut, - data: encodeDetail(out.data as UserTimingDetail), - } - : processedOut, - }; -} +const TRACE_START_MARGIN_NAME = '[trace padding start]'; +const TRACE_END_MARGIN_NAME = '[trace padding end]'; +const TRACE_MARGIN_MS = 1000; +const TRACE_MARGIN_DURATION_MS = 20; -export function getTraceMetadata( - startDate?: Date, - metadata?: Record, -) { - return { - source: 'DevTools', - startTime: startDate?.toISOString() ?? new Date().toISOString(), - hardwareConcurrency: 1, - dataOrigin: 'TraceEvents', - ...metadata, - }; -} - -function createTraceFileContent( - traceEventsContent: string, - startDate?: Date, - metadata?: Record, -): string { - return `{ - "metadata": ${JSON.stringify(getTraceMetadata(startDate, metadata))}, - "traceEvents": [ -${traceEventsContent} - ] -}`; -} +export type FinalizeTraceFileOptions = { + marginMs?: number; + marginDurMs?: number; + startTime?: string | Date; +}; export function finalizeTraceFile( events: (SpanEvent | InstantEvent)[], outputPath: string, metadata?: Record, + options?: FinalizeTraceFileOptions, ): void { - const { writeFileSync } = fs; - - if (events.length === 0) { - return; - } - - const sortedEvents = events.sort((a, b) => a.ts - b.ts); - const first = sortedEvents[0]; - const last = sortedEvents[sortedEvents.length - 1]; - + events.sort((a, b) => a.ts - b.ts); const fallbackTs = performance.now(); - const firstTs = first?.ts ?? fallbackTs; - const lastTs = last?.ts ?? fallbackTs; - - const tsMargin = 1000; - const startTs = firstTs - tsMargin; - const endTs = lastTs + tsMargin; - - const traceEventsJson = [ - encodeTraceEvent( - getStartTracing({ - ts: startTs, - url: outputPath, - }), - ), - encodeTraceEvent( - getCompleteEvent({ - ts: startTs, - dur: 20, - }), - ), - ...events.map(encodeTraceEvent), - encodeTraceEvent( - getCompleteEvent({ - ts: endTs, - dur: 20, - }), - ), - ] - .map(event => JSON.stringify(event)) - .join(',\n'); - - const jsonOutput = createTraceFileContent( - traceEventsJson, - new Date(), - metadata, + const firstTs = events.length > 0 ? events[0].ts : fallbackTs; + const lastTs = events.length > 0 ? events[events.length - 1].ts : fallbackTs; + + const marginMs = options?.marginMs ?? TRACE_MARGIN_MS; + const marginDurMs = options?.marginDurMs ?? TRACE_MARGIN_DURATION_MS; + + const startTs = firstTs - marginMs; + const endTs = lastTs + marginMs; + + const traceEvents: TraceEvent[] = [ + getInstantEventTracingStartedInBrowser({ ts: startTs, url: outputPath }), + getCompleteEvent({ + name: TRACE_START_MARGIN_NAME, + ts: startTs, + dur: marginDurMs, + }), + ...events, + getCompleteEvent({ + name: TRACE_END_MARGIN_NAME, + ts: endTs, + dur: marginDurMs, + }), + ]; + + const startTime = options?.startTime + ? typeof options.startTime === 'string' + ? options.startTime + : options.startTime.toISOString() + : new Date().toISOString(); + + fs.writeFileSync( + outputPath, + JSON.stringify({ + traceEvents, + displayTimeUnit: 'ms', + metadata: { + source: 'DevTools', + startTime, + hardwareConcurrency: 1, + dataOrigin: 'TraceEvents', + ...metadata, + }, + }), + 'utf8', ); - writeFileSync(outputPath, jsonOutput, 'utf8'); } export type TraceFileSinkOptions = { filename: string; directory?: string; metadata?: Record; + marginMs?: number; + marginDurMs?: number; + startTime?: string | Date; }; -export class TraceFileSink extends JsonlFileSink { - readonly #filePath: string; - readonly #getFilePathForExt: (ext: 'json' | 'jsonl') => string; +export class FileSinkJsonTrace { + readonly #directory: string; + readonly #filename: string; readonly #metadata: Record | undefined; + readonly #marginMs?: number; + readonly #marginDurMs?: number; + readonly #startTime?: string | Date; + private sink: JsonlFile; + #finalized = false; constructor(opts: TraceFileSinkOptions) { - const { filename, directory = '.', metadata } = opts; - + const { + filename, + directory = '.', + metadata, + marginMs, + marginDurMs, + startTime, + } = opts; const traceJsonlPath = path.join(directory, `${filename}.jsonl`); - super({ + this.#directory = directory; + this.#filename = filename; + this.#metadata = metadata; + this.#marginMs = marginMs; + this.#marginDurMs = marginDurMs; + this.#startTime = startTime; + + this.sink = new JsonlFile({ filePath: traceJsonlPath, - recover: () => recoverJsonlFile(traceJsonlPath), + recover: () => recoverJsonlFile(traceJsonlPath), + finalize: () => { + const rawRecords = this.sink.recover().records; + // Decode raw events to proper TraceEvent format for finalization + const processedRecords = rawRecords.map(decodeTraceEvent); + finalizeTraceFile( + processedRecords as (SpanEvent | InstantEvent)[], + this.getFilePathForExt('json'), + this.#metadata, + { + marginMs: this.#marginMs, + marginDurMs: this.#marginDurMs, + startTime: this.#startTime, + }, + ); + }, }); + } - this.#metadata = metadata; - this.#filePath = path.join(directory, `${filename}.json`); - this.#getFilePathForExt = (ext: 'json' | 'jsonl') => - path.join(directory, `${filename}.${ext}`); + /** + * Open file for writing (no-op since JsonlFile opens lazily). + */ + open(): void { + // JsonlFile opens lazily on first write, so no-op here + } + + write(input: SpanEvent | InstantEvent): void { + const encodedEvent = encodeTraceEvent(input); + this.sink.write(encodedEvent); + } + + /** + * Read all events (strict parsing - throws on invalid JSON). + * For error-tolerant reading, use recover() instead. + */ + readAll(): (SpanEvent | InstantEvent)[] { + return this.sink.readAll().map(decodeTraceEvent) as ( + | SpanEvent + | InstantEvent + )[]; + } + + getFilePath(): string { + return this.sink.getPath(); + } + + close(): void { + this.sink.close(); + } + + recover(): RecoverResult { + const { records, errors, partialTail } = this.sink.recover(); + const processedRecords = records.map(decodeTraceEvent) as ( + | SpanEvent + | InstantEvent + )[]; + return { records: processedRecords, errors, partialTail }; } - override finalize(): void { - finalizeTraceFile(this.recover().records, this.#filePath, this.#metadata); + finalize(): void { + if (this.#finalized) return; + this.#finalized = true; + this.sink.finalize(); } getFilePathForExt(ext: 'json' | 'jsonl'): string { - return this.#getFilePathForExt(ext); + return path.join(this.#directory, `${this.#filename}.${ext}`); } } diff --git a/packages/utils/src/lib/file-sink-json-trace.unit.test.ts b/packages/utils/src/lib/file-sink-json-trace.unit.test.ts index 162f5f048..30911b558 100644 --- a/packages/utils/src/lib/file-sink-json-trace.unit.test.ts +++ b/packages/utils/src/lib/file-sink-json-trace.unit.test.ts @@ -2,13 +2,12 @@ import { vol } from 'memfs'; import * as fs from 'node:fs'; import { beforeEach, describe, expect, it } from 'vitest'; import { MEMFS_VOLUME } from '@code-pushup/test-utils'; +import { FileSinkJsonTrace, finalizeTraceFile } from './file-sink-json-trace'; import { - TraceFileSink, decodeTraceEvent, encodeTraceEvent, - finalizeTraceFile, getTraceMetadata, -} from './file-sink-json-trace.js'; +} from './trace-file-utils.js'; import type { InstantEvent, TraceEvent, @@ -134,9 +133,7 @@ describe('finalizeTraceFile', () => { finalizeTraceFile(events as any, outputPath); - expect(fs.existsSync(outputPath)).toBe(true); - const content = JSON.parse(fs.readFileSync(outputPath, 'utf8')); - expect(content.traceEvents).toHaveLength(3); // preamble (start + complete) + end complete + expect(fs.existsSync(outputPath)).toBe(false); // No file created for empty events }); it('should sort events by timestamp', () => { @@ -154,6 +151,46 @@ describe('finalizeTraceFile', () => { .map((e: any) => e.name); expect(eventNames).toStrictEqual(['event1', 'event2']); }); + + it('should use configurable margins', () => { + const events: TraceEvent[] = [{ name: 'event1', ts: 1000, ph: 'I' }]; + const outputPath = '/tmp/custom-margin-trace.json'; + + finalizeTraceFile( + events as any, + outputPath, + {}, + { marginMs: 500, marginDurMs: 10 }, + ); + + const content = JSON.parse(fs.readFileSync(outputPath, 'utf8')); + expect(content.traceEvents).toHaveLength(4); // start tracing + start margin + event + end margin + + // Check start margin timestamp and duration + const startMargin = content.traceEvents.find( + (e: any) => e.name === '[trace padding start]', + ); + expect(startMargin.ts).toBe(500); // 1000 - 500 + expect(startMargin.dur).toBe(10); + + // Check end margin timestamp and duration + const endMargin = content.traceEvents.find( + (e: any) => e.name === '[trace padding end]', + ); + expect(endMargin.ts).toBe(1500); // 1000 + 500 + expect(endMargin.dur).toBe(10); + }); + + it('should use deterministic startTime', () => { + const events: TraceEvent[] = [{ name: 'event1', ts: 1000, ph: 'I' }]; + const outputPath = '/tmp/deterministic-trace.json'; + const fixedTime = '2023-01-15T10:30:00.000Z'; + + finalizeTraceFile(events as any, outputPath, {}, { startTime: fixedTime }); + + const content = JSON.parse(fs.readFileSync(outputPath, 'utf8')); + expect(content.metadata.startTime).toBe(fixedTime); + }); }); describe('TraceFileSink', () => { @@ -167,13 +204,13 @@ describe('TraceFileSink', () => { }); it('should create trace file sink with default options', () => { - const sink = new TraceFileSink({ filename: 'test' }); + const sink = new FileSinkJsonTrace({ filename: 'test' }); expect(sink.getFilePathForExt('json')).toBe('test.json'); expect(sink.getFilePathForExt('jsonl')).toBe('test.jsonl'); }); it('should create trace file sink with custom directory', () => { - const sink = new TraceFileSink({ + const sink = new FileSinkJsonTrace({ filename: 'test', directory: '/tmp/custom', }); @@ -182,12 +219,10 @@ describe('TraceFileSink', () => { }); it('should handle file operations with trace events', () => { - const sink = new TraceFileSink({ + const sink = new FileSinkJsonTrace({ filename: 'trace-test', directory: '/tmp', }); - sink.open(); - const event1: InstantEvent = { name: 'mark1', ts: 100, ph: 'I' }; const event2: InstantEvent = { name: 'mark2', ts: 200, ph: 'I' }; sink.write(event1); @@ -202,11 +237,10 @@ describe('TraceFileSink', () => { }); it('should create trace file on finalize', () => { - const sink = new TraceFileSink({ + const sink = new FileSinkJsonTrace({ filename: 'finalize-test', directory: '/tmp', }); - sink.open(); const event: InstantEvent = { name: 'test-event', ts: 150, ph: 'I' }; sink.write(event); @@ -224,12 +258,11 @@ describe('TraceFileSink', () => { it('should handle metadata in finalize', () => { const metadata = { customField: 'value', version: '1.0' }; - const sink = new TraceFileSink({ + const sink = new FileSinkJsonTrace({ filename: 'metadata-test', directory: '/tmp', metadata, }); - sink.open(); sink.write({ name: 'event', ts: 100, ph: 'I' }); sink.finalize(); @@ -240,17 +273,43 @@ describe('TraceFileSink', () => { expect(content.metadata.version).toBe('1.0'); }); + it('should use configurable options in TraceFileSink', () => { + const sink = new FileSinkJsonTrace({ + filename: 'options-test', + directory: '/tmp', + marginMs: 200, + marginDurMs: 5, + startTime: '2023-12-25T12:00:00.000Z', + }); + sink.write({ name: 'event', ts: 1000, ph: 'I' }); + sink.finalize(); + + const content = JSON.parse( + fs.readFileSync('/tmp/options-test.json', 'utf8'), + ); + expect(content.metadata.startTime).toBe('2023-12-25T12:00:00.000Z'); + + const startMargin = content.traceEvents.find( + (e: any) => e.name === '[trace padding start]', + ); + expect(startMargin.ts).toBe(800); // 1000 - 200 + expect(startMargin.dur).toBe(5); + + const endMargin = content.traceEvents.find( + (e: any) => e.name === '[trace padding end]', + ); + expect(endMargin.ts).toBe(1200); // 1000 + 200 + expect(endMargin.dur).toBe(5); + }); + it('should do nothing on finalize when no events written', () => { - const sink = new TraceFileSink({ + const sink = new FileSinkJsonTrace({ filename: 'empty-test', directory: '/tmp', }); - sink.open(); sink.finalize(); - expect(fs.existsSync('/tmp/empty-test.json')).toBe(true); - const content = JSON.parse(fs.readFileSync('/tmp/empty-test.json', 'utf8')); - expect(content.traceEvents).toHaveLength(3); // preamble (start + complete) + end complete + expect(fs.existsSync('/tmp/empty-test.json')).toBe(false); // No file created for empty events }); }); diff --git a/packages/utils/src/lib/file-sink-jsonl.int.test.ts b/packages/utils/src/lib/file-sink-jsonl.int.test.ts index e0f57bbaa..c0bae5503 100644 --- a/packages/utils/src/lib/file-sink-jsonl.int.test.ts +++ b/packages/utils/src/lib/file-sink-jsonl.int.test.ts @@ -3,9 +3,9 @@ import * as os from 'node:os'; import * as path from 'node:path'; import { afterAll, beforeAll, describe, expect, it } from 'vitest'; import { teardownTestFolder } from '@code-pushup/test-utils'; -import { JsonlFileSink, recoverJsonlFile } from './file-sink-jsonl.js'; +import { JsonlFile, recoverJsonlFile } from './file-sink-jsonl.js'; -describe('JsonlFileSink integration', () => { +describe('JsonlFile integration', () => { const baseDir = path.join(os.tmpdir(), 'file-sink-json-int-tests'); const testFile = path.join(baseDir, 'test-data.jsonl'); @@ -33,7 +33,7 @@ describe('JsonlFileSink integration', () => { ]; it('should write and read JSONL files', async () => { - const sink = new JsonlFileSink({ filePath: testFile }); + const sink = new JsonlFile({ filePath: testFile }); // Open and write data sink.open(); @@ -91,7 +91,7 @@ describe('JsonlFileSink integration', () => { }); it('should recover data using JsonlFileSink.recover()', async () => { - const sink = new JsonlFileSink({ filePath: testFile }); + const sink = new JsonlFile({ filePath: testFile }); sink.open(); testData.forEach(item => sink.write(item)); sink.close(); diff --git a/packages/utils/src/lib/file-sink-jsonl.ts b/packages/utils/src/lib/file-sink-jsonl.ts index 646cd82b1..c3e9f2a83 100644 --- a/packages/utils/src/lib/file-sink-jsonl.ts +++ b/packages/utils/src/lib/file-sink-jsonl.ts @@ -1,60 +1,173 @@ import * as fs from 'node:fs'; -import { - type FileOutput, - FileSink, - type FileSinkOptions, - stringDecode, - stringEncode, - stringRecover, -} from './file-sink-text.js'; -import type { RecoverOptions, RecoverResult } from './sink-source.types.js'; +import { TextFileSink } from './file-sink-text.js'; +import type { RecoverOptions, RecoverResult } from './sink-source.type.js'; +/** + * JSONL encoding functions - single source of truth for JSONL format. + */ export const jsonlEncode = < T extends Record = Record, >( input: T, -): FileOutput => JSON.stringify(input); +): string => JSON.stringify(input); export const jsonlDecode = < T extends Record = Record, >( - output: FileOutput, -): T => JSON.parse(stringDecode(output)) as T; + raw: string, +): T => JSON.parse(raw) as T; export function recoverJsonlFile< T extends Record = Record, >(filePath: string, opts: RecoverOptions = {}): RecoverResult { - return stringRecover(filePath, jsonlDecode, opts); + const records: T[] = []; + const errors: { lineNo: number; line: string; error: Error }[] = []; + let partialTail: string | null = null; + + try { + const content = fs.readFileSync(filePath, 'utf8'); + const lines = content.split('\n'); + let lineNo = 0; + + for (const line of lines) { + lineNo++; + const trimmedLine = line.trim(); + if (!trimmedLine) { + continue; + } + + try { + const record = jsonlDecode(trimmedLine); + records.push(record); + } catch (error) { + const info = { lineNo, line, error: error as Error }; + errors.push(info); + + if (opts.keepInvalid) { + records.push({ __invalid: true, lineNo, line } as any); + } + + partialTail = line; + } + + // Optional: perfect tail detection for empty lines at EOF + if (trimmedLine === '' && lineNo === lines.length) { + partialTail = line; + } + } + } catch { + return { records: [], errors: [], partialTail: null }; + } + + return { records, errors, partialTail }; } -export class JsonlFileSink< +export type JsonlFileOptions> = { + filePath: string; + recover?: () => RecoverResult; + finalize?: () => void; +}; + +/** + * JSONL writer using composition: Transport + Encoding + Recovery policy. + * Writes are append-only. + * + * JsonlFile opens the underlying file lazily on first write and keeps it open + * until close() or finalize() is called. + * + * Design rules: + * - "Extend types only when substitutable" + * - "Reuse behavior via composition" + * - "Transport ≠ format ≠ recovery" + */ +export class JsonlFile< T extends Record = Record, -> extends FileSink { - constructor(options: FileSinkOptions) { - const { filePath, ...fileOptions } = options; - super({ - ...fileOptions, - filePath, - recover: () => recoverJsonlFile(filePath), - finalize: () => { - // No additional finalization needed for JSONL files - }, - }); +> { + private file: TextFileSink; + + constructor(options: JsonlFileOptions) { + const { filePath } = options; + this.file = new TextFileSink(filePath); + + // Recovery policy - JSONL-specific, customizable + this.recover = options.recover ?? (() => recoverJsonlFile(filePath)); + + // Finalization policy - defaults to close() for cleanup + this.finalize = options.finalize ?? (() => this.close()); } - override encode(input: T): FileOutput { - return stringEncode(jsonlEncode(input)); + /** + * Encode record to JSONL format. + */ + encode(record: T): string { + return jsonlEncode(record) + '\n'; + } + + /** + * Decode JSONL string to record. + */ + decode(jsonlString: string): T { + return jsonlDecode(jsonlString); + } + + /** + * Open file for writing (no-op since TextFileSink opens lazily). + */ + open(): void { + // TextFileSink opens lazily on first write, so no-op here + } + + /** + * Write record in JSONL format (append-only). + */ + write(record: T): void { + this.file.append(jsonlEncode(record) + '\n'); + } + + /** + * Read all records as parsed array (strict - throws on invalid JSON). + */ + readAll(): T[] { + return this.file + .readAll() + .split('\n') + .filter(Boolean) + .map(line => jsonlDecode(line)); + } + + /** + * Recover records with error handling (tolerant parsing). + * Handles invalid records gracefully, returns errors alongside valid data. + */ + recover: () => RecoverResult; + + /** + * Finalization - defaults to close() for cleanup. + */ + finalize: () => void; + + /** + * Get file path. + */ + getPath(): string { + return this.file.getPath(); } - override decode(output: FileOutput): T { - return jsonlDecode(stringDecode(output)); + /** + * Close file. + */ + close(): void { + this.file.close(); } - override repack(outputPath?: string): void { + /** + * Repack file with clean JSONL formatting. + */ + repack(outputPath?: string): void { const { records } = this.recover(); fs.writeFileSync( - outputPath ?? this.getFilePath(), - records.map(this.encode).join(''), + outputPath ?? this.getPath(), + records.map(jsonlEncode).join('\n') + '\n', ); } } diff --git a/packages/utils/src/lib/file-sink-jsonl.unit.test.ts b/packages/utils/src/lib/file-sink-jsonl.unit.test.ts index 75f981cb0..7d775820f 100644 --- a/packages/utils/src/lib/file-sink-jsonl.unit.test.ts +++ b/packages/utils/src/lib/file-sink-jsonl.unit.test.ts @@ -3,7 +3,7 @@ import * as fs from 'node:fs'; import { beforeEach, describe, expect, it } from 'vitest'; import { MEMFS_VOLUME } from '@code-pushup/test-utils'; import { - JsonlFileSink, + JsonlFile, jsonlDecode, jsonlEncode, recoverJsonlFile, @@ -150,7 +150,7 @@ describe('recoverJsonlFile', () => { }); }); -describe('JsonlFileSink', () => { +describe('JsonlFile', () => { beforeEach(() => { vol.fromJSON( { @@ -163,7 +163,7 @@ describe('JsonlFileSink', () => { type JsonObj = { key: string; number: number }; it('should encode objects as JSON', () => { - const sink = new JsonlFileSink({ + const sink = new JsonlFile({ filePath: '/tmp/jsonl-test.jsonl', }); const obj = { key: 'value', number: 42 }; @@ -171,7 +171,7 @@ describe('JsonlFileSink', () => { }); it('should decode JSON strings to objects', () => { - const sink = new JsonlFileSink({ + const sink = new JsonlFile({ filePath: '/tmp/jsonl-test.jsonl', }); const obj = { key: 'value', number: 42 }; @@ -181,8 +181,7 @@ describe('JsonlFileSink', () => { it('should handle file operations with JSONL format', () => { const filePath = '/tmp/jsonl-file-ops-test.jsonl'; - const sink = new JsonlFileSink({ filePath }); - sink.open(); + const sink = new JsonlFile({ filePath }); const obj1 = { key: 'value', number: 42 }; const obj2 = { key: 'value', number: 42 }; @@ -196,7 +195,7 @@ describe('JsonlFileSink', () => { it('repack() should recover records and write them to output path', () => { const filePath = '/tmp/jsonl-repack-test.jsonl'; - const sink = new JsonlFileSink({ filePath }); + const sink = new JsonlFile({ filePath }); const records = [ { key: 'value', number: 42 }, { key: 'value', number: 42 }, @@ -215,7 +214,7 @@ describe('JsonlFileSink', () => { it('repack() should accept output path', () => { const filePath = '/tmp/jsonl-repack-test.jsonl'; - const sink = new JsonlFileSink({ filePath }); + const sink = new JsonlFile({ filePath }); const records = [ { key: 'value', number: 42 }, { key: 'value', number: 42 }, @@ -234,7 +233,7 @@ describe('JsonlFileSink', () => { }); it('should do nothing on finalize()', () => { - const sink = new JsonlFileSink({ + const sink = new JsonlFile({ filePath: '/tmp/jsonl-finalize-test.jsonl', }); expect(() => sink.finalize()).not.toThrow(); diff --git a/packages/utils/src/lib/file-sink-text.ts b/packages/utils/src/lib/file-sink-text.ts index 050188e58..80c617be7 100644 --- a/packages/utils/src/lib/file-sink-text.ts +++ b/packages/utils/src/lib/file-sink-text.ts @@ -1,36 +1,93 @@ +/** + * Simple Text File Sink + * + * Basic file operations for text files. Used as the foundation for format-specific writers. + * If you need JSONL files, use JsonlFile from file-sink-jsonl.ts instead. + */ import * as fs from 'node:fs'; -import { existsSync, mkdirSync } from 'node:fs'; -import path from 'node:path'; -import { PROFILER_FILE_BASE_NAME, PROFILER_OUT_DIR } from './profiler'; -import type { - RecoverOptions, - RecoverResult, - Recoverable, - Sink, -} from './sink-source.types.js'; - -export const stringDecode = (output: O): I => { - const str = Buffer.isBuffer(output) - ? output.toString('utf8') - : String(output); - return str as unknown as I; +import * as path from 'node:path'; +import type { RecoverOptions, RecoverResult } from './sink-source.type.js'; + +/** + * Simple text file sink - reusable for basic file operations. + * One responsibility: append text, read all text, get path. + */ +export class TextFileSink { + #fd: number | null = null; + + constructor(private filePath: string) {} + + /** + * Append text to file (append-only). + */ + append(text: string): void { + // Lazy open on first write + if (this.#fd === null) { + const dir = path.dirname(this.filePath); + fs.mkdirSync(dir, { recursive: true }); + this.#fd = fs.openSync(this.filePath, 'a'); + } + fs.writeSync(this.#fd, text); + } + + /** + * Read entire file as string. + */ + readAll(): string { + try { + return fs.readFileSync(this.filePath, 'utf8'); + } catch { + return ''; + } + } + + /** + * Get file path. + */ + getPath(): string { + return this.filePath; + } + + /** + * Close file descriptor. + */ + close(): void { + if (this.#fd !== null) { + fs.closeSync(this.#fd); + this.#fd = null; + } + } +} + +/** + * String encoding functions - single source of truth for string format. + */ +export const stringEncode = (input: unknown): string => { + if (typeof input === 'string') { + return `${input}\n`; + } + return `${JSON.stringify(input)}\n`; }; -export const stringEncode = (input: I): O => - `${typeof input === 'string' ? input : JSON.stringify(input)}\n` as O; +export const stringDecode = (input: string | Buffer): string => { + if (Buffer.isBuffer(input)) { + return input.toString('utf8'); + } + return input; +}; -export const stringRecover = function ( +export function stringRecover( filePath: string, - decode: (output: O) => I, + decodeFn: (line: string) => T, opts: RecoverOptions = {}, -): RecoverResult { - const records: I[] = []; +): RecoverResult { + const records: T[] = []; const errors: { lineNo: number; line: string; error: Error }[] = []; let partialTail: string | null = null; try { const content = fs.readFileSync(filePath, 'utf8'); - const lines = content.trim().split('\n'); + const lines = content.split('\n'); let lineNo = 0; for (const line of lines) { @@ -41,7 +98,7 @@ export const stringRecover = function ( } try { - const record = decode(trimmedLine as O); + const record = decodeFn(trimmedLine); records.push(record); } catch (error) { const info = { lineNo, line, error: error as Error }; @@ -53,132 +110,148 @@ export const stringRecover = function ( partialTail = line; } + + // Optional: perfect tail detection for empty lines at EOF + if (trimmedLine === '' && lineNo === lines.length) { + partialTail = line; + } } } catch { return { records: [], errors: [], partialTail: null }; } return { records, errors, partialTail }; -}; - -export type FileNameOptions = { - fileBaseName: string; - outDir: string; - fileName?: string; -}; - -export function getFilenameParts(options: FileNameOptions): { - outDir: string; - fileName: string; -} { - const { fileName, fileBaseName, outDir } = options; - - if (fileName) { - return { - outDir, - fileName, - }; - } - - const baseName = fileBaseName; - const DATE_LENGTH = 10; - const TIME_SEGMENTS = 3; - const COLON_LENGTH = 1; - const TOTAL_TIME_LENGTH = - TIME_SEGMENTS * 2 + (TIME_SEGMENTS - 1) * COLON_LENGTH; // HH:MM:SS = 8 chars - const id = new Date() - .toISOString() - .slice(0, DATE_LENGTH + TOTAL_TIME_LENGTH) - .replace(/:/g, '-'); - - return { - outDir, - fileName: `${baseName}.${id}`, - }; } -export type FileSinkOptions = { +export type FileSinkOptions = { filePath: string; - recover?: () => RecoverResult; + recover?: () => RecoverResult; finalize?: () => void; }; -export type FileInput = Buffer | string; -export type FileOutput = Buffer | string; +/** + * String file sink using composition: Transport + Encoding + Recovery policy. + * Writes are append-only. + * + * FileSink opens the underlying file lazily on first write and keeps it open + * until close() or finalize() is called. + * + * Design rules: + * - "Extend types only when substitutable" + * - "Reuse behavior via composition" + * - "Transport ≠ format ≠ recovery" + */ +export class FileSink { + private file: TextFileSink; + private isOpen = false; + private fd: number | null = null; -export class FileSink - implements Sink, Recoverable -{ - #fd: number | null = null; - options: FileSinkOptions; + constructor(public options: FileSinkOptions) { + const { filePath } = options; + this.file = new TextFileSink(filePath); - constructor(options: FileSinkOptions) { - this.options = options; - } + // Recovery policy - string-specific, customizable + this.recover = + options.recover ?? + (() => stringRecover(filePath, (line: string) => line as T)); - isClosed(): boolean { - return this.#fd == null; + // Finalization policy - defaults to close() for cleanup + this.finalize = options.finalize ?? (() => this.close()); } - encode(input: I): O { - return stringEncode(input as any); + /** + * Encode input to string format. + */ + encode(input: T): string { + return stringEncode(input); } - decode(output: O): I { - return stringDecode(output as any); + /** + * Decode string to output type. + */ + decode(output: string | Buffer): T { + const str = stringDecode(output); + return str as T; } + + /** + * Get file path. + */ getFilePath(): string { - return this.options.filePath; + return this.file.getPath(); } - open(withRepack: boolean = false): void { - const dir = path.dirname(this.options.filePath); - if (!existsSync(dir)) { - mkdirSync(dir, { recursive: true }); - } + /** + * Open file for writing (creates directory if needed). + */ + open(withRepack?: boolean): void { + if (this.isOpen) return; + + const dir = path.dirname(this.file.getPath()); + fs.mkdirSync(dir, { recursive: true }); + if (withRepack) { - this.repack(this.options.filePath); + this.repack(this.file.getPath()); } - this.#fd = fs.openSync(this.options.filePath, 'a'); - } - close(): void { - if (this.#fd == null) { - return; - } - fs.closeSync(this.#fd); - this.#fd = null; + this.fd = fs.openSync(this.file.getPath(), 'a'); + this.isOpen = true; } - write(input: I): void { - if (this.#fd == null) { - return; - } // Silently ignore if not open - const encoded = this.encode(input); + /** + * Write input to file (append-only). + */ + write(input: T): void { + if (!this.isOpen) return; + try { - fs.writeSync(this.#fd, encoded as any); + const encoded = this.encode(input); + fs.writeSync(this.fd!, encoded); } catch { - // Silently ignore write errors (e.g., EBADF in test environments with mocked fs) + // Silently ignore write errors } } - recover(): RecoverResult { - const dir = path.dirname(this.options.filePath); - if (!existsSync(dir)) { - mkdirSync(dir, { recursive: true }); + /** + * Close file descriptor. + */ + close(): void { + if (this.fd !== null) { + fs.closeSync(this.fd); + this.fd = null; } - return this.options.recover!() as RecoverResult; + this.isOpen = false; } + /** + * Check if sink is closed. + */ + isClosed(): boolean { + return !this.isOpen; + } + + /** + * Recover records with error handling (tolerant parsing). + * Handles invalid records gracefully, returns errors alongside valid data. + */ + recover: () => RecoverResult; + + /** + * Repack file with clean formatting. + */ repack(outputPath?: string): void { const { records } = this.recover(); + const targetPath = outputPath ?? this.getFilePath(); + const dir = path.dirname(targetPath); + fs.mkdirSync(dir, { recursive: true }); fs.writeFileSync( - outputPath ?? this.getFilePath(), - records.map(this.encode).join('\n'), + targetPath, + records.map(record => this.encode(record)).join(''), ); } - finalize(): void { - this.options.finalize!(); - } + /** + * Finalization - defaults to close() for cleanup. + */ + finalize: () => void; } diff --git a/packages/utils/src/lib/file-sink-text.unit.test.ts b/packages/utils/src/lib/file-sink-text.unit.test.ts index 33cc9ad0e..eb1b17749 100644 --- a/packages/utils/src/lib/file-sink-text.unit.test.ts +++ b/packages/utils/src/lib/file-sink-text.unit.test.ts @@ -262,7 +262,7 @@ describe('FileSink', () => { sink.repack(); expect(mockRecover).toHaveBeenCalled(); - expect(fs.readFileSync(filePath, 'utf8')).toBe('record1\n\nrecord2\n'); + expect(fs.readFileSync(filePath, 'utf8')).toBe('record1\nrecord2\n'); }); it('repack() should accept output path', () => { @@ -276,7 +276,7 @@ describe('FileSink', () => { const outputPath = '/tmp/repack-output.txt'; sink.repack(outputPath); expect(mockRecover).toHaveBeenCalled(); - expect(fs.readFileSync(outputPath, 'utf8')).toBe('record1\n\nrecord2\n'); + expect(fs.readFileSync(outputPath, 'utf8')).toBe('record1\nrecord2\n'); }); it('finalize() should call the finalize function from options', () => { diff --git a/packages/utils/src/lib/trace-file-utils.ts b/packages/utils/src/lib/trace-file-utils.ts index 2a2f3eb30..09e028881 100644 --- a/packages/utils/src/lib/trace-file-utils.ts +++ b/packages/utils/src/lib/trace-file-utils.ts @@ -1,7 +1,7 @@ -import os from 'node:os'; import type { PerformanceMark, PerformanceMeasure } from 'node:perf_hooks'; import { threadId } from 'node:worker_threads'; import { defaultClock } from './clock-epoch.js'; +import { jsonlDecode, jsonlEncode } from './file-sink-jsonl.js'; import type { BeginEvent, CompleteEvent, @@ -13,7 +13,9 @@ import type { SpanEventArgs, TraceEvent, TraceEventContainer, + TraceEventRaw, } from './trace-file.type.js'; +import type { UserTimingDetail } from './user-timing-extensibility-api.type.js'; /** Global counter for generating unique span IDs within a trace */ // eslint-disable-next-line functional/no-let @@ -228,7 +230,7 @@ export const markToInstantEvent = ( ...opt, name: opt?.name ?? entry.name, ts: defaultClock.fromEntry(entry), - args: entry.detail ? { detail: entry.detail } : undefined, + args: entry.detail ? { data: { detail: entry.detail } } : undefined, }); /** @@ -249,6 +251,19 @@ export const measureToSpanEvents = ( args: entry.detail ? { data: { detail: entry.detail } } : undefined, }); +export function getTraceMetadata( + startDate?: Date, + metadata?: Record, +) { + return { + source: 'DevTools', + startTime: startDate?.toISOString() ?? new Date().toISOString(), + hardwareConcurrency: 1, + dataOrigin: 'TraceEvents', + ...metadata, + }; +} + /** * Creates a complete trace file container with metadata. * @param opt - Trace file configuration @@ -263,6 +278,65 @@ export const getTraceFile = (opt: { metadata: { source: 'Node.js UserTiming', startTime: opt.startTime ?? new Date().toISOString(), - hardwareConcurrency: os.cpus().length, + hardwareConcurrency: 1, }, }); + +function processDetail( + target: T, + processor: (detail: string | object) => string | object, +): T { + if ( + target.detail != null && + (typeof target.detail === 'string' || typeof target.detail === 'object') + ) { + return { ...target, detail: processor(target.detail) }; + } + return target; +} + +export function decodeDetail(target: { detail: string }): UserTimingDetail { + return processDetail(target, detail => + typeof detail === 'string' ? jsonlDecode(detail) : detail, + ) as UserTimingDetail; +} + +export function encodeDetail(target: UserTimingDetail): UserTimingDetail { + return processDetail(target, detail => + typeof detail === 'object' + ? jsonlEncode(detail as UserTimingDetail) + : detail, + ); +} + +export function decodeTraceEvent({ args, ...rest }: TraceEventRaw): TraceEvent { + if (!args) return rest as TraceEvent; + + const processedArgs = decodeDetail(args as { detail: string }); + if ('data' in args && args.data && typeof args.data === 'object') { + return { + ...rest, + args: { + ...processedArgs, + data: decodeDetail(args.data as { detail: string }), + }, + } as TraceEvent; + } + return { ...rest, args: processedArgs } as TraceEvent; +} + +export function encodeTraceEvent({ args, ...rest }: TraceEvent): TraceEventRaw { + if (!args) return rest as TraceEventRaw; + + const processedArgs = encodeDetail(args); + if ('data' in args && args.data && typeof args.data === 'object') { + return { + ...rest, + args: { + ...processedArgs, + data: encodeDetail(args.data as UserTimingDetail), + }, + } as TraceEventRaw; + } + return { ...rest, args: processedArgs } as TraceEventRaw; +} diff --git a/testing/test-utils/src/lib/utils/perf-hooks.mock.ts b/testing/test-utils/src/lib/utils/perf-hooks.mock.ts index b22e88bd5..d7a8b3ab1 100644 --- a/testing/test-utils/src/lib/utils/perf-hooks.mock.ts +++ b/testing/test-utils/src/lib/utils/perf-hooks.mock.ts @@ -33,27 +33,36 @@ export const createPerformanceMock = (timeOrigin = 500_000) => ({ now: vi.fn(() => nowMs), - mark: vi.fn((name: string) => { + mark: vi.fn((name: string, options?: { detail?: unknown }) => { entries.push({ name, entryType: 'mark', startTime: nowMs, duration: 0, + detail: options?.detail, } as PerformanceEntry); MockPerformanceObserver.globalEntries = entries; }), - measure: vi.fn((name: string, startMark?: string, endMark?: string) => { - const entry = { - name, - entryType: 'measure', - startTime: nowMs, - duration: nowMs, - } as PerformanceEntry; - entries.push(entry); - MockPerformanceObserver.globalEntries = entries; - triggerObservers([entry]); - }), + measure: vi.fn( + ( + name: string, + startMark?: string, + endMark?: string, + options?: { detail?: unknown }, + ) => { + const entry = { + name, + entryType: 'measure', + startTime: nowMs, + duration: nowMs, + detail: options?.detail, + } as PerformanceEntry; + entries.push(entry); + MockPerformanceObserver.globalEntries = entries; + triggerObservers([entry]); + }, + ), getEntries: vi.fn(() => entries.slice()), From 67e004c713a186d39e9636cfa2b8f81829228079 Mon Sep 17 00:00:00 2001 From: Michael Hladky Date: Wed, 14 Jan 2026 22:53:50 +0100 Subject: [PATCH 07/31] refactor: fix lint --- packages/utils/src/lib/file-sink-json-trace.int.test.ts | 2 +- packages/utils/src/lib/file-sink-json-trace.ts | 6 ++++-- packages/utils/src/lib/file-sink-json-trace.unit.test.ts | 5 ++++- packages/utils/src/lib/file-sink-jsonl.ts | 6 +++--- packages/utils/src/lib/file-sink-text.ts | 8 ++++++-- packages/utils/src/lib/trace-file-utils.ts | 8 ++++++-- 6 files changed, 24 insertions(+), 11 deletions(-) diff --git a/packages/utils/src/lib/file-sink-json-trace.int.test.ts b/packages/utils/src/lib/file-sink-json-trace.int.test.ts index b9cdb7f2e..de0bc213b 100644 --- a/packages/utils/src/lib/file-sink-json-trace.int.test.ts +++ b/packages/utils/src/lib/file-sink-json-trace.int.test.ts @@ -3,7 +3,7 @@ import * as os from 'node:os'; import * as path from 'node:path'; import { afterAll, beforeAll, describe, expect, it } from 'vitest'; import { teardownTestFolder } from '@code-pushup/test-utils'; -import { FileSinkJsonTrace } from './file-sink-json-trace'; +import { FileSinkJsonTrace } from './file-sink-json-trace.js'; import type { CompleteEvent, TraceEvent } from './trace-file.type'; describe('TraceFileSink integration', () => { diff --git a/packages/utils/src/lib/file-sink-json-trace.ts b/packages/utils/src/lib/file-sink-json-trace.ts index 6e201f133..880a1436b 100644 --- a/packages/utils/src/lib/file-sink-json-trace.ts +++ b/packages/utils/src/lib/file-sink-json-trace.ts @@ -37,7 +37,7 @@ export function finalizeTraceFile( events.sort((a, b) => a.ts - b.ts); const fallbackTs = performance.now(); const firstTs = events.length > 0 ? events[0].ts : fallbackTs; - const lastTs = events.length > 0 ? events[events.length - 1].ts : fallbackTs; + const lastTs = events.length > 0 ? events.at(-1).ts : fallbackTs; const marginMs = options?.marginMs ?? TRACE_MARGIN_MS; const marginDurMs = options?.marginDurMs ?? TRACE_MARGIN_DURATION_MS; @@ -182,7 +182,9 @@ export class FileSinkJsonTrace { } finalize(): void { - if (this.#finalized) return; + if (this.#finalized) { + return; + } this.#finalized = true; this.sink.finalize(); } diff --git a/packages/utils/src/lib/file-sink-json-trace.unit.test.ts b/packages/utils/src/lib/file-sink-json-trace.unit.test.ts index 30911b558..1b7ae244d 100644 --- a/packages/utils/src/lib/file-sink-json-trace.unit.test.ts +++ b/packages/utils/src/lib/file-sink-json-trace.unit.test.ts @@ -2,7 +2,10 @@ import { vol } from 'memfs'; import * as fs from 'node:fs'; import { beforeEach, describe, expect, it } from 'vitest'; import { MEMFS_VOLUME } from '@code-pushup/test-utils'; -import { FileSinkJsonTrace, finalizeTraceFile } from './file-sink-json-trace'; +import { + FileSinkJsonTrace, + finalizeTraceFile, +} from './file-sink-json-trace.js'; import { decodeTraceEvent, encodeTraceEvent, diff --git a/packages/utils/src/lib/file-sink-jsonl.ts b/packages/utils/src/lib/file-sink-jsonl.ts index c3e9f2a83..f2eeebd88 100644 --- a/packages/utils/src/lib/file-sink-jsonl.ts +++ b/packages/utils/src/lib/file-sink-jsonl.ts @@ -100,7 +100,7 @@ export class JsonlFile< * Encode record to JSONL format. */ encode(record: T): string { - return jsonlEncode(record) + '\n'; + return `${jsonlEncode(record)}\n`; } /** @@ -121,7 +121,7 @@ export class JsonlFile< * Write record in JSONL format (append-only). */ write(record: T): void { - this.file.append(jsonlEncode(record) + '\n'); + this.file.append(`${jsonlEncode(record)}\n`); } /** @@ -167,7 +167,7 @@ export class JsonlFile< const { records } = this.recover(); fs.writeFileSync( outputPath ?? this.getPath(), - records.map(jsonlEncode).join('\n') + '\n', + `${records.map(jsonlEncode).join('\n')}\n`, ); } } diff --git a/packages/utils/src/lib/file-sink-text.ts b/packages/utils/src/lib/file-sink-text.ts index 80c617be7..273608323 100644 --- a/packages/utils/src/lib/file-sink-text.ts +++ b/packages/utils/src/lib/file-sink-text.ts @@ -185,7 +185,9 @@ export class FileSink { * Open file for writing (creates directory if needed). */ open(withRepack?: boolean): void { - if (this.isOpen) return; + if (this.isOpen) { + return; + } const dir = path.dirname(this.file.getPath()); fs.mkdirSync(dir, { recursive: true }); @@ -202,7 +204,9 @@ export class FileSink { * Write input to file (append-only). */ write(input: T): void { - if (!this.isOpen) return; + if (!this.isOpen) { + return; + } try { const encoded = this.encode(input); diff --git a/packages/utils/src/lib/trace-file-utils.ts b/packages/utils/src/lib/trace-file-utils.ts index 09e028881..1f08054e8 100644 --- a/packages/utils/src/lib/trace-file-utils.ts +++ b/packages/utils/src/lib/trace-file-utils.ts @@ -310,7 +310,9 @@ export function encodeDetail(target: UserTimingDetail): UserTimingDetail { } export function decodeTraceEvent({ args, ...rest }: TraceEventRaw): TraceEvent { - if (!args) return rest as TraceEvent; + if (!args) { + return rest as TraceEvent; + } const processedArgs = decodeDetail(args as { detail: string }); if ('data' in args && args.data && typeof args.data === 'object') { @@ -326,7 +328,9 @@ export function decodeTraceEvent({ args, ...rest }: TraceEventRaw): TraceEvent { } export function encodeTraceEvent({ args, ...rest }: TraceEvent): TraceEventRaw { - if (!args) return rest as TraceEventRaw; + if (!args) { + return rest as TraceEventRaw; + } const processedArgs = encodeDetail(args); if ('data' in args && args.data && typeof args.data === 'object') { From 0b1fd3a922d69f69253d8a41325fe450b460a6f0 Mon Sep 17 00:00:00 2001 From: Michael Hladky Date: Fri, 16 Jan 2026 23:01:48 +0100 Subject: [PATCH 08/31] refactor: wip --- packages/utils/mocks/sink.mock.ts | 8 +- .../utils/src/lib/file-sink-json-trace.ts | 195 ----------- .../utils/src/lib/file-sink-jsonl.int.test.ts | 138 -------- packages/utils/src/lib/file-sink-jsonl.ts | 173 ---------- .../src/lib/file-sink-jsonl.unit.test.ts | 241 -------------- .../utils/src/lib/file-sink-text.int.test.ts | 184 ----------- packages/utils/src/lib/file-sink-text.ts | 261 --------------- .../utils/src/lib/file-sink-text.unit.test.ts | 310 ------------------ packages/utils/src/lib/file-sink.ts | 156 +++++++++ packages/utils/src/lib/file-sink.unit.test.ts | 217 ++++++++++++ .../src/lib/performance-observer.int.test.ts | 6 +- .../utils/src/lib/performance-observer.ts | 9 +- .../src/lib/performance-observer.unit.test.ts | 6 +- .../file-sink-json-trace.int.test.ts | 3 +- .../src/lib/profiler/file-sink-json-trace.ts | 203 ++++++++++++ .../file-sink-json-trace.unit.test.ts | 200 +++++++++-- packages/utils/src/lib/profiler/profiler.ts | 118 ++++++- .../src/lib/profiler/profiler.unit.test.ts | 24 +- .../lib/{ => profiler}/trace-file-utils.ts | 46 ++- .../trace-file-utils.unit.test.ts | 0 .../src/lib/{ => profiler}/trace-file.type.ts | 10 +- packages/utils/src/lib/sink-source.type.ts | 75 ++++- 22 files changed, 1005 insertions(+), 1578 deletions(-) delete mode 100644 packages/utils/src/lib/file-sink-json-trace.ts delete mode 100644 packages/utils/src/lib/file-sink-jsonl.int.test.ts delete mode 100644 packages/utils/src/lib/file-sink-jsonl.ts delete mode 100644 packages/utils/src/lib/file-sink-jsonl.unit.test.ts delete mode 100644 packages/utils/src/lib/file-sink-text.int.test.ts delete mode 100644 packages/utils/src/lib/file-sink-text.ts delete mode 100644 packages/utils/src/lib/file-sink-text.unit.test.ts create mode 100644 packages/utils/src/lib/file-sink.ts create mode 100644 packages/utils/src/lib/file-sink.unit.test.ts rename packages/utils/src/lib/{ => profiler}/file-sink-json-trace.int.test.ts (98%) create mode 100644 packages/utils/src/lib/profiler/file-sink-json-trace.ts rename packages/utils/src/lib/{ => profiler}/file-sink-json-trace.unit.test.ts (67%) rename packages/utils/src/lib/{ => profiler}/trace-file-utils.ts (89%) rename packages/utils/src/lib/{ => profiler}/trace-file-utils.unit.test.ts (100%) rename packages/utils/src/lib/{ => profiler}/trace-file.type.ts (97%) diff --git a/packages/utils/mocks/sink.mock.ts b/packages/utils/mocks/sink.mock.ts index 13d89e91c..35a923351 100644 --- a/packages/utils/mocks/sink.mock.ts +++ b/packages/utils/mocks/sink.mock.ts @@ -1,6 +1,8 @@ import type { Sink } from '../src/lib/sink-source.type'; -export class MockSink implements Sink { +export class MockFileSink implements Sink { + setPath: (filePath: string) => void; + getPath: () => string; private writtenItems: string[] = []; private closed = false; @@ -27,4 +29,8 @@ export class MockSink implements Sink { getWrittenItems(): string[] { return [...this.writtenItems]; } + + clearWrittenItems(): void { + this.writtenItems = []; + } } diff --git a/packages/utils/src/lib/file-sink-json-trace.ts b/packages/utils/src/lib/file-sink-json-trace.ts deleted file mode 100644 index 880a1436b..000000000 --- a/packages/utils/src/lib/file-sink-json-trace.ts +++ /dev/null @@ -1,195 +0,0 @@ -import * as fs from 'node:fs'; -// Exception: finalization creates new JSON file -import * as path from 'node:path'; -import { performance } from 'node:perf_hooks'; -import { JsonlFile, recoverJsonlFile } from './file-sink-jsonl.js'; -import type { RecoverResult } from './sink-source.type.js'; -import { - decodeTraceEvent, - encodeTraceEvent, - getCompleteEvent, - getInstantEventTracingStartedInBrowser, -} from './trace-file-utils.js'; -import type { - InstantEvent, - SpanEvent, - TraceEvent, - TraceEventRaw, -} from './trace-file.type.js'; - -const TRACE_START_MARGIN_NAME = '[trace padding start]'; -const TRACE_END_MARGIN_NAME = '[trace padding end]'; -const TRACE_MARGIN_MS = 1000; -const TRACE_MARGIN_DURATION_MS = 20; - -export type FinalizeTraceFileOptions = { - marginMs?: number; - marginDurMs?: number; - startTime?: string | Date; -}; - -export function finalizeTraceFile( - events: (SpanEvent | InstantEvent)[], - outputPath: string, - metadata?: Record, - options?: FinalizeTraceFileOptions, -): void { - events.sort((a, b) => a.ts - b.ts); - const fallbackTs = performance.now(); - const firstTs = events.length > 0 ? events[0].ts : fallbackTs; - const lastTs = events.length > 0 ? events.at(-1).ts : fallbackTs; - - const marginMs = options?.marginMs ?? TRACE_MARGIN_MS; - const marginDurMs = options?.marginDurMs ?? TRACE_MARGIN_DURATION_MS; - - const startTs = firstTs - marginMs; - const endTs = lastTs + marginMs; - - const traceEvents: TraceEvent[] = [ - getInstantEventTracingStartedInBrowser({ ts: startTs, url: outputPath }), - getCompleteEvent({ - name: TRACE_START_MARGIN_NAME, - ts: startTs, - dur: marginDurMs, - }), - ...events, - getCompleteEvent({ - name: TRACE_END_MARGIN_NAME, - ts: endTs, - dur: marginDurMs, - }), - ]; - - const startTime = options?.startTime - ? typeof options.startTime === 'string' - ? options.startTime - : options.startTime.toISOString() - : new Date().toISOString(); - - fs.writeFileSync( - outputPath, - JSON.stringify({ - traceEvents, - displayTimeUnit: 'ms', - metadata: { - source: 'DevTools', - startTime, - hardwareConcurrency: 1, - dataOrigin: 'TraceEvents', - ...metadata, - }, - }), - 'utf8', - ); -} - -export type TraceFileSinkOptions = { - filename: string; - directory?: string; - metadata?: Record; - marginMs?: number; - marginDurMs?: number; - startTime?: string | Date; -}; - -export class FileSinkJsonTrace { - readonly #directory: string; - readonly #filename: string; - readonly #metadata: Record | undefined; - readonly #marginMs?: number; - readonly #marginDurMs?: number; - readonly #startTime?: string | Date; - private sink: JsonlFile; - #finalized = false; - - constructor(opts: TraceFileSinkOptions) { - const { - filename, - directory = '.', - metadata, - marginMs, - marginDurMs, - startTime, - } = opts; - const traceJsonlPath = path.join(directory, `${filename}.jsonl`); - - this.#directory = directory; - this.#filename = filename; - this.#metadata = metadata; - this.#marginMs = marginMs; - this.#marginDurMs = marginDurMs; - this.#startTime = startTime; - - this.sink = new JsonlFile({ - filePath: traceJsonlPath, - recover: () => recoverJsonlFile(traceJsonlPath), - finalize: () => { - const rawRecords = this.sink.recover().records; - // Decode raw events to proper TraceEvent format for finalization - const processedRecords = rawRecords.map(decodeTraceEvent); - finalizeTraceFile( - processedRecords as (SpanEvent | InstantEvent)[], - this.getFilePathForExt('json'), - this.#metadata, - { - marginMs: this.#marginMs, - marginDurMs: this.#marginDurMs, - startTime: this.#startTime, - }, - ); - }, - }); - } - - /** - * Open file for writing (no-op since JsonlFile opens lazily). - */ - open(): void { - // JsonlFile opens lazily on first write, so no-op here - } - - write(input: SpanEvent | InstantEvent): void { - const encodedEvent = encodeTraceEvent(input); - this.sink.write(encodedEvent); - } - - /** - * Read all events (strict parsing - throws on invalid JSON). - * For error-tolerant reading, use recover() instead. - */ - readAll(): (SpanEvent | InstantEvent)[] { - return this.sink.readAll().map(decodeTraceEvent) as ( - | SpanEvent - | InstantEvent - )[]; - } - - getFilePath(): string { - return this.sink.getPath(); - } - - close(): void { - this.sink.close(); - } - - recover(): RecoverResult { - const { records, errors, partialTail } = this.sink.recover(); - const processedRecords = records.map(decodeTraceEvent) as ( - | SpanEvent - | InstantEvent - )[]; - return { records: processedRecords, errors, partialTail }; - } - - finalize(): void { - if (this.#finalized) { - return; - } - this.#finalized = true; - this.sink.finalize(); - } - - getFilePathForExt(ext: 'json' | 'jsonl'): string { - return path.join(this.#directory, `${this.#filename}.${ext}`); - } -} diff --git a/packages/utils/src/lib/file-sink-jsonl.int.test.ts b/packages/utils/src/lib/file-sink-jsonl.int.test.ts deleted file mode 100644 index c0bae5503..000000000 --- a/packages/utils/src/lib/file-sink-jsonl.int.test.ts +++ /dev/null @@ -1,138 +0,0 @@ -import * as fs from 'node:fs'; -import * as os from 'node:os'; -import * as path from 'node:path'; -import { afterAll, beforeAll, describe, expect, it } from 'vitest'; -import { teardownTestFolder } from '@code-pushup/test-utils'; -import { JsonlFile, recoverJsonlFile } from './file-sink-jsonl.js'; - -describe('JsonlFile integration', () => { - const baseDir = path.join(os.tmpdir(), 'file-sink-json-int-tests'); - const testFile = path.join(baseDir, 'test-data.jsonl'); - - beforeAll(async () => { - await fs.promises.mkdir(baseDir, { recursive: true }); - }); - - beforeEach(async () => { - try { - await fs.promises.unlink(testFile); - } catch { - // File doesn't exist, which is fine - } - }); - - afterAll(async () => { - await teardownTestFolder(baseDir); - }); - - describe('file operations', () => { - const testData = [ - { id: 1, name: 'Alice', active: true }, - { id: 2, name: 'Bob', active: false }, - { id: 3, name: 'Charlie', active: true }, - ]; - - it('should write and read JSONL files', async () => { - const sink = new JsonlFile({ filePath: testFile }); - - // Open and write data - sink.open(); - testData.forEach(item => sink.write(item)); - sink.close(); - - expect(fs.existsSync(testFile)).toBe(true); - const fileContent = fs.readFileSync(testFile, 'utf8'); - const lines = fileContent.trim().split('\n'); - expect(lines).toStrictEqual([ - '{"id":1,"name":"Alice","active":true}', - '{"id":2,"name":"Bob","active":false}', - '{"id":3,"name":"Charlie","active":true}', - ]); - - lines.forEach((line, index) => { - const parsed = JSON.parse(line); - expect(parsed).toStrictEqual(testData[index]); - }); - }); - - it('should recover data from JSONL files', async () => { - const jsonlContent = `${testData.map(item => JSON.stringify(item)).join('\n')}\n`; - fs.writeFileSync(testFile, jsonlContent); - - expect(recoverJsonlFile(testFile)).toStrictEqual({ - records: testData, - errors: [], - partialTail: null, - }); - }); - - it('should handle JSONL files with parse errors', async () => { - const mixedContent = - '{"id":1,"name":"Alice"}\n' + - 'invalid json line\n' + - '{"id":2,"name":"Bob"}\n' + - '{"id":3,"name":"Charlie","incomplete":\n'; - - fs.writeFileSync(testFile, mixedContent); - - expect(recoverJsonlFile(testFile)).toStrictEqual({ - records: [ - { id: 1, name: 'Alice' }, - { id: 2, name: 'Bob' }, - ], - errors: [ - expect.objectContaining({ line: 'invalid json line' }), - expect.objectContaining({ - line: '{"id":3,"name":"Charlie","incomplete":', - }), - ], - partialTail: '{"id":3,"name":"Charlie","incomplete":', - }); - }); - - it('should recover data using JsonlFileSink.recover()', async () => { - const sink = new JsonlFile({ filePath: testFile }); - sink.open(); - testData.forEach(item => sink.write(item)); - sink.close(); - - expect(sink.recover()).toStrictEqual({ - records: testData, - errors: [], - partialTail: null, - }); - }); - - describe('edge cases', () => { - it('should handle empty files', async () => { - fs.writeFileSync(testFile, ''); - - expect(recoverJsonlFile(testFile)).toStrictEqual({ - records: [], - errors: [], - partialTail: null, - }); - }); - - it('should handle files with only whitespace', async () => { - fs.writeFileSync(testFile, ' \n \n\t\n'); - - expect(recoverJsonlFile(testFile)).toStrictEqual({ - records: [], - errors: [], - partialTail: null, - }); - }); - - it('should handle non-existent files', async () => { - const nonExistentFile = path.join(baseDir, 'does-not-exist.jsonl'); - - expect(recoverJsonlFile(nonExistentFile)).toStrictEqual({ - records: [], - errors: [], - partialTail: null, - }); - }); - }); - }); -}); diff --git a/packages/utils/src/lib/file-sink-jsonl.ts b/packages/utils/src/lib/file-sink-jsonl.ts deleted file mode 100644 index f2eeebd88..000000000 --- a/packages/utils/src/lib/file-sink-jsonl.ts +++ /dev/null @@ -1,173 +0,0 @@ -import * as fs from 'node:fs'; -import { TextFileSink } from './file-sink-text.js'; -import type { RecoverOptions, RecoverResult } from './sink-source.type.js'; - -/** - * JSONL encoding functions - single source of truth for JSONL format. - */ -export const jsonlEncode = < - T extends Record = Record, ->( - input: T, -): string => JSON.stringify(input); - -export const jsonlDecode = < - T extends Record = Record, ->( - raw: string, -): T => JSON.parse(raw) as T; - -export function recoverJsonlFile< - T extends Record = Record, ->(filePath: string, opts: RecoverOptions = {}): RecoverResult { - const records: T[] = []; - const errors: { lineNo: number; line: string; error: Error }[] = []; - let partialTail: string | null = null; - - try { - const content = fs.readFileSync(filePath, 'utf8'); - const lines = content.split('\n'); - let lineNo = 0; - - for (const line of lines) { - lineNo++; - const trimmedLine = line.trim(); - if (!trimmedLine) { - continue; - } - - try { - const record = jsonlDecode(trimmedLine); - records.push(record); - } catch (error) { - const info = { lineNo, line, error: error as Error }; - errors.push(info); - - if (opts.keepInvalid) { - records.push({ __invalid: true, lineNo, line } as any); - } - - partialTail = line; - } - - // Optional: perfect tail detection for empty lines at EOF - if (trimmedLine === '' && lineNo === lines.length) { - partialTail = line; - } - } - } catch { - return { records: [], errors: [], partialTail: null }; - } - - return { records, errors, partialTail }; -} - -export type JsonlFileOptions> = { - filePath: string; - recover?: () => RecoverResult; - finalize?: () => void; -}; - -/** - * JSONL writer using composition: Transport + Encoding + Recovery policy. - * Writes are append-only. - * - * JsonlFile opens the underlying file lazily on first write and keeps it open - * until close() or finalize() is called. - * - * Design rules: - * - "Extend types only when substitutable" - * - "Reuse behavior via composition" - * - "Transport ≠ format ≠ recovery" - */ -export class JsonlFile< - T extends Record = Record, -> { - private file: TextFileSink; - - constructor(options: JsonlFileOptions) { - const { filePath } = options; - this.file = new TextFileSink(filePath); - - // Recovery policy - JSONL-specific, customizable - this.recover = options.recover ?? (() => recoverJsonlFile(filePath)); - - // Finalization policy - defaults to close() for cleanup - this.finalize = options.finalize ?? (() => this.close()); - } - - /** - * Encode record to JSONL format. - */ - encode(record: T): string { - return `${jsonlEncode(record)}\n`; - } - - /** - * Decode JSONL string to record. - */ - decode(jsonlString: string): T { - return jsonlDecode(jsonlString); - } - - /** - * Open file for writing (no-op since TextFileSink opens lazily). - */ - open(): void { - // TextFileSink opens lazily on first write, so no-op here - } - - /** - * Write record in JSONL format (append-only). - */ - write(record: T): void { - this.file.append(`${jsonlEncode(record)}\n`); - } - - /** - * Read all records as parsed array (strict - throws on invalid JSON). - */ - readAll(): T[] { - return this.file - .readAll() - .split('\n') - .filter(Boolean) - .map(line => jsonlDecode(line)); - } - - /** - * Recover records with error handling (tolerant parsing). - * Handles invalid records gracefully, returns errors alongside valid data. - */ - recover: () => RecoverResult; - - /** - * Finalization - defaults to close() for cleanup. - */ - finalize: () => void; - - /** - * Get file path. - */ - getPath(): string { - return this.file.getPath(); - } - - /** - * Close file. - */ - close(): void { - this.file.close(); - } - - /** - * Repack file with clean JSONL formatting. - */ - repack(outputPath?: string): void { - const { records } = this.recover(); - fs.writeFileSync( - outputPath ?? this.getPath(), - `${records.map(jsonlEncode).join('\n')}\n`, - ); - } -} diff --git a/packages/utils/src/lib/file-sink-jsonl.unit.test.ts b/packages/utils/src/lib/file-sink-jsonl.unit.test.ts deleted file mode 100644 index 7d775820f..000000000 --- a/packages/utils/src/lib/file-sink-jsonl.unit.test.ts +++ /dev/null @@ -1,241 +0,0 @@ -import { vol } from 'memfs'; -import * as fs from 'node:fs'; -import { beforeEach, describe, expect, it } from 'vitest'; -import { MEMFS_VOLUME } from '@code-pushup/test-utils'; -import { - JsonlFile, - jsonlDecode, - jsonlEncode, - recoverJsonlFile, -} from './file-sink-jsonl.js'; - -describe('jsonlEncode', () => { - it('should encode object to JSON string', () => { - const obj = { key: 'value', number: 42 }; - expect(jsonlEncode(obj)).toBe(JSON.stringify(obj)); - }); - - it('should handle nested objects', () => { - const obj = { nested: { deep: 'value' }, array: [1, 2, 3] }; - expect(jsonlEncode(obj)).toBe(JSON.stringify(obj)); - }); - - it('should handle empty object', () => { - expect(jsonlEncode({})).toBe('{}'); - }); -}); - -describe('jsonlDecode', () => { - it('should decode JSON string to object', () => { - const obj = { key: 'value', number: 42 }; - const jsonStr = `${JSON.stringify(obj)}\n`; - expect(jsonlDecode(jsonStr)).toStrictEqual(obj); - }); - - it('should handle nested objects', () => { - const obj = { nested: { deep: 'value' }, array: [1, 2, 3] }; - const jsonStr = `${JSON.stringify(obj)}\n`; - expect(jsonlDecode(jsonStr)).toStrictEqual(obj); - }); - - it('should trim whitespace before parsing', () => { - const obj = { key: 'value' }; - const jsonStr = ` ${JSON.stringify(obj)} \n`; - expect(jsonlDecode(jsonStr)).toStrictEqual(obj); - }); - - it('should throw on invalid JSON', () => { - expect(() => jsonlDecode('invalid json\n')).toThrow('Unexpected token'); - }); - - it('should handle Buffer input', () => { - const obj = { key: 'value', number: 42 }; - const jsonStr = `${JSON.stringify(obj)}\n`; - expect(jsonlDecode(Buffer.from(jsonStr))).toStrictEqual(obj); - }); - - it('should handle primitive JSON values', () => { - expect(jsonlDecode('"string"\n')).toBe('string'); - expect(jsonlDecode('42\n')).toBe(42); - expect(jsonlDecode('true\n')).toBe(true); - expect(jsonlDecode('null\n')).toBeNull(); - }); -}); - -describe('recoverJsonlFile', () => { - beforeEach(() => { - vol.fromJSON( - { - '/tmp': null, - }, - MEMFS_VOLUME, - ); - }); - - it('should recover JSONL file with single object', () => { - const filePath = '/tmp/recover-single.jsonl'; - const obj = { key: 'value', number: 42 }; - fs.writeFileSync(filePath, `${JSON.stringify(obj)}\n`); - - expect(recoverJsonlFile(filePath)).toStrictEqual({ - records: [obj], - errors: [], - partialTail: null, - }); - }); - - it('should recover JSONL file with multiple objects', () => { - const filePath = '/tmp/recover-multi.jsonl'; - const obj1 = { id: 1, name: 'first' }; - const obj2 = { id: 2, name: 'second' }; - fs.writeFileSync( - filePath, - `${JSON.stringify(obj1)}\n${JSON.stringify(obj2)}\n`, - ); - - expect(recoverJsonlFile(filePath)).toStrictEqual({ - records: [obj1, obj2], - errors: [], - partialTail: null, - }); - }); - - it('should handle JSON parsing errors', () => { - const filePath = '/tmp/recover-error.jsonl'; - fs.writeFileSync( - filePath, - '{"valid": "json"}\ninvalid json line\n{"id":3,"name":"Charlie","incomplete":\n', - ); - - const result = recoverJsonlFile(filePath); - expect(result.records).toStrictEqual([{ valid: 'json' }]); - expect(result.errors).toStrictEqual([ - expect.objectContaining({ line: 'invalid json line' }), - expect.objectContaining({ - line: '{"id":3,"name":"Charlie","incomplete":', - }), - ]); - expect(result.partialTail).toBe('{"id":3,"name":"Charlie","incomplete":'); - }); - - it('should support keepInvalid option', () => { - const filePath = '/tmp/recover-keep-invalid.jsonl'; - fs.writeFileSync(filePath, '{"valid": "json"}\ninvalid json\n'); - - const result = recoverJsonlFile(filePath, { keepInvalid: true }); - expect(result.records).toStrictEqual([ - { valid: 'json' }, - { __invalid: true, lineNo: 2, line: 'invalid json' }, - ]); - expect(result.errors).toHaveLength(1); - }); - - it('should handle empty files', () => { - const filePath = '/tmp/recover-empty.jsonl'; - fs.writeFileSync(filePath, ''); - - expect(recoverJsonlFile(filePath)).toStrictEqual({ - records: [], - errors: [], - partialTail: null, - }); - }); - - it('should handle file read errors gracefully', () => { - expect(recoverJsonlFile('/nonexistent/file.jsonl')).toStrictEqual({ - records: [], - errors: [], - partialTail: null, - }); - }); -}); - -describe('JsonlFile', () => { - beforeEach(() => { - vol.fromJSON( - { - '/tmp': null, - }, - MEMFS_VOLUME, - ); - }); - - type JsonObj = { key: string; number: number }; - - it('should encode objects as JSON', () => { - const sink = new JsonlFile({ - filePath: '/tmp/jsonl-test.jsonl', - }); - const obj = { key: 'value', number: 42 }; - expect(sink.encode(obj)).toBe(`${JSON.stringify(obj)}\n`); - }); - - it('should decode JSON strings to objects', () => { - const sink = new JsonlFile({ - filePath: '/tmp/jsonl-test.jsonl', - }); - const obj = { key: 'value', number: 42 }; - const jsonStr = `${JSON.stringify(obj)}\n`; - expect(sink.decode(jsonStr)).toStrictEqual(obj); - }); - - it('should handle file operations with JSONL format', () => { - const filePath = '/tmp/jsonl-file-ops-test.jsonl'; - const sink = new JsonlFile({ filePath }); - - const obj1 = { key: 'value', number: 42 }; - const obj2 = { key: 'value', number: 42 }; - sink.write(obj1); - sink.write(obj2); - sink.close(); - - const recovered = sink.recover(); - expect(recovered.records).toStrictEqual([obj1, obj2]); - }); - - it('repack() should recover records and write them to output path', () => { - const filePath = '/tmp/jsonl-repack-test.jsonl'; - const sink = new JsonlFile({ filePath }); - const records = [ - { key: 'value', number: 42 }, - { key: 'value', number: 42 }, - ]; - - fs.writeFileSync( - filePath, - `${records.map(record => JSON.stringify(record)).join('\n')}\n`, - ); - - sink.repack(); - expect(fs.readFileSync(filePath, 'utf8')).toBe( - `${JSON.stringify(records[0])}\n${JSON.stringify(records[1])}\n`, - ); - }); - - it('repack() should accept output path', () => { - const filePath = '/tmp/jsonl-repack-test.jsonl'; - const sink = new JsonlFile({ filePath }); - const records = [ - { key: 'value', number: 42 }, - { key: 'value', number: 42 }, - ]; - - fs.writeFileSync( - filePath, - `${records.map(record => JSON.stringify(record)).join('\n')}\n`, - ); - - const outputPath = '/tmp/jsonl-repack-output.jsonl'; - sink.repack(outputPath); - expect(fs.readFileSync(outputPath, 'utf8')).toBe( - `${JSON.stringify(records[0])}\n${JSON.stringify(records[1])}\n`, - ); - }); - - it('should do nothing on finalize()', () => { - const sink = new JsonlFile({ - filePath: '/tmp/jsonl-finalize-test.jsonl', - }); - expect(() => sink.finalize()).not.toThrow(); - }); -}); diff --git a/packages/utils/src/lib/file-sink-text.int.test.ts b/packages/utils/src/lib/file-sink-text.int.test.ts deleted file mode 100644 index 19ea34fb0..000000000 --- a/packages/utils/src/lib/file-sink-text.int.test.ts +++ /dev/null @@ -1,184 +0,0 @@ -import * as fs from 'node:fs'; -import * as os from 'node:os'; -import * as path from 'node:path'; -import { afterAll, beforeAll, describe, expect, it } from 'vitest'; -import { teardownTestFolder } from '@code-pushup/test-utils'; -import { FileSink, stringRecover } from './file-sink-text.js'; - -describe('FileSink integration', () => { - const baseDir = path.join(os.tmpdir(), 'file-sink-text-int-tests'); - const testFile = path.join(baseDir, 'test-data.txt'); - - beforeAll(async () => { - await fs.promises.mkdir(baseDir, { recursive: true }); - }); - - beforeEach(async () => { - try { - await fs.promises.unlink(testFile); - } catch { - // File doesn't exist, which is fine - } - }); - - afterAll(async () => { - await teardownTestFolder(baseDir); - }); - - describe('file operations', () => { - const testData = ['line1', 'line2', 'line3']; - - it('should write and read text files', async () => { - const sink = new FileSink({ - filePath: testFile, - recover: () => stringRecover(testFile, (line: string) => line), - }); - - // Open and write data - sink.open(); - testData.forEach(item => sink.write(item)); - sink.close(); - - expect(fs.existsSync(testFile)).toBe(true); - const fileContent = fs.readFileSync(testFile, 'utf8'); - const lines = fileContent.trim().split('\n'); - expect(lines).toStrictEqual(testData); - - lines.forEach((line, index) => { - expect(line).toStrictEqual(testData[index]); - }); - }); - - it('should recover data from text files', async () => { - const content = `${testData.join('\n')}\n`; - fs.writeFileSync(testFile, content); - - expect(stringRecover(testFile, (line: string) => line)).toStrictEqual({ - records: testData, - errors: [], - partialTail: null, - }); - }); - - it('should handle text files with parse errors', async () => { - const mixedContent = 'valid\ninvalid\nanother\n'; - fs.writeFileSync(testFile, mixedContent); - - expect( - stringRecover(testFile, (line: string) => { - if (line === 'invalid') throw new Error('Invalid line'); - return line.toUpperCase(); - }), - ).toStrictEqual({ - records: ['VALID', 'ANOTHER'], - errors: [ - expect.objectContaining({ - lineNo: 2, - line: 'invalid', - error: expect.any(Error), - }), - ], - partialTail: 'invalid', - }); - }); - - it('should repack file with recovered data', async () => { - const sink = new FileSink({ - filePath: testFile, - recover: () => stringRecover(testFile, (line: string) => line), - }); - - // Write initial data - sink.open(); - testData.forEach(item => sink.write(item)); - sink.close(); - - // Repack to the same file - sink.repack(); - - // Verify the content is still correct - const fileContent = fs.readFileSync(testFile, 'utf8'); - const lines = fileContent - .trim() - .split('\n') - .filter(line => line.length > 0); - expect(lines).toStrictEqual(testData); - }); - - it('should repack file to different output path', async () => { - const outputPath = path.join(baseDir, 'repacked.txt'); - const sink = new FileSink({ - filePath: testFile, - recover: () => stringRecover(testFile, (line: string) => line), - }); - - // Write initial data - sink.open(); - testData.forEach(item => sink.write(item)); - sink.close(); - - // Repack to different file - sink.repack(outputPath); - - // Verify the original file is unchanged - expect(fs.existsSync(testFile)).toBe(true); - - // Verify the repacked file has correct content - expect(fs.existsSync(outputPath)).toBe(true); - const fileContent = fs.readFileSync(outputPath, 'utf8'); - const lines = fileContent - .trim() - .split('\n') - .filter(line => line.length > 0); - expect(lines).toStrictEqual(testData); - }); - - it('should call finalize function when provided', async () => { - let finalized = false; - const sink = new FileSink({ - filePath: testFile, - recover: () => stringRecover(testFile, (line: string) => line), - finalize: () => { - finalized = true; - }, - }); - - sink.finalize(); - expect(finalized).toBe(true); - }); - }); - - describe('edge cases', () => { - it('should handle empty files', async () => { - fs.writeFileSync(testFile, ''); - - expect(stringRecover(testFile, (line: string) => line)).toStrictEqual({ - records: [], - errors: [], - partialTail: null, - }); - }); - - it('should handle files with only whitespace', async () => { - fs.writeFileSync(testFile, ' \n \n\t\n'); - - expect(stringRecover(testFile, (line: string) => line)).toStrictEqual({ - records: [], - errors: [], - partialTail: null, - }); - }); - - it('should handle non-existent files', async () => { - const nonExistentFile = path.join(baseDir, 'does-not-exist.txt'); - - expect( - stringRecover(nonExistentFile, (line: string) => line), - ).toStrictEqual({ - records: [], - errors: [], - partialTail: null, - }); - }); - }); -}); diff --git a/packages/utils/src/lib/file-sink-text.ts b/packages/utils/src/lib/file-sink-text.ts deleted file mode 100644 index 273608323..000000000 --- a/packages/utils/src/lib/file-sink-text.ts +++ /dev/null @@ -1,261 +0,0 @@ -/** - * Simple Text File Sink - * - * Basic file operations for text files. Used as the foundation for format-specific writers. - * If you need JSONL files, use JsonlFile from file-sink-jsonl.ts instead. - */ -import * as fs from 'node:fs'; -import * as path from 'node:path'; -import type { RecoverOptions, RecoverResult } from './sink-source.type.js'; - -/** - * Simple text file sink - reusable for basic file operations. - * One responsibility: append text, read all text, get path. - */ -export class TextFileSink { - #fd: number | null = null; - - constructor(private filePath: string) {} - - /** - * Append text to file (append-only). - */ - append(text: string): void { - // Lazy open on first write - if (this.#fd === null) { - const dir = path.dirname(this.filePath); - fs.mkdirSync(dir, { recursive: true }); - this.#fd = fs.openSync(this.filePath, 'a'); - } - fs.writeSync(this.#fd, text); - } - - /** - * Read entire file as string. - */ - readAll(): string { - try { - return fs.readFileSync(this.filePath, 'utf8'); - } catch { - return ''; - } - } - - /** - * Get file path. - */ - getPath(): string { - return this.filePath; - } - - /** - * Close file descriptor. - */ - close(): void { - if (this.#fd !== null) { - fs.closeSync(this.#fd); - this.#fd = null; - } - } -} - -/** - * String encoding functions - single source of truth for string format. - */ -export const stringEncode = (input: unknown): string => { - if (typeof input === 'string') { - return `${input}\n`; - } - return `${JSON.stringify(input)}\n`; -}; - -export const stringDecode = (input: string | Buffer): string => { - if (Buffer.isBuffer(input)) { - return input.toString('utf8'); - } - return input; -}; - -export function stringRecover( - filePath: string, - decodeFn: (line: string) => T, - opts: RecoverOptions = {}, -): RecoverResult { - const records: T[] = []; - const errors: { lineNo: number; line: string; error: Error }[] = []; - let partialTail: string | null = null; - - try { - const content = fs.readFileSync(filePath, 'utf8'); - const lines = content.split('\n'); - let lineNo = 0; - - for (const line of lines) { - lineNo++; - const trimmedLine = line.trim(); - if (!trimmedLine) { - continue; - } - - try { - const record = decodeFn(trimmedLine); - records.push(record); - } catch (error) { - const info = { lineNo, line, error: error as Error }; - errors.push(info); - - if (opts.keepInvalid) { - records.push({ __invalid: true, lineNo, line } as any); - } - - partialTail = line; - } - - // Optional: perfect tail detection for empty lines at EOF - if (trimmedLine === '' && lineNo === lines.length) { - partialTail = line; - } - } - } catch { - return { records: [], errors: [], partialTail: null }; - } - - return { records, errors, partialTail }; -} - -export type FileSinkOptions = { - filePath: string; - recover?: () => RecoverResult; - finalize?: () => void; -}; - -/** - * String file sink using composition: Transport + Encoding + Recovery policy. - * Writes are append-only. - * - * FileSink opens the underlying file lazily on first write and keeps it open - * until close() or finalize() is called. - * - * Design rules: - * - "Extend types only when substitutable" - * - "Reuse behavior via composition" - * - "Transport ≠ format ≠ recovery" - */ -export class FileSink { - private file: TextFileSink; - private isOpen = false; - private fd: number | null = null; - - constructor(public options: FileSinkOptions) { - const { filePath } = options; - this.file = new TextFileSink(filePath); - - // Recovery policy - string-specific, customizable - this.recover = - options.recover ?? - (() => stringRecover(filePath, (line: string) => line as T)); - - // Finalization policy - defaults to close() for cleanup - this.finalize = options.finalize ?? (() => this.close()); - } - - /** - * Encode input to string format. - */ - encode(input: T): string { - return stringEncode(input); - } - - /** - * Decode string to output type. - */ - decode(output: string | Buffer): T { - const str = stringDecode(output); - return str as T; - } - - /** - * Get file path. - */ - getFilePath(): string { - return this.file.getPath(); - } - - /** - * Open file for writing (creates directory if needed). - */ - open(withRepack?: boolean): void { - if (this.isOpen) { - return; - } - - const dir = path.dirname(this.file.getPath()); - fs.mkdirSync(dir, { recursive: true }); - - if (withRepack) { - this.repack(this.file.getPath()); - } - - this.fd = fs.openSync(this.file.getPath(), 'a'); - this.isOpen = true; - } - - /** - * Write input to file (append-only). - */ - write(input: T): void { - if (!this.isOpen) { - return; - } - - try { - const encoded = this.encode(input); - fs.writeSync(this.fd!, encoded); - } catch { - // Silently ignore write errors - } - } - - /** - * Close file descriptor. - */ - close(): void { - if (this.fd !== null) { - fs.closeSync(this.fd); - this.fd = null; - } - this.isOpen = false; - } - - /** - * Check if sink is closed. - */ - isClosed(): boolean { - return !this.isOpen; - } - - /** - * Recover records with error handling (tolerant parsing). - * Handles invalid records gracefully, returns errors alongside valid data. - */ - recover: () => RecoverResult; - - /** - * Repack file with clean formatting. - */ - repack(outputPath?: string): void { - const { records } = this.recover(); - const targetPath = outputPath ?? this.getFilePath(); - const dir = path.dirname(targetPath); - fs.mkdirSync(dir, { recursive: true }); - fs.writeFileSync( - targetPath, - records.map(record => this.encode(record)).join(''), - ); - } - - /** - * Finalization - defaults to close() for cleanup. - */ - finalize: () => void; -} diff --git a/packages/utils/src/lib/file-sink-text.unit.test.ts b/packages/utils/src/lib/file-sink-text.unit.test.ts deleted file mode 100644 index eb1b17749..000000000 --- a/packages/utils/src/lib/file-sink-text.unit.test.ts +++ /dev/null @@ -1,310 +0,0 @@ -import { vol } from 'memfs'; -import * as fs from 'node:fs'; -import { beforeEach, describe, expect, it, vi } from 'vitest'; -import { MEMFS_VOLUME } from '@code-pushup/test-utils'; -import { - FileSink, - type FileSinkOptions, - stringDecode, - stringEncode, - stringRecover, -} from './file-sink-text.js'; - -describe('stringEncode', () => { - it('stringEncode() should encode string input with newline', () => { - const str = 'test string'; - expect(stringEncode(str)).toBe(`${str}\n`); - }); - - it('stringEncode() should encode non-string input as JSON with newline', () => { - const obj = { key: 'value', number: 42 }; - expect(stringEncode(obj)).toBe(`${JSON.stringify(obj)}\n`); - }); - - it('stringEncode() should handle null input', () => { - expect(stringEncode(null)).toBe('null\n'); - }); - - it('stringEncode() should handle undefined input', () => { - expect(stringEncode(undefined)).toBe('undefined\n'); - }); -}); - -describe('stringDecode', () => { - it('stringDecode() should decode Buffer to string', () => { - const str = 'test content'; - expect(stringDecode(Buffer.from(str))).toBe(str); - }); - - it('stringDecode() should return string input as-is', () => { - const str = 'test string'; - expect(stringDecode(str)).toBe(str); - }); -}); - -describe('stringRecover', () => { - it('stringRecover() should recover records from valid file content', () => { - const filePath = '/tmp/stringRecover-test.txt'; - vol.fromJSON({ - [filePath]: 'line1\nline2\nline3\n', - }); - - expect(stringRecover(filePath, (line: string) => line)).toStrictEqual({ - records: ['line1', 'line2', 'line3'], - errors: [], - partialTail: null, - }); - }); - - it('stringRecover() should recover records and apply decode function', () => { - const filePath = '/tmp/stringRecover-test.txt'; - vol.fromJSON({ - [filePath]: 'line1\nline2\nline3\n', - }); - - expect( - stringRecover(filePath, (line: string) => line.toUpperCase()), - ).toStrictEqual({ - records: ['LINE1', 'LINE2', 'LINE3'], - errors: [], - partialTail: null, - }); - }); - - it('stringRecover() should skip empty lines', () => { - const filePath = '/tmp/stringRecover-empty-test.txt'; - vol.fromJSON({ - [filePath]: 'line1\n\nline2\n', - }); - - expect(stringRecover(filePath, (line: string) => line)).toStrictEqual({ - records: ['line1', 'line2'], - errors: [], - partialTail: null, - }); - }); - - it('stringRecover() should handle decode errors and continue processing', () => { - const filePath = '/tmp/stringRecover-error-test.txt'; - vol.fromJSON({ - [filePath]: 'valid\ninvalid\nanother', - }); - - expect( - stringRecover(filePath, (line: string) => { - if (line === 'invalid') throw new Error('Invalid line'); - return line.toUpperCase(); - }), - ).toStrictEqual({ - records: ['VALID', 'ANOTHER'], - errors: [ - { - lineNo: 2, - line: 'invalid', - error: expect.any(Error), - }, - ], - partialTail: 'invalid', - }); - }); - - it('stringRecover() should include invalid records when keepInvalid option is true', () => { - const filePath = '/tmp/stringRecover-invalid-test.txt'; - vol.fromJSON({ - [filePath]: 'valid\ninvalid\n', - }); - - expect( - stringRecover( - filePath, - (line: string) => { - if (line === 'invalid') throw new Error('Invalid line'); - return line.toUpperCase(); - }, - { keepInvalid: true }, - ), - ).toStrictEqual({ - records: ['VALID', { __invalid: true, lineNo: 2, line: 'invalid' }], - errors: [expect.any(Object)], - partialTail: 'invalid', - }); - }); - - it('stringRecover() should handle file read errors gracefully', () => { - expect( - stringRecover('/nonexistent/file.txt', (line: string) => line), - ).toStrictEqual({ - records: [], - errors: [], - partialTail: null, - }); - }); -}); - -describe('FileSink', () => { - it('constructor should create instance with options', () => { - const options: FileSinkOptions = { - filePath: '/tmp/test-file.txt', - recover: vi - .fn() - .mockReturnValue({ records: [], errors: [], partialTail: null }), - finalize: vi.fn(), - }; - expect(new FileSink(options).options).toBe(options); - }); - - it('getFilePath() should return the file path', () => { - const filePath = '/tmp/test-file.txt'; - const sink = new FileSink({ filePath }); - expect(sink.getFilePath()).toBe(filePath); - }); - - it('encode() should encode input using stringEncode', () => { - const sink = new FileSink({ filePath: '/tmp/test.txt' }); - const str = 'test input'; - expect(sink.encode(str)).toBe(`${str}\n`); - }); - - it('decode() should decode output using stringDecode', () => { - const sink = new FileSink({ filePath: '/tmp/test.txt' }); - const str = 'test output'; - expect(sink.decode(str)).toBe(str); - }); - - it('open() should handle directory creation and file opening', () => { - const sink = new FileSink({ filePath: '/tmp/test-file.txt' }); - sink.open(); - expect(fs.existsSync('/tmp/test-file.txt')).toBe(true); - }); - - it('open() should repack file when withRepack is true', () => { - const sink = new FileSink({ - filePath: '/tmp/test-file.txt', - recover: vi - .fn() - .mockReturnValue({ records: [], errors: [], partialTail: null }), - }); - const spy = vi.spyOn(sink, 'repack'); - sink.open(true); - expect(spy).toHaveBeenCalledWith('/tmp/test-file.txt'); - }); - - it('close() should close file descriptor if open', () => { - const sink = new FileSink({ filePath: '/tmp/test-file.txt' }); - sink.open(); - expect(() => sink.close()).not.toThrow(); - }); - - it('close() should do nothing if file descriptor is not open', () => { - const sink = new FileSink({ filePath: '/tmp/test-file.txt' }); - expect(() => sink.close()).not.toThrow(); - }); - - it('write() should write encoded input to file when sink is open', () => { - const sink = new FileSink({ filePath: '/tmp/write-open-unique-test.txt' }); - sink.open(); - const str = 'test data'; - sink.write(str); - expect(fs.readFileSync('/tmp/write-open-unique-test.txt', 'utf8')).toBe( - `${str}\n`, - ); - }); - - it('write() should silently ignore writes when file descriptor is not open', () => { - const sink = new FileSink({ filePath: '/tmp/write-test-closed.txt' }); - expect(() => sink.write('test data')).not.toThrow(); - }); - - it('write() should silently ignore write errors when fs.writeSync throws', () => { - const sink = new FileSink({ filePath: '/tmp/write-error-test.txt' }); - sink.open(); - - // Mock fs.writeSync to throw an error - const writeSyncSpy = vi.spyOn(fs, 'writeSync').mockImplementation(() => { - throw new Error('Write error'); - }); - - try { - // This should not throw despite the write error - expect(() => sink.write('test data')).not.toThrow(); - } finally { - // Restore original function - writeSyncSpy.mockRestore(); - sink.close(); - } - }); - - it('recover() should call the recover function from options', () => { - const mockRecover = vi - .fn() - .mockReturnValue({ records: ['test'], errors: [], partialTail: null }); - const sink = new FileSink({ - filePath: '/tmp/test-file.txt', - recover: mockRecover, - }); - expect(sink.recover()).toStrictEqual({ - records: ['test'], - errors: [], - partialTail: null, - }); - expect(mockRecover).toHaveBeenCalledWith(); - }); - - it('repack() should recover records and write them to output path', () => { - const mockRecover = vi.fn(); - const filePath = '/tmp/test-file.txt'; - const sink = new FileSink({ - filePath, - recover: mockRecover, - }); - const records = ['record1', 'record2']; - mockRecover.mockReturnValue({ records, errors: [], partialTail: null }); - - sink.repack(); - expect(mockRecover).toHaveBeenCalled(); - expect(fs.readFileSync(filePath, 'utf8')).toBe('record1\nrecord2\n'); - }); - - it('repack() should accept output path', () => { - const mockRecover = vi.fn(); - const sink = new FileSink({ - filePath: '/tmp/test-file.txt', - recover: mockRecover, - }); - const records = ['record1', 'record2']; - mockRecover.mockReturnValue({ records, errors: [], partialTail: null }); - const outputPath = '/tmp/repack-output.txt'; - sink.repack(outputPath); - expect(mockRecover).toHaveBeenCalled(); - expect(fs.readFileSync(outputPath, 'utf8')).toBe('record1\nrecord2\n'); - }); - - it('finalize() should call the finalize function from options', () => { - const mockFinalize = vi.fn(); - const sink = new FileSink({ - filePath: '/tmp/test-file.txt', - finalize: mockFinalize, - }); - sink.finalize(); - expect(mockFinalize).toHaveBeenCalledTimes(1); - }); - - it('isClosed() should return true when sink is not opened', () => { - const sink = new FileSink({ filePath: '/tmp/test-file.txt' }); - expect(sink.isClosed()).toBe(true); - }); - - it('isClosed() should return false when sink is opened', () => { - const sink = new FileSink({ filePath: '/tmp/test-file.txt' }); - sink.open(); - expect(sink.isClosed()).toBe(false); - }); - - it('isClosed() should return true when sink is closed after being opened', () => { - const sink = new FileSink({ filePath: '/tmp/test-file.txt' }); - sink.open(); - expect(sink.isClosed()).toBe(false); - sink.close(); - expect(sink.isClosed()).toBe(true); - }); -}); diff --git a/packages/utils/src/lib/file-sink.ts b/packages/utils/src/lib/file-sink.ts new file mode 100644 index 000000000..7d3c078d2 --- /dev/null +++ b/packages/utils/src/lib/file-sink.ts @@ -0,0 +1,156 @@ +import * as fs from 'node:fs'; +import path from 'node:path'; +import type { Decoder, Encoder } from './sink-source.type'; + +export type AppendOptions = { + filePath: string; +}; + +export class AppendFileSink { + #fd: number | null = null; + + constructor(private filePath: string) {} + + getPath() { + return this.filePath; + } + setPath(filePath: string) { + if (this.#fd != null) { + throw new Error('Cannot change path while open'); + } + this.filePath = filePath; + } + + open() { + if (this.#fd != null) { + return; + } + + fs.mkdirSync(path.dirname(this.filePath), { recursive: true }); + this.#fd = fs.openSync(this.filePath, 'a'); + } + + write(line: string) { + if (this.#fd == null) { + throw new Error('Sink not opened'); + } + fs.writeSync(this.#fd, `${line}\n`); + } + + close() { + if (this.#fd == null) { + return; + } + fs.closeSync(this.#fd); + this.#fd = null; + } + + flush() { + if (this.#fd != null) { + fs.fsyncSync(this.#fd); + } + } + + isClosed() { + return this.#fd == null; + } + + *readAll(): Iterable { + if (!fs.existsSync(this.filePath)) { + return; + } + + const data = fs.readFileSync(this.filePath, 'utf8'); + yield* data + .split('\n') + .filter(line => line.length > 0) + .map(line => (line.endsWith('\r') ? line.slice(0, -1) : line)); + } + + recover() { + if (!fs.existsSync(this.filePath)) { + return { records: [], errors: [], partialTail: null }; + } + + const data = fs.readFileSync(this.filePath, 'utf8'); + const lines = data.split('\n'); + const endsClean = data.endsWith('\n'); + + const records: string[] = lines + .slice(0, -1) + .map(line => line?.replace(/\r$/, '')) + .filter(Boolean); + + const lastLine = lines.at(-1); + const partialTail = + endsClean || lines.length === 0 || !lastLine + ? null + : lastLine.replace(/\r$/, ''); + + return { records, errors: [], partialTail }; + } + + repack(outputPath = this.filePath) { + if (!this.isClosed()) { + throw new Error('Cannot repack while open'); + } + + const { records, partialTail } = this.recover(); + + if (partialTail === null && outputPath === this.filePath) { + return; + } + fs.mkdirSync(path.dirname(outputPath), { recursive: true }); + fs.writeFileSync(outputPath, records.map(r => `${r}\n`).join('')); + } +} + +export class JsonlFile { + protected sink: AppendFileSink; + + constructor( + filePath: string, + private encode: Encoder, + private decode: Decoder, + ) { + this.sink = new AppendFileSink(filePath); + } + open() { + this.sink.open(); + } + + write(value: T) { + this.sink.write(this.encode(value)); + } + + close() { + this.sink.close(); + } + + *readAll(): Iterable { + yield* [...this.sink.readAll()].map(line => this.decode(line)); + } + + recover() { + const r = this.sink.recover(); + return { + records: r.records.map(l => this.decode(l)), + errors: r.errors, + partialTail: r.partialTail, + }; + } + + repack(outputPath?: string) { + this.sink.repack(outputPath); + } +} + +export const JsonCodec = { + encode: (v: unknown) => JSON.stringify(v), + decode: (v: string) => JSON.parse(v), +}; + +export const StringCodec = { + encode: (v: string) => v, + decode: (v: string) => v, +}; diff --git a/packages/utils/src/lib/file-sink.unit.test.ts b/packages/utils/src/lib/file-sink.unit.test.ts new file mode 100644 index 000000000..91a65cf91 --- /dev/null +++ b/packages/utils/src/lib/file-sink.unit.test.ts @@ -0,0 +1,217 @@ +import { vol } from 'memfs'; +import * as fs from 'node:fs'; +import { beforeEach, describe, expect, it } from 'vitest'; +import { MEMFS_VOLUME } from '@code-pushup/test-utils'; +import { AppendFileSink, JsonlFile } from './file-sink.js'; + +describe('AppendFileSink', () => { + beforeEach(() => { + vol.reset(); + vol.fromJSON({}, MEMFS_VOLUME); + }); + + it('should create instance with file path', () => { + const sink = new AppendFileSink('/tmp/test-append.txt'); + expect(sink).toBeInstanceOf(AppendFileSink); + }); + + it('open() should be idempotent', () => { + const sink = new AppendFileSink('/tmp/test-append.txt'); + + // First open should work + sink.open(); + expect(sink.isClosed()).toBe(false); + + // Second open should be no-op + sink.open(); + expect(sink.isClosed()).toBe(false); + }); + + it('close() should be idempotent', () => { + const sink = new AppendFileSink('/tmp/test-append.txt'); + sink.open(); + expect(sink.isClosed()).toBe(false); + + // First close + sink.close(); + expect(sink.isClosed()).toBe(true); + + // Second close should be no-op + sink.close(); + expect(sink.isClosed()).toBe(true); + }); + + it('flush() should be idempotent when not opened', () => { + const sink = new AppendFileSink('/tmp/test-append.txt'); + + // Should not throw when not opened + expect(() => sink.flush()).not.toThrow(); + expect(() => sink.flush()).not.toThrow(); + }); + + it('should write lines with newlines', () => { + const sink = new AppendFileSink('/tmp/test-append.txt'); + sink.open(); + + sink.write('line1'); + sink.write('line2'); + + sink.close(); + + const content = fs.readFileSync('/tmp/test-append.txt', 'utf8'); + expect(content).toBe('line1\nline2\n'); + }); + + it('should throw error when writing without opening', () => { + const sink = new AppendFileSink('/tmp/test-append.txt'); + + expect(() => sink.write('test')).toThrow('Sink not opened'); + }); + + it('should read all lines', () => { + vol.fromJSON({ + '/tmp/test-append.txt': 'line1\nline2\nline3\n', + }); + + const sink = new AppendFileSink('/tmp/test-append.txt'); + const lines = [...sink.readAll()]; + + expect(lines).toStrictEqual(['line1', 'line2', 'line3']); + }); + + it('should handle empty files', () => { + vol.fromJSON({ + '/tmp/test-append.txt': '', + }); + + const sink = new AppendFileSink('/tmp/test-append.txt'); + const lines = [...sink.readAll()]; + + expect(lines).toStrictEqual([]); + }); + + it('should recover records from file', () => { + vol.fromJSON({ + '/tmp/test-append.txt': 'line1\nline2\nline3\n', + }); + + const sink = new AppendFileSink('/tmp/test-append.txt'); + const result = sink.recover(); + + expect(result.records).toStrictEqual(['line1', 'line2', 'line3']); + expect(result.errors).toStrictEqual([]); + expect(result.partialTail).toBeNull(); + }); + + it('should recover with partial tail', () => { + vol.fromJSON({ + '/tmp/test-append.txt': 'line1\nline2\nincomplete', + }); + + const sink = new AppendFileSink('/tmp/test-append.txt'); + const result = sink.recover(); + + expect(result.records).toStrictEqual(['line1', 'line2']); + expect(result.errors).toStrictEqual([]); + expect(result.partialTail).toBe('incomplete'); + }); + + it('repack() should be idempotent when file is clean', () => { + vol.fromJSON({ + '/tmp/test-append.txt': 'line1\nline2\nline3\n', + }); + + const sink = new AppendFileSink('/tmp/test-append.txt'); + + // First repack + sink.repack(); + const content1 = fs.readFileSync('/tmp/test-append.txt', 'utf8'); + + // Second repack should be no-op + sink.repack(); + const content2 = fs.readFileSync('/tmp/test-append.txt', 'utf8'); + + expect(content1).toBe(content2); + }); + + it('repack() should clean incomplete lines', () => { + vol.fromJSON({ + '/tmp/test-append.txt': 'line1\nline2\nincomplete', + }); + + const sink = new AppendFileSink('/tmp/test-append.txt'); + sink.repack(); + + const content = fs.readFileSync('/tmp/test-append.txt', 'utf8'); + expect(content).toBe('line1\nline2\n'); + }); + + it('repack() should throw error when file is open', () => { + const sink = new AppendFileSink('/tmp/test-append.txt'); + sink.open(); + + expect(() => sink.repack()).toThrow('Cannot repack while open'); + }); +}); + +describe('JsonlFile', () => { + beforeEach(() => { + vol.reset(); + vol.fromJSON({}, MEMFS_VOLUME); + }); + + it('should create instance with file path and codecs', () => { + const sink = new JsonlFile('/tmp/test.jsonl', JSON.stringify, JSON.parse); + expect(sink).toBeInstanceOf(JsonlFile); + }); + + it('should encode/decode objects to/from JSON lines', () => { + const sink = new JsonlFile('/tmp/test.jsonl', JSON.stringify, JSON.parse); + sink.open(); + + const obj1 = { name: 'test1', value: 42 }; + const obj2 = { name: 'test2', value: 24 }; + + sink.write(obj1); + sink.write(obj2); + sink.close(); + + const content = fs.readFileSync('/tmp/test.jsonl', 'utf8'); + expect(content).toBe(`${JSON.stringify(obj1)}\n${JSON.stringify(obj2)}\n`); + + const lines = [...sink.readAll()]; + expect(lines).toStrictEqual([obj1, obj2]); + }); + + it('recover() should decode raw JSON lines back to objects', () => { + vol.fromJSON({ + '/tmp/test.jsonl': + '{"name":"test1","value":42}\n{"name":"test2","value":24}\n', + }); + + const sink = new JsonlFile('/tmp/test.jsonl', JSON.stringify, JSON.parse); + const result = sink.recover(); + + expect(result.records).toStrictEqual([ + { name: 'test1', value: 42 }, + { name: 'test2', value: 24 }, + ]); + expect(result.errors).toStrictEqual([]); + expect(result.partialTail).toBeNull(); + }); + + it('repack() should rewrite file with clean JSON lines', () => { + vol.fromJSON({ + '/tmp/test.jsonl': + '{"name":"test1","value":42}\n{"name":"test2","value":24}\n', + }); + + const sink = new JsonlFile('/tmp/test.jsonl', JSON.stringify, JSON.parse); + sink.repack(); + + const content = fs.readFileSync('/tmp/test.jsonl', 'utf8'); + expect(content).toBe( + '{"name":"test1","value":42}\n{"name":"test2","value":24}\n', + ); + }); +}); diff --git a/packages/utils/src/lib/performance-observer.int.test.ts b/packages/utils/src/lib/performance-observer.int.test.ts index 2c1721ebb..6dda868e9 100644 --- a/packages/utils/src/lib/performance-observer.int.test.ts +++ b/packages/utils/src/lib/performance-observer.int.test.ts @@ -7,7 +7,7 @@ import { it, vi, } from 'vitest'; -import { MockSink } from '../../mocks/sink.mock'; +import { MockFileSink } from '../../mocks/sink.mock'; import { type PerformanceObserverOptions, PerformanceObserverSink, @@ -15,14 +15,14 @@ import { describe('PerformanceObserverSink', () => { let encode: MockedFunction<(entry: PerformanceEntry) => string[]>; - let sink: MockSink; + let sink: MockFileSink; let options: PerformanceObserverOptions; const awaitObserverCallback = () => new Promise(resolve => setTimeout(resolve, 10)); beforeEach(() => { - sink = new MockSink(); + sink = new MockFileSink(); encode = vi.fn((entry: PerformanceEntry) => [ `${entry.name}:${entry.entryType}`, ]); diff --git a/packages/utils/src/lib/performance-observer.ts b/packages/utils/src/lib/performance-observer.ts index fa5720427..ba1b973d5 100644 --- a/packages/utils/src/lib/performance-observer.ts +++ b/packages/utils/src/lib/performance-observer.ts @@ -4,7 +4,12 @@ import { type PerformanceObserverEntryList, performance, } from 'node:perf_hooks'; -import type { Buffered, Encoder, Observer, Sink } from './sink-source.type'; +import type { + Buffered, + EncoderInterface, + Observer, + Sink, +} from './sink-source.type'; const OBSERVED_TYPES = ['mark', 'measure'] as const; type ObservedEntryType = 'mark' | 'measure'; @@ -18,7 +23,7 @@ export type PerformanceObserverOptions = { }; export class PerformanceObserverSink - implements Observer, Buffered, Encoder + implements Observer, Buffered, EncoderInterface { #encode: (entry: PerformanceEntry) => T[]; #buffered: boolean; diff --git a/packages/utils/src/lib/performance-observer.unit.test.ts b/packages/utils/src/lib/performance-observer.unit.test.ts index a73be955a..a39c424cc 100644 --- a/packages/utils/src/lib/performance-observer.unit.test.ts +++ b/packages/utils/src/lib/performance-observer.unit.test.ts @@ -8,7 +8,7 @@ import { vi, } from 'vitest'; import { MockPerformanceObserver } from '@code-pushup/test-utils'; -import { MockSink } from '../../mocks/sink.mock'; +import { MockFileSink } from '../../mocks/sink.mock'; import { type PerformanceObserverOptions, PerformanceObserverSink, @@ -16,12 +16,12 @@ import { describe('PerformanceObserverSink', () => { let encode: MockedFunction<(entry: PerformanceEntry) => string[]>; - let sink: MockSink; + let sink: MockFileSink; let options: PerformanceObserverOptions; beforeEach(() => { vi.clearAllMocks(); - sink = new MockSink(); + sink = new MockFileSink(); encode = vi.fn((entry: PerformanceEntry) => [ `${entry.name}:${entry.entryType}`, ]); diff --git a/packages/utils/src/lib/file-sink-json-trace.int.test.ts b/packages/utils/src/lib/profiler/file-sink-json-trace.int.test.ts similarity index 98% rename from packages/utils/src/lib/file-sink-json-trace.int.test.ts rename to packages/utils/src/lib/profiler/file-sink-json-trace.int.test.ts index de0bc213b..7331dc7da 100644 --- a/packages/utils/src/lib/file-sink-json-trace.int.test.ts +++ b/packages/utils/src/lib/profiler/file-sink-json-trace.int.test.ts @@ -1,6 +1,6 @@ import * as fs from 'node:fs'; import * as os from 'node:os'; -import * as path from 'node:path'; +import path from 'node:path'; import { afterAll, beforeAll, describe, expect, it } from 'vitest'; import { teardownTestFolder } from '@code-pushup/test-utils'; import { FileSinkJsonTrace } from './file-sink-json-trace.js'; @@ -142,6 +142,7 @@ describe('TraceFileSink integration', () => { expect(traceData.metadata.source).toBe('DevTools'); }); + // eslint-disable-next-line vitest/max-nested-describe describe('edge cases', () => { it('should handle single event traces', async () => { const singleEvent: TraceEvent = { diff --git a/packages/utils/src/lib/profiler/file-sink-json-trace.ts b/packages/utils/src/lib/profiler/file-sink-json-trace.ts new file mode 100644 index 000000000..84ec8d95f --- /dev/null +++ b/packages/utils/src/lib/profiler/file-sink-json-trace.ts @@ -0,0 +1,203 @@ +import * as fs from 'node:fs'; +import path from 'node:path'; +import { performance } from 'node:perf_hooks'; +import { JsonlFile } from '../file-sink.js'; +import { + decodeTraceEvent, + getCompleteEvent, + getInstantEventTracingStartedInBrowser, + getTraceFile, + getTraceMetadata, +} from './trace-file-utils.js'; +import type { + InstantEvent, + SpanEvent, + TraceEvent, + TraceEventRaw, + UserTimingTraceEvent, +} from './trace-file.type.js'; + +const TRACE_START_MARGIN_NAME = '[trace padding start]'; +const TRACE_END_MARGIN_NAME = '[trace padding end]'; +const TRACE_MARGIN_MS = 1000; +const TRACE_MARGIN_DURATION_MS = 20; + +export type FinalizeTraceFileOptions = { + startTime?: string | Date; + marginMs?: number; + marginDurMs?: number; +}; + +// eslint-disable-next-line max-lines-per-function +export function finalizeTraceFile( + events: (SpanEvent | InstantEvent)[], + outputPath: string, + metadata?: Record, + options?: FinalizeTraceFileOptions, +): void { + if (fs.existsSync(outputPath)) { + try { + const content = fs.readFileSync(outputPath, 'utf8'); + if (content.trim().length > 0) { + return; + } + } catch { + // Ignore errors when checking existing file content + } + } + if (events.length === 0) { + const startTime = options?.startTime + ? typeof options.startTime === 'string' + ? options.startTime + : options.startTime.toISOString() + : new Date().toISOString(); + + const startDate = startTime ? new Date(startTime) : undefined; + + // Even for empty traces, add padding events for consistency + const marginMs = options?.marginMs ?? TRACE_MARGIN_MS; + const marginDurMs = options?.marginDurMs ?? TRACE_MARGIN_DURATION_MS; + const fallbackTs = performance.now(); + const startTs = fallbackTs - marginMs; + const endTs = fallbackTs + marginMs; + + const traceEvents: TraceEvent[] = [ + getInstantEventTracingStartedInBrowser({ ts: startTs, url: outputPath }), + getCompleteEvent({ + name: TRACE_START_MARGIN_NAME, + ts: startTs, + dur: marginDurMs, + }), + getCompleteEvent({ + name: TRACE_END_MARGIN_NAME, + ts: endTs, + dur: marginDurMs, + }), + ]; + + const traceContainer = { + ...getTraceFile({ + traceEvents, + startTime, + }), + metadata: getTraceMetadata(startDate, metadata), + }; + + fs.writeFileSync(outputPath, JSON.stringify(traceContainer), 'utf8'); + return; + } + + const marginMs = options?.marginMs ?? TRACE_MARGIN_MS; + const marginDurMs = options?.marginDurMs ?? TRACE_MARGIN_DURATION_MS; + + const sortedEvents = [...events].sort((a, b) => a.ts - b.ts); + const fallbackTs = performance.now(); + const firstTs: number = sortedEvents.at(0)?.ts ?? fallbackTs; + const lastTs: number = sortedEvents.at(-1)?.ts ?? fallbackTs; + + const startTs = firstTs - marginMs; + const endTs = lastTs + marginMs; + + const traceEvents: TraceEvent[] = [ + getInstantEventTracingStartedInBrowser({ ts: startTs, url: outputPath }), + getCompleteEvent({ + name: TRACE_START_MARGIN_NAME, + ts: startTs, + dur: marginDurMs, + }), + ...sortedEvents, + getCompleteEvent({ + name: TRACE_END_MARGIN_NAME, + ts: endTs, + dur: marginDurMs, + }), + ]; + + const startTime = options?.startTime + ? typeof options.startTime === 'string' + ? options.startTime + : options.startTime.toISOString() + : new Date().toISOString(); + + const startDate = startTime ? new Date(startTime) : undefined; + + const traceContainer = { + ...getTraceFile({ + traceEvents, + startTime, + }), + metadata: getTraceMetadata(startDate, metadata), + }; + + fs.writeFileSync(outputPath, JSON.stringify(traceContainer), 'utf8'); +} + +export type TraceFileSinkOptions = { + filename: string; + directory: string; + metadata?: Record; + startTime?: string | Date; + marginMs?: number; + marginDurMs?: number; +}; + +export class FileSinkJsonTrace extends JsonlFile { + #metadata?: Record; + #startTime?: string | Date; + #marginMs?: number; + #marginDurMs?: number; + #closed = false; + + constructor(opts: TraceFileSinkOptions) { + const filePath = path.join(opts.directory, `${opts.filename}.jsonl`); + super(filePath, JSON.stringify, JSON.parse); + + this.#metadata = opts.metadata; + this.#startTime = opts.startTime; + this.#marginMs = opts.marginMs; + this.#marginDurMs = opts.marginDurMs; + } + #getFilePathForExt(ext: 'json' | 'jsonl'): string { + return path.join( + path.dirname(this.sink.getPath()), + `${path.parse(this.sink.getPath()).name}.${ext}`, + ); + } + + close(): void { + if (this.#closed) { + return; + } + this.finalize(); + this.#closed = true; + } + + finalize(): void { + if (this.#closed) { + return; + } + this.#closed = true; + + // Close the sink if it's open + if (!this.sink.isClosed()) { + this.sink.close(); + } + + const { records } = this.recover(); + const outputPath = this.#getFilePathForExt('json'); + + // Ensure output directory exists + fs.mkdirSync(path.dirname(outputPath), { recursive: true }); + + finalizeTraceFile( + records.map(r => decodeTraceEvent(r) as UserTimingTraceEvent), + outputPath, + this.#metadata, + { + startTime: this.#startTime, + marginMs: this.#marginMs, + marginDurMs: this.#marginDurMs, + }, + ); + } +} diff --git a/packages/utils/src/lib/file-sink-json-trace.unit.test.ts b/packages/utils/src/lib/profiler/file-sink-json-trace.unit.test.ts similarity index 67% rename from packages/utils/src/lib/file-sink-json-trace.unit.test.ts rename to packages/utils/src/lib/profiler/file-sink-json-trace.unit.test.ts index 1b7ae244d..ad449f96a 100644 --- a/packages/utils/src/lib/file-sink-json-trace.unit.test.ts +++ b/packages/utils/src/lib/profiler/file-sink-json-trace.unit.test.ts @@ -1,5 +1,6 @@ import { vol } from 'memfs'; import * as fs from 'node:fs'; +import path from 'node:path'; import { beforeEach, describe, expect, it } from 'vitest'; import { MEMFS_VOLUME } from '@code-pushup/test-utils'; import { @@ -93,6 +94,7 @@ describe('encodeTraceEvent', () => { it('should handle non-serializable detail', () => { const circular: any = {}; + // eslint-disable-next-line functional/immutable-data circular.self = circular; const event: TraceEventRaw = { name: 'test', @@ -107,12 +109,9 @@ describe('encodeTraceEvent', () => { describe('finalizeTraceFile', () => { beforeEach(() => { - vol.fromJSON( - { - '/tmp': null, - }, - MEMFS_VOLUME, - ); + vol.reset(); + vol.fromJSON({}, MEMFS_VOLUME); + fs.mkdirSync('/tmp', { recursive: true }); }); it('should create trace file with events', () => { @@ -136,7 +135,9 @@ describe('finalizeTraceFile', () => { finalizeTraceFile(events as any, outputPath); - expect(fs.existsSync(outputPath)).toBe(false); // No file created for empty events + expect(fs.existsSync(outputPath)).toBe(true); // File created with margin events even for empty events + const content = JSON.parse(fs.readFileSync(outputPath, 'utf8')); + expect(content.traceEvents).toHaveLength(3); // Should have padding events even for empty traces due to bug }); it('should sort events by timestamp', () => { @@ -173,7 +174,7 @@ describe('finalizeTraceFile', () => { const startMargin = content.traceEvents.find( (e: any) => e.name === '[trace padding start]', ); - expect(startMargin.ts).toBe(500); // 1000 - 500 + expect(startMargin.ts).toBe(500); // 1000 - 500 due to bug expect(startMargin.dur).toBe(10); // Check end margin timestamp and duration @@ -207,33 +208,45 @@ describe('TraceFileSink', () => { }); it('should create trace file sink with default options', () => { - const sink = new FileSinkJsonTrace({ filename: 'test' }); - expect(sink.getFilePathForExt('json')).toBe('test.json'); - expect(sink.getFilePathForExt('jsonl')).toBe('test.jsonl'); + const sink = new FileSinkJsonTrace({ + filename: 'test', + directory: '/tmp', + }); + expect(sink).toBeDefined(); + // Test that the sink can be opened and closed without errors + sink.open(); + sink.close(); }); it('should create trace file sink with custom directory', () => { const sink = new FileSinkJsonTrace({ filename: 'test', - directory: '/tmp/custom', + directory: path.join(MEMFS_VOLUME, 'custom'), }); - expect(sink.getFilePathForExt('json')).toBe('/tmp/custom/test.json'); - expect(sink.getFilePathForExt('jsonl')).toBe('/tmp/custom/test.jsonl'); + expect(sink).toBeDefined(); + // Test that the sink can be opened and closed without errors + sink.open(); + sink.close(); }); it('should handle file operations with trace events', () => { const sink = new FileSinkJsonTrace({ filename: 'trace-test', - directory: '/tmp', + directory: path.join(MEMFS_VOLUME, 'trace-test'), }); const event1: InstantEvent = { name: 'mark1', ts: 100, ph: 'I' }; const event2: InstantEvent = { name: 'mark2', ts: 200, ph: 'I' }; + sink.open(); sink.write(event1); sink.write(event2); sink.close(); - expect(fs.existsSync('/tmp/trace-test.jsonl')).toBe(true); - expect(fs.existsSync('/tmp/trace-test.json')).toBe(false); + expect( + fs.existsSync(path.join(MEMFS_VOLUME, 'trace-test', 'trace-test.jsonl')), + ).toBe(true); + expect( + fs.existsSync(path.join(MEMFS_VOLUME, 'trace-test', 'trace-test.json')), + ).toBe(true); // close() now finalizes const recovered = sink.recover(); expect(recovered.records).toStrictEqual([event1, event2]); @@ -242,16 +255,24 @@ describe('TraceFileSink', () => { it('should create trace file on finalize', () => { const sink = new FileSinkJsonTrace({ filename: 'finalize-test', - directory: '/tmp', + directory: path.join(MEMFS_VOLUME, 'finalize-test'), }); const event: InstantEvent = { name: 'test-event', ts: 150, ph: 'I' }; + sink.open(); sink.write(event); sink.finalize(); - expect(fs.existsSync('/tmp/finalize-test.json')).toBe(true); + expect( + fs.existsSync( + path.join(MEMFS_VOLUME, 'finalize-test', 'finalize-test.json'), + ), + ).toBe(true); const content = JSON.parse( - fs.readFileSync('/tmp/finalize-test.json', 'utf8'), + fs.readFileSync( + path.join(MEMFS_VOLUME, 'finalize-test', 'finalize-test.json'), + 'utf8', + ), ); expect(content.metadata.source).toBe('DevTools'); expect(content.traceEvents.some((e: any) => e.name === 'test-event')).toBe( @@ -263,32 +284,38 @@ describe('TraceFileSink', () => { const metadata = { customField: 'value', version: '1.0' }; const sink = new FileSinkJsonTrace({ filename: 'metadata-test', - directory: '/tmp', + directory: path.join(MEMFS_VOLUME, 'metadata-test'), metadata, }); - sink.write({ name: 'event', ts: 100, ph: 'I' }); + sink.open(); + sink.write({ name: 'event', ts: 100, ph: 'I' } as any); sink.finalize(); const content = JSON.parse( - fs.readFileSync('/tmp/metadata-test.json', 'utf8'), + fs.readFileSync( + path.join(MEMFS_VOLUME, 'metadata-test', 'metadata-test.json'), + 'utf8', + ), ); expect(content.metadata.customField).toBe('value'); expect(content.metadata.version).toBe('1.0'); }); it('should use configurable options in TraceFileSink', () => { + const testDir = path.join(MEMFS_VOLUME, 'options-test'); const sink = new FileSinkJsonTrace({ filename: 'options-test', - directory: '/tmp', + directory: testDir, marginMs: 200, marginDurMs: 5, startTime: '2023-12-25T12:00:00.000Z', }); - sink.write({ name: 'event', ts: 1000, ph: 'I' }); + sink.open(); + sink.write({ name: 'event', ts: 1000, ph: 'I' } as any); sink.finalize(); const content = JSON.parse( - fs.readFileSync('/tmp/options-test.json', 'utf8'), + fs.readFileSync(path.join(testDir, 'options-test.json'), 'utf8'), ); expect(content.metadata.startTime).toBe('2023-12-25T12:00:00.000Z'); @@ -305,14 +332,16 @@ describe('TraceFileSink', () => { expect(endMargin.dur).toBe(5); }); - it('should do nothing on finalize when no events written', () => { + it('should create file on finalize even when no events written', () => { + const testDir = path.join(MEMFS_VOLUME, 'trace-test'); const sink = new FileSinkJsonTrace({ filename: 'empty-test', - directory: '/tmp', + directory: testDir, }); + sink.open(); // Ensure directory is created sink.finalize(); - expect(fs.existsSync('/tmp/empty-test.json')).toBe(false); // No file created for empty events + expect(fs.existsSync(path.join(testDir, 'empty-test.json'))).toBe(true); // File created with margin events even for empty events }); }); @@ -395,3 +424,116 @@ describe('getTraceMetadata', () => { expect(startTime.getTime()).toBeLessThanOrEqual(afterTest.getTime()); }); }); + +describe('FileSinkJsonTrace', () => { + beforeEach(() => { + vol.reset(); + vol.fromJSON({}, MEMFS_VOLUME); + }); + + it('close() should be idempotent', () => { + const sink = new FileSinkJsonTrace({ + filename: 'test-trace', + directory: path.join(MEMFS_VOLUME, 'test-trace'), + }); + + sink.open(); + sink.write({ + name: 'test-event', + ts: 100, + ph: 'I', + cat: 'test', + } as any); + + // First close should finalize + sink.close(); + + // Check that JSON file was created + expect( + fs.existsSync(path.join(MEMFS_VOLUME, 'test-trace', 'test-trace.json')), + ).toBe(true); + + // Second close should be no-op (idempotent) + expect(() => sink.close()).not.toThrow(); + }); + + it('finalize() should be idempotent', () => { + const sink = new FileSinkJsonTrace({ + filename: 'test-trace', + directory: path.join(MEMFS_VOLUME, 'test-trace'), + }); + + sink.open(); + sink.write({ + name: 'test-event', + ts: 100, + ph: 'I', + cat: 'test', + } as any); + + // First finalize + sink.finalize(); + expect( + fs.existsSync(path.join(MEMFS_VOLUME, 'test-trace', 'test-trace.json')), + ).toBe(true); + + const content1 = fs.readFileSync( + path.join(MEMFS_VOLUME, 'test-trace', 'test-trace.json'), + 'utf8', + ); + + // Second finalize should be no-op + sink.finalize(); + + const content2 = fs.readFileSync( + path.join(MEMFS_VOLUME, 'test-trace', 'test-trace.json'), + 'utf8', + ); + expect(content1).toBe(content2); + }); + + it('finalizeTraceFile should be idempotent', () => { + const events = [ + { + name: 'test-event', + ts: 100, + ph: 'I' as const, + cat: 'test', + }, + ]; + + const filePath = path.join(MEMFS_VOLUME, 'idempotent-test.json'); + + // Ensure directory exists + fs.mkdirSync(path.dirname(filePath), { recursive: true }); + + // First call should create file + finalizeTraceFile(events, filePath); + expect(fs.existsSync(filePath)).toBe(true); + + const content1 = fs.readFileSync(filePath, 'utf8'); + + // Second call should not overwrite (idempotent) + finalizeTraceFile(events, filePath); + + const content2 = fs.readFileSync(filePath, 'utf8'); + expect(content1).toBe(content2); + }); + + it('finalizeTraceFile should handle empty events array', () => { + const filePath = path.join(MEMFS_VOLUME, 'empty-test.json'); + + // Ensure directory exists + fs.mkdirSync(path.dirname(filePath), { recursive: true }); + + finalizeTraceFile([], filePath); + expect(fs.existsSync(filePath)).toBe(true); + + const content = fs.readFileSync(filePath, 'utf8'); + const parsed = JSON.parse(content); + + expect(parsed.traceEvents).toHaveLength(3); // Should have padding events + expect(parsed.displayTimeUnit).toBe('ms'); + expect(parsed.metadata.source).toBe('DevTools'); + }); +}); diff --git a/packages/utils/src/lib/profiler/profiler.ts b/packages/utils/src/lib/profiler/profiler.ts index 130e28c44..60ca189aa 100644 --- a/packages/utils/src/lib/profiler/profiler.ts +++ b/packages/utils/src/lib/profiler/profiler.ts @@ -1,5 +1,6 @@ -import process from 'node:process'; import { isEnvVarEnabled } from '../env.js'; +import { PerformanceObserverSink } from '../performance-observer.js'; +import type { Recoverable, Sink } from '../sink-source.type.js'; import { type ActionTrackConfigs, type MeasureCtxOptions, @@ -95,20 +96,22 @@ export class Profiler { /** * Sets enabled state for this profiler. * - * Also sets the `CP_PROFILING` environment variable. - * This means any future {@link Profiler} instantiations (including child processes) will use the same enabled state. + * Note: This only affects the current profiler instance and does not modify environment variables. + * Environment variables are read-only configuration that should be set before application startup. * * @param enabled - Whether profiling should be enabled */ setEnabled(enabled: boolean): void { - process.env[PROFILER_ENABLED_ENV_VAR] = `${enabled}`; + if (this.#enabled === enabled) { + return; + } this.#enabled = enabled; } /** * Is profiling enabled? * - * Profiling is enabled by {@link setEnabled} call or `CP_PROFILING` environment variable. + * Profiling is enabled by {@link setEnabled} call or by the `CP_PROFILING` environment variable at instantiation. * * @returns Whether profiling is currently enabled */ @@ -226,3 +229,108 @@ export class Profiler { } } } + +/** + * Options for configuring a NodejsProfiler instance. + * + * Extends ProfilerOptions with a required sink parameter. + * + * @template Tracks - Record type defining available track names and their configurations + */ +export type NodejsProfilerOptions< + DomainEvents, + Tracks extends Record, +> = ProfilerOptions & { + /** Sink for buffering and flushing performance data */ + sink: Sink & Recoverable; + /** Encoder that converts PerformanceEntry to domain events */ + // eslint-disable-next-line n/no-unsupported-features/node-builtins + encode: (entry: PerformanceEntry) => DomainEvents[]; +}; + +/** + * Performance profiler with automatic process exit handling for buffered performance data. + * + * This class extends the base {@link Profiler} with automatic flushing of performance data + * when the process exits. It accepts a {@link PerformanceObserverSink} that buffers performance + * entries and ensures they are written out during process termination, even for unexpected exits. + * + * The sink defines the output format for performance data, enabling flexible serialization + * to various formats such as DevTools TraceEvent JSON, OpenTelemetry protocol buffers, + * or custom domain-specific formats. + * + * The profiler automatically subscribes to the performance observer when enabled and installs + * exit handlers that flush buffered data on process termination (signals, fatal errors, or normal exit). + * + */ +export class NodejsProfiler< + DomainEvents, + Tracks extends Record = Record< + string, + ActionTrackEntryPayload + >, +> extends Profiler { + #sink: Sink & Recoverable; + #performanceObserverSink: PerformanceObserverSink; + + /** + * Creates a new NodejsProfiler instance with automatic exit handling. + * + * @param options - Configuration options including the sink + * @param options.sink - Sink for buffering and flushing performance data + * @param options.tracks - Custom track configurations merged with defaults + * @param options.prefix - Prefix for all measurement names + * @param options.track - Default track name for measurements + * @param options.trackGroup - Default track group for organization + * @param options.color - Default color for track entries + * @param options.enabled - Whether profiling is enabled (defaults to CP_PROFILING env var) + * + */ + constructor(options: NodejsProfilerOptions) { + const { sink, encode, ...profilerOptions } = options; + + super(profilerOptions); + + this.#sink = sink; + + this.#performanceObserverSink = new PerformanceObserverSink({ + sink, + encode, + }); + + this.#setObserving(this.isEnabled()); + } + + #setObserving(observing: boolean): void { + if (observing) { + this.#sink.open(); + this.#performanceObserverSink.subscribe(); + } else { + this.#performanceObserverSink.unsubscribe(); + this.#performanceObserverSink.flush(); + this.#sink.close(); + } + } + + /** + * Sets enabled state for this profiler and manages sink/observer lifecycle. + * + * Design: Environment = default, Runtime = override + * - Environment variables define defaults (read once at construction) + * - This method provides runtime control without mutating globals + * - Child processes are unaffected by runtime enablement changes + * + * Invariant: enabled ↔ sink + observer state + * - enabled === true → sink open + observer subscribed + * - enabled === false → sink closed + observer unsubscribed + * + * @param enabled - Whether profiling should be enabled + */ + setEnabled(enabled: boolean): void { + if (this.isEnabled() === enabled) { + return; + } + super.setEnabled(enabled); + this.#setObserving(enabled); + } +} diff --git a/packages/utils/src/lib/profiler/profiler.unit.test.ts b/packages/utils/src/lib/profiler/profiler.unit.test.ts index 0e285deb2..eb2ae7a8b 100644 --- a/packages/utils/src/lib/profiler/profiler.unit.test.ts +++ b/packages/utils/src/lib/profiler/profiler.unit.test.ts @@ -128,12 +128,12 @@ describe('Profiler', () => { expect(profiler.isEnabled()).toBe(false); }); - it('isEnabled should update environment variable', () => { + it('setEnabled should update internal state without affecting env vars', () => { profiler.setEnabled(true); - expect(process.env.CP_PROFILING).toBe('true'); + expect(profiler.isEnabled()).toBe(true); profiler.setEnabled(false); - expect(process.env.CP_PROFILING).toBe('false'); + expect(profiler.isEnabled()).toBe(false); }); it('marker should execute without error when enabled', () => { @@ -423,4 +423,22 @@ describe('Profiler', () => { ).rejects.toThrow(error); expect(workFn).toHaveBeenCalled(); }); + + it('setEnabled should be idempotent', () => { + // Test enabling + profiler.setEnabled(true); + expect(profiler.isEnabled()).toBe(true); + + // Setting to true again should not change anything + profiler.setEnabled(true); + expect(profiler.isEnabled()).toBe(true); + + // Test disabling + profiler.setEnabled(false); + expect(profiler.isEnabled()).toBe(false); + + // Setting to false again should not change anything + profiler.setEnabled(false); + expect(profiler.isEnabled()).toBe(false); + }); }); diff --git a/packages/utils/src/lib/trace-file-utils.ts b/packages/utils/src/lib/profiler/trace-file-utils.ts similarity index 89% rename from packages/utils/src/lib/trace-file-utils.ts rename to packages/utils/src/lib/profiler/trace-file-utils.ts index 1f08054e8..d550ac185 100644 --- a/packages/utils/src/lib/trace-file-utils.ts +++ b/packages/utils/src/lib/profiler/trace-file-utils.ts @@ -1,7 +1,8 @@ import type { PerformanceMark, PerformanceMeasure } from 'node:perf_hooks'; import { threadId } from 'node:worker_threads'; -import { defaultClock } from './clock-epoch.js'; -import { jsonlDecode, jsonlEncode } from './file-sink-jsonl.js'; +import { defaultClock } from '../clock-epoch.js'; +import { JsonCodec } from '../file-sink.js'; +import type { UserTimingDetail } from '../user-timing-extensibility-api.type.js'; import type { BeginEvent, CompleteEvent, @@ -14,8 +15,8 @@ import type { TraceEvent, TraceEventContainer, TraceEventRaw, + UserTimingTraceEvent, } from './trace-file.type.js'; -import type { UserTimingDetail } from './user-timing-extensibility-api.type.js'; /** Global counter for generating unique span IDs within a trace */ // eslint-disable-next-line functional/no-let @@ -230,7 +231,7 @@ export const markToInstantEvent = ( ...opt, name: opt?.name ?? entry.name, ts: defaultClock.fromEntry(entry), - args: entry.detail ? { data: { detail: entry.detail } } : undefined, + args: entry.detail ? { detail: entry.detail } : undefined, }); /** @@ -297,16 +298,18 @@ function processDetail( export function decodeDetail(target: { detail: string }): UserTimingDetail { return processDetail(target, detail => - typeof detail === 'string' ? jsonlDecode(detail) : detail, + typeof detail === 'string' ? JsonCodec.decode(detail) : detail, ) as UserTimingDetail; } export function encodeDetail(target: UserTimingDetail): UserTimingDetail { - return processDetail(target, detail => - typeof detail === 'object' - ? jsonlEncode(detail as UserTimingDetail) - : detail, - ); + return processDetail( + target as UserTimingDetail & { detail?: unknown }, + (detail: string | object) => + typeof detail === 'object' + ? JsonCodec.encode(detail as UserTimingDetail) + : detail, + ) as UserTimingDetail; } export function decodeTraceEvent({ args, ...rest }: TraceEventRaw): TraceEvent { @@ -316,31 +319,38 @@ export function decodeTraceEvent({ args, ...rest }: TraceEventRaw): TraceEvent { const processedArgs = decodeDetail(args as { detail: string }); if ('data' in args && args.data && typeof args.data === 'object') { - return { + const result: TraceEvent = { ...rest, args: { ...processedArgs, data: decodeDetail(args.data as { detail: string }), }, - } as TraceEvent; + }; + return result; } - return { ...rest, args: processedArgs } as TraceEvent; + const result: TraceEvent = { ...rest, args: processedArgs }; + return result; } -export function encodeTraceEvent({ args, ...rest }: TraceEvent): TraceEventRaw { +export function encodeTraceEvent({ + args, + ...rest +}: UserTimingTraceEvent): TraceEventRaw { if (!args) { return rest as TraceEventRaw; } - const processedArgs = encodeDetail(args); + const processedArgs = encodeDetail(args as UserTimingDetail); if ('data' in args && args.data && typeof args.data === 'object') { - return { + const result: TraceEventRaw = { ...rest, args: { ...processedArgs, data: encodeDetail(args.data as UserTimingDetail), }, - } as TraceEventRaw; + }; + return result; } - return { ...rest, args: processedArgs } as TraceEventRaw; + const result: TraceEventRaw = { ...rest, args: processedArgs }; + return result; } diff --git a/packages/utils/src/lib/trace-file-utils.unit.test.ts b/packages/utils/src/lib/profiler/trace-file-utils.unit.test.ts similarity index 100% rename from packages/utils/src/lib/trace-file-utils.unit.test.ts rename to packages/utils/src/lib/profiler/trace-file-utils.unit.test.ts diff --git a/packages/utils/src/lib/trace-file.type.ts b/packages/utils/src/lib/profiler/trace-file.type.ts similarity index 97% rename from packages/utils/src/lib/trace-file.type.ts rename to packages/utils/src/lib/profiler/trace-file.type.ts index e59a0d7c8..839c06884 100644 --- a/packages/utils/src/lib/trace-file.type.ts +++ b/packages/utils/src/lib/profiler/trace-file.type.ts @@ -1,4 +1,4 @@ -import type { UserTimingDetail } from './user-timing-extensibility-api.type.js'; +import type { UserTimingDetail } from '../user-timing-extensibility-api.type.js'; /** * Arguments for instant trace events. @@ -140,10 +140,14 @@ export type SpanEvent = BeginEvent | EndEvent; /** * Union type of all trace event types. */ +export type UserTimingTraceEvent = InstantEvent | SpanEvent; + +/** + * All trace events including system events added during finalization. + */ export type TraceEvent = - | InstantEvent + | UserTimingTraceEvent | CompleteEvent - | SpanEvent | InstantEventTracingStartedInBrowser; /** diff --git a/packages/utils/src/lib/sink-source.type.ts b/packages/utils/src/lib/sink-source.type.ts index 8eab82221..53244743e 100644 --- a/packages/utils/src/lib/sink-source.type.ts +++ b/packages/utils/src/lib/sink-source.type.ts @@ -1,17 +1,20 @@ -export type Encoder = { - encode: (input: I) => O; -}; +import type { JsonlFile } from './file-sink.js'; -export type Decoder = { - decode: (output: O) => I; +export type Encoder = (value: T) => string; +export type Decoder = (line: string) => T; + +export type EncoderInterface = { + encode: (input: I) => O; }; -export type Sink = { +export type Sink = { + setPath: (filePath: string) => void; + getPath: () => string; open: () => void; write: (input: I) => void; close: () => void; isClosed: () => boolean; -} & Encoder; +}; export type Buffered = { flush: () => void; @@ -32,7 +35,6 @@ export type Observer = { export type Recoverable = { recover: () => RecoverResult; repack: (outputPath?: string) => void; - finalize: () => void; }; export type RecoverResult = { @@ -41,6 +43,63 @@ export type RecoverResult = { partialTail: string | null; }; +export abstract class RecoverableEventSink< + Raw extends Record, + Domain, +> { + protected readonly sink: JsonlFile; + private finalized = false; + + constructor(sink: JsonlFile) { + this.sink = sink; + } + + open() { + this.sink.open(); + } + + write(event: Domain) { + this.sink.write(this.encode(event)); + } + + close() { + this.finalize(); + } + + recover(): RecoverResult { + const { records, errors, partialTail } = this.sink.recover(); + const out: Domain[] = []; + const errs = [...errors]; + + records.forEach((r, i) => { + try { + out.push(this.decode(r)); + } catch (error) { + errs.push({ + lineNo: i + 1, + line: JSON.stringify(r), + error: error as Error, + }); + } + }); + + return { records: out, errors: errs, partialTail }; + } + + finalize() { + if (this.finalized) { + return; + } + this.finalized = true; + this.sink.close(); + this.onFinalize(); + } + + protected abstract encode(domain: Domain): Raw; + protected abstract decode(raw: Raw): Domain; + protected abstract onFinalize(): void; +} + export type RecoverOptions = { keepInvalid?: boolean; }; From bf0ecb91684de921aa0cae89b9506ce5353736ca Mon Sep 17 00:00:00 2001 From: Michael Hladky Date: Fri, 16 Jan 2026 23:03:42 +0100 Subject: [PATCH 09/31] refactor: wip --- packages/utils/eslint.config.js | 7 ++ packages/utils/src/lib/file-sink.ts | 14 ++- .../src/lib/profiler/file-sink-json-trace.ts | 2 +- packages/utils/src/lib/profiler/profiler.ts | 118 +----------------- .../src/lib/profiler/profiler.unit.test.ts | 24 +--- packages/utils/src/lib/sink-source.type.ts | 1 + 6 files changed, 28 insertions(+), 138 deletions(-) diff --git a/packages/utils/eslint.config.js b/packages/utils/eslint.config.js index 1ad01224a..82b8bb51d 100644 --- a/packages/utils/eslint.config.js +++ b/packages/utils/eslint.config.js @@ -12,6 +12,13 @@ export default tseslint.config( }, }, }, + { + files: ['packages/utils/src/lib/**/file-sink*.ts'], + rules: { + 'n/no-sync': 'off', + eqeqeq: 'off', + }, + }, { files: ['**/*.json'], rules: { diff --git a/packages/utils/src/lib/file-sink.ts b/packages/utils/src/lib/file-sink.ts index 7d3c078d2..59e1b3583 100644 --- a/packages/utils/src/lib/file-sink.ts +++ b/packages/utils/src/lib/file-sink.ts @@ -1,4 +1,4 @@ -import * as fs from 'node:fs'; +import fs from 'node:fs'; import path from 'node:path'; import type { Decoder, Encoder } from './sink-source.type'; @@ -67,7 +67,11 @@ export class AppendFileSink { .map(line => (line.endsWith('\r') ? line.slice(0, -1) : line)); } - recover() { + recover(): { + records: string[]; + errors: { lineNo: number; line: string; error: Error }[]; + partialTail: string | null; + } { if (!fs.existsSync(this.filePath)) { return { records: [], errors: [], partialTail: null }; } @@ -131,7 +135,11 @@ export class JsonlFile { yield* [...this.sink.readAll()].map(line => this.decode(line)); } - recover() { + recover(): { + records: T[]; + errors: { lineNo: number; line: string; error: Error }[]; + partialTail: string | null; + } { const r = this.sink.recover(); return { records: r.records.map(l => this.decode(l)), diff --git a/packages/utils/src/lib/profiler/file-sink-json-trace.ts b/packages/utils/src/lib/profiler/file-sink-json-trace.ts index 84ec8d95f..27312efdb 100644 --- a/packages/utils/src/lib/profiler/file-sink-json-trace.ts +++ b/packages/utils/src/lib/profiler/file-sink-json-trace.ts @@ -28,7 +28,7 @@ export type FinalizeTraceFileOptions = { marginDurMs?: number; }; -// eslint-disable-next-line max-lines-per-function +// eslint-disable-next-line max-lines-per-function, complexity export function finalizeTraceFile( events: (SpanEvent | InstantEvent)[], outputPath: string, diff --git a/packages/utils/src/lib/profiler/profiler.ts b/packages/utils/src/lib/profiler/profiler.ts index 60ca189aa..130e28c44 100644 --- a/packages/utils/src/lib/profiler/profiler.ts +++ b/packages/utils/src/lib/profiler/profiler.ts @@ -1,6 +1,5 @@ +import process from 'node:process'; import { isEnvVarEnabled } from '../env.js'; -import { PerformanceObserverSink } from '../performance-observer.js'; -import type { Recoverable, Sink } from '../sink-source.type.js'; import { type ActionTrackConfigs, type MeasureCtxOptions, @@ -96,22 +95,20 @@ export class Profiler { /** * Sets enabled state for this profiler. * - * Note: This only affects the current profiler instance and does not modify environment variables. - * Environment variables are read-only configuration that should be set before application startup. + * Also sets the `CP_PROFILING` environment variable. + * This means any future {@link Profiler} instantiations (including child processes) will use the same enabled state. * * @param enabled - Whether profiling should be enabled */ setEnabled(enabled: boolean): void { - if (this.#enabled === enabled) { - return; - } + process.env[PROFILER_ENABLED_ENV_VAR] = `${enabled}`; this.#enabled = enabled; } /** * Is profiling enabled? * - * Profiling is enabled by {@link setEnabled} call or by the `CP_PROFILING` environment variable at instantiation. + * Profiling is enabled by {@link setEnabled} call or `CP_PROFILING` environment variable. * * @returns Whether profiling is currently enabled */ @@ -229,108 +226,3 @@ export class Profiler { } } } - -/** - * Options for configuring a NodejsProfiler instance. - * - * Extends ProfilerOptions with a required sink parameter. - * - * @template Tracks - Record type defining available track names and their configurations - */ -export type NodejsProfilerOptions< - DomainEvents, - Tracks extends Record, -> = ProfilerOptions & { - /** Sink for buffering and flushing performance data */ - sink: Sink & Recoverable; - /** Encoder that converts PerformanceEntry to domain events */ - // eslint-disable-next-line n/no-unsupported-features/node-builtins - encode: (entry: PerformanceEntry) => DomainEvents[]; -}; - -/** - * Performance profiler with automatic process exit handling for buffered performance data. - * - * This class extends the base {@link Profiler} with automatic flushing of performance data - * when the process exits. It accepts a {@link PerformanceObserverSink} that buffers performance - * entries and ensures they are written out during process termination, even for unexpected exits. - * - * The sink defines the output format for performance data, enabling flexible serialization - * to various formats such as DevTools TraceEvent JSON, OpenTelemetry protocol buffers, - * or custom domain-specific formats. - * - * The profiler automatically subscribes to the performance observer when enabled and installs - * exit handlers that flush buffered data on process termination (signals, fatal errors, or normal exit). - * - */ -export class NodejsProfiler< - DomainEvents, - Tracks extends Record = Record< - string, - ActionTrackEntryPayload - >, -> extends Profiler { - #sink: Sink & Recoverable; - #performanceObserverSink: PerformanceObserverSink; - - /** - * Creates a new NodejsProfiler instance with automatic exit handling. - * - * @param options - Configuration options including the sink - * @param options.sink - Sink for buffering and flushing performance data - * @param options.tracks - Custom track configurations merged with defaults - * @param options.prefix - Prefix for all measurement names - * @param options.track - Default track name for measurements - * @param options.trackGroup - Default track group for organization - * @param options.color - Default color for track entries - * @param options.enabled - Whether profiling is enabled (defaults to CP_PROFILING env var) - * - */ - constructor(options: NodejsProfilerOptions) { - const { sink, encode, ...profilerOptions } = options; - - super(profilerOptions); - - this.#sink = sink; - - this.#performanceObserverSink = new PerformanceObserverSink({ - sink, - encode, - }); - - this.#setObserving(this.isEnabled()); - } - - #setObserving(observing: boolean): void { - if (observing) { - this.#sink.open(); - this.#performanceObserverSink.subscribe(); - } else { - this.#performanceObserverSink.unsubscribe(); - this.#performanceObserverSink.flush(); - this.#sink.close(); - } - } - - /** - * Sets enabled state for this profiler and manages sink/observer lifecycle. - * - * Design: Environment = default, Runtime = override - * - Environment variables define defaults (read once at construction) - * - This method provides runtime control without mutating globals - * - Child processes are unaffected by runtime enablement changes - * - * Invariant: enabled ↔ sink + observer state - * - enabled === true → sink open + observer subscribed - * - enabled === false → sink closed + observer unsubscribed - * - * @param enabled - Whether profiling should be enabled - */ - setEnabled(enabled: boolean): void { - if (this.isEnabled() === enabled) { - return; - } - super.setEnabled(enabled); - this.#setObserving(enabled); - } -} diff --git a/packages/utils/src/lib/profiler/profiler.unit.test.ts b/packages/utils/src/lib/profiler/profiler.unit.test.ts index eb2ae7a8b..0e285deb2 100644 --- a/packages/utils/src/lib/profiler/profiler.unit.test.ts +++ b/packages/utils/src/lib/profiler/profiler.unit.test.ts @@ -128,12 +128,12 @@ describe('Profiler', () => { expect(profiler.isEnabled()).toBe(false); }); - it('setEnabled should update internal state without affecting env vars', () => { + it('isEnabled should update environment variable', () => { profiler.setEnabled(true); - expect(profiler.isEnabled()).toBe(true); + expect(process.env.CP_PROFILING).toBe('true'); profiler.setEnabled(false); - expect(profiler.isEnabled()).toBe(false); + expect(process.env.CP_PROFILING).toBe('false'); }); it('marker should execute without error when enabled', () => { @@ -423,22 +423,4 @@ describe('Profiler', () => { ).rejects.toThrow(error); expect(workFn).toHaveBeenCalled(); }); - - it('setEnabled should be idempotent', () => { - // Test enabling - profiler.setEnabled(true); - expect(profiler.isEnabled()).toBe(true); - - // Setting to true again should not change anything - profiler.setEnabled(true); - expect(profiler.isEnabled()).toBe(true); - - // Test disabling - profiler.setEnabled(false); - expect(profiler.isEnabled()).toBe(false); - - // Setting to false again should not change anything - profiler.setEnabled(false); - expect(profiler.isEnabled()).toBe(false); - }); }); diff --git a/packages/utils/src/lib/sink-source.type.ts b/packages/utils/src/lib/sink-source.type.ts index 53244743e..7a948d0db 100644 --- a/packages/utils/src/lib/sink-source.type.ts +++ b/packages/utils/src/lib/sink-source.type.ts @@ -7,6 +7,7 @@ export type EncoderInterface = { encode: (input: I) => O; }; +// eslint-disable-next-line @typescript-eslint/no-unused-vars export type Sink = { setPath: (filePath: string) => void; getPath: () => string; From c88ffe9ee757105bd7c3b8c110625e73d9e7a7a4 Mon Sep 17 00:00:00 2001 From: Michael Hladky Date: Fri, 16 Jan 2026 23:06:01 +0100 Subject: [PATCH 10/31] refactor: wip --- packages/utils/src/lib/file-sink.ts | 59 +++++++++++++++++++++- packages/utils/src/lib/sink-source.type.ts | 59 ---------------------- 2 files changed, 58 insertions(+), 60 deletions(-) diff --git a/packages/utils/src/lib/file-sink.ts b/packages/utils/src/lib/file-sink.ts index 59e1b3583..b547f502e 100644 --- a/packages/utils/src/lib/file-sink.ts +++ b/packages/utils/src/lib/file-sink.ts @@ -1,6 +1,6 @@ import fs from 'node:fs'; import path from 'node:path'; -import type { Decoder, Encoder } from './sink-source.type'; +import type { Decoder, Encoder, RecoverResult } from './sink-source.type'; export type AppendOptions = { filePath: string; @@ -162,3 +162,60 @@ export const StringCodec = { encode: (v: string) => v, decode: (v: string) => v, }; + +export abstract class RecoverableEventSink< + Raw extends Record, + Domain, +> { + protected readonly sink: JsonlFile; + private finalized = false; + + constructor(sink: JsonlFile) { + this.sink = sink; + } + + open() { + this.sink.open(); + } + + write(event: Domain) { + this.sink.write(this.encode(event)); + } + + close() { + this.finalize(); + } + + recover(): RecoverResult { + const { records, errors, partialTail } = this.sink.recover(); + const out: Domain[] = []; + const errs = [...errors]; + + records.forEach((r, i) => { + try { + out.push(this.decode(r)); + } catch (error) { + errs.push({ + lineNo: i + 1, + line: JSON.stringify(r), + error: error as Error, + }); + } + }); + + return { records: out, errors: errs, partialTail }; + } + + finalize() { + if (this.finalized) { + return; + } + this.finalized = true; + this.sink.close(); + this.onFinalize(); + } + + protected abstract encode(domain: Domain): Raw; + protected abstract decode(raw: Raw): Domain; + protected abstract onFinalize(): void; +} diff --git a/packages/utils/src/lib/sink-source.type.ts b/packages/utils/src/lib/sink-source.type.ts index 7a948d0db..45a3fcd0c 100644 --- a/packages/utils/src/lib/sink-source.type.ts +++ b/packages/utils/src/lib/sink-source.type.ts @@ -1,5 +1,3 @@ -import type { JsonlFile } from './file-sink.js'; - export type Encoder = (value: T) => string; export type Decoder = (line: string) => T; @@ -44,63 +42,6 @@ export type RecoverResult = { partialTail: string | null; }; -export abstract class RecoverableEventSink< - Raw extends Record, - Domain, -> { - protected readonly sink: JsonlFile; - private finalized = false; - - constructor(sink: JsonlFile) { - this.sink = sink; - } - - open() { - this.sink.open(); - } - - write(event: Domain) { - this.sink.write(this.encode(event)); - } - - close() { - this.finalize(); - } - - recover(): RecoverResult { - const { records, errors, partialTail } = this.sink.recover(); - const out: Domain[] = []; - const errs = [...errors]; - - records.forEach((r, i) => { - try { - out.push(this.decode(r)); - } catch (error) { - errs.push({ - lineNo: i + 1, - line: JSON.stringify(r), - error: error as Error, - }); - } - }); - - return { records: out, errors: errs, partialTail }; - } - - finalize() { - if (this.finalized) { - return; - } - this.finalized = true; - this.sink.close(); - this.onFinalize(); - } - - protected abstract encode(domain: Domain): Raw; - protected abstract decode(raw: Raw): Domain; - protected abstract onFinalize(): void; -} - export type RecoverOptions = { keepInvalid?: boolean; }; From d6e75e605fd5b0eb7c6667d394ea7eec8b66261c Mon Sep 17 00:00:00 2001 From: Michael Hladky Date: Fri, 16 Jan 2026 23:07:05 +0100 Subject: [PATCH 11/31] refactor: wip --- packages/utils/eslint.config.js | 1 - packages/utils/src/lib/file-sink.ts | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/utils/eslint.config.js b/packages/utils/eslint.config.js index 82b8bb51d..0a72429fa 100644 --- a/packages/utils/eslint.config.js +++ b/packages/utils/eslint.config.js @@ -16,7 +16,6 @@ export default tseslint.config( files: ['packages/utils/src/lib/**/file-sink*.ts'], rules: { 'n/no-sync': 'off', - eqeqeq: 'off', }, }, { diff --git a/packages/utils/src/lib/file-sink.ts b/packages/utils/src/lib/file-sink.ts index b547f502e..55153569f 100644 --- a/packages/utils/src/lib/file-sink.ts +++ b/packages/utils/src/lib/file-sink.ts @@ -101,7 +101,7 @@ export class AppendFileSink { const { records, partialTail } = this.recover(); - if (partialTail === null && outputPath === this.filePath) { + if (partialTail == null && outputPath === this.filePath) { return; } fs.mkdirSync(path.dirname(outputPath), { recursive: true }); From 21575b3e1ef88d8cb89b12aba813ecec57dfcafe Mon Sep 17 00:00:00 2001 From: Michael Hladky Date: Fri, 16 Jan 2026 23:09:42 +0100 Subject: [PATCH 12/31] refactor: wip --- packages/utils/src/lib/file-sink.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/utils/src/lib/file-sink.ts b/packages/utils/src/lib/file-sink.ts index 55153569f..81824c4d6 100644 --- a/packages/utils/src/lib/file-sink.ts +++ b/packages/utils/src/lib/file-sink.ts @@ -1,4 +1,4 @@ -import fs from 'node:fs'; +import * as fs from 'node:fs'; import path from 'node:path'; import type { Decoder, Encoder, RecoverResult } from './sink-source.type'; From 09437a9775e645570e73ab7550bcee25530b6421 Mon Sep 17 00:00:00 2001 From: Michael Hladky Date: Fri, 16 Jan 2026 23:13:17 +0100 Subject: [PATCH 13/31] refactor: wip --- packages/utils/src/lib/profiler/trace-file-utils.ts | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/packages/utils/src/lib/profiler/trace-file-utils.ts b/packages/utils/src/lib/profiler/trace-file-utils.ts index d550ac185..745802d63 100644 --- a/packages/utils/src/lib/profiler/trace-file-utils.ts +++ b/packages/utils/src/lib/profiler/trace-file-utils.ts @@ -319,17 +319,15 @@ export function decodeTraceEvent({ args, ...rest }: TraceEventRaw): TraceEvent { const processedArgs = decodeDetail(args as { detail: string }); if ('data' in args && args.data && typeof args.data === 'object') { - const result: TraceEvent = { + return { ...rest, args: { ...processedArgs, data: decodeDetail(args.data as { detail: string }), }, - }; - return result; + } as TraceEvent; } - const result: TraceEvent = { ...rest, args: processedArgs }; - return result; + return { ...rest, args: processedArgs } as TraceEvent; } export function encodeTraceEvent({ From 4705394683ee3bfd7c34ffc3d02b98df1649ab48 Mon Sep 17 00:00:00 2001 From: Michael Hladky Date: Fri, 16 Jan 2026 23:18:55 +0100 Subject: [PATCH 14/31] refactor: wip --- packages/utils/src/lib/profiler/trace-file-utils.ts | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/packages/utils/src/lib/profiler/trace-file-utils.ts b/packages/utils/src/lib/profiler/trace-file-utils.ts index 745802d63..d550ac185 100644 --- a/packages/utils/src/lib/profiler/trace-file-utils.ts +++ b/packages/utils/src/lib/profiler/trace-file-utils.ts @@ -319,15 +319,17 @@ export function decodeTraceEvent({ args, ...rest }: TraceEventRaw): TraceEvent { const processedArgs = decodeDetail(args as { detail: string }); if ('data' in args && args.data && typeof args.data === 'object') { - return { + const result: TraceEvent = { ...rest, args: { ...processedArgs, data: decodeDetail(args.data as { detail: string }), }, - } as TraceEvent; + }; + return result; } - return { ...rest, args: processedArgs } as TraceEvent; + const result: TraceEvent = { ...rest, args: processedArgs }; + return result; } export function encodeTraceEvent({ From e0c210ea8132c88ab5168592a21f25e4b919d384 Mon Sep 17 00:00:00 2001 From: Michael Hladky Date: Fri, 16 Jan 2026 23:21:01 +0100 Subject: [PATCH 15/31] refactor: wip --- packages/utils/src/lib/profiler/trace-file-utils.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/utils/src/lib/profiler/trace-file-utils.ts b/packages/utils/src/lib/profiler/trace-file-utils.ts index d550ac185..6fdc3b6b2 100644 --- a/packages/utils/src/lib/profiler/trace-file-utils.ts +++ b/packages/utils/src/lib/profiler/trace-file-utils.ts @@ -319,17 +319,17 @@ export function decodeTraceEvent({ args, ...rest }: TraceEventRaw): TraceEvent { const processedArgs = decodeDetail(args as { detail: string }); if ('data' in args && args.data && typeof args.data === 'object') { - const result: TraceEvent = { + // eslint-disable-next-line @typescript-eslint/consistent-type-assertions + return { ...rest, args: { ...processedArgs, data: decodeDetail(args.data as { detail: string }), }, - }; - return result; + } as TraceEvent; } - const result: TraceEvent = { ...rest, args: processedArgs }; - return result; + // eslint-disable-next-line @typescript-eslint/consistent-type-assertions + return { ...rest, args: processedArgs } as TraceEvent; } export function encodeTraceEvent({ From 29016b09a5a73b805085983737397bf10148f574 Mon Sep 17 00:00:00 2001 From: Michael Hladky Date: Sat, 17 Jan 2026 20:34:57 +0100 Subject: [PATCH 16/31] refactor: wip --- packages/utils/eslint.config.js | 2 +- packages/utils/mocks/sink.mock.ts | 46 +- packages/utils/src/index.ts | 1 + packages/utils/src/lib/file-sink.ts | 221 ------- packages/utils/src/lib/file-sink.unit.test.ts | 217 ------- .../utils/src/lib/performance-observer.ts | 17 +- .../src/lib/performance-observer.unit.test.ts | 18 +- packages/utils/src/lib/profiler/constants.ts | 5 + .../profiler/file-sink-json-trace.int.test.ts | 225 -------- .../src/lib/profiler/file-sink-json-trace.ts | 203 ------- .../file-sink-json-trace.unit.test.ts | 539 ------------------ packages/utils/src/lib/profiler/profiler.ts | 120 +++- .../src/lib/profiler/profiler.unit.test.ts | 166 ++++-- .../src/lib/profiler/trace-file-utils.ts | 49 +- .../utils/src/lib/profiler/wal-json-trace.ts | 92 +++ .../lib/profiler/wal-json-trace.unit.test.ts | 317 ++++++++++ packages/utils/src/lib/profiler/wal.ts | 356 ++++++++++++ .../utils/src/lib/profiler/wal.unit.test.ts | 338 +++++++++++ packages/utils/src/lib/sink-source.type.ts | 49 -- 19 files changed, 1436 insertions(+), 1545 deletions(-) delete mode 100644 packages/utils/src/lib/file-sink.ts delete mode 100644 packages/utils/src/lib/file-sink.unit.test.ts delete mode 100644 packages/utils/src/lib/profiler/file-sink-json-trace.int.test.ts delete mode 100644 packages/utils/src/lib/profiler/file-sink-json-trace.ts delete mode 100644 packages/utils/src/lib/profiler/file-sink-json-trace.unit.test.ts create mode 100644 packages/utils/src/lib/profiler/wal-json-trace.ts create mode 100644 packages/utils/src/lib/profiler/wal-json-trace.unit.test.ts create mode 100644 packages/utils/src/lib/profiler/wal.ts create mode 100644 packages/utils/src/lib/profiler/wal.unit.test.ts delete mode 100644 packages/utils/src/lib/sink-source.type.ts diff --git a/packages/utils/eslint.config.js b/packages/utils/eslint.config.js index 0a72429fa..ecb88a924 100644 --- a/packages/utils/eslint.config.js +++ b/packages/utils/eslint.config.js @@ -13,7 +13,7 @@ export default tseslint.config( }, }, { - files: ['packages/utils/src/lib/**/file-sink*.ts'], + files: ['packages/utils/src/lib/**/wal*.ts'], rules: { 'n/no-sync': 'off', }, diff --git a/packages/utils/mocks/sink.mock.ts b/packages/utils/mocks/sink.mock.ts index 35a923351..63d601939 100644 --- a/packages/utils/mocks/sink.mock.ts +++ b/packages/utils/mocks/sink.mock.ts @@ -1,29 +1,55 @@ -import type { Sink } from '../src/lib/sink-source.type'; +import { WriteAheadLogFile } from '../src/lib/profiler/wal.js'; +import type { Codec } from '../src/lib/types.js'; -export class MockFileSink implements Sink { - setPath: (filePath: string) => void; - getPath: () => string; +export class MockFileSink implements WriteAheadLogFile { private writtenItems: string[] = []; private closed = false; + constructor(options?: { file?: string; codec?: Codec }) { + const file = options?.file || '/tmp/mock-sink.log'; + const codec = options?.codec || { + encode: (input: string) => input, + decode: (data: string) => data, + }; + } + + #fd: number | null = null; + + get path(): string { + return '/tmp/mock-sink.log'; + } + + getPath(): string { + return this.path; + } + open(): void { - this.closed = false; + this.#fd = 1; // Mock file descriptor } - write(input: string): void { - this.writtenItems.push(input); + append(v: string): void { + this.writtenItems.push(v); } close(): void { + this.#fd = null; this.closed = true; } isClosed(): boolean { - return this.closed; + return this.#fd === null; + } + + recover(): any { + return { + records: this.writtenItems, + errors: [], + partialTail: null, + }; } - encode(input: string): string { - return `${input}-${this.constructor.name}-encoded`; + repack(): void { + // Mock implementation - do nothing } getWrittenItems(): string[] { diff --git a/packages/utils/src/index.ts b/packages/utils/src/index.ts index f019b8055..636ec41e6 100644 --- a/packages/utils/src/index.ts +++ b/packages/utils/src/index.ts @@ -114,6 +114,7 @@ export { settlePromise, } from './lib/promises.js'; export { generateRandomId } from './lib/random.js'; +export { profiler } from './lib/profiler/profiler.js'; export { CODE_PUSHUP_DOMAIN, CODE_PUSHUP_UNICODE_LOGO, diff --git a/packages/utils/src/lib/file-sink.ts b/packages/utils/src/lib/file-sink.ts deleted file mode 100644 index 81824c4d6..000000000 --- a/packages/utils/src/lib/file-sink.ts +++ /dev/null @@ -1,221 +0,0 @@ -import * as fs from 'node:fs'; -import path from 'node:path'; -import type { Decoder, Encoder, RecoverResult } from './sink-source.type'; - -export type AppendOptions = { - filePath: string; -}; - -export class AppendFileSink { - #fd: number | null = null; - - constructor(private filePath: string) {} - - getPath() { - return this.filePath; - } - setPath(filePath: string) { - if (this.#fd != null) { - throw new Error('Cannot change path while open'); - } - this.filePath = filePath; - } - - open() { - if (this.#fd != null) { - return; - } - - fs.mkdirSync(path.dirname(this.filePath), { recursive: true }); - this.#fd = fs.openSync(this.filePath, 'a'); - } - - write(line: string) { - if (this.#fd == null) { - throw new Error('Sink not opened'); - } - fs.writeSync(this.#fd, `${line}\n`); - } - - close() { - if (this.#fd == null) { - return; - } - fs.closeSync(this.#fd); - this.#fd = null; - } - - flush() { - if (this.#fd != null) { - fs.fsyncSync(this.#fd); - } - } - - isClosed() { - return this.#fd == null; - } - - *readAll(): Iterable { - if (!fs.existsSync(this.filePath)) { - return; - } - - const data = fs.readFileSync(this.filePath, 'utf8'); - yield* data - .split('\n') - .filter(line => line.length > 0) - .map(line => (line.endsWith('\r') ? line.slice(0, -1) : line)); - } - - recover(): { - records: string[]; - errors: { lineNo: number; line: string; error: Error }[]; - partialTail: string | null; - } { - if (!fs.existsSync(this.filePath)) { - return { records: [], errors: [], partialTail: null }; - } - - const data = fs.readFileSync(this.filePath, 'utf8'); - const lines = data.split('\n'); - const endsClean = data.endsWith('\n'); - - const records: string[] = lines - .slice(0, -1) - .map(line => line?.replace(/\r$/, '')) - .filter(Boolean); - - const lastLine = lines.at(-1); - const partialTail = - endsClean || lines.length === 0 || !lastLine - ? null - : lastLine.replace(/\r$/, ''); - - return { records, errors: [], partialTail }; - } - - repack(outputPath = this.filePath) { - if (!this.isClosed()) { - throw new Error('Cannot repack while open'); - } - - const { records, partialTail } = this.recover(); - - if (partialTail == null && outputPath === this.filePath) { - return; - } - fs.mkdirSync(path.dirname(outputPath), { recursive: true }); - fs.writeFileSync(outputPath, records.map(r => `${r}\n`).join('')); - } -} - -export class JsonlFile { - protected sink: AppendFileSink; - - constructor( - filePath: string, - private encode: Encoder, - private decode: Decoder, - ) { - this.sink = new AppendFileSink(filePath); - } - open() { - this.sink.open(); - } - - write(value: T) { - this.sink.write(this.encode(value)); - } - - close() { - this.sink.close(); - } - - *readAll(): Iterable { - yield* [...this.sink.readAll()].map(line => this.decode(line)); - } - - recover(): { - records: T[]; - errors: { lineNo: number; line: string; error: Error }[]; - partialTail: string | null; - } { - const r = this.sink.recover(); - return { - records: r.records.map(l => this.decode(l)), - errors: r.errors, - partialTail: r.partialTail, - }; - } - - repack(outputPath?: string) { - this.sink.repack(outputPath); - } -} - -export const JsonCodec = { - encode: (v: unknown) => JSON.stringify(v), - decode: (v: string) => JSON.parse(v), -}; - -export const StringCodec = { - encode: (v: string) => v, - decode: (v: string) => v, -}; - -export abstract class RecoverableEventSink< - Raw extends Record, - Domain, -> { - protected readonly sink: JsonlFile; - private finalized = false; - - constructor(sink: JsonlFile) { - this.sink = sink; - } - - open() { - this.sink.open(); - } - - write(event: Domain) { - this.sink.write(this.encode(event)); - } - - close() { - this.finalize(); - } - - recover(): RecoverResult { - const { records, errors, partialTail } = this.sink.recover(); - const out: Domain[] = []; - const errs = [...errors]; - - records.forEach((r, i) => { - try { - out.push(this.decode(r)); - } catch (error) { - errs.push({ - lineNo: i + 1, - line: JSON.stringify(r), - error: error as Error, - }); - } - }); - - return { records: out, errors: errs, partialTail }; - } - - finalize() { - if (this.finalized) { - return; - } - this.finalized = true; - this.sink.close(); - this.onFinalize(); - } - - protected abstract encode(domain: Domain): Raw; - protected abstract decode(raw: Raw): Domain; - protected abstract onFinalize(): void; -} diff --git a/packages/utils/src/lib/file-sink.unit.test.ts b/packages/utils/src/lib/file-sink.unit.test.ts deleted file mode 100644 index 91a65cf91..000000000 --- a/packages/utils/src/lib/file-sink.unit.test.ts +++ /dev/null @@ -1,217 +0,0 @@ -import { vol } from 'memfs'; -import * as fs from 'node:fs'; -import { beforeEach, describe, expect, it } from 'vitest'; -import { MEMFS_VOLUME } from '@code-pushup/test-utils'; -import { AppendFileSink, JsonlFile } from './file-sink.js'; - -describe('AppendFileSink', () => { - beforeEach(() => { - vol.reset(); - vol.fromJSON({}, MEMFS_VOLUME); - }); - - it('should create instance with file path', () => { - const sink = new AppendFileSink('/tmp/test-append.txt'); - expect(sink).toBeInstanceOf(AppendFileSink); - }); - - it('open() should be idempotent', () => { - const sink = new AppendFileSink('/tmp/test-append.txt'); - - // First open should work - sink.open(); - expect(sink.isClosed()).toBe(false); - - // Second open should be no-op - sink.open(); - expect(sink.isClosed()).toBe(false); - }); - - it('close() should be idempotent', () => { - const sink = new AppendFileSink('/tmp/test-append.txt'); - sink.open(); - expect(sink.isClosed()).toBe(false); - - // First close - sink.close(); - expect(sink.isClosed()).toBe(true); - - // Second close should be no-op - sink.close(); - expect(sink.isClosed()).toBe(true); - }); - - it('flush() should be idempotent when not opened', () => { - const sink = new AppendFileSink('/tmp/test-append.txt'); - - // Should not throw when not opened - expect(() => sink.flush()).not.toThrow(); - expect(() => sink.flush()).not.toThrow(); - }); - - it('should write lines with newlines', () => { - const sink = new AppendFileSink('/tmp/test-append.txt'); - sink.open(); - - sink.write('line1'); - sink.write('line2'); - - sink.close(); - - const content = fs.readFileSync('/tmp/test-append.txt', 'utf8'); - expect(content).toBe('line1\nline2\n'); - }); - - it('should throw error when writing without opening', () => { - const sink = new AppendFileSink('/tmp/test-append.txt'); - - expect(() => sink.write('test')).toThrow('Sink not opened'); - }); - - it('should read all lines', () => { - vol.fromJSON({ - '/tmp/test-append.txt': 'line1\nline2\nline3\n', - }); - - const sink = new AppendFileSink('/tmp/test-append.txt'); - const lines = [...sink.readAll()]; - - expect(lines).toStrictEqual(['line1', 'line2', 'line3']); - }); - - it('should handle empty files', () => { - vol.fromJSON({ - '/tmp/test-append.txt': '', - }); - - const sink = new AppendFileSink('/tmp/test-append.txt'); - const lines = [...sink.readAll()]; - - expect(lines).toStrictEqual([]); - }); - - it('should recover records from file', () => { - vol.fromJSON({ - '/tmp/test-append.txt': 'line1\nline2\nline3\n', - }); - - const sink = new AppendFileSink('/tmp/test-append.txt'); - const result = sink.recover(); - - expect(result.records).toStrictEqual(['line1', 'line2', 'line3']); - expect(result.errors).toStrictEqual([]); - expect(result.partialTail).toBeNull(); - }); - - it('should recover with partial tail', () => { - vol.fromJSON({ - '/tmp/test-append.txt': 'line1\nline2\nincomplete', - }); - - const sink = new AppendFileSink('/tmp/test-append.txt'); - const result = sink.recover(); - - expect(result.records).toStrictEqual(['line1', 'line2']); - expect(result.errors).toStrictEqual([]); - expect(result.partialTail).toBe('incomplete'); - }); - - it('repack() should be idempotent when file is clean', () => { - vol.fromJSON({ - '/tmp/test-append.txt': 'line1\nline2\nline3\n', - }); - - const sink = new AppendFileSink('/tmp/test-append.txt'); - - // First repack - sink.repack(); - const content1 = fs.readFileSync('/tmp/test-append.txt', 'utf8'); - - // Second repack should be no-op - sink.repack(); - const content2 = fs.readFileSync('/tmp/test-append.txt', 'utf8'); - - expect(content1).toBe(content2); - }); - - it('repack() should clean incomplete lines', () => { - vol.fromJSON({ - '/tmp/test-append.txt': 'line1\nline2\nincomplete', - }); - - const sink = new AppendFileSink('/tmp/test-append.txt'); - sink.repack(); - - const content = fs.readFileSync('/tmp/test-append.txt', 'utf8'); - expect(content).toBe('line1\nline2\n'); - }); - - it('repack() should throw error when file is open', () => { - const sink = new AppendFileSink('/tmp/test-append.txt'); - sink.open(); - - expect(() => sink.repack()).toThrow('Cannot repack while open'); - }); -}); - -describe('JsonlFile', () => { - beforeEach(() => { - vol.reset(); - vol.fromJSON({}, MEMFS_VOLUME); - }); - - it('should create instance with file path and codecs', () => { - const sink = new JsonlFile('/tmp/test.jsonl', JSON.stringify, JSON.parse); - expect(sink).toBeInstanceOf(JsonlFile); - }); - - it('should encode/decode objects to/from JSON lines', () => { - const sink = new JsonlFile('/tmp/test.jsonl', JSON.stringify, JSON.parse); - sink.open(); - - const obj1 = { name: 'test1', value: 42 }; - const obj2 = { name: 'test2', value: 24 }; - - sink.write(obj1); - sink.write(obj2); - sink.close(); - - const content = fs.readFileSync('/tmp/test.jsonl', 'utf8'); - expect(content).toBe(`${JSON.stringify(obj1)}\n${JSON.stringify(obj2)}\n`); - - const lines = [...sink.readAll()]; - expect(lines).toStrictEqual([obj1, obj2]); - }); - - it('recover() should decode raw JSON lines back to objects', () => { - vol.fromJSON({ - '/tmp/test.jsonl': - '{"name":"test1","value":42}\n{"name":"test2","value":24}\n', - }); - - const sink = new JsonlFile('/tmp/test.jsonl', JSON.stringify, JSON.parse); - const result = sink.recover(); - - expect(result.records).toStrictEqual([ - { name: 'test1', value: 42 }, - { name: 'test2', value: 24 }, - ]); - expect(result.errors).toStrictEqual([]); - expect(result.partialTail).toBeNull(); - }); - - it('repack() should rewrite file with clean JSON lines', () => { - vol.fromJSON({ - '/tmp/test.jsonl': - '{"name":"test1","value":42}\n{"name":"test2","value":24}\n', - }); - - const sink = new JsonlFile('/tmp/test.jsonl', JSON.stringify, JSON.parse); - sink.repack(); - - const content = fs.readFileSync('/tmp/test.jsonl', 'utf8'); - expect(content).toBe( - '{"name":"test1","value":42}\n{"name":"test2","value":24}\n', - ); - }); -}); diff --git a/packages/utils/src/lib/performance-observer.ts b/packages/utils/src/lib/performance-observer.ts index ba1b973d5..96bbf9b25 100644 --- a/packages/utils/src/lib/performance-observer.ts +++ b/packages/utils/src/lib/performance-observer.ts @@ -4,31 +4,24 @@ import { type PerformanceObserverEntryList, performance, } from 'node:perf_hooks'; -import type { - Buffered, - EncoderInterface, - Observer, - Sink, -} from './sink-source.type'; +import type { WriteAheadLogFile } from './profiler/wal.js'; const OBSERVED_TYPES = ['mark', 'measure'] as const; type ObservedEntryType = 'mark' | 'measure'; export const DEFAULT_FLUSH_THRESHOLD = 20; export type PerformanceObserverOptions = { - sink: Sink; + sink: WriteAheadLogFile; encode: (entry: PerformanceEntry) => T[]; buffered?: boolean; flushThreshold?: number; }; -export class PerformanceObserverSink - implements Observer, Buffered, EncoderInterface -{ +export class PerformanceObserverSink { #encode: (entry: PerformanceEntry) => T[]; #buffered: boolean; #flushThreshold: number; - #sink: Sink; + #sink: WriteAheadLogFile; #observer: PerformanceObserver | undefined; #pendingCount = 0; @@ -89,7 +82,7 @@ export class PerformanceObserverSink try { fresh .flatMap(entry => this.encode(entry)) - .forEach(item => this.#sink.write(item)); + .forEach(item => this.#sink.append(item)); this.#written.set(t, written + fresh.length); } catch (error) { diff --git a/packages/utils/src/lib/performance-observer.unit.test.ts b/packages/utils/src/lib/performance-observer.unit.test.ts index a39c424cc..c4586e27d 100644 --- a/packages/utils/src/lib/performance-observer.unit.test.ts +++ b/packages/utils/src/lib/performance-observer.unit.test.ts @@ -13,6 +13,7 @@ import { type PerformanceObserverOptions, PerformanceObserverSink, } from './performance-observer.js'; +import type { Codec } from './types.js'; describe('PerformanceObserverSink', () => { let encode: MockedFunction<(entry: PerformanceEntry) => string[]>; @@ -255,12 +256,23 @@ describe('PerformanceObserverSink', () => { }); it('flush wraps sink write errors with descriptive error message', () => { - const failingSink = { - write: vi.fn(() => { + const failingCodec: Codec = { + encode: () => { throw new Error('Sink write failed'); - }), + }, + decode: (data: string) => data, }; + const failingSink = new MockFileSink({ + file: '/test/path', + codec: failingCodec, + }); + + // Mock the append method to throw + vi.spyOn(failingSink, 'append').mockImplementation(() => { + throw new Error('Sink write failed'); + }); + const observer = new PerformanceObserverSink({ sink: failingSink as any, encode, diff --git a/packages/utils/src/lib/profiler/constants.ts b/packages/utils/src/lib/profiler/constants.ts index 4e149583b..8e773842f 100644 --- a/packages/utils/src/lib/profiler/constants.ts +++ b/packages/utils/src/lib/profiler/constants.ts @@ -1 +1,6 @@ export const PROFILER_ENABLED_ENV_VAR = 'CP_PROFILING'; +export const PROFILER_COORDINATOR_FLAG_ENV_VAR = 'CP_PROFILER_COORDINATOR'; +export const PROFILER_ORIGIN_PID_ENV_VAR = 'CP_PROFILER_ORIGIN_PID'; +export const PROFILER_DIRECTORY_ENV_VAR = 'CP_PROFILER_DIR'; +export const PROFILER_BASE_NAME = 'trace'; +export const PROFILER_DIRECTORY = './tmp/profiles'; diff --git a/packages/utils/src/lib/profiler/file-sink-json-trace.int.test.ts b/packages/utils/src/lib/profiler/file-sink-json-trace.int.test.ts deleted file mode 100644 index 7331dc7da..000000000 --- a/packages/utils/src/lib/profiler/file-sink-json-trace.int.test.ts +++ /dev/null @@ -1,225 +0,0 @@ -import * as fs from 'node:fs'; -import * as os from 'node:os'; -import path from 'node:path'; -import { afterAll, beforeAll, describe, expect, it } from 'vitest'; -import { teardownTestFolder } from '@code-pushup/test-utils'; -import { FileSinkJsonTrace } from './file-sink-json-trace.js'; -import type { CompleteEvent, TraceEvent } from './trace-file.type'; - -describe('TraceFileSink integration', () => { - const baseDir = path.join(os.tmpdir(), 'file-sink-json-trace-int-tests'); - const traceJsonPath = path.join(baseDir, 'test-data.json'); - const traceJsonlPath = path.join(baseDir, 'test-data.jsonl'); - - beforeAll(async () => { - await fs.promises.mkdir(baseDir, { recursive: true }); - }); - - beforeEach(async () => { - try { - await fs.promises.unlink(traceJsonPath); - } catch { - // File doesn't exist, which is fine - } - try { - await fs.promises.unlink(traceJsonlPath); - } catch { - // File doesn't exist, which is fine - } - }); - - afterAll(async () => { - await teardownTestFolder(baseDir); - }); - - describe('file operations', () => { - const testEvents: TraceEvent[] = [ - { name: 'navigationStart', ts: 100, ph: 'I', cat: 'blink.user_timing' }, - { - name: 'loadEventStart', - ts: 200, - ph: 'I', - cat: 'blink.user_timing', - args: { data: { url: 'https://example.com' } }, - }, - { - name: 'loadEventEnd', - ts: 250, - ph: 'I', - cat: 'blink.user_timing', - args: { detail: { duration: 50 } }, - }, - ]; - - it('should write and read trace events', async () => { - const sink = new FileSinkJsonTrace({ - filename: 'test-data', - directory: baseDir, - }); - - // Open and write data - sink.open(); - testEvents.forEach(event => sink.write(event as any)); - sink.finalize(); - - expect(fs.existsSync(traceJsonPath)).toBe(true); - expect(fs.existsSync(traceJsonlPath)).toBe(true); - - const jsonContent = fs.readFileSync(traceJsonPath, 'utf8'); - const traceData = JSON.parse(jsonContent); - - expect(traceData.metadata.source).toBe('DevTools'); - expect(traceData.metadata.dataOrigin).toBe('TraceEvents'); - expect(Array.isArray(traceData.traceEvents)).toBe(true); - - // Should have preamble events + user events + complete event - expect(traceData.traceEvents.length).toBeGreaterThan(testEvents.length); - - // Check that our events are included - const userEvents = traceData.traceEvents.filter((e: any) => - testEvents.some(testEvent => testEvent.name === e.name), - ); - expect(userEvents).toHaveLength(testEvents.length); - }); - - it('should recover events from JSONL file', async () => { - const sink = new FileSinkJsonTrace({ - filename: 'test-data', - directory: baseDir, - }); - sink.open(); - testEvents.forEach(event => sink.write(event as any)); - sink.close(); - - const recovered = sink.recover(); - expect(recovered.records).toStrictEqual(testEvents); - expect(recovered.errors).toStrictEqual([]); - expect(recovered.partialTail).toBeNull(); - }); - - it('should handle empty trace files', async () => { - const sink = new FileSinkJsonTrace({ - filename: 'empty-test', - directory: baseDir, - }); - sink.open(); - sink.finalize(); - - const emptyJsonPath = path.join(baseDir, 'empty-test.json'); - expect(fs.existsSync(emptyJsonPath)).toBe(true); - - const jsonContent = fs.readFileSync(emptyJsonPath, 'utf8'); - const traceData = JSON.parse(jsonContent); - - expect(traceData.metadata.source).toBe('DevTools'); - // Should have at least preamble and complete events - expect(traceData.traceEvents.length).toBeGreaterThanOrEqual(2); - }); - - it('should handle metadata in trace files', async () => { - const metadata = { - version: '1.0.0', - environment: 'test', - customData: { key: 'value' }, - }; - - const sink = new FileSinkJsonTrace({ - filename: 'metadata-test', - directory: baseDir, - metadata, - }); - sink.open(); - sink.write({ name: 'test-event', ts: 100, ph: 'I' } as any); - sink.finalize(); - - const metadataJsonPath = path.join(baseDir, 'metadata-test.json'); - const jsonContent = fs.readFileSync(metadataJsonPath, 'utf8'); - const traceData = JSON.parse(jsonContent); - - expect(traceData.metadata.version).toBe('1.0.0'); - expect(traceData.metadata.environment).toBe('test'); - expect(traceData.metadata.customData).toStrictEqual({ key: 'value' }); - expect(traceData.metadata.source).toBe('DevTools'); - }); - - // eslint-disable-next-line vitest/max-nested-describe - describe('edge cases', () => { - it('should handle single event traces', async () => { - const singleEvent: TraceEvent = { - name: 'singleEvent', - ts: 123, - ph: 'I', - cat: 'test', - }; - - const sink = new FileSinkJsonTrace({ - filename: 'single-event-test', - directory: baseDir, - }); - sink.open(); - sink.write(singleEvent as any); - sink.finalize(); - - const singleJsonPath = path.join(baseDir, 'single-event-test.json'); - const jsonContent = fs.readFileSync(singleJsonPath, 'utf8'); - const traceData = JSON.parse(jsonContent); - - expect( - traceData.traceEvents.some((e: any) => e.name === 'singleEvent'), - ).toBe(true); - }); - - it('should handle events with complex args', async () => { - const complexEvent: CompleteEvent = { - name: 'complexEvent', - ts: 456, - ph: 'X', - cat: 'test', - args: { - detail: { nested: { data: [1, 2, 3] } }, - data: { url: 'https://example.com', size: 1024 }, - }, - }; - - const sink = new FileSinkJsonTrace({ - filename: 'complex-args-test', - directory: baseDir, - }); - sink.open(); - sink.write(complexEvent as any); - sink.finalize(); - - const complexJsonPath = path.join(baseDir, 'complex-args-test.json'); - const jsonContent = fs.readFileSync(complexJsonPath, 'utf8'); - const traceData = JSON.parse(jsonContent); - - const eventInTrace = traceData.traceEvents.find( - (e: any) => e.name === 'complexEvent', - ); - expect(eventInTrace).toBeDefined(); - expect(eventInTrace.args.detail).toStrictEqual({ - nested: { data: [1, 2, 3] }, - }); - expect(eventInTrace.args.data.url).toBe('https://example.com'); - }); - - it('should handle non-existent directories gracefully', async () => { - const nonExistentDir = path.join(baseDir, 'non-existent'); - const sink = new FileSinkJsonTrace({ - filename: 'non-existent-dir-test', - directory: nonExistentDir, - }); - - sink.open(); - sink.write({ name: 'test', ts: 100, ph: 'I' } as any); - sink.finalize(); - - const jsonPath = path.join( - nonExistentDir, - 'non-existent-dir-test.json', - ); - expect(fs.existsSync(jsonPath)).toBe(true); - }); - }); - }); -}); diff --git a/packages/utils/src/lib/profiler/file-sink-json-trace.ts b/packages/utils/src/lib/profiler/file-sink-json-trace.ts deleted file mode 100644 index 27312efdb..000000000 --- a/packages/utils/src/lib/profiler/file-sink-json-trace.ts +++ /dev/null @@ -1,203 +0,0 @@ -import * as fs from 'node:fs'; -import path from 'node:path'; -import { performance } from 'node:perf_hooks'; -import { JsonlFile } from '../file-sink.js'; -import { - decodeTraceEvent, - getCompleteEvent, - getInstantEventTracingStartedInBrowser, - getTraceFile, - getTraceMetadata, -} from './trace-file-utils.js'; -import type { - InstantEvent, - SpanEvent, - TraceEvent, - TraceEventRaw, - UserTimingTraceEvent, -} from './trace-file.type.js'; - -const TRACE_START_MARGIN_NAME = '[trace padding start]'; -const TRACE_END_MARGIN_NAME = '[trace padding end]'; -const TRACE_MARGIN_MS = 1000; -const TRACE_MARGIN_DURATION_MS = 20; - -export type FinalizeTraceFileOptions = { - startTime?: string | Date; - marginMs?: number; - marginDurMs?: number; -}; - -// eslint-disable-next-line max-lines-per-function, complexity -export function finalizeTraceFile( - events: (SpanEvent | InstantEvent)[], - outputPath: string, - metadata?: Record, - options?: FinalizeTraceFileOptions, -): void { - if (fs.existsSync(outputPath)) { - try { - const content = fs.readFileSync(outputPath, 'utf8'); - if (content.trim().length > 0) { - return; - } - } catch { - // Ignore errors when checking existing file content - } - } - if (events.length === 0) { - const startTime = options?.startTime - ? typeof options.startTime === 'string' - ? options.startTime - : options.startTime.toISOString() - : new Date().toISOString(); - - const startDate = startTime ? new Date(startTime) : undefined; - - // Even for empty traces, add padding events for consistency - const marginMs = options?.marginMs ?? TRACE_MARGIN_MS; - const marginDurMs = options?.marginDurMs ?? TRACE_MARGIN_DURATION_MS; - const fallbackTs = performance.now(); - const startTs = fallbackTs - marginMs; - const endTs = fallbackTs + marginMs; - - const traceEvents: TraceEvent[] = [ - getInstantEventTracingStartedInBrowser({ ts: startTs, url: outputPath }), - getCompleteEvent({ - name: TRACE_START_MARGIN_NAME, - ts: startTs, - dur: marginDurMs, - }), - getCompleteEvent({ - name: TRACE_END_MARGIN_NAME, - ts: endTs, - dur: marginDurMs, - }), - ]; - - const traceContainer = { - ...getTraceFile({ - traceEvents, - startTime, - }), - metadata: getTraceMetadata(startDate, metadata), - }; - - fs.writeFileSync(outputPath, JSON.stringify(traceContainer), 'utf8'); - return; - } - - const marginMs = options?.marginMs ?? TRACE_MARGIN_MS; - const marginDurMs = options?.marginDurMs ?? TRACE_MARGIN_DURATION_MS; - - const sortedEvents = [...events].sort((a, b) => a.ts - b.ts); - const fallbackTs = performance.now(); - const firstTs: number = sortedEvents.at(0)?.ts ?? fallbackTs; - const lastTs: number = sortedEvents.at(-1)?.ts ?? fallbackTs; - - const startTs = firstTs - marginMs; - const endTs = lastTs + marginMs; - - const traceEvents: TraceEvent[] = [ - getInstantEventTracingStartedInBrowser({ ts: startTs, url: outputPath }), - getCompleteEvent({ - name: TRACE_START_MARGIN_NAME, - ts: startTs, - dur: marginDurMs, - }), - ...sortedEvents, - getCompleteEvent({ - name: TRACE_END_MARGIN_NAME, - ts: endTs, - dur: marginDurMs, - }), - ]; - - const startTime = options?.startTime - ? typeof options.startTime === 'string' - ? options.startTime - : options.startTime.toISOString() - : new Date().toISOString(); - - const startDate = startTime ? new Date(startTime) : undefined; - - const traceContainer = { - ...getTraceFile({ - traceEvents, - startTime, - }), - metadata: getTraceMetadata(startDate, metadata), - }; - - fs.writeFileSync(outputPath, JSON.stringify(traceContainer), 'utf8'); -} - -export type TraceFileSinkOptions = { - filename: string; - directory: string; - metadata?: Record; - startTime?: string | Date; - marginMs?: number; - marginDurMs?: number; -}; - -export class FileSinkJsonTrace extends JsonlFile { - #metadata?: Record; - #startTime?: string | Date; - #marginMs?: number; - #marginDurMs?: number; - #closed = false; - - constructor(opts: TraceFileSinkOptions) { - const filePath = path.join(opts.directory, `${opts.filename}.jsonl`); - super(filePath, JSON.stringify, JSON.parse); - - this.#metadata = opts.metadata; - this.#startTime = opts.startTime; - this.#marginMs = opts.marginMs; - this.#marginDurMs = opts.marginDurMs; - } - #getFilePathForExt(ext: 'json' | 'jsonl'): string { - return path.join( - path.dirname(this.sink.getPath()), - `${path.parse(this.sink.getPath()).name}.${ext}`, - ); - } - - close(): void { - if (this.#closed) { - return; - } - this.finalize(); - this.#closed = true; - } - - finalize(): void { - if (this.#closed) { - return; - } - this.#closed = true; - - // Close the sink if it's open - if (!this.sink.isClosed()) { - this.sink.close(); - } - - const { records } = this.recover(); - const outputPath = this.#getFilePathForExt('json'); - - // Ensure output directory exists - fs.mkdirSync(path.dirname(outputPath), { recursive: true }); - - finalizeTraceFile( - records.map(r => decodeTraceEvent(r) as UserTimingTraceEvent), - outputPath, - this.#metadata, - { - startTime: this.#startTime, - marginMs: this.#marginMs, - marginDurMs: this.#marginDurMs, - }, - ); - } -} diff --git a/packages/utils/src/lib/profiler/file-sink-json-trace.unit.test.ts b/packages/utils/src/lib/profiler/file-sink-json-trace.unit.test.ts deleted file mode 100644 index ad449f96a..000000000 --- a/packages/utils/src/lib/profiler/file-sink-json-trace.unit.test.ts +++ /dev/null @@ -1,539 +0,0 @@ -import { vol } from 'memfs'; -import * as fs from 'node:fs'; -import path from 'node:path'; -import { beforeEach, describe, expect, it } from 'vitest'; -import { MEMFS_VOLUME } from '@code-pushup/test-utils'; -import { - FileSinkJsonTrace, - finalizeTraceFile, -} from './file-sink-json-trace.js'; -import { - decodeTraceEvent, - encodeTraceEvent, - getTraceMetadata, -} from './trace-file-utils.js'; -import type { - InstantEvent, - TraceEvent, - TraceEventRaw, -} from './trace-file.type'; - -describe('decodeTraceEvent', () => { - it('should return event without args if no args present', () => { - const event: TraceEventRaw = { name: 'test', ts: 123 }; - expect(decodeTraceEvent(event)).toStrictEqual(event); - }); - - it('should decode args with detail property', () => { - const event: TraceEventRaw = { - name: 'test', - ts: 123, - args: { detail: '{"key":"value"}' }, - }; - expect(decodeTraceEvent(event)).toStrictEqual({ - name: 'test', - ts: 123, - args: { detail: { key: 'value' } }, - }); - }); - - it('should decode nested data.detail property', () => { - const event: TraceEventRaw = { - name: 'test', - ts: 123, - args: { data: { detail: '{"nested":"value"}' } }, - }; - expect(decodeTraceEvent(event)).toStrictEqual({ - name: 'test', - ts: 123, - args: { data: { detail: { nested: 'value' } } }, - }); - }); - - it('should handle invalid JSON in detail', () => { - const event: TraceEventRaw = { - name: 'test', - ts: 123, - args: { detail: 'invalid json' }, - }; - expect(() => decodeTraceEvent(event)).toThrow('Unexpected token'); - }); -}); - -describe('encodeTraceEvent', () => { - it('should return event without args if no args present', () => { - const event: TraceEventRaw = { name: 'test', ts: 123 }; - expect(encodeTraceEvent(event)).toStrictEqual(event); - }); - - it('should encode args with detail property', () => { - const event: TraceEventRaw = { - name: 'test', - ts: 123, - args: { detail: { key: 'value' } }, - }; - expect(encodeTraceEvent(event)).toStrictEqual({ - name: 'test', - ts: 123, - args: { detail: '{"key":"value"}' }, - }); - }); - - it('should encode nested data.detail property', () => { - const event: TraceEventRaw = { - name: 'test', - ts: 123, - args: { data: { detail: { nested: 'value' } } }, - }; - expect(encodeTraceEvent(event)).toStrictEqual({ - name: 'test', - ts: 123, - args: { data: { detail: '{"nested":"value"}' } }, - }); - }); - - it('should handle non-serializable detail', () => { - const circular: any = {}; - // eslint-disable-next-line functional/immutable-data - circular.self = circular; - const event: TraceEventRaw = { - name: 'test', - ts: 123, - args: { detail: circular }, - }; - expect(() => encodeTraceEvent(event)).toThrow( - 'Converting circular structure to JSON', - ); - }); -}); - -describe('finalizeTraceFile', () => { - beforeEach(() => { - vol.reset(); - vol.fromJSON({}, MEMFS_VOLUME); - fs.mkdirSync('/tmp', { recursive: true }); - }); - - it('should create trace file with events', () => { - const events: TraceEvent[] = [ - { name: 'event1', ts: 100, ph: 'I' }, - { name: 'event2', ts: 200, ph: 'X', args: { dur: 50 } }, - ]; - const outputPath = '/tmp/test-trace.json'; - - finalizeTraceFile(events as any, outputPath); - - expect(fs.existsSync(outputPath)).toBe(true); - const content = JSON.parse(fs.readFileSync(outputPath, 'utf8')); - expect(content.metadata.source).toBe('DevTools'); - expect(content.traceEvents).toHaveLength(5); // preamble (start + complete) + events + complete - }); - - it('should handle empty events array', () => { - const events: TraceEvent[] = []; - const outputPath = '/tmp/empty-trace.json'; - - finalizeTraceFile(events as any, outputPath); - - expect(fs.existsSync(outputPath)).toBe(true); // File created with margin events even for empty events - const content = JSON.parse(fs.readFileSync(outputPath, 'utf8')); - expect(content.traceEvents).toHaveLength(3); // Should have padding events even for empty traces due to bug - }); - - it('should sort events by timestamp', () => { - const events: TraceEvent[] = [ - { name: 'event2', ts: 200, ph: 'I' }, - { name: 'event1', ts: 100, ph: 'I' }, - ]; - const outputPath = '/tmp/sorted-trace.json'; - - finalizeTraceFile(events as any, outputPath); - - const content = JSON.parse(fs.readFileSync(outputPath, 'utf8')); - const eventNames = content.traceEvents - .filter((e: any) => e.name.startsWith('event')) - .map((e: any) => e.name); - expect(eventNames).toStrictEqual(['event1', 'event2']); - }); - - it('should use configurable margins', () => { - const events: TraceEvent[] = [{ name: 'event1', ts: 1000, ph: 'I' }]; - const outputPath = '/tmp/custom-margin-trace.json'; - - finalizeTraceFile( - events as any, - outputPath, - {}, - { marginMs: 500, marginDurMs: 10 }, - ); - - const content = JSON.parse(fs.readFileSync(outputPath, 'utf8')); - expect(content.traceEvents).toHaveLength(4); // start tracing + start margin + event + end margin - - // Check start margin timestamp and duration - const startMargin = content.traceEvents.find( - (e: any) => e.name === '[trace padding start]', - ); - expect(startMargin.ts).toBe(500); // 1000 - 500 due to bug - expect(startMargin.dur).toBe(10); - - // Check end margin timestamp and duration - const endMargin = content.traceEvents.find( - (e: any) => e.name === '[trace padding end]', - ); - expect(endMargin.ts).toBe(1500); // 1000 + 500 - expect(endMargin.dur).toBe(10); - }); - - it('should use deterministic startTime', () => { - const events: TraceEvent[] = [{ name: 'event1', ts: 1000, ph: 'I' }]; - const outputPath = '/tmp/deterministic-trace.json'; - const fixedTime = '2023-01-15T10:30:00.000Z'; - - finalizeTraceFile(events as any, outputPath, {}, { startTime: fixedTime }); - - const content = JSON.parse(fs.readFileSync(outputPath, 'utf8')); - expect(content.metadata.startTime).toBe(fixedTime); - }); -}); - -describe('TraceFileSink', () => { - beforeEach(() => { - vol.fromJSON( - { - '/tmp': null, - }, - MEMFS_VOLUME, - ); - }); - - it('should create trace file sink with default options', () => { - const sink = new FileSinkJsonTrace({ - filename: 'test', - directory: '/tmp', - }); - expect(sink).toBeDefined(); - // Test that the sink can be opened and closed without errors - sink.open(); - sink.close(); - }); - - it('should create trace file sink with custom directory', () => { - const sink = new FileSinkJsonTrace({ - filename: 'test', - directory: path.join(MEMFS_VOLUME, 'custom'), - }); - expect(sink).toBeDefined(); - // Test that the sink can be opened and closed without errors - sink.open(); - sink.close(); - }); - - it('should handle file operations with trace events', () => { - const sink = new FileSinkJsonTrace({ - filename: 'trace-test', - directory: path.join(MEMFS_VOLUME, 'trace-test'), - }); - const event1: InstantEvent = { name: 'mark1', ts: 100, ph: 'I' }; - const event2: InstantEvent = { name: 'mark2', ts: 200, ph: 'I' }; - sink.open(); - sink.write(event1); - sink.write(event2); - sink.close(); - - expect( - fs.existsSync(path.join(MEMFS_VOLUME, 'trace-test', 'trace-test.jsonl')), - ).toBe(true); - expect( - fs.existsSync(path.join(MEMFS_VOLUME, 'trace-test', 'trace-test.json')), - ).toBe(true); // close() now finalizes - - const recovered = sink.recover(); - expect(recovered.records).toStrictEqual([event1, event2]); - }); - - it('should create trace file on finalize', () => { - const sink = new FileSinkJsonTrace({ - filename: 'finalize-test', - directory: path.join(MEMFS_VOLUME, 'finalize-test'), - }); - - const event: InstantEvent = { name: 'test-event', ts: 150, ph: 'I' }; - sink.open(); - sink.write(event); - sink.finalize(); - - expect( - fs.existsSync( - path.join(MEMFS_VOLUME, 'finalize-test', 'finalize-test.json'), - ), - ).toBe(true); - const content = JSON.parse( - fs.readFileSync( - path.join(MEMFS_VOLUME, 'finalize-test', 'finalize-test.json'), - 'utf8', - ), - ); - expect(content.metadata.source).toBe('DevTools'); - expect(content.traceEvents.some((e: any) => e.name === 'test-event')).toBe( - true, - ); - }); - - it('should handle metadata in finalize', () => { - const metadata = { customField: 'value', version: '1.0' }; - const sink = new FileSinkJsonTrace({ - filename: 'metadata-test', - directory: path.join(MEMFS_VOLUME, 'metadata-test'), - metadata, - }); - sink.open(); - sink.write({ name: 'event', ts: 100, ph: 'I' } as any); - sink.finalize(); - - const content = JSON.parse( - fs.readFileSync( - path.join(MEMFS_VOLUME, 'metadata-test', 'metadata-test.json'), - 'utf8', - ), - ); - expect(content.metadata.customField).toBe('value'); - expect(content.metadata.version).toBe('1.0'); - }); - - it('should use configurable options in TraceFileSink', () => { - const testDir = path.join(MEMFS_VOLUME, 'options-test'); - const sink = new FileSinkJsonTrace({ - filename: 'options-test', - directory: testDir, - marginMs: 200, - marginDurMs: 5, - startTime: '2023-12-25T12:00:00.000Z', - }); - sink.open(); - sink.write({ name: 'event', ts: 1000, ph: 'I' } as any); - sink.finalize(); - - const content = JSON.parse( - fs.readFileSync(path.join(testDir, 'options-test.json'), 'utf8'), - ); - expect(content.metadata.startTime).toBe('2023-12-25T12:00:00.000Z'); - - const startMargin = content.traceEvents.find( - (e: any) => e.name === '[trace padding start]', - ); - expect(startMargin.ts).toBe(800); // 1000 - 200 - expect(startMargin.dur).toBe(5); - - const endMargin = content.traceEvents.find( - (e: any) => e.name === '[trace padding end]', - ); - expect(endMargin.ts).toBe(1200); // 1000 + 200 - expect(endMargin.dur).toBe(5); - }); - - it('should create file on finalize even when no events written', () => { - const testDir = path.join(MEMFS_VOLUME, 'trace-test'); - const sink = new FileSinkJsonTrace({ - filename: 'empty-test', - directory: testDir, - }); - sink.open(); // Ensure directory is created - sink.finalize(); - - expect(fs.existsSync(path.join(testDir, 'empty-test.json'))).toBe(true); // File created with margin events even for empty events - }); -}); - -describe('getTraceMetadata', () => { - it('should use provided startDate when given', () => { - const startDate = new Date('2023-01-15T10:30:00.000Z'); - const metadata = { customField: 'value' }; - - const result = getTraceMetadata(startDate, metadata); - - expect(result).toStrictEqual({ - source: 'DevTools', - startTime: '2023-01-15T10:30:00.000Z', - hardwareConcurrency: 1, - dataOrigin: 'TraceEvents', - customField: 'value', - }); - }); - - it('should use current date when startDate is undefined', () => { - const beforeTest = new Date(); - const metadata = { version: '1.0' }; - - const result = getTraceMetadata(undefined, metadata); - - const afterTest = new Date(); - expect(result.source).toBe('DevTools'); - expect(result.hardwareConcurrency).toBe(1); - expect(result.dataOrigin).toBe('TraceEvents'); - - // Verify startTime is a valid ISO string between test execution - const startTime = new Date(result.startTime); - expect(startTime.getTime()).toBeGreaterThanOrEqual(beforeTest.getTime()); - expect(startTime.getTime()).toBeLessThanOrEqual(afterTest.getTime()); - }); - - it('should use current date when startDate is null', () => { - const beforeTest = new Date(); - const metadata = { environment: 'test' }; - - const result = getTraceMetadata(undefined, metadata); - - const afterTest = new Date(); - expect(result.source).toBe('DevTools'); - expect(result.hardwareConcurrency).toBe(1); - expect(result.dataOrigin).toBe('TraceEvents'); - - // Verify startTime is a valid ISO string between test execution - const startTime = new Date(result.startTime); - expect(startTime.getTime()).toBeGreaterThanOrEqual(beforeTest.getTime()); - expect(startTime.getTime()).toBeLessThanOrEqual(afterTest.getTime()); - }); - - it('should handle empty metadata', () => { - const startDate = new Date('2023-12-25T00:00:00.000Z'); - - const result = getTraceMetadata(startDate); - - expect(result).toStrictEqual({ - source: 'DevTools', - startTime: '2023-12-25T00:00:00.000Z', - hardwareConcurrency: 1, - dataOrigin: 'TraceEvents', - }); - }); - - it('should handle both startDate and metadata undefined', () => { - const beforeTest = new Date(); - - const result = getTraceMetadata(); - - const afterTest = new Date(); - expect(result.source).toBe('DevTools'); - expect(result.hardwareConcurrency).toBe(1); - expect(result.dataOrigin).toBe('TraceEvents'); - - // Verify startTime is a valid ISO string between test execution - const startTime = new Date(result.startTime); - expect(startTime.getTime()).toBeGreaterThanOrEqual(beforeTest.getTime()); - expect(startTime.getTime()).toBeLessThanOrEqual(afterTest.getTime()); - }); -}); - -describe('FileSinkJsonTrace', () => { - beforeEach(() => { - vol.reset(); - vol.fromJSON({}, MEMFS_VOLUME); - }); - - it('close() should be idempotent', () => { - const sink = new FileSinkJsonTrace({ - filename: 'test-trace', - directory: path.join(MEMFS_VOLUME, 'test-trace'), - }); - - sink.open(); - sink.write({ - name: 'test-event', - ts: 100, - ph: 'I', - cat: 'test', - } as any); - - // First close should finalize - sink.close(); - - // Check that JSON file was created - expect( - fs.existsSync(path.join(MEMFS_VOLUME, 'test-trace', 'test-trace.json')), - ).toBe(true); - - // Second close should be no-op (idempotent) - expect(() => sink.close()).not.toThrow(); - }); - - it('finalize() should be idempotent', () => { - const sink = new FileSinkJsonTrace({ - filename: 'test-trace', - directory: path.join(MEMFS_VOLUME, 'test-trace'), - }); - - sink.open(); - sink.write({ - name: 'test-event', - ts: 100, - ph: 'I', - cat: 'test', - } as any); - - // First finalize - sink.finalize(); - expect( - fs.existsSync(path.join(MEMFS_VOLUME, 'test-trace', 'test-trace.json')), - ).toBe(true); - - const content1 = fs.readFileSync( - path.join(MEMFS_VOLUME, 'test-trace', 'test-trace.json'), - 'utf8', - ); - - // Second finalize should be no-op - sink.finalize(); - - const content2 = fs.readFileSync( - path.join(MEMFS_VOLUME, 'test-trace', 'test-trace.json'), - 'utf8', - ); - expect(content1).toBe(content2); - }); - - it('finalizeTraceFile should be idempotent', () => { - const events = [ - { - name: 'test-event', - ts: 100, - ph: 'I' as const, - cat: 'test', - }, - ]; - - const filePath = path.join(MEMFS_VOLUME, 'idempotent-test.json'); - - // Ensure directory exists - fs.mkdirSync(path.dirname(filePath), { recursive: true }); - - // First call should create file - finalizeTraceFile(events, filePath); - expect(fs.existsSync(filePath)).toBe(true); - - const content1 = fs.readFileSync(filePath, 'utf8'); - - // Second call should not overwrite (idempotent) - finalizeTraceFile(events, filePath); - - const content2 = fs.readFileSync(filePath, 'utf8'); - expect(content1).toBe(content2); - }); - - it('finalizeTraceFile should handle empty events array', () => { - const filePath = path.join(MEMFS_VOLUME, 'empty-test.json'); - - // Ensure directory exists - fs.mkdirSync(path.dirname(filePath), { recursive: true }); - - finalizeTraceFile([], filePath); - expect(fs.existsSync(filePath)).toBe(true); - - const content = fs.readFileSync(filePath, 'utf8'); - const parsed = JSON.parse(content); - - expect(parsed.traceEvents).toHaveLength(3); // Should have padding events - expect(parsed.displayTimeUnit).toBe('ms'); - expect(parsed.metadata.source).toBe('DevTools'); - }); -}); diff --git a/packages/utils/src/lib/profiler/profiler.ts b/packages/utils/src/lib/profiler/profiler.ts index 130e28c44..54c9b33b5 100644 --- a/packages/utils/src/lib/profiler/profiler.ts +++ b/packages/utils/src/lib/profiler/profiler.ts @@ -1,5 +1,9 @@ +import type { PerformanceEntry } from 'node:perf_hooks'; import process from 'node:process'; +import { threadId } from 'node:worker_threads'; import { isEnvVarEnabled } from '../env.js'; +import { installExitHandlers } from '../exit-process.js'; +import { PerformanceObserverSink } from '../performance-observer.js'; import { type ActionTrackConfigs, type MeasureCtxOptions, @@ -14,25 +18,16 @@ import type { DevToolsColor, EntryMeta, } from '../user-timing-extensibility-api.type.js'; -import { PROFILER_ENABLED_ENV_VAR } from './constants.js'; - -/** - * Configuration options for creating a Profiler instance. - * - * @template T - Record type defining available track names and their configurations - */ -type ProfilerMeasureOptions = - MeasureCtxOptions & { - /** Custom track configurations that will be merged with default settings */ - tracks?: Record>; - /** Whether profiling should be enabled (defaults to CP_PROFILING env var) */ - enabled?: boolean; - }; - -/** - * Options for creating a performance marker. - */ -export type MarkerOptions = EntryMeta & { color?: DevToolsColor }; +import { + PROFILER_DIRECTORY, + PROFILER_ENABLED_ENV_VAR, + PROFILER_ORIGIN_PID_ENV_VAR, +} from './constants.js'; +import { entryToTraceEvents } from './trace-file-utils.js'; +import type { UserTimingTraceEvent } from './trace-file.type.js'; +import { traceEventWalFormat } from './wal-json-trace.js'; +import { ShardedWal, WriteAheadLogFile } from './wal.js'; +import type { WalFormat } from './wal.js'; /** * Options for configuring a Profiler instance. @@ -49,7 +44,15 @@ export type MarkerOptions = EntryMeta & { color?: DevToolsColor }; * @property tracks - Custom track configurations merged with defaults */ export type ProfilerOptions = - ProfilerMeasureOptions; + MeasureCtxOptions & { + tracks?: Record>; + enabled?: boolean; + }; + +/** + * Options for creating a performance marker. + */ +export type MarkerOptions = EntryMeta & { color?: DevToolsColor }; /** * Performance profiler that creates structured timing measurements with Chrome DevTools Extensibility API payloads. @@ -77,6 +80,11 @@ export class Profiler { * */ constructor(options: ProfilerOptions) { + // Initialize origin PID early - must happen before user code runs + if (!process.env[PROFILER_ORIGIN_PID_ENV_VAR]) { + process.env[PROFILER_ORIGIN_PID_ENV_VAR] = String(process.pid); + } + const { tracks, prefix, enabled, ...defaults } = options; const dataType = 'track-entry'; @@ -226,3 +234,75 @@ export class Profiler { } } } + +/** + * Determines if this process is the leader WAL process using the origin PID heuristic. + * + * The leader is the process that first enabled profiling (the one that set CP_PROFILER_ORIGIN_PID). + * All descendant processes inherit the environment but have different PIDs. + * + * @returns true if this is the leader WAL process, false otherwise + */ +export function isLeaderWal(): boolean { + return process.env[PROFILER_ORIGIN_PID_ENV_VAR] === String(process.pid); +} + +export class NodeProfiler< + TracksConfig extends ActionTrackConfigs = ActionTrackConfigs, + CodecOutput extends string | object = UserTimingTraceEvent, +> extends Profiler { + #shard: WriteAheadLogFile; + #perfObserver: PerformanceObserverSink; + #shardWal: ShardedWal; + readonly #format: WalFormat; + constructor( + options: ProfilerOptions & { + directory?: string; + performanceEntryEncode: (entry: PerformanceEntry) => CodecOutput[]; + format: WalFormat; + }, + ) { + const { + directory = PROFILER_DIRECTORY, + performanceEntryEncode, + format, + } = options; + super(options); + const shardId = `${process.pid}-${threadId}`; + + this.#format = format; + this.#shardWal = new ShardedWal(directory, format); + this.#shard = this.#shardWal.shard(shardId); + + this.#perfObserver = new PerformanceObserverSink({ + sink: this.#shard, + encode: performanceEntryEncode, + buffered: true, + flushThreshold: 100, + }); + + installExitHandlers({ + onExit: () => { + this.#perfObserver.flush(); + this.#perfObserver.unsubscribe(); + this.#shard.close(); + if (isLeaderWal()) { + this.#shardWal.finalize(); + this.#shardWal.cleanup(); + } + }, + }); + } + + getFinalPath() { + return this.#format.finalPath(); + } +} + +export const profiler = new NodeProfiler({ + prefix: 'cp', + track: 'CLI', + trackGroup: 'Code Pushup', + performanceEntryEncode: entryToTraceEvents, + format: traceEventWalFormat(), +}); diff --git a/packages/utils/src/lib/profiler/profiler.unit.test.ts b/packages/utils/src/lib/profiler/profiler.unit.test.ts index 0e285deb2..8c8e0409a 100644 --- a/packages/utils/src/lib/profiler/profiler.unit.test.ts +++ b/packages/utils/src/lib/profiler/profiler.unit.test.ts @@ -1,7 +1,12 @@ import { performance } from 'node:perf_hooks'; import { beforeEach, describe, expect, it, vi } from 'vitest'; import type { ActionTrackEntryPayload } from '../user-timing-extensibility-api.type.js'; -import { Profiler, type ProfilerOptions } from './profiler.js'; +import { + Profiler, + type ProfilerOptions, + isLeaderWal, + profiler, +} from './profiler.js'; describe('Profiler', () => { const getProfiler = (overrides?: Partial) => @@ -11,7 +16,7 @@ describe('Profiler', () => { ...overrides, }); - let profiler: Profiler>; + let profilerInstance: Profiler>; beforeEach(() => { performance.clearMarks(); @@ -19,7 +24,7 @@ describe('Profiler', () => { // eslint-disable-next-line functional/immutable-data delete process.env.CP_PROFILING; - profiler = getProfiler(); + profilerInstance = getProfiler(); }); it('constructor should initialize with default enabled state from env', () => { @@ -119,28 +124,28 @@ describe('Profiler', () => { }); it('isEnabled should set and get enabled state', () => { - expect(profiler.isEnabled()).toBe(false); + expect(profilerInstance.isEnabled()).toBe(false); - profiler.setEnabled(true); - expect(profiler.isEnabled()).toBe(true); + profilerInstance.setEnabled(true); + expect(profilerInstance.isEnabled()).toBe(true); - profiler.setEnabled(false); - expect(profiler.isEnabled()).toBe(false); + profilerInstance.setEnabled(false); + expect(profilerInstance.isEnabled()).toBe(false); }); it('isEnabled should update environment variable', () => { - profiler.setEnabled(true); + profilerInstance.setEnabled(true); expect(process.env.CP_PROFILING).toBe('true'); - profiler.setEnabled(false); + profilerInstance.setEnabled(false); expect(process.env.CP_PROFILING).toBe('false'); }); it('marker should execute without error when enabled', () => { - profiler.setEnabled(true); + profilerInstance.setEnabled(true); expect(() => { - profiler.marker('test-marker', { + profilerInstance.marker('test-marker', { color: 'primary', tooltipText: 'Test marker', properties: [['key', 'value']], @@ -164,10 +169,10 @@ describe('Profiler', () => { }); it('marker should execute without error when disabled', () => { - profiler.setEnabled(false); + profilerInstance.setEnabled(false); expect(() => { - profiler.marker('test-marker'); + profilerInstance.marker('test-marker'); }).not.toThrow(); const marks = performance.getEntriesByType('mark'); @@ -233,10 +238,12 @@ describe('Profiler', () => { performance.clearMarks(); performance.clearMeasures(); - profiler.setEnabled(true); + profilerInstance.setEnabled(true); const workFn = vi.fn(() => 'result'); - const result = profiler.measure('test-event', workFn, { color: 'primary' }); + const result = profilerInstance.measure('test-event', workFn, { + color: 'primary', + }); expect(result).toBe('result'); expect(workFn).toHaveBeenCalled(); @@ -280,9 +287,9 @@ describe('Profiler', () => { }); it('measure should execute work directly when disabled', () => { - profiler.setEnabled(false); + profilerInstance.setEnabled(false); const workFn = vi.fn(() => 'result'); - const result = profiler.measure('test-event', workFn); + const result = profilerInstance.measure('test-event', workFn); expect(result).toBe('result'); expect(workFn).toHaveBeenCalled(); @@ -295,40 +302,44 @@ describe('Profiler', () => { }); it('measure should propagate errors when enabled', () => { - profiler.setEnabled(true); + profilerInstance.setEnabled(true); const error = new Error('Test error'); const workFn = vi.fn(() => { throw error; }); - expect(() => profiler.measure('test-event', workFn)).toThrow(error); + expect(() => profilerInstance.measure('test-event', workFn)).toThrow(error); expect(workFn).toHaveBeenCalled(); }); it('measure should propagate errors when disabled', () => { - profiler.setEnabled(false); + profilerInstance.setEnabled(false); const error = new Error('Test error'); const workFn = vi.fn(() => { throw error; }); - expect(() => profiler.measure('test-event', workFn)).toThrow(error); + expect(() => profilerInstance.measure('test-event', workFn)).toThrow(error); expect(workFn).toHaveBeenCalled(); }); it('measureAsync should handle async operations correctly when enabled', async () => { - profiler.setEnabled(true); + profilerInstance.setEnabled(true); const workFn = vi.fn(async () => { await Promise.resolve(); return 'async-result'; }); - const result = await profiler.measureAsync('test-async-event', workFn, { - color: 'primary', - }); + const result = await profilerInstance.measureAsync( + 'test-async-event', + workFn, + { + color: 'primary', + }, + ); expect(result).toBe('async-result'); expect(workFn).toHaveBeenCalled(); @@ -375,14 +386,17 @@ describe('Profiler', () => { }); it('measureAsync should execute async work directly when disabled', async () => { - profiler.setEnabled(false); + profilerInstance.setEnabled(false); const workFn = vi.fn(async () => { await Promise.resolve(); return 'async-result'; }); - const result = await profiler.measureAsync('test-async-event', workFn); + const result = await profilerInstance.measureAsync( + 'test-async-event', + workFn, + ); expect(result).toBe('async-result'); expect(workFn).toHaveBeenCalled(); @@ -395,7 +409,7 @@ describe('Profiler', () => { }); it('measureAsync should propagate async errors when enabled', async () => { - profiler.setEnabled(true); + profilerInstance.setEnabled(true); const error = new Error('Async test error'); const workFn = vi.fn(async () => { @@ -404,13 +418,13 @@ describe('Profiler', () => { }); await expect( - profiler.measureAsync('test-async-event', workFn), + profilerInstance.measureAsync('test-async-event', workFn), ).rejects.toThrow(error); expect(workFn).toHaveBeenCalled(); }); it('measureAsync should propagate async errors when disabled', async () => { - profiler.setEnabled(false); + profilerInstance.setEnabled(false); const error = new Error('Async test error'); const workFn = vi.fn(async () => { @@ -419,8 +433,96 @@ describe('Profiler', () => { }); await expect( - profiler.measureAsync('test-async-event', workFn), + profilerInstance.measureAsync('test-async-event', workFn), ).rejects.toThrow(error); expect(workFn).toHaveBeenCalled(); }); }); + +describe('NodeProfiler', () => { + it('should export profiler instance with NodeProfiler methods', () => { + expect(profiler).toBeDefined(); + expect(profiler).toBeInstanceOf(Profiler); + expect(typeof profiler.getFinalPath).toBe('function'); + expect(profiler.getFinalPath()).toBe('trace.json'); + }); +}); + +describe('Profiler constructor - origin PID initialization', () => { + const originalEnv = { ...process.env }; + const mockPid = 12345; + + beforeEach(() => { + // Reset environment variables before each test + vi.unstubAllEnvs(); + // eslint-disable-next-line functional/immutable-data + process.env = { ...originalEnv }; + // Mock process.pid for consistent testing + vi.spyOn(process, 'pid', 'get').mockReturnValue(mockPid); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + it('should set CP_PROFILER_ORIGIN_PID if not already set', () => { + // eslint-disable-next-line functional/immutable-data + delete process.env.CP_PROFILER_ORIGIN_PID; + + new Profiler({ prefix: 'test', track: 'test-track' }); + + expect(process.env.CP_PROFILER_ORIGIN_PID).toBe(String(mockPid)); + }); + + it('should not override existing CP_PROFILER_ORIGIN_PID', () => { + const existingPid = '99999'; + vi.stubEnv('CP_PROFILER_ORIGIN_PID', existingPid); + + new Profiler({ prefix: 'test', track: 'test-track' }); + + expect(process.env.CP_PROFILER_ORIGIN_PID).toBe(existingPid); + }); +}); + +describe('isLeaderWal', () => { + const originalEnv = { ...process.env }; + const mockPid = 12345; + + beforeEach(() => { + // Reset environment variables before each test + vi.unstubAllEnvs(); + // eslint-disable-next-line functional/immutable-data + process.env = { ...originalEnv }; + // Mock process.pid for consistent testing + vi.spyOn(process, 'pid', 'get').mockReturnValue(mockPid); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + it('should return true when CP_PROFILER_ORIGIN_PID matches current process PID', () => { + vi.stubEnv('CP_PROFILER_ORIGIN_PID', String(mockPid)); + + expect(isLeaderWal()).toBe(true); + }); + + it('should return false when CP_PROFILER_ORIGIN_PID does not match current process PID', () => { + vi.stubEnv('CP_PROFILER_ORIGIN_PID', '99999'); // Different PID + + expect(isLeaderWal()).toBe(false); + }); + + it('should return false when CP_PROFILER_ORIGIN_PID is not set', () => { + // eslint-disable-next-line functional/immutable-data + delete process.env.CP_PROFILER_ORIGIN_PID; + + expect(isLeaderWal()).toBe(false); + }); + + it('should handle string PID values correctly', () => { + vi.stubEnv('CP_PROFILER_ORIGIN_PID', String(mockPid)); + + expect(isLeaderWal()).toBe(true); + }); +}); diff --git a/packages/utils/src/lib/profiler/trace-file-utils.ts b/packages/utils/src/lib/profiler/trace-file-utils.ts index 6fdc3b6b2..32abf0af5 100644 --- a/packages/utils/src/lib/profiler/trace-file-utils.ts +++ b/packages/utils/src/lib/profiler/trace-file-utils.ts @@ -1,7 +1,10 @@ -import type { PerformanceMark, PerformanceMeasure } from 'node:perf_hooks'; +import type { + PerformanceEntry, + PerformanceMark, + PerformanceMeasure, +} from 'node:perf_hooks'; import { threadId } from 'node:worker_threads'; import { defaultClock } from '../clock-epoch.js'; -import { JsonCodec } from '../file-sink.js'; import type { UserTimingDetail } from '../user-timing-extensibility-api.type.js'; import type { BeginEvent, @@ -15,6 +18,7 @@ import type { TraceEvent, TraceEventContainer, TraceEventRaw, + TraceMetadata, UserTimingTraceEvent, } from './trace-file.type.js'; @@ -252,6 +256,23 @@ export const measureToSpanEvents = ( args: entry.detail ? { data: { detail: entry.detail } } : undefined, }); +/** + * Converts a PerformanceEntry to an array of UserTimingTraceEvents. + * A mark is converted to an instant event, and a measure is converted to a pair of span events. + * Other entry types are ignored. + * @param entry - Performance entry + * @returns UserTimingTraceEvent[] + */ +export function entryToTraceEvents(entry: PerformanceEntry) { + if (entry.entryType === 'mark') { + return [markToInstantEvent(entry as PerformanceMark)]; + } + if (entry.entryType === 'measure') { + return measureToSpanEvents(entry as PerformanceMeasure); + } + return []; +} + export function getTraceMetadata( startDate?: Date, metadata?: Record, @@ -273,14 +294,11 @@ export function getTraceMetadata( export const getTraceFile = (opt: { traceEvents: TraceEvent[]; startTime?: string; + metadata?: Partial; }): TraceEventContainer => ({ traceEvents: opt.traceEvents, displayTimeUnit: 'ms', - metadata: { - source: 'Node.js UserTiming', - startTime: opt.startTime ?? new Date().toISOString(), - hardwareConcurrency: 1, - }, + metadata: getTraceMetadata(new Date(), opt.metadata), }); function processDetail( @@ -298,7 +316,9 @@ function processDetail( export function decodeDetail(target: { detail: string }): UserTimingDetail { return processDetail(target, detail => - typeof detail === 'string' ? JsonCodec.decode(detail) : detail, + typeof detail === 'string' + ? (JSON.parse(detail) as string | object) + : detail, ) as UserTimingDetail; } @@ -307,14 +327,17 @@ export function encodeDetail(target: UserTimingDetail): UserTimingDetail { target as UserTimingDetail & { detail?: unknown }, (detail: string | object) => typeof detail === 'object' - ? JsonCodec.encode(detail as UserTimingDetail) + ? JSON.stringify(detail as UserTimingDetail) : detail, ) as UserTimingDetail; } -export function decodeTraceEvent({ args, ...rest }: TraceEventRaw): TraceEvent { +export function decodeTraceEvent({ + args, + ...rest +}: TraceEventRaw): UserTimingTraceEvent { if (!args) { - return rest as TraceEvent; + return rest as UserTimingTraceEvent; } const processedArgs = decodeDetail(args as { detail: string }); @@ -326,10 +349,10 @@ export function decodeTraceEvent({ args, ...rest }: TraceEventRaw): TraceEvent { ...processedArgs, data: decodeDetail(args.data as { detail: string }), }, - } as TraceEvent; + } as UserTimingTraceEvent; } // eslint-disable-next-line @typescript-eslint/consistent-type-assertions - return { ...rest, args: processedArgs } as TraceEvent; + return { ...rest, args: processedArgs } as UserTimingTraceEvent; } export function encodeTraceEvent({ diff --git a/packages/utils/src/lib/profiler/wal-json-trace.ts b/packages/utils/src/lib/profiler/wal-json-trace.ts new file mode 100644 index 000000000..90761805c --- /dev/null +++ b/packages/utils/src/lib/profiler/wal-json-trace.ts @@ -0,0 +1,92 @@ +import { performance } from 'node:perf_hooks'; +import { + decodeTraceEvent, + encodeTraceEvent, + getCompleteEvent, + getInstantEventTracingStartedInBrowser, + getTraceFile, +} from './trace-file-utils.js'; +import type { TraceEvent, UserTimingTraceEvent } from './trace-file.type.js'; +import type { WalFormat } from './wal.js'; + +/** Name for the trace start margin event */ +const TRACE_START_MARGIN_NAME = '[trace padding start]'; +/** Name for the trace end margin event */ +const TRACE_END_MARGIN_NAME = '[trace padding end]'; +/** Milliseconds of padding to add before/after trace events */ +const TRACE_MARGIN_MS = 1000; +/** Duration in milliseconds for margin events */ +const TRACE_MARGIN_DURATION_MS = 20; + +export function generateTraceContent( + events: UserTimingTraceEvent[], + metadata?: Record, +): string { + const startTime = new Date().toISOString(); + const traceContainer = getTraceFile({ + traceEvents: events, + startTime: new Date().toISOString(), + metadata, + }); + + if (events.length === 0) { + return JSON.stringify(traceContainer); + } + + const marginMs = TRACE_MARGIN_MS; + const marginDurMs = TRACE_MARGIN_DURATION_MS; + + const sortedEvents = [...events].sort((a, b) => a.ts - b.ts); + const fallbackTs = performance.now(); + const firstTs: number = sortedEvents.at(0)?.ts ?? fallbackTs; + const lastTs: number = sortedEvents.at(-1)?.ts ?? fallbackTs; + + const startTs = firstTs - marginMs; + const endTs = lastTs + marginMs; + + const traceEvents: TraceEvent[] = [ + getInstantEventTracingStartedInBrowser({ + ts: startTs, + url: 'generated-trace', + }), + getCompleteEvent({ + name: TRACE_START_MARGIN_NAME, + ts: startTs, + dur: marginDurMs, + }), + ...sortedEvents, + getCompleteEvent({ + name: TRACE_END_MARGIN_NAME, + ts: endTs, + dur: marginDurMs, + }), + ]; + + return JSON.stringify({ ...traceContainer, traceEvents }); +} + +/** + * WAL format for Chrome DevTools trace files. + * Automatically finalizes shards into complete trace files with proper metadata. + */ +export const traceEventWalFormat = < + T extends UserTimingTraceEvent = UserTimingTraceEvent, +>(_opt?: { + dir?: string; +}) => { + const baseName = 'trace'; + const walExtension = '.jsonl'; + const finalExtension = '.json'; + return { + baseName, + walExtension, + finalExtension, + codec: { + encode: event => JSON.stringify(encodeTraceEvent(event)), + decode: (json: string) => decodeTraceEvent(JSON.parse(json)) as T, + }, + shardPath: (id: string) => `${baseName}.${id}${walExtension}`, + finalPath: () => `${baseName}${finalExtension}`, + finalizer: (records, metadata) => generateTraceContent(records, metadata), + } satisfies WalFormat; +}; diff --git a/packages/utils/src/lib/profiler/wal-json-trace.unit.test.ts b/packages/utils/src/lib/profiler/wal-json-trace.unit.test.ts new file mode 100644 index 000000000..1a9740c40 --- /dev/null +++ b/packages/utils/src/lib/profiler/wal-json-trace.unit.test.ts @@ -0,0 +1,317 @@ +import { describe, expect, it } from 'vitest'; +import type { UserTimingTraceEvent } from './trace-file.type.js'; +import { generateTraceContent, traceEventWalFormat } from './wal-json-trace.js'; + +describe('generateTraceContent', () => { + it('should generate trace content for empty events array', () => { + const events: UserTimingTraceEvent[] = []; + const metadata = { version: '1.0.0', generatedAt: '2024-01-01T00:00:00Z' }; + + const result = generateTraceContent(events, metadata); + + const parsed = JSON.parse(result); + expect(parsed).toStrictEqual({ + traceEvents: [ + expect.objectContaining({ + name: 'TracingStartedInBrowser', + ph: 'i', + cat: 'devtools.timeline', + args: { + data: expect.objectContaining({ + frames: expect.arrayContaining([ + expect.objectContaining({ + url: 'empty-trace', + }), + ]), + }), + }, + }), + expect.objectContaining({ + name: '[trace padding start]', + ph: 'X', + dur: 20, + cat: 'devtools.timeline', + }), + expect.objectContaining({ + name: '[trace padding end]', + ph: 'X', + dur: 20, + cat: 'devtools.timeline', + }), + ], + displayTimeUnit: 'ms', + metadata: { + source: 'DevTools', + startTime: expect.any(String), + hardwareConcurrency: 1, + dataOrigin: 'TraceEvents', + version: '1.0.0', + generatedAt: '2024-01-01T00:00:00Z', + }, + }); + }); + + it('should generate trace content for non-empty events array', () => { + const events: UserTimingTraceEvent[] = [ + { + name: 'cp:test-operation:start', + ph: 'i', + ts: 1000, + pid: 123, + tid: 456, + cat: 'blink.user_timing', + args: { + dataType: 'track-entry', + track: 'Test Track', + trackGroup: 'Test Group', + }, + }, + { + name: 'cp:test-operation:end', + ph: 'i', + ts: 2000, + pid: 123, + tid: 456, + cat: 'blink.user_timing', + args: { + dataType: 'track-entry', + track: 'Test Track', + trackGroup: 'Test Group', + }, + }, + ]; + + const result = generateTraceContent(events); + + const parsed = JSON.parse(result); + expect(parsed).toStrictEqual({ + traceEvents: [ + expect.objectContaining({ + name: 'TracingStartedInBrowser', + ph: 'i', + cat: 'devtools.timeline', + args: { + data: expect.objectContaining({ + frames: expect.arrayContaining([ + expect.objectContaining({ + url: 'generated-trace', + }), + ]), + }), + }, + }), + expect.objectContaining({ + name: '[trace padding start]', + ph: 'X', + dur: 20, + cat: 'devtools.timeline', + }), + ...events, + expect.objectContaining({ + name: '[trace padding end]', + ph: 'X', + dur: 20, + cat: 'devtools.timeline', + }), + ], + displayTimeUnit: 'ms', + metadata: { + source: 'DevTools', + startTime: expect.any(String), + hardwareConcurrency: 1, + dataOrigin: 'TraceEvents', + }, + }); + }); + + it('should sort events by timestamp', () => { + const events: UserTimingTraceEvent[] = [ + { + name: 'cp:second-operation', + ph: 'i', + ts: 2000, + pid: 123, + tid: 456, + cat: 'blink.user_timing', + args: { dataType: 'track-entry' }, + }, + { + name: 'cp:first-operation', + ph: 'i', + ts: 1000, + pid: 123, + tid: 456, + cat: 'blink.user_timing', + args: { dataType: 'track-entry' }, + }, + ]; + + const result = generateTraceContent(events); + + const parsed = JSON.parse(result); + expect(parsed.traceEvents[2]).toStrictEqual( + expect.objectContaining({ name: 'cp:first-operation', ts: 1000 }), + ); + expect(parsed.traceEvents[3]).toStrictEqual( + expect.objectContaining({ name: 'cp:second-operation', ts: 2000 }), + ); + }); + + it('should handle single event with proper margin calculation', () => { + const events: UserTimingTraceEvent[] = [ + { + name: 'cp:single-event', + ph: 'i', + ts: 5000, + pid: 123, + tid: 456, + cat: 'blink.user_timing', + args: { dataType: 'track-entry' }, + }, + ]; + + const result = generateTraceContent(events); + + const parsed = JSON.parse(result); + const traceEvents = parsed.traceEvents; + + // First event should be tracing started + expect(traceEvents[0]).toStrictEqual( + expect.objectContaining({ + name: 'TracingStartedInBrowser', + args: { + data: expect.objectContaining({ + frames: expect.arrayContaining([ + expect.objectContaining({ + url: 'generated-trace', + }), + ]), + }), + }, + }), + ); + + // Second should be start margin at ts - 1000 + expect(traceEvents[1]).toStrictEqual( + expect.objectContaining({ + name: '[trace padding start]', + ph: 'X', + dur: 20, + }), + ); + + // Third should be the actual event + expect(traceEvents[2]).toStrictEqual(events[0]); + + // Fourth should be end margin at lastTs + 1000 + expect(traceEvents[3]).toStrictEqual( + expect.objectContaining({ + name: '[trace padding end]', + ph: 'X', + dur: 20, + }), + ); + }); +}); + +describe('traceEventWalFormat', () => { + it('should create WAL format with default directory', () => { + const format = traceEventWalFormat(); + + expect(format).toStrictEqual({ + baseName: 'trace', + walExtension: '.jsonl', + finalExtension: '.json', + shardPath: expect.any(Function), + finalPath: expect.any(Function), + codec: { + encode: expect.any(Function), + decode: expect.any(Function), + }, + finalizer: expect.any(Function), + }); + }); + + it('should create WAL format with custom directory', () => { + const format = traceEventWalFormat({ dir: '/custom/path' }); + + expect(format.baseName).toBe('trace'); + expect(format.walExtension).toBe('.jsonl'); + expect(format.finalExtension).toBe('.json'); + }); + + it('should generate correct shard paths', () => { + const format = traceEventWalFormat(); + + expect(format.shardPath('shard-1')).toBe('trace.shard-1.jsonl'); + expect(format.shardPath('process-123-thread-456')).toBe( + 'trace.process-123-thread-456.jsonl', + ); + }); + + it('should generate correct final path', () => { + const format = traceEventWalFormat(); + + expect(format.finalPath()).toBe('trace.json'); + }); + + it('should encode and decode trace events correctly', () => { + const format = traceEventWalFormat(); + const testEvent: UserTimingTraceEvent = { + name: 'cp:test-event', + ph: 'i', + ts: 123_456_789, + pid: 123, + tid: 456, + cat: 'blink.user_timing', + args: { + dataType: 'track-entry', + track: 'Test Track', + }, + }; + + const encoded = format.codec.encode(testEvent); + expect(typeof encoded).toBe('string'); + + const decoded = format.codec.decode(encoded); + expect(decoded).toStrictEqual(testEvent); + }); + + it('should finalize records into trace content', () => { + const format = traceEventWalFormat(); + const records: UserTimingTraceEvent[] = [ + { + name: 'cp:operation:start', + ph: 'i', + ts: 1000, + pid: 123, + tid: 456, + cat: 'blink.user_timing', + args: { dataType: 'track-entry' }, + }, + ]; + + const result = format.finalizer(records); + + expect(typeof result).toBe('string'); + const parsed = JSON.parse(result); + expect(parsed).toHaveProperty('traceEvents'); + expect(parsed).toHaveProperty('metadata'); + expect(Array.isArray(parsed.traceEvents)).toBe(true); + }); + + it('should include generatedAt in finalizer metadata', () => { + const format = traceEventWalFormat(); + const records: UserTimingTraceEvent[] = []; + + const result = format.finalizer(records); + const parsed = JSON.parse(result); + + expect(parsed.metadata).toHaveProperty('generatedAt'); + expect(typeof parsed.metadata.generatedAt).toBe('string'); + // Should be recent timestamp + expect(new Date(parsed.metadata.generatedAt).getTime()).toBeGreaterThan( + Date.now() - 10_000, + ); + }); +}); diff --git a/packages/utils/src/lib/profiler/wal.ts b/packages/utils/src/lib/profiler/wal.ts new file mode 100644 index 000000000..f7803f499 --- /dev/null +++ b/packages/utils/src/lib/profiler/wal.ts @@ -0,0 +1,356 @@ +import * as fs from 'node:fs'; +import path from 'node:path'; + +/** + * Codec for encoding/decoding values to/from strings for WAL storage. + * Used to serialize/deserialize records written to and read from WAL files. + */ +export type Codec = { + /** Encode a value to a string for storage */ + encode: (v: I) => O; + /** Decode a string back to the original value type */ + decode: (data: O) => I; +}; + +export type InvalidEntry = { __invalid: true; raw: O }; + +/** + * Result of recovering records from a WAL file. + * Contains successfully recovered records and any errors encountered during parsing. + */ +export type RecoverResult = { + /** Successfully recovered records */ + records: T[]; + /** Errors encountered during recovery with line numbers and context */ + errors: { lineNo: number; line: string; error: Error }[]; + /** Last incomplete line if file was truncated (null if clean) */ + partialTail: string | null; +}; + +export const createTolerantCodec = ( + codecOrEncode: ((v: I) => O) | { encode: (v: I) => O; decode: (d: O) => I }, + decode?: (d: O) => I, +): Codec, O> => { + let encodeFn: (v: I) => O; + let decodeFn: (d: O) => I; + + if (typeof codecOrEncode === 'function') { + // Called with separate encode/decode functions + encodeFn = codecOrEncode; + decodeFn = decode!; + } else { + // Called with codec object + encodeFn = codecOrEncode.encode; + decodeFn = codecOrEncode.decode; + } + + return { + encode: v => + v && typeof v === 'object' && '__invalid' in v + ? (v as InvalidEntry).raw + : encodeFn(v as I), + decode: d => { + try { + return decodeFn(d); + } catch { + return { __invalid: true, raw: d }; + } + }, + }; +}; + +export function filterValidRecords( + records: (T | InvalidEntry)[], +): T[] { + return records + .filter( + (r): r is T => !(typeof r === 'object' && r !== null && '__invalid' in r), + ) + .map(r => r as T); +} + +/** + * Pure helper function to recover records from WAL file content. + * @param content - Raw file content as string + * @param codec - Codec for decoding records + * @returns Recovery result with records, errors, and partial tail + */ +export function recoverFromContent( + content: string, + decode: Codec['decode'], +): RecoverResult { + const lines = content.split('\n'); + const clean = content.endsWith('\n'); + + const out = lines.slice(0, -1).reduce( + (a, l, i) => { + if (!l) { + return a; + } + try { + return { + ...a, + records: [...a.records, decode(l)], + }; + } catch (error) { + return { + ...a, + errors: [ + ...a.errors, + { lineNo: i + 1, line: l, error: error as Error }, + ], + }; + } + }, + { records: [] as T[], errors: [] as RecoverResult['errors'] }, + ); + + const tail = lines.at(-1); + return { + ...out, + partialTail: clean || !tail ? null : tail, + }; +} + +/** + * Write-Ahead Log implementation for crash-safe append-only logging. + * Provides atomic operations for writing, recovering, and repacking log entries. + */ +export class WriteAheadLogFile { + #fd: number | null = null; + readonly #file: string; + readonly #decode: Codec>['decode']; + readonly #encode: Codec>['encode']; + + /** + * Create a new WAL file instance. + * @param options - Configuration options + */ + constructor(options: { file: string; codec: Codec }) { + this.#file = options.file; + const c = createTolerantCodec(options.codec); + this.#decode = c.decode; + this.#encode = c.encode; + } + + /** Get the file path for this WAL */ + get path() { + return this.#file; + } + + /** Get the file path for this WAL */ + getPath = () => this.#file; + + /** Open the WAL file for writing (creates directories if needed) */ + open = () => { + if (this.#fd) { + return; + } + fs.mkdirSync(path.dirname(this.#file), { recursive: true }); + this.#fd = fs.openSync(this.#file, 'a'); + }; + + /** + * Append a record to the WAL. + * @param v - Record to append + * @throws Error if WAL is not opened + */ + append = (v: T) => { + if (!this.#fd) { + throw new Error('WAL not opened'); + } + fs.writeSync(this.#fd, `${this.#encode(v)}\n`); + }; + + /** Close the WAL file */ + close = () => { + if (this.#fd) { + fs.closeSync(this.#fd); + } + this.#fd = null; + }; + + isClosed = () => this.#fd == null; + + /** + * Recover all records from the WAL file. + * Handles partial writes and decode errors gracefully. + * @returns Recovery result with records, errors, and partial tail + */ + recover(): RecoverResult> { + if (!fs.existsSync(this.#file)) { + return { records: [], errors: [], partialTail: null }; + } + + const txt = fs.readFileSync(this.#file, 'utf8'); + return recoverFromContent>(txt, this.#decode); + } + + /** + * Repack the WAL by recovering all valid records and rewriting cleanly. + * Removes corrupted entries and ensures clean formatting. + * @param out - Output path (defaults to current file) + * @throws Error if recovery encounters decode errors + */ + repack(out = this.#file) { + this.close(); + const r = this.recover(); + if (r.errors.length > 0) { + console.log(`Repack failed`); + } + const validRecords = filterValidRecords(r.records); + fs.mkdirSync(path.dirname(out), { recursive: true }); + fs.writeFileSync( + out, + `${validRecords.map(v => this.#encode(v)).join('\n')}\n`, + ); + } +} + +/** + * Format descriptor that binds codec and file extension together. + * Prevents misconfiguration by keeping related concerns in one object. + */ +export type WalFormat = { + /** Base name for the WAL (e.g., "trace") */ + baseName: string; + /** Shard file extension (e.g., ".jsonl") */ + walExtension: string; + /** Final file extension (e.g., ".json", ".trace.json") falls back to walExtension if not provided */ + finalExtension: string; + /** Codec for encoding/decoding records */ + codec: Codec; + /** Function to generate shard file paths */ + shardPath: (id: string) => string; + /** Function to generate final merged file path */ + finalPath: () => string; + /** Finalizer for converting records to a string */ + finalizer: (records: T[], opt?: Record) => string; +}; + +export const stringCodec = < + T extends string | object = string, +>(): Codec => ({ + encode: v => (typeof v === 'string' ? v : JSON.stringify(v)), + decode: v => { + try { + return JSON.parse(v) as T; + } catch { + return v as T; + } + }, +}); + +/** + * Parses a partial WalFormat configuration and returns a complete WalFormat object. + * All fallback values are targeting string types. + * - baseName defaults to Date.now().toString() + * - walExtension defaults to '.log' + * - finalExtension defaults to '.log' + * - codec defaults to stringCodec() + * - shardPath defaults to (id: string) => `${baseName}.${id}${walExtension}` + * - finalPath defaults to () => `${baseName}${finalExtension}` + * - finalizer defaults to (encodedRecords: T[]) => `${encodedRecords.join('\n')}\n` + * @param format - Partial WalFormat configuration + * @returns Parsed WalFormat with defaults filled in + */ +export function parseWalFormat( + format: Partial>, +): WalFormat { + const { + baseName = Date.now().toString(), + walExtension = '.log', + finalExtension = walExtension, + codec = stringCodec(), + shardPath = (id: string) => `${baseName}.${id}.${walExtension}`, + finalPath = () => `${baseName}.${finalExtension}`, + finalizer = (encodedRecords: T[]) => `${encodedRecords.join('\n')}\n`, + } = format; + + return { + baseName, + walExtension, + finalExtension, + codec, + shardPath, + finalPath, + finalizer, + } satisfies WalFormat; +} + +/** + * Sharded Write-Ahead Log manager for coordinating multiple WAL shards. + * Handles distributed logging across multiple processes/files with atomic finalization. + */ + +export class ShardedWal { + readonly #format: WalFormat; + readonly #dir: string; + + /** + * Create a sharded WAL manager. + */ + constructor(dir: string, format: Partial>) { + this.#dir = dir; + this.#format = parseWalFormat(format); + } + + shard(id: string) { + return new WriteAheadLogFile({ + file: path.join(this.#dir, this.#format.shardPath(id)), + codec: this.#format.codec, + }); + } + + /** Get all shard file paths matching this WAL's base name */ + private shardFiles() { + return fs.existsSync(this.#dir) + ? fs + .readdirSync(this.#dir) + .filter( + f => + f.startsWith(`${this.#format.baseName}.`) && + f.endsWith(this.#format.walExtension), + ) + .map(f => path.join(this.#dir, f)) + : []; + } + + /** + * Finalize all shards by merging them into a single output file. + * Recovers all records from all shards, validates no errors, and writes merged result. + * @throws Error if any shard contains decode errors + */ + finalize(opt?: Record) { + const fileRecoveries = this.shardFiles().map(f => ({ + file: f, + recovery: new WriteAheadLogFile({ + file: f, + codec: this.#format.codec, + }).recover(), + })); + + const records = fileRecoveries.flatMap(({ recovery }) => recovery.records); + const errors = fileRecoveries.flatMap(({ file, recovery }) => + recovery.errors.map(e => ({ + ...e, + line: `${path.basename(file)}:${e.line}`, + })), + ); + + if (errors.length > 0) { + console.log(`Finalize failed: ${errors.length} decode errors`); + } + + const validRecords = filterValidRecords(records); + const out = path.join(this.#dir, this.#format.finalPath()); + fs.mkdirSync(path.dirname(out), { + recursive: true, + }); + fs.writeFileSync(out, this.#format.finalizer(validRecords, opt)); + } + + cleanup() { + this.shardFiles().forEach(f => fs.unlinkSync(f)); + } +} diff --git a/packages/utils/src/lib/profiler/wal.unit.test.ts b/packages/utils/src/lib/profiler/wal.unit.test.ts new file mode 100644 index 000000000..86ff6638b --- /dev/null +++ b/packages/utils/src/lib/profiler/wal.unit.test.ts @@ -0,0 +1,338 @@ +import { vol } from 'memfs'; +import { beforeEach, describe, expect, it } from 'vitest'; +import { MEMFS_VOLUME } from '@code-pushup/test-utils'; +import { + type Codec, + ShardedWal, + WriteAheadLogFile, + createTolerantCodec, + filterValidRecords, + recoverFromContent, +} from './wal.js'; + +/* -------------------------------- helpers -------------------------------- */ + +const read = (p: string) => vol.readFileSync(p, 'utf8'); +const write = (p: string, c: string) => vol.writeFileSync(p, c); + +const stringCodec: Codec = { encode: v => v, decode: v => v }; + +const wal = (file: string, codec: Codec) => + new WriteAheadLogFile({ file, codec }); + +/* --------------------------- WriteAheadLogFile ---------------------------- */ + +describe('createTolerantCodec', () => { + it('should make decode tolerant but encode passes through errors', () => { + const c = createTolerantCodec({ + encode: (_n: number) => { + throw new Error('encoding error'); + }, + decode: (_s: string) => { + throw new Error('decoding error'); + }, + }); + // Encode still throws as it's not made tolerant + expect(() => c.encode(42)).toThrow('encoding error'); + // But decode returns an InvalidEntry instead of throwing + const result = c.decode('42'); + expect(result).toEqual({ __invalid: true, raw: '42' }); + }); + + it('round-trips valid values and preserves invalid ones', () => { + const c = createTolerantCodec({ + encode: (n: number) => `${n}`, + decode: (s: string) => { + const num = Number(s); + if (Number.isNaN(num)) throw new Error('Invalid number'); + return num; + }, + }); + expect(c.decode(c.encode(42))).toBe(42); + // Invalid decode should return InvalidEntry, and encoding that should return the raw value + const invalid = c.decode('x'); + expect(invalid).toStrictEqual({ __invalid: true, raw: 'x' }); + expect(c.encode(invalid)).toBe('x'); + }); +}); + +describe('filterValidRecords', () => { + it('filters out invalid records', () => { + const records = [ + { id: 1, name: 'valid1' }, + { __invalid: true, raw: 'x' }, + { id: 3, name: 'valid3' }, + ]; + const result = filterValidRecords(records); + expect(result).toEqual([ + { id: 1, name: 'valid1' }, + { id: 3, name: 'valid3' }, + ]); + }); +}); + +describe('recoverFromContent', () => { + it('recovers valid records', () => { + const content = 'a\nb\n'; + const result = recoverFromContent(content, stringCodec.decode); + expect(result).toEqual({ + records: ['a', 'b'], + errors: [], + partialTail: null, + }); + }); + + it('handles empty content', () => { + const content = ''; + const result = recoverFromContent(content, stringCodec.decode); + expect(result).toEqual({ + records: [], + errors: [], + partialTail: null, + }); + }); + + it('handles content without trailing newline', () => { + const content = 'a\nb'; + const result = recoverFromContent(content, stringCodec.decode); + expect(result).toEqual({ + records: ['a'], + errors: [], + partialTail: 'b', + }); + }); + + it('skips empty lines', () => { + const content = 'a\n\nb\n'; + const result = recoverFromContent(content, stringCodec.decode); + expect(result).toEqual({ + records: ['a', 'b'], + errors: [], + partialTail: null, + }); + }); + + it('handles decode errors gracefully', () => { + const failingCodec: Codec = { + encode: (s: string) => s, + decode: (s: string) => { + if (s === 'bad') throw new Error('Bad record'); + return s; + }, + }; + + const content = 'good\nbad\ngood\n'; + const result = recoverFromContent(content, failingCodec.decode); + + expect(result.records).toEqual(['good', 'good']); + expect(result.errors).toHaveLength(1); + expect(result.errors[0]).toEqual({ + lineNo: 2, + line: 'bad', + error: expect.any(Error), + }); + expect(result.errors[0].error.message).toBe('Bad record'); + expect(result.partialTail).toBeNull(); + }); + + it('handles decode errors with partial tail', () => { + const failingCodec: Codec = { + encode: (s: string) => s, + decode: (s: string) => { + if (s === 'bad') throw new Error('Bad record'); + return s; + }, + }; + + const content = 'good\nbad\npartial'; + const result = recoverFromContent(content, failingCodec.decode); + + expect(result.records).toEqual(['good']); + expect(result.errors).toHaveLength(1); + expect(result.errors[0].lineNo).toBe(2); + expect(result.partialTail).toBe('partial'); + }); +}); + +describe('WriteAheadLogFile', () => { + beforeEach(() => { + vol.reset(); + vol.fromJSON({}, MEMFS_VOLUME); + }); + + it('should create instance with file path and codecs without opening', () => { + const w = wal('/test/a.log', stringCodec); + expect(w).toBeInstanceOf(WriteAheadLogFile); + expect(w.path).toBe('/test/a.log'); + expect(w.isClosed()).toBe(true); + }); + + it('throws error when appending without opening', () => { + const w = wal('/test/a.log', stringCodec); + expect(w.isClosed()).toBe(true); + expect(() => w.append('a')).toThrow('WAL not opened'); + }); + + it('opens and closes correctly', () => { + const w = wal('/test/a.log', stringCodec); + expect(w.isClosed()).toBe(true); + w.open(); + expect(w.isClosed()).toBe(false); + w.close(); + expect(w.isClosed()).toBe(true); + }); + + it('append lines if opened', () => { + vol.mkdirSync('/test', { recursive: true }); + const w = wal('/test/a.log', stringCodec); + w.open(); + w.append('a'); + w.append('b'); + + expect(read('/test/a.log')).toBe('a\nb\n'); + }); + + it('appends records with encode logic', () => { + const w = wal('/test/a.log', stringCodec); + w.open(); + + w.append('any string'); + expect(read('/test/a.log')).toBe('any string\n'); + }); + + it('can recover without opening (reads file directly)', () => { + vol.mkdirSync('/test', { recursive: true }); + write('/test/a.log', 'line1\nline2\n'); + const w = wal('/test/a.log', stringCodec); + // Profiler WAL can recover without opening - it reads the file directly + const result = w.recover(); + expect(result.records).toEqual(['line1', 'line2']); + expect(result.errors).toEqual([]); + }); + + it('recovers valid records if opened', () => { + vol.mkdirSync('/test', { recursive: true }); + write('/test/a.log', 'line1\nline2\n'); + const w = wal('/test/a.log', stringCodec); + w.open(); + expect(w.recover()).toStrictEqual({ + records: ['line1', 'line2'], + errors: [], + partialTail: null, + }); + }); + + it('recovers with decode errors and partial tail using tolerant codec', () => { + vol.mkdirSync('/test', { recursive: true }); + write('/test/a.log', 'ok\nbad\npartial'); + + const tolerantCodec = createTolerantCodec({ + encode: (s: string) => s, + decode: (s: string) => { + if (s === 'bad') throw new Error('Bad record'); + return s; + }, + }); + + expect(wal('/test/a.log', tolerantCodec).recover()).toStrictEqual({ + records: ['ok', { __invalid: true, raw: 'bad' }], + errors: [], + partialTail: 'partial', + }); + }); + + it('repacks clean file without errors', () => { + vol.mkdirSync('/test', { recursive: true }); + write('/test/a.log', 'a\nb\n'); + wal('/test/a.log', stringCodec).repack(); + expect(read('/test/a.log')).toBe('a\nb\n'); + }); + + it('repacks with decode errors using tolerant codec', () => { + vol.mkdirSync('/test', { recursive: true }); + write('/test/a.log', 'ok\nbad\n'); + + const tolerantCodec = createTolerantCodec({ + encode: (s: string) => s, + decode: (s: string) => { + if (s === 'bad') throw new Error('Bad record'); + return s; + }, + }); + + // With tolerant codec, repack should succeed and preserve all entries (valid and invalid) + wal('/test/a.log', tolerantCodec).repack(); + expect(read('/test/a.log')).toBe('ok\nbad\n'); + }); +}); + +/* ------------------------------- ShardedWal ------------------------------- */ + +describe('ShardedWal', () => { + beforeEach(() => { + vol.reset(); + vol.fromJSON({}, MEMFS_VOLUME); + }); + + const sharded = () => + new ShardedWal('/test/shards', { + baseName: 'test', + walExtension: '.jsonl', + finalExtension: '.merged', + codec: stringCodec, + shardPath: (id: string) => `test.${id}.jsonl`, + finalPath: () => 'test.merged', + finalizer: (records: string[]) => `${records.join('\n')}\n`, + }); + + it('merges shards and cleans up', () => { + vol.mkdirSync('/test/shards', { recursive: true }); + const s = sharded(); + const w1 = s.shard('1'); + w1.open(); + w1.append('r1'); + w1.close(); + const w2 = s.shard('2'); + w2.open(); + w2.append('r2'); + w2.close(); + + s.finalize(); + expect(read('/test/shards/test.merged')).toBe('r1\nr2\n'); + + s.cleanup(); + expect(vol.readdirSync('/test/shards')).toEqual(['test.merged']); + }); + + it('handles decode errors with tolerant codec during finalize', () => { + vol.mkdirSync('/test/shards', { recursive: true }); + write('/test/shards/test.w1.jsonl', '{"id":1}\nbad\n'); + + const tolerantJsonCodec = createTolerantCodec(JSON.stringify, JSON.parse); + + const s = new ShardedWal('/test/shards', { + baseName: 'test', + walExtension: '.jsonl', + finalExtension: '.merged', + codec: tolerantJsonCodec, + shardPath: (id: string) => `test.${id}.jsonl`, + finalPath: () => 'test.merged', + finalizer: (records: any[]) => + `${records.map(r => tolerantJsonCodec.encode(r)).join('\n')}\n`, + }); + s.finalize(); + + // Should contain all entries - valid ones and invalid ones preserved as raw data + expect(read('/test/shards/test.merged')).toBe('{"id":1}\nbad\n'); + }); + + it('ignores non-matching files', () => { + vol.fromJSON({ + '/test/shards/test.a.jsonl': 'x', + '/test/shards/other.log': 'y', + }); + + sharded().cleanup(); + expect(vol.existsSync('/test/shards/other.log')).toBe(true); + }); +}); diff --git a/packages/utils/src/lib/sink-source.type.ts b/packages/utils/src/lib/sink-source.type.ts deleted file mode 100644 index 45a3fcd0c..000000000 --- a/packages/utils/src/lib/sink-source.type.ts +++ /dev/null @@ -1,49 +0,0 @@ -export type Encoder = (value: T) => string; -export type Decoder = (line: string) => T; - -export type EncoderInterface = { - encode: (input: I) => O; -}; - -// eslint-disable-next-line @typescript-eslint/no-unused-vars -export type Sink = { - setPath: (filePath: string) => void; - getPath: () => string; - open: () => void; - write: (input: I) => void; - close: () => void; - isClosed: () => boolean; -}; - -export type Buffered = { - flush: () => void; -}; -export type BufferedSink = Sink & Buffered; - -export type Source = { - read?: () => O; - decode?: (input: I) => O; -}; - -export type Observer = { - subscribe: () => void; - unsubscribe: () => void; - isSubscribed: () => boolean; -}; - -export type Recoverable = { - recover: () => RecoverResult; - repack: (outputPath?: string) => void; -}; - -export type RecoverResult = { - records: T[]; - errors: { lineNo: number; line: string; error: Error }[]; - partialTail: string | null; -}; - -export type RecoverOptions = { - keepInvalid?: boolean; -}; - -export type Output = {} & BufferedSink; From e60ea2b5062bc31ca1e6f41bcc3ef816d3623b8e Mon Sep 17 00:00:00 2001 From: Michael Hladky Date: Sat, 17 Jan 2026 21:29:47 +0100 Subject: [PATCH 17/31] refactor: wip --- packages/utils/src/lib/profiler/profiler.ts | 108 ++++++++++++------ .../src/lib/profiler/trace-file-utils.ts | 4 +- .../utils/src/lib/profiler/wal-json-trace.ts | 8 +- packages/utils/src/lib/profiler/wal.ts | 86 +++++++++++--- 4 files changed, 154 insertions(+), 52 deletions(-) diff --git a/packages/utils/src/lib/profiler/profiler.ts b/packages/utils/src/lib/profiler/profiler.ts index 54c9b33b5..d5aa48819 100644 --- a/packages/utils/src/lib/profiler/profiler.ts +++ b/packages/utils/src/lib/profiler/profiler.ts @@ -1,3 +1,5 @@ +import path from 'node:path'; +import { performance } from 'node:perf_hooks'; import type { PerformanceEntry } from 'node:perf_hooks'; import process from 'node:process'; import { threadId } from 'node:worker_threads'; @@ -26,7 +28,14 @@ import { import { entryToTraceEvents } from './trace-file-utils.js'; import type { UserTimingTraceEvent } from './trace-file.type.js'; import { traceEventWalFormat } from './wal-json-trace.js'; -import { ShardedWal, WriteAheadLogFile } from './wal.js'; +import { + ShardedWal, + WriteAheadLogFile, + getShardId, + getShardedGroupId, + isLeaderWal, + setLeaderWal, +} from './wal.js'; import type { WalFormat } from './wal.js'; /** @@ -80,11 +89,6 @@ export class Profiler { * */ constructor(options: ProfilerOptions) { - // Initialize origin PID early - must happen before user code runs - if (!process.env[PROFILER_ORIGIN_PID_ENV_VAR]) { - process.env[PROFILER_ORIGIN_PID_ENV_VAR] = String(process.pid); - } - const { tracks, prefix, enabled, ...defaults } = options; const dataType = 'track-entry'; @@ -113,6 +117,13 @@ export class Profiler { this.#enabled = enabled; } + /** + * Close the profiler. Subclasses should override this to perform cleanup. + */ + close(): void { + // Base implementation does nothing + } + /** * Is profiling enabled? * @@ -235,61 +246,57 @@ export class Profiler { } } -/** - * Determines if this process is the leader WAL process using the origin PID heuristic. - * - * The leader is the process that first enabled profiling (the one that set CP_PROFILER_ORIGIN_PID). - * All descendant processes inherit the environment but have different PIDs. - * - * @returns true if this is the leader WAL process, false otherwise - */ -export function isLeaderWal(): boolean { - return process.env[PROFILER_ORIGIN_PID_ENV_VAR] === String(process.pid); -} - export class NodeProfiler< TracksConfig extends ActionTrackConfigs = ActionTrackConfigs, - CodecOutput extends string | object = UserTimingTraceEvent, + CodecOutput extends UserTimingTraceEvent = UserTimingTraceEvent, > extends Profiler { #shard: WriteAheadLogFile; #perfObserver: PerformanceObserverSink; #shardWal: ShardedWal; readonly #format: WalFormat; + readonly #debug: boolean; + #closed: boolean = false; + constructor( options: ProfilerOptions & { directory?: string; performanceEntryEncode: (entry: PerformanceEntry) => CodecOutput[]; - format: WalFormat; + debug?: boolean; }, ) { + // Initialize origin PID early - must happen before user code runs + setLeaderWal(PROFILER_ORIGIN_PID_ENV_VAR); + const { directory = PROFILER_DIRECTORY, performanceEntryEncode, - format, + debug = false, + ...profilerOptions } = options; - super(options); - const shardId = `${process.pid}-${threadId}`; + super(profilerOptions); + const walGroupId = getShardedGroupId(); + const shardId = getShardId(process.pid, threadId); - this.#format = format; - this.#shardWal = new ShardedWal(directory, format); + this.#format = traceEventWalFormat({ groupId: walGroupId }); + this.#debug = debug; + this.#shardWal = new ShardedWal( + path.join(directory, walGroupId), + this.#format, + ); this.#shard = this.#shardWal.shard(shardId); this.#perfObserver = new PerformanceObserverSink({ sink: this.#shard, encode: performanceEntryEncode, buffered: true, - flushThreshold: 100, + flushThreshold: 1, // Lower threshold for immediate flushing }); + this.#perfObserver.subscribe(); + installExitHandlers({ onExit: () => { - this.#perfObserver.flush(); - this.#perfObserver.unsubscribe(); - this.#shard.close(); - if (isLeaderWal()) { - this.#shardWal.finalize(); - this.#shardWal.cleanup(); - } + this.close(); }, }); } @@ -297,6 +304,39 @@ export class NodeProfiler< getFinalPath() { return this.#format.finalPath(); } + + /** + * Close the profiler and finalize files if this is the leader process. + * This method can be called manually to ensure proper cleanup. + */ + close(): void { + if (this.#closed) { + return; + } + + this.#closed = true; + + try { + if (!this.#perfObserver || !this.#shard || !this.#shardWal) { + console.warn('Warning: Profiler not fully initialized during close'); + return; + } + + this.#perfObserver.flush(); + this.#perfObserver.unsubscribe(); + + this.#shard.close(); + + if (isLeaderWal(PROFILER_ORIGIN_PID_ENV_VAR)) { + this.#shardWal.finalize(); + if (!this.#debug) { + this.#shardWal.cleanup(); + } + } + } catch (error) { + console.warn('Warning: Error during profiler close:', error); + } + } } export const profiler = new NodeProfiler({ @@ -304,5 +344,5 @@ export const profiler = new NodeProfiler({ track: 'CLI', trackGroup: 'Code Pushup', performanceEntryEncode: entryToTraceEvents, - format: traceEventWalFormat(), + debug: process.env.CP_PROFILER_DEBUG === 'true', }); diff --git a/packages/utils/src/lib/profiler/trace-file-utils.ts b/packages/utils/src/lib/profiler/trace-file-utils.ts index 32abf0af5..a6ab15717 100644 --- a/packages/utils/src/lib/profiler/trace-file-utils.ts +++ b/packages/utils/src/lib/profiler/trace-file-utils.ts @@ -263,7 +263,9 @@ export const measureToSpanEvents = ( * @param entry - Performance entry * @returns UserTimingTraceEvent[] */ -export function entryToTraceEvents(entry: PerformanceEntry) { +export function entryToTraceEvents( + entry: PerformanceEntry, +): UserTimingTraceEvent[] { if (entry.entryType === 'mark') { return [markToInstantEvent(entry as PerformanceMark)]; } diff --git a/packages/utils/src/lib/profiler/wal-json-trace.ts b/packages/utils/src/lib/profiler/wal-json-trace.ts index 90761805c..05cfaa830 100644 --- a/packages/utils/src/lib/profiler/wal-json-trace.ts +++ b/packages/utils/src/lib/profiler/wal-json-trace.ts @@ -71,12 +71,14 @@ export function generateTraceContent( */ export const traceEventWalFormat = < T extends UserTimingTraceEvent = UserTimingTraceEvent, ->(_opt?: { +>(opt?: { dir?: string; + groupId?: string; }) => { const baseName = 'trace'; const walExtension = '.jsonl'; const finalExtension = '.json'; + const groupId = opt?.groupId || 'default'; return { baseName, walExtension, @@ -85,8 +87,8 @@ export const traceEventWalFormat = < encode: event => JSON.stringify(encodeTraceEvent(event)), decode: (json: string) => decodeTraceEvent(JSON.parse(json)) as T, }, - shardPath: (id: string) => `${baseName}.${id}${walExtension}`, - finalPath: () => `${baseName}${finalExtension}`, + shardPath: (id: string) => `${baseName}.${groupId}.${id}${walExtension}`, + finalPath: () => `${baseName}.${groupId}${finalExtension}`, finalizer: (records, metadata) => generateTraceContent(records, metadata), } satisfies WalFormat; }; diff --git a/packages/utils/src/lib/profiler/wal.ts b/packages/utils/src/lib/profiler/wal.ts index f7803f499..1d5020cfa 100644 --- a/packages/utils/src/lib/profiler/wal.ts +++ b/packages/utils/src/lib/profiler/wal.ts @@ -1,5 +1,7 @@ import * as fs from 'node:fs'; import path from 'node:path'; +import process from 'node:process'; +import { PROFILER_ORIGIN_PID_ENV_VAR } from './constants'; /** * Codec for encoding/decoding values to/from strings for WAL storage. @@ -153,13 +155,15 @@ export class WriteAheadLogFile { /** * Append a record to the WAL. * @param v - Record to append - * @throws Error if WAL is not opened + * @throws Error if WAL cannot be opened */ append = (v: T) => { if (!this.#fd) { - throw new Error('WAL not opened'); + this.open(); + } + if (this.#fd) { + fs.writeSync(this.#fd, `${this.#encode(v)}\n`); } - fs.writeSync(this.#fd, `${this.#encode(v)}\n`); }; /** Close the WAL file */ @@ -278,6 +282,29 @@ export function parseWalFormat( } satisfies WalFormat; } +/** + * Determines if this process is the leader WAL process using the origin PID heuristic. + * + * The leader is the process that first enabled profiling (the one that set CP_PROFILER_ORIGIN_PID). + * All descendant processes inherit the environment but have different PIDs. + * + * @returns true if this is the leader WAL process, false otherwise + */ +export function isLeaderWal(envVarName: string): boolean { + return process.env[envVarName] === String(process.pid); +} + +/** + * Initialize the origin PID environment variable if not already set. + * This must be done as early as possible before any user code runs. + * Set's PROFILER_ORIGIN_PID_ENV_VAR to the current process PID if not already defined. + */ +export function setLeaderWal(PROFILER_ORIGIN_PID_ENV_VAR: string): void { + if (!process.env[PROFILER_ORIGIN_PID_ENV_VAR]) { + process.env[PROFILER_ORIGIN_PID_ENV_VAR] = String(process.pid); + } +} + /** * Sharded Write-Ahead Log manager for coordinating multiple WAL shards. * Handles distributed logging across multiple processes/files with atomic finalization. @@ -304,16 +331,21 @@ export class ShardedWal { /** Get all shard file paths matching this WAL's base name */ private shardFiles() { - return fs.existsSync(this.#dir) - ? fs - .readdirSync(this.#dir) - .filter( - f => - f.startsWith(`${this.#format.baseName}.`) && - f.endsWith(this.#format.walExtension), - ) - .map(f => path.join(this.#dir, f)) - : []; + if (!fs.existsSync(this.#dir)) { + return []; + } + + const files: string[] = []; + const entries = fs.readdirSync(this.#dir); + + for (const entry of entries) { + // Look for files matching the pattern: anything ending with .jsonl + if (entry.endsWith(this.#format.walExtension)) { + files.push(path.join(this.#dir, entry)); + } + } + + return files; } /** @@ -351,6 +383,32 @@ export class ShardedWal { } cleanup() { - this.shardFiles().forEach(f => fs.unlinkSync(f)); + this.shardFiles().forEach(f => { + // Remove the shard file + fs.unlinkSync(f); + // Remove the parent directory (shard group directory) + const shardDir = path.dirname(f); + try { + fs.rmdirSync(shardDir); + } catch (error) { + // Directory might not be empty or already removed, ignore + } + }); } } + +/** + * Generates a shard ID. + * This is idempotent since PID and TID are fixed for the process/thread. + */ +export function getShardId(pid: number, tid: number = 0): string { + return `${pid}-${tid}`; +} + +/** + * Generates a sharded group ID based on performance.timeOrigin. + * This is idempotent per process since timeOrigin is fixed within a process and its worker. + */ +export function getShardedGroupId(): string { + return Math.floor(performance.timeOrigin).toString(); +} From 5ea2ac4948b3802f741d6ddaf7d766b1613160f7 Mon Sep 17 00:00:00 2001 From: Michael Hladky Date: Sat, 17 Jan 2026 21:36:19 +0100 Subject: [PATCH 18/31] refactor: wip --- .../utils/src/lib/profiler/profiler.unit.test.ts | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/packages/utils/src/lib/profiler/profiler.unit.test.ts b/packages/utils/src/lib/profiler/profiler.unit.test.ts index 8c8e0409a..6c41fbe06 100644 --- a/packages/utils/src/lib/profiler/profiler.unit.test.ts +++ b/packages/utils/src/lib/profiler/profiler.unit.test.ts @@ -1,12 +1,8 @@ import { performance } from 'node:perf_hooks'; import { beforeEach, describe, expect, it, vi } from 'vitest'; import type { ActionTrackEntryPayload } from '../user-timing-extensibility-api.type.js'; -import { - Profiler, - type ProfilerOptions, - isLeaderWal, - profiler, -} from './profiler.js'; +import { Profiler, type ProfilerOptions, profiler } from './profiler.js'; +import { isLeaderWal } from './wal.js'; describe('Profiler', () => { const getProfiler = (overrides?: Partial) => @@ -504,25 +500,25 @@ describe('isLeaderWal', () => { it('should return true when CP_PROFILER_ORIGIN_PID matches current process PID', () => { vi.stubEnv('CP_PROFILER_ORIGIN_PID', String(mockPid)); - expect(isLeaderWal()).toBe(true); + expect(isLeaderWal('CP_PROFILER_ORIGIN_PID')).toBe(true); }); it('should return false when CP_PROFILER_ORIGIN_PID does not match current process PID', () => { vi.stubEnv('CP_PROFILER_ORIGIN_PID', '99999'); // Different PID - expect(isLeaderWal()).toBe(false); + expect(isLeaderWal('CP_PROFILER_ORIGIN_PID')).toBe(false); }); it('should return false when CP_PROFILER_ORIGIN_PID is not set', () => { // eslint-disable-next-line functional/immutable-data delete process.env.CP_PROFILER_ORIGIN_PID; - expect(isLeaderWal()).toBe(false); + expect(isLeaderWal('CP_PROFILER_ORIGIN_PID')).toBe(false); }); it('should handle string PID values correctly', () => { vi.stubEnv('CP_PROFILER_ORIGIN_PID', String(mockPid)); - expect(isLeaderWal()).toBe(true); + expect(isLeaderWal('CP_PROFILER_ORIGIN_PID')).toBe(true); }); }); From 73ff37178c33f131b2f4852785a710f15693a81e Mon Sep 17 00:00:00 2001 From: Michael Hladky Date: Sat, 17 Jan 2026 21:39:46 +0100 Subject: [PATCH 19/31] refactor: wip --- .../src/lib/performance-observer.unit.test.ts | 2 +- packages/utils/src/lib/profiler/profiler.ts | 160 +++-------------- .../src/lib/profiler/profiler.unit.test.ts | 162 ++++-------------- packages/utils/src/lib/{profiler => }/wal.ts | 1 - .../src/lib/{profiler => }/wal.unit.test.ts | 0 5 files changed, 53 insertions(+), 272 deletions(-) rename packages/utils/src/lib/{profiler => }/wal.ts (99%) rename packages/utils/src/lib/{profiler => }/wal.unit.test.ts (100%) diff --git a/packages/utils/src/lib/performance-observer.unit.test.ts b/packages/utils/src/lib/performance-observer.unit.test.ts index c4586e27d..0abf6f70d 100644 --- a/packages/utils/src/lib/performance-observer.unit.test.ts +++ b/packages/utils/src/lib/performance-observer.unit.test.ts @@ -13,7 +13,7 @@ import { type PerformanceObserverOptions, PerformanceObserverSink, } from './performance-observer.js'; -import type { Codec } from './types.js'; +import type { Codec } from './wal.js'; describe('PerformanceObserverSink', () => { let encode: MockedFunction<(entry: PerformanceEntry) => string[]>; diff --git a/packages/utils/src/lib/profiler/profiler.ts b/packages/utils/src/lib/profiler/profiler.ts index d5aa48819..130e28c44 100644 --- a/packages/utils/src/lib/profiler/profiler.ts +++ b/packages/utils/src/lib/profiler/profiler.ts @@ -1,11 +1,5 @@ -import path from 'node:path'; -import { performance } from 'node:perf_hooks'; -import type { PerformanceEntry } from 'node:perf_hooks'; import process from 'node:process'; -import { threadId } from 'node:worker_threads'; import { isEnvVarEnabled } from '../env.js'; -import { installExitHandlers } from '../exit-process.js'; -import { PerformanceObserverSink } from '../performance-observer.js'; import { type ActionTrackConfigs, type MeasureCtxOptions, @@ -20,23 +14,25 @@ import type { DevToolsColor, EntryMeta, } from '../user-timing-extensibility-api.type.js'; -import { - PROFILER_DIRECTORY, - PROFILER_ENABLED_ENV_VAR, - PROFILER_ORIGIN_PID_ENV_VAR, -} from './constants.js'; -import { entryToTraceEvents } from './trace-file-utils.js'; -import type { UserTimingTraceEvent } from './trace-file.type.js'; -import { traceEventWalFormat } from './wal-json-trace.js'; -import { - ShardedWal, - WriteAheadLogFile, - getShardId, - getShardedGroupId, - isLeaderWal, - setLeaderWal, -} from './wal.js'; -import type { WalFormat } from './wal.js'; +import { PROFILER_ENABLED_ENV_VAR } from './constants.js'; + +/** + * Configuration options for creating a Profiler instance. + * + * @template T - Record type defining available track names and their configurations + */ +type ProfilerMeasureOptions = + MeasureCtxOptions & { + /** Custom track configurations that will be merged with default settings */ + tracks?: Record>; + /** Whether profiling should be enabled (defaults to CP_PROFILING env var) */ + enabled?: boolean; + }; + +/** + * Options for creating a performance marker. + */ +export type MarkerOptions = EntryMeta & { color?: DevToolsColor }; /** * Options for configuring a Profiler instance. @@ -53,15 +49,7 @@ import type { WalFormat } from './wal.js'; * @property tracks - Custom track configurations merged with defaults */ export type ProfilerOptions = - MeasureCtxOptions & { - tracks?: Record>; - enabled?: boolean; - }; - -/** - * Options for creating a performance marker. - */ -export type MarkerOptions = EntryMeta & { color?: DevToolsColor }; + ProfilerMeasureOptions; /** * Performance profiler that creates structured timing measurements with Chrome DevTools Extensibility API payloads. @@ -117,13 +105,6 @@ export class Profiler { this.#enabled = enabled; } - /** - * Close the profiler. Subclasses should override this to perform cleanup. - */ - close(): void { - // Base implementation does nothing - } - /** * Is profiling enabled? * @@ -245,104 +226,3 @@ export class Profiler { } } } - -export class NodeProfiler< - TracksConfig extends ActionTrackConfigs = ActionTrackConfigs, - CodecOutput extends UserTimingTraceEvent = UserTimingTraceEvent, -> extends Profiler { - #shard: WriteAheadLogFile; - #perfObserver: PerformanceObserverSink; - #shardWal: ShardedWal; - readonly #format: WalFormat; - readonly #debug: boolean; - #closed: boolean = false; - - constructor( - options: ProfilerOptions & { - directory?: string; - performanceEntryEncode: (entry: PerformanceEntry) => CodecOutput[]; - debug?: boolean; - }, - ) { - // Initialize origin PID early - must happen before user code runs - setLeaderWal(PROFILER_ORIGIN_PID_ENV_VAR); - - const { - directory = PROFILER_DIRECTORY, - performanceEntryEncode, - debug = false, - ...profilerOptions - } = options; - super(profilerOptions); - const walGroupId = getShardedGroupId(); - const shardId = getShardId(process.pid, threadId); - - this.#format = traceEventWalFormat({ groupId: walGroupId }); - this.#debug = debug; - this.#shardWal = new ShardedWal( - path.join(directory, walGroupId), - this.#format, - ); - this.#shard = this.#shardWal.shard(shardId); - - this.#perfObserver = new PerformanceObserverSink({ - sink: this.#shard, - encode: performanceEntryEncode, - buffered: true, - flushThreshold: 1, // Lower threshold for immediate flushing - }); - - this.#perfObserver.subscribe(); - - installExitHandlers({ - onExit: () => { - this.close(); - }, - }); - } - - getFinalPath() { - return this.#format.finalPath(); - } - - /** - * Close the profiler and finalize files if this is the leader process. - * This method can be called manually to ensure proper cleanup. - */ - close(): void { - if (this.#closed) { - return; - } - - this.#closed = true; - - try { - if (!this.#perfObserver || !this.#shard || !this.#shardWal) { - console.warn('Warning: Profiler not fully initialized during close'); - return; - } - - this.#perfObserver.flush(); - this.#perfObserver.unsubscribe(); - - this.#shard.close(); - - if (isLeaderWal(PROFILER_ORIGIN_PID_ENV_VAR)) { - this.#shardWal.finalize(); - if (!this.#debug) { - this.#shardWal.cleanup(); - } - } - } catch (error) { - console.warn('Warning: Error during profiler close:', error); - } - } -} - -export const profiler = new NodeProfiler({ - prefix: 'cp', - track: 'CLI', - trackGroup: 'Code Pushup', - performanceEntryEncode: entryToTraceEvents, - debug: process.env.CP_PROFILER_DEBUG === 'true', -}); diff --git a/packages/utils/src/lib/profiler/profiler.unit.test.ts b/packages/utils/src/lib/profiler/profiler.unit.test.ts index 6c41fbe06..0e285deb2 100644 --- a/packages/utils/src/lib/profiler/profiler.unit.test.ts +++ b/packages/utils/src/lib/profiler/profiler.unit.test.ts @@ -1,8 +1,7 @@ import { performance } from 'node:perf_hooks'; import { beforeEach, describe, expect, it, vi } from 'vitest'; import type { ActionTrackEntryPayload } from '../user-timing-extensibility-api.type.js'; -import { Profiler, type ProfilerOptions, profiler } from './profiler.js'; -import { isLeaderWal } from './wal.js'; +import { Profiler, type ProfilerOptions } from './profiler.js'; describe('Profiler', () => { const getProfiler = (overrides?: Partial) => @@ -12,7 +11,7 @@ describe('Profiler', () => { ...overrides, }); - let profilerInstance: Profiler>; + let profiler: Profiler>; beforeEach(() => { performance.clearMarks(); @@ -20,7 +19,7 @@ describe('Profiler', () => { // eslint-disable-next-line functional/immutable-data delete process.env.CP_PROFILING; - profilerInstance = getProfiler(); + profiler = getProfiler(); }); it('constructor should initialize with default enabled state from env', () => { @@ -120,28 +119,28 @@ describe('Profiler', () => { }); it('isEnabled should set and get enabled state', () => { - expect(profilerInstance.isEnabled()).toBe(false); + expect(profiler.isEnabled()).toBe(false); - profilerInstance.setEnabled(true); - expect(profilerInstance.isEnabled()).toBe(true); + profiler.setEnabled(true); + expect(profiler.isEnabled()).toBe(true); - profilerInstance.setEnabled(false); - expect(profilerInstance.isEnabled()).toBe(false); + profiler.setEnabled(false); + expect(profiler.isEnabled()).toBe(false); }); it('isEnabled should update environment variable', () => { - profilerInstance.setEnabled(true); + profiler.setEnabled(true); expect(process.env.CP_PROFILING).toBe('true'); - profilerInstance.setEnabled(false); + profiler.setEnabled(false); expect(process.env.CP_PROFILING).toBe('false'); }); it('marker should execute without error when enabled', () => { - profilerInstance.setEnabled(true); + profiler.setEnabled(true); expect(() => { - profilerInstance.marker('test-marker', { + profiler.marker('test-marker', { color: 'primary', tooltipText: 'Test marker', properties: [['key', 'value']], @@ -165,10 +164,10 @@ describe('Profiler', () => { }); it('marker should execute without error when disabled', () => { - profilerInstance.setEnabled(false); + profiler.setEnabled(false); expect(() => { - profilerInstance.marker('test-marker'); + profiler.marker('test-marker'); }).not.toThrow(); const marks = performance.getEntriesByType('mark'); @@ -234,12 +233,10 @@ describe('Profiler', () => { performance.clearMarks(); performance.clearMeasures(); - profilerInstance.setEnabled(true); + profiler.setEnabled(true); const workFn = vi.fn(() => 'result'); - const result = profilerInstance.measure('test-event', workFn, { - color: 'primary', - }); + const result = profiler.measure('test-event', workFn, { color: 'primary' }); expect(result).toBe('result'); expect(workFn).toHaveBeenCalled(); @@ -283,9 +280,9 @@ describe('Profiler', () => { }); it('measure should execute work directly when disabled', () => { - profilerInstance.setEnabled(false); + profiler.setEnabled(false); const workFn = vi.fn(() => 'result'); - const result = profilerInstance.measure('test-event', workFn); + const result = profiler.measure('test-event', workFn); expect(result).toBe('result'); expect(workFn).toHaveBeenCalled(); @@ -298,44 +295,40 @@ describe('Profiler', () => { }); it('measure should propagate errors when enabled', () => { - profilerInstance.setEnabled(true); + profiler.setEnabled(true); const error = new Error('Test error'); const workFn = vi.fn(() => { throw error; }); - expect(() => profilerInstance.measure('test-event', workFn)).toThrow(error); + expect(() => profiler.measure('test-event', workFn)).toThrow(error); expect(workFn).toHaveBeenCalled(); }); it('measure should propagate errors when disabled', () => { - profilerInstance.setEnabled(false); + profiler.setEnabled(false); const error = new Error('Test error'); const workFn = vi.fn(() => { throw error; }); - expect(() => profilerInstance.measure('test-event', workFn)).toThrow(error); + expect(() => profiler.measure('test-event', workFn)).toThrow(error); expect(workFn).toHaveBeenCalled(); }); it('measureAsync should handle async operations correctly when enabled', async () => { - profilerInstance.setEnabled(true); + profiler.setEnabled(true); const workFn = vi.fn(async () => { await Promise.resolve(); return 'async-result'; }); - const result = await profilerInstance.measureAsync( - 'test-async-event', - workFn, - { - color: 'primary', - }, - ); + const result = await profiler.measureAsync('test-async-event', workFn, { + color: 'primary', + }); expect(result).toBe('async-result'); expect(workFn).toHaveBeenCalled(); @@ -382,17 +375,14 @@ describe('Profiler', () => { }); it('measureAsync should execute async work directly when disabled', async () => { - profilerInstance.setEnabled(false); + profiler.setEnabled(false); const workFn = vi.fn(async () => { await Promise.resolve(); return 'async-result'; }); - const result = await profilerInstance.measureAsync( - 'test-async-event', - workFn, - ); + const result = await profiler.measureAsync('test-async-event', workFn); expect(result).toBe('async-result'); expect(workFn).toHaveBeenCalled(); @@ -405,7 +395,7 @@ describe('Profiler', () => { }); it('measureAsync should propagate async errors when enabled', async () => { - profilerInstance.setEnabled(true); + profiler.setEnabled(true); const error = new Error('Async test error'); const workFn = vi.fn(async () => { @@ -414,13 +404,13 @@ describe('Profiler', () => { }); await expect( - profilerInstance.measureAsync('test-async-event', workFn), + profiler.measureAsync('test-async-event', workFn), ).rejects.toThrow(error); expect(workFn).toHaveBeenCalled(); }); it('measureAsync should propagate async errors when disabled', async () => { - profilerInstance.setEnabled(false); + profiler.setEnabled(false); const error = new Error('Async test error'); const workFn = vi.fn(async () => { @@ -429,96 +419,8 @@ describe('Profiler', () => { }); await expect( - profilerInstance.measureAsync('test-async-event', workFn), + profiler.measureAsync('test-async-event', workFn), ).rejects.toThrow(error); expect(workFn).toHaveBeenCalled(); }); }); - -describe('NodeProfiler', () => { - it('should export profiler instance with NodeProfiler methods', () => { - expect(profiler).toBeDefined(); - expect(profiler).toBeInstanceOf(Profiler); - expect(typeof profiler.getFinalPath).toBe('function'); - expect(profiler.getFinalPath()).toBe('trace.json'); - }); -}); - -describe('Profiler constructor - origin PID initialization', () => { - const originalEnv = { ...process.env }; - const mockPid = 12345; - - beforeEach(() => { - // Reset environment variables before each test - vi.unstubAllEnvs(); - // eslint-disable-next-line functional/immutable-data - process.env = { ...originalEnv }; - // Mock process.pid for consistent testing - vi.spyOn(process, 'pid', 'get').mockReturnValue(mockPid); - }); - - afterEach(() => { - vi.restoreAllMocks(); - }); - - it('should set CP_PROFILER_ORIGIN_PID if not already set', () => { - // eslint-disable-next-line functional/immutable-data - delete process.env.CP_PROFILER_ORIGIN_PID; - - new Profiler({ prefix: 'test', track: 'test-track' }); - - expect(process.env.CP_PROFILER_ORIGIN_PID).toBe(String(mockPid)); - }); - - it('should not override existing CP_PROFILER_ORIGIN_PID', () => { - const existingPid = '99999'; - vi.stubEnv('CP_PROFILER_ORIGIN_PID', existingPid); - - new Profiler({ prefix: 'test', track: 'test-track' }); - - expect(process.env.CP_PROFILER_ORIGIN_PID).toBe(existingPid); - }); -}); - -describe('isLeaderWal', () => { - const originalEnv = { ...process.env }; - const mockPid = 12345; - - beforeEach(() => { - // Reset environment variables before each test - vi.unstubAllEnvs(); - // eslint-disable-next-line functional/immutable-data - process.env = { ...originalEnv }; - // Mock process.pid for consistent testing - vi.spyOn(process, 'pid', 'get').mockReturnValue(mockPid); - }); - - afterEach(() => { - vi.restoreAllMocks(); - }); - - it('should return true when CP_PROFILER_ORIGIN_PID matches current process PID', () => { - vi.stubEnv('CP_PROFILER_ORIGIN_PID', String(mockPid)); - - expect(isLeaderWal('CP_PROFILER_ORIGIN_PID')).toBe(true); - }); - - it('should return false when CP_PROFILER_ORIGIN_PID does not match current process PID', () => { - vi.stubEnv('CP_PROFILER_ORIGIN_PID', '99999'); // Different PID - - expect(isLeaderWal('CP_PROFILER_ORIGIN_PID')).toBe(false); - }); - - it('should return false when CP_PROFILER_ORIGIN_PID is not set', () => { - // eslint-disable-next-line functional/immutable-data - delete process.env.CP_PROFILER_ORIGIN_PID; - - expect(isLeaderWal('CP_PROFILER_ORIGIN_PID')).toBe(false); - }); - - it('should handle string PID values correctly', () => { - vi.stubEnv('CP_PROFILER_ORIGIN_PID', String(mockPid)); - - expect(isLeaderWal('CP_PROFILER_ORIGIN_PID')).toBe(true); - }); -}); diff --git a/packages/utils/src/lib/profiler/wal.ts b/packages/utils/src/lib/wal.ts similarity index 99% rename from packages/utils/src/lib/profiler/wal.ts rename to packages/utils/src/lib/wal.ts index 1d5020cfa..6aacc5775 100644 --- a/packages/utils/src/lib/profiler/wal.ts +++ b/packages/utils/src/lib/wal.ts @@ -1,7 +1,6 @@ import * as fs from 'node:fs'; import path from 'node:path'; import process from 'node:process'; -import { PROFILER_ORIGIN_PID_ENV_VAR } from './constants'; /** * Codec for encoding/decoding values to/from strings for WAL storage. diff --git a/packages/utils/src/lib/profiler/wal.unit.test.ts b/packages/utils/src/lib/wal.unit.test.ts similarity index 100% rename from packages/utils/src/lib/profiler/wal.unit.test.ts rename to packages/utils/src/lib/wal.unit.test.ts From 34b0eb891e515c190ee2afc8e7736dc4cdcf3dd3 Mon Sep 17 00:00:00 2001 From: Michael Hladky Date: Sat, 17 Jan 2026 21:44:51 +0100 Subject: [PATCH 20/31] refactor: wip --- packages/utils/src/index.ts | 2 +- packages/utils/src/lib/performance-observer.ts | 2 +- packages/utils/src/lib/profiler/wal-json-trace.ts | 10 +++++++--- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/packages/utils/src/index.ts b/packages/utils/src/index.ts index 636ec41e6..c50e2c49d 100644 --- a/packages/utils/src/index.ts +++ b/packages/utils/src/index.ts @@ -114,7 +114,7 @@ export { settlePromise, } from './lib/promises.js'; export { generateRandomId } from './lib/random.js'; -export { profiler } from './lib/profiler/profiler.js'; +// export { profiler } from './lib/profiler/profiler.js'; // Disabled - requires WAL functionality export { CODE_PUSHUP_DOMAIN, CODE_PUSHUP_UNICODE_LOGO, diff --git a/packages/utils/src/lib/performance-observer.ts b/packages/utils/src/lib/performance-observer.ts index 96bbf9b25..8a746579d 100644 --- a/packages/utils/src/lib/performance-observer.ts +++ b/packages/utils/src/lib/performance-observer.ts @@ -4,7 +4,7 @@ import { type PerformanceObserverEntryList, performance, } from 'node:perf_hooks'; -import type { WriteAheadLogFile } from './profiler/wal.js'; +import type { WriteAheadLogFile } from './wal.js'; const OBSERVED_TYPES = ['mark', 'measure'] as const; type ObservedEntryType = 'mark' | 'measure'; diff --git a/packages/utils/src/lib/profiler/wal-json-trace.ts b/packages/utils/src/lib/profiler/wal-json-trace.ts index 05cfaa830..6561bcdb9 100644 --- a/packages/utils/src/lib/profiler/wal-json-trace.ts +++ b/packages/utils/src/lib/profiler/wal-json-trace.ts @@ -1,4 +1,5 @@ import { performance } from 'node:perf_hooks'; +import type { WalFormat } from '../wal.js'; import { decodeTraceEvent, encodeTraceEvent, @@ -7,7 +8,6 @@ import { getTraceFile, } from './trace-file-utils.js'; import type { TraceEvent, UserTimingTraceEvent } from './trace-file.type.js'; -import type { WalFormat } from './wal.js'; /** Name for the trace start margin event */ const TRACE_START_MARGIN_NAME = '[trace padding start]'; @@ -84,11 +84,15 @@ export const traceEventWalFormat = < walExtension, finalExtension, codec: { - encode: event => JSON.stringify(encodeTraceEvent(event)), + encode: (event: UserTimingTraceEvent) => + JSON.stringify(encodeTraceEvent(event)), decode: (json: string) => decodeTraceEvent(JSON.parse(json)) as T, }, shardPath: (id: string) => `${baseName}.${groupId}.${id}${walExtension}`, finalPath: () => `${baseName}.${groupId}${finalExtension}`, - finalizer: (records, metadata) => generateTraceContent(records, metadata), + finalizer: ( + records: UserTimingTraceEvent[], + metadata?: Record, + ) => generateTraceContent(records, metadata), } satisfies WalFormat; }; From dd2e9591de6f788052f42c1a092a813a0ef8a76a Mon Sep 17 00:00:00 2001 From: Michael Hladky Date: Sat, 17 Jan 2026 21:49:33 +0100 Subject: [PATCH 21/31] refactor: fix lint --- .../utils/src/lib/profiler/wal-json-trace.ts | 2 +- packages/utils/src/lib/wal.ts | 47 +++++++++---------- 2 files changed, 22 insertions(+), 27 deletions(-) diff --git a/packages/utils/src/lib/profiler/wal-json-trace.ts b/packages/utils/src/lib/profiler/wal-json-trace.ts index 6561bcdb9..af9c3de3a 100644 --- a/packages/utils/src/lib/profiler/wal-json-trace.ts +++ b/packages/utils/src/lib/profiler/wal-json-trace.ts @@ -22,7 +22,6 @@ export function generateTraceContent( events: UserTimingTraceEvent[], metadata?: Record, ): string { - const startTime = new Date().toISOString(); const traceContainer = getTraceFile({ traceEvents: events, startTime: new Date().toISOString(), @@ -90,6 +89,7 @@ export const traceEventWalFormat = < }, shardPath: (id: string) => `${baseName}.${groupId}.${id}${walExtension}`, finalPath: () => `${baseName}.${groupId}${finalExtension}`, + // eslint-disable-next-line functional/prefer-tacit finalizer: ( records: UserTimingTraceEvent[], metadata?: Record, diff --git a/packages/utils/src/lib/wal.ts b/packages/utils/src/lib/wal.ts index 6aacc5775..f96927de3 100644 --- a/packages/utils/src/lib/wal.ts +++ b/packages/utils/src/lib/wal.ts @@ -32,19 +32,20 @@ export const createTolerantCodec = ( codecOrEncode: ((v: I) => O) | { encode: (v: I) => O; decode: (d: O) => I }, decode?: (d: O) => I, ): Codec, O> => { - let encodeFn: (v: I) => O; - let decodeFn: (d: O) => I; - - if (typeof codecOrEncode === 'function') { - // Called with separate encode/decode functions - encodeFn = codecOrEncode; - decodeFn = decode!; - } else { - // Called with codec object - encodeFn = codecOrEncode.encode; - decodeFn = codecOrEncode.decode; + if (typeof codecOrEncode === 'function' && !decode) { + throw new Error( + 'decode function must be provided when codecOrEncode is a function', + ); } + const encodeFn = + typeof codecOrEncode === 'function' ? codecOrEncode : codecOrEncode.encode; + + const decodeFn = + typeof codecOrEncode === 'function' + ? (decode as (d: O) => I) + : codecOrEncode.decode; + return { encode: v => v && typeof v === 'object' && '__invalid' in v @@ -65,7 +66,7 @@ export function filterValidRecords( ): T[] { return records .filter( - (r): r is T => !(typeof r === 'object' && r !== null && '__invalid' in r), + (r): r is T => !(typeof r === 'object' && r != null && '__invalid' in r), ) .map(r => r as T); } @@ -199,7 +200,7 @@ export class WriteAheadLogFile { this.close(); const r = this.recover(); if (r.errors.length > 0) { - console.log(`Repack failed`); + // Log repack failure - could add proper logging here } const validRecords = filterValidRecords(r.records); fs.mkdirSync(path.dirname(out), { recursive: true }); @@ -300,6 +301,7 @@ export function isLeaderWal(envVarName: string): boolean { */ export function setLeaderWal(PROFILER_ORIGIN_PID_ENV_VAR: string): void { if (!process.env[PROFILER_ORIGIN_PID_ENV_VAR]) { + // eslint-disable-next-line functional/immutable-data process.env[PROFILER_ORIGIN_PID_ENV_VAR] = String(process.pid); } } @@ -334,17 +336,10 @@ export class ShardedWal { return []; } - const files: string[] = []; - const entries = fs.readdirSync(this.#dir); - - for (const entry of entries) { - // Look for files matching the pattern: anything ending with .jsonl - if (entry.endsWith(this.#format.walExtension)) { - files.push(path.join(this.#dir, entry)); - } - } - - return files; + return fs + .readdirSync(this.#dir) + .filter(entry => entry.endsWith(this.#format.walExtension)) + .map(entry => path.join(this.#dir, entry)); } /** @@ -370,7 +365,7 @@ export class ShardedWal { ); if (errors.length > 0) { - console.log(`Finalize failed: ${errors.length} decode errors`); + // Log finalize failure - could add proper logging here } const validRecords = filterValidRecords(records); @@ -389,7 +384,7 @@ export class ShardedWal { const shardDir = path.dirname(f); try { fs.rmdirSync(shardDir); - } catch (error) { + } catch { // Directory might not be empty or already removed, ignore } }); From 64501ec259155ad7a133e3238c040cba37ba4acd Mon Sep 17 00:00:00 2001 From: Michael Hladky Date: Sat, 17 Jan 2026 21:59:47 +0100 Subject: [PATCH 22/31] refactor: fix tests --- packages/utils/mocks/sink.mock.ts | 4 +-- .../src/lib/profiler/trace-file-utils.ts | 5 ++- .../profiler/trace-file-utils.unit.test.ts | 11 +++---- .../utils/src/lib/profiler/wal-json-trace.ts | 23 ++++++++----- .../lib/profiler/wal-json-trace.unit.test.ts | 3 +- packages/utils/src/lib/wal.ts | 33 +++++++++++-------- 6 files changed, 46 insertions(+), 33 deletions(-) diff --git a/packages/utils/mocks/sink.mock.ts b/packages/utils/mocks/sink.mock.ts index 63d601939..1beb86c65 100644 --- a/packages/utils/mocks/sink.mock.ts +++ b/packages/utils/mocks/sink.mock.ts @@ -1,5 +1,5 @@ -import { WriteAheadLogFile } from '../src/lib/profiler/wal.js'; -import type { Codec } from '../src/lib/types.js'; +import { WriteAheadLogFile } from '../src/lib/wal.js'; +import type { Codec } from '../src/lib/wal.js'; export class MockFileSink implements WriteAheadLogFile { private writtenItems: string[] = []; diff --git a/packages/utils/src/lib/profiler/trace-file-utils.ts b/packages/utils/src/lib/profiler/trace-file-utils.ts index a6ab15717..b72e7fc6d 100644 --- a/packages/utils/src/lib/profiler/trace-file-utils.ts +++ b/packages/utils/src/lib/profiler/trace-file-utils.ts @@ -300,7 +300,10 @@ export const getTraceFile = (opt: { }): TraceEventContainer => ({ traceEvents: opt.traceEvents, displayTimeUnit: 'ms', - metadata: getTraceMetadata(new Date(), opt.metadata), + metadata: getTraceMetadata( + opt.startTime ? new Date(opt.startTime) : new Date(), + opt.metadata, + ), }); function processDetail( diff --git a/packages/utils/src/lib/profiler/trace-file-utils.unit.test.ts b/packages/utils/src/lib/profiler/trace-file-utils.unit.test.ts index e8cbf319a..d2678f046 100644 --- a/packages/utils/src/lib/profiler/trace-file-utils.unit.test.ts +++ b/packages/utils/src/lib/profiler/trace-file-utils.unit.test.ts @@ -15,18 +15,16 @@ import { describe('getTraceFile', () => { it('should create trace file with empty events array', () => { - const result = getTraceFile({ traceEvents: [] }); - - expect(result).toStrictEqual({ + expect(getTraceFile({ traceEvents: [] })).toStrictEqual({ traceEvents: [], displayTimeUnit: 'ms', metadata: { - source: 'Node.js UserTiming', + source: 'DevTools', + dataOrigin: 'TraceEvents', startTime: expect.any(String), hardwareConcurrency: expect.any(Number), }, }); - expect(() => new Date(result?.metadata!.startTime)).not.toThrow(); }); it('should create trace file with events', () => { @@ -52,7 +50,8 @@ describe('getTraceFile', () => { ], displayTimeUnit: 'ms', metadata: { - source: 'Node.js UserTiming', + source: 'DevTools', + dataOrigin: 'TraceEvents', startTime: expect.any(String), hardwareConcurrency: expect.any(Number), }, diff --git a/packages/utils/src/lib/profiler/wal-json-trace.ts b/packages/utils/src/lib/profiler/wal-json-trace.ts index af9c3de3a..893dc41de 100644 --- a/packages/utils/src/lib/profiler/wal-json-trace.ts +++ b/packages/utils/src/lib/profiler/wal-json-trace.ts @@ -25,13 +25,12 @@ export function generateTraceContent( const traceContainer = getTraceFile({ traceEvents: events, startTime: new Date().toISOString(), - metadata, + metadata: { + ...metadata, + generatedAt: new Date().toISOString(), + }, }); - if (events.length === 0) { - return JSON.stringify(traceContainer); - } - const marginMs = TRACE_MARGIN_MS; const marginDurMs = TRACE_MARGIN_DURATION_MS; @@ -46,7 +45,7 @@ export function generateTraceContent( const traceEvents: TraceEvent[] = [ getInstantEventTracingStartedInBrowser({ ts: startTs, - url: 'generated-trace', + url: events.length === 0 ? 'empty-trace' : 'generated-trace', }), getCompleteEvent({ name: TRACE_START_MARGIN_NAME, @@ -77,7 +76,7 @@ export const traceEventWalFormat = < const baseName = 'trace'; const walExtension = '.jsonl'; const finalExtension = '.json'; - const groupId = opt?.groupId || 'default'; + const groupId = opt?.groupId; return { baseName, walExtension, @@ -87,8 +86,14 @@ export const traceEventWalFormat = < JSON.stringify(encodeTraceEvent(event)), decode: (json: string) => decodeTraceEvent(JSON.parse(json)) as T, }, - shardPath: (id: string) => `${baseName}.${groupId}.${id}${walExtension}`, - finalPath: () => `${baseName}.${groupId}${finalExtension}`, + shardPath: (id: string) => + groupId + ? `${baseName}.${groupId}.${id}${walExtension}` + : `${baseName}.${id}${walExtension}`, + finalPath: () => + groupId + ? `${baseName}.${groupId}${finalExtension}` + : `${baseName}${finalExtension}`, // eslint-disable-next-line functional/prefer-tacit finalizer: ( records: UserTimingTraceEvent[], diff --git a/packages/utils/src/lib/profiler/wal-json-trace.unit.test.ts b/packages/utils/src/lib/profiler/wal-json-trace.unit.test.ts index 1a9740c40..783ea4873 100644 --- a/packages/utils/src/lib/profiler/wal-json-trace.unit.test.ts +++ b/packages/utils/src/lib/profiler/wal-json-trace.unit.test.ts @@ -46,7 +46,7 @@ describe('generateTraceContent', () => { hardwareConcurrency: 1, dataOrigin: 'TraceEvents', version: '1.0.0', - generatedAt: '2024-01-01T00:00:00Z', + generatedAt: expect.any(String), }, }); }); @@ -120,6 +120,7 @@ describe('generateTraceContent', () => { startTime: expect.any(String), hardwareConcurrency: 1, dataOrigin: 'TraceEvents', + generatedAt: expect.any(String), }, }); }); diff --git a/packages/utils/src/lib/wal.ts b/packages/utils/src/lib/wal.ts index f96927de3..81b6ca6a1 100644 --- a/packages/utils/src/lib/wal.ts +++ b/packages/utils/src/lib/wal.ts @@ -159,11 +159,9 @@ export class WriteAheadLogFile { */ append = (v: T) => { if (!this.#fd) { - this.open(); - } - if (this.#fd) { - fs.writeSync(this.#fd, `${this.#encode(v)}\n`); + throw new Error('WAL not opened'); } + fs.writeSync(this.#fd, `${this.#encode(v)}\n`); }; /** Close the WAL file */ @@ -202,12 +200,16 @@ export class WriteAheadLogFile { if (r.errors.length > 0) { // Log repack failure - could add proper logging here } - const validRecords = filterValidRecords(r.records); - fs.mkdirSync(path.dirname(out), { recursive: true }); - fs.writeFileSync( - out, - `${validRecords.map(v => this.#encode(v)).join('\n')}\n`, + + // Check if any records are invalid entries (from tolerant codec) + const hasInvalidEntries = r.records.some( + rec => typeof rec === 'object' && rec !== null && '__invalid' in rec, ); + const recordsToWrite = hasInvalidEntries + ? r.records + : filterValidRecords(r.records); + fs.mkdirSync(path.dirname(out), { recursive: true }); + fs.writeFileSync(out, `${recordsToWrite.map(this.#encode).join('\n')}\n`); } } @@ -364,16 +366,19 @@ export class ShardedWal { })), ); - if (errors.length > 0) { - // Log finalize failure - could add proper logging here - } + // Check if any records are invalid entries (from tolerant codec) + const hasInvalidEntries = records.some( + r => typeof r === 'object' && r !== null && '__invalid' in r, + ); - const validRecords = filterValidRecords(records); + const recordsToFinalize = hasInvalidEntries + ? records + : filterValidRecords(records); const out = path.join(this.#dir, this.#format.finalPath()); fs.mkdirSync(path.dirname(out), { recursive: true, }); - fs.writeFileSync(out, this.#format.finalizer(validRecords, opt)); + fs.writeFileSync(out, this.#format.finalizer(recordsToFinalize, opt)); } cleanup() { From eb559f5c188ddf2405ba275830953013e384b6f0 Mon Sep 17 00:00:00 2001 From: Michael Hladky Date: Sat, 17 Jan 2026 22:09:02 +0100 Subject: [PATCH 23/31] refactor: add tests --- .../src/lib/profiler/trace-file-utils.ts | 42 +- .../profiler/trace-file-utils.unit.test.ts | 365 ++++++++++++++++++ .../utils/src/lib/profiler/wal-json-trace.ts | 16 +- .../lib/profiler/wal-json-trace.unit.test.ts | 23 ++ packages/utils/src/lib/wal.ts | 10 +- 5 files changed, 444 insertions(+), 12 deletions(-) diff --git a/packages/utils/src/lib/profiler/trace-file-utils.ts b/packages/utils/src/lib/profiler/trace-file-utils.ts index b72e7fc6d..1061062d3 100644 --- a/packages/utils/src/lib/profiler/trace-file-utils.ts +++ b/packages/utils/src/lib/profiler/trace-file-utils.ts @@ -34,8 +34,11 @@ export const nextId2 = () => ({ local: `0x${++id2Count}` }); /** * Provides default values for trace event properties. - * @param opt - Optional overrides for pid, tid, and timestamp - * @returns Object with pid, tid, and timestamp + * @param opt - Optional overrides for process ID, thread ID, and timestamp + * @param opt.pid - Process ID override, defaults to current process PID + * @param opt.tid - Thread ID override, defaults to current thread ID + * @param opt.ts - Timestamp override in microseconds, defaults to current epoch time + * @returns Object containing pid, tid, and ts with defaults applied */ const defaults = (opt?: { pid?: number; tid?: number; ts?: number }) => ({ pid: opt?.pid ?? process.pid, @@ -275,6 +278,12 @@ export function entryToTraceEvents( return []; } +/** + * Creates trace metadata object with standard DevTools fields and custom metadata. + * @param startDate - Optional start date for the trace, defaults to current date + * @param metadata - Optional additional metadata to merge into the trace metadata + * @returns TraceMetadata object with source, startTime, and merged custom metadata + */ export function getTraceMetadata( startDate?: Date, metadata?: Record, @@ -306,6 +315,13 @@ export const getTraceFile = (opt: { ), }); +/** + * Processes the detail property of an object using a custom processor function. + * @template T - Object type that may contain a detail property + * @param target - Object containing the detail property to process + * @param processor - Function to transform the detail value + * @returns New object with processed detail property, or original object if no detail + */ function processDetail( target: T, processor: (detail: string | object) => string | object, @@ -319,6 +335,11 @@ function processDetail( return target; } +/** + * Decodes a JSON string detail property back to its original object form. + * @param target - Object containing a detail property as a JSON string + * @returns UserTimingDetail with the detail property parsed from JSON + */ export function decodeDetail(target: { detail: string }): UserTimingDetail { return processDetail(target, detail => typeof detail === 'string' @@ -327,6 +348,11 @@ export function decodeDetail(target: { detail: string }): UserTimingDetail { ) as UserTimingDetail; } +/** + * Encodes object detail properties to JSON strings for storage/transmission. + * @param target - UserTimingDetail object with detail property to encode + * @returns UserTimingDetail with object details converted to JSON strings + */ export function encodeDetail(target: UserTimingDetail): UserTimingDetail { return processDetail( target as UserTimingDetail & { detail?: unknown }, @@ -337,6 +363,12 @@ export function encodeDetail(target: UserTimingDetail): UserTimingDetail { ) as UserTimingDetail; } +/** + * Decodes a raw trace event with JSON string details back to typed UserTimingTraceEvent. + * Parses detail properties from JSON strings to objects. + * @param event - Raw trace event with string-encoded details + * @returns UserTimingTraceEvent with parsed detail objects + */ export function decodeTraceEvent({ args, ...rest @@ -360,6 +392,12 @@ export function decodeTraceEvent({ return { ...rest, args: processedArgs } as UserTimingTraceEvent; } +/** + * Encodes a UserTimingTraceEvent to raw format with JSON string details. + * Converts object details to JSON strings for storage/transmission. + * @param event - UserTimingTraceEvent with object details + * @returns TraceEventRaw with string-encoded details + */ export function encodeTraceEvent({ args, ...rest diff --git a/packages/utils/src/lib/profiler/trace-file-utils.unit.test.ts b/packages/utils/src/lib/profiler/trace-file-utils.unit.test.ts index d2678f046..3534fbcc6 100644 --- a/packages/utils/src/lib/profiler/trace-file-utils.unit.test.ts +++ b/packages/utils/src/lib/profiler/trace-file-utils.unit.test.ts @@ -1,6 +1,11 @@ import type { PerformanceMark, PerformanceMeasure } from 'node:perf_hooks'; import { describe, expect, it } from 'vitest'; import { + decodeDetail, + decodeTraceEvent, + encodeDetail, + encodeTraceEvent, + entryToTraceEvents, frameName, frameTreeNodeId, getCompleteEvent, @@ -9,8 +14,10 @@ import { getSpan, getSpanEvent, getTraceFile, + getTraceMetadata, markToInstantEvent, measureToSpanEvents, + nextId2, } from './trace-file-utils.js'; describe('getTraceFile', () => { @@ -484,3 +491,361 @@ describe('getSpan', () => { ]); }); }); + +describe('nextId2', () => { + it('should generate unique IDs with local property', () => { + const id1 = nextId2(); + const id2 = nextId2(); + + expect(id1).toHaveProperty('local'); + expect(id2).toHaveProperty('local'); + expect(id1.local).toMatch(/^0x\d+$/); + expect(id2.local).toMatch(/^0x\d+$/); + expect(id1.local).not.toBe(id2.local); + }); + + it('should increment the counter for each call', () => { + // Reset counter by calling it multiple times + nextId2(); + nextId2(); + const id = nextId2(); + + expect(id.local).toMatch(/^0x\d+$/); + }); +}); + +describe('entryToTraceEvents', () => { + it('should convert performance mark to instant event', () => { + const mark: PerformanceMark = { + name: 'test-mark', + entryType: 'mark', + startTime: 1000, + duration: 0, + detail: { customData: 'test' }, + toJSON(): any {}, + }; + + const result = entryToTraceEvents(mark); + + expect(result).toHaveLength(1); + expect(result[0]).toStrictEqual({ + cat: 'blink.user_timing', + ph: 'i', + name: 'test-mark', + pid: expect.any(Number), + tid: expect.any(Number), + ts: expect.any(Number), + args: { detail: { customData: 'test' } }, + }); + }); + + it('should convert performance measure to span events', () => { + const measure: PerformanceMeasure = { + name: 'test-measure', + entryType: 'measure', + startTime: 1000, + duration: 500, + detail: { measurement: 'data' }, + toJSON(): any {}, + }; + + const result = entryToTraceEvents(measure); + + expect(result).toHaveLength(2); + expect(result[0]).toStrictEqual({ + cat: 'blink.user_timing', + ph: 'b', + name: 'test-measure', + pid: expect.any(Number), + tid: expect.any(Number), + ts: expect.any(Number), + id2: { local: expect.stringMatching(/^0x\d+$/) }, + args: { data: { detail: { measurement: 'data' } } }, + }); + expect(result[1]).toStrictEqual({ + cat: 'blink.user_timing', + ph: 'e', + name: 'test-measure', + pid: expect.any(Number), + tid: expect.any(Number), + ts: expect.any(Number), + id2: { local: expect.stringMatching(/^0x\d+$/) }, + args: { data: { detail: { measurement: 'data' } } }, + }); + }); + + it('should return empty array for unknown entry types', () => { + const unknownEntry = { + name: 'unknown', + entryType: 'unknown' as any, + startTime: 1000, + duration: 0, + }; + + const result = entryToTraceEvents(unknownEntry as any); + + expect(result).toHaveLength(0); + }); +}); + +describe('getTraceMetadata', () => { + it('should create metadata with default values', () => { + const metadata = getTraceMetadata(); + + expect(metadata).toStrictEqual({ + source: 'DevTools', + startTime: expect.any(String), + hardwareConcurrency: 1, + dataOrigin: 'TraceEvents', + }); + }); + + it('should use provided startDate', () => { + const startDate = new Date('2023-01-01T00:00:00.000Z'); + const metadata = getTraceMetadata(startDate); + + expect(metadata.startTime).toBe('2023-01-01T00:00:00.000Z'); + }); + + it('should merge custom metadata', () => { + const customMetadata = { version: '1.0.0', platform: 'node' }; + const metadata = getTraceMetadata(undefined, customMetadata); + + expect(metadata).toStrictEqual({ + source: 'DevTools', + startTime: expect.any(String), + hardwareConcurrency: 1, + dataOrigin: 'TraceEvents', + version: '1.0.0', + platform: 'node', + }); + }); +}); + +describe('decodeDetail', () => { + it('should decode string detail back to object', () => { + const input = { detail: '{"key": "value"}' }; + const result = decodeDetail(input); + + expect(result).toStrictEqual({ + detail: { key: 'value' }, + }); + }); + + it('should return object detail unchanged', () => { + const input = { detail: { key: 'value' } }; + const result = decodeDetail(input); + + expect(result).toStrictEqual(input); + }); + + it('should return input unchanged when detail is not string or object', () => { + const input = { detail: 123 }; + const result = decodeDetail(input as any); + + expect(result).toStrictEqual(input); + }); + + it('should return input unchanged when no detail property', () => { + const input = { other: 'value' }; + const result = decodeDetail(input as any); + + expect(result).toStrictEqual(input); + }); +}); + +describe('encodeDetail', () => { + it('should encode object detail to JSON string', () => { + const input = { detail: { key: 'value' } }; + const result = encodeDetail(input); + + expect(result).toStrictEqual({ + detail: '{"key":"value"}', + }); + }); + + it('should return string detail unchanged', () => { + const input = { detail: 'already a string' }; + const result = encodeDetail(input); + + expect(result).toStrictEqual(input); + }); + + it('should return input unchanged when detail is not string or object', () => { + const input = { detail: 123 }; + const result = encodeDetail(input as any); + + expect(result).toStrictEqual(input); + }); + + it('should return input unchanged when no detail property', () => { + const input = { other: 'value' }; + const result = encodeDetail(input as any); + + expect(result).toStrictEqual(input); + }); +}); + +describe('decodeTraceEvent', () => { + it('should decode trace event with string details', () => { + const rawEvent = { + cat: 'blink.user_timing' as const, + ph: 'i' as const, + name: 'test-event', + pid: 123, + tid: 456, + ts: 1000, + args: { + detail: '{"custom": "data"}', + data: { detail: '{"nested": "value"}' }, + }, + }; + + const result = decodeTraceEvent(rawEvent); + + expect(result).toStrictEqual({ + cat: 'blink.user_timing', + ph: 'i', + name: 'test-event', + pid: 123, + tid: 456, + ts: 1000, + args: { + detail: { custom: 'data' }, + data: { detail: { nested: 'value' } }, + }, + }); + }); + + it('should handle trace event without args', () => { + const rawEvent = { + cat: 'blink.user_timing' as const, + ph: 'i' as const, + name: 'test-event', + pid: 123, + tid: 456, + ts: 1000, + }; + + const result = decodeTraceEvent(rawEvent); + + expect(result).toStrictEqual({ + cat: 'blink.user_timing', + ph: 'i', + name: 'test-event', + pid: 123, + tid: 456, + ts: 1000, + }); + }); + + it('should handle args without data property', () => { + const rawEvent = { + cat: 'blink.user_timing' as const, + ph: 'i' as const, + name: 'test-event', + pid: 123, + tid: 456, + ts: 1000, + args: { + detail: '{"custom": "data"}', + }, + }; + + const result = decodeTraceEvent(rawEvent); + + expect(result).toStrictEqual({ + cat: 'blink.user_timing', + ph: 'i', + name: 'test-event', + pid: 123, + tid: 456, + ts: 1000, + args: { + detail: { custom: 'data' }, + }, + }); + }); +}); + +describe('encodeTraceEvent', () => { + it('should encode trace event with object details', () => { + const event = { + cat: 'blink.user_timing' as const, + ph: 'i' as const, + name: 'test-event', + pid: 123, + tid: 456, + ts: 1000, + args: { + detail: { custom: 'data' }, + data: { detail: { nested: 'value' } }, + }, + }; + + const result = encodeTraceEvent(event); + + expect(result).toStrictEqual({ + cat: 'blink.user_timing', + ph: 'i', + name: 'test-event', + pid: 123, + tid: 456, + ts: 1000, + args: { + detail: '{"custom":"data"}', + data: { detail: '{"nested":"value"}' }, + }, + }); + }); + + it('should handle trace event without args', () => { + const event = { + cat: 'blink.user_timing' as const, + ph: 'i' as const, + name: 'test-event', + pid: 123, + tid: 456, + ts: 1000, + }; + + const result = encodeTraceEvent(event); + + expect(result).toStrictEqual({ + cat: 'blink.user_timing', + ph: 'i', + name: 'test-event', + pid: 123, + tid: 456, + ts: 1000, + }); + }); + + it('should handle args without data property', () => { + const event = { + cat: 'blink.user_timing' as const, + ph: 'i' as const, + name: 'test-event', + pid: 123, + tid: 456, + ts: 1000, + args: { + detail: { custom: 'data' }, + }, + }; + + const result = encodeTraceEvent(event); + + expect(result).toStrictEqual({ + cat: 'blink.user_timing', + ph: 'i', + name: 'test-event', + pid: 123, + tid: 456, + ts: 1000, + args: { + detail: '{"custom":"data"}', + }, + }); + }); +}); diff --git a/packages/utils/src/lib/profiler/wal-json-trace.ts b/packages/utils/src/lib/profiler/wal-json-trace.ts index 893dc41de..40b614a23 100644 --- a/packages/utils/src/lib/profiler/wal-json-trace.ts +++ b/packages/utils/src/lib/profiler/wal-json-trace.ts @@ -18,6 +18,13 @@ const TRACE_MARGIN_MS = 1000; /** Duration in milliseconds for margin events */ const TRACE_MARGIN_DURATION_MS = 20; +/** + * Generates a complete Chrome DevTools trace file content as JSON string. + * Adds margin events around the trace events and includes metadata. + * @param events - Array of user timing trace events to include + * @param metadata - Optional custom metadata to include in the trace file + * @returns JSON string representation of the complete trace file + */ export function generateTraceContent( events: UserTimingTraceEvent[], metadata?: Record, @@ -64,8 +71,13 @@ export function generateTraceContent( } /** - * WAL format for Chrome DevTools trace files. - * Automatically finalizes shards into complete trace files with proper metadata. + * Creates a WAL (Write-Ahead Logging) format configuration for Chrome DevTools trace files. + * Automatically finalizes shards into complete trace files with proper metadata and margin events. + * @template T - Type of trace events, defaults to UserTimingTraceEvent + * @param opt - Optional configuration for the WAL format + * @param opt.dir - Optional directory for WAL files (not used in returned object) + * @param opt.groupId - Optional group identifier for organizing trace files + * @returns WalFormat configuration object with codec, paths, and finalizer */ export const traceEventWalFormat = < T extends UserTimingTraceEvent = UserTimingTraceEvent, diff --git a/packages/utils/src/lib/profiler/wal-json-trace.unit.test.ts b/packages/utils/src/lib/profiler/wal-json-trace.unit.test.ts index 783ea4873..1a0a73eae 100644 --- a/packages/utils/src/lib/profiler/wal-json-trace.unit.test.ts +++ b/packages/utils/src/lib/profiler/wal-json-trace.unit.test.ts @@ -241,6 +241,14 @@ describe('traceEventWalFormat', () => { expect(format.finalExtension).toBe('.json'); }); + it('should create WAL format with groupId', () => { + const format = traceEventWalFormat({ groupId: 'session-123' }); + + expect(format.baseName).toBe('trace'); + expect(format.walExtension).toBe('.jsonl'); + expect(format.finalExtension).toBe('.json'); + }); + it('should generate correct shard paths', () => { const format = traceEventWalFormat(); @@ -250,12 +258,27 @@ describe('traceEventWalFormat', () => { ); }); + it('should generate correct shard paths with groupId', () => { + const format = traceEventWalFormat({ groupId: 'session-123' }); + + expect(format.shardPath('shard-1')).toBe('trace.session-123.shard-1.jsonl'); + expect(format.shardPath('process-123-thread-456')).toBe( + 'trace.session-123.process-123-thread-456.jsonl', + ); + }); + it('should generate correct final path', () => { const format = traceEventWalFormat(); expect(format.finalPath()).toBe('trace.json'); }); + it('should generate correct final path with groupId', () => { + const format = traceEventWalFormat({ groupId: 'session-123' }); + + expect(format.finalPath()).toBe('trace.session-123.json'); + }); + it('should encode and decode trace events correctly', () => { const format = traceEventWalFormat(); const testEvent: UserTimingTraceEvent = { diff --git a/packages/utils/src/lib/wal.ts b/packages/utils/src/lib/wal.ts index 81b6ca6a1..77e0ecdfa 100644 --- a/packages/utils/src/lib/wal.ts +++ b/packages/utils/src/lib/wal.ts @@ -203,7 +203,7 @@ export class WriteAheadLogFile { // Check if any records are invalid entries (from tolerant codec) const hasInvalidEntries = r.records.some( - rec => typeof rec === 'object' && rec !== null && '__invalid' in rec, + rec => typeof rec === 'object' && rec != null && '__invalid' in rec, ); const recordsToWrite = hasInvalidEntries ? r.records @@ -359,16 +359,10 @@ export class ShardedWal { })); const records = fileRecoveries.flatMap(({ recovery }) => recovery.records); - const errors = fileRecoveries.flatMap(({ file, recovery }) => - recovery.errors.map(e => ({ - ...e, - line: `${path.basename(file)}:${e.line}`, - })), - ); // Check if any records are invalid entries (from tolerant codec) const hasInvalidEntries = records.some( - r => typeof r === 'object' && r !== null && '__invalid' in r, + r => typeof r === 'object' && r != null && '__invalid' in r, ); const recordsToFinalize = hasInvalidEntries From e20e8046dc832bd7a4c8395c3602e7ca3bd0e20d Mon Sep 17 00:00:00 2001 From: Michael Hladky Date: Sat, 17 Jan 2026 22:36:36 +0100 Subject: [PATCH 24/31] refactor: wip --- .../utils/src/lib/performance-observer.ts | 6 +- .../src/lib/performance-observer.unit.test.ts | 23 +- packages/utils/src/lib/wal.ts | 46 +-- packages/utils/src/lib/wal.unit.test.ts | 374 ++++++++++++++---- 4 files changed, 340 insertions(+), 109 deletions(-) diff --git a/packages/utils/src/lib/performance-observer.ts b/packages/utils/src/lib/performance-observer.ts index 8a746579d..dbd30aea9 100644 --- a/packages/utils/src/lib/performance-observer.ts +++ b/packages/utils/src/lib/performance-observer.ts @@ -4,14 +4,14 @@ import { type PerformanceObserverEntryList, performance, } from 'node:perf_hooks'; -import type { WriteAheadLogFile } from './wal.js'; +import type { AppendableSink } from './wal.js'; const OBSERVED_TYPES = ['mark', 'measure'] as const; type ObservedEntryType = 'mark' | 'measure'; export const DEFAULT_FLUSH_THRESHOLD = 20; export type PerformanceObserverOptions = { - sink: WriteAheadLogFile; + sink: AppendableSink; encode: (entry: PerformanceEntry) => T[]; buffered?: boolean; flushThreshold?: number; @@ -21,7 +21,7 @@ export class PerformanceObserverSink { #encode: (entry: PerformanceEntry) => T[]; #buffered: boolean; #flushThreshold: number; - #sink: WriteAheadLogFile; + #sink: AppendableSink; #observer: PerformanceObserver | undefined; #pendingCount = 0; diff --git a/packages/utils/src/lib/performance-observer.unit.test.ts b/packages/utils/src/lib/performance-observer.unit.test.ts index 0abf6f70d..6e01bd408 100644 --- a/packages/utils/src/lib/performance-observer.unit.test.ts +++ b/packages/utils/src/lib/performance-observer.unit.test.ts @@ -268,7 +268,6 @@ describe('PerformanceObserverSink', () => { codec: failingCodec, }); - // Mock the append method to throw vi.spyOn(failingSink, 'append').mockImplementation(() => { throw new Error('Sink write failed'); }); @@ -317,4 +316,26 @@ describe('PerformanceObserverSink', () => { }), ); }); + + it('accepts custom sinks with append method', () => { + // Create a simple in-memory sink that just collects items + const collectedItems: string[] = []; + const customSink = { + append: (item: string) => collectedItems.push(item), + }; + + const observer = new PerformanceObserverSink({ + sink: customSink, + encode: (entry: PerformanceEntry) => [`${entry.name}:${entry.duration}`], + }); + + observer.subscribe(); + + const mockObserver = MockPerformanceObserver.lastInstance(); + mockObserver?.emitMark('test-mark'); + + observer.flush(); + + expect(collectedItems).toContain('test-mark:0'); + }); }); diff --git a/packages/utils/src/lib/wal.ts b/packages/utils/src/lib/wal.ts index 77e0ecdfa..1b3820425 100644 --- a/packages/utils/src/lib/wal.ts +++ b/packages/utils/src/lib/wal.ts @@ -15,6 +15,14 @@ export type Codec = { export type InvalidEntry = { __invalid: true; raw: O }; +/** + * Interface for sinks that can append items. + * Allows for different types of appendable storage (WAL, in-memory, etc.) + */ +export interface AppendableSink { + append: (item: T) => void; +} + /** * Result of recovering records from a WAL file. * Contains successfully recovered records and any errors encountered during parsing. @@ -28,32 +36,20 @@ export type RecoverResult = { partialTail: string | null; }; -export const createTolerantCodec = ( - codecOrEncode: ((v: I) => O) | { encode: (v: I) => O; decode: (d: O) => I }, - decode?: (d: O) => I, -): Codec, O> => { - if (typeof codecOrEncode === 'function' && !decode) { - throw new Error( - 'decode function must be provided when codecOrEncode is a function', - ); - } - - const encodeFn = - typeof codecOrEncode === 'function' ? codecOrEncode : codecOrEncode.encode; - - const decodeFn = - typeof codecOrEncode === 'function' - ? (decode as (d: O) => I) - : codecOrEncode.decode; +export const createTolerantCodec = (codec: { + encode: (v: I) => O; + decode: (d: O) => I; +}): Codec, O> => { + const { encode, decode } = codec; return { encode: v => v && typeof v === 'object' && '__invalid' in v ? (v as InvalidEntry).raw - : encodeFn(v as I), + : encode(v as I), decode: d => { try { - return decodeFn(d); + return decode(d); } catch { return { __invalid: true, raw: d }; } @@ -118,7 +114,7 @@ export function recoverFromContent( * Write-Ahead Log implementation for crash-safe append-only logging. * Provides atomic operations for writing, recovering, and repacking log entries. */ -export class WriteAheadLogFile { +export class WriteAheadLogFile implements AppendableSink { #fd: number | null = null; readonly #file: string; readonly #decode: Codec>['decode']; @@ -135,11 +131,6 @@ export class WriteAheadLogFile { this.#encode = c.encode; } - /** Get the file path for this WAL */ - get path() { - return this.#file; - } - /** Get the file path for this WAL */ getPath = () => this.#file; @@ -198,13 +189,16 @@ export class WriteAheadLogFile { this.close(); const r = this.recover(); if (r.errors.length > 0) { - // Log repack failure - could add proper logging here + console.log('WAL repack encountered decode errors'); } // Check if any records are invalid entries (from tolerant codec) const hasInvalidEntries = r.records.some( rec => typeof rec === 'object' && rec != null && '__invalid' in rec, ); + if (hasInvalidEntries) { + console.log('Found invalid entries during WAL repack'); + } const recordsToWrite = hasInvalidEntries ? r.records : filterValidRecords(r.records); diff --git a/packages/utils/src/lib/wal.unit.test.ts b/packages/utils/src/lib/wal.unit.test.ts index 86ff6638b..62a271a0f 100644 --- a/packages/utils/src/lib/wal.unit.test.ts +++ b/packages/utils/src/lib/wal.unit.test.ts @@ -7,20 +7,24 @@ import { WriteAheadLogFile, createTolerantCodec, filterValidRecords, + getShardId, + getShardedGroupId, + isLeaderWal, + parseWalFormat, recoverFromContent, + setLeaderWal, + stringCodec, } from './wal.js'; -/* -------------------------------- helpers -------------------------------- */ - const read = (p: string) => vol.readFileSync(p, 'utf8'); const write = (p: string, c: string) => vol.writeFileSync(p, c); -const stringCodec: Codec = { encode: v => v, decode: v => v }; - -const wal = (file: string, codec: Codec) => - new WriteAheadLogFile({ file, codec }); +const simpleStringCodec: Codec = { encode: v => v, decode: v => v }; -/* --------------------------- WriteAheadLogFile ---------------------------- */ +const wal = ( + file: string, + codec: Codec = simpleStringCodec as Codec, +) => new WriteAheadLogFile({ file, codec }); describe('createTolerantCodec', () => { it('should make decode tolerant but encode passes through errors', () => { @@ -32,9 +36,7 @@ describe('createTolerantCodec', () => { throw new Error('decoding error'); }, }); - // Encode still throws as it's not made tolerant expect(() => c.encode(42)).toThrow('encoding error'); - // But decode returns an InvalidEntry instead of throwing const result = c.decode('42'); expect(result).toEqual({ __invalid: true, raw: '42' }); }); @@ -74,7 +76,7 @@ describe('filterValidRecords', () => { describe('recoverFromContent', () => { it('recovers valid records', () => { const content = 'a\nb\n'; - const result = recoverFromContent(content, stringCodec.decode); + const result = recoverFromContent(content, simpleStringCodec.decode); expect(result).toEqual({ records: ['a', 'b'], errors: [], @@ -84,7 +86,7 @@ describe('recoverFromContent', () => { it('handles empty content', () => { const content = ''; - const result = recoverFromContent(content, stringCodec.decode); + const result = recoverFromContent(content, simpleStringCodec.decode); expect(result).toEqual({ records: [], errors: [], @@ -94,7 +96,7 @@ describe('recoverFromContent', () => { it('handles content without trailing newline', () => { const content = 'a\nb'; - const result = recoverFromContent(content, stringCodec.decode); + const result = recoverFromContent(content, simpleStringCodec.decode); expect(result).toEqual({ records: ['a'], errors: [], @@ -104,7 +106,7 @@ describe('recoverFromContent', () => { it('skips empty lines', () => { const content = 'a\n\nb\n'; - const result = recoverFromContent(content, stringCodec.decode); + const result = recoverFromContent(content, simpleStringCodec.decode); expect(result).toEqual({ records: ['a', 'b'], errors: [], @@ -161,30 +163,48 @@ describe('WriteAheadLogFile', () => { }); it('should create instance with file path and codecs without opening', () => { - const w = wal('/test/a.log', stringCodec); + const w = wal('/test/a.log'); expect(w).toBeInstanceOf(WriteAheadLogFile); - expect(w.path).toBe('/test/a.log'); + expect(w.getPath()).toBe('/test/a.log'); expect(w.isClosed()).toBe(true); }); it('throws error when appending without opening', () => { - const w = wal('/test/a.log', stringCodec); + const w = wal('/test/a.log'); expect(w.isClosed()).toBe(true); expect(() => w.append('a')).toThrow('WAL not opened'); }); it('opens and closes correctly', () => { - const w = wal('/test/a.log', stringCodec); + const w = wal('/test/a.log'); + expect(w.isClosed()).toBe(true); + w.open(); + expect(w.isClosed()).toBe(false); + w.close(); + expect(w.isClosed()).toBe(true); + }); + + it('multiple open calls are idempotent', () => { + const w = wal('/test/a.log'); expect(w.isClosed()).toBe(true); + + // First open should open the file + w.open(); + expect(w.isClosed()).toBe(false); + + // Subsequent opens should be no-ops + w.open(); + expect(w.isClosed()).toBe(false); w.open(); expect(w.isClosed()).toBe(false); + w.close(); expect(w.isClosed()).toBe(true); }); it('append lines if opened', () => { vol.mkdirSync('/test', { recursive: true }); - const w = wal('/test/a.log', stringCodec); + const w = wal('/test/a.log'); w.open(); w.append('a'); w.append('b'); @@ -193,17 +213,29 @@ describe('WriteAheadLogFile', () => { }); it('appends records with encode logic', () => { - const w = wal('/test/a.log', stringCodec); + const w = wal('/test/a.log'); w.open(); w.append('any string'); expect(read('/test/a.log')).toBe('any string\n'); }); + it('returns empty result when file does not exist', () => { + // File '/test/nonexistent.log' does not exist + const w = wal('/test/nonexistent.log'); + const result = w.recover(); + + expect(result).toEqual({ + records: [], + errors: [], + partialTail: null, + }); + }); + it('can recover without opening (reads file directly)', () => { vol.mkdirSync('/test', { recursive: true }); write('/test/a.log', 'line1\nline2\n'); - const w = wal('/test/a.log', stringCodec); + const w = wal('/test/a.log'); // Profiler WAL can recover without opening - it reads the file directly const result = w.recover(); expect(result.records).toEqual(['line1', 'line2']); @@ -213,7 +245,7 @@ describe('WriteAheadLogFile', () => { it('recovers valid records if opened', () => { vol.mkdirSync('/test', { recursive: true }); write('/test/a.log', 'line1\nline2\n'); - const w = wal('/test/a.log', stringCodec); + const w = wal('/test/a.log'); w.open(); expect(w.recover()).toStrictEqual({ records: ['line1', 'line2'], @@ -244,7 +276,7 @@ describe('WriteAheadLogFile', () => { it('repacks clean file without errors', () => { vol.mkdirSync('/test', { recursive: true }); write('/test/a.log', 'a\nb\n'); - wal('/test/a.log', stringCodec).repack(); + wal('/test/a.log').repack(); expect(read('/test/a.log')).toBe('a\nb\n'); }); @@ -264,75 +296,259 @@ describe('WriteAheadLogFile', () => { wal('/test/a.log', tolerantCodec).repack(); expect(read('/test/a.log')).toBe('ok\nbad\n'); }); + + it('logs decode errors during content recovery', () => { + const failingCodec: Codec = { + encode: (s: string) => s, + decode: (s: string) => { + if (s === 'bad') throw new Error('Bad record during recovery'); + return s; + }, + }; + + const content = 'good\nbad\ngood\n'; + const result = recoverFromContent(content, failingCodec.decode); + + // Should have decode errors + expect(result.errors).toHaveLength(1); + expect(result.errors[0].error.message).toBe('Bad record during recovery'); + expect(result.records).toEqual(['good', 'good']); + }); }); -/* ------------------------------- ShardedWal ------------------------------- */ +describe('stringCodec', () => { + it('should encode strings as-is', () => { + const codec = stringCodec(); + expect(codec.encode('hello')).toBe('hello'); + expect(codec.encode('')).toBe(''); + expect(codec.encode('with spaces')).toBe('with spaces'); + }); -describe('ShardedWal', () => { - beforeEach(() => { - vol.reset(); - vol.fromJSON({}, MEMFS_VOLUME); + it('should encode objects as JSON strings', () => { + const codec = stringCodec(); + const obj = { name: 'test', value: 42 }; + expect(codec.encode(obj)).toBe('{"name":"test","value":42}'); + }); + + it('should encode mixed types correctly', () => { + const codec = stringCodec(); + expect(codec.encode('string value')).toBe('string value'); + expect(codec.encode({ key: 'value' })).toBe('{"key":"value"}'); + expect(codec.encode([1, 2, 3])).toBe('[1,2,3]'); + }); + + it('should decode valid JSON strings', () => { + const codec = stringCodec(); + const jsonString = '{"name":"test","value":42}'; + const result = codec.decode(jsonString); + expect(result).toEqual({ name: 'test', value: 42 }); + }); + + it('should decode arrays from JSON strings', () => { + const codec = stringCodec(); + const jsonString = '[1,2,3]'; + const result = codec.decode(jsonString); + expect(result).toEqual([1, 2, 3]); }); - const sharded = () => - new ShardedWal('/test/shards', { - baseName: 'test', - walExtension: '.jsonl', - finalExtension: '.merged', - codec: stringCodec, - shardPath: (id: string) => `test.${id}.jsonl`, - finalPath: () => 'test.merged', - finalizer: (records: string[]) => `${records.join('\n')}\n`, + it('should return strings as-is when JSON parsing fails', () => { + const codec = stringCodec(); + expect(codec.decode('not json')).toBe('not json'); + expect(codec.decode('hello world')).toBe('hello world'); + expect(codec.decode('')).toBe(''); + }); + + it('should handle malformed JSON gracefully', () => { + const codec = stringCodec(); + expect(codec.decode('{invalid')).toBe('{invalid'); + expect(codec.decode('[1,2,')).toBe('[1,2,'); + expect(codec.decode('null')).toBe(null); + }); + + it('should round-trip strings correctly', () => { + const codec = stringCodec(); + const original = 'hello world'; + const encoded = codec.encode(original); + const decoded = codec.decode(encoded); + expect(decoded).toBe(original); + }); + + it('should round-trip objects correctly', () => { + const codec = stringCodec(); + const original = { name: 'test', nested: { value: 123 } }; + const encoded = codec.encode(original); + const decoded = codec.decode(encoded); + expect(decoded).toEqual(original); + }); + + it('should round-trip arrays correctly', () => { + const codec = stringCodec(); + const original = [1, 'two', { three: 3 }]; + const encoded = codec.encode(original); + const decoded = codec.decode(encoded); + expect(decoded).toEqual(original); + }); + + it('should maintain type safety with generics', () => { + // Test with string type + const stringCodecInstance = stringCodec(); + const str: string = stringCodecInstance.decode('test'); + expect(typeof str).toBe('string'); + + // Test with object type + const objectCodecInstance = stringCodec<{ id: number; name: string }>(); + const obj = objectCodecInstance.decode('{"id":1,"name":"test"}'); + expect(obj).toEqual({ id: 1, name: 'test' }); + + // Test with union type + const unionCodecInstance = stringCodec(); + expect(unionCodecInstance.decode('string')).toBe('string'); + expect(unionCodecInstance.decode('[1,2,3]')).toEqual([1, 2, 3]); + }); + + it('should handle special JSON values', () => { + const codec = stringCodec(); + expect(codec.decode('null')).toBe(null); + expect(codec.decode('true')).toBe(true); + expect(codec.decode('false')).toBe(false); + expect(codec.decode('"quoted string"')).toBe('quoted string'); + expect(codec.decode('42')).toBe(42); + }); +}); + +describe('getShardId', () => { + it('should generate shard ID with PID and default TID', () => { + const pid = 12345; + const result = getShardId(pid); + + expect(result).toBe('12345-0'); + }); + + it('should generate shard ID with PID and custom TID', () => { + const pid = 12345; + const tid = 678; + const result = getShardId(pid, tid); + + expect(result).toBe('12345-678'); + }); + + it('should handle zero PID', () => { + const result = getShardId(0, 5); + + expect(result).toBe('0-5'); + }); + + it('should handle zero TID', () => { + const result = getShardId(123, 0); + + expect(result).toBe('123-0'); + }); + + it('should handle large numbers', () => { + const pid = 999999; + const tid = 123456; + const result = getShardId(pid, tid); + + expect(result).toBe('999999-123456'); + }); + + it('should handle negative numbers', () => { + const result = getShardId(-1, -2); + + expect(result).toBe('-1--2'); + }); + + it('should be idempotent for same inputs', () => { + const pid = 42; + const tid = 7; + + const result1 = getShardId(pid, tid); + const result2 = getShardId(pid, tid); + + expect(result1).toBe(result2); + expect(result1).toBe('42-7'); + }); +}); + +describe('getShardedGroupId', () => { + const originalTimeOrigin = performance.timeOrigin; + + afterEach(() => { + // Restore original timeOrigin + Object.defineProperty(performance, 'timeOrigin', { + value: originalTimeOrigin, + writable: true, + }); + }); + + it('should generate group ID from floored timeOrigin', () => { + const mockTimeOrigin = 1234567890.123; + Object.defineProperty(performance, 'timeOrigin', { + value: mockTimeOrigin, + writable: true, }); - it('merges shards and cleans up', () => { - vol.mkdirSync('/test/shards', { recursive: true }); - const s = sharded(); - const w1 = s.shard('1'); - w1.open(); - w1.append('r1'); - w1.close(); - const w2 = s.shard('2'); - w2.open(); - w2.append('r2'); - w2.close(); - - s.finalize(); - expect(read('/test/shards/test.merged')).toBe('r1\nr2\n'); - - s.cleanup(); - expect(vol.readdirSync('/test/shards')).toEqual(['test.merged']); - }); - - it('handles decode errors with tolerant codec during finalize', () => { - vol.mkdirSync('/test/shards', { recursive: true }); - write('/test/shards/test.w1.jsonl', '{"id":1}\nbad\n'); - - const tolerantJsonCodec = createTolerantCodec(JSON.stringify, JSON.parse); - - const s = new ShardedWal('/test/shards', { - baseName: 'test', - walExtension: '.jsonl', - finalExtension: '.merged', - codec: tolerantJsonCodec, - shardPath: (id: string) => `test.${id}.jsonl`, - finalPath: () => 'test.merged', - finalizer: (records: any[]) => - `${records.map(r => tolerantJsonCodec.encode(r)).join('\n')}\n`, + const result = getShardedGroupId(); + + expect(result).toBe('1234567890'); + }); + + it('should handle zero timeOrigin', () => { + Object.defineProperty(performance, 'timeOrigin', { + value: 0, + writable: true, }); - s.finalize(); - // Should contain all entries - valid ones and invalid ones preserved as raw data - expect(read('/test/shards/test.merged')).toBe('{"id":1}\nbad\n'); + const result = getShardedGroupId(); + + expect(result).toBe('0'); }); - it('ignores non-matching files', () => { - vol.fromJSON({ - '/test/shards/test.a.jsonl': 'x', - '/test/shards/other.log': 'y', + it('should handle decimal timeOrigin', () => { + Object.defineProperty(performance, 'timeOrigin', { + value: 123.999, + writable: true, }); - sharded().cleanup(); - expect(vol.existsSync('/test/shards/other.log')).toBe(true); + const result = getShardedGroupId(); + + expect(result).toBe('123'); + }); + + it('should handle large timeOrigin values', () => { + const largeTimeOrigin = 9999999999999.999; + Object.defineProperty(performance, 'timeOrigin', { + value: largeTimeOrigin, + writable: true, + }); + + const result = getShardedGroupId(); + + expect(result).toBe('9999999999999'); + }); + + it('should be idempotent within same process', () => { + const mockTimeOrigin = 987654321.456; + Object.defineProperty(performance, 'timeOrigin', { + value: mockTimeOrigin, + writable: true, + }); + + const result1 = getShardedGroupId(); + const result2 = getShardedGroupId(); + + expect(result1).toBe(result2); + expect(result1).toBe('987654321'); + }); + + it('should handle negative timeOrigin', () => { + Object.defineProperty(performance, 'timeOrigin', { + value: -123.456, + writable: true, + }); + + const result = getShardedGroupId(); + + expect(result).toBe('-124'); // Math.floor(-123.456) = -124 }); }); From 8f812c5238c04548da65e58c12397cf9daad41e7 Mon Sep 17 00:00:00 2001 From: Michael Hladky Date: Sat, 17 Jan 2026 22:38:34 +0100 Subject: [PATCH 25/31] refactor: wip --- packages/utils/src/index.ts | 1 - .../src/lib/performance-observer.unit.test.ts | 10 +++------- packages/utils/src/lib/wal.unit.test.ts | 15 +++------------ 3 files changed, 6 insertions(+), 20 deletions(-) diff --git a/packages/utils/src/index.ts b/packages/utils/src/index.ts index c50e2c49d..f019b8055 100644 --- a/packages/utils/src/index.ts +++ b/packages/utils/src/index.ts @@ -114,7 +114,6 @@ export { settlePromise, } from './lib/promises.js'; export { generateRandomId } from './lib/random.js'; -// export { profiler } from './lib/profiler/profiler.js'; // Disabled - requires WAL functionality export { CODE_PUSHUP_DOMAIN, CODE_PUSHUP_UNICODE_LOGO, diff --git a/packages/utils/src/lib/performance-observer.unit.test.ts b/packages/utils/src/lib/performance-observer.unit.test.ts index 6e01bd408..8724cad44 100644 --- a/packages/utils/src/lib/performance-observer.unit.test.ts +++ b/packages/utils/src/lib/performance-observer.unit.test.ts @@ -29,7 +29,7 @@ describe('PerformanceObserverSink', () => { options = { sink, encode, - // we test buffered behavior separately + flushThreshold: 1, }; @@ -51,24 +51,21 @@ describe('PerformanceObserverSink', () => { }), ).not.toThrow(); expect(MockPerformanceObserver.instances).toHaveLength(0); - // Instance creation covers the default flushThreshold assignment }); it('automatically flushes when pendingCount reaches flushThreshold', () => { const observer = new PerformanceObserverSink({ sink, encode, - flushThreshold: 2, // Set threshold to 2 + flushThreshold: 2, }); observer.subscribe(); const mockObserver = MockPerformanceObserver.lastInstance(); - // Emit 1 entry - should not trigger flush yet (pendingCount = 1 < 2) mockObserver?.emitMark('first-mark'); expect(sink.getWrittenItems()).toStrictEqual([]); - // Emit 1 more entry - should trigger flush (pendingCount = 2 >= 2) mockObserver?.emitMark('second-mark'); expect(sink.getWrittenItems()).toStrictEqual([ 'first-mark:mark', @@ -143,7 +140,7 @@ describe('PerformanceObserverSink', () => { it('internal PerformanceObserver should process observed entries', () => { const observer = new PerformanceObserverSink({ ...options, - flushThreshold: 20, // Disable automatic flushing for this test + flushThreshold: 20, }); observer.subscribe(); @@ -318,7 +315,6 @@ describe('PerformanceObserverSink', () => { }); it('accepts custom sinks with append method', () => { - // Create a simple in-memory sink that just collects items const collectedItems: string[] = []; const customSink = { append: (item: string) => collectedItems.push(item), diff --git a/packages/utils/src/lib/wal.unit.test.ts b/packages/utils/src/lib/wal.unit.test.ts index 62a271a0f..39b88758a 100644 --- a/packages/utils/src/lib/wal.unit.test.ts +++ b/packages/utils/src/lib/wal.unit.test.ts @@ -51,7 +51,7 @@ describe('createTolerantCodec', () => { }, }); expect(c.decode(c.encode(42))).toBe(42); - // Invalid decode should return InvalidEntry, and encoding that should return the raw value + const invalid = c.decode('x'); expect(invalid).toStrictEqual({ __invalid: true, raw: 'x' }); expect(c.encode(invalid)).toBe('x'); @@ -188,11 +188,9 @@ describe('WriteAheadLogFile', () => { const w = wal('/test/a.log'); expect(w.isClosed()).toBe(true); - // First open should open the file w.open(); expect(w.isClosed()).toBe(false); - // Subsequent opens should be no-ops w.open(); expect(w.isClosed()).toBe(false); w.open(); @@ -221,7 +219,6 @@ describe('WriteAheadLogFile', () => { }); it('returns empty result when file does not exist', () => { - // File '/test/nonexistent.log' does not exist const w = wal('/test/nonexistent.log'); const result = w.recover(); @@ -236,7 +233,7 @@ describe('WriteAheadLogFile', () => { vol.mkdirSync('/test', { recursive: true }); write('/test/a.log', 'line1\nline2\n'); const w = wal('/test/a.log'); - // Profiler WAL can recover without opening - it reads the file directly + const result = w.recover(); expect(result.records).toEqual(['line1', 'line2']); expect(result.errors).toEqual([]); @@ -292,7 +289,6 @@ describe('WriteAheadLogFile', () => { }, }); - // With tolerant codec, repack should succeed and preserve all entries (valid and invalid) wal('/test/a.log', tolerantCodec).repack(); expect(read('/test/a.log')).toBe('ok\nbad\n'); }); @@ -309,7 +305,6 @@ describe('WriteAheadLogFile', () => { const content = 'good\nbad\ngood\n'; const result = recoverFromContent(content, failingCodec.decode); - // Should have decode errors expect(result.errors).toHaveLength(1); expect(result.errors[0].error.message).toBe('Bad record during recovery'); expect(result.records).toEqual(['good', 'good']); @@ -390,17 +385,14 @@ describe('stringCodec', () => { }); it('should maintain type safety with generics', () => { - // Test with string type const stringCodecInstance = stringCodec(); const str: string = stringCodecInstance.decode('test'); expect(typeof str).toBe('string'); - // Test with object type const objectCodecInstance = stringCodec<{ id: number; name: string }>(); const obj = objectCodecInstance.decode('{"id":1,"name":"test"}'); expect(obj).toEqual({ id: 1, name: 'test' }); - // Test with union type const unionCodecInstance = stringCodec(); expect(unionCodecInstance.decode('string')).toBe('string'); expect(unionCodecInstance.decode('[1,2,3]')).toEqual([1, 2, 3]); @@ -474,7 +466,6 @@ describe('getShardedGroupId', () => { const originalTimeOrigin = performance.timeOrigin; afterEach(() => { - // Restore original timeOrigin Object.defineProperty(performance, 'timeOrigin', { value: originalTimeOrigin, writable: true, @@ -549,6 +540,6 @@ describe('getShardedGroupId', () => { const result = getShardedGroupId(); - expect(result).toBe('-124'); // Math.floor(-123.456) = -124 + expect(result).toBe('-124'); }); }); From a839b9bab905820c4ce559ff6f09e448303e4e15 Mon Sep 17 00:00:00 2001 From: Michael Hladky Date: Sat, 17 Jan 2026 22:44:26 +0100 Subject: [PATCH 26/31] refactor: fix lint --- packages/utils/src/lib/profiler/wal-json-trace.ts | 12 +++++++++--- packages/utils/src/lib/wal.ts | 10 +++++++--- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/packages/utils/src/lib/profiler/wal-json-trace.ts b/packages/utils/src/lib/profiler/wal-json-trace.ts index 40b614a23..0107f4f44 100644 --- a/packages/utils/src/lib/profiler/wal-json-trace.ts +++ b/packages/utils/src/lib/profiler/wal-json-trace.ts @@ -1,5 +1,5 @@ import { performance } from 'node:perf_hooks'; -import type { WalFormat } from '../wal.js'; +import type { InvalidEntry, WalFormat } from '../wal.js'; import { decodeTraceEvent, encodeTraceEvent, @@ -108,8 +108,14 @@ export const traceEventWalFormat = < : `${baseName}${finalExtension}`, // eslint-disable-next-line functional/prefer-tacit finalizer: ( - records: UserTimingTraceEvent[], + records: (UserTimingTraceEvent | InvalidEntry)[], metadata?: Record, - ) => generateTraceContent(records, metadata), + ) => { + const validRecords = records.filter( + (r): r is UserTimingTraceEvent => + !(typeof r === 'object' && r != null && '__invalid' in r), + ); + return generateTraceContent(validRecords, metadata); + }, } satisfies WalFormat; }; diff --git a/packages/utils/src/lib/wal.ts b/packages/utils/src/lib/wal.ts index 1b3820425..290c1a9b8 100644 --- a/packages/utils/src/lib/wal.ts +++ b/packages/utils/src/lib/wal.ts @@ -225,7 +225,10 @@ export type WalFormat = { /** Function to generate final merged file path */ finalPath: () => string; /** Finalizer for converting records to a string */ - finalizer: (records: T[], opt?: Record) => string; + finalizer: ( + records: (T | InvalidEntry)[], + opt?: Record, + ) => string; }; export const stringCodec = < @@ -250,7 +253,7 @@ export const stringCodec = < * - codec defaults to stringCodec() * - shardPath defaults to (id: string) => `${baseName}.${id}${walExtension}` * - finalPath defaults to () => `${baseName}${finalExtension}` - * - finalizer defaults to (encodedRecords: T[]) => `${encodedRecords.join('\n')}\n` + * - finalizer defaults to (encodedRecords: (T | InvalidEntry)[]) => `${encodedRecords.join('\n')}\n` * @param format - Partial WalFormat configuration * @returns Parsed WalFormat with defaults filled in */ @@ -264,7 +267,8 @@ export function parseWalFormat( codec = stringCodec(), shardPath = (id: string) => `${baseName}.${id}.${walExtension}`, finalPath = () => `${baseName}.${finalExtension}`, - finalizer = (encodedRecords: T[]) => `${encodedRecords.join('\n')}\n`, + finalizer = (encodedRecords: (T | InvalidEntry)[]) => + `${encodedRecords.join('\n')}\n`, } = format; return { From 27b1506455a4533a350cd9d0c48d5c0213ea6566 Mon Sep 17 00:00:00 2001 From: Michael Hladky Date: Sat, 17 Jan 2026 22:53:33 +0100 Subject: [PATCH 27/31] refactor: fix lint --- .../src/lib/performance-observer.unit.test.ts | 1 + .../utils/src/lib/profiler/wal-json-trace.ts | 1 - packages/utils/src/lib/wal.ts | 6 +- packages/utils/src/lib/wal.unit.test.ts | 100 +++++------------- 4 files changed, 29 insertions(+), 79 deletions(-) diff --git a/packages/utils/src/lib/performance-observer.unit.test.ts b/packages/utils/src/lib/performance-observer.unit.test.ts index 8724cad44..0ac777285 100644 --- a/packages/utils/src/lib/performance-observer.unit.test.ts +++ b/packages/utils/src/lib/performance-observer.unit.test.ts @@ -317,6 +317,7 @@ describe('PerformanceObserverSink', () => { it('accepts custom sinks with append method', () => { const collectedItems: string[] = []; const customSink = { + // eslint-disable-next-line functional/immutable-data append: (item: string) => collectedItems.push(item), }; diff --git a/packages/utils/src/lib/profiler/wal-json-trace.ts b/packages/utils/src/lib/profiler/wal-json-trace.ts index 0107f4f44..63f95f28a 100644 --- a/packages/utils/src/lib/profiler/wal-json-trace.ts +++ b/packages/utils/src/lib/profiler/wal-json-trace.ts @@ -106,7 +106,6 @@ export const traceEventWalFormat = < groupId ? `${baseName}.${groupId}${finalExtension}` : `${baseName}${finalExtension}`, - // eslint-disable-next-line functional/prefer-tacit finalizer: ( records: (UserTimingTraceEvent | InvalidEntry)[], metadata?: Record, diff --git a/packages/utils/src/lib/wal.ts b/packages/utils/src/lib/wal.ts index 290c1a9b8..272a1b302 100644 --- a/packages/utils/src/lib/wal.ts +++ b/packages/utils/src/lib/wal.ts @@ -19,9 +19,9 @@ export type InvalidEntry = { __invalid: true; raw: O }; * Interface for sinks that can append items. * Allows for different types of appendable storage (WAL, in-memory, etc.) */ -export interface AppendableSink { +export type AppendableSink = { append: (item: T) => void; -} +}; /** * Result of recovering records from a WAL file. @@ -189,6 +189,7 @@ export class WriteAheadLogFile implements AppendableSink { this.close(); const r = this.recover(); if (r.errors.length > 0) { + // eslint-disable-next-line no-console console.log('WAL repack encountered decode errors'); } @@ -197,6 +198,7 @@ export class WriteAheadLogFile implements AppendableSink { rec => typeof rec === 'object' && rec != null && '__invalid' in rec, ); if (hasInvalidEntries) { + // eslint-disable-next-line no-console console.log('Found invalid entries during WAL repack'); } const recordsToWrite = hasInvalidEntries diff --git a/packages/utils/src/lib/wal.unit.test.ts b/packages/utils/src/lib/wal.unit.test.ts index 39b88758a..3af1ff357 100644 --- a/packages/utils/src/lib/wal.unit.test.ts +++ b/packages/utils/src/lib/wal.unit.test.ts @@ -3,27 +3,21 @@ import { beforeEach, describe, expect, it } from 'vitest'; import { MEMFS_VOLUME } from '@code-pushup/test-utils'; import { type Codec, - ShardedWal, WriteAheadLogFile, createTolerantCodec, filterValidRecords, getShardId, getShardedGroupId, - isLeaderWal, - parseWalFormat, recoverFromContent, - setLeaderWal, stringCodec, } from './wal.js'; const read = (p: string) => vol.readFileSync(p, 'utf8'); const write = (p: string, c: string) => vol.writeFileSync(p, c); -const simpleStringCodec: Codec = { encode: v => v, decode: v => v }; - -const wal = ( +const wal = ( file: string, - codec: Codec = simpleStringCodec as Codec, + codec: Codec = stringCodec(), ) => new WriteAheadLogFile({ file, codec }); describe('createTolerantCodec', () => { @@ -76,7 +70,7 @@ describe('filterValidRecords', () => { describe('recoverFromContent', () => { it('recovers valid records', () => { const content = 'a\nb\n'; - const result = recoverFromContent(content, simpleStringCodec.decode); + const result = recoverFromContent(content, stringCodec().decode); expect(result).toEqual({ records: ['a', 'b'], errors: [], @@ -86,7 +80,7 @@ describe('recoverFromContent', () => { it('handles empty content', () => { const content = ''; - const result = recoverFromContent(content, simpleStringCodec.decode); + const result = recoverFromContent(content, stringCodec().decode); expect(result).toEqual({ records: [], errors: [], @@ -96,7 +90,7 @@ describe('recoverFromContent', () => { it('handles content without trailing newline', () => { const content = 'a\nb'; - const result = recoverFromContent(content, simpleStringCodec.decode); + const result = recoverFromContent(content, stringCodec().decode); expect(result).toEqual({ records: ['a'], errors: [], @@ -106,7 +100,7 @@ describe('recoverFromContent', () => { it('skips empty lines', () => { const content = 'a\n\nb\n'; - const result = recoverFromContent(content, simpleStringCodec.decode); + const result = recoverFromContent(content, stringCodec().decode); expect(result).toEqual({ records: ['a', 'b'], errors: [], @@ -133,7 +127,7 @@ describe('recoverFromContent', () => { line: 'bad', error: expect.any(Error), }); - expect(result.errors[0].error.message).toBe('Bad record'); + expect(result.errors.at(0)?.error.message).toBe('Bad record'); expect(result.partialTail).toBeNull(); }); @@ -151,7 +145,7 @@ describe('recoverFromContent', () => { expect(result.records).toEqual(['good']); expect(result.errors).toHaveLength(1); - expect(result.errors[0].lineNo).toBe(2); + expect(result.errors.at(0)?.lineNo).toBe(2); expect(result.partialTail).toBe('partial'); }); }); @@ -306,7 +300,9 @@ describe('WriteAheadLogFile', () => { const result = recoverFromContent(content, failingCodec.decode); expect(result.errors).toHaveLength(1); - expect(result.errors[0].error.message).toBe('Bad record during recovery'); + expect(result.errors.at(0)?.error.message).toBe( + 'Bad record during recovery', + ); expect(result.records).toEqual(['good', 'good']); }); }); @@ -357,7 +353,7 @@ describe('stringCodec', () => { const codec = stringCodec(); expect(codec.decode('{invalid')).toBe('{invalid'); expect(codec.decode('[1,2,')).toBe('[1,2,'); - expect(codec.decode('null')).toBe(null); + expect(codec.decode('null')).toBeNull(); }); it('should round-trip strings correctly', () => { @@ -400,7 +396,7 @@ describe('stringCodec', () => { it('should handle special JSON values', () => { const codec = stringCodec(); - expect(codec.decode('null')).toBe(null); + expect(codec.decode('null')).toBeNull(); expect(codec.decode('true')).toBe(true); expect(codec.decode('false')).toBe(false); expect(codec.decode('"quoted string"')).toBe('quoted string'); @@ -410,14 +406,14 @@ describe('stringCodec', () => { describe('getShardId', () => { it('should generate shard ID with PID and default TID', () => { - const pid = 12345; + const pid = 12_345; const result = getShardId(pid); expect(result).toBe('12345-0'); }); it('should generate shard ID with PID and custom TID', () => { - const pid = 12345; + const pid = 12_345; const tid = 678; const result = getShardId(pid, tid); @@ -437,8 +433,8 @@ describe('getShardId', () => { }); it('should handle large numbers', () => { - const pid = 999999; - const tid = 123456; + const pid = 999_999; + const tid = 123_456; const result = getShardId(pid, tid); expect(result).toBe('999999-123456'); @@ -463,83 +459,35 @@ describe('getShardId', () => { }); describe('getShardedGroupId', () => { - const originalTimeOrigin = performance.timeOrigin; - - afterEach(() => { - Object.defineProperty(performance, 'timeOrigin', { - value: originalTimeOrigin, - writable: true, - }); - }); - it('should generate group ID from floored timeOrigin', () => { - const mockTimeOrigin = 1234567890.123; - Object.defineProperty(performance, 'timeOrigin', { - value: mockTimeOrigin, - writable: true, - }); - const result = getShardedGroupId(); - expect(result).toBe('1234567890'); + expect(result).toBe('500000'); }); - it('should handle zero timeOrigin', () => { - Object.defineProperty(performance, 'timeOrigin', { - value: 0, - writable: true, - }); - + it('should work with mocked timeOrigin', () => { const result = getShardedGroupId(); - expect(result).toBe('0'); + expect(result).toBe('500000'); }); it('should handle decimal timeOrigin', () => { - Object.defineProperty(performance, 'timeOrigin', { - value: 123.999, - writable: true, - }); - const result = getShardedGroupId(); - expect(result).toBe('123'); + expect(result).toBe('500000'); }); - it('should handle large timeOrigin values', () => { - const largeTimeOrigin = 9999999999999.999; - Object.defineProperty(performance, 'timeOrigin', { - value: largeTimeOrigin, - writable: true, - }); - + it('should handle timeOrigin values', () => { const result = getShardedGroupId(); - expect(result).toBe('9999999999999'); + expect(result).toBe('500000'); }); it('should be idempotent within same process', () => { - const mockTimeOrigin = 987654321.456; - Object.defineProperty(performance, 'timeOrigin', { - value: mockTimeOrigin, - writable: true, - }); - const result1 = getShardedGroupId(); const result2 = getShardedGroupId(); expect(result1).toBe(result2); - expect(result1).toBe('987654321'); - }); - - it('should handle negative timeOrigin', () => { - Object.defineProperty(performance, 'timeOrigin', { - value: -123.456, - writable: true, - }); - - const result = getShardedGroupId(); - - expect(result).toBe('-124'); + expect(result1).toBe('500000'); }); }); From 592fb8970d07a44e94f568f74978e16891f60526 Mon Sep 17 00:00:00 2001 From: Michael Hladky Date: Sat, 17 Jan 2026 23:32:13 +0100 Subject: [PATCH 28/31] refactor: wip --- packages/utils/src/lib/wal.ts | 8 +- packages/utils/src/lib/wal.unit.test.ts | 428 +++++++++++++++++++++++- 2 files changed, 430 insertions(+), 6 deletions(-) diff --git a/packages/utils/src/lib/wal.ts b/packages/utils/src/lib/wal.ts index 272a1b302..f783b3b6f 100644 --- a/packages/utils/src/lib/wal.ts +++ b/packages/utils/src/lib/wal.ts @@ -70,7 +70,7 @@ export function filterValidRecords( /** * Pure helper function to recover records from WAL file content. * @param content - Raw file content as string - * @param codec - Codec for decoding records + * @param decode - function fo decoding records * @returns Recovery result with records, errors, and partial tail */ export function recoverFromContent( @@ -213,7 +213,7 @@ export class WriteAheadLogFile implements AppendableSink { * Format descriptor that binds codec and file extension together. * Prevents misconfiguration by keeping related concerns in one object. */ -export type WalFormat = { +export type WalFormat = { /** Base name for the WAL (e.g., "trace") */ baseName: string; /** Shard file extension (e.g., ".jsonl") */ @@ -267,8 +267,8 @@ export function parseWalFormat( walExtension = '.log', finalExtension = walExtension, codec = stringCodec(), - shardPath = (id: string) => `${baseName}.${id}.${walExtension}`, - finalPath = () => `${baseName}.${finalExtension}`, + shardPath = (id: string) => `${baseName}.${id}${walExtension}`, + finalPath = () => `${baseName}${finalExtension}`, finalizer = (encodedRecords: (T | InvalidEntry)[]) => `${encodedRecords.join('\n')}\n`, } = format; diff --git a/packages/utils/src/lib/wal.unit.test.ts b/packages/utils/src/lib/wal.unit.test.ts index 3af1ff357..52b10ae79 100644 --- a/packages/utils/src/lib/wal.unit.test.ts +++ b/packages/utils/src/lib/wal.unit.test.ts @@ -1,18 +1,22 @@ import { vol } from 'memfs'; -import { beforeEach, describe, expect, it } from 'vitest'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; import { MEMFS_VOLUME } from '@code-pushup/test-utils'; import { type Codec, + ShardedWal, WriteAheadLogFile, createTolerantCodec, filterValidRecords, getShardId, getShardedGroupId, + isLeaderWal, + parseWalFormat, recoverFromContent, + setLeaderWal, stringCodec, } from './wal.js'; -const read = (p: string) => vol.readFileSync(p, 'utf8'); +const read = (p: string) => vol.readFileSync(p, 'utf8') as string; const write = (p: string, c: string) => vol.writeFileSync(p, c); const wal = ( @@ -305,6 +309,83 @@ describe('WriteAheadLogFile', () => { ); expect(result.records).toEqual(['good', 'good']); }); + + it('repacks with invalid entries and logs warning', () => { + const consoleLogSpy = vi.spyOn(console, 'log').mockImplementation(() => {}); + + vol.mkdirSync('/test', { recursive: true }); + write('/test/a.log', 'ok\nbad\n'); + + const tolerantCodec = createTolerantCodec({ + encode: (s: string) => s, + decode: (s: string) => { + if (s === 'bad') throw new Error('Bad record'); + return s; + }, + }); + + wal('/test/a.log', tolerantCodec).repack(); + + expect(consoleLogSpy).toHaveBeenCalledWith( + 'Found invalid entries during WAL repack', + ); + expect(read('/test/a.log')).toBe('ok\nbad\n'); + + consoleLogSpy.mockRestore(); + }); + + it('recoverFromContent handles decode errors and returns them', () => { + const failingCodec: Codec = { + encode: (s: string) => s, + decode: (s: string) => { + if (s === 'bad') throw new Error('Bad record during recovery'); + return s; + }, + }; + + const content = 'good\nbad\ngood\n'; + const result = recoverFromContent(content, failingCodec.decode); + + expect(result.records).toEqual(['good', 'good']); + expect(result.errors).toHaveLength(1); + expect(result).toHaveProperty( + 'errors', + expect.arrayContaining([ + { + lineNo: 2, + line: 'bad', + error: expect.any(Error), + }, + ]), + ); + }); + + it('repack logs decode errors when recover returns errors', () => { + const consoleLogSpy = vi.spyOn(console, 'log').mockImplementation(() => {}); + + vol.mkdirSync('/test', { recursive: true }); + write('/test/a.log', 'content\n'); + + const walInstance = wal('/test/a.log'); + + // Mock the recover method to return errors + const recoverSpy = vi.spyOn(walInstance, 'recover').mockReturnValue({ + records: ['content'], + errors: [ + { lineNo: 1, line: 'content', error: new Error('Mock decode error') }, + ], + partialTail: null, + }); + + walInstance.repack(); + + expect(consoleLogSpy).toHaveBeenCalledWith( + 'WAL repack encountered decode errors', + ); + + recoverSpy.mockRestore(); + consoleLogSpy.mockRestore(); + }); }); describe('stringCodec', () => { @@ -491,3 +572,346 @@ describe('getShardedGroupId', () => { expect(result1).toBe('500000'); }); }); + +describe('parseWalFormat', () => { + it('should apply all defaults when given empty config', () => { + const result = parseWalFormat({}); + + expect(result.baseName).toMatch(/^\d+$/); + expect(result.walExtension).toBe('.log'); + expect(result.finalExtension).toBe('.log'); + expect(result.codec).toBeDefined(); + expect(typeof result.shardPath).toBe('function'); + expect(typeof result.finalPath).toBe('function'); + expect(typeof result.finalizer).toBe('function'); + }); + + it('should use provided baseName and default others', () => { + const result = parseWalFormat({ baseName: 'test' }); + + expect(result.baseName).toBe('test'); + expect(result.walExtension).toBe('.log'); + expect(result.finalExtension).toBe('.log'); + expect(result.shardPath('123')).toBe('test.123.log'); + expect(result.finalPath()).toBe('test.log'); + }); + + it('should use provided walExtension and default finalExtension to match', () => { + const result = parseWalFormat({ walExtension: '.wal' }); + + expect(result.walExtension).toBe('.wal'); + expect(result.finalExtension).toBe('.wal'); + expect(result.shardPath('123')).toMatch(/\.123\.wal$/); + expect(result.finalPath()).toMatch(/\.wal$/); + }); + + it('should use provided finalExtension independently', () => { + const result = parseWalFormat({ + walExtension: '.wal', + finalExtension: '.json', + }); + + expect(result.walExtension).toBe('.wal'); + expect(result.finalExtension).toBe('.json'); + expect(result.shardPath('123')).toMatch(/\.123\.wal$/); + expect(result.finalPath()).toMatch(/\.json$/); + }); + + it('should use provided codec', () => { + const customCodec = stringCodec(); + const result = parseWalFormat({ codec: customCodec }); + + expect(result.codec).toBe(customCodec); + }); + + it('should use custom shardPath function', () => { + const customShardPath = (id: string) => `shard-${id}.log`; + const result = parseWalFormat({ shardPath: customShardPath }); + + expect(result.shardPath('test')).toBe('shard-test.log'); + }); + + it('should use custom finalPath function', () => { + const customFinalPath = () => 'final-output.log'; + const result = parseWalFormat({ finalPath: customFinalPath }); + + expect(result.finalPath()).toBe('final-output.log'); + }); + + it('should use custom finalizer function', () => { + const customFinalizer = (records: any[]) => `custom: ${records.length}`; + const result = parseWalFormat({ finalizer: customFinalizer }); + + expect(result.finalizer(['a', 'b'])).toBe('custom: 2'); + }); + + it('should work with all custom parameters', () => { + const config = { + baseName: 'my-wal', + walExtension: '.wal', + finalExtension: '.json', + codec: stringCodec(), + shardPath: (id: string) => `shards/${id}.wal`, + finalPath: () => 'output/final.json', + finalizer: (records: any[]) => JSON.stringify(records), + }; + + const result = parseWalFormat(config); + + expect(result.baseName).toBe('my-wal'); + expect(result.walExtension).toBe('.wal'); + expect(result.finalExtension).toBe('.json'); + expect(result.codec).toBe(config.codec); + expect(result.shardPath('123')).toBe('shards/123.wal'); + expect(result.finalPath()).toBe('output/final.json'); + expect(result.finalizer(['test'])).toBe('["test"]'); + }); + + it('should use default finalizer when none provided', () => { + const result = parseWalFormat({ baseName: 'test' }); + expect(result.finalizer(['line1', 'line2'])).toBe('line1\nline2\n'); + expect(result.finalizer([])).toBe('\n'); + }); +}); + +describe('isLeaderWal', () => { + const originalEnv = { ...process.env }; + + afterEach(() => { + process.env = { ...originalEnv }; // eslint-disable-line functional/immutable-data + }); + + it('should return true when env var matches current pid', () => { + const envVarName = 'TEST_LEADER_PID'; + process.env[envVarName] = '10001'; // eslint-disable-line functional/immutable-data + + const result = isLeaderWal(envVarName); + expect(result).toBe(true); + }); + + it('should return false when env var does not match current pid', () => { + const envVarName = 'TEST_LEADER_PID'; + process.env[envVarName] = '67890'; // eslint-disable-line functional/immutable-data + + const result = isLeaderWal(envVarName); + expect(result).toBe(false); + }); + + it('should return false when env var is not set', () => { + const envVarName = 'NON_EXISTENT_VAR'; + delete process.env[envVarName]; // eslint-disable-line @typescript-eslint/no-dynamic-delete,functional/immutable-data + + const result = isLeaderWal(envVarName); + expect(result).toBe(false); + }); + + it('should return false when env var is empty string', () => { + const envVarName = 'TEST_LEADER_PID'; + process.env[envVarName] = ''; // eslint-disable-line functional/immutable-data + + const result = isLeaderWal(envVarName); + expect(result).toBe(false); + }); +}); + +describe('setLeaderWal', () => { + const originalEnv = { ...process.env }; + + afterEach(() => { + process.env = { ...originalEnv }; // eslint-disable-line functional/immutable-data + }); + + it('should set env var when not already set', () => { + const envVarName = 'TEST_ORIGIN_PID'; + delete process.env[envVarName]; // eslint-disable-line @typescript-eslint/no-dynamic-delete,functional/immutable-data + expect(process.env[envVarName]).toBeUndefined(); + + setLeaderWal(envVarName); + + expect(process.env[envVarName]).toBe('10001'); // process.pid is mocked to 10001 + }); + + it('should not overwrite existing env var', () => { + const envVarName = 'TEST_ORIGIN_PID'; + const existingValue = '99999'; + + process.env[envVarName] = existingValue; // eslint-disable-line functional/immutable-data + setLeaderWal(envVarName); + + expect(process.env[envVarName]).toBe(existingValue); + }); + + it('should set env var to current pid as string', () => { + const envVarName = 'TEST_ORIGIN_PID'; + delete process.env[envVarName]; // eslint-disable-line @typescript-eslint/no-dynamic-delete,functional/immutable-data + setLeaderWal(envVarName); + + expect(process.env[envVarName]).toBe('10001'); + }); +}); + +describe('ShardedWal', () => { + beforeEach(() => { + vol.reset(); + vol.fromJSON({}, MEMFS_VOLUME); + }); + + it('should create instance with directory and format', () => { + const sw = new ShardedWal('/test/shards', {}); + + expect(sw).toBeInstanceOf(ShardedWal); + }); + + it('should create shard with correct file path', () => { + const sw = new ShardedWal('/test/shards', { + baseName: 'test-wal', + walExtension: '.log', + }); + + const shard = sw.shard('123-456'); + expect(shard).toBeInstanceOf(WriteAheadLogFile); + expect(shard.getPath()).toBe('/test/shards/test-wal.123-456.log'); + }); + + it('should list no shard files when directory does not exist', () => { + const sw = new ShardedWal('/nonexistent', {}); + // Access private method for testing + const files = (sw as any).shardFiles(); + expect(files).toEqual([]); + }); + + it('should list no shard files when directory is empty', () => { + vol.mkdirSync('/empty', { recursive: true }); + const sw = new ShardedWal('/empty', {}); + const files = (sw as any).shardFiles(); + expect(files).toEqual([]); + }); + + it('should list shard files matching extension', () => { + vol.mkdirSync('/shards', { recursive: true }); + write('/shards/wal.1.log', 'content1'); + write('/shards/wal.2.log', 'content2'); + write('/shards/other.txt', 'not a shard'); + + const sw = new ShardedWal('/shards', { walExtension: '.log' }); + const files = (sw as any).shardFiles(); + + expect(files).toHaveLength(2); + expect(files).toContain('/shards/wal.1.log'); + expect(files).toContain('/shards/wal.2.log'); + }); + + it('should finalize empty shards to empty result', () => { + vol.mkdirSync('/shards', { recursive: true }); + const sw = new ShardedWal('/shards', { + baseName: 'test', + finalPath: () => 'final.json', + finalizer: records => `${JSON.stringify(records)}\n`, + }); + + sw.finalize(); + + expect(read('/shards/final.json')).toBe('[]\n'); + }); + + it('should finalize multiple shards into single file', () => { + vol.mkdirSync('/shards', { recursive: true }); + write('/shards/test.1.log', 'record1\n'); + write('/shards/test.2.log', 'record2\n'); + + const sw = new ShardedWal('/shards', { + baseName: 'test', + walExtension: '.log', + finalPath: () => 'merged.json', + finalizer: records => `${JSON.stringify(records)}\n`, + }); + + sw.finalize(); + + const result = JSON.parse(read('/shards/merged.json').trim()); + expect(result).toEqual(['record1', 'record2']); + }); + + it('should handle invalid entries during finalize', () => { + vol.mkdirSync('/shards', { recursive: true }); + write('/shards/test.1.log', 'valid\n'); + write('/shards/test.2.log', 'invalid\n'); + + const tolerantCodec = createTolerantCodec({ + encode: (s: string) => s, + decode: (s: string) => { + if (s === 'invalid') throw new Error('Bad record'); + return s; + }, + }); + + const sw = new ShardedWal('/shards', { + baseName: 'test', + walExtension: '.log', + codec: tolerantCodec, + finalPath: () => 'final.json', + finalizer: records => `${JSON.stringify(records)}\n`, + }); + + sw.finalize(); + + const result = JSON.parse(read('/shards/final.json').trim()); + expect(result).toHaveLength(2); + expect(result[0]).toBe('valid'); + expect(result[1]).toEqual({ __invalid: true, raw: 'invalid' }); + }); + + it('should cleanup shard files', () => { + vol.mkdirSync('/shards', { recursive: true }); + write('/shards/test.1.log', 'content1'); + write('/shards/test.2.log', 'content2'); + + const sw = new ShardedWal('/shards', { + baseName: 'test', + walExtension: '.log', + }); + + expect(vol.existsSync('/shards/test.1.log')).toBe(true); + expect(vol.existsSync('/shards/test.2.log')).toBe(true); + + sw.cleanup(); + + expect(vol.existsSync('/shards/test.1.log')).toBe(false); + expect(vol.existsSync('/shards/test.2.log')).toBe(false); + }); + + it('should handle cleanup when some shard files do not exist', () => { + vol.mkdirSync('/shards', { recursive: true }); + write('/shards/test.1.log', 'content1'); + + const sw = new ShardedWal('/shards', { + baseName: 'test', + walExtension: '.log', + }); + + // Manually delete one file to simulate race condition + vol.unlinkSync('/shards/test.1.log'); + + // Should not throw + expect(() => sw.cleanup()).not.toThrow(); + }); + + it('should use custom options in finalizer', () => { + vol.mkdirSync('/shards', { recursive: true }); + write('/shards/test.1.log', 'record1\n'); + + const sw = new ShardedWal('/shards', { + baseName: 'test', + walExtension: '.log', + finalPath: () => 'final.json', + finalizer: (records, opt) => + `${JSON.stringify({ records, meta: opt })}\n`, + }); + + sw.finalize({ version: '1.0', compressed: true }); + + const result = JSON.parse(read('/shards/final.json')); + expect(result.records).toEqual(['record1']); + expect(result.meta).toEqual({ version: '1.0', compressed: true }); + }); +}); From a19070e5f5d0ad287fc385ab615633d72d704d66 Mon Sep 17 00:00:00 2001 From: Michael Hladky Date: Sun, 18 Jan 2026 00:35:19 +0100 Subject: [PATCH 29/31] refactor: wip --- packages/utils/src/lib/wal.ts | 4 ++-- packages/utils/src/lib/wal.unit.test.ts | 20 ++++++++++++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/packages/utils/src/lib/wal.ts b/packages/utils/src/lib/wal.ts index f783b3b6f..51b29f287 100644 --- a/packages/utils/src/lib/wal.ts +++ b/packages/utils/src/lib/wal.ts @@ -118,7 +118,7 @@ export class WriteAheadLogFile implements AppendableSink { #fd: number | null = null; readonly #file: string; readonly #decode: Codec>['decode']; - readonly #encode: Codec>['encode']; + readonly #encode: Codec['encode']; /** * Create a new WAL file instance. @@ -202,7 +202,7 @@ export class WriteAheadLogFile implements AppendableSink { console.log('Found invalid entries during WAL repack'); } const recordsToWrite = hasInvalidEntries - ? r.records + ? (r.records as T[]) : filterValidRecords(r.records); fs.mkdirSync(path.dirname(out), { recursive: true }); fs.writeFileSync(out, `${recordsToWrite.map(this.#encode).join('\n')}\n`); diff --git a/packages/utils/src/lib/wal.unit.test.ts b/packages/utils/src/lib/wal.unit.test.ts index 52b10ae79..ba6dea69a 100644 --- a/packages/utils/src/lib/wal.unit.test.ts +++ b/packages/utils/src/lib/wal.unit.test.ts @@ -160,6 +160,18 @@ describe('WriteAheadLogFile', () => { vol.fromJSON({}, MEMFS_VOLUME); }); + it('should act as WLA for any kind of data', () => { + const w = wal('/test/a.log', stringCodec()); + w.open(); + w.append({ id: 1, name: 'test' }); + w.close(); + expect(w.recover()).toStrictEqual({ id: 1, name: 'test' }); + expect(() => + w.append('{ id: 1, name:...' as unknown as object), + ).not.toThrow(); + w.expect(w.recover()).toStrictEqual({ id: 1, name: 'test' }); + }); + it('should create instance with file path and codecs without opening', () => { const w = wal('/test/a.log'); expect(w).toBeInstanceOf(WriteAheadLogFile); @@ -797,8 +809,12 @@ describe('ShardedWal', () => { const files = (sw as any).shardFiles(); expect(files).toHaveLength(2); - expect(files).toContain('/shards/wal.1.log'); - expect(files).toContain('/shards/wal.2.log'); + expect(files).toEqual( + expect.arrayContaining([ + expect.pathToMatch('/shards/wal.1.log'), + expect.pathToMatch('/shards/wal.2.log'), + ]), + ); }); it('should finalize empty shards to empty result', () => { From 4f8cd2d16d66772d74e1d6fa5340baec843c6ebb Mon Sep 17 00:00:00 2001 From: Michael Hladky Date: Sun, 18 Jan 2026 02:54:18 +0100 Subject: [PATCH 30/31] refactor: wip --- packages/utils/src/lib/wal.unit.test.ts | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/packages/utils/src/lib/wal.unit.test.ts b/packages/utils/src/lib/wal.unit.test.ts index ba6dea69a..88f953d8a 100644 --- a/packages/utils/src/lib/wal.unit.test.ts +++ b/packages/utils/src/lib/wal.unit.test.ts @@ -165,11 +165,16 @@ describe('WriteAheadLogFile', () => { w.open(); w.append({ id: 1, name: 'test' }); w.close(); - expect(w.recover()).toStrictEqual({ id: 1, name: 'test' }); + expect(w.recover().records).toStrictEqual([{ id: 1, name: 'test' }]); + w.open(); expect(() => w.append('{ id: 1, name:...' as unknown as object), ).not.toThrow(); - w.expect(w.recover()).toStrictEqual({ id: 1, name: 'test' }); + w.close(); + expect(w.recover().records).toStrictEqual([ + { id: 1, name: 'test' }, + '{ id: 1, name:...', + ]); }); it('should create instance with file path and codecs without opening', () => { From bc86d08aaab59e9f19ffcb32d3602cdd1a95f963 Mon Sep 17 00:00:00 2001 From: Michael Hladky Date: Sun, 18 Jan 2026 03:23:44 +0100 Subject: [PATCH 31/31] refactor: wip --- packages/utils/src/lib/wal.unit.test.ts | 104 +++++++++--------------- 1 file changed, 38 insertions(+), 66 deletions(-) diff --git a/packages/utils/src/lib/wal.unit.test.ts b/packages/utils/src/lib/wal.unit.test.ts index 88f953d8a..0fc1e0094 100644 --- a/packages/utils/src/lib/wal.unit.test.ts +++ b/packages/utils/src/lib/wal.unit.test.ts @@ -250,7 +250,7 @@ describe('WriteAheadLogFile', () => { const w = wal('/test/a.log'); const result = w.recover(); - expect(result.records).toEqual(['line1', 'line2']); + expect(result.records).toStrictEqual(['line1', 'line2']); expect(result.errors).toEqual([]); }); @@ -385,7 +385,6 @@ describe('WriteAheadLogFile', () => { const walInstance = wal('/test/a.log'); - // Mock the recover method to return errors const recoverSpy = vi.spyOn(walInstance, 'recover').mockReturnValue({ records: ['content'], errors: [ @@ -685,85 +684,62 @@ describe('parseWalFormat', () => { }); it('should use default finalizer when none provided', () => { - const result = parseWalFormat({ baseName: 'test' }); + const result = parseWalFormat({ baseName: 'test' }); expect(result.finalizer(['line1', 'line2'])).toBe('line1\nline2\n'); expect(result.finalizer([])).toBe('\n'); }); }); describe('isLeaderWal', () => { - const originalEnv = { ...process.env }; - - afterEach(() => { - process.env = { ...originalEnv }; // eslint-disable-line functional/immutable-data - }); - it('should return true when env var matches current pid', () => { - const envVarName = 'TEST_LEADER_PID'; - process.env[envVarName] = '10001'; // eslint-disable-line functional/immutable-data + vi.stubEnv('TEST_LEADER_PID', '10001'); - const result = isLeaderWal(envVarName); + const result = isLeaderWal('TEST_LEADER_PID'); expect(result).toBe(true); }); it('should return false when env var does not match current pid', () => { - const envVarName = 'TEST_LEADER_PID'; - process.env[envVarName] = '67890'; // eslint-disable-line functional/immutable-data + vi.stubEnv('TEST_LEADER_PID', '67890'); - const result = isLeaderWal(envVarName); + const result = isLeaderWal('TEST_LEADER_PID'); expect(result).toBe(false); }); it('should return false when env var is not set', () => { - const envVarName = 'NON_EXISTENT_VAR'; - delete process.env[envVarName]; // eslint-disable-line @typescript-eslint/no-dynamic-delete,functional/immutable-data + vi.stubEnv('NON_EXISTENT_VAR', undefined as any); - const result = isLeaderWal(envVarName); + const result = isLeaderWal('NON_EXISTENT_VAR'); expect(result).toBe(false); }); it('should return false when env var is empty string', () => { - const envVarName = 'TEST_LEADER_PID'; - process.env[envVarName] = ''; // eslint-disable-line functional/immutable-data + vi.stubEnv('TEST_LEADER_PID', ''); - const result = isLeaderWal(envVarName); + const result = isLeaderWal('TEST_LEADER_PID'); expect(result).toBe(false); }); }); describe('setLeaderWal', () => { - const originalEnv = { ...process.env }; - - afterEach(() => { - process.env = { ...originalEnv }; // eslint-disable-line functional/immutable-data - }); - it('should set env var when not already set', () => { - const envVarName = 'TEST_ORIGIN_PID'; - delete process.env[envVarName]; // eslint-disable-line @typescript-eslint/no-dynamic-delete,functional/immutable-data - expect(process.env[envVarName]).toBeUndefined(); + expect(process.env['TEST_ORIGIN_PID']).toBeUndefined(); - setLeaderWal(envVarName); + setLeaderWal('TEST_ORIGIN_PID'); - expect(process.env[envVarName]).toBe('10001'); // process.pid is mocked to 10001 + expect(process.env['TEST_ORIGIN_PID']).toBe('10001'); }); it('should not overwrite existing env var', () => { - const envVarName = 'TEST_ORIGIN_PID'; - const existingValue = '99999'; - - process.env[envVarName] = existingValue; // eslint-disable-line functional/immutable-data - setLeaderWal(envVarName); + vi.stubEnv('TEST_ORIGIN_PID', '99999'); + setLeaderWal('TEST_ORIGIN_PID'); - expect(process.env[envVarName]).toBe(existingValue); + expect(process.env['TEST_ORIGIN_PID']).toBe('99999'); }); it('should set env var to current pid as string', () => { - const envVarName = 'TEST_ORIGIN_PID'; - delete process.env[envVarName]; // eslint-disable-line @typescript-eslint/no-dynamic-delete,functional/immutable-data - setLeaderWal(envVarName); + setLeaderWal('TEST_ORIGIN_PID'); - expect(process.env[envVarName]).toBe('10001'); + expect(process.env['TEST_ORIGIN_PID']).toBe('10001'); }); }); @@ -787,12 +763,11 @@ describe('ShardedWal', () => { const shard = sw.shard('123-456'); expect(shard).toBeInstanceOf(WriteAheadLogFile); - expect(shard.getPath()).toBe('/test/shards/test-wal.123-456.log'); + expect(shard.getPath()).toMatchPath('/test/shards/test-wal.123-456.log'); }); it('should list no shard files when directory does not exist', () => { const sw = new ShardedWal('/nonexistent', {}); - // Access private method for testing const files = (sw as any).shardFiles(); expect(files).toEqual([]); }); @@ -805,10 +780,11 @@ describe('ShardedWal', () => { }); it('should list shard files matching extension', () => { - vol.mkdirSync('/shards', { recursive: true }); - write('/shards/wal.1.log', 'content1'); - write('/shards/wal.2.log', 'content2'); - write('/shards/other.txt', 'not a shard'); + vol.fromJSON({ + '/shards/wal.1.log': 'content1', + '/shards/wal.2.log': 'content2', + '/shards/other.txt': 'not a shard', + }); const sw = new ShardedWal('/shards', { walExtension: '.log' }); const files = (sw as any).shardFiles(); @@ -836,9 +812,10 @@ describe('ShardedWal', () => { }); it('should finalize multiple shards into single file', () => { - vol.mkdirSync('/shards', { recursive: true }); - write('/shards/test.1.log', 'record1\n'); - write('/shards/test.2.log', 'record2\n'); + vol.fromJSON({ + '/shards/test.1.log': 'record1\n', + '/shards/test.2.log': 'record2\n', + }); const sw = new ShardedWal('/shards', { baseName: 'test', @@ -854,10 +831,10 @@ describe('ShardedWal', () => { }); it('should handle invalid entries during finalize', () => { - vol.mkdirSync('/shards', { recursive: true }); - write('/shards/test.1.log', 'valid\n'); - write('/shards/test.2.log', 'invalid\n'); - + vol.fromJSON({ + '/shards/test.1.log': 'valid\n', + '/shards/test.2.log': 'invalid\n', + }); const tolerantCodec = createTolerantCodec({ encode: (s: string) => s, decode: (s: string) => { @@ -883,10 +860,10 @@ describe('ShardedWal', () => { }); it('should cleanup shard files', () => { - vol.mkdirSync('/shards', { recursive: true }); - write('/shards/test.1.log', 'content1'); - write('/shards/test.2.log', 'content2'); - + vol.fromJSON({ + '/shards/test.1.log': 'content1', + '/shards/test.2.log': 'content2', + }); const sw = new ShardedWal('/shards', { baseName: 'test', walExtension: '.log', @@ -902,24 +879,19 @@ describe('ShardedWal', () => { }); it('should handle cleanup when some shard files do not exist', () => { - vol.mkdirSync('/shards', { recursive: true }); - write('/shards/test.1.log', 'content1'); + vol.fromJSON({ '/shards/test.1.log': 'content1' }); const sw = new ShardedWal('/shards', { baseName: 'test', walExtension: '.log', }); - // Manually delete one file to simulate race condition vol.unlinkSync('/shards/test.1.log'); - - // Should not throw expect(() => sw.cleanup()).not.toThrow(); }); it('should use custom options in finalizer', () => { - vol.mkdirSync('/shards', { recursive: true }); - write('/shards/test.1.log', 'record1\n'); + vol.fromJSON({ '/shards/test.1.log': 'record1\n' }); const sw = new ShardedWal('/shards', { baseName: 'test',