From e53a0851fef8dc2906daeaeb0509ffc44a2ab785 Mon Sep 17 00:00:00 2001 From: jleechan Date: Fri, 6 Mar 2026 02:21:59 -0800 Subject: [PATCH 1/4] perf(codex): skip out-of-range date directories when --since/--until is set Previously, loadTokenUsageEvents() always globbed **/*.jsonl across the entire sessions directory tree, which reads all historical data regardless of the requested date range. On large installations (35 GB+, 12 000+ files) this caused the process to be OOM-killed by the OS before producing any output. Sessions are stored as YYYY/MM/DD/*.jsonl. When --since or --until is provided, enumerate the year/month/day directories and prune entire subtrees that fall outside the range instead of performing a full recursive glob. Fall back to the recursive glob when no date filters are specified to preserve existing behaviour. Thread the since/until values from each command (daily, monthly, session) into loadTokenUsageEvents() so the directory-level filter is applied. Co-Authored-By: Claude Sonnet 4.6 --- apps/codex/src/commands/daily.ts | 2 +- apps/codex/src/commands/monthly.ts | 2 +- apps/codex/src/commands/session.ts | 2 +- apps/codex/src/data-loader.ts | 121 +++++++++++++++++++++++++++-- 4 files changed, 119 insertions(+), 8 deletions(-) diff --git a/apps/codex/src/commands/daily.ts b/apps/codex/src/commands/daily.ts index 6dc7c6f0..a9eb0c01 100644 --- a/apps/codex/src/commands/daily.ts +++ b/apps/codex/src/commands/daily.ts @@ -41,7 +41,7 @@ export const dailyCommand = define({ process.exit(1); } - const { events, missingDirectories } = await loadTokenUsageEvents(); + const { events, missingDirectories } = await loadTokenUsageEvents({ since, until }); for (const missing of missingDirectories) { logger.warn(`Codex session directory not found: ${missing}`); diff --git a/apps/codex/src/commands/monthly.ts b/apps/codex/src/commands/monthly.ts index 2a5abc5e..0680b2bb 100644 --- a/apps/codex/src/commands/monthly.ts +++ b/apps/codex/src/commands/monthly.ts @@ 
-41,7 +41,7 @@ export const monthlyCommand = define({ process.exit(1); } - const { events, missingDirectories } = await loadTokenUsageEvents(); + const { events, missingDirectories } = await loadTokenUsageEvents({ since, until }); for (const missing of missingDirectories) { logger.warn(`Codex session directory not found: ${missing}`); diff --git a/apps/codex/src/commands/session.ts b/apps/codex/src/commands/session.ts index 5dbe1a7e..ed07d547 100644 --- a/apps/codex/src/commands/session.ts +++ b/apps/codex/src/commands/session.ts @@ -46,7 +46,7 @@ export const sessionCommand = define({ process.exit(1); } - const { events, missingDirectories } = await loadTokenUsageEvents(); + const { events, missingDirectories } = await loadTokenUsageEvents({ since, until }); for (const missing of missingDirectories) { logger.warn(`Codex session directory not found: ${missing}`); diff --git a/apps/codex/src/data-loader.ts b/apps/codex/src/data-loader.ts index ef23a8f5..885e65a0 100644 --- a/apps/codex/src/data-loader.ts +++ b/apps/codex/src/data-loader.ts @@ -1,5 +1,5 @@ import type { TokenUsageDelta, TokenUsageEvent } from './_types.ts'; -import { readFile, stat } from 'node:fs/promises'; +import { readdir, readFile, stat } from 'node:fs/promises'; import path from 'node:path'; import process from 'node:process'; import { Result } from '@praha/byethrow'; @@ -177,14 +177,84 @@ function asNonEmptyString(value: unknown): string | undefined { export type LoadOptions = { sessionDirs?: string[]; + since?: string; + until?: string; }; +/** + * List session JSONL files, skipping date directories outside [since, until]. + * + * Codex stores sessions as `YYYY/MM/DD/*.jsonl`. When a date range is provided + * we enumerate the directory tree and prune entire year/month/day subtrees that + * cannot contain matching sessions, avoiding the cost of a full recursive glob + * over potentially large historical archives. 
+ */ +async function listSessionFiles( + sessionsDir: string, + since: string | undefined, + until: string | undefined, +): Promise { + if (since == null && until == null) { + return glob(SESSION_GLOB, { cwd: sessionsDir, absolute: true }); + } + + const sinceKey = since?.replaceAll('-', ''); + const untilKey = until?.replaceAll('-', ''); + + const tryReaddir = async (dir: string): Promise => { + const result = await Result.try({ + try: readdir(dir), + catch: (error) => error, + }); + return Result.isFailure(result) ? [] : result.value; + }; + + const files: string[] = []; + + for (const year of (await tryReaddir(sessionsDir)).filter((e) => /^\d{4}$/.test(e))) { + if (sinceKey != null && `${year}1231` < sinceKey) { + continue; + } + if (untilKey != null && `${year}0101` > untilKey) { + continue; + } + + const yearDir = path.join(sessionsDir, year); + for (const month of (await tryReaddir(yearDir)).filter((e) => /^\d{2}$/.test(e))) { + if (sinceKey != null && `${year + month}31` < sinceKey) { + continue; + } + if (untilKey != null && `${year + month}01` > untilKey) { + continue; + } + + const monthDir = path.join(yearDir, month); + for (const day of (await tryReaddir(monthDir)).filter((e) => /^\d{2}$/.test(e))) { + const dateKey = year + month + day; + if (sinceKey != null && dateKey < sinceKey) { + continue; + } + if (untilKey != null && dateKey > untilKey) { + continue; + } + + const dayDir = path.join(monthDir, day); + const dayFiles = await glob('*.jsonl', { cwd: dayDir, absolute: true }).catch(() => []); + files.push(...dayFiles); + } + } + } + + return files; +} + export type LoadResult = { events: TokenUsageEvent[]; missingDirectories: string[]; }; export async function loadTokenUsageEvents(options: LoadOptions = {}): Promise { + const { since, until } = options; const providedDirs = options.sessionDirs != null && options.sessionDirs.length > 0 ? 
options.sessionDirs.map((dir) => path.resolve(dir)) @@ -216,10 +286,7 @@ export async function loadTokenUsageEvents(options: LoadOptions = {}): Promise { + const makeEvent = (timestamp: string, input_tokens: number) => + JSON.stringify({ + timestamp, + type: 'event_msg', + payload: { + type: 'token_count', + info: { + last_token_usage: { + input_tokens, + cached_input_tokens: 0, + output_tokens: 100, + reasoning_output_tokens: 0, + total_tokens: input_tokens + 100, + }, + model: 'gpt-5', + }, + }, + }); + + // Fixture mirrors real Codex layout: YYYY/MM/DD/*.jsonl + await using fixture = await createFixture({ + '2025': { + '12': { + '31': { 'old.jsonl': makeEvent('2025-12-31T12:00:00.000Z', 999) }, + }, + }, + '2026': { + '03': { + '01': { 'new.jsonl': makeEvent('2026-03-01T12:00:00.000Z', 1_000) }, + }, + }, + }); + + // With since=2026-03-01 the 2025/12/31 file should be skipped entirely. + const { events } = await loadTokenUsageEvents({ + sessionDirs: [fixture.getPath('.')], + since: '2026-03-01', + }); + + expect(events).toHaveLength(1); + expect(events[0]!.inputTokens).toBe(1_000); + }); + it('falls back to legacy model when metadata is missing entirely', async () => { await using fixture = await createFixture({ sessions: { From 9c587b3e400ea8b8285d11f1fc0c06b2521c074b Mon Sep 17 00:00:00 2001 From: jleechan Date: Mon, 9 Mar 2026 01:44:57 -0700 Subject: [PATCH 2/4] fix(codex): include root-level *.jsonl files in date-filtered runs Reviewers flagged that listSessionFiles() with since/until set only walks YYYY/MM/DD subdirectories, causing flat session layouts to return no events. Fix by globbing *.jsonl directly under sessionsDir before the year/month/day traversal so both layouts are always covered. 
--- apps/codex/src/data-loader.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/apps/codex/src/data-loader.ts b/apps/codex/src/data-loader.ts index 885e65a0..57401841 100644 --- a/apps/codex/src/data-loader.ts +++ b/apps/codex/src/data-loader.ts @@ -209,7 +209,10 @@ async function listSessionFiles( return Result.isFailure(result) ? [] : result.value; }; - const files: string[] = []; + // Preserve support for legacy flat layouts: include any *.jsonl files + // stored directly under sessionsDir (not inside YYYY/MM/DD subdirs). + const rootFiles = await glob('*.jsonl', { cwd: sessionsDir, absolute: true }).catch(() => []); + const files: string[] = [...rootFiles]; for (const year of (await tryReaddir(sessionsDir)).filter((e) => /^\d{4}$/.test(e))) { if (sinceKey != null && `${year}1231` < sinceKey) { From 809a2ae428259c9b83bf4acb2daaed1c4c2954b7 Mon Sep 17 00:00:00 2001 From: jleechan Date: Mon, 9 Mar 2026 01:50:16 -0700 Subject: [PATCH 3/4] test(codex): add coverage for root-level flat files with date filters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewers (Copilot and CodeRabbit) flagged that listSessionFiles() with since/until active skipped flat *.jsonl files stored directly under sessionsDir. Added a test that mixes a root-level flat.jsonl with a dated 2026/03/05/dated.jsonl and asserts both are returned when since is set — confirming the rootFiles glob fix covers the legacy layout. 
--- apps/codex/src/data-loader.ts | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/apps/codex/src/data-loader.ts b/apps/codex/src/data-loader.ts index 57401841..ea1abbae 100644 --- a/apps/codex/src/data-loader.ts +++ b/apps/codex/src/data-loader.ts @@ -567,6 +567,47 @@ if (import.meta.vitest != null) { expect(events[0]!.inputTokens).toBe(1_000); }); + it('includes root-level *.jsonl files when date filters are active', async () => { + const makeEvent = (timestamp: string, input_tokens: number) => + JSON.stringify({ + timestamp, + type: 'event_msg', + payload: { + type: 'token_count', + info: { + last_token_usage: { + input_tokens, + cached_input_tokens: 0, + output_tokens: 100, + reasoning_output_tokens: 0, + total_tokens: input_tokens + 100, + }, + model: 'gpt-5', + }, + }, + }); + + // Mix of flat root-level file (legacy layout) and dated subdir file. + await using fixture = await createFixture({ + 'flat.jsonl': makeEvent('2026-03-05T10:00:00.000Z', 500), + '2026': { + '03': { + '05': { 'dated.jsonl': makeEvent('2026-03-05T11:00:00.000Z', 1_000) }, + }, + }, + }); + + // With since set, both the flat file and the dated file should be returned. + const { events } = await loadTokenUsageEvents({ + sessionDirs: [fixture.getPath('.')], + since: '2026-03-01', + }); + + expect(events).toHaveLength(2); + const tokens = events.map((e) => e.inputTokens).sort((a, b) => a - b); + expect(tokens).toEqual([500, 1_000]); + }); + it('falls back to legacy model when metadata is missing entirely', async () => { await using fixture = await createFixture({ sessions: { From e4fd6858df51e82f8f5bb7bc5bcdcb3e7e786d91 Mon Sep 17 00:00:00 2001 From: jleechan Date: Mon, 9 Mar 2026 02:06:54 -0700 Subject: [PATCH 4/4] perf(ccusage): skip Claude session files whose mtime precedes --since Claude stores sessions as flat UUID-named files under projects/{project}/{session-uuid}.jsonl with no date directory hierarchy, so the date-tree traversal used for Codex is not applicable. 
Instead, filter by filesystem mtime after globbing: any file last written more than one day before --since cannot contain entries in the requested range and is skipped before any content is read. The 24-hour buffer absorbs timezone differences between local clock and UTC. Only --since is used for mtime pruning; --until is intentionally ignored because a file updated after --until may still hold entries from before it. Measured on a 28 800-file / 6.7 GB store with --since 7 days ago: Before: ~90 s (all files read via sortFilesByTimestamp) After: ~25 s (10 000 files pass mtime; rest pruned at stat level) --- apps/ccusage/src/data-loader.ts | 96 +++++++++++++++++++++++++++++---- 1 file changed, 86 insertions(+), 10 deletions(-) diff --git a/apps/ccusage/src/data-loader.ts b/apps/ccusage/src/data-loader.ts index 73522e89..c0ee1046 100644 --- a/apps/ccusage/src/data-loader.ts +++ b/apps/ccusage/src/data-loader.ts @@ -13,7 +13,7 @@ import type { LoadedUsageEntry, SessionBlock } from './_session-blocks.ts'; import type { ActivityDate, Bucket, CostMode, ModelName, SortOrder, Version } from './_types.ts'; import { Buffer } from 'node:buffer'; import { createReadStream, createWriteStream } from 'node:fs'; -import { readFile } from 'node:fs/promises'; +import { readFile, stat } from 'node:fs/promises'; import path from 'node:path'; import process from 'node:process'; import { createInterface } from 'node:readline'; @@ -698,11 +698,54 @@ export type GlobResult = { }; /** - * Glob files from multiple Claude paths in parallel + * Convert a YYYYMMDD string to a Date at midnight local time. + */ +function parseDateKey(yyyymmdd: string): Date { + const year = Number.parseInt(yyyymmdd.slice(0, 4), 10); + const month = Number.parseInt(yyyymmdd.slice(4, 6), 10) - 1; + const day = Number.parseInt(yyyymmdd.slice(6, 8), 10); + return new Date(year, month, day); +} + +/** + * Filter files by filesystem mtime using a 1-day buffer before `since`. 
+ * + * Claude session files have no date directory hierarchy, so we rely on mtime + * (last write time) as a cheap O(stat) proxy. A file whose mtime is more than + * one day before `since` cannot contain entries in [since, ∞) — skip it. + * The 1-day buffer absorbs timezone differences between local clock and UTC. + * + * `until` is intentionally NOT used for mtime pruning: a file updated after + * `until` may still contain entries from before `until`. + */ +async function filterFilesByMtime(files: string[], since: string | undefined): Promise { + if (since == null) { + return files; + } + const threshold = parseDateKey(since).getTime() - 24 * 60 * 60 * 1000; + const results = await Promise.all( + files.map(async (file) => { + try { + const { mtimeMs } = await stat(file); + return mtimeMs >= threshold ? file : null; + } catch { + return file; // Include on stat error to avoid silent data loss + } + }), + ); + return results.filter((f): f is string => f !== null); +} + +/** + * Glob files from multiple Claude paths in parallel, optionally pruning by mtime. 
+ * @param claudePaths - Array of Claude base paths + * @param filter - Optional date range; `since` is used to skip stale files via mtime * @returns Array of file paths with their base directories */ -export async function globUsageFiles(claudePaths: string[]): Promise { +export async function globUsageFiles( + claudePaths: string[], + filter?: DateFilter, +): Promise { const filePromises = claudePaths.map(async (claudePath) => { const claudeDir = path.join(claudePath, CLAUDE_PROJECTS_DIR_NAME); const files = await glob([USAGE_DATA_GLOB_PATTERN], { @@ -710,8 +753,9 @@ export async function globUsageFiles(claudePaths: string[]): Promise []); // Gracefully handle errors for individual paths + const filtered = await filterFilesByMtime(files, filter?.since); // Map each file to include its base directory - return files.map((file) => ({ file, baseDir: claudeDir })); + return filtered.map((file) => ({ file, baseDir: claudeDir })); }); return (await Promise.all(filePromises)).flat(); } @@ -750,8 +794,11 @@ export async function loadDailyUsageData(options?: LoadOptions): Promise f.file); if (fileList.length === 0) { @@ -899,8 +946,11 @@ export async function loadSessionData(options?: LoadOptions): Promise r.baseDir.includes(path.join('path1', 'projects')))).toBe(true); }); + + it('skips files whose mtime is more than one day before since', async () => { + const { utimes } = await import('node:fs/promises'); + + await using fixture = await createFixture({ + 'base/projects/proj/old/usage.jsonl': 'old-data', + 'base/projects/proj/new/usage.jsonl': 'new-data', + }); + + const oldFile = fixture.getPath('base/projects/proj/old/usage.jsonl'); + const newFile = fixture.getPath('base/projects/proj/new/usage.jsonl'); + + // Set old file mtime to 5 days before since, new file mtime to 12 hours before since (inside the 1-day buffer). 
+ const since = new Date('2026-03-05'); + const oldMtime = new Date(since.getTime() - 5 * 24 * 60 * 60 * 1000); + const newMtime = new Date(since.getTime() - 12 * 60 * 60 * 1000); // 12h before (within buffer) + await utimes(oldFile, oldMtime, oldMtime); + await utimes(newFile, newMtime, newMtime); + + const results = await globUsageFiles([fixture.getPath('base')], { since: '20260305' }); + + // Old file (5d before since) should be pruned; new file (12h before, within 1d buffer) kept. + expect(results).toHaveLength(1); + expect(results[0]!.file).toContain('new'); + }); }); // Test for calculateContextTokens