/**
 * Convert a date key to a Date at midnight local time.
 *
 * Accepts the compact `YYYYMMDD` form and, as a convenience, the dashed
 * `YYYY-MM-DD` form (dashes are removed before parsing).
 *
 * @param yyyymmdd - Date key to parse
 * @returns The local-midnight Date, or an Invalid Date (`getTime()` is NaN)
 *   when the key is not eight digits or names a day that does not exist
 */
function parseDateKey(yyyymmdd: string): Date {
	const compact = yyyymmdd.replace(/-/g, '');
	if (!/^\d{8}$/.test(compact)) {
		return new Date(Number.NaN);
	}

	const year = Number.parseInt(compact.slice(0, 4), 10);
	const monthIndex = Number.parseInt(compact.slice(4, 6), 10) - 1;
	const day = Number.parseInt(compact.slice(6, 8), 10);
	const date = new Date(year, monthIndex, day);

	// The Date constructor rolls out-of-range components over (Feb 31 -> Mar 3).
	// A rolled-over date would move the pruning threshold, so reject it instead.
	const isRealDay =
		date.getFullYear() === year && date.getMonth() === monthIndex && date.getDate() === day;
	return isRealDay ? date : new Date(Number.NaN);
}

/**
 * Filter files by filesystem mtime using a 1-day buffer before `since`.
 *
 * A file is kept when its mtime is no earlier than local midnight of `since`
 * minus 24 hours. `until` is intentionally not a parameter: a file updated
 * after `until` may still contain entries from before `until`.
 *
 * The function errs on the side of keeping files:
 * - no `since`                -> every file is returned unchanged
 * - `since` cannot be parsed  -> every file is returned unchanged
 * - `stat` fails for a file   -> that file is kept
 *
 * @param files - Candidate file paths
 * @param since - Lower date bound (`YYYYMMDD`, dashes tolerated), or undefined
 * @returns The files that may contain entries on or after `since`, in input order
 */
async function filterFilesByMtime(files: string[], since: string | undefined): Promise<string[]> {
	if (since == null) {
		return files;
	}

	const bufferMs = 24 * 60 * 60 * 1000;
	const threshold = parseDateKey(since).getTime() - bufferMs;
	// `mtimeMs >= NaN` is always false, so an unparseable `since` would drop
	// every file. Skip pruning rather than lose data silently.
	if (Number.isNaN(threshold)) {
		return files;
	}

	const kept = await Promise.all(
		files.map(async (file) => {
			try {
				const { mtimeMs } = await stat(file);
				return mtimeMs >= threshold ? file : null;
			} catch {
				return file; // Include on stat error to avoid silent data loss
			}
		}),
	);
	return kept.filter((f): f is string => f !== null);
}
} = await import('node:fs/promises'); + + await using fixture = await createFixture({ + 'base/projects/proj/old/usage.jsonl': 'old-data', + 'base/projects/proj/new/usage.jsonl': 'new-data', + }); + + const oldFile = fixture.getPath('base/projects/proj/old/usage.jsonl'); + const newFile = fixture.getPath('base/projects/proj/new/usage.jsonl'); + + // Set old file mtime to 5 days before since, new file mtime to 1 day before since. + const since = new Date('2026-03-05'); + const oldMtime = new Date(since.getTime() - 5 * 24 * 60 * 60 * 1000); + const newMtime = new Date(since.getTime() - 12 * 60 * 60 * 1000); // 12h before (within buffer) + await utimes(oldFile, oldMtime, oldMtime); + await utimes(newFile, newMtime, newMtime); + + const results = await globUsageFiles([fixture.getPath('base')], { since: '20260305' }); + + // Old file (5d before since) should be pruned; new file (12h before, within 1d buffer) kept. + expect(results).toHaveLength(1); + expect(results[0]!.file).toContain('new'); + }); }); // Test for calculateContextTokens diff --git a/apps/codex/src/commands/daily.ts b/apps/codex/src/commands/daily.ts index 6dc7c6f0..a9eb0c01 100644 --- a/apps/codex/src/commands/daily.ts +++ b/apps/codex/src/commands/daily.ts @@ -41,7 +41,7 @@ export const dailyCommand = define({ process.exit(1); } - const { events, missingDirectories } = await loadTokenUsageEvents(); + const { events, missingDirectories } = await loadTokenUsageEvents({ since, until }); for (const missing of missingDirectories) { logger.warn(`Codex session directory not found: ${missing}`); diff --git a/apps/codex/src/commands/monthly.ts b/apps/codex/src/commands/monthly.ts index 2a5abc5e..0680b2bb 100644 --- a/apps/codex/src/commands/monthly.ts +++ b/apps/codex/src/commands/monthly.ts @@ -41,7 +41,7 @@ export const monthlyCommand = define({ process.exit(1); } - const { events, missingDirectories } = await loadTokenUsageEvents(); + const { events, missingDirectories } = await loadTokenUsageEvents({ since, 
/**
 * List session JSONL files, skipping date directories outside the requested range.
 *
 * Sessions are expected under `YYYY/MM/DD/*.jsonl`. When a usable bound is
 * given, the tree is walked level by level and year/month/day directories
 * that cannot overlap the range are never opened. `*.jsonl` files directly
 * under `sessionsDir` (legacy flat layout) are always included.
 *
 * Pruning is deliberately conservative:
 * - Bounds are widened by one day on each side, matching the one-day buffer
 *   used for mtime pruning elsewhere in this change. This only loads more
 *   files, never fewer.
 *   NOTE(review): presumably a session file stays in the directory of the day
 *   it was started and event dates are evaluated in a possibly different
 *   timezone; sessions running longer than the buffer are still pruned - confirm.
 *   NOTE(review): assumes callers still filter events by timestamp after
 *   loading, as they did when everything was loaded - confirm.
 * - A bound that is not `YYYY-MM-DD` / `YYYYMMDD` is ignored instead of being
 *   compared as an arbitrary string.
 *
 * @param sessionsDir - Root directory that holds the session tree
 * @param since - Inclusive lower date bound, or undefined
 * @param until - Inclusive upper date bound, or undefined
 * @returns Absolute paths: root-level files first, then dated files in directory-listing order
 */
async function listSessionFiles(
	sessionsDir: string,
	since: string | undefined,
	until: string | undefined,
): Promise<string[]> {
	// Compact 8-digit key, or undefined when the bound is missing or malformed.
	const toKey = (value: string | undefined): string | undefined => {
		const key = value?.replace(/-/g, '');
		return key != null && /^\d{8}$/.test(key) ? key : undefined;
	};

	// Shift a compact key by whole days using UTC calendar arithmetic (no DST effects).
	const shiftKey = (key: string, days: number): string => {
		const shifted = new Date(
			Date.UTC(
				Number(key.slice(0, 4)),
				Number(key.slice(4, 6)) - 1,
				Number(key.slice(6, 8)) + days,
			),
		);
		const yyyy = String(shifted.getUTCFullYear()).padStart(4, '0');
		const mm = String(shifted.getUTCMonth() + 1).padStart(2, '0');
		const dd = String(shifted.getUTCDate()).padStart(2, '0');
		return `${yyyy}${mm}${dd}`;
	};

	const sinceKey = toKey(since);
	const untilKey = toKey(until);

	if (sinceKey == null && untilKey == null) {
		return glob(SESSION_GLOB, { cwd: sessionsDir, absolute: true });
	}

	const lowerKey = sinceKey == null ? undefined : shiftKey(sinceKey, -1);
	const upperKey = untilKey == null ? undefined : shiftKey(untilKey, 1);

	// True when the directory's date span [first, last] can overlap the widened range.
	// Keys are fixed-width digits, so string comparison equals date comparison.
	const mayOverlap = (first: string, last: string): boolean =>
		!(lowerKey != null && last < lowerKey) && !(upperKey != null && first > upperKey);

	// A missing or unreadable directory is treated as empty.
	const tryReaddir = async (dir: string): Promise<string[]> => {
		const result = await Result.try({
			try: readdir(dir),
			catch: (error) => error,
		});
		return Result.isFailure(result) ? [] : result.value;
	};

	const listJsonl = async (dir: string): Promise<string[]> =>
		glob('*.jsonl', { cwd: dir, absolute: true }).catch(() => []);

	const yearPattern = /^\d{4}$/;
	const twoDigitPattern = /^\d{2}$/;

	// Preserve support for legacy flat layouts: include any *.jsonl files
	// stored directly under sessionsDir (not inside YYYY/MM/DD subdirs).
	const rootFiles = await listJsonl(sessionsDir);

	const years = (await tryReaddir(sessionsDir)).filter(
		(year) => yearPattern.test(year) && mayOverlap(`${year}0101`, `${year}1231`),
	);

	// Sibling directories are independent, so read them in parallel.
	// Promise.all keeps input order, so the result order is unchanged.
	const filesPerYear = await Promise.all(
		years.map(async (year) => {
			const yearDir = path.join(sessionsDir, year);
			const months = (await tryReaddir(yearDir)).filter(
				(month) =>
					twoDigitPattern.test(month) && mayOverlap(`${year}${month}01`, `${year}${month}31`),
			);

			const filesPerMonth = await Promise.all(
				months.map(async (month) => {
					const monthDir = path.join(yearDir, month);
					const days = (await tryReaddir(monthDir)).filter((day) => {
						const dateKey = `${year}${month}${day}`;
						return twoDigitPattern.test(day) && mayOverlap(dateKey, dateKey);
					});

					const filesPerDay = await Promise.all(
						days.map(async (day) => listJsonl(path.join(monthDir, day))),
					);
					return filesPerDay.flat();
				}),
			);
			return filesPerMonth.flat();
		}),
	);

	return [...rootFiles, ...filesPerYear.flat()];
}
LoadOptions = {}): Promise { + const { since, until } = options; const providedDirs = options.sessionDirs != null && options.sessionDirs.length > 0 ? options.sessionDirs.map((dir) => path.resolve(dir)) @@ -216,10 +289,7 @@ export async function loadTokenUsageEvents(options: LoadOptions = {}): Promise { + const makeEvent = (timestamp: string, input_tokens: number) => + JSON.stringify({ + timestamp, + type: 'event_msg', + payload: { + type: 'token_count', + info: { + last_token_usage: { + input_tokens, + cached_input_tokens: 0, + output_tokens: 100, + reasoning_output_tokens: 0, + total_tokens: input_tokens + 100, + }, + model: 'gpt-5', + }, + }, + }); + + // Fixture mirrors real Codex layout: YYYY/MM/DD/*.jsonl + await using fixture = await createFixture({ + '2025': { + '12': { + '31': { 'old.jsonl': makeEvent('2025-12-31T12:00:00.000Z', 999) }, + }, + }, + '2026': { + '03': { + '01': { 'new.jsonl': makeEvent('2026-03-01T12:00:00.000Z', 1_000) }, + }, + }, + }); + + // With since=2026-03-01 the 2025/12/31 file should be skipped entirely. + const { events } = await loadTokenUsageEvents({ + sessionDirs: [fixture.getPath('.')], + since: '2026-03-01', + }); + + expect(events).toHaveLength(1); + expect(events[0]!.inputTokens).toBe(1_000); + }); + + it('includes root-level *.jsonl files when date filters are active', async () => { + const makeEvent = (timestamp: string, input_tokens: number) => + JSON.stringify({ + timestamp, + type: 'event_msg', + payload: { + type: 'token_count', + info: { + last_token_usage: { + input_tokens, + cached_input_tokens: 0, + output_tokens: 100, + reasoning_output_tokens: 0, + total_tokens: input_tokens + 100, + }, + model: 'gpt-5', + }, + }, + }); + + // Mix of flat root-level file (legacy layout) and dated subdir file. 
+ await using fixture = await createFixture({ + 'flat.jsonl': makeEvent('2026-03-05T10:00:00.000Z', 500), + '2026': { + '03': { + '05': { 'dated.jsonl': makeEvent('2026-03-05T11:00:00.000Z', 1_000) }, + }, + }, + }); + + // With since set, both the flat file and the dated file should be returned. + const { events } = await loadTokenUsageEvents({ + sessionDirs: [fixture.getPath('.')], + since: '2026-03-01', + }); + + expect(events).toHaveLength(2); + const tokens = events.map((e) => e.inputTokens).sort((a, b) => a - b); + expect(tokens).toEqual([500, 1_000]); + }); + it('falls back to legacy model when metadata is missing entirely', async () => { await using fixture = await createFixture({ sessions: {