-
-
Notifications
You must be signed in to change notification settings - Fork 453
perf(codex): skip out-of-range date directories when --since/--until is set #877
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
e53a085
9c587b3
809a2ae
e4fd685
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,7 +13,7 @@ import type { LoadedUsageEntry, SessionBlock } from './_session-blocks.ts'; | |
| import type { ActivityDate, Bucket, CostMode, ModelName, SortOrder, Version } from './_types.ts'; | ||
| import { Buffer } from 'node:buffer'; | ||
| import { createReadStream, createWriteStream } from 'node:fs'; | ||
| import { readFile } from 'node:fs/promises'; | ||
| import { readFile, stat } from 'node:fs/promises'; | ||
| import path from 'node:path'; | ||
| import process from 'node:process'; | ||
| import { createInterface } from 'node:readline'; | ||
|
|
@@ -698,20 +698,64 @@ export type GlobResult = { | |
| }; | ||
|
|
||
| /** | ||
| * Glob files from multiple Claude paths in parallel | ||
| * Convert a YYYYMMDD string to a Date at midnight local time. | ||
| */ | ||
| function parseDateKey(yyyymmdd: string): Date { | ||
| const year = Number.parseInt(yyyymmdd.slice(0, 4), 10); | ||
| const month = Number.parseInt(yyyymmdd.slice(4, 6), 10) - 1; | ||
| const day = Number.parseInt(yyyymmdd.slice(6, 8), 10); | ||
| return new Date(year, month, day); | ||
| } | ||
|
|
||
| /** | ||
| * Filter files by filesystem mtime using a 1-day buffer before `since`. | ||
| * | ||
| * Claude session files have no date directory hierarchy, so we rely on mtime | ||
| * (last write time) as a cheap O(stat) proxy. A file whose mtime is more than | ||
| * one day before `since` cannot contain entries in [since, ∞) — skip it. | ||
| * The 1-day buffer absorbs timezone differences between local clock and UTC. | ||
| * | ||
| * `until` is intentionally NOT used for mtime pruning: a file updated after | ||
| * `until` may still contain entries from before `until`. | ||
| */ | ||
| async function filterFilesByMtime(files: string[], since: string | undefined): Promise<string[]> { | ||
| if (since == null) { | ||
| return files; | ||
| } | ||
| const threshold = parseDateKey(since).getTime() - 24 * 60 * 60 * 1000; | ||
| const results = await Promise.all( | ||
| files.map(async (file) => { | ||
| try { | ||
| const { mtimeMs } = await stat(file); | ||
| return mtimeMs >= threshold ? file : null; | ||
| } catch { | ||
| return file; // Include on stat error to avoid silent data loss | ||
| } | ||
| }), | ||
| ); | ||
| return results.filter((f): f is string => f !== null); | ||
| } | ||
|
|
||
| /** | ||
| * Glob files from multiple Claude paths in parallel, optionally pruning by mtime. | ||
| * @param claudePaths - Array of Claude base paths | ||
| * @param filter - Optional date range; `since` is used to skip stale files via mtime | ||
| * @returns Array of file paths with their base directories | ||
| */ | ||
| export async function globUsageFiles(claudePaths: string[]): Promise<GlobResult[]> { | ||
| export async function globUsageFiles( | ||
| claudePaths: string[], | ||
| filter?: DateFilter, | ||
| ): Promise<GlobResult[]> { | ||
| const filePromises = claudePaths.map(async (claudePath) => { | ||
| const claudeDir = path.join(claudePath, CLAUDE_PROJECTS_DIR_NAME); | ||
| const files = await glob([USAGE_DATA_GLOB_PATTERN], { | ||
| cwd: claudeDir, | ||
| absolute: true, | ||
| }).catch(() => []); // Gracefully handle errors for individual paths | ||
|
|
||
| const filtered = await filterFilesByMtime(files, filter?.since); | ||
| // Map each file to include its base directory | ||
| return files.map((file) => ({ file, baseDir: claudeDir })); | ||
| return filtered.map((file) => ({ file, baseDir: claudeDir })); | ||
| }); | ||
| return (await Promise.all(filePromises)).flat(); | ||
| } | ||
|
|
@@ -750,8 +794,11 @@ export async function loadDailyUsageData(options?: LoadOptions): Promise<DailyUs | |
| // Get all Claude paths or use the specific one from options | ||
| const claudePaths = toArray(options?.claudePath ?? getClaudePaths()); | ||
|
|
||
| // Collect files from all paths in parallel | ||
| const allFiles = await globUsageFiles(claudePaths); | ||
| // Collect files from all paths in parallel, pruning by mtime when since is set | ||
| const allFiles = await globUsageFiles(claudePaths, { | ||
| since: options?.since, | ||
| until: options?.until, | ||
| }); | ||
| const fileList = allFiles.map((f) => f.file); | ||
|
|
||
| if (fileList.length === 0) { | ||
|
|
@@ -899,8 +946,11 @@ export async function loadSessionData(options?: LoadOptions): Promise<SessionUsa | |
| // Get all Claude paths or use the specific one from options | ||
| const claudePaths = toArray(options?.claudePath ?? getClaudePaths()); | ||
|
|
||
| // Collect files from all paths with their base directories in parallel | ||
| const filesWithBase = await globUsageFiles(claudePaths); | ||
| // Collect files from all paths with their base directories in parallel, pruning by mtime | ||
| const filesWithBase = await globUsageFiles(claudePaths, { | ||
| since: options?.since, | ||
| until: options?.until, | ||
| }); | ||
|
|
||
| if (filesWithBase.length === 0) { | ||
| return []; | ||
|
|
@@ -1345,15 +1395,16 @@ export async function loadSessionBlockData(options?: LoadOptions): Promise<Sessi | |
| // Get all Claude paths or use the specific one from options | ||
| const claudePaths = toArray(options?.claudePath ?? getClaudePaths()); | ||
|
|
||
| // Collect files from all paths | ||
| // Collect files from all paths, pruning by mtime when since is set | ||
| const allFiles: string[] = []; | ||
| for (const claudePath of claudePaths) { | ||
| const claudeDir = path.join(claudePath, CLAUDE_PROJECTS_DIR_NAME); | ||
| const files = await glob([USAGE_DATA_GLOB_PATTERN], { | ||
| cwd: claudeDir, | ||
| absolute: true, | ||
| }); | ||
| allFiles.push(...files); | ||
| const filtered = await filterFilesByMtime(files, options?.since); | ||
| allFiles.push(...filtered); | ||
| } | ||
|
|
||
| if (allFiles.length === 0) { | ||
|
|
@@ -4691,6 +4742,31 @@ if (import.meta.vitest != null) { | |
| expect(results).toHaveLength(3); | ||
| expect(results.every((r) => r.baseDir.includes(path.join('path1', 'projects')))).toBe(true); | ||
| }); | ||
|
|
||
| it('skips files whose mtime is more than one day before since', async () => { | ||
| const { utimes } = await import('node:fs/promises'); | ||
|
|
||
| await using fixture = await createFixture({ | ||
| 'base/projects/proj/old/usage.jsonl': 'old-data', | ||
| 'base/projects/proj/new/usage.jsonl': 'new-data', | ||
| }); | ||
|
|
||
| const oldFile = fixture.getPath('base/projects/proj/old/usage.jsonl'); | ||
| const newFile = fixture.getPath('base/projects/proj/new/usage.jsonl'); | ||
|
|
||
| // Set old file mtime to 5 days before since, new file mtime to 1 day before since. | ||
| const since = new Date('2026-03-05'); | ||
| const oldMtime = new Date(since.getTime() - 5 * 24 * 60 * 60 * 1000); | ||
| const newMtime = new Date(since.getTime() - 12 * 60 * 60 * 1000); // 12h before (within buffer) | ||
| await utimes(oldFile, oldMtime, oldMtime); | ||
| await utimes(newFile, newMtime, newMtime); | ||
|
|
||
| const results = await globUsageFiles([fixture.getPath('base')], { since: '20260305' }); | ||
|
|
||
| // Old file (5d before since) should be pruned; new file (12h before, within 1d buffer) kept. | ||
| expect(results).toHaveLength(1); | ||
| expect(results[0]!.file).toContain('new'); | ||
| }); | ||
|
Comment on lines
+4746
to
+4769
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain 🏁 Script executed: #!/bin/bash
rg -nP 'await\s+import\s*\(' apps/ccusage/src -g '*.ts' (Repository: ryoppippi/ccusage — Length of output: 339) 🏁 Script executed: head -50 apps/ccusage/src/data-loader.ts (Repository: ryoppippi/ccusage — Length of output: 1751) 🏁 Script executed: sed -n '1575,1585p' apps/ccusage/src/data-loader.ts (Repository: ryoppippi/ccusage — Length of output: 295) 🏁 Script executed: sed -n '4773,4783p' apps/ccusage/src/data-loader.ts (Repository: ryoppippi/ccusage — Length of output: 568) Replace all dynamic imports in this file with static imports. This file uses … Suggested fixes — Line 4747, add: -import { readFile, stat } from 'node:fs/promises';
+import { readFile, stat, utimes } from 'node:fs/promises'; — Lines 1580 and 4778: remove the dynamic imports, since … describe('loadSessionUsageById', async () => {
- const { createFixture } = await import('fs-fixture'); describe('calculateContextTokens', async () => {
- const { createFixture } = await import('fs-fixture');🤖 Prompt for AI Agents |
||
| }); | ||
|
|
||
| // Test for calculateContextTokens | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,5 @@ | ||
| import type { TokenUsageDelta, TokenUsageEvent } from './_types.ts'; | ||
| import { readFile, stat } from 'node:fs/promises'; | ||
| import { readdir, readFile, stat } from 'node:fs/promises'; | ||
| import path from 'node:path'; | ||
| import process from 'node:process'; | ||
| import { Result } from '@praha/byethrow'; | ||
|
|
@@ -177,14 +177,87 @@ function asNonEmptyString(value: unknown): string | undefined { | |
|
|
||
| export type LoadOptions = { | ||
| sessionDirs?: string[]; | ||
| since?: string; | ||
| until?: string; | ||
| }; | ||
|
|
||
| /** | ||
| * List session JSONL files, skipping date directories outside [since, until]. | ||
| * | ||
| * Codex stores sessions as `YYYY/MM/DD/*.jsonl`. When a date range is provided | ||
| * we enumerate the directory tree and prune entire year/month/day subtrees that | ||
| * cannot contain matching sessions, avoiding the cost of a full recursive glob | ||
| * over potentially large historical archives. | ||
| */ | ||
| async function listSessionFiles( | ||
| sessionsDir: string, | ||
| since: string | undefined, | ||
| until: string | undefined, | ||
| ): Promise<string[]> { | ||
| if (since == null && until == null) { | ||
| return glob(SESSION_GLOB, { cwd: sessionsDir, absolute: true }); | ||
| } | ||
|
|
||
| const sinceKey = since?.replaceAll('-', ''); | ||
| const untilKey = until?.replaceAll('-', ''); | ||
|
|
||
|
Comment on lines
+192
to
+203
|
||
| const tryReaddir = async (dir: string): Promise<string[]> => { | ||
| const result = await Result.try({ | ||
| try: readdir(dir), | ||
| catch: (error) => error, | ||
| }); | ||
| return Result.isFailure(result) ? [] : result.value; | ||
| }; | ||
|
|
||
| // Preserve support for legacy flat layouts: include any *.jsonl files | ||
| // stored directly under sessionsDir (not inside YYYY/MM/DD subdirs). | ||
| const rootFiles = await glob('*.jsonl', { cwd: sessionsDir, absolute: true }).catch(() => []); | ||
| const files: string[] = [...rootFiles]; | ||
|
|
||
| for (const year of (await tryReaddir(sessionsDir)).filter((e) => /^\d{4}$/.test(e))) { | ||
| if (sinceKey != null && `${year}1231` < sinceKey) { | ||
| continue; | ||
| } | ||
| if (untilKey != null && `${year}0101` > untilKey) { | ||
|
Comment on lines
+217
to
+221
|
||
| continue; | ||
| } | ||
|
|
||
| const yearDir = path.join(sessionsDir, year); | ||
| for (const month of (await tryReaddir(yearDir)).filter((e) => /^\d{2}$/.test(e))) { | ||
| if (sinceKey != null && `${year + month}31` < sinceKey) { | ||
| continue; | ||
| } | ||
| if (untilKey != null && `${year + month}01` > untilKey) { | ||
| continue; | ||
| } | ||
|
|
||
| const monthDir = path.join(yearDir, month); | ||
| for (const day of (await tryReaddir(monthDir)).filter((e) => /^\d{2}$/.test(e))) { | ||
| const dateKey = year + month + day; | ||
| if (sinceKey != null && dateKey < sinceKey) { | ||
| continue; | ||
| } | ||
| if (untilKey != null && dateKey > untilKey) { | ||
| continue; | ||
| } | ||
|
|
||
| const dayDir = path.join(monthDir, day); | ||
| const dayFiles = await glob('*.jsonl', { cwd: dayDir, absolute: true }).catch(() => []); | ||
| files.push(...dayFiles); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| return files; | ||
| } | ||
|
|
||
| export type LoadResult = { | ||
| events: TokenUsageEvent[]; | ||
| missingDirectories: string[]; | ||
| }; | ||
|
|
||
| export async function loadTokenUsageEvents(options: LoadOptions = {}): Promise<LoadResult> { | ||
| const { since, until } = options; | ||
| const providedDirs = | ||
| options.sessionDirs != null && options.sessionDirs.length > 0 | ||
| ? options.sessionDirs.map((dir) => path.resolve(dir)) | ||
|
|
@@ -216,10 +289,7 @@ export async function loadTokenUsageEvents(options: LoadOptions = {}): Promise<L | |
| continue; | ||
| } | ||
|
|
||
| const files = await glob(SESSION_GLOB, { | ||
| cwd: directoryPath, | ||
| absolute: true, | ||
| }); | ||
| const files = await listSessionFiles(directoryPath, since, until); | ||
|
|
||
| for (const file of files) { | ||
| const relativeSessionPath = path.relative(directoryPath, file); | ||
|
|
@@ -453,6 +523,91 @@ if (import.meta.vitest != null) { | |
| expect(second.cachedInputTokens).toBe(100); | ||
| }); | ||
|
|
||
| it('skips date directories outside the since/until range', async () => { | ||
| const makeEvent = (timestamp: string, input_tokens: number) => | ||
| JSON.stringify({ | ||
| timestamp, | ||
| type: 'event_msg', | ||
| payload: { | ||
| type: 'token_count', | ||
| info: { | ||
| last_token_usage: { | ||
| input_tokens, | ||
| cached_input_tokens: 0, | ||
| output_tokens: 100, | ||
| reasoning_output_tokens: 0, | ||
| total_tokens: input_tokens + 100, | ||
| }, | ||
| model: 'gpt-5', | ||
| }, | ||
| }, | ||
| }); | ||
|
|
||
| // Fixture mirrors real Codex layout: YYYY/MM/DD/*.jsonl | ||
| await using fixture = await createFixture({ | ||
| '2025': { | ||
| '12': { | ||
| '31': { 'old.jsonl': makeEvent('2025-12-31T12:00:00.000Z', 999) }, | ||
| }, | ||
| }, | ||
| '2026': { | ||
| '03': { | ||
| '01': { 'new.jsonl': makeEvent('2026-03-01T12:00:00.000Z', 1_000) }, | ||
| }, | ||
| }, | ||
| }); | ||
|
|
||
| // With since=2026-03-01 the 2025/12/31 file should be skipped entirely. | ||
| const { events } = await loadTokenUsageEvents({ | ||
| sessionDirs: [fixture.getPath('.')], | ||
| since: '2026-03-01', | ||
| }); | ||
|
|
||
| expect(events).toHaveLength(1); | ||
| expect(events[0]!.inputTokens).toBe(1_000); | ||
| }); | ||
|
|
||
| it('includes root-level *.jsonl files when date filters are active', async () => { | ||
| const makeEvent = (timestamp: string, input_tokens: number) => | ||
| JSON.stringify({ | ||
| timestamp, | ||
| type: 'event_msg', | ||
| payload: { | ||
| type: 'token_count', | ||
| info: { | ||
| last_token_usage: { | ||
| input_tokens, | ||
| cached_input_tokens: 0, | ||
| output_tokens: 100, | ||
| reasoning_output_tokens: 0, | ||
| total_tokens: input_tokens + 100, | ||
| }, | ||
| model: 'gpt-5', | ||
| }, | ||
| }, | ||
| }); | ||
|
|
||
| // Mix of flat root-level file (legacy layout) and dated subdir file. | ||
| await using fixture = await createFixture({ | ||
| 'flat.jsonl': makeEvent('2026-03-05T10:00:00.000Z', 500), | ||
| '2026': { | ||
| '03': { | ||
| '05': { 'dated.jsonl': makeEvent('2026-03-05T11:00:00.000Z', 1_000) }, | ||
| }, | ||
| }, | ||
| }); | ||
|
|
||
| // With since set, both the flat file and the dated file should be returned. | ||
| const { events } = await loadTokenUsageEvents({ | ||
| sessionDirs: [fixture.getPath('.')], | ||
| since: '2026-03-01', | ||
| }); | ||
|
|
||
| expect(events).toHaveLength(2); | ||
| const tokens = events.map((e) => e.inputTokens).sort((a, b) => a - b); | ||
| expect(tokens).toEqual([500, 1_000]); | ||
| }); | ||
|
|
||
| it('falls back to legacy model when metadata is missing entirely', async () => { | ||
| await using fixture = await createFixture({ | ||
| sessions: { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
Repository: ryoppippi/ccusage
Length of output: 1146
🏁 Script executed:
Repository: ryoppippi/ccusage
Length of output: 2575
🏁 Script executed:
Repository: ryoppippi/ccusage
Length of output: 3260
🏁 Script executed:
Repository: ryoppippi/ccusage
Length of output: 123
🏁 Script executed:
Repository: ryoppippi/ccusage
Length of output: 2900
🏁 Script executed:
Repository: ryoppippi/ccusage
Length of output: 2537
🏁 Script executed:
Repository: ryoppippi/ccusage
Length of output: 3360
🏁 Script executed:
Repository: ryoppippi/ccusage
Length of output: 4901
🏁 Script executed:
Repository: ryoppippi/ccusage
Length of output: 2174
🏁 Script executed:
Repository: ryoppippi/ccusage
Length of output: 1329
🏁 Script executed:
Repository: ryoppippi/ccusage
Length of output: 1184
🏁 Script executed:
Repository: ryoppippi/ccusage
Length of output: 1540
Remove dynamic import of `node:fs/promises` and add `utimes` to the top-level import.

Line 4747 uses `await import('node:fs/promises')` to get `utimes`, which violates the no-dynamic-imports constraint for `apps/ccusage/**/*.ts`. Since `readFile` and `stat` are already imported from `node:fs/promises` at line 16, add `utimes` to that import statement instead. Then replace the dynamic import in the test:
await import('node:fs/promises')to getutimes, which violates the no-dynamic-imports constraint forapps/ccusage/**/*.ts. SincereadFileandstatare already imported fromnode:fs/promisesat line 16, addutimesto that import statement instead.Then replace the dynamic import in the test:
it('skips files whose mtime is more than one day before since', async () => { - const { utimes } = await import('node:fs/promises'); - await using fixture = await createFixture({🤖 Prompt for AI Agents