icebear0828 · icebear0828 · May 11, 2026 · May 11, 2026 · May 11, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -17,6 +17,8 @@
 
 ### Fixed
 
+- 账号"已限速"状态与上游 cachedQuota 双真理漂移彻底修复（free 用户尤甚，本地锁可长达一周不释放）：proxy 此前同时维护 `entry.usage.rate_limit_until` + `entry.status === "rate_limited"`（来自 429 retry-after 的本地锁）和 `entry.cachedQuota.<bucket>.limit_reached/reset_at`（来自上游 rate_limits header 被动收集）两套独立信号。`refresh-scheduler.ts:196/236/291` 和 `services/account-mutation.ts:59` 的 `markStatus(_, "active")` 调用会把 status 从 "rate_limited" 翻回 "active" 但不动 `rate_limit_until`，导致后者成为孤儿字段——dashboard 显示"已限速 / 5h 0% 已使用"自相矛盾，且对 free 账号"7d 主窗口"语义下，孤儿 lock 可比上游真实重置时间晚出整整一个周期。修复：(1) 删除 `markRateLimited` / `clearRateLimit` / `markQuotaExhausted` 三个旧方法，新增 `applyRateLimit429(entryId, { retryAfterSec?, resetsAtSec?, countRequest? })`，把 429 retry-after 直接写到 `cachedQuota.rate_limit.{limit_reached=true, reset_at}`，永不缩短已有 reset_at（下一次 passive header 采集会修正 bucket），不再 mutate `entry.status`；(2) `AccountStatus` 枚举去掉 `"rate_limited"`（只剩 `active / expired / quota_exhausted / refreshing / disabled / banned`，纯轮转状态机）；(3) 重写 `proxy-error-handler.ts:91` / `proxy-handler.ts:545` 走新方法；(4) `refreshStatus` 删掉 rate-limit 清理分支（`resetExpiredQuotaWindow` 已 cover 窗口到期自动清 `limit_reached`）；(5) `isAuthenticated` / `getPoolSummary` 加 `hasReachedCachedQuota` 检查，避免全 quota 耗尽时误报 authenticated；(6) `accounts.json` 一次性 migration：`migrateLegacyRateLimit` 在 `loadPersisted` 里把 `status="rate_limited" + rate_limit_until` 老条目转成 `status="active" + 合成 cachedQuota.rate_limit`（仅当本地 lock 比 cachedQuota 新鲜时才覆盖），下一次 persist 自动丢字段；(7) `web/src/lib/accountStatus.ts` 新增 `derivedStatus(account)`，dashboard `AccountCard` / `AccountList` 都按 cachedQuota → "rate_limited" 派生 badge，"已达上限"账号现在如实显示而不是错标"活跃"；新增 `tests/unit/auth/account-pool-rate-limit-429.test.ts`（9 个）+ `tests/unit/auth/account-persistence-migration.test.ts`（7 个），改写约 20 处旧 `markRateLimited` / `markQuotaExhausted` 断言用 `applyRateLimit429` + `isQuotaExhausted(account.quota)`，full suite 1927 全绿（`src/auth/account-registry.ts`、`src/auth/account-pool.ts`、`src/auth/account-persistence.ts`、`src/auth/types.ts`、`src/auth/quota-skip.ts`、`src/routes/shared/proxy-error-handler.ts`、`src/routes/shared/proxy-handler.ts`、`web/src/lib/accountStatus.ts`、`web/src/components/AccountCard.tsx`、`web/src/components/AccountTable.tsx`、`web/src/components/AccountList.tsx`、`web/src/pages/AccountManagement.tsx`）
+- `src/routes/shared/proxy-handler.ts` 对 `EmptyResponseError` 与"上游 reasoning 中途断流"两类错误未区分，前者会 cross-account retry（吃 3 个号），后者重试也一定会再次撞同样的上游 120s 硬上限——一次"上游 reasoning 没在 ~120s 内 emit 任何 output_text"会消耗整池 360s 的号。新增 `UpstreamPrematureCloseError`（`src/translation/codex-event-extractor.ts`），`collectCodexResponse`（`src/translation/codex-to-openai.ts`）检测到 stream 无 `response.completed`/`response.failed`/`error` 任一 terminal 事件时抛新类型，proxy-handler 接住直接 504 fail-fast 不跨号重试。背景：调查中观察到上游 chatgpt.com 在 HTTP/1.1 chunked encoding 的 chunk 之间硬切 TCP，不发 0-length 终止 chunk 也不发 SSE `[DONE]`（hyper 透出 `error decoding response body: unexpected EOF during chunk size line`），触发条件为模型在 `effort=xhigh` 下推理超过 ~120s 仍未开始 output——属于上游 backend 行为不是 proxy bug；proxy 现在做的是不让该故障吃光账号池（`src/translation/codex-event-extractor.ts`、`src/translation/codex-to-openai.ts`、`src/routes/shared/proxy-handler.ts`、`tests/unit/translation/codex-to-openai.test.ts`、`tests/integration/proxy-handler.test.ts`）
 - `src/utils/debug-dump.ts` 把 dump 文件路径硬编码 `/tmp/codex-proxy-dump-*.jsonl`，Windows 没有 `/tmp` 目录，`fs.appendFileSync` ENOENT 直接被外层 try/catch 吞掉——Windows 用户即使设了 `CODEX_PROXY_DEBUG_DUMP=1` 也拿不到任何 dump 输出，且没有任何 warning。改用 `os.tmpdir()` 解析（macOS `/var/folders/.../T`、Linux `/tmp`、Windows `C:\Users\<u>\AppData\Local\Temp`），跨平台一致。新增 `tests/unit/utils/debug-dump.test.ts` 把"dump path must live under os.tmpdir()"freeze 成显式断言，防止以后再有人为了"看着短"重新硬编码 `/tmp`
 - v2.0.73 用户在 8080 端口被占时弹 `Uncaught Exception: Error: listen EADDRINUSE: address already in use 127.0.0.1:8080`：根因是 `@hono/node-server.serve()` 同步返回 Server 对象但 `listen()` 是异步，`startServer()` 在 socket 真正 bind 之前就 resolve 了，main.ts 那个`try { await startServer(...) } catch { startServer({ port: 0 }) }` 的随机端口 fallback 永远不触发——EADDRINUSE 在 `await` 之外异步抛，逃出 catch 范围变成 uncaughtException 弹给 Electron 用户。同一个 race 在 #486 smoke 健康探测里以另一种形态出现过（grep 命中 "Server started" 后 curl 立刻 connect refused），当时只在外层 retry 上吸收了，没动产品代码；这次把根因修在 src 层。新增 `src/utils/await-listening.ts` 暴露 `awaitServerListening(server)`，监听 `listening` / `error` 二选一并自清 listener；`startServer()` 在 `serve()` 后插一行 `await awaitServerListening(server)` 把 bind 错误真正变成 startServer 的拒绝。配套 `tests/unit/utils/await-listening.test.ts` 5 个单测覆盖 listening / error / 已 listening / 双向不泄漏 listener。修复后 main.ts 的随机端口 fallback 真正生效，8080 被占时会自动换一个端口（`src/utils/await-listening.ts`、`src/index.ts`、`tests/unit/utils/await-listening.test.ts`）
 - Electron auto-updater 真正尊重 `autoUpdate` 选项：`packages/electron/electron/auto-updater.ts` 里 `const isAutoUpdate = options.autoUpdate ?? true` 这个变量声明了但**从来没被使用过**——后续的 `setTimeout(initial check, 30s)` 与 `setInterval(periodic check, 4h)` 直接无条件运行，于是用户即便配了 `autoUpdate: false` 也照样后台 ping 上游 latest release 检查更新。把这两个定时器包到 `if (isAutoUpdate)` 内才让开关真的生效。补 `packages/electron/__tests__/auto-updater.test.ts` 两个 case：(1) `autoUpdate: false` 时 advance fake timer 不会触发任何 `checkForUpdates`；(2) `allowPrerelease: true` 真的写到 `mockAutoUpdater.allowPrerelease`

diff --git a/src/auth/account-persistence.ts b/src/auth/account-persistence.ts
@@ -18,7 +18,77 @@ import {
   extractUserProfile,
   isTokenExpired,
 } from "./jwt-utils.js";
-import type { AccountEntry, AccountsFile } from "./types.js";
+import type { AccountEntry, AccountsFile, CodexQuota } from "./types.js";
+
+/**
+ * Migrates a legacy entry in place: status "rate_limited" becomes "active"; a
+ * still-future usage.rate_limit_until is folded into cachedQuota.rate_limit
+ * (limit_reached=true, reset_at=until, quotaFetchedAt re-stamped); a past or
+ * unparseable one is simply dropped. The legacy field is nulled whenever set.
+ *
+ * Trust rule: cachedQuota wins when quotaFetchedAt > rate_limit_until (the lock
+ * expiry). NOTE(review): the overwrite only runs for a future expiry, so this
+ * guard fires only with a future-dated quotaFetchedAt — confirm intent.
+ *
+ * @returns true when the entry was mutated.
+ */
+export function migrateLegacyRateLimit(entry: AccountEntry): boolean {
+  const usage = entry.usage;
+  const legacyUntil = usage.rate_limit_until;
+  // On-disk shape pre-dates the enum narrowing; cast through string to compare
+  // against the retired "rate_limited" literal without tripping TS no-overlap.
+  const wasRateLimitedStatus = (entry.status as string) === "rate_limited";
+  if (!wasRateLimitedStatus && !legacyUntil) return false; // nothing legacy on this entry
+
+  let mutated = false;
+
+  if (wasRateLimitedStatus) {
+    entry.status = "active";
+    mutated = true;
+  }
+
+  if (legacyUntil) {
+    const untilMs = Date.parse(legacyUntil);
+    const untilSec = Number.isFinite(untilMs) ? Math.floor(untilMs / 1000) : 0; // Unix seconds; the 0 fallback is never written (inFuture is false then)
+    const inFuture = Number.isFinite(untilMs) && untilMs > Date.now();
+
+    const fetchedMs = entry.quotaFetchedAt ? Date.parse(entry.quotaFetchedAt) : NaN; // NaN when never fetched
+    const cachedQuotaIsFresh =
+      entry.cachedQuota != null &&
+      Number.isFinite(fetchedMs) &&
+      Number.isFinite(untilMs) &&
+      fetchedMs > untilMs;
+
+    if (inFuture && !cachedQuotaIsFresh) {
+      const synthesized: CodexQuota = entry.cachedQuota ?? { // reuse (and mutate) the existing quota object, else build a minimal one
+        plan_type: entry.planType ?? "unknown",
+        rate_limit: {
+          allowed: false,
+          limit_reached: true,
+          used_percent: 100,
+          reset_at: untilSec,
+          limit_window_seconds: usage.limit_window_seconds ?? null,
+        },
+        secondary_rate_limit: null,
+        code_review_rate_limit: null,
+      };
+      synthesized.rate_limit = { // force the primary bucket to "limited until untilSec", keeping its other fields
+        ...synthesized.rate_limit,
+        allowed: false,
+        limit_reached: true,
+        used_percent: Math.max(synthesized.rate_limit.used_percent ?? 0, 100),
+        reset_at: untilSec,
+      };
+      entry.cachedQuota = synthesized;
+      entry.quotaFetchedAt = new Date().toISOString(); // stamp the synthesized quota as written now
+    }
+
+    usage.rate_limit_until = null; // cleared even when the lock was stale or unparseable
+    mutated = true;
+  }
+
+  return mutated;
+}
 
 export interface AccountPersistence {
   load(): { entries: AccountEntry[]; needsPersist: boolean };
@@ -212,6 +282,10 @@ function loadPersisted(): { entries: AccountEntry[]; needsPersist: boolean } {
         entry.quotaFetchedAt = null;
         needsPersist = true;
       }
+      // Migrate legacy rate_limit_until + status="rate_limited" → cachedQuota
+      if (migrateLegacyRateLimit(entry)) {
+        needsPersist = true;
+      }
       entries.push(entry);
     }
 

diff --git a/src/auth/account-pool.ts b/src/auth/account-pool.ts
@@ -164,28 +164,23 @@ export class AccountPool {
     }
   }
 
-  markRateLimited(
+  /**
+   * Single entry point for "this account just got 429'd". Delegates to
+   * AccountRegistry.applyRateLimit429 (default backoff: rateLimitBackoffSeconds),
+   * which records the limit in cachedQuota.rate_limit; pool exclusion flows
+   * through {@link hasReachedCachedQuota}. If the registry reports the entry
+   * was found, also calls lifecycle.clearLock(entryId) and evictWsPool(entryId).
+   */
+  applyRateLimit429(
     entryId: string,
-    options?: { retryAfterSec?: number; countRequest?: boolean },
+    options?: { retryAfterSec?: number; resetsAtSec?: number; countRequest?: boolean },
   ): void {
-    if (this.registry.markRateLimited(entryId, this.rateLimitBackoffSeconds, options)) {
+    if (this.registry.applyRateLimit429(entryId, this.rateLimitBackoffSeconds, options)) {
       this.lifecycle.clearLock(entryId);
       this.evictWsPool(entryId);
     }
   }
 
-  clearRateLimit(entryId: string): void {
-    if (this.registry.clearRateLimit(entryId)) {
-      this.lifecycle.clearLock(entryId);
-    }
-  }
-
-  markQuotaExhausted(entryId: string, resetAtUnix: number | null): void {
-    if (this.registry.markQuotaExhausted(entryId, resetAtUnix)) {
-      this.lifecycle.clearLock(entryId);
-    }
-  }
-
   // ── Quota / usage ─────────────────────────────────────────────────
 
   recordEmptyResponse(entryId: string): void {

diff --git a/src/auth/account-registry.ts b/src/auth/account-registry.ts
@@ -106,7 +106,6 @@ export class AccountRegistry {
         cached_tokens: 0,
         empty_response_count: 0,
         last_used: null,
-        rate_limit_until: null,
         window_request_count: 0,
         window_input_tokens: 0,
         window_output_tokens: 0,
@@ -186,19 +185,62 @@ export class AccountRegistry {
   }
 
   /** Returns true if the entry was found and mutated. */
-  markRateLimited(
+  /**
+   * Handle an upstream 429 by writing into cachedQuota.rate_limit (primary
+   * bucket) as the single source of truth. 429 body carries no bucket marker;
+   * the next passive header collection on a successful response will overwrite
+   * with ground truth (which may upgrade this to secondary if needed).
+   *
+   * - Synthesizes a minimal cachedQuota if none exists yet (new account).
+   * - Never shrinks an existing reset_at — if cachedQuota already says we are
+   *   limited further in the future (e.g. weekly bucket), keep that.
+   * - Does NOT mutate `entry.status`; pool exclusion happens via
+   *   {@link hasReachedCachedQuota}.
+   *
+   * Returns true if the entry was found.
+   */
+  applyRateLimit429(
     entryId: string,
     backoffSeconds: number,
-    options?: { retryAfterSec?: number; countRequest?: boolean },
+    options?: { retryAfterSec?: number; resetsAtSec?: number; countRequest?: boolean },
   ): boolean {
     const entry = this.accounts.get(entryId);
     if (!entry) return false;
 
-    const backoff = jitter(options?.retryAfterSec ?? backoffSeconds, 0.2);
-    const until = new Date(Date.now() + backoff * 1000);
+    const nowSec = Date.now() / 1000;
+    const explicit = options?.resetsAtSec;
+    const fromRetry = options?.retryAfterSec != null
+      ? nowSec + jitter(options.retryAfterSec, 0.2)
+      : null;
+    const newResetAt = explicit ?? fromRetry ?? (nowSec + jitter(backoffSeconds, 0.2));
+
+    const quota: CodexQuota = entry.cachedQuota ?? {
+      plan_type: entry.planType ?? "unknown",
+      rate_limit: {
+        allowed: false,
+        limit_reached: true,
+        used_percent: 100,
+        reset_at: newResetAt,
+        limit_window_seconds: entry.usage.limit_window_seconds ?? null,
+      },
+      secondary_rate_limit: null,
+      code_review_rate_limit: null,
+    };
 
-    entry.status = "rate_limited";
-    entry.usage.rate_limit_until = until.toISOString();
+    const existingResetAt = quota.rate_limit.reset_at;
+    const finalResetAt = existingResetAt != null && existingResetAt > newResetAt
+      ? existingResetAt
+      : newResetAt;
+
+    quota.rate_limit = {
+      ...quota.rate_limit,
+      allowed: false,
+      limit_reached: true,
+      used_percent: Math.max(quota.rate_limit.used_percent ?? 0, 100),
+      reset_at: finalResetAt,
+    };
+    entry.cachedQuota = quota;
+    entry.quotaFetchedAt = new Date().toISOString();
 
     if (options?.countRequest) {
       entry.usage.request_count++;
@@ -210,38 +252,6 @@ export class AccountRegistry {
     return true;
   }
 
-  /** Returns true if the entry was found and mutated. */
-  clearRateLimit(entryId: string): boolean {
-    const entry = this.accounts.get(entryId);
-    if (!entry) return false;
-    entry.status = "active";
-    entry.usage.rate_limit_until = null;
-    this.schedulePersist();
-    return true;
-  }
-
-  /** Returns true if the entry was found and actually changed. */
-  markQuotaExhausted(entryId: string, resetAtUnix: number | null): boolean {
-    const entry = this.accounts.get(entryId);
-    if (!entry) return false;
-    if (entry.status === "disabled" || entry.status === "expired" || entry.status === "banned" || entry.status === "refreshing") return false;
-
-    const until = resetAtUnix
-      ? new Date(resetAtUnix * 1000).toISOString()
-      : new Date(Date.now() + 300_000).toISOString();
-
-    if (entry.status === "rate_limited" && entry.usage.rate_limit_until) {
-      const existing = new Date(entry.usage.rate_limit_until).getTime();
-      const proposed = new Date(until).getTime();
-      if (proposed <= existing) return false;
-    }
-
-    entry.status = "rate_limited";
-    entry.usage.rate_limit_until = until;
-    this.schedulePersist();
-    return true;
-  }
-
   // ── Query ─────────────────────────────────────────────────────────
 
   getAccounts(): AccountInfo[] {
@@ -268,7 +278,11 @@ export class AccountRegistry {
     const now = new Date();
     for (const entry of this.accounts.values()) {
       this.refreshStatus(entry, now);
-      if (entry.status === "active") return true;
+      // "Authenticated" used to imply "has a usable account". After retiring
+      // status="rate_limited", we treat any cachedQuota-exhausted account as
+      // unusable too — otherwise an all-exhausted pool would falsely report
+      // authenticated and produce confusing 4xx on requests.
+      if (entry.status === "active" && !hasReachedCachedQuota(entry)) return true;
     }
     return false;
   }
@@ -325,6 +339,8 @@ export class AccountRegistry {
     active: number;
     expired: number;
     quota_exhausted: number;
+    /** Count of accounts whose cachedQuota reports any bucket limit_reached.
+     *  Derived from cachedQuota, NOT from a "rate_limited" status (retired). */
     rate_limited: number;
     refreshing: number;
     disabled: number;
@@ -334,11 +350,14 @@ export class AccountRegistry {
     let active = 0, expired = 0, quota_exhausted = 0, rate_limited = 0, refreshing = 0, disabled = 0, banned = 0;
     for (const entry of this.accounts.values()) {
       this.refreshStatus(entry, now);
+      if (entry.status === "active" && hasReachedCachedQuota(entry)) {
+        rate_limited++;
+        continue;
+      }
       switch (entry.status) {
         case "active": active++; break;
         case "expired": expired++; break;
         case "quota_exhausted": quota_exhausted++; break;
-        case "rate_limited": rate_limited++; break;
         case "refreshing": refreshing++; break;
         case "disabled": disabled++; break;
         case "banned": banned++; break;
@@ -459,7 +478,6 @@ export class AccountRegistry {
       cached_tokens: 0,
       empty_response_count: 0,
       last_used: null,
-      rate_limit_until: null,
       window_reset_at: entry.usage.window_reset_at ?? null,
       window_request_count: 0,
       window_input_tokens: 0,
@@ -475,13 +493,6 @@ export class AccountRegistry {
   // ── Internal ──────────────────────────────────────────────────────
 
   refreshStatus(entry: AccountEntry, now: Date): void {
-    if (entry.status === "rate_limited" && entry.usage.rate_limit_until) {
-      if (now >= new Date(entry.usage.rate_limit_until)) {
-        entry.status = "active";
-        entry.usage.rate_limit_until = null;
-      }
-    }
-
     if (entry.status === "active" && isTokenExpired(entry.token)) {
       entry.status = "expired";
     }

diff --git a/src/auth/quota-skip.ts b/src/auth/quota-skip.ts
@@ -1,7 +1,13 @@
-import type { AccountEntry } from "./types.js";
+import type { AccountEntry, CodexQuota } from "./types.js";
+
+/** True when `quota` is present and its primary, secondary, or code-review bucket has `limit_reached === true`; a null/undefined quota yields false. */
+export function isQuotaExhausted(quota: CodexQuota | null | undefined): boolean {
+  if (!quota) return false;
+  return quota.rate_limit.limit_reached === true ||
+    quota.secondary_rate_limit?.limit_reached === true ||
+    quota.code_review_rate_limit?.limit_reached === true;
+}
 
 export function hasReachedCachedQuota(entry: AccountEntry): boolean {
-  return entry.cachedQuota?.rate_limit.limit_reached === true ||
-    entry.cachedQuota?.secondary_rate_limit?.limit_reached === true ||
-    entry.cachedQuota?.code_review_rate_limit?.limit_reached === true;
+  return isQuotaExhausted(entry.cachedQuota); // entry-level wrapper: a missing cachedQuota counts as not exhausted
 }
diff --git a/src/auth/types.ts b/src/auth/types.ts
@@ -6,7 +6,6 @@ export type AccountStatus =
   | "active"
   | "expired"
   | "quota_exhausted"
-  | "rate_limited"
   | "refreshing"
   | "disabled"
   | "banned";
@@ -27,7 +26,12 @@ export interface AccountUsage {
   image_request_failed_count?: number;
   empty_response_count: number;
   last_used: string | null;
-  rate_limit_until: string | null;
+  /**
+   * Legacy local-lock field, retired. It may still be present in on-disk data
+   * so that `migrateLegacyRateLimit` can fold it into cachedQuota and null it
+   * on load; new code MUST consult cachedQuota.*.limit_reached instead.
+   */
+  rate_limit_until?: string | null;
   /** Tracks the current rate limit window end (Unix seconds). When window rolls over, counters reset. */
   window_reset_at?: number | null;
   /** Per-window request count (resets when window expires). */