Skip to content

Commit d1abe40

Browse files
author
SentienceDEV
committed
feat: add LLM 429 rate-limit retry with exponential backoff
- Add llmRetryBackoffMs (5000ms default) and llmMaxRetries (3 default) to RetryConfig
- Add callPlannerWithRetry() method that wraps planner.generate() with retry logic
- Exponential backoff with Groq-specific retry-after hint extraction
- All config presets spread DEFAULT_CONFIG.retry for forward compatibility
- All 67 tests pass; type-check and build are clean
1 parent 8d96d17 commit d1abe40

3 files changed

Lines changed: 61 additions & 7 deletions

File tree

package-lock.json

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/agents/planner-executor/config.ts

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,10 @@ export interface RetryConfig {
7272
executorRepairAttempts: number;
7373
/** Maximum replan attempts (default: 2) */
7474
maxReplans: number;
75+
/** Base delay in ms for LLM 429 rate-limit retries (default: 5000) */
76+
llmRetryBackoffMs: number;
77+
/** Maximum retries for LLM 429 errors before giving up (default: 3) */
78+
llmMaxRetries: number;
7579
}
7680

7781
/**
@@ -153,6 +157,8 @@ export const DEFAULT_CONFIG: PlannerExecutorConfig = {
153157
verifyMaxAttempts: 4,
154158
executorRepairAttempts: 2,
155159
maxReplans: 2,
160+
llmRetryBackoffMs: 5000,
161+
llmMaxRetries: 3,
156162
},
157163
stepwise: {
158164
maxSteps: 20,
@@ -221,8 +227,8 @@ export function getConfigPreset(preset: ConfigPreset | string): PlannerExecutorC
221227
limitMax: 400, // Higher max for complex pages (was 200)
222228
},
223229
retry: {
230+
...DEFAULT_CONFIG.retry,
224231
verifyTimeoutMs: 15000,
225-
verifyPollMs: 500,
226232
verifyMaxAttempts: 6,
227233
executorRepairAttempts: 3,
228234
maxReplans: 2,
@@ -246,8 +252,8 @@ export function getConfigPreset(preset: ConfigPreset | string): PlannerExecutorC
246252
return {
247253
...DEFAULT_CONFIG,
248254
retry: {
255+
...DEFAULT_CONFIG.retry,
249256
verifyTimeoutMs: 10000,
250-
verifyPollMs: 500,
251257
verifyMaxAttempts: 4,
252258
executorRepairAttempts: 2,
253259
maxReplans: 2,
@@ -265,8 +271,8 @@ export function getConfigPreset(preset: ConfigPreset | string): PlannerExecutorC
265271
return {
266272
...DEFAULT_CONFIG,
267273
retry: {
274+
...DEFAULT_CONFIG.retry,
268275
verifyTimeoutMs: 5000,
269-
verifyPollMs: 500,
270276
verifyMaxAttempts: 2,
271277
executorRepairAttempts: 1,
272278
maxReplans: 1,
@@ -285,8 +291,8 @@ export function getConfigPreset(preset: ConfigPreset | string): PlannerExecutorC
285291
return {
286292
...DEFAULT_CONFIG,
287293
retry: {
294+
...DEFAULT_CONFIG.retry,
288295
verifyTimeoutMs: 20000,
289-
verifyPollMs: 500,
290296
verifyMaxAttempts: 8,
291297
executorRepairAttempts: 3,
292298
maxReplans: 3,

src/agents/planner-executor/planner-executor-agent.ts

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -831,7 +831,7 @@ export class PlannerExecutorAgent {
831831

832832
let plannerResp: LLMResponse | null = null;
833833
try {
834-
plannerResp = await this.planner.generate(systemPrompt, userPrompt, {
834+
plannerResp = await this.callPlannerWithRetry(systemPrompt, userPrompt, {
835835
temperature: this.config.plannerTemperature,
836836
max_tokens: this.config.plannerMaxTokens,
837837
});
@@ -1214,6 +1214,54 @@ export class PlannerExecutorAgent {
12141214
};
12151215
}
12161216

1217+
/**
 * Call the planner LLM, retrying with exponential backoff on 429 rate-limit errors.
 *
 * Without this, a single 429 burns an entire step. With retry, we wait for the
 * rate limit to reset and try again. The wait is the exponential backoff
 * (`llmRetryBackoffMs * 2^attempt`), raised to the provider's own hint when the
 * error message contains one ("try again in X.XXs", as Groq reports it).
 *
 * @param systemPrompt - System prompt, passed through unchanged to `planner.generate`.
 * @param userPrompt - User prompt, passed through unchanged to `planner.generate`.
 * @param options - Generation options, passed through unchanged to `planner.generate`.
 * @returns The first successful planner response.
 * @throws The original planner error when it is not a rate-limit error, or when
 *   `llmMaxRetries` retries have already been spent.
 */
private async callPlannerWithRetry(
  systemPrompt: string,
  userPrompt: string,
  options: { temperature: number; max_tokens: number }
): Promise<LLMResponse> {
  const { llmRetryBackoffMs, llmMaxRetries } = this.config.retry;

  // Normalize the config so the loop below always makes at least one call and
  // always ends by returning or rethrowing the planner's own error. A negative,
  // NaN or fractional retry count would otherwise skip the call entirely or
  // fall out of the loop and mask the real failure.
  const maxRetries = llmMaxRetries > 0 ? Math.floor(llmMaxRetries) : 0;
  const baseDelayMs = llmRetryBackoffMs > 0 ? llmRetryBackoffMs : 0;

  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    try {
      return await this.planner.generate(systemPrompt, userPrompt, options);
    } catch (err: unknown) {
      const msg = err instanceof Error ? err.message : String(err);

      // Match 429 only as a standalone number so that e.g. "4290 tokens" or a
      // request id containing "429" does not trigger a retry.
      const is429 = /(?<!\d)429(?!\d)/.test(msg) || /rate[_ ]limit/i.test(msg);

      if (!is429 || attempt >= maxRetries) {
        throw err;
      }

      let delayMs = baseDelayMs * Math.pow(2, attempt);

      // Try to extract a retry hint from the error message (Groq includes
      // "try again in X.XXs"). The pattern only accepts a well-formed decimal,
      // and the finite check keeps a bad parse from turning the delay into NaN
      // (which setTimeout would treat as "fire immediately").
      const retryMatch = msg.match(/try again in (\d+(?:\.\d+)?|\.\d+)\s*s/i);
      const hintSeconds = retryMatch ? parseFloat(retryMatch[1] ?? '') : NaN;
      if (Number.isFinite(hintSeconds)) {
        // Add 500ms of slack so we do not land right on the reset boundary.
        delayMs = Math.max(delayMs, Math.ceil(hintSeconds * 1000) + 500);
      }

      if (this.config.verbose) {
        console.log(
          `[PLANNER RETRY] 429 rate limit hit (attempt ${attempt + 1}/${maxRetries}), waiting ${Math.round(delayMs / 1000)}s...`
        );
      }

      await new Promise<void>(resolve => setTimeout(resolve, delayMs));
    }
  }

  // Unreachable: every iteration returns or throws, and the final iteration
  // always rethrows. Kept so TypeScript sees a terminating path.
  throw new Error('Planner retry exhausted');
}
1264+
12171265
private shouldAbortOnPlannerFailure(message: string): boolean {
12181266
const normalized = message.toLowerCase();
12191267
return (

0 commit comments

Comments
 (0)