Skip to content

Commit 9ff4fc7

Browse files
authored
Merge pull request #205 from PredicateSystems/fix_ext2
Honor successful search submission verification
2 parents 2890832 + 299e99d commit 9ff4fc7

4 files changed

Lines changed: 407 additions & 5 deletions

File tree

src/agents/planner-executor/planner-executor-agent.ts

Lines changed: 197 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -892,6 +892,7 @@ export class PlannerExecutorAgent {
892892
}
893893
}
894894

895+
plannerAction = this.promoteVisibleResultClick(task, ctx, plannerAction);
895896
this.composableHeuristics.setStepHints(plannerAction.heuristicHints || []);
896897
this.emitPlannerAction(stepNum, plannerAction, plannerActionSource);
897898

@@ -1182,6 +1183,20 @@ export class PlannerExecutorAgent {
11821183
};
11831184
}
11841185

1186+
if (this.isCopiedPlaceholderNavigation(plannerAction.target, currentUrl, task)) {
1187+
return {
1188+
stepId: stepNum,
1189+
goal: stepGoal,
1190+
status: StepStatus.SKIPPED,
1191+
actionTaken: 'SKIPPED(placeholder_navigation)',
1192+
verificationPassed: true,
1193+
usedVision: false,
1194+
durationMs: Date.now() - stepStart,
1195+
urlBefore: currentUrl,
1196+
urlAfter: currentUrl,
1197+
};
1198+
}
1199+
11851200
try {
11861201
await runtime.goto(plannerAction.target);
11871202
const verificationPassed = await this.verifyStepOutcome(runtime, plannerAction);
@@ -1414,10 +1429,29 @@ export class PlannerExecutorAgent {
14141429
const elementId = parsed.args[0] as number;
14151430

14161431
if (parsed.action === 'CLICK') {
1432+
const targetElement =
1433+
activeCtx.snapshot?.elements.find(element => element.id === elementId) || null;
14171434
await runtime.click(elementId);
14181435
await this.handlePostClickEffects(runtime, plannerAction, activeCtx);
1419-
const verificationPassed = await this.verifyStepOutcome(runtime, plannerAction);
14201436
const urlAfter = await runtime.getCurrentUrl();
1437+
const hasUrlVerification = (plannerAction.verify || []).some(
1438+
predicate =>
1439+
predicate.predicate === 'url_contains' ||
1440+
predicate.predicate === 'url_equals' ||
1441+
predicate.predicate === 'url_matches'
1442+
);
1443+
const relevantUrlChange = isUrlChangeRelevantToIntent(
1444+
currentUrl,
1445+
urlAfter,
1446+
plannerAction,
1447+
targetElement
1448+
);
1449+
const navigationSatisfied =
1450+
relevantUrlChange &&
1451+
(!hasUrlVerification ||
1452+
this.clickedHrefMatchesNavigation(currentUrl, urlAfter, targetElement));
1453+
const verificationPassed =
1454+
navigationSatisfied || (await this.verifyStepOutcome(runtime, plannerAction));
14211455
return {
14221456
stepId: stepNum,
14231457
goal: plannerAction.intent || 'Click element',
@@ -1437,6 +1471,7 @@ export class PlannerExecutorAgent {
14371471
const elements = activeCtx.snapshot?.elements || [];
14381472
const inputElement = elements.find(element => element.id === elementId) || null;
14391473
const isSearchLike = isSearchLikeTypeAndSubmit(plannerAction, inputElement);
1474+
let submissionSatisfied = false;
14401475

14411476
// Submit with Enter key for TYPE_AND_SUBMIT, plus planner TYPE actions that clearly target search.
14421477
if (
@@ -1448,7 +1483,6 @@ export class PlannerExecutorAgent {
14481483
const hasRetryBudget = this.config.retry.executorRepairAttempts > 0;
14491484

14501485
let changedUrl: string | null = null;
1451-
let submissionSatisfied = false;
14521486

14531487
const checkSubmissionSatisfied = async (): Promise<boolean> => {
14541488
if (
@@ -1535,7 +1569,8 @@ export class PlannerExecutorAgent {
15351569
}
15361570
}
15371571

1538-
const verificationPassed = await this.verifyStepOutcome(runtime, plannerAction);
1572+
const verificationPassed =
1573+
submissionSatisfied || (await this.verifyStepOutcome(runtime, plannerAction));
15391574
const urlAfter = await runtime.getCurrentUrl();
15401575

15411576
return {
@@ -2055,6 +2090,127 @@ export class PlannerExecutorAgent {
20552090
};
20562091
}
20572092

2093+
/**
 * Decides whether a NAVIGATE target looks like an example.com placeholder
 * that does not belong to the current browsing context.
 *
 * @param targetUrl - URL the planner action wants to navigate to.
 * @param currentUrl - URL of the page the agent is on right now.
 * @param task - Task text; checked for a literal mention of example.com.
 * @returns `true` only when the target is hosted on example.com while the
 *   current page is not on example.com and the task never mentions it.
 */
private isCopiedPlaceholderNavigation(
  targetUrl: string,
  currentUrl: string,
  task: string
): boolean {
  // A target that is not on example.com is never treated as a placeholder.
  const targetIsPlaceholderHost = this.isExampleDotComUrl(targetUrl);
  if (!targetIsPlaceholderHost) {
    return false;
  }

  // Already browsing example.com: navigating within it is legitimate.
  if (this.isExampleDotComUrl(currentUrl)) {
    return false;
  }

  // The task itself names example.com, so the target is considered intended.
  const taskMentionsPlaceholderHost = /\bexample\.com\b/i.test(task);
  return !taskMentionsPlaceholderHost;
}
2108+
2109+
/**
 * Reports whether a URL's hostname is example.com or one of its subdomains.
 *
 * @param url - Absolute URL string to inspect.
 * @returns `true` for `example.com` and `*.example.com` hosts; `false` for
 *   any other host or when the string cannot be parsed as a URL.
 */
private isExampleDotComUrl(url: string): boolean {
  let host: string;
  try {
    host = new URL(url).hostname.toLowerCase();
  } catch {
    // Unparseable input (relative path, empty string, ...) is not a match.
    return false;
  }

  if (host === 'example.com') {
    return true;
  }
  return host.endsWith('.example.com');
}
2117+
2118+
/**
 * Rewrites a SCROLL or WAIT planner action into a CLICK on a result link
 * when the snapshot already contains a link that matches the task.
 *
 * Any other action, or a snapshot without a suitable link, is returned
 * unchanged (same object reference).
 *
 * @param task - Task text used to decide whether result navigation is wanted.
 * @param ctx - Snapshot context whose `snapshot` supplies elements and URL.
 * @param plannerAction - Action proposed by the planner.
 * @returns The original action, or a copy converted to a CLICK with a
 *   `url_contains` verification (when one can be derived from the link's
 *   href) and a single heuristic hint targeting the link's label.
 */
private promoteVisibleResultClick(
  task: string,
  ctx: SnapshotContext,
  plannerAction: StepwisePlannerResponse
): StepwisePlannerResponse {
  // Only passive actions are candidates for promotion.
  if (!(plannerAction.action === 'SCROLL' || plannerAction.action === 'WAIT')) {
    return plannerAction;
  }

  const resultLink = this.findVisibleResultLink(task, ctx.snapshot);
  if (resultLink === null) {
    return plannerAction;
  }

  const linkLabel = this.elementLabel(resultLink);
  const pageUrl = ctx.snapshot?.url || '';
  const urlSignal = this.hrefVerificationSignal(resultLink.href || '', pageUrl);

  // Fields listed after the spread override the planner's values; goal and
  // reasoning keep the planner's text when it provided any.
  return {
    ...plannerAction,
    action: 'CLICK',
    goal: plannerAction.goal || 'Open visible result link',
    intent: 'visible result link',
    input: linkLabel || plannerAction.input,
    verify: urlSignal ? [{ predicate: 'url_contains', args: [urlSignal] }] : [],
    heuristicHints: [
      {
        intent_pattern: 'visible_result_link',
        text_patterns: linkLabel ? [linkLabel] : [],
        role_filter: ['link'],
        priority: 20,
      },
    ],
    reasoning:
      plannerAction.reasoning ||
      'Visible result link matched the task goal; clicking it is more direct than scrolling.',
  };
}
2155+
2156+
/**
 * Picks the most important result-style link from a snapshot, provided the
 * task asks for opening a result.
 *
 * @param task - Task text checked via `taskWantsResultNavigation`.
 * @param snapshot - Snapshot whose `elements` are searched; may be absent.
 * @returns The qualifying link with the highest `importance` (missing
 *   importance counts as 0; the earliest element wins ties), or `null` when
 *   there are no elements, the task does not ask for result navigation, or
 *   no element qualifies.
 */
private findVisibleResultLink(
  task: string,
  snapshot: Snapshot | null | undefined
): SnapshotElement | null {
  const allElements = snapshot?.elements || [];
  if (allElements.length === 0) {
    return null;
  }
  if (!this.taskWantsResultNavigation(task)) {
    return null;
  }

  // Single pass that keeps the first element holding the maximum importance.
  let best: SnapshotElement | null = null;
  let bestImportance = 0;
  for (const element of allElements) {
    if (!this.isResultNavigationLink(element)) {
      continue;
    }
    const importance = element.importance || 0;
    if (best === null || importance > bestImportance) {
      best = element;
      bestImportance = importance;
    }
  }

  return best;
}
2171+
2172+
/**
 * Checks whether the task text asks to open a product/result style page.
 *
 * The text is lower-cased, runs of `_`/`-` become a space, and whitespace
 * runs are collapsed before matching.
 *
 * @param task - Task text.
 * @returns `true` when the text contains both an action word (click, open,
 *   pick, choose, select, go to, visit) and a target word (product, result,
 *   item, listing, detail page, details page).
 */
private taskWantsResultNavigation(task: string): boolean {
  const text = task.toLowerCase().replace(/[_-]+/g, ' ').replace(/\s+/g, ' ');

  const hasActionCue = /\b(click|open|pick|choose|select|go to|visit)\b/.test(text);
  if (!hasActionCue) {
    return false;
  }

  return /\b(product|result|item|listing|detail page|details page)\b/.test(text);
}
2178+
2179+
/**
 * Tells whether a snapshot element looks like a link to a result page.
 *
 * @param element - Snapshot element to classify.
 * @returns `false` unless the element has role `link` and a non-empty href.
 *   Otherwise `true` when the href contains a product-style path segment
 *   (`/dp/`, `/gp/product/`, `/product/`, `/products/`, `/item/`, `/items/`,
 *   `/p/`), or when the element is in the dominant group and its label is
 *   at least 15 characters long.
 */
private isResultNavigationLink(element: SnapshotElement): boolean {
  const isLink = (element.role || '').toLowerCase() === 'link';
  const normalizedHref = (element.href || '').toLowerCase();
  if (!isLink || !normalizedHref) {
    return false;
  }

  const hasProductStylePath =
    /\/(?:dp|gp\/product|product|products|item|items|p)\//.test(normalizedHref);
  if (hasProductStylePath) {
    return true;
  }

  // Fallback: a sufficiently long label inside the dominant group.
  if (!element.inDominantGroup) {
    return false;
  }
  return this.elementLabel(element).length >= 15;
}
2193+
2194+
/**
 * Returns the first non-blank human-readable label of an element.
 *
 * Candidates are tried in order: `text`, `ariaLabel`, `name`. Each candidate
 * is trimmed before it is tested, so a whitespace-only `text` no longer
 * hides a usable `ariaLabel` or `name`.
 *
 * @param element - Snapshot element to label.
 * @returns The trimmed label, or an empty string when every candidate is
 *   missing or blank.
 */
private elementLabel(element: SnapshotElement): string {
  for (const candidate of [element.text, element.ariaLabel, element.name]) {
    const trimmed = (candidate || '').trim();
    if (trimmed) {
      return trimmed;
    }
  }
  return '';
}
2197+
2198+
/**
 * Derives a substring of the destination URL that can be used as a
 * `url_contains` check after clicking a link.
 *
 * @param href - The link's href; may be relative.
 * @param baseUrl - URL used to resolve a relative href; an empty string
 *   means no base is available.
 * @returns The resolved pathname when it is more specific than `/`,
 *   otherwise the hostname. Returns `null` when the href is blank, resolves
 *   to a non-http(s) scheme, or cannot be resolved and is not root-relative.
 */
private hrefVerificationSignal(href: string, baseUrl: string): string | null {
  const trimmedHref = href.trim();
  if (!trimmedHref) {
    return null;
  }

  let parsed: URL;
  try {
    parsed = new URL(trimmedHref, baseUrl || undefined);
  } catch {
    // No usable base: a root-relative href is still a meaningful path signal.
    return trimmedHref.startsWith('/') ? trimmedHref : null;
  }

  // Links such as `javascript:void(0)`, `mailto:` or `tel:` parse fine but
  // their "pathname" (e.g. `void(0)`) never shows up in a page URL, so they
  // must not produce a verification predicate.
  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
    return null;
  }

  if (parsed.pathname && parsed.pathname !== '/') {
    return parsed.pathname;
  }
  return parsed.hostname || null;
}
2213+
20582214
private summarizePlannerActionTarget(plannerAction: StepwisePlannerResponse): string | null {
20592215
if (plannerAction.action === 'TYPE' || plannerAction.action === 'TYPE_AND_SUBMIT') {
20602216
return plannerAction.input || plannerAction.intent || plannerAction.target || null;
@@ -2287,6 +2443,44 @@ export class PlannerExecutorAgent {
22872443
return false;
22882444
}
22892445

2446+
/**
 * Checks whether the page reached after a click corresponds to the href of
 * the clicked element.
 *
 * Both URLs are resolved against `previousUrl`, must use http or https, and
 * are compared after dropping the fragment and query string and applying
 * `normalizeNavigationUrl`.
 *
 * @param previousUrl - URL before the click; also the base for resolution.
 * @param nextUrl - URL observed after the click.
 * @param element - Clicked element, or `null` when it was not found.
 * @returns `true` when the normalized URLs are equal; `false` when the
 *   element has no href, a URL fails to parse, or a scheme is not http(s).
 */
private clickedHrefMatchesNavigation(
  previousUrl: string,
  nextUrl: string,
  element: SnapshotElement | null
): boolean {
  const rawHref = element?.href?.trim();
  if (!rawHref) {
    return false;
  }

  const webSchemes = ['http:', 'https:'];

  try {
    const linkUrl = new URL(rawHref, previousUrl);
    const landedUrl = new URL(nextUrl, previousUrl);

    const bothAreWebUrls =
      webSchemes.includes(linkUrl.protocol) && webSchemes.includes(landedUrl.protocol);
    if (!bothAreWebUrls) {
      return false;
    }

    // Fragment and query string are ignored for the comparison.
    for (const candidate of [linkUrl, landedUrl]) {
      candidate.hash = '';
      candidate.search = '';
    }

    const expectedKey = this.normalizeNavigationUrl(linkUrl.toString());
    const actualKey = this.normalizeNavigationUrl(landedUrl.toString());
    return expectedKey === actualKey;
  } catch {
    return false;
  }
}
2479+
2480+
/**
 * Produces a comparison key for a URL string.
 *
 * @param url - URL text to normalize.
 * @returns The input with surrounding whitespace removed, all trailing
 *   slashes stripped, and every character lower-cased (path included).
 */
private normalizeNavigationUrl(url: string): string {
  const withoutTrailingSlashes = url.trim().replace(/\/+$/, '');
  return withoutTrailingSlashes.toLowerCase();
}
2483+
22902484
private async isCartAdditionTerminal(
22912485
runtime: AgentRuntime,
22922486
task: string,

src/agents/planner-executor/prompts.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ CRITICAL RULE FOR ADD TO CART:
7979
- Set "input" to "Add to Cart" (or exact button text from elements)
8080
8181
Output ONLY valid JSON (no markdown, no \`\`\`):
82-
{"action":"NAVIGATE","target":"https://example.com/search","verify":[{"predicate":"url_contains","args":["search"]}],"reasoning":"open the search page"}
82+
{"action":"NAVIGATE","target":"https://shop.test/search","verify":[{"predicate":"url_contains","args":["search"]}],"reasoning":"open the known search page"}
8383
{"action":"TYPE_AND_SUBMIT","intent":"searchbox","input":"wireless headphones","verify":[{"predicate":"url_contains","args":["search"]}],"reasoning":"search for product"}
8484
{"action":"CLICK","intent":"product link","input":"Sony WH-1000XM4 Wireless...","verify":[],"required":true,"heuristic_hints":[{"intent_pattern":"product_link","text_patterns":["sony wh-1000xm4"],"role_filter":["link"],"priority":8}],"reasoning":"click first product result"}
8585
{"action":"CLICK","intent":"add to cart button","input":"Add to Cart","verify":[],"required":true,"heuristic_hints":[{"intent_pattern":"add_to_cart","text_patterns":["add to cart","buy now"],"role_filter":["button"],"priority":10}],"reasoning":"add item to cart"}
@@ -95,7 +95,8 @@ RULES:
9595
7. "heuristic_hints" entries may use snake_case fields: "intent_pattern", "text_patterns", "role_filter", "attribute_patterns", "priority"
9696
8. Output ONLY JSON - no <think> tags, no markdown, no prose
9797
9. Do NOT output <think> or any reasoning
98-
10. Do NOT return DONE until ALL parts of the goal are complete`;
98+
10. Do NOT return DONE until ALL parts of the goal are complete
99+
11. Never copy example URLs from these instructions. Only NAVIGATE to a URL from the user's task, the current page, or a visible element.`;
99100

100101
// NOTE: /no_think MUST be at the START of user message for Qwen3 models
101102
const user = `/no_think

0 commit comments

Comments
 (0)