Skip to content

Commit e46ae72

Browse files
authored
Merge pull request #218 from PredicateSystems/tweak_count
Distinguish noun 'count' from verb 'count' to fix extraction task routing
2 parents 7a51750 + 0f6da44 commit e46ae72

3 files changed

Lines changed: 140 additions & 26 deletions

File tree

src/agents/planner-executor/extraction-keywords.ts

Lines changed: 125 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,13 @@ export function isTextExtractionTask(task: string): boolean {
230230
// Form-fill negative signal: if the task clearly involves filling a form,
231231
// it's not extraction even if it contains extraction-like keywords
232232
// (e.g., "Display name", "email" are field labels, not extraction targets)
233-
if (FORM_FILL_SIGNALS.some(signal => taskLower.includes(signal))) {
233+
const hasFormFillSignal = FORM_FILL_SIGNALS.some(signal => {
234+
if (signal.endsWith(' ')) {
235+
return taskLower.includes(signal);
236+
}
237+
return new RegExp(`\\b${escapeRegExp(signal)}\\b`).test(taskLower);
238+
});
239+
if (hasFormFillSignal) {
234240
return false;
235241
}
236242

@@ -321,7 +327,10 @@ export function isExtractionTask(task: string): boolean {
321327
taskLower.includes('content of') ||
322328
taskLower.includes('headline of') ||
323329
taskLower.includes('rating of') ||
324-
taskLower.includes('review of')
330+
taskLower.includes('review of') ||
331+
taskLower.includes('summarize') ||
332+
taskLower.includes('note the') ||
333+
taskLower.includes('list the')
325334
);
326335
}
327336

@@ -332,17 +341,34 @@ export function isExtractionTask(task: string): boolean {
332341
* is detected as an extraction task, instructing the planner to
333342
* use EXTRACT instead of CLICK for data that is already visible.
334343
*/
335-
export function getExtractionDomainGuidance(): string {
344+
export function getExtractionDomainGuidance(includeCounting = false): string {
345+
const countingSection = includeCounting
346+
? `
347+
348+
STEP 3 - COUNTING ACROSS FULL PAGE:
349+
If the task asks to COUNT items (e.g., "how many listings", "number of results", "count the products", "total number of entries"):
350+
- Use SCROLL_AND_COUNT instead of EXTRACT
351+
- Set "countTarget" to describe what to count (e.g., "listings", "products", "articles")
352+
- The system will scroll through the entire page and sum up counts
353+
- Do NOT use EXTRACT for counting tasks — EXTRACT only sees the current viewport
354+
355+
Example - count all listings:
356+
Goal: "note how many listings are available"
357+
Current URL: alibaba.com/search?SearchText=smartphones (correct page)
358+
{"action":"SCROLL_AND_COUNT","countTarget":"product listings","goal":"Count total product listings","verify":[]}
359+
`
360+
: '';
361+
336362
return `
337363
338364
IMPORTANT: Extraction Task Planning Rules
339-
========================================
365+
=======================================
340366
341367
STEP 1 - CHECK CURRENT URL:
342368
Before choosing an action, compare the Current URL to the goal.
343369
- Does the current page contain the data requested?
344370
- If the goal mentions a specific section/page (e.g., "show hn", "top stories", "/show"), check if the URL matches.
345-
- If you are NOT on the right page, NAVIGATE to the correct URL first.
371+
- If you are NOT on the right page, NAVIGATE or CLICK to navigate to the correct page first.
346372
347373
STEP 2 - EXTRACT VISIBLE DATA:
348374
If the data is VISIBLE in the page context:
@@ -352,7 +378,7 @@ If the data is VISIBLE in the page context:
352378
CRITICAL: Do NOT click on links to external sites when extracting.
353379
- Post/article titles often link to EXTERNAL sites
354380
- To extract a title that is visible, use EXTRACT directly on the current page
355-
- Only click if you need to navigate to a detail page (e.g., for comments)
381+
- Only click if you need to navigate to a detail page to access the data (e.g., nutritional info on a recipe page)
356382
357383
Example - wrong page, need to navigate first:
358384
Goal: "extract the title of the first showhn post on hackernews show"
@@ -369,18 +395,12 @@ Goal: "find the price of the first laptop"
369395
Current URL: store.com/laptops (correct page, prices visible)
370396
{"action":"EXTRACT","target":"price of first laptop","goal":"Extract the price of the first laptop listing","verify":[],"reasoning":"prices are visible in listing elements"}
371397
372-
STEP 3 - COUNTING ACROSS FULL PAGE:
373-
If the task asks to COUNT items ("how many", "number of", "count", "total"):
374-
- Use SCROLL_AND_COUNT instead of EXTRACT
375-
- Set "countTarget" to describe what to count (e.g., "listings", "products", "articles")
376-
- The system will scroll through the entire page and sum up counts
377-
- Do NOT use EXTRACT for counting tasks — EXTRACT only sees the current viewport
378-
379-
Example - count all listings:
380-
Goal: "note how many listings are available"
381-
Current URL: alibaba.com/search?SearchText=smartphones (correct page)
382-
{"action":"SCROLL_AND_COUNT","countTarget":"product listings","goal":"Count total product listings","verify":[]}
383-
`;
398+
Example - data on a detail page, need to click first:
399+
Goal: "summarize the calorie count from a recipe's nutritional information"
400+
Current URL: allrecipes.com/search?q=cookies (search results, not a recipe page)
401+
{"action":"CLICK","intent":"first recipe link","input":"Best Chocolate Chip Cookies","verify":[],"reasoning":"need to navigate to recipe detail page for nutritional info"}
402+
${countingSection}
403+
`;
384404
}
385405

386406
// ---------------------------------------------------------------------------
@@ -392,22 +412,83 @@ const COUNTING_PHRASES: readonly string[] = [
392412
'how much',
393413
'number of',
394414
'count the',
395-
'count of',
415+
'count all',
416+
'count each',
417+
'count every',
396418
'total number',
397-
'total count',
419+
'total count of',
398420
'how numerous',
399421
];
400422

401-
const COUNTING_VERBS: readonly string[] = ['count', 'tally', 'enumerate'];
423+
// NOTE: "count of" is intentionally excluded from COUNTING_PHRASES because
424+
// it is ambiguous: "count of items" (verb phrase) vs "word count of the
425+
// article" (noun compound). The "number of" phrase covers the counting
426+
// semantics of "count of". Bare "count" is handled by Tier 3 below.
427+
428+
const COUNTING_VERBS: readonly string[] = ['tally', 'enumerate'];
429+
430+
// Words that can syntactically precede "count" when it is used as a VERB
431+
// (imperative, infinitive, or after an auxiliary/modal). This set is finite
432+
// and well-defined in English grammar. Any word NOT in this set, appearing
433+
// immediately before "count", indicates a noun compound like "calorie count",
434+
// "word count", "error count" — regardless of what the modifier noun is.
435+
const COUNT_VERB_PRECEDERS = new Set([
436+
// Infinitive marker
437+
'to',
438+
// Modals and auxiliaries
439+
'can',
440+
'could',
441+
'will',
442+
'would',
443+
'shall',
444+
'should',
445+
'must',
446+
'may',
447+
'might',
448+
'do',
449+
'did',
450+
'does',
451+
'have',
452+
'has',
453+
'had',
454+
// Polite / adverbial markers
455+
'please',
456+
'just',
457+
'also',
458+
'even',
459+
'still',
460+
'not',
461+
'never',
462+
'only',
463+
// Conjunctions that continue an action sequence
464+
'and',
465+
'or',
466+
'but',
467+
'then',
468+
// Verbs that take infinitive complements
469+
'going',
470+
'try',
471+
'want',
472+
'need',
473+
'help',
474+
'let',
475+
'plan',
476+
'attempt',
477+
'aim',
478+
'start',
479+
'begin',
480+
]);
402481

403482
export function isCountingTask(task: string): boolean {
404483
if (!task) return false;
405484
const taskLower = task.toLowerCase();
406485

486+
// Tier 1: Unambiguous counting phrases (substring match)
407487
if (COUNTING_PHRASES.some(phrase => taskLower.includes(phrase))) {
408488
return true;
409489
}
410490

491+
// Tier 2: Unambiguous counting verbs (word boundary match)
411492
if (
412493
COUNTING_VERBS.some(verb =>
413494
new RegExp(`\\b${escapeRegExp(verb)}(s|ed|ing)?\\b`).test(taskLower)
@@ -416,6 +497,29 @@ export function isCountingTask(task: string): boolean {
416497
return true;
417498
}
418499

500+
// Tier 3: Context-aware "count" — distinguish verb from noun compound.
501+
//
502+
// In English, "[noun] count" is a compound noun meaning "the count of
503+
// [noun]" (e.g., "calorie count", "word count", "page count", "error
504+
// count"). As a VERB, "count" appears in imperative position (start of
505+
// clause) or after auxiliaries/modals/infinitive markers.
506+
//
507+
// Strategy: collect the word immediately before each "count". If at least
508+
// one such word exists and NONE are in COUNT_VERB_PRECEDERS, the task is
509+
// treated as noun-compound only: NOT a counting task. Otherwise (no "count"
510+
// has a preceding word, or some preceder is a verb marker) it IS one.
511+
if (/\bcount(s|ed|ing)?\b/i.test(taskLower)) {
512+
const precedingMatches = [...taskLower.matchAll(/\b([a-z]+)\s+count(s|ed|ing)?\b/g)];
513+
if (precedingMatches.length > 0) {
514+
const allAreNounCompounds = precedingMatches.every(m => !COUNT_VERB_PRECEDERS.has(m[1]));
515+
if (allAreNounCompounds) {
516+
return false;
517+
}
518+
}
519+
// "count" with no preceding word (imperative) or preceded by a verb marker
520+
return true;
521+
}
522+
419523
return false;
420524
}
421525

src/agents/planner-executor/planner-executor-agent.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1320,7 +1320,9 @@ export class PlannerExecutorAgent {
13201320
isExtractAction &&
13211321
finalOutcome.status === StepStatus.SUCCESS &&
13221322
finalOutcome.extractedData &&
1323-
(isTextExtractionTask(task) || isCountingTask(task)) &&
1323+
(plannerAction.action === 'SCROLL_AND_COUNT'
1324+
? isCountingTask(task)
1325+
: isTextExtractionTask(task) || isCountingTask(task)) &&
13241326
(!taskHasInteractionLocal || hasNonExtractActionLocal)
13251327
) {
13261328
if (this.config.verbose) {
@@ -1847,7 +1849,9 @@ export class PlannerExecutorAgent {
18471849
isRetryExtractOrCount &&
18481850
finalOutcome.status === StepStatus.SUCCESS &&
18491851
finalOutcome.extractedData &&
1850-
(isTextExtractionTask(task) || isCountingTask(task)) &&
1852+
(plannerAction.action === 'SCROLL_AND_COUNT'
1853+
? isCountingTask(task)
1854+
: isTextExtractionTask(task) || isCountingTask(task)) &&
18511855
(!taskHasInteraction || hasNonExtractAction)
18521856
) {
18531857
success = true;

src/agents/planner-executor/prompts.ts

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,11 @@
66
*/
77

88
import type { ActionRecord } from './plan-models';
9-
import { isExtractionTask, getExtractionDomainGuidance } from './extraction-keywords';
9+
import {
10+
isExtractionTask,
11+
isCountingTask,
12+
getExtractionDomainGuidance,
13+
} from './extraction-keywords';
1014

1115
// ---------------------------------------------------------------------------
1216
// Stepwise Planner Prompt (ReAct-style)
@@ -63,7 +67,7 @@ Actions:
6367
- TYPE: Type text into a SINGLE form field. Prefer FILL_FORM for forms with multiple fields.
6468
- TYPE_AND_SUBMIT: Type text into a search box and submit. Set "input" to the SEARCH QUERY from the goal (NOT the element label).
6569
- SCROLL: Scroll page. Set "direction" to "up" or "down".
66-
- SCROLL_AND_COUNT: Scroll through the ENTIRE page and count items. Use when the task asks "how many", "number of", "count", or "total". Set "countTarget" to describe what to count (e.g., "listings", "products", "articles"). The system scrolls viewport-by-viewport, counts matching items at each position, and sums the total.
70+
- SCROLL_AND_COUNT: Scroll through the ENTIRE page and count items. Use ONLY when the task asks to enumerate items (e.g., "how many listings", "number of results", "count the products"). Do NOT use when "count" is a data value to read (e.g., "calorie count", "word count" = use EXTRACT instead). Set "countTarget" to describe what to count.
6771
- WAIT: Wait for content to appear when a follow-up verification is needed.
6872
- EXTRACT: Extract the requested information from the current page when the task is data collection.
6973
- STUCK: Use only when the page state is blocked and you cannot make safe forward progress.
@@ -124,7 +128,9 @@ RULES:
124128
14. Treat history results "success", "skipped", and "vision_fallback" as already satisfied. Do not repeat those steps; choose the next incomplete part of the goal.`;
125129

126130
// Inject extraction-specific guidance when the goal is an extraction task
127-
const extractionGuidance = isExtractionTask(goal) ? getExtractionDomainGuidance() : '';
131+
const extractionGuidance = isExtractionTask(goal)
132+
? getExtractionDomainGuidance(isCountingTask(goal))
133+
: '';
128134

129135
// NOTE: /no_think MUST be at the START of user message for Qwen3 models
130136
const user = `/no_think

0 commit comments

Comments
 (0)