diff --git a/src/ai/researcher.ts b/src/ai/researcher.ts
index 962f69f..25950a6 100644
--- a/src/ai/researcher.ts
+++ b/src/ai/researcher.ts
@@ -15,6 +15,7 @@ import { type HtmlDiffResult, htmlDiff } from '../utils/html-diff.ts';
import { codeToMarkdown, isBodyEmpty } from '../utils/html.ts';
import { createDebug, pluralize, tag } from '../utils/logger.js';
import { collectInteractiveNodes, diffAriaSnapshots } from '../utils/aria.ts';
+import { isErrorPage } from '../utils/error-page.ts';
import { loop } from '../utils/loop.ts';
import type { Agent } from './agent.js';
import type { Conversation } from './conversation.js';
@@ -108,6 +109,18 @@ export class Researcher implements Agent {
const isOnCurrentState = this.actionResult!.getStateHash() === this.stateManager.getCurrentState()?.hash;
await this.ensureNavigated(state.url, screenshot && this.provider.hasVision());
+ if (isErrorPage(this.actionResult!)) {
+ tag('warn').log(`Detected error page at ${state.url}`);
+ return dedent`
+ ## Error Page Detected
+
+ URL: ${state.url}
+ Title: ${this.actionResult!.title || 'N/A'}
+
+ Research skipped. Navigate to a valid page to continue.
+ `;
+ }
+
debugLog('Researching web page:', this.actionResult!.url);
this.hasScreenshotToAnalyze = screenshot && this.provider.hasVision() && isOnCurrentState;
@@ -393,6 +406,17 @@ export class Researcher implements Agent {
return dedent`
Analyze this web page and provide a comprehensive research report in markdown format.
+
+ IMPORTANT: First check if this looks like an error page (404, 500, access denied,
+ not found, server error, forbidden, or similar). If so, respond ONLY with:
+
+ ## Error Page Detected
+ Type: [error type]
+ Reason: [what indicates this is an error page]
+
+ Then stop - do not provide normal research output for error pages.
+
+
${this.buildResearchTaskPrompt()}
URL: ${this.actionResult.url || 'Unknown'}
diff --git a/src/utils/error-page.ts b/src/utils/error-page.ts
new file mode 100644
index 0000000..8b6bee2
--- /dev/null
+++ b/src/utils/error-page.ts
@@ -0,0 +1,23 @@
+import type { ActionResult } from '../action-result.js';
+import { isBodyEmpty } from './html.js';
+
+// Status phrases ("<code> <reason>") searched for, case-insensitively, in a page's title and headings.
+const HTTP_ERRORS = [
+  '400 Bad Request',
+  '401 Unauthorized',
+  '403 Forbidden',
+  '404 Not Found',
+  '405 Method Not Allowed',
+  '408 Request Timeout',
+  '500 Internal Server Error',
+  '502 Bad Gateway',
+  '503 Service Unavailable',
+  '504 Gateway Timeout',
+];
+
+// A page whose trimmed <body> markup is shorter than this many characters is treated as an error page.
+const SMALL_PAGE_THRESHOLD = 500;
+
+/**
+ * Heuristically decides whether a captured page is an error page or an empty page.
+ *
+ * Rules are evaluated in order; the first one that matches wins:
+ * 1. `title`, `h1` or `h2` contains one of the `HTTP_ERRORS` phrases
+ *    (case-insensitive substring match, so "MyApp - 404 Not Found" matches
+ *    while a bare "404" or "Room 404" does not);
+ * 2. `html` is missing/empty, or `isBodyEmpty` reports an empty body;
+ * 3. the trimmed markup between the `<body>` tags is shorter than
+ *    `SMALL_PAGE_THRESHOLD` characters.
+ *
+ * NOTE(review): rule 3 measures raw markup length, so a legitimate but very
+ * small page (e.g. a minimal login form) is also reported as an error page.
+ *
+ * @param actionResult - Page snapshot to inspect; only `title`, `h1`, `h2` and `html` are read.
+ * @returns `true` when any of the rules above matches, otherwise `false`.
+ */
+export function isErrorPage(actionResult: ActionResult): boolean {
+  // Lower-case both sides once instead of on every inner-loop comparison.
+  const headings = [actionResult.title, actionResult.h1, actionResult.h2]
+    .filter((text): text is string => Boolean(text))
+    .map((text) => text.toLowerCase());
+  const phrases = HTTP_ERRORS.map((phrase) => phrase.toLowerCase());
+
+  if (headings.some((text) => phrases.some((phrase) => text.includes(phrase)))) return true;
+
+  if (!actionResult.html || isBodyEmpty(actionResult.html)) return true;
+
+  // Capture everything between the opening <body ...> tag (with any attributes) and </body>.
+  const bodyMatch = actionResult.html.match(/<body[^>]*>([\s\S]*?)<\/body>/i);
+  const bodyMarkup = bodyMatch?.[1];
+
+  // Documents without a <body> element are not judged by size.
+  return bodyMarkup !== undefined && bodyMarkup.trim().length < SMALL_PAGE_THRESHOLD;
+}
diff --git a/tests/unit/error-page.test.ts b/tests/unit/error-page.test.ts
new file mode 100644
index 0000000..bc67d1a
--- /dev/null
+++ b/tests/unit/error-page.test.ts
@@ -0,0 +1,159 @@
+import { describe, expect, it } from 'vitest';
+import { ActionResult } from '../../src/action-result.ts';
+import { isErrorPage } from '../../src/utils/error-page.ts';
+
+/**
+ * Test helper that builds an ActionResult for isErrorPage.
+ *
+ * `url` defaults to '/test' and `title` to ''. When `data.html` is not given,
+ * the html is generated from a template that embeds `data.h1` and `data.h2`;
+ * h1/h2 are never passed to the ActionResult constructor directly, so the
+ * heading tests presumably rely on ActionResult deriving them from the html
+ * (verify against ActionResult).
+ *
+ * NOTE(review): the template literal below looks like it lost its surrounding
+ * markup (heading/body tags) during extraction - confirm against the original commit.
+ */
+function createActionResult(data: { title?: string; h1?: string; h2?: string; html?: string; url?: string }): ActionResult {
+ const html = data.html ?? `${data.h1 ?? ''}
${data.h2 ?? ''}
`;
+ return new ActionResult({
+ url: data.url ?? '/test',
+ title: data.title ?? '',
+ html,
+ });
+}
+
+describe('isErrorPage', () => {
+ // Each case places a "<code> <reason>" status phrase in the title (or h1/h2) and expects detection.
+ // The phrases '405 Method Not Allowed' and '408 Request Timeout' have no case in this group.
+ describe('HTTP error detection', () => {
+ it('should detect 400 Bad Request', () => {
+ expect(isErrorPage(createActionResult({ title: '400 Bad Request' }))).toBe(true);
+ });
+
+ it('should detect 401 Unauthorized', () => {
+ expect(isErrorPage(createActionResult({ title: '401 Unauthorized' }))).toBe(true);
+ });
+
+ it('should detect 403 Forbidden', () => {
+ expect(isErrorPage(createActionResult({ title: '403 Forbidden' }))).toBe(true);
+ });
+
+ it('should detect 404 Not Found', () => {
+ expect(isErrorPage(createActionResult({ title: '404 Not Found' }))).toBe(true);
+ });
+
+ it('should detect 404 Not Found in h1', () => {
+ expect(isErrorPage(createActionResult({ h1: '404 Not Found' }))).toBe(true);
+ });
+
+ it('should detect 404 Not Found in h2', () => {
+ expect(isErrorPage(createActionResult({ h2: '404 Not Found' }))).toBe(true);
+ });
+
+ it('should detect 500 Internal Server Error', () => {
+ expect(isErrorPage(createActionResult({ title: '500 Internal Server Error' }))).toBe(true);
+ });
+
+ it('should detect 502 Bad Gateway', () => {
+ expect(isErrorPage(createActionResult({ title: '502 Bad Gateway' }))).toBe(true);
+ });
+
+ it('should detect 503 Service Unavailable', () => {
+ expect(isErrorPage(createActionResult({ title: '503 Service Unavailable' }))).toBe(true);
+ });
+
+ it('should detect 504 Gateway Timeout', () => {
+ expect(isErrorPage(createActionResult({ title: '504 Gateway Timeout' }))).toBe(true);
+ });
+
+ // Matching lower-cases both the field and the phrase, so letter case must not matter.
+ it('should be case insensitive', () => {
+ expect(isErrorPage(createActionResult({ title: '404 NOT FOUND' }))).toBe(true);
+ expect(isErrorPage(createActionResult({ title: '500 internal server error' }))).toBe(true);
+ });
+
+ // The phrase is matched as a substring, not as the whole title.
+ it('should detect error in longer title', () => {
+ expect(isErrorPage(createActionResult({ title: 'MyApp - 404 Not Found' }))).toBe(true);
+ });
+ });
+
+ // Covers the "no html / empty body / body shorter than 500 characters" rules of isErrorPage.
+ // NOTE(review): the html fixtures in this group appear to have lost their markup
+ // (the first two cases are identical as shown) - confirm against the original commit.
+ describe('empty page detection', () => {
+ it('should detect empty html', () => {
+ expect(isErrorPage(createActionResult({ html: '' }))).toBe(true);
+ });
+
+ it('should detect empty body', () => {
+ expect(isErrorPage(createActionResult({ html: '' }))).toBe(true);
+ });
+
+ it('should detect body with only whitespace', () => {
+ expect(isErrorPage(createActionResult({ html: ' \n\t ' }))).toBe(true);
+ });
+
+ // 100 characters of content is below the 500-character threshold.
+ it('should detect very small page (< 500 chars)', () => {
+ const smallContent = 'x'.repeat(100);
+ expect(isErrorPage(createActionResult({ html: `${smallContent}` }))).toBe(true);
+ });
+
+ // 600 characters of content is above the 500-character threshold.
+ it('should NOT detect page with 500+ chars as empty', () => {
+ const content = 'x'.repeat(600);
+ expect(isErrorPage(createActionResult({ html: `${content}` }))).toBe(false);
+ });
+ });
+
+ // Pages that mention a status number without the full "<code> <reason>" phrase must not be flagged.
+ // Most fixtures append 600 filler characters so the small-page rule does not trigger.
+ // NOTE(review): the html string literals here are split across lines and seem to have
+ // lost their tags - confirm against the original commit.
+ describe('false positive prevention', () => {
+ it('should NOT detect "Room 404" as error page', () => {
+ const result = isErrorPage(
+ createActionResult({
+ h1: 'Room 404',
+ html: 'Room 404
' + 'x'.repeat(600) + '',
+ })
+ );
+ expect(result).toBe(false);
+ });
+
+ it('should NOT detect "Order #500" as error page', () => {
+ const result = isErrorPage(
+ createActionResult({
+ title: 'Order #500 - Details',
+ html: 'Order Details
' + 'x'.repeat(600) + '',
+ })
+ );
+ expect(result).toBe(false);
+ });
+
+ it('should NOT detect standalone 404 number', () => {
+ const result = isErrorPage(
+ createActionResult({
+ title: '404',
+ html: '' + 'x'.repeat(600) + '',
+ })
+ );
+ expect(result).toBe(false);
+ });
+
+ // NOTE(review): unlike the neighbouring cases this fixture has no 600-character padding;
+ // verify its body is at least 500 characters, otherwise the small-page rule would flag it.
+ it('should NOT detect normal login page', () => {
+ const result = isErrorPage(
+ createActionResult({
+ title: 'Login',
+ h1: 'Sign In',
+ html: 'Sign In
',
+ })
+ );
+ expect(result).toBe(false);
+ });
+
+ it('should NOT detect normal dashboard page', () => {
+ const result = isErrorPage(
+ createActionResult({
+ title: 'Dashboard',
+ h1: 'Welcome Back',
+ html: 'Welcome Back
' + 'x'.repeat(600) + '',
+ })
+ );
+ expect(result).toBe(false);
+ });
+ });
+
+ // Inputs with absent fields: no title/headings supplied, and an ActionResult built without html.
+ describe('edge cases', () => {
+ it('should handle missing title, h1, h2', () => {
+ const result = isErrorPage(
+ createActionResult({
+ html: 'Content
' + 'x'.repeat(600) + '',
+ })
+ );
+ expect(result).toBe(false);
+ });
+
+ // Constructs ActionResult directly (bypassing the helper) so that no html is passed at all;
+ // isErrorPage treats missing html as an error page.
+ it('should handle null/undefined html gracefully', () => {
+ const actionResult = new ActionResult({ url: '/test', title: '' });
+ expect(isErrorPage(actionResult)).toBe(true);
+ });
+ });
+});