From f7a2048f7b99266de52e699b75f8291f1be855d0 Mon Sep 17 00:00:00 2001 From: Chen <99816898+donteatfriedrice@users.noreply.github.com> Date: Wed, 15 Apr 2026 11:35:24 +0800 Subject: [PATCH] fix: implement RFC 6265 domain matching in Chrome cookies capability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous implementation built a `LIKE '%.${host}'` pattern, which only matched cookie host_keys that end in the full request hostname. Cookies set with an explicit parent Domain attribute (e.g. `.reddit.com`) were missed when the request URL used a subdomain (`https://www.reddit.com`), because `.reddit.com` does not end in `.www.reddit.com`. Replace with proper RFC 6265 §5.1.3 matching: enumerate all valid host_key values for a given request host (self host-only, self with leading dot, each parent domain with leading dot — stopping before bare TLDs) and query with `IN (...)`. Verified live against Chrome cookie DB: - https://reddit.com → 10 cookies, reddit_session present - https://www.reddit.com → 14 cookies, reddit_session present (includes extra host-only cookies on www.reddit.com, correctly excluded from the apex query) Co-Authored-By: Claude Opus 4.6 (1M context) --- .../capabilities/cookies-chrome.test.ts | 55 ++++++++++++++++++- .../connectors/capabilities/cookies-chrome.ts | 38 +++++++++++-- 2 files changed, 86 insertions(+), 7 deletions(-) diff --git a/packages/core/src/connectors/capabilities/cookies-chrome.test.ts b/packages/core/src/connectors/capabilities/cookies-chrome.test.ts index cb5c111..78e7163 100644 --- a/packages/core/src/connectors/capabilities/cookies-chrome.test.ts +++ b/packages/core/src/connectors/capabilities/cookies-chrome.test.ts @@ -1,7 +1,60 @@ import { describe, it, expect } from 'vitest' -import { makeChromeCookiesCapability } from './cookies-chrome.js' +import { makeChromeCookiesCapability, getMatchingHostKeys } from './cookies-chrome.js' import { SyncError, SyncErrorCode } from '@spool/connector-sdk' +describe('getMatchingHostKeys', () => { + it('matches host-only and same-host domain cookies', () => { + expect(getMatchingHostKeys('reddit.com')).toEqual([ + 'reddit.com', + '.reddit.com', + ]) + }) + + it('matches parent domain cookies for subdomain requests', () => { + expect(getMatchingHostKeys('www.reddit.com')).toEqual([ + 'www.reddit.com', + '.www.reddit.com', + '.reddit.com', + ]) + }) + + it('walks all parent labels for deep subdomains', () => { + expect(getMatchingHostKeys('a.b.example.co.uk')).toEqual([ + 'a.b.example.co.uk', + '.a.b.example.co.uk', + '.b.example.co.uk', + '.example.co.uk', + '.co.uk', + ]) + }) + + it('does not walk into a bare TLD', () => { + const keys = getMatchingHostKeys('reddit.com') + expect(keys).not.toContain('.com') + expect(keys).not.toContain('com') + }) + + it('lower-cases the input host', () => { + expect(getMatchingHostKeys('WWW.Reddit.COM')).toEqual([ + 'www.reddit.com', + '.www.reddit.com', + '.reddit.com', + ]) + }) + + it('strips a leading dot from the input', () => { + expect(getMatchingHostKeys('.reddit.com')).toEqual([ + 'reddit.com', + '.reddit.com', + ]) + }) + + it('returns empty for single-label or empty hosts', () => { + expect(getMatchingHostKeys('localhost')).toEqual([]) + expect(getMatchingHostKeys('')).toEqual([]) + }) +}) + describe('makeChromeCookiesCapability', () => { it('returns a capability with a get method', () => { const cap = makeChromeCookiesCapability() diff --git a/packages/core/src/connectors/capabilities/cookies-chrome.ts b/packages/core/src/connectors/capabilities/cookies-chrome.ts index d3dc612..f7e6b1b 100644 --- a/packages/core/src/connectors/capabilities/cookies-chrome.ts +++ b/packages/core/src/connectors/capabilities/cookies-chrome.ts @@ -123,9 +123,33 @@ interface RawCookieFull { is_httponly: string } -function queryAllCookiesForDomain( +/** + * Enumerate every Chrome `host_key` value that should match a request to `host` + * per RFC 6265 §5.1.3. Chrome stores host-only cookies under the bare hostname + * and domain cookies under `.parent.example.com`; a request to `www.example.com` + * must see cookies at `www.example.com`, `.www.example.com`, and `.example.com` + * but not anything scoped to a sibling (`.other.example.com`) or a TLD alone. + */ +export function getMatchingHostKeys(host: string): string[] { + const normalized = host.toLowerCase().replace(/^\./, '') + if (!normalized || !normalized.includes('.')) return [] + + const keys = [normalized, `.${normalized}`] + let cur = normalized + while (true) { + const idx = cur.indexOf('.') + if (idx < 0) break + const parent = cur.substring(idx + 1) + if (!parent.includes('.')) break + keys.push(`.${parent}`) + cur = parent + } + return keys +} + +function queryAllCookiesForHost( dbPath: string, - domain: string, + host: string, ): { cookies: RawCookieFull[]; dbVersion: number } { if (!existsSync(dbPath)) { throw new SyncError( @@ -134,9 +158,12 @@ function queryAllCookiesForDomain( ) } - const safeDomain = domain.replace(/'/g, "''") + const keys = getMatchingHostKeys(host) + if (keys.length === 0) return { cookies: [], dbVersion: 0 } + + const quoted = keys.map(k => `'${k.replace(/'/g, "''")}'`).join(',') // Fetch cookies and DB version in one sqlite3 invocation to avoid double process spawn - const sql = `SELECT name, host_key, path, hex(encrypted_value) as encrypted_value_hex, value, expires_utc, is_secure, is_httponly, (SELECT value FROM meta WHERE key='version') as db_version FROM cookies WHERE host_key LIKE '%${safeDomain}';` + const sql = `SELECT name, host_key, path, hex(encrypted_value) as encrypted_value_hex, value, expires_utc, is_secure, is_httponly, (SELECT value FROM meta WHERE key='version') as db_version FROM cookies WHERE host_key IN (${quoted});` const output = runSqliteQuery(dbPath, sql) @@ -190,8 +217,7 @@ export function makeChromeCookiesCapability(): CookiesCapability { const key = getMacOSChromeKey() const host = domainFromUrl(query.url) - const dotHost = host.startsWith('.') ? host : `.${host}` - const result = queryAllCookiesForDomain(dbPath, dotHost) + const result = queryAllCookiesForHost(dbPath, host) const cookies: Cookie[] = [] for (const raw of result.cookies) {