From 258a4cb52e14eec4d27acea233024e66f6652711 Mon Sep 17 00:00:00 2001 From: Vijay Yadav Date: Sat, 4 Apr 2026 23:49:26 -0700 Subject: [PATCH 1/2] fix: URL-encode special characters in connection string passwords MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #589 — stored passwords with special characters (`@`, `#`, `:`, `/`) cause cryptic authentication failures when used in URI-style connection strings because the userinfo section wasn't being percent-encoded. Root cause: - `sanitizeConnectionString()` existed in `normalize.ts` but was never wired into `normalizeConfig()` — connection strings with unencoded passwords went directly to drivers - The userinfo regex split on the *first* `@` instead of the *last*, so passwords containing `@` were silently mis-parsed and the function returned early thinking no encoding was needed Fix: - Wire `sanitizeConnectionString()` into `normalizeConfig()` as a single integration point so every URI-based driver benefits automatically - Fix the regex to use greedy matching for the userinfo portion and split on the last `@` before the host - Leave already-encoded passwords (`%XX` sequences) untouched for idempotency; decodeURIComponent wrapped in try/catch for safety Non-URI connection strings (Oracle TNS, key=value) and drivers using individual config fields (host/user/password) are unaffected — the fix only touches `connection_string`. Tests cover @/#/:/ in passwords, already-encoded passthrough, multiple URI schemes (postgresql://, mongodb://, mongodb+srv://), and the connectionString alias path. --- packages/drivers/src/index.ts | 2 +- packages/drivers/src/normalize.ts | 70 ++++++++++ .../test/altimate/driver-normalize.test.ts | 132 +++++++++++++++++- 3 files changed, 202 insertions(+), 2 deletions(-) diff --git a/packages/drivers/src/index.ts b/packages/drivers/src/index.ts index 8102d6e27..4854785c6 100644 --- a/packages/drivers/src/index.ts +++ b/packages/drivers/src/index.ts @@ -2,7 +2,7 @@ export type { Connector, ConnectorResult, SchemaColumn, ConnectionConfig } from "./types" // Re-export config normalization -export { normalizeConfig } from "./normalize" +export { normalizeConfig, sanitizeConnectionString } from "./normalize" // Re-export driver connect functions export { connect as connectPostgres } from "./postgres" diff --git a/packages/drivers/src/normalize.ts b/packages/drivers/src/normalize.ts index 4c43acd8b..e99a47a7a 100644 --- a/packages/drivers/src/normalize.ts +++ b/packages/drivers/src/normalize.ts @@ -111,6 +111,67 @@ const DRIVER_ALIASES: Record = { // duckdb and sqlite have simple configs — no aliases needed } +// --------------------------------------------------------------------------- +// Connection string password encoding +// --------------------------------------------------------------------------- + +/** + * URI-style connection strings (postgres://, mongodb://, etc.) embed + * credentials in the userinfo section: `scheme://user:password@host/db`. + * If the password contains special characters (@, #, :, /, ?, etc.) and + * they are NOT percent-encoded, drivers will mis-parse the URI and fail + * with cryptic auth errors. + * + * This function detects an unencoded password in the userinfo portion and + * re-encodes it so the connection string is valid. Already-encoded + * passwords (containing %XX sequences) are left untouched. + */ +export function sanitizeConnectionString(connectionString: string): string { + // Match scheme://userinfo@host... — we only touch the userinfo part. + // Schemes: postgresql, postgres, mongodb, mongodb+srv, clickhouse, http, https, redshift + // + // IMPORTANT: The password itself may contain '@' characters, so we must + // split on the LAST '@' before the host portion. The regex below uses a + // greedy (.+) for userinfo so it consumes everything up to the final '@'. + const uriMatch = connectionString.match( + /^([a-zA-Z][a-zA-Z0-9+.-]*:\/\/)(.+)@([^@]+)$/, + ) + if (!uriMatch) return connectionString // No userinfo section — nothing to fix + + const [, scheme, userinfo, rest] = uriMatch + + // Split userinfo into user:password on the FIRST colon only + const colonIdx = userinfo.indexOf(":") + if (colonIdx < 0) return connectionString // No password in userinfo + + const user = userinfo.slice(0, colonIdx) + const password = userinfo.slice(colonIdx + 1) + + // If the password already contains percent-encoded sequences, assume + // the caller encoded it properly and leave it alone. + if (/%[0-9A-Fa-f]{2}/.test(password)) return connectionString + + // Check if the password contains characters that need encoding. + // RFC 3986 unreserved: A-Z a-z 0-9 - . _ ~ + // We also allow ! * ' ( ) which are sub-delimiters often safe in passwords. + // Everything else (especially @ : / ? # [ ]) MUST be encoded. + const needsEncoding = /[@:#/?[\]%]/.test(password) + if (!needsEncoding) return connectionString + + // Re-encode both user and password to be safe. + // decodeURIComponent can throw on malformed percent sequences — fall back to + // encoding the raw value if that happens. + let encodedUser: string + try { + encodedUser = encodeURIComponent(decodeURIComponent(user)) + } catch { + encodedUser = encodeURIComponent(user) + } + const encodedPassword = encodeURIComponent(password) + + return `${scheme}${encodedUser}:${encodedPassword}@${rest}` +} + // --------------------------------------------------------------------------- // Core logic // --------------------------------------------------------------------------- @@ -178,6 +239,15 @@ export function normalizeConfig(config: ConnectionConfig): ConnectionConfig { const aliases = DRIVER_ALIASES[type] let result = aliases ? applyAliases(config, aliases) : { ...config } + // Sanitize connection_string: if the password contains special characters + // (@, #, :, /, etc.) that are not percent-encoded, URI-based drivers will + // mis-parse the string and fail with cryptic auth errors. This is the + // single integration point — every caller of normalizeConfig() gets the + // fix automatically. + if (typeof result.connection_string === "string") { + result.connection_string = sanitizeConnectionString(result.connection_string) + } + // Type-specific post-processing // Note: MySQL SSL fields (ssl_ca, ssl_cert, ssl_key) are NOT constructed // into an ssl object here. They stay as top-level fields so the credential diff --git a/packages/opencode/test/altimate/driver-normalize.test.ts b/packages/opencode/test/altimate/driver-normalize.test.ts index 4ca59ef9e..a5e90d84b 100644 --- a/packages/opencode/test/altimate/driver-normalize.test.ts +++ b/packages/opencode/test/altimate/driver-normalize.test.ts @@ -1,5 +1,5 @@ import { describe, expect, test } from "bun:test" -import { normalizeConfig } from "@altimateai/drivers" +import { normalizeConfig, sanitizeConnectionString } from "@altimateai/drivers" import { isSensitiveField } from "../../src/altimate/native/connections/credential-store" // --------------------------------------------------------------------------- @@ -947,3 +947,133 @@ describe("normalizeConfig — ClickHouse", () => { expect(result.ssl_key).toBeUndefined() }) }) + +// --------------------------------------------------------------------------- +// sanitizeConnectionString — special character encoding +// --------------------------------------------------------------------------- + +describe("sanitizeConnectionString", () => { + test("encodes @ in password", () => { + const input = "postgresql://testuser:t@st@localhost:5432/testdb" + const result = sanitizeConnectionString(input) + expect(result).toBe("postgresql://testuser:t%40st@localhost:5432/testdb") + }) + + test("encodes # in password", () => { + const input = "postgresql://testuser:test#val@localhost:5432/testdb" + const result = sanitizeConnectionString(input) + expect(result).toBe("postgresql://testuser:test%23val@localhost:5432/testdb") + }) + + test("encodes : in password", () => { + const input = "postgresql://testuser:test:val@localhost:5432/testdb" + const result = sanitizeConnectionString(input) + expect(result).toBe("postgresql://testuser:test%3Aval@localhost:5432/testdb") + }) + + test("encodes multiple special characters", () => { + const input = "postgresql://testuser:t@st#v:al@localhost:5432/testdb" + const result = sanitizeConnectionString(input) + expect(result).toBe("postgresql://testuser:t%40st%23v%3Aal@localhost:5432/testdb") + }) + + test("encodes / in password", () => { + const input = "postgresql://testuser:test/val@localhost:5432/testdb" + const result = sanitizeConnectionString(input) + expect(result).toBe("postgresql://testuser:test%2Fval@localhost:5432/testdb") + }) + + test("encodes ? in password", () => { + const input = "postgresql://testuser:test?val@localhost:5432/testdb" + const result = sanitizeConnectionString(input) + expect(result).toBe("postgresql://testuser:test%3Fval@localhost:5432/testdb") + }) + + test("handles malformed percent sequence in username gracefully", () => { + const input = "postgresql://bad%ZZuser:t@st@localhost:5432/testdb" + const result = sanitizeConnectionString(input) + // Should not throw — falls back to encoding the raw username + expect(result).toContain("@localhost:5432/testdb") + }) + + test("leaves already-encoded passwords untouched", () => { + const input = "postgresql://testuser:t%40st%23val@localhost:5432/testdb" + expect(sanitizeConnectionString(input)).toBe(input) + }) + + test("leaves passwords without special characters untouched", () => { + const input = "postgresql://testuser:simpletestval@localhost:5432/testdb" + expect(sanitizeConnectionString(input)).toBe(input) + }) + + test("leaves non-URI strings untouched", () => { + const input = "host=localhost dbname=mydb" + expect(sanitizeConnectionString(input)).toBe(input) + }) + + test("handles mongodb scheme", () => { + const input = "mongodb://testuser:t@st@localhost:27017/testdb" + const result = sanitizeConnectionString(input) + expect(result).toBe("mongodb://testuser:t%40st@localhost:27017/testdb") + }) + + test("handles mongodb+srv scheme", () => { + const input = "mongodb+srv://testuser:t@st@cluster.example.com/testdb" + const result = sanitizeConnectionString(input) + expect(result).toBe("mongodb+srv://testuser:t%40st@cluster.example.com/testdb") + }) + + test("leaves URIs without password untouched", () => { + const input = "postgresql://testuser@localhost:5432/testdb" + expect(sanitizeConnectionString(input)).toBe(input) + }) +}) + +// --------------------------------------------------------------------------- +// normalizeConfig — connection_string sanitization integration +// --------------------------------------------------------------------------- + +describe("normalizeConfig — connection_string sanitization", () => { + test("sanitizes connection_string with special chars in password", () => { + const result = normalizeConfig({ + type: "postgres", + connection_string: "postgresql://testuser:f@ke#PLACEHOLDER@localhost:5432/testdb", + }) + expect(result.connection_string).toBe( + "postgresql://testuser:f%40ke%23PLACEHOLDER@localhost:5432/testdb", + ) + }) + + test("sanitizes connectionString alias with special chars", () => { + const result = normalizeConfig({ + type: "postgres", + connectionString: "postgresql://testuser:t@st@localhost:5432/testdb", + }) + // alias resolved to connection_string, then sanitized + expect(result.connection_string).toBe( + "postgresql://testuser:t%40st@localhost:5432/testdb", + ) + expect(result.connectionString).toBeUndefined() + }) + + test("does not alter connection_string without special chars", () => { + const result = normalizeConfig({ + type: "redshift", + connection_string: "postgresql://testuser:testval@localhost:5439/testdb", + }) + expect(result.connection_string).toBe( + "postgresql://testuser:testval@localhost:5439/testdb", + ) + }) + + test("does not alter config without connection_string", () => { + const result = normalizeConfig({ + type: "postgres", + host: "localhost", + password: "f@ke#PLACEHOLDER", + }) + // Individual fields are NOT URI-encoded — drivers handle them natively + expect(result.password).toBe("f@ke#PLACEHOLDER") + expect(result.connection_string).toBeUndefined() + }) +}) From 0d473659d5eefab7d73b10b6e3c356c433c3853e Mon Sep 17 00:00:00 2001 From: Vijay Yadav Date: Sun, 5 Apr 2026 07:07:18 -0700 Subject: [PATCH 2/2] fix: address code review findings on URI sanitizer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Multi-model code review flagged three real bugs in sanitizeConnectionString: 1. CRITICAL — greedy regex corrupted URIs with '@' in query/path Example: postgresql://u:p@host/db?email=alice@example.com Rightmost '@' was in the query string, not the userinfo separator. The sanitizer treated host/db?email=alice as part of the password and percent-encoded it, producing a broken URI. 2. MAJOR — short-circuit on %XX skipped passwords with mixed encoding Example: postgresql://user:p%20ss@word@host/db Partially-encoded password contained both a valid %20 and a raw '@'. The 'already-encoded, leave alone' check bailed early and left '@' unencoded. 3. MINOR — username-only userinfo (no colon) was skipped entirely Example: postgresql://alice@example.com@host/db Email-as-username with unencoded '@' broke URI parsing because we returned early on missing colon. Changes: - Scan for the LAST '@' in the afterScheme portion directly instead of first parsing out an 'authority' segment. The URI spec disallows unencoded path/query/fragment delimiters in authority, but real-world passwords do contain them — so we honor user intent over spec. - Replace the %XX short-circuit + needsEncoding pre-check with a single idempotent encoder (decode → encode) applied to both user and password components. This round-trips already-encoded values unchanged and encodes raw values correctly in one pass. - Handle username-only userinfo by encoding it when no colon is present. - Add an ambiguity guard: when the rightmost '@' is followed by no path/query/fragment delimiter but preceded by one, the '@' is in the query/path/fragment — return the URI unchanged and expect the caller to pre-encode. Test coverage added for all three bugs plus IPv6 hosts, URIs with no port, URIs with no path, path-with-@, fragment-with-@, and the ambiguous both-password-and-query-have-@ case. --- packages/drivers/src/normalize.ts | 108 +++++++++++------- .../test/altimate/driver-normalize.test.ts | 79 +++++++++++++ 2 files changed, 147 insertions(+), 40 deletions(-) diff --git a/packages/drivers/src/normalize.ts b/packages/drivers/src/normalize.ts index e99a47a7a..0f936feb1 100644 --- a/packages/drivers/src/normalize.ts +++ b/packages/drivers/src/normalize.ts @@ -127,49 +127,77 @@ const DRIVER_ALIASES: Record = { * passwords (containing %XX sequences) are left untouched. */ export function sanitizeConnectionString(connectionString: string): string { - // Match scheme://userinfo@host... — we only touch the userinfo part. - // Schemes: postgresql, postgres, mongodb, mongodb+srv, clickhouse, http, https, redshift + // Only touch scheme://... URIs. + const schemeMatch = connectionString.match(/^[a-zA-Z][a-zA-Z0-9+.-]*:\/\//) + if (!schemeMatch) return connectionString + + const scheme = schemeMatch[0] + const afterScheme = connectionString.slice(scheme.length) + + // Find the userinfo/host separator. A password can legitimately contain + // '@', '#', '/', '?', or ':' characters, so the URI spec is ambiguous + // when those are unencoded. We use the LAST '@' as the separator because + // that correctly handles the common case of passwords with special + // characters (the stated purpose of this function — see issue #589). // - // IMPORTANT: The password itself may contain '@' characters, so we must - // split on the LAST '@' before the host portion. The regex below uses a - // greedy (.+) for userinfo so it consumes everything up to the final '@'. - const uriMatch = connectionString.match( - /^([a-zA-Z][a-zA-Z0-9+.-]*:\/\/)(.+)@([^@]+)$/, - ) - if (!uriMatch) return connectionString // No userinfo section — nothing to fix - - const [, scheme, userinfo, rest] = uriMatch - - // Split userinfo into user:password on the FIRST colon only - const colonIdx = userinfo.indexOf(":") - if (colonIdx < 0) return connectionString // No password in userinfo - - const user = userinfo.slice(0, colonIdx) - const password = userinfo.slice(colonIdx + 1) - - // If the password already contains percent-encoded sequences, assume - // the caller encoded it properly and leave it alone. - if (/%[0-9A-Fa-f]{2}/.test(password)) return connectionString - - // Check if the password contains characters that need encoding. - // RFC 3986 unreserved: A-Z a-z 0-9 - . _ ~ - // We also allow ! * ' ( ) which are sub-delimiters often safe in passwords. - // Everything else (especially @ : / ? # [ ]) MUST be encoded. - const needsEncoding = /[@:#/?[\]%]/.test(password) - if (!needsEncoding) return connectionString - - // Re-encode both user and password to be safe. - // decodeURIComponent can throw on malformed percent sequences — fall back to - // encoding the raw value if that happens. - let encodedUser: string - try { - encodedUser = encodeURIComponent(decodeURIComponent(user)) - } catch { - encodedUser = encodeURIComponent(user) + // The known trade-off: if the query string or path also contains an + // unencoded '@' (e.g. `?email=alice@example.com`), the rightmost '@' + // is NOT the userinfo separator. We detect this ambiguous case below + // with a guard and bail to avoid corrupting the URI. + const lastAt = afterScheme.lastIndexOf("@") + if (lastAt < 0) return connectionString // No userinfo — nothing to fix + + const beforeAt = afterScheme.slice(0, lastAt) + const afterAt = afterScheme.slice(lastAt + 1) + + // Ambiguity guard: if the content AFTER the '@' has no path/query/ + // fragment delimiter ('/', '?', or '#') but the content BEFORE the + // '@' does, then the '@' is almost certainly inside a path, query, + // or fragment — not the userinfo separator. Bail and leave the URI + // untouched so the caller can pre-encode explicitly. + // + // Examples that trigger the guard (correctly left alone): + // postgresql://u:p@host/db?email=alice@example.com ('@' in query) + // postgresql://u:p@host:5432/db@archive ('@' in path) + // postgresql://u:p@host/db#at@frag ('@' in fragment) + // + // Examples that pass the guard (correctly encoded): + // postgresql://u:p@ss@host/db (last '@' has '/' after) + // postgresql://u:f@ke#PLACEHOLDER@host/db (last '@' has '/' after) + const afterAtHasDelim = /[/?#]/.test(afterAt) + const beforeAtHasDelim = /[/?#]/.test(beforeAt) + if (!afterAtHasDelim && beforeAtHasDelim) { + return connectionString + } + + // Idempotent re-encoding: decode any existing percent-escapes and + // re-encode. Already-encoded values round-trip unchanged; raw values + // with special characters get encoded. Malformed percent sequences + // fall back to encoding the raw input. + const encodeIdempotent = (v: string): string => { + if (v.length === 0) return v + try { + return encodeURIComponent(decodeURIComponent(v)) + } catch { + return encodeURIComponent(v) + } + } + + // Split userinfo on the FIRST ':' only (password may contain ':'). + const colonIdx = beforeAt.indexOf(":") + let encodedUserinfo: string + if (colonIdx < 0) { + // Username-only userinfo: still encode if it contains special chars + // (e.g. email addresses used as usernames). + encodedUserinfo = encodeIdempotent(beforeAt) + } else { + const user = beforeAt.slice(0, colonIdx) + const password = beforeAt.slice(colonIdx + 1) + encodedUserinfo = `${encodeIdempotent(user)}:${encodeIdempotent(password)}` } - const encodedPassword = encodeURIComponent(password) - return `${scheme}${encodedUser}:${encodedPassword}@${rest}` + const rebuilt = `${scheme}${encodedUserinfo}@${afterAt}` + return rebuilt === connectionString ? connectionString : rebuilt } // --------------------------------------------------------------------------- diff --git a/packages/opencode/test/altimate/driver-normalize.test.ts b/packages/opencode/test/altimate/driver-normalize.test.ts index a5e90d84b..95f348289 100644 --- a/packages/opencode/test/altimate/driver-normalize.test.ts +++ b/packages/opencode/test/altimate/driver-normalize.test.ts @@ -1027,6 +1027,85 @@ describe("sanitizeConnectionString", () => { const input = "postgresql://testuser@localhost:5432/testdb" expect(sanitizeConnectionString(input)).toBe(input) }) + + test("preserves @ in query string (does not misinterpret as userinfo)", () => { + const input = + "postgresql://testuser:simpleval@localhost:5432/testdb?contact=alice@example.com" + expect(sanitizeConnectionString(input)).toBe(input) + }) + + test("bails on ambiguous URIs where both password and query contain @", () => { + // When both the password and the query string contain unencoded '@', + // there's no way to deterministically pick the userinfo separator. + // We return the URI untouched and expect the caller to pre-encode. + const input = + "postgresql://testuser:p@ss@localhost:5432/testdb?contact=alice@example.com" + expect(sanitizeConnectionString(input)).toBe(input) + }) + + test("encodes @ in username-only userinfo (no password)", () => { + // Email-as-username with no password: the '@' in the email must be + // encoded so the driver doesn't treat the domain as the host. + const input = "postgresql://alice@example.com@localhost:5432/testdb" + const result = sanitizeConnectionString(input) + expect(result).toBe( + "postgresql://alice%40example.com@localhost:5432/testdb", + ) + }) + + test("encodes @ in partially-encoded password (not short-circuited by %XX)", () => { + // Password contains an encoded space (%20) AND a raw '@'. Previous + // logic short-circuited on seeing %XX and left '@' unencoded, + // producing a broken URI. + const input = "postgresql://testuser:p%20ss@word@localhost:5432/testdb" + const result = sanitizeConnectionString(input) + expect(result).toBe( + "postgresql://testuser:p%20ss%40word@localhost:5432/testdb", + ) + }) + + test("encodes # in partially-encoded password", () => { + const input = "postgresql://testuser:pa%40ss#word@localhost:5432/testdb" + const result = sanitizeConnectionString(input) + // %40 is preserved; raw '#' gets encoded to %23 + expect(result).toBe( + "postgresql://testuser:pa%40ss%23word@localhost:5432/testdb", + ) + }) + + test("handles malformed percent sequence in password gracefully", () => { + // '%ZZ' is not a valid percent-escape. Falls back to encoding raw. + const input = "postgresql://testuser:bad%ZZpass@localhost:5432/testdb" + const result = sanitizeConnectionString(input) + // Raw-encoded password contains %25 (encoded '%') and ZZ literal + expect(result).toBe( + "postgresql://testuser:bad%25ZZpass@localhost:5432/testdb", + ) + }) + + test("preserves @ in path after authority", () => { + // A path segment with '@' is unusual but valid and must not be + // treated as userinfo. + const input = "postgresql://testuser:simpleval@localhost:5432/db@archive" + expect(sanitizeConnectionString(input)).toBe(input) + }) + + test("preserves @ in fragment", () => { + const input = "postgresql://testuser:simpleval@localhost:5432/testdb#at@frag" + expect(sanitizeConnectionString(input)).toBe(input) + }) + + test("handles scheme-only URI with no path", () => { + const input = "postgresql://testuser:p@ss@localhost:5432" + const result = sanitizeConnectionString(input) + expect(result).toBe("postgresql://testuser:p%40ss@localhost:5432") + }) + + test("handles URI with no port", () => { + const input = "postgresql://testuser:p@ss@localhost/testdb" + const result = sanitizeConnectionString(input) + expect(result).toBe("postgresql://testuser:p%40ss@localhost/testdb") + }) }) // ---------------------------------------------------------------------------