diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..752c83e --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,127 @@ +# AGENTS.md + +Instructions for AI coding agents working in this repository. + +## Project Overview + +Gatekeeper is a standalone credential-injecting TLS-intercepting proxy. It transparently injects authentication headers (tokens, API keys) into proxied HTTPS requests based on hostname matching. Clients never see raw credentials — they route traffic through the proxy, which handles credential resolution, injection, and TLS interception. + +Key capabilities: + +- **Credential injection** — Resolve credentials from environment variables, static values, or AWS Secrets Manager, then inject them as HTTP headers for matching hosts +- **TLS interception** — MITM proxy with per-host certificate generation from a configured CA +- **MCP relay** — Forward Model Context Protocol requests with credential injection and SSE streaming +- **Network policy** — Allow/deny traffic by host pattern +- **LLM policy** — Evaluate Anthropic API responses against Keep policy rules +- **Host gateway** — Route synthetic container hostnames to the actual host IP +- **OpenTelemetry** — Distributed traces, request metrics, and slog-to-OTel logs bridge; configured entirely via standard `OTEL_*` environment variables + +## Architecture + +``` +proxy/ Core TLS-intercepting proxy engine + proxy.go Main proxy: CONNECT handling, TLS interception, credential injection + ca.go CA certificate loading and per-host cert generation + hosts.go Hostname matching (glob patterns, port stripping) + mcp.go MCP relay handler (SSE streaming, tool credential injection) + llmpolicy.go LLM response policy evaluation via Keep + relay.go HTTP relay for non-CONNECT requests + otel.go OpenTelemetry handler wrapper, metrics instruments, span helpers + server.go Proxy server lifecycle (start/stop/listen) + +gatekeeper.go Standalone server wiring (config → proxy + credential sources) +config.go YAML 
config parsing (proxy, TLS, credentials, network, log) +config_credential.go Credential source resolution (env, static, AWS Secrets Manager) + +credentialsource/ Pluggable credential backends + source.go Source interface + env.go Environment variable source + static.go Literal value source + awssecretsmanager.go AWS Secrets Manager source + +cmd/gatekeeper/ CLI entry point (--config flag) + +examples/ Sample config, CA generation script, and test harness +``` + +### Key Types + +- **`proxy.Proxy`** — The core proxy. Handles HTTP CONNECT, TLS interception, credential injection, network policy, and request logging. +- **`proxy.RunContextData`** — Per-caller credential and policy context. Holds credentials, network policy, MCP servers, host gateway config, and Keep engines for a single caller. +- **`proxy.ContextResolver`** — Function type (`func(token string) (*RunContextData, bool)`) that resolves a proxy auth token to per-caller context. Standalone mode uses a single static context; moat's daemon maps each registered run to its own scoped context. +- **`gatekeeper.Server`** — Standalone server that loads config, resolves credential sources, and wires up the proxy. + +### How Credential Injection Works + +1. Client sends `CONNECT host:443` through the proxy (via `HTTP_PROXY` env var) +2. Proxy establishes TLS with the client using a dynamically-generated certificate for that host +3. Proxy reads the plaintext HTTP request from the client +4. `RunContextData.Credentials` is checked — if a credential matches the request host, the configured header (default: `Authorization`) is injected +5. Proxy forwards the request to the real server over a separate TLS connection +6. Response streams back to the client + +### Host Gateway + +The `HostGateway` field in `RunContextData` maps a synthetic hostname (used inside containers) to the host machine's IP. 
When `HostGatewayIP` resolves to a loopback address, the proxy also matches `localhost`/`127.0.0.1`/`::1` as equivalent — so credentials configured for the gateway hostname also apply to direct loopback connections. + +### OpenTelemetry Instrumentation + +OTel integration uses a callback-based architecture — the proxy core (`proxy/proxy.go`) has no OTel imports. Instrumentation is layered on externally: + +- **`proxy.OTelHandler`** wraps the proxy as HTTP middleware, creating root spans and recording request duration/count metrics. Its `statusRecorder` implements `http.Hijacker` so CONNECT requests still work after hijack. +- **Request/policy loggers** (set in `gatekeeper.go`) attach span events and record credential injection/policy denial metrics via exported functions `proxy.RecordCredentialInjection` and `proxy.RecordPolicyDenial`. +- **slog bridge** — `gatekeeper.go` uses a `multiHandler` to fan out log records to both the configured slog handler and `otelslog.NewHandler`, correlating logs with trace context. +- **Provider setup** — `cmd/gatekeeper/main.go` creates OTLP HTTP exporters for traces, metrics, and logs, registering them as global providers. All configuration is via standard `OTEL_*` env vars (no YAML knobs). + +## Development Commands + +```bash +# Build +go build ./... + +# Run tests (includes race detector) +go test -race ./... + +# Run a single test +go test -race -run TestName ./proxy/ + +# Vet +go vet ./... 
+ +# Build the binary +go build -o gatekeeper ./cmd/gatekeeper/ +``` + +## Code Style + +- Follow standard Go conventions and `go fmt` formatting +- Use `go vet` to catch common issues +- No `internal/` packages — this is a library module meant to be imported + +## Git Commits + +- Use [Conventional Commits](https://www.conventionalcommits.org/) format: `type(scope): description` + - Types: `feat`, `fix`, `docs`, `style`, `refactor`, `test`, `chore`, `build`, `ci`, `perf` + - Scope is optional but encouraged (e.g., `feat(proxy): add header injection`) +- Do not include `Co-Authored-By` lines for AI agents in commit messages + +## Security Considerations + +This proxy handles sensitive credentials. When making changes: + +- Never log credential values (tokens, keys, secrets) — log host/grant names only +- Credentials must not appear in error messages returned to clients +- The CA private key must stay in memory only — never written to temp files +- Validate that TLS interception cannot be bypassed (e.g., via malformed CONNECT requests) +- Host matching must be exact or use explicit glob patterns — no accidental wildcard leaks +- Auth token comparison must be constant-time to prevent timing attacks + +## Relationship to Moat + +This module (`github.com/majorcontext/gatekeeper`) was extracted from moat's `internal/proxy/` package. Moat imports gatekeeper as a dependency and provides the daemon layer (per-run registration, token-scoped contexts, Unix socket management API). Gatekeeper has no knowledge of moat — it's a general-purpose credential-injecting proxy. + +## Creating Pull Requests + +- Use `gh pr create` with default flags only (no `--base`, `--head`, etc.) 
+- If `gh pr create` fails, report the error to the operator immediately +- Do not attempt to work around failures by adding flags or changing configuration diff --git a/CHANGELOG.md b/CHANGELOG.md index 03c5003..9b6fdc6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ Gatekeeper is a standalone credential-injecting TLS-intercepting proxy. It trans Gatekeeper is pre-1.0. The configuration schema and credential source interface may change between minor versions. +## v0.10.0 — 2026-05-11 + +### Added + +- **`capture_headers` log config** — new `log.capture_headers` field captures specified request headers as structured attributes in the canonical `"request"` log entry; matched headers are stripped before forwarding upstream, except a header that the proxy itself just injected as a credential; header names are logged as lowercase with hyphens converted to underscores (e.g., `X-Workspace-Slug` → `x_workspace_slug`); values longer than 256 bytes are truncated to at most 256 bytes (cut back to a valid UTF-8 boundary); sensitive headers (`Authorization`, `Proxy-Authorization`, `Cookie`) are rejected at startup; max 10 headers allowed +- **User ID in canonical request log** — the proxy auth username (from `HTTP_PROXY=http://user:token@host`) is now logged as `user_id` in the canonical request log entry and included in OTel span attributes + ## v0.9.1 — 2026-04-26 ### Fixed diff --git a/CLAUDE.md b/CLAUDE.md index 4851d0f..43c994c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,132 +1 @@ -# CLAUDE.md - -This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. - -## Project Overview - -Gatekeeper is a standalone credential-injecting TLS-intercepting proxy. It transparently injects authentication headers (tokens, API keys) into proxied HTTPS requests based on hostname matching. Clients never see raw credentials — they route traffic through the proxy, which handles credential resolution, injection, and TLS interception. 
- -Key capabilities: - -- **Credential injection** — Resolve credentials from environment variables, static values, or AWS Secrets Manager, then inject them as HTTP headers for matching hosts -- **TLS interception** — MITM proxy with per-host certificate generation from a configured CA -- **MCP relay** — Forward Model Context Protocol requests with credential injection and SSE streaming -- **Network policy** — Allow/deny traffic by host pattern -- **LLM policy** — Evaluate Anthropic API responses against Keep policy rules -- **Host gateway** — Route synthetic container hostnames to the actual host IP -- **OpenTelemetry** — Distributed traces, request metrics, and slog-to-OTel logs bridge; configured entirely via standard `OTEL_*` environment variables - -## Architecture - -``` -proxy/ Core TLS-intercepting proxy engine - proxy.go Main proxy: CONNECT handling, TLS interception, credential injection - ca.go CA certificate loading and per-host cert generation - hosts.go Hostname matching (glob patterns, port stripping) - mcp.go MCP relay handler (SSE streaming, tool credential injection) - llmpolicy.go LLM response policy evaluation via Keep - relay.go HTTP relay for non-CONNECT requests - otel.go OpenTelemetry handler wrapper, metrics instruments, span helpers - server.go Proxy server lifecycle (start/stop/listen) - -gatekeeper.go Standalone server wiring (config → proxy + credential sources) -config.go YAML config parsing (proxy, TLS, credentials, network, log) -config_credential.go Credential source resolution (env, static, AWS Secrets Manager) - -credentialsource/ Pluggable credential backends - source.go Source interface - env.go Environment variable source - static.go Literal value source - awssecretsmanager.go AWS Secrets Manager source - -cmd/gatekeeper/ CLI entry point (--config flag) - -examples/ Sample config, CA generation script, and test harness -``` - -### Key Types - -- **`proxy.Proxy`** — The core proxy. 
Handles HTTP CONNECT, TLS interception, credential injection, network policy, and request logging. -- **`proxy.RunContextData`** — Per-caller credential and policy context. Holds credentials, network policy, MCP servers, host gateway config, and Keep engines for a single caller. -- **`proxy.ContextResolver`** — Function type (`func(token string) (*RunContextData, bool)`) that resolves a proxy auth token to per-caller context. Standalone mode uses a single static context; moat's daemon maps each registered run to its own scoped context. -- **`gatekeeper.Server`** — Standalone server that loads config, resolves credential sources, and wires up the proxy. - -### How Credential Injection Works - -1. Client sends `CONNECT host:443` through the proxy (via `HTTP_PROXY` env var) -2. Proxy establishes TLS with the client using a dynamically-generated certificate for that host -3. Proxy reads the plaintext HTTP request from the client -4. `RunContextData.Credentials` is checked — if a credential matches the request host, the configured header (default: `Authorization`) is injected -5. Proxy forwards the request to the real server over a separate TLS connection -6. Response streams back to the client - -### Host Gateway - -The `HostGateway` field in `RunContextData` maps a synthetic hostname (used inside containers) to the host machine's IP. When `HostGatewayIP` resolves to a loopback address, the proxy also matches `localhost`/`127.0.0.1`/`::1` as equivalent — so credentials configured for the gateway hostname also apply to direct loopback connections. - -### OpenTelemetry Instrumentation - -OTel integration uses a callback-based architecture — the proxy core (`proxy/proxy.go`) has no OTel imports. Instrumentation is layered on externally: - -- **`proxy.OTelHandler`** wraps the proxy as HTTP middleware, creating root spans and recording request duration/count metrics. Its `statusRecorder` implements `http.Hijacker` so CONNECT requests still work after hijack. 
-- **Request/policy loggers** (set in `gatekeeper.go`) attach span events and record credential injection/policy denial metrics via exported functions `proxy.RecordCredentialInjection` and `proxy.RecordPolicyDenial`. -- **slog bridge** — `gatekeeper.go` uses a `multiHandler` to fan out log records to both the configured slog handler and `otelslog.NewHandler`, correlating logs with trace context. -- **Provider setup** — `cmd/gatekeeper/main.go` creates OTLP HTTP exporters for traces, metrics, and logs, registering them as global providers. All configuration is via standard `OTEL_*` env vars (no YAML knobs). - -Key env vars for deployment: -- `OTEL_EXPORTER_OTLP_ENDPOINT` — Collector endpoint (e.g., `https://host.betterstackdata.com`) -- `OTEL_EXPORTER_OTLP_HEADERS` — Auth headers (e.g., `Authorization=Bearer `) -- `OTEL_RESOURCE_ATTRIBUTES` — Resource tags (e.g., `environment=production`) - -## Development Commands - -```bash -# Build -go build ./... - -# Run tests (includes race detector) -go test -race ./... - -# Run a single test -go test -race -run TestName ./proxy/ - -# Vet -go vet ./... - -# Build the binary -go build -o gatekeeper ./cmd/gatekeeper/ -``` - -## Code Style - -- Follow standard Go conventions and `go fmt` formatting -- Use `go vet` to catch common issues -- No `internal/` packages — this is a library module meant to be imported - -## Git Commits - -- Use [Conventional Commits](https://www.conventionalcommits.org/) format: `type(scope): description` - - Types: `feat`, `fix`, `docs`, `style`, `refactor`, `test`, `chore`, `build`, `ci`, `perf` - - Scope is optional but encouraged (e.g., `feat(proxy): add header injection`) -- Do not include `Co-Authored-By` lines for Claude in commit messages - -## Security Considerations - -This proxy handles sensitive credentials. 
When making changes: - -- Never log credential values (tokens, keys, secrets) — log host/grant names only -- Credentials must not appear in error messages returned to clients -- The CA private key must stay in memory only — never written to temp files -- Validate that TLS interception cannot be bypassed (e.g., via malformed CONNECT requests) -- Host matching must be exact or use explicit glob patterns — no accidental wildcard leaks -- Auth token comparison must be constant-time to prevent timing attacks - -## Relationship to Moat - -This module (`github.com/majorcontext/gatekeeper`) was extracted from moat's `internal/proxy/` package. Moat imports gatekeeper as a dependency and provides the daemon layer (per-run registration, token-scoped contexts, Unix socket management API). Gatekeeper has no knowledge of moat — it's a general-purpose credential-injecting proxy. - -## Creating Pull Requests - -- Use `gh pr create` with default flags only (no `--base`, `--head`, etc.) -- If `gh pr create` fails, report the error to the operator immediately -- Do not attempt to work around failures by adding flags or changing configuration +@AGENTS.md diff --git a/config.go b/config.go index ffb29a9..27a0d20 100644 --- a/config.go +++ b/config.go @@ -87,9 +87,10 @@ type NetworkConfig struct { // LogConfig configures logging. 
type LogConfig struct { - Level string `yaml:"level"` // Log level (e.g., "debug", "info", "warn", "error") - Format string `yaml:"format"` // Output format ("json" or "text") - Output string `yaml:"output"` // Destination ("stderr", "stdout", or a file path; default: stderr) + Level string `yaml:"level"` // Log level (e.g., "debug", "info", "warn", "error") + Format string `yaml:"format"` // Output format ("json" or "text") + Output string `yaml:"output"` // Destination ("stderr", "stdout", or a file path; default: stderr) + CaptureHeaders []string `yaml:"capture_headers,omitempty"` // Request headers to log and strip before forwarding } // ParseConfig parses a Gate Keeper config from YAML bytes. diff --git a/config_test.go b/config_test.go index f58b6b2..4e9cc46 100644 --- a/config_test.go +++ b/config_test.go @@ -1,9 +1,13 @@ package gatekeeper import ( + "fmt" "os" "path/filepath" + "strings" "testing" + + "github.com/majorcontext/gatekeeper/proxy" ) func TestParseConfig_Full(t *testing.T) { @@ -167,3 +171,79 @@ func TestLoadConfig_NotFound(t *testing.T) { t.Fatal("expected error for missing file") } } + +func TestParseConfig_CaptureHeaders(t *testing.T) { + yaml := ` +log: + capture_headers: + - X-Workspace-Slug + - X-Request-Source +` + cfg, err := ParseConfig([]byte(yaml)) + if err != nil { + t.Fatalf("ParseConfig: %v", err) + } + if len(cfg.Log.CaptureHeaders) != 2 { + t.Fatalf("CaptureHeaders len = %d, want 2", len(cfg.Log.CaptureHeaders)) + } + if cfg.Log.CaptureHeaders[0] != "X-Workspace-Slug" { + t.Errorf("CaptureHeaders[0] = %q, want X-Workspace-Slug", cfg.Log.CaptureHeaders[0]) + } +} + +func TestValidateCaptureHeaders_MaxExceeded(t *testing.T) { + headers := make([]string, 11) + for i := range headers { + headers[i] = fmt.Sprintf("X-Header-%d", i) + } + err := proxy.ValidateCaptureHeaders(headers) + if err == nil { + t.Fatal("expected error for >10 headers") + } + if !strings.Contains(err.Error(), "max 10") { + t.Errorf("error = %q, want mention of 
max 10", err.Error()) + } +} + +func TestValidateCaptureHeaders_SensitiveRejected(t *testing.T) { + tests := []string{"Authorization", "proxy-authorization", "Cookie"} + for _, h := range tests { + t.Run(h, func(t *testing.T) { + err := proxy.ValidateCaptureHeaders([]string{h}) + if err == nil { + t.Fatalf("expected error for sensitive header %q", h) + } + if !strings.Contains(err.Error(), "sensitive") { + t.Errorf("error = %q, want mention of sensitive", err.Error()) + } + }) + } +} + +func TestValidateCaptureHeaders_Valid(t *testing.T) { + err := proxy.ValidateCaptureHeaders([]string{"X-Workspace-Slug", "X-Request-Source"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestValidateCaptureHeaders_Empty(t *testing.T) { + err := proxy.ValidateCaptureHeaders(nil) + if err != nil { + t.Fatalf("unexpected error for nil: %v", err) + } + err = proxy.ValidateCaptureHeaders([]string{}) + if err != nil { + t.Fatalf("unexpected error for empty: %v", err) + } +} + +func TestValidateCaptureHeaders_Duplicate(t *testing.T) { + err := proxy.ValidateCaptureHeaders([]string{"X-Workspace-Slug", "x-workspace-slug"}) + if err == nil { + t.Fatal("expected error for duplicate headers") + } + if !strings.Contains(err.Error(), "duplicate") { + t.Errorf("error = %q, want mention of duplicate", err.Error()) + } +} diff --git a/gatekeeper.go b/gatekeeper.go index c2be6b4..51ee927 100644 --- a/gatekeeper.go +++ b/gatekeeper.go @@ -24,6 +24,7 @@ import ( "strings" "sync" "time" + "unicode/utf8" "github.com/majorcontext/gatekeeper/credentialsource" "github.com/majorcontext/gatekeeper/proxy" @@ -238,6 +239,9 @@ func New(ctx context.Context, cfg *Config, version string) (*Server, error) { if data.RunID != "" { attrs = append(attrs, slog.String("run_id", data.RunID)) } + if data.UserID != "" { + attrs = append(attrs, slog.String("user_id", data.UserID)) + } if data.AuthInjected { attrs = append(attrs, slog.Bool("credential_injected", true)) var headerNames []string @@ 
-266,6 +270,23 @@ func New(ctx context.Context, cfg *Config, version string) (*Server, error) { attrs = append(attrs, slog.String("error", data.Err.Error())) } + // Append captured request headers as structured log attributes. + if data.RequestHeaders != nil { + for _, h := range cfg.Log.CaptureHeaders { + if v := data.RequestHeaders.Get(h); v != "" { + if len(v) > 256 { + // Truncate at a valid UTF-8 boundary to avoid splitting multi-byte characters. + v = v[:256] + for len(v) > 0 && !utf8.ValidString(v) { + v = v[:len(v)-1] + } + } + key := strings.ReplaceAll(strings.ToLower(h), "-", "_") + attrs = append(attrs, slog.String(key, v)) + } + } + } + level := slog.LevelInfo if data.Err != nil || data.StatusCode >= 500 { level = slog.LevelError @@ -297,6 +318,9 @@ func New(ctx context.Context, cfg *Config, version string) (*Server, error) { if data.RunID != "" { spanAttrs = append(spanAttrs, attribute.String("run_id", data.RunID)) } + if data.UserID != "" { + spanAttrs = append(spanAttrs, attribute.String("user_id", data.UserID)) + } var headerNames []string for name := range data.InjectedHeaders { headerNames = append(headerNames, name) @@ -367,6 +391,13 @@ func New(ctx context.Context, cfg *Config, version string) (*Server, error) { } } + // Configure capture headers if specified. + if len(cfg.Log.CaptureHeaders) > 0 { + if err := p.SetCaptureHeaders(cfg.Log.CaptureHeaders); err != nil { + return nil, fmt.Errorf("capture_headers: %w", err) + } + } + // Configure network policy if specified. 
if cfg.Network.Policy != "" { p.SetNetworkPolicy(cfg.Network.Policy, cfg.Network.Allow, nil) diff --git a/proxy/intercept_test.go b/proxy/intercept_test.go index 5697eae..e1718b5 100644 --- a/proxy/intercept_test.go +++ b/proxy/intercept_test.go @@ -10,6 +10,7 @@ import ( "net" "net/http" "net/http/httptest" + "net/url" "strings" "sync" "testing" @@ -611,3 +612,115 @@ func TestIntercept_WebSocketUpgrade(t *testing.T) { t.Error("expected credential injection on upgrade request") } } + +func TestIntercept_CaptureHeaders_StrippedBeforeForwarding(t *testing.T) { + var receivedHeaders http.Header + setup := newInterceptTestSetup(t, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + receivedHeaders = r.Header.Clone() + w.Write([]byte("ok")) + })) + + setup.Proxy.SetCaptureHeaders([]string{"X-Workspace-Slug", "X-Request-Source"}) + + var logged RequestLogData + setup.Proxy.SetLogger(func(data RequestLogData) { + logged = data + }) + + req, _ := http.NewRequest("GET", setup.Backend.URL+"/test", nil) + req.Header.Set("X-Workspace-Slug", "sneaky-plum") + req.Header.Set("X-Request-Source", "agent") + req.Header.Set("X-Other", "keep-this") + + resp, err := setup.Client.Do(req) + if err != nil { + t.Fatalf("request: %v", err) + } + defer resp.Body.Close() + io.ReadAll(resp.Body) + + // Verify capture headers were stripped before forwarding. + if receivedHeaders.Get("X-Workspace-Slug") != "" { + t.Error("X-Workspace-Slug should be stripped before forwarding (CONNECT path)") + } + if receivedHeaders.Get("X-Request-Source") != "" { + t.Error("X-Request-Source should be stripped before forwarding (CONNECT path)") + } + if receivedHeaders.Get("X-Other") != "keep-this" { + t.Errorf("X-Other = %q, want keep-this (non-capture headers should pass through)", receivedHeaders.Get("X-Other")) + } + + // Verify capture headers are preserved in log data (from pre-strip snapshot). 
+ if logged.RequestHeaders == nil { + t.Fatal("RequestHeaders is nil") + } + if got := logged.RequestHeaders.Get("X-Workspace-Slug"); got != "sneaky-plum" { + t.Errorf("logged RequestHeaders[X-Workspace-Slug] = %q, want sneaky-plum", got) + } +} + +func TestIntercept_CaptureHeaders_PreservesInjectedCredential(t *testing.T) { + var receivedAPIKey string + setup := newInterceptTestSetup(t, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + receivedAPIKey = r.Header.Get("X-Api-Key") + w.Write([]byte("ok")) + })) + + // Configure both a credential injection and a capture header for the same header name. + setup.Proxy.SetCredentialHeader(setup.BackendHost, "X-Api-Key", "secret-key-123") + setup.Proxy.SetCaptureHeaders([]string{"X-Api-Key"}) + + resp, err := setup.Client.Get(setup.Backend.URL + "/test") + if err != nil { + t.Fatalf("request: %v", err) + } + defer resp.Body.Close() + io.ReadAll(resp.Body) + + // The injected credential must survive the capture header stripping. + if receivedAPIKey != "secret-key-123" { + t.Errorf("X-Api-Key = %q, want %q (injected credential should not be stripped)", receivedAPIKey, "secret-key-123") + } +} + +func TestIntercept_UserID_ContextResolver(t *testing.T) { + setup := newInterceptTestSetup(t, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write([]byte("ok")) + })) + + setup.Proxy.SetContextResolver(func(token string) (*RunContextData, bool) { + if token == "my-token" { + return &RunContextData{Policy: "permissive"}, true + } + return nil, false + }) + + var logged RequestLogData + setup.Proxy.SetLogger(func(data RequestLogData) { + logged = data + }) + + // Rebuild the client with proxy auth credentials (user:token). 
+ proxyURL := mustParseURL(setup.ProxyServer.URL) + proxyURL.User = url.UserPassword("alice", "my-token") + + clientCAs := x509.NewCertPool() + clientCAs.AppendCertsFromPEM(setup.CA.certPEM) + client := &http.Client{ + Transport: &http.Transport{ + Proxy: http.ProxyURL(proxyURL), + TLSClientConfig: &tls.Config{RootCAs: clientCAs}, + }, + } + + resp, err := client.Get(setup.Backend.URL + "/test") + if err != nil { + t.Fatalf("request: %v", err) + } + defer resp.Body.Close() + io.ReadAll(resp.Body) + + if logged.UserID != "alice" { + t.Errorf("UserID = %q, want %q (CONNECT path)", logged.UserID, "alice") + } +} diff --git a/proxy/proxy.go b/proxy/proxy.go index 7331f60..db422cc 100644 --- a/proxy/proxy.go +++ b/proxy/proxy.go @@ -59,6 +59,7 @@ type contextKey int const ( runContextKey contextKey = iota requestIDKey + userIDKey ) // newRequestID generates a TypeID with prefix "req" (e.g., "req_01h455vb4pex5vsknk084sn02q"). @@ -83,6 +84,19 @@ func RequestIDFromContext(ctx context.Context) string { return "" } +// withUserID returns a new context with the given user ID. +func withUserID(ctx context.Context, id string) context.Context { + return context.WithValue(ctx, userIDKey, id) +} + +// UserIDFromContext extracts the user ID from a context, or empty string. +func UserIDFromContext(ctx context.Context) string { + if id, ok := ctx.Value(userIDKey).(string); ok { + return id + } + return "" +} + // ResponseTransformer transforms HTTP responses before body capture. // Cast to *http.Request and *http.Response in the transformer implementation. // Returns the modified response and true if transformed, or original and false. 
@@ -158,6 +172,7 @@ type RequestLogData struct { Denied bool // True if request was denied by network/keep policy DenyReason string // Why the request was denied (e.g., "network_policy", "keep_policy") RunID string // Run ID from per-run context (daemon mode) + UserID string // User ID from proxy auth username Ctx context.Context // Request context (for OTel span extraction, may be nil) } @@ -263,8 +278,8 @@ func FilterHeaders(headers http.Header, injectedHeaders map[string]bool) map[str // logRequest is a helper that logs request data if a logger is configured. // The ctxReq parameter provides the RunContextData (from CONNECT or HTTP request context) // for extracting the RunID; it may be nil when context is unavailable. -// The data struct is passed by value; this method enriches it with RunID and Ctx -// from ctxReq before forwarding to the logger callback. +// The data struct is passed by value; this method enriches it with RunID, UserID, +// and Ctx from ctxReq before forwarding to the logger callback. func (p *Proxy) logRequest(ctxReq *http.Request, data RequestLogData) { if p.logger == nil { return @@ -273,6 +288,9 @@ func (p *Proxy) logRequest(ctxReq *http.Request, data RequestLogData) { if rc := getRunContext(ctxReq); rc != nil { data.RunID = rc.RunID } + if uid := UserIDFromContext(ctxReq.Context()); uid != "" { + data.UserID = uid + } data.Ctx = ctxReq.Context() if data.RequestID == "" { data.RequestID = RequestIDFromContext(ctxReq.Context()) @@ -408,6 +426,7 @@ type Proxy struct { contextResolver ContextResolver // optional per-run credential resolver policyLogger PolicyLogger // optional policy decision logger upstreamCAs *x509.CertPool // optional CA pool for upstream TLS verification + captureHeaders []string // headers to capture in logs and strip before forwarding } // NewProxy creates a new auth proxy. 
@@ -503,6 +522,45 @@ func (p *Proxy) SetContextResolver(resolver ContextResolver) { p.contextResolver = resolver } +// SetCaptureHeaders configures headers to capture in request logs and strip +// before forwarding upstream. Header matching is case-insensitive. +// Returns an error if any header is sensitive (Authorization, Proxy-Authorization, Cookie), +// if there are more than 10 headers, or if duplicates are present. +func (p *Proxy) SetCaptureHeaders(headers []string) error { + if err := ValidateCaptureHeaders(headers); err != nil { + return err + } + p.captureHeaders = headers + return nil +} + +// sensitiveHeaders are headers that must never be captured, even if configured. +var sensitiveHeaders = map[string]bool{ + "authorization": true, + "proxy-authorization": true, + "cookie": true, +} + +// ValidateCaptureHeaders checks a capture headers list for validity. +// Rejects sensitive headers, more than 10 entries, and case-insensitive duplicates. +func ValidateCaptureHeaders(headers []string) error { + if len(headers) > 10 { + return fmt.Errorf("capture_headers: max 10 headers allowed, got %d", len(headers)) + } + seen := make(map[string]bool, len(headers)) + for _, h := range headers { + lower := strings.ToLower(h) + if sensitiveHeaders[lower] { + return fmt.Errorf("capture_headers: %q is a sensitive header and cannot be captured", h) + } + if seen[lower] { + return fmt.Errorf("capture_headers: duplicate header %q", h) + } + seen[lower] = true + } + return nil +} + // ResolveContext looks up per-run context data by auth token. // Returns nil, false when no resolver is set or the token is not found. 
func (p *Proxy) ResolveContext(token string) (*RunContextData, bool) { @@ -1296,15 +1354,26 @@ func (p *Proxy) ServeHTTP(w http.ResponseWriter, r *http.Request) { return } ctx := context.WithValue(r.Context(), runContextKey, rc) + if uid := extractProxyUsername(r); uid != "" { + ctx = withUserID(ctx, uid) + } r = r.WithContext(ctx) } else if p.delegateAuth { if !hasBasicProxyAuth(r) { writeProxyAuthRequired(w, "Proxy authentication required") return } + if uid := extractProxyUsername(r); uid != "" { + r = r.WithContext(withUserID(r.Context(), uid)) + } } else if p.authToken != "" && !p.checkAuth(r) { writeProxyAuthRequired(w, "Proxy authentication required") return + } else if p.authToken != "" { + // Auth passed — extract username if present. + if uid := extractProxyUsername(r); uid != "" { + r = r.WithContext(withUserID(r.Context(), uid)) + } } // Handle AWS credential endpoint @@ -1388,6 +1457,25 @@ func extractProxyToken(r *http.Request) (string, bool) { return "", false } +// extractProxyUsername extracts the username from a Basic Proxy-Authorization header. +// For HTTP_PROXY=http://user:token@host, this returns "user". +// Returns empty string for Bearer auth or if no valid username is found. +func extractProxyUsername(r *http.Request) string { + auth := r.Header.Get("Proxy-Authorization") + if auth == "" || !strings.HasPrefix(auth, "Basic ") { + return "" + } + decoded, err := base64.StdEncoding.DecodeString(auth[6:]) + if err != nil { + return "" + } + parts := strings.SplitN(string(decoded), ":", 2) + if len(parts) != 2 { + return "" + } + return parts[0] +} + // checkAuth validates the Proxy-Authorization header against the required token. // Accepts both Basic auth (from HTTP_PROXY=http://moat:token@host) and Bearer format. // Uses constant-time comparison to prevent timing attacks. 
@@ -1451,16 +1539,17 @@ func (p *Proxy) handleHTTP(w http.ResponseWriter, r *http.Request) { if err != nil { http.Error(w, "credential resolution failed", http.StatusBadGateway) p.logRequest(r, RequestLogData{ - Method: r.Method, - URL: r.URL.String(), - Host: host, - Path: r.URL.Path, - RequestType: "http", - StatusCode: http.StatusBadGateway, - Duration: time.Since(start), - RequestSize: r.ContentLength, - ResponseSize: -1, - Err: err, + Method: r.Method, + URL: r.URL.String(), + Host: host, + Path: r.URL.Path, + RequestType: "http", + StatusCode: http.StatusBadGateway, + Duration: time.Since(start), + RequestHeaders: r.Header.Clone(), + RequestSize: r.ContentLength, + ResponseSize: -1, + Err: err, }) return } @@ -1561,6 +1650,16 @@ func (p *Proxy) handleHTTP(w http.ResponseWriter, r *http.Request) { } outReq.Header.Del(headerName) } + + // Strip capture headers — they're metadata for the proxy, not the destination. + // Skip headers that were just injected as credentials. + for _, headerName := range p.captureHeaders { + if credResult.InjectedHeaders[strings.ToLower(headerName)] { + continue + } + outReq.Header.Del(headerName) + } + // Apply token substitution if configured. // Substitution targets outReq (not r), so r.URL.String() used for logging // below still contains the placeholder, not the real token. @@ -1982,6 +2081,15 @@ func (p *Proxy) handleConnectWithInterception(w http.ResponseWriter, r *http.Req pr.Out.Header.Del(headerName) } + // Strip capture headers — they're metadata for the proxy, not the destination. + // Skip headers that were just injected as credentials. + for _, headerName := range p.captureHeaders { + if credResult.InjectedHeaders[strings.ToLower(headerName)] { + continue + } + pr.Out.Header.Del(headerName) + } + // Capture URL before token substitution so logs don't contain real tokens. 
logURL := pr.Out.URL.String() ctx = context.WithValue(pr.Out.Context(), interceptLogURLKey{}, logURL) @@ -2074,6 +2182,10 @@ func (p *Proxy) handleConnectWithInterception(w http.ResponseWriter, r *http.Req if logURL == "" { logURL = req.URL.String() } + preHeaders, _ := req.Context().Value(interceptPreInjHeadersKey{}).(http.Header) + if preHeaders == nil { + preHeaders = req.Header.Clone() + } p.logRequest(r, RequestLogData{ RequestID: req.Header.Get("X-Request-Id"), Method: req.Method, @@ -2083,6 +2195,7 @@ func (p *Proxy) handleConnectWithInterception(w http.ResponseWriter, r *http.Req RequestType: "connect", StatusCode: http.StatusBadGateway, Duration: time.Since(reqStartFromContext(req.Context())), + RequestHeaders: preHeaders, RequestSize: req.ContentLength, ResponseSize: -1, Err: err, @@ -2105,18 +2218,19 @@ func (p *Proxy) handleConnectWithInterception(w http.ResponseWriter, r *http.Req // Network policy check. if !p.checkNetworkPolicyForRequest(r, host, connectPort, req.Method, req.URL.Path) { p.logRequest(r, RequestLogData{ - RequestID: innerReqID, - Method: req.Method, - URL: "https://" + r.Host + req.URL.RequestURI(), - Host: host, - Path: req.URL.Path, - RequestType: "connect", - StatusCode: http.StatusProxyAuthRequired, - Duration: time.Since(reqStart), - RequestSize: req.ContentLength, - ResponseSize: -1, - Denied: true, - DenyReason: "Request blocked by network policy: " + req.Method + " " + host + req.URL.Path, + RequestID: innerReqID, + Method: req.Method, + URL: "https://" + r.Host + req.URL.RequestURI(), + Host: host, + Path: req.URL.Path, + RequestType: "connect", + StatusCode: http.StatusProxyAuthRequired, + Duration: time.Since(reqStart), + RequestHeaders: req.Header.Clone(), + RequestSize: req.ContentLength, + ResponseSize: -1, + Denied: true, + DenyReason: "Request blocked by network policy: " + req.Method + " " + host + req.URL.Path, }) p.logPolicy(r, "network", "http.request", "", req.Method+" "+host+req.URL.Path) 
w.Header().Set("X-Moat-Blocked", "request-rule") @@ -2134,19 +2248,20 @@ func (p *Proxy) handleConnectWithInterception(w http.ResponseWriter, r *http.Req result, evalErr := keeplib.SafeEvaluate(eng, call, "http") if evalErr != nil { p.logRequest(r, RequestLogData{ - RequestID: innerReqID, - Method: req.Method, - URL: "https://" + r.Host + req.URL.RequestURI(), - Host: host, - Path: req.URL.Path, - RequestType: "connect", - StatusCode: http.StatusForbidden, - Duration: time.Since(reqStart), - RequestSize: req.ContentLength, - ResponseSize: -1, - Denied: true, - DenyReason: "Keep policy evaluation error", - Err: evalErr, + RequestID: innerReqID, + Method: req.Method, + URL: "https://" + r.Host + req.URL.RequestURI(), + Host: host, + Path: req.URL.Path, + RequestType: "connect", + StatusCode: http.StatusForbidden, + Duration: time.Since(reqStart), + RequestHeaders: req.Header.Clone(), + RequestSize: req.ContentLength, + ResponseSize: -1, + Denied: true, + DenyReason: "Keep policy evaluation error", + Err: evalErr, }) p.logPolicy(r, "http", "http.request", "evaluation-error", "Policy evaluation failed") w.Header().Set("X-Moat-Blocked", "keep-policy") @@ -2157,18 +2272,19 @@ func (p *Proxy) handleConnectWithInterception(w http.ResponseWriter, r *http.Req } if result.Decision == keeplib.Deny { p.logRequest(r, RequestLogData{ - RequestID: innerReqID, - Method: req.Method, - URL: "https://" + r.Host + req.URL.RequestURI(), - Host: host, - Path: req.URL.Path, - RequestType: "connect", - StatusCode: http.StatusForbidden, - Duration: time.Since(reqStart), - RequestSize: req.ContentLength, - ResponseSize: -1, - Denied: true, - DenyReason: "Keep policy denied: " + result.Rule + " " + result.Message, + RequestID: innerReqID, + Method: req.Method, + URL: "https://" + r.Host + req.URL.RequestURI(), + Host: host, + Path: req.URL.Path, + RequestType: "connect", + StatusCode: http.StatusForbidden, + Duration: time.Since(reqStart), + RequestHeaders: req.Header.Clone(), + RequestSize: 
req.ContentLength, + ResponseSize: -1, + Denied: true, + DenyReason: "Keep policy denied: " + result.Rule + " " + result.Message, }) p.logPolicy(r, "http", "http.request", result.Rule, result.Message) w.Header().Set("X-Moat-Blocked", "keep-policy") @@ -2194,17 +2310,18 @@ func (p *Proxy) handleConnectWithInterception(w http.ResponseWriter, r *http.Req w.WriteHeader(http.StatusBadGateway) fmt.Fprint(w, "credential resolution failed\n") p.logRequest(r, RequestLogData{ - RequestID: innerReqID, - Method: req.Method, - URL: "https://" + r.Host + req.URL.RequestURI(), - Host: host, - Path: req.URL.Path, - RequestType: "connect", - StatusCode: http.StatusBadGateway, - Duration: time.Since(reqStart), - RequestSize: req.ContentLength, - ResponseSize: -1, - Err: credErr, + RequestID: innerReqID, + Method: req.Method, + URL: "https://" + r.Host + req.URL.RequestURI(), + Host: host, + Path: req.URL.Path, + RequestType: "connect", + StatusCode: http.StatusBadGateway, + Duration: time.Since(reqStart), + RequestHeaders: req.Header.Clone(), + RequestSize: req.ContentLength, + ResponseSize: -1, + Err: credErr, }) return } diff --git a/proxy/proxy_test.go b/proxy/proxy_test.go index a390fdb..d7aa5e6 100644 --- a/proxy/proxy_test.go +++ b/proxy/proxy_test.go @@ -6,6 +6,7 @@ import ( "context" "crypto/tls" "crypto/x509" + "encoding/base64" "errors" "fmt" "io" @@ -1126,6 +1127,10 @@ func mustParseURL(s string) *url.URL { return u } +func basicAuth(user, pass string) string { + return base64.StdEncoding.EncodeToString([]byte(user + ":" + pass)) +} + // TestProxy_SetCredentialHeader tests custom header injection (e.g., x-api-key for Anthropic). 
func TestProxy_SetCredentialHeader(t *testing.T) { var receivedHeader string @@ -3407,3 +3412,211 @@ func TestProxy_CredentialResolverNoMatch(t *testing.T) { t.Errorf("Authorization should be empty for non-matching host, got %q", receivedAuth) } } + +func TestExtractProxyUsername(t *testing.T) { + tests := []struct { + name string + header string + want string + }{ + {"basic auth", "Basic " + basicAuth("alice", "secret"), "alice"}, + {"empty username", "Basic " + basicAuth("", "secret"), ""}, + {"bearer token", "Bearer some-token", ""}, + {"no header", "", ""}, + {"invalid base64", "Basic !!!invalid!!!", ""}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + r, _ := http.NewRequest("GET", "http://example.com", nil) + if tt.header != "" { + r.Header.Set("Proxy-Authorization", tt.header) + } + got := extractProxyUsername(r) + if got != tt.want { + t.Errorf("extractProxyUsername() = %q, want %q", got, tt.want) + } + }) + } +} + +func TestProxy_CanonicalLogLine_UserID_ContextResolver(t *testing.T) { + backend := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + defer backend.Close() + + p := NewProxy() + p.SetContextResolver(func(token string) (*RunContextData, bool) { + if token == "my-token" { + return &RunContextData{Policy: "permissive"}, true + } + return nil, false + }) + + var logged RequestLogData + p.SetLogger(func(data RequestLogData) { + logged = data + }) + + proxyServer := httptest.NewServer(p) + defer proxyServer.Close() + + proxyURL := mustParseURL(proxyServer.URL) + proxyURL.User = url.UserPassword("alice", "my-token") + + client := &http.Client{Transport: &http.Transport{Proxy: http.ProxyURL(proxyURL)}} + + resp, err := client.Get(backend.URL + "/test") + if err != nil { + t.Fatalf("request: %v", err) + } + resp.Body.Close() + + if logged.UserID != "alice" { + t.Errorf("UserID = %q, want %q", logged.UserID, "alice") + } +} + +func 
TestProxy_CanonicalLogLine_UserID_StaticAuthToken(t *testing.T) { + backend := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + defer backend.Close() + + p := NewProxy() + p.SetAuthToken("secret-token") + + var logged RequestLogData + p.SetLogger(func(data RequestLogData) { + logged = data + }) + + proxyServer := httptest.NewServer(p) + defer proxyServer.Close() + + proxyURL := mustParseURL(proxyServer.URL) + proxyURL.User = url.UserPassword("bob", "secret-token") + + client := &http.Client{Transport: &http.Transport{Proxy: http.ProxyURL(proxyURL)}} + + resp, err := client.Get(backend.URL + "/test") + if err != nil { + t.Fatalf("request: %v", err) + } + resp.Body.Close() + + if logged.UserID != "bob" { + t.Errorf("UserID = %q, want %q", logged.UserID, "bob") + } +} + +func TestProxy_CanonicalLogLine_UserID_BearerNoUsername(t *testing.T) { + backend := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + defer backend.Close() + + p := NewProxy() + p.SetContextResolver(func(token string) (*RunContextData, bool) { + if token == "bearer-token" { + return &RunContextData{Policy: "permissive"}, true + } + return nil, false + }) + + var logged RequestLogData + p.SetLogger(func(data RequestLogData) { + logged = data + }) + + proxyServer := httptest.NewServer(p) + defer proxyServer.Close() + + proxyURL := mustParseURL(proxyServer.URL) + client := &http.Client{Transport: &http.Transport{Proxy: http.ProxyURL(proxyURL)}} + + req, _ := http.NewRequest("GET", backend.URL+"/test", nil) + req.Header.Set("Proxy-Authorization", "Bearer bearer-token") + resp, err := client.Do(req) + if err != nil { + t.Fatalf("request: %v", err) + } + resp.Body.Close() + + if logged.UserID != "" { + t.Errorf("UserID = %q, want empty (Bearer has no username)", logged.UserID) + } +} + +func TestProxy_CaptureHeaders_StrippedBeforeForwarding(t *testing.T) { + var 
receivedHeaders http.Header + backend := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + receivedHeaders = r.Header.Clone() + w.WriteHeader(http.StatusOK) + })) + defer backend.Close() + + p := NewProxy() + p.SetCaptureHeaders([]string{"X-Workspace-Slug", "X-Request-Source"}) + + proxyServer := httptest.NewServer(p) + defer proxyServer.Close() + + client := &http.Client{Transport: &http.Transport{Proxy: http.ProxyURL(mustParseURL(proxyServer.URL))}} + + req, _ := http.NewRequest("GET", backend.URL+"/test", nil) + req.Header.Set("X-Workspace-Slug", "sneaky-plum") + req.Header.Set("X-Request-Source", "agent") + req.Header.Set("X-Other", "keep-this") + resp, err := client.Do(req) + if err != nil { + t.Fatalf("request: %v", err) + } + resp.Body.Close() + + if receivedHeaders.Get("X-Workspace-Slug") != "" { + t.Error("X-Workspace-Slug should be stripped before forwarding") + } + if receivedHeaders.Get("X-Request-Source") != "" { + t.Error("X-Request-Source should be stripped before forwarding") + } + if receivedHeaders.Get("X-Other") != "keep-this" { + t.Errorf("X-Other = %q, want keep-this (non-capture headers should pass through)", receivedHeaders.Get("X-Other")) + } +} + +func TestProxy_CaptureHeaders_AvailableInLogData(t *testing.T) { + backend := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + defer backend.Close() + + p := NewProxy() + p.SetCaptureHeaders([]string{"X-Workspace-Slug"}) + + var logged RequestLogData + p.SetLogger(func(data RequestLogData) { + logged = data + }) + + proxyServer := httptest.NewServer(p) + defer proxyServer.Close() + + client := &http.Client{Transport: &http.Transport{Proxy: http.ProxyURL(mustParseURL(proxyServer.URL))}} + + req, _ := http.NewRequest("GET", backend.URL+"/test", nil) + req.Header.Set("X-Workspace-Slug", "sneaky-plum") + resp, err := client.Do(req) + if err != nil { + t.Fatalf("request: %v", err) + } + 
resp.Body.Close() + + // RequestHeaders should contain the original headers (before stripping) + if logged.RequestHeaders == nil { + t.Fatal("RequestHeaders is nil") + } + if got := logged.RequestHeaders.Get("X-Workspace-Slug"); got != "sneaky-plum" { + t.Errorf("RequestHeaders[X-Workspace-Slug] = %q, want sneaky-plum", got) + } +}