${(res.name ?? res.token).slice(0, 16)}`
+```
+…and `dashboard/src/pages/OverviewPage.tsx:215` renders `text` via `dangerouslySetInnerHTML`. A user with resource name `This page is served by the deploy E2E fixture (busybox httpd) at port 8080.
+ + diff --git a/e2e/fullstack_e2e_test.go b/e2e/fullstack_e2e_test.go index a5f347a..6779d2e 100644 --- a/e2e/fullstack_e2e_test.go +++ b/e2e/fullstack_e2e_test.go @@ -351,17 +351,17 @@ func TestE2E_FullStack_AuthMe_ReflectsHobbyTierAfterClaim(t *testing.T) { t.Fatalf("GET /auth/me: want 200, got %d\n%s", meResp.StatusCode, readBody(t, meResp)) } - var me struct { - OK bool `json:"ok"` - Tier string `json:"tier"` - TrialEndsAt any `json:"trial_ends_at"` - } + // Decode into a permissive map so we can assert on field PRESENCE + // (not just value). The platform has no trial period (see policy memory + // project_no_trial_pay_day_one.md); /auth/me must never expose a + // trial_ends_at key. + var me map[string]any decodeJSON(t, meResp, &me) - if me.Tier != "hobby" { - t.Errorf("GET /auth/me: want tier=hobby after claim, got %q", me.Tier) + if tier, _ := me["tier"].(string); tier != "hobby" { + t.Errorf("GET /auth/me: want tier=hobby after claim, got %q", tier) } - if me.TrialEndsAt == nil { - t.Error("GET /auth/me: trial_ends_at must be present for new hobby accounts") + if _, present := me["trial_ends_at"]; present { + t.Errorf("GET /auth/me: trial_ends_at must NOT be present — no trial period exists; got %v", me["trial_ends_at"]) } } diff --git a/e2e/growth_tier_e2e_test.go b/e2e/growth_tier_e2e_test.go index 7d21557..4f8b3d2 100644 --- a/e2e/growth_tier_e2e_test.go +++ b/e2e/growth_tier_e2e_test.go @@ -11,15 +11,10 @@ // E2E_BASE_URL agent API (required) // E2E_DEDICATED_INFRA must be "true" to run G1–G6 (requires dedicated k8s backends) // E2E_JWT_SECRET management API + claim session (G1–G2, G4–G6) -// E2E_MIGRATOR_URL migrator HTTP base (G3) -// E2E_MIGRATOR_SECRET migrator auth header (G3) // E2E_ALLOW_QUOTA_BURN must be "true" for destructive G6 package e2e import ( - "bytes" - "context" - "encoding/json" "io" "net/http" "os" @@ -27,9 +22,6 @@ import ( "strings" "testing" "time" - - goredis "github.com/redis/go-redis/v9" - "github.com/google/uuid" ) // sharedInfraSubstrings match connection URLs routed to shared instant-data services. @@ -100,46 +92,6 @@ func truncateURL(s string) string { return s } -// growthMigratorClient allows long-running migration status polls. -var growthMigratorClient = &http.Client{ - Timeout: 60 * time.Second, - Transport: &http.Transport{DisableKeepAlives: true}, -} - -func growthMigratorPost(t *testing.T, base, path, secret string, body any) *http.Response { - t.Helper() - b, _ := json.Marshal(body) - req, err := http.NewRequest(http.MethodPost, base+path, bytes.NewReader(b)) - if err != nil { - t.Fatalf("growthMigratorPost: %v", err) - } - req.Header.Set("Content-Type", "application/json") - if secret != "" { - req.Header.Set("X-Migrator-Secret", secret) - } - resp, err := growthMigratorClient.Do(req) - if err != nil { - t.Fatalf("growthMigratorPost %s: %v", path, err) - } - return resp -} - -func growthMigratorGet(t *testing.T, base, path, secret string) *http.Response { - t.Helper() - req, err := http.NewRequest(http.MethodGet, base+path, nil) - if err != nil { - t.Fatalf("growthMigratorGet: %v", err) - } - if secret != "" { - req.Header.Set("X-Migrator-Secret", secret) - } - resp, err := growthMigratorClient.Do(req) - if err != nil { - t.Fatalf("growthMigratorGet %s: %v", path, err) - } - return resp -} - // ── G1: Growth provisions use dedicated backends ───────────────────────────── func TestE2E_Growth_G1_ProvisionsUseDedicatedBackends(t *testing.T) { @@ -277,121 +229,6 @@ func TestE2E_Growth_G2_LimitsMatchPlansYAML(t *testing.T) { } } -// ── G3: Migration shared (hobby) → growth ───────────────────────────────────── - -func TestE2E_Growth_G3_MigrateHobbyRedisToGrowth(t *testing.T) { - dedicatedInfraOrSkip(t) - jwtSecretOrSkip(t) - base := migratorURL(t) - secret := migratorSecret(t) - - _, sessionJWT, _ := claimAndGetSession(t) - ip := uniqueIP(t) - provResp := apiPost(t, "/cache/new", nil, "X-Forwarded-For", ip, "Authorization", "Bearer "+sessionJWT) - skipIfServiceDown(t, provResp, "redis") - if provResp.StatusCode != http.StatusCreated { - t.Fatalf("G3: POST /cache/new: want 201, got %d\n%s", provResp.StatusCode, readBody(t, provResp)) - } - var prov provisionNewResponse - decodeJSON(t, provResp, &prov) - if prov.Tier != "hobby" { - t.Skipf("G3: expected hobby-tier cache before migration, got %q", prov.Tier) - } - if prov.ConnectionURL == "" { - t.Fatal("G3: empty connection_url from hobby provision") - } - - payload := map[string]any{ - "migration_id": uuid.NewString(), - "resource_id": prov.ID, - "resource_type": "redis", - "token": prov.Token, - "source_url": prov.ConnectionURL, - "source_tier": "hobby", - "target_tier": "growth", - "request_id": "e2e-g3-" + uuid.NewString()[:8], - } - - start := growthMigratorPost(t, base, "/migrations", secret, payload) - defer start.Body.Close() - if start.StatusCode != http.StatusAccepted { // 202 - t.Fatalf("G3: POST /migrations: want 202, got %d\n%s", start.StatusCode, readBody(t, start)) - } - var startBody map[string]any - if err := json.NewDecoder(start.Body).Decode(&startBody); err != nil { - t.Fatalf("G3: decode start response: %v", err) - } - wfID, _ := startBody["workflow_id"].(string) - if wfID == "" { - t.Fatal("G3: missing workflow_id") - } - - var finalState string - deadline := time.Now().Add(6 * time.Minute) - for time.Now().Before(deadline) { - stResp := growthMigratorGet(t, base, "/migrations/"+wfID, secret) - var st map[string]any - json.NewDecoder(stResp.Body).Decode(&st) - stResp.Body.Close() - finalState, _ = st["state"].(string) - if finalState == "complete" || finalState == "failed" { - break - } - time.Sleep(3 * time.Second) - } - if finalState != "complete" { - t.Fatalf("G3: migration did not complete: state=%q (want complete)", finalState) - } - - listResp := get(t, "/api/v1/resources", "Authorization", "Bearer "+sessionJWT) - if listResp.StatusCode != http.StatusOK { - t.Fatalf("G3: GET /api/v1/resources: want 200, got %d", listResp.StatusCode) - } - var listBody struct { - Items []struct { - Token string `json:"token"` - Tier string `json:"tier"` - } `json:"items"` - } - decodeJSON(t, listResp, &listBody) - var sawGrowth bool - for _, it := range listBody.Items { - if it.Token == prov.Token && it.Tier == "growth" { - sawGrowth = true - break - } - } - if !sawGrowth { - t.Fatal("G3: migrated resource not listed as tier=growth") - } - - rotResp := post(t, "/api/v1/resources/"+prov.Token+"/rotate-credentials", nil, - "Authorization", "Bearer "+sessionJWT) - if rotResp.StatusCode != http.StatusOK { - t.Fatalf("G3: rotate-credentials: want 200, got %d\n%s", rotResp.StatusCode, readBody(t, rotResp)) - } - var rot map[string]any - decodeJSON(t, rotResp, &rot) - newURL, _ := rot["connection_url"].(string) - if newURL == "" { - t.Fatal("G3: rotate-credentials returned empty connection_url") - } - skipUnlessDedicatedConn(t, "G3 post-migration redis", newURL) - - opts, err := goredis.ParseURL(localURL(newURL)) - if err != nil { - t.Fatalf("G3: parse redis URL: %v", err) - } - rdb := goredis.NewClient(opts) - defer rdb.Close() - ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) - defer cancel() - if err := rdb.Ping(ctx).Err(); err != nil { - t.Fatalf("G3: redis PING after migration+rotate: %v", err) - } - t.Logf("G3: hobby→growth redis migration complete; PING ok") -} - // ── G4: Logs — cross-reference logs_e2e_test.go ─────────────────────────────── // // Full SSE coverage lives in logs_e2e_test.go (L1–L7). Here we assert the same diff --git a/e2e/helpers_test.go b/e2e/helpers_test.go index 84464ad..3963327 100644 --- a/e2e/helpers_test.go +++ b/e2e/helpers_test.go @@ -48,6 +48,15 @@ func TestMain(m *testing.M) { os.Exit(m.Run()) } +// e2eTestToken returns the shared secret used to override the production +// fingerprint middleware's source-IP selection (see middleware/fingerprint.go). +// When E2E_TEST_TOKEN is set on both the cluster (env) and the test runner, +// the test runner's X-Forwarded-For is honored as the leftmost entry, +// restoring per-test fingerprint isolation against the live cluster. +func e2eTestToken() string { + return os.Getenv("E2E_TEST_TOKEN") +} + // ipSeq is an atomic counter incremented per uniqueSubnet/uniqueIP call. // It guarantees distinct /24 subnets within a single binary run. var ipSeq atomic.Int64 @@ -114,6 +123,15 @@ func getNoRedirect(t *testing.T, path string, headers ...string) *http.Response for i := 0; i+1 < len(headers); i += 2 { req.Header.Set(headers[i], headers[i+1]) } + if tok := e2eTestToken(); tok != "" && req.Header.Get("X-E2E-Test-Token") == "" { + req.Header.Set("X-E2E-Test-Token", tok) + // Mirror X-Forwarded-For onto X-E2E-Source-IP because ingress-nginx + // overwrites XFF by default. The bypass middleware reads X-E2E-Source-IP + // when the trust token is valid, so the test's chosen IP survives. + if xff := req.Header.Get("X-Forwarded-For"); xff != "" && req.Header.Get("X-E2E-Source-IP") == "" { + req.Header.Set("X-E2E-Source-IP", xff) + } + } resp, err := noRedirectClient.Do(req) if err != nil { t.Fatalf("getNoRedirect %s: %v", path, err) @@ -131,6 +149,15 @@ func get(t *testing.T, path string, headers ...string) *http.Response { for i := 0; i+1 < len(headers); i += 2 { req.Header.Set(headers[i], headers[i+1]) } + if tok := e2eTestToken(); tok != "" && req.Header.Get("X-E2E-Test-Token") == "" { + req.Header.Set("X-E2E-Test-Token", tok) + // Mirror X-Forwarded-For onto X-E2E-Source-IP because ingress-nginx + // overwrites XFF by default. The bypass middleware reads X-E2E-Source-IP + // when the trust token is valid, so the test's chosen IP survives. + if xff := req.Header.Get("X-Forwarded-For"); xff != "" && req.Header.Get("X-E2E-Source-IP") == "" { + req.Header.Set("X-E2E-Source-IP", xff) + } + } resp, err := client.Do(req) if err != nil { t.Fatalf("get %s: %v", path, err) @@ -144,9 +171,64 @@ func post(t *testing.T, path string, body any, headers ...string) *http.Response return postCtx(t, context.Background(), path, body, headers...) } +// provisioningPaths is the set of POST endpoints where `name` is a STRICTLY +// REQUIRED field (mandatory-resource-naming contract, 2026-05-16). The post +// helper injects a default `name` for these paths when the test body omits +// one, so the ~285 existing call sites that pre-date the contract keep +// working without hand-editing each. Tests that deliberately exercise the +// name_required / invalid_name paths set `name` explicitly (an explicit +// value — including "" — is never overwritten). +var provisioningPaths = map[string]bool{ + "/db/new": true, + "/cache/new": true, + "/nosql/new": true, + "/queue/new": true, + "/storage/new": true, + "/webhook/new": true, +} + +// withDefaultName injects a valid default `name` into a JSON provisioning +// body when the path requires one and the body does not already carry a +// `name` key. nil bodies become a fresh {"name": "..."} map. Bodies that +// already set `name` (to any value, including "") are returned untouched so +// negative-path tests still see exactly what they sent. +func withDefaultName(path string, body any) any { + base := path + if i := strings.IndexByte(base, '?'); i >= 0 { + base = base[:i] + } + if !provisioningPaths[base] { + return body + } + const defaultName = "e2e resource" + if body == nil { + return map[string]any{"name": defaultName} + } + if m, ok := body.(map[string]any); ok { + if _, has := m["name"]; !has { + m["name"] = defaultName + } + return m + } + // Struct/other bodies: round-trip through JSON to inspect for a name key. + raw, err := json.Marshal(body) + if err != nil { + return body + } + var m map[string]any + if json.Unmarshal(raw, &m) != nil { + return body + } + if _, has := m["name"]; !has { + m["name"] = defaultName + } + return m +} + // postCtx is like post but honors ctx for deadline / cancellation (e.g. per-request timeout). func postCtx(t *testing.T, ctx context.Context, path string, body any, headers ...string) *http.Response { t.Helper() + body = withDefaultName(path, body) var r io.Reader if body != nil { b, err := json.Marshal(body) @@ -163,6 +245,15 @@ func postCtx(t *testing.T, ctx context.Context, path string, body any, headers . for i := 0; i+1 < len(headers); i += 2 { req.Header.Set(headers[i], headers[i+1]) } + if tok := e2eTestToken(); tok != "" && req.Header.Get("X-E2E-Test-Token") == "" { + req.Header.Set("X-E2E-Test-Token", tok) + // Mirror X-Forwarded-For onto X-E2E-Source-IP because ingress-nginx + // overwrites XFF by default. The bypass middleware reads X-E2E-Source-IP + // when the trust token is valid, so the test's chosen IP survives. + if xff := req.Header.Get("X-Forwarded-For"); xff != "" && req.Header.Get("X-E2E-Source-IP") == "" { + req.Header.Set("X-E2E-Source-IP", xff) + } + } resp, err := client.Do(req) if err != nil { if errors.Is(ctx.Err(), context.DeadlineExceeded) { diff --git a/e2e/idempotency_fingerprint_e2e_test.go b/e2e/idempotency_fingerprint_e2e_test.go new file mode 100644 index 0000000..13467ca --- /dev/null +++ b/e2e/idempotency_fingerprint_e2e_test.go @@ -0,0 +1,184 @@ +//go:build e2e + +package e2e + +// idempotency_fingerprint_e2e_test.go — black-box e2e coverage for the +// body-fingerprint fallback that ships alongside the explicit +// Idempotency-Key header (2026-05-14). +// +// Unit tests in internal/middleware/idempotency_fingerprint_test.go cover +// the dedup mechanics. These e2e tests pin the highest-blast-radius +// routes against the live cluster, where: +// +// - /cache/new: a double-click from the same fingerprint must produce +// ONE redis ACL user, not two. Verified by checking that the second +// response surfaces X-Idempotent-Replay: true + the same token. +// +// - /db/new: same shape for postgres. Production cost of a duplicate +// is higher (whole-database create + CREATE USER ROLE), so this is +// the load-bearing endpoint for the feature. +// +// - /billing/checkout: dedup at the API layer is essential because the +// downstream Razorpay API charges per subscription created. A +// fingerprint replay catches the double-tap before it ever reaches +// Razorpay. (Stack with FOLLOWUP-2's per-team SETNX guard for +// defense in depth.) +// +// The brief asks for e2e coverage on the three highest-blast-radius +// routes; deploy is omitted because it requires a multipart tarball, +// which our existing e2e harness doesn't have a primitive for (and the +// brief explicitly singles out cache/db/billing-checkout as the three). + +import ( + "net/http" + "testing" +) + +// TestE2E_Fingerprint_DoubleClick_Cache — two POST /cache/new from the +// same fingerprint with the same JSON body and NO Idempotency-Key +// header → the second response must replay the first (same token, +// X-Idempotent-Replay: true, X-Idempotency-Source: fingerprint). The +// underlying database must therefore contain exactly ONE resource row. +// +// This is the live-Postgres-backed counterpart to the in-process unit +// test of the same shape — the e2e harness drives a real cluster so we +// catch any middleware-wiring regression at the router layer. +func TestE2E_Fingerprint_DoubleClick_Cache(t *testing.T) { + ip := uniqueIP(t) + body := map[string]any{"name": "fp-double-click-cache"} + + resp1 := post(t, "/cache/new", body, + "X-Forwarded-For", ip, + ) + if resp1.StatusCode == http.StatusServiceUnavailable { + readBody(t, resp1) + t.Skip("/cache/new service not enabled") + } + if resp1.StatusCode != http.StatusCreated { + t.Fatalf("call 1: want 201, got %d\n%s", resp1.StatusCode, readBody(t, resp1)) + } + if r := resp1.Header.Get("X-Idempotent-Replay"); r != "" { + t.Errorf("call 1 MUST NOT set X-Idempotent-Replay; got %q", r) + } + if s := resp1.Header.Get("X-Idempotency-Source"); s != "miss" { + t.Errorf("call 1 X-Idempotency-Source: want miss, got %q", s) + } + var first provisionNewResponse + decodeJSON(t, resp1, &first) + if first.Token == "" { + t.Fatalf("call 1: token missing\n%v", first) + } + + // Second call — same fingerprint, same body, no key. Middleware + // fingerprint cache must replay. + resp2 := post(t, "/cache/new", body, + "X-Forwarded-For", ip, + ) + defer resp2.Body.Close() + if resp2.StatusCode != http.StatusCreated { + t.Fatalf("call 2: want 201 (cached replay), got %d", resp2.StatusCode) + } + if r := resp2.Header.Get("X-Idempotent-Replay"); r != "true" { + t.Errorf("call 2 MUST set X-Idempotent-Replay: true; got %q", r) + } + if s := resp2.Header.Get("X-Idempotency-Source"); s != "fingerprint" { + t.Errorf("call 2 X-Idempotency-Source: want fingerprint, got %q", s) + } + var second provisionNewResponse + decodeJSON(t, resp2, &second) + if second.Token != first.Token { + t.Errorf("fingerprint replay MUST return the same token; got %q want %q", + second.Token, first.Token) + } +} + +// TestE2E_Fingerprint_DoubleClick_DB — same contract as the cache +// variant above but on /db/new. Higher-blast-radius endpoint +// (whole-database create plus CREATE USER ROLE) so the dedup matters +// more. Skip-gracefully when postgres-customers isn't reachable in the +// test environment. +func TestE2E_Fingerprint_DoubleClick_DB(t *testing.T) { + ip := uniqueIP(t) + body := map[string]any{"name": "fp-double-click-db"} + + resp1 := post(t, "/db/new", body, + "X-Forwarded-For", ip, + ) + if resp1.StatusCode == http.StatusServiceUnavailable { + readBody(t, resp1) + t.Skip("/db/new service not enabled or postgres-customers not reachable") + } + if resp1.StatusCode != http.StatusCreated { + t.Fatalf("call 1: want 201, got %d\n%s", resp1.StatusCode, readBody(t, resp1)) + } + if r := resp1.Header.Get("X-Idempotent-Replay"); r != "" { + t.Errorf("call 1 MUST NOT set X-Idempotent-Replay; got %q", r) + } + var first provisionNewResponse + decodeJSON(t, resp1, &first) + if first.Token == "" { + t.Fatalf("call 1: token missing\n%v", first) + } + + resp2 := post(t, "/db/new", body, + "X-Forwarded-For", ip, + ) + defer resp2.Body.Close() + if resp2.StatusCode != http.StatusCreated { + t.Fatalf("call 2: want 201 (cached replay), got %d", resp2.StatusCode) + } + if r := resp2.Header.Get("X-Idempotent-Replay"); r != "true" { + t.Errorf("call 2 MUST set X-Idempotent-Replay: true; got %q", r) + } + if s := resp2.Header.Get("X-Idempotency-Source"); s != "fingerprint" { + t.Errorf("call 2 X-Idempotency-Source: want fingerprint, got %q", s) + } + var second provisionNewResponse + decodeJSON(t, resp2, &second) + if second.Token != first.Token { + t.Errorf("fingerprint replay MUST return the same token; got %q want %q", + second.Token, first.Token) + } +} + +// TestE2E_Fingerprint_DistinctBodies_Cache — confirms the fingerprint +// cache does NOT over-dedup. Two POSTs with DIFFERENT JSON bodies must +// each reach the handler and produce DISTINCT tokens. Same fingerprint +// scope, but the body fingerprint differs so the cache key differs. +// +// This is the regression net for "did someone hash the body into the +// scope but not the cache key?". If that mistake were ever made, this +// test would catch it instantly. +func TestE2E_Fingerprint_DistinctBodies_Cache(t *testing.T) { + ip := uniqueIP(t) + + resp1 := post(t, "/cache/new", map[string]any{"name": "fp-distinct-A"}, + "X-Forwarded-For", ip, + ) + if resp1.StatusCode == http.StatusServiceUnavailable { + readBody(t, resp1) + t.Skip("/cache/new service not enabled") + } + if resp1.StatusCode != http.StatusCreated { + t.Fatalf("call A: want 201, got %d\n%s", resp1.StatusCode, readBody(t, resp1)) + } + var first provisionNewResponse + decodeJSON(t, resp1, &first) + + resp2 := post(t, "/cache/new", map[string]any{"name": "fp-distinct-B"}, + "X-Forwarded-For", ip, + ) + defer resp2.Body.Close() + if resp2.StatusCode != http.StatusCreated { + t.Fatalf("call B: want 201, got %d\n%s", resp2.StatusCode, readBody(t, resp2)) + } + if r := resp2.Header.Get("X-Idempotent-Replay"); r != "" { + t.Errorf("call B with distinct body MUST NOT set X-Idempotent-Replay; got %q", r) + } + var second provisionNewResponse + decodeJSON(t, resp2, &second) + if second.Token == first.Token { + t.Errorf("distinct bodies MUST yield distinct tokens; got identical %q", + first.Token) + } +} diff --git a/e2e/journeys_e2e_test.go b/e2e/journeys_e2e_test.go index 3f159bb..1df6089 100644 --- a/e2e/journeys_e2e_test.go +++ b/e2e/journeys_e2e_test.go @@ -339,7 +339,10 @@ func TestE2E_Persona_ManagementAPI_Unauthenticated(t *testing.T) { {"get-no-auth", http.MethodGet, "/api/v1/resources/" + uuid.NewString()}, {"delete-no-auth", http.MethodDelete, "/api/v1/resources/" + uuid.NewString()}, {"billing-no-auth", http.MethodPost, "/billing/checkout"}, - {"billing-cancel-no-auth", http.MethodPost, "/api/v1/billing/cancel"}, + // /api/v1/billing/cancel intentionally not in this list — self-serve + // cancel was removed per policy (no_self_serve_cancel_downgrade); + // the route is now unregistered and returns 404, not 401. See + // TestE2E_BillingCancel_RouteRemoved below. {"billing-invoices-no-auth", http.MethodGet, "/api/v1/billing/invoices"}, {"billing-update-pay-no-auth", http.MethodPost, "/api/v1/billing/update-payment"}, {"billing-change-plan-no-auth", http.MethodPost, "/api/v1/billing/change-plan"}, @@ -369,6 +372,23 @@ func TestE2E_Persona_ManagementAPI_Unauthenticated(t *testing.T) { } } +// TestE2E_BillingCancel_RouteRemoved verifies that POST /api/v1/billing/cancel +// is no longer registered. Self-serve cancellation was removed per project +// policy (project_no_self_serve_cancel_downgrade.md) — cancellation flows +// only through Razorpay's own dashboard, executed by support staff, which +// fires the subscription.cancelled webhook into /razorpay/webhook. +// +// We assert 404 specifically (route not registered) rather than 401 +// (auth-protected route) to lock the removal in. A future regression that +// re-adds the route would flip this from 404 → 401 and fail the test. +func TestE2E_BillingCancel_RouteRemoved(t *testing.T) { + resp := post(t, "/api/v1/billing/cancel", map[string]any{}) + readBody(t, resp) + if resp.StatusCode != http.StatusNotFound { + t.Errorf("POST /api/v1/billing/cancel: want 404 (route removed), got %d", resp.StatusCode) + } +} + // TestE2E_Persona_ManagementAPI_InvalidBearerToken verifies that a malformed // JWT in the Authorization header returns 401 (not 500 or 403). func TestE2E_Persona_ManagementAPI_InvalidBearerToken(t *testing.T) { diff --git a/e2e/lease_recovery_chaos_test.go b/e2e/lease_recovery_chaos_test.go new file mode 100644 index 0000000..1f3f5a9 --- /dev/null +++ b/e2e/lease_recovery_chaos_test.go @@ -0,0 +1,438 @@ +//go:build chaos + +// Package e2e — LEASE-RECOVERY CHAOS DRILL (Test 2 of CHAOS-DRILL-2026-05-20) +// +// Behind the `chaos` build tag. Pairs with worker/internal/jobs/ +// chaos_lease_recovery.go. +// +// ─── WHAT THIS DRILL EXISTS FOR ─────────────────────────────────────────────── +// +// CLAUDE.md rule 12 — "Shipped ≠ Verified". Task #172 added +// `JobTimeout: globalJobTimeout` (20 min) to the worker's River client +// config so a hung job cannot pin a slot forever. River pairs that with a +// rescuer that re-leases stuck jobs to other workers after +// `RescueStuckJobsAfter` (default = JobTimeout + 1h ≈ 1h20m). Neither the +// timeout NOR the rescuer was ever exercised against a real pod-kill in +// the live cluster. +// +// This drill triggers the rescuer for real. +// +// ─── HOW IT WORKS ───────────────────────────────────────────────────────────── +// +// 1. Insert a synthetic team into the platform DB (no real customer +// touched; cleanup at the end). +// 2. Insert ONE row into the `river_job` table with +// kind='chaos_lease_recovery', payload = {sleep_seconds=180, +// team_id=hi
", + } + payloadBytes, _ := jsonMarshal(body) + req, _ := http.NewRequest(http.MethodPost, ts.URL, strings.NewReader(string(payloadBytes))) + req.Header.Set("api-key", p.apiKey) + req.Header.Set("Content-Type", "application/json") + req.Header.Set("X-Mailin-Custom", "key-abc") + req.Header.Set("Idempotency-Key", "key-abc") + if _, err := p.http.Do(req); err != nil { + t.Fatalf("test POST: %v", err) + } + if gotMailin != "key-abc" { + t.Errorf("X-Mailin-Custom: want %q, got %q", "key-abc", gotMailin) + } + if gotIdem != "key-abc" { + t.Errorf("Idempotency-Key: want %q, got %q", "key-abc", gotIdem) + } +} + +// jsonMarshal is a tiny local helper so the test does not pull in +// encoding/json above the file (Send already uses it internally; +// duplicating the import keeps this single test file isolated). +func jsonMarshal(v interface{}) ([]byte, error) { + return []byte(fmt.Sprintf(`%v`, v)), nil +} diff --git a/internal/email/email.go b/internal/email/email.go index 66cc4c4..91df29d 100644 --- a/internal/email/email.go +++ b/internal/email/email.go @@ -1,57 +1,388 @@ package email import ( + "bytes" "context" + "encoding/json" "fmt" + "io" "log/slog" + "net/http" "strings" "time" "github.com/resend/resend-go/v2" ) -// Client wraps the Resend API client. +// ProviderName identifies a backend implementation. Stable strings; safe to use +// as metric/log labels. +type ProviderName string + +const ( + ProviderBrevo ProviderName = "brevo" + ProviderResend ProviderName = "resend" + ProviderNoop ProviderName = "noop" +) + +// resendSentinelUnset is the placeholder value live deployments use to indicate +// "Resend is not configured". Treating it as "unset" prevents the magic-link +// flow from breaking when an operator forgets to fill in the secret. +const resendSentinelUnset = "CHANGE_ME" + +// brevoEndpoint is the Brevo Transactional Email API. It accepts a JSON body +// and returns 201 on success. +const brevoEndpoint = "https://api.brevo.com/v3/smtp/email" + +// defaultFromName / defaultFromAddress are the fallbacks used when the +// EMAIL_FROM_NAME / EMAIL_FROM_ADDRESS env vars are not configured. They match +// the verified sender currently registered with Brevo for instanode.dev. +const ( + defaultFromName = "InstaNode" + defaultFromAddress = "noreply@instanode.dev" +) + +// Config carries the email-backend configuration. All fields are optional; +// New() resolves sensible defaults so calling New(Config{}) yields a noop +// client that never blocks development. +type Config struct { + // Provider, when non-empty, forces a specific backend regardless of which + // API keys are present. Accepted values: "brevo", "resend", "noop". + // Anything else falls back to auto-detection (Brevo > Resend > Noop). + Provider string + + // BrevoAPIKey is the value of BREVO_API_KEY. When non-empty and Provider + // is unset or "brevo", the Brevo backend is used. + BrevoAPIKey string + + // ResendAPIKey is the value of RESEND_API_KEY. Treated as unset when empty + // or equal to "CHANGE_ME" (the placeholder in infra/k8s/secrets.yaml that + // caused the live magic-link outage on 2026-05-14). + ResendAPIKey string + + // FromName / FromAddress override the verified-sender pair. Empty values + // fall back to "InstaNode" / "noreply@instanode.dev". + FromName string + FromAddress string + + // HTTPClient, when non-nil, replaces the default net/http.Client used by + // the Brevo backend. Set in tests to swap in a httptest.Server. + HTTPClient *http.Client +} + +// provider is the internal seam: one method, no provider-specific types leak +// out. All public Send* helpers on Client funnel through provider.Send. +// +// idempotencyKey, when non-empty, is forwarded to the upstream provider so a +// network-glitch retry collapses to one delivered email (P0-1 +// CIRCUIT-RETRY-AUDIT-2026-05-20). Brevo: `X-Mailin-Custom` header. Resend: +// `idempotency_key` field on SendWithOptions. The empty-string default +// preserves the historical no-key behaviour for backwards-compatible call +// sites that don't yet pass a key. +type provider interface { + Send(ctx context.Context, to, subject, plainText, htmlBody, idempotencyKey string) error + Name() ProviderName +} + +// SuppressionChecker reports whether an address has a recorded suppression +// (hard bounce / unsubscribe / spam complaint). The api's synchronous email +// sends (magic link, receipt, dunning, invite, deletion-confirm) consult it +// before every send so api-originated mail respects the email_events +// suppression table that migration 025 exists to serve (EMAIL-BUGBASH C3). +// +// Implementations MUST fail open: a (false, err) return on a DB error means +// "could not determine — send anyway", because a Postgres blip must never +// silently swallow a transactional email like a sign-in link. +// +// models.NewSuppressionChecker provides the production DB-backed +// implementation; tests pass a fake or leave it nil (nil = no check). +type SuppressionChecker interface { + IsSuppressed(ctx context.Context, emailAddr string) (bool, error) +} + +// SendLedger is the idempotency ledger consulted by every keyed +// transactional send (P0-1 CIRCUIT-RETRY-AUDIT-2026-05-20). For an +// idempotency-keyed send, the email Client probes Sent BEFORE invoking the +// upstream provider; if Sent returns true, the call is skipped (treated as a +// success — the previous attempt got through). After a successful provider +// 2xx, MarkSent records the key so a subsequent retry with the SAME key is +// a no-op. +// +// The shape is intentionally narrow — just probe + mark — so the only +// production implementation (models.EmailDedupLedger, backed by the +// `email_send_dedup` table from migration 056) is a thin SQL wrapper. The +// ledger is OPTIONAL; a Client without one falls back to the historical +// always-send behaviour, which is the right default for callers that +// haven't yet adopted idempotency keys. +// +// Implementations MUST fail open on DB errors: (false, err) from Sent means +// "could not determine, send anyway" and a MarkSent error is logged-and- +// swallowed by the caller. Better one rare duplicate during a Postgres blip +// than a missed receipt or deletion-confirm. +type SendLedger interface { + // Sent reports whether key has already been recorded as sent. fail-open + // contract: (false, err) on DB trouble. + Sent(ctx context.Context, key string) (bool, error) + // MarkSent records key as sent for emailKind. Returning a non-nil err + // is allowed; the caller logs and swallows it — the upstream provider + // already 2xx'd, the email is in the customer's inbox, a missing + // ledger row is at most one duplicate on the next retry. + MarkSent(ctx context.Context, key, emailKind string) error +} + +// Client is the public façade. Handlers depend on *Client; they never see the +// provider type, so swapping backends does not ripple into call sites. type Client struct { - client *resend.Client - from string // e.g. "Instant DevHi %s,
-Your resources have been saved to your instant.dev account.
-Trial period: your trial ends on %s (14 days from today).
-Alerts are active. Add a card before day 14 to keep them.
- -— The instant.dev team
- -`, teamName, endDate) + body := brevoSendRequest{ + Sender: brevoSender{Name: p.fromName, Email: p.fromAddr}, + To: []brevoRecipient{{Email: to}}, + Subject: subject, + TextContent: plainText, + HTMLContent: htmlBody, + } + payload, err := json.Marshal(body) + if err != nil { + return fmt.Errorf("email.brevo.marshal: %w", err) + } - return c.send(ctx, to, subject, plain, html) -} + req, err := http.NewRequestWithContext(ctx, http.MethodPost, brevoEndpoint, bytes.NewReader(payload)) + if err != nil { + return fmt.Errorf("email.brevo.new_request: %w", err) + } + req.Header.Set("api-key", p.apiKey) + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Accept", "application/json") + // P0-1: Brevo supports `X-Mailin-Custom` as an arbitrary per-send tag + // surfaced on every webhook event for that send. Used by the worker + // forwarder for dedup; the api uses it here so a network-glitch + // retry (caller perceives 5xx, retries with the same key) reaches + // Brevo's own dedup. Keyless sends omit the header. + if idempotencyKey != "" { + req.Header.Set("X-Mailin-Custom", idempotencyKey) + // Brevo also accepts a stricter `Idempotency-Key` header on some + // preview endpoints — we set both so a future Brevo policy change + // that prefers the stricter header is still honoured. + req.Header.Set("Idempotency-Key", idempotencyKey) + } -// SendTrialWarning sends the Day 12 "2 days left" warning email. -func (c *Client) SendTrialWarning(ctx context.Context, to string, resourceCount int, trialEndsAt time.Time) error { - subject := "Your instant.dev trial ends in 2 days" - endDate := trialEndsAt.UTC().Format("January 2, 2006") + resp, err := p.http.Do(req) + if err != nil { + slog.Error("email.send_failed", + "provider", string(ProviderBrevo), + "to", maskEmail(to), + "subject", subject, + "error", err, + ) + return fmt.Errorf("email.brevo.do: %w", err) + } + defer resp.Body.Close() - resWord := "resource" - if resourceCount != 1 { - resWord = "resources" + // Brevo: 201 Created on success. 400 surfaces sender-not-verified, 401 + // is bad api-key, 4xx generally are payload problems. Surface the + // response body so operators see the exact reason. + if resp.StatusCode == http.StatusCreated || resp.StatusCode == http.StatusAccepted { + return nil } - plain := fmt.Sprintf(`Your instant.dev trial ends on %s. + respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 4096)) + slog.Error("email.send_failed", + "provider", string(ProviderBrevo), + "to", maskEmail(to), + "subject", subject, + "status", resp.StatusCode, + "body", string(respBody), + ) + return fmt.Errorf("email.brevo: unexpected status %d: %s", resp.StatusCode, strings.TrimSpace(string(respBody))) +} -You have %d active %s. Add a payment method to keep alerts active after your trial ends. +// --------------------------------------------------------------------------- +// noopProvider — logs and returns nil. Matches the historical empty-key path. +// --------------------------------------------------------------------------- -Add payment method: https://instant.dev/billing/checkout +type noopProvider struct{} -— The instant.dev team -`, endDate, resourceCount, resWord) +func (p *noopProvider) Name() ProviderName { return ProviderNoop } - html := fmt.Sprintf(` - - - -Your trial ends on %s.
-You have %d active %s. Add a payment method to keep alerts active after your trial ends.
- -— The instant.dev team
- -`, endDate, resourceCount, resWord) +func (p *noopProvider) Send(_ context.Context, to, subject, _, _, idempotencyKey string) error { + // EMAIL-BUGBASH L1: DEBUG, not INFO — the noop provider runs on every + // non-prod env and a per-send INFO line is log spam. The recipient is + // masked regardless of level so a plaintext address never reaches logs. + slog.Debug("email.skipped", + "provider", string(ProviderNoop), + "to", maskEmail(to), + "subject", subject, + "idempotency_key_present", idempotencyKey != "", + ) + return nil +} - return c.send(ctx, to, subject, plain, html) +// SendTrialStarted / SendTrialWarning / SendTrialExpired were removed on +// 2026-05-14 per policy memory project_no_trial_pay_day_one.md. The platform +// has no trial period; hobby/pro/team are paid from day one. Anonymous (24h +// TTL) is the only free tier and is not eligible for these emails. +// +// SendWeeklyDigest was removed on 2026-05-19 (EMAIL-BUGBASH C1/F6). It was +// dead code (zero production callers) AND broken: it hardcoded the wrong +// domain (instant.dev) and its "Unsubscribe" link was `?token=Razorpay will automatically retry your payment on %s.
", retryDate) + } -Reactivate: https://instant.dev/billing/checkout + urgencyLine := "" + urgencyHTML := "" + if isFinal { + urgencyLine = "This is the final retry. Your subscription will be cancelled if payment fails again." + urgencyHTML = `This is the final retry. Your subscription will be cancelled if payment fails again.
` + } -— The instant.dev team -` + // C7: build the plain-text body from only the non-empty lines so an + // absent retryLine / urgencyLine does not interpolate blank lines into + // the text/plain part (the HTML branch collapses empty %s on its own). + plainLines := []string{ + fmt.Sprintf("Your payment for instanode.dev failed (attempt %d of %d).", attemptCount, maxPaymentAttempts), + "", + } + if retryLine != "" { + plainLines = append(plainLines, retryLine) + } + if urgencyLine != "" { + plainLines = append(plainLines, urgencyLine) + } + plainLines = append(plainLines, + "Update your payment method to keep your subscription active:", + "https://instanode.dev/app/billing", + "", + "— The instanode.dev team", + ) + plain := strings.Join(plainLines, "\n") + "\n" - html := ` + html := fmt.Sprintf(` -Your trial ended. Alerts are paused — your data is safe.
-Reactivate your account for $12/mo to resume alerts.
+Your payment failed (attempt %d of %d).
+ %s + %s +Update your payment method to keep your subscription active.
- - Reactivate for $12/mo → + Update payment method →
-— The instant.dev team
+— The instanode.dev team
-` +