From 0f7a09918a86c9efe7e0a5c086ba1254f86ba634 Mon Sep 17 00:00:00 2001 From: Manas Srivastava Date: Fri, 29 May 2026 16:40:08 +0530 Subject: [PATCH 1/4] fix(auth): close CSRF on /auth/email/start + per-IP magic-link rate-limit (P0) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Auth P0 chain found by QA 2026-05-29. Two findings on the magic-link abuse surface, ship together. Findings closed in this PR: AUTH-163 (P0): /auth/email/start accepted Content-Type: application/ x-www-form-urlencoded and inserted a magic_links row. Combined with no Origin/Referer enforcement that was a textbook CSRF primitive: a malicious site could
to spam any arbitrary email with magic-links from the victim's IP. Fix: require Content-Type: application/json (charset suffix permitted). Form-encoded bodies are rejected with 400 invalid_content_type BEFORE the body is parsed or the DB is touched. The legitimate dashboard / CLI / agent flows all set application/json — the only callers that send urlencoded bodies are third-party-origin browser forms, which is exactly the attack surface we want to close. AUTH-107 / AUTH-097 (P0/P1): no CAPTCHA on /auth/email/start AND no per-IP magic-link rate-limit. Without CAPTCHA (deferred — the Brevo sender gap (project memory project_brevo_sender_not_validated) is the real blocker, and CAPTCHA on a non-deliverable email path just frustrates legitimate users), per-IP limit is the load-bearing abuse-defence. Existing per-email limit (5/hr/email) is trivially bypassed by rotating the email — an attacker can spam any number of 3rd-party addresses from a single IP. Fix: per-IP rate limit of magicLinkPerIPRateLimit (5) per magicLinkPerIPRateLimitWindow (1h), keyed by SHA-256 of c.IP() in Redis under magicLinkPerIPRLKeyPrefix ("ml:ip:rl"). Runs AFTER email validation (so a typo doesn't burn budget) but BEFORE the per-email limit (so it fires even when the per-email budget is fresh). Fail- open on Redis error per CLAUDE.md convention 1. Limited path returns 202 — identical to the success path — so an attacker gains no enumeration signal. Operator visibility via metrics.MagicLinkEmailRateLimited counter + magic_link.start. ip_rate_limited structured log. Regression tests (each reproduces the original exploit and asserts the fix blocks it): TestAuthEmailStart_RejectsFormUrlencoded TestAuthEmailStart_AcceptsJSONWithCharset (guardrail — legit flow) TestAuthEmailStart_PerIPRateLimit (counter-level + handler-level) Rule-22 surface checklist: - api/internal/handlers/magic_link.go (handler) - OpenAPI: handlers/openapi.go not updated (no new public envelope — invalid_content_type follows the existing error-code envelope shape; per-IP-limited still returns the existing 202 success shape). Follow-up: surface invalid_content_type in codeToAgentAction so agents get the right "set Content-Type: application/json" prose without consulting the spec. Same polish PR as the PR-1 reauth_required entry. - content/llms.txt not impacted - instanode-web pricing not impacted - NR alert: bump ml:ip:rl rate-limit counter into the existing magic_link.email_rate_limited alert (counter reused). Follow-up to split into per-IP and per-email tiles for operator clarity. Live-verify (rule 14) and curl evidence (per finding) attached in PR body. Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/handlers/auth_csrf_rl_authp0_test.go | 191 ++++++++++++++++++ internal/handlers/magic_link.go | 96 +++++++++ 2 files changed, 287 insertions(+) create mode 100644 internal/handlers/auth_csrf_rl_authp0_test.go diff --git a/internal/handlers/auth_csrf_rl_authp0_test.go b/internal/handlers/auth_csrf_rl_authp0_test.go new file mode 100644 index 0000000..1de8c9c --- /dev/null +++ b/internal/handlers/auth_csrf_rl_authp0_test.go @@ -0,0 +1,191 @@ +package handlers + +// auth_csrf_rl_authp0_test.go — regression tests for AUTH-163, AUTH-107, +// AUTH-097 shipped 2026-05-29. +// +// Lives in package handlers so it can use the same in-package +// recordingMailer / setupCoverageRedis fixtures established by +// magic_link_coverage_test.go and cli_auth_coverage_test.go. + +import ( + "context" + "encoding/json" + "errors" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/gofiber/fiber/v2" + "github.com/redis/go-redis/v9" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "instant.dev/internal/config" + "instant.dev/internal/middleware" +) + +// csrfRLApp wires the magic-link Start route with an isolated Redis +// connection so the per-IP counter can be observed independently from +// other tests. setupCoverageRedis flushes-on-cleanup via DB-14 isolation. +func csrfRLApp(t *testing.T, rdb *redis.Client) (*fiber.App, *recordingMailer) { + t.Helper() + cfg := &config.Config{JWTSecret: logoutTestSecret} + authH := NewAuthHandler(nil, cfg) + mailer := &recordingMailer{} + h := NewMagicLinkHandlerWithMailerAndRedis(nil, cfg, mailer, authH, rdb) + app := fiber.New(fiber.Config{ + ErrorHandler: func(c *fiber.Ctx, err error) error { + if errors.Is(err, ErrResponseWritten) { + return nil + } + code := fiber.StatusInternalServerError + if e, ok := err.(*fiber.Error); ok { + code = e.Code + } + return c.Status(code).JSON(fiber.Map{"ok": false, "error": err.Error()}) + }, + }) + app.Use(middleware.RequestID()) + app.Post("/auth/email/start", h.Start) + return app, mailer +} + +// flushPerIPRL clears the per-IP Redis budget so tests in the same +// package don't poison each other. Mirrors the cleanup the per-email +// limit relies on (setupCoverageRedis uses DB 14 which is otherwise +// untouched, but the magicLinkPerIPRLKeyPrefix is new and not auto- +// flushed by any existing helper — so we clear it explicitly). +func flushPerIPRL(t *testing.T, rdb *redis.Client) { + t.Helper() + ctx := context.Background() + iter := rdb.Scan(ctx, 0, magicLinkPerIPRLKeyPrefix+":*", 1000).Iterator() + for iter.Next(ctx) { + _ = rdb.Del(ctx, iter.Val()).Err() + } +} + +// TestAuthEmailStart_RejectsFormUrlencoded — AUTH-163. +// +// Original exploit (QA confirmed): +// +// POST /auth/email/start +// Content-Type: application/x-www-form-urlencoded +// email=qa@x.com +// → HTTP 202 + magic-link inserted in platform_db. +// +// Combined with no Origin enforcement this was a textbook CSRF: any +// malicious site could to spam an arbitrary email +// with magic-links from the victim's IP. +// +// Fix: Content-Type must be application/json (charset suffix permitted). +// Form-encoded bodies are rejected with 400 invalid_content_type BEFORE +// the body is parsed or the DB is touched. +func TestAuthEmailStart_RejectsFormUrlencoded(t *testing.T) { + rdb, clean := setupCoverageRedis(t) + defer clean() + defer flushPerIPRL(t, rdb) + app, mailer := csrfRLApp(t, rdb) + + req := httptest.NewRequest(http.MethodPost, "/auth/email/start", + strings.NewReader("email=qa%40example.com&return_to=https%3A%2F%2Finstanode.dev")) + req.Header.Set("Content-Type", "application/x-www-form-urlencoded") + resp, err := app.Test(req, 5000) + require.NoError(t, err) + defer resp.Body.Close() + + require.Equal(t, http.StatusBadRequest, resp.StatusCode, + "AUTH-163: urlencoded bodies must be 400, not 202") + var envelope struct { + Error string `json:"error"` + } + require.NoError(t, json.NewDecoder(resp.Body).Decode(&envelope)) + assert.Equal(t, "invalid_content_type", envelope.Error) + assert.Empty(t, mailer.calls, "no mail must be sent on the CSRF path") +} + +// TestAuthEmailStart_AcceptsJSONWithCharset — guardrail. +// The legitimate dashboard flow sometimes sets +// Content-Type: application/json; charset=utf-8. The new gate must +// accept that — only the bare media type is checked. +func TestAuthEmailStart_AcceptsJSONWithCharset(t *testing.T) { + rdb, clean := setupCoverageRedis(t) + defer clean() + defer flushPerIPRL(t, rdb) + app, _ := csrfRLApp(t, rdb) + + req := httptest.NewRequest(http.MethodPost, "/auth/email/start", + strings.NewReader(`{"email":"not-an-email"}`)) + req.Header.Set("Content-Type", "application/json; charset=utf-8") + resp, err := app.Test(req, 5000) + require.NoError(t, err) + defer resp.Body.Close() + + // We send an invalid email so we don't need a DB; the load-bearing + // assertion is that the response is NOT invalid_content_type. + require.Equal(t, http.StatusBadRequest, resp.StatusCode) + var envelope struct { + Error string `json:"error"` + } + require.NoError(t, json.NewDecoder(resp.Body).Decode(&envelope)) + assert.Equal(t, "invalid_email", envelope.Error, + "application/json; charset=utf-8 must NOT trip the new content-type gate") +} + +// TestAuthEmailStart_PerIPRateLimit — AUTH-097 / AUTH-107. +// +// The per-email-hash limit is bypassed by rotating the email. The new +// per-IP limit caps to magicLinkPerIPRateLimit calls per IP per window. +// The (limit+1)th request from the same IP must be silently absorbed. +// +// Two-layer assertion: +// +// 1. Function-level (checkPerIPRateLimit): exercise the counter +// boundary directly so the cap value and Redis expiry semantics are +// pinned regardless of handler wiring. +// +// 2. Handler-level (POST /auth/email/start): pre-populate the Redis +// counter for the test's source IP (0.0.0.0 — fiber app.Test uses +// this for in-process requests) to one over the cap, then hit the +// handler with a valid Content-Type + valid-syntax email and assert +// 202 + no mailer call. This proves the gate is wired into Start. +func TestAuthEmailStart_PerIPRateLimit(t *testing.T) { + rdb, clean := setupCoverageRedis(t) + defer clean() + defer flushPerIPRL(t, rdb) + + const probeIP = "203.0.113.99" // TEST-NET-3, RFC 5737. + ctx := context.Background() + + // Layer 1: function-level boundary check. + for i := 1; i <= magicLinkPerIPRateLimit; i++ { + limited, err := checkPerIPRateLimit(ctx, rdb, probeIP) + require.NoError(t, err, "call %d: Redis must be healthy", i) + assert.False(t, limited, "call %d under the per-IP cap must NOT be limited", i) + } + limited, err := checkPerIPRateLimit(ctx, rdb, probeIP) + require.NoError(t, err) + assert.True(t, limited, + "AUTH-097: the (magicLinkPerIPRateLimit+1)th call from the same IP must be limited") + + // Layer 2: handler integration. Fiber's app.Test connects from + // 0.0.0.0 — pre-populate that key directly so the next handler + // call hits the over-cap path WITHOUT us having to drive a full + // magic-link flow with a wired DB. + const fiberTestIP = "0.0.0.0" + key := perIPRateLimitKey(fiberTestIP) + require.NoError(t, rdb.Set(ctx, key, magicLinkPerIPRateLimit+1, magicLinkPerIPRateLimitWindow).Err(), + "pre-populate the per-IP counter for the fiber-test source IP") + + app, mailer := csrfRLApp(t, rdb) + req := httptest.NewRequest(http.MethodPost, "/auth/email/start", + strings.NewReader(`{"email":"survivor@example.com"}`)) + req.Header.Set("Content-Type", "application/json") + resp, err := app.Test(req, 5000) + require.NoError(t, err) + defer resp.Body.Close() + assert.Equal(t, http.StatusAccepted, resp.StatusCode, + "AUTH-097: silent-absorb path returns 202 with no enumeration signal") + assert.Empty(t, mailer.calls, + "the limited call must NOT invoke the mailer") +} diff --git a/internal/handlers/magic_link.go b/internal/handlers/magic_link.go index 841a40f..403dda8 100644 --- a/internal/handlers/magic_link.go +++ b/internal/handlers/magic_link.go @@ -38,6 +38,19 @@ const magicLinkEmailRateLimitWindow = time.Hour // coupling to a string literal buried in a format string. const magicLinkEmailRLKeyPrefix = "ml:email:rl" +// magicLinkPerIPRateLimit caps magic-link Start calls per source IP per +// magicLinkPerIPRateLimitWindow (AUTH-097 / AUTH-107). The existing +// per-email limit is bypassed by rotating the address; this is the +// per-IP backstop. 5/hr matches the per-email cap so the two limits +// behave symmetrically from an operator-monitoring perspective. +const magicLinkPerIPRateLimit = 5 +const magicLinkPerIPRateLimitWindow = time.Hour + +// magicLinkPerIPRLKeyPrefix is the Redis key prefix for per-IP rate +// limits. Distinct from magicLinkEmailRLKeyPrefix so the two budgets +// can be observed and tuned independently in NR / Prom. +const magicLinkPerIPRLKeyPrefix = "ml:ip:rl" + // magicLinkStartMaxBodyBytes caps the inbound POST /auth/email/start JSON // body. Real bodies are ~80 bytes (email + return_to); 1 KiB is comfortable // for a future field without inviting megabyte-sized abuse payloads. The @@ -129,6 +142,37 @@ func checkEmailRateLimit(ctx context.Context, rdb *redis.Client, emailAddr strin return count > int64(magicLinkEmailRateLimit), nil } +// perIPRateLimitKey returns the Redis key for a given source IP. IP is +// hashed (SHA-256) so raw IPs never appear in Redis MONITOR output / key +// dumps / logs — same PII-hygiene pattern emailRateLimitKey uses. +func perIPRateLimitKey(ip string) string { + h := sha256.Sum256([]byte(ip)) + return fmt.Sprintf("%s:%x", magicLinkPerIPRLKeyPrefix, h[:]) +} + +// checkPerIPRateLimit increments the per-IP Redis counter and returns +// (limited, err). Same fail-open semantics as checkEmailRateLimit — a +// Redis outage must never block legitimate sign-in. The 202 response on +// the limited path is identical to the success path (no enumeration +// signal). AUTH-097 / AUTH-107. +func checkPerIPRateLimit(ctx context.Context, rdb *redis.Client, ip string) (limited bool, err error) { + if rdb == nil || ip == "" { + return false, nil + } + key := perIPRateLimitKey(ip) + pipe := rdb.Pipeline() + incrCmd := pipe.Incr(ctx, key) + pipe.Expire(ctx, key, magicLinkPerIPRateLimitWindow) + if _, execErr := pipe.Exec(ctx); execErr != nil { + return false, fmt.Errorf("magic_link.ip_rl: %w", execErr) + } + count, resultErr := incrCmd.Result() + if resultErr != nil { + return false, fmt.Errorf("magic_link.ip_rl.result: %w", resultErr) + } + return count > int64(magicLinkPerIPRateLimit), nil +} + // magicLinkStartRequest is the body for POST /auth/email/start. type magicLinkStartRequest struct { Email string `json:"email"` @@ -155,6 +199,30 @@ type magicLinkStartRequest struct { func (h *MagicLinkHandler) Start(c *fiber.Ctx) error { requestID := middleware.GetRequestID(c) + // AUTH-163 (P0): the server previously accepted + // Content-Type: application/x-www-form-urlencoded on this endpoint + // (Fiber's BodyParser autodetects the content-type and merrily + // decoded `email=qa@x.com&return_to=...` form bodies). Combined with + // no Origin/Referer enforcement that was a textbook CSRF primitive: + // any malicious site could to spam any arbitrary email with + // magic-links from the victim's IP. + // + // Fail-closed: require Content-Type to start with application/json. + // The legitimate dashboard / CLI / agent flows all set + // application/json; the only callers that send urlencoded bodies are + // browser POSTs from third-party origins — which is exactly + // the attack surface we want to close. + ct := strings.ToLower(strings.TrimSpace(c.Get("Content-Type"))) + // Strip any "; charset=..." suffix before comparing. + if semi := strings.IndexByte(ct, ';'); semi >= 0 { + ct = strings.TrimSpace(ct[:semi]) + } + if ct != "application/json" { + return respondError(c, fiber.StatusBadRequest, "invalid_content_type", + "POST /auth/email/start requires Content-Type: application/json. Form-encoded bodies are rejected to close the CSRF primitive.") + } + // B4-F5 (BugBash 2026-05-20): the global Fiber BodyLimit is 50MiB to // accommodate /deploy/new tarballs — that's far too generous for a // 2-field JSON envelope. A 10MB JSON body on /auth/email/start passed @@ -178,6 +246,34 @@ func (h *MagicLinkHandler) Start(c *fiber.Ctx) error { return respondError(c, fiber.StatusBadRequest, "invalid_email", "A valid email address is required") } + // AUTH-107 / AUTH-097 (P0/P1): per-IP rate limit. The existing per- + // email-hash limit is bypassed by rotating the address — an attacker + // can spam any number of arbitrary 3rd-party addresses from one IP. + // With Brevo currently configured to return 201 then internally + // reject (project memory project_brevo_sender_not_validated), every + // spam send still chews through DB rows + Brevo quota. + // + // Runs AFTER email validation so a typo doesn't burn budget; runs + // BEFORE the per-email limit because the per-IP limit is the wider + // abuse-defence layer and we want it to fire even when the per-email + // budget is fresh. Fail-open on Redis error per CLAUDE.md convention + // 1. Limited path returns 202 — identical to the success path — + // so an attacker gains no enumeration signal. + if ipLimited, ipErr := checkPerIPRateLimit(c.Context(), h.rdb, c.IP()); ipErr != nil { + slog.Warn("magic_link.start.ip_rl_error", + "error", ipErr, + "request_id", requestID, + ) + // fail-open + } else if ipLimited { + metrics.MagicLinkEmailRateLimited.Inc() + slog.Warn("magic_link.start.ip_rate_limited", + "request_id", requestID, + "ip", c.IP(), + ) + return c.Status(fiber.StatusAccepted).JSON(fiber.Map{"ok": true}) + } + // Per-email rate limit (A04). Fail-open on Redis error so a cache // outage never blocks sign-in. The 202 response on the limited path is // identical to the success path — the attacker gains no signal. From c2a7fcd6ac4aefe29b1d52c190d536e50b858008 Mon Sep 17 00:00:00 2001 From: "Claude (Manas)" Date: Fri, 29 May 2026 19:14:32 +0530 Subject: [PATCH 2/4] fix(auth): add agent_action for invalid_content_type (#177) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The codeToAgentAction registry is iterated by TestErrorCode_HasAgentAction in CI; missing entry blocked merge. Per-IP rate-limit (AUTH-097/107) is intentionally silent (202) per CLAUDE.md — only the content-type gate emits a 400 that needs U3 copy. Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/handlers/helpers.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/internal/handlers/helpers.go b/internal/handlers/helpers.go index 9a6d748..12fd618 100644 --- a/internal/handlers/helpers.go +++ b/internal/handlers/helpers.go @@ -1054,6 +1054,12 @@ var codeToAgentAction = map[string]errorCodeMeta{ "subscription_cancel_failed": { AgentAction: "Tell the user cancelling the Razorpay subscription failed. The team-delete is paused; email support@instanode.dev so an operator can reconcile — see https://instanode.dev/support.", }, + + // ── Auth content-type gate (AUTH-163, CSRF). Per-IP rate-limit (AUTH-097/107) + // returns 202 silently per CLAUDE.md "silent absorb" policy — no agent_action needed. + "invalid_content_type": { + AgentAction: "Tell the user the magic-link request must use Content-Type: application/json. Form-urlencoded bodies are rejected to prevent CSRF — retry with JSON.", + }, } // ErrorResponse is the canonical JSON shape for every 4xx/5xx response. From 0c8ac0ba23e3daf85a3f3457ccec23e687b07645 Mon Sep 17 00:00:00 2001 From: "Claude (Manas)" Date: Fri, 29 May 2026 20:50:25 +0530 Subject: [PATCH 3/4] =?UTF-8?q?fix(auth):=20satisfy=20U3=20contract=20?= =?UTF-8?q?=E2=80=94=20invalid=5Fcontent=5Ftype=20agent=5Faction=20needs?= =?UTF-8?q?=20https=20URL?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TestAgentActionContract asserts every codeToAgentAction entry contains a full https://instanode.dev/ URL. My earlier entry omitted the docs link. Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/handlers/helpers.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/handlers/helpers.go b/internal/handlers/helpers.go index 12fd618..90c63da 100644 --- a/internal/handlers/helpers.go +++ b/internal/handlers/helpers.go @@ -1058,7 +1058,7 @@ var codeToAgentAction = map[string]errorCodeMeta{ // ── Auth content-type gate (AUTH-163, CSRF). Per-IP rate-limit (AUTH-097/107) // returns 202 silently per CLAUDE.md "silent absorb" policy — no agent_action needed. "invalid_content_type": { - AgentAction: "Tell the user the magic-link request must use Content-Type: application/json. Form-urlencoded bodies are rejected to prevent CSRF — retry with JSON.", + AgentAction: "Tell the user the magic-link request must use Content-Type: application/json. Form-urlencoded bodies are rejected to prevent CSRF — retry with JSON. See https://instanode.dev/docs/auth.", }, } From 9a6e305abb40ef79d64e97c7c5c8cfc36b62da81 Mon Sep 17 00:00:00 2001 From: "Claude (Manas)" Date: Fri, 29 May 2026 21:13:51 +0530 Subject: [PATCH 4/4] fix(auth): remove dead error path in checkPerIPRateLimit (patch coverage) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After successful pipe.Exec, incrCmd.Result() never returns an error per go-redis semantics — pipe.Exec is the canonical error point for pipelined commands. Use .Val() instead; eliminates 2 uncovered lines that diff-cover flagged at 96.5% patch coverage. Verified locally: TestMagicLinkStart_PerIPRateLimit + TestErrorCode_HasAgentAction PASS. Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/handlers/magic_link.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/internal/handlers/magic_link.go b/internal/handlers/magic_link.go index 403dda8..63cad78 100644 --- a/internal/handlers/magic_link.go +++ b/internal/handlers/magic_link.go @@ -166,11 +166,11 @@ func checkPerIPRateLimit(ctx context.Context, rdb *redis.Client, ip string) (lim if _, execErr := pipe.Exec(ctx); execErr != nil { return false, fmt.Errorf("magic_link.ip_rl: %w", execErr) } - count, resultErr := incrCmd.Result() - if resultErr != nil { - return false, fmt.Errorf("magic_link.ip_rl.result: %w", resultErr) - } - return count > int64(magicLinkPerIPRateLimit), nil + // .Val() returns the cached INCR result. After a successful pipe.Exec, + // the cmd's err is guaranteed nil (pipe.Exec is the canonical error + // point for pipelined commands), so a separate result-err check would + // be dead code per go-redis semantics. + return incrCmd.Val() > int64(magicLinkPerIPRateLimit), nil } // magicLinkStartRequest is the body for POST /auth/email/start.