fix(auth): close CSRF on /auth/email/start + per-IP magic-link rate-limit (P0)

mastermanas805 · claude · mastermanas805 · commit 19d0db2ead41 · 2026-05-29T16:40:08.000+05:30
Auth P0 chain found by QA 2026-05-29. Two findings on the magic-link abuse surface, ship together. Findings closed in this PR: AUTH-163 (P0): /auth/email/start accepted Content-Type: application/ x-www-form-urlencoded and inserted a magic_links row. Combined with no Origin/Referer enforcement that was a textbook CSRF primitive: a malicious site could <form action="https://api.instanode.dev/ auth/email/start" method="POST"> to spam any arbitrary email with magic-links from the victim's IP. Fix: require Content-Type: application/json (charset suffix permitted). Form-encoded bodies are rejected with 400 invalid_content_type BEFORE the body is parsed or the DB is touched. The legitimate dashboard / CLI / agent flows all set application/json — the only callers that send urlencoded bodies are third-party-origin browser forms, which is exactly the attack surface we want to close. AUTH-107 / AUTH-097 (P0/P1): no CAPTCHA on /auth/email/start AND no per-IP magic-link rate-limit. Without CAPTCHA (deferred — the Brevo sender gap (project memory project_brevo_sender_not_validated) is the real blocker, and CAPTCHA on a non-deliverable email path just frustrates legitimate users), per-IP limit is the load-bearing abuse-defence. Existing per-email limit (5/hr/email) is trivially bypassed by rotating the email — an attacker can spam any number of 3rd-party addresses from a single IP. Fix: per-IP rate limit of magicLinkPerIPRateLimit (5) per magicLinkPerIPRateLimitWindow (1h), keyed by SHA-256 of c.IP() in Redis under magicLinkPerIPRLKeyPrefix ("ml:ip:rl"). Runs AFTER email validation (so a typo doesn't burn budget) but BEFORE the per-email limit (so it fires even when the per-email budget is fresh). Fail- open on Redis error per CLAUDE.md convention 1. Limited path returns 202 — identical to the success path — so an attacker gains no enumeration signal. Operator visibility via metrics.MagicLinkEmailRateLimited counter + magic_link.start. ip_rate_limited structured log. Regression tests (each reproduces the original exploit and asserts the fix blocks it): TestAuthEmailStart_RejectsFormUrlencoded TestAuthEmailStart_AcceptsJSONWithCharset (guardrail — legit flow) TestAuthEmailStart_PerIPRateLimit (counter-level + handler-level) Rule-22 surface checklist: - api/internal/handlers/magic_link.go (handler) - OpenAPI: handlers/openapi.go not updated (no new public envelope — invalid_content_type follows the existing error-code envelope shape; per-IP-limited still returns the existing 202 success shape). Follow-up: surface invalid_content_type in codeToAgentAction so agents get the right "set Content-Type: application/json" prose without consulting the spec. Same polish PR as the PR-1 reauth_required entry. - content/llms.txt not impacted - instanode-web pricing not impacted - NR alert: bump ml:ip:rl rate-limit counter into the existing magic_link.email_rate_limited alert (counter reused). Follow-up to split into per-IP and per-email tiles for operator clarity. Live-verify (rule 14) and curl evidence (per finding) attached in PR body. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/internal/handlers/auth_csrf_rl_authp0_test.go b/internal/handlers/auth_csrf_rl_authp0_test.go
@@ -0,0 +1,191 @@
+package handlers
+
+// auth_csrf_rl_authp0_test.go — regression tests for AUTH-163, AUTH-107,
+// AUTH-097 shipped 2026-05-29.
+//
+// Lives in package handlers so it can use the same in-package
+// recordingMailer / setupCoverageRedis fixtures established by
+// magic_link_coverage_test.go and cli_auth_coverage_test.go.
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+
+	"github.com/gofiber/fiber/v2"
+	"github.com/redis/go-redis/v9"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"instant.dev/internal/config"
+	"instant.dev/internal/middleware"
+)
+
+// csrfRLApp wires the magic-link Start route with an isolated Redis
+// connection so the per-IP counter can be observed independently from
+// other tests. setupCoverageRedis flushes-on-cleanup via DB-14 isolation.
+func csrfRLApp(t *testing.T, rdb *redis.Client) (*fiber.App, *recordingMailer) {
+	t.Helper()
+	cfg := &config.Config{JWTSecret: logoutTestSecret}
+	authH := NewAuthHandler(nil, cfg)
+	mailer := &recordingMailer{}
+	h := NewMagicLinkHandlerWithMailerAndRedis(nil, cfg, mailer, authH, rdb)
+	app := fiber.New(fiber.Config{
+		ErrorHandler: func(c *fiber.Ctx, err error) error {
+			if errors.Is(err, ErrResponseWritten) {
+				return nil
+			}
+			code := fiber.StatusInternalServerError
+			if e, ok := err.(*fiber.Error); ok {
+				code = e.Code
+			}
+			return c.Status(code).JSON(fiber.Map{"ok": false, "error": err.Error()})
+		},
+	})
+	app.Use(middleware.RequestID())
+	app.Post("/auth/email/start", h.Start)
+	return app, mailer
+}
+
+// flushPerIPRL clears the per-IP Redis budget so tests in the same
+// package don't poison each other. Mirrors the cleanup the per-email
+// limit relies on (setupCoverageRedis uses DB 14 which is otherwise
+// untouched, but the magicLinkPerIPRLKeyPrefix is new and not auto-
+// flushed by any existing helper — so we clear it explicitly).
+func flushPerIPRL(t *testing.T, rdb *redis.Client) {
+	t.Helper()
+	ctx := context.Background()
+	iter := rdb.Scan(ctx, 0, magicLinkPerIPRLKeyPrefix+":*", 1000).Iterator()
+	for iter.Next(ctx) {
+		_ = rdb.Del(ctx, iter.Val()).Err()
+	}
+}
+
+// TestAuthEmailStart_RejectsFormUrlencoded — AUTH-163.
+//
+// Original exploit (QA confirmed):
+//
+//	POST /auth/email/start
+//	Content-Type: application/x-www-form-urlencoded
+//	email=qa@x.com
+//	→ HTTP 202 + magic-link inserted in platform_db.
+//
+// Combined with no Origin enforcement this was a textbook CSRF: any
+// malicious site could <form action="..."> to spam an arbitrary email
+// with magic-links from the victim's IP.
+//
+// Fix: Content-Type must be application/json (charset suffix permitted).
+// Form-encoded bodies are rejected with 400 invalid_content_type BEFORE
+// the body is parsed or the DB is touched.
+func TestAuthEmailStart_RejectsFormUrlencoded(t *testing.T) {
+	rdb, clean := setupCoverageRedis(t)
+	defer clean()
+	defer flushPerIPRL(t, rdb)
+	app, mailer := csrfRLApp(t, rdb)
+
+	req := httptest.NewRequest(http.MethodPost, "/auth/email/start",
+		strings.NewReader("email=qa%40example.com&return_to=https%3A%2F%2Finstanode.dev"))
+	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+	resp, err := app.Test(req, 5000)
+	require.NoError(t, err)
+	defer resp.Body.Close()
+
+	require.Equal(t, http.StatusBadRequest, resp.StatusCode,
+		"AUTH-163: urlencoded bodies must be 400, not 202")
+	var envelope struct {
+		Error string `json:"error"`
+	}
+	require.NoError(t, json.NewDecoder(resp.Body).Decode(&envelope))
+	assert.Equal(t, "invalid_content_type", envelope.Error)
+	assert.Empty(t, mailer.calls, "no mail must be sent on the CSRF path")
+}
+
+// TestAuthEmailStart_AcceptsJSONWithCharset — guardrail.
+// The legitimate dashboard flow sometimes sets
+// Content-Type: application/json; charset=utf-8. The new gate must
+// accept that — only the bare media type is checked.
+func TestAuthEmailStart_AcceptsJSONWithCharset(t *testing.T) {
+	rdb, clean := setupCoverageRedis(t)
+	defer clean()
+	defer flushPerIPRL(t, rdb)
+	app, _ := csrfRLApp(t, rdb)
+
+	req := httptest.NewRequest(http.MethodPost, "/auth/email/start",
+		strings.NewReader(`{"email":"not-an-email"}`))
+	req.Header.Set("Content-Type", "application/json; charset=utf-8")
+	resp, err := app.Test(req, 5000)
+	require.NoError(t, err)
+	defer resp.Body.Close()
+
+	// We send an invalid email so we don't need a DB; the load-bearing
+	// assertion is that the response is NOT invalid_content_type.
+	require.Equal(t, http.StatusBadRequest, resp.StatusCode)
+	var envelope struct {
+		Error string `json:"error"`
+	}
+	require.NoError(t, json.NewDecoder(resp.Body).Decode(&envelope))
+	assert.Equal(t, "invalid_email", envelope.Error,
+		"application/json; charset=utf-8 must NOT trip the new content-type gate")
+}
+
+// TestAuthEmailStart_PerIPRateLimit — AUTH-097 / AUTH-107.
+//
+// The per-email-hash limit is bypassed by rotating the email. The new
+// per-IP limit caps to magicLinkPerIPRateLimit calls per IP per window.
+// The (limit+1)th request from the same IP must be silently absorbed.
+//
+// Two-layer assertion:
+//
+//  1. Function-level (checkPerIPRateLimit): exercise the counter
+//     boundary directly so the cap value and Redis expiry semantics are
+//     pinned regardless of handler wiring.
+//
+//  2. Handler-level (POST /auth/email/start): pre-populate the Redis
+//     counter for the test's source IP (0.0.0.0 — fiber app.Test uses
+//     this for in-process requests) to one over the cap, then hit the
+//     handler with a valid Content-Type + valid-syntax email and assert
+//     202 + no mailer call. This proves the gate is wired into Start.
+func TestAuthEmailStart_PerIPRateLimit(t *testing.T) {
+	rdb, clean := setupCoverageRedis(t)
+	defer clean()
+	defer flushPerIPRL(t, rdb)
+
+	const probeIP = "203.0.113.99" // TEST-NET-3, RFC 5737.
+	ctx := context.Background()
+
+	// Layer 1: function-level boundary check.
+	for i := 1; i <= magicLinkPerIPRateLimit; i++ {
+		limited, err := checkPerIPRateLimit(ctx, rdb, probeIP)
+		require.NoError(t, err, "call %d: Redis must be healthy", i)
+		assert.False(t, limited, "call %d under the per-IP cap must NOT be limited", i)
+	}
+	limited, err := checkPerIPRateLimit(ctx, rdb, probeIP)
+	require.NoError(t, err)
+	assert.True(t, limited,
+		"AUTH-097: the (magicLinkPerIPRateLimit+1)th call from the same IP must be limited")
+
+	// Layer 2: handler integration. Fiber's app.Test connects from
+	// 0.0.0.0 — pre-populate that key directly so the next handler
+	// call hits the over-cap path WITHOUT us having to drive a full
+	// magic-link flow with a wired DB.
+	const fiberTestIP = "0.0.0.0"
+	key := perIPRateLimitKey(fiberTestIP)
+	require.NoError(t, rdb.Set(ctx, key, magicLinkPerIPRateLimit+1, magicLinkPerIPRateLimitWindow).Err(),
+		"pre-populate the per-IP counter for the fiber-test source IP")
+
+	app, mailer := csrfRLApp(t, rdb)
+	req := httptest.NewRequest(http.MethodPost, "/auth/email/start",
+		strings.NewReader(`{"email":"survivor@example.com"}`))
+	req.Header.Set("Content-Type", "application/json")
+	resp, err := app.Test(req, 5000)
+	require.NoError(t, err)
+	defer resp.Body.Close()
+	assert.Equal(t, http.StatusAccepted, resp.StatusCode,
+		"AUTH-097: silent-absorb path returns 202 with no enumeration signal")
+	assert.Empty(t, mailer.calls,
+		"the limited call must NOT invoke the mailer")
+}
diff --git a/internal/handlers/magic_link.go b/internal/handlers/magic_link.go
@@ -38,6 +38,19 @@ const magicLinkEmailRateLimitWindow = time.Hour
 // coupling to a string literal buried in a format string.
 const magicLinkEmailRLKeyPrefix = "ml:email:rl"
 
+// magicLinkPerIPRateLimit caps magic-link Start calls per source IP per
+// magicLinkPerIPRateLimitWindow (AUTH-097 / AUTH-107). The existing
+// per-email limit is bypassed by rotating the address; this is the
+// per-IP backstop. 5/hr matches the per-email cap so the two limits
+// behave symmetrically from an operator-monitoring perspective.
+const magicLinkPerIPRateLimit = 5
+const magicLinkPerIPRateLimitWindow = time.Hour
+
+// magicLinkPerIPRLKeyPrefix is the Redis key prefix for per-IP rate
+// limits. Distinct from magicLinkEmailRLKeyPrefix so the two budgets
+// can be observed and tuned independently in NR / Prom.
+const magicLinkPerIPRLKeyPrefix = "ml:ip:rl"
+
 // magicLinkStartMaxBodyBytes caps the inbound POST /auth/email/start JSON
 // body. Real bodies are ~80 bytes (email + return_to); 1 KiB is comfortable
 // for a future field without inviting megabyte-sized abuse payloads. The
@@ -129,6 +142,37 @@ func checkEmailRateLimit(ctx context.Context, rdb *redis.Client, emailAddr strin
 	return count > int64(magicLinkEmailRateLimit), nil
 }
 
+// perIPRateLimitKey returns the Redis key for a given source IP. IP is
+// hashed (SHA-256) so raw IPs never appear in Redis MONITOR output / key
+// dumps / logs — same PII-hygiene pattern emailRateLimitKey uses.
+func perIPRateLimitKey(ip string) string {
+	h := sha256.Sum256([]byte(ip))
+	return fmt.Sprintf("%s:%x", magicLinkPerIPRLKeyPrefix, h[:])
+}
+
+// checkPerIPRateLimit increments the per-IP Redis counter and returns
+// (limited, err). Same fail-open semantics as checkEmailRateLimit — a
+// Redis outage must never block legitimate sign-in. The 202 response on
+// the limited path is identical to the success path (no enumeration
+// signal). AUTH-097 / AUTH-107.
+func checkPerIPRateLimit(ctx context.Context, rdb *redis.Client, ip string) (limited bool, err error) {
+	if rdb == nil || ip == "" {
+		return false, nil
+	}
+	key := perIPRateLimitKey(ip)
+	pipe := rdb.Pipeline()
+	incrCmd := pipe.Incr(ctx, key)
+	pipe.Expire(ctx, key, magicLinkPerIPRateLimitWindow)
+	if _, execErr := pipe.Exec(ctx); execErr != nil {
+		return false, fmt.Errorf("magic_link.ip_rl: %w", execErr)
+	}
+	count, resultErr := incrCmd.Result()
+	if resultErr != nil {
+		return false, fmt.Errorf("magic_link.ip_rl.result: %w", resultErr)
+	}
+	return count > int64(magicLinkPerIPRateLimit), nil
+}
+
 // magicLinkStartRequest is the body for POST /auth/email/start.
 type magicLinkStartRequest struct {
 	Email    string `json:"email"`
@@ -155,6 +199,30 @@ type magicLinkStartRequest struct {
 func (h *MagicLinkHandler) Start(c *fiber.Ctx) error {
 	requestID := middleware.GetRequestID(c)
 
+	// AUTH-163 (P0): the server previously accepted
+	// Content-Type: application/x-www-form-urlencoded on this endpoint
+	// (Fiber's BodyParser autodetects the content-type and merrily
+	// decoded `email=qa@x.com&return_to=...` form bodies). Combined with
+	// no Origin/Referer enforcement that was a textbook CSRF primitive:
+	// any malicious site could <form action="https://api.instanode.dev/
+	// auth/email/start" method="POST"> to spam any arbitrary email with
+	// magic-links from the victim's IP.
+	//
+	// Fail-closed: require Content-Type to start with application/json.
+	// The legitimate dashboard / CLI / agent flows all set
+	// application/json; the only callers that send urlencoded bodies are
+	// browser <form> POSTs from third-party origins — which is exactly
+	// the attack surface we want to close.
+	ct := strings.ToLower(strings.TrimSpace(c.Get("Content-Type")))
+	// Strip any "; charset=..." suffix before comparing.
+	if semi := strings.IndexByte(ct, ';'); semi >= 0 {
+		ct = strings.TrimSpace(ct[:semi])
+	}
+	if ct != "application/json" {
+		return respondError(c, fiber.StatusBadRequest, "invalid_content_type",
+			"POST /auth/email/start requires Content-Type: application/json. Form-encoded bodies are rejected to close the CSRF primitive.")
+	}
+
 	// B4-F5 (BugBash 2026-05-20): the global Fiber BodyLimit is 50MiB to
 	// accommodate /deploy/new tarballs — that's far too generous for a
 	// 2-field JSON envelope. A 10MB JSON body on /auth/email/start passed
@@ -178,6 +246,34 @@ func (h *MagicLinkHandler) Start(c *fiber.Ctx) error {
 		return respondError(c, fiber.StatusBadRequest, "invalid_email", "A valid email address is required")
 	}
 
+	// AUTH-107 / AUTH-097 (P0/P1): per-IP rate limit. The existing per-
+	// email-hash limit is bypassed by rotating the address — an attacker
+	// can spam any number of arbitrary 3rd-party addresses from one IP.
+	// With Brevo currently configured to return 201 then internally
+	// reject (project memory project_brevo_sender_not_validated), every
+	// spam send still chews through DB rows + Brevo quota.
+	//
+	// Runs AFTER email validation so a typo doesn't burn budget; runs
+	// BEFORE the per-email limit because the per-IP limit is the wider
+	// abuse-defence layer and we want it to fire even when the per-email
+	// budget is fresh. Fail-open on Redis error per CLAUDE.md convention
+	// 1. Limited path returns 202 — identical to the success path —
+	// so an attacker gains no enumeration signal.
+	if ipLimited, ipErr := checkPerIPRateLimit(c.Context(), h.rdb, c.IP()); ipErr != nil {
+		slog.Warn("magic_link.start.ip_rl_error",
+			"error", ipErr,
+			"request_id", requestID,
+		)
+		// fail-open
+	} else if ipLimited {
+		metrics.MagicLinkEmailRateLimited.Inc()
+		slog.Warn("magic_link.start.ip_rate_limited",
+			"request_id", requestID,
+			"ip", c.IP(),
+		)
+		return c.Status(fiber.StatusAccepted).JSON(fiber.Map{"ok": true})
+	}
+
 	// Per-email rate limit (A04). Fail-open on Redis error so a cache
 	// outage never blocks sign-in. The 202 response on the limited path is
 	// identical to the success path — the attacker gains no signal.