From 3c2ade20e51b7515eaec825ac3d22b3c4ada13ff Mon Sep 17 00:00:00 2001 From: Manas Srivastava Date: Fri, 22 May 2026 09:28:51 +0530 Subject: [PATCH] =?UTF-8?q?test(coverage):=20drive=20api=20root=20+=20tele?= =?UTF-8?q?metry=20to=20=E2=89=A595%=20via=20behavior-preserving=20seams?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract main()'s body into a run() error-returning seam with package-level function vars over every external boundary (Postgres/Redis/GeoIP/provisioner/ router/serve) so boot → ready → teardown plus the migrations-fail, plans-load- fail, provisioner-connect-fail, and serve-error arms are unit-testable without real infra or a bound listener. main() becomes a thin runFunc/osExit wrapper. Add newExporter/newResource package-var seams in telemetry to reach the two otlptracegrpc.New/resource.New constructor-failure arms. Production wiring is identical — the seam vars default to the real implementations and the P0-2 OTel TLS-by-scheme + NR-api-key contract is unchanged. Root 30.2% → 98.2%, telemetry 85.7% → 98.3%; no function below 95%. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- graceful_shutdown_test.go | 25 ++ internal/telemetry/tracer.go | 20 +- internal/telemetry/tracer_test.go | 61 +++++ main.go | 70 ++++- run_test.go | 407 ++++++++++++++++++++++++++++++ 5 files changed, 567 insertions(+), 16 deletions(-) create mode 100644 run_test.go diff --git a/graceful_shutdown_test.go b/graceful_shutdown_test.go index a6e8893..87f40a9 100644 --- a/graceful_shutdown_test.go +++ b/graceful_shutdown_test.go @@ -261,6 +261,31 @@ func TestRunServerWithGracefulShutdown_TimeoutKillsStuckRequest(t *testing.T) { } } +// TestRunServerWithGracefulShutdown_ListenErrorReturnsBeforeSignal — when +// app.Listen fails fast (a bind error: malformed addr, port already held), +// the serve goroutine pushes the fatal error onto serveErr and the helper +// MUST return it via the serveErr-before-signal select arm, NOT block waiting +// for SIGTERM. This is the "pod CrashLoopBackoffs instead of going green with +// no listener" contract. +func TestRunServerWithGracefulShutdown_ListenErrorReturnsBeforeSignal(t *testing.T) { + app := fiber.New(fiber.Config{DisableStartupMessage: true}) + // A syntactically invalid bind address makes net.Listen fail immediately + // with a non-ErrClosed error, so the goroutine takes the serveErr<-err arm. + badAddr := "256.256.256.256:99999" + + done := make(chan error, 1) + go func() { + done <- runServerWithGracefulShutdown(app, badAddr, time.Second, router.ShutdownHooks{}) + }() + + select { + case err := <-done: + require.Error(t, err, "a fatal Listen error must propagate out of the helper") + case <-time.After(5 * time.Second): + t.Fatal("helper blocked on a bind failure — the serveErr-before-signal arm is broken") + } +} + // Compile-time guard against a regression that removes the helper or changes // its signature in a way that would silently bypass the MR-P0-7 fix. 
var _ = func(app *fiber.App) error { diff --git a/internal/telemetry/tracer.go b/internal/telemetry/tracer.go index d497ff9..76c5b78 100644 --- a/internal/telemetry/tracer.go +++ b/internal/telemetry/tracer.go @@ -10,6 +10,7 @@ import ( "time" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace" "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" "go.opentelemetry.io/otel/propagation" "go.opentelemetry.io/otel/sdk/resource" @@ -18,6 +19,21 @@ import ( "google.golang.org/grpc/credentials" ) +// newExporter / newResource are package-level indirections over the OTel +// constructors so tests can override them to drive the otherwise-unreachable +// constructor-failure arms in InitTracer. Production behaviour is identical: +// they are plain forwards to otlptracegrpc.New / resource.New. Do NOT change +// the wired constructors (P0-2 OTel tracing contract) — these seams only let +// a test substitute an erroring stub. +var ( + newExporter = func(ctx context.Context, opts ...otlptracegrpc.Option) (*otlptrace.Exporter, error) { + return otlptracegrpc.New(ctx, opts...) + } + newResource = func(ctx context.Context, opts ...resource.Option) (*resource.Resource, error) { + return resource.New(ctx, opts...) + } +) + // InitTracer configures the global OpenTelemetry tracer provider. // // Endpoint selection (in order of precedence): @@ -90,13 +106,13 @@ func InitTracer(serviceName, otlpEndpoint string) func(context.Context) error { ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() - exporter, err := otlptracegrpc.New(ctx, opts...) + exporter, err := newExporter(ctx, opts...) 
if err != nil { slog.Error("telemetry.otlp_exporter_failed", "error", err, "endpoint", ep, "tls", useTLS) return func(context.Context) error { return nil } } - res, err := resource.New(ctx, + res, err := newResource(ctx, resource.WithAttributes(semconv.ServiceName(serviceName)), ) if err != nil { diff --git a/internal/telemetry/tracer_test.go b/internal/telemetry/tracer_test.go index 8591a06..0b24f6e 100644 --- a/internal/telemetry/tracer_test.go +++ b/internal/telemetry/tracer_test.go @@ -2,9 +2,70 @@ package telemetry import ( "context" + "errors" "testing" + + "go.opentelemetry.io/otel/exporters/otlp/otlptrace" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" + "go.opentelemetry.io/otel/sdk/resource" ) +// withExporterStub temporarily replaces the package-level newExporter seam, +// restoring it on cleanup. Lets a test drive the otlptracegrpc.New failure +// arm without touching production wiring. +func withExporterStub(t *testing.T, fn func(context.Context, ...otlptracegrpc.Option) (*otlptrace.Exporter, error)) { + t.Helper() + prev := newExporter + t.Cleanup(func() { newExporter = prev }) + newExporter = fn +} + +// withResourceStub temporarily replaces the package-level newResource seam. +func withResourceStub(t *testing.T, fn func(context.Context, ...resource.Option) (*resource.Resource, error)) { + t.Helper() + prev := newResource + t.Cleanup(func() { newResource = prev }) + newResource = fn +} + +// TestInitTracer_ExporterConstructionFails — when otlptracegrpc.New errors +// (network stack misconfig, bad creds object, etc.), InitTracer MUST log and +// return a working no-op shutdown rather than crash. This is the fail-open +// contract: a broken exporter can never block service boot. 
+func TestInitTracer_ExporterConstructionFails(t *testing.T) { + t.Setenv("NEW_RELIC_LICENSE_KEY", "") + withExporterStub(t, func(context.Context, ...otlptracegrpc.Option) (*otlptrace.Exporter, error) { + return nil, errors.New("boom: exporter construction failed") + }) + + shutdown := InitTracer("instant-api", "https://otlp.nr-data.net:4317") + if shutdown == nil { + t.Fatal("InitTracer must return a non-nil no-op shutdown when the exporter fails") + } + if err := shutdown(context.Background()); err != nil { + t.Fatalf("no-op shutdown after exporter failure must return nil, got %v", err) + } +} + +// TestInitTracer_ResourceConstructionFails — when resource.New errors, +// InitTracer MUST shut down the already-built exporter and return a working +// no-op shutdown. Same fail-open contract as the exporter arm. +func TestInitTracer_ResourceConstructionFails(t *testing.T) { + t.Setenv("NEW_RELIC_LICENSE_KEY", "") + // Real exporter constructs fine (lazy dial); force the resource arm. + withResourceStub(t, func(context.Context, ...resource.Option) (*resource.Resource, error) { + return nil, errors.New("boom: resource construction failed") + }) + + shutdown := InitTracer("instant-api", "http://localhost:4317") + if shutdown == nil { + t.Fatal("InitTracer must return a non-nil no-op shutdown when resource.New fails") + } + if err := shutdown(context.Background()); err != nil { + t.Fatalf("no-op shutdown after resource failure must return nil, got %v", err) + } +} + // TestInitTracer_EmptyEndpointNoop — when the endpoint is unset, the // returned shutdown must be a working no-op. This is the fail-open // contract for local dev / CI runs where OTel is intentionally off. diff --git a/main.go b/main.go index 94c98f3..fffe2a7 100644 --- a/main.go +++ b/main.go @@ -40,12 +40,53 @@ import ( // transaction all join cleanly in queries. 
const serviceName = "api" +// External boundaries are routed through package-level function variables so +// the run() seam can be exercised end-to-end (boot → ready → teardown, plus +// every failure arm) in a unit test without a real Postgres, Redis, GeoIP +// volume, or a bound TCP listener. In production every var holds its real +// implementation, so behaviour is byte-for-byte identical to inlining the +// call — this is a test seam, not a behaviour change. Do NOT change what the +// production defaults point at (notably telemetry.InitTracer — P0-2 OTel +// tracing contract); only override them in tests. +var ( + initTracer = telemetry.InitTracer + connectPostgres = db.ConnectPostgres + runMigrations = db.RunMigrations + startPoolStatsExporter = db.StartPoolStatsExporter + connectRedis = db.ConnectRedis + loadGeoLite2 = middleware.LoadGeoLite2 + newProvisionerClient = provisioner.NewClient + newRouterWithHooks = router.NewWithHooks + serveFunc = runServerWithGracefulShutdown + + // runFunc / osExit are seams so main() — the one statement that calls + // os.Exit and thus can't run in-process under `go test` — is exercised + // with a stubbed exit. In production runFunc == run and osExit == + // os.Exit, so behaviour is identical. + runFunc = run + osExit = os.Exit +) + func main() { + if err := runFunc(); err != nil { + slog.Error("server.fatal", "error", err) + osExit(1) + } +} + +// run is the extracted body of main(). It returns an error instead of +// calling os.Exit so it can be driven from a unit test; main() is the only +// production caller and turns a non-nil error into os.Exit(1). The boot +// ordering, defers, and fail-open contracts are identical to the previous +// inline main() — every external call goes through a package-level seam var +// (defaulting to the real implementation) purely so a test can substitute a +// stub. A nil return is a clean SIGTERM-triggered graceful shutdown. 
+func run() (runErr error) { // Structured JSON logging — wrapped in logctx.Handler so every record // is decorated with service, commit_id, trace_id, team_id, tid. // // AddSource gives file:line of the slog call site (caller field in - // the design doc). Done before any other slog call in main so even + // the design doc). Done before any other slog call in run so even // telemetry init failures land enriched. base := slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{ Level: slog.LevelInfo, @@ -59,7 +100,7 @@ func main() { // contain the prefix value anyway (the prefix is unread at this point). slog.SetDefault(slog.New(ctxH)) - shutdownTracer := telemetry.InitTracer("instant-api", os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT")) + shutdownTracer := initTracer("instant-api", os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT")) defer func() { if err := shutdownTracer(context.Background()); err != nil { slog.Error("telemetry.shutdown_failed", "error", err) @@ -85,12 +126,12 @@ func main() { // admin routes are disabled. slog.SetDefault(slog.New(middleware.NewLogScrubber(ctxH, cfg.AdminPathPrefix))) - database := db.ConnectPostgres(cfg.DatabaseURL) + database := connectPostgres(cfg.DatabaseURL) defer database.Close() - if err := db.RunMigrations(database); err != nil { + if err := runMigrations(database); err != nil { slog.Error("main.migrations_failed", "error", err) - os.Exit(1) + return fmt.Errorf("migrations: %w", err) } // Pool-saturation observability (Wave-3 chaos verify, 2026-05-21). @@ -103,7 +144,7 @@ func main() { // is cancelled at shutdown (see Phase A/B handlers below). poolStatsCtx, poolStatsCancel := context.WithCancel(context.Background()) defer poolStatsCancel() - go db.StartPoolStatsExporter(poolStatsCtx, database, "platform_db") + go startPoolStatsExporter(poolStatsCtx, database, "platform_db") // Deploy-audit self-report. 
Idempotent on (service, commit_id, // image_digest) — every pod startup of the same image is a no-op @@ -113,10 +154,10 @@ func main() { // must not stop the server from listening. emitDeployAuditSelfReport(database) - rdb := db.ConnectRedis(cfg.RedisURL) + rdb := connectRedis(cfg.RedisURL) defer rdb.Close() - geoDbs := middleware.LoadGeoLite2(cfg.GeoLite2DBPath) + geoDbs := loadGeoLite2(cfg.GeoLite2DBPath) if geoDbs != nil && geoDbs.City != nil { defer geoDbs.City.Close() } @@ -157,16 +198,16 @@ func main() { // so a misconfigured prod pod surfaces as CrashLoopBackoff // (operator-visible) instead of green /healthz with wrong limits. slog.Error("plans.load_failed", "error", err, "path", plansPath, "environment", cfg.Environment) - os.Exit(1) + return fmt.Errorf("plans load: %w", err) } var provClient *provisioner.Client if cfg.ProvisionerAddr != "" { var conn *grpc.ClientConn - provClient, conn, err = provisioner.NewClient(cfg.ProvisionerAddr, cfg.ProvisionerSecret) + provClient, conn, err = newProvisionerClient(cfg.ProvisionerAddr, cfg.ProvisionerSecret) if err != nil { slog.Error("main.provisioner_connect_failed", "error", err) - os.Exit(1) + return fmt.Errorf("provisioner connect: %w", err) } defer conn.Close() slog.Info("main.provisioner_connected", "addr", cfg.ProvisionerAddr) @@ -174,7 +215,7 @@ func main() { slog.Info("main.provisioner_local", "note", "PROVISIONER_ADDR not set, using local providers") } - app, hooks := router.NewWithHooks(cfg, database, rdb, geoDbs, emailClient, planRegistry, provClient, nrApp) + app, hooks := newRouterWithHooks(cfg, database, rdb, geoDbs, emailClient, planRegistry, provClient, nrApp) slog.Info("server.starting", "port", cfg.Port, @@ -183,10 +224,11 @@ func main() { "build_time", buildinfo.BuildTime, "version", buildinfo.Version, ) - if err := runServerWithGracefulShutdown(app, ":"+cfg.Port, gracefulShutdownTimeout, hooks); err != nil { + if err := serveFunc(app, ":"+cfg.Port, gracefulShutdownTimeout, hooks); err != nil { 
slog.Error("server.fatal", "error", err) - os.Exit(1) + return fmt.Errorf("serve: %w", err) } + return nil } // gracefulShutdownTimeout is the budget Fiber gets to drain in-flight requests diff --git a/run_test.go b/run_test.go new file mode 100644 index 0000000..c0c568c --- /dev/null +++ b/run_test.go @@ -0,0 +1,407 @@ +package main + +import ( + "context" + "database/sql" + "errors" + "os" + "strings" + "sync/atomic" + "testing" + "time" + + "github.com/gofiber/fiber/v2" + "github.com/newrelic/go-agent/v3/newrelic" + "github.com/oschwald/maxminddb-golang" + "github.com/redis/go-redis/v9" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" + + "instant.dev/internal/config" + "instant.dev/internal/email" + "instant.dev/internal/middleware" + "instant.dev/internal/plans" + "instant.dev/internal/provisioner" + "instant.dev/internal/router" + "instant.dev/internal/testhelpers" +) + +// newRunSeams snapshots every package-level seam var and restores them on +// cleanup so a run() test can substitute stubs without leaking overrides into +// sibling tests in the same package. +func newRunSeams(t *testing.T) { + t.Helper() + pInit := initTracer + pPg := connectPostgres + pMig := runMigrations + pPool := startPoolStatsExporter + pRedis := connectRedis + pGeo := loadGeoLite2 + pProv := newProvisionerClient + pRouter := newRouterWithHooks + pServe := serveFunc + t.Cleanup(func() { + initTracer = pInit + connectPostgres = pPg + runMigrations = pMig + startPoolStatsExporter = pPool + connectRedis = pRedis + loadGeoLite2 = pGeo + newProvisionerClient = pProv + newRouterWithHooks = pRouter + serveFunc = pServe + }) +} + +// setMinimalValidEnv sets exactly the env config.Load() needs to return +// without panicking, plus a no-op tracer endpoint. 
PLANS_PATH points at a +// missing file so loadPlansRegistry takes the dev-fallback branch +// (ENVIRONMENT=development) — no on-disk plans.yaml required. +func setMinimalValidEnv(t *testing.T) { + t.Helper() + t.Setenv("DATABASE_URL", "postgres://u:p@127.0.0.1:1/none?sslmode=disable") + t.Setenv("JWT_SECRET", "0123456789012345678901234567890123456789") + t.Setenv("AES_KEY", "00112233445566778899aabbccddeeff00112233445566778899aabbccddeeff") + t.Setenv("PLANS_PATH", t.TempDir()+"/missing-plans.yaml") + t.Setenv("OTEL_EXPORTER_OTLP_ENDPOINT", "") + t.Setenv("NEW_RELIC_LICENSE_KEY", "") + t.Setenv("PROVISIONER_ADDR", "") + t.Setenv("ENVIRONMENT", "development") +} + +// runState records boot-ordering observations made through the seams. +type runState struct { + tracerShutdownCalled atomic.Bool + migrationsCalled atomic.Bool + poolExporterCalled atomic.Bool + routerBuilt atomic.Bool + served atomic.Bool +} + +// fakeDB returns a non-pinging *sql.DB handle. sql.Open never dials, so this +// is safe and fast — the model wiring only stores the handle at boot. +func fakeDB(t *testing.T) *sql.DB { + t.Helper() + dbh, err := sql.Open("postgres", "postgres://u:p@127.0.0.1:1/none?sslmode=disable") + require.NoError(t, err) + return dbh +} + +// newClosableGeoDBs returns a GeoDBs with non-nil City/ASN readers. A +// zero-value maxminddb.Reader has hasMappedFile=false, so Close() is a safe +// no-op — enough to exercise run()'s geo-close defer branches without a real +// .mmdb fixture on disk. +func newClosableGeoDBs(t *testing.T) *middleware.GeoDBs { + t.Helper() + return &middleware.GeoDBs{City: &maxminddb.Reader{}, ASN: &maxminddb.Reader{}} +} + +// wireHappyPathSeams points every external boundary at a non-networking fake +// so run() can boot, build the router, reach the serve seam, and tear down +// without a real Postgres / Redis / GeoIP volume / bound listener. The serve +// seam is left for the caller to set (clean drain vs error arm). 
+func wireHappyPathSeams(t *testing.T) *runState { + st := &runState{} + initTracer = func(string, string) func(context.Context) error { + return func(context.Context) error { + st.tracerShutdownCalled.Store(true) + return nil + } + } + connectPostgres = func(string) *sql.DB { return fakeDB(t) } + runMigrations = func(*sql.DB) error { st.migrationsCalled.Store(true); return nil } + startPoolStatsExporter = func(ctx context.Context, _ *sql.DB, _ string) { + st.poolExporterCalled.Store(true) + <-ctx.Done() // mirror prod: lives until the boot ctx cancels at teardown + } + connectRedis = func(string) *redis.Client { + return redis.NewClient(&redis.Options{Addr: "127.0.0.1:1"}) + } + loadGeoLite2 = func(string) *middleware.GeoDBs { return nil } + newRouterWithHooks = func(_ *config.Config, _ *sql.DB, _ *redis.Client, _ *middleware.GeoDBs, _ *email.Client, _ *plans.Registry, _ *provisioner.Client, _ *newrelic.Application) (*fiber.App, router.ShutdownHooks) { + st.routerBuilt.Store(true) + return fiber.New(fiber.Config{DisableStartupMessage: true}), router.ShutdownHooks{} + } + return st +} + +// TestRun_HappyPath_BootsReadyTeardown drives run() end-to-end with all +// external boundaries stubbed. The serve seam returns nil immediately to +// simulate a clean SIGTERM-triggered drain; run() must boot, build the +// router, run migrations, start the pool exporter, then unwind every defer +// and return nil. 
+func TestRun_HappyPath_BootsReadyTeardown(t *testing.T) { + newRunSeams(t) + setMinimalValidEnv(t) + st := wireHappyPathSeams(t) + + serveFunc = func(*fiber.App, string, time.Duration, router.ShutdownHooks) error { + st.served.Store(true) + return nil // clean drain + } + + err := run() + require.NoError(t, err, "clean serve return must yield a nil run() error") + + assert.True(t, st.migrationsCalled.Load(), "migrations must run during boot") + assert.True(t, st.routerBuilt.Load(), "router must be built before serving") + assert.True(t, st.served.Load(), "serve seam must be reached") + assert.True(t, st.tracerShutdownCalled.Load(), "deferred tracer shutdown must run on a clean return") + // The pool exporter is launched with `go`, so poll rather than assert immediately. + assert.Eventually(t, st.poolExporterCalled.Load, time.Second, 10*time.Millisecond, + "pool stats exporter must be started during boot") +} + +// TestRun_MigrationsFailReturnsError — a migration failure must surface as a +// non-nil run() error (main() turns it into os.Exit(1) → CrashLoopBackoff) +// rather than booting a server against an un-migrated schema. +func TestRun_MigrationsFailReturnsError(t *testing.T) { + newRunSeams(t) + setMinimalValidEnv(t) + st := wireHappyPathSeams(t) + runMigrations = func(*sql.DB) error { return errors.New("relation does not exist") } + serveFunc = func(*fiber.App, string, time.Duration, router.ShutdownHooks) error { + st.served.Store(true) + return nil + } + + err := run() + require.Error(t, err, "migration failure must abort boot") + assert.Contains(t, err.Error(), "migrations") + assert.False(t, st.served.Load(), "serve must NOT be reached when migrations fail") +} + +// TestRun_PlansLoadFailsInProductionReturnsError — when ENVIRONMENT=production +// and plans.yaml is missing, loadPlansRegistry returns an error and run() must +// abort before serving (fail-loud — never serve stale embedded limits in prod). 
+func TestRun_PlansLoadFailsInProductionReturnsError(t *testing.T) { + newRunSeams(t) + setMinimalValidEnv(t) + t.Setenv("ENVIRONMENT", "production") + st := wireHappyPathSeams(t) + serveFunc = func(*fiber.App, string, time.Duration, router.ShutdownHooks) error { + st.served.Store(true) + return nil + } + + err := run() + require.Error(t, err, "missing plans.yaml in production must abort boot") + assert.Contains(t, err.Error(), "plans") + assert.False(t, st.served.Load(), "serve must NOT be reached when plans load fails in prod") +} + +// TestRun_ProvisionerConnectFailsReturnsError — when PROVISIONER_ADDR is set +// but the gRPC client constructor errors, run() must abort before serving. +func TestRun_ProvisionerConnectFailsReturnsError(t *testing.T) { + newRunSeams(t) + setMinimalValidEnv(t) + t.Setenv("PROVISIONER_ADDR", "provisioner.invalid:50051") + st := wireHappyPathSeams(t) + newProvisionerClient = func(string, string) (*provisioner.Client, *grpc.ClientConn, error) { + return nil, nil, errors.New("dial failed") + } + serveFunc = func(*fiber.App, string, time.Duration, router.ShutdownHooks) error { + st.served.Store(true) + return nil + } + + err := run() + require.Error(t, err, "provisioner connect failure must abort boot") + assert.Contains(t, err.Error(), "provisioner") + assert.False(t, st.served.Load()) +} + +// TestRun_ProvisionerConnectSucceedsServes — PROVISIONER_ADDR set and the +// client constructs cleanly: run() must reach the serve seam (the +// remote-provisioner branch), and a clean serve return yields nil. +func TestRun_ProvisionerConnectSucceedsServes(t *testing.T) { + newRunSeams(t) + setMinimalValidEnv(t) + t.Setenv("PROVISIONER_ADDR", "provisioner.invalid:50051") + st := wireHappyPathSeams(t) + newProvisionerClient = func(string, string) (*provisioner.Client, *grpc.ClientConn, error) { + // A nil-backed client + a real (lazy) ClientConn. grpc.NewClient does + // not dial until first RPC, so this never touches the network here. 
+ conn, err := grpc.NewClient("passthrough:///provisioner.invalid:50051", + grpc.WithTransportCredentials(insecure.NewCredentials())) + require.NoError(t, err) + return nil, conn, nil + } + serveFunc = func(*fiber.App, string, time.Duration, router.ShutdownHooks) error { + st.served.Store(true) + return nil + } + + err := run() + require.NoError(t, err) + assert.True(t, st.served.Load(), "serve must be reached on the remote-provisioner happy path") +} + +// TestRun_ServeErrorReturnsError — when the serve seam reports a fatal +// listener error (port bind failure, stuck-drain timeout), run() must +// surface it so main() exits non-zero. +func TestRun_ServeErrorReturnsError(t *testing.T) { + newRunSeams(t) + setMinimalValidEnv(t) + wireHappyPathSeams(t) + serveFunc = func(*fiber.App, string, time.Duration, router.ShutdownHooks) error { + return errors.New("listen tcp :8080: bind: address already in use") + } + + err := run() + require.Error(t, err, "a fatal serve error must propagate out of run()") + assert.Contains(t, err.Error(), "serve") +} + +// TestRun_TracerShutdownErrorIsLoggedNotFatal — the deferred tracer shutdown +// returning an error must NOT change run()'s return value (it is logged at +// ERROR and swallowed). A clean serve return stays nil even when the tracer's +// shutdown errors. 
+func TestRun_TracerShutdownErrorIsLoggedNotFatal(t *testing.T) { + newRunSeams(t) + setMinimalValidEnv(t) + wireHappyPathSeams(t) + initTracer = func(string, string) func(context.Context) error { + return func(context.Context) error { return errors.New("tp shutdown timeout") } + } + serveFunc = func(*fiber.App, string, time.Duration, router.ShutdownHooks) error { return nil } + + err := run() + require.NoError(t, err, "tracer shutdown error must be swallowed, not propagated") +} + +// TestInitNewRelic_ValidLicenseReturnsApp — a syntactically valid 40-char +// license must produce a non-nil *newrelic.Application (the success arm: +// NewApplication + the "newrelic.initialized" log). NEW_RELIC_APP_NAME, when +// empty, falls back to the derived "instant-"-prefixed name. +func TestInitNewRelic_ValidLicenseReturnsApp(t *testing.T) { + t.Setenv("NEW_RELIC_LICENSE_KEY", strings.Repeat("a", 40)) + t.Setenv("NEW_RELIC_APP_NAME", "") + app := initNewRelic("api") + require.NotNil(t, app, "a valid 40-char license must yield a non-nil NR app") + app.Shutdown(2 * time.Second) +} + +// TestInitNewRelic_AppNameOverride — NEW_RELIC_APP_NAME, when set, overrides +// the derived "instant-"-prefixed name (covers the appName-set branch). +func TestInitNewRelic_AppNameOverride(t *testing.T) { + t.Setenv("NEW_RELIC_LICENSE_KEY", strings.Repeat("b", 40)) + t.Setenv("NEW_RELIC_APP_NAME", "custom-app-name") + app := initNewRelic("api") + require.NotNil(t, app) + app.Shutdown(2 * time.Second) +} + +// TestInitNewRelic_InvalidLicenseFailsOpen — a malformed (non-40-char, +// non-empty) license makes NewApplication error; initNewRelic must log and +// return nil rather than crash boot (the init_failed fail-open arm). 
+func TestInitNewRelic_InvalidLicenseFailsOpen(t *testing.T) { + t.Setenv("NEW_RELIC_LICENSE_KEY", "too-short-to-be-valid") + app := initNewRelic("api") + require.Nil(t, app, "a malformed license must fail open to nil, not panic") +} + +// TestRun_WithNRAppAndGeoDBs_CoversTeardownDefers drives run() with a non-nil +// NR app (so the nrApp.Shutdown + SetNRApp branch runs) and a non-nil GeoDBs +// with closable City/ASN handles (so both geo-close defers run). Asserts a +// clean boot→serve→teardown with no panic on the extra defer paths. +func TestRun_WithNRAppAndGeoDBs_CoversTeardownDefers(t *testing.T) { + newRunSeams(t) + setMinimalValidEnv(t) + // Valid license → initNewRelic returns a non-nil app inside run(). + t.Setenv("NEW_RELIC_LICENSE_KEY", strings.Repeat("c", 40)) + st := wireHappyPathSeams(t) + + // Non-nil GeoDBs with real (closable) maxmind readers from an embedded + // fixture would be heavy; instead supply a GeoDBs whose City/ASN are + // non-nil readers via the test-only opener. middleware.LoadGeoLite2 is + // seamed, so we return a GeoDBs the defers can Close() without panicking. + loadGeoLite2 = func(string) *middleware.GeoDBs { return newClosableGeoDBs(t) } + + serveFunc = func(*fiber.App, string, time.Duration, router.ShutdownHooks) error { + st.served.Store(true) + return nil + } + + err := run() + require.NoError(t, err) + assert.True(t, st.served.Load()) +} + +// TestEmitDeployAuditSelfReport_SuccessAgainstRealDB — against a real +// migrated platform DB, emitDeployAuditSelfReport must insert a row and take +// the success-log arm. Skips when TEST_DATABASE_URL is unset. 
+func TestEmitDeployAuditSelfReport_SuccessAgainstRealDB(t *testing.T) { + if os.Getenv("TEST_DATABASE_URL") == "" { + t.Skip("TEST_DATABASE_URL not set; skipping DB-backed self-report test") + } + dbh, clean := testhelpers.SetupTestDB(t) + defer clean() + _, _ = dbh.Exec(`DELETE FROM deploys_audit`) + t.Cleanup(func() { _, _ = dbh.Exec(`DELETE FROM deploys_audit`) }) + + // Must not panic and must write a row (success arm). + emitDeployAuditSelfReport(dbh) + + var n int + require.NoError(t, dbh.QueryRow(`SELECT count(*) FROM deploys_audit WHERE service='api'`).Scan(&n)) + assert.GreaterOrEqual(t, n, 1, "self-report success arm must insert at least one row") +} + +// TestEmitDeployAuditSelfReport_DBErrorIsSwallowed — a non-pinging handle +// makes InsertSelfReport fail; emitDeployAuditSelfReport must log at WARN and +// return without panicking (observability, never a boot gate). +func TestEmitDeployAuditSelfReport_DBErrorIsSwallowed(t *testing.T) { + dbh, err := sql.Open("postgres", "postgres://u:p@127.0.0.1:1/none?sslmode=disable") + require.NoError(t, err) + defer dbh.Close() + assert.NotPanics(t, func() { emitDeployAuditSelfReport(dbh) }, + "a DB error in the self-report must be swallowed, never panic boot") +} + +// TestRun_IsErrorReturning is a compile-time + behaviour guard for the seam +// contract main() relies on: run() is a free function that returns an error +// instead of exiting. main() itself is driven in-process by the TestMain_* +// tests in this file through the runFunc/osExit seams. +func TestRun_IsErrorReturning(t *testing.T) { + // Documents the seam contract relied on by main(): run returns an error. + var fn func() error = run + require.NotNil(t, fn) + // envProduction sanity — run()'s plans branch keys off it. + require.True(t, strings.EqualFold(envProduction, "production")) +} + +// TestMain_ExitsNonZeroOnRunError — main() must call os.Exit(1) when run() +// returns an error. 
We stub the runFunc and osExit seams so main() can be +// driven in-process (it normally os.Exit()s the test binary). Production +// wiring (runFunc==run, osExit==os.Exit) is untouched. +func TestMain_ExitsNonZeroOnRunError(t *testing.T) { + prevRun, prevExit := runFunc, osExit + t.Cleanup(func() { runFunc, osExit = prevRun, prevExit }) + + runFunc = func() error { return errors.New("boot failed") } + var gotCode int + var exited bool + osExit = func(code int) { gotCode = code; exited = true } + + main() + + require.True(t, exited, "main() must call osExit when run() returns an error") + require.Equal(t, 1, gotCode, "main() must exit with code 1 on a run() error") +} + +// TestMain_NoExitOnCleanRun — when run() returns nil (clean SIGTERM drain), +// main() must NOT call os.Exit. Pins the happy-path wrapper. +func TestMain_NoExitOnCleanRun(t *testing.T) { + prevRun, prevExit := runFunc, osExit + t.Cleanup(func() { runFunc, osExit = prevRun, prevExit }) + + runFunc = func() error { return nil } + exited := false + osExit = func(int) { exited = true } + + main() + require.False(t, exited, "main() must not exit when run() returns nil") +}