From a9bbe06fe5ef9efeea76c279e20e1682b5fc1c8e Mon Sep 17 00:00:00 2001 From: Fredrik Skogman Date: Tue, 20 Jan 2026 12:53:19 +0100 Subject: [PATCH 1/4] Update builder image to latest --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 1b76f93..76802ad 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # 1.25.5-alpine3.23 -FROM golang@sha256:26111811bc967321e7b6f852e914d14bede324cd1accb7f81811929a6a57fea9 AS builder +FROM golang@sha256:d9b2e14101f27ec8d09674cd01186798d227bb0daec90e032aeb1cd22ac0f029 AS builder WORKDIR /app COPY go.mod go.sum ./ From 3f8485e25db51f9a05b59b687704a4372b1e22a6 Mon Sep 17 00:00:00 2001 From: Fredrik Skogman Date: Tue, 20 Jan 2026 14:26:26 +0100 Subject: [PATCH 2/4] Don't retry request on client errors --- internal/controller/controller.go | 14 ++++++++++++++ pkg/deploymentrecord/client.go | 18 +++++++++++++++--- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/internal/controller/controller.go b/internal/controller/controller.go index 9a0495c..97ed8ec 100644 --- a/internal/controller/controller.go +++ b/internal/controller/controller.go @@ -323,6 +323,20 @@ func (c *Controller) recordContainer(ctx context.Context, pod *corev1.Pod, conta ) if err := c.apiClient.PostOne(ctx, record); err != nil { + // Make sure to not retry on client error messages + var clientErr *deploymentrecord.ClientError + if errors.As(err, &clientErr) { + slog.Warn("Failed to post record", + "event_type", eventType, + "name", record.Name, + "deployment_name", record.DeploymentName, + "status", record.Status, + "digest", record.Digest, + "error", err, + ) + return nil + } + slog.Error("Failed to post record", "event_type", eventType, "name", record.Name, diff --git a/pkg/deploymentrecord/client.go b/pkg/deploymentrecord/client.go index d71cf13..660f6d7 100644 --- a/pkg/deploymentrecord/client.go +++ b/pkg/deploymentrecord/client.go @@ -65,6 +65,19 @@ func WithAPIToken(token string) ClientOption { } } +// ClientError represents a client error that can not be retried. +type ClientError struct { + err error +} + +func (c *ClientError) Error() string { + return fmt.Sprintf("client_error: %s", c.err.Error()) +} + +func (c *ClientError) Unwrap() error { + return c.err +} + // PostOne posts a single deployment record to the GitHub deployment // records API. func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error { @@ -129,11 +142,10 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error { // Don't retry on client errors (4xx) except for 429 // (rate limit) if resp.StatusCode >= 400 && resp.StatusCode < 500 && resp.StatusCode != 429 { - metrics.PostDeploymentRecordHardFail.Inc() - slog.Error("irrecoverable error, aborting", + slog.Warn("client error, aborting", "attempt", attempt, "error", lastErr) - return lastErr + return &ClientError{err: lastErr} } metrics.PostDeploymentRecordSoftFail.Inc() } From 17bc18285e1f4b6125d1b3f4085f110c8b62b016 Mon Sep 17 00:00:00 2001 From: Fredrik Skogman Date: Tue, 20 Jan 2026 14:33:15 +0100 Subject: [PATCH 3/4] Added metric for client errors --- README.md | 2 ++ pkg/deploymentrecord/client.go | 1 + pkg/metrics/prom.go | 8 ++++++++ 3 files changed, 11 insertions(+) diff --git a/README.md b/README.md index b4e4df5..7f097c4 100644 --- a/README.md +++ b/README.md @@ -154,6 +154,8 @@ The metrics exposed beyond the default Prometheus metrics are: * `deptracker_post_record_hard_fail`: the number of failures to persist a record via the HTTP API (either an irrecoverable error or all retries are exhausted). +* `deptracker_post_record_client_error`: the number of client errors, + there are never retried nor reprocessed. ## License diff --git a/pkg/deploymentrecord/client.go b/pkg/deploymentrecord/client.go index 660f6d7..4c2dd93 100644 --- a/pkg/deploymentrecord/client.go +++ b/pkg/deploymentrecord/client.go @@ -142,6 +142,7 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error { // Don't retry on client errors (4xx) except for 429 // (rate limit) if resp.StatusCode >= 400 && resp.StatusCode < 500 && resp.StatusCode != 429 { + metrics.PostDeploymentRecordClientError.Inc() slog.Warn("client error, aborting", "attempt", attempt, "error", lastErr) diff --git a/pkg/metrics/prom.go b/pkg/metrics/prom.go index cdcb51d..d0c0c71 100644 --- a/pkg/metrics/prom.go +++ b/pkg/metrics/prom.go @@ -64,4 +64,12 @@ var ( Help: "The total number of hard post failures", }, ) + + //nolint: revive + PostDeploymentRecordClientError = promauto.NewCounter( + prometheus.CounterOpts{ + Name: "deptracker_post_record_client_error", + Help: "The total number of client failures", + }, + ) ) From d010bdc0353786e1e5dce6c19e799fe536c75c4e Mon Sep 17 00:00:00 2001 From: Fredrik Skogman Date: Tue, 20 Jan 2026 14:41:37 +0100 Subject: [PATCH 4/4] Spelling error --- README.md | 2 +- pkg/metrics/prom.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7f097c4..28fa011 100644 --- a/README.md +++ b/README.md @@ -155,7 +155,7 @@ The metrics exposed beyond the default Prometheus metrics are: persist a record via the HTTP API (either an irrecoverable error or all retries are exhausted). * `deptracker_post_record_client_error`: the number of client errors, - there are never retried nor reprocessed. + these are never retried nor reprocessed. ## License diff --git a/pkg/metrics/prom.go b/pkg/metrics/prom.go index d0c0c71..3d28cb4 100644 --- a/pkg/metrics/prom.go +++ b/pkg/metrics/prom.go @@ -69,7 +69,7 @@ var ( PostDeploymentRecordClientError = promauto.NewCounter( prometheus.CounterOpts{ Name: "deptracker_post_record_client_error", - Help: "The total number of client failures", + Help: "The total number of non-retryable client failures", }, ) )