Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# 1.25.5-alpine3.23
FROM golang@sha256:26111811bc967321e7b6f852e914d14bede324cd1accb7f81811929a6a57fea9 AS builder
FROM golang@sha256:d9b2e14101f27ec8d09674cd01186798d227bb0daec90e032aeb1cd22ac0f029 AS builder

WORKDIR /app
COPY go.mod go.sum ./
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,8 @@ The metrics exposed beyond the default Prometheus metrics are:
* `deptracker_post_record_hard_fail`: the number of failures to
persist a record via the HTTP API (either an irrecoverable error or
all retries are exhausted).
* `deptracker_post_record_client_error`: the number of client errors;
these are never retried or reprocessed.

## License

Expand Down
14 changes: 14 additions & 0 deletions internal/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,20 @@ func (c *Controller) recordContainer(ctx context.Context, pod *corev1.Pod, conta
)

if err := c.apiClient.PostOne(ctx, record); err != nil {
// Make sure to not retry on client error messages
var clientErr *deploymentrecord.ClientError
if errors.As(err, &clientErr) {
slog.Warn("Failed to post record",
"event_type", eventType,
"name", record.Name,
"deployment_name", record.DeploymentName,
"status", record.Status,
"digest", record.Digest,
"error", err,
)
return nil
}

slog.Error("Failed to post record",
"event_type", eventType,
"name", record.Name,
Expand Down
19 changes: 16 additions & 3 deletions pkg/deploymentrecord/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,19 @@ func WithAPIToken(token string) ClientOption {
}
}

// ClientError represents a client error that cannot be retried.
//
// It wraps the underlying error so callers can detect the condition with
// errors.As while still reaching the original cause through Unwrap.
type ClientError struct {
	// err is the wrapped cause; it is nil for a zero-value ClientError.
	err error
}

// Error returns the wrapped error's message prefixed with "client_error: ".
//
// A nil receiver or a ClientError without a wrapped error yields
// "client_error: <nil>" rather than panicking on a nil dereference.
func (c *ClientError) Error() string {
	if c == nil || c.err == nil {
		return "client_error: <nil>"
	}
	return fmt.Sprintf("client_error: %s", c.err.Error())
}

// Unwrap returns the wrapped error so errors.Is and errors.As can inspect
// the underlying cause. It returns nil for a nil receiver.
func (c *ClientError) Unwrap() error {
	if c == nil {
		return nil
	}
	return c.err
}

// PostOne posts a single deployment record to the GitHub deployment
// records API.
func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error {
Expand Down Expand Up @@ -129,11 +142,11 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error {
// Don't retry on client errors (4xx) except for 429
// (rate limit)
if resp.StatusCode >= 400 && resp.StatusCode < 500 && resp.StatusCode != 429 {
metrics.PostDeploymentRecordHardFail.Inc()
slog.Error("irrecoverable error, aborting",
metrics.PostDeploymentRecordClientError.Inc()
slog.Warn("client error, aborting",
"attempt", attempt,
"error", lastErr)
Comment thread
kommendorkapten marked this conversation as resolved.
return lastErr
return &ClientError{err: lastErr}
}
metrics.PostDeploymentRecordSoftFail.Inc()
}
Expand Down
8 changes: 8 additions & 0 deletions pkg/metrics/prom.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,4 +64,12 @@ var (
Help: "The total number of hard post failures",
},
)

//nolint: revive
PostDeploymentRecordClientError = promauto.NewCounter(
prometheus.CounterOpts{
Name: "deptracker_post_record_client_error",
Help: "The total number of non-retryable client failures",
},
)
)