From 78f3e12cf5076682950f84f5e4fb27a226aa5288 Mon Sep 17 00:00:00 2001 From: thiagoalessio Date: Tue, 17 Mar 2026 12:07:16 +0100 Subject: [PATCH 1/4] OCPCRT-450: Rename convertToAccount2 to applyClusterProfile Renaming this function to better reflect its purpose, especially if at some point we have more than two accounts. Co-Authored-By: Claude Opus 4.6 --- pkg/manager/prow.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/manager/prow.go b/pkg/manager/prow.go index 30ff9e00f..58d22b305 100644 --- a/pkg/manager/prow.go +++ b/pkg/manager/prow.go @@ -1713,7 +1713,7 @@ func (e *resolvedEnvironment) Lookup(name string) string { return "" } -func convertToAccount2(job *prowapiv1.ProwJob, sourceConfig *citools.ReleaseBuildConfiguration, profileName, profileSecret, accountDomain string) error { +func applyClusterProfile(job *prowapiv1.ProwJob, sourceConfig *citools.ReleaseBuildConfiguration, profileName, profileSecret, accountDomain string) error { job.Labels["ci-operator.openshift.io/cloud-cluster-profile"] = profileName for index, volume := range job.Spec.PodSpec.Volumes { // TODO: only some ci-chat-bot jobs have this; check if they can all be removed @@ -1746,13 +1746,13 @@ func convertToAccount2(job *prowapiv1.ProwJob, sourceConfig *citools.ReleaseBuil } func convertAWSToAWS2(job *prowapiv1.ProwJob, sourceConfig *citools.ReleaseBuildConfiguration) error { - return convertToAccount2(job, sourceConfig, "aws-2", "cluster-secrets-aws-2", "aws-2.ci.openshift.org") + return applyClusterProfile(job, sourceConfig, "aws-2", "cluster-secrets-aws-2", "aws-2.ci.openshift.org") } func convertAzureToAzure2(job *prowapiv1.ProwJob, sourceConfig *citools.ReleaseBuildConfiguration) error { - return convertToAccount2(job, sourceConfig, "azure-2", "cluster-secrets-azure-2", "ci2.azure.devcluster.openshift.com") + return applyClusterProfile(job, sourceConfig, "azure-2", "cluster-secrets-azure-2", "ci2.azure.devcluster.openshift.com") } func convertGCPToGCP2(job *prowapiv1.ProwJob, sourceConfig *citools.ReleaseBuildConfiguration) error { - return convertToAccount2(job, sourceConfig, "gcp-openshift-gce-devel-ci-2", "cluster-secrets-gcp-openshift-gce-devel-ci-2", "") + return applyClusterProfile(job, sourceConfig, "gcp-openshift-gce-devel-ci-2", "cluster-secrets-gcp-openshift-gce-devel-ci-2", "") } From e43972334b92992bfe889c870f8c87b0454295f1 Mon Sep 17 00:00:00 2001 From: thiagoalessio Date: Tue, 17 Mar 2026 12:16:07 +0100 Subject: [PATCH 2/4] OCPCRT-450: Add CloudAccountProfile struct and platformQuotaSlices map Introduce the data structures that will replace the hardcoded switch block for quota-slice selection. Co-Authored-By: Claude Opus 4.6 --- pkg/manager/manager.go | 32 ++++++++++++++++++++++++++++++++ pkg/manager/types.go | 9 +++++++++ 2 files changed, 41 insertions(+) diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index 1ed89b155..b37cf6283 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -107,6 +107,38 @@ var HypershiftSupportedVersions = HypershiftSupportedVersionsType{} var reBranchVersion = regexp.MustCompile(`^(openshift-|release-)(\d+\.\d+)$`) var reMajorMinorVersion = regexp.MustCompile(`^(\d+)\.(\d+)$`) +// platformQuotaSlices maps each cloud platform to its available quota-slice +// accounts. The first entry is the primary (default) account. Subsequent entries +// are alternates that can be selected when they have more free resources. +var platformQuotaSlices = map[string][]CloudAccountProfile{ + "aws": { + {QuotaSlice: "aws-quota-slice"}, + { + QuotaSlice: "aws-2-quota-slice", + ProfileName: "aws-2", + ProfileSecret: "cluster-secrets-aws-2", + AccountDomain: "aws-2.ci.openshift.org", + }, + }, + "azure": { + {QuotaSlice: "azure4-quota-slice"}, + { + QuotaSlice: "azure-2-quota-slice", + ProfileName: "azure-2", + ProfileSecret: "cluster-secrets-azure-2", + AccountDomain: "ci2.azure.devcluster.openshift.com", + }, + }, + "gcp": { + {QuotaSlice: "gcp-quota-slice"}, + { + QuotaSlice: "gcp-openshift-gce-devel-ci-2-quota-slice", + ProfileName: "gcp-openshift-gce-devel-ci-2", + ProfileSecret: "cluster-secrets-gcp-openshift-gce-devel-ci-2", + }, + }, +} + func (j Job) IsComplete() bool { return j.Complete || len(j.Credentials) > 0 || (len(j.State) > 0 && j.State != prowapiv1.PendingState) } diff --git a/pkg/manager/types.go b/pkg/manager/types.go index a8dcaed4a..ec8a105de 100644 --- a/pkg/manager/types.go +++ b/pkg/manager/types.go @@ -449,6 +449,15 @@ type JobInput struct { Refs []prowapiv1.Refs } +// CloudAccountProfile holds the parameters needed to redirect a ProwJob to an +// alternate cloud account (e.g. aws-2 instead of the default aws account). +type CloudAccountProfile struct { + QuotaSlice string // boskos resource type, e.g. "aws-2-quota-slice" + ProfileName string // cluster profile name, e.g. "aws-2" + ProfileSecret string // k8s secret name, e.g. "cluster-secrets-aws-2" + AccountDomain string // base domain override (optional), e.g. "aws-2.ci.openshift.org" +} + // Job responds to user requests and tracks the state of the launched // jobs. This object must be recreatable from a ProwJob, but the RequestedChannel // field may be empty to indicate the user has already been notified. From 7965023ef069d37bf8d2484d5c118aa439b3126f Mon Sep 17 00:00:00 2001 From: thiagoalessio Date: Tue, 17 Mar 2026 12:21:59 +0100 Subject: [PATCH 3/4] OCPCRT-450: Add selectCloudAccountProfile with tests Extract the quota-slice selection logic into a standalone function that loops over platformQuotaSlices entries, queries Boskos metrics for each, and returns the profile with the most free resources. The old switch block still exists for now. Co-Authored-By: Claude Opus 4.6 --- pkg/manager/manager.go | 27 ++++++++ pkg/manager/manager_test.go | 120 +++++++++++++++++++++++++++++++++++- 2 files changed, 146 insertions(+), 1 deletion(-) diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index b37cf6283..e0fdf4111 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -139,6 +139,33 @@ var platformQuotaSlices = map[string][]CloudAccountProfile{ }, } +// selectCloudAccountProfile queries Boskos metrics for each quota-slice +// candidate for the given platform and returns the profile with the most free +// resources. Returns nil if the platform has no configured accounts or if the +// primary (index 0) has the most free resources (no conversion needed). +func selectCloudAccountProfile(platform string, lClient LeaseClient) (*CloudAccountProfile, error) { + accounts, ok := platformQuotaSlices[platform] + if !ok || len(accounts) < 2 { + return nil, nil + } + bestIdx := 0 + bestFree := -1 + for i := range accounts { + metrics, err := lClient.Metrics(accounts[i].QuotaSlice) + if err != nil { + return nil, fmt.Errorf("failed to get metrics for %q leases: %v", accounts[i].QuotaSlice, err) + } + if metrics.Free > bestFree { + bestIdx = i + bestFree = metrics.Free + } + } + if bestIdx == 0 { + return nil, nil + } + return &accounts[bestIdx], nil +} + func (j Job) IsComplete() bool { return j.Complete || len(j.Credentials) > 0 || (len(j.State) > 0 && j.State != prowapiv1.PendingState) } diff --git a/pkg/manager/manager_test.go b/pkg/manager/manager_test.go index 3408f44f9..90fa78767 100644 --- a/pkg/manager/manager_test.go +++ b/pkg/manager/manager_test.go @@ -1,6 +1,124 @@ package manager -import "testing" +import ( + "fmt" + "testing" + + "github.com/openshift/ci-tools/pkg/lease" +) + +type mockLeaseClient struct { + metrics map[string]lease.Metrics +} + +func (m *mockLeaseClient) Metrics(rtype string) (lease.Metrics, error) { + if metrics, ok := m.metrics[rtype]; ok { + return metrics, nil + } + return lease.Metrics{}, fmt.Errorf("resource type %q not found", rtype) +} + +func Test_selectCloudAccountProfile(t *testing.T) { + tests := []struct { + name string + platform string + metrics map[string]lease.Metrics + wantNil bool + wantProfile string + wantErr bool + }{ + { + name: "platform not in map returns nil", + platform: "metal", + metrics: map[string]lease.Metrics{}, + wantNil: true, + }, + { + name: "primary has more free resources returns nil", + platform: "aws", + metrics: map[string]lease.Metrics{ + "aws-quota-slice": {Free: 10, Leased: 5}, + "aws-2-quota-slice": {Free: 3, Leased: 12}, + }, + wantNil: true, + }, + { + name: "secondary has more free resources returns that profile", + platform: "aws", + metrics: map[string]lease.Metrics{ + "aws-quota-slice": {Free: 2, Leased: 13}, + "aws-2-quota-slice": {Free: 8, Leased: 7}, + }, + wantProfile: "aws-2", + }, + { + name: "equal free counts returns nil (primary wins)", + platform: "aws", + metrics: map[string]lease.Metrics{ + "aws-quota-slice": {Free: 5, Leased: 10}, + "aws-2-quota-slice": {Free: 5, Leased: 10}, + }, + wantNil: true, + }, + { + name: "all zero free returns nil", + platform: "aws", + metrics: map[string]lease.Metrics{ + "aws-quota-slice": {Free: 0, Leased: 15}, + "aws-2-quota-slice": {Free: 0, Leased: 15}, + }, + wantNil: true, + }, + { + name: "gcp secondary wins", + platform: "gcp", + metrics: map[string]lease.Metrics{ + "gcp-quota-slice": {Free: 1, Leased: 14}, + "gcp-openshift-gce-devel-ci-2-quota-slice": {Free: 7, Leased: 8}, + }, + wantProfile: "gcp-openshift-gce-devel-ci-2", + }, + { + name: "azure secondary wins", + platform: "azure", + metrics: map[string]lease.Metrics{ + "azure4-quota-slice": {Free: 3, Leased: 12}, + "azure-2-quota-slice": {Free: 9, Leased: 6}, + }, + wantProfile: "azure-2", + }, + { + name: "metrics error returns error", + platform: "aws", + metrics: map[string]lease.Metrics{}, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + client := &mockLeaseClient{metrics: tt.metrics} + got, err := selectCloudAccountProfile(tt.platform, client) + if (err != nil) != tt.wantErr { + t.Errorf("selectCloudAccountProfile() error = %v, wantErr %v", err, tt.wantErr) + return + } + if tt.wantErr { + return + } + if tt.wantNil && got != nil { + t.Errorf("selectCloudAccountProfile() = %+v, want nil", got) + return + } + if !tt.wantNil && got == nil { + t.Errorf("selectCloudAccountProfile() = nil, want profile %q", tt.wantProfile) + return + } + if !tt.wantNil && got.ProfileName != tt.wantProfile { + t.Errorf("selectCloudAccountProfile() ProfileName = %q, want %q", got.ProfileName, tt.wantProfile) + } + }) + } +} func Test_containsValidVersion(t *testing.T) { type args struct { From 0be8b0712e2259133dada99553e7e8138416f497 Mon Sep 17 00:00:00 2001 From: thiagoalessio Date: Tue, 17 Mar 2026 12:25:24 +0100 Subject: [PATCH 4/4] OCPCRT-450: Replace UseSecondaryAccount with CloudAccountProfile selection Got rid of the switch block in favor of the newly introduced `selectCloudAccountProfile`. Also replaced the other switch block with those wrapper functions in `prow.go` with a single nil check that calls applyClusterProfile directly from the profile struct fields. Co-Authored-By: Claude Opus 4.6 --- pkg/manager/manager.go | 41 ++++------------------------------------- pkg/manager/prow.go | 32 +++++--------------------------- pkg/manager/types.go | 2 +- 3 files changed, 10 insertions(+), 65 deletions(-) diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index e0fdf4111..d9eacd9af 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -2184,44 +2184,11 @@ func (m *jobManager) LaunchJobForUser(req *JobRequest) (string, error) { // check what leases are available for platform if req.Architecture == "amd64" && m.lClient != nil { - switch req.Platform { - case "aws": - metrics1, err := m.lClient.Metrics("aws-quota-slice") - if err != nil { - return "", fmt.Errorf("failed to get metrics for `aws` leases: %v", err) - } - metrics2, err := m.lClient.Metrics("aws-2-quota-slice") - if err != nil { - return "", fmt.Errorf("failed to get metrics for `aws-2` leases: %v", err) - } - if metrics2.Free > metrics1.Free { - job.UseSecondaryAccount = true - } - case "azure": - metrics1, err := m.lClient.Metrics("azure4-quota-slice") - if err != nil { - return "", fmt.Errorf("failed to get metrics for `azure` leases: %v", err) - } - metrics2, err := m.lClient.Metrics("azure-2-quota-slice") - if err != nil { - return "", fmt.Errorf("failed to get metrics for `azure-2` leases: %v", err) - } - if metrics2.Free > metrics1.Free { - job.UseSecondaryAccount = true - } - case "gcp": - metrics1, err := m.lClient.Metrics("gcp-quota-slice") - if err != nil { - return "", fmt.Errorf("failed to get metrics for `gcp` leases: %v", err) - } - metrics2, err := m.lClient.Metrics("gcp-openshift-gce-devel-ci-2-quota-slice") - if err != nil { - return "", fmt.Errorf("failed to get metrics for `gcp-openshift-gce-devel-ci-2` leases: %v", err) - } - if metrics2.Free > metrics1.Free { - job.UseSecondaryAccount = true - } + profile, err := selectCloudAccountProfile(req.Platform, m.lClient) + if err != nil { + return "", err } + job.CloudAccountProfile = profile } msg, err := func() (string, error) { diff --git a/pkg/manager/prow.go b/pkg/manager/prow.go index 58d22b305..82fd3b538 100644 --- a/pkg/manager/prow.go +++ b/pkg/manager/prow.go @@ -562,21 +562,11 @@ func (m *jobManager) newJob(job *Job) (string, error) { } } - // if a step based config, launch should now be the test config we will run; time to update the config for lease balancing - if job.UseSecondaryAccount { - switch job.Platform { - case "aws": - if err := convertAWSToAWS2(pj, sourceConfig); err != nil { - return "", fmt.Errorf("failed updating aws job to aws-2: %w", err) - } - case "gcp": - if err := convertGCPToGCP2(pj, sourceConfig); err != nil { - return "", fmt.Errorf("failed updating gcp job to gcp-openshift-gce-devel-ci-2: %w", err) - } - case "azure": - if err := convertAzureToAzure2(pj, sourceConfig); err != nil { - return "", fmt.Errorf("failed updating azure job to azure-2: %w", err) - } + // if an alternate cloud account was selected for lease balancing, apply it + if job.CloudAccountProfile != nil { + p := job.CloudAccountProfile + if err := applyClusterProfile(pj, sourceConfig, p.ProfileName, p.ProfileSecret, p.AccountDomain); err != nil { + return "", fmt.Errorf("failed applying cluster profile %q: %w", p.ProfileName, err) } } @@ -1744,15 +1734,3 @@ func applyClusterProfile(job *prowapiv1.ProwJob, sourceConfig *citools.ReleaseBu } return nil } - -func convertAWSToAWS2(job *prowapiv1.ProwJob, sourceConfig *citools.ReleaseBuildConfiguration) error { - return applyClusterProfile(job, sourceConfig, "aws-2", "cluster-secrets-aws-2", "aws-2.ci.openshift.org") -} - -func convertAzureToAzure2(job *prowapiv1.ProwJob, sourceConfig *citools.ReleaseBuildConfiguration) error { - return applyClusterProfile(job, sourceConfig, "azure-2", "cluster-secrets-azure-2", "ci2.azure.devcluster.openshift.com") -} - -func convertGCPToGCP2(job *prowapiv1.ProwJob, sourceConfig *citools.ReleaseBuildConfiguration) error { - return applyClusterProfile(job, sourceConfig, "gcp-openshift-gce-devel-ci-2", "cluster-secrets-gcp-openshift-gce-devel-ci-2", "") -} diff --git a/pkg/manager/types.go b/pkg/manager/types.go index ec8a105de..2df0cb3cd 100644 --- a/pkg/manager/types.go +++ b/pkg/manager/types.go @@ -495,7 +495,7 @@ type Job struct { WorkflowName string - UseSecondaryAccount bool + CloudAccountProfile *CloudAccountProfile Operator OperatorInfo CatalogComplete bool