diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index 1ed89b155..d9eacd9af 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -107,6 +107,65 @@ var HypershiftSupportedVersions = HypershiftSupportedVersionsType{} var reBranchVersion = regexp.MustCompile(`^(openshift-|release-)(\d+\.\d+)$`) var reMajorMinorVersion = regexp.MustCompile(`^(\d+)\.(\d+)$`) +// platformQuotaSlices maps each cloud platform to its available quota-slice +// accounts. The first entry is the primary (default) account. Subsequent entries +// are alternates that can be selected when they have more free resources. +var platformQuotaSlices = map[string][]CloudAccountProfile{ + "aws": { + {QuotaSlice: "aws-quota-slice"}, + { + QuotaSlice: "aws-2-quota-slice", + ProfileName: "aws-2", + ProfileSecret: "cluster-secrets-aws-2", + AccountDomain: "aws-2.ci.openshift.org", + }, + }, + "azure": { + {QuotaSlice: "azure4-quota-slice"}, + { + QuotaSlice: "azure-2-quota-slice", + ProfileName: "azure-2", + ProfileSecret: "cluster-secrets-azure-2", + AccountDomain: "ci2.azure.devcluster.openshift.com", + }, + }, + "gcp": { + {QuotaSlice: "gcp-quota-slice"}, + { + QuotaSlice: "gcp-openshift-gce-devel-ci-2-quota-slice", + ProfileName: "gcp-openshift-gce-devel-ci-2", + ProfileSecret: "cluster-secrets-gcp-openshift-gce-devel-ci-2", + }, + }, +} + +// selectCloudAccountProfile queries Boskos metrics for each quota-slice +// candidate for the given platform and returns the profile with the most free +// resources. Returns nil if the platform has no configured accounts or if the +// primary (index 0) has the most free resources (no conversion needed). +func selectCloudAccountProfile(platform string, lClient LeaseClient) (*CloudAccountProfile, error) { + accounts, ok := platformQuotaSlices[platform] + if !ok || len(accounts) < 2 { + return nil, nil + } + bestIdx := 0 + bestFree := -1 + for i := range accounts { + metrics, err := lClient.Metrics(accounts[i].QuotaSlice) + if err != nil { + return nil, fmt.Errorf("failed to get metrics for %q leases: %v", accounts[i].QuotaSlice, err) + } + if metrics.Free > bestFree { + bestIdx = i + bestFree = metrics.Free + } + } + if bestIdx == 0 { + return nil, nil + } + return &accounts[bestIdx], nil +} + func (j Job) IsComplete() bool { return j.Complete || len(j.Credentials) > 0 || (len(j.State) > 0 && j.State != prowapiv1.PendingState) } @@ -2125,44 +2184,11 @@ func (m *jobManager) LaunchJobForUser(req *JobRequest) (string, error) { // check what leases are available for platform if req.Architecture == "amd64" && m.lClient != nil { - switch req.Platform { - case "aws": - metrics1, err := m.lClient.Metrics("aws-quota-slice") - if err != nil { - return "", fmt.Errorf("failed to get metrics for `aws` leases: %v", err) - } - metrics2, err := m.lClient.Metrics("aws-2-quota-slice") - if err != nil { - return "", fmt.Errorf("failed to get metrics for `aws-2` leases: %v", err) - } - if metrics2.Free > metrics1.Free { - job.UseSecondaryAccount = true - } - case "azure": - metrics1, err := m.lClient.Metrics("azure4-quota-slice") - if err != nil { - return "", fmt.Errorf("failed to get metrics for `azure` leases: %v", err) - } - metrics2, err := m.lClient.Metrics("azure-2-quota-slice") - if err != nil { - return "", fmt.Errorf("failed to get metrics for `azure-2` leases: %v", err) - } - if metrics2.Free > metrics1.Free { - job.UseSecondaryAccount = true - } - case "gcp": - metrics1, err := m.lClient.Metrics("gcp-quota-slice") - if err != nil { - return "", fmt.Errorf("failed to get metrics for `gcp` leases: %v", err) - } - metrics2, err := m.lClient.Metrics("gcp-openshift-gce-devel-ci-2-quota-slice") - if err != nil { - return "", fmt.Errorf("failed to get metrics for `gcp-openshift-gce-devel-ci-2` leases: %v", err) - } - if metrics2.Free > metrics1.Free { - job.UseSecondaryAccount = true - } + profile, err := selectCloudAccountProfile(req.Platform, m.lClient) + if err != nil { + return "", err } + job.CloudAccountProfile = profile } msg, err := func() (string, error) { diff --git a/pkg/manager/manager_test.go b/pkg/manager/manager_test.go index 3408f44f9..90fa78767 100644 --- a/pkg/manager/manager_test.go +++ b/pkg/manager/manager_test.go @@ -1,6 +1,124 @@ package manager -import "testing" +import ( + "fmt" + "testing" + + "github.com/openshift/ci-tools/pkg/lease" +) + +type mockLeaseClient struct { + metrics map[string]lease.Metrics +} + +func (m *mockLeaseClient) Metrics(rtype string) (lease.Metrics, error) { + if metrics, ok := m.metrics[rtype]; ok { + return metrics, nil + } + return lease.Metrics{}, fmt.Errorf("resource type %q not found", rtype) +} + +func Test_selectCloudAccountProfile(t *testing.T) { + tests := []struct { + name string + platform string + metrics map[string]lease.Metrics + wantNil bool + wantProfile string + wantErr bool + }{ + { + name: "platform not in map returns nil", + platform: "metal", + metrics: map[string]lease.Metrics{}, + wantNil: true, + }, + { + name: "primary has more free resources returns nil", + platform: "aws", + metrics: map[string]lease.Metrics{ + "aws-quota-slice": {Free: 10, Leased: 5}, + "aws-2-quota-slice": {Free: 3, Leased: 12}, + }, + wantNil: true, + }, + { + name: "secondary has more free resources returns that profile", + platform: "aws", + metrics: map[string]lease.Metrics{ + "aws-quota-slice": {Free: 2, Leased: 13}, + "aws-2-quota-slice": {Free: 8, Leased: 7}, + }, + wantProfile: "aws-2", + }, + { + name: "equal free counts returns nil (primary wins)", + platform: "aws", + metrics: map[string]lease.Metrics{ + "aws-quota-slice": {Free: 5, Leased: 10}, + "aws-2-quota-slice": {Free: 5, Leased: 10}, + }, + wantNil: true, + }, + { + name: "all zero free returns nil", + platform: "aws", + metrics: map[string]lease.Metrics{ + "aws-quota-slice": {Free: 0, Leased: 15}, + "aws-2-quota-slice": {Free: 0, Leased: 15}, + }, + wantNil: true, + }, + { + name: "gcp secondary wins", + platform: "gcp", + metrics: map[string]lease.Metrics{ + "gcp-quota-slice": {Free: 1, Leased: 14}, + "gcp-openshift-gce-devel-ci-2-quota-slice": {Free: 7, Leased: 8}, + }, + wantProfile: "gcp-openshift-gce-devel-ci-2", + }, + { + name: "azure secondary wins", + platform: "azure", + metrics: map[string]lease.Metrics{ + "azure4-quota-slice": {Free: 3, Leased: 12}, + "azure-2-quota-slice": {Free: 9, Leased: 6}, + }, + wantProfile: "azure-2", + }, + { + name: "metrics error returns error", + platform: "aws", + metrics: map[string]lease.Metrics{}, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + client := &mockLeaseClient{metrics: tt.metrics} + got, err := selectCloudAccountProfile(tt.platform, client) + if (err != nil) != tt.wantErr { + t.Errorf("selectCloudAccountProfile() error = %v, wantErr %v", err, tt.wantErr) + return + } + if tt.wantErr { + return + } + if tt.wantNil && got != nil { + t.Errorf("selectCloudAccountProfile() = %+v, want nil", got) + return + } + if !tt.wantNil && got == nil { + t.Errorf("selectCloudAccountProfile() = nil, want profile %q", tt.wantProfile) + return + } + if !tt.wantNil && got.ProfileName != tt.wantProfile { + t.Errorf("selectCloudAccountProfile() ProfileName = %q, want %q", got.ProfileName, tt.wantProfile) + } + }) + } +} func Test_containsValidVersion(t *testing.T) { type args struct { diff --git a/pkg/manager/prow.go b/pkg/manager/prow.go index 30ff9e00f..82fd3b538 100644 --- a/pkg/manager/prow.go +++ b/pkg/manager/prow.go @@ -562,21 +562,11 @@ func (m *jobManager) newJob(job *Job) (string, error) { } } - // if a step based config, launch should now be the test config we will run; time to update the config for lease balancing - if job.UseSecondaryAccount { - switch job.Platform { - case "aws": - if err := convertAWSToAWS2(pj, sourceConfig); err != nil { - return "", fmt.Errorf("failed updating aws job to aws-2: %w", err) - } - case "gcp": - if err := convertGCPToGCP2(pj, sourceConfig); err != nil { - return "", fmt.Errorf("failed updating gcp job to gcp-openshift-gce-devel-ci-2: %w", err) - } - case "azure": - if err := convertAzureToAzure2(pj, sourceConfig); err != nil { - return "", fmt.Errorf("failed updating azure job to azure-2: %w", err) - } + // if an alternate cloud account was selected for lease balancing, apply it + if job.CloudAccountProfile != nil { + p := job.CloudAccountProfile + if err := applyClusterProfile(pj, sourceConfig, p.ProfileName, p.ProfileSecret, p.AccountDomain); err != nil { + return "", fmt.Errorf("failed applying cluster profile %q: %w", p.ProfileName, err) } } @@ -1713,7 +1703,7 @@ func (e *resolvedEnvironment) Lookup(name string) string { return "" } -func convertToAccount2(job *prowapiv1.ProwJob, sourceConfig *citools.ReleaseBuildConfiguration, profileName, profileSecret, accountDomain string) error { +func applyClusterProfile(job *prowapiv1.ProwJob, sourceConfig *citools.ReleaseBuildConfiguration, profileName, profileSecret, accountDomain string) error { job.Labels["ci-operator.openshift.io/cloud-cluster-profile"] = profileName for index, volume := range job.Spec.PodSpec.Volumes { // TODO: only some ci-chat-bot jobs have this; check if they can all be removed @@ -1744,15 +1734,3 @@ func convertToAccount2(job *prowapiv1.ProwJob, sourceConfig *citools.ReleaseBuil } return nil } - -func convertAWSToAWS2(job *prowapiv1.ProwJob, sourceConfig *citools.ReleaseBuildConfiguration) error { - return convertToAccount2(job, sourceConfig, "aws-2", "cluster-secrets-aws-2", "aws-2.ci.openshift.org") -} - -func convertAzureToAzure2(job *prowapiv1.ProwJob, sourceConfig *citools.ReleaseBuildConfiguration) error { - return convertToAccount2(job, sourceConfig, "azure-2", "cluster-secrets-azure-2", "ci2.azure.devcluster.openshift.com") -} - -func convertGCPToGCP2(job *prowapiv1.ProwJob, sourceConfig *citools.ReleaseBuildConfiguration) error { - return convertToAccount2(job, sourceConfig, "gcp-openshift-gce-devel-ci-2", "cluster-secrets-gcp-openshift-gce-devel-ci-2", "") -} diff --git a/pkg/manager/types.go b/pkg/manager/types.go index a8dcaed4a..2df0cb3cd 100644 --- a/pkg/manager/types.go +++ b/pkg/manager/types.go @@ -449,6 +449,15 @@ type JobInput struct { Refs []prowapiv1.Refs } +// CloudAccountProfile holds the parameters needed to redirect a ProwJob to an +// alternate cloud account (e.g. aws-2 instead of the default aws account). +type CloudAccountProfile struct { + QuotaSlice string // boskos resource type, e.g. "aws-2-quota-slice" + ProfileName string // cluster profile name, e.g. "aws-2" + ProfileSecret string // k8s secret name, e.g. "cluster-secrets-aws-2" + AccountDomain string // base domain override (optional), e.g. "aws-2.ci.openshift.org" +} + // Job responds to user requests and tracks the state of the launched // jobs. This object must be recreatable from a ProwJob, but the RequestedChannel // field may be empty to indicate the user has already been notified. @@ -486,7 +495,7 @@ type Job struct { WorkflowName string - UseSecondaryAccount bool + CloudAccountProfile *CloudAccountProfile Operator OperatorInfo CatalogComplete bool