From f5bb01354d12614e9003c31b17fece4924258f36 Mon Sep 17 00:00:00 2001
From: Jon Cope
Date: Tue, 7 Apr 2026 11:04:59 -0500
Subject: [PATCH 1/2] fix(kube-apiserver): Add fail-fast RBAC bootstrap hook
 deadlock detection

On MicroShift restart, the RBAC bootstrap hook can deadlock when etcd
contains existing data. The hook uses context.TODO() for API calls,
which has no timeout. When the loopback client hangs, this creates a
circular dependency where the hook waits for the API server while the
API server waits for the hook to complete.

This change adds a parallel deadlock detector that:

- Monitors /readyz/poststarthook/rbac/bootstrap-roles specifically
- Checks if etcd is healthy while the hook is stuck
- Detects deadlock in ~15 seconds instead of waiting 60 seconds
- Restarts microshift-etcd.scope to recover from the deadlock

This breaks the crash loop by detecting the condition early and taking
recovery action at the MicroShift level, without requiring changes to
vendored upstream Kubernetes code.

The KAS RBAC pre-hook creates an unbound context that can result in a
system deadlock; replacing the unbound context with a caller-bound
context ensures the KAS can be restarted safely without restarting
MicroShift.

Related upstream issues: kubernetes/kubernetes#86715,
kubernetes/kubernetes#97119

Co-Authored-By: Claude Opus 4.5
---
 .../pkg/registry/rbac/rest/storage_rbac.go    |   8 +-
 pkg/controllers/kube-apiserver.go             | 185 +++++++++++++++++-
 .../pkg/registry/rbac/rest/storage_rbac.go    |   8 +-
 3 files changed, 190 insertions(+), 11 deletions(-)

diff --git a/deps/github.com/openshift/kubernetes/pkg/registry/rbac/rest/storage_rbac.go b/deps/github.com/openshift/kubernetes/pkg/registry/rbac/rest/storage_rbac.go
index 16502c476c..e776aa4cef 100644
--- a/deps/github.com/openshift/kubernetes/pkg/registry/rbac/rest/storage_rbac.go
+++ b/deps/github.com/openshift/kubernetes/pkg/registry/rbac/rest/storage_rbac.go
@@ -169,7 +169,7 @@ func (p *PolicyData) EnsureRBACPolicy() genericapiserver.PostStartHookFunc {
 				utilruntime.HandleError(fmt.Errorf("unable to initialize client set: %v", err))
 				return false, nil
 			}
-			return ensureRBACPolicy(p, client)
+			return ensureRBACPolicy(hookContext, p, client)
 		})
 		// if we're never able to make it through initialization, kill the API server
 		if err != nil {
@@ -180,14 +180,14 @@ func (p *PolicyData) EnsureRBACPolicy() genericapiserver.PostStartHookFunc {
 	}
 }

-func ensureRBACPolicy(p *PolicyData, client clientset.Interface) (done bool, err error) {
+func ensureRBACPolicy(ctx context.Context, p *PolicyData, client clientset.Interface) (done bool, err error) {
 	failedReconciliation := false
 	// Make sure etcd is responding before we start reconciling
-	if _, err := client.RbacV1().ClusterRoles().List(context.TODO(), metav1.ListOptions{}); err != nil {
+	if _, err := client.RbacV1().ClusterRoles().List(ctx, metav1.ListOptions{}); err != nil {
 		utilruntime.HandleError(fmt.Errorf("unable to initialize clusterroles: %v", err))
 		return false, nil
 	}
-	if _, err := client.RbacV1().ClusterRoleBindings().List(context.TODO(), metav1.ListOptions{}); err != nil {
+	if _, err := client.RbacV1().ClusterRoleBindings().List(ctx, metav1.ListOptions{}); err != nil {
 		utilruntime.HandleError(fmt.Errorf("unable to initialize clusterrolebindings: %v", err))
 		return false, nil
 	}
diff --git a/pkg/controllers/kube-apiserver.go b/pkg/controllers/kube-apiserver.go
index 70cb89dcdc..d70179318d 100644
--- a/pkg/controllers/kube-apiserver.go
+++ b/pkg/controllers/kube-apiserver.go
@@ -23,6 +23,7 @@ import (
 	"io"
 	"net"
 	"os"
+	"os/exec"
 	"path/filepath"
"strconv" "strings" @@ -54,7 +55,18 @@ import ( ) const ( - kubeAPIStartupTimeout = 60 + kubeAPIStartupTimeout = 60 * time.Second + // rbacHookDeadlockTimeout is the time to wait for the RBAC bootstrap hook + // before declaring a deadlock. This is shorter than kubeAPIStartupTimeout + // to allow for faster recovery. + rbacHookDeadlockTimeout = 15 * time.Second + // rbacHookCheckInterval is how often to check the RBAC hook status + rbacHookPollDelayStart = 5 * time.Second + rbacHookCheckInterval = 2 * time.Second + // rbacHookMaxWaitDuration is the absolute maximum time to wait for the RBAC hook + // regardless of etcd health state changes. This prevents flapping from extending + // detection indefinitely. + rbacHookMaxWaitDuration = 30 * time.Second ) var ( @@ -348,9 +360,15 @@ func (s *KubeAPIServer) Run(ctx context.Context, ready chan<- struct{}, stopped return err } - // run readiness check + // Channel to signal RBAC hook deadlock detection + rbacDeadlockDetected := make(chan struct{}) + + // Run RBAC hook deadlock detector + go s.detectRBACHookDeadlock(ctx, restClient, rbacDeadlockDetected) + + // Run standard readiness check go func() { - err := wait.PollUntilContextTimeout(ctx, time.Second, kubeAPIStartupTimeout*time.Second, true, func(ctx context.Context) (bool, error) { + err := wait.PollUntilContextTimeout(ctx, time.Second, kubeAPIStartupTimeout, true, func(ctx context.Context) (bool, error) { var status int if err := restClient.Get().AbsPath("/readyz").Do(ctx).StatusCode(&status).Error(); err != nil { klog.Infof("%q not yet ready: %v", s.Name(), err) @@ -420,7 +438,168 @@ func (s *KubeAPIServer) Run(ctx context.Context, ready chan<- struct{}, stopped return err case perr := <-panicChannel: panic(perr) + case <-rbacDeadlockDetected: + klog.Error("RBAC bootstrap hook deadlock detected - restarting microshift-etcd.scope to recover") + if err := restartMicroshiftEtcdScope(ctx); err != nil { + klog.Errorf("Failed to restart microshift-etcd.scope: %v", err) + } + return fmt.Errorf("RBAC bootstrap hook deadlock detected after %v", rbacHookDeadlockTimeout) + } +} + +// detectRBACHookDeadlock monitors the RBAC bootstrap hook status and detects deadlock conditions. +// A deadlock is detected when: +// 1. The RBAC hook is not completing (stuck in "not finished" state) +// 2. etcd is healthy and responsive +// This indicates the circular dependency where the hook waits for API server +// while API server waits for the hook. 
+//
+// Related upstream Kubernetes issues (now closed):
+// https://github.com/kubernetes/kubernetes/issues/86715
+// https://github.com/kubernetes/kubernetes/issues/97119
+func (s *KubeAPIServer) detectRBACHookDeadlock(ctx context.Context, restClient rest.Interface, deadlockDetected chan<- struct{}) {
+	// Wait a few seconds before starting detection to allow normal startup
+	select {
+	case <-ctx.Done():
+		return
+	case <-time.After(rbacHookPollDelayStart):
+	}
+
+	checkCount := 0
+	maxChecks := int((rbacHookDeadlockTimeout - rbacHookPollDelayStart) / rbacHookCheckInterval) // Account for initial delay
+	// Track wall-clock deadline to prevent flapping from extending detection indefinitely
+	startTime := time.Now()
+
+	for {
+		// Check absolute deadline first - this cannot be reset by etcd state changes
+		if time.Since(startTime) >= rbacHookMaxWaitDuration {
+			klog.Errorf("RBAC bootstrap hook exceeded maximum wait duration of %v", rbacHookMaxWaitDuration)
+			// Only trigger deadlock recovery if we've confirmed the predicate enough times
+			if checkCount >= maxChecks {
+				break // Fall through to close(deadlockDetected)
+			}
+			// Timeout but not confirmed deadlock - exit without triggering recovery
+			return
+		}
+
+		// Check if we've confirmed deadlock enough times
+		if checkCount >= maxChecks {
+			break // Fall through to close(deadlockDetected)
+		}
+
+		select {
+		case <-ctx.Done():
+			return
+		case <-time.After(rbacHookCheckInterval):
+		}
+
+		// Check RBAC hook status
+		probeCtx, cancel := context.WithTimeout(ctx, time.Second)
+		var status int
+		result := restClient.Get().
+			AbsPath("/readyz/poststarthook/rbac/bootstrap-roles").
+			Do(probeCtx).
+			StatusCode(&status)
+		err := result.Error()
+		cancel()
+
+		// If hook is ready, no deadlock
+		if err == nil && status == 200 {
+			klog.V(4).Info("RBAC bootstrap hook completed successfully")
+			return
+		}
+
+		// If no HTTP status was received, skip this iteration (transport/timeout).
+		if err != nil && status == 0 {
+			klog.V(4).Infof("RBAC probe error (not counting toward deadlock): %v", err)
+			continue
+		}
+
+		// Hook not ready (status != 200) - check if etcd is healthy
+		etcdHealthy, etcdErr := isEtcdHealthy(ctx)
+		if etcdErr != nil {
+			klog.V(4).Infof("Could not check etcd health (not counting toward deadlock): %v", etcdErr)
+			continue
+		}
+
+		if etcdHealthy {
+			// Only increment when BOTH conditions are met:
+			// RBAC probe returned not-ready AND etcd is healthy
+			checkCount++
+			klog.Warningf("RBAC bootstrap hook not ready (check %d/%d, elapsed %v), but etcd is healthy - potential deadlock",
+				checkCount, maxChecks, time.Since(startTime).Round(time.Second))
+		} else {
+			// etcd not healthy - not a deadlock, just waiting for etcd
+			klog.V(4).Infof("RBAC hook waiting, etcd not yet healthy (check %d/%d)", checkCount, maxChecks)
+			// Reset counter since this isn't a deadlock condition
+			// Note: wall-clock deadline (startTime) is NOT reset - flapping cannot extend indefinitely
+			checkCount = 0
+		}
+	}
+
+	// Only reached when checkCount >= maxChecks (deadlock confirmed)
+	klog.Errorf("RBAC bootstrap hook deadlock confirmed after %v: etcd healthy but hook not completing",
+		time.Since(startTime).Round(time.Second))
+	close(deadlockDetected)
+}
+
+// isEtcdHealthy checks if etcd is responsive by attempting to connect and get status.
+func isEtcdHealthy(ctx context.Context) (bool, error) {
+	certsDir := cryptomaterial.CertsDirectory(config.DataDir)
+	etcdAPIServerClientCertDir := cryptomaterial.EtcdAPIServerClientCertDir(certsDir)
+
+	tlsInfo := transport.TLSInfo{
+		CertFile:      cryptomaterial.ClientCertPath(etcdAPIServerClientCertDir),
+		KeyFile:       cryptomaterial.ClientKeyPath(etcdAPIServerClientCertDir),
+		TrustedCAFile: cryptomaterial.CACertPath(cryptomaterial.EtcdSignerDir(certsDir)),
+	}
+	tlsConfig, err := tlsInfo.ClientConfig()
+	if err != nil {
+		return false, fmt.Errorf("failed to create TLS config: %w", err)
+	}
+
+	// Use a short timeout for health check
+	checkCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
+	defer cancel()
+
+	client, err := clientv3.New(clientv3.Config{
+		Endpoints:   []string{"https://localhost:2379"},
+		DialTimeout: 1 * time.Second,
+		TLS:         tlsConfig,
+		Context:     checkCtx,
+	})
+	if err != nil {
+		return false, fmt.Errorf("failed to create etcd client: %w", err)
+	}
+	defer func() { _ = client.Close() }()
+
+	_, err = client.Status(checkCtx, "localhost:2379")
+	if err != nil {
+		return false, nil // etcd not healthy, but not an error condition
+	}
+
+	return true, nil
+}
+
+// restartMicroshiftEtcdScope restarts the microshift-etcd.scope to recover from deadlock.
+// This forces a clean restart of etcd which can help break the circular dependency.
+func restartMicroshiftEtcdScope(ctx context.Context) error {
+	klog.Info("Stopping microshift-etcd.scope for recovery")
+
+	// Set a timeout so a stalled systemd or D-Bus call cannot hang the fail-fast recovery path and keep Run from returning
+	cmdCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
+	defer cancel()
+
+	stopCmd := exec.CommandContext(cmdCtx, "systemctl", "stop", "microshift-etcd.scope")
+	if out, err := stopCmd.CombinedOutput(); err != nil {
+		return fmt.Errorf("failed to stop microshift-etcd.scope: %w, output: %s", err, string(out))
+	}
+
+	// Wait briefly for cleanup
+	time.Sleep(1 * time.Second)
+
+	klog.Info("microshift-etcd.scope stopped - MicroShift will restart")
+	return nil
+}

 func discoverEtcdServers(ctx context.Context, kubeconfigPath string) ([]string, error) {
diff --git a/vendor/k8s.io/kubernetes/pkg/registry/rbac/rest/storage_rbac.go b/vendor/k8s.io/kubernetes/pkg/registry/rbac/rest/storage_rbac.go
index 16502c476c..e776aa4cef 100644
--- a/vendor/k8s.io/kubernetes/pkg/registry/rbac/rest/storage_rbac.go
+++ b/vendor/k8s.io/kubernetes/pkg/registry/rbac/rest/storage_rbac.go
@@ -169,7 +169,7 @@ func (p *PolicyData) EnsureRBACPolicy() genericapiserver.PostStartHookFunc {
 				utilruntime.HandleError(fmt.Errorf("unable to initialize client set: %v", err))
 				return false, nil
 			}
-			return ensureRBACPolicy(p, client)
+			return ensureRBACPolicy(hookContext, p, client)
 		})
 		// if we're never able to make it through initialization, kill the API server
 		if err != nil {
@@ -180,14 +180,14 @@ func (p *PolicyData) EnsureRBACPolicy() genericapiserver.PostStartHookFunc {
 	}
 }

-func ensureRBACPolicy(p *PolicyData, client clientset.Interface) (done bool, err error) {
+func ensureRBACPolicy(ctx context.Context, p *PolicyData, client clientset.Interface) (done bool, err error) {
 	failedReconciliation := false
 	// Make sure etcd is responding before we start reconciling
-	if _, err := client.RbacV1().ClusterRoles().List(context.TODO(), metav1.ListOptions{}); err != nil {
+	if _, err := client.RbacV1().ClusterRoles().List(ctx, metav1.ListOptions{}); err != nil {
 		utilruntime.HandleError(fmt.Errorf("unable to initialize clusterroles: %v", err))
 		return false, nil
 	}
-	if _, err := client.RbacV1().ClusterRoleBindings().List(context.TODO(), metav1.ListOptions{}); err != nil {
+	if _, err := client.RbacV1().ClusterRoleBindings().List(ctx, metav1.ListOptions{}); err != nil {
 		utilruntime.HandleError(fmt.Errorf("unable to initialize clusterrolebindings: %v", err))
 		return false, nil
 	}

From d255cf864deafa94d9c51093e39a46e6d48d5082 Mon Sep 17 00:00:00 2001
From: Jon Cope
Date: Thu, 7 May 2026 12:22:03 -0500
Subject: [PATCH 2/2] Replace context.TODO() with cancelable context in RBAC
 bootstrap hook

The RBAC bootstrap hook's helper functions primeAggregatedClusterRoles
and primeSplitClusterRoleBindings use context.TODO(), which has no
timeout or cancellation. On resource-constrained systems, these calls
can block indefinitely through the loopback client, causing the KAS to
deadlock on restart with no recovery path.

Pass the hook's cancelable context through to all API calls so they
respect shutdown signals and cannot hang forever.

Upstream: kubernetes/kubernetes#86715, kubernetes/kubernetes#97119
---
 .../pkg/registry/rbac/rest/storage_rbac.go    |  20 +-
 .../registry/rbac/rest/storage_rbac_test.go   |   3 +-
 pkg/controllers/kube-apiserver.go             | 185 +-----------------
 .../pkg/registry/rbac/rest/storage_rbac.go    |  20 +-
 4 files changed, 25 insertions(+), 203 deletions(-)

diff --git a/deps/github.com/openshift/kubernetes/pkg/registry/rbac/rest/storage_rbac.go b/deps/github.com/openshift/kubernetes/pkg/registry/rbac/rest/storage_rbac.go
index e776aa4cef..2990343600 100644
--- a/deps/github.com/openshift/kubernetes/pkg/registry/rbac/rest/storage_rbac.go
+++ b/deps/github.com/openshift/kubernetes/pkg/registry/rbac/rest/storage_rbac.go
@@ -194,12 +194,12 @@ func ensureRBACPolicy(ctx context.Context, p *PolicyData, client clientset.Inter

 	// if the new cluster roles to aggregate do not yet exist, then we need to copy the old roles if they don't exist
 	// in new locations
-	if err := primeAggregatedClusterRoles(p.ClusterRolesToAggregate, client.RbacV1()); err != nil {
+	if err := primeAggregatedClusterRoles(ctx, p.ClusterRolesToAggregate, client.RbacV1()); err != nil {
 		utilruntime.HandleError(fmt.Errorf("unable to prime aggregated clusterroles: %v", err))
 		return false, nil
 	}

-	if err := primeSplitClusterRoleBindings(p.ClusterRoleBindingsToSplit, client.RbacV1()); err != nil {
+	if err := primeSplitClusterRoleBindings(ctx, p.ClusterRoleBindingsToSplit, client.RbacV1()); err != nil {
 		utilruntime.HandleError(fmt.Errorf("unable to prime split ClusterRoleBindings: %v", err))
 		return false, nil
 	}
@@ -345,9 +345,9 @@ func (p RESTStorageProvider) GroupName() string {

 // primeAggregatedClusterRoles copies roles that have transitioned to aggregated roles and may need to pick up changes
 // that were done to the legacy roles.
-func primeAggregatedClusterRoles(clusterRolesToAggregate map[string]string, clusterRoleClient rbacv1client.ClusterRolesGetter) error {
+func primeAggregatedClusterRoles(ctx context.Context, clusterRolesToAggregate map[string]string, clusterRoleClient rbacv1client.ClusterRolesGetter) error {
 	for oldName, newName := range clusterRolesToAggregate {
-		_, err := clusterRoleClient.ClusterRoles().Get(context.TODO(), newName, metav1.GetOptions{})
+		_, err := clusterRoleClient.ClusterRoles().Get(ctx, newName, metav1.GetOptions{})
 		if err == nil {
 			continue
 		}
@@ -355,7 +355,7 @@ func primeAggregatedClusterRoles(clusterRolesToAggregate map[string]string, clus
 			return err
 		}

-		existingRole, err := clusterRoleClient.ClusterRoles().Get(context.TODO(), oldName, metav1.GetOptions{})
+		existingRole, err := clusterRoleClient.ClusterRoles().Get(ctx, oldName, metav1.GetOptions{})
 		if apierrors.IsNotFound(err) {
 			continue
 		}
@@ -369,7 +369,7 @@ func primeAggregatedClusterRoles(clusterRolesToAggregate map[string]string, clus
 		klog.V(1).Infof("migrating %v to %v", existingRole.Name, newName)
 		existingRole.Name = newName
 		existingRole.ResourceVersion = "" // clear this so the object can be created.
-		if _, err := clusterRoleClient.ClusterRoles().Create(context.TODO(), existingRole, metav1.CreateOptions{}); err != nil && !apierrors.IsAlreadyExists(err) {
+		if _, err := clusterRoleClient.ClusterRoles().Create(ctx, existingRole, metav1.CreateOptions{}); err != nil && !apierrors.IsAlreadyExists(err) {
 			return err
 		}
 	}
@@ -380,10 +380,10 @@ func primeAggregatedClusterRoles(clusterRolesToAggregate map[string]string, clus
 // primeSplitClusterRoleBindings ensures the existence of target ClusterRoleBindings
 // by copying Subjects, Annotations, and Labels from the specified source
 // ClusterRoleBinding, if present.
-func primeSplitClusterRoleBindings(clusterRoleBindingToSplit map[string]rbacapiv1.ClusterRoleBinding, clusterRoleBindingClient rbacv1client.ClusterRoleBindingsGetter) error {
+func primeSplitClusterRoleBindings(ctx context.Context, clusterRoleBindingToSplit map[string]rbacapiv1.ClusterRoleBinding, clusterRoleBindingClient rbacv1client.ClusterRoleBindingsGetter) error {
 	for existingBindingName, clusterRoleBindingToCreate := range clusterRoleBindingToSplit {
 		// If source ClusterRoleBinding does not exist, do nothing.
-		existingRoleBinding, err := clusterRoleBindingClient.ClusterRoleBindings().Get(context.TODO(), existingBindingName, metav1.GetOptions{})
+		existingRoleBinding, err := clusterRoleBindingClient.ClusterRoleBindings().Get(ctx, existingBindingName, metav1.GetOptions{})
 		if apierrors.IsNotFound(err) {
 			continue
 		}
@@ -392,7 +392,7 @@ func primeSplitClusterRoleBindings(clusterRoleBindingToSplit map[string]rbacapiv
 		}

 		// If the target ClusterRoleBinding already exists, do nothing.
-		_, err = clusterRoleBindingClient.ClusterRoleBindings().Get(context.TODO(), clusterRoleBindingToCreate.Name, metav1.GetOptions{})
+		_, err = clusterRoleBindingClient.ClusterRoleBindings().Get(ctx, clusterRoleBindingToCreate.Name, metav1.GetOptions{})
 		if err == nil {
 			continue
 		}
@@ -407,7 +407,7 @@ func primeSplitClusterRoleBindings(clusterRoleBindingToSplit map[string]rbacapiv
 		newCRB.Subjects = existingRoleBinding.Subjects
 		newCRB.Labels = existingRoleBinding.Labels
 		newCRB.Annotations = existingRoleBinding.Annotations
-		if _, err := clusterRoleBindingClient.ClusterRoleBindings().Create(context.TODO(), newCRB, metav1.CreateOptions{}); err != nil && !apierrors.IsAlreadyExists(err) {
+		if _, err := clusterRoleBindingClient.ClusterRoleBindings().Create(ctx, newCRB, metav1.CreateOptions{}); err != nil && !apierrors.IsAlreadyExists(err) {
 			return err
 		}
 	}
diff --git a/deps/github.com/openshift/kubernetes/pkg/registry/rbac/rest/storage_rbac_test.go b/deps/github.com/openshift/kubernetes/pkg/registry/rbac/rest/storage_rbac_test.go
index 11f8ce6175..958ca19e9c 100644
--- a/deps/github.com/openshift/kubernetes/pkg/registry/rbac/rest/storage_rbac_test.go
+++ b/deps/github.com/openshift/kubernetes/pkg/registry/rbac/rest/storage_rbac_test.go
@@ -17,6 +17,7 @@ limitations under the License.
 package rest

 import (
+	"context"
 	"testing"

 	"k8s.io/client-go/kubernetes/fake"
@@ -34,6 +35,6 @@ func BenchmarkEnsureRBACPolicy(b *testing.B) {
 			ClusterRoleBindingsToSplit: bootstrappolicy.ClusterRoleBindingsToSplit(),
 		}
 		coreClientSet := fake.NewSimpleClientset()
-		_, _ = ensureRBACPolicy(policy, coreClientSet)
+		_, _ = ensureRBACPolicy(context.Background(), policy, coreClientSet)
 	}
 }
diff --git a/pkg/controllers/kube-apiserver.go b/pkg/controllers/kube-apiserver.go
index d70179318d..70cb89dcdc 100644
--- a/pkg/controllers/kube-apiserver.go
+++ b/pkg/controllers/kube-apiserver.go
@@ -23,7 +23,6 @@ import (
 	"io"
 	"net"
 	"os"
-	"os/exec"
 	"path/filepath"
 	"strconv"
 	"strings"
@@ -55,18 +54,7 @@ import (
 )

 const (
-	kubeAPIStartupTimeout = 60 * time.Second
-	// rbacHookDeadlockTimeout is the time to wait for the RBAC bootstrap hook
-	// before declaring a deadlock. This is shorter than kubeAPIStartupTimeout
-	// to allow for faster recovery.
-	rbacHookDeadlockTimeout = 15 * time.Second
-	// rbacHookPollDelayStart is the initial probe delay; rbacHookCheckInterval is how often the hook status is re-checked
-	rbacHookPollDelayStart = 5 * time.Second
-	rbacHookCheckInterval = 2 * time.Second
-	// rbacHookMaxWaitDuration is the absolute maximum time to wait for the RBAC hook
-	// regardless of etcd health state changes. This prevents flapping from extending
-	// detection indefinitely.
-	rbacHookMaxWaitDuration = 30 * time.Second
+	kubeAPIStartupTimeout = 60
 )

 var (
@@ -360,15 +348,9 @@ func (s *KubeAPIServer) Run(ctx context.Context, ready chan<- struct{}, stopped
 		return err
 	}

-	// Channel to signal RBAC hook deadlock detection
-	rbacDeadlockDetected := make(chan struct{})
-
-	// Run RBAC hook deadlock detector
-	go s.detectRBACHookDeadlock(ctx, restClient, rbacDeadlockDetected)
-
-	// Run standard readiness check
+	// run readiness check
 	go func() {
-		err := wait.PollUntilContextTimeout(ctx, time.Second, kubeAPIStartupTimeout, true, func(ctx context.Context) (bool, error) {
+		err := wait.PollUntilContextTimeout(ctx, time.Second, kubeAPIStartupTimeout*time.Second, true, func(ctx context.Context) (bool, error) {
 			var status int
 			if err := restClient.Get().AbsPath("/readyz").Do(ctx).StatusCode(&status).Error(); err != nil {
 				klog.Infof("%q not yet ready: %v", s.Name(), err)
@@ -438,168 +420,7 @@ func (s *KubeAPIServer) Run(ctx context.Context, ready chan<- struct{}, stopped
 		return err
 	case perr := <-panicChannel:
 		panic(perr)
-	case <-rbacDeadlockDetected:
-		klog.Error("RBAC bootstrap hook deadlock detected - restarting microshift-etcd.scope to recover")
-		if err := restartMicroshiftEtcdScope(ctx); err != nil {
-			klog.Errorf("Failed to restart microshift-etcd.scope: %v", err)
-		}
-		return fmt.Errorf("RBAC bootstrap hook deadlock detected after %v", rbacHookDeadlockTimeout)
 	}
 }
-
-// detectRBACHookDeadlock monitors the RBAC bootstrap hook status and detects deadlock conditions.
-// A deadlock is detected when:
-// 1. The RBAC hook is not completing (stuck in "not finished" state)
-// 2. etcd is healthy and responsive
-// This indicates the circular dependency where the hook waits for the API server
-// while the API server waits for the hook.
-//
-// Related upstream Kubernetes issues (now closed):
-// https://github.com/kubernetes/kubernetes/issues/86715
-// https://github.com/kubernetes/kubernetes/issues/97119
-func (s *KubeAPIServer) detectRBACHookDeadlock(ctx context.Context, restClient rest.Interface, deadlockDetected chan<- struct{}) {
-	// Wait a few seconds before starting detection to allow normal startup
-	select {
-	case <-ctx.Done():
-		return
-	case <-time.After(rbacHookPollDelayStart):
-	}
-
-	checkCount := 0
-	maxChecks := int((rbacHookDeadlockTimeout - rbacHookPollDelayStart) / rbacHookCheckInterval) // Account for initial delay
-	// Track wall-clock deadline to prevent flapping from extending detection indefinitely
-	startTime := time.Now()
-
-	for {
-		// Check absolute deadline first - this cannot be reset by etcd state changes
-		if time.Since(startTime) >= rbacHookMaxWaitDuration {
-			klog.Errorf("RBAC bootstrap hook exceeded maximum wait duration of %v", rbacHookMaxWaitDuration)
-			// Only trigger deadlock recovery if we've confirmed the predicate enough times
-			if checkCount >= maxChecks {
-				break // Fall through to close(deadlockDetected)
-			}
-			// Timeout but not confirmed deadlock - exit without triggering recovery
-			return
-		}
-
-		// Check if we've confirmed deadlock enough times
-		if checkCount >= maxChecks {
-			break // Fall through to close(deadlockDetected)
-		}
-
-		select {
-		case <-ctx.Done():
-			return
-		case <-time.After(rbacHookCheckInterval):
-		}
-
-		// Check RBAC hook status
-		probeCtx, cancel := context.WithTimeout(ctx, time.Second)
-		var status int
-		result := restClient.Get().
-			AbsPath("/readyz/poststarthook/rbac/bootstrap-roles").
-			Do(probeCtx).
-			StatusCode(&status)
-		err := result.Error()
-		cancel()
-
-		// If hook is ready, no deadlock
-		if err == nil && status == 200 {
-			klog.V(4).Info("RBAC bootstrap hook completed successfully")
-			return
-		}
-
-		// If no HTTP status was received, skip this iteration (transport/timeout).
-		if err != nil && status == 0 {
-			klog.V(4).Infof("RBAC probe error (not counting toward deadlock): %v", err)
-			continue
-		}
-
-		// Hook not ready (status != 200) - check if etcd is healthy
-		etcdHealthy, etcdErr := isEtcdHealthy(ctx)
-		if etcdErr != nil {
-			klog.V(4).Infof("Could not check etcd health (not counting toward deadlock): %v", etcdErr)
-			continue
-		}
-
-		if etcdHealthy {
-			// Only increment when BOTH conditions are met:
-			// RBAC probe returned not-ready AND etcd is healthy
-			checkCount++
-			klog.Warningf("RBAC bootstrap hook not ready (check %d/%d, elapsed %v), but etcd is healthy - potential deadlock",
-				checkCount, maxChecks, time.Since(startTime).Round(time.Second))
-		} else {
-			// etcd not healthy - not a deadlock, just waiting for etcd
-			klog.V(4).Infof("RBAC hook waiting, etcd not yet healthy (check %d/%d)", checkCount, maxChecks)
-			// Reset counter since this isn't a deadlock condition
-			// Note: wall-clock deadline (startTime) is NOT reset - flapping cannot extend indefinitely
-			checkCount = 0
-		}
-	}
-
-	// Only reached when checkCount >= maxChecks (deadlock confirmed)
-	klog.Errorf("RBAC bootstrap hook deadlock confirmed after %v: etcd healthy but hook not completing",
-		time.Since(startTime).Round(time.Second))
-	close(deadlockDetected)
-}
-
-// isEtcdHealthy checks if etcd is responsive by attempting to connect and get status.
-func isEtcdHealthy(ctx context.Context) (bool, error) {
-	certsDir := cryptomaterial.CertsDirectory(config.DataDir)
-	etcdAPIServerClientCertDir := cryptomaterial.EtcdAPIServerClientCertDir(certsDir)
-
-	tlsInfo := transport.TLSInfo{
-		CertFile:      cryptomaterial.ClientCertPath(etcdAPIServerClientCertDir),
-		KeyFile:       cryptomaterial.ClientKeyPath(etcdAPIServerClientCertDir),
-		TrustedCAFile: cryptomaterial.CACertPath(cryptomaterial.EtcdSignerDir(certsDir)),
-	}
-	tlsConfig, err := tlsInfo.ClientConfig()
-	if err != nil {
-		return false, fmt.Errorf("failed to create TLS config: %w", err)
-	}
-
-	// Use a short timeout for health check
-	checkCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
-	defer cancel()
-
-	client, err := clientv3.New(clientv3.Config{
-		Endpoints:   []string{"https://localhost:2379"},
-		DialTimeout: 1 * time.Second,
-		TLS:         tlsConfig,
-		Context:     checkCtx,
-	})
-	if err != nil {
-		return false, fmt.Errorf("failed to create etcd client: %w", err)
-	}
-	defer func() { _ = client.Close() }()
-
-	_, err = client.Status(checkCtx, "localhost:2379")
-	if err != nil {
-		return false, nil // etcd not healthy, but not an error condition
-	}
-
-	return true, nil
-}
-
-// restartMicroshiftEtcdScope restarts the microshift-etcd.scope to recover from deadlock.
-// This forces a clean restart of etcd which can help break the circular dependency.
-func restartMicroshiftEtcdScope(ctx context.Context) error {
-	klog.Info("Stopping microshift-etcd.scope for recovery")
-
-	// Set a timeout so a stalled systemd or D-Bus call cannot hang the fail-fast recovery path and keep Run from returning
-	cmdCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
-	defer cancel()
-
-	stopCmd := exec.CommandContext(cmdCtx, "systemctl", "stop", "microshift-etcd.scope")
-	if out, err := stopCmd.CombinedOutput(); err != nil {
-		return fmt.Errorf("failed to stop microshift-etcd.scope: %w, output: %s", err, string(out))
-	}
-
-	// Wait briefly for cleanup
-	time.Sleep(1 * time.Second)
-
-	klog.Info("microshift-etcd.scope stopped - MicroShift will restart")
-	return nil
-}

 func discoverEtcdServers(ctx context.Context, kubeconfigPath string) ([]string, error) {
diff --git a/vendor/k8s.io/kubernetes/pkg/registry/rbac/rest/storage_rbac.go b/vendor/k8s.io/kubernetes/pkg/registry/rbac/rest/storage_rbac.go
index e776aa4cef..2990343600 100644
--- a/vendor/k8s.io/kubernetes/pkg/registry/rbac/rest/storage_rbac.go
+++ b/vendor/k8s.io/kubernetes/pkg/registry/rbac/rest/storage_rbac.go
@@ -194,12 +194,12 @@ func ensureRBACPolicy(ctx context.Context, p *PolicyData, client clientset.Inter

 	// if the new cluster roles to aggregate do not yet exist, then we need to copy the old roles if they don't exist
 	// in new locations
-	if err := primeAggregatedClusterRoles(p.ClusterRolesToAggregate, client.RbacV1()); err != nil {
+	if err := primeAggregatedClusterRoles(ctx, p.ClusterRolesToAggregate, client.RbacV1()); err != nil {
 		utilruntime.HandleError(fmt.Errorf("unable to prime aggregated clusterroles: %v", err))
 		return false, nil
 	}

-	if err := primeSplitClusterRoleBindings(p.ClusterRoleBindingsToSplit, client.RbacV1()); err != nil {
+	if err := primeSplitClusterRoleBindings(ctx, p.ClusterRoleBindingsToSplit, client.RbacV1()); err != nil {
 		utilruntime.HandleError(fmt.Errorf("unable to prime split ClusterRoleBindings: %v", err))
 		return false, nil
 	}
@@ -345,9 +345,9 @@ func (p RESTStorageProvider) GroupName() string {

 // primeAggregatedClusterRoles copies roles that have transitioned to aggregated roles and may need to pick up changes
 // that were done to the legacy roles.
-func primeAggregatedClusterRoles(clusterRolesToAggregate map[string]string, clusterRoleClient rbacv1client.ClusterRolesGetter) error {
+func primeAggregatedClusterRoles(ctx context.Context, clusterRolesToAggregate map[string]string, clusterRoleClient rbacv1client.ClusterRolesGetter) error {
 	for oldName, newName := range clusterRolesToAggregate {
-		_, err := clusterRoleClient.ClusterRoles().Get(context.TODO(), newName, metav1.GetOptions{})
+		_, err := clusterRoleClient.ClusterRoles().Get(ctx, newName, metav1.GetOptions{})
 		if err == nil {
 			continue
 		}
@@ -355,7 +355,7 @@ func primeAggregatedClusterRoles(clusterRolesToAggregate map[string]string, clus
 			return err
 		}

-		existingRole, err := clusterRoleClient.ClusterRoles().Get(context.TODO(), oldName, metav1.GetOptions{})
+		existingRole, err := clusterRoleClient.ClusterRoles().Get(ctx, oldName, metav1.GetOptions{})
 		if apierrors.IsNotFound(err) {
 			continue
 		}
@@ -369,7 +369,7 @@ func primeAggregatedClusterRoles(clusterRolesToAggregate map[string]string, clus
 		klog.V(1).Infof("migrating %v to %v", existingRole.Name, newName)
 		existingRole.Name = newName
 		existingRole.ResourceVersion = "" // clear this so the object can be created.
-		if _, err := clusterRoleClient.ClusterRoles().Create(context.TODO(), existingRole, metav1.CreateOptions{}); err != nil && !apierrors.IsAlreadyExists(err) {
+		if _, err := clusterRoleClient.ClusterRoles().Create(ctx, existingRole, metav1.CreateOptions{}); err != nil && !apierrors.IsAlreadyExists(err) {
 			return err
 		}
 	}
@@ -380,10 +380,10 @@ func primeAggregatedClusterRoles(clusterRolesToAggregate map[string]string, clus
 // primeSplitClusterRoleBindings ensures the existence of target ClusterRoleBindings
 // by copying Subjects, Annotations, and Labels from the specified source
 // ClusterRoleBinding, if present.
-func primeSplitClusterRoleBindings(clusterRoleBindingToSplit map[string]rbacapiv1.ClusterRoleBinding, clusterRoleBindingClient rbacv1client.ClusterRoleBindingsGetter) error {
+func primeSplitClusterRoleBindings(ctx context.Context, clusterRoleBindingToSplit map[string]rbacapiv1.ClusterRoleBinding, clusterRoleBindingClient rbacv1client.ClusterRoleBindingsGetter) error {
 	for existingBindingName, clusterRoleBindingToCreate := range clusterRoleBindingToSplit {
 		// If source ClusterRoleBinding does not exist, do nothing.
-		existingRoleBinding, err := clusterRoleBindingClient.ClusterRoleBindings().Get(context.TODO(), existingBindingName, metav1.GetOptions{})
+		existingRoleBinding, err := clusterRoleBindingClient.ClusterRoleBindings().Get(ctx, existingBindingName, metav1.GetOptions{})
 		if apierrors.IsNotFound(err) {
 			continue
 		}
@@ -392,7 +392,7 @@ func primeSplitClusterRoleBindings(clusterRoleBindingToSplit map[string]rbacapiv
 		}

 		// If the target ClusterRoleBinding already exists, do nothing.
-		_, err = clusterRoleBindingClient.ClusterRoleBindings().Get(context.TODO(), clusterRoleBindingToCreate.Name, metav1.GetOptions{})
+		_, err = clusterRoleBindingClient.ClusterRoleBindings().Get(ctx, clusterRoleBindingToCreate.Name, metav1.GetOptions{})
 		if err == nil {
 			continue
 		}
@@ -407,7 +407,7 @@ func primeSplitClusterRoleBindings(clusterRoleBindingToSplit map[string]rbacapiv
 		newCRB.Subjects = existingRoleBinding.Subjects
 		newCRB.Labels = existingRoleBinding.Labels
 		newCRB.Annotations = existingRoleBinding.Annotations
-		if _, err := clusterRoleBindingClient.ClusterRoleBindings().Create(context.TODO(), newCRB, metav1.CreateOptions{}); err != nil && !apierrors.IsAlreadyExists(err) {
+		if _, err := clusterRoleBindingClient.ClusterRoleBindings().Create(ctx, newCRB, metav1.CreateOptions{}); err != nil && !apierrors.IsAlreadyExists(err) {
 			return err
 		}
 	}
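
A note on the detection pattern PATCH 1/2 introduces (and PATCH 2/2 then removes in favor of the context fix): deadlock is declared only after the predicate "hook stuck while etcd is healthy" holds for several consecutive checks, and a wall-clock cap keeps etcd flapping from postponing the decision forever. Below is a minimal, self-contained sketch of that debounce pattern in stdlib Go only; confirmDeadlock and probe are illustrative names, not code from the patch.

    package main

    import (
    	"context"
    	"fmt"
    	"time"
    )

    // confirmDeadlock reports whether probe() returned (stuck, healthy)
    // for `needed` consecutive checks. The counter resets while the
    // dependency is unhealthy, but the wall-clock deadline never resets,
    // so flapping cannot postpone the decision indefinitely.
    func confirmDeadlock(ctx context.Context, probe func() (stuck, healthy bool), interval, maxWait time.Duration, needed int) bool {
    	deadline := time.Now().Add(maxWait)
    	count := 0
    	for count < needed && time.Now().Before(deadline) {
    		select {
    		case <-ctx.Done():
    			return false
    		case <-time.After(interval):
    		}
    		stuck, healthy := probe()
    		switch {
    		case stuck && healthy:
    			count++ // hook stuck while the dependency is fine: one more confirmation
    		case !healthy:
    			count = 0 // the dependency itself is down: not a deadlock
    		}
    	}
    	return count >= needed
    }

    func main() {
    	// A probe that always reports "stuck but dependency healthy".
    	probe := func() (bool, bool) { return true, true }
    	fmt.Println(confirmDeadlock(context.Background(), probe, 10*time.Millisecond, time.Second, 3))
    }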
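The approach PATCH 2/2 settles on is simpler: thread the hook's cancelable context through every API call so nothing can block past shutdown. A minimal sketch of that idea under the same hedge, stdlib Go only — ensurePolicy stands in for ensureRBACPolicy, and attempt for a loopback API call that may hang; neither name is from the patch.

    package main

    import (
    	"context"
    	"errors"
    	"fmt"
    	"time"
    )

    // ensurePolicy retries attempt until it succeeds or the caller's context
    // is canceled. With context.TODO() inside attempt there would be no way
    // to interrupt a hung call; with ctx the loop fails fast on shutdown.
    func ensurePolicy(ctx context.Context, attempt func(context.Context) error) error {
    	ticker := time.NewTicker(100 * time.Millisecond)
    	defer ticker.Stop()
    	for {
    		if err := attempt(ctx); err == nil {
    			return nil
    		}
    		select {
    		case <-ctx.Done():
    			return fmt.Errorf("bootstrap aborted: %w", ctx.Err())
    		case <-ticker.C:
    		}
    	}
    }

    func main() {
    	// Simulate a hung loopback call: it returns only once ctx is canceled.
    	hung := func(ctx context.Context) error {
    		<-ctx.Done()
    		return errors.New("server not ready")
    	}
    	ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
    	defer cancel()
    	// A caller-bound context turns a would-be deadlock into a clean error.
    	fmt.Println(ensurePolicy(ctx, hung))
    }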