diff --git a/.gitignore b/.gitignore
index 619d645..0d93063 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@
*.tgz
/coverprofile
common.mk
+/.secrets/
diff --git a/Makefile b/Makefile
index a963d34..8c40a0a 100644
--- a/Makefile
+++ b/Makefile
@@ -87,9 +87,16 @@ test: $(GINKGO) $(KCP) generate ## Run all tests (excludes e2e).
TEST_KCP_ASSETS=$(LOCALBIN) $(GINKGO) -r -cover --fail-fast --require-suite -covermode count --output-dir=$(BUILD_PATH) -coverprofile=coverprofile --skip-package=test/e2e $(testargs)
.PHONY: test-e2e
-test-e2e: $(GINKGO) ## Run e2e tests (kind + kcp + helm).
+test-e2e: $(GINKGO) ## Run e2e tests (kind + kcp + helm). Set E2E_SHARD_CONFIG=single-shard|multi-shard (default: multi-shard).
$(GINKGO) -r --fail-fast -v --timeout 30m ./test/e2e/ $(testargs)
+.PHONY: test-e2e-matrix
+test-e2e-matrix: ## Run e2e tests against both shard configs (single-shard, multi-shard).
+ $(MAKE) clean-e2e
+ E2E_SHARD_CONFIG=single-shard $(MAKE) test-e2e
+ $(MAKE) clean-e2e
+ E2E_SHARD_CONFIG=multi-shard $(MAKE) test-e2e
+
-.PHONY: e2e-cleanup
+.PHONY: clean-e2e
clean-e2e: ## Remove kind cluster from e2e tests.
-$(KIND) delete cluster --name dep-ctrl-e2e 2>/dev/null
diff --git a/README.md b/README.md
index 9628d2c..29c6b55 100644
--- a/README.md
+++ b/README.md
@@ -201,12 +201,12 @@ workspace paths to logical cluster names.
for VW URL discovery and full CRUD on `apiexports/content` to manage webhooks in
binding workspaces via the VW.
-**system:admin** (shard-local) -- the webhook SA gets shard-wide read access to
-`apiexports/content` and `apiexportendpointslices`. This is evaluated by the Bootstrap
-Policy Authorizer for every request on the shard, giving the webhook access to all
-provider APIExport virtual workspaces without per-workspace RBAC.
+**system:admin** (per shard) -- the webhook SA gets wildcard read access via a
+ClusterRoleBinding in each shard's `system:admin` logical cluster, so it can
+query dependent resources directly in consumer workspaces via the front-proxy.
-See [docs/getting-started.md](docs/getting-started.md) for the full bootstrap procedure.
+See [docs/getting-started.md](docs/getting-started.md) for the full deployment guide
+using [kcp-operator](https://github.com/kcp-dev/helm-charts).
## Development
@@ -228,6 +228,7 @@ make build
make test
# E2E tests (requires kind, helm, docker)
+# Deploys a multi-shard kcp via kcp-operator (root + shard1)
make test-e2e
```
diff --git a/api/v1alpha1/types.go b/api/v1alpha1/types.go
index f664b5b..b1ec7d9 100644
--- a/api/v1alpha1/types.go
+++ b/api/v1alpha1/types.go
@@ -57,6 +57,13 @@ type DependentRef struct {
// +kubebuilder:validation:MaxLength=63
// +kubebuilder:validation:Pattern=`^[A-Z][A-Za-z0-9]*$`
Kind string `json:"kind"`
+
+ // Resource is the plural resource name of the dependent (e.g., "virtualmachines").
+ // Used by the webhook to construct the GVR for listing dependent resources.
+ // +kubebuilder:validation:MinLength=1
+ // +kubebuilder:validation:MaxLength=63
+ // +kubebuilder:validation:Pattern=`^[a-z][a-z0-9]*$`
+ Resource string `json:"resource"`
}
// APIExportReference identifies an APIExport by workspace path and name.
diff --git a/api/v1alpha1/validation_test.go b/api/v1alpha1/validation_test.go
index 3fab949..bfd2a9b 100644
--- a/api/v1alpha1/validation_test.go
+++ b/api/v1alpha1/validation_test.go
@@ -68,6 +68,14 @@ func TestCRDFieldValidation(t *testing.T) {
valid: []string{"A", "VirtualMachine", "VPC", "Foo123"},
invalid: []string{"", "virtualMachine", "1Foo", "Foo-Bar", "Foo.Bar", "Foo_Bar"},
},
+ {
+ path: "spec.dependent.resource",
+ pattern: `^[a-z][a-z0-9]*$`,
+ minLength: 1,
+ maxLength: 63,
+ valid: []string{"virtualmachines", "vpcs", "subnets"},
+ invalid: []string{"", "VirtualMachines", "virtual-machines", "virtual_machines"},
+ },
{
path: "spec.dependencies.items.apiExportRef.path",
pattern: `^[a-z][a-z0-9-]*(:[a-z][a-z0-9-]*)*$`,
diff --git a/charts/dependency-controller/files/apiexport-dependencies.opendefense.cloud.yaml b/charts/dependency-controller/files/apiexport-dependencies.opendefense.cloud.yaml
index f46e8aa..4c244bf 100644
--- a/charts/dependency-controller/files/apiexport-dependencies.opendefense.cloud.yaml
+++ b/charts/dependency-controller/files/apiexport-dependencies.opendefense.cloud.yaml
@@ -16,7 +16,7 @@ spec:
resources:
- group: dependencies.opendefense.cloud
name: dependencyrules
- schema: v260428-3564c91.dependencyrules.dependencies.opendefense.cloud
+ schema: v260504-f130a11.dependencyrules.dependencies.opendefense.cloud
storage:
crd: {}
status: {}
diff --git a/charts/dependency-controller/files/apiresourceschema-dependencyrules.dependencies.opendefense.cloud.yaml b/charts/dependency-controller/files/apiresourceschema-dependencyrules.dependencies.opendefense.cloud.yaml
index 4fbac37..ec8e4e9 100644
--- a/charts/dependency-controller/files/apiresourceschema-dependencyrules.dependencies.opendefense.cloud.yaml
+++ b/charts/dependency-controller/files/apiresourceschema-dependencyrules.dependencies.opendefense.cloud.yaml
@@ -1,7 +1,7 @@
apiVersion: apis.kcp.io/v1alpha1
kind: APIResourceSchema
metadata:
- name: v260428-3564c91.dependencyrules.dependencies.opendefense.cloud
+ name: v260504-f130a11.dependencyrules.dependencies.opendefense.cloud
spec:
group: dependencies.opendefense.cloud
names:
@@ -134,6 +134,14 @@ spec:
minLength: 1
pattern: ^[A-Z][A-Za-z0-9]*$
type: string
+ resource:
+ description: |-
+ Resource is the plural resource name of the dependent (e.g., "virtualmachines").
+ Used by the webhook to construct the GVR for listing dependent resources.
+ maxLength: 63
+ minLength: 1
+ pattern: ^[a-z][a-z0-9]*$
+ type: string
version:
description: Version is the API version of the dependent resource.
maxLength: 63
@@ -144,6 +152,7 @@ spec:
- apiExportName
- group
- kind
+ - resource
- version
type: object
required:
diff --git a/charts/dependency-controller/files/dependencies.opendefense.cloud_dependencyrules.yaml b/charts/dependency-controller/files/dependencies.opendefense.cloud_dependencyrules.yaml
index 85546cc..549fa36 100644
--- a/charts/dependency-controller/files/dependencies.opendefense.cloud_dependencyrules.yaml
+++ b/charts/dependency-controller/files/dependencies.opendefense.cloud_dependencyrules.yaml
@@ -140,6 +140,14 @@ spec:
minLength: 1
pattern: ^[A-Z][A-Za-z0-9]*$
type: string
+ resource:
+ description: |-
+ Resource is the plural resource name of the dependent (e.g., "virtualmachines").
+ Used by the webhook to construct the GVR for listing dependent resources.
+ maxLength: 63
+ minLength: 1
+ pattern: ^[a-z][a-z0-9]*$
+ type: string
version:
description: Version is the API version of the dependent resource.
maxLength: 63
@@ -150,6 +158,7 @@ spec:
- apiExportName
- group
- kind
+ - resource
- version
type: object
required:
diff --git a/charts/dependency-controller/templates/webhook-deployment.yaml b/charts/dependency-controller/templates/webhook-deployment.yaml
index 43bac1c..bc0aa90 100644
--- a/charts/dependency-controller/templates/webhook-deployment.yaml
+++ b/charts/dependency-controller/templates/webhook-deployment.yaml
@@ -27,9 +27,6 @@ spec:
- /dependency-webhook
args:
- --api-export-name={{ .Values.apiExportName }}
- {{- if .Values.kcpBaseHost }}
- - --kcp-base-host={{ .Values.kcpBaseHost }}
- {{- end }}
- --webhook-port={{ .Values.webhook.port }}
- --tls-cert-dir={{ .Values.webhook.tlsCertDir }}
- --health-probe-bind-address={{ .Values.webhook.healthProbeBindAddress }}
diff --git a/charts/dependency-controller/values.yaml b/charts/dependency-controller/values.yaml
index 7b94918..f8af3fe 100644
--- a/charts/dependency-controller/values.yaml
+++ b/charts/dependency-controller/values.yaml
@@ -7,8 +7,9 @@ imagePullSecrets: []
apiExportName: dependencies.opendefense.cloud
-# Base kcp host URL (without workspace path). Used to resolve workspace paths
-# to logical cluster names. If empty, derived from kubeconfig.
+# Base kcp host URL (without workspace path). Used by the controller to resolve
+# workspace paths to logical cluster names.
+# If empty, derived from kubeconfig.
kcpBaseHost: ""
# Controller configuration.
diff --git a/cmd/controller/main.go b/cmd/controller/main.go
index 44c2362..d52ece3 100644
--- a/cmd/controller/main.go
+++ b/cmd/controller/main.go
@@ -4,6 +4,7 @@
package main
import (
+ "context"
"flag"
"os"
@@ -14,8 +15,8 @@ import (
"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
- "k8s.io/client-go/rest"
ctrl "sigs.k8s.io/controller-runtime"
+ "sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
"sigs.k8s.io/controller-runtime/pkg/manager"
@@ -25,6 +26,7 @@ import (
v1alpha1 "go.opendefense.cloud/dependency-controller/api/v1alpha1"
"go.opendefense.cloud/dependency-controller/internal/controller"
+ "go.opendefense.cloud/dependency-controller/internal/kcp"
)
var scheme = runtime.NewScheme()
@@ -58,14 +60,37 @@ func main() {
cfg := ctrl.GetConfigOrDie()
+ if err := kcp.ValidateKubeconfig(cfg); err != nil {
+ setupLog.Error(err, "invalid kubeconfig")
+ os.Exit(1)
+ }
+
// Derive base config (root kcp URL without workspace path).
- baseCfg := rest.CopyConfig(cfg)
+ baseCfg, err := kcp.BaseConfig(cfg)
+ if err != nil {
+ setupLog.Error(err, "unable to derive front-proxy base URL from kubeconfig")
+ os.Exit(1)
+ }
if kcpBaseHost != "" {
baseCfg.Host = kcpBaseHost
}
+ // Resolve the APIExportEndpointSlice name for the dep-ctrl's APIExport.
+ // The slice name is not necessarily the same as the APIExport name.
+ directClient, err := client.New(cfg, client.Options{Scheme: scheme})
+ if err != nil {
+ setupLog.Error(err, "unable to create client for endpoint slice discovery")
+ os.Exit(1)
+ }
+
+ ess, err := kcp.FindEndpointSlice(context.Background(), directClient, apiExportName)
+ if err != nil {
+ setupLog.Error(err, "unable to find APIExportEndpointSlice", "apiExport", apiExportName)
+ os.Exit(1)
+ }
+
// Create apiexport provider for the dependency-controller's own APIExport.
- depCtrlProvider, err := apiexport.New(cfg, apiExportName, apiexport.Options{
+ depCtrlProvider, err := apiexport.New(cfg, ess.Name, apiexport.Options{
Scheme: scheme,
})
if err != nil {
@@ -97,7 +122,6 @@ func main() {
// Register the multicluster DependencyRule reconciler.
reconciler := controller.NewDependencyRuleReconciler(mgr)
- reconciler.APIExportName = apiExportName
reconciler.BaseConfig = baseCfg
// Wire up webhook installer if configured.
@@ -110,7 +134,7 @@ func main() {
os.Exit(1)
}
}
- reconciler.WebhookInstaller = controller.NewWebhookInstaller(nil, webhookURL, caBundle)
+ reconciler.WebhookInstaller = controller.NewWebhookInstaller(mgr, webhookURL, caBundle)
}
if err := mcbuilder.ControllerManagedBy(mgr).
diff --git a/cmd/webhook/main.go b/cmd/webhook/main.go
index c6cb707..8c66ecb 100644
--- a/cmd/webhook/main.go
+++ b/cmd/webhook/main.go
@@ -15,8 +15,8 @@ import (
"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
- "k8s.io/client-go/rest"
ctrl "sigs.k8s.io/controller-runtime"
+ "sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
"sigs.k8s.io/controller-runtime/pkg/manager"
@@ -26,6 +26,7 @@ import (
mcreconcile "sigs.k8s.io/multicluster-runtime/pkg/reconcile"
v1alpha1 "go.opendefense.cloud/dependency-controller/api/v1alpha1"
+ "go.opendefense.cloud/dependency-controller/internal/kcp"
"go.opendefense.cloud/dependency-controller/internal/webhook"
)
@@ -41,12 +42,10 @@ func init() {
func main() {
var apiExportName string
- var kcpBaseHost string
var webhookPort int
var tlsCertDir string
var healthProbeBindAddress string
flag.StringVar(&apiExportName, "api-export-name", "dependencies.opendefense.cloud", "Name of the dependency-controller's APIExport")
- flag.StringVar(&kcpBaseHost, "kcp-base-host", "", "Base kcp host URL (without workspace path). If empty, derived from kubeconfig.")
flag.IntVar(&webhookPort, "webhook-port", 9443, "Port for the webhook server")
flag.StringVar(&tlsCertDir, "tls-cert-dir", "/etc/webhook-tls", "Directory containing tls.crt and tls.key for the webhook server")
flag.StringVar(&healthProbeBindAddress, "health-probe-bind-address", ":8081", "Address to bind the health probe endpoint")
@@ -60,14 +59,35 @@ func main() {
cfg := ctrl.GetConfigOrDie()
- // Derive base config (root kcp URL without workspace path).
- baseCfg := rest.CopyConfig(cfg)
- if kcpBaseHost != "" {
- baseCfg.Host = kcpBaseHost
+ if err := kcp.ValidateKubeconfig(cfg); err != nil {
+ setupLog.Error(err, "invalid kubeconfig")
+ os.Exit(1)
+ }
+
+ // Derive front-proxy base config by stripping the /clusters/... workspace
+ // path from the kubeconfig host. This base URL is used by the webhook to
+ // construct per-request clients targeting specific consumer workspaces.
+ baseCfg, err := kcp.BaseConfig(cfg)
+ if err != nil {
+ setupLog.Error(err, "unable to derive front-proxy base URL from kubeconfig")
+ os.Exit(1)
+ }
+
+ // Resolve the APIExportEndpointSlice name for the dep-ctrl's APIExport.
+ directClient, err := client.New(cfg, client.Options{Scheme: scheme})
+ if err != nil {
+ setupLog.Error(err, "unable to create client for endpoint slice discovery")
+ os.Exit(1)
+ }
+
+ ess, err := kcp.FindEndpointSlice(context.Background(), directClient, apiExportName)
+ if err != nil {
+ setupLog.Error(err, "unable to find APIExportEndpointSlice", "apiExport", apiExportName)
+ os.Exit(1)
}
// Create apiexport provider for the dependency-controller's own APIExport.
- depCtrlProvider, err := apiexport.New(cfg, apiExportName, apiexport.Options{
+ depCtrlProvider, err := apiexport.New(cfg, ess.Name, apiexport.Options{
Scheme: scheme,
})
if err != nil {
@@ -95,7 +115,6 @@ func main() {
cacheMgr := &webhook.RuleCacheManager{
DepCtrlManager: mgr,
- BaseConfig: baseCfg,
Scheme: scheme,
APIExportName: apiExportName,
Registry: registry,
@@ -139,6 +158,7 @@ func main() {
validator := &webhook.DeletionValidator{
Registry: registry,
Initialized: initialized,
+ BaseConfig: baseCfg,
}
mgr.GetWebhookServer().Register("/validate", &ctrlwebhook.Admission{Handler: validator})
diff --git a/config/crds/dependencies.opendefense.cloud_dependencyrules.yaml b/config/crds/dependencies.opendefense.cloud_dependencyrules.yaml
index 85546cc..549fa36 100644
--- a/config/crds/dependencies.opendefense.cloud_dependencyrules.yaml
+++ b/config/crds/dependencies.opendefense.cloud_dependencyrules.yaml
@@ -140,6 +140,14 @@ spec:
minLength: 1
pattern: ^[A-Z][A-Za-z0-9]*$
type: string
+ resource:
+ description: |-
+ Resource is the plural resource name of the dependent (e.g., "virtualmachines").
+ Used by the webhook to construct the GVR for listing dependent resources.
+ maxLength: 63
+ minLength: 1
+ pattern: ^[a-z][a-z0-9]*$
+ type: string
version:
description: Version is the API version of the dependent resource.
maxLength: 63
@@ -150,6 +158,7 @@ spec:
- apiExportName
- group
- kind
+ - resource
- version
type: object
required:
diff --git a/config/kcp/apiexport-dependencies.opendefense.cloud.yaml b/config/kcp/apiexport-dependencies.opendefense.cloud.yaml
index f46e8aa..4c244bf 100644
--- a/config/kcp/apiexport-dependencies.opendefense.cloud.yaml
+++ b/config/kcp/apiexport-dependencies.opendefense.cloud.yaml
@@ -16,7 +16,7 @@ spec:
resources:
- group: dependencies.opendefense.cloud
name: dependencyrules
- schema: v260428-3564c91.dependencyrules.dependencies.opendefense.cloud
+ schema: v260504-f130a11.dependencyrules.dependencies.opendefense.cloud
storage:
crd: {}
status: {}
diff --git a/config/kcp/apiresourceschema-dependencyrules.dependencies.opendefense.cloud.yaml b/config/kcp/apiresourceschema-dependencyrules.dependencies.opendefense.cloud.yaml
index 4fbac37..ec8e4e9 100644
--- a/config/kcp/apiresourceschema-dependencyrules.dependencies.opendefense.cloud.yaml
+++ b/config/kcp/apiresourceschema-dependencyrules.dependencies.opendefense.cloud.yaml
@@ -1,7 +1,7 @@
apiVersion: apis.kcp.io/v1alpha1
kind: APIResourceSchema
metadata:
- name: v260428-3564c91.dependencyrules.dependencies.opendefense.cloud
+ name: v260504-f130a11.dependencyrules.dependencies.opendefense.cloud
spec:
group: dependencies.opendefense.cloud
names:
@@ -134,6 +134,14 @@ spec:
minLength: 1
pattern: ^[A-Z][A-Za-z0-9]*$
type: string
+ resource:
+ description: |-
+ Resource is the plural resource name of the dependent (e.g., "virtualmachines").
+ Used by the webhook to construct the GVR for listing dependent resources.
+ maxLength: 63
+ minLength: 1
+ pattern: ^[a-z][a-z0-9]*$
+ type: string
version:
description: Version is the API version of the dependent resource.
maxLength: 63
@@ -144,6 +152,7 @@ spec:
- apiExportName
- group
- kind
+ - resource
- version
type: object
required:
diff --git a/docs/architecture.md b/docs/architecture.md
index 180af68..fac8c48 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -17,31 +17,36 @@ The system solves this with two cooperating binaries:
- **Controller** -- watches `DependencyRule` objects and installs admission webhooks
in provider workspaces
-- **Webhook** -- maintains indexed caches of dependent resources and serves
- admission requests that block deletion of still-referenced resources
+- **Webhook** -- maintains a metadata registry of dependency rules and serves
+ admission requests that block deletion of still-referenced resources by querying
+ consumer workspaces directly
## Workspace Topology
```mermaid
graph LR
subgraph DC["Dep-Ctrl Workspace"]
- DCExport["APIExport:
DependencyRule
+ permissionClaims"]
+ DCExport["APIExport:
DependencyRule
+ VWC permissionClaim"]
end
subgraph CP["Compute Provider WS"]
- CPBinding["APIBinding: dep-ctrl
(claims accepted)"]
+ CPBinding["APIBinding: dep-ctrl
(VWC claim accepted)"]
CPExport["APIExport: compute"]
CPRule["DependencyRule:
VM → VPC"]
end
subgraph NP["Network Provider WS"]
- NPBinding["APIBinding: dep-ctrl
(claims accepted)"]
+ NPBinding["APIBinding: dep-ctrl
(VWC claim accepted)"]
NPExport["APIExport: VPCs"]
NPWebhook["ValidatingWebhook"]
end
subgraph ROOT["Root Workspace"]
- ROOTROLE["ClusterRoles:
workspaces/content +
workspace resolution"]
+ ROOTROLE["ClusterRoles:
controller: workspaces/content
+ workspaces read"]
+ end
+
+ subgraph SYSADMIN["system:admin (per shard)"]
+ SACR["ClusterRoles:
webhook: wildcard read
(applied directly to each shard)"]
end
subgraph CW["Consumer WS"]
@@ -58,6 +63,7 @@ graph LR
style CP fill:#e1f0da,color:#1a3e12
style NP fill:#e1f0da,color:#1a3e12
style ROOT fill:#f3e8ff,color:#4a1d7a
+ style SYSADMIN fill:#f3e8ff,color:#4a1d7a
style CW fill:#fef3c7,color:#664d03
```
@@ -70,17 +76,30 @@ the permissionClaim).
**Provider workspaces** -- each provider (compute, network, ...) exports its own
resource types and binds to the dep-ctrl APIExport to create `DependencyRule`
-objects. The APIBinding must **accept** the dep-ctrl's permissionClaim, which
+objects. The APIBinding must **accept** the dep-ctrl's VWC permissionClaim, which
grants the controller access to manage `ValidatingWebhookConfigurations` in
those workspaces through the virtual workspace.
**Consumer workspaces** -- bind to provider exports and create the actual
-resources (VPCs, VMs). Consumers don't interact with the dependency system directly.
-
-**Root workspace** -- hosts static `ClusterRoles` granting both components
-`workspaces/content` access (needed to enter child workspaces). The controller
-additionally gets `workspaces` read access to resolve workspace paths to logical
-cluster names. This is a deployment prerequisite.
+resources (VPCs, VMs). Consumers don't interact with the dependency system
+directly. The webhook queries dependent resources in consumer workspaces via
+the front-proxy using broad read RBAC.
+
+**Root workspace** -- hosts static `ClusterRoles` for the **controller**:
+`workspaces/content` access (to enter child workspaces) and `workspaces` read
+(to resolve workspace paths to logical cluster names). This is a deployment
+prerequisite.
+
+**`system:admin` workspace, per shard** -- hosts a `ClusterRole` +
+`ClusterRoleBinding` granting the **webhook** wildcard read access (`get`,
+`list` on all resources). The webhook queries dependent resources directly
+in consumer workspaces, and consumer workspaces can live on any shard. kcp's
+`BootstrapPolicyAuthorizer` reads RBAC from the local shard's `system:admin`
+only -- bindings do not propagate across shards -- so this binding must be
+applied to **every shard** that hosts consumer workspaces. `system:admin` is
+not reachable through the front-proxy; the binding is applied via direct
+(port-forwarded) shard access using a `system:masters` identity issued from
+a kcp-operator `Kubeconfig` CR with `rootShardRef` / `shardRef`.
## Component Overview
@@ -92,11 +111,11 @@ flowchart TD
end
subgraph Webhook["Webhook Server Binary (cmd/webhook)"]
- RCM["Rule Cache Manager"]
- RCM -->|"creates per rule"| IC["Indexed Cache
(mcmanager per provider APIExport)"]
- IC -->|"stored in"| RR["Rule Registry"]
+ RCM["Rule Registry Manager"]
+ RCM -->|"populates"| RR["Rule Registry
(metadata only)"]
DV["Deletion Validator"]
- DV -->|"queries"| RR
+ DV -->|"queries rules"| RR
+ DV -->|"queries dependents via
front-proxy per request"| CW["Consumer Workspaces"]
end
WI -->|"installs via dep-ctrl VW"| PW["Provider Workspaces"]
@@ -113,10 +132,8 @@ flowchart TD
**Entry point:** [`cmd/controller/main.go`](../cmd/controller/main.go)
-The controller watches `DependencyRule` objects and ensures admission webhooks
-exist in the right provider workspaces. The webhook's read access to dependent
-resources is granted shard-wide via `system:admin` bootstrap RBAC, so the
-controller only handles webhook installation.
+The controller watches `DependencyRule` objects and installs
+`ValidatingWebhookConfiguration` objects in the right provider workspaces.
All operations in provider workspaces are routed through the dep-ctrl APIExport's
virtual workspace, authorized by `permissionClaims`. The controller never connects
@@ -135,16 +152,14 @@ On first reconcile, the controller lazily initializes two components
The VW only accepts logical cluster names in its `/clusters/` path, not
workspace paths. The resolver caches mappings and is consulted before every
-webhook or RBAC operation.
+webhook operation.
### How a DependencyRule becomes a webhook
When a provider creates a `DependencyRule` ([`api/v1alpha1/types.go`](../api/v1alpha1/types.go)),
the controller's reconciler
([`internal/controller/dependencyrule_controller.go:Reconcile`](../internal/controller/dependencyrule_controller.go))
-picks it up via the dep-ctrl APIExport's virtual workspace and performs two actions:
-
-#### 1. Webhook Installation
+picks it up via the dep-ctrl APIExport's virtual workspace and installs webhooks.
The [`WebhookInstaller`](../internal/controller/webhook_installer.go) creates or
updates a `ValidatingWebhookConfiguration` named `dependency-controller` in each
@@ -181,17 +196,18 @@ workspace, the webhook is deleted entirely.
**Entry point:** [`cmd/webhook/main.go`](../cmd/webhook/main.go)
The webhook server watches the same `DependencyRule` objects as the controller,
-but its job is different: it builds indexed caches of dependent resources and uses
-them to answer admission requests.
+but its job is different: it maintains a metadata registry of active rules and
+uses per-request direct queries to check for active dependents when a deletion
+is attempted.
-### Startup and Prewarming
+### Startup and Registry Population
On startup, the webhook creates an `mcmanager` backed by the dep-ctrl APIExport
provider, then registers the
[`RuleCacheManager`](../internal/webhook/rule_cache_manager.go) as a controller
watching `DependencyRule` objects.
-Before the webhook can serve requests safely, it must populate its caches from
+Before the webhook can serve requests safely, it must populate its registry with
all existing rules. This happens in a
[`manager.RunnableFunc`](../cmd/webhook/main.go) that runs after the manager
starts:
@@ -199,9 +215,8 @@ starts:
1. [`PopulateRegistry`](../internal/webhook/rule_cache_manager.go) resolves
the dep-ctrl APIExport's virtual workspace URL from its
`APIExportEndpointSlice`
- ([`virtualWorkspaceClient`](../internal/webhook/rule_cache_manager.go))
2. Lists all existing `DependencyRule` objects across all bound workspaces
-3. Calls [`ensureCache`](../internal/webhook/rule_cache_manager.go) for each rule
+3. Registers each rule's metadata (GVK, GVR, field paths) in the registry
4. Closes the `initialized` channel
Until `initialized` is closed:
@@ -209,45 +224,22 @@ Until `initialized` is closed:
- The readiness probe returns unhealthy
- The `DeletionValidator` denies all DELETE requests with "not yet initialized"
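+
+A sketch of this gating pattern (simplified; variable names are illustrative,
+not the actual implementation):
+
+```go
+initialized := make(chan struct{})
+
+// Runs after the manager starts; admission stays blocked until it finishes.
+if err := mgr.Add(manager.RunnableFunc(func(ctx context.Context) error {
+	if err := cacheMgr.PopulateRegistry(ctx); err != nil {
+		return err
+	}
+	close(initialized) // readiness and the validator both gate on this
+	return nil
+})); err != nil {
+	setupLog.Error(err, "unable to add registry population runnable")
+	os.Exit(1)
+}
+```
+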
-### Per-Rule Indexed Caches
-
-Each `DependencyRule` gets a dedicated `mcmanager.Manager` connected to the
-dependent resource's provider APIExport virtual workspace. This 1:1 mapping
-([`ensureCache`](../internal/webhook/rule_cache_manager.go)) exists because:
-
-- **Different rules reference different APIExports** -- each provider's virtual
- workspace is a distinct endpoint, so each needs its own manager
-- **Clean lifecycle** -- cancelling one rule's context tears down only its
- informers, without affecting other rules
-
-For each dependency target in the rule,
-[`ensureCache`](../internal/webhook/rule_cache_manager.go) registers a field
-index on the dependent resource's informer:
-
-```go
-mgr.GetFieldIndexer().IndexField(ctx, watchObj, fieldPath, func(obj client.Object) []string {
- val := fieldpath.Resolve(u.Object, fieldPath) // e.g., ".spec.vpcRef.name" -> "my-vpc"
- return []string{val}
-})
-```
+### Per-Request Direct Queries
-This enables O(1) lookups: "find all VMs whose `.spec.vpcRef.name` equals `my-vpc`".
+Unlike a cache-based approach, the webhook does not maintain persistent informers
+for dependent resources. Instead, on each DELETE admission request, it constructs
+a temporary dynamic client scoped to the consumer workspace via the kcp front-proxy
+and lists dependent resources directly.
-A minimal controller is registered on the same GVK to activate the informer
-(controller-runtime won't start an informer without a controller watching it)
-and to track which logical clusters have been discovered and mark the cache
-as ready.
+The webhook derives the front-proxy base URL from its kubeconfig at startup by
+stripping the `/clusters/...` workspace path suffix. For each admission request,
+it builds a workspace-scoped URL: `{frontProxyBase}/clusters/{logicalClusterName}`.
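+
+A minimal sketch of the per-request lookup (simplified; variable names are
+illustrative, not the actual implementation):
+
+```go
+// Scope a copy of the front-proxy rest.Config to the consumer workspace.
+wsCfg := rest.CopyConfig(baseCfg)
+wsCfg.Host = baseCfg.Host + "/clusters/" + logicalClusterName
+
+// One-shot dynamic client; no informers or long-lived watches are kept.
+dyn, err := dynamic.NewForConfig(wsCfg)
+if err != nil {
+	return admission.Errored(http.StatusInternalServerError, err)
+}
+
+// List dependents by the GVR from the rule registry, then filter each item
+// by the rule's field path against the name of the resource being deleted.
+list, err := dyn.Resource(gvr).Namespace(req.Namespace).List(ctx, metav1.ListOptions{})
+```
+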
-The resulting state is stored in the
-[`RuleRegistry`](../internal/webhook/rule_registry.go) as a `RuleState` keyed by
-`clusterName/ruleName`. The registry also maintains a reverse index
-(`byTarget map[GVR][]string`) so the webhook can quickly find which rules
-protect a given resource type.
-
-On `DependencyRule` deletion, `Reconcile` calls
-[`Registry.Unregister(key)`](../internal/webhook/rule_registry.go) which removes
-the state from the map and calls `state.Cancel()` to stop the background manager
-goroutine and tear down all informers.
+This approach is routing-transparent across shards -- no webhook configuration
+change is required when shards are added, as the front-proxy handles routing
+based on the logical cluster name. Each new shard does, however, need the
+`system:admin` RBAC binding (see [Workspace Topology](#workspace-topology))
+applied so the webhook is authorized to list dependents on it.
### Admission Request Flow
@@ -272,24 +264,25 @@ DELETE vpcs/my-vpc (from consumer workspace)
6. Registry.FindByTargetGVR(vpcs GVR)
| returns []RuleEntry with matched IndexedFields
|
-7. For each matching rule:
- | a. Cache not ready? --> Deny ("cache warming up, retry later")
- | b. Get cluster from rule's manager
- | c. Query: cache.List(ctx, &list, MatchingFields{".spec.vpcRef.name": "my-vpc"})
- | d. Each result is a blocker: "VirtualMachine/my-vm"
+7. Create dynamic client for {frontProxy}/clusters/{clusterName}
+ |
+8. For each matching rule:
+ | a. List dependent resources in namespace
+ | b. Filter by field path (fieldpath.Resolve == deleted resource name)
+ | c. Each match is a blocker: "VirtualMachine/my-vm"
|
-8. Blockers found? --> Deny ("still referenced by VirtualMachine/my-vm")
+9. Blockers found? --> Deny ("still referenced by VirtualMachine/my-vm")
|
-9. No blockers --> Allow
+10. No blockers --> Allow
```
The validator is rule-agnostic -- it doesn't need to know the structure of each
-`DependencyRule`, only how to query the indexed caches via the field paths stored
-in `RuleEntry.MatchedField`.
+`DependencyRule`, only how to query the dependent resources via the GVR and field
+paths stored in `RuleEntry`.
### Force-Deleting Protected Resources
-If the dependency lifecycle has broken down (stale caches, crashed webhook),
+If the dependency lifecycle has broken down (stale rules, crashed webhook),
operators can bypass protection:
```sh
@@ -318,6 +311,7 @@ spec:
group: compute.test.io
version: v1
kind: VirtualMachine
+ resource: virtualmachines
dependencies:
- apiExportRef:
path: "root:network-provider"
@@ -329,14 +323,15 @@ spec:
path: ".spec.vpcRef.name"
```
-`spec.dependent` -- the resource type that holds references (the one being cached).
+`spec.dependent` -- the resource type that holds references (the one queried on deletion).
`spec.dependent.apiExportName` -- the APIExport in the same workspace that provides this type.
+`spec.dependent.resource` -- the plural resource name, used to construct the GVR for dynamic client queries.
`spec.dependencies[]` -- the resource types being referenced (the ones being protected).
`spec.dependencies[].fieldRef.path` -- where in the dependent resource the reference lives.
### RuleRegistry (`internal/webhook/rule_registry.go`)
-Thread-safe store shared between the `RuleCacheManager` (writer) and
+Thread-safe metadata store shared between the `RuleCacheManager` (writer) and
`DeletionValidator` (reader).
- `rules map[string]*RuleState` -- keyed by `clusterName/ruleName`
@@ -344,36 +339,70 @@ Thread-safe store shared between the `RuleCacheManager` (writer) and
rebuilt on every `Register`/`Unregister`
Key operations:
-- `Register(key, state) *RuleState` -- adds/replaces a rule, returns old state for cleanup
-- `Unregister(key)` -- removes and cancels the rule's manager
+- `Register(key, state) *RuleState` -- adds/replaces a rule, returns old state
+- `Unregister(key)` -- removes the rule
- `FindByTargetGVR(gvr) []RuleEntry` -- O(1) lookup used by the admission handler
### Field Path Resolution (`internal/fieldpath/fieldpath.go`)
[`fieldpath.Resolve`](../internal/fieldpath/fieldpath.go) extracts a string value
from an unstructured object given a dot-notation path (e.g., `.spec.vpcRef.name`).
-Used by the index functions registered on per-rule informers.
+Used by the webhook to filter dependent resources by matching field values against
+the deleted resource name.
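+
+For example (the `Resolve` call matches its use above; the surrounding check
+is a sketch):
+
+```go
+// u is a dependent object (unstructured); the path comes from the rule's fieldRef.
+val := fieldpath.Resolve(u.Object, ".spec.vpcRef.name") // e.g. "my-vpc"
+if val == deletedName {
+	// This dependent still references the resource being deleted -> blocker.
+}
+```
+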
---
## Known Limitations
+### Dependency-provider workspaces must be direct children of root
+
+The path named in `DependencyRule.spec.dependencies[].apiExportRef.path`
+must currently be a direct child of `root` (e.g., `root:network-provider`).
+Nested paths such as `root:org:network-provider` will fail to resolve.
+
+The cause is in
+[`workspaceResolver.ensureResolved`](../internal/controller/dependencyrule_controller.go):
+the resolver splits the supplied path on `:`, takes only the last segment as
+the `Workspace` name, and looks that name up in the **root** workspace. A
+nested workspace will not be found there.
+
+This restriction applies **only** to the workspace hosting the protected
+APIExport. It does **not** apply to:
+
+- Consumer workspaces -- the webhook identifies them by the `kcp.io/cluster`
+  annotation on the admission request (a logical cluster name, not a path)
+  and queries them directly through the front-proxy.
+- The dependent-provider workspace (where the `DependencyRule` itself
+  lives) -- the controller and webhook discover rules through the dep-ctrl
+  APIExport's multicluster watch, which delivers the logical cluster name
+  with each event.
+- The dep-ctrl workspace -- its location is pinned at deploy time via the
+  controller's kubeconfig.
+
+Lifting this limitation requires the resolver to walk the path segment by
+segment (or call kcp's path-aware Workspace lookup) instead of bottoming
+out at root.
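+
+Illustration of the current resolution behavior (simplified sketch, not the
+actual code):
+
+```go
+// "root:org:network-provider" resolves only its last segment...
+segments := strings.Split(path, ":")
+name := segments[len(segments)-1] // "network-provider"
+
+// ...which is then looked up as a Workspace in root, so a workspace nested
+// under root:org is not found there.
+ws := &tenancyv1alpha1.Workspace{}
+err := rootClient.Get(ctx, client.ObjectKey{Name: name}, ws)
+```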
+
### Circular dependencies
The system does not detect cycles. If rule A says VM depends on VPC and rule B
says VPC depends on VM, neither can be deleted normally. Use `skip-protection`
to break the cycle.
-### Informer cache lag
+### Per-request query latency
-Between a dependent being created and the informer cache syncing (typically
-sub-second), the referenced resource can be deleted without the webhook blocking
-it. Similarly, after a dependent is deleted, there's a brief window where the
-webhook may still block deletion of the referenced resource.
+Each DELETE admission request triggers a live API call to list dependent resources
+in the consumer workspace. This adds latency compared to a cache-based approach,
+but DELETE operations are infrequent and the namespace-scoped listings are
+typically small.
-### Rule updates are not detected
+### Rule updates
-[`ensureCache`](../internal/webhook/rule_cache_manager.go) is a no-op if a
-cache already exists for the rule key. If a `DependencyRule`'s spec changes
-(e.g., a new dependency target is added), the existing cache won't be updated.
-The workaround is to delete and recreate the rule.
+In-place `DependencyRule` spec edits are picked up automatically: the
+`RuleCacheManager` reconciles on each update event and overwrites the
+existing registry entry, and `WebhookInstaller.reconcileWorkspaceWebhook`
+recomputes the desired webhook config from scratch on every change rather
+than maintaining incremental state. This is covered by the
+*"should propagate in-place DependencyRule updates to the webhook"* e2e
+scenario, which patches `fieldRef.path` on a live rule and verifies the
+webhook re-evaluates without a recreate cycle.
diff --git a/docs/development.md b/docs/development.md
index 1de74ee..1dbf002 100644
--- a/docs/development.md
+++ b/docs/development.md
@@ -69,11 +69,21 @@ make run-webhook # Run the webhook server from source
### Test
```sh
-make test # Unit + integration tests (requires kcp binary, excludes e2e)
-make test-e2e # E2E tests (requires kind, helm, docker)
-make clean-e2e # Remove kind cluster from e2e tests
+make test # Unit + integration tests (requires kcp binary, excludes e2e)
+make test-e2e        # E2E tests (requires kind, helm, docker) -- uses the active shard config
+make test-e2e-matrix # Run e2e tests against both shard configs sequentially
+make clean-e2e # Remove kind cluster from e2e tests
```
+`make test-e2e` honors the `E2E_SHARD_CONFIG` environment variable
+(`single-shard` or `multi-shard`, default `multi-shard`); see the [E2E Tests](#e2e-tests)
+section for what each config exercises. `make test-e2e-matrix` runs both back-to-back
+with a kind cleanup between runs.
+
+Tool paths can be overridden via `KIND`, `KUBECTL`, `HELM`, `DOCKER` env vars
+(fallback: PATH lookup). Set `E2E_SKIP_CLEANUP=1` to keep the kind cluster
+running after the suite for inspection.
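+
+For example:
+
+```sh
+# Single-shard run, keeping the kind cluster around afterwards for inspection
+E2E_SHARD_CONFIG=single-shard E2E_SKIP_CLEANUP=1 make test-e2e
+```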
+
### Lint & Format
```sh
@@ -105,14 +115,79 @@ webhook server with a self-signed CA. It verifies the full lifecycle:
## E2E Tests
-The e2e tests (`test/e2e/`) run against a real kind cluster with kcp and
-cert-manager deployed via Helm. They build the Docker image, load it into
-kind, deploy the Helm chart (which includes both controller and webhook), and
-exercise the full system including TLS webhook dispatch through kcp's admission
-pipeline.
-
-Tool paths can be configured via environment variables (`KIND`, `KUBECTL`,
-`HELM`, `DOCKER`) with fallback to PATH lookup.
+The e2e tests (`test/e2e/`) run against a real kind cluster with a multi-shard
+kcp instance deployed via the
+[kcp-operator](https://github.com/kcp-dev/helm-charts). The test suite:
+
+1. Creates a kind cluster with a NodePort for the kcp front-proxy
+2. Installs cert-manager and the kcp-operator Helm chart
+3. Deploys two etcd instances and creates a `RootShard`, `Shard` (`shard1`), and
+ `FrontProxy` via kcp-operator CRs
+4. Generates admin and component kubeconfigs via kcp-operator `Kubeconfig` CRs
+ (using `rootShardRef` so certs are trusted by both front-proxy and shards)
+5. Bootstraps RBAC (see below)
+6. Builds the Docker image, loads it into kind, and deploys via Helm
+7. Exercises the full system including TLS webhook dispatch through kcp's
+ admission pipeline
+
+### Workspace topology and shard placement
+
+The five test workspaces (`dep-ctrl`, `network-provider`, `compute-provider`,
+`consumer1`, `consumer2`) are pinned deterministically to either `root` or
+`shard1` via `spec.location.selector`. Both shards carry an `e2e-target`
+label. After workspace readiness, `verifyShardPlacements` reads each
+workspace's `internal.tenancy.kcp.io/shard` annotation and asserts that
+selectors weren't ignored.
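+
+A workspace pinned to a labeled shard looks roughly like this (sketch; the
+label value is illustrative):
+
+```yaml
+apiVersion: tenancy.kcp.io/v1alpha1
+kind: Workspace
+metadata:
+  name: consumer2
+spec:
+  location:
+    selector:
+      matchLabels:
+        e2e-target: shard1
+```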
+
+Selection is driven by the `E2E_SHARD_CONFIG` env var:
+
+| Config | Purpose | Placements |
+|------------------|------------------------------------------|-------------------------------------------------------------------------------------------------------------|
+| `single-shard` | sanity / single-shard fast paths | all five workspaces on `root` |
+| `multi-shard` (default) | exercises every cross-shard path | `compute-provider` and `consumer2` on `shard1`; `dep-ctrl`, `network-provider`, `consumer1` on `root` |
+
+Together, the two configs exercise: same-shard fast paths (single-shard);
+controller-VW cross-shard webhook installation, provider ↔ consumer
+cross-shard binding, and webhook ↔ consumer cross-shard query (multi-shard).
+Run `make test-e2e-matrix` to execute both sequentially.
+
+### Bootstrap RBAC
+
+Three RBAC bundles are applied during setup:
+
+1. **`system:admin` per shard** (`test/fixtures/system-admin-rbac-bootstrap.yaml`):
+ ClusterRole + ClusterRoleBinding granting the webhook SA `*/*` get/list.
+ Applied via direct (port-forwarded) connection to each shard, using a
+ `system:masters` kubeconfig generated from a kcp-operator `Kubeconfig` CR
+ with `rootShardRef` / `shardRef`. kcp's `BootstrapPolicyAuthorizer` reads
+   this binding from the local shard only -- bindings do not propagate across
+   shards -- so the fixture is applied once per shard (condensed example below).
+2. **Root workspace** (`test/fixtures/root-rbac-bootstrap.yaml`): controller-only
+ rules (workspaces/content access + tenancy.kcp.io/workspaces read), applied
+ via the front-proxy.
+3. **Dep-ctrl workspace** (`test/fixtures/depctrl-rbac-bootstrap.yaml`):
+ apiexportendpointslices read + dep-ctrl APIExport content access for both
+ the controller and webhook SAs.
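+
+Condensed example of the per-shard apply from bundle 1 (names and paths match
+the full procedure in [getting-started.md](getting-started.md)):
+
+```sh
+kubectl -n kcp-system port-forward svc/root-kcp 6443:6443 &
+
+kubectl --kubeconfig /tmp/root-masters.kubeconfig \
+  --server https://localhost:6443/clusters/system:admin \
+  apply --validate=false -f test/fixtures/system-admin-rbac-bootstrap.yaml
+```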
+
+The webhook installation in provider workspaces is authorized by the
+`validatingwebhookconfigurations` permissionClaim on the dep-ctrl APIExport,
+not by RBAC.
+
+### Test scenarios
+
+`test/e2e/dependency_test.go` is an `Ordered` `Describe` covering:
+
+- Initial install (DependencyRule → ValidatingWebhookConfiguration appears)
+- Block / unblock cycles with a single dependent and with multiple dependents
+- Cross-shard isolation (`consumer2` with no VMs)
+- Cross-shard protection (`consumer2` with a VM, on `shard1` under multi-shard)
+- `skip-protection` annotation
+- DependencyRule deletion → webhook removal
+- DependencyRule recreation → protection restored
+- **In-place rule update**: patches `fieldRef.path` on a live rule and
+ verifies the webhook re-evaluates without a recreate cycle (proves
+ `WebhookInstaller.reconcileWorkspaceWebhook`'s update branch and the
+ webhook's `RuleCacheManager` re-indexing both work end-to-end).
### Fixtures
@@ -120,95 +195,22 @@ Test fixtures are loaded from YAML files rather than constructed inline:
- `config/kcp/` -- the generated dep-ctrl APIResourceSchemas and APIExport
(same files used for real deployment)
-- `test/fixtures/` -- test provider schemas (VPC, VirtualMachine) and their
- APIExports
+- `test/fixtures/` -- test provider schemas (VPC, VirtualMachine), APIExports,
+ RBAC bundles, and per-test VPC/VM resources
## Deploying to kcp
-### 1. Create the dep-ctrl workspace and apply schemas
-
-```sh
-kubectl ws create dep-ctrl --enter
-kubectl apply -k config/kcp/
-```
-
-This creates the `APIResourceSchema` objects and the
-`dependencies.opendefense.cloud` APIExport. The APIExport includes a
-`permissionClaim` for `validatingwebhookconfigurations` -- this authorizes the
-controller to manage webhooks in provider workspaces through the virtual workspace.
-
-### 2. Apply bootstrap RBAC
-
-Bootstrap RBAC is required in three locations, applied with a privileged identity:
-
-**Root workspace** -- both components need `workspaces/content` access to enter
-child workspaces. The controller also needs `workspaces` read access for
-workspace path resolution:
-
-```sh
-kubectl ws root
-kubectl apply -f test/fixtures/root-rbac-bootstrap.yaml
-```
-
-**Dep-ctrl workspace** -- the controller needs full CRUD on `apiexports/content`
-(to manage claimed resources through the VW) and `apiexportendpointslices` read
-access for VW URL discovery:
-
-```sh
-kubectl ws root:dep-ctrl
-kubectl apply -f test/fixtures/depctrl-rbac-bootstrap.yaml
-```
+See [docs/getting-started.md](getting-started.md) for the full step-by-step
+deployment guide using kcp-operator. The guide covers kcp-operator setup,
+multi-shard configuration, bootstrap RBAC, kubeconfig generation via
+kcp-operator Kubeconfig CRs, and Helm deployment.
-**system:admin** (shard-local) -- the webhook SA gets shard-wide read access to
-`apiexports/content` and `apiexportendpointslices`, evaluated by the Bootstrap
-Policy Authorizer for every request on the shard. Must be applied via the kcp
-server (not front-proxy):
-
-```sh
-kubectl --server=https://:6443/clusters/system:admin \
- apply -f test/fixtures/shard-admin-rbac-bootstrap.yaml
-```
-
-Adjust the service account names in the `ClusterRoleBinding` subjects to match
-your deployment (defaults: `system:serviceaccount:dependency-system:dependency-controller`
-and `system:serviceaccount:dependency-system:dependency-webhook`).
-
-### 3. Deploy with Helm
-
-The chart deploys both the controller and webhook server as separate Deployments:
-
-```sh
-helm install dep-ctrl charts/dependency-controller \
- --namespace dependency-system --create-namespace \
- --set kcpBaseHost=https://kcp.example.com:6443 \
- --set controller.kubeconfig.secretName=kcp-controller-kubeconfig \
- --set webhook.kubeconfig.secretName=kcp-webhook-kubeconfig \
- --set webhook.tls.certManager.issuerRef.name=my-issuer
-```
+### Quick reference
-The controller automatically discovers the webhook service URL and CA bundle
-from the co-deployed webhook resources. It routes all provider workspace
-operations through the dep-ctrl APIExport's virtual workspace, resolving
-workspace paths to logical cluster names via root Workspace objects.
-
-### 4. Provider setup
-
-Each API provider that wants deletion protection:
-
-1. Binds to the dep-ctrl APIExport in their provider workspace, **accepting the
- permissionClaim** (required for the controller to install webhooks):
- ```yaml
- spec:
- permissionClaims:
- - group: "admissionregistration.k8s.io"
- resource: "validatingwebhookconfigurations"
- selector: { matchAll: true }
- state: Accepted
- ```
-2. Creates a `DependencyRule` declaring which of their resources depend on which
- other resources
-
-The controller then automatically installs a `ValidatingWebhookConfiguration`
-in each dependency provider's workspace (via the VW). The webhook server picks
-up the rule, starts an indexed cache for the dependent resource type (authorized
-by the shard-wide system:admin RBAC), and begins serving admission requests.
+1. Deploy kcp via kcp-operator (RootShard, FrontProxy, optional additional Shards)
+2. Create the dep-ctrl workspace and apply `config/kcp/` schemas
+3. Apply bootstrap RBAC: per-shard `system:admin` (webhook get/list), root
+ workspace (controller), dep-ctrl workspace (APIExport access for both)
+4. Generate component kubeconfigs via kcp-operator Kubeconfig CRs (use `rootShardRef`)
+5. Deploy with Helm
+6. Providers bind to the dep-ctrl APIExport (accepting permissionClaims) and create DependencyRules
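+
+For step 6, the accepted claim in the provider's APIBinding looks like this
+(fragment from the provider-onboarding example in
+[getting-started.md](getting-started.md)):
+
+```yaml
+spec:
+  permissionClaims:
+    - group: "admissionregistration.k8s.io"
+      resource: "validatingwebhookconfigurations"
+      selector: { matchAll: true }
+      state: Accepted
+```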
diff --git a/docs/getting-started.md b/docs/getting-started.md
index 85a5561..83168b3 100644
--- a/docs/getting-started.md
+++ b/docs/getting-started.md
@@ -1,10 +1,11 @@
# Getting Started
-This guide walks through deploying the dependency-controller from scratch. It
-assumes you have a running kcp instance and a Kubernetes cluster where the
-controller and webhook will run (they connect to kcp remotely via kubeconfigs).
+This guide walks through deploying the dependency-controller on a Kubernetes
+cluster with kcp managed by the
+[kcp-operator](https://github.com/kcp-dev/helm-charts). By the end you will
+have:
-By the end, you'll have:
+- A multi-shard kcp instance (root shard + one additional shard)
- The `DependencyRule` API available in kcp
- The controller and webhook running in your Kubernetes cluster
- A working example where deleting a VPC is blocked while a VirtualMachine
@@ -12,20 +13,25 @@ By the end, you'll have:
## Prerequisites
-- A running [kcp](https://github.com/kcp-dev/kcp) instance
-- A Kubernetes cluster (the "management cluster") with:
- - [cert-manager](https://cert-manager.io/) installed (for webhook TLS)
- - Network connectivity to kcp
+- A Kubernetes cluster (the "management cluster") -- [kind](https://kind.sigs.k8s.io/) works well for dev
+- [cert-manager](https://cert-manager.io/) installed in the cluster
- `kubectl` with the [kcp plugin](https://github.com/kcp-dev/kcp/tree/main/cli)
- `helm` v3
-### Quick setup with kind and kcp
-
-If you don't have a cluster yet:
+### Quick setup with kind
```sh
-# Create a kind cluster
-kind create cluster --name dep-ctrl
+# Create a kind cluster (expose a NodePort for the kcp front-proxy)
+cat <<'EOF' | kind create cluster --name dep-ctrl --config -
+kind: Cluster
+apiVersion: kind.x-k8s.io/v1alpha4
+nodes:
+ - role: control-plane
+ extraPortMappings:
+ - containerPort: 31443
+ hostPort: 31443
+ protocol: TCP
+EOF
# Install cert-manager
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.17.2/cert-manager.yaml
@@ -42,54 +48,214 @@ spec:
EOF
```
-For kcp, follow the [kcp installation guide](https://docs.kcp.io/kcp/main/setup/install/)
-or deploy it via its [Helm chart](https://github.com/kcp-dev/helm-charts).
-
## Overview
-The deployment has four phases:
-
-1. **kcp setup** -- create the dep-ctrl workspace, apply schemas and the
- APIExport
-2. **Bootstrap RBAC** -- grant the controller and webhook identities the
- minimum permissions they need in kcp
-3. **Helm install** -- deploy the controller and webhook into your Kubernetes
+The deployment has six phases:
+
+1. **Deploy kcp** -- install kcp-operator, etcd, and create the kcp shards and
+ front-proxy
+2. **kcp workspace setup** -- create the dep-ctrl workspace, apply schemas and
+ the APIExport
+3. **Bootstrap RBAC** -- grant the controller and webhook identities the minimum
+ permissions they need in kcp
+4. **Create kubeconfigs** -- generate client certificates for the controller and
+ webhook via kcp-operator Kubeconfig CRs
+5. **Helm install** -- deploy the controller and webhook into your Kubernetes
cluster
-4. **Provider onboarding** -- providers bind to the dep-ctrl APIExport and
+6. **Provider onboarding** -- providers bind to the dep-ctrl APIExport and
create DependencyRules
-```
-Management Cluster (kind) kcp
+```text
+Management Cluster (kind) kcp (via kcp-operator)
+-------------------------------+ +----------------------------------+
-| dependency-controller pod | | root workspace |
-| reads Workspace objects |--->| ClusterRoles for both SAs |
-| manages webhooks via VW | | |
-| | | root:dep-ctrl workspace |
-| dependency-webhook pod | | APIExport: DependencyRule |
-| watches rules via VW |--->| ClusterRoles for controller SA |
-| serves admission requests | | |
-+-------------------------------+ | system:admin (shard-local) |
- | ClusterRole for webhook SA |
- | (shard-wide apiexports/content)|
- | |
- | root:network-provider |
- | APIExport: VPCs |
- | ValidatingWebhook (installed |
- | by controller via VW) |
+| kcp-operator | | root shard |
+| etcd (one per shard) | | system:admin (per-shard) |
+| root-kcp (root shard pod) | | ClusterRoleBinding for |
+| shard1-kcp (shard1 pod) | | webhook SA (wildcard read) |
+| kcp-front-proxy | | root workspace |
+| | | ClusterRole for controller |
+| dependency-controller pod | | root:dep-ctrl workspace |
+| reads Workspace objects |--->| APIExport: DependencyRule |
+| manages webhooks via VW | | ClusterRoles for both SAs |
+| | | root:compute-provider |
+| dependency-webhook pod | | APIExport: VMs |
+| watches rules via VW | | DependencyRule: VM -> VPC |
+| serves admission requests |--->| root:network-provider |
++-------------------------------+ | APIExport: VPCs |
+ | ValidatingWebhook (installed |
+ | by controller via VW) |
| |
- | root:compute-provider |
- | APIExport: VMs |
- | DependencyRule: VM -> VPC |
+ | shard1 |
+ | system:admin (per-shard) |
+ | same webhook binding as root |
+ | (consumer workspaces may live |
+ | on any shard) |
+----------------------------------+
```
-## Step 1: Create the dep-ctrl workspace and apply schemas
+## Step 1: Deploy kcp via kcp-operator
+
+### 1a. Install the kcp-operator
+
+```sh
+helm repo add kcp https://kcp-dev.github.io/helm-charts
+helm repo update kcp
+
+helm install kcp-operator kcp/kcp-operator \
+ --namespace kcp-system --create-namespace \
+ --wait --timeout 300s
+```
+
+### 1b. Deploy etcd
+
+Each kcp shard needs its own etcd instance. For production, use a proper etcd
+cluster; for dev/testing a single-replica StatefulSet per shard is sufficient.
+You need a `Service` (client port 2379) and a `StatefulSet` per shard.
+
+For a working example, see the `applyEtcd()` function in
+[`test/e2e/suite_test.go`](../test/e2e/suite_test.go) which creates minimal
+single-node etcd instances.
+
+```sh
+# Create etcd for the root shard (Service + StatefulSet named "etcd-root")
+# Create etcd for the secondary shard ("etcd-shard1", optional for multi-shard)
+
+# Wait for etcd to be ready
+kubectl -n kcp-system wait statefulset etcd-root \
+ --for=jsonpath='{.status.readyReplicas}'=1 --timeout=120s
+```
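+
+A minimal single-node sketch for dev use (names match the `etcd-root` example
+above; the image tag and flags are illustrative):
+
+```yaml
+apiVersion: v1
+kind: Service
+metadata:
+  name: etcd-root
+  namespace: kcp-system
+spec:
+  clusterIP: None
+  selector:
+    app: etcd-root
+  ports:
+    - name: client
+      port: 2379
+    - name: peer
+      port: 2380
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: etcd-root
+  namespace: kcp-system
+spec:
+  serviceName: etcd-root
+  replicas: 1
+  selector:
+    matchLabels:
+      app: etcd-root
+  template:
+    metadata:
+      labels:
+        app: etcd-root
+    spec:
+      containers:
+        - name: etcd
+          image: quay.io/coreos/etcd:v3.5.17
+          command:
+            - etcd
+            - --data-dir=/var/lib/etcd
+            - --listen-client-urls=http://0.0.0.0:2379
+            - --advertise-client-urls=http://etcd-root.kcp-system.svc:2379
+          ports:
+            - containerPort: 2379
+              name: client
+            - containerPort: 2380
+              name: peer
+```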
+
+### 1c. Create a cert-manager Issuer
+
+kcp-operator uses cert-manager for PKI (server certs, client CAs, etc.):
+
+```sh
+# A self-signed Issuer is sufficient for dev; the name is illustrative.
+kubectl -n kcp-system apply -f - <<'EOF'
+apiVersion: cert-manager.io/v1
+kind: Issuer
+metadata:
+  name: kcp-selfsigned
+spec:
+  selfSigned: {}
+EOF
+```
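+
+### 1d. Create the shards, front-proxy, and admin kubeconfig
+
+Create `RootShard`, `Shard` (for multi-shard), and `FrontProxy` objects
+referencing the etcd endpoints and the Issuer above -- see the kcp-operator
+documentation and the e2e suite
+([`test/e2e/suite_test.go`](../test/e2e/suite_test.go)) for complete, working
+examples. Once the shards are ready, generate an admin kubeconfig via a
+kcp-operator `Kubeconfig` CR and extract it from its secret. A sketch (the CR
+and secret names are illustrative; `system:kcp:admin` is kcp's built-in admin
+group):
+
+```sh
+kubectl apply -f - <<'EOF'
+apiVersion: operator.kcp.io/v1alpha1
+kind: Kubeconfig
+metadata:
+  name: kcp-admin
+  namespace: kcp-system
+spec:
+  username: kcp-admin
+  groups:
+    - "system:kcp:admin"
+  validity: 8766h
+  secretRef:
+    name: kcp-admin-kubeconfig
+  target:
+    rootShardRef:
+      name: root
+EOF
+
+kubectl -n kcp-system wait secret kcp-admin-kubeconfig \
+  --for=jsonpath='{.data.kubeconfig}' --timeout=120s
+
+kubectl -n kcp-system get secret kcp-admin-kubeconfig \
+  -o jsonpath='{.data.kubeconfig}' | base64 -d > /tmp/kcp-admin.kubeconfig
+```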
+
+If you're using kind with a NodePort, rewrite the server URL in the kubeconfig
+to `https://localhost:<port>`. The kcp-operator generates two contexts:
+`base` (bare URL) and `default` (URL + `/clusters/root`). Use `base` if you
+want to specify workspace paths manually via `--server`.
-The dependency-controller manages its own API type (`DependencyRule`) via a kcp
-APIExport. You need to create a workspace for it and apply the schema and export
-definitions.
+## Step 2: Create the dep-ctrl workspace and apply schemas
```sh
+export KUBECONFIG=/tmp/kcp-admin.kubeconfig
+
# Create the dep-ctrl workspace
kubectl ws root
kubectl ws create dep-ctrl --enter
@@ -118,52 +284,33 @@ access another workspace's resources -- instead, the APIExport's
acts as a proxy. `permissionClaims` tell kcp which resource types the APIExport
provider is allowed to manage in binding workspaces via that proxy. Provider
workspaces must explicitly accept these claims when creating their APIBinding
-(covered in [Step 5](#step-5-onboard-a-provider)).
+(covered in [Step 6](#step-6-onboard-a-provider)).
-## Step 2: Bootstrap RBAC in kcp
+## Step 3: Bootstrap RBAC in kcp
Both components run with dedicated service account identities. They need
-specific permissions in several kcp locations, applied once using a privileged
-identity (e.g., `system:masters` via a bootstrap certificate).
+specific permissions in several kcp locations, applied once using the admin
+kubeconfig.
### Root workspace RBAC
-Both components need `workspaces/content` access to enter child workspaces
-(this is how kcp authorizes traversing the workspace hierarchy). The controller
-additionally needs to read `Workspace` objects to resolve workspace paths
-(like `root:network-provider`) to the logical cluster names that the virtual
-workspace requires.
+The **controller** needs `workspaces/content` access to enter child workspaces
+(this is how kcp authorizes traversing the workspace hierarchy) and
+`workspaces` read access to resolve paths like `root:network-provider` to the
+logical cluster names the virtual workspace requires.
```sh
kubectl ws root
kubectl apply -f test/fixtures/root-rbac-bootstrap.yaml
```
-The file
-([`test/fixtures/root-rbac-bootstrap.yaml`](../test/fixtures/root-rbac-bootstrap.yaml))
-creates:
-
-| Resource | Name | Purpose |
-|---|---|---|
-| ClusterRole | `dependency-controller` | `workspaces/content` access + `workspaces` read (get/list/watch) |
-| ClusterRoleBinding | `dependency-controller` | Binds to controller SA |
-| ClusterRole | `dependency-controller-webhook` | `workspaces/content` access only |
-| ClusterRoleBinding | `dependency-controller-webhook` | Binds to webhook SA |
-
-**Why `workspaces/content`?** Without it, kcp rejects any request to a child
-workspace's API, including virtual workspace endpoints. Both components access
-the dep-ctrl workspace where their kubeconfig points.
-
-**Why `workspaces` read for the controller?** The virtual workspace accepts
-logical cluster names (opaque IDs like `qh6707jkfsen31z9`), not workspace paths
-(`root:network-provider`). The controller reads `Workspace` objects from root
-to map paths to cluster names via `workspace.spec.cluster`.
+(The webhook does **not** get RBAC here -- its broad read is granted in
+`system:admin` per shard, see below.)
### Dep-ctrl workspace RBAC
-The controller needs access to the dep-ctrl APIExport's virtual workspace.
-This is authorized by the `apiexports/content` subresource in the workspace
-where the APIExport is defined.
+Both components need access to the dep-ctrl APIExport's virtual workspace and
+`apiexportendpointslices` for VW URL discovery:
```sh
kubectl ws root:dep-ctrl
@@ -172,58 +319,83 @@ kubectl apply -f test/fixtures/depctrl-rbac-bootstrap.yaml
The file
([`test/fixtures/depctrl-rbac-bootstrap.yaml`](../test/fixtures/depctrl-rbac-bootstrap.yaml))
-creates:
+grants both the controller and webhook SAs:
-| Resource | Name | Permissions |
-|---|---|---|
-| ClusterRole | `dependency-controller` | `apiexportendpointslices` read + `apiexports/content` full CRUD |
-| ClusterRoleBinding | `dependency-controller` | Binds to controller SA |
+- `apiexportendpointslices` read access (for VW URL discovery at startup)
+- `apiexports/content` full CRUD (for managing resources through the VW)
-**Why `apiexportendpointslices`?** The controller discovers the virtual
-workspace URL by reading the `APIExportEndpointSlice` for the
-`dependencies.opendefense.cloud` APIExport. This URL is the entry point for
-all operations through the virtual workspace.
+### Webhook RBAC in `system:admin` (per shard)
-**Why `apiexports/content` with full CRUD?** The controller writes resources
-(webhooks) in binding workspaces through the virtual workspace. The verb on
-`apiexports/content` controls what operations are allowed through the VW --
-read-only access would block the controller from creating anything.
+The webhook queries dependent resources directly in each consumer workspace,
+and consumer workspaces can live on any shard. kcp's
+`BootstrapPolicyAuthorizer` reads RBAC from the local shard's `system:admin`
+logical cluster only -- bindings do **not** propagate across shards -- so the
+webhook needs a `ClusterRole` + `ClusterRoleBinding` in `system:admin` of
+**every shard** that hosts consumer workspaces.
-### Shard-wide RBAC via system:admin
+`system:admin` is intentionally not reachable through the front-proxy. To
+apply RBAC there you need:
-The webhook needs read access to `apiexports/content` and
-`apiexportendpointslices` across **all** workspaces on the shard. Rather than
-creating per-workspace RBAC, this is granted once in kcp's `system:admin`
-logical cluster. The Bootstrap Policy Authorizer evaluates `system:admin` RBAC
-for every request on the shard.
+1. A kubeconfig issued via a kcp-operator `Kubeconfig` CR with
+ `target.rootShardRef` (or `target.shardRef` for secondary shards) and
+   `groups: ["system:masters"]` -- `system:masters` is honored by the shard
+ directly but not by the front-proxy.
+2. Direct shard access. The shard's ClusterIP service is normally not exposed;
+   `kubectl port-forward svc/<shard-service>` to localhost is the simplest path.
+ Make sure the shard's server certificate has `localhost` / `127.0.0.1` in
+ its SANs (kcp-operator's `RootShard` / `Shard` `certificateTemplates`).
-This must be applied via the **kcp server** (not the front-proxy), using a
-`system:masters` identity:
+Example (root shard):
```sh
-kubectl --server=https://<kcp-server-host>:6443/clusters/system:admin \
- apply -f test/fixtures/shard-admin-rbac-bootstrap.yaml
-```
+# 1. Generate a system:masters kubeconfig for the root shard
+kubectl apply -f - <<'EOF'
+apiVersion: operator.kcp.io/v1alpha1
+kind: Kubeconfig
+metadata:
+ name: root-system-masters
+ namespace: kcp-system
+spec:
+ username: bootstrap-system-masters
+ groups:
+ - "system:masters"
+ validity: 8766h
+ secretRef:
+ name: root-system-masters-kubeconfig
+ target:
+ rootShardRef:
+ name: root
+EOF
-The file
-([`test/fixtures/shard-admin-rbac-bootstrap.yaml`](../test/fixtures/shard-admin-rbac-bootstrap.yaml))
-creates:
+kubectl -n kcp-system wait secret root-system-masters-kubeconfig \
+ --for=jsonpath='{.data.kubeconfig}' --timeout=120s
-| Resource | Name | Permissions |
-|---|---|---|
-| ClusterRole | `dependency-controller-webhook-apiexport-reader` | `apiexports/content` + `apiexportendpointslices` read |
-| ClusterRoleBinding | `dependency-controller-webhook-apiexport-reader` | Binds to webhook SA |
+kubectl -n kcp-system get secret root-system-masters-kubeconfig \
+ -o jsonpath='{.data.kubeconfig}' | base64 -d > /tmp/root-masters.kubeconfig
+# Rewrite the server URL in /tmp/root-masters.kubeconfig to https://localhost:6443
+# (or any free port you pick for the port-forward).
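+# For instance, assuming the kubeconfig's cluster entry is named "base"
+# (hypothetical; check with `kubectl config get-clusters`):
+#   kubectl --kubeconfig /tmp/root-masters.kubeconfig config set-cluster base \
+#     --server=https://localhost:6443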
-**Why shard-wide?** The webhook watches DependencyRules via the dep-ctrl
-APIExport VW, and reads dependent resources (e.g., VirtualMachines) via other
-providers' APIExport VWs. These VWs span many workspaces. Granting read access
-at the shard level avoids the need for per-workspace RBAC management and
-removes the need for `clusterroles`/`clusterrolebindings` permissionClaims on
-the APIExport.
+# 2. Port-forward the root shard service
+kubectl -n kcp-system port-forward svc/root-kcp 6443:6443 &
+PF_PID=$!
-**Why `system:admin`?** This is a shard-local logical cluster (not a workspace)
-where the Bootstrap Policy Authorizer evaluates RBAC for every request on the
-shard. It is accessed via the kcp server directly, not the front-proxy.
+# 3. Apply the binding in /clusters/system:admin
+kubectl --kubeconfig /tmp/root-masters.kubeconfig \
+ --server https://localhost:6443/clusters/system:admin \
+ apply --validate=false -f test/fixtures/system-admin-rbac-bootstrap.yaml
+
+# 4. Stop the port-forward
+kill $PF_PID
+```
+
+Repeat for every additional shard, swapping `rootShardRef` for
+`shardRef: {name: <shard-name>}` in the `Kubeconfig` CR and pointing the
+port-forward at that shard's service (e.g., `svc/shard1-shard-kcp`).
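+
+Only the `target` of the `Kubeconfig` CR changes for a secondary shard
+(illustrative fragment, assuming a shard named `shard1`):
+
+```yaml
+spec:
+  target:
+    shardRef:
+      name: shard1
+```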
+
+The fixture
+([`test/fixtures/system-admin-rbac-bootstrap.yaml`](../test/fixtures/system-admin-rbac-bootstrap.yaml))
+grants the webhook SA `get`/`list` on `*/*` (all resources in all API groups).
+Within a shard, this binding applies to every workspace on that shard -- no
+per-consumer RBAC is needed.
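+
+Its shape is roughly (a sketch; the fixture file is authoritative and the
+names are illustrative):
+
+```yaml
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: dependency-controller-webhook
+rules:
+  - apiGroups: ["*"]
+    resources: ["*"]
+    verbs: ["get", "list"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: dependency-controller-webhook
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: dependency-controller-webhook
+subjects:
+  - apiGroup: rbac.authorization.k8s.io
+    kind: User
+    name: system:serviceaccount:dependency-system:dependency-webhook
+```
+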
### Adjusting service account names
@@ -233,37 +405,95 @@ The bootstrap RBAC files bind to these default identities:
If your deployment uses different service account names or namespaces, edit the
`subjects` in the `ClusterRoleBinding` resources before applying. The names must
-match what you configure in the Helm values (Step 3).
+match what you configure in the Helm values (Step 5).
-## Step 3: Create kubeconfigs for the components
+## Step 4: Create kubeconfigs for the components
Each component needs a kubeconfig that authenticates as its service account
-identity and targets the dep-ctrl workspace. How you create these depends on
-your kcp setup:
-
-- **Client certificates**: create certificates with the appropriate CN
- (e.g., `system:serviceaccount:dependency-system:dependency-controller`)
- signed by kcp's CA
-- **Service account tokens**: if kcp supports token-based auth, create tokens
- bound to the service accounts
+identity. Use kcp-operator `Kubeconfig` CRs to generate client certificates:
-The kubeconfig server URL should point to kcp with the dep-ctrl workspace path:
+```sh
+# Controller kubeconfig
+kubectl apply -f - <<'EOF'
+apiVersion: operator.kcp.io/v1alpha1
+kind: Kubeconfig
+metadata:
+ name: controller-kubeconfig
+ namespace: kcp-system
+spec:
+ username: "system:serviceaccount:dependency-system:dependency-controller"
+ groups:
+ - "system:authenticated"
+ - "system:serviceaccounts"
+ - "system:serviceaccounts:dependency-system"
+ validity: 8766h
+ secretRef:
+ name: controller-kubeconfig
+ target:
+ rootShardRef:
+ name: root
+EOF
+# Webhook kubeconfig
+kubectl apply -f - <<'EOF'
+apiVersion: operator.kcp.io/v1alpha1
+kind: Kubeconfig
+metadata:
+ name: webhook-kubeconfig
+ namespace: kcp-system
+spec:
+ username: "system:serviceaccount:dependency-system:dependency-webhook"
+ groups:
+ - "system:authenticated"
+ - "system:serviceaccounts"
+ - "system:serviceaccounts:dependency-system"
+ validity: 8766h
+ secretRef:
+ name: webhook-kubeconfig
+ target:
+ rootShardRef:
+ name: root
+EOF
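+
+# Wait until the operator has issued both kubeconfigs
+kubectl -n kcp-system wait secret controller-kubeconfig webhook-kubeconfig \
+  --for=jsonpath='{.data.kubeconfig}' --timeout=120s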
```
-https://<kcp-host>:6443/clusters/root:dep-ctrl
-```
-Store the kubeconfigs as Kubernetes secrets in the management cluster:
+**Important notes about kcp-operator Kubeconfig CRs:**
+
+- **`rootShardRef` (not `frontProxyRef`)**: This generates client certificates
+ signed by `root-client-ca`, which is trusted by both the front-proxy (via
+ `kcp-merged-client-ca`) and all shards directly. This is required because the
+ multicluster-provider connects to APIExport virtual workspace URLs that point
+ directly at shards, not through the front-proxy.
+
+- **`system:authenticated` group**: kcp's front-proxy uses request header
+ impersonation. Unlike direct Kubernetes authentication, impersonated
+ identities do not automatically get the `system:authenticated` group -- it
+ must be listed explicitly.
+
+- **Two contexts**: kcp-operator generates kubeconfigs with two contexts:
+ `base` (bare shard URL) and `default` (shard URL + `/clusters/root`). The
+ component kubeconfigs should be rewritten to point at the front-proxy with
+ the dep-ctrl workspace path (`/clusters/root:dep-ctrl`) before mounting
+ them as secrets.
+
+Extract and rewrite the kubeconfigs:
```sh
+# Extract the generated kubeconfigs
+kubectl -n kcp-system get secret controller-kubeconfig \
+  -o jsonpath='{.data.kubeconfig}' | base64 -d > /tmp/controller.kubeconfig
+kubectl -n kcp-system get secret webhook-kubeconfig \
+  -o jsonpath='{.data.kubeconfig}' | base64 -d > /tmp/webhook.kubeconfig
+
+# Rewrite the server URL in each file from the shard URL to the front-proxy +
+# workspace path (https://<front-proxy-host>:<port>/clusters/root:dep-ctrl).
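+# For instance, assuming the cluster entry is named "base" (hypothetical;
+# check with `kubectl config get-clusters`) and a front-proxy at
+# https://kcp.example.com:
+#   kubectl --kubeconfig /tmp/controller.kubeconfig config set-cluster base \
+#     --server=https://kcp.example.com/clusters/root:dep-ctrl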
+
+# Store as Kubernetes secrets
kubectl -n dependency-system create secret generic kcp-controller-kubeconfig \
- --from-file=kubeconfig=/path/to/controller.kubeconfig
+ --from-file=kubeconfig=/tmp/controller.kubeconfig
kubectl -n dependency-system create secret generic kcp-webhook-kubeconfig \
- --from-file=kubeconfig=/path/to/webhook.kubeconfig
+ --from-file=kubeconfig=/tmp/webhook.kubeconfig
```
-## Step 4: Deploy with Helm
+## Step 5: Deploy with Helm
The Helm chart deploys both the controller and webhook as separate Deployments
in a single release. The controller automatically discovers the webhook's
@@ -272,7 +502,6 @@ service URL and TLS CA from the co-deployed resources.
```sh
helm install dep-ctrl charts/dependency-controller \
--namespace dependency-system --create-namespace \
-  --set kcpBaseHost=https://<kcp-host>:6443 \
--set controller.kubeconfig.secretName=kcp-controller-kubeconfig \
--set webhook.kubeconfig.secretName=kcp-webhook-kubeconfig \
--set webhook.tls.certManager.issuerRef.name=selfsigned
@@ -282,11 +511,15 @@ Key values:
| Value | Purpose |
|---|---|
-| `kcpBaseHost` | Root kcp URL (no workspace path). Used by the controller to resolve workspace paths and by the webhook to discover APIExport VW URLs. |
-| `controller.kubeconfig.secretName` | Secret containing the controller's kubeconfig (from Step 3). |
-| `webhook.kubeconfig.secretName` | Secret containing the webhook's kubeconfig (from Step 3). |
+| `controller.kubeconfig.secretName` | Secret containing the controller's kubeconfig (from Step 4). |
+| `webhook.kubeconfig.secretName` | Secret containing the webhook's kubeconfig (from Step 4). |
| `webhook.tls.certManager.issuerRef.name` | cert-manager issuer for the webhook's TLS certificate. |
+Both components automatically derive the kcp front-proxy base URL from their
+kubeconfig by stripping the `/clusters/...` workspace path suffix. No
+additional base URL flag is needed (though `kcpBaseHost` can be set to
+override this if your kubeconfig host doesn't follow the standard pattern).
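+
+For example, a kubeconfig host of
+`https://kcp.example.com/clusters/root:dep-ctrl` yields the base URL
+`https://kcp.example.com`.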
+
See [`charts/dependency-controller/values.yaml`](../charts/dependency-controller/values.yaml)
for all available options.
@@ -303,14 +536,14 @@ The webhook pod's readiness probe only passes once it has populated its rule
registry (listed all existing DependencyRules). On first deploy with no rules,
this is near-instant.
-## Step 5: Onboard a provider
+## Step 6: Onboard a provider
This example uses two providers -- a network provider (exports VPCs) and a
compute provider (exports VirtualMachines). The compute provider will create a
DependencyRule saying "VMs depend on VPCs", which blocks VPC deletion while
any VM references it.
-### 5a. Create provider workspaces and APIExports
+### 6a. Create provider workspaces and APIExports
```sh
# Network provider
@@ -326,7 +559,7 @@ kubectl apply -f test/fixtures/apiresourceschema-virtualmachines.yaml
kubectl apply -f test/fixtures/apiexport-compute.test.io.yaml
```
-### 5b. Bind providers to the dep-ctrl APIExport
+### 6b. Bind providers to the dep-ctrl APIExport
**Every provider workspace referenced in a DependencyRule must bind to the
dep-ctrl APIExport and accept the permissionClaims.** This applies to both
@@ -397,10 +630,8 @@ A reference fixture is available at
The accepted permissionClaim grants the controller permission to create
`ValidatingWebhookConfigurations` in the workspace (for deletion protection).
-The webhook's read access to provider APIExports is handled shard-wide via the
-`system:admin` bootstrap RBAC (Step 2) -- no per-workspace RBAC is needed.
-### 5c. Create a DependencyRule
+### 6c. Create a DependencyRule
The compute provider declares that VirtualMachines depend on VPCs:
@@ -417,6 +648,7 @@ spec:
group: compute.test.io
version: v1
kind: VirtualMachine
+ resource: virtualmachines
dependencies:
- apiExportRef:
path: root:network-provider
@@ -433,14 +665,11 @@ A reference fixture is available at
[`test/fixtures/dependencyrule-vm-dependencies.yaml`](../test/fixtures/dependencyrule-vm-dependencies.yaml).
Once applied, the controller will install a `ValidatingWebhookConfiguration`
-in `root:network-provider` (protecting VPC deletions).
+in `root:network-provider` (protecting VPC deletions). The webhook registers
+the rule's metadata in its registry and begins serving admission requests
+for VPC deletions.
-The webhook will:
-1. Start an indexed cache watching VirtualMachines via the compute APIExport VW
- (authorized by the shard-wide `system:admin` RBAC from Step 2)
-2. Begin serving admission requests for VPC deletions
-
-### 5d. Create a consumer workspace and test resources
+### 6d. Create a consumer workspace and test resources
```sh
kubectl ws root
@@ -497,7 +726,7 @@ spec:
EOF
```
-### 5e. Verify deletion protection
+### 6e. Verify deletion protection
```sh
# This should be denied:
@@ -513,7 +742,7 @@ kubectl delete vpc my-vpc -n default
## How the pieces fit together
-Here's the flow that makes Step 5e work:
+Here's the flow that makes Step 6e work:
1. The **controller** watches DependencyRules via the dep-ctrl APIExport's
virtual workspace
@@ -523,13 +752,44 @@ Here's the flow that makes Step 5e work:
`/clusters/<logical-cluster-name>` and creates a
`ValidatingWebhookConfiguration` in the network-provider workspace
(authorized by the accepted `validatingwebhookconfigurations` permissionClaim)
-4. The **webhook** also watches the same DependencyRule and starts an indexed
- cache watching VirtualMachines via compute.test.io's VW (authorized by the
- shard-wide `system:admin` RBAC from Step 2)
+4. The **webhook** also watches the same DependencyRule and registers the rule's
+ metadata (dependent GVR, field paths, target GVR) in its in-memory registry
5. When a consumer deletes a VPC, kcp dispatches the DELETE to the webhook
(via the installed `ValidatingWebhookConfiguration`)
-6. The webhook queries its indexed cache: "any VMs where `.spec.vpcRef.name`
- equals `my-vpc`?" -- finds `my-vm` and denies the deletion
+6. The webhook extracts the logical cluster name from the `kcp.io/cluster`
+ annotation on the object, constructs a temporary dynamic client scoped to
+ `{frontProxy}/clusters/{clusterName}`, and lists VirtualMachines in that
+ namespace
+7. It filters by field path: "any VMs where `.spec.vpcRef.name` equals
+ `my-vpc`?" -- finds `my-vm` and denies the deletion
+
+## Multi-shard considerations
+
+The dependency-controller is multi-shard aware. The runtime data path needs
+no per-shard configuration — but the **bootstrap RBAC does**:
+
+- **Webhook installation**: The controller installs webhooks via the dep-ctrl
+ APIExport's virtual workspace, which automatically routes to the correct
+ shard for each provider workspace. No changes per shard.
+
+- **Per-request queries**: The webhook queries dependent resources via the
+ kcp front-proxy, which transparently routes requests to the correct shard
+ based on the logical cluster name. No webhook-side configuration changes
+ per shard.
+
+- **`system:admin` RBAC must be applied per shard.** kcp's
+ `BootstrapPolicyAuthorizer` reads RBAC from the local shard's `system:admin`
+ logical cluster only; bindings do not propagate across shards. Each new
+ shard that hosts consumer workspaces needs the webhook's wildcard read
+ binding applied via the procedure in [Step 3](#webhook-rbac-in-systemadmin-per-shard).
+
+- **Root and dep-ctrl workspace RBAC are applied once.** Those workspaces
+  live on the root shard, so their bindings do not need to be repeated per shard.
+
+- **Kubeconfigs**: Component kubeconfigs must use certificates signed by
+ `root-client-ca` (via `rootShardRef` in the Kubeconfig CR). This CA is
+ trusted by both the front-proxy and all shards, which is required because
+ VW URLs point directly at shards.
## Troubleshooting
@@ -543,11 +803,19 @@ kubectl -n dependency-system logs -l app.kubernetes.io/component=webhook
```
Common issues:
+
- **Kubeconfig invalid** -- the webhook can't reach kcp
-- **Missing system:admin RBAC** -- the webhook SA needs shard-wide
- `apiexports/content` and `apiexportendpointslices` read access (Step 2)
-- **Missing root RBAC** -- the webhook SA needs `workspaces/content` in root
- (Step 2)
+- **Missing dep-ctrl workspace RBAC** -- ensure
+ [Step 3](#dep-ctrl-workspace-rbac) was applied
+- **Missing `system:admin` RBAC on a shard** -- if the webhook can list on some
+ consumer workspaces but not others, the failing ones are likely on a shard
+ where the `system:admin` binding from
+ [Step 3](#webhook-rbac-in-systemadmin-per-shard) was never applied.
+  Verify with `kubectl auth can-i list <dependent-resource>
+  --as=system:serviceaccount:dependency-system:dependency-webhook`
+  against the consumer workspace path (see the example after this list).
+- **Certificate not trusted by shards** -- ensure kubeconfigs use `rootShardRef`
+ (not `frontProxyRef`) so certs are signed by `root-client-ca`
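+
+For example, to check list access in a consumer workspace (hypothetical
+workspace path and resource; adjust to your setup):
+
+```sh
+kubectl --server=https://<front-proxy-host>:<port>/clusters/root:consumer \
+  auth can-i list virtualmachines \
+  --as=system:serviceaccount:dependency-system:dependency-webhook
+```
+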
### Webhook not blocking deletions
@@ -567,8 +835,8 @@ kubectl get apibinding dependencies.opendefense.cloud -o jsonpath='{.status.phas
### Controller can't create webhooks
Check that the provider workspace's APIBinding has accepted the
-permissionClaims (Step 5b). Without acceptance, the VW rejects write
-operations:
+`validatingwebhookconfigurations` permissionClaim (Step 6b). Without
+acceptance, the VW rejects write operations:
```sh
kubectl ws root:network-provider
@@ -578,7 +846,7 @@ kubectl get apibinding dependencies.opendefense.cloud -o yaml
### Force-deleting a protected resource
-If the webhook is down or caches are stale, annotate the resource to bypass
+If the webhook is down or rules are stale, annotate the resource to bypass
protection:
```sh
diff --git a/internal/controller/dependencyrule_controller.go b/internal/controller/dependencyrule_controller.go
index 41699fa..8e948ed 100644
--- a/internal/controller/dependencyrule_controller.go
+++ b/internal/controller/dependencyrule_controller.go
@@ -10,7 +10,6 @@ import (
"sync"
"github.com/kcp-dev/logicalcluster/v3"
- apisv1alpha1 "github.com/kcp-dev/sdk/apis/apis/v1alpha1"
tenancyv1alpha1 "github.com/kcp-dev/sdk/apis/tenancy/v1alpha1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/rest"
@@ -35,18 +34,13 @@ type DependencyRuleReconciler struct {
// workspaces. Nil if webhook installation is not configured.
WebhookInstaller *WebhookInstaller
- // APIExportName is the name of the dep-ctrl APIExport, used to discover
- // the virtual workspace URL from the APIExportEndpointSlice.
- APIExportName string
-
- // BaseConfig is the root kcp REST config (no workspace path suffix).
- // Used to resolve workspace paths to logical cluster names by listing
- // Workspace objects in the root workspace.
+ // BaseConfig is the kcp front-proxy REST config (no workspace path suffix).
+ // Used for webhook installation (routed to the correct shard per workspace)
+ // and to resolve workspace paths to logical cluster names.
BaseConfig *rest.Config
- // mu protects lazy initialization of vwBaseConfig and wsResolver.
+ // mu protects lazy initialization of wsResolver.
mu sync.Mutex
- vwBaseCfg *rest.Config
wsResolver *workspaceResolver
}
@@ -56,44 +50,23 @@ func NewDependencyRuleReconciler(depCtrlMgr mcmanager.Manager) *DependencyRuleRe
}
}
-// ensureInitialized lazily discovers the VW base URL and creates a workspace
-// resolver. Must be called before using WebhookInstaller.
-func (r *DependencyRuleReconciler) ensureInitialized(ctx context.Context) error {
+// ensureInitialized lazily creates the workspace resolver used to map
+// workspace paths to logical cluster names for webhook installation.
+func (r *DependencyRuleReconciler) ensureInitialized() error {
r.mu.Lock()
defer r.mu.Unlock()
- if r.vwBaseCfg != nil {
+ if r.wsResolver != nil {
return nil
}
localMgr := r.DepCtrlManager.GetLocalManager()
- localCfg := localMgr.GetConfig()
-
- directClient, err := client.New(localCfg, client.Options{Scheme: localMgr.GetScheme()})
- if err != nil {
- return fmt.Errorf("creating client for VW URL discovery: %w", err)
- }
-
- var ess apisv1alpha1.APIExportEndpointSlice
- if err := directClient.Get(ctx, client.ObjectKey{Name: r.APIExportName}, &ess); err != nil {
- return fmt.Errorf("getting APIExportEndpointSlice %s: %w", r.APIExportName, err)
- }
-
- if len(ess.Status.APIExportEndpoints) == 0 {
- return fmt.Errorf("APIExportEndpointSlice %s has no endpoints", r.APIExportName)
- }
- vwURL := ess.Status.APIExportEndpoints[0].URL
- r.vwBaseCfg = rest.CopyConfig(localCfg)
- r.vwBaseCfg.Host = vwURL
-
- // Create workspace resolver using the base kcp config.
+ // Create workspace resolver using the base kcp config (front-proxy URL).
rootCfg := rest.CopyConfig(r.BaseConfig)
rootCfg.Host += logicalcluster.NewPath("root").RequestPath()
r.wsResolver = &workspaceResolver{rootCfg: rootCfg, scheme: localMgr.GetScheme()}
- log.FromContext(ctx).Info("discovered VW base URL for webhook operations", "url", vwURL)
-
return nil
}
@@ -102,8 +75,8 @@ func (r *DependencyRuleReconciler) ensureInitialized(ctx context.Context) error
func (r *DependencyRuleReconciler) Reconcile(ctx context.Context, req mcreconcile.Request) (ctrl.Result, error) {
logger := log.FromContext(ctx).WithValues("rule", req.Name, "cluster", req.ClusterName)
- if err := r.ensureInitialized(ctx); err != nil {
- logger.Error(err, "failed to initialize VW config")
+ if err := r.ensureInitialized(); err != nil {
+ logger.Error(err, "failed to initialize workspace resolver")
return ctrl.Result{}, err
}
@@ -142,22 +115,25 @@ func (r *DependencyRuleReconciler) Reconcile(ctx context.Context, req mcreconcil
return ctrl.Result{}, nil
}
-// ensureWebhooks installs webhooks in dependency provider workspaces via the VW.
+// ensureWebhooks installs webhooks in dependency provider workspaces via the virtual workspace.
func (r *DependencyRuleReconciler) ensureWebhooks(ctx context.Context, ruleKey string, rule *v1alpha1.DependencyRule) error {
- // Replace workspace paths with logical cluster names for the installer.
- resolvedRule := rule.DeepCopy()
- for i := range resolvedRule.Spec.Dependencies {
- wsPath := resolvedRule.Spec.Dependencies[i].APIExportRef.Path
+ // Build a mapping from workspace path to logical cluster name.
+ clusterNames := make(map[string]string, len(rule.Spec.Dependencies))
+ for _, dep := range rule.Spec.Dependencies {
+ wsPath := dep.APIExportRef.Path
+ if _, ok := clusterNames[wsPath]; ok {
+ continue
+ }
+
clusterName, err := r.wsResolver.resolve(wsPath)
if err != nil {
return fmt.Errorf("resolving %s: %w", wsPath, err)
}
- resolvedRule.Spec.Dependencies[i].APIExportRef.Path = clusterName
- }
- r.WebhookInstaller.BaseConfig = r.vwBaseCfg
+ clusterNames[wsPath] = clusterName
+ }
- return r.WebhookInstaller.EnsureWebhooks(ctx, ruleKey, resolvedRule)
+ return r.WebhookInstaller.EnsureWebhooks(ctx, ruleKey, rule, clusterNames)
}
// collectWorkspacePaths extracts dependency workspace paths referenced in a DependencyRule.
diff --git a/internal/controller/integration_test.go b/internal/controller/integration_test.go
index f52f6e6..8d6531b 100644
--- a/internal/controller/integration_test.go
+++ b/internal/controller/integration_test.go
@@ -194,7 +194,6 @@ var _ = Describe("Dependency Controller", Ordered, func() {
cacheMgr := &depwebhook.RuleCacheManager{
DepCtrlManager: mgr,
- BaseConfig: kcpConfig,
Scheme: scheme.Scheme,
APIExportName: "dependencies.opendefense.cloud",
Registry: registry,
@@ -219,10 +218,9 @@ var _ = Describe("Dependency Controller", Ordered, func() {
Expect(err).NotTo(HaveOccurred())
// Create the controller-side reconciler (webhook install only, no RBAC in envtest).
- webhookInstaller := controller.NewWebhookInstaller(kcpConfig, webhookURL, caBundle)
+ webhookInstaller := controller.NewWebhookInstaller(mgr, webhookURL, caBundle)
ruleReconciler := controller.NewDependencyRuleReconciler(mgr)
- ruleReconciler.APIExportName = "dependencies.opendefense.cloud"
ruleReconciler.BaseConfig = kcpConfig
ruleReconciler.WebhookInstaller = webhookInstaller
@@ -233,7 +231,7 @@ var _ = Describe("Dependency Controller", Ordered, func() {
Expect(err).NotTo(HaveOccurred())
// Wire up the webhook handler with the rule registry.
- webhookHandler = &depwebhook.DeletionValidator{Registry: registry, Initialized: initialized}
+ webhookHandler = &depwebhook.DeletionValidator{Registry: registry, Initialized: initialized, BaseConfig: kcpConfig}
// Start the manager.
go func() {
diff --git a/internal/controller/webhook_installer.go b/internal/controller/webhook_installer.go
index c8f2dc3..cd38551 100644
--- a/internal/controller/webhook_installer.go
+++ b/internal/controller/webhook_installer.go
@@ -8,15 +8,13 @@ import (
"fmt"
"sync"
- "github.com/kcp-dev/logicalcluster/v3"
registrationv1 "k8s.io/api/admissionregistration/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
- "k8s.io/client-go/kubernetes/scheme"
- "k8s.io/client-go/rest"
- "sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"
+ mcmanager "sigs.k8s.io/multicluster-runtime/pkg/manager"
+ "sigs.k8s.io/multicluster-runtime/pkg/multicluster"
v1alpha1 "go.opendefense.cloud/dependency-controller/api/v1alpha1"
)
@@ -35,7 +33,11 @@ const webhookName = "dependency-controller"
// When a DependencyRule is deleted, its contributions are removed. If no rules
// remain for a workspace the webhook is deleted entirely.
type WebhookInstaller struct {
- BaseConfig *rest.Config
+ // Manager is the multicluster manager for the dep-ctrl APIExport. Used to
+ // obtain per-workspace clients via the virtual workspace, which scopes
+ // access to workspaces that have bound the APIExport and accepted the
+ // validatingwebhookconfigurations permissionClaim.
+ Manager mcmanager.Manager
WebhookURL string
CABundle []byte
@@ -56,9 +58,9 @@ type webhookRuleKey struct {
Resource string
}
-func NewWebhookInstaller(baseCfg *rest.Config, webhookURL string, caBundle []byte) *WebhookInstaller {
+func NewWebhookInstaller(mgr mcmanager.Manager, webhookURL string, caBundle []byte) *WebhookInstaller {
return &WebhookInstaller{
- BaseConfig: baseCfg,
+ Manager: mgr,
WebhookURL: webhookURL,
CABundle: caBundle,
ruleTargets: make(map[string][]ruleTarget),
@@ -66,17 +68,19 @@ func NewWebhookInstaller(baseCfg *rest.Config, webhookURL string, caBundle []byt
}
// EnsureWebhooks installs or updates ValidatingWebhookConfigurations for all
-// dependency targets in the given rule.
-func (w *WebhookInstaller) EnsureWebhooks(ctx context.Context, ruleKey string, rule *v1alpha1.DependencyRule) error {
- // Group targets by provider workspace so we do one update per workspace.
- byWorkspace := make(map[string][]v1alpha1.DependencyTarget)
+// dependency targets in the given rule. The clusterNames map translates
+// workspace paths (from APIExportRef.Path) to logical cluster names used
+// to connect via the virtual workspace.
+func (w *WebhookInstaller) EnsureWebhooks(ctx context.Context, ruleKey string, rule *v1alpha1.DependencyRule, clusterNames map[string]string) error {
+ // Group targets by logical cluster name so we do one update per workspace.
+ byCluster := make(map[string][]v1alpha1.DependencyTarget)
for _, dep := range rule.Spec.Dependencies {
- wsPath := dep.APIExportRef.Path
- byWorkspace[wsPath] = append(byWorkspace[wsPath], dep)
+ clusterName := clusterNames[dep.APIExportRef.Path]
+ byCluster[clusterName] = append(byCluster[clusterName], dep)
}
- for wsPath, deps := range byWorkspace {
- if err := w.ensureWebhookForWorkspace(ctx, ruleKey, wsPath, deps); err != nil {
+ for clusterName, deps := range byCluster {
+ if err := w.ensureWebhookForWorkspace(ctx, ruleKey, clusterName, deps); err != nil {
return err
}
}
@@ -151,13 +155,11 @@ func (w *WebhookInstaller) reconcileWorkspaceWebhook(ctx context.Context, wsPath
// Compute desired rules for this workspace across all DependencyRules.
desired := w.desiredRulesForWorkspace(wsPath)
- cfg := rest.CopyConfig(w.BaseConfig)
- cfg.Host += logicalcluster.NewPath(wsPath).RequestPath()
-
- c, err := client.New(cfg, client.Options{Scheme: scheme.Scheme})
+ cl, err := w.Manager.GetCluster(ctx, multicluster.ClusterName(wsPath))
if err != nil {
- return fmt.Errorf("creating client for %s: %w", wsPath, err)
+ return fmt.Errorf("getting cluster %s from manager: %w", wsPath, err)
}
+ c := cl.GetClient()
	whCfg := &registrationv1.ValidatingWebhookConfiguration{}
err = c.Get(ctx, types.NamespacedName{Name: webhookName}, whCfg)
diff --git a/internal/kcp/endpointslice.go b/internal/kcp/endpointslice.go
new file mode 100644
index 0000000..b833d36
--- /dev/null
+++ b/internal/kcp/endpointslice.go
@@ -0,0 +1,31 @@
+// Copyright 2026 BWI GmbH and Dependency Controller contributors
+// SPDX-License-Identifier: Apache-2.0
+
+package kcp
+
+import (
+ "context"
+ "fmt"
+
+ apisv1alpha1 "github.com/kcp-dev/sdk/apis/apis/v1alpha1"
+ "sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+// FindEndpointSlice finds the APIExportEndpointSlice whose spec.export.name
+// matches the given APIExport name. The slice name is not guaranteed to match
+// the APIExport name — service providers can create custom slices with
+// different names.
+func FindEndpointSlice(ctx context.Context, c client.Reader, apiExportName string) (*apisv1alpha1.APIExportEndpointSlice, error) {
+ var list apisv1alpha1.APIExportEndpointSliceList
+ if err := c.List(ctx, &list); err != nil {
+ return nil, fmt.Errorf("listing APIExportEndpointSlices: %w", err)
+ }
+
+ for i := range list.Items {
+ if list.Items[i].Spec.APIExport.Name == apiExportName {
+ return &list.Items[i], nil
+ }
+ }
+
+ return nil, fmt.Errorf("no APIExportEndpointSlice found for APIExport %q", apiExportName)
+}
diff --git a/internal/kcp/kubeconfig.go b/internal/kcp/kubeconfig.go
new file mode 100644
index 0000000..932d354
--- /dev/null
+++ b/internal/kcp/kubeconfig.go
@@ -0,0 +1,45 @@
+// Copyright 2026 BWI GmbH and Dependency Controller contributors
+// SPDX-License-Identifier: Apache-2.0
+
+package kcp
+
+import (
+ "errors"
+ "strings"
+
+ "k8s.io/client-go/rest"
+)
+
+// ValidateKubeconfig checks that the given rest.Config points to a kcp
+// workspace (i.e. the host URL contains /clusters/). This catches the most
+// common misconfiguration — pointing at a plain Kubernetes cluster or at kcp
+// without a workspace path.
+func ValidateKubeconfig(cfg *rest.Config) error {
+ if cfg == nil {
+ return errors.New("kubeconfig is nil")
+ }
+
+ if !strings.Contains(cfg.Host, "/clusters/") {
+ return errors.New("kubeconfig host does not contain a /clusters/ workspace path — ensure the kubeconfig points to a kcp workspace (e.g. https://host/clusters/root:my-workspace)")
+ }
+
+ return nil
+}
+
+// BaseConfig returns a copy of the given rest.Config with the /clusters/...
+// workspace path stripped from the host URL, leaving only the front-proxy
+// base URL. This base URL can be used to construct workspace-scoped clients
+// by appending /clusters/.
+//
+// Returns an error if the host URL does not contain a /clusters/ path.
+func BaseConfig(cfg *rest.Config) (*rest.Config, error) {
+ idx := strings.Index(cfg.Host, "/clusters/")
+ if idx == -1 {
+ return nil, errors.New("kubeconfig host does not contain a /clusters/ workspace path — cannot derive front-proxy base URL")
+ }
+
+ baseCfg := rest.CopyConfig(cfg)
+ baseCfg.Host = cfg.Host[:idx]
+
+ return baseCfg, nil
+}
diff --git a/internal/kcp/kubeconfig_test.go b/internal/kcp/kubeconfig_test.go
new file mode 100644
index 0000000..87d8891
--- /dev/null
+++ b/internal/kcp/kubeconfig_test.go
@@ -0,0 +1,107 @@
+// Copyright 2026 BWI GmbH and Dependency Controller contributors
+// SPDX-License-Identifier: Apache-2.0
+
+package kcp
+
+import (
+ "testing"
+
+ "k8s.io/client-go/rest"
+)
+
+func TestValidateKubeconfig(t *testing.T) {
+ tests := []struct {
+ name string
+ cfg *rest.Config
+ wantErr bool
+ }{
+ {
+ name: "nil config",
+ cfg: nil,
+ wantErr: true,
+ },
+ {
+ name: "plain kubernetes URL",
+ cfg: &rest.Config{Host: "https://localhost:6443"},
+ wantErr: true,
+ },
+ {
+ name: "kcp URL without workspace path",
+ cfg: &rest.Config{Host: "https://kcp.example.com"},
+ wantErr: true,
+ },
+ {
+ name: "valid kcp workspace URL",
+ cfg: &rest.Config{Host: "https://kcp.example.com/clusters/root:dep-ctrl"},
+ wantErr: false,
+ },
+ {
+ name: "valid kcp root URL",
+ cfg: &rest.Config{Host: "https://kcp.example.com/clusters/root"},
+ wantErr: false,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ err := ValidateKubeconfig(tt.cfg)
+ if (err != nil) != tt.wantErr {
+ t.Errorf("ValidateKubeconfig() error = %v, wantErr %v", err, tt.wantErr)
+ }
+ })
+ }
+}
+
+func TestBaseConfig(t *testing.T) {
+ tests := []struct {
+ name string
+ host string
+ wantHost string
+ wantErr bool
+ }{
+ {
+ name: "no clusters path",
+ host: "https://localhost:6443",
+ wantErr: true,
+ },
+ {
+ name: "strips workspace path",
+ host: "https://kcp.example.com/clusters/root:dep-ctrl",
+ wantHost: "https://kcp.example.com",
+ wantErr: false,
+ },
+ {
+ name: "strips root path",
+ host: "https://kcp.example.com/clusters/root",
+ wantHost: "https://kcp.example.com",
+ wantErr: false,
+ },
+ {
+ name: "preserves port",
+ host: "https://localhost:31443/clusters/root:dep-ctrl",
+ wantHost: "https://localhost:31443",
+ wantErr: false,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ cfg := &rest.Config{Host: tt.host}
+ baseCfg, err := BaseConfig(cfg)
+ if (err != nil) != tt.wantErr {
+ t.Errorf("BaseConfig() error = %v, wantErr %v", err, tt.wantErr)
+ return
+ }
+ if err != nil {
+ return
+ }
+ if baseCfg.Host != tt.wantHost {
+ t.Errorf("BaseConfig() host = %q, want %q", baseCfg.Host, tt.wantHost)
+ }
+ // Verify original config is not mutated.
+ if cfg.Host != tt.host {
+ t.Errorf("original config mutated: host = %q, want %q", cfg.Host, tt.host)
+ }
+ })
+ }
+}
diff --git a/internal/webhook/deletion_validator.go b/internal/webhook/deletion_validator.go
index c553d14..1f7776a 100644
--- a/internal/webhook/deletion_validator.go
+++ b/internal/webhook/deletion_validator.go
@@ -11,12 +11,16 @@ import (
"strings"
"github.com/kcp-dev/logicalcluster/v3"
+ "k8s.io/apimachinery/pkg/api/errors"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime/schema"
- "sigs.k8s.io/controller-runtime/pkg/client"
+ "k8s.io/client-go/dynamic"
+ "k8s.io/client-go/rest"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
- "sigs.k8s.io/multicluster-runtime/pkg/multicluster"
+
+ "go.opendefense.cloud/dependency-controller/internal/fieldpath"
)
const (
@@ -41,8 +45,9 @@ func ReadyzCheck(initialized <-chan struct{}) func(*http.Request) error {
// DeletionValidator is a validating admission webhook handler that blocks
// deletion of resources that have active dependents referencing them.
//
-// It queries indexed caches maintained by the RuleCacheManager's per-rule
-// multicluster managers. No Dependency marker objects are needed.
+// On each DELETE request it constructs a temporary dynamic client scoped to
+// the consumer workspace (via the front-proxy) and lists dependent resources
+// directly, filtering by field path to find references to the deleted resource.
type DeletionValidator struct {
Registry *RuleRegistry
@@ -50,6 +55,11 @@ type DeletionValidator struct {
// all existing DependencyRules. Until then, DELETE requests are denied
// to prevent deletions slipping through before the registry is ready.
Initialized <-chan struct{}
+
+ // BaseConfig is the front-proxy REST config without a workspace path.
+ // Per-request clients are created by copying this config and setting
+ // Host to {base}/clusters/{logicalClusterName}.
+ BaseConfig *rest.Config
}
func (v *DeletionValidator) Handle(ctx context.Context, req admission.Request) admission.Response {
@@ -100,40 +110,23 @@ func (v *DeletionValidator) Handle(ctx context.Context, req admission.Request) a
return admission.Allowed("")
}
- // Query each matching rule's indexed cache for dependents referencing this resource.
+ // Build a dynamic client scoped to the consumer workspace.
+ dynClient, err := v.workspaceClient(clusterName.String())
+ if err != nil {
+ logger.Error(err, "failed to create workspace client")
+ return admission.Errored(http.StatusInternalServerError, fmt.Errorf("creating workspace client: %w", err))
+ }
+
+ // Query each matching rule's dependent resources in the consumer workspace.
var blockers []string
for _, entry := range entries {
- if !entry.State.IsReady() {
- msg := fmt.Sprintf("dependency check unavailable for rule %s: cache warming up, retry later", entry.Key)
- logger.Info(msg)
-
- return admission.Denied(msg)
- }
-
- cluster, err := entry.State.Manager.GetCluster(ctx, multicluster.ClusterName(clusterName.String()))
+ dependents, err := v.listDependents(ctx, dynClient, entry, req.Name, req.Namespace)
if err != nil {
- // Cluster not known to this rule's manager — no dependents here.
- logger.V(1).Info("cluster not found in rule manager, skipping", "rule", entry.Key, "cluster", clusterName)
- continue
+ logger.Error(err, "failed to query dependents", "rule", entry.Key)
+ return admission.Errored(http.StatusInternalServerError, fmt.Errorf("checking dependencies for rule %s: %w", entry.Key, err))
}
- // Query the field index for dependents referencing the deleted resource name.
- // Scoped to the same namespace — cross-namespace references are not supported.
- list := &unstructured.UnstructuredList{}
- list.SetGroupVersionKind(entry.State.DependentGVK.GroupVersion().WithKind(entry.State.DependentGVK.Kind + "List"))
-
- err = cluster.GetCache().List(ctx, list,
- client.MatchingFields{entry.MatchedField.FieldPath: req.Name},
- client.InNamespace(req.Namespace),
- )
- if err != nil {
- logger.Error(err, "failed to query indexed cache", "rule", entry.Key)
- return admission.Errored(http.StatusInternalServerError, fmt.Errorf("failed to check dependencies for rule %s: %w", entry.Key, err))
- }
-
- for _, item := range list.Items {
- blockers = append(blockers, fmt.Sprintf("%s/%s", entry.State.DependentGVK.Kind, item.GetName()))
- }
+ blockers = append(blockers, dependents...)
}
if len(blockers) > 0 {
@@ -147,6 +140,56 @@ func (v *DeletionValidator) Handle(ctx context.Context, req admission.Request) a
return admission.Allowed("")
}
+// workspaceClient creates a dynamic client targeting a specific workspace
+// through the front-proxy.
+func (v *DeletionValidator) workspaceClient(clusterName string) (dynamic.Interface, error) {
+ cfg := rest.CopyConfig(v.BaseConfig)
+ cfg.Host = fmt.Sprintf("%s/clusters/%s", strings.TrimRight(cfg.Host, "/"), clusterName)
+
+ return dynamic.NewForConfig(cfg)
+}
+
+// listDependents lists dependent resources in the consumer workspace and
+// returns the names of those that reference the deleted resource via the
+// rule's field path.
+func (v *DeletionValidator) listDependents(
+ ctx context.Context,
+ dynClient dynamic.Interface,
+ entry RuleEntry,
+ targetName, namespace string,
+) ([]string, error) {
+ gvr := entry.State.DependentGVR
+
+ var res dynamic.ResourceInterface
+ if namespace != "" {
+ res = dynClient.Resource(gvr).Namespace(namespace)
+ } else {
+ res = dynClient.Resource(gvr)
+ }
+
+ list, err := res.List(ctx, metav1.ListOptions{})
+ if err != nil {
+ // If the dependent resource type doesn't exist in this workspace
+ // (e.g., consumer hasn't bound the provider APIExport), there are
+ // no dependents — allow deletion.
+ if errors.IsNotFound(err) {
+ return nil, nil
+ }
+
+ return nil, fmt.Errorf("listing %s: %w", gvr, err)
+ }
+
+ var blockers []string
+ for _, item := range list.Items {
+ val := fieldpath.Resolve(item.Object, entry.MatchedField.FieldPath)
+ if val == targetName {
+ blockers = append(blockers, fmt.Sprintf("%s/%s", entry.State.DependentGVK.Kind, item.GetName()))
+ }
+ }
+
+ return blockers, nil
+}
+
// objectFromRequest extracts the unstructured object from the admission
// request's OldObject (for DELETE) or Object (for other operations).
func objectFromRequest(req admission.Request) (*unstructured.Unstructured, error) {
diff --git a/internal/webhook/rule_cache_manager.go b/internal/webhook/rule_cache_manager.go
index 65327c6..100cb22 100644
--- a/internal/webhook/rule_cache_manager.go
+++ b/internal/webhook/rule_cache_manager.go
@@ -8,39 +8,28 @@ import (
"fmt"
"github.com/kcp-dev/logicalcluster/v3"
- "github.com/kcp-dev/multicluster-provider/apiexport"
- apisv1alpha1 "github.com/kcp-dev/sdk/apis/apis/v1alpha1"
- "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/client-go/rest"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"
- "sigs.k8s.io/controller-runtime/pkg/manager"
- metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
- mcbuilder "sigs.k8s.io/multicluster-runtime/pkg/builder"
- mccontroller "sigs.k8s.io/multicluster-runtime/pkg/controller"
mcmanager "sigs.k8s.io/multicluster-runtime/pkg/manager"
mcreconcile "sigs.k8s.io/multicluster-runtime/pkg/reconcile"
v1alpha1 "go.opendefense.cloud/dependency-controller/api/v1alpha1"
- "go.opendefense.cloud/dependency-controller/internal/fieldpath"
+ "go.opendefense.cloud/dependency-controller/internal/kcp"
)
-// RuleCacheManager reconciles DependencyRule objects and manages a dedicated
-// indexed cache per rule. Each rule's dependent resource type (e.g., VirtualMachine)
-// is watched through a multicluster manager connected to the provider's APIExport
-// virtual workspace. Field indices on the dependent resources allow the
-// DeletionValidator to efficiently query "which dependents reference resource X?".
+// RuleCacheManager reconciles DependencyRule objects and populates the
+// RuleRegistry with rule metadata. The DeletionValidator uses the registry
+// to look up which rules protect a given resource type, then queries
+// dependent resources directly per admission request.
type RuleCacheManager struct {
// DepCtrlManager is the multicluster manager for the dep-ctrl's APIExport.
DepCtrlManager mcmanager.Manager
- // BaseConfig is the root kcp REST config (no workspace path suffix).
- BaseConfig *rest.Config
-
- // Scheme is the runtime scheme used when creating per-rule managers.
+ // Scheme is the runtime scheme used when creating clients.
Scheme *runtime.Scheme
// APIExportName is the name of the dep-ctrl APIExport (and its
@@ -48,48 +37,49 @@ type RuleCacheManager struct {
// during initial registry population.
APIExportName string
- // Registry holds the per-rule cache state queried by the DeletionValidator.
+ // Registry holds the rule metadata queried by the DeletionValidator.
Registry *RuleRegistry
}
-// PopulateRegistry lists all existing DependencyRules from the APIExport virtual
-// workspace and ensures an indexed cache exists for each one. This must be called
-// after the manager has started (e.g., from a manager.Runnable) so that the
-// APIExportEndpointSlice is available.
+// PopulateRegistry lists all existing DependencyRules from every shard's
+// APIExport virtual workspace and registers their metadata in the registry.
+// This must be called after the manager has started (e.g., from a
+// manager.Runnable) so that the APIExportEndpointSlice is available.
func (m *RuleCacheManager) PopulateRegistry(ctx context.Context) error {
logger := log.FromContext(ctx).WithName("rule-cache-manager")
- vwClient, err := m.virtualWorkspaceClient(ctx)
+ vwClients, err := m.virtualWorkspaceClients(ctx)
if err != nil {
- return fmt.Errorf("creating virtual workspace client: %w", err)
+ return fmt.Errorf("creating virtual workspace clients: %w", err)
}
- var ruleList v1alpha1.DependencyRuleList
- if err := vwClient.List(ctx, &ruleList); err != nil {
- return fmt.Errorf("listing initial DependencyRules: %w", err)
- }
+ var totalRules int
+ for _, vwClient := range vwClients {
+ var ruleList v1alpha1.DependencyRuleList
+ if err := vwClient.List(ctx, &ruleList); err != nil {
+ return fmt.Errorf("listing initial DependencyRules: %w", err)
+ }
- logger.Info("populating rule registry", "ruleCount", len(ruleList.Items))
+ totalRules += len(ruleList.Items)
- for i := range ruleList.Items {
- rule := &ruleList.Items[i]
- clusterName := logicalcluster.From(rule)
- key := ruleStateKey(clusterName.String(), rule.Name)
- if err := m.ensureCache(ctx, key, clusterName.String(), rule); err != nil {
- return fmt.Errorf("populating rule %s/%s: %w", clusterName, rule.Name, err)
+ for i := range ruleList.Items {
+ rule := &ruleList.Items[i]
+ clusterName := logicalcluster.From(rule)
+ key := ruleStateKey(clusterName.String(), rule.Name)
+ m.registerRule(key, rule)
}
}
- logger.Info("rule registry populated")
+ logger.Info("rule registry populated", "ruleCount", totalRules, "shards", len(vwClients))
return nil
}
-// virtualWorkspaceClient reads the APIExportEndpointSlice from the dep-ctrl
-// workspace to discover the virtual workspace URL, then returns a client
-// pointing at {vwURL}/clusters/* so it can list resources across all bound
-// workspaces.
-func (m *RuleCacheManager) virtualWorkspaceClient(ctx context.Context) (client.Client, error) {
+// virtualWorkspaceClients reads the APIExportEndpointSlice from the dep-ctrl
+// workspace to discover the virtual workspace URLs (one per kcp shard), then
+// returns a client per shard pointing at {vwURL}/clusters/* so it can list
+// resources across all bound workspaces on that shard.
+func (m *RuleCacheManager) virtualWorkspaceClients(ctx context.Context) ([]client.Client, error) {
// Use a direct (non-cached) client to read the APIExportEndpointSlice
// from the dep-ctrl workspace.
localCfg := m.DepCtrlManager.GetLocalManager().GetConfig()
@@ -98,32 +88,34 @@ func (m *RuleCacheManager) virtualWorkspaceClient(ctx context.Context) (client.C
return nil, fmt.Errorf("creating direct client: %w", err)
}
- var ess apisv1alpha1.APIExportEndpointSlice
- if err := directClient.Get(ctx, client.ObjectKey{Name: m.APIExportName}, &ess); err != nil {
- return nil, fmt.Errorf("getting APIExportEndpointSlice %s: %w", m.APIExportName, err)
+ ess, err := kcp.FindEndpointSlice(ctx, directClient, m.APIExportName)
+ if err != nil {
+ return nil, fmt.Errorf("resolving APIExportEndpointSlice for %s: %w", m.APIExportName, err)
}
if len(ess.Status.APIExportEndpoints) == 0 {
- return nil, fmt.Errorf("APIExportEndpointSlice %s has no endpoints", m.APIExportName)
+ return nil, fmt.Errorf("APIExportEndpointSlice %s has no endpoints", ess.Name)
}
- vwURL := ess.Status.APIExportEndpoints[0].URL
+ // Create a client for each shard's virtual workspace URL.
+ var clients []client.Client
+ for _, ep := range ess.Status.APIExportEndpoints {
+ vwCfg := rest.CopyConfig(localCfg)
+ vwCfg.Host = ep.URL + "/clusters/*"
- // Create a client for the wildcard cluster path to list across all
- // logical clusters visible through the virtual workspace.
- vwCfg := rest.CopyConfig(localCfg)
- vwCfg.Host = vwURL + "/clusters/*"
+ vwClient, err := client.New(vwCfg, client.Options{Scheme: m.Scheme})
+ if err != nil {
+ return nil, fmt.Errorf("creating VW client for %s: %w", ep.URL, err)
+ }
- vwClient, err := client.New(vwCfg, client.Options{Scheme: m.Scheme})
- if err != nil {
- return nil, fmt.Errorf("creating VW client: %w", err)
+ clients = append(clients, vwClient)
}
- return vwClient, nil
+ return clients, nil
}
-// Reconcile handles DependencyRule events. On creation/update it ensures an
-// indexed cache exists for the rule. On deletion it tears down the cache.
+// Reconcile handles DependencyRule events. On creation/update it registers
+// the rule metadata. On deletion it removes it from the registry.
func (m *RuleCacheManager) Reconcile(ctx context.Context, req mcreconcile.Request) (ctrl.Result, error) {
logger := log.FromContext(ctx).WithValues("rule", req.Name, "cluster", req.ClusterName)
@@ -139,7 +131,7 @@ func (m *RuleCacheManager) Reconcile(ctx context.Context, req mcreconcile.Reques
var rule v1alpha1.DependencyRule
if err := cl.GetClient().Get(ctx, client.ObjectKey{Name: req.Name}, &rule); err != nil {
if client.IgnoreNotFound(err) == nil {
- logger.Info("DependencyRule deleted, tearing down cache")
+ logger.Info("DependencyRule deleted, removing from registry")
m.Registry.Unregister(key)
return ctrl.Result{}, nil
@@ -148,143 +140,44 @@ func (m *RuleCacheManager) Reconcile(ctx context.Context, req mcreconcile.Reques
return ctrl.Result{}, err
}
- if err := m.ensureCache(ctx, key, clusterName, &rule); err != nil {
- logger.Error(err, "failed to ensure cache for rule")
- return ctrl.Result{}, err
- }
+ m.registerRule(key, &rule)
return ctrl.Result{}, nil
}
-// ensureCache creates a multicluster manager for the rule's dependent resource
-// type, registers field indices for each dependency target, and stores the
-// resulting cache state in the registry. If a cache already exists for the
-// given key this is a no-op.
-func (m *RuleCacheManager) ensureCache(ctx context.Context, key string, clusterName string, rule *v1alpha1.DependencyRule) error {
- if m.Registry.Exists(key) {
- return nil
- }
-
+// registerRule extracts metadata from a DependencyRule and stores it in the
+// registry.
+func (m *RuleCacheManager) registerRule(key string, rule *v1alpha1.DependencyRule) {
dep := rule.Spec.Dependent
-
- mgr, mgrCancel, err := m.startProviderManager(ctx, clusterName, dep.APIExportName)
- if err != nil {
- return fmt.Errorf("creating manager for rule %s: %w", rule.Name, err)
- }
-
gvk := schema.GroupVersionKind{
Group: dep.Group,
Version: dep.Version,
Kind: dep.Kind,
}
+ gvr := schema.GroupVersionResource{
+ Group: dep.Group,
+ Version: dep.Version,
+ Resource: dep.Resource,
+ }
- // Build indexed fields for each dependency target.
var indexFields []IndexedField
- watchObj := &unstructured.Unstructured{}
- watchObj.SetGroupVersionKind(gvk)
-
for _, depTarget := range rule.Spec.Dependencies {
- fieldPath := depTarget.FieldRef.Path
- targetGVR := schema.GroupVersionResource{
- Group: depTarget.Group,
- Version: depTarget.Version,
- Resource: depTarget.Resource,
- }
-
- // Register an index on the dependent resource's field path.
- if err := mgr.GetFieldIndexer().IndexField(ctx, watchObj, fieldPath, func(obj client.Object) []string {
- u, ok := obj.(*unstructured.Unstructured)
- if !ok {
- return nil
- }
- val := fieldpath.Resolve(u.Object, fieldPath)
- if val == "" {
- return nil
- }
-
- return []string{val}
- }); err != nil {
- mgrCancel()
- return fmt.Errorf("indexing field %s for rule %s: %w", fieldPath, rule.Name, err)
- }
-
indexFields = append(indexFields, IndexedField{
- FieldPath: fieldPath,
- TargetGVR: targetGVR,
+ FieldPath: depTarget.FieldRef.Path,
+ TargetGVR: schema.GroupVersionResource{
+ Group: depTarget.Group,
+ Version: depTarget.Version,
+ Resource: depTarget.Resource,
+ },
})
}
- // Register a controller to activate the informer and track discovered clusters.
- registry := m.Registry
- ruleKey := key
- if err := mcbuilder.ControllerManagedBy(mgr).
- Named(fmt.Sprintf("dep-index-%s", key)).
- WithOptions(mccontroller.Options{SkipNameValidation: new(true)}).
- For(watchObj).
- Complete(mcreconcile.Func(func(ctx context.Context, req mcreconcile.Request) (ctrl.Result, error) {
- registry.TrackCluster(ruleKey, string(req.ClusterName))
- if state := registry.Get(ruleKey); state != nil && !state.IsReady() {
- registry.MarkReady(ruleKey)
- }
-
- return ctrl.Result{}, nil
- })); err != nil {
- mgrCancel()
- return fmt.Errorf("registering index controller for rule %s: %w", rule.Name, err)
- }
-
- state := &RuleState{
- Manager: mgr,
- Cancel: mgrCancel,
+ m.Registry.Register(key, &RuleState{
Rule: rule.Spec,
DependentGVK: gvk,
+ DependentGVR: gvr,
IndexFields: indexFields,
- }
-
- // Register atomically; if a concurrent reconcile raced us, cancel the
- // duplicate manager we just created.
- if old := m.Registry.Register(key, state); old != nil {
- old.Cancel()
- }
-
- return nil
-}
-
-// startProviderManager creates a new multicluster manager backed by the given
-// APIExport's virtual workspace. The manager is started in a background
-// goroutine and the returned cancel function tears it down.
-func (m *RuleCacheManager) startProviderManager(ctx context.Context, clusterName string, apiExportName string) (mcmanager.Manager, context.CancelFunc, error) {
- logger := log.FromContext(ctx).WithValues("apiExport", apiExportName, "cluster", clusterName)
-
- cfg := rest.CopyConfig(m.BaseConfig)
- cfg.Host += logicalcluster.NewPath(clusterName).RequestPath()
-
- provider, err := apiexport.New(cfg, apiExportName, apiexport.Options{
- Scheme: m.Scheme,
})
- if err != nil {
- return nil, nil, fmt.Errorf("creating apiexport provider: %w", err)
- }
-
- mgr, err := mcmanager.New(cfg, provider, manager.Options{
- Scheme: m.Scheme,
- Metrics: metricsserver.Options{BindAddress: "0"},
- HealthProbeBindAddress: "0",
- })
- if err != nil {
- return nil, nil, fmt.Errorf("creating multicluster manager: %w", err)
- }
-
- mgrCtx, cancel := context.WithCancel(ctx)
-
- go func() {
- logger.Info("starting provider manager for APIExport")
- if err := mgr.Start(mgrCtx); err != nil {
- logger.Error(err, "provider manager failed")
- }
- }()
-
- return mgr, cancel, nil
}
// ruleStateKey returns a qualified key combining the cluster and rule name.
diff --git a/internal/webhook/rule_registry.go b/internal/webhook/rule_registry.go
index 08ab27e..50128b5 100644
--- a/internal/webhook/rule_registry.go
+++ b/internal/webhook/rule_registry.go
@@ -7,32 +7,26 @@ import (
"sync"
"k8s.io/apimachinery/pkg/runtime/schema"
- mcmanager "sigs.k8s.io/multicluster-runtime/pkg/manager"
v1alpha1 "go.opendefense.cloud/dependency-controller/api/v1alpha1"
)
// RuleRegistry is a thread-safe registry of active DependencyRules and their
-// associated multicluster managers. It maintains a reverse index from dependency
-// target GVRs to rule keys, enabling the webhook to quickly find which rules
-// protect a given resource type.
+// metadata. It maintains a reverse index from dependency target GVRs to rule
+// keys, enabling the webhook to quickly find which rules protect a given
+// resource type.
type RuleRegistry struct {
mu sync.RWMutex
rules map[string]*RuleState
byTarget map[schema.GroupVersionResource][]string // GVR -> rule keys
}
-// RuleState holds the runtime state for a single DependencyRule.
+// RuleState holds the metadata for a single DependencyRule.
type RuleState struct {
- Manager mcmanager.Manager
- Cancel func()
Rule v1alpha1.DependencyRuleSpec
DependentGVK schema.GroupVersionKind
+ DependentGVR schema.GroupVersionResource
IndexFields []IndexedField
- Ready bool
-
- mu sync.Mutex
- knownClusters map[string]struct{}
}
// IndexedField maps a field path on the dependent resource to the dependency
@@ -59,7 +53,7 @@ func NewRuleRegistry() *RuleRegistry {
// Register adds a rule to the registry and rebuilds the reverse index.
// If a state was already registered under this key, it is replaced and
-// the old state is returned so the caller can clean it up.
+// the old state is returned so the caller can detect duplicates.
func (r *RuleRegistry) Register(key string, state *RuleState) *RuleState {
r.mu.Lock()
defer r.mu.Unlock()
@@ -71,21 +65,17 @@ func (r *RuleRegistry) Register(key string, state *RuleState) *RuleState {
return old
}
-// Unregister removes a rule from the registry, cancels its manager, and
-// rebuilds the reverse index.
+// Unregister removes a rule from the registry and rebuilds the reverse index.
func (r *RuleRegistry) Unregister(key string) {
r.mu.Lock()
- state, exists := r.rules[key]
- if !exists {
- r.mu.Unlock()
+ defer r.mu.Unlock()
+
+ if _, exists := r.rules[key]; !exists {
return
}
+
delete(r.rules, key)
r.rebuildTargetIndex()
- r.mu.Unlock()
-
- // Cancel outside the lock to avoid holding it during teardown.
- state.Cancel()
}
// Exists returns true if the given rule key is registered.
@@ -160,41 +150,3 @@ func (r *RuleRegistry) rebuildTargetIndex() {
}
}
}
-
-// TrackCluster records that the given rule has seen a resource in the given cluster.
-func (r *RuleRegistry) TrackCluster(key, clusterName string) {
- r.mu.RLock()
- state := r.rules[key]
- r.mu.RUnlock()
- if state == nil {
- return
- }
- state.mu.Lock()
- defer state.mu.Unlock()
- if state.knownClusters == nil {
- state.knownClusters = make(map[string]struct{})
- }
- state.knownClusters[clusterName] = struct{}{}
-}
-
-// IsReady returns true if the rule's cache is synced and ready for queries.
-func (s *RuleState) IsReady() bool {
- s.mu.Lock()
- defer s.mu.Unlock()
-
- return s.Ready
-}
-
-// MarkReady marks a rule's cache as synced and ready for queries.
-func (r *RuleRegistry) MarkReady(key string) {
- r.mu.RLock()
- state := r.rules[key]
- r.mu.RUnlock()
- if state == nil {
- return
- }
-
- state.mu.Lock()
- defer state.mu.Unlock()
- state.Ready = true
-}
diff --git a/internal/webhook/rule_registry_test.go b/internal/webhook/rule_registry_test.go
index 0f7b02d..0a9550b 100644
--- a/internal/webhook/rule_registry_test.go
+++ b/internal/webhook/rule_registry_test.go
@@ -20,15 +20,16 @@ func TestRuleRegistry_RegisterAndFind(t *testing.T) {
subnetGVR := schema.GroupVersionResource{Group: "network.test.io", Version: "v1", Resource: "subnets"}
state := &RuleState{
- Cancel: func() {},
Rule: v1alpha1.DependencyRuleSpec{
Dependent: v1alpha1.DependentRef{
- Group: "compute.test.io",
- Version: "v1",
- Kind: "VirtualMachine",
+ Group: "compute.test.io",
+ Version: "v1",
+ Kind: "VirtualMachine",
+ Resource: "virtualmachines",
},
},
DependentGVK: schema.GroupVersionKind{Group: "compute.test.io", Version: "v1", Kind: "VirtualMachine"},
+ DependentGVR: schema.GroupVersionResource{Group: "compute.test.io", Version: "v1", Resource: "virtualmachines"},
IndexFields: []IndexedField{
{FieldPath: ".spec.vpcRef.name", TargetGVR: vpcGVR},
{FieldPath: ".spec.subnetRef.name", TargetGVR: subnetGVR},
@@ -63,10 +64,8 @@ func TestRuleRegistry_Unregister(t *testing.T) {
r := NewRuleRegistry()
vpcGVR := schema.GroupVersionResource{Group: "network.test.io", Version: "v1", Resource: "vpcs"}
- cancelled := false
state := &RuleState{
- Cancel: func() { cancelled = true },
IndexFields: []IndexedField{
{FieldPath: ".spec.vpcRef.name", TargetGVR: vpcGVR},
},
@@ -75,10 +74,6 @@ func TestRuleRegistry_Unregister(t *testing.T) {
r.Register("cluster1/vm-deps", state)
r.Unregister("cluster1/vm-deps")
- if !cancelled {
- t.Error("expected cancel to be called")
- }
-
entries := r.FindByTargetGVR(vpcGVR)
if len(entries) != 0 {
t.Errorf("expected 0 entries after unregister, got %d", len(entries))
@@ -95,11 +90,9 @@ func TestRuleRegistry_MultipleRulesSameTarget(t *testing.T) {
vpcGVR := schema.GroupVersionResource{Group: "network.test.io", Version: "v1", Resource: "vpcs"}
state1 := &RuleState{
- Cancel: func() {},
IndexFields: []IndexedField{{FieldPath: ".spec.vpcRef.name", TargetGVR: vpcGVR}},
}
state2 := &RuleState{
- Cancel: func() {},
IndexFields: []IndexedField{{FieldPath: ".spec.networkRef.vpc", TargetGVR: vpcGVR}},
}
@@ -122,44 +115,6 @@ func TestRuleRegistry_MultipleRulesSameTarget(t *testing.T) {
}
}
-func TestRuleRegistry_TrackCluster(t *testing.T) {
- r := NewRuleRegistry()
-
- state := &RuleState{
- Cancel: func() {},
- }
- r.Register("cluster1/rule", state)
-
- r.TrackCluster("cluster1/rule", "consumer-ws-1")
- r.TrackCluster("cluster1/rule", "consumer-ws-2")
- r.TrackCluster("cluster1/rule", "consumer-ws-1") // duplicate
-
- state.mu.Lock()
- count := len(state.knownClusters)
- state.mu.Unlock()
-
- if count != 2 {
- t.Errorf("expected 2 known clusters, got %d", count)
- }
-}
-
-func TestRuleRegistry_MarkReady(t *testing.T) {
- r := NewRuleRegistry()
-
- state := &RuleState{Cancel: func() {}}
- r.Register("c1/rule", state)
-
- if state.IsReady() {
- t.Error("expected Ready=false initially")
- }
-
- r.MarkReady("c1/rule")
-
- if !state.IsReady() {
- t.Error("expected Ready=true after MarkReady")
- }
-}
-
func TestRuleRegistry_UnregisterNonexistent(t *testing.T) {
r := NewRuleRegistry()
// Should not panic.
@@ -173,11 +128,9 @@ func TestRuleRegistry_AllTargetGVRs(t *testing.T) {
subnetGVR := schema.GroupVersionResource{Group: "net.io", Version: "v1", Resource: "subnets"}
r.Register("c1/rule1", &RuleState{
- Cancel: func() {},
IndexFields: []IndexedField{{FieldPath: ".spec.vpcRef.name", TargetGVR: vpcGVR}},
})
r.Register("c1/rule2", &RuleState{
- Cancel: func() {},
IndexFields: []IndexedField{
{FieldPath: ".spec.vpcRef.name", TargetGVR: vpcGVR},
{FieldPath: ".spec.subnetRef.name", TargetGVR: subnetGVR},
@@ -197,18 +150,6 @@ func TestRuleRegistry_GetNil(t *testing.T) {
}
}
-func TestRuleRegistry_MarkReadyNonexistent(t *testing.T) {
- r := NewRuleRegistry()
- // Should not panic.
- r.MarkReady("nonexistent")
-}
-
-func TestRuleRegistry_TrackClusterNonexistent(t *testing.T) {
- r := NewRuleRegistry()
- // Should not panic.
- r.TrackCluster("nonexistent", "cluster1")
-}
-
func TestRuleRegistry_ConcurrentAccess(t *testing.T) {
r := NewRuleRegistry()
vpcGVR := schema.GroupVersionResource{Group: "net.io", Version: "v1", Resource: "vpcs"}
@@ -225,13 +166,12 @@ func TestRuleRegistry_ConcurrentAccess(t *testing.T) {
key := fmt.Sprintf("cluster%d/rule%d", id%5, id)
for j := range opsPerGoroutine {
- switch j % 7 {
+ switch j % 5 {
case 0:
r.Register(key, &RuleState{
- Cancel: func() {},
IndexFields: []IndexedField{{FieldPath: ".spec.ref", TargetGVR: vpcGVR}},
Rule: v1alpha1.DependencyRuleSpec{
- Dependent: v1alpha1.DependentRef{Group: "compute.io", Version: "v1", Kind: "VM"},
+ Dependent: v1alpha1.DependentRef{Group: "compute.io", Version: "v1", Kind: "VM", Resource: "vms"},
},
})
case 1:
@@ -241,10 +181,6 @@ func TestRuleRegistry_ConcurrentAccess(t *testing.T) {
case 3:
r.Get(key)
case 4:
- r.TrackCluster(key, fmt.Sprintf("consumer-%d", j))
- case 5:
- r.MarkReady(key)
- case 6:
r.AllTargetGVRs()
}
}
@@ -273,7 +209,6 @@ func TestRuleRegistry_RegisterOverwrite(t *testing.T) {
subnetGVR := schema.GroupVersionResource{Group: "net.io", Version: "v1", Resource: "subnets"}
oldState := &RuleState{
- Cancel: func() {},
IndexFields: []IndexedField{{FieldPath: ".spec.vpcRef.name", TargetGVR: vpcGVR}},
}
old := r.Register("c1/rule", oldState)
@@ -283,7 +218,6 @@ func TestRuleRegistry_RegisterOverwrite(t *testing.T) {
// Overwrite with new state targeting a different GVR.
old = r.Register("c1/rule", &RuleState{
- Cancel: func() {},
IndexFields: []IndexedField{{FieldPath: ".spec.subnetRef.name", TargetGVR: subnetGVR}},
})
if old != oldState {
@@ -305,7 +239,7 @@ func TestRuleRegistry_RegisterOverwrite(t *testing.T) {
func TestRuleRegistry_RegisterFirstReturnsNil(t *testing.T) {
r := NewRuleRegistry()
- old := r.Register("c1/rule", &RuleState{Cancel: func() {}})
+ old := r.Register("c1/rule", &RuleState{})
if old != nil {
t.Error("expected nil when registering a new key")
}
diff --git a/test/e2e/dependency_test.go b/test/e2e/dependency_test.go
index 6072d87..7c6f7b4 100644
--- a/test/e2e/dependency_test.go
+++ b/test/e2e/dependency_test.go
@@ -40,37 +40,6 @@ var _ = Describe("Dependency Controller E2E", Ordered, func() {
})
})
- It("should have shard-wide apiexports/content access via system:admin bootstrap RBAC", func() {
- // The webhook SA has shard-wide apiexports/content and apiexportendpointslices
- // access granted via system:admin RBAC (Bootstrap Policy Authorizer), instead
- // of per-workspace dynamic RBAC managed by the controller.
-
- By("verifying webhook SA has apiexports/content on compute.test.io in compute-provider")
- waitFor(time.Minute, "webhook SA has apiexports/content on compute.test.io", func() error {
- allowed, err := webhookSACanAccessExport(wsComputeProvider, "compute.test.io")
- if err != nil {
- return fmt.Errorf("SAR check: %w", err)
- }
- if !allowed {
- return fmt.Errorf("webhook SA does not yet have apiexports/content on compute.test.io")
- }
-
- return nil
- })
-
- By("verifying webhook SA also has apiexports/content on network.test.io (shard-wide)")
- allowed, err := webhookSACanAccessExport(wsNetworkProvider, "network.test.io")
- Expect(err).NotTo(HaveOccurred())
- Expect(allowed).To(BeTrue(),
- "webhook SA should have shard-wide apiexports/content access via system:admin")
-
- By("verifying webhook SA cannot access secrets in compute-provider")
- allowed, err = webhookSACanAccessResource(wsComputeProvider, "", "secrets", "list")
- Expect(err).NotTo(HaveOccurred())
- Expect(allowed).To(BeFalse(),
- "webhook SA should NOT be able to list secrets")
- })
-
It("should block VPC deletion while a VM references it", func() {
waitFor(time.Minute, "webhook blocks VPC deletion", func() error {
out, err := kcpctlNoFail(wsConsumer1, "delete", "vpc", "my-vpc", "--namespace", "default")
@@ -137,6 +106,39 @@ var _ = Describe("Dependency Controller E2E", Ordered, func() {
})
})
+ It("should block VPC deletion in consumer2 (may be on a different shard)", func() {
+ // consumer2 may be scheduled on a different shard than consumer1,
+ // validating that webhook protection works regardless of shard placement.
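+		// (The multi-shard config in suite_test.go pins consumer2 to shard1
+		// and consumer1 to root; single-shard places everything on root.)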
+ By("creating a VPC in consumer2")
+ applyFixtureToWS(wsConsumer2, filepath.Join(fixturesDir, "vpc-shard-vpc.yaml"), nil)
+
+ By("creating a VM referencing the VPC in consumer2")
+ applyFixtureToWS(wsConsumer2, filepath.Join(fixturesDir, "vm-shard-vm.yaml"), nil)
+
+ By("waiting for the webhook to block shard-vpc deletion")
+ waitFor(time.Minute, "webhook blocks shard-vpc deletion", func() error {
+ out, err := kcpctlNoFail(wsConsumer2, "delete", "vpc", "shard-vpc", "--namespace", "default")
+ if err == nil {
+ applyFixtureToWS(wsConsumer2, filepath.Join(fixturesDir, "vpc-shard-vpc.yaml"), nil)
+ return fmt.Errorf("deletion was not blocked, recreated VPC")
+ }
+ if strings.Contains(out, "still referenced by") {
+ return nil
+ }
+
+ return fmt.Errorf("unexpected output: %s", out)
+ })
+
+ By("deleting the VM to release the VPC")
+ kcpctl(wsConsumer2, "delete", "virtualmachine", "shard-vm", "--namespace", "default")
+
+ By("verifying VPC deletion now succeeds")
+ waitFor(time.Minute, "shard-vpc deletion allowed after VM removed", func() error {
+ _, err := kcpctlNoFail(wsConsumer2, "delete", "vpc", "shard-vpc", "--namespace", "default")
+ return err
+ })
+ })
+
It("should allow deletion when skip-protection annotation is set", func() {
By("creating a VPC and VM in consumer1")
applyFixtureToWS(wsConsumer1, filepath.Join(fixturesDir, "vpc-skip-vpc.yaml"), nil)
@@ -208,9 +210,6 @@ var _ = Describe("Dependency Controller E2E", Ordered, func() {
return err
}()).To(Succeed())
- // RBAC is not affected by rule deletion — webhook SA retains shard-wide
- // apiexports/content access via system:admin bootstrap RBAC.
-
// Clean up remaining resources.
kcpctlNoFail(wsConsumer1, "delete", "virtualmachine", "cleanup-vm", "--namespace", "default") //nolint:errcheck
})
@@ -251,4 +250,45 @@ var _ = Describe("Dependency Controller E2E", Ordered, func() {
})
kcpctlNoFail(wsComputeProvider, "delete", "dependencyrule", "vm-dependencies") //nolint:errcheck
})
+
+ It("should propagate in-place DependencyRule updates to the webhook", func() {
+		// Patching a live rule's fieldRef to point at a non-existent path
+		// must cause the webhook's rule registry to re-evaluate dependents:
+		// there are now zero matches, so deletion must be allowed without
+		// the user touching VMs or recreating the rule.
+
+ By("creating the DependencyRule and a VPC+VM that triggers protection")
+ applyFixtureToWS(wsComputeProvider, filepath.Join(fixturesDir, "dependencyrule-vm-dependencies.yaml"), subs)
+ applyFixtureToWS(wsConsumer1, filepath.Join(fixturesDir, "vpc-update-vpc.yaml"), nil)
+ applyFixtureToWS(wsConsumer1, filepath.Join(fixturesDir, "vm-update-vm.yaml"), nil)
+
+ By("waiting for the webhook to block update-vpc deletion under the original fieldPath")
+ waitFor(time.Minute, "webhook blocks update-vpc deletion", func() error {
+ out, err := kcpctlNoFail(wsConsumer1, "delete", "vpc", "update-vpc", "--namespace", "default")
+ if err == nil {
+ applyFixtureToWS(wsConsumer1, filepath.Join(fixturesDir, "vpc-update-vpc.yaml"), nil)
+ return fmt.Errorf("deletion was not blocked, recreated VPC")
+ }
+ if strings.Contains(out, "still referenced by") {
+ return nil
+ }
+
+ return fmt.Errorf("unexpected output: %s", out)
+ })
+
+ By("patching the rule's fieldRef.path to a non-existent field")
+ kcpctl(wsComputeProvider, "patch", "dependencyrule", "vm-dependencies",
+ "--type=json",
+ "-p", `[{"op":"replace","path":"/spec/dependencies/0/fieldRef/path","value":".spec.notarealfield"}]`)
+
+ By("verifying the webhook now allows deletion (rule update propagated, no recreate)")
+ waitFor(time.Minute, "rule update propagated to webhook registry", func() error {
+ _, err := kcpctlNoFail(wsConsumer1, "delete", "vpc", "update-vpc", "--namespace", "default")
+ return err
+ })
+
+ By("cleaning up")
+ kcpctlNoFail(wsConsumer1, "delete", "virtualmachine", "update-vm", "--namespace", "default") //nolint:errcheck
+ kcpctlNoFail(wsComputeProvider, "delete", "dependencyrule", "vm-dependencies") //nolint:errcheck
+ })
})
diff --git a/test/e2e/suite_test.go b/test/e2e/suite_test.go
index 1cd485d..634b443 100644
--- a/test/e2e/suite_test.go
+++ b/test/e2e/suite_test.go
@@ -7,11 +7,13 @@ import (
"bytes"
"context"
"encoding/base64"
- "encoding/json"
"fmt"
+ "net"
+ "net/url"
"os"
"os/exec"
"path/filepath"
+ "regexp"
"strings"
"testing"
"time"
@@ -29,14 +31,15 @@ var (
)
const (
- kindClusterName = "dep-ctrl-e2e"
- kcpNamespace = "kcp-system"
- depNamespace = "dependency-system"
- kcpNodePort = "31500"
- kcpServerLocalPort = "31501" // NodePort to kcp server (not front-proxy)
- certManagerVer = "v1.17.2"
- imageName = "dependency-controller:integration-test"
- helmTimeout = "300s"
+ kindClusterName = "dep-ctrl-e2e"
+ kcpNamespace = "kcp-system"
+ depNamespace = "dependency-system"
+ certManagerVer = "v1.17.2"
+ imageName = "dependency-controller:integration-test"
+ helmTimeout = "300s"
+
+ // NodePort for the front-proxy service exposed via kind.
+ frontProxyNodePort = "31443"
)
// Workspace names under root.
@@ -59,8 +62,44 @@ var (
// Per-component kubeconfigs for in-cluster pods.
controllerKubeconfigPath string
webhookKubeconfigPath string
+
+ // In-cluster front-proxy base URL (extracted from kcp-operator kubeconfig).
+ inClusterFPURL string
)
+// shardPlacement maps each test workspace to the shard it should be pinned to
+// ("root" or "shard1"). Selected at suite startup via E2E_SHARD_CONFIG.
+type shardPlacement struct {
+ depCtrl string
+ networkProvider string
+ computeProvider string
+ consumer1 string
+ consumer2 string
+}
+
+// Two architecturally distinct configurations. Together they exercise:
+// - same-shard fast paths (single-shard)
+// - cross-shard webhook installation, dep-ctrl ↔ provider, consumer ↔ provider,
+// and webhook query (multi-shard)
+var shardConfigs = map[string]shardPlacement{
+ "single-shard": {
+ depCtrl: "root",
+ networkProvider: "root",
+ computeProvider: "root",
+ consumer1: "root",
+ consumer2: "root",
+ },
+ "multi-shard": {
+ depCtrl: "root",
+ networkProvider: "root",
+ computeProvider: "shard1",
+ consumer1: "root",
+ consumer2: "shard1",
+ },
+}
+
+var activeShardConfig shardPlacement
+
func TestE2E(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "E2E Suite")
@@ -83,6 +122,20 @@ func init() {
kubectlBin = lookupTool("KUBECTL", "kubectl")
helmBin = lookupTool("HELM", "helm")
dockerBin = lookupTool("DOCKER", "docker")
+
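+	// Shard placement is chosen once per suite run, e.g.:
+	//   E2E_SHARD_CONFIG=single-shard make test-e2e
+	// Defaults to multi-shard when unset.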
+ name := os.Getenv("E2E_SHARD_CONFIG")
+ if name == "" {
+ name = "multi-shard"
+ }
+ cfg, ok := shardConfigs[name]
+ if !ok {
+ valid := make([]string, 0, len(shardConfigs))
+ for k := range shardConfigs {
+ valid = append(valid, k)
+ }
+ panic(fmt.Sprintf("unknown E2E_SHARD_CONFIG %q (valid: %v)", name, valid))
+ }
+ activeShardConfig = cfg
}
// run executes a command and returns combined output. Fails the test on non-zero exit.
@@ -126,7 +179,7 @@ func kcpctl(wsPath string, args ...string) {
GinkgoHelper()
run(kubectlBin, append([]string{
"--kubeconfig", kcpHostKubeconfig,
- "--server", fmt.Sprintf("https://localhost:%s/clusters/root:%s", kcpNodePort, wsPath),
+ "--server", fmt.Sprintf("https://localhost:%s/clusters/root:%s", frontProxyNodePort, wsPath),
}, args...)...)
}
@@ -134,7 +187,7 @@ func kcpctl(wsPath string, args ...string) {
func kcpctlNoFail(wsPath string, args ...string) (string, error) {
return runNoFail(kubectlBin, append([]string{
"--kubeconfig", kcpHostKubeconfig,
- "--server", fmt.Sprintf("https://localhost:%s/clusters/root:%s", kcpNodePort, wsPath),
+ "--server", fmt.Sprintf("https://localhost:%s/clusters/root:%s", frontProxyNodePort, wsPath),
}, args...)...)
}
@@ -142,98 +195,15 @@ func kcpctlNoFail(wsPath string, args ...string) (string, error) {
func kcpctlRootNoFail(args ...string) (string, error) {
return runNoFail(kubectlBin, append([]string{
"--kubeconfig", kcpHostKubeconfig,
- "--server", fmt.Sprintf("https://localhost:%s/clusters/root", kcpNodePort),
+ "--server", fmt.Sprintf("https://localhost:%s/clusters/root", frontProxyNodePort),
}, args...)...)
}
-// subjectAccessReview is the JSON structure for a SubjectAccessReview response.
-type subjectAccessReview struct {
- Status struct {
- Allowed bool `json:"allowed"`
- Reason string `json:"reason"`
- } `json:"status"`
-}
-
-// webhookSACanAccessExport checks if the webhook SA has apiexports/content
-// access for the given APIExport in the specified workspace, using a
-// SubjectAccessReview evaluated by kcp.
-func webhookSACanAccessExport(wsPath, exportName string) (bool, error) {
- sarJSON := fmt.Sprintf(`{
- "apiVersion": "authorization.k8s.io/v1",
- "kind": "SubjectAccessReview",
- "spec": {
- "user": "system:serviceaccount:%s:dependency-webhook",
- "groups": ["system:serviceaccounts", "system:serviceaccounts:%s"],
- "resourceAttributes": {
- "group": "apis.kcp.io",
- "resource": "apiexports",
- "subresource": "content",
- "name": %q,
- "verb": "get"
- }
- }
-}`, depNamespace, depNamespace, exportName)
-
- cmd := exec.CommandContext(context.Background(), kubectlBin,
- "--kubeconfig", kcpHostKubeconfig,
- "--server", fmt.Sprintf("https://localhost:%s/clusters/root:%s", kcpNodePort, wsPath),
- "create", "-f", "-", "-o", "json",
- )
- cmd.Stdin = strings.NewReader(sarJSON)
- var buf bytes.Buffer
- cmd.Stdout = &buf
- cmd.Stderr = &buf
- if err := cmd.Run(); err != nil {
- return false, fmt.Errorf("SAR failed in %s: %s %v", wsPath, buf.String(), err)
- }
-
- var sar subjectAccessReview
- if err := json.Unmarshal(buf.Bytes(), &sar); err != nil {
- return false, fmt.Errorf("parsing SAR response: %w", err)
- }
-
- return sar.Status.Allowed, nil
-}
-
-// webhookSACanAccessResource checks if the webhook SA can access a specific
-// resource type in a workspace, using a SubjectAccessReview.
-func webhookSACanAccessResource(wsPath, group, resource, verb string) (bool, error) {
- sarJSON := fmt.Sprintf(`{
- "apiVersion": "authorization.k8s.io/v1",
- "kind": "SubjectAccessReview",
- "spec": {
- "user": "system:serviceaccount:%s:dependency-webhook",
- "groups": ["system:serviceaccounts", "system:serviceaccounts:%s"],
- "resourceAttributes": {
- "group": %q,
- "resource": %q,
- "verb": %q
- }
- }
-}`, depNamespace, depNamespace, group, resource, verb)
-
- cmd := exec.CommandContext(context.Background(), kubectlBin,
- "--kubeconfig", kcpHostKubeconfig,
- "--server", fmt.Sprintf("https://localhost:%s/clusters/root:%s", kcpNodePort, wsPath),
- "create", "-f", "-", "-o", "json",
- )
- cmd.Stdin = strings.NewReader(sarJSON)
- var buf bytes.Buffer
- cmd.Stdout = &buf
- cmd.Stderr = &buf
- if err := cmd.Run(); err != nil {
- return false, fmt.Errorf("SAR failed in %s: %s %v", wsPath, buf.String(), err)
- }
-
- var sar subjectAccessReview
- if err := json.Unmarshal(buf.Bytes(), &sar); err != nil {
- return false, fmt.Errorf("parsing SAR response: %w", err)
- }
-
- return sar.Status.Allowed, nil
-}
-
-// applyFixtureToWS applies a YAML fixture to a kcp workspace with placeholder substitution.
+// applyFixtureToWS applies a YAML fixture to a kcp workspace with placeholder
+// substitution. Retries on transient kcp authorization errors that surface
+// while APIExports are propagating across shards (a fresh consumer workspace
+// on a non-root shard cannot bind to a provider's APIExport until kcp has
+// finished publishing the APIExport's APIExportEndpointSlice on that shard).
func applyFixtureToWS(wsPath, file string, substitutions map[string]string) {
GinkgoHelper()
raw, err := os.ReadFile(file)
@@ -244,16 +214,22 @@ func applyFixtureToWS(wsPath, file string, substitutions map[string]string) {
content = strings.ReplaceAll(content, "${"+k+"}", v)
}
- cmd := exec.CommandContext(context.Background(), kubectlBin,
- "--kubeconfig", kcpHostKubeconfig,
- "--server", fmt.Sprintf("https://localhost:%s/clusters/root:%s", kcpNodePort, wsPath),
- "apply", "-f", "-",
- )
- cmd.Stdin = strings.NewReader(content)
- var buf bytes.Buffer
- cmd.Stdout = &buf
- cmd.Stderr = &buf
- Expect(cmd.Run()).To(Succeed(), "applying %s to %s: %s", file, wsPath, buf.String())
+ waitFor(2*time.Minute, fmt.Sprintf("apply %s to %s", file, wsPath), func() error {
+ cmd := exec.CommandContext(context.Background(), kubectlBin,
+ "--kubeconfig", kcpHostKubeconfig,
+ "--server", fmt.Sprintf("https://localhost:%s/clusters/root:%s", frontProxyNodePort, wsPath),
+ "apply", "-f", "-",
+ )
+ cmd.Stdin = strings.NewReader(content)
+ var buf bytes.Buffer
+ cmd.Stdout = &buf
+ cmd.Stderr = &buf
+ if err := cmd.Run(); err != nil {
+ return fmt.Errorf("%w: %s", err, buf.String())
+ }
+
+ return nil
+ })
}
// waitFor retries a check function until it succeeds or the timeout is reached.
@@ -280,14 +256,10 @@ func waitFor(timeout time.Duration, desc string, check func() error) {
}
}
-// secretField extracts a base64-decoded field from a k8s secret in the kcp namespace.
-func secretField(name, jsonpath string) []byte {
+// kindctlSecret returns the still-base64-encoded kubeconfig field of a
+// k8s secret in the kcp-system namespace (decode with decodeBase64).
+func kindctlSecret(name string) string {
GinkgoHelper()
- out := kindctl("-n", kcpNamespace, "get", "secret", name, "-o", fmt.Sprintf("jsonpath=%s", jsonpath))
- decoded, err := base64.StdEncoding.DecodeString(strings.TrimSpace(out))
- Expect(err).NotTo(HaveOccurred())
-
- return decoded
+ return kindctl("-n", kcpNamespace, "get", "secret", name, "-o", "jsonpath={.data.kubeconfig}")
}
var _ = SynchronizedBeforeSuite(func() {
@@ -306,14 +278,17 @@ var _ = SynchronizedBeforeSuite(func() {
By("installing cert-manager")
installCertManager()
- By("deploying kcp via helm")
- deployKCP()
+ By("deploying kcp via kcp-operator")
+ deployKCPOperator()
+
+ By("deploying etcd instances")
+ deployEtcd()
- By("building admin kubeconfigs")
- buildAdminKubeconfigs()
+ By("creating kcp RootShard, Shard, and FrontProxy")
+ createKCPResources()
- By("exposing kcp server via NodePort")
- kindctl("apply", "-f", filepath.Join(fixturesDir, "kcp-server-nodeport.yaml"))
+ By("generating admin kubeconfig")
+ buildAdminKubeconfig()
By("building component kubeconfigs")
buildComponentKubeconfigs()
@@ -379,202 +354,649 @@ func installCertManager() {
})
}
-func deployKCP() {
+func deployKCPOperator() {
_, _ = runNoFail(helmBin, "repo", "add", "kcp", "https://kcp-dev.github.io/helm-charts")
run(helmBin, "repo", "update", "kcp")
- run(helmBin, "upgrade", "--install", "kcp", "kcp/kcp",
+ run(helmBin, "upgrade", "--install", "kcp-operator", "kcp/kcp-operator",
"--namespace", kcpNamespace,
"--create-namespace",
- "--values", filepath.Join(fixturesDir, "integration-values-kcp.yaml"),
"--wait", "--timeout", helmTimeout,
)
}
-func buildAdminKubeconfigs() {
- kindctl("apply", "-f", filepath.Join(fixturesDir, "kcp-admin-cert.yaml"))
+func deployEtcd() {
+ // Deploy two etcd instances: one for the root shard, one for the secondary shard.
+ for _, name := range []string{"etcd-root", "etcd-shard"} {
+ applyEtcd(name)
+ }
- waitFor(time.Minute, "front-proxy admin cert issued", func() error {
- _, err := kindctlNoFail("-n", kcpNamespace, "get", "secret", "kcp-admin-front-proxy-cert",
- "-o", "jsonpath={.data.tls\\.crt}")
+ // Wait for etcd pods to be ready.
+ for _, name := range []string{"etcd-root", "etcd-shard"} {
+ waitFor(2*time.Minute, fmt.Sprintf("%s ready", name), func() error {
+ _, err := kindctlNoFail("-n", kcpNamespace, "wait", "statefulset", name,
+ "--for=jsonpath={.status.readyReplicas}=1", "--timeout=1s")
- return err
+ return err
+ })
+ }
+}
+
+// applyEtcd creates a minimal single-node etcd instance in the kcp namespace.
+func applyEtcd(name string) {
+ GinkgoHelper()
+ manifest := fmt.Sprintf(`---
+apiVersion: v1
+kind: Service
+metadata:
+ name: %[1]s
+ namespace: %[2]s
+spec:
+ type: ClusterIP
+ selector:
+ app.kubernetes.io/name: etcd
+ app.kubernetes.io/instance: %[1]s
+ ports:
+ - name: client
+ port: 2379
+ targetPort: client
+---
+apiVersion: v1
+kind: Service
+metadata:
+ name: %[1]s-headless
+ namespace: %[2]s
+ annotations:
+ service.alpha.kubernetes.io/tolerate-unready-endpoints: "true"
+spec:
+ type: ClusterIP
+ clusterIP: None
+ publishNotReadyAddresses: true
+ selector:
+ app.kubernetes.io/name: etcd
+ app.kubernetes.io/instance: %[1]s
+ ports:
+ - name: client
+ port: 2379
+ targetPort: client
+ - name: peer
+ port: 2380
+ targetPort: peer
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+ name: %[1]s
+ namespace: %[2]s
+spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ app.kubernetes.io/name: etcd
+ app.kubernetes.io/instance: %[1]s
+ serviceName: %[1]s-headless
+ template:
+ metadata:
+ labels:
+ app.kubernetes.io/name: etcd
+ app.kubernetes.io/instance: %[1]s
+ spec:
+ automountServiceAccountToken: false
+ containers:
+ - name: etcd
+ image: quay.io/coreos/etcd:v3.5.21
+ imagePullPolicy: IfNotPresent
+ command: ["/usr/local/bin/etcd"]
+ args:
+ - --name=$(HOSTNAME)
+ - --data-dir=/data
+ - --listen-peer-urls=http://0.0.0.0:2380
+ - --listen-client-urls=http://0.0.0.0:2379
+ - --advertise-client-urls=http://$(HOSTNAME).%[1]s-headless.%[2]s.svc.cluster.local:2379
+ - --initial-cluster-state=new
+ - --initial-cluster-token=$(HOSTNAME)
+ - --initial-cluster=$(HOSTNAME)=http://$(HOSTNAME).%[1]s-headless.%[2]s.svc.cluster.local:2380
+ - --initial-advertise-peer-urls=http://$(HOSTNAME).%[1]s-headless.%[2]s.svc.cluster.local:2380
+ - --listen-metrics-urls=http://0.0.0.0:8080
+ env:
+ - name: HOSTNAME
+ valueFrom:
+ fieldRef:
+ fieldPath: metadata.name
+ ports:
+ - name: client
+ containerPort: 2379
+ - name: peer
+ containerPort: 2380
+ - name: metrics
+ containerPort: 8080
+ livenessProbe:
+ httpGet:
+ path: /livez
+ port: metrics
+ initialDelaySeconds: 15
+ periodSeconds: 10
+ readinessProbe:
+ httpGet:
+ path: /readyz
+ port: metrics
+ initialDelaySeconds: 10
+ periodSeconds: 5
+ failureThreshold: 30
+ resources:
+ requests:
+ cpu: 100m
+ memory: 128Mi
+ limits:
+ memory: 256Mi
+ volumeMounts:
+ - name: data
+ mountPath: /data
+ volumeClaimTemplates:
+ - metadata:
+ name: data
+ spec:
+ accessModes: [ReadWriteOnce]
+ resources:
+ requests:
+ storage: 1Gi
+`, name, kcpNamespace)
+
+ cmd := exec.CommandContext(context.Background(), kubectlBin,
+ "--context", "kind-"+kindClusterName, "apply", "-f", "-")
+ cmd.Stdin = strings.NewReader(manifest)
+ var buf bytes.Buffer
+ cmd.Stdout = &buf
+ cmd.Stderr = &buf
+ Expect(cmd.Run()).To(Succeed(), "applying etcd %s: %s", name, buf.String())
+}
+
+func createKCPResources() {
+	// The front-proxy hostname for in-cluster access; also reachable from
+	// the host via the NodePort.
+ fpHostname := fmt.Sprintf("kcp-front-proxy.%s.svc.cluster.local", kcpNamespace)
+
+ // Create a cert-manager Issuer in the kcp namespace for kcp-operator PKI.
+ applyToKind(fmt.Sprintf(`apiVersion: cert-manager.io/v1
+kind: Issuer
+metadata:
+ name: selfsigned
+ namespace: %s
+spec:
+ selfSigned: {}`, kcpNamespace))
+
+ // Create RootShard. certificateTemplates adds localhost to the server cert
+ // so we can port-forward to the shard for direct access during bootstrap.
+ applyToKind(fmt.Sprintf(`apiVersion: operator.kcp.io/v1alpha1
+kind: RootShard
+metadata:
+ name: root
+ namespace: %[1]s
+spec:
+ external:
+ hostname: %[2]s
+ port: 6443
+ certificates:
+ issuerRef:
+ group: cert-manager.io
+ kind: Issuer
+ name: selfsigned
+ certificateTemplates:
+ server:
+ spec:
+ dnsNames:
+ - localhost
+ ipAddresses:
+ - "127.0.0.1"
+ cache:
+ embedded:
+ enabled: true
+ etcd:
+ endpoints:
+ - http://etcd-root.%[1]s.svc.cluster.local:2379
+ auth:
+ serviceAccount:
+ enabled: true
+ deploymentTemplate:
+ spec:
+ template:
+ spec:
+ hostAliases:
+ - ip: "10.96.200.200"
+ hostnames:
+ - "%[2]s"`, kcpNamespace, fpHostname))
+
+	// Create FrontProxy with the fixed ClusterIP targeted by the shards'
+	// hostAliases, plus a NodePort Service for host access.
+ applyToKind(fmt.Sprintf(`apiVersion: operator.kcp.io/v1alpha1
+kind: FrontProxy
+metadata:
+ name: kcp
+ namespace: %[1]s
+spec:
+ rootShard:
+ ref:
+ name: root
+ auth:
+ serviceAccount:
+ enabled: true
+ serviceTemplate:
+ spec:
+ type: NodePort
+ clusterIP: "10.96.200.200"
+ certificateTemplates:
+ server:
+ spec:
+ dnsNames:
+ - localhost
+ - "%[2]s"
+ ipAddresses:
+ - "127.0.0.1"`, kcpNamespace, fpHostname))
+
+ // Wait for the RootShard to be running.
+ waitFor(3*time.Minute, "root shard running", func() error {
+ out, err := kindctlNoFail("-n", kcpNamespace, "get", "rootshard", "root",
+ "-o", "jsonpath={.status.phase}")
+ if err != nil {
+ return err
+ }
+ if strings.TrimSpace(out) != "Running" {
+ return fmt.Errorf("root shard phase: %s", out)
+ }
+
+ return nil
})
- // Extract front-proxy client certs for host kubeconfig.
- fpClientCrt := secretField("kcp-admin-front-proxy-cert", "{.data.tls\\.crt}")
- fpClientKey := secretField("kcp-admin-front-proxy-cert", "{.data.tls\\.key}")
-
- fpCrtFile := filepath.Join(tmpDir, "fp-client.crt")
- fpKeyFile := filepath.Join(tmpDir, "fp-client.key")
- Expect(os.WriteFile(fpCrtFile, fpClientCrt, 0o600)).To(Succeed())
- Expect(os.WriteFile(fpKeyFile, fpClientKey, 0o600)).To(Succeed())
-
- run(kubectlBin, "--kubeconfig", kcpHostKubeconfig, "config", "set-cluster", "kcp",
- "--server=https://localhost:"+kcpNodePort,
- "--insecure-skip-tls-verify=true")
- run(kubectlBin, "--kubeconfig", kcpHostKubeconfig, "config", "set-credentials", "kcp-admin",
- "--client-certificate="+fpCrtFile,
- "--client-key="+fpKeyFile,
- "--embed-certs=true")
- run(kubectlBin, "--kubeconfig", kcpHostKubeconfig, "config", "set-context", "kcp",
- "--cluster=kcp", "--user=kcp-admin")
- run(kubectlBin, "--kubeconfig", kcpHostKubeconfig, "config", "use-context", "kcp")
-
- waitFor(30*time.Second, "kcp API reachable", func() error {
- _, err := runNoFail(kubectlBin, "--kubeconfig", kcpHostKubeconfig, "get", "--raw", "/readyz")
- return err
+ // Wait for the FrontProxy to be running.
+ waitFor(2*time.Minute, "front-proxy running", func() error {
+ out, err := kindctlNoFail("-n", kcpNamespace, "get", "frontproxy", "kcp",
+ "-o", "jsonpath={.status.phase}")
+ if err != nil {
+ return err
+ }
+ if strings.TrimSpace(out) != "Running" {
+ return fmt.Errorf("front-proxy phase: %s", out)
+ }
+
+ return nil
})
-}
-// buildComponentKubeconfigs creates per-component client certificates and
-// kubeconfigs for the controller and webhook. Each component gets a
-// least-privilege identity instead of shared admin credentials.
-func buildComponentKubeconfigs() {
- // Apply cert requests for controller and webhook identities.
- kindctl("apply", "-f", filepath.Join(fixturesDir, "kcp-controller-cert.yaml"))
- kindctl("apply", "-f", filepath.Join(fixturesDir, "kcp-webhook-sa-cert.yaml"))
+	// Patch the front-proxy Service onto the fixed NodePort so the host-side
+	// kcpctl helpers can reach kcp at a stable localhost port.
+ kindctl("-n", kcpNamespace, "patch", "service", "kcp-front-proxy", "--type=json",
+ fmt.Sprintf(`-p=[{"op":"replace","path":"/spec/ports/0/nodePort","value":%s}]`, frontProxyNodePort))
- waitFor(time.Minute, "controller cert issued", func() error {
- _, err := kindctlNoFail("-n", kcpNamespace, "get", "secret", "kcp-controller-cert",
- "-o", "jsonpath={.data.tls\\.crt}")
+ // Create secondary Shard with localhost in server cert for port-forward access.
+ applyToKind(fmt.Sprintf(`apiVersion: operator.kcp.io/v1alpha1
+kind: Shard
+metadata:
+ name: shard1
+ namespace: %[1]s
+spec:
+ rootShard:
+ ref:
+ name: root
+ etcd:
+ endpoints:
+ - http://etcd-shard.%[1]s.svc.cluster.local:2379
+ auth:
+ serviceAccount:
+ enabled: true
+ certificateTemplates:
+ server:
+ spec:
+ dnsNames:
+ - localhost
+ ipAddresses:
+ - "127.0.0.1"
+ deploymentTemplate:
+ spec:
+ template:
+ spec:
+ hostAliases:
+ - ip: "10.96.200.200"
+ hostnames:
+ - "%[2]s"`, kcpNamespace, fpHostname))
+
+ // Wait for the secondary shard to be running.
+ waitFor(3*time.Minute, "shard1 running", func() error {
+ out, err := kindctlNoFail("-n", kcpNamespace, "get", "shard", "shard1",
+ "-o", "jsonpath={.status.phase}")
+ if err != nil {
+ return err
+ }
+ if strings.TrimSpace(out) != "Running" {
+ return fmt.Errorf("shard1 phase: %s", out)
+ }
+
+ return nil
+ })
+}
+
+func buildAdminKubeconfig() {
+ // Create a Kubeconfig CR for admin access via front-proxy.
+ applyToKind(fmt.Sprintf(`apiVersion: operator.kcp.io/v1alpha1
+kind: Kubeconfig
+metadata:
+ name: e2e-admin
+ namespace: %s
+spec:
+ username: kcp-admin
+ groups:
+ - "system:kcp:admin"
+ validity: 8766h
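+  # 8766h is roughly one year (365.25 days), ample for a CI run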
+ secretRef:
+ name: e2e-admin-kubeconfig
+ target:
+ frontProxyRef:
+ name: kcp`, kcpNamespace))
+
+ // Wait for the kubeconfig secret to be created.
+ waitFor(2*time.Minute, "admin kubeconfig secret created", func() error {
+ _, err := kindctlNoFail("-n", kcpNamespace, "get", "secret", "e2e-admin-kubeconfig",
+ "-o", "jsonpath={.data.kubeconfig}")
return err
})
- waitFor(time.Minute, "webhook SA cert issued", func() error {
- _, err := kindctlNoFail("-n", kcpNamespace, "get", "secret", "kcp-webhook-sa-cert",
- "-o", "jsonpath={.data.tls\\.crt}")
+
+ // Extract the kubeconfig and rewrite the server URL to use localhost NodePort.
+ kcRaw := kindctlSecret("e2e-admin-kubeconfig")
+ kcBytes, err := decodeBase64(kcRaw)
+ Expect(err).NotTo(HaveOccurred())
+
+ // Extract the actual server URL from the kubeconfig rather than hardcoding the port.
+ adminServerURL := extractServerFromKubeconfig(kcBytes)
+ rewritten := strings.ReplaceAll(string(kcBytes),
+ adminServerURL,
+ fmt.Sprintf("https://localhost:%s", frontProxyNodePort))
+
+ Expect(os.WriteFile(kcpHostKubeconfig, []byte(rewritten), 0o600)).To(Succeed())
+
+ waitFor(30*time.Second, "kcp API reachable via front-proxy", func() error {
+ _, err := runNoFail(kubectlBin, "--kubeconfig", kcpHostKubeconfig,
+ "--server", fmt.Sprintf("https://localhost:%s/clusters/root", frontProxyNodePort),
+ "get", "--raw", "/readyz")
return err
})
+}
- kcpServerCA := secretField("kcp-ca", "{.data.tls\\.crt}")
- caFile := filepath.Join(tmpDir, "kcp-server-ca.crt")
- Expect(os.WriteFile(caFile, kcpServerCA, 0o600)).To(Succeed())
+// buildComponentKubeconfigs creates Kubeconfig CRs for the controller and webhook
+// identities, then extracts the generated kubeconfigs pointing at the in-cluster
+// front-proxy for use by deployed pods.
+func buildComponentKubeconfigs() {
+ depCtrlPath := "root:" + wsDepCtrl
- kcpInternalServer := fmt.Sprintf("https://kcp.%s.svc.cluster.local:6443/clusters/root:%s", kcpNamespace, wsDepCtrl)
+ // Controller and webhook kubeconfigs target the root shard (not the front-proxy)
+ // so their client certificates are signed by root-client-ca. This CA is trusted by
+ // both the front-proxy (via kcp-merged-client-ca) and all shards directly. This is
+ // required because the multicluster-provider connects to APIExport virtual workspace
+ // URLs that point directly at shards, not through the front-proxy.
+ // The server URL is rewritten below to point at the front-proxy.
+ applyToKind(fmt.Sprintf(`apiVersion: operator.kcp.io/v1alpha1
+kind: Kubeconfig
+metadata:
+ name: e2e-controller
+ namespace: %[1]s
+spec:
+ username: "system:serviceaccount:%[2]s:dependency-controller"
+ groups:
+ - "system:authenticated"
+ - "system:serviceaccounts"
+ - "system:serviceaccounts:%[2]s"
+ validity: 8766h
+ secretRef:
+ name: e2e-controller-kubeconfig
+ target:
+ rootShardRef:
+ name: root`, kcpNamespace, depNamespace))
+
+ applyToKind(fmt.Sprintf(`apiVersion: operator.kcp.io/v1alpha1
+kind: Kubeconfig
+metadata:
+ name: e2e-webhook
+ namespace: %[1]s
+spec:
+ username: "system:serviceaccount:%[2]s:dependency-webhook"
+ groups:
+ - "system:authenticated"
+ - "system:serviceaccounts"
+ - "system:serviceaccounts:%[2]s"
+ validity: 8766h
+ secretRef:
+ name: e2e-webhook-kubeconfig
+ target:
+ rootShardRef:
+ name: root`, kcpNamespace, depNamespace))
+
+ // Wait for both kubeconfig secrets.
+ for _, name := range []string{"e2e-controller-kubeconfig", "e2e-webhook-kubeconfig"} {
+ waitFor(2*time.Minute, fmt.Sprintf("%s secret created", name), func() error {
+ _, err := kindctlNoFail("-n", kcpNamespace, "get", "secret", name,
+ "-o", "jsonpath={.data.kubeconfig}")
+
+ return err
+ })
+ }
- // Build controller kubeconfig.
- ctrlCrt := secretField("kcp-controller-cert", "{.data.tls\\.crt}")
- ctrlKey := secretField("kcp-controller-cert", "{.data.tls\\.key}")
- ctrlCrtFile := filepath.Join(tmpDir, "ctrl-client.crt")
- ctrlKeyFile := filepath.Join(tmpDir, "ctrl-client.key")
- Expect(os.WriteFile(ctrlCrtFile, ctrlCrt, 0o600)).To(Succeed())
- Expect(os.WriteFile(ctrlKeyFile, ctrlKey, 0o600)).To(Succeed())
+ // The kubeconfigs target the root shard. Extract the shard URL and rewrite
+ // it to the front-proxy URL with the dep-ctrl workspace path. The client cert
+ // from root-client-ca works for both front-proxy and direct shard access.
+ fpHostname := fmt.Sprintf("kcp-front-proxy.%s.svc.cluster.local", kcpNamespace)
+ kcRaw := kindctlSecret("e2e-controller-kubeconfig")
+ kcBytes, err := decodeBase64(kcRaw)
+ Expect(err).NotTo(HaveOccurred())
+ shardURL := extractServerFromKubeconfig(kcBytes)
+ // Determine the front-proxy port from the shard URL (both use 6443).
+ parsed, err := url.Parse(shardURL)
+ Expect(err).NotTo(HaveOccurred())
+ fpPort := parsed.Port()
+ if fpPort == "" {
+ fpPort = "6443"
+ }
+ inClusterFPURL = "https://" + net.JoinHostPort(fpHostname, fpPort)
+ depCtrlURL := inClusterFPURL + "/clusters/" + depCtrlPath
+
+ // Rewrite kubeconfigs: shard URL -> front-proxy + workspace path.
controllerKubeconfigPath = filepath.Join(tmpDir, "kcp-controller.kubeconfig")
- run(kubectlBin, "--kubeconfig", controllerKubeconfigPath, "config", "set-cluster", "kcp",
- "--server="+kcpInternalServer,
- "--certificate-authority="+caFile,
- "--embed-certs=true")
- run(kubectlBin, "--kubeconfig", controllerKubeconfigPath, "config", "set-credentials", "controller",
- "--client-certificate="+ctrlCrtFile,
- "--client-key="+ctrlKeyFile,
- "--embed-certs=true")
- run(kubectlBin, "--kubeconfig", controllerKubeconfigPath, "config", "set-context", "kcp",
- "--cluster=kcp", "--user=controller")
- run(kubectlBin, "--kubeconfig", controllerKubeconfigPath, "config", "use-context", "kcp")
-
- // Build webhook kubeconfig.
- whCrt := secretField("kcp-webhook-sa-cert", "{.data.tls\\.crt}")
- whKey := secretField("kcp-webhook-sa-cert", "{.data.tls\\.key}")
- whCrtFile := filepath.Join(tmpDir, "webhook-client.crt")
- whKeyFile := filepath.Join(tmpDir, "webhook-client.key")
- Expect(os.WriteFile(whCrtFile, whCrt, 0o600)).To(Succeed())
- Expect(os.WriteFile(whKeyFile, whKey, 0o600)).To(Succeed())
+ extractAndRewriteKubeconfig("e2e-controller-kubeconfig", controllerKubeconfigPath,
+ shardURL, depCtrlURL)
webhookKubeconfigPath = filepath.Join(tmpDir, "kcp-webhook.kubeconfig")
- run(kubectlBin, "--kubeconfig", webhookKubeconfigPath, "config", "set-cluster", "kcp",
- "--server="+kcpInternalServer,
- "--certificate-authority="+caFile,
- "--embed-certs=true")
- run(kubectlBin, "--kubeconfig", webhookKubeconfigPath, "config", "set-credentials", "webhook",
- "--client-certificate="+whCrtFile,
- "--client-key="+whKeyFile,
- "--embed-certs=true")
- run(kubectlBin, "--kubeconfig", webhookKubeconfigPath, "config", "set-context", "kcp",
- "--cluster=kcp", "--user=webhook")
- run(kubectlBin, "--kubeconfig", webhookKubeconfigPath, "config", "use-context", "kcp")
-}
-
-// bootstrapRBAC uses the bootstrap (system:masters) cert to create
-// RBAC for the controller and webhook identities.
-// - Root workspace: workspace access, webhook/RBAC management, APIExport content
-// - Dep-ctrl workspace: APIExportEndpointSlice read for VW URL discovery
+ extractAndRewriteKubeconfig("e2e-webhook-kubeconfig", webhookKubeconfigPath,
+ shardURL, depCtrlURL)
+}
+
+// extractAndRewriteKubeconfig extracts a kubeconfig from a secret, rewrites the
+// server URL, and writes it to the given path.
+func extractAndRewriteKubeconfig(secretName, outputPath, oldURL, newURL string) {
+ GinkgoHelper()
+ kcRaw := kindctlSecret(secretName)
+ kcBytes, err := decodeBase64(kcRaw)
+ Expect(err).NotTo(HaveOccurred())
+
+ rewritten := string(kcBytes)
+
+	// kcp-operator generates two contexts: "base" (bare server URL) and
+	// "default" (server URL + /clusters/root). Rewriting the base URL to
+	// include the workspace path corrupts the "default" entry with a double
+	// /clusters/ path, so we switch to the "base" context, which stays correct.
+ rewritten = strings.ReplaceAll(rewritten, oldURL, newURL)
+ rewritten = strings.ReplaceAll(rewritten, "current-context: default", "current-context: base")
+
+ Expect(os.WriteFile(outputPath, []byte(rewritten), 0o600)).To(Succeed())
+}
+
+// decodeBase64 decodes a base64-encoded string.
+func decodeBase64(s string) ([]byte, error) {
+ return base64.StdEncoding.DecodeString(strings.TrimSpace(s))
+}
+
+// bootstrapRBAC creates RBAC for the controller and webhook identities.
+// The webhook's broad get/list rule must be applied in system:admin on every
+// shard hosting consumer workspaces — the BootstrapPolicyAuthorizer reads
+// RBAC from the local shard's system:admin only ("the policy defined in
+// this workspace applies to every workspace in a kcp shard"; kcp source
+// pkg/authorization/bootstrap_policy_authorizer.go), so per-shard application
+// is required. Controller rules and dep-ctrl APIExport access live in the
+// root and dep-ctrl workspaces and go via the front-proxy.
func bootstrapRBAC() {
- kindctl("apply", "-f", filepath.Join(fixturesDir, "kcp-bootstrap-cert.yaml"))
+ // Webhook get/list, applied in system:admin on every shard via direct
+ // (port-forwarded) shard access.
+ applySystemAdminRBAC("root", "rootShardRef")
+ applySystemAdminRBAC("shard1", "shardRef")
+
+ // Controller-only RBAC in the root workspace via front-proxy.
+ run(kubectlBin, "--kubeconfig", kcpHostKubeconfig,
+ "--server", fmt.Sprintf("https://localhost:%s/clusters/root", frontProxyNodePort),
+ "apply", "-f", filepath.Join(fixturesDir, "root-rbac-bootstrap.yaml"))
+
+ // Controller + webhook access to the dep-ctrl APIExport via front-proxy.
+ run(kubectlBin, "--kubeconfig", kcpHostKubeconfig,
+ "--server", fmt.Sprintf("https://localhost:%s/clusters/root:%s", frontProxyNodePort, wsDepCtrl),
+ "apply", "-f", filepath.Join(fixturesDir, "depctrl-rbac-bootstrap.yaml"))
+}
+
+// applySystemAdminRBAC creates a system:masters kubeconfig targeting the
+// given shard, port-forwards that shard's service to localhost, applies the
+// system:admin RBAC fixture there, then tears down the port-forward.
+//
+// refField selects the kcp-operator Kubeconfig target field: "rootShardRef"
+// for the root shard, "shardRef" for any secondary shard.
+func applySystemAdminRBAC(shardName, refField string) {
+ GinkgoHelper()
- waitFor(time.Minute, "bootstrap cert issued", func() error {
- _, err := kindctlNoFail("-n", kcpNamespace, "get", "secret", "kcp-bootstrap-cert",
- "-o", "jsonpath={.data.tls\\.crt}")
+ kubeconfigName := "e2e-" + shardName + "-system-masters"
+ secretName := kubeconfigName + "-kubeconfig"
+
+ // Create a Kubeconfig CR with the appropriate shard target + system:masters
+ // group. The front-proxy does not honor system:masters, so we must hit the
+ // shard directly. The shard's server cert already includes localhost /
+ // 127.0.0.1 (see the certificateTemplates on the RootShard / Shard CRs).
+ applyToKind(fmt.Sprintf(`apiVersion: operator.kcp.io/v1alpha1
+kind: Kubeconfig
+metadata:
+ name: %[1]s
+ namespace: %[2]s
+spec:
+ username: e2e-system-masters
+ groups:
+ - "system:masters"
+ validity: 8766h
+ secretRef:
+ name: %[3]s
+ target:
+ %[4]s:
+ name: %[5]s`, kubeconfigName, kcpNamespace, secretName, refField, shardName))
+
+ waitFor(2*time.Minute, fmt.Sprintf("%s secret created", secretName), func() error {
+ _, err := kindctlNoFail("-n", kcpNamespace, "get", "secret", secretName,
+ "-o", "jsonpath={.data.kubeconfig}")
return err
})
- bsCrt := secretField("kcp-bootstrap-cert", "{.data.tls\\.crt}")
- bsKey := secretField("kcp-bootstrap-cert", "{.data.tls\\.key}")
- kcpServerCA := secretField("kcp-ca", "{.data.tls\\.crt}")
-
- bsCrtFile := filepath.Join(tmpDir, "bootstrap-client.crt")
- bsKeyFile := filepath.Join(tmpDir, "bootstrap-client.key")
- caFile := filepath.Join(tmpDir, "kcp-server-ca.crt")
- Expect(os.WriteFile(bsCrtFile, bsCrt, 0o600)).To(Succeed())
- Expect(os.WriteFile(bsKeyFile, bsKey, 0o600)).To(Succeed())
- Expect(os.WriteFile(caFile, kcpServerCA, 0o600)).To(Succeed())
-
- // Build a temporary bootstrap kubeconfig targeting root via kcp server NodePort.
- bsKubeconfig := filepath.Join(tmpDir, "kcp-bootstrap.kubeconfig")
- run(kubectlBin, "--kubeconfig", bsKubeconfig, "config", "set-cluster", "kcp",
- fmt.Sprintf("--server=https://localhost:%s/clusters/root", kcpServerLocalPort),
- "--certificate-authority="+caFile,
- "--embed-certs=true")
- run(kubectlBin, "--kubeconfig", bsKubeconfig, "config", "set-credentials", "bootstrap",
- "--client-certificate="+bsCrtFile,
- "--client-key="+bsKeyFile,
- "--embed-certs=true")
- run(kubectlBin, "--kubeconfig", bsKubeconfig, "config", "set-context", "kcp",
- "--cluster=kcp", "--user=bootstrap")
- run(kubectlBin, "--kubeconfig", bsKubeconfig, "config", "use-context", "kcp")
-
- // Apply RBAC in the root workspace.
- run(kubectlBin, "--kubeconfig", bsKubeconfig, "apply", "-f",
- filepath.Join(fixturesDir, "root-rbac-bootstrap.yaml"))
-
- // Apply RBAC in the dep-ctrl workspace (apiexportendpointslices read).
- run(kubectlBin, "--kubeconfig", bsKubeconfig,
- "--server", fmt.Sprintf("https://localhost:%s/clusters/root:%s", kcpServerLocalPort, wsDepCtrl),
- "apply", "-f", filepath.Join(fixturesDir, "depctrl-rbac-bootstrap.yaml"))
+ kcRaw := kindctlSecret(secretName)
+ kcBytes, err := decodeBase64(kcRaw)
+ Expect(err).NotTo(HaveOccurred())
+ shardURL := extractServerFromKubeconfig(kcBytes)
+
+ parsed, err := url.Parse(shardURL)
+ Expect(err).NotTo(HaveOccurred())
+ shardSvc := strings.SplitN(parsed.Hostname(), ".", 2)[0]
+ shardPort := parsed.Port()
+ if shardPort == "" {
+ shardPort = "6443"
+ }
- // Apply shard-wide RBAC in system:admin (apiexports/content + apiexportendpointslices for webhook SA).
- // The Bootstrap Policy Authorizer evaluates system:admin RBAC for every request on the shard.
- run(kubectlBin, "--kubeconfig", bsKubeconfig,
- "--server", fmt.Sprintf("https://localhost:%s/clusters/system:admin", kcpServerLocalPort),
- "apply", "-f", filepath.Join(fixturesDir, "shard-admin-rbac-bootstrap.yaml"))
+ localPort := pickFreePort()
+ rewritten := strings.ReplaceAll(string(kcBytes),
+ shardURL, fmt.Sprintf("https://localhost:%d", localPort))
+ rewritten = strings.ReplaceAll(rewritten,
+ "current-context: default", "current-context: base")
+ sysKubeconfig := filepath.Join(tmpDir, kubeconfigName+".kubeconfig")
+ Expect(os.WriteFile(sysKubeconfig, []byte(rewritten), 0o600)).To(Succeed())
+
+	// Start the port-forward in the background; it runs until we kill it
+	// explicitly in the deferred cleanup below.
+ pfCmd := exec.CommandContext(context.Background(), kubectlBin,
+ "--context", "kind-"+kindClusterName,
+ "-n", kcpNamespace, "port-forward",
+ "svc/"+shardSvc, fmt.Sprintf("%d:%s", localPort, shardPort))
+ pfCmd.Stdout = GinkgoWriter
+ pfCmd.Stderr = GinkgoWriter
+ Expect(pfCmd.Start()).To(Succeed())
+ defer func() {
+ _ = pfCmd.Process.Kill()
+ _, _ = pfCmd.Process.Wait()
+ }()
+
+ waitFor(30*time.Second, fmt.Sprintf("%s reachable via port-forward", shardSvc), func() error {
+ _, err := runNoFail(kubectlBin, "--kubeconfig", sysKubeconfig,
+ "--server", fmt.Sprintf("https://localhost:%d/clusters/system:admin", localPort),
+ "get", "--raw", "/readyz")
+
+ return err
+ })
+
+ // --validate=false: system:admin does not serve OpenAPI, so client-side
+ // schema validation has nothing to compare against.
+ run(kubectlBin, "--kubeconfig", sysKubeconfig,
+ "--server", fmt.Sprintf("https://localhost:%d/clusters/system:admin", localPort),
+ "apply", "--validate=false",
+ "-f", filepath.Join(fixturesDir, "system-admin-rbac-bootstrap.yaml"))
}
+// pickFreePort asks the kernel for a free TCP port on localhost.
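+// The listener is closed before the caller binds the port, so another
+// process could in principle grab it in between; acceptable in a test helper.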
+func pickFreePort() int {
+ GinkgoHelper()
+ var lc net.ListenConfig
+ listener, err := lc.Listen(context.Background(), "tcp", "127.0.0.1:0")
+ Expect(err).NotTo(HaveOccurred())
+ port := listener.Addr().(*net.TCPAddr).Port
+ Expect(listener.Close()).To(Succeed())
+
+ return port
+}
+
+// extractServerFromKubeconfig extracts the server URL from a kubeconfig YAML.
+func extractServerFromKubeconfig(kubeconfig []byte) string {
+ // Simple regex extraction — avoids pulling in k8s.io/client-go/tools/clientcmd.
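+	// Only the first "server:" entry is matched; the kubeconfig's cluster
+	// entries share one base URL, so callers rewrite every occurrence with
+	// strings.ReplaceAll afterwards.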
+ re := regexp.MustCompile(`server:\s*(https?://\S+)`)
+ m := re.FindSubmatch(kubeconfig)
+ if len(m) < 2 {
+ Fail("could not extract server URL from kubeconfig")
+ }
+
+ return string(m[1])
+}
+
func buildAndLoadImage() {
run(dockerBin, "build", "-t", imageName, rootDir)
run(kindBin, "load", "docker-image", imageName, "--name", kindClusterName)
}
func setupKCPWorkspaces() {
- // Create all workspaces.
- for _, ws := range []string{wsDepCtrl, wsNetworkProvider, wsComputeProvider, wsConsumer1, wsConsumer2} {
- cmd := exec.CommandContext(context.Background(), kubectlBin,
- "--kubeconfig", kcpHostKubeconfig,
- "--server", fmt.Sprintf("https://localhost:%s/clusters/root", kcpNodePort),
- "apply", "-f", "-",
- )
- cmd.Stdin = strings.NewReader(fmt.Sprintf(`apiVersion: tenancy.kcp.io/v1alpha1
-kind: Workspace
-metadata:
- name: %s`, ws))
- var buf bytes.Buffer
- cmd.Stdout = &buf
- cmd.Stderr = &buf
- cmd.Run() //nolint:errcheck // ignore AlreadyExists
+ // Label both shards so we can pin workspaces deterministically. The
+ // kcp-operator registers a kcp Shard object named after each CR. Each
+ // workspace is then pinned via spec.location.selector.matchLabels using
+ // the active config below.
+ for _, shard := range []string{"root", "shard1"} {
+ waitFor(time.Minute, fmt.Sprintf("shard %s kcp object exists", shard), func() error {
+ _, err := kcpctlRootNoFail("get", "shard", shard)
+ return err
+ })
+ run(kubectlBin, "--kubeconfig", kcpHostKubeconfig,
+ "--server", fmt.Sprintf("https://localhost:%s/clusters/root", frontProxyNodePort),
+ "label", "shard", shard, "e2e-target="+shard, "--overwrite")
}
+ // Create workspaces with the placement dictated by the active shard config.
+ createWorkspace(wsDepCtrl, activeShardConfig.depCtrl)
+ createWorkspace(wsNetworkProvider, activeShardConfig.networkProvider)
+ createWorkspace(wsComputeProvider, activeShardConfig.computeProvider)
+ createWorkspace(wsConsumer1, activeShardConfig.consumer1)
+ createWorkspace(wsConsumer2, activeShardConfig.consumer2)
+
// Wait for workspaces to be ready.
for _, ws := range []string{wsDepCtrl, wsNetworkProvider, wsComputeProvider, wsConsumer1, wsConsumer2} {
waitFor(time.Minute, fmt.Sprintf("workspace %s ready", ws), func() error {
@@ -590,6 +1012,11 @@ metadata:
})
}
+	// Verify each workspace landed on the shard the active config pinned
+	// it to: if scheduling ignored a selector, later cross-shard tests
+	// would silently degrade to single-shard coverage.
+ verifyShardPlacements()
+
// Apply schemas and exports.
kcpctl(wsDepCtrl, "apply", "-f", filepath.Join(rootDir, "config/kcp/apiresourceschema-dependencyrules.dependencies.opendefense.cloud.yaml"))
kcpctl(wsDepCtrl, "apply", "-f", filepath.Join(rootDir, "config/kcp/apiexport-dependencies.opendefense.cloud.yaml"))
@@ -643,6 +1070,100 @@ metadata:
}
}
+// verifyShardPlacements asserts that each workspace landed on the shard the
+// active config pinned it to. The shard placement is exposed via the
+// internal.tenancy.kcp.io/shard annotation; the annotation value is the
+// shard's logical-cluster ID (not its name), so we infer the name→ID
+// mapping from the placements themselves and then check consistency:
+// workspaces sharing a target name must share an ID, and distinct targets
+// must resolve to distinct IDs.
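+//
+// For example, under the multi-shard config consumer1 (root) and consumer2
+// (shard1) must report different IDs, while consumer1 and dep-ctrl (both
+// root) must report the same ID.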
+func verifyShardPlacements() {
+ GinkgoHelper()
+ expected := map[string]string{
+ wsDepCtrl: activeShardConfig.depCtrl,
+ wsNetworkProvider: activeShardConfig.networkProvider,
+ wsComputeProvider: activeShardConfig.computeProvider,
+ wsConsumer1: activeShardConfig.consumer1,
+ wsConsumer2: activeShardConfig.consumer2,
+ }
+
+ const shardAnnotation = "internal.tenancy.kcp.io/shard"
+ jsonpath := fmt.Sprintf(`jsonpath={.metadata.annotations.%s}`,
+ strings.ReplaceAll(shardAnnotation, ".", `\.`))
+
+ waitFor(time.Minute, "shard placements match active config", func() error {
+ actual := make(map[string]string, len(expected))
+ for ws := range expected {
+ out, err := kcpctlRootNoFail("get", "workspace", ws, "-o", jsonpath)
+ if err != nil {
+ return err
+ }
+ id := strings.TrimSpace(out)
+ if id == "" {
+ return fmt.Errorf("workspace %s shard annotation not yet set", ws)
+ }
+ actual[ws] = id
+ }
+
+ // Build name→ID mapping from observations; flag mismatches in the
+ // same group (same target name, different IDs) and across groups
+ // (different target names, same ID — selector ignored).
+ nameToID := map[string]string{}
+ for ws, target := range expected {
+ id := actual[ws]
+ if existing, ok := nameToID[target]; ok && existing != id {
+ return fmt.Errorf("workspaces pinned to %q resolved to multiple shards: %q vs %q (workspace %s)",
+ target, existing, id, ws)
+ }
+ nameToID[target] = id
+ }
+ idToName := map[string]string{}
+ for name, id := range nameToID {
+ if existing, ok := idToName[id]; ok && existing != name {
+ return fmt.Errorf("shards %q and %q both resolved to logical cluster %q — selector ignored",
+ name, existing, id)
+ }
+ idToName[id] = name
+ }
+
+ return nil
+ })
+}
+
+// createWorkspace creates a kcp workspace under root. If shardTarget is non-empty,
+// the workspace is pinned to that shard via spec.location.selector.
+func createWorkspace(name, shardTarget string) {
+ GinkgoHelper()
+ var manifest string
+ if shardTarget != "" {
+ manifest = fmt.Sprintf(`apiVersion: tenancy.kcp.io/v1alpha1
+kind: Workspace
+metadata:
+ name: %s
+spec:
+ location:
+ selector:
+ matchLabels:
+ e2e-target: %s`, name, shardTarget)
+ } else {
+ manifest = fmt.Sprintf(`apiVersion: tenancy.kcp.io/v1alpha1
+kind: Workspace
+metadata:
+ name: %s`, name)
+ }
+
+ cmd := exec.CommandContext(context.Background(), kubectlBin,
+ "--kubeconfig", kcpHostKubeconfig,
+ "--server", fmt.Sprintf("https://localhost:%s/clusters/root", frontProxyNodePort),
+ "apply", "-f", "-",
+ )
+ cmd.Stdin = strings.NewReader(manifest)
+ var buf bytes.Buffer
+ cmd.Stdout = &buf
+ cmd.Stderr = &buf
+ cmd.Run() //nolint:errcheck // ignore AlreadyExists
+}
+
// createKubeconfigSecret creates or updates a Secret in the dep-ctrl namespace
// containing the given kubeconfig file.
func createKubeconfigSecret(secretName, kubeconfigPath string) {
@@ -673,6 +1194,19 @@ func deployCharts() {
filepath.Join(rootDir, "charts/dependency-controller"),
"--namespace", depNamespace,
"--values", filepath.Join(fixturesDir, "integration-values.yaml"),
+ "--set", "kcpBaseHost="+inClusterFPURL,
"--wait", "--timeout", "120s",
)
}
+
+// applyToKind applies a YAML manifest to the kind cluster.
+func applyToKind(manifest string) {
+ GinkgoHelper()
+ cmd := exec.CommandContext(context.Background(), kubectlBin,
+ "--context", "kind-"+kindClusterName, "apply", "-f", "-")
+ cmd.Stdin = strings.NewReader(manifest)
+ var buf bytes.Buffer
+ cmd.Stdout = &buf
+ cmd.Stderr = &buf
+ Expect(cmd.Run()).To(Succeed(), "applying manifest: %s", buf.String())
+}
diff --git a/test/fixtures/depctrl-rbac-bootstrap.yaml b/test/fixtures/depctrl-rbac-bootstrap.yaml
index 3fb0f32..9868587 100644
--- a/test/fixtures/depctrl-rbac-bootstrap.yaml
+++ b/test/fixtures/depctrl-rbac-bootstrap.yaml
@@ -1,20 +1,16 @@
# RBAC in the dep-ctrl workspace for the dependency-controller.
-# Applied during setup using a bootstrap (system:masters) identity.
+# Applied during setup using a privileged identity (e.g., system:masters).
#
-# The controller connects to the dep-ctrl workspace to:
+# Both components connect to the dep-ctrl workspace to:
# 1. Discover the VW URL via APIExportEndpointSlice
# 2. Access the dep-ctrl APIExport's virtual workspace (apiexports/content)
#
-# The controller needs full CRUD on apiexports/content because it manages
-# webhooks in binding workspaces via the VW (using permissionClaims).
-#
-# The webhook SA's access is granted shard-wide via system:admin RBAC
-# (see shard-admin-rbac-bootstrap.yaml) instead of per-workspace grants.
+# The controller manages webhooks in binding workspaces via the VW.
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
- name: dependency-controller
+ name: dependency-controller-depctrl
rules:
- apiGroups: ["apis.kcp.io"]
resources: ["apiexportendpointslices"]
@@ -27,12 +23,15 @@ rules:
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
- name: dependency-controller
+ name: dependency-controller-depctrl
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
- name: dependency-controller
+ name: dependency-controller-depctrl
subjects:
- apiGroup: rbac.authorization.k8s.io
kind: User
name: "system:serviceaccount:dependency-system:dependency-controller"
+ - apiGroup: rbac.authorization.k8s.io
+ kind: User
+ name: "system:serviceaccount:dependency-system:dependency-webhook"
diff --git a/test/fixtures/dependencyrule-vm-dependencies.yaml b/test/fixtures/dependencyrule-vm-dependencies.yaml
index 5975178..cc1ec0c 100644
--- a/test/fixtures/dependencyrule-vm-dependencies.yaml
+++ b/test/fixtures/dependencyrule-vm-dependencies.yaml
@@ -8,6 +8,7 @@ spec:
group: compute.test.io
version: v1
kind: VirtualMachine
+ resource: virtualmachines
dependencies:
- apiExportRef:
path: ${NETWORK_PROVIDER_PATH}
diff --git a/test/fixtures/integration-values-kcp.yaml b/test/fixtures/integration-values-kcp.yaml
deleted file mode 100644
index 5f4ba1d..0000000
--- a/test/fixtures/integration-values-kcp.yaml
+++ /dev/null
@@ -1,35 +0,0 @@
-# Values for integration testing — kcp deployed in kind via helm chart.
-externalHostname: "kcp-front-proxy.kcp-system.svc.cluster.local"
-externalPort: "8443"
-
-certificates:
- dnsNames:
- - localhost
- - kcp-front-proxy.kcp-system.svc.cluster.local
- - kcp.kcp-system.svc.cluster.local
-
-etcd:
- replicas: 1
- resources:
- requests:
- memory: 256Mi
- cpu: 100m
- limits:
- memory: 512Mi
- volumeSize: 1Gi
-
-kcp:
- volumeClassName: standard
- extraFlags:
- - "--shard-base-url=https://kcp.kcp-system.svc.cluster.local:6443"
- resources:
- requests:
- memory: 512Mi
- cpu: 200m
- limits:
- memory: 1Gi
-
-kcpFrontProxy:
- service:
- type: NodePort
- nodePort: 31500
diff --git a/test/fixtures/integration-values.yaml b/test/fixtures/integration-values.yaml
index dee598b..9161218 100644
--- a/test/fixtures/integration-values.yaml
+++ b/test/fixtures/integration-values.yaml
@@ -1,10 +1,12 @@
-# Values for integration testing — both components deployed in kind, connecting to kcp server directly.
+# Values for integration testing — both components deployed in kind,
+# connecting to kcp via the front-proxy.
image:
repository: dependency-controller
tag: integration-test
pullPolicy: Never
-kcpBaseHost: "https://kcp.kcp-system.svc.cluster.local:6443"
+# kcpBaseHost is set dynamically via --set in the e2e test to match
+# the actual front-proxy URL from kcp-operator.
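+# For example (URL shape illustrative; the e2e suite supplies the real value):
+#   helm upgrade --install ... --values integration-values.yaml \
+#     --set kcpBaseHost=https://<front-proxy-host>:<port>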
controller:
kubeconfig:
diff --git a/test/fixtures/kcp-admin-cert.yaml b/test/fixtures/kcp-admin-cert.yaml
deleted file mode 100644
index 1fa9950..0000000
--- a/test/fixtures/kcp-admin-cert.yaml
+++ /dev/null
@@ -1,44 +0,0 @@
-# Client certificates for kcp admin access.
-#
-# The front-proxy cert is used by the test script (host) to manage workspaces
-# via the front-proxy NodePort.
-# The server cert is used by in-cluster pods (controller/webhook) to connect
-# directly to the kcp server, including virtual workspace endpoints.
----
-apiVersion: cert-manager.io/v1
-kind: Certificate
-metadata:
- name: kcp-admin-front-proxy-cert
- namespace: kcp-system
-spec:
- commonName: kcp-admin
- issuerRef:
- name: kcp-front-proxy-client-issuer
- privateKey:
- algorithm: RSA
- size: 2048
- secretName: kcp-admin-front-proxy-cert
- subject:
- organizations:
- - "system:kcp:admin"
- usages:
- - client auth
----
-apiVersion: cert-manager.io/v1
-kind: Certificate
-metadata:
- name: kcp-admin-server-cert
- namespace: kcp-system
-spec:
- commonName: kcp-admin
- issuerRef:
- name: kcp-client-issuer
- privateKey:
- algorithm: RSA
- size: 2048
- secretName: kcp-admin-server-cert
- subject:
- organizations:
- - "system:kcp:admin"
- usages:
- - client auth
diff --git a/test/fixtures/kcp-bootstrap-cert.yaml b/test/fixtures/kcp-bootstrap-cert.yaml
deleted file mode 100644
index 5066b73..0000000
--- a/test/fixtures/kcp-bootstrap-cert.yaml
+++ /dev/null
@@ -1,21 +0,0 @@
-# Bootstrap cert with system:masters for e2e setup only.
-# Used to seed initial RBAC grants in system:master workspace.
-# NOT used by any deployed component.
-apiVersion: cert-manager.io/v1
-kind: Certificate
-metadata:
- name: kcp-bootstrap-cert
- namespace: kcp-system
-spec:
- commonName: "e2e-bootstrap"
- issuerRef:
- name: kcp-client-issuer
- privateKey:
- algorithm: RSA
- size: 2048
- secretName: kcp-bootstrap-cert
- subject:
- organizations:
- - "system:masters"
- usages:
- - client auth
diff --git a/test/fixtures/kcp-controller-cert.yaml b/test/fixtures/kcp-controller-cert.yaml
deleted file mode 100644
index 1cb3fee..0000000
--- a/test/fixtures/kcp-controller-cert.yaml
+++ /dev/null
@@ -1,22 +0,0 @@
-# Client certificate for the dependency-controller service account.
-# Issued by kcp's client CA so the controller can connect to the kcp server
-# directly (not via front-proxy).
-apiVersion: cert-manager.io/v1
-kind: Certificate
-metadata:
- name: kcp-controller-cert
- namespace: kcp-system
-spec:
- commonName: "system:serviceaccount:dependency-system:dependency-controller"
- issuerRef:
- name: kcp-client-issuer
- privateKey:
- algorithm: RSA
- size: 2048
- secretName: kcp-controller-cert
- subject:
- organizations:
- - "system:serviceaccounts"
- - "system:serviceaccounts:dependency-system"
- usages:
- - client auth
diff --git a/test/fixtures/kcp-server-nodeport.yaml b/test/fixtures/kcp-server-nodeport.yaml
deleted file mode 100644
index 6d3b0da..0000000
--- a/test/fixtures/kcp-server-nodeport.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-# NodePort service exposing the kcp server directly (not the front-proxy).
-# VW paths (/services/apiexport/...) are only served by the kcp server,
-# so e2e tests need direct access to verify RBAC via VW requests.
-apiVersion: v1
-kind: Service
-metadata:
- name: kcp-server-nodeport
- namespace: kcp-system
-spec:
- type: NodePort
- ports:
- - protocol: TCP
- name: kcp
- port: 6443
- targetPort: 6443
- nodePort: 31501
- selector:
- app.kubernetes.io/name: kcp
- app.kubernetes.io/instance: kcp
- app.kubernetes.io/component: server
diff --git a/test/fixtures/kcp-webhook-sa-cert.yaml b/test/fixtures/kcp-webhook-sa-cert.yaml
deleted file mode 100644
index eefbf66..0000000
--- a/test/fixtures/kcp-webhook-sa-cert.yaml
+++ /dev/null
@@ -1,24 +0,0 @@
-# Client certificate for the dependency-webhook service account.
-# Issued by kcp's client CA so the webhook can connect to the kcp server
-# directly. The CN and orgs encode the SA identity that kcp uses for
-# authorization (matching the bootstrap RBAC subjects in root, dep-ctrl,
-# and system:admin).
-apiVersion: cert-manager.io/v1
-kind: Certificate
-metadata:
- name: kcp-webhook-sa-cert
- namespace: kcp-system
-spec:
- commonName: "system:serviceaccount:dependency-system:dependency-webhook"
- issuerRef:
- name: kcp-client-issuer
- privateKey:
- algorithm: RSA
- size: 2048
- secretName: kcp-webhook-sa-cert
- subject:
- organizations:
- - "system:serviceaccounts"
- - "system:serviceaccounts:dependency-system"
- usages:
- - client auth
diff --git a/test/fixtures/kind-config.yaml b/test/fixtures/kind-config.yaml
index a28c932..a6692b8 100644
--- a/test/fixtures/kind-config.yaml
+++ b/test/fixtures/kind-config.yaml
@@ -3,9 +3,6 @@ apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
extraPortMappings:
- - containerPort: 31500
- hostPort: 31500
- protocol: TCP
- - containerPort: 31501
- hostPort: 31501
+ - containerPort: 31443
+ hostPort: 31443
protocol: TCP
diff --git a/test/fixtures/root-rbac-bootstrap.yaml b/test/fixtures/root-rbac-bootstrap.yaml
index f9a27e0..2d381ee 100644
--- a/test/fixtures/root-rbac-bootstrap.yaml
+++ b/test/fixtures/root-rbac-bootstrap.yaml
@@ -1,10 +1,11 @@
-# RBAC in the root workspace for the dependency-controller system.
-# Applied during setup using a bootstrap (system:masters) identity.
+# RBAC in the root workspace for the dependency-controller.
+# Applied during setup using a system:kcp:admin identity via front-proxy.
#
-# Both components need workspaces/content access to enter the dep-ctrl
-# workspace (where their kubeconfig points). Webhook installation and
-# RBAC management in provider workspaces is done through the dep-ctrl
-# APIExport's virtual workspace, authorized by permissionClaims.
+# Only the controller has rules here. The webhook's broad get/list access
+# lives in each shard's system:admin workspace (see
+# system-admin-rbac-bootstrap.yaml) and applies to every workspace on that
+# shard. Webhook installation and RBAC management in provider workspaces is
+# done through the dep-ctrl APIExport's virtual workspace, authorized by
+# permissionClaims.
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
@@ -32,26 +33,3 @@ subjects:
- apiGroup: rbac.authorization.k8s.io
kind: User
name: "system:serviceaccount:dependency-system:dependency-controller"
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRole
-metadata:
- name: dependency-controller-webhook
-rules:
- # Enter child workspaces through virtual workspace endpoints.
- - apiGroups: ["core.kcp.io"]
- resources: ["workspaces/content"]
- verbs: ["access"]
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRoleBinding
-metadata:
- name: dependency-controller-webhook
-roleRef:
- apiGroup: rbac.authorization.k8s.io
- kind: ClusterRole
- name: dependency-controller-webhook
-subjects:
- - apiGroup: rbac.authorization.k8s.io
- kind: User
- name: "system:serviceaccount:dependency-system:dependency-webhook"
diff --git a/test/fixtures/shard-admin-rbac-bootstrap.yaml b/test/fixtures/shard-admin-rbac-bootstrap.yaml
deleted file mode 100644
index 0c01f6f..0000000
--- a/test/fixtures/shard-admin-rbac-bootstrap.yaml
+++ /dev/null
@@ -1,33 +0,0 @@
-# RBAC in kcp's system:admin logical cluster, granting the webhook SA
-# apiexports/content and apiexportendpointslices read access across all
-# workspaces on the shard. Applied via the kcp server (not front-proxy)
-# using a bootstrap (system:masters) identity.
-#
-# system:admin RBAC is evaluated by the Bootstrap Policy Authorizer for
-# every request on the shard, so this single grant replaces the per-workspace
-# ClusterRole/ClusterRoleBinding that the controller previously managed.
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRole
-metadata:
- name: dependency-controller-webhook-apiexport-reader
-rules:
- - apiGroups: ["apis.kcp.io"]
- resources: ["apiexports/content"]
- verbs: ["get", "list", "watch"]
- - apiGroups: ["apis.kcp.io"]
- resources: ["apiexportendpointslices"]
- verbs: ["get", "list", "watch"]
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRoleBinding
-metadata:
- name: dependency-controller-webhook-apiexport-reader
-roleRef:
- apiGroup: rbac.authorization.k8s.io
- kind: ClusterRole
- name: dependency-controller-webhook-apiexport-reader
-subjects:
- - apiGroup: rbac.authorization.k8s.io
- kind: User
- name: "system:serviceaccount:dependency-system:dependency-webhook"
diff --git a/test/fixtures/system-admin-rbac-bootstrap.yaml b/test/fixtures/system-admin-rbac-bootstrap.yaml
new file mode 100644
index 0000000..62f7992
--- /dev/null
+++ b/test/fixtures/system-admin-rbac-bootstrap.yaml
@@ -0,0 +1,32 @@
+# RBAC for the dependency-controller webhook in the system:admin workspace.
+# Applied during setup using a system:masters identity over a direct
+# (port-forwarded) connection to each shard. The system:admin workspace
+# is not reachable via the front-proxy.
+#
+# kcp's BootstrapPolicyAuthorizer reads RBAC from the local shard's
+# system:admin only — bindings do not propagate across shards — so this
+# fixture is applied once per shard. Within a shard the binding applies
+# to every workspace on that shard, so the webhook can get/list dependent
+# resources in any consumer workspace without per-workspace RBAC.
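+#
+# Sketch of the per-shard apply (flags illustrative; the e2e suite drives
+# this through a port-forward to each shard's kcp server):
+#   kubectl --server=https://localhost:<local-port>/clusters/system:admin \
+#     apply -f system-admin-rbac-bootstrap.yaml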
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+ name: dependency-controller-webhook
+rules:
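+  # Read-only wildcard: dependent GVRs come from DependencyRules at runtime,
+  # so they are not known when this bootstrap fixture is applied.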
+ - apiGroups: ["*"]
+ resources: ["*"]
+ verbs: ["get", "list"]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+ name: dependency-controller-webhook
+roleRef:
+ apiGroup: rbac.authorization.k8s.io
+ kind: ClusterRole
+ name: dependency-controller-webhook
+subjects:
+ - apiGroup: rbac.authorization.k8s.io
+ kind: User
+ name: "system:serviceaccount:dependency-system:dependency-webhook"
diff --git a/test/fixtures/vm-shard-vm.yaml b/test/fixtures/vm-shard-vm.yaml
new file mode 100644
index 0000000..7b68ea1
--- /dev/null
+++ b/test/fixtures/vm-shard-vm.yaml
@@ -0,0 +1,9 @@
+apiVersion: compute.test.io/v1
+kind: VirtualMachine
+metadata:
+ name: shard-vm
+ namespace: default
+spec:
+ cpu: 2
+ vpcRef:
+ name: shard-vpc
diff --git a/test/fixtures/vm-update-vm.yaml b/test/fixtures/vm-update-vm.yaml
new file mode 100644
index 0000000..c4bce12
--- /dev/null
+++ b/test/fixtures/vm-update-vm.yaml
@@ -0,0 +1,9 @@
+apiVersion: compute.test.io/v1
+kind: VirtualMachine
+metadata:
+ name: update-vm
+ namespace: default
+spec:
+ cpu: 1
+ vpcRef:
+ name: update-vpc
diff --git a/test/fixtures/vpc-shard-vpc.yaml b/test/fixtures/vpc-shard-vpc.yaml
new file mode 100644
index 0000000..9821979
--- /dev/null
+++ b/test/fixtures/vpc-shard-vpc.yaml
@@ -0,0 +1,7 @@
+apiVersion: network.test.io/v1
+kind: VPC
+metadata:
+ name: shard-vpc
+ namespace: default
+spec:
+ cidr: "10.3.0.0/16"
diff --git a/test/fixtures/vpc-update-vpc.yaml b/test/fixtures/vpc-update-vpc.yaml
new file mode 100644
index 0000000..d3ab117
--- /dev/null
+++ b/test/fixtures/vpc-update-vpc.yaml
@@ -0,0 +1,7 @@
+apiVersion: network.test.io/v1
+kind: VPC
+metadata:
+ name: update-vpc
+ namespace: default
+spec:
+ cidr: "10.5.0.0/16"