diff --git a/pkg/virtualkubelet/config.go b/pkg/virtualkubelet/config.go index b3ed38ff..6b9185c5 100644 --- a/pkg/virtualkubelet/config.go +++ b/pkg/virtualkubelet/config.go @@ -131,14 +131,36 @@ type PodCIDR struct { type Network struct { // EnableTunnel enables WebSocket tunneling for pod port exposure EnableTunnel bool `yaml:"EnableTunnel" default:"false"` + // TunnelType selects the port-forwarding backend: "" or "wstunnel" (default, backward-compatible) or "rathole". + TunnelType string `yaml:"TunnelType,omitempty"` // WildcardDNS specifies the DNS domain for generating tunnel endpoints WildcardDNS string `yaml:"WildcardDNS,omitempty"` // WSTunnelExecutableURL specifies the URL to download the wstunnel executable (default is "https://github.com/interlink-hq/interlink-artifacts/raw/main/wstunnel/v10.4.4/linux-amd64/wstunnel") WSTunnelExecutableURL string `yaml:"WSTunnelExecutable,omitempty"` - // WstunnelTemplatePath is the path to a custom wstunnel template file + // WstunnelTemplatePath is the path to a custom tunnel template file (applies to both wstunnel and rathole) WstunnelTemplatePath string `yaml:"WstunnelTemplatePath,omitempty"` // WstunnelCommand specifies the command template for setting up wstunnel clients WstunnelCommand string `yaml:"WstunnelCommand,omitempty"` + // RatholeExecutableURL specifies the URL to download the rathole executable zip archive + // (default is "https://github.com/rathole-org/rathole/releases/download/v0.5.0/rathole-x86_64-unknown-linux-gnu.zip") + RatholeExecutableURL string `yaml:"RatholeExecutableURL,omitempty"` + // RatholeCommand specifies a custom command template for rathole clients in TLS mode + // (i.e., when RatholeCAIssuerName is set). Five %s format verbs are substituted in order: + // the rathole download URL, base64-encoded CA cert, base64-encoded client cert, + // base64-encoded client key, and base64-encoded client TOML config. + // Default: DefaultRatholeCommand. 
+ RatholeCommand string `yaml:"RatholeCommand,omitempty"` + // RatholeWSCommand specifies a custom command template for rathole clients in WebSocket fallback + // mode (i.e., when RatholeCAIssuerName is empty). Two %s format verbs are substituted in order: + // the rathole download URL and the base64-encoded client TOML config. + // Default: DefaultRatholeWSCommand. + RatholeWSCommand string `yaml:"RatholeWSCommand,omitempty"` + // RatholeCAIssuerName is the cert-manager ClusterIssuer or Issuer name for the admin-provided CA. + // When set, rathole uses TLS transport; cert-manager issues both the server and client certificates. + // A Traefik IngressRouteTCP resource is created to expose the rathole server via TLS on port 443. + RatholeCAIssuerName string `yaml:"RatholeCAIssuerName,omitempty"` + // RatholeCAIssuerKind is the kind of the cert-manager issuer: "ClusterIssuer" (default) or "Issuer". + RatholeCAIssuerKind string `yaml:"RatholeCAIssuerKind,omitempty"` // FullMesh enables full mesh networking with slirp4netns and WireGuard FullMesh bool `yaml:"FullMesh" default:"false"` // MeshScriptTemplatePath is the path to a custom mesh.sh template file diff --git a/pkg/virtualkubelet/config_test.go b/pkg/virtualkubelet/config_test.go index a800c6bf..9726396c 100644 --- a/pkg/virtualkubelet/config_test.go +++ b/pkg/virtualkubelet/config_test.go @@ -1,6 +1,7 @@ package virtualkubelet import ( + "strings" "testing" "github.com/stretchr/testify/assert" @@ -124,6 +125,38 @@ func TestNetwork_Configuration(t *testing.T) { assert.NotEmpty(t, network.WstunnelCommand) } +func TestNetwork_RatholeConfiguration(t *testing.T) { + network := Network{ + EnableTunnel: true, + TunnelType: "rathole", + WildcardDNS: "tunnel.example.com", + RatholeExecutableURL: "https://example.com/rathole.zip", + // RatholeCommand is the TLS-mode template: 5 %s args (URL, CA cert, client cert, client key, client TOML) + RatholeCommand: "curl -L %s -o r.zip && unzip r.zip && echo %s | base64 -d > 
/tmp/ca.crt && echo %s | base64 -d > /tmp/cl.crt && echo %s | base64 -d > /tmp/cl.key && echo %s | base64 -d > /tmp/c.toml && ./rathole --client /tmp/c.toml &", + // RatholeWSCommand is the WebSocket-fallback template: 2 %s args (URL, client TOML) + RatholeWSCommand: "curl -L %s -o r.zip && unzip r.zip && echo %s | base64 -d > /tmp/c.toml && ./rathole --client /tmp/c.toml &", + } + + assert.True(t, network.EnableTunnel) + assert.Equal(t, "rathole", network.TunnelType) + assert.Equal(t, "tunnel.example.com", network.WildcardDNS) + assert.Equal(t, "https://example.com/rathole.zip", network.RatholeExecutableURL) + assert.NotEmpty(t, network.RatholeCommand) + assert.NotEmpty(t, network.RatholeWSCommand) + // Validate that RatholeCommand contains exactly 5 %s verbs (TLS mode) + assert.Equal(t, 5, strings.Count(network.RatholeCommand, "%s"), "RatholeCommand must have exactly 5 %%s format verbs for TLS mode") + // Validate that RatholeWSCommand contains exactly 2 %s verbs (WebSocket fallback) + assert.Equal(t, 2, strings.Count(network.RatholeWSCommand, "%s"), "RatholeWSCommand must have exactly 2 %%s format verbs for WebSocket mode") +} + +func TestNetwork_WstunnelDefaultTunnelType(t *testing.T) { + // Empty TunnelType means wstunnel (backward-compatible default) + network := Network{ + EnableTunnel: true, + } + assert.Empty(t, network.TunnelType, "empty TunnelType should default to wstunnel behaviour") +} + func TestAccelerator_AvailableIsKubernetesQuantity(t *testing.T) { tests := []struct { name string diff --git a/pkg/virtualkubelet/mesh.go b/pkg/virtualkubelet/mesh.go index e7dac2b4..345962c8 100644 --- a/pkg/virtualkubelet/mesh.go +++ b/pkg/virtualkubelet/mesh.go @@ -235,7 +235,8 @@ func deriveWGPublicKey(privB64 string) (string, error) { return base64.StdEncoding.EncodeToString(pubRaw), nil } -// addWstunnelClientAnnotation adds the wstunnel client command annotation to the original pod +// addWstunnelClientAnnotation adds the tunnel client command annotation to 
the original pod. +// In rathole mode it writes a rathole client command; otherwise it writes a wstunnel command. func (p *Provider) addWstunnelClientAnnotation(ctx context.Context, pod *v1.Pod, td *WstunnelTemplateData) error { if pod.Annotations == nil { pod.Annotations = make(map[string]string) @@ -252,7 +253,8 @@ func (p *Provider) addWstunnelClientAnnotation(ctx context.Context, pod *v1.Pod, clearConflictingNetworkAnnotations(pod, fullMeshEnabledForPod) // Check if FullMesh mode is enabled and not disabled for this specific pod - if fullMeshEnabledForPod { + switch { + case fullMeshEnabledForPod: log.G(ctx).Infof("FullMesh mode enabled, generating pre-exec script for pod %s/%s", pod.Namespace, pod.Name) // Generate full mesh script @@ -289,10 +291,116 @@ PersistentKeepalive = %d pod.Annotations["interlink.eu/wireguard-client-snippet"] = wgSnippet - } else { + case p.config.Network.TunnelType == tunnelTypeRathole: + // Rathole mode: build a client TOML config and generate the client bootstrap command. + // When RatholeCAIssuerName is set, use TLS transport with cert-manager-issued certificates; + // otherwise fall back to WebSocket transport for backward compatibility. + ratholeEndpoint := fmt.Sprintf("rathole-%s.%s", td.Name, td.WildcardDNS) + ratholeEndpoint = sanitizeFullDNSName(ratholeEndpoint) + if td.WildcardDNS == "" { + ratholeEndpoint = td.Name + } + + ratholeURL := p.config.Network.RatholeExecutableURL + if ratholeURL == "" { + ratholeURL = DefaultRatholeExecutableURL + } + + var mainCmd string + + if p.config.Network.RatholeCAIssuerName != "" { + // TLS mode: rathole client uses TLS transport; Traefik terminates TLS at port 443. + // Wait for the client certificate secret to be issued by cert-manager. 
+ clientCertSecretName := td.Name + "-rathole-client-tls" + if err := p.waitForRatholeCertSecret(ctx, clientCertSecretName, td.Namespace); err != nil { + return fmt.Errorf("rathole client certificate not ready: %w", err) + } + + certSecret, err := p.clientSet.CoreV1().Secrets(td.Namespace).Get(ctx, clientCertSecretName, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to read rathole client certificate secret: %w", err) + } + + // Validate all required keys are present and non-empty. + for _, key := range []string{"ca.crt", "tls.crt", "tls.key"} { + if len(certSecret.Data[key]) == 0 { + return fmt.Errorf("rathole client certificate secret %s/%s is missing required key %q", td.Namespace, clientCertSecretName, key) + } + } + + caCrtB64 := base64.StdEncoding.EncodeToString(certSecret.Data["ca.crt"]) + clientCrtB64 := base64.StdEncoding.EncodeToString(certSecret.Data["tls.crt"]) + clientKeyB64 := base64.StdEncoding.EncodeToString(certSecret.Data["tls.key"]) + + var tomlBuilder strings.Builder + fmt.Fprintf(&tomlBuilder, "[client]\nremote_addr = \"%s:443\"\n\n", ratholeEndpoint) + tomlBuilder.WriteString("[client.transport]\ntype = \"tls\"\n\n") + tomlBuilder.WriteString("[client.transport.tls]\n") + fmt.Fprintf(&tomlBuilder, "hostname = \"%s\"\n", ratholeEndpoint) + tomlBuilder.WriteString("trusted_root = \"/tmp/rathole-ca.crt\"\n") + tomlBuilder.WriteString("cert = \"/tmp/rathole-client.crt\"\n") + tomlBuilder.WriteString("key = \"/tmp/rathole-client.key\"\n\n") + for _, port := range td.ExposedPorts { + if strings.ToUpper(port.Protocol) == protocolUDP { + log.G(ctx).Debugf("Skipping UDP port %d in rathole client config (TLS transport forwards TCP only)", port.Port) + continue + } + fmt.Fprintf(&tomlBuilder, "[client.services.p%d]\ntoken = \"%s\"\nlocal_addr = \"127.0.0.1:%d\"\n\n", + port.Port, td.RandomPassword, port.Port) + } + + configB64 := base64.StdEncoding.EncodeToString([]byte(tomlBuilder.String())) + + ratholeCmd := 
p.config.Network.RatholeCommand + if ratholeCmd == "" { + ratholeCmd = DefaultRatholeCommand + } + // Validate that the TLS command template has exactly 5 %s format verbs + // (URL, CA cert, client cert, client key, client TOML). + if strings.Count(ratholeCmd, "%s") != 5 { + return fmt.Errorf("RatholeCommand must have exactly 5 %%s format verbs (url, ca, cert, key, toml); got %d in %q", + strings.Count(ratholeCmd, "%s"), p.config.Network.RatholeCommand) + } + mainCmd = fmt.Sprintf(ratholeCmd, ratholeURL, caCrtB64, clientCrtB64, clientKeyB64, configB64) + } else { + // WebSocket fallback (no CA issuer configured) + log.G(ctx).Debugf("RatholeCAIssuerName not set; using WebSocket transport for pod %s/%s", pod.Namespace, pod.Name) + + var tomlBuilder strings.Builder + fmt.Fprintf(&tomlBuilder, "[client]\nremote_addr = \"%s:80\"\n\n", ratholeEndpoint) + tomlBuilder.WriteString("[client.transport]\ntype = \"websocket\"\n\n") + for _, port := range td.ExposedPorts { + if strings.ToUpper(port.Protocol) == protocolUDP { + log.G(ctx).Debugf("Skipping UDP port %d in rathole client config (websocket transport forwards TCP only)", port.Port) + continue + } + fmt.Fprintf(&tomlBuilder, "[client.services.p%d]\ntoken = \"%s\"\nlocal_addr = \"127.0.0.1:%d\"\n\n", + port.Port, td.RandomPassword, port.Port) + } + + configB64 := base64.StdEncoding.EncodeToString([]byte(tomlBuilder.String())) + + ratholeWSCmd := p.config.Network.RatholeWSCommand + if ratholeWSCmd == "" { + ratholeWSCmd = DefaultRatholeWSCommand + } + // Validate that the WebSocket command template has exactly 2 %s format verbs + // (URL, client TOML). 
+ if strings.Count(ratholeWSCmd, "%s") != 2 { + return fmt.Errorf("RatholeWSCommand must have exactly 2 %%s format verbs (url, toml); got %d in %q", + strings.Count(ratholeWSCmd, "%s"), p.config.Network.RatholeWSCommand) + } + mainCmd = fmt.Sprintf(ratholeWSCmd, ratholeURL, configB64) + } + + // Remove any stale wstunnel annotation and set the rathole one + delete(pod.Annotations, annWSTunnelClientCmds) + pod.Annotations[annRatholeClientCmds] = mainCmd + + default: var rOptions []string for _, port := range td.ExposedPorts { - if strings.ToUpper(port.Protocol) == "UDP" { + if strings.ToUpper(port.Protocol) == protocolUDP { continue } rOptions = append(rOptions, fmt.Sprintf("-R tcp://0.0.0.0:%d:localhost:%d", port.Port, port.Port)) @@ -347,8 +455,8 @@ PersistentKeepalive = %d // clearConflictingNetworkAnnotations removes generated annotations that are specific to // the opposite network mode to keep pod network bootstrap behavior uniform. -// When fullMeshEnabledForPod is true, any stale wstunnel client command annotation is removed. -// When false, any stale WireGuard snippet annotation is removed. +// When fullMeshEnabledForPod is true, any stale wstunnel and rathole client command annotations +// are removed. When false, any stale WireGuard snippet annotation is removed. 
func clearConflictingNetworkAnnotations(pod *v1.Pod, fullMeshEnabledForPod bool) { if pod == nil || pod.Annotations == nil { return @@ -356,6 +464,7 @@ func clearConflictingNetworkAnnotations(pod *v1.Pod, fullMeshEnabledForPod bool) if fullMeshEnabledForPod { delete(pod.Annotations, annWSTunnelClientCmds) + delete(pod.Annotations, annRatholeClientCmds) return } diff --git a/pkg/virtualkubelet/mesh_annotations_test.go b/pkg/virtualkubelet/mesh_annotations_test.go index 1ac13e1c..315ed81d 100644 --- a/pkg/virtualkubelet/mesh_annotations_test.go +++ b/pkg/virtualkubelet/mesh_annotations_test.go @@ -27,6 +27,24 @@ func TestClearConflictingNetworkAnnotations(t *testing.T) { assert.Equal(t, "value", pod.Annotations["keep"]) }) + t.Run("full mesh also removes rathole command annotation", func(t *testing.T) { + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + annRatholeClientCmds: "rathole-command", + annWGClientSnippet: "wireguard-snippet", + "keep": "value", + }, + }, + } + + clearConflictingNetworkAnnotations(pod, true) + + assert.NotContains(t, pod.Annotations, annRatholeClientCmds) + assert.Contains(t, pod.Annotations, annWGClientSnippet) + assert.Equal(t, "value", pod.Annotations["keep"]) + }) + t.Run("non mesh removes wireguard snippet annotation", func(t *testing.T) { pod := &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ diff --git a/pkg/virtualkubelet/rathole_test.go b/pkg/virtualkubelet/rathole_test.go new file mode 100644 index 00000000..ad63cbb9 --- /dev/null +++ b/pkg/virtualkubelet/rathole_test.go @@ -0,0 +1,299 @@ +package virtualkubelet + +import ( + "context" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/fake" +) + +// TestRatholeTemplateExecution verifies that the built-in rathole template can be +// loaded and executed without errors when TunnelType is "rathole". 
+func TestRatholeTemplateExecution(t *testing.T) { + p := &Provider{ + config: Config{ + Network: Network{ + TunnelType: "rathole", + WildcardDNS: "tunnel.example.com", + }, + }, + clientSet: fake.NewClientset(), + } + + data := WstunnelTemplateData{ + Name: "my-pod-default", + Namespace: "default-wstunnel", + RandomPassword: "abc123", + WildcardDNS: "tunnel.example.com", + ExposedPorts: []PortMapping{ + {Port: 8080, Name: "http", Protocol: "TCP"}, + {Port: 9090, Name: "metrics", Protocol: "TCP"}, + }, + } + + ctx := context.Background() + yaml, err := p.executeWstunnelTemplate(ctx, data) + require.NoError(t, err) + assert.NotEmpty(t, yaml) + + // Verify the rendered YAML contains rathole-specific markers + assert.Contains(t, yaml, "rathole-config", "ConfigMap name should reference rathole") + assert.Contains(t, yaml, "rapiz1/rathole", "should use the default rathole image") + assert.Contains(t, yaml, "bind_addr = \"0.0.0.0:2333\"", "server control port") + assert.Contains(t, yaml, "token = \"abc123\"", "token from RandomPassword") + assert.Contains(t, yaml, "bind_addr = \"0.0.0.0:8080\"", "port 8080 should be forwarded") + assert.Contains(t, yaml, "bind_addr = \"0.0.0.0:9090\"", "port 9090 should be forwarded") + + // The nginx Ingress is rendered only when HasNginxIngress is true; in TLS mode ingress is managed + // separately via the Traefik IngressRouteTCP applied by applyRatholeTLSResources. + assert.NotContains(t, yaml, "nginx.ingress.kubernetes.io", "nginx Ingress should not be in the rathole template without HasNginxIngress") + // Plain TCP server — no WebSocket transport section + assert.NotContains(t, yaml, "type = \"websocket\"", "server should use plain TCP, not WebSocket") +} + +// TestRatholeTemplateWebSocketMode verifies that the rathole template includes an nginx Ingress +// when HasNginxIngress is true (WebSocket fallback mode, no CA issuer configured). 
+func TestRatholeTemplateWebSocketMode(t *testing.T) { + p := &Provider{ + config: Config{ + Network: Network{ + TunnelType: "rathole", + WildcardDNS: "tunnel.example.com", + }, + }, + clientSet: fake.NewClientset(), + } + + data := WstunnelTemplateData{ + Name: "my-pod-default", + Namespace: "default-wstunnel", + RandomPassword: "abc123", + WildcardDNS: "tunnel.example.com", + HasNginxIngress: true, + ExposedPorts: []PortMapping{ + {Port: 8080, Name: "http", Protocol: "TCP"}, + }, + } + + ctx := context.Background() + yaml, err := p.executeWstunnelTemplate(ctx, data) + require.NoError(t, err) + assert.NotEmpty(t, yaml) + + // WebSocket mode: nginx Ingress should be present so the client can reach port 80 + assert.Contains(t, yaml, "nginx.ingress.kubernetes.io", "nginx Ingress should be present in WebSocket mode") + assert.Contains(t, yaml, "rathole-my-pod-default.tunnel.example.com", "Ingress host should match rathole DNS") +} + +// TestWstunnelTemplateUnchanged verifies that the existing wstunnel template is still +// selected when TunnelType is empty (backward-compatible default). 
+func TestWstunnelTemplateUnchanged(t *testing.T) { + p := &Provider{ + config: Config{ + Network: Network{ + // TunnelType deliberately empty → wstunnel + WildcardDNS: "tunnel.example.com", + }, + }, + clientSet: fake.NewClientset(), + } + + data := WstunnelTemplateData{ + Name: "my-pod-default", + Namespace: "default-wstunnel", + RandomPassword: "abc123", + WildcardDNS: "tunnel.example.com", + ExposedPorts: []PortMapping{ + {Port: 8080, Name: "http", Protocol: "TCP"}, + }, + } + + ctx := context.Background() + yaml, err := p.executeWstunnelTemplate(ctx, data) + require.NoError(t, err) + assert.NotEmpty(t, yaml) + + // The default wstunnel template should not contain rathole markers + assert.NotContains(t, yaml, "rathole-config") + assert.Contains(t, yaml, "wstunnel", "should use wstunnel image/command") +} + +// TestRatholeClientAnnotation verifies that addWstunnelClientAnnotation sets the rathole +// annotation and removes any stale wstunnel annotation when using the WebSocket fallback +// (RatholeCAIssuerName not set). 
+func TestRatholeClientAnnotation(t *testing.T) { + fakeClient := fake.NewClientset() + + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + Annotations: map[string]string{ + // Simulate a stale wstunnel annotation from a previous run + annWSTunnelClientCmds: "old-wstunnel-cmd", + }, + }, + } + // Create the pod in the fake client so Patch succeeds + _, err := fakeClient.CoreV1().Pods(pod.Namespace).Create(context.Background(), pod, metav1.CreateOptions{}) + require.NoError(t, err) + + p := &Provider{ + config: Config{ + Network: Network{ + TunnelType: "rathole", + WildcardDNS: "tunnel.example.com", + // RatholeCAIssuerName intentionally left empty → WebSocket fallback + }, + }, + clientSet: fakeClient, + } + + td := &WstunnelTemplateData{ + Name: "my-pod-default", + Namespace: "default-wstunnel", + RandomPassword: "secrettoken", + WildcardDNS: "tunnel.example.com", + ExposedPorts: []PortMapping{ + {Port: 8080, Name: "http", Protocol: "TCP"}, + }, + } + + err = p.addWstunnelClientAnnotation(context.Background(), pod, td) + require.NoError(t, err) + + // The rathole annotation should be set + ratholeCmd, ok := pod.Annotations[annRatholeClientCmds] + assert.True(t, ok, "rathole client command annotation should be present") + assert.NotEmpty(t, ratholeCmd) + assert.Contains(t, ratholeCmd, DefaultRatholeExecutableURL, "should embed the default rathole URL") + // The base64-encoded client config should be included + assert.True(t, strings.Contains(ratholeCmd, "base64"), "command should decode a base64 client config") + + // The stale wstunnel annotation should be removed + _, wstunnelPresent := pod.Annotations[annWSTunnelClientCmds] + assert.False(t, wstunnelPresent, "stale wstunnel annotation should be cleared in rathole mode") +} + +// TestRatholeClientAnnotationTLS verifies that addWstunnelClientAnnotation produces a TLS-mode +// bootstrap command when RatholeCAIssuerName is configured and the cert-manager secret is 
present. +func TestRatholeClientAnnotationTLS(t *testing.T) { + fakeClient := fake.NewClientset() + + // Pre-create the cert-manager-issued client certificate secret (normally done by cert-manager) + clientCertSecret := &v1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "my-pod-default-rathole-client-tls", + Namespace: "default-wstunnel", + }, + Data: map[string][]byte{ + "ca.crt": []byte("fake-ca-cert"), + "tls.crt": []byte("fake-client-cert"), + "tls.key": []byte("fake-client-key"), + }, + } + _, err := fakeClient.CoreV1().Secrets(clientCertSecret.Namespace).Create(context.Background(), clientCertSecret, metav1.CreateOptions{}) + require.NoError(t, err) + + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + Annotations: map[string]string{}, + }, + } + _, err = fakeClient.CoreV1().Pods(pod.Namespace).Create(context.Background(), pod, metav1.CreateOptions{}) + require.NoError(t, err) + + p := &Provider{ + config: Config{ + Network: Network{ + TunnelType: "rathole", + WildcardDNS: "tunnel.example.com", + RatholeCAIssuerName: "my-admin-ca", + }, + }, + clientSet: fakeClient, + } + + td := &WstunnelTemplateData{ + Name: "my-pod-default", + Namespace: "default-wstunnel", + RandomPassword: "secrettoken", + WildcardDNS: "tunnel.example.com", + ExposedPorts: []PortMapping{ + {Port: 8080, Name: "http", Protocol: "TCP"}, + }, + } + + err = p.addWstunnelClientAnnotation(context.Background(), pod, td) + require.NoError(t, err) + + ratholeCmd, ok := pod.Annotations[annRatholeClientCmds] + require.True(t, ok, "rathole client command annotation should be present") + assert.NotEmpty(t, ratholeCmd) + + // TLS command should reference the default rathole download URL + assert.Contains(t, ratholeCmd, DefaultRatholeExecutableURL) + + // TLS command should write four distinct base64-decoded files: CA cert, client cert, client key, client TOML + assert.Contains(t, ratholeCmd, "rathole-ca.crt", "command should write CA cert file") + 
assert.Contains(t, ratholeCmd, "rathole-client.crt", "command should write client cert file") + assert.Contains(t, ratholeCmd, "rathole-client.key", "command should write client key file") + assert.Contains(t, ratholeCmd, "rathole-client.toml", "command should write client TOML file") + + // The stale wstunnel annotation must not be present + _, wstunnelPresent := pod.Annotations[annWSTunnelClientCmds] + assert.False(t, wstunnelPresent, "stale wstunnel annotation should be cleared in rathole TLS mode") +} + +// TestRatholeClientAnnotationCustomCommand verifies that a custom RatholeWSCommand template +// is honoured in WebSocket fallback mode (RatholeCAIssuerName not set). +func TestRatholeClientAnnotationCustomCommand(t *testing.T) { + fakeClient := fake.NewClientset() + + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + Annotations: map[string]string{}, + }, + } + _, err := fakeClient.CoreV1().Pods(pod.Namespace).Create(context.Background(), pod, metav1.CreateOptions{}) + require.NoError(t, err) + + customCmd := "my-custom-rathole-installer %s && my-custom-start %s &" + p := &Provider{ + config: Config{ + Network: Network{ + TunnelType: "rathole", + WildcardDNS: "tunnel.example.com", + RatholeWSCommand: customCmd, + // RatholeCAIssuerName intentionally empty → WebSocket fallback uses RatholeWSCommand + }, + }, + clientSet: fakeClient, + } + + td := &WstunnelTemplateData{ + Name: "my-pod-default", + Namespace: "default-wstunnel", + RandomPassword: "token", + WildcardDNS: "tunnel.example.com", + ExposedPorts: []PortMapping{ + {Port: 8080, Name: "http", Protocol: "TCP"}, + }, + } + + err = p.addWstunnelClientAnnotation(context.Background(), pod, td) + require.NoError(t, err) + + ratholeCmd, ok := pod.Annotations[annRatholeClientCmds] + assert.True(t, ok) + assert.Contains(t, ratholeCmd, "my-custom-rathole-installer", "custom command template should be used") +} diff --git a/pkg/virtualkubelet/templates/rathole-template.yaml 
b/pkg/virtualkubelet/templates/rathole-template.yaml new file mode 100644 index 00000000..2314a1f2 --- /dev/null +++ b/pkg/virtualkubelet/templates/rathole-template.yaml @@ -0,0 +1,132 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{.Name}}-rathole-config + namespace: {{.Namespace}} +data: + server.toml: | + [server] + bind_addr = "0.0.0.0:2333" + {{- range .ExposedPorts}} + {{- if ne .Protocol "UDP"}} + + [server.services.p{{.Port}}] + token = "{{$.RandomPassword}}" + bind_addr = "0.0.0.0:{{.Port}}" + {{- end}} + {{- end}} +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{.Name}} + namespace: {{.Namespace}} +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/component: {{.Name}} + template: + metadata: + labels: + app.kubernetes.io/component: {{.Name}} + spec: + containers: + - name: rathole + # Docker Hub image published by https://github.com/rathole-org/rathole CI + image: rapiz1/rathole:v0.5.0 + imagePullPolicy: IfNotPresent + args: ["--server", "/etc/rathole/server.toml"] + ports: + - containerPort: 2333 + name: control + protocol: TCP + {{- range .ExposedPorts}} + {{- if ne .Protocol "UDP"}} + - containerPort: {{.Port}} + name: {{if .Name}}{{.Name}}{{else}}port-{{.Port}}{{end}} + protocol: TCP + {{- end}} + {{- end}} + volumeMounts: + - name: rathole-config + mountPath: /etc/rathole + resources: + requests: + cpu: 100m + memory: 64Mi + readinessProbe: + tcpSocket: + port: 2333 + initialDelaySeconds: 2 + periodSeconds: 2 + failureThreshold: 10 + livenessProbe: + tcpSocket: + port: 2333 + initialDelaySeconds: 10 + periodSeconds: 10 + nodeSelector: + kubernetes.io/os: linux + volumes: + - name: rathole-config + configMap: + name: {{.Name}}-rathole-config +--- +apiVersion: v1 +kind: Service +metadata: + name: {{.Name}} + namespace: {{.Namespace}} +spec: + type: ClusterIP + selector: + app.kubernetes.io/component: {{.Name}} + ports: + - port: 2333 + targetPort: 2333 + name: control + protocol: TCP + {{- range .ExposedPorts}} 
+ {{- if ne .Protocol "UDP"}} + {{- if ne .Port 2333}}{{/* skip control port 2333 to avoid conflicts with the rathole server listener */}} + - port: {{.Port}} + targetPort: {{.Port}} + name: {{if .Name}}{{.Name}}{{else}}port-{{.Port}}{{end}} + protocol: TCP + {{- end}} + {{- end}} + {{- end}} +{{- if .HasNginxIngress}} +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{.Name}} + namespace: {{.Namespace}} + annotations: + nginx.ingress.kubernetes.io/proxy-read-timeout: "3600" + nginx.ingress.kubernetes.io/proxy-send-timeout: "3600" + nginx.ingress.kubernetes.io/server-snippets: | + location / { + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_http_version 1.1; + proxy_set_header X-Forwarded-For $remote_addr; + proxy_set_header Host $host; + proxy_cache_bypass $http_upgrade; + } + kubernetes.io/ingress.class: "nginx" +spec: + rules: + - host: rathole-{{.Name}}.{{.WildcardDNS}} + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: {{.Name}} + port: + number: 2333 +{{- end}} diff --git a/pkg/virtualkubelet/virtualkubelet.go b/pkg/virtualkubelet/virtualkubelet.go index 7e5cbe65..b75659e0 100644 --- a/pkg/virtualkubelet/virtualkubelet.go +++ b/pkg/virtualkubelet/virtualkubelet.go @@ -33,7 +33,10 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/runtime/serializer" + "k8s.io/client-go/dynamic" "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/tools/clientcmd" @@ -46,27 +49,44 @@ import ( //go:embed templates/wstunnel-template.yaml var defaultWstunnelTemplate embed.FS +//go:embed templates/rathole-template.yaml +var defaultRatholeTemplate embed.FS + //go:embed all:templates/mesh.sh var meshScriptTemplate embed.FS const ( - 
DefaultCPUCapacity = "100" - DefaultMemoryCapacity = "3000G" - DefaultPodCapacity = "10000" - DefaultGPUCapacity = "0" - DefaultFPGACapacity = "0" - DefaultListenPort = 10250 - NamespaceKey = "namespace" - NameKey = "name" - CREATE = 0 - DELETE = 1 - nvidiaGPU = "nvidia.com/gpu" - amdGPU = "amd.com/gpu" - intelGPU = "intel.com/gpu" - xilinxFPGA = "xilinx.com/fpga" - intelFPGA = "intel.com/fpga" - DefaultProtocol = "TCP" + DefaultCPUCapacity = "100" + DefaultMemoryCapacity = "3000G" + DefaultPodCapacity = "10000" + DefaultGPUCapacity = "0" + DefaultFPGACapacity = "0" + DefaultListenPort = 10250 + NamespaceKey = "namespace" + NameKey = "name" + CREATE = 0 + DELETE = 1 + nvidiaGPU = "nvidia.com/gpu" + amdGPU = "amd.com/gpu" + intelGPU = "intel.com/gpu" + xilinxFPGA = "xilinx.com/fpga" + intelFPGA = "intel.com/fpga" + DefaultProtocol = "TCP" + // protocolUDP is the protocol string for UDP ports; used to skip UDP in tunnel client configs. + protocolUDP = "UDP" + // tunnelTypeRathole is the TunnelType value that selects the rathole port-forwarding backend. + tunnelTypeRathole = "rathole" DefaultWstunnelCommand = "curl -L -f -k https://github.com/erebe/wstunnel/releases/download/v10.4.4/wstunnel_10.4.4_linux_amd64.tar.gz -o wstunnel.tar.gz && tar -xzvf wstunnel.tar.gz && chmod +x wstunnel && ./wstunnel client --http-upgrade-path-prefix %s %s ws://%s:80 &" + // DefaultRatholeExecutableURL is the default URL to download the rathole executable zip archive. + // Source: https://github.com/rathole-org/rathole (note: x86_64 musl was dropped in v0.5.0; use gnu). + DefaultRatholeExecutableURL = "https://github.com/rathole-org/rathole/releases/download/v0.5.0/rathole-x86_64-unknown-linux-gnu.zip" + // DefaultRatholeCommand is the default command template for the rathole client in TLS mode. 
+ // Five %s format verbs are substituted in order: rathole download URL, base64-encoded CA cert, + // base64-encoded client cert, base64-encoded client key, and base64-encoded client TOML config. + DefaultRatholeCommand = "curl -L -f -k %s -o rathole.zip && unzip -q rathole.zip && chmod +x rathole && echo %s | base64 -d > /tmp/rathole-ca.crt && echo %s | base64 -d > /tmp/rathole-client.crt && echo %s | base64 -d > /tmp/rathole-client.key && echo %s | base64 -d > /tmp/rathole-client.toml && ./rathole --client /tmp/rathole-client.toml &" + // DefaultRatholeWSCommand is the fallback command template used when no CA issuer is configured + // (WebSocket transport, backward-compatible). Two %s args: download URL and base64 client TOML. + DefaultRatholeWSCommand = "curl -L -f -k %s -o rathole.zip && unzip -q rathole.zip && chmod +x rathole && echo %s | base64 -d > /tmp/rathole-client.toml && ./rathole --client /tmp/rathole-client.toml &" ) // Annotations for WireGuard and WStunnel configuration @@ -76,10 +96,13 @@ const ( annWGMTU = "interlink.eu/wg-mtu" // optional, default 1280 annWgKeepaliveSeconds = "interlink.eu/wg-keepalive-seconds" // optional, default 25 annWSTunnelClientCmds = "interlink.eu/wstunnel-client-commands" + annRatholeClientCmds = "interlink.eu/rathole-client-commands" annWGClientSnippet = "interlink.eu/wireguard-client-snippet" annDisableOffloadContainers = "interlink.eu/disable-offload-containers" // comma-separated container names annDisableOffloadInitContainers = "interlink.eu/disable-offload-init-containers" // comma-separated init container names annMeshNetworkDisabled = "interlink.eu/mesh-network" // set to "disabled" to opt out of mesh networking + annShadowSameNS = "interlink.eu/shadow-same-ns" // set to "true" to create shadow resources in the same namespace + annShadowSameNSValue = "true" // expected value for annShadowSameNS ) type WstunnelTemplateData struct { @@ -98,6 +121,9 @@ type WstunnelTemplateData struct { Volumes []v1.Volume 
PodLabels map[string]string // Labels from original pod PodAnnotations map[string]string // Annotations from original pod + // HasNginxIngress controls whether the rathole template emits a nginx Ingress for + // WebSocket mode. It is true when TunnelType=="rathole" and RatholeCAIssuerName=="". + HasNginxIngress bool } type PortMapping struct { @@ -136,6 +162,7 @@ type Provider struct { notifier func(*v1.Pod) onNodeChangeCallback func(*v1.Node) clientSet kubernetes.Interface + dynamicClient dynamic.Interface clientHTTPTransport *http.Transport podIPs []string } @@ -739,7 +766,7 @@ func (p *Provider) createDummyPod(ctx context.Context, originalPod *v1.Pod) (*v1 isSameNamespace := false if originalPod.Annotations != nil { - if val, ok := originalPod.Annotations["interlink.eu/shadow-same-ns"]; ok && val == "true" { + if val, ok := originalPod.Annotations[annShadowSameNS]; ok && val == annShadowSameNSValue { isSameNamespace = true } } @@ -844,6 +871,9 @@ func (p *Provider) createDummyPod(ctx context.Context, originalPod *v1.Pod) (*v1 Volumes: extractVolumesForLocalContainers(originalPod), PodLabels: podLabels, PodAnnotations: podAnnotations, + // In rathole WebSocket mode (no TLS issuer), expose the rathole server via nginx Ingress so + // the compute-side client can reach port 80 over WebSocket. + HasNginxIngress: p.config.Network.TunnelType == tunnelTypeRathole && p.config.Network.RatholeCAIssuerName == "", } log.G(ctx).Debugf("LocalInitContainers count: %d", len(templateData.LocalInitContainers)) @@ -870,7 +900,16 @@ func (p *Provider) createDummyPod(ctx context.Context, originalPod *v1.Pod) (*v1 return nil, nil, fmt.Errorf("failed to apply wstunnel manifests: %w", err) } - log.G(ctx).Infof("Created wstunnel infrastructure for %s/%s", originalPod.Namespace, originalPod.Name) + // For rathole TLS mode, also create the cert-manager Certificates and Traefik IngressRouteTCP. 
+ // This is a hard requirement: if TLS resource creation fails when RatholeCAIssuerName is set, + // the annotation path will block waiting for a cert secret that will never appear. + if p.config.Network.TunnelType == tunnelTypeRathole && p.config.Network.RatholeCAIssuerName != "" { + if tlsErr := p.applyRatholeTLSResources(ctx, templateData); tlsErr != nil { + return nil, nil, fmt.Errorf("failed to apply rathole TLS resources for %s/%s: %w", originalPod.Namespace, originalPod.Name, tlsErr) + } + } + + log.G(ctx).Infof("Created tunnel infrastructure (%s) for %s/%s", p.config.Network.TunnelType, originalPod.Namespace, originalPod.Name) return createdPod, &templateData, nil } @@ -944,11 +983,11 @@ func mergeMaps(dst, src map[string]string) map[string]string { return dst } -// executeWstunnelTemplate loads and executes the wstunnel template +// executeWstunnelTemplate loads and executes the tunnel template (wstunnel or rathole based on configuration) func (p *Provider) executeWstunnelTemplate(ctx context.Context, data WstunnelTemplateData) (string, error) { var templateContent string - // Try to load from custom path first + // Try to load from custom path first (applies to both wstunnel and rathole) if p.config.Network.WstunnelTemplatePath != "" { content, err := os.ReadFile(p.config.Network.WstunnelTemplatePath) if err != nil { @@ -958,11 +997,23 @@ func (p *Provider) executeWstunnelTemplate(ctx context.Context, data WstunnelTem } } - // Fall back to embedded template + // Fall back to the built-in template for the configured tunnel type if templateContent == "" { - content, err := defaultWstunnelTemplate.ReadFile("templates/wstunnel-template.yaml") - if err != nil { - return "", fmt.Errorf("failed to read embedded template: %w", err) + var ( + content []byte + err error + ) + if p.config.Network.TunnelType == tunnelTypeRathole { + content, err = defaultRatholeTemplate.ReadFile("templates/rathole-template.yaml") + if err != nil { + return "", fmt.Errorf("failed to 
read embedded rathole template: %w", err) + } + log.G(ctx).Info("Using built-in rathole template") + } else { + content, err = defaultWstunnelTemplate.ReadFile("templates/wstunnel-template.yaml") + if err != nil { + return "", fmt.Errorf("failed to read embedded template: %w", err) + } } templateContent = string(content) } @@ -1253,41 +1304,255 @@ func (p *Provider) waitForDeploymentPod(ctx context.Context, deploymentName, nam return nil, fmt.Errorf("no pod found for deployment %s within timeout", deploymentName) } -// cleanupWstunnelResources removes all wstunnel resources for a given name and namespace +// cleanupWstunnelResources removes all tunnel resources for a given name and namespace func (p *Provider) cleanupWstunnelResources(ctx context.Context, wstunnelName, namespace string) { - log.G(ctx).Infof("Cleaning up wstunnel resources for %s/%s", namespace, wstunnelName) + log.G(ctx).Infof("Cleaning up tunnel resources for %s/%s", namespace, wstunnelName) // Delete deployment err := p.clientSet.AppsV1().Deployments(namespace).Delete(ctx, wstunnelName, metav1.DeleteOptions{}) if err != nil { - log.G(ctx).Warningf("Failed to delete wstunnel deployment %s/%s: %v", namespace, wstunnelName, err) + log.G(ctx).Warningf("Failed to delete tunnel deployment %s/%s: %v", namespace, wstunnelName, err) } else { - log.G(ctx).Infof("Successfully deleted wstunnel deployment %s/%s", namespace, wstunnelName) + log.G(ctx).Infof("Successfully deleted tunnel deployment %s/%s", namespace, wstunnelName) } // Delete service err = p.clientSet.CoreV1().Services(namespace).Delete(ctx, wstunnelName, metav1.DeleteOptions{}) if err != nil { - log.G(ctx).Warningf("Failed to delete wstunnel service %s/%s: %v", namespace, wstunnelName, err) + log.G(ctx).Warningf("Failed to delete tunnel service %s/%s: %v", namespace, wstunnelName, err) } else { - log.G(ctx).Infof("Successfully deleted wstunnel service %s/%s", namespace, wstunnelName) + log.G(ctx).Infof("Successfully deleted tunnel service 
%s/%s", namespace, wstunnelName) } - // Delete ingress + // Delete ingress (nginx WebSocket ingress used in rathole WS-fallback mode or wstunnel mode). + // Ingress is absent in rathole TLS mode (Traefik IngressRouteTCP is used instead); suppress NotFound. err = p.clientSet.NetworkingV1().Ingresses(namespace).Delete(ctx, wstunnelName, metav1.DeleteOptions{}) - if err != nil { - log.G(ctx).Warningf("Failed to delete wstunnel ingress %s/%s: %v", namespace, wstunnelName, err) - } else { - log.G(ctx).Infof("Successfully deleted wstunnel ingress %s/%s", namespace, wstunnelName) + if err != nil && !apierrors.IsNotFound(err) { + log.G(ctx).Warningf("Failed to delete tunnel ingress %s/%s: %v", namespace, wstunnelName, err) + } else if err == nil { + log.G(ctx).Infof("Successfully deleted tunnel ingress %s/%s", namespace, wstunnelName) } - // Delete configmap + // Delete wstunnel wireguard configmap (used in full-mesh / wstunnel mode) err = p.clientSet.CoreV1().ConfigMaps(namespace).Delete(ctx, wstunnelName+"-wg-config", metav1.DeleteOptions{}) - if err != nil { + if err != nil && !apierrors.IsNotFound(err) { log.G(ctx).Warningf("Failed to delete wstunnel configmap %s/%s: %v", namespace, wstunnelName+"-wg-config", err) - } else { + } else if err == nil { log.G(ctx).Infof("Successfully deleted wstunnel configmap %s/%s", namespace, wstunnelName+"-wg-config") } + + // Delete rathole configmap (used in rathole mode) + err = p.clientSet.CoreV1().ConfigMaps(namespace).Delete(ctx, wstunnelName+"-rathole-config", metav1.DeleteOptions{}) + if err != nil && !apierrors.IsNotFound(err) { + log.G(ctx).Warningf("Failed to delete rathole configmap %s/%s: %v", namespace, wstunnelName+"-rathole-config", err) + } else if err == nil { + log.G(ctx).Infof("Successfully deleted rathole configmap %s/%s", namespace, wstunnelName+"-rathole-config") + } + + // Delete rathole TLS resources (cert-manager Certificates and Traefik IngressRouteTCP) + if p.dynamicClient != nil { + for _, certName := range 
[]string{wstunnelName + "-rathole-server-tls", wstunnelName + "-rathole-client-tls"} { + if delErr := p.deleteUnstructuredResource(ctx, certManagerCertGVR, certName, namespace); delErr != nil { + log.G(ctx).Warningf("Failed to delete rathole cert-manager Certificate %s/%s: %v", namespace, certName, delErr) + } else { + log.G(ctx).Infof("Deleted rathole cert-manager Certificate %s/%s", namespace, certName) + } + } + if delErr := p.deleteUnstructuredResource(ctx, traefikIngressRouteTCPGVR, wstunnelName, namespace); delErr != nil { + log.G(ctx).Warningf("Failed to delete rathole IngressRouteTCP %s/%s: %v", namespace, wstunnelName, delErr) + } else { + log.G(ctx).Infof("Deleted rathole IngressRouteTCP %s/%s", namespace, wstunnelName) + } + } +} + +// GroupVersionResource definitions for cert-manager and Traefik CRDs. +var ( + certManagerCertGVR = schema.GroupVersionResource{ + Group: "cert-manager.io", + Version: "v1", + Resource: "certificates", + } + traefikIngressRouteTCPGVR = schema.GroupVersionResource{ + Group: "traefik.io", + Version: "v1alpha1", + Resource: "ingressroutetcps", + } +) + +// applyUnstructuredResource creates or updates an unstructured Kubernetes resource via the dynamic client. +func (p *Provider) applyUnstructuredResource(ctx context.Context, gvr schema.GroupVersionResource, obj *unstructured.Unstructured) error { + if p.dynamicClient == nil { + return fmt.Errorf("dynamic client not initialised") + } + ns := obj.GetNamespace() + name := obj.GetName() + dr := p.dynamicClient.Resource(gvr).Namespace(ns) + + existing, err := dr.Get(ctx, name, metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + _, err = dr.Create(ctx, obj, metav1.CreateOptions{}) + return err + } + if err != nil { + return err + } + obj.SetResourceVersion(existing.GetResourceVersion()) + _, err = dr.Update(ctx, obj, metav1.UpdateOptions{}) + return err +} + +// deleteUnstructuredResource deletes a namespaced unstructured resource; not-found errors are ignored. 
+func (p *Provider) deleteUnstructuredResource(ctx context.Context, gvr schema.GroupVersionResource, name, namespace string) error { + if p.dynamicClient == nil { + return nil + } + err := p.dynamicClient.Resource(gvr).Namespace(namespace).Delete(ctx, name, metav1.DeleteOptions{}) + if apierrors.IsNotFound(err) { + return nil + } + return err +} + +// applyRatholeTLSResources creates the cert-manager Certificate resources (server and client) and the +// Traefik IngressRouteTCP that exposes the rathole server via TLS on the websecure entry point. +// The server TLS certificate is served by Traefik; the client certificate is issued so that +// addWstunnelClientAnnotation can embed it in the compute-side bootstrap command. +func (p *Provider) applyRatholeTLSResources(ctx context.Context, td WstunnelTemplateData) error { + if p.dynamicClient == nil { + return fmt.Errorf("dynamic client not initialised; cannot manage cert-manager/Traefik resources") + } + + issuerName := p.config.Network.RatholeCAIssuerName + issuerKind := p.config.Network.RatholeCAIssuerKind + if issuerKind == "" { + issuerKind = "ClusterIssuer" + } + + ratholeHost := fmt.Sprintf("rathole-%s.%s", td.Name, td.WildcardDNS) + ratholeHost = sanitizeFullDNSName(ratholeHost) + + serverCertName := td.Name + "-rathole-server-tls" + clientCertName := td.Name + "-rathole-client-tls" + + // cert-manager Certificate for the server (Traefik TLS termination) + serverCert := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "cert-manager.io/v1", + "kind": "Certificate", + "metadata": map[string]interface{}{ + "name": serverCertName, + "namespace": td.Namespace, + }, + "spec": map[string]interface{}{ + "secretName": serverCertName, + "dnsNames": []interface{}{ratholeHost}, + "issuerRef": map[string]interface{}{ + "name": issuerName, + "kind": issuerKind, + }, + }, + }, + } + if err := p.applyUnstructuredResource(ctx, certManagerCertGVR, serverCert); err != nil { + return fmt.Errorf("failed 
to apply rathole server Certificate: %w", err) + } + log.G(ctx).Infof("Applied cert-manager Certificate %s/%s for rathole server", td.Namespace, serverCertName) + + // cert-manager Certificate for the client (embedded in the compute-side bootstrap command) + clientCert := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "cert-manager.io/v1", + "kind": "Certificate", + "metadata": map[string]interface{}{ + "name": clientCertName, + "namespace": td.Namespace, + }, + "spec": map[string]interface{}{ + "secretName": clientCertName, + "commonName": "rathole-client", + "usages": []interface{}{"client auth"}, + "issuerRef": map[string]interface{}{ + "name": issuerName, + "kind": issuerKind, + }, + }, + }, + } + if err := p.applyUnstructuredResource(ctx, certManagerCertGVR, clientCert); err != nil { + return fmt.Errorf("failed to apply rathole client Certificate: %w", err) + } + log.G(ctx).Infof("Applied cert-manager Certificate %s/%s for rathole client", td.Namespace, clientCertName) + + // Traefik IngressRouteTCP — TLS termination at Traefik, plain TCP to the rathole server + ingressRoute := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "traefik.io/v1alpha1", + "kind": "IngressRouteTCP", + "metadata": map[string]interface{}{ + "name": td.Name, + "namespace": td.Namespace, + }, + "spec": map[string]interface{}{ + "entryPoints": []interface{}{"websecure"}, + "routes": []interface{}{ + map[string]interface{}{ + "match": fmt.Sprintf("HostSNI(`%s`)", ratholeHost), + "services": []interface{}{ + map[string]interface{}{ + "name": td.Name, + "port": int64(2333), + }, + }, + }, + }, + "tls": map[string]interface{}{ + "secretName": serverCertName, + }, + }, + }, + } + if err := p.applyUnstructuredResource(ctx, traefikIngressRouteTCPGVR, ingressRoute); err != nil { + return fmt.Errorf("failed to apply rathole IngressRouteTCP: %w", err) + } + log.G(ctx).Infof("Applied Traefik IngressRouteTCP %s/%s for rathole (host: %s)", 
td.Namespace, td.Name, ratholeHost) + + return nil +} + +// waitForRatholeCertSecret polls until cert-manager has issued the given TLS secret (with all +// required data keys: ca.crt, tls.crt, tls.key) or the context is cancelled. +func (p *Provider) waitForRatholeCertSecret(ctx context.Context, secretName, namespace string) error { + const pollInterval = 2 * time.Second + const timeout = 120 * time.Second + + ticker := time.NewTicker(pollInterval) + defer ticker.Stop() + timer := time.NewTimer(timeout) + defer timer.Stop() + + for { + secret, err := p.clientSet.CoreV1().Secrets(namespace).Get(ctx, secretName, metav1.GetOptions{}) + if err == nil { + allPresent := len(secret.Data["ca.crt"]) > 0 && + len(secret.Data["tls.crt"]) > 0 && + len(secret.Data["tls.key"]) > 0 + if allPresent { + return nil + } + } else if !apierrors.IsNotFound(err) { + // Fail fast for non-transient errors (e.g. Forbidden, Unauthorized) so the caller + // gets actionable feedback instead of waiting the full 120s timeout. + return fmt.Errorf("unexpected error polling for cert secret %s/%s: %w", namespace, secretName, err) + } + select { + case <-ctx.Done(): + return ctx.Err() + case <-timer.C: + return fmt.Errorf("timed out waiting for cert-manager to issue secret %s/%s", namespace, secretName) + case <-ticker.C: + } + } } // cleanupPartialWstunnelResources removes specific resources that were created before a failure @@ -1484,14 +1749,27 @@ func isMeshNetworkingDisabled(pod *v1.Pod) bool { // handleWstunnelCreation creates wstunnel infrastructure and returns the pod IP func (p *Provider) handleWstunnelCreation(ctx context.Context, pod *v1.Pod) (string, error) { - wstunnelName := pod.Name + "-wstunnel" + // Compute resource names using the same logic as createDummyPod so that cleanup + // always targets the resources that were actually created. 
+ isSameNamespace := false + if pod.Annotations != nil { + if val, ok := pod.Annotations[annShadowSameNS]; ok && val == annShadowSameNSValue { + isSameNamespace = true + } + } + var wstunnelName, wstunnelNS string + if isSameNamespace { + wstunnelName, wstunnelNS = computeWstunnelResourceNamesForSameNamespace(pod.Name, pod.Namespace) + } else { + wstunnelName, wstunnelNS = computeWstunnelResourceNames(pod.Name, pod.Namespace) + } // Create wstunnel infrastructure outside virtual node for port exposure dummyPod, templateData, err := p.createDummyPod(ctx, pod) if err != nil { log.G(ctx).Errorf("Failed to create wstunnel infrastructure for %s/%s: %v", pod.Namespace, pod.Name, err) // Clean up any partially created resources - p.cleanupWstunnelResources(ctx, wstunnelName, pod.Namespace) + p.cleanupWstunnelResources(ctx, wstunnelName, wstunnelNS) return "", fmt.Errorf("failed to create wstunnel infrastructure for exposed ports: %w", err) } @@ -1503,11 +1781,11 @@ func (p *Provider) handleWstunnelCreation(ctx context.Context, pod *v1.Pod) (str } } - podIP, err := p.waitForWstunnelPodIP(ctx, dummyPod, timeout, wstunnelName, pod.Namespace) + podIP, err := p.waitForWstunnelPodIP(ctx, dummyPod, timeout, wstunnelName, wstunnelNS) if err != nil { log.G(ctx).Errorf("Failed to get wstunnel pod IP for %s/%s: %v", pod.Namespace, pod.Name, err) // Clean up resources since we failed to get a working pod - p.cleanupWstunnelResources(ctx, wstunnelName, pod.Namespace) + p.cleanupWstunnelResources(ctx, wstunnelName, wstunnelNS) return "", err } @@ -1783,7 +2061,7 @@ func (p *Provider) DeletePod(ctx context.Context, pod *v1.Pod) (err error) { isSameNamespace := false if pod.Annotations != nil { - if val, ok := pod.Annotations["interlink.eu/shadow-same-ns"]; ok && val == "true" { + if val, ok := pod.Annotations[annShadowSameNS]; ok && val == annShadowSameNSValue { isSameNamespace = true } } @@ -2216,6 +2494,17 @@ func (p *Provider) initClientSet(ctx context.Context) error { 
log.G(ctx).Error(err) return err } + + p.dynamicClient, err = dynamic.NewForConfig(config) + if err != nil { + // In rathole TLS mode the dynamic client is required to create cert-manager and Traefik + // resources. Fail hard so the operator learns about the misconfiguration immediately. + if p.config.Network.TunnelType == tunnelTypeRathole && p.config.Network.RatholeCAIssuerName != "" { + log.G(ctx).Error(err) + return fmt.Errorf("dynamic client required for rathole TLS mode but could not be initialised: %w", err) + } + log.G(ctx).Warningf("Failed to create dynamic client (CRD resources will not be managed): %v", err) + } } return nil