From bfb7cc000ba640daa7a52374348267159f4390f4 Mon Sep 17 00:00:00 2001 From: Clay Kauzlaric Date: Thu, 5 Feb 2026 12:16:52 -0500 Subject: [PATCH 1/4] disable seccomp for emulated environments * this change is to support using the Docker CPI on Apple Silicon * disables seccomp if binary arch does not match kernel arch * should only apply to containerized environments --- src/bpm/runc/adapter/adapter.go | 5 + src/bpm/runc/adapter/adapter_test.go | 84 +++++++++++ src/bpm/runc/specbuilder/specbuilder.go | 11 ++ src/bpm/runc/specbuilder/specbuilder_test.go | 145 +++++++++++++++++++ src/bpm/sysfeat/sysfeat.go | 95 ++++++++++++ src/bpm/sysfeat/sysfeat_test.go | 98 +++++++++++++ 6 files changed, 438 insertions(+) create mode 100644 src/bpm/runc/specbuilder/specbuilder_test.go create mode 100644 src/bpm/sysfeat/sysfeat_test.go diff --git a/src/bpm/runc/adapter/adapter.go b/src/bpm/runc/adapter/adapter.go index e4bc75a3..0e5200e8 100644 --- a/src/bpm/runc/adapter/adapter.go +++ b/src/bpm/runc/adapter/adapter.go @@ -296,6 +296,11 @@ func (a *RuncAdapter) BuildSpec( specbuilder.Apply(spec, specbuilder.WithNamespace("pid")) } + // Disable seccomp if not supported (e.g., architecture emulation) + if !a.features.SeccompSupported { + specbuilder.Apply(spec, specbuilder.WithoutSeccomp()) + } + if procCfg.Unsafe != nil && procCfg.Unsafe.Privileged { specbuilder.Apply(spec, specbuilder.WithPrivileged()) } diff --git a/src/bpm/runc/adapter/adapter_test.go b/src/bpm/runc/adapter/adapter_test.go index 453f50b9..846862f7 100644 --- a/src/bpm/runc/adapter/adapter_test.go +++ b/src/bpm/runc/adapter/adapter_test.go @@ -816,6 +816,90 @@ var _ = Describe("RuncAdapter", func() { }) }) + Context("when seccomp is not supported", func() { + BeforeEach(func() { + features.SeccompSupported = false + identityGlob := func(pattern string) ([]string, error) { + return []string{pattern}, nil + } + runcAdapter = NewRuncAdapter(features, identityGlob, mountSharer.MakeShared, volumeLocker) + }) + + 
It("disables seccomp in the spec", func() { + spec, err := runcAdapter.BuildSpec(logger, bpmCfg, procCfg, user) + Expect(err).NotTo(HaveOccurred()) + Expect(spec.Linux.Seccomp).To(BeNil()) + }) + + It("does not affect other security features", func() { + spec, err := runcAdapter.BuildSpec(logger, bpmCfg, procCfg, user) + Expect(err).NotTo(HaveOccurred()) + + // User should still be the unprivileged user + Expect(spec.Process.User).To(Equal(user)) + + // NoNewPrivileges should still be true + Expect(spec.Process.NoNewPrivileges).To(BeTrue()) + + // Masked and readonly paths should still be set + Expect(spec.Linux.MaskedPaths).NotTo(BeEmpty()) + Expect(spec.Linux.ReadonlyPaths).NotTo(BeEmpty()) + + // Capabilities should still be limited + Expect(spec.Process.Capabilities.Bounding).To(Equal([]string{"CAP_TAIN", "CAP_SAICIN"})) + + // Mounts should still have nosuid + var hasMountWithNosuid bool + for _, mount := range spec.Mounts { + for _, opt := range mount.Options { + if opt == "nosuid" { + hasMountWithNosuid = true + break + } + } + } + Expect(hasMountWithNosuid).To(BeTrue()) + }) + + Context("when privileged mode is also enabled", func() { + BeforeEach(func() { + procCfg.Unsafe = &config.Unsafe{Privileged: true} + }) + + It("privileged mode takes precedence", func() { + spec, err := runcAdapter.BuildSpec(logger, bpmCfg, procCfg, user) + Expect(err).NotTo(HaveOccurred()) + + // Seccomp should still be nil + Expect(spec.Linux.Seccomp).To(BeNil()) + + // But other privileged settings should apply + Expect(spec.Process.User).To(Equal(specs.User{UID: 0, GID: 0})) + Expect(spec.Process.NoNewPrivileges).To(BeFalse()) + Expect(spec.Linux.MaskedPaths).To(Equal([]string{})) + Expect(spec.Linux.ReadonlyPaths).To(Equal([]string{})) + }) + }) + }) + + Context("when seccomp is supported", func() { + BeforeEach(func() { + features.SeccompSupported = true + identityGlob := func(pattern string) ([]string, error) { + return []string{pattern}, nil + } + runcAdapter = 
NewRuncAdapter(features, identityGlob, mountSharer.MakeShared, volumeLocker) + }) + + It("includes seccomp in the spec", func() { + spec, err := runcAdapter.BuildSpec(logger, bpmCfg, procCfg, user) + Expect(err).NotTo(HaveOccurred()) + Expect(spec.Linux.Seccomp).NotTo(BeNil()) + Expect(spec.Linux.Seccomp.Architectures).NotTo(BeEmpty()) + Expect(spec.Linux.Seccomp.Syscalls).NotTo(BeEmpty()) + }) + }) + Context("when the user requests a privileged container", func() { BeforeEach(func() { procCfg.Unsafe = &config.Unsafe{Privileged: true} diff --git a/src/bpm/runc/specbuilder/specbuilder.go b/src/bpm/runc/specbuilder/specbuilder.go index a5e293a5..d04a7006 100644 --- a/src/bpm/runc/specbuilder/specbuilder.go +++ b/src/bpm/runc/specbuilder/specbuilder.go @@ -190,6 +190,17 @@ var RootUser = specs.User{ GID: 0, } +// WithoutSeccomp disables seccomp filtering. This is needed when running +// in environments where seccomp BPF filters cannot be loaded (e.g., when +// running x86_64 binaries on an ARM64 kernel via Rosetta emulation). +// Unlike WithPrivileged(), this only disables seccomp without granting +// additional privileges or removing other security features. +func WithoutSeccomp() SpecOption { + return func(spec *specs.Spec) { + spec.Linux.Seccomp = nil + } +} + func WithPrivileged() SpecOption { return func(spec *specs.Spec) { Apply(spec, WithCapabilities(DefaultPrivilegedCapabilities())) diff --git a/src/bpm/runc/specbuilder/specbuilder_test.go b/src/bpm/runc/specbuilder/specbuilder_test.go new file mode 100644 index 00000000..c4c0ca37 --- /dev/null +++ b/src/bpm/runc/specbuilder/specbuilder_test.go @@ -0,0 +1,145 @@ +// Copyright (C) 2018-Present CloudFoundry.org Foundation, Inc. All rights reserved. +// +// This program and the accompanying materials are made available under +// the terms of the under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. + +package specbuilder_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + specs "github.com/opencontainers/runtime-spec/specs-go" + + "bpm/runc/specbuilder" +) + +func TestSpecbuilder(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Specbuilder Suite") +} + +var _ = Describe("SpecBuilder", func() { + Describe("WithoutSeccomp", func() { + var spec *specs.Spec + + BeforeEach(func() { + spec = specbuilder.DefaultSpec() + }) + + It("removes seccomp from the spec", func() { + Expect(spec.Linux.Seccomp).NotTo(BeNil()) + + specbuilder.Apply(spec, specbuilder.WithoutSeccomp()) + + Expect(spec.Linux.Seccomp).To(BeNil()) + }) + + It("does not affect other security features", func() { + // Capture original values + originalNoNewPrivileges := spec.Process.NoNewPrivileges + originalMaskedPaths := spec.Linux.MaskedPaths + originalReadonlyPaths := spec.Linux.ReadonlyPaths + originalUser := spec.Process.User + + specbuilder.Apply(spec, specbuilder.WithoutSeccomp()) + + // Verify other security features are unchanged + Expect(spec.Process.NoNewPrivileges).To(Equal(originalNoNewPrivileges)) + Expect(spec.Linux.MaskedPaths).To(Equal(originalMaskedPaths)) + Expect(spec.Linux.ReadonlyPaths).To(Equal(originalReadonlyPaths)) + Expect(spec.Process.User).To(Equal(originalUser)) + }) + + It("does not affect capabilities", func() { + // Add some capabilities + specbuilder.Apply(spec, specbuilder.WithCapabilities([]string{"CAP_NET_BIND_SERVICE"})) + + originalCaps := spec.Process.Capabilities + + specbuilder.Apply(spec, 
specbuilder.WithoutSeccomp()) + + // Verify capabilities are unchanged + Expect(spec.Process.Capabilities).To(Equal(originalCaps)) + }) + + It("does not affect user settings", func() { + testUser := specs.User{UID: 1000, GID: 1000} + specbuilder.Apply(spec, specbuilder.WithUser(testUser)) + + specbuilder.Apply(spec, specbuilder.WithoutSeccomp()) + + Expect(spec.Process.User).To(Equal(testUser)) + }) + + It("does not affect mount options", func() { + // Check that nosuid is still present on mounts + var hasMountWithNosuid bool + for _, mount := range spec.Mounts { + for _, opt := range mount.Options { + if opt == "nosuid" { + hasMountWithNosuid = true + break + } + } + } + + Expect(hasMountWithNosuid).To(BeTrue(), "Expected at least one mount to have nosuid option") + + specbuilder.Apply(spec, specbuilder.WithoutSeccomp()) + + // Verify nosuid is still present after WithoutSeccomp + hasMountWithNosuid = false + for _, mount := range spec.Mounts { + for _, opt := range mount.Options { + if opt == "nosuid" { + hasMountWithNosuid = true + break + } + } + } + + Expect(hasMountWithNosuid).To(BeTrue(), "Expected nosuid to remain on mounts") + }) + + Context("when applied before WithPrivileged", func() { + It("WithPrivileged still removes seccomp", func() { + specbuilder.Apply(spec, specbuilder.WithoutSeccomp()) + Expect(spec.Linux.Seccomp).To(BeNil()) + + specbuilder.Apply(spec, specbuilder.WithPrivileged()) + Expect(spec.Linux.Seccomp).To(BeNil()) + }) + }) + + Context("when applied after WithPrivileged", func() { + It("seccomp remains nil", func() { + specbuilder.Apply(spec, specbuilder.WithPrivileged()) + Expect(spec.Linux.Seccomp).To(BeNil()) + + specbuilder.Apply(spec, specbuilder.WithoutSeccomp()) + Expect(spec.Linux.Seccomp).To(BeNil()) + }) + }) + }) + + Describe("DefaultSpec", func() { + It("includes seccomp by default", func() { + spec := specbuilder.DefaultSpec() + Expect(spec.Linux.Seccomp).NotTo(BeNil()) + 
Expect(spec.Linux.Seccomp.Architectures).NotTo(BeEmpty()) + Expect(spec.Linux.Seccomp.Syscalls).NotTo(BeEmpty()) + }) + }) +}) diff --git a/src/bpm/sysfeat/sysfeat.go b/src/bpm/sysfeat/sysfeat.go index 23bb59d1..8b9e33b2 100644 --- a/src/bpm/sysfeat/sysfeat.go +++ b/src/bpm/sysfeat/sysfeat.go @@ -19,7 +19,10 @@ package sysfeat import ( "os" + "os/exec" "path/filepath" + "runtime" + "strings" "github.com/opencontainers/cgroups" ) @@ -32,10 +35,25 @@ const ( hybridMountpoint = "/sys/fs/cgroup/unified" ) +// goArchToKernelArch maps Go's GOARCH values to Linux kernel architecture +// names as returned by `uname -m`. This mapping is used to detect when +// a binary is running under architecture emulation (e.g., x86_64 binaries +// on ARM64 kernels via Rosetta). +var goArchToKernelArch = map[string]string{ + "amd64": "x86_64", + "386": "i686", + "arm64": "aarch64", + "arm": "armv7l", +} + // Features contains information about what features the host system supports. type Features struct { // Whether the system supports limiting the swap space of a process or not. SwapLimitSupported bool + // Whether the system supports seccomp BPF filtering. This may be false in + // environments with architecture emulation (e.g., x86_64 binaries running + // on ARM64 kernels via Rosetta). + SeccompSupported bool } func Fetch() (*Features, error) { @@ -46,6 +64,7 @@ func Fetch() (*Features, error) { return &Features{ SwapLimitSupported: supported, + SeccompSupported: seccompSupported(), }, nil } @@ -79,3 +98,79 @@ func swapLimitSupportedCgroup1() (bool, error) { _, err = os.Stat(filepath.Join(mountPoint, swapPathCgroup1)) return err == nil, nil } + +// seccompSupported checks whether seccomp BPF filtering is supported in the +// current environment. It returns false when running in a container with +// architecture emulation (e.g., x86_64 binaries on ARM64 kernels). 
+func seccompSupported() bool { + // Allow override to force seccomp enabled even in emulated environments + if os.Getenv("BPM_DISABLE_SECCOMP_DETECTION") != "" { + return true + } + + // If not in a container, seccomp works normally + if !isRunningInContainer() { + return true + } + + // Check if Go binary architecture matches kernel architecture + goArch := runtime.GOARCH // e.g., "amd64" + kernelArch := getKernelArch() // e.g., "x86_64" + + expectedKernelArch, ok := goArchToKernelArch[goArch] + if !ok { + // Unknown architecture mapping, assume seccomp works (conservative) + return true + } + + // If architectures don't match, we're under emulation + // Seccomp BPF filters won't work + if kernelArch != expectedKernelArch { + return false + } + + return true +} + +// isRunningInContainer checks whether the current process is running inside +// a container environment. +func isRunningInContainer() bool { + // Check for /.dockerenv + if _, err := os.Stat("/.dockerenv"); err == nil { + return true + } + + // Check systemd-detect-virt -c + cmd := exec.Command("systemd-detect-virt", "-c") + output, err := cmd.Output() + if err == nil { + result := strings.TrimSpace(string(output)) + if result != "none" && result != "" { + return true + } + } + + // Check /proc/1/cgroup for container indicators + data, err := os.ReadFile("/proc/1/cgroup") + if err == nil { + content := string(data) + if strings.Contains(content, "docker") || + strings.Contains(content, "lxc") || + strings.Contains(content, "kubepods") { + return true + } + } + + return false +} + +// getKernelArch returns the kernel architecture using uname -m. 
+func getKernelArch() string { + cmd := exec.Command("uname", "-m") + output, err := cmd.Output() + if err != nil { + // NOTE(review): "" never equals a mapped kernel arch, so seccompSupported treats this failure as a mismatch + return "" + } + return strings.TrimSpace(string(output)) +} diff --git a/src/bpm/sysfeat/sysfeat_test.go b/src/bpm/sysfeat/sysfeat_test.go new file mode 100644 index 00000000..f40d7689 --- /dev/null +++ b/src/bpm/sysfeat/sysfeat_test.go @@ -0,0 +1,98 @@ +// Copyright (C) 2018-Present CloudFoundry.org Foundation, Inc. All rights reserved. +// +// This program and the accompanying materials are made available under +// the terms of the under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. + +package sysfeat_test + +import ( + "os" + "runtime" + "testing" + + . "github.com/onsi/ginkgo/v2" + .
"github.com/onsi/gomega" + + "bpm/sysfeat" +) + +func TestSysfeat(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Sysfeat Suite") +} + +var _ = Describe("Features", func() { + Describe("Fetch", func() { + It("returns a Features struct", func() { + features, err := sysfeat.Fetch() + Expect(err).NotTo(HaveOccurred()) + Expect(features).NotTo(BeNil()) + }) + + It("includes SeccompSupported field", func() { + features, err := sysfeat.Fetch() + Expect(err).NotTo(HaveOccurred()) + // On native systems, seccomp should be supported + // We can't assert the exact value as it depends on the environment + // but we can verify the field exists and has a boolean value + _ = features.SeccompSupported + }) + + Context("when BPM_DISABLE_SECCOMP_DETECTION is set", func() { + BeforeEach(func() { + os.Setenv("BPM_DISABLE_SECCOMP_DETECTION", "1") + }) + + AfterEach(func() { + os.Unsetenv("BPM_DISABLE_SECCOMP_DETECTION") + }) + + It("forces SeccompSupported to true", func() { + features, err := sysfeat.Fetch() + Expect(err).NotTo(HaveOccurred()) + Expect(features.SeccompSupported).To(BeTrue()) + }) + }) + + Context("on a native system", func() { + It("reports seccomp as supported", func() { + // This test assumes we're running on a native system (not in an + // emulated container). In CI/CD environments, this should be true. 
+ features, err := sysfeat.Fetch() + Expect(err).NotTo(HaveOccurred()) + + // If we're not in a container, seccomp should always be supported + // We can check this by verifying we're not in a container + // (no /.dockerenv file) + _, dockerEnvErr := os.Stat("/.dockerenv") + if os.IsNotExist(dockerEnvErr) { + // Not in a container, seccomp should be supported + Expect(features.SeccompSupported).To(BeTrue()) + } + }) + }) + }) + + Describe("Architecture detection", func() { + It("correctly identifies the current architecture", func() { + // This is more of a smoke test to ensure the architecture + // detection doesn't panic or return unexpected values + goArch := runtime.GOARCH + Expect(goArch).NotTo(BeEmpty()) + + // Common architectures we expect + validArchs := []string{"amd64", "386", "arm64", "arm"} + Expect(validArchs).To(ContainElement(goArch)) + }) + }) +}) From f2f5e8b9ef955858cceb6e4bde5f63cea9fcc7ca Mon Sep 17 00:00:00 2001 From: Clay Kauzlaric Date: Thu, 5 Feb 2026 12:28:53 -0500 Subject: [PATCH 2/4] test: check error values --- src/bpm/sysfeat/sysfeat_test.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/bpm/sysfeat/sysfeat_test.go b/src/bpm/sysfeat/sysfeat_test.go index f40d7689..6d572b25 100644 --- a/src/bpm/sysfeat/sysfeat_test.go +++ b/src/bpm/sysfeat/sysfeat_test.go @@ -50,11 +50,13 @@ var _ = Describe("Features", func() { Context("when BPM_DISABLE_SECCOMP_DETECTION is set", func() { BeforeEach(func() { - os.Setenv("BPM_DISABLE_SECCOMP_DETECTION", "1") + err := os.Setenv("BPM_DISABLE_SECCOMP_DETECTION", "1") + Expect(err).NotTo(HaveOccurred()) }) AfterEach(func() { - os.Unsetenv("BPM_DISABLE_SECCOMP_DETECTION") + err := os.Unsetenv("BPM_DISABLE_SECCOMP_DETECTION") + Expect(err).NotTo(HaveOccurred()) }) It("forces SeccompSupported to true", func() { From 6d7e2f4e1c5f7ac2c2972fb1987e8fc190ec7f5b Mon Sep 17 00:00:00 2001 From: Clay Kauzlaric Date: Thu, 5 Feb 2026 12:40:51 -0500 Subject: [PATCH 3/4] fix: rerun goimports 
for formatting --- src/bpm/runc/specbuilder/specbuilder_test.go | 2 +- src/bpm/sysfeat/sysfeat.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/bpm/runc/specbuilder/specbuilder_test.go b/src/bpm/runc/specbuilder/specbuilder_test.go index c4c0ca37..23f82414 100644 --- a/src/bpm/runc/specbuilder/specbuilder_test.go +++ b/src/bpm/runc/specbuilder/specbuilder_test.go @@ -65,7 +65,7 @@ var _ = Describe("SpecBuilder", func() { It("does not affect capabilities", func() { // Add some capabilities specbuilder.Apply(spec, specbuilder.WithCapabilities([]string{"CAP_NET_BIND_SERVICE"})) - + originalCaps := spec.Process.Capabilities specbuilder.Apply(spec, specbuilder.WithoutSeccomp()) diff --git a/src/bpm/sysfeat/sysfeat.go b/src/bpm/sysfeat/sysfeat.go index 8b9e33b2..d25cf471 100644 --- a/src/bpm/sysfeat/sysfeat.go +++ b/src/bpm/sysfeat/sysfeat.go @@ -114,8 +114,8 @@ func seccompSupported() bool { } // Check if Go binary architecture matches kernel architecture - goArch := runtime.GOARCH // e.g., "amd64" - kernelArch := getKernelArch() // e.g., "x86_64" + goArch := runtime.GOARCH // e.g., "amd64" + kernelArch := getKernelArch() // e.g., "x86_64" expectedKernelArch, ok := goArchToKernelArch[goArch] if !ok { From 00af2a1a41627338fb989cb5d99d3f41dba23ded Mon Sep 17 00:00:00 2001 From: Clay Kauzlaric Date: Thu, 5 Feb 2026 13:56:51 -0500 Subject: [PATCH 4/4] update method for detecting emulation * when doing rosetta emulation, the uname -m will return x86_64, which breaks the old way of checking --- src/bpm/sysfeat/sysfeat.go | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/bpm/sysfeat/sysfeat.go b/src/bpm/sysfeat/sysfeat.go index d25cf471..7b3f5ad1 100644 --- a/src/bpm/sysfeat/sysfeat.go +++ b/src/bpm/sysfeat/sysfeat.go @@ -103,11 +103,23 @@ func swapLimitSupportedCgroup1() (bool, error) { // current environment. 
It returns false when running in a container with // architecture emulation (e.g., x86_64 binaries on ARM64 kernels). func seccompSupported() bool { + // Allow override to force seccomp disabled in emulated environments + if os.Getenv("BPM_FORCE_DISABLE_SECCOMP") != "" { + return false + } + // Allow override to force seccomp enabled even in emulated environments if os.Getenv("BPM_DISABLE_SECCOMP_DETECTION") != "" { return true } + // Check if running under Rosetta emulation on Apple Silicon + // This is the most reliable detection because Rosetta intercepts uname + // and lies to x86 binaries about the kernel architecture. + if isRunningUnderRosetta() { + return false + } + // If not in a container, seccomp works normally if !isRunningInContainer() { return true @@ -132,6 +144,30 @@ func seccompSupported() bool { return true } +// isRunningUnderRosetta detects if we're running under Apple's Rosetta +// translation layer on Apple Silicon. Rosetta intercepts the uname syscall +// and returns "x86_64" to x86 binaries even though the kernel is actually +// ARM64 (aarch64). This breaks seccomp BPF filters because they're +// architecture-specific. +func isRunningUnderRosetta() bool { + // Check if rosetta is registered in binfmt_misc + // This shows Rosetta is registered on the system; NOTE(review): it returns true regardless of runtime.GOARCH + if _, err := os.Stat("/proc/sys/fs/binfmt_misc/rosetta"); err == nil { + return true + } + + // Check /proc/cpuinfo for VirtualApple vendor + // This indicates Apple Silicon virtualization/emulation + data, err := os.ReadFile("/proc/cpuinfo") + if err == nil { + if strings.Contains(string(data), "VirtualApple") { + return true + } + } + + return false +} + // isRunningInContainer checks whether the current process is running inside // a container environment. func isRunningInContainer() bool {