From 8521de29f9ec9152be9697ee20ed610cb9cbfe5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Gronowski?= Date: Thu, 30 Apr 2026 22:47:27 +0200 Subject: [PATCH 1/3] seccomp: Document socket rule scope and socketcall limitation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a comment explaining the purpose of the socket rules and noting that on 32-bit x86, socket() goes through socketcall(2) which is allowed unconditionally, so these arg filters only apply to the direct socket syscall. Signed-off-by: Paweł Gronowski --- seccomp/default_linux.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/seccomp/default_linux.go b/seccomp/default_linux.go index f481f08..df267af 100644 --- a/seccomp/default_linux.go +++ b/seccomp/default_linux.go @@ -434,6 +434,12 @@ func DefaultProfile() *Seccomp { MinKernel: &KernelVersion{4, 8}, }, }, + // Allow socket(2) for all address families except AF_VSOCK. + // NOTE: on 32-bit x86, socket() goes through socketcall(2) which is + // allowed unconditionally above, so AF_VSOCK is still reachable + // via the socketcall-based socket() path. These arg filters only apply + // to the direct socket syscall, and do not protect 32-bit x86 unless + // socketcall(2) is also addressed. { LinuxSyscall: specs.LinuxSyscall{ Names: []string{"socket"}, From dec315c57e60de0ffb1e07485815eb71d7b820e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Gronowski?= Date: Thu, 30 Apr 2026 22:48:03 +0200 Subject: [PATCH 2/3] seccomp: Block AF_ALG in default socket policy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AF_ALG (address family 38) exposes the Linux kernel crypto API to userspace via socket(2). Containers have no legitimate need for this interface under the default profile, and leaving it accessible widens the kernel attack surface unnecessarily (see https://copy.fail/). The previous socket rule used a single "arg0 != AF_VSOCK" condition. 
Adding a second OpNotEqual for AF_ALG to that rule does not work: libseccomp allows each argument to be compared only once per rule, and runtimes such as runc handle repeated conditions on the same argument index by emitting one rule per condition, which are then ORed together, so "arg0 != AF_ALG" OR "arg0 != AF_VSOCK" would match every domain. Instead, restructure the socket allowlist into three range-based rules that cover every domain except AF_ALG (38) and AF_VSOCK (40): 1. Allow socket when arg0 < 38 (all families below AF_ALG) 2. Allow socket when arg0 == 39 (the single family between them) 3. Allow socket when arg0 > 40 (all families above AF_VSOCK) Domains 38 and 40 match none of the three rules and fall through to the default SCMP_ACT_ERRNO action. Signed-off-by: Paweł Gronowski --- seccomp/default.json | 28 +++++++++++++++++++++++++++- seccomp/default_linux.go | 32 +++++++++++++++++++++++++++++--- 2 files changed, 56 insertions(+), 4 deletions(-) diff --git a/seccomp/default.json b/seccomp/default.json index 648d00d..ea5a494 100644 --- a/seccomp/default.json +++ b/seccomp/default.json @@ -436,6 +436,32 @@ "minKernel": "4.8" } }, + { + "names": [ + "socket" + ], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 38, + "op": "SCMP_CMP_LT" + } + ] + }, + { + "names": [ + "socket" + ], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 39, + "op": "SCMP_CMP_EQ" + } + ] + }, { "names": [ "socket" @@ -445,7 +471,7 @@ { "index": 0, "value": 40, - "op": "SCMP_CMP_NE" + "op": "SCMP_CMP_GT" } ] }, diff --git a/seccomp/default_linux.go b/seccomp/default_linux.go index df267af..b53707d 100644 --- a/seccomp/default_linux.go +++ b/seccomp/default_linux.go @@ -434,12 +434,38 @@ func DefaultProfile() *Seccomp { MinKernel: &KernelVersion{4, 8}, }, }, - // Allow socket(2) for all address families except AF_VSOCK. + // Allow socket(2) for all address families except AF_VSOCK and AF_ALG. 
// NOTE: on 32-bit x86, socket() goes through socketcall(2) which is - // allowed unconditionally above, so AF_VSOCK is still reachable + // allowed unconditionally above, so AF_VSOCK/AF_ALG is still reachable // via the socketcall-based socket() path. These arg filters only apply // to the direct socket syscall, and do not protect 32-bit x86 unless // socketcall(2) is also addressed. + { + LinuxSyscall: specs.LinuxSyscall{ + Names: []string{"socket"}, + Action: specs.ActAllow, + Args: []specs.LinuxSeccompArg{ + { + Index: 0, + Value: unix.AF_ALG, + Op: specs.OpLessThan, + }, + }, + }, + }, + { + LinuxSyscall: specs.LinuxSyscall{ + Names: []string{"socket"}, + Action: specs.ActAllow, + Args: []specs.LinuxSeccompArg{ + { + Index: 0, + Value: unix.AF_ALG + 1, + Op: specs.OpEqualTo, + }, + }, + }, + }, { LinuxSyscall: specs.LinuxSyscall{ Names: []string{"socket"}, @@ -448,7 +474,7 @@ func DefaultProfile() *Seccomp { { Index: 0, Value: unix.AF_VSOCK, - Op: specs.OpNotEqual, + Op: specs.OpGreaterThan, }, }, }, From eac1a3f666d986fd96c0ef07b614b41aae2a0915 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Gronowski?= Date: Thu, 30 Apr 2026 22:48:20 +0200 Subject: [PATCH 3/3] seccomp: Add compile-time assertions for AF_ALG/AF_VSOCK values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The socket rules depend on AF_ALG and AF_VSOCK being exactly 38 and 40 with a single family between them. Add compile-time array size checks that will fail the build if these constants ever change. 
Signed-off-by: Paweł Gronowski --- seccomp/default_linux.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/seccomp/default_linux.go b/seccomp/default_linux.go index b53707d..75f392b 100644 --- a/seccomp/default_linux.go +++ b/seccomp/default_linux.go @@ -8,6 +8,14 @@ import ( "golang.org/x/sys/unix" ) +// The socket rules in DefaultProfile rely on AF_ALG and AF_VSOCK being +// exactly two apart (38 and 40), with a single family (39) between them. +var ( + _ [38]byte = [unix.AF_ALG]byte{} + _ [40]byte = [unix.AF_VSOCK]byte{} + _ [1]byte = [unix.AF_VSOCK - unix.AF_ALG - 1]byte{} +) + func arches() []Architecture { return []Architecture{ {