From 7158007a83005b14a24fb7a833e80123bf406e9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Gronowski?= Date: Thu, 30 Apr 2026 23:38:06 +0200 Subject: [PATCH] seccomp: Block socketcall to prevent AF_ALG filter bypass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The socket arg filters that block AF_ALG and AF_VSOCK only apply to the direct socket(2) syscall. On architectures with the legacy socketcall(2) multiplexer (i386, s390, MIPS o32), libseccomp auto-generates a socketcall(SYS_SOCKET) -> ALLOW companion for each socket ALLOW rule. This companion only checks the socketcall sub-command number, not the address family (behind a pointer BPF cannot dereference), bypassing the AF_ALG block for 32-bit binaries. Add an explicit socketcall -> ERRNO(ENOSYS) deny rule placed before the socket allow rules. ENOSYS must be used instead of EPERM because the deny errno must differ from DefaultErrnoRet (EPERM): runc skips calling seccomp_rule_add() entirely when a rule's action matches the default action, so an EPERM deny is never passed to libseccomp and the auto-generated socketcall ALLOW path survives unchallenged. With ENOSYS, runc passes the rule through, and libseccomp replaces the auto-generated ALLOW path with ERRNO(ENOSYS) in the BPF. Since Linux 4.3, all affected architectures provide direct socket syscalls and modern glibc/musl already use them. Only very old statically-linked 32-bit binaries compiled against pre-4.3 glibc would be affected. 
Co-authored-by: Tianon Gravi Signed-off-by: Paweł Gronowski --- seccomp/default.json | 8 +++++++- seccomp/default_linux.go | 26 ++++++++++++++++++++------ 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/seccomp/default.json b/seccomp/default.json index ea5a494..2017161 100644 --- a/seccomp/default.json +++ b/seccomp/default.json @@ -371,7 +371,6 @@ "signalfd4", "sigprocmask", "sigreturn", - "socketcall", "socketpair", "splice", "stat", @@ -436,6 +435,13 @@ "minKernel": "4.8" } }, + { + "names": [ + "socketcall" + ], + "action": "SCMP_ACT_ERRNO", + "errnoRet": 38 + }, { "names": [ "socket" diff --git a/seccomp/default_linux.go b/seccomp/default_linux.go index 75f392b..eacb779 100644 --- a/seccomp/default_linux.go +++ b/seccomp/default_linux.go @@ -374,7 +374,6 @@ func DefaultProfile() *Seccomp { "signalfd4", "sigprocmask", "sigreturn", - "socketcall", "socketpair", "splice", "stat", @@ -442,12 +441,27 @@ func DefaultProfile() *Seccomp { MinKernel: &KernelVersion{4, 8}, }, }, + // socketcall(2) is explicitly denied to prevent bypassing the socket + // address family filters below on architectures where socketcall is + // supported (i386, s390, MIPS o32). + // Seccomp cannot inspect socketcall's pointer argument, so allowing it + // would let an attacker open AF_ALG sockets via socketcall(SYS_SOCKET, + // ...). Since Linux 4.3 all affected architectures provide direct + // socket syscalls, so modern userspace is not impacted. + // + // ENOSYS (not EPERM) is used because the errno must differ from + // DefaultErrnoRet; otherwise both runc and libseccomp treat the rule + // as identical to the default action and silently omit it from the + // generated BPF, which lets libseccomp's auto-generated + // socketcall(SYS_SOCKET) -> ALLOW path survive unchallenged. + { + LinuxSyscall: specs.LinuxSyscall{ + Names: []string{"socketcall"}, + Action: specs.ActErrno, + ErrnoRet: &nosys, + }, + }, // Allow socket(2) for all address families except AF_VSOCK and AF_ALG. 
- // NOTE: on 32-bit x86, socket() goes through socketcall(2) which is - // allowed unconditionally above, so AF_VSOCK/AF_ALG is still reachable - // via the socketcall-based socket() path. These arg filters only apply - // to the direct socket syscall, and do not protect 32-bit x86 unless - // socketcall(2) is also addressed. { LinuxSyscall: specs.LinuxSyscall{ Names: []string{"socket"},