From 3c28324314729dbade8287e868eef6338c42807a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Gronowski?= Date: Wed, 6 May 2026 12:44:44 +0200 Subject: [PATCH] Revert "seccomp: Block socketcall to prevent AF_ALG filter bypass" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Blocking `socketcall` entirely had a much bigger impact on x86 binaries than anticipated. Drop the seccomp-based block in favor of an AppArmor/SELinux-based one. Seccomp cannot filter socketcall(2) arguments because the address family is behind a userspace pointer that BPF cannot dereference. Only an LSM (AppArmor or SELinux) can deny AF_ALG via the security_socket_create hook in the socketcall path. Signed-off-by: Paweł Gronowski --- seccomp/default.json | 8 +------- seccomp/default_linux.go | 26 ++++++-------------------- 2 files changed, 7 insertions(+), 27 deletions(-) diff --git a/seccomp/default.json b/seccomp/default.json index 2017161..ea5a494 100644 --- a/seccomp/default.json +++ b/seccomp/default.json @@ -371,6 +371,7 @@ "signalfd4", "sigprocmask", "sigreturn", + "socketcall", "socketpair", "splice", "stat", @@ -435,13 +436,6 @@ "minKernel": "4.8" } }, - { - "names": [ - "socketcall" - ], - "action": "SCMP_ACT_ERRNO", - "errnoRet": 38 - }, { "names": [ "socket" diff --git a/seccomp/default_linux.go b/seccomp/default_linux.go index eacb779..75f392b 100644 --- a/seccomp/default_linux.go +++ b/seccomp/default_linux.go @@ -374,6 +374,7 @@ func DefaultProfile() *Seccomp { "signalfd4", "sigprocmask", "sigreturn", + "socketcall", "socketpair", "splice", "stat", @@ -441,27 +442,12 @@ func DefaultProfile() *Seccomp { MinKernel: &KernelVersion{4, 8}, }, }, - // socketcall(2) is explicitly denied to prevent bypassing the socket - // address family filters above on architectures where socketcall is - // supported (i386, s390, MIPS o32). 
- // Seccomp cannot inspect socketcall's pointer argument, so allowing it - // would let an attacker open AF_ALG sockets via socketcall(SYS_SOCKET, - // ...). Since Linux 4.3 all affected architectures provide direct - // socket syscalls, so modern userspace is not impacted. - // - // ENOSYS (not EPERM) is used because the errno must differ from - // DefaultErrnoRet; otherwise both runc and libseccomp treat the rule - // as identical to the default action and silently omit it from the - // generated BPF, which lets libseccomp's auto-generated - // socketcall(SYS_SOCKET) -> ALLOW path survive unchallenged. - { - LinuxSyscall: specs.LinuxSyscall{ - Names: []string{"socketcall"}, - Action: specs.ActErrno, - ErrnoRet: &nosys, - }, - }, // Allow socket(2) for all address families except AF_VSOCK and AF_ALG. + // NOTE: on 32-bit x86, socket() goes through socketcall(2) which is + // allowed unconditionally above, so AF_VSOCK/AF_ALG is still reachable + // via the socketcall-based socket() path. These arg filters only apply + // to the direct socket syscall, and do not protect 32-bit x86 unless + // socketcall(2) is also addressed. { LinuxSyscall: specs.LinuxSyscall{ Names: []string{"socket"},