diff --git a/seccomp/default.json b/seccomp/default.json index 2017161..ea5a494 100644 --- a/seccomp/default.json +++ b/seccomp/default.json @@ -371,6 +371,7 @@ "signalfd4", "sigprocmask", "sigreturn", + "socketcall", "socketpair", "splice", "stat", @@ -435,13 +436,6 @@ "minKernel": "4.8" } }, - { - "names": [ - "socketcall" - ], - "action": "SCMP_ACT_ERRNO", - "errnoRet": 38 - }, { "names": [ "socket" diff --git a/seccomp/default_linux.go b/seccomp/default_linux.go index eacb779..75f392b 100644 --- a/seccomp/default_linux.go +++ b/seccomp/default_linux.go @@ -374,6 +374,7 @@ func DefaultProfile() *Seccomp { "signalfd4", "sigprocmask", "sigreturn", + "socketcall", "socketpair", "splice", "stat", @@ -441,27 +442,12 @@ func DefaultProfile() *Seccomp { MinKernel: &KernelVersion{4, 8}, }, }, - // socketcall(2) is explicitly denied to prevent bypassing the socket - // address family filters above on architectures where socketcall is - // supported (i386, s390, MIPS o32). - // Seccomp cannot inspect socketcall's pointer argument, so allowing it - // would let an attacker open AF_ALG sockets via socketcall(SYS_SOCKET, - // ...). Since Linux 4.3 all affected architectures provide direct - // socket syscalls, so modern userspace is not impacted. - // - // ENOSYS (not EPERM) is used because the errno must differ from - // DefaultErrnoRet; otherwise both runc and libseccomp treat the rule - // as identical to the default action and silently omit it from the - // generated BPF, which lets libseccomp's auto-generated - // socketcall(SYS_SOCKET) -> ALLOW path survive unchallenged. - { - LinuxSyscall: specs.LinuxSyscall{ - Names: []string{"socketcall"}, - Action: specs.ActErrno, - ErrnoRet: &nosys, - }, - }, // Allow socket(2) for all address families except AF_VSOCK and AF_ALG. + // NOTE: on 32-bit x86, socket() goes through socketcall(2) which is + // allowed unconditionally above, so AF_VSOCK/AF_ALG is still reachable + // via the socketcall-based socket() path. These arg filters only apply + // to the direct socket syscall, and do not protect 32-bit x86 unless + // socketcall(2) is also addressed. { LinuxSyscall: specs.LinuxSyscall{ Names: []string{"socket"},