NVIDIA · johntmyers · Mar 16, 2026 · Mar 16, 2026 · Mar 16, 2026 · Mar 16, 2026
@@ -129,22 +129,13 @@ binary_allowed(policy, exec) if {
 	b.path == ancestor
 }
 
-# Binary matching: cmdline exact path (script interpreters — e.g. node runs claude script).
-# When /usr/local/bin/claude has shebang #!/usr/bin/env node, the exe is /usr/bin/node
-# but cmdline contains /usr/local/bin/claude as an argv entry.
-binary_allowed(policy, exec) if {
-	some b
-	b := policy.binaries[_]
-	not contains(b.path, "*")
-	cp := exec.cmdline_paths[_]
-	b.path == cp
-}
-
-# Binary matching: glob pattern against path, any ancestor, or any cmdline path.
+# Binary matching: glob pattern against exe path or any ancestor.
+# NOTE: cmdline_paths are intentionally excluded — argv[0] is trivially
+# spoofable via execve and must not be used as a grant-access signal.
 binary_allowed(policy, exec) if {
 	some b in policy.binaries
 	contains(b.path, "*")
-	all_paths := array.concat(array.concat([exec.path], exec.ancestors), exec.cmdline_paths)
+	all_paths := array.concat([exec.path], exec.ancestors)
 	some p in all_paths
 	glob.match(b.path, ["/"], p)
 }

@@ -51,9 +51,15 @@ impl L7Provider for RestProvider {
 }
 
 /// Parse one HTTP/1.1 request from the stream.
+///
+/// Reads one byte at a time to stop exactly at the `\r\n\r\n` header
+/// terminator.  A multi-byte read could consume bytes belonging to a
+/// subsequent pipelined request, and those overflow bytes would be
+/// forwarded upstream without L7 policy evaluation -- a request
+/// smuggling vulnerability.  Byte-at-a-time overhead is negligible for
+/// the typical 200-800 byte headers on L7-inspected REST endpoints.
 async fn parse_http_request<C: AsyncRead + Unpin>(client: &mut C) -> Result<Option<L7Request>> {
     let mut buf = Vec::with_capacity(4096);
-    let mut tmp = [0u8; 1024];
 
     loop {
         if buf.len() > MAX_HEADER_BYTES {
@@ -62,24 +68,19 @@ async fn parse_http_request<C: AsyncRead + Unpin>(client: &mut C) -> Result<Opti
             ));
         }
 
-        let n = match client.read(&mut tmp).await {
-            Ok(n) => n,
+        let byte = match client.read_u8().await {
+            Ok(b) => b,
             Err(e) if buf.is_empty() && is_benign_close(&e) => return Ok(None),
+            Err(e) if buf.is_empty() && e.kind() == std::io::ErrorKind::UnexpectedEof => {
+                return Ok(None); // Clean close before any data
+            }
             Err(e) => return Err(miette::miette!("{e}")),
         };
-        if n == 0 {
-            if buf.is_empty() {
-                return Ok(None); // Clean connection close
-            }
-            return Err(miette!(
-                "Client disconnected mid-request after {} bytes",
-                buf.len()
-            ));
-        }
-        buf.extend_from_slice(&tmp[..n]);
+        buf.push(byte);
 
-        // Check for end of headers
-        if buf.windows(4).any(|w| w == b"\r\n\r\n") {
+        // Check for end of headers -- `ends_with` is sufficient because
+        // we append exactly one byte per iteration.
+        if buf.ends_with(b"\r\n\r\n") {
             break;
         }
     }
@@ -109,7 +110,7 @@ async fn parse_http_request<C: AsyncRead + Unpin>(client: &mut C) -> Result<Opti
     Ok(Some(L7Request {
         action: method,
         target: path,
-        raw_header: buf, // includes header bytes + any overflow body bytes
+        raw_header: buf, // exact header bytes up to and including \r\n\r\n
         body_length,
     }))
 }
@@ -620,6 +621,43 @@ mod tests {
         }
     }
 
+    /// Regression test: two pipelined requests in a single write must be
+    /// parsed independently.  Before the fix, the 1024-byte `read()` buffer
+    /// could capture bytes from the second request, which were forwarded
+    /// upstream as body overflow of the first -- bypassing L7 policy checks.
+    #[tokio::test]
+    async fn parse_http_request_does_not_overread_next_request() {
+        let (mut client, mut writer) = tokio::io::duplex(4096);
+
+        tokio::spawn(async move {
+            writer
+                .write_all(
+                    b"GET /allowed HTTP/1.1\r\nHost: example.com\r\n\r\n\
+                      POST /blocked HTTP/1.1\r\nHost: example.com\r\nContent-Length: 0\r\n\r\n",
+                )
+                .await
+                .unwrap();
+        });
+
+        let first = parse_http_request(&mut client)
+            .await
+            .expect("first request should parse")
+            .expect("expected first request");
+        assert_eq!(first.action, "GET");
+        assert_eq!(first.target, "/allowed");
+        assert_eq!(
+            first.raw_header, b"GET /allowed HTTP/1.1\r\nHost: example.com\r\n\r\n",
+            "raw_header must contain only the first request's headers"
+        );
+
+        let second = parse_http_request(&mut client)
+            .await
+            .expect("second request should parse")
+            .expect("expected second request");
+        assert_eq!(second.action, "POST");
+        assert_eq!(second.target, "/blocked");
+    }
+
     #[test]
     fn http_method_detection() {
         assert!(looks_like_http(b"GET / HTTP/1.1\r\n"));

@@ -1199,9 +1199,25 @@ fn prepare_filesystem(policy: &SandboxPolicy) -> Result<()> {
         None
     };
 
-    // Create and chown each read_write path
+    // Create and chown each read_write path.
+    //
+    // SECURITY: use symlink_metadata (lstat) to inspect each path *before*
+    // calling chown.  chown follows symlinks, so a malicious container image
+    // could place a symlink (e.g. /sandbox -> /etc/shadow) to trick the
+    // root supervisor into transferring ownership of arbitrary files.
+    // The TOCTOU window between lstat and chown is not exploitable because
+    // no untrusted process is running yet (the child has not been forked).
     for path in &policy.filesystem.read_write {
-        if !path.exists() {
+        // Check for symlinks before touching the path.  Character/block devices
+        // (e.g. /dev/null) are legitimate read_write entries and must be allowed.
+        if let Ok(meta) = std::fs::symlink_metadata(path) {
+            if meta.file_type().is_symlink() {
+                return Err(miette::miette!(
+                    "read_write path '{}' is a symlink — refusing to chown (potential privilege escalation)",
+                    path.display()
+                ));
+            }
+        } else {
             debug!(path = %path.display(), "Creating read_write directory");
             std::fs::create_dir_all(path).into_diagnostic()?;
         }

@@ -1098,9 +1098,11 @@ network_policies:
     }
 
     #[test]
-    fn cmdline_path_matches_script_binary() {
-        // Simulates: node runs /usr/local/bin/my-tool (a script with shebang)
-        // exe = /usr/bin/node, cmdline contains /usr/local/bin/my-tool
+    fn cmdline_path_does_not_grant_access() {
+        // Simulates: node runs /usr/local/bin/my-tool (a script with shebang).
+        // exe = /usr/bin/node, cmdline contains /usr/local/bin/my-tool.
+        // cmdline_paths are attacker-controlled (argv[0] spoofing) and must
+        // NOT be used as a grant-access signal.
         let cmdline_data = r"
 network_policies:
   script_test:
@@ -1121,11 +1123,9 @@ network_policies:
         };
         let decision = engine.evaluate_network(&input).unwrap();
         assert!(
-            decision.allowed,
-            "Expected allow via cmdline path match, got deny: {}",
-            decision.reason
+            !decision.allowed,
+            "cmdline_paths must not grant network access (argv[0] is spoofable)"
         );
-        assert_eq!(decision.matched_policy.as_deref(), Some("script_test"));
     }
 
     #[test]
@@ -1156,7 +1156,7 @@ network_policies:
     }
 
     #[test]
-    fn cmdline_glob_pattern_matches() {
+    fn cmdline_glob_pattern_does_not_grant_access() {
         let glob_data = r#"
 network_policies:
   glob_test:
@@ -1177,9 +1177,8 @@ network_policies:
         };
         let decision = engine.evaluate_network(&input).unwrap();
         assert!(
-            decision.allowed,
-            "Expected glob to match cmdline path, got deny: {}",
-            decision.reason
+            !decision.allowed,
+            "cmdline_paths must not match globs for granting access (argv[0] is spoofable)"
         );
     }
 
@@ -1190,10 +1189,10 @@ network_policies:
         let input = NetworkInput {
             host: "api.anthropic.com".into(),
             port: 443,
-            binary_path: PathBuf::from("/usr/bin/node"),
+            binary_path: PathBuf::from("/usr/local/bin/claude"),
             binary_sha256: "unused".into(),
             ancestors: vec![],
-            cmdline_paths: vec![PathBuf::from("/usr/local/bin/claude")],
+            cmdline_paths: vec![],
         };
         let decision = engine.evaluate_network(&input).unwrap();
         assert!(

@@ -360,7 +360,18 @@ pub fn drop_privileges(policy: &SandboxPolicy) -> Result<()> {
         _ => None,
     };
 
+    // If no user/group is configured and we are running as root, fall back to
+    // "sandbox:sandbox" instead of silently keeping root.  This covers the
+    // local/dev-mode path where policies are loaded from disk and never pass
+    // through the server-side `ensure_sandbox_process_identity` normalization.
+    // For non-root runtimes, the no-op is safe -- we are already unprivileged.
     if user_name.is_none() && group_name.is_none() {
+        if nix::unistd::geteuid().is_root() {
+            let mut fallback = policy.clone();
+            fallback.process.run_as_user = Some("sandbox".into());
+            fallback.process.run_as_group = Some("sandbox".into());
+            return drop_privileges(&fallback);
+        }
         return Ok(());
     }
 
@@ -522,7 +533,15 @@ mod tests {
             run_as_user: None,
             run_as_group: None,
         });
-        assert!(drop_privileges(&policy).is_ok());
+        if nix::unistd::geteuid().is_root() {
+            // As root, drop_privileges falls back to "sandbox:sandbox".
+            // If that user exists, it succeeds; if not (e.g. CI), it
+            // must error rather than silently keep root.
+            let has_sandbox = User::from_name("sandbox").ok().flatten().is_some();
+            assert_eq!(drop_privileges(&policy).is_ok(), has_sandbox);
+        } else {
+            assert!(drop_privileges(&policy).is_ok());
+        }
     }
 
     #[test]
@@ -531,7 +550,12 @@ mod tests {
             run_as_user: Some(String::new()),
             run_as_group: Some(String::new()),
         });
-        assert!(drop_privileges(&policy).is_ok());
+        if nix::unistd::geteuid().is_root() {
+            let has_sandbox = User::from_name("sandbox").ok().flatten().is_some();
+            assert_eq!(drop_privileges(&policy).is_ok(), has_sandbox);
+        } else {
+            assert!(drop_privileges(&policy).is_ok());
+        }
     }
 
     #[test]

@@ -1647,6 +1647,34 @@ async fn handle_forward_proxy(
     };
     let policy_str = matched_policy.as_deref().unwrap_or("-");
 
+    // 4b. Reject if the endpoint has L7 config — the forward proxy path does
+    //     not perform per-request method/path inspection, so L7-configured
+    //     endpoints must go through the CONNECT tunnel where inspection happens.
+    if query_l7_config(&opa_engine, &decision, &host_lc, port).is_some() {
+        info!(
+            dst_host = %host_lc,
+            dst_port = port,
+            method = %method,
+            path = %path,
+            binary = %binary_str,
+            policy = %policy_str,
+            action = "deny",
+            reason = "endpoint has L7 rules; use CONNECT",
+            "FORWARD",
+        );
+        emit_denial_simple(
+            denial_tx,
+            &host_lc,
+            port,
+            &binary_str,
+            &decision,
+            "endpoint has L7 rules configured; forward proxy bypasses L7 inspection — use CONNECT",
+            "forward-l7-bypass",
+        );
+        respond(client, b"HTTP/1.1 403 Forbidden\r\n\r\n").await?;
+        return Ok(());
+    }
+
     // 5. DNS resolution + SSRF defence (mirrors the CONNECT path logic).
     //    - If allowed_ips is set: validate resolved IPs against the allowlist
     //      (this is the SSRF override for private IP destinations).