From d4ea048af7998a29209bcab36bc24956cb1f5fe7 Mon Sep 17 00:00:00 2001 From: johntmyers Date: Mon, 16 Mar 2026 11:18:22 -0700 Subject: [PATCH 1/2] feat(policy): support host wildcards and multi-port endpoints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add glob-style host wildcards to endpoints[].host using OPA's glob.match with "." as delimiter — *.example.com matches a single DNS label, **.example.com matches across labels. Validation rejects bare * and requires *. prefix; warns on broad patterns like *.com. Add repeated uint32 ports field to NetworkEndpoint for multi-port support. Backwards compatible: existing port scalar is normalized to ports array. Both the proto-to-JSON and YAML-to-JSON conversion paths emit a ports array; Rego always references endpoint.ports[_]. Fix OpaEngine::reload() to route through the full preprocessing pipeline instead of bypassing L7 validation and port normalization. Closes #359 --- architecture/security-policy.md | 168 ++++- crates/openshell-policy/src/lib.rs | 241 +++++-- .../data/sandbox-policy.rego | 35 +- crates/openshell-sandbox/src/l7/mod.rs | 170 ++++- .../src/mechanistic_mapper.rs | 2 + crates/openshell-sandbox/src/opa.rs | 636 +++++++++++++++++- e2e/python/test_sandbox_policy.py | 197 ++++++ proto/sandbox.proto | 9 + 8 files changed, 1386 insertions(+), 72 deletions(-) diff --git a/architecture/security-policy.md b/architecture/security-policy.md index 60d29130..cd4d697f 100644 --- a/architecture/security-policy.md +++ b/architecture/security-policy.md @@ -387,6 +387,7 @@ network_policies: name: claude_code # <-- human-readable name (used in audit logs) endpoints: # <-- allowed host:port pairs - { host: api.anthropic.com, port: 443 } + - { host: "*.anthropic.com", ports: [443, 8443] } # glob host + multi-port binaries: # <-- allowed binary identities - { path: /usr/local/bin/claude } ``` @@ -403,10 +404,11 @@ network_policies: Each endpoint defines a network destination and, 
optionally, L7 inspection behavior. -| Field | Type | Default | Description | -| ------------- | ---------- | --------------- | ------------------------------------------------------------------------------------------------------------------- | -| `host` | `string` | _(required)_ | Hostname to match (case-insensitive). Optional when `allowed_ips` is set (see [Hostless Endpoints](#hostless-endpoints-allowed_ips-without-host)). | -| `port` | `integer` | _(required)_ | TCP port to match | +| Field | Type | Default | Description | +| ------------- | ----------- | --------------- | ------------------------------------------------------------------------------------------------------------------- | +| `host` | `string` | _(required)_ | Hostname or glob pattern to match (case-insensitive). Supports wildcards (`*.example.com`). Optional when `allowed_ips` is set (see [Hostless Endpoints](#hostless-endpoints-allowed_ips-without-host)). See [Host Wildcards](#host-wildcards). | +| `port` | `integer` | _(required unless `ports` is set)_ | Single TCP port to match. Ignored when `ports` is non-empty — if both are set, `ports` takes precedence. See [Multi-Port Endpoints](#multi-port-endpoints). | +| `ports` | `integer[]`| `[]` | Multiple TCP ports to match. When non-empty, the endpoint covers all listed ports. Backwards compatible with `port`. See [Multi-Port Endpoints](#multi-port-endpoints). | | `protocol` | `string` | `""` | Application protocol for L7 inspection. See [Behavioral Trigger: L7 Inspection](#behavioral-trigger-l7-inspection). | | `tls` | `string` | `"passthrough"` | TLS handling mode. See [Behavioral Trigger: TLS Termination](#behavioral-trigger-tls-termination). | | `enforcement` | `string` | `"audit"` | L7 enforcement mode: `"enforce"` or `"audit"` | @@ -463,6 +465,135 @@ The `access` field provides shorthand for common rule sets. During preprocessing See `crates/openshell-sandbox/src/l7/mod.rs` -- `expand_access_presets()`. 
+#### Host Wildcards + +The `host` field supports glob patterns for matching multiple subdomains under a common domain. Wildcards use OPA's `glob.match` function with `.` as the delimiter, consistent with TLS certificate wildcard semantics. + +| Pattern | Matches | Does Not Match | +|---------|---------|----------------| +| `*.example.com` | `api.example.com`, `cdn.example.com` | `example.com`, `deep.sub.example.com` | +| `**.example.com` | `api.example.com`, `deep.sub.example.com` | `example.com` | +| `*.EXAMPLE.COM` | `api.example.com` (case-insensitive) | | + +**Wildcard semantics**: + +- `*` matches exactly one DNS label (does not cross `.` boundaries). `*.example.com` matches `api.example.com` but not `deep.sub.example.com`. +- `**` matches across label boundaries. `**.example.com` matches both `api.example.com` and `deep.sub.example.com`. +- Matching is case-insensitive — both the pattern and the incoming hostname are lowercased before comparison. +- The bare domain is never matched. `*.example.com` does not match `example.com` (there must be at least one label before the domain). + +**Validation rules**: + +- **Error**: Bare `*` or `**` (matches all hosts) is rejected. Use a specific pattern like `*.example.com`. +- **Error**: Patterns must start with `*.` or `**.` prefix. Malformed patterns like `*com` are rejected. +- **Warning**: Broad patterns like `*.com` (only two labels) trigger a warning about covering all subdomains of a TLD. + +See `crates/openshell-sandbox/src/l7/mod.rs` -- `validate_l7_policies()` for validation, `sandbox-policy.rego` -- `endpoint_allowed` for the Rego glob matching rule. + +**Rego implementation**: The Rego rules detect host wildcards via `contains(endpoint.host, "*")` and dispatch to `glob.match(lower(endpoint.host), ["."], lower(network.host))`. Exact-match hosts use a separate, faster `lower(endpoint.host) == lower(network.host)` rule. See `crates/openshell-sandbox/data/sandbox-policy.rego`. 
+ +**Example**: Allow any subdomain of `example.com` on port 443: + +```yaml +network_policies: + example_wildcard: + name: example_wildcard + endpoints: + - host: "*.example.com" + port: 443 + binaries: + - { path: /usr/bin/curl } +``` + +Host wildcards compose with all other endpoint features — L7 inspection, TLS termination, multi-port, and `allowed_ips`: + +```yaml +network_policies: + wildcard_l7: + name: wildcard_l7 + endpoints: + - host: "*.example.com" + port: 8080 + protocol: rest + tls: terminate + enforcement: enforce + rules: + - allow: + method: GET + path: "/api/**" + binaries: + - { path: /usr/bin/curl } +``` + +#### Multi-Port Endpoints + +The `ports` field allows a single endpoint entry to cover multiple TCP ports. This avoids duplicating endpoint definitions that differ only in port number. + +**Normalization**: Both policy loading paths (YAML file mode and gRPC/proto mode) normalize `port` and `ports` before the data reaches the OPA engine: + +- If `ports` is non-empty, it takes precedence. `port` is ignored. +- If `ports` is empty and `port` is set, the scalar is promoted to `ports: [port]`. +- The scalar `port` field is removed from the JSON fed to OPA. Rego rules always reference `endpoint.ports[_]`. + +This normalization happens in `crates/openshell-sandbox/src/opa.rs` -- `normalize_endpoint_ports()` (YAML path) and `proto_to_opa_data_json()` (proto path). + +**Backwards compatibility**: Existing policies using `port: 443` continue to work without changes. The scalar is silently promoted to `ports: [443]` at load time. + +**YAML serialization**: When serializing policy back to YAML (e.g., `nav policy get --full`), a single-element `ports` array is emitted as the compact `port: N` scalar form. Multi-element arrays are emitted as `ports: [N, M]`. See `crates/openshell-policy/src/lib.rs` -- `from_proto()`. 
+ +**Example**: Allow both standard HTTPS and a custom TLS port: + +```yaml +network_policies: + multi_port: + name: multi_port + endpoints: + - host: api.example.com + ports: + - 443 + - 8443 + binaries: + - { path: /usr/bin/curl } +``` + +This is equivalent to two separate endpoint entries: + +```yaml + endpoints: + - { host: api.example.com, port: 443 } + - { host: api.example.com, port: 8443 } +``` + +Multi-port endpoints compose with host wildcards, L7 rules, and all other endpoint fields: + +```yaml +network_policies: + wildcard_multi_port: + name: wildcard_multi_port + endpoints: + - host: "*.example.com" + ports: [443, 8443] + protocol: rest + tls: terminate + enforcement: enforce + access: read-only + binaries: + - { path: /usr/bin/curl } +``` + +Hostless endpoints also support multi-port: + +```yaml +network_policies: + private_multi: + name: private_multi + endpoints: + - ports: [80, 443] + allowed_ips: ["10.0.0.0/8"] + binaries: + - { path: /usr/bin/curl } +``` + --- ### Inference Routing @@ -793,6 +924,8 @@ The following validation rules are enforced during policy loading (both file mod | `tls: terminate` without `protocol` | `TLS termination requires a protocol for L7 inspection` | | `protocol: sql` with `enforcement: enforce` | `SQL enforcement requires full SQL parsing (not available in v1). Use enforcement: audit.` | | `rules: []` (empty list) | `rules list cannot be empty (would deny all traffic). Use access: full or remove rules.` | +| Host wildcard is bare `*` or `**` | `host wildcard '*' matches all hosts; use specific patterns like '*.example.com'` | +| Host wildcard does not start with `*.` or `**.`| `host wildcard must start with '*.' or '**.' 
(e.g., '*.example.com'), got '{host}'` | | Invalid HTTP method in REST rules | _(warning, not error)_ | ### Errors (Live Update Rejection) @@ -812,6 +945,7 @@ These errors are returned by the gateway's `UpdateSandboxPolicy` handler and rej | Condition | Warning Message | | ---------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- | | `protocol: rest` on port 443 without `tls: terminate` | `L7 rules won't be evaluated on encrypted traffic without tls: terminate` | +| Host wildcard with ≤2 labels (e.g., `*.com`) | `host wildcard '*.com' is very broad (covers all subdomains of a TLD)` | | Unknown HTTP method in rules (not GET/HEAD/POST/PUT/DELETE/PATCH/OPTIONS/\*) | `Unknown HTTP method '{method}'. Standard methods: GET, HEAD, POST, PUT, DELETE, PATCH, OPTIONS.` | See `crates/openshell-sandbox/src/l7/mod.rs` -- `validate_l7_policies()`. @@ -1078,6 +1212,30 @@ network_policies: binaries: - { path: /usr/bin/curl } + # Host wildcard: allow any subdomain of example.com on dual ports + example_apis: + name: example_apis + endpoints: + - host: "*.example.com" + ports: + - 443 + - 8443 + binaries: + - { path: /usr/bin/curl } + + # Multi-port with L7: same L7 rules applied across two ports + multi_port_l7: + name: multi_port_l7 + endpoints: + - host: api.internal.svc + ports: [8080, 9090] + protocol: rest + tls: terminate + enforcement: enforce + access: read-only + binaries: + - { path: /usr/bin/curl } + # Forward proxy + CONNECT: private service accessible via plain HTTP or tunnel # With allowed_ips set and the destination being a private IP, both # `http://10.86.8.223:8000/path` (forward proxy) and @@ -1115,7 +1273,7 @@ When the gateway delivers policy via gRPC, the protobuf `SandboxPolicy` message | `NetworkPolicyRule` | `name` | `network_policies..name` | | `NetworkPolicyRule` | `endpoints` | `network_policies..endpoints` | | `NetworkPolicyRule` | 
`binaries` | `network_policies..binaries` | -| `NetworkEndpoint` | `host`, `port`, `protocol`, `tls`, `enforcement`, `access`, `rules`, `allowed_ips` | Same field names | +| `NetworkEndpoint` | `host`, `port`, `ports`, `protocol`, `tls`, `enforcement`, `access`, `rules`, `allowed_ips` | Same field names. `port`/`ports` normalized during loading (see [Multi-Port Endpoints](#multi-port-endpoints)). | | `L7Rule` | `allow` | `rules[].allow` | | `L7Allow` | `method`, `path`, `command` | `rules[].allow.method`, `.path`, `.command` | diff --git a/crates/openshell-policy/src/lib.rs b/crates/openshell-policy/src/lib.rs index ce80648a..f1c15539 100644 --- a/crates/openshell-policy/src/lib.rs +++ b/crates/openshell-policy/src/lib.rs @@ -81,7 +81,12 @@ struct NetworkPolicyRuleDef { struct NetworkEndpointDef { #[serde(default, skip_serializing_if = "String::is_empty")] host: String, + /// Single port (backwards compat). Mutually exclusive with `ports`. + #[serde(default, skip_serializing_if = "is_zero")] port: u32, + /// Multiple ports. When non-empty, this endpoint covers all listed ports. 
+ #[serde(default, skip_serializing_if = "Vec::is_empty")] + ports: Vec, #[serde(default, skip_serializing_if = "String::is_empty")] protocol: String, #[serde(default, skip_serializing_if = "String::is_empty")] @@ -96,6 +101,10 @@ struct NetworkEndpointDef { allowed_ips: Vec, } +fn is_zero(v: &u32) -> bool { + *v == 0 +} + #[derive(Debug, Serialize, Deserialize)] #[serde(deny_unknown_fields)] struct L7RuleDef { @@ -141,25 +150,37 @@ fn to_proto(raw: PolicyFile) -> SandboxPolicy { endpoints: rule .endpoints .into_iter() - .map(|e| NetworkEndpoint { - host: e.host, - port: e.port, - protocol: e.protocol, - tls: e.tls, - enforcement: e.enforcement, - access: e.access, - rules: e - .rules - .into_iter() - .map(|r| L7Rule { - allow: Some(L7Allow { - method: r.allow.method, - path: r.allow.path, - command: r.allow.command, - }), - }) - .collect(), - allowed_ips: e.allowed_ips, + .map(|e| { + // Normalize port/ports: ports takes precedence, else + // single port is promoted to ports array. + let normalized_ports = if !e.ports.is_empty() { + e.ports + } else if e.port > 0 { + vec![e.port] + } else { + vec![] + }; + NetworkEndpoint { + host: e.host, + port: normalized_ports.first().copied().unwrap_or(0), + ports: normalized_ports, + protocol: e.protocol, + tls: e.tls, + enforcement: e.enforcement, + access: e.access, + rules: e + .rules + .into_iter() + .map(|r| L7Rule { + allow: Some(L7Allow { + method: r.allow.method, + path: r.allow.path, + command: r.allow.command, + }), + }) + .collect(), + allowed_ips: e.allowed_ips, + } }) .collect(), binaries: rule @@ -228,28 +249,38 @@ fn from_proto(policy: &SandboxPolicy) -> PolicyFile { endpoints: rule .endpoints .iter() - .map(|e| NetworkEndpointDef { - host: e.host.clone(), - port: e.port, - protocol: e.protocol.clone(), - tls: e.tls.clone(), - enforcement: e.enforcement.clone(), - access: e.access.clone(), - rules: e - .rules - .iter() - .map(|r| { - let a = r.allow.clone().unwrap_or_default(); - L7RuleDef { - allow: 
L7AllowDef { - method: a.method, - path: a.path, - command: a.command, - }, - } - }) - .collect(), - allowed_ips: e.allowed_ips.clone(), + .map(|e| { + // Use compact form: if ports has exactly 1 element, + // emit port (scalar). If >1, emit ports (array). + let (port, ports) = if e.ports.len() > 1 { + (0, e.ports.clone()) + } else { + (e.ports.first().copied().unwrap_or(e.port), vec![]) + }; + NetworkEndpointDef { + host: e.host.clone(), + port, + ports, + protocol: e.protocol.clone(), + tls: e.tls.clone(), + enforcement: e.enforcement.clone(), + access: e.access.clone(), + rules: e + .rules + .iter() + .map(|r| { + let a = r.allow.clone().unwrap_or_default(); + L7RuleDef { + allow: L7AllowDef { + method: a.method, + path: a.path, + command: a.command, + }, + } + }) + .collect(), + allowed_ips: e.allowed_ips.clone(), + } }) .collect(), binaries: rule @@ -958,4 +989,132 @@ network_policies: assert!(s.contains("run_as_user")); assert!(s.contains("sandbox")); } + + // ---- Multi-port and host wildcard tests ---- + + #[test] + fn parse_ports_array() { + let yaml = r#" +version: 1 +network_policies: + test: + name: test + endpoints: + - { host: api.example.com, ports: [80, 443] } + binaries: + - { path: /usr/bin/curl } +"#; + let policy = parse_sandbox_policy(yaml).expect("should parse"); + let ep = &policy.network_policies["test"].endpoints[0]; + assert_eq!(ep.ports, vec![80, 443]); + // port should be set to first element for backwards compat + assert_eq!(ep.port, 80); + } + + #[test] + fn parse_single_port_normalized_to_ports() { + let yaml = r#" +version: 1 +network_policies: + test: + name: test + endpoints: + - { host: api.example.com, port: 443 } + binaries: + - { path: /usr/bin/curl } +"#; + let policy = parse_sandbox_policy(yaml).expect("should parse"); + let ep = &policy.network_policies["test"].endpoints[0]; + assert_eq!(ep.ports, vec![443]); + assert_eq!(ep.port, 443); + } + + #[test] + fn round_trip_preserves_multi_port() { + let yaml = r#" +version: 1 
+network_policies: + test: + name: test + endpoints: + - host: api.example.com + ports: + - 80 + - 443 + binaries: + - { path: /usr/bin/curl } +"#; + let proto1 = parse_sandbox_policy(yaml).expect("parse failed"); + let yaml_out = serialize_sandbox_policy(&proto1).expect("serialize failed"); + let proto2 = parse_sandbox_policy(&yaml_out).expect("re-parse failed"); + + let ep1 = &proto1.network_policies["test"].endpoints[0]; + let ep2 = &proto2.network_policies["test"].endpoints[0]; + assert_eq!(ep1.ports, ep2.ports); + assert_eq!(ep1.ports, vec![80, 443]); + } + + #[test] + fn serialize_single_port_uses_compact_form() { + let yaml = r#" +version: 1 +network_policies: + test: + name: test + endpoints: + - { host: api.example.com, port: 443 } + binaries: + - { path: /usr/bin/curl } +"#; + let proto = parse_sandbox_policy(yaml).expect("parse failed"); + let yaml_out = serialize_sandbox_policy(&proto).expect("serialize failed"); + // Should use compact `port: 443` form, not `ports: [443]` + assert!( + yaml_out.contains("port: 443"), + "Single port should serialize as compact form, got:\n{yaml_out}" + ); + assert!( + !yaml_out.contains("ports:"), + "Single port should not produce ports array, got:\n{yaml_out}" + ); + } + + #[test] + fn parse_wildcard_host() { + let yaml = r#" +version: 1 +network_policies: + test: + name: test + endpoints: + - { host: "*.example.com", port: 443 } + binaries: + - { path: /usr/bin/curl } +"#; + let policy = parse_sandbox_policy(yaml).expect("should parse"); + let ep = &policy.network_policies["test"].endpoints[0]; + assert_eq!(ep.host, "*.example.com"); + } + + #[test] + fn round_trip_preserves_wildcard_host() { + let yaml = r#" +version: 1 +network_policies: + test: + name: test + endpoints: + - host: "*.example.com" + port: 443 + binaries: + - { path: /usr/bin/curl } +"#; + let proto1 = parse_sandbox_policy(yaml).expect("parse failed"); + let yaml_out = serialize_sandbox_policy(&proto1).expect("serialize failed"); + let proto2 = 
parse_sandbox_policy(&yaml_out).expect("re-parse failed"); + assert_eq!( + proto1.network_policies["test"].endpoints[0].host, + proto2.network_policies["test"].endpoints[0].host + ); + } } diff --git a/crates/openshell-sandbox/data/sandbox-policy.rego b/crates/openshell-sandbox/data/sandbox-policy.rego index 1fddcea2..61393e15 100644 --- a/crates/openshell-sandbox/data/sandbox-policy.rego +++ b/crates/openshell-sandbox/data/sandbox-policy.rego @@ -91,12 +91,24 @@ network_policy_for_request if { binary_allowed(data.network_policies[name], input.exec) } -# Endpoint matching: host (case-insensitive) + port. +# Endpoint matching: exact host (case-insensitive) + port in ports list. endpoint_allowed(policy, network) if { some endpoint endpoint := policy.endpoints[_] + not contains(endpoint.host, "*") lower(endpoint.host) == lower(network.host) - endpoint.port == network.port + endpoint.ports[_] == network.port +} + +# Endpoint matching: glob host pattern + port in ports list. +# Uses "." as delimiter so "*" matches a single DNS label and "**" matches +# across label boundaries — consistent with TLS certificate wildcard semantics. +endpoint_allowed(policy, network) if { + some endpoint + endpoint := policy.endpoints[_] + contains(endpoint.host, "*") + glob.match(lower(endpoint.host), ["."], lower(network.host)) + endpoint.ports[_] == network.port } # Endpoint matching: hostless with allowed_ips — match any host on port. @@ -107,7 +119,7 @@ endpoint_allowed(policy, network) if { endpoint := policy.endpoints[_] object.get(endpoint, "host", "") == "" count(object.get(endpoint, "allowed_ips", [])) > 0 - endpoint.port == network.port + endpoint.ports[_] == network.port } # Binary matching: exact path. 
@@ -167,8 +179,7 @@ allow_request if { binary_allowed(policy, input.exec) some ep ep := policy.endpoints[_] - lower(ep.host) == lower(input.network.host) - ep.port == input.network.port + endpoint_matches_request(ep, input.network) request_allowed_for_endpoint(input.request, ep) } @@ -245,17 +256,25 @@ matched_endpoint_config := _matching_endpoint_configs[0] if { count(_matching_endpoint_configs) > 0 } -# Hosted endpoint: match on host (case-insensitive) + port. +# Hosted endpoint: exact host match + port in ports list. endpoint_matches_request(ep, network) if { + not contains(ep.host, "*") lower(ep.host) == lower(network.host) - ep.port == network.port + ep.ports[_] == network.port +} + +# Hosted endpoint: glob host match + port in ports list. +endpoint_matches_request(ep, network) if { + contains(ep.host, "*") + glob.match(lower(ep.host), ["."], lower(network.host)) + ep.ports[_] == network.port } # Hostless endpoint with allowed_ips: match on port only. endpoint_matches_request(ep, network) if { object.get(ep, "host", "") == "" count(object.get(ep, "allowed_ips", [])) > 0 - ep.port == network.port + ep.ports[_] == network.port } # An endpoint has extended config if it specifies L7 protocol or allowed_ips. diff --git a/crates/openshell-sandbox/src/l7/mod.rs b/crates/openshell-sandbox/src/l7/mod.rs index 9bfaef1e..f9638dc6 100644 --- a/crates/openshell-sandbox/src/l7/mod.rs +++ b/crates/openshell-sandbox/src/l7/mod.rs @@ -145,12 +145,43 @@ pub fn validate_l7_policies(data_json: &serde_json::Value) -> (Vec, Vec< .get("rules") .and_then(|v| v.as_array()) .is_some_and(|a| !a.is_empty()); - let port = ep - .get("port") - .and_then(serde_json::Value::as_u64) - .unwrap_or(0); + let host = ep.get("host").and_then(|v| v.as_str()).unwrap_or(""); + + // Read ports from either "ports" array or scalar "port". 
+ let ports: Vec = ep + .get("ports") + .and_then(|v| v.as_array()) + .map(|arr| arr.iter().filter_map(|v| v.as_u64()).collect()) + .unwrap_or_else(|| { + ep.get("port") + .and_then(serde_json::Value::as_u64) + .filter(|p| *p > 0) + .into_iter() + .collect() + }); let loc = format!("{name}.endpoints[{i}]"); + // Validate host wildcard patterns. + if host.contains('*') { + if host == "*" || host == "**" { + errors.push(format!( + "{loc}: host wildcard '{host}' matches all hosts; use specific patterns like '*.example.com'" + )); + } else if !host.starts_with("*.") && !host.starts_with("**.") { + errors.push(format!( + "{loc}: host wildcard must start with '*.' or '**.' (e.g., '*.example.com'), got '{host}'" + )); + } else { + // Warn on very broad wildcards like *.com (2 labels) + let label_count = host.split('.').count(); + if label_count <= 2 { + warnings.push(format!( + "{loc}: host wildcard '{host}' is very broad (covers all subdomains of a TLD)" + )); + } + } + } + // rules + access mutual exclusion if has_rules && !access.is_empty() { errors.push(format!("{loc}: rules and access are mutually exclusive")); @@ -189,7 +220,7 @@ pub fn validate_l7_policies(data_json: &serde_json::Value) -> (Vec, Vec< } // port 443 + rest + no tls: terminate - if protocol == "rest" && port == 443 && tls != "terminate" { + if protocol == "rest" && ports.contains(&443) && tls != "terminate" { warnings.push(format!( "{loc}: L7 rules won't be evaluated on encrypted traffic without `tls: terminate`" )); @@ -503,4 +534,133 @@ mod tests { .is_none() ); } + + // ---- Host wildcard validation tests ---- + + #[test] + fn validate_wildcard_host_star_only_error() { + let data = serde_json::json!({ + "network_policies": { + "test": { + "endpoints": [{ + "host": "*", + "port": 443 + }], + "binaries": [] + } + } + }); + let (errors, _warnings) = validate_l7_policies(&data); + assert!( + errors.iter().any(|e| e.contains("matches all hosts")), + "Bare * host should be rejected, got errors: 
{errors:?}" + ); + } + + #[test] + fn validate_wildcard_host_double_star_only_error() { + let data = serde_json::json!({ + "network_policies": { + "test": { + "endpoints": [{ + "host": "**", + "port": 443 + }], + "binaries": [] + } + } + }); + let (errors, _warnings) = validate_l7_policies(&data); + assert!( + errors.iter().any(|e| e.contains("matches all hosts")), + "Bare ** host should be rejected, got errors: {errors:?}" + ); + } + + #[test] + fn validate_wildcard_host_no_star_dot_error() { + let data = serde_json::json!({ + "network_policies": { + "test": { + "endpoints": [{ + "host": "*com", + "port": 443 + }], + "binaries": [] + } + } + }); + let (errors, _warnings) = validate_l7_policies(&data); + assert!( + errors.iter().any(|e| e.contains("must start with")), + "Malformed wildcard should be rejected, got errors: {errors:?}" + ); + } + + #[test] + fn validate_wildcard_host_broad_warning() { + let data = serde_json::json!({ + "network_policies": { + "test": { + "endpoints": [{ + "host": "*.com", + "port": 443 + }], + "binaries": [] + } + } + }); + let (errors, warnings) = validate_l7_policies(&data); + assert!(errors.is_empty(), "*.com should not error: {errors:?}"); + assert!( + warnings.iter().any(|w| w.contains("very broad")), + "*.com should warn about breadth, got warnings: {warnings:?}" + ); + } + + #[test] + fn validate_wildcard_host_valid_no_error() { + let data = serde_json::json!({ + "network_policies": { + "test": { + "endpoints": [{ + "host": "*.example.com", + "port": 443 + }], + "binaries": [] + } + } + }); + let (errors, warnings) = validate_l7_policies(&data); + assert!( + errors.is_empty(), + "*.example.com should be valid, got errors: {errors:?}" + ); + assert!( + warnings.is_empty(), + "*.example.com should not warn, got warnings: {warnings:?}" + ); + } + + #[test] + fn validate_ports_array_rest_443_warns() { + let data = serde_json::json!({ + "network_policies": { + "test": { + "endpoints": [{ + "host": "api.example.com", + "ports": [443, 
8080], + "protocol": "rest", + "access": "read-only" + }], + "binaries": [] + } + } + }); + let (_errors, warnings) = validate_l7_policies(&data); + assert!( + warnings.iter().any(|w| w.contains("tls: terminate")), + "REST on port 443 without tls:terminate should warn, got warnings: {warnings:?}" + ); + } } diff --git a/crates/openshell-sandbox/src/mechanistic_mapper.rs b/crates/openshell-sandbox/src/mechanistic_mapper.rs index 55d8e6b3..e3567321 100644 --- a/crates/openshell-sandbox/src/mechanistic_mapper.rs +++ b/crates/openshell-sandbox/src/mechanistic_mapper.rs @@ -112,6 +112,7 @@ pub async fn generate_proposals(summaries: &[DenialSummary]) -> Vec NetworkEndpoint { host: host.clone(), port: *port, + ports: vec![*port], protocol: "rest".to_string(), tls: "terminate".to_string(), enforcement: "enforce".to_string(), @@ -123,6 +124,7 @@ pub async fn generate_proposals(summaries: &[DenialSummary]) -> Vec NetworkEndpoint { host: host.clone(), port: *port, + ports: vec![*port], allowed_ips: allowed_ips.clone(), ..Default::default() } diff --git a/crates/openshell-sandbox/src/opa.rs b/crates/openshell-sandbox/src/opa.rs index a86fd683..bfe9c68f 100644 --- a/crates/openshell-sandbox/src/opa.rs +++ b/crates/openshell-sandbox/src/opa.rs @@ -274,18 +274,15 @@ impl OpaEngine { /// Reload policy and data from strings (data is YAML). /// /// Designed for future gRPC hot-reload from the openshell gateway. - /// Replaces the entire engine atomically. + /// Replaces the entire engine atomically. Routes through the full + /// preprocessing pipeline (port normalization, L7 validation, preset + /// expansion) to maintain consistency with `from_strings()`. 
pub fn reload(&self, policy: &str, data_yaml: &str) -> Result<()> { - let mut new_engine = regorus::Engine::new(); - new_engine - .add_policy("policy.rego".into(), policy.into()) - .map_err(|e| miette::miette!("{e}"))?; - let data_value = - regorus::Value::from_yaml_str(data_yaml).map_err(|e| miette::miette!("{e}"))?; - new_engine - .add_data(data_value) - .map_err(|e| miette::miette!("{e}"))?; - + let new = Self::from_strings(policy, data_yaml)?; + let new_engine = new + .engine + .into_inner() + .map_err(|_| miette::miette!("lock poisoned on new engine"))?; let mut engine = self .engine .lock() @@ -512,11 +509,14 @@ fn parse_process_policy(val: ®orus::Value) -> ProcessPolicy { } } -/// Preprocess YAML policy data: parse, validate, expand access presets, return JSON. +/// Preprocess YAML policy data: parse, normalize, validate, expand access presets, return JSON. fn preprocess_yaml_data(yaml_str: &str) -> Result { let mut data: serde_json::Value = serde_yaml::from_str(yaml_str) .map_err(|e| miette::miette!("failed to parse YAML data: {e}"))?; + // Normalize port → ports for all endpoints so Rego always sees "ports" array. + normalize_endpoint_ports(&mut data); + // Validate BEFORE expanding presets (catches user errors like rules+access) let (errors, warnings) = crate::l7::validate_l7_policies(&data); for w in &warnings { @@ -535,6 +535,56 @@ fn preprocess_yaml_data(yaml_str: &str) -> Result { serde_json::to_string(&data).map_err(|e| miette::miette!("failed to serialize data: {e}")) } +/// Normalize endpoint port/ports in JSON data. +/// +/// YAML policies may use `port: N` (single) or `ports: [N, M]` (multi). +/// This normalizes all endpoints to have a `ports` array so Rego rules +/// only need to reference `endpoint.ports[_]`. 
+fn normalize_endpoint_ports(data: &mut serde_json::Value) { + let Some(policies) = data + .get_mut("network_policies") + .and_then(|v| v.as_object_mut()) + else { + return; + }; + + for (_name, policy) in policies.iter_mut() { + let Some(endpoints) = policy.get_mut("endpoints").and_then(|v| v.as_array_mut()) else { + continue; + }; + + for ep in endpoints.iter_mut() { + let ep_obj = match ep.as_object_mut() { + Some(obj) => obj, + None => continue, + }; + + // If "ports" already exists and is non-empty, keep it. + let has_ports = ep_obj + .get("ports") + .and_then(|v| v.as_array()) + .is_some_and(|a| !a.is_empty()); + + if !has_ports { + // Promote scalar "port" to "ports" array. + let port = ep_obj + .get("port") + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + if port > 0 { + ep_obj.insert( + "ports".to_string(), + serde_json::Value::Array(vec![serde_json::json!(port)]), + ); + } + } + + // Remove scalar "port" — Rego only uses "ports". + ep_obj.remove("port"); + } + } +} + /// Convert typed proto policy fields to JSON suitable for `engine.add_data_json()`. /// /// The rego rules reference `data.*` directly, so the JSON structure has @@ -589,7 +639,16 @@ fn proto_to_opa_data_json(proto: &ProtoSandboxPolicy) -> String { .endpoints .iter() .map(|e| { - let mut ep = serde_json::json!({"host": e.host, "port": e.port}); + // Normalize port/ports: ports takes precedence, then + // single port promoted to array. Rego always sees "ports". 
+ let ports: Vec = if !e.ports.is_empty() { + e.ports.clone() + } else if e.port > 0 { + vec![e.port] + } else { + vec![] + }; + let mut ep = serde_json::json!({"host": e.host, "ports": ports}); if !e.protocol.is_empty() { ep["protocol"] = e.protocol.clone().into(); } @@ -1935,4 +1994,555 @@ process: let ips = engine.query_allowed_ips(&input).unwrap(); assert_eq!(ips, vec!["10.0.5.0/24", "10.0.6.0/24"]); } + + // ======================================================================== + // Multi-port endpoint tests + // ======================================================================== + + #[test] + fn multi_port_endpoint_matches_first_port() { + let data = r#" +network_policies: + multi: + name: multi + endpoints: + - { host: api.example.com, ports: [443, 8443] } + binaries: + - { path: /usr/bin/curl } +"#; + let engine = OpaEngine::from_strings(TEST_POLICY, data).unwrap(); + let input = NetworkInput { + host: "api.example.com".into(), + port: 443, + binary_path: PathBuf::from("/usr/bin/curl"), + binary_sha256: "unused".into(), + ancestors: vec![], + cmdline_paths: vec![], + }; + let decision = engine.evaluate_network(&input).unwrap(); + assert!( + decision.allowed, + "First port in multi-port should match: {}", + decision.reason + ); + } + + #[test] + fn multi_port_endpoint_matches_second_port() { + let data = r#" +network_policies: + multi: + name: multi + endpoints: + - { host: api.example.com, ports: [443, 8443] } + binaries: + - { path: /usr/bin/curl } +"#; + let engine = OpaEngine::from_strings(TEST_POLICY, data).unwrap(); + let input = NetworkInput { + host: "api.example.com".into(), + port: 8443, + binary_path: PathBuf::from("/usr/bin/curl"), + binary_sha256: "unused".into(), + ancestors: vec![], + cmdline_paths: vec![], + }; + let decision = engine.evaluate_network(&input).unwrap(); + assert!( + decision.allowed, + "Second port in multi-port should match: {}", + decision.reason + ); + } + + #[test] + fn multi_port_endpoint_rejects_unlisted_port() { 
+ let data = r#" +network_policies: + multi: + name: multi + endpoints: + - { host: api.example.com, ports: [443, 8443] } + binaries: + - { path: /usr/bin/curl } +"#; + let engine = OpaEngine::from_strings(TEST_POLICY, data).unwrap(); + let input = NetworkInput { + host: "api.example.com".into(), + port: 80, + binary_path: PathBuf::from("/usr/bin/curl"), + binary_sha256: "unused".into(), + ancestors: vec![], + cmdline_paths: vec![], + }; + let decision = engine.evaluate_network(&input).unwrap(); + assert!(!decision.allowed, "Unlisted port should be denied"); + } + + #[test] + fn single_port_backwards_compat() { + // Old-style YAML with just `port: 443` should still work + let data = r#" +network_policies: + compat: + name: compat + endpoints: + - { host: api.example.com, port: 443 } + binaries: + - { path: /usr/bin/curl } +"#; + let engine = OpaEngine::from_strings(TEST_POLICY, data).unwrap(); + let input = NetworkInput { + host: "api.example.com".into(), + port: 443, + binary_path: PathBuf::from("/usr/bin/curl"), + binary_sha256: "unused".into(), + ancestors: vec![], + cmdline_paths: vec![], + }; + let decision = engine.evaluate_network(&input).unwrap(); + assert!( + decision.allowed, + "Single port backwards compat: {}", + decision.reason + ); + + // Wrong port should still deny + let input_bad = NetworkInput { + host: "api.example.com".into(), + port: 80, + binary_path: PathBuf::from("/usr/bin/curl"), + binary_sha256: "unused".into(), + ancestors: vec![], + cmdline_paths: vec![], + }; + let decision = engine.evaluate_network(&input_bad).unwrap(); + assert!(!decision.allowed); + } + + #[test] + fn hostless_endpoint_multi_port() { + let data = r#" +network_policies: + private: + name: private + endpoints: + - ports: [80, 443] + allowed_ips: ["10.0.0.0/8"] + binaries: + - { path: /usr/bin/curl } +"#; + let engine = OpaEngine::from_strings(TEST_POLICY, data).unwrap(); + // Port 80 + let input80 = NetworkInput { + host: "anything.internal".into(), + port: 80, + 
binary_path: PathBuf::from("/usr/bin/curl"), + binary_sha256: "unused".into(), + ancestors: vec![], + cmdline_paths: vec![], + }; + let decision = engine.evaluate_network(&input80).unwrap(); + assert!( + decision.allowed, + "Hostless multi-port should match port 80: {}", + decision.reason + ); + // Port 443 + let input443 = NetworkInput { + host: "anything.internal".into(), + port: 443, + binary_path: PathBuf::from("/usr/bin/curl"), + binary_sha256: "unused".into(), + ancestors: vec![], + cmdline_paths: vec![], + }; + let decision = engine.evaluate_network(&input443).unwrap(); + assert!( + decision.allowed, + "Hostless multi-port should match port 443: {}", + decision.reason + ); + // Port 8080 should deny + let input_bad = NetworkInput { + host: "anything.internal".into(), + port: 8080, + binary_path: PathBuf::from("/usr/bin/curl"), + binary_sha256: "unused".into(), + ancestors: vec![], + cmdline_paths: vec![], + }; + let decision = engine.evaluate_network(&input_bad).unwrap(); + assert!(!decision.allowed); + } + + #[test] + fn from_proto_multi_port_allows_matching() { + let mut network_policies = std::collections::HashMap::new(); + network_policies.insert( + "multi".to_string(), + NetworkPolicyRule { + name: "multi".to_string(), + endpoints: vec![NetworkEndpoint { + host: "api.example.com".to_string(), + port: 443, + ports: vec![443, 8443], + ..Default::default() + }], + binaries: vec![NetworkBinary { + path: "/usr/bin/curl".to_string(), + ..Default::default() + }], + }, + ); + let proto = ProtoSandboxPolicy { + version: 1, + filesystem: Some(ProtoFs { + include_workdir: true, + read_only: vec![], + read_write: vec![], + }), + landlock: Some(openshell_core::proto::LandlockPolicy { + compatibility: "best_effort".to_string(), + }), + process: Some(ProtoProc { + run_as_user: "sandbox".to_string(), + run_as_group: "sandbox".to_string(), + }), + network_policies, + }; + let engine = OpaEngine::from_proto(&proto).unwrap(); + // Port 443 + let input443 = NetworkInput { 
+ host: "api.example.com".into(), + port: 443, + binary_path: PathBuf::from("/usr/bin/curl"), + binary_sha256: "unused".into(), + ancestors: vec![], + cmdline_paths: vec![], + }; + assert!(engine.evaluate_network(&input443).unwrap().allowed); + // Port 8443 + let input8443 = NetworkInput { + host: "api.example.com".into(), + port: 8443, + binary_path: PathBuf::from("/usr/bin/curl"), + binary_sha256: "unused".into(), + ancestors: vec![], + cmdline_paths: vec![], + }; + assert!(engine.evaluate_network(&input8443).unwrap().allowed); + // Port 80 denied + let input80 = NetworkInput { + host: "api.example.com".into(), + port: 80, + binary_path: PathBuf::from("/usr/bin/curl"), + binary_sha256: "unused".into(), + ancestors: vec![], + cmdline_paths: vec![], + }; + assert!(!engine.evaluate_network(&input80).unwrap().allowed); + } + + // ======================================================================== + // Host wildcard tests + // ======================================================================== + + #[test] + fn wildcard_host_matches_subdomain() { + let data = r#" +network_policies: + wildcard: + name: wildcard + endpoints: + - { host: "*.example.com", port: 443 } + binaries: + - { path: /usr/bin/curl } +"#; + let engine = OpaEngine::from_strings(TEST_POLICY, data).unwrap(); + let input = NetworkInput { + host: "api.example.com".into(), + port: 443, + binary_path: PathBuf::from("/usr/bin/curl"), + binary_sha256: "unused".into(), + ancestors: vec![], + cmdline_paths: vec![], + }; + let decision = engine.evaluate_network(&input).unwrap(); + assert!( + decision.allowed, + "*.example.com should match api.example.com: {}", + decision.reason + ); + } + + #[test] + fn wildcard_host_rejects_deep_subdomain() { + // * should match single DNS label only (does not cross .) 
+ let data = r#" +network_policies: + wildcard: + name: wildcard + endpoints: + - { host: "*.example.com", port: 443 } + binaries: + - { path: /usr/bin/curl } +"#; + let engine = OpaEngine::from_strings(TEST_POLICY, data).unwrap(); + let input = NetworkInput { + host: "deep.sub.example.com".into(), + port: 443, + binary_path: PathBuf::from("/usr/bin/curl"), + binary_sha256: "unused".into(), + ancestors: vec![], + cmdline_paths: vec![], + }; + let decision = engine.evaluate_network(&input).unwrap(); + assert!( + !decision.allowed, + "*.example.com should NOT match deep.sub.example.com" + ); + } + + #[test] + fn wildcard_host_rejects_exact_domain() { + let data = r#" +network_policies: + wildcard: + name: wildcard + endpoints: + - { host: "*.example.com", port: 443 } + binaries: + - { path: /usr/bin/curl } +"#; + let engine = OpaEngine::from_strings(TEST_POLICY, data).unwrap(); + let input = NetworkInput { + host: "example.com".into(), + port: 443, + binary_path: PathBuf::from("/usr/bin/curl"), + binary_sha256: "unused".into(), + ancestors: vec![], + cmdline_paths: vec![], + }; + let decision = engine.evaluate_network(&input).unwrap(); + assert!( + !decision.allowed, + "*.example.com should NOT match example.com (requires at least one label)" + ); + } + + #[test] + fn wildcard_host_case_insensitive() { + let data = r#" +network_policies: + wildcard: + name: wildcard + endpoints: + - { host: "*.EXAMPLE.COM", port: 443 } + binaries: + - { path: /usr/bin/curl } +"#; + let engine = OpaEngine::from_strings(TEST_POLICY, data).unwrap(); + let input = NetworkInput { + host: "api.example.com".into(), + port: 443, + binary_path: PathBuf::from("/usr/bin/curl"), + binary_sha256: "unused".into(), + ancestors: vec![], + cmdline_paths: vec![], + }; + let decision = engine.evaluate_network(&input).unwrap(); + assert!( + decision.allowed, + "Host wildcards should be case-insensitive: {}", + decision.reason + ); + } + + #[test] + fn wildcard_host_plus_port() { + let data = r#" 
+network_policies: + wildcard: + name: wildcard + endpoints: + - { host: "*.example.com", port: 443 } + binaries: + - { path: /usr/bin/curl } +"#; + let engine = OpaEngine::from_strings(TEST_POLICY, data).unwrap(); + // Right host, wrong port + let input = NetworkInput { + host: "api.example.com".into(), + port: 80, + binary_path: PathBuf::from("/usr/bin/curl"), + binary_sha256: "unused".into(), + ancestors: vec![], + cmdline_paths: vec![], + }; + let decision = engine.evaluate_network(&input).unwrap(); + assert!(!decision.allowed, "Wildcard host on wrong port should deny"); + } + + #[test] + fn wildcard_host_multi_port() { + let data = r#" +network_policies: + wildcard: + name: wildcard + endpoints: + - { host: "*.example.com", ports: [443, 8443] } + binaries: + - { path: /usr/bin/curl } +"#; + let engine = OpaEngine::from_strings(TEST_POLICY, data).unwrap(); + let input = NetworkInput { + host: "api.example.com".into(), + port: 8443, + binary_path: PathBuf::from("/usr/bin/curl"), + binary_sha256: "unused".into(), + ancestors: vec![], + cmdline_paths: vec![], + }; + let decision = engine.evaluate_network(&input).unwrap(); + assert!( + decision.allowed, + "Wildcard host + multi-port should match: {}", + decision.reason + ); + } + + #[test] + fn wildcard_host_l7_rules_apply() { + let data = r#" +network_policies: + wildcard_l7: + name: wildcard_l7 + endpoints: + - host: "*.example.com" + port: 8080 + protocol: rest + enforcement: enforce + tls: terminate + rules: + - allow: + method: GET + path: "/api/**" + binaries: + - { path: /usr/bin/curl } +filesystem_policy: + include_workdir: true + read_only: [] + read_write: [] +landlock: + compatibility: best_effort +process: + run_as_user: sandbox + run_as_group: sandbox +"#; + let engine = OpaEngine::from_strings(TEST_POLICY, data).unwrap(); + // L7 GET to /api/foo — should be allowed + let input = l7_input("api.example.com", 8080, "GET", "/api/foo"); + assert!( + eval_l7(&engine, &input), + "L7 rule should apply to 
wildcard-matched host" + ); + // L7 DELETE to /api/foo — should be denied by L7 rule + let input_bad = l7_input("api.example.com", 8080, "DELETE", "/api/foo"); + assert!( + !eval_l7(&engine, &input_bad), + "L7 DELETE should be denied even on wildcard host" + ); + } + + #[test] + fn wildcard_host_l7_endpoint_config_returned() { + let data = r#" +network_policies: + wildcard_l7: + name: wildcard_l7 + endpoints: + - host: "*.example.com" + port: 8080 + protocol: rest + enforcement: enforce + tls: terminate + rules: + - allow: + method: GET + path: "**" + binaries: + - { path: /usr/bin/curl } +filesystem_policy: + include_workdir: true + read_only: [] + read_write: [] +landlock: + compatibility: best_effort +process: + run_as_user: sandbox + run_as_group: sandbox +"#; + let engine = OpaEngine::from_strings(TEST_POLICY, data).unwrap(); + let input = NetworkInput { + host: "api.example.com".into(), + port: 8080, + binary_path: PathBuf::from("/usr/bin/curl"), + binary_sha256: "unused".into(), + ancestors: vec![], + cmdline_paths: vec![], + }; + let config = engine.query_endpoint_config(&input).unwrap(); + assert!( + config.is_some(), + "Should return endpoint config for wildcard-matched host" + ); + let config = config.unwrap(); + let l7 = crate::l7::parse_l7_config(&config).unwrap(); + assert_eq!(l7.protocol, crate::l7::L7Protocol::Rest); + assert_eq!(l7.enforcement, crate::l7::EnforcementMode::Enforce); + } + + #[test] + fn l7_multi_port_request_evaluation() { + let data = r#" +network_policies: + multi_l7: + name: multi_l7 + endpoints: + - host: api.example.com + ports: [8080, 9090] + protocol: rest + enforcement: enforce + tls: terminate + rules: + - allow: + method: GET + path: "**" + binaries: + - { path: /usr/bin/curl } +filesystem_policy: + include_workdir: true + read_only: [] + read_write: [] +landlock: + compatibility: best_effort +process: + run_as_user: sandbox + run_as_group: sandbox +"#; + let engine = OpaEngine::from_strings(TEST_POLICY, data).unwrap(); + 
// GET on port 8080 — allowed + let input1 = l7_input("api.example.com", 8080, "GET", "/anything"); + assert!( + eval_l7(&engine, &input1), + "L7 on first port of multi-port should work" + ); + // GET on port 9090 — allowed + let input2 = l7_input("api.example.com", 9090, "GET", "/anything"); + assert!( + eval_l7(&engine, &input2), + "L7 on second port of multi-port should work" + ); + } } diff --git a/e2e/python/test_sandbox_policy.py b/e2e/python/test_sandbox_policy.py index 3d4bfdd8..25d70359 100644 --- a/e2e/python/test_sandbox_policy.py +++ b/e2e/python/test_sandbox_policy.py @@ -1631,3 +1631,200 @@ def test_baseline_enrichment_incomplete_filesystem_policy( f"/sandbox not writable: {checks['sandbox_write']}" ) assert checks["var_log"] is True, "OpenShell log not accessible" + + +# ============================================================================= +# Multi-port endpoint tests +# ============================================================================= +# +# MP-1: Multi-port endpoint allows connections on any listed port +# MP-2: Multi-port endpoint denies connections on unlisted ports +# MP-3: Single port (backwards compat) still works via ports normalization +# ============================================================================= + + +def test_multi_port_allows_all_listed_ports( + sandbox: Callable[..., Sandbox], +) -> None: + """MP-1: Multi-port endpoint allows connections on any listed port. + + Policy allows python -> api.anthropic.com on ports 443 AND 80. + Both should be allowed; port 8080 should be denied. 
+ """ + policy = _base_policy( + network_policies={ + "multi": sandbox_pb2.NetworkPolicyRule( + name="multi", + endpoints=[ + sandbox_pb2.NetworkEndpoint( + host="api.anthropic.com", ports=[443, 80] + ), + ], + binaries=[sandbox_pb2.NetworkBinary(path="/**")], + ), + }, + ) + spec = datamodel_pb2.SandboxSpec(policy=policy) + with sandbox(spec=spec, delete_on_exit=True) as sb: + # Port 443 -> allowed + result = sb.exec_python(_proxy_connect(), args=("api.anthropic.com", 443)) + assert result.exit_code == 0, result.stderr + assert "200" in result.stdout, f"Port 443 should be allowed: {result.stdout}" + + # Port 80 -> allowed + result = sb.exec_python(_proxy_connect(), args=("api.anthropic.com", 80)) + assert result.exit_code == 0, result.stderr + assert "200" in result.stdout, f"Port 80 should be allowed: {result.stdout}" + + +def test_multi_port_denies_unlisted_port( + sandbox: Callable[..., Sandbox], +) -> None: + """MP-2: Multi-port endpoint denies connections on ports not in the list.""" + policy = _base_policy( + network_policies={ + "multi": sandbox_pb2.NetworkPolicyRule( + name="multi", + endpoints=[ + sandbox_pb2.NetworkEndpoint( + host="api.anthropic.com", ports=[443, 80] + ), + ], + binaries=[sandbox_pb2.NetworkBinary(path="/**")], + ), + }, + ) + spec = datamodel_pb2.SandboxSpec(policy=policy) + with sandbox(spec=spec, delete_on_exit=True) as sb: + # Port 8080 -> denied (not in [443, 80]) + result = sb.exec_python(_proxy_connect(), args=("api.anthropic.com", 8080)) + assert result.exit_code == 0, result.stderr + assert "403" in result.stdout, f"Port 8080 should be denied: {result.stdout}" + + +def test_single_port_backwards_compat( + sandbox: Callable[..., Sandbox], +) -> None: + """MP-3: Old-style single port field still works.""" + policy = _base_policy( + network_policies={ + "compat": sandbox_pb2.NetworkPolicyRule( + name="compat", + endpoints=[ + sandbox_pb2.NetworkEndpoint(host="api.anthropic.com", port=443), + ], + 
binaries=[sandbox_pb2.NetworkBinary(path="/**")], + ), + }, + ) + spec = datamodel_pb2.SandboxSpec(policy=policy) + with sandbox(spec=spec, delete_on_exit=True) as sb: + # Port 443 -> allowed + result = sb.exec_python(_proxy_connect(), args=("api.anthropic.com", 443)) + assert result.exit_code == 0, result.stderr + assert "200" in result.stdout, f"Single port should still work: {result.stdout}" + + # Port 80 -> denied + result = sb.exec_python(_proxy_connect(), args=("api.anthropic.com", 80)) + assert result.exit_code == 0, result.stderr + assert "403" in result.stdout + + +# ============================================================================= +# Host wildcard tests +# ============================================================================= +# +# HW-1: Wildcard *.anthropic.com matches subdomains +# HW-2: Wildcard *.anthropic.com does NOT match anthropic.com (bare domain) +# HW-3: Wildcard *.anthropic.com does NOT match deep.sub.anthropic.com +# ============================================================================= + + +def test_host_wildcard_matches_subdomain( + sandbox: Callable[..., Sandbox], +) -> None: + """HW-1: *.anthropic.com matches api.anthropic.com.""" + policy = _base_policy( + network_policies={ + "wildcard": sandbox_pb2.NetworkPolicyRule( + name="wildcard", + endpoints=[ + sandbox_pb2.NetworkEndpoint(host="*.anthropic.com", port=443), + ], + binaries=[sandbox_pb2.NetworkBinary(path="/**")], + ), + }, + ) + spec = datamodel_pb2.SandboxSpec(policy=policy) + with sandbox(spec=spec, delete_on_exit=True) as sb: + # api.anthropic.com -> matches *.anthropic.com + result = sb.exec_python(_proxy_connect(), args=("api.anthropic.com", 443)) + assert result.exit_code == 0, result.stderr + assert "200" in result.stdout, ( + f"*.anthropic.com should match api.anthropic.com: {result.stdout}" + ) + + # statsig.anthropic.com -> also matches *.anthropic.com + result = sb.exec_python(_proxy_connect(), args=("statsig.anthropic.com", 443)) + assert 
result.exit_code == 0, result.stderr + assert "200" in result.stdout, ( + f"*.anthropic.com should match statsig.anthropic.com: {result.stdout}" + ) + + # example.com -> does NOT match *.anthropic.com + result = sb.exec_python(_proxy_connect(), args=("example.com", 443)) + assert result.exit_code == 0, result.stderr + assert "403" in result.stdout, ( + f"*.anthropic.com should NOT match example.com: {result.stdout}" + ) + + +def test_host_wildcard_rejects_bare_domain( + sandbox: Callable[..., Sandbox], +) -> None: + """HW-2: *.anthropic.com does NOT match anthropic.com (requires a subdomain).""" + policy = _base_policy( + network_policies={ + "wildcard": sandbox_pb2.NetworkPolicyRule( + name="wildcard", + endpoints=[ + sandbox_pb2.NetworkEndpoint(host="*.anthropic.com", port=443), + ], + binaries=[sandbox_pb2.NetworkBinary(path="/**")], + ), + }, + ) + spec = datamodel_pb2.SandboxSpec(policy=policy) + with sandbox(spec=spec, delete_on_exit=True) as sb: + result = sb.exec_python(_proxy_connect(), args=("anthropic.com", 443)) + assert result.exit_code == 0, result.stderr + assert "403" in result.stdout, ( + f"*.anthropic.com should NOT match bare anthropic.com: {result.stdout}" + ) + + +def test_host_wildcard_rejects_deep_subdomain( + sandbox: Callable[..., Sandbox], +) -> None: + """HW-3: *.anthropic.com does NOT match deep.sub.anthropic.com. + + Single * matches one DNS label only (does not cross . boundaries). 
+ """ + policy = _base_policy( + network_policies={ + "wildcard": sandbox_pb2.NetworkPolicyRule( + name="wildcard", + endpoints=[ + sandbox_pb2.NetworkEndpoint(host="*.anthropic.com", port=443), + ], + binaries=[sandbox_pb2.NetworkBinary(path="/**")], + ), + }, + ) + spec = datamodel_pb2.SandboxSpec(policy=policy) + with sandbox(spec=spec, delete_on_exit=True) as sb: + result = sb.exec_python(_proxy_connect(), args=("deep.sub.anthropic.com", 443)) + assert result.exit_code == 0, result.stderr + assert "403" in result.stdout, ( + f"*.anthropic.com should NOT match deep.sub.anthropic.com: {result.stdout}" + ) diff --git a/proto/sandbox.proto b/proto/sandbox.proto index 92005661..01925fbe 100644 --- a/proto/sandbox.proto +++ b/proto/sandbox.proto @@ -55,7 +55,12 @@ message NetworkPolicyRule { // A network endpoint (host + port) with optional L7 inspection config. message NetworkEndpoint { + // Hostname or host glob pattern. Exact match is case-insensitive. + // Glob patterns use "." as delimiter: "*.example.com" matches a single + // subdomain label, "**.example.com" matches across labels. string host = 1; + // Single port (backwards compat). Use `ports` for multiple ports. + // Mutually exclusive with `ports` — if both are set, `ports` takes precedence. uint32 port = 2; // Application protocol for L7 inspection: "rest", "sql", or "" (L4-only). string protocol = 3; @@ -76,6 +81,10 @@ message NetworkEndpoint { // Loopback (127.0.0.0/8) and link-local (169.254.0.0/16) are always blocked // regardless of this field. repeated string allowed_ips = 8; + // Multiple ports. When non-empty, this endpoint covers all listed ports. + // If `port` is set and `ports` is empty, `port` is normalized to `ports: [port]`. + // If both are set, `ports` takes precedence. + repeated uint32 ports = 9; } // An L7 policy rule (allow-only). 
From 3b1d8724933ff5608997374a25ca0200511b0dd6 Mon Sep 17 00:00:00 2001 From: John Myers Date: Mon, 16 Mar 2026 11:38:06 -0700 Subject: [PATCH 2/2] fix(policy): reject configs with both port and ports set --- crates/openshell-sandbox/src/l7/mod.rs | 38 ++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/crates/openshell-sandbox/src/l7/mod.rs b/crates/openshell-sandbox/src/l7/mod.rs index f9638dc6..9b9ae473 100644 --- a/crates/openshell-sandbox/src/l7/mod.rs +++ b/crates/openshell-sandbox/src/l7/mod.rs @@ -182,6 +182,21 @@ pub fn validate_l7_policies(data_json: &serde_json::Value) -> (Vec, Vec< } } + // port + ports mutual exclusion + let has_scalar_port = ep + .get("port") + .and_then(serde_json::Value::as_u64) + .is_some_and(|p| p > 0); + let has_ports_array = ep + .get("ports") + .and_then(|v| v.as_array()) + .is_some_and(|a| !a.is_empty()); + if has_scalar_port && has_ports_array { + errors.push(format!( + "{loc}: port and ports are mutually exclusive; use ports for multiple ports" + )); + } + // rules + access mutual exclusion if has_rules && !access.is_empty() { errors.push(format!("{loc}: rules and access are mutually exclusive")); @@ -642,6 +657,29 @@ mod tests { ); } + #[test] + fn validate_port_and_ports_mutually_exclusive() { + let data = serde_json::json!({ + "network_policies": { + "test": { + "endpoints": [{ + "host": "api.example.com", + "port": 443, + "ports": [443, 8443] + }], + "binaries": [] + } + } + }); + let (errors, _warnings) = validate_l7_policies(&data); + assert!( + errors + .iter() + .any(|e| e.contains("port and ports are mutually exclusive")), + "Should reject both port and ports, got errors: {errors:?}" + ); + } + #[test] fn validate_ports_array_rest_443_warns() { let data = serde_json::json!({