diff --git a/crates/bashkit-python/tests/test_bashkit.py b/crates/bashkit-python/tests/test_bashkit.py index 036f4b81..fa344b34 100644 --- a/crates/bashkit-python/tests/test_bashkit.py +++ b/crates/bashkit-python/tests/test_bashkit.py @@ -605,7 +605,8 @@ def test_scripted_tool_callback_error(): ) r = tool.execute_sync("fail_cmd") assert r.exit_code != 0 - assert "service down" in r.stderr + # Error is sanitized by default (TM-INF-017) + assert "callback failed" in r.stderr def test_scripted_tool_error_fallback(): @@ -863,7 +864,8 @@ def test_scripted_tool_callback_runtime_error(): ) r = tool.execute_sync("fail") assert r.exit_code != 0 - assert "runtime fail" in r.stderr + # Error is sanitized by default (TM-INF-017) + assert "callback failed" in r.stderr def test_scripted_tool_callback_type_error(): diff --git a/crates/bashkit/src/scripted_tool/execute.rs b/crates/bashkit/src/scripted_tool/execute.rs index 8ebb24e6..d0d74ede 100644 --- a/crates/bashkit/src/scripted_tool/execute.rs +++ b/crates/bashkit/src/scripted_tool/execute.rs @@ -149,6 +149,8 @@ struct ToolBuiltinAdapter { callback: ToolCallback, schema: serde_json::Value, log: InvocationLog, + /// THREAT[TM-INF-017]: When true, sanitize callback error messages. + sanitize_errors: bool, } #[async_trait] @@ -163,7 +165,19 @@ impl Builtin for ToolBuiltinAdapter { match (self.callback)(&tool_args) { Ok(stdout) => ExecResult::ok(stdout), - Err(msg) => ExecResult::err(msg, 1), + Err(msg) => { + if self.sanitize_errors { + // THREAT[TM-INF-017]: Replace detailed errors with generic message. + // Full error available in debug output for operators. + eprintln!("DEBUG: {}: callback error (sanitized): {}", self.name, msg); + ExecResult::err( + format!("{}: callback failed (error code 1)", self.name), + 1, + ) + } else { + ExecResult::err(msg, 1) + } + } } } Err(msg) => ExecResult::err(msg, 2), @@ -439,6 +453,7 @@ impl ScriptedTool { callback: Arc::clone(&tool.callback), schema: tool.def.input_schema.clone(), log: Arc::clone(&log), + sanitize_errors: self.sanitize_errors, }); builder = builder.builtin(name, builtin); } @@ -1135,4 +1150,90 @@ mod tests { assert_eq!(def.tags, vec!["admin", "billing"]); assert_eq!(def.category.as_deref(), Some("payments")); } + + /// THREAT[TM-INF-017]: Callback errors with internal details are sanitized. + #[tokio::test] + async fn test_sanitize_errors_hides_internal_details() { + let tool = ScriptedTool::builder("api") + .tool( + ToolDef::new("query", "Query database"), + |_args: &ToolArgs| { + Err("connection failed: postgres://admin:secret@internal-db:5432/prod".into()) + }, + ) + .build(); + + let resp = tool + .execute(ToolRequest { + commands: "query".to_string(), + timeout_ms: None, + }) + .await; + assert_ne!(resp.exit_code, 0); + // The sanitized error should NOT contain the connection string + assert!( + !resp.stderr.contains("postgres://"), + "internal details leaked: {}", + resp.stderr + ); + assert!( + !resp.stderr.contains("secret"), + "credentials leaked: {}", + resp.stderr + ); + assert!( + resp.stderr.contains("callback failed"), + "should contain generic message: {}", + resp.stderr + ); + } + + /// When sanitize_errors is disabled, full error is visible. + #[tokio::test] + async fn test_sanitize_errors_disabled_shows_details() { + let tool = ScriptedTool::builder("api") + .sanitize_errors(false) + .tool( + ToolDef::new("query", "Query database"), + |_args: &ToolArgs| { + Err("connection failed: postgres://admin:secret@internal-db:5432/prod".into()) + }, + ) + .build(); + + let resp = tool + .execute(ToolRequest { + commands: "query".to_string(), + timeout_ms: None, + }) + .await; + assert_ne!(resp.exit_code, 0); + assert!( + resp.stderr.contains("postgres://"), + "should show full error when sanitization off: {}", + resp.stderr + ); + } + + /// Default sanitize_errors is true. + #[tokio::test] + async fn test_sanitize_errors_default_true() { + let tool = ScriptedTool::builder("api") + .tool(ToolDef::new("fail", "Always fails"), |_args: &ToolArgs| { + Err("secret_internal_path: /etc/shadow".into()) + }) + .build(); + + let resp = tool + .execute(ToolRequest { + commands: "fail".to_string(), + timeout_ms: None, + }) + .await; + assert!( + !resp.stderr.contains("/etc/shadow"), + "internal path leaked: {}", + resp.stderr + ); + } } diff --git a/crates/bashkit/src/scripted_tool/mod.rs b/crates/bashkit/src/scripted_tool/mod.rs index 007e77a3..71568302 100644 --- a/crates/bashkit/src/scripted_tool/mod.rs +++ b/crates/bashkit/src/scripted_tool/mod.rs @@ -308,6 +308,8 @@ pub struct ScriptedToolBuilder { limits: Option, env_vars: Vec<(String, String)>, compact_prompt: bool, + /// THREAT[TM-INF-017]: Sanitize callback errors to prevent internal state leakage. + sanitize_errors: bool, } impl ScriptedToolBuilder { @@ -320,6 +322,7 @@ impl ScriptedToolBuilder { limits: None, env_vars: Vec::new(), compact_prompt: false, + sanitize_errors: true, } } @@ -363,6 +366,23 @@ impl ScriptedToolBuilder { self } + /// Sanitize callback error messages in tool output (default: `true`). + /// + /// When enabled, callback errors are replaced with a generic + /// `"callback failed (error code 1)"` message in stderr. The full error + /// is logged to the interpreter's debug output for operator visibility. + /// + /// Disable this only for development/debugging. + /// + /// # Security (TM-INF-017) + /// + /// Prevents internal state (connection strings, file paths, stack traces) + /// from leaking through tool output to LLM agents. + pub fn sanitize_errors(mut self, sanitize: bool) -> Self { + self.sanitize_errors = sanitize; + self + } + /// Emit compact `system_prompt()` that omits full schemas and adds help tip. /// /// When enabled, `system_prompt()` lists only tool names + one-liners and @@ -404,6 +424,7 @@ impl ScriptedToolBuilder { limits: self.limits.clone(), env_vars: self.env_vars.clone(), compact_prompt: self.compact_prompt, + sanitize_errors: self.sanitize_errors, last_execution_trace: Arc::new(Mutex::new(None)), } } @@ -475,6 +496,8 @@ pub struct ScriptedTool { pub(crate) limits: Option, pub(crate) env_vars: Vec<(String, String)>, pub(crate) compact_prompt: bool, + /// THREAT[TM-INF-017]: When true, callback errors are sanitized before output. + pub(crate) sanitize_errors: bool, pub(crate) last_execution_trace: Arc>>, } @@ -772,7 +795,12 @@ mod tests { }) .await; assert_ne!(resp.exit_code, 0); - assert!(resp.stderr.contains("service unavailable")); + // Error is sanitized by default (TM-INF-017) — raw message not exposed + assert!( + resp.stderr.contains("callback failed"), + "expected sanitized error, got: {}", + resp.stderr + ); } #[tokio::test]