Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions crates/bashkit-python/tests/test_bashkit.py
Original file line number Diff line number Diff line change
Expand Up @@ -605,7 +605,8 @@ def test_scripted_tool_callback_error():
)
r = tool.execute_sync("fail_cmd")
assert r.exit_code != 0
assert "service down" in r.stderr
# Error is sanitized by default (TM-INF-017)
assert "callback failed" in r.stderr


def test_scripted_tool_error_fallback():
Expand Down Expand Up @@ -863,7 +864,8 @@ def test_scripted_tool_callback_runtime_error():
)
r = tool.execute_sync("fail")
assert r.exit_code != 0
assert "runtime fail" in r.stderr
# Error is sanitized by default (TM-INF-017)
assert "callback failed" in r.stderr


def test_scripted_tool_callback_type_error():
Expand Down
103 changes: 102 additions & 1 deletion crates/bashkit/src/scripted_tool/execute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@ struct ToolBuiltinAdapter {
callback: ToolCallback,
schema: serde_json::Value,
log: InvocationLog,
/// THREAT[TM-INF-017]: When true, sanitize callback error messages.
sanitize_errors: bool,
}

#[async_trait]
Expand All @@ -163,7 +165,19 @@ impl Builtin for ToolBuiltinAdapter {

match (self.callback)(&tool_args) {
Ok(stdout) => ExecResult::ok(stdout),
Err(msg) => ExecResult::err(msg, 1),
Err(msg) => {
if self.sanitize_errors {
// THREAT[TM-INF-017]: Replace detailed errors with generic message.
// Full error available in debug output for operators.
eprintln!("DEBUG: {}: callback error (sanitized): {}", self.name, msg);
ExecResult::err(
format!("{}: callback failed (error code 1)", self.name),
1,
)
} else {
ExecResult::err(msg, 1)
}
}
}
}
Err(msg) => ExecResult::err(msg, 2),
Expand Down Expand Up @@ -439,6 +453,7 @@ impl ScriptedTool {
callback: Arc::clone(&tool.callback),
schema: tool.def.input_schema.clone(),
log: Arc::clone(&log),
sanitize_errors: self.sanitize_errors,
});
builder = builder.builtin(name, builtin);
}
Expand Down Expand Up @@ -1135,4 +1150,90 @@ mod tests {
assert_eq!(def.tags, vec!["admin", "billing"]);
assert_eq!(def.category.as_deref(), Some("payments"));
}

/// THREAT[TM-INF-017]: Callback errors with internal details are sanitized.
#[tokio::test]
async fn test_sanitize_errors_hides_internal_details() {
let tool = ScriptedTool::builder("api")
.tool(
ToolDef::new("query", "Query database"),
|_args: &ToolArgs| {
Err("connection failed: postgres://admin:secret@internal-db:5432/prod".into())
},
)
.build();

let resp = tool
.execute(ToolRequest {
commands: "query".to_string(),
timeout_ms: None,
})
.await;
assert_ne!(resp.exit_code, 0);
// The sanitized error should NOT contain the connection string
assert!(
!resp.stderr.contains("postgres://"),
"internal details leaked: {}",
resp.stderr
);
assert!(
!resp.stderr.contains("secret"),
"credentials leaked: {}",
resp.stderr
);
assert!(
resp.stderr.contains("callback failed"),
"should contain generic message: {}",
resp.stderr
);
}

/// When sanitize_errors is disabled, full error is visible.
#[tokio::test]
async fn test_sanitize_errors_disabled_shows_details() {
let tool = ScriptedTool::builder("api")
.sanitize_errors(false)
.tool(
ToolDef::new("query", "Query database"),
|_args: &ToolArgs| {
Err("connection failed: postgres://admin:secret@internal-db:5432/prod".into())
},
)
.build();

let resp = tool
.execute(ToolRequest {
commands: "query".to_string(),
timeout_ms: None,
})
.await;
assert_ne!(resp.exit_code, 0);
assert!(
resp.stderr.contains("postgres://"),
"should show full error when sanitization off: {}",
resp.stderr
);
}

/// Default sanitize_errors is true.
#[tokio::test]
async fn test_sanitize_errors_default_true() {
let tool = ScriptedTool::builder("api")
.tool(ToolDef::new("fail", "Always fails"), |_args: &ToolArgs| {
Err("secret_internal_path: /etc/shadow".into())
})
.build();

let resp = tool
.execute(ToolRequest {
commands: "fail".to_string(),
timeout_ms: None,
})
.await;
assert!(
!resp.stderr.contains("/etc/shadow"),
"internal path leaked: {}",
resp.stderr
);
}
}
30 changes: 29 additions & 1 deletion crates/bashkit/src/scripted_tool/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,8 @@ pub struct ScriptedToolBuilder {
limits: Option<ExecutionLimits>,
env_vars: Vec<(String, String)>,
compact_prompt: bool,
/// THREAT[TM-INF-017]: Sanitize callback errors to prevent internal state leakage.
sanitize_errors: bool,
}

impl ScriptedToolBuilder {
Expand All @@ -320,6 +322,7 @@ impl ScriptedToolBuilder {
limits: None,
env_vars: Vec::new(),
compact_prompt: false,
sanitize_errors: true,
}
}

Expand Down Expand Up @@ -363,6 +366,23 @@ impl ScriptedToolBuilder {
self
}

/// Sanitize callback error messages in tool output (default: `true`).
///
/// When enabled, callback errors are replaced with a generic
/// `"callback failed (error code 1)"` message in stderr. The full error
/// is logged to the interpreter's debug output for operator visibility.
///
/// Disable this only for development/debugging.
///
/// # Security (TM-INF-017)
///
/// Prevents internal state (connection strings, file paths, stack traces)
/// from leaking through tool output to LLM agents.
pub fn sanitize_errors(mut self, sanitize: bool) -> Self {
self.sanitize_errors = sanitize;
self
}

/// Emit compact `system_prompt()` that omits full schemas and adds help tip.
///
/// When enabled, `system_prompt()` lists only tool names + one-liners and
Expand Down Expand Up @@ -404,6 +424,7 @@ impl ScriptedToolBuilder {
limits: self.limits.clone(),
env_vars: self.env_vars.clone(),
compact_prompt: self.compact_prompt,
sanitize_errors: self.sanitize_errors,
last_execution_trace: Arc::new(Mutex::new(None)),
}
}
Expand Down Expand Up @@ -475,6 +496,8 @@ pub struct ScriptedTool {
pub(crate) limits: Option<ExecutionLimits>,
pub(crate) env_vars: Vec<(String, String)>,
pub(crate) compact_prompt: bool,
/// THREAT[TM-INF-017]: When true, callback errors are sanitized before output.
pub(crate) sanitize_errors: bool,
pub(crate) last_execution_trace: Arc<Mutex<Option<ScriptedExecutionTrace>>>,
}

Expand Down Expand Up @@ -772,7 +795,12 @@ mod tests {
})
.await;
assert_ne!(resp.exit_code, 0);
assert!(resp.stderr.contains("service unavailable"));
// Error is sanitized by default (TM-INF-017) — raw message not exposed
assert!(
resp.stderr.contains("callback failed"),
"expected sanitized error, got: {}",
resp.stderr
);
}

#[tokio::test]
Expand Down
Loading