From 5fe9b0cb794caac8ed49510aa53f349eb5fec748 Mon Sep 17 00:00:00 2001 From: Hunter B Date: Mon, 22 Jun 2026 20:29:02 -0700 Subject: [PATCH] WIP harden local state storage boundaries Confine task artifact and automation/run storage IDs before path construction, enable SQLite foreign keys for every state connection, create file-backed secrets with private mode at first write, and keep explicit CODEWHALE_HOME isolated from ambient legacy state. Regression coverage exercises path-like task IDs, automation and run IDs, thread deletion cascades, secure file-store round trips, and explicit home isolation while preserving default legacy backfill/migration behavior. Verification: - cargo fmt --all -- --check - git diff --check - cargo test -p codewhale-tui --bin codewhale-tui --locked write_task_artifact_rejects_path_like_task_ids - cargo test -p codewhale-tui --bin codewhale-tui --locked automation_ - cargo test -p codewhale-state --locked - cargo test -p codewhale-secrets --locked - cargo test -p codewhale-config --locked - cargo test -p codewhale-tui --bin codewhale-tui --locked --- crates/config/src/lib.rs | 23 +++++ crates/config/src/tests.rs | 84 ++++++++++++++++-- crates/secrets/src/lib.rs | 20 +++-- crates/state/src/lib.rs | 68 +++++++++++++- crates/tui/src/automation_manager.rs | 128 +++++++++++++++++++++++---- crates/tui/src/task_manager.rs | 50 +++++++++++ crates/tui/src/tools/tasks.rs | 27 +++++- 7 files changed, 366 insertions(+), 34 deletions(-) diff --git a/crates/config/src/lib.rs b/crates/config/src/lib.rs index b8a982cbc..c022a20ba 100644 --- a/crates/config/src/lib.rs +++ b/crates/config/src/lib.rs @@ -3249,6 +3249,17 @@ pub fn codewhale_home() -> Result { Ok(home.join(CODEWHALE_APP_DIR)) } +fn explicit_codewhale_home() -> Option { + std::env::var("CODEWHALE_HOME").ok().and_then(|val| { + let trimmed = val.trim(); + if trimmed.is_empty() { + None + } else { + Some(PathBuf::from(trimmed)) + } + }) +} + /// Resolve the legacy DeepSeek home directory (`$HOME/.deepseek`). /// /// Always returns the legacy path regardless of whether it exists. @@ -3305,6 +3316,9 @@ fn ensure_safe_state_subdir(subdir: &str) -> Result<()> { pub fn resolve_state_dir(subdir: &str) -> Result { ensure_safe_state_subdir(subdir)?; let primary = codewhale_home()?.join(subdir); + if explicit_codewhale_home().is_some() { + return Ok(primary); + } if primary.exists() { return Ok(primary); } @@ -3342,6 +3356,9 @@ fn migrate_legacy_state_dir(primary: &Path, subdir: &str) -> Result<()> { if primary.exists() || subdir == "." || subdir.is_empty() { return Ok(()); } + if explicit_codewhale_home().is_some() { + return Ok(()); + } let legacy = match legacy_deepseek_home() { Ok(home) => home.join(subdir), Err(_) => return Ok(()), @@ -3596,6 +3613,9 @@ pub fn default_config_path() -> Result { // Prefer ~/.codewhale/config.toml when it exists (fresh install or // migrated), otherwise fall back to ~/.deepseek/config.toml. let primary = codewhale_home()?.join(CONFIG_FILE_NAME); + if explicit_codewhale_home().is_some() { + return Ok(primary); + } if primary.exists() { return Ok(primary); } @@ -3632,6 +3652,9 @@ pub fn migrate_config_if_needed() -> Result> { if primary.exists() { return Ok(None); } + if explicit_codewhale_home().is_some() { + return Ok(None); + } let legacy = legacy_deepseek_home()?.join(CONFIG_FILE_NAME); if !legacy.exists() { return Ok(None); diff --git a/crates/config/src/tests.rs b/crates/config/src/tests.rs index 5ac83ec74..89315bffc 100644 --- a/crates/config/src/tests.rs +++ b/crates/config/src/tests.rs @@ -1911,7 +1911,7 @@ fn migrate_config_reports_copied_legacy_path() { unsafe { env::set_var("HOME", &home); env::set_var("USERPROFILE", &home); - env::set_var("CODEWHALE_HOME", &primary_dir); + env::remove_var("CODEWHALE_HOME"); } let migration = migrate_config_if_needed() @@ -1962,9 +1962,9 @@ impl Drop for StateEnvRestore { } } -/// Points `HOME`/`USERPROFILE`/`CODEWHALE_HOME` at a fresh temp tree so -/// `codewhale_home()` -> `/.codewhale` and `legacy_deepseek_home()` -/// -> `/.deepseek`. Env is restored on drop. +/// Points `HOME`/`USERPROFILE` at a fresh temp tree so `codewhale_home()` -> +/// `/.codewhale` and `legacy_deepseek_home()` -> `/.deepseek`. +/// Env is restored on drop. struct StateDirEnv { home: PathBuf, _restore: StateEnvRestore, @@ -1985,7 +1985,7 @@ impl StateDirEnv { unsafe { env::set_var("HOME", &home); env::set_var("USERPROFILE", &home); - env::set_var("CODEWHALE_HOME", home.join(CODEWHALE_APP_DIR)); + env::remove_var("CODEWHALE_HOME"); } Self { home, @@ -2087,6 +2087,80 @@ fn resolve_state_dir_still_finds_legacy_for_backfill() { let _ = fs::remove_dir_all(&state_env.home); } +#[test] +fn explicit_codewhale_home_does_not_read_or_migrate_legacy_state() { + let _lock = env_lock(); + let unique = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("clock") + .as_nanos(); + let state_env = StateDirEnv::install(unique); + let explicit = state_env.home.join("isolated-codewhale-home"); + // Safety: test-only environment mutation is serialized by env_lock(). + unsafe { + env::set_var("CODEWHALE_HOME", &explicit); + } + + fs::create_dir_all(state_env.legacy("catalog")).expect("legacy dir"); + fs::write(state_env.legacy("catalog").join("legacy.json"), b"legacy").expect("legacy file"); + + assert_eq!( + resolve_state_dir("catalog").expect("resolve"), + explicit.join("catalog") + ); + assert_eq!( + ensure_state_dir("catalog").expect("ensure"), + explicit.join("catalog") + ); + assert!( + !explicit.join("catalog").join("legacy.json").exists(), + "explicit CODEWHALE_HOME must not copy ambient legacy state" + ); + assert!( + state_env.legacy("catalog").join("legacy.json").exists(), + "ambient legacy state should remain untouched" + ); + let _ = fs::remove_dir_all(&state_env.home); +} + +#[test] +fn explicit_codewhale_home_does_not_read_or_migrate_legacy_config() { + let _lock = env_lock(); + let unique = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("clock") + .as_nanos(); + let state_env = StateDirEnv::install(unique); + let explicit = state_env.home.join("isolated-codewhale-home"); + let legacy_config = state_env.home.join(LEGACY_APP_DIR).join(CONFIG_FILE_NAME); + fs::create_dir_all(legacy_config.parent().expect("legacy parent")).expect("legacy dir"); + fs::write(&legacy_config, b"provider = \"deepseek\"\n").expect("legacy config"); + // Safety: test-only environment mutation is serialized by env_lock(). + unsafe { + env::set_var("CODEWHALE_HOME", &explicit); + } + + assert_eq!( + default_config_path().expect("default config"), + explicit.join(CONFIG_FILE_NAME) + ); + assert!( + migrate_config_if_needed() + .expect("migration check") + .is_none(), + "explicit CODEWHALE_HOME should not import ambient legacy config" + ); + assert!( + !explicit.join(CONFIG_FILE_NAME).exists(), + "explicit CODEWHALE_HOME must stay isolated until caller writes config" + ); + assert!( + legacy_config.exists(), + "legacy config should remain untouched" + ); + let _ = fs::remove_dir_all(&state_env.home); +} + #[test] fn state_resolvers_reject_path_traversal_subdirs() { // Defense against path injection (#3240 hardening): the public state diff --git a/crates/secrets/src/lib.rs b/crates/secrets/src/lib.rs index 15506df3a..c7c66f71c 100644 --- a/crates/secrets/src/lib.rs +++ b/crates/secrets/src/lib.rs @@ -13,6 +13,8 @@ use std::collections::HashMap; use std::fs; +#[cfg(unix)] +use std::io::Write; use std::path::{Path, PathBuf}; use std::sync::{Arc, Mutex}; @@ -480,22 +482,26 @@ impl FileKeyringStore { } } let body = serde_json::to_string_pretty(blob)?; - fs::write(&self.path, body)?; #[cfg(unix)] { + use std::os::unix::fs::OpenOptionsExt; use std::os::unix::fs::PermissionsExt; - // Best-effort 0o600 — matches the parent-dir chmod above which - // is also `let _ = ...`. Filesystems that don't support Unix - // chmod (Docker bind-mounts of NTFS, network shares — #897) - // would otherwise fail the whole save here even though the - // blob already wrote successfully. The host's native ACLs - // are doing access control in those environments. + let mut file = fs::OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .mode(0o600) + .open(&self.path)?; + file.write_all(body.as_bytes())?; + file.sync_all()?; if let Ok(meta) = fs::metadata(&self.path) { let mut perms = meta.permissions(); perms.set_mode(0o600); let _ = fs::set_permissions(&self.path, perms); } } + #[cfg(not(unix))] + fs::write(&self.path, body)?; Ok(()) } } diff --git a/crates/state/src/lib.rs b/crates/state/src/lib.rs index 34b7bfe45..d160c81dc 100644 --- a/crates/state/src/lib.rs +++ b/crates/state/src/lib.rs @@ -305,8 +305,11 @@ impl StateStore { } fn conn(&self) -> Result { - Connection::open(&self.db_path) - .with_context(|| format!("failed to open state db {}", self.db_path.display())) + let conn = Connection::open(&self.db_path) + .with_context(|| format!("failed to open state db {}", self.db_path.display()))?; + conn.pragma_update(None, "foreign_keys", "ON") + .context("failed to enable SQLite foreign keys")?; + Ok(conn) } fn init_schema(&self) -> Result<()> { @@ -1867,6 +1870,67 @@ mod tests { assert!(err.to_string().contains("thread missing-thread not found")); } + #[test] + fn delete_thread_cascades_child_state() { + let store = temp_state_store("thread-delete-cascade"); + store + .upsert_thread(&test_thread("thread-1")) + .expect("upsert thread"); + store + .persist_dynamic_tools( + "thread-1", + &[DynamicToolRecord { + position: 0, + name: "custom_tool".to_string(), + description: Some("test".to_string()), + input_schema: serde_json::json!({"type": "object"}), + }], + ) + .expect("dynamic tools"); + store + .append_message("thread-1", "user", "hello", None) + .expect("append message"); + store + .save_checkpoint("thread-1", "checkpoint-1", &serde_json::json!({"ok": true})) + .expect("checkpoint"); + store + .upsert_thread_goal(&test_goal("thread-1", "Clean up children")) + .expect("goal"); + + store.delete_thread("thread-1").expect("delete thread"); + + assert!( + store + .get_thread("thread-1") + .expect("read deleted thread") + .is_none() + ); + assert!( + store + .list_messages("thread-1", None) + .expect("messages after delete") + .is_empty() + ); + assert!( + store + .list_checkpoints("thread-1", None) + .expect("checkpoints after delete") + .is_empty() + ); + assert!( + store + .get_dynamic_tools("thread-1") + .expect("dynamic tools after delete") + .is_empty() + ); + assert!( + store + .get_thread_goal("thread-1") + .expect("goal after delete") + .is_none() + ); + } + #[test] fn record_thread_goal_usage_accumulates_tokens_and_time() { let store = temp_state_store("thread-goal-usage"); diff --git a/crates/tui/src/automation_manager.rs b/crates/tui/src/automation_manager.rs index 6a394e76b..7c0393993 100644 --- a/crates/tui/src/automation_manager.rs +++ b/crates/tui/src/automation_manager.rs @@ -319,17 +319,21 @@ impl AutomationManager { Self::open(default_automations_dir()) } - fn automation_path(&self, id: &str) -> PathBuf { - self.automations_dir.join(format!("{id}.json")) + fn automation_path(&self, id: &str) -> Result { + validate_storage_id("automation id", id)?; + Ok(self.automations_dir.join(format!("{id}.json"))) } - fn runs_dir_for(&self, automation_id: &str) -> PathBuf { - self.runs_dir.join(automation_id) + fn runs_dir_for(&self, automation_id: &str) -> Result { + validate_storage_id("automation id", automation_id)?; + Ok(self.runs_dir.join(automation_id)) } - fn run_path(&self, automation_id: &str, run_id: &str) -> PathBuf { - self.runs_dir_for(automation_id) - .join(format!("{run_id}.json")) + fn run_path(&self, automation_id: &str, run_id: &str) -> Result { + validate_storage_id("run id", run_id)?; + Ok(self + .runs_dir_for(automation_id)? + .join(format!("{run_id}.json"))) } pub fn create_automation(&self, req: CreateAutomationRequest) -> Result { @@ -362,7 +366,7 @@ impl AutomationManager { } pub fn get_automation(&self, id: &str) -> Result { - let path = self.automation_path(id); + let path = self.automation_path(id)?; let raw = fs::read_to_string(&path) .with_context(|| format!("Failed to read automation {}", path.display()))?; let record: AutomationRecord = serde_json::from_str(&raw) @@ -378,7 +382,7 @@ impl AutomationManager { } pub fn save_automation(&self, record: &AutomationRecord) -> Result<()> { - write_json_atomic(&self.automation_path(&record.id), record) + write_json_atomic(&self.automation_path(&record.id)?, record) } pub fn list_automations(&self) -> Result> { @@ -476,11 +480,11 @@ impl AutomationManager { pub fn delete_automation(&self, id: &str) -> Result { let existing = self.get_automation(id)?; - let path = self.automation_path(id); + let path = self.automation_path(id)?; fs::remove_file(&path) .with_context(|| format!("Failed to delete automation {}", path.display()))?; - let runs_dir = self.runs_dir_for(id); + let runs_dir = self.runs_dir_for(id)?; if runs_dir.exists() { fs::remove_dir_all(&runs_dir).with_context(|| { format!("Failed to delete automation runs {}", runs_dir.display()) @@ -495,7 +499,7 @@ impl AutomationManager { automation_id: &str, limit: Option, ) -> Result> { - let dir = self.runs_dir_for(automation_id); + let dir = self.runs_dir_for(automation_id)?; if !dir.exists() { return Ok(Vec::new()); } @@ -531,9 +535,9 @@ impl AutomationManager { } fn save_run(&self, run: &AutomationRunRecord) -> Result<()> { - let dir = self.runs_dir_for(&run.automation_id); + let dir = self.runs_dir_for(&run.automation_id)?; fs::create_dir_all(&dir).with_context(|| format!("Failed to create {}", dir.display()))?; - write_json_atomic(&self.run_path(&run.automation_id, &run.id), run) + write_json_atomic(&self.run_path(&run.automation_id, &run.id)?, run) } async fn enqueue_run_task( @@ -769,6 +773,27 @@ fn validate_name_and_prompt(name: &str, prompt: &str) -> Result<()> { Ok(()) } +fn validate_storage_id(label: &str, value: &str) -> Result<()> { + let value = value.trim(); + if value.is_empty() { + bail!("{label} must not be empty"); + } + if value.contains('/') || value.contains('\\') { + bail!("{label} must not contain path separators: {value}"); + } + let mut components = Path::new(value).components(); + let Some(component) = components.next() else { + bail!("{label} must not be empty"); + }; + if components.next().is_some() { + bail!("{label} must be a single path component: {value}"); + } + if !matches!(component, std::path::Component::Normal(_)) { + bail!("{label} must be a normal path component: {value}"); + } + Ok(()) +} + fn write_json_atomic(path: &Path, value: &T) -> Result<()> { if let Some(parent) = path.parent() { fs::create_dir_all(parent) @@ -941,14 +966,85 @@ mod tests { error: None, }; manager.save_run(&run).expect("save run"); - assert!(manager.runs_dir_for(&created.id).exists()); + assert!( + manager + .runs_dir_for(&created.id) + .expect("runs dir") + .exists() + ); manager .delete_automation(&created.id) .expect("delete automation"); assert!(manager.get_automation(&created.id).is_err()); - assert!(!manager.runs_dir_for(&created.id).exists()); + assert!( + !manager + .runs_dir_for(&created.id) + .expect("runs dir") + .exists() + ); + } + + #[test] + fn automation_ids_must_not_escape_storage_roots() { + let tempdir = tempfile::tempdir().expect("tempdir"); + let manager = AutomationManager::open(tempdir.path().to_path_buf()).expect("manager"); + let outside = tempdir.path().join("escape.json"); + + for id in ["../escape", "nested/id", "nested\\id", "."] { + assert!( + manager.get_automation(id).is_err(), + "get should reject {id:?}" + ); + assert!( + manager.list_runs(id, None).is_err(), + "list runs should reject {id:?}" + ); + } + assert!( + manager + .runs_dir_for("../escape") + .expect_err("path-like automation id should fail") + .to_string() + .contains("automation id must") + ); + assert!( + !outside.exists(), + "invalid automation IDs must not create paths outside storage" + ); + } + + #[test] + fn automation_run_ids_must_not_escape_storage_roots() { + let tempdir = tempfile::tempdir().expect("tempdir"); + let manager = AutomationManager::open(tempdir.path().to_path_buf()).expect("manager"); + let run = AutomationRunRecord { + schema_version: CURRENT_RUN_SCHEMA_VERSION, + id: "../escape".to_string(), + automation_id: Uuid::new_v4().to_string(), + scheduled_for: Utc::now(), + status: AutomationRunStatus::Queued, + created_at: Utc::now(), + started_at: None, + ended_at: None, + task_id: None, + thread_id: None, + turn_id: None, + error: None, + }; + + let err = manager + .save_run(&run) + .expect_err("path-like run id should be rejected"); + assert!( + err.to_string().contains("run id must"), + "unexpected error: {err:#}" + ); + assert!( + !tempdir.path().join("escape.json").exists(), + "invalid run ID must not create a file outside the run root" + ); } #[test] diff --git a/crates/tui/src/task_manager.rs b/crates/tui/src/task_manager.rs index a9ee32bc7..bed9d0438 100644 --- a/crates/tui/src/task_manager.rs +++ b/crates/tui/src/task_manager.rs @@ -1361,6 +1361,7 @@ impl TaskManager { } fn write_artifact(&self, task_id: &str, label: &str, content: &str) -> Result { + validate_task_storage_id(task_id)?; let artifact_dir = self.artifacts_dir.join(task_id); fs::create_dir_all(&artifact_dir) .with_context(|| format!("Failed to create artifact dir {}", artifact_dir.display()))?; @@ -1640,6 +1641,31 @@ fn sanitize_filename(input: &str) -> String { } } +pub(crate) fn validate_task_storage_id(task_id: &str) -> Result<()> { + validate_storage_component("task id", task_id) +} + +fn validate_storage_component(label: &str, value: &str) -> Result<()> { + let value = value.trim(); + if value.is_empty() { + bail!("{label} must not be empty"); + } + if value.contains('/') || value.contains('\\') { + bail!("{label} must not contain path separators: {value}"); + } + let mut components = Path::new(value).components(); + let Some(component) = components.next() else { + bail!("{label} must not be empty"); + }; + if components.next().is_some() { + bail!("{label} must be a single path component: {value}"); + } + if !matches!(component, std::path::Component::Normal(_)) { + bail!("{label} must be a normal path component: {value}"); + } + Ok(()) +} + fn duration_ms(start: DateTime, end: DateTime) -> u64 { let millis = (end - start).num_milliseconds(); if millis.is_negative() { @@ -1962,6 +1988,30 @@ mod tests { Ok(()) } + #[tokio::test] + async fn write_task_artifact_rejects_path_like_task_ids() -> Result<()> { + let root = std::env::temp_dir().join(format!("deepseek-task-test-{}", Uuid::new_v4())); + let outside = root.join("escape"); + let manager = + TaskManager::start_with_executor(test_config(root.clone()), Arc::new(MockExecutor)) + .await?; + + let err = manager + .write_task_artifact("../escape", "attempt_patch", "patch") + .expect_err("path-like task id should be rejected"); + + assert!( + err.to_string().contains("task id must not contain"), + "unexpected error: {err:#}" + ); + assert!( + !outside.exists(), + "artifact write must not create directories outside the artifact root" + ); + let _ = fs::remove_dir_all(root); + Ok(()) + } + #[tokio::test] async fn cancel_running_task_marks_canceled() -> Result<()> { let root = std::env::temp_dir().join(format!("deepseek-task-test-{}", Uuid::new_v4())); diff --git a/crates/tui/src/tools/tasks.rs b/crates/tui/src/tools/tasks.rs index 54b86b5a1..eb49fb753 100644 --- a/crates/tui/src/tools/tasks.rs +++ b/crates/tui/src/tools/tasks.rs @@ -14,6 +14,7 @@ use crate::command_safety::{SafetyLevel, analyze_command}; use crate::dependencies::ExternalTool; use crate::task_manager::{ NewTaskRequest, TaskArtifactRef, TaskAttemptRecord, TaskGateRecord, TaskRecord, + validate_task_storage_id, }; use crate::tools::shell::{ExecShellTool, ShellWaitTool}; use crate::tools::spec::{ @@ -593,7 +594,17 @@ impl ToolSpec for PrAttemptRecordTool { } async fn execute(&self, input: Value, context: &ToolContext) -> Result { - let task_id = task_id_from_input_or_context(&input, context)?; + let mut task_id = task_id_from_input_or_context(&input, context)?; + if let Some(manager) = context.runtime.task_manager.as_ref() { + task_id = manager + .get_task(&task_id) + .await + .map_err(|e| ToolError::execution_failed(e.to_string()))? + .id; + } else { + validate_task_storage_id(&task_id) + .map_err(|e| ToolError::execution_failed(format!("invalid task id: {e}")))?; + } let base_sha = git_output(&context.workspace, &["rev-parse", "HEAD"]).ok(); let head_sha = base_sha.clone(); let branch = git_output(&context.workspace, &["rev-parse", "--abbrev-ref", "HEAD"]).ok(); @@ -608,7 +619,7 @@ impl ToolSpec for PrAttemptRecordTool { .filter(|line| !line.trim().is_empty()) .map(ToString::to_string) .collect::>(); - let patch_path = write_task_artifact_for(context, &task_id, "attempt_patch", &diff)?; + let patch_path = write_task_artifact_for(context, &task_id, "attempt_patch", &diff).await?; let attempt = TaskAttemptRecord { id: format!("attempt_{}", &Uuid::new_v4().to_string()[..8]), attempt_group_id: optional_str(&input, "attempt_group_id") @@ -826,6 +837,8 @@ fn write_runtime_artifact( let Some(task_id) = context.runtime.active_task_id.as_deref() else { return Ok(None); }; + validate_task_storage_id(task_id) + .map_err(|e| ToolError::execution_failed(format!("invalid active task id: {e}")))?; let manager = context.runtime.task_manager.as_ref(); if let Some(manager) = manager { return manager @@ -855,21 +868,27 @@ fn write_runtime_artifact( )) } -fn write_task_artifact_for( +async fn write_task_artifact_for( context: &ToolContext, task_id: &str, label: &str, content: &str, ) -> Result, ToolError> { if let Some(manager) = context.runtime.task_manager.as_ref() { + let resolved = manager + .get_task(task_id) + .await + .map_err(|e| ToolError::execution_failed(e.to_string()))?; return manager - .write_task_artifact(task_id, label, content) + .write_task_artifact(&resolved.id, label, content) .map(Some) .map_err(|e| ToolError::execution_failed(e.to_string())); } if context.runtime.active_task_id.as_deref() != Some(task_id) { return Ok(None); } + validate_task_storage_id(task_id) + .map_err(|e| ToolError::execution_failed(format!("invalid task id: {e}")))?; write_runtime_artifact(context, label, content) }