diff --git a/crates/ocm-daemon/src/bootstrap.rs b/crates/ocm-daemon/src/bootstrap.rs index 88727b5..43d0f27 100644 --- a/crates/ocm-daemon/src/bootstrap.rs +++ b/crates/ocm-daemon/src/bootstrap.rs @@ -8,10 +8,12 @@ //! that reports status; chat requests fail with clear errors. use crate::settings::{Backend, Settings}; +use crate::supervisor::{self, Supervisor, SupervisorPolicy, SupervisorStatus}; use ocm_inference::ollama::DEFAULT_MODEL as DEFAULT_OLLAMA_MODEL; use ocm_inference::selector::{self, BackendKind, DEFAULT_OLLAMA_BASE_URL}; use std::net::{IpAddr, Ipv4Addr, SocketAddr}; -use std::sync::Arc; +use std::path::{Path, PathBuf}; +use std::sync::{Arc, Mutex}; use std::time::Duration; use tracing::{info, warn}; @@ -72,6 +74,133 @@ fn resolve_backend_kind(setting: Backend) -> BackendKind { } } +/// Build the on-disk path the daemon expects for a model_id, matching the +/// convention used by `ocm_models::downloader::download_model`. +fn model_path_for(models_dir: &Path, model_id: &str) -> PathBuf { + models_dir.join(format!("{model_id}.gguf")) +} + +/// Decision: should bootstrap spawn + supervise `llama-server` on this run? +/// +/// True iff ALL of: +/// - `Settings.backend = "llamacpp"` (explicit opt-in — Auto preserves +/// pre-v0.1.2 behavior, Ollama supervises itself, Vllm has its own path) +/// - `Settings.llama_server_binary` is `Some` (directive: None preserves +/// "do not spawn anything") +/// - `Settings.model_id` is `Some` AND the GGUF exists under `models_dir` +/// (conservative: don't burn the restart budget spawning a server that +/// has nothing to load — chat will fail loudly via the existing +/// "backend not reachable" message instead) +pub fn should_spawn_llama_supervisor(settings: &Settings, models_dir: &Path) -> bool { + if !matches!(settings.backend, Backend::LlamaCpp) { + return false; + } + if settings.llama_server_binary.is_none() { + return false; + } + match settings.model_id.as_deref() { + Some(id) => model_path_for(models_dir, id).exists(), + None => false, + } +} + +/// Parse the port out of an `http://host:port/...` URL. Bare-bones to avoid +/// pulling in `url` for one field. Returns `None` if no port segment is found. +fn parse_port(url: &str) -> Option { + // The last `:`-delimited segment, up to the first `/`. + let after_colon = url.rsplit(':').next()?; + let port_str = after_colon.split('/').next()?; + port_str.parse().ok() +} + +/// Build (but don't start) the llama-server Supervisor + its restart policy +/// for the current settings. Returns `None` if the spawn-gate +/// (`should_spawn_llama_supervisor`) refuses. +/// +/// The caller is responsible for spawning `supervisor::supervise(...)` as a +/// background task and holding the resulting handle alive for the daemon's +/// lifetime (`main.rs` does this via Tauri-managed state + tokio task). +pub fn build_llama_supervisor( + settings: &Settings, + models_dir: &Path, +) -> Option<(Arc, SupervisorPolicy)> { + if !should_spawn_llama_supervisor(settings, models_dir) { + return None; + } + let binary = PathBuf::from(settings.llama_server_binary.as_ref()?); + let model_id = settings.model_id.as_ref()?; + let model_path = model_path_for(models_dir, model_id); + + let inference_url = settings + .inference_base_url + .clone() + .unwrap_or_else(|| DEFAULT_INFERENCE_BASE_URL.to_string()); + let port = parse_port(&inference_url).unwrap_or(8080); + let health_url = format!("{inference_url}/v1/models"); + + let sup = Arc::new(supervisor::spawn_llama_server( + &binary, + &model_path, + port, + supervisor::DEFAULT_LLAMA_CTX_LEN, + )); + let policy = SupervisorPolicy { + health_url, + ..SupervisorPolicy::default() + }; + Some((sup, policy)) +} + +/// State the Tauri layer manages for the supervised subprocess. `status` is +/// shared with the supervise loop (which mutates it). +/// +/// `shutdown` is the sender half of the cancellation channel. The supervise +/// loop's `watch::Receiver::changed()` fires either when we explicitly send +/// `true` (via `signal_shutdown`) OR when the Sender is dropped — so dropping +/// `LlamaSupervisorState` during Tauri's teardown is enough to wake the loop, +/// which then calls `Supervisor::stop()` and exits with `Stopped` status. +/// An explicit `signal_shutdown` is here for a future `RunEvent::ExitRequested` +/// hook that wants to wait for the loop to drain. +pub struct LlamaSupervisorState { + pub status: Arc>, + // Held in Mutex> so a future ExitRequested hook can `.take()` it + // to send the signal before tauri tears down the runtime. Field is + // observed via Drop semantics today, not direct reads — keep the + // dead_code allow until the hook lands. + #[allow(dead_code)] + shutdown: Mutex>>, +} + +impl LlamaSupervisorState { + pub fn not_spawning() -> Self { + Self { + status: Arc::new(Mutex::new(SupervisorStatus::NotSpawning)), + shutdown: Mutex::new(None), + } + } + + pub fn live( + status: Arc>, + shutdown_tx: tokio::sync::watch::Sender, + ) -> Self { + Self { + status, + shutdown: Mutex::new(Some(shutdown_tx)), + } + } + + /// Explicitly signal the supervise loop to stop. Currently unused (Drop + /// is sufficient for v0.1.2), kept for the future graceful-exit hook. + #[allow(dead_code)] + pub fn signal_shutdown(&self) { + if let Ok(mut g) = self.shutdown.lock() { + if let Some(tx) = g.take() { + let _ = tx.send(true); + } + } + } +} + /// Construct the full AppState given settings. pub fn build_app_state(settings: &Settings) -> ocm_api::AppState { let inference_url = settings @@ -168,9 +297,30 @@ mod tests { backend: Backend::Auto, ollama_base_url: None, ollama_model: None, + llama_server_binary: None, } } + #[test] + fn parse_port_extracts_from_loopback_url() { + assert_eq!(parse_port("http://127.0.0.1:8080"), Some(8080)); + assert_eq!(parse_port("http://127.0.0.1:8080/v1"), Some(8080)); + assert_eq!(parse_port("http://127.0.0.1:8000/v1/models"), Some(8000)); + } + + #[test] + fn parse_port_returns_none_when_no_port() { + assert_eq!(parse_port("http://example.com/v1"), None); + } + + #[test] + fn build_llama_supervisor_returns_none_when_spawn_gate_refuses() { + let dir = tempfile::tempdir().unwrap(); + // Spawn-gate refuses (backend = Auto, no model file). + let s = Settings::default(); + assert!(build_llama_supervisor(&s, dir.path()).is_none()); + } + #[test] fn defaults_apply_when_settings_blank() { let s = Settings::default(); @@ -244,6 +394,103 @@ mod tests { assert_eq!(state.backend.name(), "vLLM"); } + // --- Process supervision decision (Task 2 — Track 1 item 2) --- + + fn write_dummy_model(dir: &std::path::Path, model_id: &str) -> std::path::PathBuf { + // Convention matches ocm_models::downloader::download_model: + // dest = dest_dir.join(format!("{}.gguf", entry.id)) + let p = dir.join(format!("{model_id}.gguf")); + std::fs::write(&p, b"").unwrap(); + p + } + + #[test] + fn should_spawn_llama_supervisor_yes_when_llamacpp_plus_binary_plus_model_present() { + let dir = tempfile::tempdir().unwrap(); + write_dummy_model(dir.path(), "qwen2.5-1.5b-q4"); + let s = Settings { + backend: Backend::LlamaCpp, + model_id: Some("qwen2.5-1.5b-q4".into()), + llama_server_binary: Some("/usr/local/bin/llama-server".into()), + ..Settings::default() + }; + assert!(should_spawn_llama_supervisor(&s, dir.path())); + } + + #[test] + fn should_spawn_llama_supervisor_no_when_backend_is_ollama() { + // Headline rule per AGENT_OPERATIONS scope: Ollama supervises itself — + // OCM must NEVER spawn anything when backend=ollama, regardless of the + // other fields. + let dir = tempfile::tempdir().unwrap(); + write_dummy_model(dir.path(), "qwen2.5-1.5b-q4"); + let s = Settings { + backend: Backend::Ollama, + model_id: Some("qwen2.5-1.5b-q4".into()), + llama_server_binary: Some("/usr/local/bin/llama-server".into()), + ..Settings::default() + }; + assert!(!should_spawn_llama_supervisor(&s, dir.path())); + } + + #[test] + fn should_spawn_llama_supervisor_no_when_backend_is_auto() { + // Auto leaves orchestration to the user's existing setup. Don't surprise + // pre-v0.1.2 users who never opted into supervision. + let dir = tempfile::tempdir().unwrap(); + write_dummy_model(dir.path(), "qwen2.5-1.5b-q4"); + let s = Settings { + backend: Backend::Auto, + model_id: Some("qwen2.5-1.5b-q4".into()), + llama_server_binary: Some("/usr/local/bin/llama-server".into()), + ..Settings::default() + }; + assert!(!should_spawn_llama_supervisor(&s, dir.path())); + } + + #[test] + fn should_spawn_llama_supervisor_no_when_binary_unset() { + // Directive: "llama-server binary path from settings, new field, + // None = do-not-spawn preserves current behavior". + let dir = tempfile::tempdir().unwrap(); + write_dummy_model(dir.path(), "qwen2.5-1.5b-q4"); + let s = Settings { + backend: Backend::LlamaCpp, + model_id: Some("qwen2.5-1.5b-q4".into()), + llama_server_binary: None, + ..Settings::default() + }; + assert!(!should_spawn_llama_supervisor(&s, dir.path())); + } + + #[test] + fn should_spawn_llama_supervisor_no_when_model_file_missing() { + // Conservative: refuse to spawn if there's nothing to load. The user + // hasn't downloaded a model yet — let chat fail with the existing + // "backend not reachable" message rather than burn the restart budget. + let dir = tempfile::tempdir().unwrap(); + // intentionally no write_dummy_model + let s = Settings { + backend: Backend::LlamaCpp, + model_id: Some("qwen2.5-1.5b-q4".into()), + llama_server_binary: Some("/usr/local/bin/llama-server".into()), + ..Settings::default() + }; + assert!(!should_spawn_llama_supervisor(&s, dir.path())); + } + + #[test] + fn should_spawn_llama_supervisor_no_when_model_id_unset() { + let dir = tempfile::tempdir().unwrap(); + let s = Settings { + backend: Backend::LlamaCpp, + model_id: None, + llama_server_binary: Some("/usr/local/bin/llama-server".into()), + ..Settings::default() + }; + assert!(!should_spawn_llama_supervisor(&s, dir.path())); + } + #[tokio::test] async fn probe_url_returns_false_for_unreachable() { // Using port 1 (privileged, almost guaranteed not bound) on localhost diff --git a/crates/ocm-daemon/src/commands.rs b/crates/ocm-daemon/src/commands.rs index a438a4e..3c19d84 100644 --- a/crates/ocm-daemon/src/commands.rs +++ b/crates/ocm-daemon/src/commands.rs @@ -12,8 +12,10 @@ //! `retrieval_top_k`, `api_port`, `mcp_enabled`. The frontend surfaces //! "restart required to apply" so the user isn't surprised. +use crate::bootstrap::LlamaSupervisorState; use crate::paths::AppPaths; use crate::settings::Settings; +use crate::supervisor::SupervisorStatus; use ocm_models::{downloader::download_model, Registry}; use std::path::PathBuf; use std::sync::Mutex; @@ -59,6 +61,23 @@ pub fn list_registry_models() -> Result { Registry::load_bundled().map_err(|e| format!("load bundled registry: {e}")) } +/// Live status of the llama-server supervisor (if any). The frontend polls +/// this to surface "running" / "restarting" / "failed" badges. When the +/// daemon was started without supervision (backend != "llamacpp", or +/// `llama_server_binary` unset, or model missing), the status is +/// `NotSpawning` and the frontend can fall back to the existing +/// "inference URL reachable?" probe. +#[tauri::command] +pub fn get_supervisor_status( + state: State<'_, LlamaSupervisorState>, +) -> Result { + state + .status + .lock() + .map(|s| s.clone()) + .map_err(|e| format!("supervisor status state poisoned: {e}")) +} + /// Download a model by registry id into the app data dir under "models/". /// Returns the absolute path on success. Refuses entries with empty sha256 /// (registry guards unverified weights). diff --git a/crates/ocm-daemon/src/main.rs b/crates/ocm-daemon/src/main.rs index 2a87c8e..8b4b529 100644 --- a/crates/ocm-daemon/src/main.rs +++ b/crates/ocm-daemon/src/main.rs @@ -4,15 +4,10 @@ mod bootstrap; mod commands; mod paths; mod settings; -// Supervisor's process-spawning helpers (spawn_llama_server, spawn_vllm_server) -// will be activated once OCM ships its own bundled binaries; for now the -// daemon expects external llama-server / vLLM processes to be running and -// connects to them via the URLs in settings (bootstrap.rs handles the connect). -#[allow(dead_code)] mod supervisor; mod tray; -use std::sync::Mutex; +use std::sync::{Arc, Mutex}; use tauri::tray::TrayIconBuilder; use tauri::Manager; use tracing::info; @@ -32,6 +27,7 @@ fn main() -> anyhow::Result<()> { commands::save_settings, commands::list_registry_models, commands::download_model_cmd, + commands::get_supervisor_status, ]) .setup(|app| { let app_paths = paths::AppPaths::resolve()?; @@ -48,6 +44,34 @@ fn main() -> anyhow::Result<()> { port = loaded_settings.api_port, "settings loaded" ); + + // Build the llama-server supervisor before bootstrap, so the + // existing dependency probe sees the spawned process as up. + // None means "supervision disabled this run" — preserves + // pre-v0.1.2 behavior when the user doesn't opt in. + let supervisor_state = + match bootstrap::build_llama_supervisor(&loaded_settings, &app_paths.models_dir) { + Some((supervised, policy)) => { + let status = Arc::new(Mutex::new(supervisor::SupervisorStatus::default())); + let (shutdown_tx, shutdown_rx) = tokio::sync::watch::channel(false); + let status_for_loop = status.clone(); + info!( + name = supervised.name(), + health_url = %policy.health_url, + "starting llama-server supervisor" + ); + tauri::async_runtime::spawn(async move { + supervisor::supervise(supervised, policy, status_for_loop, shutdown_rx) + .await; + }); + bootstrap::LlamaSupervisorState::live(status, shutdown_tx) + } + None => { + info!("llama-server supervisor not configured for this run"); + bootstrap::LlamaSupervisorState::not_spawning() + } + }; + // Spawn the bootstrap task — probes external dependencies and // starts the OCM HTTP API server in the background. Tauri's // setup() is sync so we hand off to a tokio task; the Tauri main @@ -67,6 +91,7 @@ fn main() -> anyhow::Result<()> { app.manage(app_paths); app.manage(settings_state); + app.manage(supervisor_state); let menu = tray::build_tray_menu(app.handle())?; let _tray = TrayIconBuilder::new() diff --git a/crates/ocm-daemon/src/settings.rs b/crates/ocm-daemon/src/settings.rs index 374fcdd..952c9f6 100644 --- a/crates/ocm-daemon/src/settings.rs +++ b/crates/ocm-daemon/src/settings.rs @@ -34,6 +34,17 @@ pub struct Settings { /// falls back to `ocm_inference::ollama::DEFAULT_MODEL`. #[serde(default)] pub ollama_model: Option, + /// Absolute path to a `llama-server` binary. When set, the daemon will + /// spawn + supervise it on boot (provided `backend = "llamacpp"` AND + /// a downloaded model file exists under the app's models dir). + /// + /// `None` (the default) preserves the pre-v0.1.2 behavior: the user is + /// expected to run `llama-server` themselves. See + /// `crates/ocm-daemon/src/supervisor.rs` for the lifecycle policy. + /// + /// **No-op when `backend = "ollama"`** — Ollama supervises itself. + #[serde(default)] + pub llama_server_binary: Option, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Copy)] @@ -69,6 +80,7 @@ impl Default for Settings { backend: Backend::Auto, ollama_base_url: None, ollama_model: None, + llama_server_binary: None, } } } @@ -175,6 +187,46 @@ theme = "system" assert_eq!(loaded.ollama_model, None); } + #[test] + fn default_llama_server_binary_is_none() { + // None preserves the pre-v0.1.2 "do not spawn anything" behavior; the + // user opts in by pointing settings at their llama-server binary. + assert_eq!(Settings::default().llama_server_binary, None); + } + + #[test] + fn llama_server_binary_round_trips_via_toml() { + let dir = tempdir().unwrap(); + let path = dir.path().join("settings.toml"); + let original = Settings { + llama_server_binary: Some("/usr/local/bin/llama-server".into()), + ..Settings::default() + }; + original.save(&path).unwrap(); + let loaded = Settings::load_or_default(&path).unwrap(); + assert_eq!( + loaded.llama_server_binary.as_deref(), + Some("/usr/local/bin/llama-server") + ); + } + + #[test] + fn legacy_settings_toml_without_llama_server_binary_still_parses() { + // Forward-compat for v0.1.0/v0.1.1 settings.toml that pre-dates this field. + let dir = tempdir().unwrap(); + let path = dir.path().join("settings.toml"); + let legacy = r#" +api_port = 7300 +mcp_enabled = true +theme = "system" +backend = "llamacpp" +"#; + std::fs::write(&path, legacy).unwrap(); + let loaded = Settings::load_or_default(&path).unwrap(); + assert_eq!(loaded.llama_server_binary, None); + assert_eq!(loaded.backend, Backend::LlamaCpp); + } + #[test] fn all_backend_variants_round_trip() { for kind in [ diff --git a/crates/ocm-daemon/src/supervisor.rs b/crates/ocm-daemon/src/supervisor.rs index 8ba7470..ae5aa60 100644 --- a/crates/ocm-daemon/src/supervisor.rs +++ b/crates/ocm-daemon/src/supervisor.rs @@ -3,13 +3,106 @@ //! //! Use `Supervisor::new()` with a `Command` factory closure so the supervisor can //! restart the process with a fresh `Command` each time without consuming state. +//! +//! # Backend coverage (v0.1.2 — Track 1 item 2) +//! +//! This module currently spawns `llama-server` only. The `spawn_vllm_server` +//! helper is kept here for the future NVIDIA path but is not yet wired into +//! bootstrap. +//! +//! **Ollama is deliberately NOT supervised here.** When `Settings.backend = +//! "ollama"`, OCM expects an Ollama daemon to be already running (it has its +//! own service installer + tray + lifecycle), and bridges to it via the native +//! NDJSON adapter (`crates/ocm-inference/src/ollama.rs`). Spawning would either +//! double-spawn a daemon already running, or fight ollama-svc's own restart +//! logic. The spawn-gate in `bootstrap::should_spawn_llama_supervisor` enforces +//! this. use anyhow::{Context, Result}; +use serde::{Deserialize, Serialize}; use std::path::Path; use std::process::{Child, Command, Stdio}; use std::sync::{Arc, Mutex}; use std::time::{Duration, Instant}; -use tracing::{info, warn}; +use tracing::{error, info, warn}; + +/// Restart-loop policy constants. Documented here so `SupervisorPolicy::default()` +/// can assert them, and so future tuning is one-place rather than scattered. +pub const DEFAULT_MAX_RESTARTS: u8 = 3; +pub const DEFAULT_INITIAL_BACKOFF: Duration = Duration::from_millis(500); +pub const DEFAULT_MAX_BACKOFF: Duration = Duration::from_secs(10); +/// If the supervised process runs healthy for at least this long, the restart +/// counter resets — a process that crashes once a day is not "failing repeatedly." +pub const DEFAULT_STABILITY_WINDOW: Duration = Duration::from_secs(60); +pub const DEFAULT_HEALTH_CHECK_INTERVAL: Duration = Duration::from_secs(5); +pub const DEFAULT_HEALTH_CHECK_TIMEOUT: Duration = Duration::from_secs(15); +/// Default context length passed to `llama-server -c`. Matches the v1 design +/// plan's example. Not a Settings field in v0.1.2 — see TASK_2 design notes. +pub const DEFAULT_LLAMA_CTX_LEN: u32 = 4096; + +/// Live status of a supervised subprocess. Exposed to the frontend via the +/// `get_supervisor_status` Tauri command. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum SupervisorStatus { + /// No supervision is configured. Either `Settings.backend != "llamacpp"`, + /// or the binary/model prerequisites aren't satisfied. + #[default] + NotSpawning, + /// Spawn requested; waiting for the process to come up + health-check. + Starting, + /// Process is alive and the last health-check passed. + Running { pid: u32 }, + /// Process died (or failed health-check); waiting backoff before next try. + Restarting { attempt: u8, last_error: String }, + /// Hit `max_restarts` within the stability window. Manual intervention required. + FailedAfterMaxRestarts { attempts: u8, last_error: String }, + /// Stopped on graceful daemon shutdown. + Stopped, +} + +/// Restart + health-check policy. Build via `SupervisorPolicy::default()` and +/// override per-field for tests. +#[derive(Debug, Clone)] +pub struct SupervisorPolicy { + /// Max restarts allowed before surfacing `FailedAfterMaxRestarts`. + pub max_restarts: u8, + pub initial_backoff: Duration, + pub max_backoff: Duration, + /// Run-time after which the restart counter resets (a process that ran + /// healthy this long is considered "stable, then crashed", not "crash-looping"). + pub stability_window: Duration, + /// URL polled for readiness after spawn. + pub health_url: String, + /// How often to recheck liveness once Running. + pub health_check_interval: Duration, + /// Max wait for first HTTP-ready after spawn. + pub health_check_timeout: Duration, +} + +impl Default for SupervisorPolicy { + fn default() -> Self { + Self { + max_restarts: DEFAULT_MAX_RESTARTS, + initial_backoff: DEFAULT_INITIAL_BACKOFF, + max_backoff: DEFAULT_MAX_BACKOFF, + stability_window: DEFAULT_STABILITY_WINDOW, + health_url: String::new(), + health_check_interval: DEFAULT_HEALTH_CHECK_INTERVAL, + health_check_timeout: DEFAULT_HEALTH_CHECK_TIMEOUT, + } + } +} + +/// Exponential backoff: `initial * 2^attempt_index`, clamped to `max`. +/// `attempt_index = 0` returns `initial`. `u8::MAX` does not overflow. +pub fn compute_backoff(attempt_index: u8, initial: Duration, max: Duration) -> Duration { + // Use u32::checked_shl to avoid shift-overflow when attempt_index >= 32. + let multiplier = 1u32 + .checked_shl(u32::from(attempt_index)) + .unwrap_or(u32::MAX); + initial.saturating_mul(multiplier).min(max) +} type CommandFactory = Box Command + Send + Sync>; @@ -109,6 +202,12 @@ pub fn spawn_llama_server(binary: &Path, model_path: &Path, port: u16, ctx_len: } /// Build a Supervisor that runs vLLM's OpenAI-compat HTTP server. +/// +/// Not yet wired into bootstrap — the NVIDIA-supervision path is a separate +/// follow-up (vLLM has heavier Python/CUDA preconditions than llama.cpp). +/// Kept here so the supervision machinery covers both backends when that path +/// activates. +#[allow(dead_code)] pub fn spawn_vllm_server(python: &Path, model_id: &str, port: u16) -> Supervisor { let python = python.to_path_buf(); let model_id = model_id.to_string(); @@ -144,6 +243,184 @@ pub async fn wait_for_http_ready(url: &str, timeout: Duration) -> Result<()> { anyhow::bail!("backend at {url} did not become ready within {timeout:?}") } +/// Health-gated restart loop. Spawns the configured subprocess, waits for +/// HTTP-ready, monitors liveness, restarts on death with exponential backoff, +/// and surfaces `FailedAfterMaxRestarts` once the budget is exhausted. +/// +/// The loop honors `shutdown` (a `tokio::sync::watch` channel): when it sees +/// `true`, it stops the supervisee cleanly and sets `Stopped`. +/// +/// This is the v0.1.2 supervision entry point; bootstrap calls it as a +/// `tauri::async_runtime::spawn`'d background task when `should_spawn_llama_supervisor` +/// returns true. +pub async fn supervise( + supervisor: Arc, + policy: SupervisorPolicy, + status: Arc>, + mut shutdown: tokio::sync::watch::Receiver, +) { + let mut attempts: u8 = 0; + // Initialized to a sentinel that's only observable if we surface + // FailedAfterMaxRestarts before ever setting it (shouldn't happen in + // practice, but is sound). All real paths overwrite this before read. + #[allow(unused_assignments)] + let mut last_error = String::new(); + + loop { + // Honor shutdown before any work. + if *shutdown.borrow() { + supervisor.stop(); + set_status(&status, SupervisorStatus::Stopped); + return; + } + + set_status(&status, SupervisorStatus::Starting); + let spawned_at = Instant::now(); + match supervisor.start() { + Ok(()) => { + // Spawned OK. Wait for HTTP readiness. + match wait_for_http_ready(&policy.health_url, policy.health_check_timeout).await { + Ok(()) => { + let pid = supervisor.pid().unwrap_or(0); + info!( + name = supervisor.name(), + pid, "supervised subprocess healthy" + ); + set_status(&status, SupervisorStatus::Running { pid }); + // Counter resets immediately on healthy spawn; if the + // process later crashes after stability_window, the + // monitor branch below also resets. + attempts = 0; + + // Monitor until shutdown OR liveness loss. + let died = monitor_until_dead( + &supervisor, + &policy, + &status, + &mut shutdown, + spawned_at, + &mut attempts, + ) + .await; + match died { + MonitorOutcome::Shutdown => { + supervisor.stop(); + set_status(&status, SupervisorStatus::Stopped); + return; + } + MonitorOutcome::Died(reason) => { + last_error = reason; + attempts = attempts.saturating_add(1); + } + } + } + Err(e) => { + last_error = format!("health-check failed: {e}"); + supervisor.stop(); + attempts = attempts.saturating_add(1); + } + } + } + Err(e) => { + last_error = format!("spawn failed: {e}"); + attempts = attempts.saturating_add(1); + } + } + + // Budget check. + if attempts >= policy.max_restarts { + error!( + attempts, + error = %last_error, + "supervisor exhausted restart budget; manual intervention required" + ); + set_status( + &status, + SupervisorStatus::FailedAfterMaxRestarts { + attempts, + last_error: last_error.clone(), + }, + ); + return; + } + + // Backoff, then loop. attempt_index is the 0-based shift power; first + // restart waits `initial_backoff`. + let backoff = compute_backoff( + attempts.saturating_sub(1), + policy.initial_backoff, + policy.max_backoff, + ); + warn!( + attempts, + backoff_ms = backoff.as_millis() as u64, + error = %last_error, + "supervised subprocess will restart after backoff" + ); + set_status( + &status, + SupervisorStatus::Restarting { + attempt: attempts, + last_error: last_error.clone(), + }, + ); + + // Race backoff against shutdown so we exit promptly. + tokio::select! { + _ = tokio::time::sleep(backoff) => {} + _ = shutdown.changed() => { + supervisor.stop(); + set_status(&status, SupervisorStatus::Stopped); + return; + } + } + } +} + +enum MonitorOutcome { + Shutdown, + Died(String), +} + +async fn monitor_until_dead( + supervisor: &Supervisor, + policy: &SupervisorPolicy, + status: &Arc>, + shutdown: &mut tokio::sync::watch::Receiver, + spawned_at: Instant, + attempts: &mut u8, +) -> MonitorOutcome { + loop { + tokio::select! { + _ = shutdown.changed() => { + return MonitorOutcome::Shutdown; + } + _ = tokio::time::sleep(policy.health_check_interval) => { + if !supervisor.is_alive() { + // If we ran healthy for at least the stability window, + // reset the restart counter — this was a "stable run then + // crash", not a flap. + if spawned_at.elapsed() >= policy.stability_window { + *attempts = 0; + } + return MonitorOutcome::Died("subprocess exited".to_string()); + } + // Refresh Running { pid } in case of mid-life pid change (rare, + // but cheap to keep in sync). + if let Some(pid) = supervisor.pid() { + set_status(status, SupervisorStatus::Running { pid }); + } + } + } + } +} + +fn set_status(slot: &Arc>, new_status: SupervisorStatus) { + if let Ok(mut g) = slot.lock() { + *g = new_status; + } +} + #[cfg(test)] mod tests { use super::*; @@ -163,6 +440,114 @@ mod tests { } } + /// Spawn-then-exit-immediately. Used to exercise the supervise loop's + /// restart-on-death path without having to block on a long-running subprocess. + fn immediate_exit_command() -> Command { + #[cfg(unix)] + { + Command::new("true") + } + #[cfg(windows)] + { + let mut c = Command::new("cmd"); + c.args(["/c", "exit", "0"]); + c + } + } + + #[test] + fn supervisor_status_default_is_not_spawning() { + assert_eq!(SupervisorStatus::default(), SupervisorStatus::NotSpawning); + } + + #[test] + fn policy_default_uses_documented_constants() { + let p = SupervisorPolicy::default(); + assert_eq!(p.max_restarts, DEFAULT_MAX_RESTARTS); + assert_eq!(p.initial_backoff, DEFAULT_INITIAL_BACKOFF); + assert_eq!(p.max_backoff, DEFAULT_MAX_BACKOFF); + assert_eq!(p.stability_window, DEFAULT_STABILITY_WINDOW); + } + + #[test] + fn backoff_doubles_then_clamps_at_max() { + let initial = Duration::from_millis(100); + let max = Duration::from_millis(800); + assert_eq!(compute_backoff(0, initial, max), Duration::from_millis(100)); + assert_eq!(compute_backoff(1, initial, max), Duration::from_millis(200)); + assert_eq!(compute_backoff(2, initial, max), Duration::from_millis(400)); + // attempt 3 would be 800 (exactly at cap) + assert_eq!(compute_backoff(3, initial, max), Duration::from_millis(800)); + // attempt 4+ clamped + assert_eq!(compute_backoff(4, initial, max), Duration::from_millis(800)); + // u8::MAX must not overflow + assert_eq!( + compute_backoff(255, initial, max), + Duration::from_millis(800) + ); + } + + #[tokio::test(flavor = "current_thread")] + async fn supervise_with_immediate_exit_hits_failed_after_max_restarts() { + // Process dies right after spawn AND the health URL never responds + // (port 1 is privileged + unbound on CI runners). The loop should + // burn through max_restarts attempts then surface FailedAfterMaxRestarts. + let sup = Arc::new(Supervisor::new("immediate-exit", immediate_exit_command)); + let policy = SupervisorPolicy { + max_restarts: 2, + initial_backoff: Duration::from_millis(20), + max_backoff: Duration::from_millis(40), + stability_window: Duration::from_secs(60), + health_url: "http://127.0.0.1:1/health".to_string(), + health_check_interval: Duration::from_millis(50), + health_check_timeout: Duration::from_millis(100), + }; + let status = Arc::new(Mutex::new(SupervisorStatus::default())); + let (_tx, rx) = tokio::sync::watch::channel(false); + + supervise(sup.clone(), policy, status.clone(), rx).await; + + let final_status = status.lock().unwrap().clone(); + match final_status { + SupervisorStatus::FailedAfterMaxRestarts { attempts, .. } => { + assert_eq!(attempts, 2, "should hit exactly max_restarts attempts"); + } + other => panic!("expected FailedAfterMaxRestarts, got {other:?}"), + } + // Drop should leave no orphan + assert!(!sup.is_alive()); + } + + #[tokio::test(flavor = "current_thread")] + async fn supervise_exits_cleanly_on_shutdown_signal() { + // sleep 30 keeps the supervisee alive; we signal shutdown before any + // restart loop work happens, expecting Stopped status + clean exit. + let sup = Arc::new(Supervisor::new("sleep", || sleep_command(30))); + let policy = SupervisorPolicy { + max_restarts: 3, + initial_backoff: Duration::from_millis(10), + max_backoff: Duration::from_millis(20), + stability_window: Duration::from_secs(60), + health_url: "http://127.0.0.1:1/health".to_string(), + health_check_interval: Duration::from_millis(50), + health_check_timeout: Duration::from_millis(50), + }; + let status = Arc::new(Mutex::new(SupervisorStatus::default())); + let (tx, rx) = tokio::sync::watch::channel(false); + + // Signal shutdown almost immediately, before the loop's first health check returns. + let shutdown_handle = tokio::spawn(async move { + tokio::time::sleep(Duration::from_millis(10)).await; + let _ = tx.send(true); + }); + + supervise(sup.clone(), policy, status.clone(), rx).await; + shutdown_handle.await.ok(); + + assert_eq!(*status.lock().unwrap(), SupervisorStatus::Stopped); + assert!(!sup.is_alive(), "supervisor should have killed the child"); + } + #[test] fn lifecycle_with_sleep_command() { let sup = Supervisor::new("sleep", || sleep_command(30)); diff --git a/frontend/src/lib/settings.ts b/frontend/src/lib/settings.ts index 8c810a8..8e5c325 100644 --- a/frontend/src/lib/settings.ts +++ b/frontend/src/lib/settings.ts @@ -19,6 +19,7 @@ export interface Settings { backend: Backend; ollama_base_url: string | null; ollama_model: string | null; + llama_server_binary: string | null; } import { invoke } from './tauri'; diff --git a/frontend/src/routes/settings/+page.svelte b/frontend/src/routes/settings/+page.svelte index c3f8423..b48538a 100644 --- a/frontend/src/routes/settings/+page.svelte +++ b/frontend/src/routes/settings/+page.svelte @@ -165,6 +165,15 @@ bind:value={settings.ollama_model} /> + + +