OpenCircuitDev · OpenCircuitDev · Jun 11, 2026 · Jun 11, 2026 · Jun 11, 2026 · Jun 11, 2026
diff --git a/crates/ocm-daemon/src/bootstrap.rs b/crates/ocm-daemon/src/bootstrap.rs
@@ -8,10 +8,12 @@
 //! that reports status; chat requests fail with clear errors.
 
 use crate::settings::{Backend, Settings};
+use crate::supervisor::{self, Supervisor, SupervisorPolicy, SupervisorStatus};
 use ocm_inference::ollama::DEFAULT_MODEL as DEFAULT_OLLAMA_MODEL;
 use ocm_inference::selector::{self, BackendKind, DEFAULT_OLLAMA_BASE_URL};
 use std::net::{IpAddr, Ipv4Addr, SocketAddr};
-use std::sync::Arc;
+use std::path::{Path, PathBuf};
+use std::sync::{Arc, Mutex};
 use std::time::Duration;
 use tracing::{info, warn};
 
@@ -72,6 +74,133 @@ fn resolve_backend_kind(setting: Backend) -> BackendKind {
     }
 }
 
+/// Build the on-disk path the daemon expects for a model_id, matching the
+/// convention used by `ocm_models::downloader::download_model`.
+fn model_path_for(models_dir: &Path, model_id: &str) -> PathBuf {
+    models_dir.join(format!("{model_id}.gguf"))
+}
+
+/// Decision: should bootstrap spawn + supervise `llama-server` on this run?
+///
+/// True iff ALL of:
+/// - `Settings.backend = "llamacpp"` (explicit opt-in — Auto preserves
+///   pre-v0.1.2 behavior, Ollama supervises itself, Vllm has its own path)
+/// - `Settings.llama_server_binary` is `Some` (directive: None preserves
+///   "do not spawn anything")
+/// - `Settings.model_id` is `Some` AND the GGUF exists under `models_dir`
+///   (conservative: don't burn the restart budget spawning a server that
+///   has nothing to load — chat will fail loudly via the existing
+///   "backend not reachable" message instead)
+pub fn should_spawn_llama_supervisor(settings: &Settings, models_dir: &Path) -> bool {
+    if !matches!(settings.backend, Backend::LlamaCpp) {
+        return false;
+    }
+    if settings.llama_server_binary.is_none() {
+        return false;
+    }
+    match settings.model_id.as_deref() {
+        Some(id) => model_path_for(models_dir, id).exists(),
+        None => false,
+    }
+}
+
+/// Parse the port out of an `http://host:port/...` URL. Bare-bones to avoid
+/// pulling in `url` for one field. Returns `None` if no port segment is found.
+fn parse_port(url: &str) -> Option<u16> {
+    // The last `:`-delimited segment, up to the first `/`.
+    let after_colon = url.rsplit(':').next()?;
+    let port_str = after_colon.split('/').next()?;
+    port_str.parse().ok()
+}
+
+/// Build (but don't start) the llama-server Supervisor + its restart policy
+/// for the current settings. Returns `None` if the spawn-gate
+/// (`should_spawn_llama_supervisor`) refuses.
+///
+/// The caller is responsible for spawning `supervisor::supervise(...)` as a
+/// background task and holding the resulting handle alive for the daemon's
+/// lifetime (`main.rs` does this via Tauri-managed state + tokio task).
+pub fn build_llama_supervisor(
+    settings: &Settings,
+    models_dir: &Path,
+) -> Option<(Arc<Supervisor>, SupervisorPolicy)> {
+    if !should_spawn_llama_supervisor(settings, models_dir) {
+        return None;
+    }
+    let binary = PathBuf::from(settings.llama_server_binary.as_ref()?);
+    let model_id = settings.model_id.as_ref()?;
+    let model_path = model_path_for(models_dir, model_id);
+
+    let inference_url = settings
+        .inference_base_url
+        .clone()
+        .unwrap_or_else(|| DEFAULT_INFERENCE_BASE_URL.to_string());
+    let port = parse_port(&inference_url).unwrap_or(8080);
+    let health_url = format!("{inference_url}/v1/models");
+
+    let sup = Arc::new(supervisor::spawn_llama_server(
+        &binary,
+        &model_path,
+        port,
+        supervisor::DEFAULT_LLAMA_CTX_LEN,
+    ));
+    let policy = SupervisorPolicy {
+        health_url,
+        ..SupervisorPolicy::default()
+    };
+    Some((sup, policy))
+}
+
+/// State the Tauri layer manages for the supervised subprocess. `status` is
+/// shared with the supervise loop (which mutates it).
+///
+/// `shutdown` is the sender half of the cancellation channel. The supervise
+/// loop's `watch::Receiver::changed()` fires either when we explicitly send
+/// `true` (via `signal_shutdown`) OR when the Sender is dropped — so dropping
+/// `LlamaSupervisorState` during Tauri's teardown is enough to wake the loop,
+/// which then calls `Supervisor::stop()` and exits with `Stopped` status.
+/// An explicit `signal_shutdown` is here for a future `RunEvent::ExitRequested`
+/// hook that wants to wait for the loop to drain.
+pub struct LlamaSupervisorState {
+    pub status: Arc<Mutex<SupervisorStatus>>,
+    // Held in Mutex<Option<_>> so a future ExitRequested hook can `.take()` it
+    // to send the signal before tauri tears down the runtime. Field is
+    // observed via Drop semantics today, not direct reads — keep the
+    // dead_code allow until the hook lands.
+    #[allow(dead_code)]
+    shutdown: Mutex<Option<tokio::sync::watch::Sender<bool>>>,
+}
+
+impl LlamaSupervisorState {
+    pub fn not_spawning() -> Self {
+        Self {
+            status: Arc::new(Mutex::new(SupervisorStatus::NotSpawning)),
+            shutdown: Mutex::new(None),
+        }
+    }
+
+    pub fn live(
+        status: Arc<Mutex<SupervisorStatus>>,
+        shutdown_tx: tokio::sync::watch::Sender<bool>,
+    ) -> Self {
+        Self {
+            status,
+            shutdown: Mutex::new(Some(shutdown_tx)),
+        }
+    }
+
+    /// Explicitly signal the supervise loop to stop. Currently unused (Drop
+    /// is sufficient for v0.1.2), kept for the future graceful-exit hook.
+    #[allow(dead_code)]
+    pub fn signal_shutdown(&self) {
+        if let Ok(mut g) = self.shutdown.lock() {
+            if let Some(tx) = g.take() {
+                let _ = tx.send(true);
+            }
+        }
+    }
+}
+
 /// Construct the full AppState given settings.
 pub fn build_app_state(settings: &Settings) -> ocm_api::AppState {
     let inference_url = settings
@@ -168,9 +297,30 @@ mod tests {
             backend: Backend::Auto,
             ollama_base_url: None,
             ollama_model: None,
+            llama_server_binary: None,
         }
     }
 
+    #[test]
+    fn parse_port_extracts_from_loopback_url() {
+        assert_eq!(parse_port("http://127.0.0.1:8080"), Some(8080));
+        assert_eq!(parse_port("http://127.0.0.1:8080/v1"), Some(8080));
+        assert_eq!(parse_port("http://127.0.0.1:8000/v1/models"), Some(8000));
+    }
+
+    #[test]
+    fn parse_port_returns_none_when_no_port() {
+        assert_eq!(parse_port("http://example.com/v1"), None);
+    }
+
+    #[test]
+    fn build_llama_supervisor_returns_none_when_spawn_gate_refuses() {
+        let dir = tempfile::tempdir().unwrap();
+        // Spawn-gate refuses (backend = Auto, no model file).
+        let s = Settings::default();
+        assert!(build_llama_supervisor(&s, dir.path()).is_none());
+    }
+
     #[test]
     fn defaults_apply_when_settings_blank() {
         let s = Settings::default();
@@ -244,6 +394,103 @@ mod tests {
         assert_eq!(state.backend.name(), "vLLM");
     }
 
+    // --- Process supervision decision (Task 2 — Track 1 item 2) ---
+
+    fn write_dummy_model(dir: &std::path::Path, model_id: &str) -> std::path::PathBuf {
+        // Convention matches ocm_models::downloader::download_model:
+        // dest = dest_dir.join(format!("{}.gguf", entry.id))
+        let p = dir.join(format!("{model_id}.gguf"));
+        std::fs::write(&p, b"").unwrap();
+        p
+    }
+
+    #[test]
+    fn should_spawn_llama_supervisor_yes_when_llamacpp_plus_binary_plus_model_present() {
+        let dir = tempfile::tempdir().unwrap();
+        write_dummy_model(dir.path(), "qwen2.5-1.5b-q4");
+        let s = Settings {
+            backend: Backend::LlamaCpp,
+            model_id: Some("qwen2.5-1.5b-q4".into()),
+            llama_server_binary: Some("/usr/local/bin/llama-server".into()),
+            ..Settings::default()
+        };
+        assert!(should_spawn_llama_supervisor(&s, dir.path()));
+    }
+
+    #[test]
+    fn should_spawn_llama_supervisor_no_when_backend_is_ollama() {
+        // Headline rule per AGENT_OPERATIONS scope: Ollama supervises itself —
+        // OCM must NEVER spawn anything when backend=ollama, regardless of the
+        // other fields.
+        let dir = tempfile::tempdir().unwrap();
+        write_dummy_model(dir.path(), "qwen2.5-1.5b-q4");
+        let s = Settings {
+            backend: Backend::Ollama,
+            model_id: Some("qwen2.5-1.5b-q4".into()),
+            llama_server_binary: Some("/usr/local/bin/llama-server".into()),
+            ..Settings::default()
+        };
+        assert!(!should_spawn_llama_supervisor(&s, dir.path()));
+    }
+
+    #[test]
+    fn should_spawn_llama_supervisor_no_when_backend_is_auto() {
+        // Auto leaves orchestration to the user's existing setup. Don't surprise
+        // pre-v0.1.2 users who never opted into supervision.
+        let dir = tempfile::tempdir().unwrap();
+        write_dummy_model(dir.path(), "qwen2.5-1.5b-q4");
+        let s = Settings {
+            backend: Backend::Auto,
+            model_id: Some("qwen2.5-1.5b-q4".into()),
+            llama_server_binary: Some("/usr/local/bin/llama-server".into()),
+            ..Settings::default()
+        };
+        assert!(!should_spawn_llama_supervisor(&s, dir.path()));
+    }
+
+    #[test]
+    fn should_spawn_llama_supervisor_no_when_binary_unset() {
+        // Directive: "llama-server binary path from settings, new field,
+        // None = do-not-spawn preserves current behavior".
+        let dir = tempfile::tempdir().unwrap();
+        write_dummy_model(dir.path(), "qwen2.5-1.5b-q4");
+        let s = Settings {
+            backend: Backend::LlamaCpp,
+            model_id: Some("qwen2.5-1.5b-q4".into()),
+            llama_server_binary: None,
+            ..Settings::default()
+        };
+        assert!(!should_spawn_llama_supervisor(&s, dir.path()));
+    }
+
+    #[test]
+    fn should_spawn_llama_supervisor_no_when_model_file_missing() {
+        // Conservative: refuse to spawn if there's nothing to load. The user
+        // hasn't downloaded a model yet — let chat fail with the existing
+        // "backend not reachable" message rather than burn the restart budget.
+        let dir = tempfile::tempdir().unwrap();
+        // intentionally no write_dummy_model
+        let s = Settings {
+            backend: Backend::LlamaCpp,
+            model_id: Some("qwen2.5-1.5b-q4".into()),
+            llama_server_binary: Some("/usr/local/bin/llama-server".into()),
+            ..Settings::default()
+        };
+        assert!(!should_spawn_llama_supervisor(&s, dir.path()));
+    }
+
+    #[test]
+    fn should_spawn_llama_supervisor_no_when_model_id_unset() {
+        let dir = tempfile::tempdir().unwrap();
+        let s = Settings {
+            backend: Backend::LlamaCpp,
+            model_id: None,
+            llama_server_binary: Some("/usr/local/bin/llama-server".into()),
+            ..Settings::default()
+        };
+        assert!(!should_spawn_llama_supervisor(&s, dir.path()));
+    }
+
     #[tokio::test]
     async fn probe_url_returns_false_for_unreachable() {
         // Using port 1 (privileged, almost guaranteed not bound) on localhost

diff --git a/crates/ocm-daemon/src/commands.rs b/crates/ocm-daemon/src/commands.rs
@@ -12,8 +12,10 @@
 //! `retrieval_top_k`, `api_port`, `mcp_enabled`. The frontend surfaces
 //! "restart required to apply" so the user isn't surprised.
 
+use crate::bootstrap::LlamaSupervisorState;
 use crate::paths::AppPaths;
 use crate::settings::Settings;
+use crate::supervisor::SupervisorStatus;
 use ocm_models::{downloader::download_model, Registry};
 use std::path::PathBuf;
 use std::sync::Mutex;
@@ -59,6 +61,23 @@ pub fn list_registry_models() -> Result<Registry, String> {
     Registry::load_bundled().map_err(|e| format!("load bundled registry: {e}"))
 }
 
+/// Live status of the llama-server supervisor (if any). The frontend polls
+/// this to surface "running" / "restarting" / "failed" badges. When the
+/// daemon was started without supervision (backend != "llamacpp", or
+/// `llama_server_binary` unset, or model missing), the status is
+/// `NotSpawning` and the frontend can fall back to the existing
+/// "inference URL reachable?" probe.
+#[tauri::command]
+pub fn get_supervisor_status(
+    state: State<'_, LlamaSupervisorState>,
+) -> Result<SupervisorStatus, String> {
+    state
+        .status
+        .lock()
+        .map(|s| s.clone())
+        .map_err(|e| format!("supervisor status state poisoned: {e}"))
+}
+
 /// Download a model by registry id into the app data dir under "models/".
 /// Returns the absolute path on success. Refuses entries with empty sha256
 /// (registry guards unverified weights).

diff --git a/crates/ocm-daemon/src/main.rs b/crates/ocm-daemon/src/main.rs
@@ -4,15 +4,10 @@ mod bootstrap;
 mod commands;
 mod paths;
 mod settings;
-// Supervisor's process-spawning helpers (spawn_llama_server, spawn_vllm_server)
-// will be activated once OCM ships its own bundled binaries; for now the
-// daemon expects external llama-server / vLLM processes to be running and
-// connects to them via the URLs in settings (bootstrap.rs handles the connect).
-#[allow(dead_code)]
 mod supervisor;
 mod tray;
 
-use std::sync::Mutex;
+use std::sync::{Arc, Mutex};
 use tauri::tray::TrayIconBuilder;
 use tauri::Manager;
 use tracing::info;
@@ -32,6 +27,7 @@ fn main() -> anyhow::Result<()> {
             commands::save_settings,
             commands::list_registry_models,
             commands::download_model_cmd,
+            commands::get_supervisor_status,
         ])
         .setup(|app| {
             let app_paths = paths::AppPaths::resolve()?;
@@ -48,6 +44,34 @@ fn main() -> anyhow::Result<()> {
                 port = loaded_settings.api_port,
                 "settings loaded"
             );
+
+            // Build the llama-server supervisor before bootstrap, so the
+            // existing dependency probe sees the spawned process as up.
+            // None means "supervision disabled this run" — preserves
+            // pre-v0.1.2 behavior when the user doesn't opt in.
+            let supervisor_state =
+                match bootstrap::build_llama_supervisor(&loaded_settings, &app_paths.models_dir) {
+                    Some((supervised, policy)) => {
+                        let status = Arc::new(Mutex::new(supervisor::SupervisorStatus::default()));
+                        let (shutdown_tx, shutdown_rx) = tokio::sync::watch::channel(false);
+                        let status_for_loop = status.clone();
+                        info!(
+                            name = supervised.name(),
+                            health_url = %policy.health_url,
+                            "starting llama-server supervisor"
+                        );
+                        tauri::async_runtime::spawn(async move {
+                            supervisor::supervise(supervised, policy, status_for_loop, shutdown_rx)
+                                .await;
+                        });
+                        bootstrap::LlamaSupervisorState::live(status, shutdown_tx)
+                    }
+                    None => {
+                        info!("llama-server supervisor not configured for this run");
+                        bootstrap::LlamaSupervisorState::not_spawning()
+                    }
+                };
+
             // Spawn the bootstrap task — probes external dependencies and
             // starts the OCM HTTP API server in the background. Tauri's
             // setup() is sync so we hand off to a tokio task; the Tauri main
@@ -67,6 +91,7 @@ fn main() -> anyhow::Result<()> {
 
             app.manage(app_paths);
             app.manage(settings_state);
+            app.manage(supervisor_state);
 
             let menu = tray::build_tray_menu(app.handle())?;
             let _tray = TrayIconBuilder::new()