diff --git a/crates/ocm-daemon/src/bootstrap.rs b/crates/ocm-daemon/src/bootstrap.rs
index 88727b5..43d0f27 100644
--- a/crates/ocm-daemon/src/bootstrap.rs
+++ b/crates/ocm-daemon/src/bootstrap.rs
@@ -8,10 +8,12 @@
 //! that reports status; chat requests fail with clear errors.
 
 use crate::settings::{Backend, Settings};
+use crate::supervisor::{self, Supervisor, SupervisorPolicy, SupervisorStatus};
 use ocm_inference::ollama::DEFAULT_MODEL as DEFAULT_OLLAMA_MODEL;
 use ocm_inference::selector::{self, BackendKind, DEFAULT_OLLAMA_BASE_URL};
 use std::net::{IpAddr, Ipv4Addr, SocketAddr};
-use std::sync::Arc;
+use std::path::{Path, PathBuf};
+use std::sync::{Arc, Mutex};
 use std::time::Duration;
 use tracing::{info, warn};
 
@@ -72,6 +74,133 @@ fn resolve_backend_kind(setting: Backend) -> BackendKind {
     }
 }
 
+/// Build the on-disk path the daemon expects for a model_id, matching the
+/// convention used by `ocm_models::downloader::download_model`.
+fn model_path_for(models_dir: &Path, model_id: &str) -> PathBuf {
+    models_dir.join(format!("{model_id}.gguf"))
+}
+
+/// Decision: should bootstrap spawn + supervise `llama-server` on this run?
+///
+/// True iff ALL of:
+/// - `Settings.backend = "llamacpp"` (explicit opt-in — Auto preserves
+///   pre-v0.1.2 behavior, Ollama supervises itself, Vllm has its own path)
+/// - `Settings.llama_server_binary` is `Some` (directive: None preserves
+///   "do not spawn anything")
+/// - `Settings.model_id` is `Some` AND the GGUF exists under `models_dir`
+///   (conservative: don't burn the restart budget spawning a server that
+///   has nothing to load — chat will fail loudly via the existing
+///   "backend not reachable" message instead)
+pub fn should_spawn_llama_supervisor(settings: &Settings, models_dir: &Path) -> bool {
+    if !matches!(settings.backend, Backend::LlamaCpp) {
+        return false;
+    }
+    if settings.llama_server_binary.is_none() {
+        return false;
+    }
+    match settings.model_id.as_deref() {
+        Some(id) => model_path_for(models_dir, id).exists(),
+        None => false,
+    }
+}
+
+/// Parse the port out of an `http://host:port/...` URL. Bare-bones to avoid
+/// pulling in `url` for one field. Returns `None` if no port segment is found.
+fn parse_port(url: &str) -> Option<u16> {
+    // The last `:`-delimited segment, up to the first `/`.
+    let after_colon = url.rsplit(':').next()?;
+    let port_str = after_colon.split('/').next()?;
+    port_str.parse().ok()
+}
+
+/// Build (but don't start) the llama-server Supervisor + its restart policy
+/// for the current settings. Returns `None` if the spawn-gate
+/// (`should_spawn_llama_supervisor`) refuses.
+///
+/// The caller is responsible for spawning `supervisor::supervise(...)` as a
+/// background task and holding the resulting handle alive for the daemon's
+/// lifetime (`main.rs` does this via Tauri-managed state + tokio task).
+pub fn build_llama_supervisor(
+    settings: &Settings,
+    models_dir: &Path,
+) -> Option<(Arc<Supervisor>, SupervisorPolicy)> {
+    if !should_spawn_llama_supervisor(settings, models_dir) {
+        return None;
+    }
+    let binary = PathBuf::from(settings.llama_server_binary.as_ref()?);
+    let model_id = settings.model_id.as_ref()?;
+    let model_path = model_path_for(models_dir, model_id);
+
+    let inference_url = settings
+        .inference_base_url
+        .clone()
+        .unwrap_or_else(|| DEFAULT_INFERENCE_BASE_URL.to_string());
+    let port = parse_port(&inference_url).unwrap_or(8080);
+    let health_url = format!("{inference_url}/v1/models");
+
+    let sup = Arc::new(supervisor::spawn_llama_server(
+        &binary,
+        &model_path,
+        port,
+        supervisor::DEFAULT_LLAMA_CTX_LEN,
+    ));
+    let policy = SupervisorPolicy {
+        health_url,
+        ..SupervisorPolicy::default()
+    };
+    Some((sup, policy))
+}
+
+/// State the Tauri layer manages for the supervised subprocess. `status` is
+/// shared with the supervise loop (which mutates it).
+///
+/// `shutdown` is the sender half of the cancellation channel. The supervise
+/// loop's `watch::Receiver::changed()` fires either when we explicitly send
+/// `true` (via `signal_shutdown`) OR when the Sender is dropped — so dropping
+/// `LlamaSupervisorState` during Tauri's teardown is enough to wake the loop,
+/// which then calls `Supervisor::stop()` and exits with `Stopped` status.
+/// An explicit `signal_shutdown` is here for a future `RunEvent::ExitRequested`
+/// hook that wants to wait for the loop to drain.
+pub struct LlamaSupervisorState {
+    pub status: Arc<Mutex<SupervisorStatus>>,
+    // Held in Mutex<Option<_>> so a future ExitRequested hook can `.take()` it
+    // to send the signal before tauri tears down the runtime. Field is
+    // observed via Drop semantics today, not direct reads — keep the
+    // dead_code allow until the hook lands.
+    #[allow(dead_code)]
+    shutdown: Mutex<Option<tokio::sync::watch::Sender<bool>>>,
+}
+
+impl LlamaSupervisorState {
+    pub fn not_spawning() -> Self {
+        Self {
+            status: Arc::new(Mutex::new(SupervisorStatus::NotSpawning)),
+            shutdown: Mutex::new(None),
+        }
+    }
+
+    pub fn live(
+        status: Arc<Mutex<SupervisorStatus>>,
+        shutdown_tx: tokio::sync::watch::Sender<bool>,
+    ) -> Self {
+        Self {
+            status,
+            shutdown: Mutex::new(Some(shutdown_tx)),
+        }
+    }
+
+    /// Explicitly signal the supervise loop to stop. Currently unused (Drop
+    /// is sufficient for v0.1.2), kept for the future graceful-exit hook.
+    #[allow(dead_code)]
+    pub fn signal_shutdown(&self) {
+        if let Ok(mut g) = self.shutdown.lock() {
+            if let Some(tx) = g.take() {
+                let _ = tx.send(true);
+            }
+        }
+    }
+}
+
 /// Construct the full AppState given settings.
 pub fn build_app_state(settings: &Settings) -> ocm_api::AppState {
     let inference_url = settings
@@ -168,9 +297,30 @@ mod tests {
             backend: Backend::Auto,
             ollama_base_url: None,
             ollama_model: None,
+            llama_server_binary: None,
         }
     }
 
+    #[test]
+    fn parse_port_extracts_from_loopback_url() {
+        assert_eq!(parse_port("http://127.0.0.1:8080"), Some(8080));
+        assert_eq!(parse_port("http://127.0.0.1:8080/v1"), Some(8080));
+        assert_eq!(parse_port("http://127.0.0.1:8000/v1/models"), Some(8000));
+    }
+
+    #[test]
+    fn parse_port_returns_none_when_no_port() {
+        assert_eq!(parse_port("http://example.com/v1"), None);
+    }
+
+    #[test]
+    fn build_llama_supervisor_returns_none_when_spawn_gate_refuses() {
+        let dir = tempfile::tempdir().unwrap();
+        // Spawn-gate refuses (backend = Auto, no model file).
+        let s = Settings::default();
+        assert!(build_llama_supervisor(&s, dir.path()).is_none());
+    }
+
     #[test]
     fn defaults_apply_when_settings_blank() {
         let s = Settings::default();
@@ -244,6 +394,103 @@ mod tests {
         assert_eq!(state.backend.name(), "vLLM");
     }
 
+    // --- Process supervision decision (Task 2 — Track 1 item 2) ---
+
+    fn write_dummy_model(dir: &std::path::Path, model_id: &str) -> std::path::PathBuf {
+        // Convention matches ocm_models::downloader::download_model:
+        // dest = dest_dir.join(format!("{}.gguf", entry.id))
+        let p = dir.join(format!("{model_id}.gguf"));
+        std::fs::write(&p, b"").unwrap();
+        p
+    }
+
+    #[test]
+    fn should_spawn_llama_supervisor_yes_when_llamacpp_plus_binary_plus_model_present() {
+        let dir = tempfile::tempdir().unwrap();
+        write_dummy_model(dir.path(), "qwen2.5-1.5b-q4");
+        let s = Settings {
+            backend: Backend::LlamaCpp,
+            model_id: Some("qwen2.5-1.5b-q4".into()),
+            llama_server_binary: Some("/usr/local/bin/llama-server".into()),
+            ..Settings::default()
+        };
+        assert!(should_spawn_llama_supervisor(&s, dir.path()));
+    }
+
+    #[test]
+    fn should_spawn_llama_supervisor_no_when_backend_is_ollama() {
+        // Headline rule per AGENT_OPERATIONS scope: Ollama supervises itself —
+        // OCM must NEVER spawn anything when backend=ollama, regardless of the
+        // other fields.
+        let dir = tempfile::tempdir().unwrap();
+        write_dummy_model(dir.path(), "qwen2.5-1.5b-q4");
+        let s = Settings {
+            backend: Backend::Ollama,
+            model_id: Some("qwen2.5-1.5b-q4".into()),
+            llama_server_binary: Some("/usr/local/bin/llama-server".into()),
+            ..Settings::default()
+        };
+        assert!(!should_spawn_llama_supervisor(&s, dir.path()));
+    }
+
+    #[test]
+    fn should_spawn_llama_supervisor_no_when_backend_is_auto() {
+        // Auto leaves orchestration to the user's existing setup. Don't surprise
+        // pre-v0.1.2 users who never opted into supervision.
+        let dir = tempfile::tempdir().unwrap();
+        write_dummy_model(dir.path(), "qwen2.5-1.5b-q4");
+        let s = Settings {
+            backend: Backend::Auto,
+            model_id: Some("qwen2.5-1.5b-q4".into()),
+            llama_server_binary: Some("/usr/local/bin/llama-server".into()),
+            ..Settings::default()
+        };
+        assert!(!should_spawn_llama_supervisor(&s, dir.path()));
+    }
+
+    #[test]
+    fn should_spawn_llama_supervisor_no_when_binary_unset() {
+        // Directive: "llama-server binary path from settings, new field,
+        // None = do-not-spawn preserves current behavior".
+        let dir = tempfile::tempdir().unwrap();
+        write_dummy_model(dir.path(), "qwen2.5-1.5b-q4");
+        let s = Settings {
+            backend: Backend::LlamaCpp,
+            model_id: Some("qwen2.5-1.5b-q4".into()),
+            llama_server_binary: None,
+            ..Settings::default()
+        };
+        assert!(!should_spawn_llama_supervisor(&s, dir.path()));
+    }
+
+    #[test]
+    fn should_spawn_llama_supervisor_no_when_model_file_missing() {
+        // Conservative: refuse to spawn if there's nothing to load. The user
+        // hasn't downloaded a model yet — let chat fail with the existing
+        // "backend not reachable" message rather than burn the restart budget.
+        let dir = tempfile::tempdir().unwrap();
+        // intentionally no write_dummy_model
+        let s = Settings {
+            backend: Backend::LlamaCpp,
+            model_id: Some("qwen2.5-1.5b-q4".into()),
+            llama_server_binary: Some("/usr/local/bin/llama-server".into()),
+            ..Settings::default()
+        };
+        assert!(!should_spawn_llama_supervisor(&s, dir.path()));
+    }
+
+    #[test]
+    fn should_spawn_llama_supervisor_no_when_model_id_unset() {
+        let dir = tempfile::tempdir().unwrap();
+        let s = Settings {
+            backend: Backend::LlamaCpp,
+            model_id: None,
+            llama_server_binary: Some("/usr/local/bin/llama-server".into()),
+            ..Settings::default()
+        };
+        assert!(!should_spawn_llama_supervisor(&s, dir.path()));
+    }
+
     #[tokio::test]
     async fn probe_url_returns_false_for_unreachable() {
         // Using port 1 (privileged, almost guaranteed not bound) on localhost
diff --git a/crates/ocm-daemon/src/commands.rs b/crates/ocm-daemon/src/commands.rs
index a438a4e..3c19d84 100644
--- a/crates/ocm-daemon/src/commands.rs
+++ b/crates/ocm-daemon/src/commands.rs
@@ -12,8 +12,10 @@
 //! `retrieval_top_k`, `api_port`, `mcp_enabled`. The frontend surfaces
 //! "restart required to apply" so the user isn't surprised.
 
+use crate::bootstrap::LlamaSupervisorState;
 use crate::paths::AppPaths;
 use crate::settings::Settings;
+use crate::supervisor::SupervisorStatus;
 use ocm_models::{downloader::download_model, Registry};
 use std::path::PathBuf;
 use std::sync::Mutex;
@@ -59,6 +61,23 @@ pub fn list_registry_models() -> Result<Registry, String> {
     Registry::load_bundled().map_err(|e| format!("load bundled registry: {e}"))
 }
 
+/// Live status of the llama-server supervisor (if any). The frontend polls
+/// this to surface "running" / "restarting" / "failed" badges. When the
+/// daemon was started without supervision (backend != "llamacpp", or
+/// `llama_server_binary` unset, or model missing), the status is
+/// `NotSpawning` and the frontend can fall back to the existing
+/// "inference URL reachable?" probe.
+#[tauri::command]
+pub fn get_supervisor_status(
+    state: State<'_, LlamaSupervisorState>,
+) -> Result<SupervisorStatus, String> {
+    state
+        .status
+        .lock()
+        .map(|s| s.clone())
+        .map_err(|e| format!("supervisor status state poisoned: {e}"))
+}
+
 /// Download a model by registry id into the app data dir under "models/".
 /// Returns the absolute path on success. Refuses entries with empty sha256
 /// (registry guards unverified weights).
diff --git a/crates/ocm-daemon/src/main.rs b/crates/ocm-daemon/src/main.rs
index 2a87c8e..8b4b529 100644
--- a/crates/ocm-daemon/src/main.rs
+++ b/crates/ocm-daemon/src/main.rs
@@ -4,15 +4,10 @@ mod bootstrap;
 mod commands;
 mod paths;
 mod settings;
-// Supervisor's process-spawning helpers (spawn_llama_server, spawn_vllm_server)
-// will be activated once OCM ships its own bundled binaries; for now the
-// daemon expects external llama-server / vLLM processes to be running and
-// connects to them via the URLs in settings (bootstrap.rs handles the connect).
-#[allow(dead_code)]
 mod supervisor;
 mod tray;
 
-use std::sync::Mutex;
+use std::sync::{Arc, Mutex};
 use tauri::tray::TrayIconBuilder;
 use tauri::Manager;
 use tracing::info;
@@ -32,6 +27,7 @@ fn main() -> anyhow::Result<()> {
             commands::save_settings,
             commands::list_registry_models,
             commands::download_model_cmd,
+            commands::get_supervisor_status,
         ])
         .setup(|app| {
             let app_paths = paths::AppPaths::resolve()?;
@@ -48,6 +44,34 @@ fn main() -> anyhow::Result<()> {
                 port = loaded_settings.api_port,
                 "settings loaded"
             );
+
+            // Build the llama-server supervisor before bootstrap, so the
+            // existing dependency probe sees the spawned process as up.
+            // None means "supervision disabled this run" — preserves
+            // pre-v0.1.2 behavior when the user doesn't opt in.
+            let supervisor_state =
+                match bootstrap::build_llama_supervisor(&loaded_settings, &app_paths.models_dir) {
+                    Some((supervised, policy)) => {
+                        let status = Arc::new(Mutex::new(supervisor::SupervisorStatus::default()));
+                        let (shutdown_tx, shutdown_rx) = tokio::sync::watch::channel(false);
+                        let status_for_loop = status.clone();
+                        info!(
+                            name = supervised.name(),
+                            health_url = %policy.health_url,
+                            "starting llama-server supervisor"
+                        );
+                        tauri::async_runtime::spawn(async move {
+                            supervisor::supervise(supervised, policy, status_for_loop, shutdown_rx)
+                                .await;
+                        });
+                        bootstrap::LlamaSupervisorState::live(status, shutdown_tx)
+                    }
+                    None => {
+                        info!("llama-server supervisor not configured for this run");
+                        bootstrap::LlamaSupervisorState::not_spawning()
+                    }
+                };
+
             // Spawn the bootstrap task — probes external dependencies and
             // starts the OCM HTTP API server in the background. Tauri's
             // setup() is sync so we hand off to a tokio task; the Tauri main
@@ -67,6 +91,7 @@ fn main() -> anyhow::Result<()> {
 
             app.manage(app_paths);
             app.manage(settings_state);
+            app.manage(supervisor_state);
 
             let menu = tray::build_tray_menu(app.handle())?;
             let _tray = TrayIconBuilder::new()
diff --git a/crates/ocm-daemon/src/settings.rs b/crates/ocm-daemon/src/settings.rs
index 374fcdd..952c9f6 100644
--- a/crates/ocm-daemon/src/settings.rs
+++ b/crates/ocm-daemon/src/settings.rs
@@ -34,6 +34,17 @@ pub struct Settings {
     /// falls back to `ocm_inference::ollama::DEFAULT_MODEL`.
     #[serde(default)]
     pub ollama_model: Option<String>,
+    /// Absolute path to a `llama-server` binary. When set, the daemon will
+    /// spawn + supervise it on boot (provided `backend = "llamacpp"` AND
+    /// a downloaded model file exists under the app's models dir).
+    ///
+    /// `None` (the default) preserves the pre-v0.1.2 behavior: the user is
+    /// expected to run `llama-server` themselves. See
+    /// `crates/ocm-daemon/src/supervisor.rs` for the lifecycle policy.
+    ///
+    /// **No-op when `backend = "ollama"`** — Ollama supervises itself.
+    #[serde(default)]
+    pub llama_server_binary: Option<String>,
 }
 
 #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Copy)]
@@ -69,6 +80,7 @@ impl Default for Settings {
             backend: Backend::Auto,
             ollama_base_url: None,
             ollama_model: None,
+            llama_server_binary: None,
         }
     }
 }
@@ -175,6 +187,46 @@ theme = "system"
         assert_eq!(loaded.ollama_model, None);
     }
 
+    #[test]
+    fn default_llama_server_binary_is_none() {
+        // None preserves the pre-v0.1.2 "do not spawn anything" behavior; the
+        // user opts in by pointing settings at their llama-server binary.
+        assert_eq!(Settings::default().llama_server_binary, None);
+    }
+
+    #[test]
+    fn llama_server_binary_round_trips_via_toml() {
+        let dir = tempdir().unwrap();
+        let path = dir.path().join("settings.toml");
+        let original = Settings {
+            llama_server_binary: Some("/usr/local/bin/llama-server".into()),
+            ..Settings::default()
+        };
+        original.save(&path).unwrap();
+        let loaded = Settings::load_or_default(&path).unwrap();
+        assert_eq!(
+            loaded.llama_server_binary.as_deref(),
+            Some("/usr/local/bin/llama-server")
+        );
+    }
+
+    #[test]
+    fn legacy_settings_toml_without_llama_server_binary_still_parses() {
+        // Forward-compat for v0.1.0/v0.1.1 settings.toml that pre-dates this field.
+        let dir = tempdir().unwrap();
+        let path = dir.path().join("settings.toml");
+        let legacy = r#"
+api_port = 7300
+mcp_enabled = true
+theme = "system"
+backend = "llamacpp"
+"#;
+        std::fs::write(&path, legacy).unwrap();
+        let loaded = Settings::load_or_default(&path).unwrap();
+        assert_eq!(loaded.llama_server_binary, None);
+        assert_eq!(loaded.backend, Backend::LlamaCpp);
+    }
+
     #[test]
     fn all_backend_variants_round_trip() {
         for kind in [
diff --git a/crates/ocm-daemon/src/supervisor.rs b/crates/ocm-daemon/src/supervisor.rs
index 8ba7470..ae5aa60 100644
--- a/crates/ocm-daemon/src/supervisor.rs
+++ b/crates/ocm-daemon/src/supervisor.rs
@@ -3,13 +3,106 @@
 //!
 //! Use `Supervisor::new()` with a `Command` factory closure so the supervisor can
 //! restart the process with a fresh `Command` each time without consuming state.
+//!
+//! # Backend coverage (v0.1.2 — Track 1 item 2)
+//!
+//! This module currently spawns `llama-server` only. The `spawn_vllm_server`
+//! helper is kept here for the future NVIDIA path but is not yet wired into
+//! bootstrap.
+//!
+//! **Ollama is deliberately NOT supervised here.** When `Settings.backend =
+//! "ollama"`, OCM expects an Ollama daemon to be already running (it has its
+//! own service installer + tray + lifecycle), and bridges to it via the native
+//! NDJSON adapter (`crates/ocm-inference/src/ollama.rs`). Spawning would either
+//! double-spawn a daemon already running, or fight ollama-svc's own restart
+//! logic. The spawn-gate in `bootstrap::should_spawn_llama_supervisor` enforces
+//! this.
 
 use anyhow::{Context, Result};
+use serde::{Deserialize, Serialize};
 use std::path::Path;
 use std::process::{Child, Command, Stdio};
 use std::sync::{Arc, Mutex};
 use std::time::{Duration, Instant};
-use tracing::{info, warn};
+use tracing::{error, info, warn};
+
+/// Restart-loop policy constants. Documented here so `SupervisorPolicy::default()`
+/// can assert them, and so future tuning is one-place rather than scattered.
+pub const DEFAULT_MAX_RESTARTS: u8 = 3;
+pub const DEFAULT_INITIAL_BACKOFF: Duration = Duration::from_millis(500);
+pub const DEFAULT_MAX_BACKOFF: Duration = Duration::from_secs(10);
+/// If the supervised process runs healthy for at least this long, the restart
+/// counter resets — a process that crashes once a day is not "failing repeatedly."
+pub const DEFAULT_STABILITY_WINDOW: Duration = Duration::from_secs(60);
+pub const DEFAULT_HEALTH_CHECK_INTERVAL: Duration = Duration::from_secs(5);
+pub const DEFAULT_HEALTH_CHECK_TIMEOUT: Duration = Duration::from_secs(15);
+/// Default context length passed to `llama-server -c`. Matches the v1 design
+/// plan's example. Not a Settings field in v0.1.2 — see TASK_2 design notes.
+pub const DEFAULT_LLAMA_CTX_LEN: u32 = 4096;
+
+/// Live status of a supervised subprocess. Exposed to the frontend via the
+/// `get_supervisor_status` Tauri command.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
+#[serde(tag = "kind", rename_all = "snake_case")]
+pub enum SupervisorStatus {
+    /// No supervision is configured. Either `Settings.backend != "llamacpp"`,
+    /// or the binary/model prerequisites aren't satisfied.
+    #[default]
+    NotSpawning,
+    /// Spawn requested; waiting for the process to come up + health-check.
+    Starting,
+    /// Process is alive and the last health-check passed.
+    Running { pid: u32 },
+    /// Process died (or failed health-check); waiting backoff before next try.
+    Restarting { attempt: u8, last_error: String },
+    /// Hit `max_restarts` within the stability window. Manual intervention required.
+    FailedAfterMaxRestarts { attempts: u8, last_error: String },
+    /// Stopped on graceful daemon shutdown.
+    Stopped,
+}
+
+/// Restart + health-check policy. Build via `SupervisorPolicy::default()` and
+/// override per-field for tests.
+#[derive(Debug, Clone)]
+pub struct SupervisorPolicy {
+    /// Max restarts allowed before surfacing `FailedAfterMaxRestarts`.
+    pub max_restarts: u8,
+    pub initial_backoff: Duration,
+    pub max_backoff: Duration,
+    /// Run-time after which the restart counter resets (a process that ran
+    /// healthy this long is considered "stable, then crashed", not "crash-looping").
+    pub stability_window: Duration,
+    /// URL polled for readiness after spawn.
+    pub health_url: String,
+    /// How often to recheck liveness once Running.
+    pub health_check_interval: Duration,
+    /// Max wait for first HTTP-ready after spawn.
+    pub health_check_timeout: Duration,
+}
+
+impl Default for SupervisorPolicy {
+    fn default() -> Self {
+        Self {
+            max_restarts: DEFAULT_MAX_RESTARTS,
+            initial_backoff: DEFAULT_INITIAL_BACKOFF,
+            max_backoff: DEFAULT_MAX_BACKOFF,
+            stability_window: DEFAULT_STABILITY_WINDOW,
+            health_url: String::new(),
+            health_check_interval: DEFAULT_HEALTH_CHECK_INTERVAL,
+            health_check_timeout: DEFAULT_HEALTH_CHECK_TIMEOUT,
+        }
+    }
+}
+
+/// Exponential backoff: `initial * 2^attempt_index`, clamped to `max`.
+/// `attempt_index = 0` returns `initial`. `u8::MAX` does not overflow.
+pub fn compute_backoff(attempt_index: u8, initial: Duration, max: Duration) -> Duration {
+    // Use u32::checked_shl to avoid shift-overflow when attempt_index >= 32.
+    let multiplier = 1u32
+        .checked_shl(u32::from(attempt_index))
+        .unwrap_or(u32::MAX);
+    initial.saturating_mul(multiplier).min(max)
+}
 
 type CommandFactory = Box<dyn Fn() -> Command + Send + Sync>;
 
@@ -109,6 +202,12 @@ pub fn spawn_llama_server(binary: &Path, model_path: &Path, port: u16, ctx_len:
 }
 
 /// Build a Supervisor that runs vLLM's OpenAI-compat HTTP server.
+///
+/// Not yet wired into bootstrap — the NVIDIA-supervision path is a separate
+/// follow-up (vLLM has heavier Python/CUDA preconditions than llama.cpp).
+/// Kept here so the supervision machinery covers both backends when that path
+/// activates.
+#[allow(dead_code)]
 pub fn spawn_vllm_server(python: &Path, model_id: &str, port: u16) -> Supervisor {
     let python = python.to_path_buf();
     let model_id = model_id.to_string();
@@ -144,6 +243,184 @@ pub async fn wait_for_http_ready(url: &str, timeout: Duration) -> Result<()> {
     anyhow::bail!("backend at {url} did not become ready within {timeout:?}")
 }
 
+/// Health-gated restart loop. Spawns the configured subprocess, waits for
+/// HTTP-ready, monitors liveness, restarts on death with exponential backoff,
+/// and surfaces `FailedAfterMaxRestarts` once the budget is exhausted.
+///
+/// The loop honors `shutdown` (a `tokio::sync::watch` channel): when it sees
+/// `true`, it stops the supervisee cleanly and sets `Stopped`.
+///
+/// This is the v0.1.2 supervision entry point; bootstrap calls it as a
+/// `tauri::async_runtime::spawn`'d background task when `should_spawn_llama_supervisor`
+/// returns true.
+pub async fn supervise(
+    supervisor: Arc<Supervisor>,
+    policy: SupervisorPolicy,
+    status: Arc<Mutex<SupervisorStatus>>,
+    mut shutdown: tokio::sync::watch::Receiver<bool>,
+) {
+    let mut attempts: u8 = 0;
+    // Initialized to a sentinel that's only observable if we surface
+    // FailedAfterMaxRestarts before ever setting it (shouldn't happen in
+    // practice, but is sound). All real paths overwrite this before read.
+    #[allow(unused_assignments)]
+    let mut last_error = String::new();
+
+    loop {
+        // Honor shutdown before any work.
+        if *shutdown.borrow() {
+            supervisor.stop();
+            set_status(&status, SupervisorStatus::Stopped);
+            return;
+        }
+
+        set_status(&status, SupervisorStatus::Starting);
+        let spawned_at = Instant::now();
+        match supervisor.start() {
+            Ok(()) => {
+                // Spawned OK. Wait for HTTP readiness.
+                match wait_for_http_ready(&policy.health_url, policy.health_check_timeout).await {
+                    Ok(()) => {
+                        let pid = supervisor.pid().unwrap_or(0);
+                        info!(
+                            name = supervisor.name(),
+                            pid, "supervised subprocess healthy"
+                        );
+                        set_status(&status, SupervisorStatus::Running { pid });
+                        // Counter resets immediately on healthy spawn; if the
+                        // process later crashes after stability_window, the
+                        // monitor branch below also resets.
+                        attempts = 0;
+
+                        // Monitor until shutdown OR liveness loss.
+                        let died = monitor_until_dead(
+                            &supervisor,
+                            &policy,
+                            &status,
+                            &mut shutdown,
+                            spawned_at,
+                            &mut attempts,
+                        )
+                        .await;
+                        match died {
+                            MonitorOutcome::Shutdown => {
+                                supervisor.stop();
+                                set_status(&status, SupervisorStatus::Stopped);
+                                return;
+                            }
+                            MonitorOutcome::Died(reason) => {
+                                last_error = reason;
+                                attempts = attempts.saturating_add(1);
+                            }
+                        }
+                    }
+                    Err(e) => {
+                        last_error = format!("health-check failed: {e}");
+                        supervisor.stop();
+                        attempts = attempts.saturating_add(1);
+                    }
+                }
+            }
+            Err(e) => {
+                last_error = format!("spawn failed: {e}");
+                attempts = attempts.saturating_add(1);
+            }
+        }
+
+        // Budget check.
+        if attempts >= policy.max_restarts {
+            error!(
+                attempts,
+                error = %last_error,
+                "supervisor exhausted restart budget; manual intervention required"
+            );
+            set_status(
+                &status,
+                SupervisorStatus::FailedAfterMaxRestarts {
+                    attempts,
+                    last_error: last_error.clone(),
+                },
+            );
+            return;
+        }
+
+        // Backoff, then loop. attempt_index is the 0-based shift power; first
+        // restart waits `initial_backoff`.
+        let backoff = compute_backoff(
+            attempts.saturating_sub(1),
+            policy.initial_backoff,
+            policy.max_backoff,
+        );
+        warn!(
+            attempts,
+            backoff_ms = backoff.as_millis() as u64,
+            error = %last_error,
+            "supervised subprocess will restart after backoff"
+        );
+        set_status(
+            &status,
+            SupervisorStatus::Restarting {
+                attempt: attempts,
+                last_error: last_error.clone(),
+            },
+        );
+
+        // Race backoff against shutdown so we exit promptly.
+        tokio::select! {
+            _ = tokio::time::sleep(backoff) => {}
+            _ = shutdown.changed() => {
+                supervisor.stop();
+                set_status(&status, SupervisorStatus::Stopped);
+                return;
+            }
+        }
+    }
+}
+
+enum MonitorOutcome {
+    Shutdown,
+    Died(String),
+}
+
+async fn monitor_until_dead(
+    supervisor: &Supervisor,
+    policy: &SupervisorPolicy,
+    status: &Arc<Mutex<SupervisorStatus>>,
+    shutdown: &mut tokio::sync::watch::Receiver<bool>,
+    spawned_at: Instant,
+    attempts: &mut u8,
+) -> MonitorOutcome {
+    loop {
+        tokio::select! {
+            _ = shutdown.changed() => {
+                return MonitorOutcome::Shutdown;
+            }
+            _ = tokio::time::sleep(policy.health_check_interval) => {
+                if !supervisor.is_alive() {
+                    // If we ran healthy for at least the stability window,
+                    // reset the restart counter — this was a "stable run then
+                    // crash", not a flap.
+                    if spawned_at.elapsed() >= policy.stability_window {
+                        *attempts = 0;
+                    }
+                    return MonitorOutcome::Died("subprocess exited".to_string());
+                }
+                // Refresh Running { pid } in case of mid-life pid change (rare,
+                // but cheap to keep in sync).
+                if let Some(pid) = supervisor.pid() {
+                    set_status(status, SupervisorStatus::Running { pid });
+                }
+            }
+        }
+    }
+}
+
+fn set_status(slot: &Arc<Mutex<SupervisorStatus>>, new_status: SupervisorStatus) {
+    if let Ok(mut g) = slot.lock() {
+        *g = new_status;
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -163,6 +440,114 @@ mod tests {
         }
     }
 
+    /// Spawn-then-exit-immediately. Used to exercise the supervise loop's
+    /// restart-on-death path without having to block on a long-running subprocess.
+    fn immediate_exit_command() -> Command {
+        #[cfg(unix)]
+        {
+            Command::new("true")
+        }
+        #[cfg(windows)]
+        {
+            let mut c = Command::new("cmd");
+            c.args(["/c", "exit", "0"]);
+            c
+        }
+    }
+
+    #[test]
+    fn supervisor_status_default_is_not_spawning() {
+        assert_eq!(SupervisorStatus::default(), SupervisorStatus::NotSpawning);
+    }
+
+    #[test]
+    fn policy_default_uses_documented_constants() {
+        let p = SupervisorPolicy::default();
+        assert_eq!(p.max_restarts, DEFAULT_MAX_RESTARTS);
+        assert_eq!(p.initial_backoff, DEFAULT_INITIAL_BACKOFF);
+        assert_eq!(p.max_backoff, DEFAULT_MAX_BACKOFF);
+        assert_eq!(p.stability_window, DEFAULT_STABILITY_WINDOW);
+    }
+
+    #[test]
+    fn backoff_doubles_then_clamps_at_max() {
+        let initial = Duration::from_millis(100);
+        let max = Duration::from_millis(800);
+        assert_eq!(compute_backoff(0, initial, max), Duration::from_millis(100));
+        assert_eq!(compute_backoff(1, initial, max), Duration::from_millis(200));
+        assert_eq!(compute_backoff(2, initial, max), Duration::from_millis(400));
+        // attempt 3 would be 800 (exactly at cap)
+        assert_eq!(compute_backoff(3, initial, max), Duration::from_millis(800));
+        // attempt 4+ clamped
+        assert_eq!(compute_backoff(4, initial, max), Duration::from_millis(800));
+        // u8::MAX must not overflow
+        assert_eq!(
+            compute_backoff(255, initial, max),
+            Duration::from_millis(800)
+        );
+    }
+
+    #[tokio::test(flavor = "current_thread")]
+    async fn supervise_with_immediate_exit_hits_failed_after_max_restarts() {
+        // Process dies right after spawn AND the health URL never responds
+        // (port 1 is privileged + unbound on CI runners). The loop should
+        // burn through max_restarts attempts then surface FailedAfterMaxRestarts.
+        let sup = Arc::new(Supervisor::new("immediate-exit", immediate_exit_command));
+        let policy = SupervisorPolicy {
+            max_restarts: 2,
+            initial_backoff: Duration::from_millis(20),
+            max_backoff: Duration::from_millis(40),
+            stability_window: Duration::from_secs(60),
+            health_url: "http://127.0.0.1:1/health".to_string(),
+            health_check_interval: Duration::from_millis(50),
+            health_check_timeout: Duration::from_millis(100),
+        };
+        let status = Arc::new(Mutex::new(SupervisorStatus::default()));
+        let (_tx, rx) = tokio::sync::watch::channel(false);
+
+        supervise(sup.clone(), policy, status.clone(), rx).await;
+
+        let final_status = status.lock().unwrap().clone();
+        match final_status {
+            SupervisorStatus::FailedAfterMaxRestarts { attempts, .. } => {
+                assert_eq!(attempts, 2, "should hit exactly max_restarts attempts");
+            }
+            other => panic!("expected FailedAfterMaxRestarts, got {other:?}"),
+        }
+        // Drop should leave no orphan
+        assert!(!sup.is_alive());
+    }
+
+    #[tokio::test(flavor = "current_thread")]
+    async fn supervise_exits_cleanly_on_shutdown_signal() {
+        // sleep 30 keeps the supervisee alive; we signal shutdown before any
+        // restart loop work happens, expecting Stopped status + clean exit.
+        let sup = Arc::new(Supervisor::new("sleep", || sleep_command(30)));
+        let policy = SupervisorPolicy {
+            max_restarts: 3,
+            initial_backoff: Duration::from_millis(10),
+            max_backoff: Duration::from_millis(20),
+            stability_window: Duration::from_secs(60),
+            health_url: "http://127.0.0.1:1/health".to_string(),
+            health_check_interval: Duration::from_millis(50),
+            health_check_timeout: Duration::from_millis(50),
+        };
+        let status = Arc::new(Mutex::new(SupervisorStatus::default()));
+        let (tx, rx) = tokio::sync::watch::channel(false);
+
+        // Signal shutdown almost immediately, before the loop's first health check returns.
+        let shutdown_handle = tokio::spawn(async move {
+            tokio::time::sleep(Duration::from_millis(10)).await;
+            let _ = tx.send(true);
+        });
+
+        supervise(sup.clone(), policy, status.clone(), rx).await;
+        shutdown_handle.await.ok();
+
+        assert_eq!(*status.lock().unwrap(), SupervisorStatus::Stopped);
+        assert!(!sup.is_alive(), "supervisor should have killed the child");
+    }
+
     #[test]
     fn lifecycle_with_sleep_command() {
         let sup = Supervisor::new("sleep", || sleep_command(30));
diff --git a/frontend/src/lib/settings.ts b/frontend/src/lib/settings.ts
index 8c810a8..8e5c325 100644
--- a/frontend/src/lib/settings.ts
+++ b/frontend/src/lib/settings.ts
@@ -19,6 +19,7 @@ export interface Settings {
 	backend: Backend;
 	ollama_base_url: string | null;
 	ollama_model: string | null;
+	llama_server_binary: string | null;
 }
 
 import { invoke } from './tauri';
diff --git a/frontend/src/routes/settings/+page.svelte b/frontend/src/routes/settings/+page.svelte
index c3f8423..b48538a 100644
--- a/frontend/src/routes/settings/+page.svelte
+++ b/frontend/src/routes/settings/+page.svelte
@@ -165,6 +165,15 @@
 					bind:value={settings.ollama_model}
 				/>
 
+				<label for="llama_server_binary" class="text-ocm-muted">llama-server</label>
+				<input
+					id="llama_server_binary"
+					type="text"
+					placeholder="(leave blank: run llama-server yourself)"
+					class="rounded border border-ocm-border bg-ocm-bg px-2 py-1 font-mono"
+					bind:value={settings.llama_server_binary}
+				/>
+
 				<label for="mem0_url" class="text-ocm-muted">Mem0 URL</label>
 				<input
 					id="mem0_url"