Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
249 changes: 248 additions & 1 deletion crates/ocm-daemon/src/bootstrap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@
//! that reports status; chat requests fail with clear errors.

use crate::settings::{Backend, Settings};
use crate::supervisor::{self, Supervisor, SupervisorPolicy, SupervisorStatus};
use ocm_inference::ollama::DEFAULT_MODEL as DEFAULT_OLLAMA_MODEL;
use ocm_inference::selector::{self, BackendKind, DEFAULT_OLLAMA_BASE_URL};
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
use std::sync::Arc;
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};
use std::time::Duration;
use tracing::{info, warn};

Expand Down Expand Up @@ -72,6 +74,133 @@ fn resolve_backend_kind(setting: Backend) -> BackendKind {
}
}

/// Build the on-disk path the daemon expects for a model_id, matching the
/// convention used by `ocm_models::downloader::download_model`.
fn model_path_for(models_dir: &Path, model_id: &str) -> PathBuf {
models_dir.join(format!("{model_id}.gguf"))
}

/// Decision: should bootstrap spawn + supervise `llama-server` on this run?
///
/// True iff ALL of:
/// - `Settings.backend = "llamacpp"` (explicit opt-in — Auto preserves
/// pre-v0.1.2 behavior, Ollama supervises itself, Vllm has its own path)
/// - `Settings.llama_server_binary` is `Some` (directive: None preserves
/// "do not spawn anything")
/// - `Settings.model_id` is `Some` AND the GGUF exists under `models_dir`
/// (conservative: don't burn the restart budget spawning a server that
/// has nothing to load — chat will fail loudly via the existing
/// "backend not reachable" message instead)
pub fn should_spawn_llama_supervisor(settings: &Settings, models_dir: &Path) -> bool {
if !matches!(settings.backend, Backend::LlamaCpp) {
return false;
}
if settings.llama_server_binary.is_none() {
return false;
}
match settings.model_id.as_deref() {
Some(id) => model_path_for(models_dir, id).exists(),
None => false,
}
}

/// Parse the port out of an `http://host:port/...` URL. Bare-bones to avoid
/// pulling in `url` for one field. Returns `None` if no port segment is found.
fn parse_port(url: &str) -> Option<u16> {
// The last `:`-delimited segment, up to the first `/`.
let after_colon = url.rsplit(':').next()?;
let port_str = after_colon.split('/').next()?;
port_str.parse().ok()
}

/// Build (but don't start) the llama-server Supervisor + its restart policy
/// for the current settings. Returns `None` if the spawn-gate
/// (`should_spawn_llama_supervisor`) refuses.
///
/// The caller is responsible for spawning `supervisor::supervise(...)` as a
/// background task and holding the resulting handle alive for the daemon's
/// lifetime (`main.rs` does this via Tauri-managed state + tokio task).
pub fn build_llama_supervisor(
settings: &Settings,
models_dir: &Path,
) -> Option<(Arc<Supervisor>, SupervisorPolicy)> {
if !should_spawn_llama_supervisor(settings, models_dir) {
return None;
}
let binary = PathBuf::from(settings.llama_server_binary.as_ref()?);
let model_id = settings.model_id.as_ref()?;
let model_path = model_path_for(models_dir, model_id);

let inference_url = settings
.inference_base_url
.clone()
.unwrap_or_else(|| DEFAULT_INFERENCE_BASE_URL.to_string());
let port = parse_port(&inference_url).unwrap_or(8080);
let health_url = format!("{inference_url}/v1/models");

let sup = Arc::new(supervisor::spawn_llama_server(
&binary,
&model_path,
port,
supervisor::DEFAULT_LLAMA_CTX_LEN,
));
let policy = SupervisorPolicy {
health_url,
..SupervisorPolicy::default()
};
Some((sup, policy))
}

/// State the Tauri layer manages for the supervised subprocess. `status` is
/// shared with the supervise loop (which mutates it).
///
/// `shutdown` is the sender half of the cancellation channel. The supervise
/// loop's `watch::Receiver::changed()` fires either when we explicitly send
/// `true` (via `signal_shutdown`) OR when the Sender is dropped — so dropping
/// `LlamaSupervisorState` during Tauri's teardown is enough to wake the loop,
/// which then calls `Supervisor::stop()` and exits with `Stopped` status.
/// An explicit `signal_shutdown` is here for a future `RunEvent::ExitRequested`
/// hook that wants to wait for the loop to drain.
pub struct LlamaSupervisorState {
pub status: Arc<Mutex<SupervisorStatus>>,
// Held in Mutex<Option<_>> so a future ExitRequested hook can `.take()` it
// to send the signal before tauri tears down the runtime. Field is
// observed via Drop semantics today, not direct reads — keep the
// dead_code allow until the hook lands.
#[allow(dead_code)]
shutdown: Mutex<Option<tokio::sync::watch::Sender<bool>>>,
}

impl LlamaSupervisorState {
pub fn not_spawning() -> Self {
Self {
status: Arc::new(Mutex::new(SupervisorStatus::NotSpawning)),
shutdown: Mutex::new(None),
}
}

pub fn live(
status: Arc<Mutex<SupervisorStatus>>,
shutdown_tx: tokio::sync::watch::Sender<bool>,
) -> Self {
Self {
status,
shutdown: Mutex::new(Some(shutdown_tx)),
}
}

/// Explicitly signal the supervise loop to stop. Currently unused (Drop
/// is sufficient for v0.1.2), kept for the future graceful-exit hook.
#[allow(dead_code)]
pub fn signal_shutdown(&self) {
if let Ok(mut g) = self.shutdown.lock() {
if let Some(tx) = g.take() {
let _ = tx.send(true);
}
}
}
}

/// Construct the full AppState given settings.
pub fn build_app_state(settings: &Settings) -> ocm_api::AppState {
let inference_url = settings
Expand Down Expand Up @@ -168,9 +297,30 @@ mod tests {
backend: Backend::Auto,
ollama_base_url: None,
ollama_model: None,
llama_server_binary: None,
}
}

#[test]
fn parse_port_extracts_from_loopback_url() {
assert_eq!(parse_port("http://127.0.0.1:8080"), Some(8080));
assert_eq!(parse_port("http://127.0.0.1:8080/v1"), Some(8080));
assert_eq!(parse_port("http://127.0.0.1:8000/v1/models"), Some(8000));
}

#[test]
fn parse_port_returns_none_when_no_port() {
assert_eq!(parse_port("http://example.com/v1"), None);
}

#[test]
fn build_llama_supervisor_returns_none_when_spawn_gate_refuses() {
let dir = tempfile::tempdir().unwrap();
// Spawn-gate refuses (backend = Auto, no model file).
let s = Settings::default();
assert!(build_llama_supervisor(&s, dir.path()).is_none());
}

#[test]
fn defaults_apply_when_settings_blank() {
let s = Settings::default();
Expand Down Expand Up @@ -244,6 +394,103 @@ mod tests {
assert_eq!(state.backend.name(), "vLLM");
}

// --- Process supervision decision (Task 2 — Track 1 item 2) ---

fn write_dummy_model(dir: &std::path::Path, model_id: &str) -> std::path::PathBuf {
// Convention matches ocm_models::downloader::download_model:
// dest = dest_dir.join(format!("{}.gguf", entry.id))
let p = dir.join(format!("{model_id}.gguf"));
std::fs::write(&p, b"").unwrap();
p
}

#[test]
fn should_spawn_llama_supervisor_yes_when_llamacpp_plus_binary_plus_model_present() {
let dir = tempfile::tempdir().unwrap();
write_dummy_model(dir.path(), "qwen2.5-1.5b-q4");
let s = Settings {
backend: Backend::LlamaCpp,
model_id: Some("qwen2.5-1.5b-q4".into()),
llama_server_binary: Some("/usr/local/bin/llama-server".into()),
..Settings::default()
};
assert!(should_spawn_llama_supervisor(&s, dir.path()));
}

#[test]
fn should_spawn_llama_supervisor_no_when_backend_is_ollama() {
// Headline rule per AGENT_OPERATIONS scope: Ollama supervises itself —
// OCM must NEVER spawn anything when backend=ollama, regardless of the
// other fields.
let dir = tempfile::tempdir().unwrap();
write_dummy_model(dir.path(), "qwen2.5-1.5b-q4");
let s = Settings {
backend: Backend::Ollama,
model_id: Some("qwen2.5-1.5b-q4".into()),
llama_server_binary: Some("/usr/local/bin/llama-server".into()),
..Settings::default()
};
assert!(!should_spawn_llama_supervisor(&s, dir.path()));
}

#[test]
fn should_spawn_llama_supervisor_no_when_backend_is_auto() {
// Auto leaves orchestration to the user's existing setup. Don't surprise
// pre-v0.1.2 users who never opted into supervision.
let dir = tempfile::tempdir().unwrap();
write_dummy_model(dir.path(), "qwen2.5-1.5b-q4");
let s = Settings {
backend: Backend::Auto,
model_id: Some("qwen2.5-1.5b-q4".into()),
llama_server_binary: Some("/usr/local/bin/llama-server".into()),
..Settings::default()
};
assert!(!should_spawn_llama_supervisor(&s, dir.path()));
}

#[test]
fn should_spawn_llama_supervisor_no_when_binary_unset() {
// Directive: "llama-server binary path from settings, new field,
// None = do-not-spawn preserves current behavior".
let dir = tempfile::tempdir().unwrap();
write_dummy_model(dir.path(), "qwen2.5-1.5b-q4");
let s = Settings {
backend: Backend::LlamaCpp,
model_id: Some("qwen2.5-1.5b-q4".into()),
llama_server_binary: None,
..Settings::default()
};
assert!(!should_spawn_llama_supervisor(&s, dir.path()));
}

#[test]
fn should_spawn_llama_supervisor_no_when_model_file_missing() {
// Conservative: refuse to spawn if there's nothing to load. The user
// hasn't downloaded a model yet — let chat fail with the existing
// "backend not reachable" message rather than burn the restart budget.
let dir = tempfile::tempdir().unwrap();
// intentionally no write_dummy_model
let s = Settings {
backend: Backend::LlamaCpp,
model_id: Some("qwen2.5-1.5b-q4".into()),
llama_server_binary: Some("/usr/local/bin/llama-server".into()),
..Settings::default()
};
assert!(!should_spawn_llama_supervisor(&s, dir.path()));
}

#[test]
fn should_spawn_llama_supervisor_no_when_model_id_unset() {
let dir = tempfile::tempdir().unwrap();
let s = Settings {
backend: Backend::LlamaCpp,
model_id: None,
llama_server_binary: Some("/usr/local/bin/llama-server".into()),
..Settings::default()
};
assert!(!should_spawn_llama_supervisor(&s, dir.path()));
}

#[tokio::test]
async fn probe_url_returns_false_for_unreachable() {
// Using port 1 (privileged, almost guaranteed not bound) on localhost
Expand Down
19 changes: 19 additions & 0 deletions crates/ocm-daemon/src/commands.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@
//! `retrieval_top_k`, `api_port`, `mcp_enabled`. The frontend surfaces
//! "restart required to apply" so the user isn't surprised.

use crate::bootstrap::LlamaSupervisorState;
use crate::paths::AppPaths;
use crate::settings::Settings;
use crate::supervisor::SupervisorStatus;
use ocm_models::{downloader::download_model, Registry};
use std::path::PathBuf;
use std::sync::Mutex;
Expand Down Expand Up @@ -59,6 +61,23 @@ pub fn list_registry_models() -> Result<Registry, String> {
Registry::load_bundled().map_err(|e| format!("load bundled registry: {e}"))
}

/// Live status of the llama-server supervisor (if any). The frontend polls
/// this to surface "running" / "restarting" / "failed" badges. When the
/// daemon was started without supervision (backend != "llamacpp", or
/// `llama_server_binary` unset, or model missing), the status is
/// `NotSpawning` and the frontend can fall back to the existing
/// "inference URL reachable?" probe.
#[tauri::command]
pub fn get_supervisor_status(
state: State<'_, LlamaSupervisorState>,
) -> Result<SupervisorStatus, String> {
state
.status
.lock()
.map(|s| s.clone())
.map_err(|e| format!("supervisor status state poisoned: {e}"))
}

/// Download a model by registry id into the app data dir under "models/".
/// Returns the absolute path on success. Refuses entries with empty sha256
/// (registry guards unverified weights).
Expand Down
37 changes: 31 additions & 6 deletions crates/ocm-daemon/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,10 @@ mod bootstrap;
mod commands;
mod paths;
mod settings;
// Supervisor's process-spawning helpers (spawn_llama_server, spawn_vllm_server)
// will be activated once OCM ships its own bundled binaries; for now the
// daemon expects external llama-server / vLLM processes to be running and
// connects to them via the URLs in settings (bootstrap.rs handles the connect).
#[allow(dead_code)]
mod supervisor;
mod tray;

use std::sync::Mutex;
use std::sync::{Arc, Mutex};
use tauri::tray::TrayIconBuilder;
use tauri::Manager;
use tracing::info;
Expand All @@ -32,6 +27,7 @@ fn main() -> anyhow::Result<()> {
commands::save_settings,
commands::list_registry_models,
commands::download_model_cmd,
commands::get_supervisor_status,
])
.setup(|app| {
let app_paths = paths::AppPaths::resolve()?;
Expand All @@ -48,6 +44,34 @@ fn main() -> anyhow::Result<()> {
port = loaded_settings.api_port,
"settings loaded"
);

// Build the llama-server supervisor before bootstrap, so the
// existing dependency probe sees the spawned process as up.
// None means "supervision disabled this run" — preserves
// pre-v0.1.2 behavior when the user doesn't opt in.
let supervisor_state =
match bootstrap::build_llama_supervisor(&loaded_settings, &app_paths.models_dir) {
Some((supervised, policy)) => {
let status = Arc::new(Mutex::new(supervisor::SupervisorStatus::default()));
let (shutdown_tx, shutdown_rx) = tokio::sync::watch::channel(false);
let status_for_loop = status.clone();
info!(
name = supervised.name(),
health_url = %policy.health_url,
"starting llama-server supervisor"
);
tauri::async_runtime::spawn(async move {
supervisor::supervise(supervised, policy, status_for_loop, shutdown_rx)
.await;
});
bootstrap::LlamaSupervisorState::live(status, shutdown_tx)
}
None => {
info!("llama-server supervisor not configured for this run");
bootstrap::LlamaSupervisorState::not_spawning()
}
};

// Spawn the bootstrap task — probes external dependencies and
// starts the OCM HTTP API server in the background. Tauri's
// setup() is sync so we hand off to a tokio task; the Tauri main
Expand All @@ -67,6 +91,7 @@ fn main() -> anyhow::Result<()> {

app.manage(app_paths);
app.manage(settings_state);
app.manage(supervisor_state);

let menu = tray::build_tray_menu(app.handle())?;
let _tray = TrayIconBuilder::new()
Expand Down
Loading
Loading