Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 83 additions & 4 deletions crates/ocm-daemon/src/bootstrap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
//! functionality and logs warnings. The user gets a Tauri tray + window
//! that reports status; chat requests fail with clear errors.

use crate::settings::Settings;
use ocm_inference::selector;
use crate::settings::{Backend, Settings};
use ocm_inference::ollama::DEFAULT_MODEL as DEFAULT_OLLAMA_MODEL;
use ocm_inference::selector::{self, BackendKind, DEFAULT_OLLAMA_BASE_URL};
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
use std::sync::Arc;
use std::time::Duration;
Expand Down Expand Up @@ -60,6 +61,17 @@ async fn probe_url(base: &str, path: &str) -> bool {
matches!(client.get(&url).send().await, Ok(r) if r.status().is_success())
}

/// Map the user's `Settings.backend` choice onto a concrete `BackendKind`.
///
/// Every explicit variant translates one-to-one and bypasses detection; only
/// `Backend::Auto` falls through to `selector::detect_backend_kind()`.
fn resolve_backend_kind(setting: Backend) -> BackendKind {
    // `None` means "nothing pinned explicitly": defer to platform detection.
    let explicit = match setting {
        Backend::Auto => None,
        Backend::LlamaCpp => Some(BackendKind::LlamaCpp),
        Backend::Vllm => Some(BackendKind::Vllm),
        Backend::Ollama => Some(BackendKind::Ollama),
    };
    // Lazy fallback: detection only runs when no explicit backend was chosen.
    explicit.unwrap_or_else(selector::detect_backend_kind)
}

/// Construct the full AppState given settings.
pub fn build_app_state(settings: &Settings) -> ocm_api::AppState {
let inference_url = settings
Expand All @@ -70,8 +82,18 @@ pub fn build_app_state(settings: &Settings) -> ocm_api::AppState {
.mem0_base_url
.clone()
.unwrap_or_else(|| DEFAULT_MEM0_BASE_URL.to_string());
let ollama_url = settings
.ollama_base_url
.clone()
.unwrap_or_else(|| DEFAULT_OLLAMA_BASE_URL.to_string());
let ollama_model = settings
.ollama_model
.clone()
.unwrap_or_else(|| DEFAULT_OLLAMA_MODEL.to_string());

let backend = selector::make_backend(inference_url);
let kind = resolve_backend_kind(settings.backend);
info!(backend = kind.as_str(), "selected inference backend");
let backend = selector::make_backend_for_kind(kind, inference_url, ollama_url, ollama_model);
let memory = Arc::new(ocm_memory::Mem0Client::new(memory_url, "ocm-default"));
let backend: Arc<dyn ocm_inference::InferenceBackend> = Arc::from(backend);

Expand Down Expand Up @@ -132,7 +154,7 @@ pub async fn bootstrap(settings: Settings) {
#[cfg(test)]
mod tests {
use super::*;
use crate::settings::Theme;
use crate::settings::{Backend, Theme};

fn test_settings() -> Settings {
Settings {
Expand All @@ -143,6 +165,9 @@ mod tests {
inference_base_url: Some("http://127.0.0.1:18080".into()),
mem0_base_url: Some("http://127.0.0.1:18765".into()),
retrieval_top_k: Some(3),
backend: Backend::Auto,
ollama_base_url: None,
ollama_model: None,
}
}

Expand All @@ -154,6 +179,7 @@ mod tests {
let state = build_app_state(&s);
assert_eq!(state.retrieval_top_k, DEFAULT_RETRIEVAL_TOP_K);
// backend / memory clients are constructed; concrete name depends on platform
// (Auto never picks Ollama — it's opt-in).
let backend_name = state.backend.name();
assert!(backend_name == "llama.cpp" || backend_name == "vLLM");
}
Expand All @@ -165,6 +191,59 @@ mod tests {
assert_eq!(state.retrieval_top_k, 3);
}

#[test]
fn explicit_ollama_backend_is_wired_through_to_app_state() {
    // Choosing `Backend::Ollama` in settings (with an explicit URL and model)
    // must surface on the built AppState as a backend whose trait-level
    // `name()` reports "Ollama".
    let settings = Settings {
        backend: Backend::Ollama,
        ollama_base_url: Some("http://127.0.0.1:11434".into()),
        ollama_model: Some("llama3".into()),
        ..Settings::default()
    };
    let state = build_app_state(&settings);
    let selected = state.backend.name();
    assert_eq!(selected, "Ollama");
}

#[test]
fn explicit_ollama_uses_defaults_when_fields_unset() {
    // With the URL and model left unset, `build_app_state` substitutes its
    // fallback Ollama base URL and model, so an Ollama backend is still
    // constructed.
    let settings = Settings {
        backend: Backend::Ollama,
        ollama_base_url: None,
        ollama_model: None,
        ..Settings::default()
    };
    let state = build_app_state(&settings);
    let selected = state.backend.name();
    assert_eq!(selected, "Ollama");
}

#[test]
fn explicit_llamacpp_overrides_platform_detect() {
    // An explicit llama.cpp selection must win regardless of what platform
    // auto-detection would have chosen on this machine.
    let settings = Settings {
        backend: Backend::LlamaCpp,
        ..Settings::default()
    };
    let state = build_app_state(&settings);
    let selected = state.backend.name();
    assert_eq!(selected, "llama.cpp");
}

#[test]
fn explicit_vllm_overrides_platform_detect() {
    // An explicit vLLM selection must win regardless of what platform
    // auto-detection would have chosen on this machine.
    let settings = Settings {
        backend: Backend::Vllm,
        ..Settings::default()
    };
    let state = build_app_state(&settings);
    let selected = state.backend.name();
    assert_eq!(selected, "vLLM");
}

#[tokio::test]
async fn probe_url_returns_false_for_unreachable() {
// Using port 1 (privileged, almost guaranteed not bound) on localhost
Expand Down
109 changes: 109 additions & 0 deletions crates/ocm-daemon/src/settings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,22 @@ pub struct Settings {
/// Number of memories to retrieve per chat turn. Default 5. Set to 0 to disable.
#[serde(default)]
pub retrieval_top_k: Option<u32>,
/// Which inference backend to use. `Auto` (default) preserves pre-v0.1.1
/// behavior: platform-detect picks llama.cpp on Mac/Windows/CPU-Linux and
/// vLLM on CUDA Linux. Explicit `LlamaCpp` / `Vllm` / `Ollama` override
/// detection — the Ollama branch is the "I have an Ollama daemon already,
/// point OCM at it" zero-extra-process path.
#[serde(default)]
pub backend: Backend,
/// Override the Ollama daemon URL. Only consulted when `backend = "ollama"`.
/// Default (when unset) is `http://127.0.0.1:11434` — Ollama's installed default.
#[serde(default)]
pub ollama_base_url: Option<String>,
/// Ollama model tag (e.g. `llama3`, `qwen2.5:7b`). REQUIRED by the Ollama
/// native API — there is no server-side default. When unset, bootstrap
/// falls back to `ocm_inference::ollama::DEFAULT_MODEL`.
#[serde(default)]
pub ollama_model: Option<String>,
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Copy)]
Expand All @@ -28,6 +44,18 @@ pub enum Theme {
System,
}

/// Inference backend selection. Wire-format is lowercase TOML (`backend = "ollama"`).
///
/// `rename_all = "lowercase"` lowercases each variant name for serde, so the
/// values accepted in `settings.toml` are `"auto"`, `"llamacpp"`, `"vllm"`
/// and `"ollama"`. `Auto` is the `Default`, which is what a settings file
/// without a `backend` key deserializes to.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Copy, Default)]
#[serde(rename_all = "lowercase")]
pub enum Backend {
/// Platform-detect (existing pre-v0.1.1 behavior).
#[default]
Auto,
/// Explicitly select llama.cpp, overriding platform detection.
LlamaCpp,
/// Explicitly select vLLM, overriding platform detection.
Vllm,
/// Explicitly select an Ollama daemon, overriding platform detection.
Ollama,
}

impl Default for Settings {
fn default() -> Self {
Self {
Expand All @@ -38,6 +66,9 @@ impl Default for Settings {
inference_base_url: None,
mem0_base_url: None,
retrieval_top_k: None,
backend: Backend::Auto,
ollama_base_url: None,
ollama_model: None,
}
}
}
Expand Down Expand Up @@ -83,4 +114,82 @@ mod tests {
let s = Settings::load_or_default(&path).unwrap();
assert_eq!(s, Settings::default());
}

#[test]
fn default_backend_is_auto() {
    // A default `Settings` keeps platform detection (`Auto`) and leaves both
    // Ollama overrides unset; explicit backend selection is opt-in.
    let Settings {
        backend,
        ollama_base_url,
        ollama_model,
        ..
    } = Settings::default();
    assert_eq!(backend, Backend::Auto);
    assert_eq!(ollama_base_url, None);
    assert_eq!(ollama_model, None);
}

#[test]
fn backend_serializes_lowercase() {
    // The enum *value* is written in lowercase (`backend = "ollama"`), as
    // dictated by `#[serde(rename_all = "lowercase")]` on `Backend`.
    let settings = Settings {
        backend: Backend::Ollama,
        ..Settings::default()
    };
    let serialized = toml::to_string(&settings).unwrap();
    assert!(serialized.contains(r#"backend = "ollama""#));
}

#[test]
fn ollama_settings_round_trip_via_toml() {
    // Save a fully-specified Ollama configuration to disk, load it back, and
    // check both whole-struct equality and each Ollama-related field.
    let url = "http://127.0.0.1:11434";
    let model = "llama3";

    let dir = tempdir().unwrap();
    let path = dir.path().join("settings.toml");

    let original = Settings {
        backend: Backend::Ollama,
        ollama_base_url: Some(url.into()),
        ollama_model: Some(model.into()),
        ..Settings::default()
    };
    original.save(&path).unwrap();

    let loaded = Settings::load_or_default(&path).unwrap();
    assert_eq!(loaded, original);
    assert_eq!(loaded.backend, Backend::Ollama);
    assert_eq!(loaded.ollama_base_url.as_deref(), Some(url));
    assert_eq!(loaded.ollama_model.as_deref(), Some(model));
}

#[test]
fn legacy_settings_toml_without_backend_field_still_parses() {
    // Backward compatibility: a settings.toml written before the `backend`,
    // `ollama_base_url` and `ollama_model` keys existed must still load, with
    // the missing fields taking their serde defaults (Auto / None / None).
    let legacy = r#"
api_port = 7300
mcp_enabled = true
theme = "system"
"#;
    let dir = tempdir().unwrap();
    let path = dir.path().join("settings.toml");
    std::fs::write(&path, legacy).unwrap();

    let Settings {
        backend,
        ollama_base_url,
        ollama_model,
        ..
    } = Settings::load_or_default(&path).unwrap();
    assert_eq!(backend, Backend::Auto);
    assert_eq!(ollama_base_url, None);
    assert_eq!(ollama_model, None);
}

#[test]
fn all_backend_variants_round_trip() {
    // Every `Backend` variant must survive serialize -> deserialize through
    // TOML unchanged.
    let variants = [
        Backend::Auto,
        Backend::LlamaCpp,
        Backend::Vllm,
        Backend::Ollama,
    ];
    for kind in variants {
        let encoded = toml::to_string(&Settings {
            backend: kind,
            ..Settings::default()
        })
        .unwrap();
        let decoded: Settings = toml::from_str(&encoded).unwrap();
        assert_eq!(decoded.backend, kind, "round-trip failed for {kind:?}");
    }
}
}
Loading
Loading