OpenCircuitDev · OpenCircuitDev · Jun 11, 2026 · Jun 11, 2026 · Jun 11, 2026 · Jun 11, 2026
diff --git a/crates/ocm-daemon/src/bootstrap.rs b/crates/ocm-daemon/src/bootstrap.rs
@@ -7,8 +7,9 @@
 //! functionality and logs warnings. The user gets a Tauri tray + window
 //! that reports status; chat requests fail with clear errors.
 
-use crate::settings::Settings;
-use ocm_inference::selector;
+use crate::settings::{Backend, Settings};
+use ocm_inference::ollama::DEFAULT_MODEL as DEFAULT_OLLAMA_MODEL;
+use ocm_inference::selector::{self, BackendKind, DEFAULT_OLLAMA_BASE_URL};
 use std::net::{IpAddr, Ipv4Addr, SocketAddr};
 use std::sync::Arc;
 use std::time::Duration;
@@ -60,6 +61,17 @@ async fn probe_url(base: &str, path: &str) -> bool {
     matches!(client.get(&url).send().await, Ok(r) if r.status().is_success())
 }
 
+/// Map the user's `Settings.backend` choice onto a concrete `BackendKind`. An
+/// explicit choice is used as-is; only `Auto` consults platform detection.
+fn resolve_backend_kind(setting: Backend) -> BackendKind {
+    match setting {
+        Backend::Ollama => BackendKind::Ollama,
+        Backend::Vllm => BackendKind::Vllm,
+        Backend::LlamaCpp => BackendKind::LlamaCpp,
+        Backend::Auto => selector::detect_backend_kind(),
+    }
+}
+
 /// Construct the full AppState given settings.
 pub fn build_app_state(settings: &Settings) -> ocm_api::AppState {
     let inference_url = settings
@@ -70,8 +82,18 @@ pub fn build_app_state(settings: &Settings) -> ocm_api::AppState {
         .mem0_base_url
         .clone()
         .unwrap_or_else(|| DEFAULT_MEM0_BASE_URL.to_string());
+    let ollama_url = settings
+        .ollama_base_url
+        .clone()
+        .unwrap_or_else(|| DEFAULT_OLLAMA_BASE_URL.to_string());
+    let ollama_model = settings
+        .ollama_model
+        .clone()
+        .unwrap_or_else(|| DEFAULT_OLLAMA_MODEL.to_string());
 
-    let backend = selector::make_backend(inference_url);
+    let kind = resolve_backend_kind(settings.backend);
+    info!(backend = kind.as_str(), "selected inference backend");
+    let backend = selector::make_backend_for_kind(kind, inference_url, ollama_url, ollama_model);
     let memory = Arc::new(ocm_memory::Mem0Client::new(memory_url, "ocm-default"));
     let backend: Arc<dyn ocm_inference::InferenceBackend> = Arc::from(backend);
 
@@ -132,7 +154,7 @@ pub async fn bootstrap(settings: Settings) {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::settings::Theme;
+    use crate::settings::{Backend, Theme};
 
     fn test_settings() -> Settings {
         Settings {
@@ -143,6 +165,9 @@ mod tests {
             inference_base_url: Some("http://127.0.0.1:18080".into()),
             mem0_base_url: Some("http://127.0.0.1:18765".into()),
             retrieval_top_k: Some(3),
+            backend: Backend::Auto,
+            ollama_base_url: None,
+            ollama_model: None,
         }
     }
 
@@ -154,6 +179,7 @@ mod tests {
         let state = build_app_state(&s);
         assert_eq!(state.retrieval_top_k, DEFAULT_RETRIEVAL_TOP_K);
         // backend / memory clients are constructed; concrete name depends on platform
+        // (Auto never picks Ollama — it's opt-in).
         let backend_name = state.backend.name();
         assert!(backend_name == "llama.cpp" || backend_name == "vLLM");
     }
@@ -165,6 +191,59 @@ mod tests {
         assert_eq!(state.retrieval_top_k, 3);
     }
 
+    #[test]
+    fn explicit_ollama_backend_is_wired_through_to_app_state() {
+        // Core wiring check for the Ollama option: settings that name Ollama
+        // explicitly, with both the URL and the model filled in, must yield an
+        // AppState whose backend identifies itself as "Ollama" through the
+        // trait's `name()`.
+        let ollama_settings = Settings {
+            ollama_model: Some("llama3".into()),
+            ollama_base_url: Some("http://127.0.0.1:11434".into()),
+            backend: Backend::Ollama,
+            ..Settings::default()
+        };
+        let app_state = build_app_state(&ollama_settings);
+        assert_eq!(app_state.backend.name(), "Ollama");
+    }
+
+    #[test]
+    fn explicit_ollama_uses_defaults_when_fields_unset() {
+        // Choosing Ollama without a URL or model must still build a backend:
+        // `build_app_state` substitutes DEFAULT_OLLAMA_BASE_URL and
+        // DEFAULT_OLLAMA_MODEL for the two unset fields.
+        let bare_ollama = Settings {
+            ollama_model: None,
+            ollama_base_url: None,
+            backend: Backend::Ollama,
+            ..Settings::default()
+        };
+        let app_state = build_app_state(&bare_ollama);
+        assert_eq!(app_state.backend.name(), "Ollama");
+    }
+
+    #[test]
+    fn explicit_llamacpp_overrides_platform_detect() {
+        // Explicit selection bypasses detection: asking for llama.cpp yields
+        // llama.cpp even on a machine where auto-detect would choose vLLM
+        // (e.g. a CUDA Linux box).
+        let app_state = build_app_state(&Settings {
+            backend: Backend::LlamaCpp,
+            ..Settings::default()
+        });
+        assert_eq!(app_state.backend.name(), "llama.cpp");
+    }
+
+    #[test]
+    fn explicit_vllm_overrides_platform_detect() {
+        // An explicit vLLM choice is honoured regardless of platform detection.
+        let app_state = build_app_state(&Settings {
+            backend: Backend::Vllm,
+            ..Settings::default()
+        });
+        assert_eq!(app_state.backend.name(), "vLLM");
+    }
+
     #[tokio::test]
     async fn probe_url_returns_false_for_unreachable() {
         // Using port 1 (privileged, almost guaranteed not bound) on localhost

diff --git a/crates/ocm-daemon/src/settings.rs b/crates/ocm-daemon/src/settings.rs
@@ -18,6 +18,22 @@ pub struct Settings {
     /// Number of memories to retrieve per chat turn. Default 5. Set to 0 to disable.
     #[serde(default)]
     pub retrieval_top_k: Option<u32>,
+    /// Which inference backend to use. `Auto` (default) preserves pre-v0.1.1
+    /// behavior: platform-detect picks llama.cpp on Mac/Windows/CPU-Linux and
+    /// vLLM on CUDA Linux. Explicit `LlamaCpp` / `Vllm` / `Ollama` override
+    /// detection — the Ollama branch is the "I have an Ollama daemon already,
+    /// point OCM at it" zero-extra-process path.
+    #[serde(default)]
+    pub backend: Backend,
+    /// Override the Ollama daemon URL. Only consulted when `backend = "ollama"`.
+    /// Default (when unset) is `http://127.0.0.1:11434` — Ollama's installed default.
+    #[serde(default)]
+    pub ollama_base_url: Option<String>,
+    /// Ollama model tag (e.g. `llama3`, `qwen2.5:7b`). REQUIRED by the Ollama
+    /// native API — there is no server-side default. When unset, bootstrap
+    /// falls back to `ocm_inference::ollama::DEFAULT_MODEL`.
+    #[serde(default)]
+    pub ollama_model: Option<String>,
 }
 
 #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Copy)]
@@ -28,6 +44,18 @@ pub enum Theme {
     System,
 }
 
+/// Inference backend choice; serialized as the lowercased variant name (`backend = "ollama"`).
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Copy, Default)]
+#[serde(rename_all = "lowercase")]
+pub enum Backend {
+    /// Platform-detect (existing pre-v0.1.1 behavior); the default variant.
+    #[default]
+    Auto,
+    LlamaCpp, // written as "llamacpp" (no separator under `lowercase`)
+    Vllm, // written as "vllm"
+    Ollama, // written as "ollama"
+}
+
 impl Default for Settings {
     fn default() -> Self {
         Self {
@@ -38,6 +66,9 @@ impl Default for Settings {
             inference_base_url: None,
             mem0_base_url: None,
             retrieval_top_k: None,
+            backend: Backend::Auto,
+            ollama_base_url: None,
+            ollama_model: None,
         }
     }
 }
@@ -83,4 +114,82 @@ mod tests {
         let s = Settings::load_or_default(&path).unwrap();
         assert_eq!(s, Settings::default());
     }
+
+    #[test]
+    fn default_backend_is_auto() {
+        // Out of the box nothing is pinned: the backend is left to platform
+        // detection (Auto) and neither Ollama override is set.
+        let defaults = Settings::default();
+        assert_eq!(defaults.backend, Backend::Auto);
+        assert_eq!(defaults.ollama_base_url, None);
+        assert_eq!(defaults.ollama_model, None);
+    }
+
+    #[test]
+    fn backend_serializes_lowercase() {
+        // The enum value is written in lowercase, the same shape `theme` uses.
+        let ollama_settings = Settings {
+            backend: Backend::Ollama,
+            ..Settings::default()
+        };
+        let rendered = toml::to_string(&ollama_settings).unwrap();
+        assert!(rendered.contains("backend = \"ollama\""));
+    }
+
+    #[test]
+    fn ollama_settings_round_trip_via_toml() {
+        // Save a fully specified Ollama configuration to disk, load it back,
+        // and check the whole struct plus each Ollama-related field.
+        let tmp = tempdir().unwrap();
+        let file = tmp.path().join("settings.toml");
+        let written = Settings {
+            ollama_model: Some("llama3".into()),
+            ollama_base_url: Some("http://127.0.0.1:11434".into()),
+            backend: Backend::Ollama,
+            ..Settings::default()
+        };
+        written.save(&file).unwrap();
+        let reloaded = Settings::load_or_default(&file).unwrap();
+        assert_eq!(reloaded, written);
+        assert_eq!(reloaded.backend, Backend::Ollama);
+        let expected_url = Some("http://127.0.0.1:11434");
+        assert_eq!(reloaded.ollama_base_url.as_deref(), expected_url);
+        assert_eq!(reloaded.ollama_model.as_deref(), Some("llama3"));
+    }
+
+    #[test]
+    fn legacy_settings_toml_without_backend_field_still_parses() {
+        // Backward-compat: a settings.toml written before v0.1.1 has no
+        // `backend` key; it must still load, with the new fields defaulted.
+        let tmp = tempdir().unwrap();
+        let file = tmp.path().join("settings.toml");
+        let pre_backend_toml = r#"
+api_port = 7300
+mcp_enabled = true
+theme = "system"
+"#;
+        std::fs::write(&file, pre_backend_toml).unwrap();
+        let parsed = Settings::load_or_default(&file).unwrap();
+        assert_eq!(parsed.backend, Backend::Auto);
+        assert_eq!(parsed.ollama_base_url, None);
+        assert_eq!(parsed.ollama_model, None);
+    }
+
+    #[test]
+    fn all_backend_variants_round_trip() {
+        // Every variant must survive a TOML serialize/deserialize cycle intact.
+        for kind in [
+            Backend::Auto,
+            Backend::LlamaCpp,
+            Backend::Vllm,
+            Backend::Ollama,
+        ] {
+            let probe = Settings {
+                backend: kind,
+                ..Settings::default()
+            };
+            let decoded: Settings = toml::from_str(&toml::to_string(&probe).unwrap()).unwrap();
+            assert_eq!(decoded.backend, kind, "round-trip failed for {kind:?}");
+        }
+    }
 }