Merged
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default.

17 changes: 10 additions & 7 deletions architecture/inference-routing.md
@@ -66,8 +66,9 @@ The gateway implements the `Inference` gRPC service defined in `proto/inference.
1. Validates that both fields are non-empty.
2. Fetches the named provider record from the store.
3. Validates the provider by resolving its route (checking that the provider type is supported and has a usable API key).
4. Builds a managed route spec that stores only `provider_name` and `model_id`. The spec intentionally leaves `base_url`, `api_key`, and `protocols` empty -- these are resolved dynamically at bundle time from the provider record.
5. Upserts the route with name `inference.local`. Version starts at 1 and increments monotonically on each update.
4. By default, performs a lightweight provider-shaped probe against the resolved upstream endpoint (for example, a tiny chat/messages request with `max_tokens: 1`) to confirm the endpoint is reachable and accepts the expected auth/request shape. `--no-verify` disables this probe when the endpoint is not up yet.
5. Builds a managed route spec that stores only `provider_name` and `model_id`. The spec intentionally leaves `base_url`, `api_key`, and `protocols` empty -- these are resolved dynamically at bundle time from the provider record.
6. Upserts the route with name `inference.local`. Version starts at 1 and increments monotonically on each update.
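The probe in step 4 can be sketched in plain Rust. This is a minimal illustration, not the gateway's implementation: the payload field names below follow the common chat-completions shape and are assumptions, since the exact request body depends on the provider type.

```rust
/// Build the minimal chat-style payload used as a reachability probe.
/// `max_tokens: 1` keeps the upstream cost of the probe negligible.
/// Field names are illustrative; the real shape is provider-specific.
fn probe_body(model_id: &str) -> String {
    format!(
        "{{\"model\":\"{model_id}\",\"max_tokens\":1,\
         \"messages\":[{{\"role\":\"user\",\"content\":\"ping\"}}]}}"
    )
}

fn main() {
    // A 2xx response to a request like this confirms both reachability
    // and that the API key / request shape are accepted.
    println!("{}", probe_body("gpt-4.1"));
}
```

With `--no-verify`, this request is skipped entirely and the route is saved unprobed.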

`GetClusterInference` returns `provider_name`, `model_id`, and `version` for the managed route. Returns `NOT_FOUND` if cluster inference is not configured.

@@ -91,7 +92,7 @@ File: `proto/inference.proto`

Key messages:

- `SetClusterInferenceRequest` -- `provider_name` + `model_id`
- `SetClusterInferenceRequest` -- `provider_name` + `model_id` + optional `no_verify` override, with verification enabled by default
- `SetClusterInferenceResponse` -- `provider_name` + `model_id` + `version`
- `GetInferenceBundleResponse` -- `repeated ResolvedRoute routes` + `revision` + `generated_at_ms`
- `ResolvedRoute` -- `name`, `base_url`, `protocols`, `api_key`, `model_id`, `provider_type`
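For illustration, the bundle messages above can be mirrored in plain Rust. These are hand-written sketches of the listed fields, not the generated protobuf types, and the sample values are hypothetical:

```rust
/// Hand-written mirror of the proto `ResolvedRoute` message.
#[derive(Debug, Clone)]
struct ResolvedRoute {
    name: String,
    base_url: String,
    protocols: Vec<String>,
    api_key: String,
    model_id: String,
    provider_type: String,
}

/// Hand-written mirror of `GetInferenceBundleResponse`.
struct InferenceBundle {
    routes: Vec<ResolvedRoute>,
    revision: u64,
    generated_at_ms: u64,
}

impl InferenceBundle {
    /// Look up a resolved route by name, e.g. "inference.local".
    fn route(&self, name: &str) -> Option<&ResolvedRoute> {
        self.routes.iter().find(|r| r.name == name)
    }
}

fn main() {
    let bundle = InferenceBundle {
        routes: vec![ResolvedRoute {
            name: "inference.local".into(),
            base_url: "https://api.example.invalid/v1".into(), // hypothetical
            protocols: vec!["openai-chat".into()],             // hypothetical
            api_key: "sk-redacted".into(),
            model_id: "gpt-4.1".into(),
            provider_type: "openai".into(),
        }],
        revision: 3,
        generated_at_ms: 0,
    };
    let route = bundle.route("inference.local").expect("route present");
    println!("{} -> {}", route.name, route.base_url);
}
```

Note that `base_url`, `api_key`, and `protocols` are populated here because the bundle carries fully resolved routes; the stored route spec itself leaves them empty.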
@@ -296,13 +297,15 @@ The system route is stored as a separate `InferenceRoute` record in the gateway

Cluster inference commands:

- `openshell cluster inference set --provider <name> --model <id>` -- configures user-facing cluster inference
- `openshell cluster inference set --system --provider <name> --model <id>` -- configures system inference
- `openshell cluster inference get` -- displays both user and system inference configuration
- `openshell cluster inference get --system` -- displays only the system inference configuration
- `openshell inference set --provider <name> --model <id>` -- configures user-facing cluster inference
- `openshell inference set --system --provider <name> --model <id>` -- configures system inference
- `openshell inference get` -- displays both user and system inference configuration
- `openshell inference get --system` -- displays only the system inference configuration

The `--provider` flag references a provider record name (not a provider type). The provider must already exist in the cluster and have a supported inference type (`openai`, `anthropic`, or `nvidia`).
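The provider-type check can be sketched as follows, assuming the supported set is exactly the three types listed; the function name and error wording are hypothetical, not the gateway's actual code:

```rust
/// Supported inference provider types, per the routing docs.
const SUPPORTED_TYPES: [&str; 3] = ["openai", "anthropic", "nvidia"];

/// Validate that a provider record can back an inference route.
/// The error text is illustrative of an INVALID_ARGUMENT-style message.
fn validate_provider_type(provider_type: &str) -> Result<(), String> {
    if SUPPORTED_TYPES.contains(&provider_type) {
        Ok(())
    } else {
        Err(format!(
            "provider type '{provider_type}' does not support inference \
             (expected one of: {})",
            SUPPORTED_TYPES.join(", ")
        ))
    }
}

fn main() {
    println!("{:?}", validate_provider_type("anthropic"));
    println!("{:?}", validate_provider_type("s3"));
}
```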

Inference writes verify by default. `--no-verify` is the explicit opt-out for endpoints that are not up yet.
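One plausible reading of the wire shape: proto3 booleans default to `false`, so encoding the opt-out (`no_verify`) rather than an opt-in (`verify`) makes the zero value mean "verify", matching the desired default. A stdlib-only sketch of that behavior, with a hand-written stand-in for the generated request type:

```rust
/// Hand-written stand-in for the generated request type; only the
/// fields relevant to the default-value behavior are shown.
#[derive(Default)]
struct SetClusterInferenceRequest {
    provider_name: String,
    model_id: String,
    no_verify: bool, // proto3 zero value: false => verification runs
}

fn should_verify(req: &SetClusterInferenceRequest) -> bool {
    !req.no_verify
}

fn main() {
    // A request that never touches the flag still gets verified.
    let req = SetClusterInferenceRequest::default();
    println!("default request verifies: {}", should_verify(&req));
}
```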

## Provider Discovery

Files:
10 changes: 7 additions & 3 deletions crates/openshell-bootstrap/src/docker.rs
@@ -526,9 +526,13 @@ pub async fn ensure_container(
port_bindings: Some(port_bindings),
binds: Some(vec![format!("{}:/var/lib/rancher/k3s", volume_name(name))]),
network_mode: Some(network_name(name)),
// Add host.docker.internal mapping for DNS resolution
// This allows the entrypoint script to configure CoreDNS to use the host gateway
extra_hosts: Some(vec!["host.docker.internal:host-gateway".to_string()]),
// Add host gateway aliases for DNS resolution.
// This allows both the entrypoint script and the running gateway
// process to reach services on the Docker host.
extra_hosts: Some(vec![
"host.docker.internal:host-gateway".to_string(),
"host.openshell.internal:host-gateway".to_string(),
]),
..Default::default()
};

113 changes: 55 additions & 58 deletions crates/openshell-cli/src/main.rs
@@ -906,9 +906,6 @@ enum InferenceCommands {
system: bool,

/// Skip endpoint verification before saving the route.
///
/// Accepted now so scripts can opt out explicitly ahead of a future
/// default switch to verification.
#[arg(long)]
no_verify: bool,
},
@@ -929,9 +926,6 @@ enum InferenceCommands {
system: bool,

/// Skip endpoint verification before saving the route.
///
/// Accepted now so scripts can opt out explicitly ahead of a future
/// default switch to verification.
#[arg(long)]
no_verify: bool,
},
@@ -1810,24 +1804,27 @@ async fn main() -> Result<()> {
provider,
model,
system,
no_verify: _,
no_verify,
} => {
let route_name = if system { "sandbox-system" } else { "" };
run::gateway_inference_set(endpoint, &provider, &model, route_name, &tls)
.await?;
run::gateway_inference_set(
endpoint, &provider, &model, route_name, no_verify, &tls,
)
.await?;
}
InferenceCommands::Update {
provider,
model,
system,
no_verify: _,
no_verify,
} => {
let route_name = if system { "sandbox-system" } else { "" };
run::gateway_inference_update(
endpoint,
provider.as_deref(),
model.as_deref(),
route_name,
no_verify,
&tls,
)
.await?;
@@ -2559,6 +2556,54 @@ mod tests {
));
}

#[test]
fn inference_set_accepts_no_verify_flag() {
let cli = Cli::try_parse_from([
"openshell",
"inference",
"set",
"--provider",
"openai-dev",
"--model",
"gpt-4.1",
"--no-verify",
])
.expect("inference set should parse --no-verify");

assert!(matches!(
cli.command,
Some(Commands::Inference {
command: Some(InferenceCommands::Set {
no_verify: true,
..
})
})
));
}

#[test]
fn inference_update_accepts_no_verify_flag() {
let cli = Cli::try_parse_from([
"openshell",
"inference",
"update",
"--provider",
"openai-dev",
"--no-verify",
])
.expect("inference update should parse --no-verify");

assert!(matches!(
cli.command,
Some(Commands::Inference {
command: Some(InferenceCommands::Update {
no_verify: true,
..
})
})
));
}

#[test]
fn completion_script_uses_openshell_command_name() {
let script = normalize_completion_script(
@@ -2747,52 +2792,4 @@ mod tests {
other => panic!("expected SshProxy, got: {other:?}"),
}
}

#[test]
fn inference_set_accepts_no_verify_flag() {
let cli = Cli::try_parse_from([
"openshell",
"inference",
"set",
"--provider",
"openai-dev",
"--model",
"gpt-4.1",
"--no-verify",
])
.expect("inference set should parse --no-verify");

assert!(matches!(
cli.command,
Some(Commands::Inference {
command: Some(InferenceCommands::Set {
no_verify: true,
..
})
})
));
}

#[test]
fn inference_update_accepts_no_verify_flag() {
let cli = Cli::try_parse_from([
"openshell",
"inference",
"update",
"--provider",
"openai-dev",
"--no-verify",
])
.expect("inference update should parse --no-verify");

assert!(matches!(
cli.command,
Some(Commands::Inference {
command: Some(InferenceCommands::Update {
no_verify: true,
..
})
})
));
}
}
74 changes: 69 additions & 5 deletions crates/openshell-cli/src/run.rs
@@ -40,7 +40,7 @@ use std::io::{IsTerminal, Write};
use std::path::{Path, PathBuf};
use std::process::Command;
use std::time::{Duration, Instant};
use tonic::Code;
use tonic::{Code, Status};

// Re-export SSH functions for backward compatibility
pub use crate::ssh::{Editor, print_ssh_config};
@@ -3390,17 +3390,38 @@ pub async fn gateway_inference_set(
provider_name: &str,
model_id: &str,
route_name: &str,
no_verify: bool,
tls: &TlsOptions,
) -> Result<()> {
let progress = if std::io::stdout().is_terminal() {
let spinner = ProgressBar::new_spinner();
spinner.set_style(
ProgressStyle::with_template("{spinner:.cyan} {msg} ({elapsed})")
.unwrap_or_else(|_| ProgressStyle::default_spinner()),
);
spinner.set_message("Configuring inference...");
spinner.enable_steady_tick(Duration::from_millis(120));
Some(spinner)
} else {
None
};

let mut client = grpc_inference_client(server, tls).await?;
let response = client
.set_cluster_inference(SetClusterInferenceRequest {
provider_name: provider_name.to_string(),
model_id: model_id.to_string(),
route_name: route_name.to_string(),
verify: false,
no_verify,
})
.await
.into_diagnostic()?;
.await;

if let Some(progress) = &progress {
progress.finish_and_clear();
}

let response = response.map_err(format_inference_status)?;

let configured = response.into_inner();
let label = if configured.route_name == "sandbox-system" {
@@ -3414,6 +3435,12 @@
println!(" {} {}", "Provider:".dimmed(), configured.provider_name);
println!(" {} {}", "Model:".dimmed(), configured.model_id);
println!(" {} {}", "Version:".dimmed(), configured.version);
if configured.validation_performed {
println!(" {}", "Validated Endpoints:".dimmed());
for endpoint in configured.validated_endpoints {
println!(" - {} ({})", endpoint.url, endpoint.protocol);
}
}
Ok(())
}

@@ -3422,6 +3449,7 @@ pub async fn gateway_inference_update(
provider_name: Option<&str>,
model_id: Option<&str>,
route_name: &str,
no_verify: bool,
tls: &TlsOptions,
) -> Result<()> {
if provider_name.is_none() && model_id.is_none() {
@@ -3444,14 +3472,34 @@
let provider = provider_name.unwrap_or(&current.provider_name);
let model = model_id.unwrap_or(&current.model_id);

let progress = if std::io::stdout().is_terminal() {
let spinner = ProgressBar::new_spinner();
spinner.set_style(
ProgressStyle::with_template("{spinner:.cyan} {msg} ({elapsed})")
.unwrap_or_else(|_| ProgressStyle::default_spinner()),
);
spinner.set_message("Configuring inference...");
spinner.enable_steady_tick(Duration::from_millis(120));
Some(spinner)
} else {
None
};

let response = client
.set_cluster_inference(SetClusterInferenceRequest {
provider_name: provider.to_string(),
model_id: model.to_string(),
route_name: route_name.to_string(),
verify: false,
no_verify,
})
.await
.into_diagnostic()?;
.await;

if let Some(progress) = &progress {
progress.finish_and_clear();
}

let response = response.map_err(format_inference_status)?;

let configured = response.into_inner();
let label = if configured.route_name == "sandbox-system" {
@@ -3465,6 +3513,12 @@
println!(" {} {}", "Provider:".dimmed(), configured.provider_name);
println!(" {} {}", "Model:".dimmed(), configured.model_id);
println!(" {} {}", "Version:".dimmed(), configured.version);
if configured.validation_performed {
println!(" {}", "Validated Endpoints:".dimmed());
for endpoint in configured.validated_endpoints {
println!(" - {} ({})", endpoint.url, endpoint.protocol);
}
}
Ok(())
}

@@ -3536,6 +3590,16 @@ async fn print_inference_route(
}
}

fn format_inference_status(status: Status) -> miette::Report {
let message = status.message().trim();

if message.is_empty() {
return miette::miette!("inference configuration failed ({})", status.code());
}

miette::miette!("{message}")
}

pub fn git_repo_root(local_path: &Path) -> Result<PathBuf> {
let git_dir = if local_path.is_dir() {
local_path