From 505a489d24f6d21ed352c92b25ce595ab05218f8 Mon Sep 17 00:00:00 2001 From: Yuxi Shi Date: Wed, 18 Mar 2026 16:41:54 +0900 Subject: [PATCH 1/4] fix(server): prevent deleting providers still referenced by sandboxes or inference routes Block provider deletion when the provider is still referenced by any sandbox (spec.providers) or inference route (inference.local, sandbox-system). Returns FAILED_PRECONDITION with an actionable error message listing the referencing resources. Closes #157 --- crates/openshell-server/src/grpc.rs | 155 +++++++++++++++++++++++++++- 1 file changed, 153 insertions(+), 2 deletions(-) diff --git a/crates/openshell-server/src/grpc.rs b/crates/openshell-server/src/grpc.rs index a2c6a58f..bc3a2fc2 100644 --- a/crates/openshell-server/src/grpc.rs +++ b/crates/openshell-server/src/grpc.rs @@ -32,7 +32,7 @@ use openshell_core::proto::{ WatchSandboxRequest, open_shell_server::OpenShell, }; use openshell_core::proto::{ - Sandbox, SandboxPhase, SandboxPolicy as ProtoSandboxPolicy, SandboxTemplate, + InferenceRoute, Sandbox, SandboxPhase, SandboxPolicy as ProtoSandboxPolicy, SandboxTemplate, }; use prost::Message; use sha2::{Digest, Sha256}; @@ -3290,6 +3290,62 @@ async fn delete_provider_record( return Err(Status::invalid_argument("name is required")); } + // Check if any sandbox references this provider. + let sandbox_records = store + .list(Sandbox::object_type(), u32::MAX, 0) + .await + .map_err(|e| Status::internal(format!("list sandboxes failed: {e}")))?; + + let mut referencing_sandboxes: Vec = Vec::new(); + for record in sandbox_records { + let sandbox = Sandbox::decode(record.payload.as_slice()) + .map_err(|e| Status::internal(format!("decode sandbox failed: {e}")))?; + if let Some(spec) = &sandbox.spec { + if spec.providers.iter().any(|p| p == name) { + referencing_sandboxes.push(sandbox.name.clone()); + } + } + } + + // Check if any inference route references this provider. + let mut referencing_routes: Vec = Vec::new(); + for route_name in ["inference.local", "sandbox-system"] { + if let Some(route) = store + .get_message_by_name::(route_name) + .await + .map_err(|e| Status::internal(format!("fetch inference route failed: {e}")))? + { + if route + .config + .as_ref() + .is_some_and(|c| c.provider_name == name) + { + referencing_routes.push(route_name.to_string()); + } + } + } + + if !referencing_sandboxes.is_empty() || !referencing_routes.is_empty() { + let mut details = Vec::new(); + if !referencing_sandboxes.is_empty() { + details.push(format!( + "sandbox(es): {}", + referencing_sandboxes.join(", ") + )); + } + if !referencing_routes.is_empty() { + details.push(format!( + "inference route(s): {}", + referencing_routes.join(", ") + )); + } + return Err(Status::failed_precondition(format!( + "cannot delete provider '{}': still referenced by {}", + name, + details.join(" and ") + ))); + } + store .delete_by_name(Provider::object_type(), name) .await @@ -3326,7 +3382,9 @@ mod tests { validate_provider_fields, validate_sandbox_spec, }; use crate::persistence::Store; - use openshell_core::proto::{Provider, SandboxSpec, SandboxTemplate}; + use openshell_core::proto::{ + ClusterInferenceConfig, InferenceRoute, Provider, Sandbox, SandboxSpec, SandboxTemplate, + }; use std::collections::HashMap; use tonic::Code; @@ -4624,4 +4682,97 @@ mod tests { assert_eq!(err.code(), Code::InvalidArgument); assert!(err.message().contains("value")); } + + #[tokio::test] + async fn delete_provider_blocked_by_sandbox_reference() { + let store = Store::connect("sqlite::memory:?cache=shared") + .await + .unwrap(); + + let provider = create_provider_record(&store, provider_with_values("my-provider", "nvidia")) + .await + .unwrap(); + + // Create a sandbox that references the provider. + let sandbox = Sandbox { + id: uuid::Uuid::new_v4().to_string(), + name: "test-sandbox".to_string(), + spec: Some(SandboxSpec { + providers: vec!["my-provider".to_string()], + ..SandboxSpec::default() + }), + ..Sandbox::default() + }; + store.put_message(&sandbox).await.unwrap(); + + // Deleting the referenced provider should fail. + let err = delete_provider_record(&store, &provider.name) + .await + .unwrap_err(); + assert_eq!(err.code(), Code::FailedPrecondition); + assert!(err.message().contains("test-sandbox")); + assert!(err.message().contains("my-provider")); + } + + #[tokio::test] + async fn delete_provider_blocked_by_inference_route() { + let store = Store::connect("sqlite::memory:?cache=shared") + .await + .unwrap(); + + let provider = + create_provider_record(&store, provider_with_values("inference-provider", "nvidia")) + .await + .unwrap(); + + // Create an inference route that references the provider. + let route = InferenceRoute { + id: uuid::Uuid::new_v4().to_string(), + name: "inference.local".to_string(), + config: Some(ClusterInferenceConfig { + provider_name: "inference-provider".to_string(), + ..ClusterInferenceConfig::default() + }), + version: 1, + }; + store.put_message(&route).await.unwrap(); + + // Deleting the referenced provider should fail. + let err = delete_provider_record(&store, &provider.name) + .await + .unwrap_err(); + assert_eq!(err.code(), Code::FailedPrecondition); + assert!(err.message().contains("inference.local")); + assert!(err.message().contains("inference-provider")); + } + + #[tokio::test] + async fn delete_provider_succeeds_when_unreferenced() { + let store = Store::connect("sqlite::memory:?cache=shared") + .await + .unwrap(); + + let provider = + create_provider_record(&store, provider_with_values("lonely-provider", "nvidia")) + .await + .unwrap(); + + // Create a sandbox that references a DIFFERENT provider name. + let sandbox = Sandbox { + id: uuid::Uuid::new_v4().to_string(), + name: "other-sandbox".to_string(), + spec: Some(SandboxSpec { + providers: vec!["other-provider".to_string()], + ..SandboxSpec::default() + }), + ..Sandbox::default() + }; + store.put_message(&sandbox).await.unwrap(); + + // Deleting the unreferenced provider should succeed. + let deleted = delete_provider_record(&store, &provider.name) + .await + .unwrap(); + assert!(deleted); + } } From 756c3a3e4f130ad68ae496a4fc4cdd77cb0d48ff Mon Sep 17 00:00:00 2001 From: Yuxi Shi Date: Wed, 18 Mar 2026 16:46:37 +0900 Subject: [PATCH 2/4] style(server): fix cargo fmt formatting in delete_provider_record --- crates/openshell-server/src/grpc.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/crates/openshell-server/src/grpc.rs b/crates/openshell-server/src/grpc.rs index bc3a2fc2..5b6efc8b 100644 --- a/crates/openshell-server/src/grpc.rs +++ b/crates/openshell-server/src/grpc.rs @@ -3328,10 +3328,7 @@ async fn delete_provider_record( if !referencing_sandboxes.is_empty() || !referencing_routes.is_empty() { let mut details = Vec::new(); if !referencing_sandboxes.is_empty() { - details.push(format!( - "sandbox(es): {}", - referencing_sandboxes.join(", ") - )); + details.push(format!("sandbox(es): {}", referencing_sandboxes.join(", "))); } if !referencing_routes.is_empty() { details.push(format!( @@ -4689,9 +4686,10 @@ mod tests { .await .unwrap(); - let provider = create_provider_record(&store, provider_with_values("my-provider", "nvidia")) - .await - .unwrap(); + let provider = + create_provider_record(&store, provider_with_values("my-provider", "nvidia")) + .await + .unwrap(); // Create a sandbox that references the provider. let sandbox = Sandbox { From 24fc37c44389238ab9fe0378264017595b464bce Mon Sep 17 00:00:00 2001 From: Yuxi Shi Date: Wed, 18 Mar 2026 16:54:49 +0900 Subject: [PATCH 3/4] fix(server): skip deleting sandboxes when checking provider references Sandboxes in the Deleting phase still exist in the store but should not block provider deletion since they are already being torn down. --- crates/openshell-server/src/grpc.rs | 38 ++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/crates/openshell-server/src/grpc.rs b/crates/openshell-server/src/grpc.rs index 5b6efc8b..52740da1 100644 --- a/crates/openshell-server/src/grpc.rs +++ b/crates/openshell-server/src/grpc.rs @@ -3300,6 +3300,10 @@ async fn delete_provider_record( for record in sandbox_records { let sandbox = Sandbox::decode(record.payload.as_slice()) .map_err(|e| Status::internal(format!("decode sandbox failed: {e}")))?; + // Skip sandboxes that are already being deleted. + if sandbox.phase == SandboxPhase::Deleting as i32 { + continue; + } if let Some(spec) = &sandbox.spec { if spec.providers.iter().any(|p| p == name) { referencing_sandboxes.push(sandbox.name.clone()); @@ -3380,7 +3384,8 @@ mod tests { }; use crate::persistence::Store; use openshell_core::proto::{ - ClusterInferenceConfig, InferenceRoute, Provider, Sandbox, SandboxSpec, SandboxTemplate, + ClusterInferenceConfig, InferenceRoute, Provider, Sandbox, SandboxPhase, SandboxSpec, + SandboxTemplate, }; use std::collections::HashMap; use tonic::Code; @@ -4773,4 +4778,35 @@ mod tests { .unwrap(); assert!(deleted); } + + #[tokio::test] + async fn delete_provider_ignores_deleting_sandboxes() { + let store = Store::connect("sqlite::memory:?cache=shared") + .await + .unwrap(); + + let provider = + create_provider_record(&store, provider_with_values("my-provider", "nvidia")) + .await + .unwrap(); + + // Create a sandbox that references the provider but is already being deleted. + let sandbox = Sandbox { + id: uuid::Uuid::new_v4().to_string(), + name: "dying-sandbox".to_string(), + phase: SandboxPhase::Deleting as i32, + spec: Some(SandboxSpec { + providers: vec!["my-provider".to_string()], + ..SandboxSpec::default() + }), + ..Sandbox::default() + }; + store.put_message(&sandbox).await.unwrap(); + + // Deleting sandboxes should not block provider deletion. + let deleted = delete_provider_record(&store, &provider.name) + .await + .unwrap(); + assert!(deleted); + } } From 51c22ccebddf27e56bc463fec7ca7801ecfbf286 Mon Sep 17 00:00:00 2001 From: Yuxi Shi Date: Wed, 18 Mar 2026 17:01:31 +0900 Subject: [PATCH 4/4] fix(server): early-out for non-existent provider and add next-action hint Skip the full sandbox/route scan when the provider does not exist, avoiding unnecessary I/O. Also append an actionable hint to the FAILED_PRECONDITION error message per the issue acceptance criteria. --- crates/openshell-server/src/grpc.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/crates/openshell-server/src/grpc.rs b/crates/openshell-server/src/grpc.rs index 52740da1..268a692b 100644 --- a/crates/openshell-server/src/grpc.rs +++ b/crates/openshell-server/src/grpc.rs @@ -3290,6 +3290,15 @@ async fn delete_provider_record( return Err(Status::invalid_argument("name is required")); } + // Early-out: if the provider doesn't exist, nothing to delete. + let exists = store + .get_by_name(Provider::object_type(), name) + .await + .map_err(|e| Status::internal(format!("check provider failed: {e}")))?; + if exists.is_none() { + return Ok(false); + } + // Check if any sandbox references this provider. let sandbox_records = store .list(Sandbox::object_type(), u32::MAX, 0) @@ -3341,7 +3350,8 @@ async fn delete_provider_record( )); } return Err(Status::failed_precondition(format!( - "cannot delete provider '{}': still referenced by {}", + "cannot delete provider '{}': still referenced by {}. \ + Remove or update these references before deleting the provider.", name, details.join(" and ") )));