Skip to content

Commit bdd8a26

Browse files
committed
feat(runtime): reuse session reservations inside the registry
Extend the runtime reservation contract with an optional owner id so repeated acquire calls can converge on the same backend-owned reservation inside pantograph-runtime-registry instead of depending on adapter-local contains-check guards.

Add owner-conflict detection when the same owner attempts to bind to a different runtime, persist the owner id on reservation leases, and update the embedded workflow host to pass the session id as the reservation owner while removing its duplicate-load precheck.

Update the runtime-registry README and Milestone 3 implementation plan to record that session-load reuse now belongs to the backend registry boundary.

Verification:
- cargo test -p pantograph-runtime-registry
- cargo test -p pantograph-embedded-runtime keep_alive_session_load_tracks_registry_reservation_lifecycle -- --nocapture
- cargo check --manifest-path src-tauri/Cargo.toml
1 parent b9dff0b commit bdd8a26

5 files changed

Lines changed: 133 additions & 11 deletions

File tree

IMPLEMENTATION-PLAN-pantograph-runtime-registry-technical-fit-selection.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -452,6 +452,10 @@ runtime callers.
452452
- 2026-04-13: The registry now also exposes an idempotent reservation-release
453453
path so overlapping cleanup or retry flows can remain concurrency-safe
454454
without adapter-local duplicate-release suppression logic.
455+
- 2026-04-13: Session runtime acquire now has a backend-owned owner-key reuse
456+
path so repeated session loads can converge on the same reservation inside
457+
the registry lock instead of depending on adapter-local duplicate-load
458+
guards.
455459

456460
**Verification:**
457461
- `cargo test -p pantograph-runtime-registry`

crates/pantograph-embedded-runtime/src/lib.rs

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -769,17 +769,6 @@ impl EmbeddedWorkflowHost {
769769
return Ok(());
770770
};
771771

772-
{
773-
let reservations = self.session_runtime_reservations.lock().map_err(|_| {
774-
WorkflowServiceError::Internal(
775-
"session runtime reservation lock poisoned".to_string(),
776-
)
777-
})?;
778-
if reservations.contains_key(session_id) {
779-
return Ok(());
780-
}
781-
}
782-
783772
let mode_info = self.gateway.mode_info().await;
784773
let runtime_id = mode_info
785774
.active_runtime
@@ -807,6 +796,7 @@ impl EmbeddedWorkflowHost {
807796
.acquire_reservation(RuntimeReservationRequest {
808797
runtime_id,
809798
workflow_id: workflow_id.to_string(),
799+
reservation_owner_id: Some(session_id.to_string()),
810800
usage_profile: Self::trimmed_optional(usage_profile),
811801
model_id: mode_info.active_model_target.clone(),
812802
pin_runtime: false,

crates/pantograph-runtime-registry/src/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@ desktop app wiring.
6767
- Hosts may supply runtime admission budgets and reservation memory
6868
requirements, but the resulting acceptance or rejection decision belongs to
6969
this crate.
70+
- Hosts may also supply a stable reservation owner id when repeated acquire
71+
calls should reuse the same backend-owned reservation instead of duplicating
72+
session-local prechecks in adapters.
7073
- Hosts may also supply a reservation retention hint such as keep-alive intent,
7174
but this crate remains the owner of how that hint affects retention
7275
disposition and future eviction policy.

crates/pantograph-runtime-registry/src/lib.rs

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,15 @@ pub enum RuntimeRegistryError {
4747
#[error("runtime '{0}' cannot accept reservations while stopping or failed")]
4848
ReservationRejected(String),
4949

50+
#[error(
51+
"reservation owner '{owner_id}' is already bound to runtime '{existing_runtime_id}', not '{requested_runtime_id}'"
52+
)]
53+
ReservationOwnerConflict {
54+
owner_id: String,
55+
existing_runtime_id: String,
56+
requested_runtime_id: String,
57+
},
58+
5059
#[error("runtime '{runtime_id}' admission rejected reservation: {failure}")]
5160
AdmissionRejected {
5261
runtime_id: String,
@@ -202,6 +211,23 @@ impl RuntimeRegistry {
202211
.state
203212
.lock()
204213
.expect("runtime registry state lock poisoned");
214+
if let Some(owner_id) = request.reservation_owner_id.as_deref() {
215+
if let Some(existing_reservation) = guard
216+
.reservations
217+
.values()
218+
.find(|reservation| reservation.reservation_owner_id.as_deref() == Some(owner_id))
219+
{
220+
if existing_reservation.runtime_id == runtime_id {
221+
return Ok(existing_reservation.clone().into_lease());
222+
}
223+
224+
return Err(RuntimeRegistryError::ReservationOwnerConflict {
225+
owner_id: owner_id.to_string(),
226+
existing_runtime_id: existing_reservation.runtime_id.clone(),
227+
requested_runtime_id: runtime_id,
228+
});
229+
}
230+
}
205231
let record = guard
206232
.runtimes
207233
.get(&runtime_id)
@@ -231,6 +257,7 @@ impl RuntimeRegistry {
231257
reservation_id,
232258
runtime_id: runtime_id.clone(),
233259
workflow_id: request.workflow_id,
260+
reservation_owner_id: request.reservation_owner_id,
234261
usage_profile: request.usage_profile,
235262
model_id: request.model_id,
236263
pin_runtime: request.pin_runtime,
@@ -704,6 +731,7 @@ mod tests {
704731
.acquire_reservation(RuntimeReservationRequest {
705732
runtime_id: "onnxruntime".to_string(),
706733
workflow_id: "wf-1".to_string(),
734+
reservation_owner_id: None,
707735
usage_profile: Some("audio".to_string()),
708736
model_id: Some("model-a".to_string()),
709737
pin_runtime: true,
@@ -776,6 +804,7 @@ mod tests {
776804
.acquire_reservation(RuntimeReservationRequest {
777805
runtime_id: "llama.cpp".to_string(),
778806
workflow_id: "wf-budget".to_string(),
807+
reservation_owner_id: None,
779808
usage_profile: None,
780809
model_id: None,
781810
pin_runtime: false,
@@ -824,6 +853,7 @@ mod tests {
824853
.acquire_reservation(RuntimeReservationRequest {
825854
runtime_id: "llama.cpp".to_string(),
826855
workflow_id: "wf-stop".to_string(),
856+
reservation_owner_id: None,
827857
usage_profile: None,
828858
model_id: None,
829859
pin_runtime: false,
@@ -957,6 +987,7 @@ mod tests {
957987
.acquire_reservation(RuntimeReservationRequest {
958988
runtime_id: "reserved-ready".to_string(),
959989
workflow_id: "wf-reserved".to_string(),
990+
reservation_owner_id: None,
960991
usage_profile: None,
961992
model_id: Some("model-b".to_string()),
962993
pin_runtime: false,
@@ -1007,6 +1038,7 @@ mod tests {
10071038
.acquire_reservation(RuntimeReservationRequest {
10081039
runtime_id: "shared-runtime".to_string(),
10091040
workflow_id: "wf-keep-alive".to_string(),
1041+
reservation_owner_id: None,
10101042
usage_profile: Some("interactive".to_string()),
10111043
model_id: Some("model-a".to_string()),
10121044
pin_runtime: false,
@@ -1018,6 +1050,7 @@ mod tests {
10181050
.acquire_reservation(RuntimeReservationRequest {
10191051
runtime_id: "shared-runtime".to_string(),
10201052
workflow_id: "wf-ephemeral".to_string(),
1053+
reservation_owner_id: None,
10211054
usage_profile: Some("batch".to_string()),
10221055
model_id: Some("model-a".to_string()),
10231056
pin_runtime: false,
@@ -1090,6 +1123,7 @@ mod tests {
10901123
.acquire_reservation(RuntimeReservationRequest {
10911124
runtime_id: "idempotent-runtime".to_string(),
10921125
workflow_id: "wf-idempotent".to_string(),
1126+
reservation_owner_id: None,
10931127
usage_profile: None,
10941128
model_id: Some("model-a".to_string()),
10951129
pin_runtime: false,
@@ -1112,6 +1146,86 @@ mod tests {
11121146
);
11131147
}
11141148

1149+
#[test]
1150+
fn acquire_reservation_reuses_existing_owner_binding() {
1151+
let registry = RuntimeRegistry::new();
1152+
registry.observe_runtimes(vec![
1153+
RuntimeObservation {
1154+
runtime_id: "owner-runtime-a".to_string(),
1155+
display_name: "owner-runtime-a".to_string(),
1156+
backend_keys: vec!["llama_cpp".to_string()],
1157+
model_id: Some("model-a".to_string()),
1158+
status: RuntimeRegistryStatus::Ready,
1159+
runtime_instance_id: Some("owner-runtime-a-1".to_string()),
1160+
last_error: None,
1161+
},
1162+
RuntimeObservation {
1163+
runtime_id: "owner-runtime-b".to_string(),
1164+
display_name: "owner-runtime-b".to_string(),
1165+
backend_keys: vec!["llama_cpp".to_string()],
1166+
model_id: Some("model-b".to_string()),
1167+
status: RuntimeRegistryStatus::Ready,
1168+
runtime_instance_id: Some("owner-runtime-b-1".to_string()),
1169+
last_error: None,
1170+
},
1171+
]);
1172+
1173+
let first = registry
1174+
.acquire_reservation(RuntimeReservationRequest {
1175+
runtime_id: "owner-runtime-a".to_string(),
1176+
workflow_id: "wf-owner".to_string(),
1177+
reservation_owner_id: Some("session-owner".to_string()),
1178+
usage_profile: Some("interactive".to_string()),
1179+
model_id: Some("model-a".to_string()),
1180+
pin_runtime: false,
1181+
requirements: None,
1182+
retention_hint: RuntimeRetentionHint::KeepAlive,
1183+
})
1184+
.expect("first owner reservation");
1185+
1186+
let reused = registry
1187+
.acquire_reservation(RuntimeReservationRequest {
1188+
runtime_id: "owner-runtime-a".to_string(),
1189+
workflow_id: "wf-owner".to_string(),
1190+
reservation_owner_id: Some("session-owner".to_string()),
1191+
usage_profile: None,
1192+
model_id: None,
1193+
pin_runtime: false,
1194+
requirements: None,
1195+
retention_hint: RuntimeRetentionHint::Ephemeral,
1196+
})
1197+
.expect("second owner reservation should reuse existing lease");
1198+
1199+
assert_eq!(first.reservation_id, reused.reservation_id);
1200+
assert_eq!(
1201+
reused.reservation_owner_id.as_deref(),
1202+
Some("session-owner")
1203+
);
1204+
assert_eq!(registry.snapshot().reservations.len(), 1);
1205+
1206+
let err = registry
1207+
.acquire_reservation(RuntimeReservationRequest {
1208+
runtime_id: "owner-runtime-b".to_string(),
1209+
workflow_id: "wf-owner".to_string(),
1210+
reservation_owner_id: Some("session-owner".to_string()),
1211+
usage_profile: None,
1212+
model_id: None,
1213+
pin_runtime: false,
1214+
requirements: None,
1215+
retention_hint: RuntimeRetentionHint::Ephemeral,
1216+
})
1217+
.expect_err("owner should not bind to another runtime");
1218+
1219+
assert_eq!(
1220+
err,
1221+
RuntimeRegistryError::ReservationOwnerConflict {
1222+
owner_id: "session-owner".to_string(),
1223+
existing_runtime_id: "owner-runtime-a".to_string(),
1224+
requested_runtime_id: "owner-runtime-b".to_string(),
1225+
}
1226+
);
1227+
}
1228+
11151229
#[test]
11161230
fn eviction_candidates_use_deterministic_status_and_age_ordering() {
11171231
let registry = RuntimeRegistry::new();
@@ -1187,6 +1301,7 @@ mod tests {
11871301
.acquire_reservation(RuntimeReservationRequest {
11881302
runtime_id: "llama.cpp".to_string(),
11891303
workflow_id: "wf-1".to_string(),
1304+
reservation_owner_id: None,
11901305
usage_profile: None,
11911306
model_id: Some("model-a".to_string()),
11921307
pin_runtime: false,
@@ -1204,6 +1319,7 @@ mod tests {
12041319
.acquire_reservation(RuntimeReservationRequest {
12051320
runtime_id: "llama.cpp".to_string(),
12061321
workflow_id: "wf-2".to_string(),
1322+
reservation_owner_id: None,
12071323
usage_profile: None,
12081324
model_id: Some("model-b".to_string()),
12091325
pin_runtime: false,
@@ -1253,6 +1369,7 @@ mod tests {
12531369
.acquire_reservation(RuntimeReservationRequest {
12541370
runtime_id: "pytorch".to_string(),
12551371
workflow_id: "wf-ram-1".to_string(),
1372+
reservation_owner_id: None,
12561373
usage_profile: Some("interactive".to_string()),
12571374
model_id: Some("model-ram-a".to_string()),
12581375
pin_runtime: false,
@@ -1270,6 +1387,7 @@ mod tests {
12701387
.acquire_reservation(RuntimeReservationRequest {
12711388
runtime_id: "pytorch".to_string(),
12721389
workflow_id: "wf-ram-2".to_string(),
1390+
reservation_owner_id: None,
12731391
usage_profile: None,
12741392
model_id: Some("model-ram-b".to_string()),
12751393
pin_runtime: false,
@@ -1305,6 +1423,7 @@ mod tests {
13051423
.acquire_reservation(RuntimeReservationRequest {
13061424
runtime_id: "pytorch".to_string(),
13071425
workflow_id: "wf-ram-3".to_string(),
1426+
reservation_owner_id: None,
13081427
usage_profile: None,
13091428
model_id: Some("model-ram-c".to_string()),
13101429
pin_runtime: false,

crates/pantograph-runtime-registry/src/reservation.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ pub struct RuntimeReservationRequest {
1515
pub runtime_id: String,
1616
pub workflow_id: String,
1717
#[serde(default)]
18+
pub reservation_owner_id: Option<String>,
19+
#[serde(default)]
1820
pub usage_profile: Option<String>,
1921
#[serde(default)]
2022
pub model_id: Option<String>,
@@ -33,6 +35,8 @@ pub struct RuntimeReservationLease {
3335
pub runtime_id: String,
3436
pub workflow_id: String,
3537
#[serde(default)]
38+
pub reservation_owner_id: Option<String>,
39+
#[serde(default)]
3640
pub usage_profile: Option<String>,
3741
#[serde(default)]
3842
pub model_id: Option<String>,
@@ -47,6 +51,7 @@ pub(crate) struct RuntimeReservationRecord {
4751
pub reservation_id: u64,
4852
pub runtime_id: String,
4953
pub workflow_id: String,
54+
pub reservation_owner_id: Option<String>,
5055
pub usage_profile: Option<String>,
5156
pub model_id: Option<String>,
5257
pub pin_runtime: bool,
@@ -61,6 +66,7 @@ impl RuntimeReservationRecord {
6166
reservation_id: self.reservation_id,
6267
runtime_id: self.runtime_id,
6368
workflow_id: self.workflow_id,
69+
reservation_owner_id: self.reservation_owner_id,
6470
usage_profile: self.usage_profile,
6571
model_id: self.model_id,
6672
pin_runtime: self.pin_runtime,

0 commit comments

Comments
 (0)