Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion ds4_server.c
Original file line number Diff line number Diff line change
Expand Up @@ -8763,6 +8763,14 @@ static void kv_cache_restore_suppressed_continued(kv_disk_cache *kc,
ds4_kvstore_restore_suppressed_continued(kc, old_tokens, suppressed_tokens);
}

/*
 * Evict an on-disk KV cache entry whose prefill failed.
 *
 * s:    server whose session is invalidated as part of the eviction.
 * path: file path of the disk cache entry; NULL makes this a no-op.
 *
 * When path is non-NULL this logs the eviction under DS4_LOG_KVCACHE,
 * unlinks the file, and invalidates s->session. The result of unlink() is
 * not inspected, and path is not freed here; it remains owned by the caller.
 */
static void kv_cache_evict_failed_disk_entry(server *s, const char *path) {
    if (path) {
        /* Log first so the eviction is recorded before the file is removed. */
        server_log(DS4_LOG_KVCACHE,
                   "ds4-server: kv cache evicted reason=prefill-failed file=%s",
                   path);
        unlink(path);
        ds4_session_invalidate(s->session);
    }
}

static void kv_cache_maybe_store_continued(server *s) {
kv_disk_cache *kc = &s->kv;
const ds4_tokens *tokens = ds4_session_tokens(s->session);
Expand Down Expand Up @@ -10029,7 +10037,6 @@ static void generate_job(server *s, job *j) {
const double t0 = now_sec();
uint64_t trace_id = trace_begin(s, j, cached, prompt_tokens, &cache_diag,
cache_source, disk_cached, disk_cache_path);
free(disk_cache_path);
char ctx_span[48];
request_ctx_span(ctx_span, sizeof(ctx_span), cached, prompt_tokens);
server_prefill_progress progress = {
Expand Down Expand Up @@ -10131,6 +10138,8 @@ static void generate_job(server *s, job *j) {
ds4_session_set_display_progress(s->session, NULL, NULL);
kv_cache_restore_suppressed_continued(&s->kv, suppressed_continued_last,
cold_store_len);
kv_cache_evict_failed_disk_entry(s, disk_cache_path);
free(disk_cache_path);
trace_event(s, trace_id, "prefill failed: %s", err);
send_prefill_failure_response(s, j, &progress, ctx_span, req_flags, err);
return;
Expand All @@ -10152,10 +10161,13 @@ static void generate_job(server *s, job *j) {
ds4_session_set_display_progress(s->session, NULL, NULL);
kv_cache_restore_suppressed_continued(&s->kv, suppressed_continued_last,
cold_store_len);
kv_cache_evict_failed_disk_entry(s, disk_cache_path);
free(disk_cache_path);
trace_event(s, trace_id, "prefill failed: %s", err);
send_prefill_failure_response(s, j, &progress, ctx_span, req_flags, err);
return;
}
free(disk_cache_path);
/* Once a non-live request wins, old protocol live bindings are stale. Keep
* a binding only when this request explicitly continued from it. */
if (!responses_live_continuation) responses_live_clear(s);
Expand Down