From 38416b31d68cebbb06eea38ecac11a56c36f6df0 Mon Sep 17 00:00:00 2001 From: Hunter B Date: Mon, 22 Jun 2026 21:26:07 -0700 Subject: [PATCH 1/3] WIP: route inline reasoning streams Add provider-scoped reasoning_stream_style handling for chat-completion streams so OpenAI-compatible routes can choose separate_field, inline_tags, or none without a new provider enum variant. Inline <think> blocks are streamed into Thinking content blocks, including tags split across SSE chunks, while none preserves raw visible text and existing separate-field reasoning keeps its default behavior. Refs #3222. --- config.example.toml | 1 + crates/tui/src/client.rs | 6 + crates/tui/src/client/chat.rs | 485 +++++++++++++++++++++++++++++---- crates/tui/src/config.rs | 5 + crates/tui/src/config/tests.rs | 40 +++ docs/CONFIGURATION.md | 1 + 6 files changed, 486 insertions(+), 52 deletions(-) diff --git a/config.example.toml b/config.example.toml index fb9bf39b6..53bb07554 100644 --- a/config.example.toml +++ b/config.example.toml @@ -319,6 +319,7 @@ max_subagents = 10 # optional (1-20) # model = "deepseek-ai/DeepSeek-V4-Pro" # http_headers = { "X-Model-Provider-Id" = "your-model-provider" } # optional custom request headers # path_suffix = "/chat/completions" # override the API path; skips /v1 versioning when set +# reasoning_stream_style = "inline_tags" # route <think>...</think> 
content into Thinking cells # [providers.deepseek.auth] # provider-scoped auth source metadata; command execution lands in a follow-up slice # source = "command" # command = ["secret-tool", "lookup", "service", "codewhale-deepseek"] diff --git a/crates/tui/src/client.rs b/crates/tui/src/client.rs index 7608ea50e..96f3981dc 100644 --- a/crates/tui/src/client.rs +++ b/crates/tui/src/client.rs @@ -161,6 +161,7 @@ pub struct DeepSeekClient { connection_health: Arc>, rate_limiter: Arc>, path_suffix: Option, + pub(super) reasoning_stream_style: Option, pub(super) stream_idle_timeout: Duration, } @@ -329,6 +330,7 @@ impl Clone for DeepSeekClient { connection_health: self.connection_health.clone(), rate_limiter: self.rate_limiter.clone(), path_suffix: self.path_suffix.clone(), + reasoning_stream_style: self.reasoning_stream_style.clone(), stream_idle_timeout: self.stream_idle_timeout, } } @@ -648,6 +650,9 @@ impl DeepSeekClient { let path_suffix = config .provider_config_for(api_provider) .and_then(|p| p.path_suffix.clone()); + let reasoning_stream_style = config + .provider_config_for(api_provider) + .and_then(|p| p.reasoning_stream_style.clone()); logging::info(format!("API provider: {}", api_provider.as_str())); logging::info(format!( @@ -691,6 +696,7 @@ impl DeepSeekClient { connection_health: Arc::new(AsyncMutex::new(ConnectionHealth::default())), rate_limiter: Arc::new(AsyncMutex::new(TokenBucket::from_env())), path_suffix, + reasoning_stream_style, stream_idle_timeout, }) } diff --git a/crates/tui/src/client/chat.rs b/crates/tui/src/client/chat.rs index 073793d5b..7926120e0 100644 --- a/crates/tui/src/client/chat.rs +++ b/crates/tui/src/client/chat.rs @@ -381,6 +381,7 @@ impl DeepSeekClient { let response_headers = format_stream_headers(response.headers()); let byte_stream = response.bytes_stream(); let stream_idle_timeout = self.stream_idle_timeout; + let configured_reasoning_stream_style = self.reasoning_stream_style.clone(); let stream = async_stream::stream! 
{ use futures_util::StreamExt; @@ -411,7 +412,12 @@ impl DeepSeekClient { let mut thinking_started = false; let mut tool_indices: std::collections::HashMap = std::collections::HashMap::new(); let mut reasoning_detail_buffers: std::collections::HashMap = std::collections::HashMap::new(); - let is_reasoning_model = is_reasoning_model_for_stream(api_provider, &model); + let mut inline_reasoning_tags = InlineReasoningTagState::default(); + let reasoning_stream_style = reasoning_stream_style_for_stream( + api_provider, + &model, + configured_reasoning_stream_style.as_deref(), + ); let mut byte_stream = std::pin::pin!(byte_stream); let idle = stream_idle_timeout; @@ -500,7 +506,8 @@ impl DeepSeekClient { &mut thinking_started, &mut tool_indices, &mut reasoning_detail_buffers, - is_reasoning_model, + &mut inline_reasoning_tags, + reasoning_stream_style, ) { SseDataFrame::Done => break 'stream, SseDataFrame::Events(events) => { @@ -2133,6 +2140,39 @@ fn is_reasoning_model_for_stream(provider: ApiProvider, model: &str) -> bool { provider_accepts_reasoning_content(provider) && model_supports_reasoning(model) } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(super) enum ReasoningStreamStyle { + SeparateField, + InlineTags, + None, +} + +fn reasoning_stream_style_for_stream( + provider: ApiProvider, + model: &str, + configured: Option<&str>, +) -> ReasoningStreamStyle { + if let Some(style) = configured.and_then(parse_reasoning_stream_style) { + return style; + } + if is_reasoning_model_for_stream(provider, model) { + ReasoningStreamStyle::SeparateField + } else { + ReasoningStreamStyle::None + } +} + +fn parse_reasoning_stream_style(value: &str) -> Option { + match value.trim().to_ascii_lowercase().replace('-', "_").as_str() { + "separate_field" | "separate" | "field" => Some(ReasoningStreamStyle::SeparateField), + "inline_tags" | "inline" | "think_tags" | "thinking_tags" => { + Some(ReasoningStreamStyle::InlineTags) + } + "none" | "text" | "disabled" | "off" => 
Some(ReasoningStreamStyle::None), + _ => None, + } +} + /// Providers whose chat-completions API both returns and accepts a dedicated /// `reasoning_content` field on assistant messages. /// @@ -2415,6 +2455,163 @@ fn build_stream_events(response: &MessageResponse) -> Vec { events } +#[derive(Debug, Default)] +struct InlineReasoningTagState { + inside_think: bool, + pending: String, +} + +#[derive(Debug, PartialEq, Eq)] +enum ReasoningSegment { + Text(String), + Thinking(String), +} + +fn inline_reasoning_segments( + content: &str, + state: &mut InlineReasoningTagState, + flush: bool, +) -> Vec { + state.pending.push_str(content); + let mut segments = Vec::new(); + + loop { + if state.pending.is_empty() { + break; + } + + if state.inside_think { + if let Some(close_at) = state.pending.find("") { + push_reasoning_segment( + &mut segments, + ReasoningSegment::Thinking(state.pending[..close_at].to_string()), + ); + state.pending.drain(..close_at + "".len()); + state.inside_think = false; + continue; + } + + let hold_len = if flush { + 0 + } else { + trailing_tag_prefix_len(&state.pending, "") + }; + let emit_len = state.pending.len().saturating_sub(hold_len); + if emit_len > 0 { + push_reasoning_segment( + &mut segments, + ReasoningSegment::Thinking(state.pending[..emit_len].to_string()), + ); + state.pending.drain(..emit_len); + } + break; + } + + if let Some(open_at) = state.pending.find("") { + push_reasoning_segment( + &mut segments, + ReasoningSegment::Text(state.pending[..open_at].to_string()), + ); + state.pending.drain(..open_at + "".len()); + state.inside_think = true; + continue; + } + + let hold_len = if flush { + 0 + } else { + trailing_tag_prefix_len(&state.pending, "") + }; + let emit_len = state.pending.len().saturating_sub(hold_len); + if emit_len > 0 { + push_reasoning_segment( + &mut segments, + ReasoningSegment::Text(state.pending[..emit_len].to_string()), + ); + state.pending.drain(..emit_len); + } + break; + } + + segments +} + +fn 
trailing_tag_prefix_len(content: &str, tag: &str) -> usize { + let max_len = tag.len().min(content.len()); + for len in (1..=max_len).rev() { + let start = content.len() - len; + if content.is_char_boundary(start) && tag.starts_with(&content[start..]) { + return len; + } + } + 0 +} + +fn push_reasoning_segment(segments: &mut Vec, segment: ReasoningSegment) { + match &segment { + ReasoningSegment::Text(text) | ReasoningSegment::Thinking(text) if text.is_empty() => {} + _ => segments.push(segment), + } +} + +fn push_text_delta( + events: &mut Vec, + content_index: &mut u32, + text_started: &mut bool, + thinking_started: &mut bool, + text: String, +) { + if *thinking_started { + events.push(StreamEvent::ContentBlockStop { + index: *content_index, + }); + *content_index += 1; + *thinking_started = false; + } + if !*text_started { + events.push(StreamEvent::ContentBlockStart { + index: *content_index, + content_block: ContentBlockStart::Text { + text: String::new(), + }, + }); + *text_started = true; + } + events.push(StreamEvent::ContentBlockDelta { + index: *content_index, + delta: Delta::TextDelta { text }, + }); +} + +fn push_thinking_delta( + events: &mut Vec, + content_index: &mut u32, + text_started: &mut bool, + thinking_started: &mut bool, + thinking: String, +) { + if *text_started { + events.push(StreamEvent::ContentBlockStop { + index: *content_index, + }); + *content_index += 1; + *text_started = false; + } + if !*thinking_started { + events.push(StreamEvent::ContentBlockStart { + index: *content_index, + content_block: ContentBlockStart::Thinking { + thinking: String::new(), + }, + }); + *thinking_started = true; + } + events.push(StreamEvent::ContentBlockDelta { + index: *content_index, + delta: Delta::ThinkingDelta { thinking }, + }); +} + // === SSE Chunk Parser === enum SseDataFrame { @@ -2429,7 +2626,8 @@ fn parse_sse_data_frame( thinking_started: &mut bool, tool_indices: &mut std::collections::HashMap, reasoning_detail_buffers: &mut 
std::collections::HashMap, - is_reasoning_model: bool, + inline_reasoning_tags: &mut InlineReasoningTagState, + reasoning_stream_style: ReasoningStreamStyle, ) -> SseDataFrame { if data.trim() == "[DONE]" { return SseDataFrame::Done; @@ -2437,14 +2635,15 @@ fn parse_sse_data_frame( let events = serde_json::from_str::(data).map_or_else( |_| Vec::new(), |chunk_json| { - parse_sse_chunk( + parse_sse_chunk_with_reasoning_style( &chunk_json, content_index, text_started, thinking_started, tool_indices, reasoning_detail_buffers, - is_reasoning_model, + inline_reasoning_tags, + reasoning_stream_style, ) }, ); @@ -2461,6 +2660,34 @@ pub(super) fn parse_sse_chunk( tool_indices: &mut std::collections::HashMap, reasoning_detail_buffers: &mut std::collections::HashMap, is_reasoning_model: bool, +) -> Vec { + let mut inline_reasoning_tags = InlineReasoningTagState::default(); + let reasoning_stream_style = if is_reasoning_model { + ReasoningStreamStyle::SeparateField + } else { + ReasoningStreamStyle::None + }; + parse_sse_chunk_with_reasoning_style( + chunk, + content_index, + text_started, + thinking_started, + tool_indices, + reasoning_detail_buffers, + &mut inline_reasoning_tags, + reasoning_stream_style, + ) +} + +fn parse_sse_chunk_with_reasoning_style( + chunk: &Value, + content_index: &mut u32, + text_started: &mut bool, + thinking_started: &mut bool, + tool_indices: &mut std::collections::HashMap, + reasoning_detail_buffers: &mut std::collections::HashMap, + inline_reasoning_tags: &mut InlineReasoningTagState, + reasoning_stream_style: ReasoningStreamStyle, ) -> Vec { let mut events = Vec::new(); @@ -2511,57 +2738,63 @@ pub(super) fn parse_sse_chunk( .map(str::to_string); // Handle reasoning_content / reasoning thinking deltas. 
- if is_reasoning_model && let Some(reasoning) = reasoning_text.as_deref() { - if !*thinking_started { - events.push(StreamEvent::ContentBlockStart { - index: *content_index, - content_block: ContentBlockStart::Thinking { - thinking: String::new(), - }, - }); - *thinking_started = true; - } - events.push(StreamEvent::ContentBlockDelta { - index: *content_index, - delta: Delta::ThinkingDelta { - thinking: reasoning.to_string(), - }, - }); + if reasoning_stream_style != ReasoningStreamStyle::None + && let Some(reasoning) = reasoning_text.as_deref() + { + push_thinking_delta( + &mut events, + content_index, + text_started, + thinking_started, + reasoning.to_string(), + ); } // Generic OpenAI-compatible proxies sometimes stream answer text - // in `reasoning_content`. If this provider is not one whose - // reasoning-content semantics we support, render that field as - // normal text when no `content` delta is present. - let effective_content = match content_text { - Some(content) => Some(content), - None if !is_reasoning_model => reasoning_text, - None => None, - }; - - // Handle regular content - if let Some(content) = effective_content { - // Close thinking block if transitioning to text - if *thinking_started { - events.push(StreamEvent::ContentBlockStop { - index: *content_index, - }); - *content_index += 1; - *thinking_started = false; + // in `reasoning_content`. If this route is configured with no + // reasoning semantics, render that field as normal text when no + // `content` delta is present. 
+ match (content_text, reasoning_stream_style) { + (Some(content), ReasoningStreamStyle::InlineTags) => { + for segment in inline_reasoning_segments(&content, inline_reasoning_tags, false) + { + match segment { + ReasoningSegment::Text(text) => push_text_delta( + &mut events, + content_index, + text_started, + thinking_started, + text, + ), + ReasoningSegment::Thinking(thinking) => push_thinking_delta( + &mut events, + content_index, + text_started, + thinking_started, + thinking, + ), + } + } } - if !*text_started { - events.push(StreamEvent::ContentBlockStart { - index: *content_index, - content_block: ContentBlockStart::Text { - text: String::new(), - }, - }); - *text_started = true; + (Some(content), _) => push_text_delta( + &mut events, + content_index, + text_started, + thinking_started, + content, + ), + (None, ReasoningStreamStyle::None) => { + if let Some(content) = reasoning_text { + push_text_delta( + &mut events, + content_index, + text_started, + thinking_started, + content, + ); + } } - events.push(StreamEvent::ContentBlockDelta { - index: *content_index, - delta: Delta::TextDelta { text: content }, - }); + (None, _) => {} } // Handle tool calls @@ -2654,6 +2887,26 @@ pub(super) fn parse_sse_chunk( // Handle finish reason if let Some(reason) = finish_reason { + if reasoning_stream_style == ReasoningStreamStyle::InlineTags { + for segment in inline_reasoning_segments("", inline_reasoning_tags, true) { + match segment { + ReasoningSegment::Text(text) => push_text_delta( + &mut events, + content_index, + text_started, + thinking_started, + text, + ), + ReasoningSegment::Thinking(thinking) => push_thinking_delta( + &mut events, + content_index, + text_started, + thinking_started, + thinking, + ), + } + } + } // Close any open blocks if *text_started { events.push(StreamEvent::ContentBlockStop { @@ -3002,6 +3255,60 @@ mod stream_decoder_tests { ) } + fn decode_chunks_with_style( + chunks: &[&str], + reasoning_stream_style: ReasoningStreamStyle, + ) -> Vec 
{ + let mut content_index = 0u32; + let mut text_started = false; + let mut thinking_started = false; + let mut tool_indices = std::collections::HashMap::new(); + let mut reasoning_detail_buffers = std::collections::HashMap::new(); + let mut inline_reasoning_tags = InlineReasoningTagState::default(); + let mut events = Vec::new(); + + for chunk in chunks { + let value: Value = serde_json::from_str(chunk).expect("valid SSE JSON"); + events.extend(parse_sse_chunk_with_reasoning_style( + &value, + &mut content_index, + &mut text_started, + &mut thinking_started, + &mut tool_indices, + &mut reasoning_detail_buffers, + &mut inline_reasoning_tags, + reasoning_stream_style, + )); + } + events + } + + fn text_delta_text(events: &[StreamEvent]) -> String { + events + .iter() + .filter_map(|event| match event { + StreamEvent::ContentBlockDelta { + delta: Delta::TextDelta { text }, + .. + } => Some(text.as_str()), + _ => None, + }) + .collect() + } + + fn thinking_delta_text(events: &[StreamEvent]) -> String { + events + .iter() + .filter_map(|event| match event { + StreamEvent::ContentBlockDelta { + delta: Delta::ThinkingDelta { thinking }, + .. + } => Some(thinking.as_str()), + _ => None, + }) + .collect() + } + #[test] fn decoder_emits_text_delta_for_content_chunk() { // The "happy" first chunk: a normal content delta. 
The engine treats @@ -3278,6 +3585,78 @@ mod stream_decoder_tests { ); } + #[test] + fn reasoning_style_separate_field_routes_reasoning_to_thinking() { + let events = decode_chunks_with_style( + &[ + r#"{"choices":[{"delta":{"reasoning_content":"private plan"}}]}"#, + r#"{"choices":[{"delta":{"content":"Public answer."}}]}"#, + ], + ReasoningStreamStyle::SeparateField, + ); + + assert_eq!(thinking_delta_text(&events), "private plan"); + assert_eq!(text_delta_text(&events), "Public answer."); + } + + #[test] + fn reasoning_style_inline_tags_routes_think_blocks_to_thinking() { + let events = decode_chunks_with_style( + &[ + r#"{"choices":[{"delta":{"content":"Before private plan after."}}]}"#, + ], + ReasoningStreamStyle::InlineTags, + ); + + assert_eq!(thinking_delta_text(&events), "private plan"); + assert_eq!(text_delta_text(&events), "Before after."); + assert!( + !text_delta_text(&events).contains(""), + "inline reasoning tags must not leak into visible text: {events:?}" + ); + } + + #[test] + fn reasoning_style_none_keeps_inline_tags_visible_text() { + let events = decode_chunks_with_style( + &[r#"{"choices":[{"delta":{"content":"visible answer"}}]}"#], + ReasoningStreamStyle::None, + ); + + assert_eq!(thinking_delta_text(&events), ""); + assert_eq!(text_delta_text(&events), "visible answer"); + } + + #[test] + fn configured_reasoning_style_overrides_route_default() { + assert_eq!( + reasoning_stream_style_for_stream(ApiProvider::Openai, "custom-minimax", None), + ReasoningStreamStyle::None + ); + assert_eq!( + reasoning_stream_style_for_stream( + ApiProvider::Openai, + "custom-minimax", + Some("inline-tags") + ), + ReasoningStreamStyle::InlineTags + ); + assert_eq!( + reasoning_stream_style_for_stream(ApiProvider::XiaomiMimo, "mimo-v2.5-pro", None), + ReasoningStreamStyle::SeparateField + ); + assert_eq!( + reasoning_stream_style_for_stream( + ApiProvider::XiaomiMimo, + "mimo-v2.5-pro", + Some("none") + ), + ReasoningStreamStyle::None + ); + } + #[test] fn 
decoder_yields_no_events_for_keepalive_chunk() { // DeepSeek often sends `{"choices":[]}` keepalive chunks before @@ -3298,6 +3677,7 @@ mod stream_decoder_tests { let mut thinking_started = false; let mut tool_indices = std::collections::HashMap::new(); let mut reasoning_detail_buffers = std::collections::HashMap::new(); + let mut inline_reasoning_tags = InlineReasoningTagState::default(); let outcome = parse_sse_data_frame( " [DONE] ", @@ -3306,7 +3686,8 @@ mod stream_decoder_tests { &mut thinking_started, &mut tool_indices, &mut reasoning_detail_buffers, - true, + &mut inline_reasoning_tags, + ReasoningStreamStyle::SeparateField, ); assert!( diff --git a/crates/tui/src/config.rs b/crates/tui/src/config.rs index 21e42cbca..143347d5b 100644 --- a/crates/tui/src/config.rs +++ b/crates/tui/src/config.rs @@ -2624,6 +2624,8 @@ pub struct ProviderConfig { pub http_headers: Option>, #[serde(alias = "pathSuffix")] pub path_suffix: Option, + #[serde(alias = "reasoningStyle", alias = "reasoningStreamStyle")] + pub reasoning_stream_style: Option, pub auth: Option, } @@ -5619,6 +5621,9 @@ fn merge_provider_config(base: ProviderConfig, override_cfg: ProviderConfig) -> .or(base.insecure_skip_tls_verify), http_headers: override_cfg.http_headers.or(base.http_headers), path_suffix: override_cfg.path_suffix.or(base.path_suffix), + reasoning_stream_style: override_cfg + .reasoning_stream_style + .or(base.reasoning_stream_style), auth: override_cfg.auth.or(base.auth), } } diff --git a/crates/tui/src/config/tests.rs b/crates/tui/src/config/tests.rs index 2344dcddc..9fb1bb1f2 100644 --- a/crates/tui/src/config/tests.rs +++ b/crates/tui/src/config/tests.rs @@ -4277,6 +4277,46 @@ model = "custom-qianfan-service-id" Ok(()) } +#[test] +fn provider_config_loads_reasoning_stream_style() -> Result<()> { + let _lock = lock_test_env(); + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + let temp_root = env::temp_dir().join(format!( + 
"codewhale-tui-reasoning-style-{}-{}", + std::process::id(), + nanos + )); + fs::create_dir_all(&temp_root)?; + let _guard = EnvGuard::new(&temp_root); + + let config_path = temp_root.join(".deepseek").join("config.toml"); + ensure_parent_dir(&config_path)?; + fs::write( + &config_path, + r#"provider = "openai" + +[providers.openai] +api_key = "openai-table-key" +base_url = "https://openai-compatible.example/v1" +model = "custom-reasoner" +reasoning_stream_style = "inline_tags" +"#, + )?; + + let config = Config::load(None, None)?; + let openai = config + .provider_config_for(ApiProvider::Openai) + .expect("openai provider config"); + assert_eq!( + openai.reasoning_stream_style.as_deref(), + Some("inline_tags") + ); + Ok(()) +} + // Regression for issue #1714: `codewhale --provider openai --model // MiniMax-M2.7` forwards the choice via DEEPSEEK_MODEL (never // OPENAI_MODEL) and uses the DEFAULT base_url. The explicit custom model diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index db8040a08..89714c23f 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -986,6 +986,7 @@ If you are upgrading from older releases: - `api_key` (string, required for hosted providers): must be non-empty for DeepSeek/hosted providers (or set the provider API key env var). Self-hosted SGLang, vLLM, and Ollama can omit it. - `base_url` (string, optional): defaults to `https://api.deepseek.com/beta` for DeepSeek's OpenAI-compatible Chat Completions API, including legacy `provider = "deepseek-cn"` configs. 
Other defaults are `https://integrate.api.nvidia.com/v1` for `nvidia-nim`, `https://api.openai.com/v1` for `openai`, `https://api.atlascloud.ai/v1` for `atlascloud`, `https://maas-openapi.wanjiedata.com/api/v1` for `wanjie-ark`, `https://ark.cn-beijing.volces.com/api/coding/v3` for `volcengine`, `https://openrouter.ai/api/v1` for `openrouter`, `https://token-plan-sgp.xiaomimimo.com/v1` for `xiaomi-mimo` when the API key starts with `tp-...` and `https://api.xiaomimimo.com/v1` otherwise, `https://api.novita.ai/openai/v1` for `novita`, `https://api.fireworks.ai/inference/v1` for `fireworks`, `https://api.siliconflow.com/v1` for `siliconflow`, `https://api.siliconflow.cn/v1` for `siliconflow-CN`, `https://api.arcee.ai/api/v1` for `arcee`, `https://api.moonshot.ai/v1` for `moonshot`, `https://api.minimax.io/v1` for `minimax`, `https://api.z.ai/api/coding/paas/v4` for `zai`, `https://api.stepfun.ai/v1` for `stepfun`, `https://api.deepinfra.com/v1/openai` for `deepinfra`, `https://router.huggingface.co/v1` for `huggingface`, `https://api.together.xyz/v1` for `together`, `https://api.baiduqianfan.ai/v1` for `qianfan`, `https://chatgpt.com/backend-api` for `openai-codex`, `https://api.anthropic.com` for `anthropic`, `http://localhost:30000/v1` for `sglang`, `http://localhost:8000/v1` for `vllm`, and `http://localhost:11434/v1` for `ollama`. Set `base_url = "https://token-plan-cn.xiaomimimo.com/v1"` explicitly if your Xiaomi MiMo Token Plan account is provisioned in the China region. Set `https://api.deepseek.com` or `https://api.deepseek.com/v1` explicitly to opt out of DeepSeek beta features. - `path_suffix` (string, optional provider-table key): override the chat-completions path for OpenAI-compatible gateways that do not serve `/v1/chat/completions`. For example, `[providers.openai] path_suffix = "/chat/completions"` sends chat requests to the unversioned base URL plus `/chat/completions`; `models` and `beta/*` requests keep their normal routing. 
+- `reasoning_stream_style` (string, optional provider-table key): override how streaming reasoning is separated from answer text for the active provider route. Use `separate_field` for `reasoning_content` / `reasoning` deltas, `inline_tags` for gateways that stream `<think>...</think>` inside `delta.content`, or `none` to render incoming content exactly as answer text. - `[providers..auth]` (table, optional): provider-scoped auth source metadata. `source = "command"` stores a command argv plus optional `timeout_ms`; `source = "secret"` stores a `secret_id`. This slice lets provider readiness, `/provider`, and doctor JSON report the auth source class without exposing command argv output or secret values; executing commands and resolving external secret material is handled by the follow-up resolver work. - `insecure_skip_tls_verify` (bool, optional provider-table key): legacy compatibility key, disabled by default. When true on the active provider table, provider clients reject the configuration instead of skipping TLS certificate verification. Use `SSL_CERT_FILE` for corporate or private CA bundles; `codewhale doctor` reports stale uses of this setting. - `default_text_model` (string, optional): defaults to `deepseek-v4-pro` for DeepSeek and generic OpenAI-compatible endpoints, `deepseek-ai/deepseek-v4-pro` for NVIDIA NIM, `deepseek-ai/deepseek-v4-flash` for AtlasCloud, `deepseek-reasoner` for Wanjie Ark, `DeepSeek-V4-Pro` for Volcengine Ark, `deepseek/deepseek-v4-pro` for OpenRouter and Novita, `mimo-v2.5-pro` for Xiaomi MiMo, `accounts/fireworks/models/deepseek-v4-pro` for Fireworks, `deepseek-ai/DeepSeek-V4-Pro` for SiliconFlow and DeepInfra, `trinity-large-thinking` for Arcee AI, `kimi-k2.7-code` for Moonshot, `MiniMax-M3` for MiniMax, `GLM-5.2` for Z.ai, `step-3.7-flash` for StepFun, `ernie-4.0-turbo-8k` for Qianfan, `deepseek-ai/DeepSeek-V4-Pro` for SGLang/vLLM, and `deepseek-coder:1.3b` for Ollama. Hugging Face and Together AI both default to `deepseek-ai/DeepSeek-V4-Pro`. 
Current public DeepSeek IDs are `deepseek-v4-pro` and `deepseek-v4-flash`, both with 1M context windows, 384K max output, and thinking mode enabled by default. Legacy `deepseek-chat` and `deepseek-reasoner` remain compatibility aliases for `deepseek-v4-flash` until July 24, 2026, except SiliconFlow maps `deepseek-reasoner` and `deepseek-r1` to its Pro model while `deepseek-chat` and `deepseek-v3` map to Flash. Provider-specific mappings translate `deepseek-v4-pro` / `deepseek-v4-flash` to each provider's model ID where supported. OpenRouter also recognizes recent large IDs such as `arcee-ai/trinity-large-thinking`, `minimax/minimax-m3`, `minimax/minimax-2.7`, `xiaomi/mimo-v2.5-pro`, `qwen/qwen3.6-flash`, `qwen/qwen3.6-35b-a3b`, `qwen/qwen3.6-max-preview`, `qwen/qwen3.6-27b`, `qwen/qwen3.6-plus`, `qwen/qwen3.7-max`, `google/gemma-4-31b-it`, `moonshotai/kimi-k2.7-code`, `moonshotai/kimi-k2.6`, `nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free`, and `nvidia/nemotron-3-ultra-550b-a55b`; direct Arcee uses bare IDs such as `trinity-large-thinking` and `trinity-large-preview`; direct Moonshot recognizes `kimi-k2.7-code`, `kimi-k2.6`, and Kimi Code's stable `kimi-for-coding`; direct MiniMax recognizes `MiniMax-M3` and the documented M2.x chat model IDs; direct Xiaomi MiMo recognizes chat IDs `mimo-v2.5-pro` and `mimo-v2.5`, while TTS IDs are selected through `codewhale speech` / `tts`. Generic `openai`, `atlascloud`, `wanjie-ark`, `xiaomi-mimo`, `arcee`, `moonshot`, `minimax`, `zai`, `stepfun`, `qianfan`, and Ollama model IDs are passed through unchanged after known aliases are normalized. OpenRouter and SiliconFlow provider configs with a custom `base_url` also preserve explicit model values, which lets OpenAI-compatible gateways accept bare model IDs. Use `/models` or `codewhale models` to discover live IDs from your configured endpoint. `CODEWHALE_MODEL` overrides this for a single process; `DEEPSEEK_MODEL` is the legacy alias. 
From 7993d15ec29337a4ec91172ad0e1530efbd6d112 Mon Sep 17 00:00:00 2001 From: Hunter B Date: Mon, 22 Jun 2026 21:41:13 -0700 Subject: [PATCH 2/3] Fix inline reasoning stream lint Limit the legacy parse_sse_chunk wrapper to test builds so release and clippy builds use only the style-aware stream parser path. Refs #3222. --- crates/tui/src/client/chat.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/tui/src/client/chat.rs b/crates/tui/src/client/chat.rs index 7926120e0..63594b3b5 100644 --- a/crates/tui/src/client/chat.rs +++ b/crates/tui/src/client/chat.rs @@ -2652,6 +2652,7 @@ fn parse_sse_data_frame( /// Parse a single SSE chunk from the Chat Completions streaming API into /// our internal `StreamEvent` representation. +#[cfg(test)] pub(super) fn parse_sse_chunk( chunk: &Value, content_index: &mut u32, From 69007768aae1af70cee4f40f163cda74cc2877d7 Mon Sep 17 00:00:00 2001 From: Hunter B Date: Tue, 23 Jun 2026 08:49:42 -0700 Subject: [PATCH 3/3] fix(tui): harden inline reasoning stream style Warn when reasoning_stream_style contains an unrecognized configured value, keep inline_tags mode from also consuming separate reasoning_content fields, and cover unclosed stream flushes. 
Verified with: - cargo fmt --all -- --check - git diff --check - cargo test -p codewhale-tui --bin codewhale-tui --locked reasoning_style - cargo test -p codewhale-tui --bin codewhale-tui --locked provider_config_loads_reasoning_stream_style - cargo test -p codewhale-tui --bin codewhale-tui --locked stream_decoder_tests - cargo test -p codewhale-tui --bin codewhale-tui --locked qianfan_provider_accepts_custom_model_and_base_url - cargo clippy -p codewhale-tui --bin codewhale-tui --locked -- -D warnings -A clippy::uninlined_format_args -A clippy::too_many_arguments -A clippy::unnecessary_map_or -A clippy::assertions_on_constants --- crates/tui/src/client/chat.rs | 42 ++++++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/crates/tui/src/client/chat.rs b/crates/tui/src/client/chat.rs index 63594b3b5..dbccee9b4 100644 --- a/crates/tui/src/client/chat.rs +++ b/crates/tui/src/client/chat.rs @@ -1,8 +1,8 @@ //! Chat Completions API helpers for DeepSeek's OpenAI-compatible endpoint. //! //! This is the production code path. Streaming (`create_message_stream`), -//! request building (`build_chat_messages*`), and SSE parsing (`parse_sse_chunk`) -//! all live here. +//! request building (`build_chat_messages*`), and SSE parsing +//! (`parse_sse_chunk_with_reasoning_style`) all live here. 
use std::collections::{HashMap, HashSet}; use std::io::Write; @@ -2152,8 +2152,13 @@ fn reasoning_stream_style_for_stream( model: &str, configured: Option<&str>, ) -> ReasoningStreamStyle { - if let Some(style) = configured.and_then(parse_reasoning_stream_style) { - return style; + if let Some(configured) = configured { + if let Some(style) = parse_reasoning_stream_style(configured) { + return style; + } + logging::warn(format!( + "Ignoring unrecognized reasoning_stream_style `{configured}`; expected separate_field, inline_tags, or none" + )); } if is_reasoning_model_for_stream(provider, model) { ReasoningStreamStyle::SeparateField @@ -2739,7 +2744,7 @@ fn parse_sse_chunk_with_reasoning_style( .map(str::to_string); // Handle reasoning_content / reasoning thinking deltas. - if reasoning_stream_style != ReasoningStreamStyle::None + if reasoning_stream_style == ReasoningStreamStyle::SeparateField && let Some(reasoning) = reasoning_text.as_deref() { push_thinking_delta( @@ -3619,6 +3624,33 @@ mod stream_decoder_tests { ); } + #[test] + fn reasoning_style_inline_tags_flushes_unclosed_think_at_stream_end() { + let events = decode_chunks_with_style( + &[ + r#"{"choices":[{"delta":{"content":"Before partial reasoning"}}]}"#, + r#"{"choices":[{"finish_reason":"stop"}]}"#, + ], + ReasoningStreamStyle::InlineTags, + ); + + assert_eq!(thinking_delta_text(&events), "partial reasoning"); + assert_eq!(text_delta_text(&events), "Before "); + } + + #[test] + fn reasoning_style_inline_tags_ignores_separate_reasoning_field() { + let events = decode_chunks_with_style( + &[ + r#"{"choices":[{"delta":{"reasoning_content":"metadata","content":"tagged answer"}}]}"#, + ], + ReasoningStreamStyle::InlineTags, + ); + + assert_eq!(thinking_delta_text(&events), "tagged"); + assert_eq!(text_delta_text(&events), " answer"); + } + #[test] fn reasoning_style_none_keeps_inline_tags_visible_text() { let events = decode_chunks_with_style(