diff --git a/config.example.toml b/config.example.toml
index fb9bf39b6..53bb07554 100644
--- a/config.example.toml
+++ b/config.example.toml
@@ -319,6 +319,7 @@ max_subagents = 10 # optional (1-20)
# model = "deepseek-ai/DeepSeek-V4-Pro"
# http_headers = { "X-Model-Provider-Id" = "your-model-provider" } # optional custom request headers
# path_suffix = "/chat/completions" # override the API path; skips /v1 versioning when set
+# reasoning_stream_style = "inline_tags" # route <think>...</think> content into Thinking cells
# [providers.deepseek.auth] # provider-scoped auth source metadata; command execution lands in a follow-up slice
# source = "command"
# command = ["secret-tool", "lookup", "service", "codewhale-deepseek"]
diff --git a/crates/tui/src/client.rs b/crates/tui/src/client.rs
index 7608ea50e..96f3981dc 100644
--- a/crates/tui/src/client.rs
+++ b/crates/tui/src/client.rs
@@ -161,6 +161,7 @@ pub struct DeepSeekClient {
connection_health: Arc>,
rate_limiter: Arc>,
path_suffix: Option,
+ pub(super) reasoning_stream_style: Option,
pub(super) stream_idle_timeout: Duration,
}
@@ -329,6 +330,7 @@ impl Clone for DeepSeekClient {
connection_health: self.connection_health.clone(),
rate_limiter: self.rate_limiter.clone(),
path_suffix: self.path_suffix.clone(),
+ reasoning_stream_style: self.reasoning_stream_style.clone(),
stream_idle_timeout: self.stream_idle_timeout,
}
}
@@ -648,6 +650,9 @@ impl DeepSeekClient {
let path_suffix = config
.provider_config_for(api_provider)
.and_then(|p| p.path_suffix.clone());
+ let reasoning_stream_style = config
+ .provider_config_for(api_provider)
+ .and_then(|p| p.reasoning_stream_style.clone());
logging::info(format!("API provider: {}", api_provider.as_str()));
logging::info(format!(
@@ -691,6 +696,7 @@ impl DeepSeekClient {
connection_health: Arc::new(AsyncMutex::new(ConnectionHealth::default())),
rate_limiter: Arc::new(AsyncMutex::new(TokenBucket::from_env())),
path_suffix,
+ reasoning_stream_style,
stream_idle_timeout,
})
}
diff --git a/crates/tui/src/client/chat.rs b/crates/tui/src/client/chat.rs
index 073793d5b..dbccee9b4 100644
--- a/crates/tui/src/client/chat.rs
+++ b/crates/tui/src/client/chat.rs
@@ -1,8 +1,8 @@
//! Chat Completions API helpers for DeepSeek's OpenAI-compatible endpoint.
//!
//! This is the production code path. Streaming (`create_message_stream`),
-//! request building (`build_chat_messages*`), and SSE parsing (`parse_sse_chunk`)
-//! all live here.
+//! request building (`build_chat_messages*`), and SSE parsing
+//! (`parse_sse_chunk_with_reasoning_style`) all live here.
use std::collections::{HashMap, HashSet};
use std::io::Write;
@@ -381,6 +381,7 @@ impl DeepSeekClient {
let response_headers = format_stream_headers(response.headers());
let byte_stream = response.bytes_stream();
let stream_idle_timeout = self.stream_idle_timeout;
+ let configured_reasoning_stream_style = self.reasoning_stream_style.clone();
let stream = async_stream::stream! {
use futures_util::StreamExt;
@@ -411,7 +412,12 @@ impl DeepSeekClient {
let mut thinking_started = false;
let mut tool_indices: std::collections::HashMap = std::collections::HashMap::new();
let mut reasoning_detail_buffers: std::collections::HashMap = std::collections::HashMap::new();
- let is_reasoning_model = is_reasoning_model_for_stream(api_provider, &model);
+ let mut inline_reasoning_tags = InlineReasoningTagState::default();
+ let reasoning_stream_style = reasoning_stream_style_for_stream(
+ api_provider,
+ &model,
+ configured_reasoning_stream_style.as_deref(),
+ );
let mut byte_stream = std::pin::pin!(byte_stream);
let idle = stream_idle_timeout;
@@ -500,7 +506,8 @@ impl DeepSeekClient {
&mut thinking_started,
&mut tool_indices,
&mut reasoning_detail_buffers,
- is_reasoning_model,
+ &mut inline_reasoning_tags,
+ reasoning_stream_style,
) {
SseDataFrame::Done => break 'stream,
SseDataFrame::Events(events) => {
@@ -2133,6 +2140,44 @@ fn is_reasoning_model_for_stream(provider: ApiProvider, model: &str) -> bool {
provider_accepts_reasoning_content(provider) && model_supports_reasoning(model)
}
+/// How reasoning ("thinking") output is told apart from answer text while
+/// decoding a streamed chat-completions response.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub(super) enum ReasoningStreamStyle {
+ /// Reasoning deltas arrive in their own field and are emitted as thinking deltas.
+ SeparateField,
+ /// Reasoning is embedded in `delta.content`; the decoder splits it out with
+ /// `inline_reasoning_segments`.
+ InlineTags,
+ /// No reasoning semantics: a reasoning field that arrives without `content`
+ /// is rendered as ordinary text.
+ None,
+}
+
+/// Resolves the reasoning stream style for one streaming request.
+///
+/// A recognized `configured` value always wins. An unrecognized value is
+/// logged and ignored, after which the style falls back to the
+/// provider/model default: `SeparateField` when
+/// `is_reasoning_model_for_stream` says so, otherwise `None`.
+fn reasoning_stream_style_for_stream(
+    provider: ApiProvider,
+    model: &str,
+    configured: Option<&str>,
+) -> ReasoningStreamStyle {
+    match configured.map(|raw| (raw, parse_reasoning_stream_style(raw))) {
+        Some((_, Some(style))) => return style,
+        // Shadow the parameter so the log message keeps its original wording.
+        Some((configured, None)) => {
+            logging::warn(format!(
+                "Ignoring unrecognized reasoning_stream_style `{configured}`; expected separate_field, inline_tags, or none"
+            ));
+        }
+        None => {}
+    }
+
+    match is_reasoning_model_for_stream(provider, model) {
+        true => ReasoningStreamStyle::SeparateField,
+        false => ReasoningStreamStyle::None,
+    }
+}
+
+/// Parses a user-supplied `reasoning_stream_style` value.
+///
+/// The value is trimmed, lowercased (ASCII) and has `-` replaced by `_`
+/// before matching, so `"Inline-Tags"` and `" inline_tags "` are equivalent.
+/// Returns `None` when the value is not a known style or alias.
+fn parse_reasoning_stream_style(value: &str) -> Option<ReasoningStreamStyle> {
+    let normalized = value.trim().to_ascii_lowercase().replace('-', "_");
+    match normalized.as_str() {
+        "separate_field" | "separate" | "field" => Some(ReasoningStreamStyle::SeparateField),
+        "inline_tags" | "inline" | "think_tags" | "thinking_tags" => {
+            Some(ReasoningStreamStyle::InlineTags)
+        }
+        "none" | "text" | "disabled" | "off" => Some(ReasoningStreamStyle::None),
+        _ => None,
+    }
+}
+
/// Providers whose chat-completions API both returns and accepts a dedicated
/// `reasoning_content` field on assistant messages.
///
@@ -2415,6 +2460,163 @@ fn build_stream_events(response: &MessageResponse) -> Vec {
events
}
+/// Carry-over state for splitting inline reasoning tags across stream chunks.
+#[derive(Debug, Default)]
+struct InlineReasoningTagState {
+ /// True once an opening tag has been consumed and its closing tag has not.
+ inside_think: bool,
+ /// Received content that has not been emitted as a segment yet.
+ pending: String,
+}
+
+/// One classified piece of streamed content produced by `inline_reasoning_segments`.
+#[derive(Debug, PartialEq, Eq)]
+enum ReasoningSegment {
+ /// Content found outside a think block.
+ Text(String),
+ /// Content found inside a think block.
+ Thinking(String),
+}
+
+/// Splits streamed content into answer-text and thinking segments using
+/// inline `<think>` / `</think>` tags.
+///
+/// `content` is appended to `state.pending`, then as much of the buffer as can
+/// be classified is drained into segments. A tag may be split across chunks,
+/// so when `flush` is false any trailing bytes that could be the start of the
+/// next expected tag are held back in `state.pending` for the following call.
+/// When `flush` is true everything left is emitted; an unclosed think block is
+/// emitted as thinking. Empty segments are never produced and the tags
+/// themselves are never emitted.
+fn inline_reasoning_segments(
+    content: &str,
+    state: &mut InlineReasoningTagState,
+    flush: bool,
+) -> Vec<ReasoningSegment> {
+    const OPEN_TAG: &str = "<think>";
+    const CLOSE_TAG: &str = "</think>";
+
+    // Wraps a slice in the variant that matches the current parser mode.
+    let classify = |inside_think: bool, body: String| {
+        if inside_think {
+            ReasoningSegment::Thinking(body)
+        } else {
+            ReasoningSegment::Text(body)
+        }
+    };
+
+    state.pending.push_str(content);
+    let mut segments = Vec::new();
+
+    while !state.pending.is_empty() {
+        // Inside a think block only the closing tag matters; outside, only the
+        // opening tag does.
+        let tag = if state.inside_think { CLOSE_TAG } else { OPEN_TAG };
+
+        if let Some(tag_at) = state.pending.find(tag) {
+            let body = state.pending[..tag_at].to_string();
+            push_reasoning_segment(&mut segments, classify(state.inside_think, body));
+            // Consume the tag itself so it never reaches the output.
+            state.pending.drain(..tag_at + tag.len());
+            state.inside_think = !state.inside_think;
+            continue;
+        }
+
+        // No complete tag in the buffer: emit everything except a possible
+        // partial tag at the very end (unless this is the final flush).
+        let hold_len = if flush {
+            0
+        } else {
+            trailing_tag_prefix_len(&state.pending, tag)
+        };
+        let emit_len = state.pending.len().saturating_sub(hold_len);
+        if emit_len > 0 {
+            let body = state.pending[..emit_len].to_string();
+            push_reasoning_segment(&mut segments, classify(state.inside_think, body));
+            state.pending.drain(..emit_len);
+        }
+        break;
+    }
+
+    segments
+}
+
+/// Returns the byte length of the longest suffix of `content` that is also a
+/// prefix of `tag`, or 0 when there is none.
+///
+/// Only suffixes that start on a UTF-8 character boundary are considered.
+fn trailing_tag_prefix_len(content: &str, tag: &str) -> usize {
+    let longest = content.len().min(tag.len());
+    (1..=longest)
+        .rev()
+        .find(|&len| {
+            let start = content.len() - len;
+            content.is_char_boundary(start) && tag.starts_with(&content[start..])
+        })
+        .unwrap_or(0)
+}
+
+/// Appends `segment` to `segments`, dropping it when its text is empty.
+fn push_reasoning_segment(segments: &mut Vec<ReasoningSegment>, segment: ReasoningSegment) {
+    let (ReasoningSegment::Text(body) | ReasoningSegment::Thinking(body)) = &segment;
+    if !body.is_empty() {
+        segments.push(segment);
+    }
+}
+
+/// Emits a text delta, opening and closing content blocks as needed.
+///
+/// If a thinking block is open it is stopped first and `content_index` is
+/// advanced. A text block is then started at the current index unless one is
+/// already open, and `text` is emitted as a `TextDelta` on that index.
+fn push_text_delta(
+    events: &mut Vec<StreamEvent>,
+    content_index: &mut u32,
+    text_started: &mut bool,
+    thinking_started: &mut bool,
+    text: String,
+) {
+    // Transition thinking -> text: close the thinking block and move on.
+    if *thinking_started {
+        events.push(StreamEvent::ContentBlockStop {
+            index: *content_index,
+        });
+        *content_index += 1;
+        *thinking_started = false;
+    }
+    if !*text_started {
+        events.push(StreamEvent::ContentBlockStart {
+            index: *content_index,
+            content_block: ContentBlockStart::Text {
+                text: String::new(),
+            },
+        });
+        *text_started = true;
+    }
+    events.push(StreamEvent::ContentBlockDelta {
+        index: *content_index,
+        delta: Delta::TextDelta { text },
+    });
+}
+
+/// Emits a thinking delta, opening and closing content blocks as needed.
+///
+/// If a text block is open it is stopped first and `content_index` is
+/// advanced. A thinking block is then started at the current index unless one
+/// is already open, and `thinking` is emitted as a `ThinkingDelta` on that index.
+fn push_thinking_delta(
+    events: &mut Vec<StreamEvent>,
+    content_index: &mut u32,
+    text_started: &mut bool,
+    thinking_started: &mut bool,
+    thinking: String,
+) {
+    // Transition text -> thinking: close the text block and move on.
+    if *text_started {
+        events.push(StreamEvent::ContentBlockStop {
+            index: *content_index,
+        });
+        *content_index += 1;
+        *text_started = false;
+    }
+    if !*thinking_started {
+        events.push(StreamEvent::ContentBlockStart {
+            index: *content_index,
+            content_block: ContentBlockStart::Thinking {
+                thinking: String::new(),
+            },
+        });
+        *thinking_started = true;
+    }
+    events.push(StreamEvent::ContentBlockDelta {
+        index: *content_index,
+        delta: Delta::ThinkingDelta { thinking },
+    });
+}
+
// === SSE Chunk Parser ===
enum SseDataFrame {
@@ -2429,7 +2631,8 @@ fn parse_sse_data_frame(
thinking_started: &mut bool,
tool_indices: &mut std::collections::HashMap,
reasoning_detail_buffers: &mut std::collections::HashMap,
- is_reasoning_model: bool,
+ inline_reasoning_tags: &mut InlineReasoningTagState,
+ reasoning_stream_style: ReasoningStreamStyle,
) -> SseDataFrame {
if data.trim() == "[DONE]" {
return SseDataFrame::Done;
@@ -2437,14 +2640,15 @@ fn parse_sse_data_frame(
let events = serde_json::from_str::(data).map_or_else(
|_| Vec::new(),
|chunk_json| {
- parse_sse_chunk(
+ parse_sse_chunk_with_reasoning_style(
&chunk_json,
content_index,
text_started,
thinking_started,
tool_indices,
reasoning_detail_buffers,
- is_reasoning_model,
+ inline_reasoning_tags,
+ reasoning_stream_style,
)
},
);
@@ -2453,6 +2657,7 @@ fn parse_sse_data_frame(
/// Parse a single SSE chunk from the Chat Completions streaming API into
/// our internal `StreamEvent` representation.
+#[cfg(test)]
pub(super) fn parse_sse_chunk(
chunk: &Value,
content_index: &mut u32,
@@ -2461,6 +2666,34 @@ pub(super) fn parse_sse_chunk(
tool_indices: &mut std::collections::HashMap,
reasoning_detail_buffers: &mut std::collections::HashMap,
is_reasoning_model: bool,
+) -> Vec {
+ let mut inline_reasoning_tags = InlineReasoningTagState::default();
+ let reasoning_stream_style = if is_reasoning_model {
+ ReasoningStreamStyle::SeparateField
+ } else {
+ ReasoningStreamStyle::None
+ };
+ parse_sse_chunk_with_reasoning_style(
+ chunk,
+ content_index,
+ text_started,
+ thinking_started,
+ tool_indices,
+ reasoning_detail_buffers,
+ &mut inline_reasoning_tags,
+ reasoning_stream_style,
+ )
+}
+
+fn parse_sse_chunk_with_reasoning_style(
+ chunk: &Value,
+ content_index: &mut u32,
+ text_started: &mut bool,
+ thinking_started: &mut bool,
+ tool_indices: &mut std::collections::HashMap,
+ reasoning_detail_buffers: &mut std::collections::HashMap,
+ inline_reasoning_tags: &mut InlineReasoningTagState,
+ reasoning_stream_style: ReasoningStreamStyle,
) -> Vec {
let mut events = Vec::new();
@@ -2511,57 +2744,63 @@ pub(super) fn parse_sse_chunk(
.map(str::to_string);
// Handle reasoning_content / reasoning thinking deltas.
- if is_reasoning_model && let Some(reasoning) = reasoning_text.as_deref() {
- if !*thinking_started {
- events.push(StreamEvent::ContentBlockStart {
- index: *content_index,
- content_block: ContentBlockStart::Thinking {
- thinking: String::new(),
- },
- });
- *thinking_started = true;
- }
- events.push(StreamEvent::ContentBlockDelta {
- index: *content_index,
- delta: Delta::ThinkingDelta {
- thinking: reasoning.to_string(),
- },
- });
+ if reasoning_stream_style == ReasoningStreamStyle::SeparateField
+ && let Some(reasoning) = reasoning_text.as_deref()
+ {
+ push_thinking_delta(
+ &mut events,
+ content_index,
+ text_started,
+ thinking_started,
+ reasoning.to_string(),
+ );
}
// Generic OpenAI-compatible proxies sometimes stream answer text
- // in `reasoning_content`. If this provider is not one whose
- // reasoning-content semantics we support, render that field as
- // normal text when no `content` delta is present.
- let effective_content = match content_text {
- Some(content) => Some(content),
- None if !is_reasoning_model => reasoning_text,
- None => None,
- };
-
- // Handle regular content
- if let Some(content) = effective_content {
- // Close thinking block if transitioning to text
- if *thinking_started {
- events.push(StreamEvent::ContentBlockStop {
- index: *content_index,
- });
- *content_index += 1;
- *thinking_started = false;
+ // in `reasoning_content`. If this route is configured with no
+ // reasoning semantics, render that field as normal text when no
+ // `content` delta is present.
+ match (content_text, reasoning_stream_style) {
+ (Some(content), ReasoningStreamStyle::InlineTags) => {
+ for segment in inline_reasoning_segments(&content, inline_reasoning_tags, false)
+ {
+ match segment {
+ ReasoningSegment::Text(text) => push_text_delta(
+ &mut events,
+ content_index,
+ text_started,
+ thinking_started,
+ text,
+ ),
+ ReasoningSegment::Thinking(thinking) => push_thinking_delta(
+ &mut events,
+ content_index,
+ text_started,
+ thinking_started,
+ thinking,
+ ),
+ }
+ }
}
- if !*text_started {
- events.push(StreamEvent::ContentBlockStart {
- index: *content_index,
- content_block: ContentBlockStart::Text {
- text: String::new(),
- },
- });
- *text_started = true;
+ (Some(content), _) => push_text_delta(
+ &mut events,
+ content_index,
+ text_started,
+ thinking_started,
+ content,
+ ),
+ (None, ReasoningStreamStyle::None) => {
+ if let Some(content) = reasoning_text {
+ push_text_delta(
+ &mut events,
+ content_index,
+ text_started,
+ thinking_started,
+ content,
+ );
+ }
}
- events.push(StreamEvent::ContentBlockDelta {
- index: *content_index,
- delta: Delta::TextDelta { text: content },
- });
+ (None, _) => {}
}
// Handle tool calls
@@ -2654,6 +2893,26 @@ pub(super) fn parse_sse_chunk(
// Handle finish reason
if let Some(reason) = finish_reason {
+ if reasoning_stream_style == ReasoningStreamStyle::InlineTags {
+ for segment in inline_reasoning_segments("", inline_reasoning_tags, true) {
+ match segment {
+ ReasoningSegment::Text(text) => push_text_delta(
+ &mut events,
+ content_index,
+ text_started,
+ thinking_started,
+ text,
+ ),
+ ReasoningSegment::Thinking(thinking) => push_thinking_delta(
+ &mut events,
+ content_index,
+ text_started,
+ thinking_started,
+ thinking,
+ ),
+ }
+ }
+ }
// Close any open blocks
if *text_started {
events.push(StreamEvent::ContentBlockStop {
@@ -3002,6 +3261,60 @@ mod stream_decoder_tests {
)
}
+    /// Feeds `chunks` (one JSON SSE payload each) through the decoder with a
+    /// single shared set of stream state and returns every emitted event in
+    /// order. Panics if a chunk is not valid JSON.
+    fn decode_chunks_with_style(
+        chunks: &[&str],
+        reasoning_stream_style: ReasoningStreamStyle,
+    ) -> Vec<StreamEvent> {
+        let mut content_index = 0u32;
+        let mut text_started = false;
+        let mut thinking_started = false;
+        let mut tool_indices = std::collections::HashMap::new();
+        let mut reasoning_detail_buffers = std::collections::HashMap::new();
+        let mut inline_reasoning_tags = InlineReasoningTagState::default();
+        let mut events = Vec::new();
+
+        for chunk in chunks {
+            let value: Value = serde_json::from_str(chunk).expect("valid SSE JSON");
+            events.extend(parse_sse_chunk_with_reasoning_style(
+                &value,
+                &mut content_index,
+                &mut text_started,
+                &mut thinking_started,
+                &mut tool_indices,
+                &mut reasoning_detail_buffers,
+                &mut inline_reasoning_tags,
+                reasoning_stream_style,
+            ));
+        }
+        events
+    }
+
+    /// Concatenates the payload of every `TextDelta` in `events`, in order.
+    fn text_delta_text(events: &[StreamEvent]) -> String {
+        let mut joined = String::new();
+        for event in events {
+            if let StreamEvent::ContentBlockDelta {
+                delta: Delta::TextDelta { text },
+                ..
+            } = event
+            {
+                joined.push_str(text);
+            }
+        }
+        joined
+    }
+
+    /// Concatenates the payload of every `ThinkingDelta` in `events`, in order.
+    fn thinking_delta_text(events: &[StreamEvent]) -> String {
+        let mut joined = String::new();
+        for event in events {
+            if let StreamEvent::ContentBlockDelta {
+                delta: Delta::ThinkingDelta { thinking },
+                ..
+            } = event
+            {
+                joined.push_str(thinking);
+            }
+        }
+        joined
+    }
+
#[test]
fn decoder_emits_text_delta_for_content_chunk() {
// The "happy" first chunk: a normal content delta. The engine treats
@@ -3278,6 +3591,105 @@ mod stream_decoder_tests {
);
}
+    // With the separate-field style, `reasoning_content` becomes thinking and
+    // `content` becomes answer text.
+    #[test]
+    fn reasoning_style_separate_field_routes_reasoning_to_thinking() {
+        let chunks = [
+            r#"{"choices":[{"delta":{"reasoning_content":"private plan"}}]}"#,
+            r#"{"choices":[{"delta":{"content":"Public answer."}}]}"#,
+        ];
+        let events = decode_chunks_with_style(&chunks, ReasoningStreamStyle::SeparateField);
+
+        let thinking = thinking_delta_text(&events);
+        let text = text_delta_text(&events);
+        assert_eq!(thinking, "private plan");
+        assert_eq!(text, "Public answer.");
+    }
+
+    // With the inline-tags style, a `<think>...</think>` span inside `content`
+    // is routed to thinking and the tags never reach the visible text.
+    #[test]
+    fn reasoning_style_inline_tags_routes_think_blocks_to_thinking() {
+        let events = decode_chunks_with_style(
+            &[
+                r#"{"choices":[{"delta":{"content":"Before <think>private plan</think> after."}}]}"#,
+            ],
+            ReasoningStreamStyle::InlineTags,
+        );
+
+        assert_eq!(thinking_delta_text(&events), "private plan");
+        // Removing the tagged span leaves both surrounding spaces in place.
+        assert_eq!(text_delta_text(&events), "Before  after.");
+        assert!(
+            !text_delta_text(&events).contains("<think>"),
+            "inline reasoning tags must not leak into visible text: {events:?}"
+        );
+    }
+
+    // A think block that is still open when the stream finishes is flushed
+    // as thinking rather than dropped or shown as text.
+    #[test]
+    fn reasoning_style_inline_tags_flushes_unclosed_think_at_stream_end() {
+        let events = decode_chunks_with_style(
+            &[
+                r#"{"choices":[{"delta":{"content":"Before <think>partial reasoning"}}]}"#,
+                r#"{"choices":[{"finish_reason":"stop"}]}"#,
+            ],
+            ReasoningStreamStyle::InlineTags,
+        );
+
+        assert_eq!(thinking_delta_text(&events), "partial reasoning");
+        assert_eq!(text_delta_text(&events), "Before ");
+    }
+
+    // With the inline-tags style, `reasoning_content` is ignored; only tags
+    // inside `content` decide what counts as thinking.
+    #[test]
+    fn reasoning_style_inline_tags_ignores_separate_reasoning_field() {
+        let events = decode_chunks_with_style(
+            &[
+                r#"{"choices":[{"delta":{"reasoning_content":"metadata","content":"<think>tagged</think> answer"}}]}"#,
+            ],
+            ReasoningStreamStyle::InlineTags,
+        );
+
+        assert_eq!(thinking_delta_text(&events), "tagged");
+        assert_eq!(text_delta_text(&events), " answer");
+    }
+
+    // With no reasoning semantics configured, inline tags are not interpreted:
+    // the content, tags included, is passed through as answer text.
+    #[test]
+    fn reasoning_style_none_keeps_inline_tags_visible_text() {
+        let events = decode_chunks_with_style(
+            &[r#"{"choices":[{"delta":{"content":"<think>visible</think> answer"}}]}"#],
+            ReasoningStreamStyle::None,
+        );
+
+        assert_eq!(thinking_delta_text(&events), "");
+        assert_eq!(text_delta_text(&events), "<think>visible</think> answer");
+    }
+
+    // An explicit `reasoning_stream_style` wins over the provider/model
+    // default in both directions (enabling and disabling reasoning handling).
+    #[test]
+    fn configured_reasoning_style_overrides_route_default() {
+        let cases = [
+            (
+                ApiProvider::Openai,
+                "custom-minimax",
+                None,
+                ReasoningStreamStyle::None,
+            ),
+            (
+                ApiProvider::Openai,
+                "custom-minimax",
+                Some("inline-tags"),
+                ReasoningStreamStyle::InlineTags,
+            ),
+            (
+                ApiProvider::XiaomiMimo,
+                "mimo-v2.5-pro",
+                None,
+                ReasoningStreamStyle::SeparateField,
+            ),
+            (
+                ApiProvider::XiaomiMimo,
+                "mimo-v2.5-pro",
+                Some("none"),
+                ReasoningStreamStyle::None,
+            ),
+        ];
+
+        for (provider, model, configured, expected) in cases {
+            assert_eq!(
+                reasoning_stream_style_for_stream(provider, model, configured),
+                expected
+            );
+        }
+    }
+
#[test]
fn decoder_yields_no_events_for_keepalive_chunk() {
// DeepSeek often sends `{"choices":[]}` keepalive chunks before
@@ -3298,6 +3710,7 @@ mod stream_decoder_tests {
let mut thinking_started = false;
let mut tool_indices = std::collections::HashMap::new();
let mut reasoning_detail_buffers = std::collections::HashMap::new();
+ let mut inline_reasoning_tags = InlineReasoningTagState::default();
let outcome = parse_sse_data_frame(
" [DONE] ",
@@ -3306,7 +3719,8 @@ mod stream_decoder_tests {
&mut thinking_started,
&mut tool_indices,
&mut reasoning_detail_buffers,
- true,
+ &mut inline_reasoning_tags,
+ ReasoningStreamStyle::SeparateField,
);
assert!(
diff --git a/crates/tui/src/config.rs b/crates/tui/src/config.rs
index 21e42cbca..143347d5b 100644
--- a/crates/tui/src/config.rs
+++ b/crates/tui/src/config.rs
@@ -2624,6 +2624,8 @@ pub struct ProviderConfig {
pub http_headers: Option>,
#[serde(alias = "pathSuffix")]
pub path_suffix: Option,
+ #[serde(alias = "reasoningStyle", alias = "reasoningStreamStyle")]
+ pub reasoning_stream_style: Option,
pub auth: Option,
}
@@ -5619,6 +5621,9 @@ fn merge_provider_config(base: ProviderConfig, override_cfg: ProviderConfig) ->
.or(base.insecure_skip_tls_verify),
http_headers: override_cfg.http_headers.or(base.http_headers),
path_suffix: override_cfg.path_suffix.or(base.path_suffix),
+ reasoning_stream_style: override_cfg
+ .reasoning_stream_style
+ .or(base.reasoning_stream_style),
auth: override_cfg.auth.or(base.auth),
}
}
diff --git a/crates/tui/src/config/tests.rs b/crates/tui/src/config/tests.rs
index 2344dcddc..9fb1bb1f2 100644
--- a/crates/tui/src/config/tests.rs
+++ b/crates/tui/src/config/tests.rs
@@ -4277,6 +4277,46 @@ model = "custom-qianfan-service-id"
Ok(())
}
+// Writes a config file with `reasoning_stream_style` in the `[providers.openai]`
+// table and checks that the loaded provider config exposes the value unchanged.
+#[test]
+fn provider_config_loads_reasoning_stream_style() -> Result<()> {
+ let _lock = lock_test_env();
+ // Process id plus a nanosecond timestamp keeps the scratch directory name unique.
+ let nanos = SystemTime::now()
+ .duration_since(UNIX_EPOCH)
+ .unwrap()
+ .as_nanos();
+ let temp_root = env::temp_dir().join(format!(
+ "codewhale-tui-reasoning-style-{}-{}",
+ std::process::id(),
+ nanos
+ ));
+ fs::create_dir_all(&temp_root)?;
+ let _guard = EnvGuard::new(&temp_root);
+
+ let config_path = temp_root.join(".deepseek").join("config.toml");
+ ensure_parent_dir(&config_path)?;
+ fs::write(
+ &config_path,
+ r#"provider = "openai"
+
+[providers.openai]
+api_key = "openai-table-key"
+base_url = "https://openai-compatible.example/v1"
+model = "custom-reasoner"
+reasoning_stream_style = "inline_tags"
+"#,
+ )?;
+
+ let config = Config::load(None, None)?;
+ let openai = config
+ .provider_config_for(ApiProvider::Openai)
+ .expect("openai provider config");
+ // The raw string is stored as written; parsing into a style happens elsewhere.
+ assert_eq!(
+ openai.reasoning_stream_style.as_deref(),
+ Some("inline_tags")
+ );
+ Ok(())
+}
+
// Regression for issue #1714: `codewhale --provider openai --model
// MiniMax-M2.7` forwards the choice via DEEPSEEK_MODEL (never
// OPENAI_MODEL) and uses the DEFAULT base_url. The explicit custom model
diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md
index db8040a08..89714c23f 100644
--- a/docs/CONFIGURATION.md
+++ b/docs/CONFIGURATION.md
@@ -986,6 +986,7 @@ If you are upgrading from older releases:
- `api_key` (string, required for hosted providers): must be non-empty for DeepSeek/hosted providers (or set the provider API key env var). Self-hosted SGLang, vLLM, and Ollama can omit it.
- `base_url` (string, optional): defaults to `https://api.deepseek.com/beta` for DeepSeek's OpenAI-compatible Chat Completions API, including legacy `provider = "deepseek-cn"` configs. Other defaults are `https://integrate.api.nvidia.com/v1` for `nvidia-nim`, `https://api.openai.com/v1` for `openai`, `https://api.atlascloud.ai/v1` for `atlascloud`, `https://maas-openapi.wanjiedata.com/api/v1` for `wanjie-ark`, `https://ark.cn-beijing.volces.com/api/coding/v3` for `volcengine`, `https://openrouter.ai/api/v1` for `openrouter`, `https://token-plan-sgp.xiaomimimo.com/v1` for `xiaomi-mimo` when the API key starts with `tp-...` and `https://api.xiaomimimo.com/v1` otherwise, `https://api.novita.ai/openai/v1` for `novita`, `https://api.fireworks.ai/inference/v1` for `fireworks`, `https://api.siliconflow.com/v1` for `siliconflow`, `https://api.siliconflow.cn/v1` for `siliconflow-CN`, `https://api.arcee.ai/api/v1` for `arcee`, `https://api.moonshot.ai/v1` for `moonshot`, `https://api.minimax.io/v1` for `minimax`, `https://api.z.ai/api/coding/paas/v4` for `zai`, `https://api.stepfun.ai/v1` for `stepfun`, `https://api.deepinfra.com/v1/openai` for `deepinfra`, `https://router.huggingface.co/v1` for `huggingface`, `https://api.together.xyz/v1` for `together`, `https://api.baiduqianfan.ai/v1` for `qianfan`, `https://chatgpt.com/backend-api` for `openai-codex`, `https://api.anthropic.com` for `anthropic`, `http://localhost:30000/v1` for `sglang`, `http://localhost:8000/v1` for `vllm`, and `http://localhost:11434/v1` for `ollama`. Set `base_url = "https://token-plan-cn.xiaomimimo.com/v1"` explicitly if your Xiaomi MiMo Token Plan account is provisioned in the China region. Set `https://api.deepseek.com` or `https://api.deepseek.com/v1` explicitly to opt out of DeepSeek beta features.
- `path_suffix` (string, optional provider-table key): override the chat-completions path for OpenAI-compatible gateways that do not serve `/v1/chat/completions`. For example, `[providers.openai] path_suffix = "/chat/completions"` sends chat requests to the unversioned base URL plus `/chat/completions`; `models` and `beta/*` requests keep their normal routing.
+- `reasoning_stream_style` (string, optional provider-table key): override how streaming reasoning is separated from answer text for the active provider route. Use `separate_field` for `reasoning_content` / `reasoning` deltas, `inline_tags` for gateways that stream `<think>...</think>` inside `delta.content`, or `none` to render incoming content exactly as answer text.
- `[providers.<name>.auth]` (table, optional): provider-scoped auth source metadata. `source = "command"` stores a command argv plus optional `timeout_ms`; `source = "secret"` stores a `secret_id`. This slice lets provider readiness, `/provider`, and doctor JSON report the auth source class without exposing command argv output or secret values; executing commands and resolving external secret material is handled by the follow-up resolver work.
- `insecure_skip_tls_verify` (bool, optional provider-table key): legacy compatibility key, disabled by default. When true on the active provider table, provider clients reject the configuration instead of skipping TLS certificate verification. Use `SSL_CERT_FILE` for corporate or private CA bundles; `codewhale doctor` reports stale uses of this setting.
- `default_text_model` (string, optional): defaults to `deepseek-v4-pro` for DeepSeek and generic OpenAI-compatible endpoints, `deepseek-ai/deepseek-v4-pro` for NVIDIA NIM, `deepseek-ai/deepseek-v4-flash` for AtlasCloud, `deepseek-reasoner` for Wanjie Ark, `DeepSeek-V4-Pro` for Volcengine Ark, `deepseek/deepseek-v4-pro` for OpenRouter and Novita, `mimo-v2.5-pro` for Xiaomi MiMo, `accounts/fireworks/models/deepseek-v4-pro` for Fireworks, `deepseek-ai/DeepSeek-V4-Pro` for SiliconFlow and DeepInfra, `trinity-large-thinking` for Arcee AI, `kimi-k2.7-code` for Moonshot, `MiniMax-M3` for MiniMax, `GLM-5.2` for Z.ai, `step-3.7-flash` for StepFun, `ernie-4.0-turbo-8k` for Qianfan, `deepseek-ai/DeepSeek-V4-Pro` for SGLang/vLLM, and `deepseek-coder:1.3b` for Ollama. Hugging Face and Together AI both default to `deepseek-ai/DeepSeek-V4-Pro`. Current public DeepSeek IDs are `deepseek-v4-pro` and `deepseek-v4-flash`, both with 1M context windows, 384K max output, and thinking mode enabled by default. Legacy `deepseek-chat` and `deepseek-reasoner` remain compatibility aliases for `deepseek-v4-flash` until July 24, 2026, except SiliconFlow maps `deepseek-reasoner` and `deepseek-r1` to its Pro model while `deepseek-chat` and `deepseek-v3` map to Flash. Provider-specific mappings translate `deepseek-v4-pro` / `deepseek-v4-flash` to each provider's model ID where supported. 
OpenRouter also recognizes recent large IDs such as `arcee-ai/trinity-large-thinking`, `minimax/minimax-m3`, `minimax/minimax-2.7`, `xiaomi/mimo-v2.5-pro`, `qwen/qwen3.6-flash`, `qwen/qwen3.6-35b-a3b`, `qwen/qwen3.6-max-preview`, `qwen/qwen3.6-27b`, `qwen/qwen3.6-plus`, `qwen/qwen3.7-max`, `google/gemma-4-31b-it`, `moonshotai/kimi-k2.7-code`, `moonshotai/kimi-k2.6`, `nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free`, and `nvidia/nemotron-3-ultra-550b-a55b`; direct Arcee uses bare IDs such as `trinity-large-thinking` and `trinity-large-preview`; direct Moonshot recognizes `kimi-k2.7-code`, `kimi-k2.6`, and Kimi Code's stable `kimi-for-coding`; direct MiniMax recognizes `MiniMax-M3` and the documented M2.x chat model IDs; direct Xiaomi MiMo recognizes chat IDs `mimo-v2.5-pro` and `mimo-v2.5`, while TTS IDs are selected through `codewhale speech` / `tts`. Generic `openai`, `atlascloud`, `wanjie-ark`, `xiaomi-mimo`, `arcee`, `moonshot`, `minimax`, `zai`, `stepfun`, `qianfan`, and Ollama model IDs are passed through unchanged after known aliases are normalized. OpenRouter and SiliconFlow provider configs with a custom `base_url` also preserve explicit model values, which lets OpenAI-compatible gateways accept bare model IDs. Use `/models` or `codewhale models` to discover live IDs from your configured endpoint. `CODEWHALE_MODEL` overrides this for a single process; `DEEPSEEK_MODEL` is the legacy alias.