Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions crates/forge_app/src/apply_tunable_parameters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ impl ApplyTunableParameters {
if let Some(temperature) = self.agent.temperature {
ctx = ctx.temperature(temperature);
}
if let Some(reasoning_effort) = self.agent.reasoning_effort {
ctx = ctx.reasoning_effort(reasoning_effort);
}
if let Some(service_tier) = self.agent.service_tier {
ctx = ctx.service_tier(service_tier);
}
if let Some(top_p) = self.agent.top_p {
ctx = ctx.top_p(top_p);
}
Expand Down
5 changes: 4 additions & 1 deletion crates/forge_app/src/dto/openai/request.rs
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,8 @@ pub struct Request {
#[serde(skip_serializing_if = "Option::is_none")]
pub reasoning_effort: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub service_tier: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub max_completion_tokens: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub thinking: Option<ThinkingConfig>,
Expand Down Expand Up @@ -396,7 +398,8 @@ impl From<Context> for Request {
stream_options: Some(StreamOptions { include_usage: Some(true) }),
session_id: context.conversation_id.map(|id| id.to_string()),
reasoning: context.reasoning,
reasoning_effort: Default::default(),
reasoning_effort: context.reasoning_effort.map(|re| re.to_string()),
service_tier: context.service_tier.map(|st| st.api_str().to_string()),
max_completion_tokens: Default::default(),
thinking: Default::default(),
}
Expand Down
22 changes: 20 additions & 2 deletions crates/forge_domain/src/agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ use merge::Merge;

use crate::{
AgentDefinition, AgentId, Compact, Error, EventContext, MaxTokens, ModelId, ProviderId,
ReasoningConfig, Result, SystemContext, Temperature, Template, ToolDefinition, ToolName, TopK,
TopP, Workflow,
ReasoningConfig, ReasoningEffortLevel, Result, ServiceTier, SystemContext, Temperature, Template,
ToolDefinition, ToolName, TopK, TopP, Workflow,
};

/// Runtime agent representation with required model and provider
Expand Down Expand Up @@ -54,6 +54,12 @@ pub struct Agent {
/// Temperature used for agent
pub temperature: Option<Temperature>,

/// Reasoning effort level for this agent
pub reasoning_effort: Option<ReasoningEffortLevel>,

/// Service tier for this agent (fast = priority processing at 2x cost)
pub service_tier: Option<ServiceTier>,

/// Top-p (nucleus sampling) used for agent
pub top_p: Option<TopP>,

Expand Down Expand Up @@ -90,6 +96,8 @@ impl Agent {
compact: Compact::default(),
custom_rules: Default::default(),
temperature: Default::default(),
reasoning_effort: Default::default(),
service_tier: Default::default(),
top_p: Default::default(),
top_k: Default::default(),
max_tokens: Default::default(),
Expand Down Expand Up @@ -128,6 +136,14 @@ impl Agent {
agent.temperature = Some(temperature);
}

if let Some(reasoning_effort) = workflow.reasoning_effort {
agent.reasoning_effort = Some(reasoning_effort);
}

if let Some(service_tier) = workflow.service_tier {
agent.service_tier = Some(service_tier);
}

if let Some(top_p) = workflow.top_p {
agent.top_p = Some(top_p);
}
Expand Down Expand Up @@ -199,6 +215,8 @@ impl Agent {
system_prompt: def.system_prompt,
user_prompt: def.user_prompt,
temperature: def.temperature,
reasoning_effort: def.reasoning_effort,
service_tier: def.service_tier,
max_tokens: def.max_tokens,
top_p: def.top_p,
top_k: def.top_k,
Expand Down
35 changes: 34 additions & 1 deletion crates/forge_domain/src/agent_definition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@ use strum_macros::Display as StrumDisplay;
use crate::compact::Compact;
use crate::temperature::Temperature;
use crate::template::Template;
use crate::{EventContext, MaxTokens, ModelId, ProviderId, SystemContext, ToolName, TopK, TopP};
use crate::{
EventContext, MaxTokens, ModelId, ProviderId, ReasoningEffortLevel, ServiceTier, SystemContext, ToolName,
TopK, TopP,
};

// Unique identifier for an agent
#[derive(Debug, Display, Eq, PartialEq, Hash, Clone, Serialize, Deserialize, JsonSchema)]
Expand Down Expand Up @@ -130,6 +133,34 @@ pub struct AgentDefinition {
#[merge(strategy = crate::merge::option)]
pub temperature: Option<Temperature>,

/// Reasoning effort level for this agent
///
/// Controls the reasoning effort for models that support variable thinking
/// levels (e.g., GPT-5.x). Sent as `reasoning_effort` in the API request.
/// - `none` — no reasoning, raw completion
/// - `minimal` — very light reasoning
/// - `low` — light reasoning, fast responses
/// - `medium` — balanced reasoning effort (default for most models)
/// - `high` — thorough reasoning
/// - `xhigh` — maximum reasoning, most thorough responses
/// - If not specified, the model provider's default will be used
#[serde(default)]
#[serde(skip_serializing_if = "Option::is_none")]
#[merge(strategy = crate::merge::option)]
pub reasoning_effort: Option<ReasoningEffortLevel>,

/// Service tier for API requests
///
/// Controls the processing priority for models that support service tiers:
/// - `fast` — priority processing at 2x cost (fastest inference)
/// - `flex` — flexible processing at reduced cost
/// - `auto` — let the API choose the appropriate tier
/// - If not specified, the model provider's default will be used
#[serde(default)]
#[serde(skip_serializing_if = "Option::is_none")]
#[merge(strategy = crate::merge::option)]
pub service_tier: Option<ServiceTier>,

/// Top-p (nucleus sampling) used for agent
///
/// Controls the diversity of the model's output by considering only the
Expand Down Expand Up @@ -264,6 +295,8 @@ impl AgentDefinition {
compact: Default::default(),
custom_rules: Default::default(),
temperature: Default::default(),
reasoning_effort: Default::default(),
service_tier: Default::default(),
top_p: Default::default(),
top_k: Default::default(),
max_tokens: Default::default(),
Expand Down
4 changes: 4 additions & 0 deletions crates/forge_domain/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,10 @@ pub struct Context {
#[serde(default, skip_serializing_if = "Option::is_none")]
pub temperature: Option<Temperature>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub reasoning_effort: Option<crate::ReasoningEffortLevel>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub service_tier: Option<crate::ServiceTier>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub top_p: Option<TopP>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub top_k: Option<TopK>,
Expand Down
4 changes: 4 additions & 0 deletions crates/forge_domain/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ mod point;
mod policies;
mod provider;
mod reasoning;
mod reasoning_effort;
mod service_tier;
mod repo;
mod result_stream_ext;
mod retry_config;
Expand Down Expand Up @@ -92,6 +94,8 @@ pub use point::*;
pub use policies::*;
pub use provider::*;
pub use reasoning::*;
pub use reasoning_effort::*;
pub use service_tier::*;
pub use repo::*;
pub use result_stream_ext::*;
pub use retry_config::*;
Expand Down
147 changes: 147 additions & 0 deletions crates/forge_domain/src/reasoning_effort.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
use std::fmt;

use schemars::JsonSchema;
use serde::{Deserialize, Serialize};

/// How much thinking a model should invest before answering.
///
/// Serialized verbatim as the `reasoning_effort` field of an
/// OpenAI-compatible request body. Models with variable thinking levels
/// (e.g. GPT-5.x, Codex) interpret the values as:
/// - `none` — skip reasoning entirely, raw completion
/// - `minimal` — very light reasoning
/// - `low` — light reasoning, fast responses
/// - `medium` — balanced effort (the usual provider default)
/// - `high` — thorough reasoning
/// - `xhigh` — maximum effort, the most thorough responses
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "lowercase")]
pub enum ReasoningEffortLevel {
    None,
    Minimal,
    Low,
    Medium,
    High,
    Xhigh,
}

impl ReasoningEffortLevel {
/// Returns the string representation used in API requests
pub fn as_str(&self) -> &'static str {
match self {
ReasoningEffortLevel::None => "none",
ReasoningEffortLevel::Minimal => "minimal",
ReasoningEffortLevel::Low => "low",
ReasoningEffortLevel::Medium => "medium",
ReasoningEffortLevel::High => "high",
ReasoningEffortLevel::Xhigh => "xhigh",
}
}
}

impl fmt::Display for ReasoningEffortLevel {
    /// Delegates to [`ReasoningEffortLevel::as_str`], so `to_string()`
    /// produces the same lowercase token the API expects.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(self.as_str())
    }
}

#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;
    use serde_json::json;

    use super::*;

    /// Every variant paired with its canonical lowercase wire string;
    /// the table drives all round-trip tests below.
    fn fixtures() -> Vec<(ReasoningEffortLevel, &'static str)> {
        vec![
            (ReasoningEffortLevel::None, "none"),
            (ReasoningEffortLevel::Minimal, "minimal"),
            (ReasoningEffortLevel::Low, "low"),
            (ReasoningEffortLevel::Medium, "medium"),
            (ReasoningEffortLevel::High, "high"),
            (ReasoningEffortLevel::Xhigh, "xhigh"),
        ]
    }

    #[test]
    fn test_reasoning_effort_level_serialization() {
        for (level, text) in fixtures() {
            assert_eq!(serde_json::to_value(level).unwrap(), json!(text));
        }
    }

    #[test]
    fn test_reasoning_effort_level_deserialization() {
        for (level, text) in fixtures() {
            let parsed: ReasoningEffortLevel = serde_json::from_value(json!(text)).unwrap();
            assert_eq!(parsed, level);
        }
    }

    #[test]
    fn test_reasoning_effort_level_invalid_deserialization() {
        // Unknown tokens must be rejected, not silently mapped to a default.
        let result: Result<ReasoningEffortLevel, _> = serde_json::from_value(json!("invalid"));
        assert!(result.is_err());
    }

    #[test]
    fn test_reasoning_effort_level_display() {
        for (level, text) in fixtures() {
            assert_eq!(level.to_string(), text);
        }
    }

    #[test]
    fn test_reasoning_effort_level_as_str() {
        for (level, text) in fixtures() {
            assert_eq!(level.as_str(), text);
        }
    }

    #[test]
    fn test_reasoning_effort_level_in_struct() {
        // The enum must deserialize when embedded as a struct field,
        // matching how it appears inside a request payload.
        #[derive(Serialize, Deserialize, Debug)]
        struct TestStruct {
            reasoning_effort: ReasoningEffortLevel,
        }

        let parsed: TestStruct =
            serde_json::from_value(json!({ "reasoning_effort": "medium" })).unwrap();
        assert_eq!(parsed.reasoning_effort, ReasoningEffortLevel::Medium);
    }
}
Loading
Loading