@@ -28,6 +28,18 @@ def pytest_addoption(parser) -> None:
2828 "Pass an integer (e.g., 2, 50) or 'all' for no limit."
2929 ),
3030 )
31+ group .addoption (
32+ "--ep-num-runs" ,
33+ action = "store" ,
34+ default = None ,
35+ help = ("Override the number of runs for evaluation_test. Pass an integer (e.g., 1, 5, 10)." ),
36+ )
37+ group .addoption (
38+ "--ep-max-concurrent-rollouts" ,
39+ action = "store" ,
40+ default = None ,
41+ help = ("Override the maximum number of concurrent rollouts. Pass an integer (e.g., 8, 50, 100)." ),
42+ )
3143 group .addoption (
3244 "--ep-print-summary" ,
3345 action = "store_true" ,
@@ -56,14 +68,13 @@ def pytest_addoption(parser) -> None:
5668 default = None ,
5769 help = (
5870 "Set reasoning.effort for providers that support it (e.g., Fireworks) via LiteLLM extra_body. "
59- "Values: low|medium|high"
71+ "Values: low|medium|high|none "
6072 ),
6173 )
6274 group .addoption (
6375 "--ep-max-retry" ,
6476 action = "store" ,
65- type = int ,
66- default = 0 ,
77+ default = None ,
6778 help = ("Failed rollouts (with rollout_status.code indicating error) will be retried up to this many times." ),
6879 )
6980 group .addoption (
@@ -92,6 +103,20 @@ def _normalize_max_rows(val: Optional[str]) -> Optional[str]:
92103 return None
93104
94105
106+ def _normalize_number (val : Optional [str ]) -> Optional [str ]:
107+ if val is None :
108+ return None
109+ s = val .strip ()
110+ # Validate int; if invalid, ignore and return None (no override)
111+ try :
112+ num = int (s )
113+ if num <= 0 :
114+ return None # num_runs must be positive
115+ return str (num )
116+ except ValueError :
117+ return None
118+
119+
95120def pytest_configure (config ) -> None :
96121 # Quiet LiteLLM INFO spam early in pytest session unless user set a level
97122 try :
@@ -110,6 +135,16 @@ def pytest_configure(config) -> None:
110135 if norm is not None :
111136 os .environ ["EP_MAX_DATASET_ROWS" ] = norm
112137
138+ num_runs_val = config .getoption ("--ep-num-runs" )
139+ norm_runs = _normalize_number (num_runs_val )
140+ if norm_runs is not None :
141+ os .environ ["EP_NUM_RUNS" ] = norm_runs
142+
143+ max_concurrent_val = config .getoption ("--ep-max-concurrent-rollouts" )
144+ norm_concurrent = _normalize_number (max_concurrent_val )
145+ if norm_concurrent is not None :
146+ os .environ ["EP_MAX_CONCURRENT_ROLLOUTS" ] = norm_concurrent
147+
113148 if config .getoption ("--ep-print-summary" ):
114149 os .environ ["EP_PRINT_SUMMARY" ] = "1"
115150
@@ -118,10 +153,13 @@ def pytest_configure(config) -> None:
118153 os .environ ["EP_SUMMARY_JSON" ] = summary_json_path
119154
120155 max_retry = config .getoption ("--ep-max-retry" )
121- os .environ ["EP_MAX_RETRY" ] = str (max_retry )
156+ norm_max_retry = _normalize_number (max_retry )
157+ if norm_max_retry is not None :
158+ os .environ ["EP_MAX_RETRY" ] = norm_max_retry
122159
123160 fail_on_max_retry = config .getoption ("--ep-fail-on-max-retry" )
124- os .environ ["EP_FAIL_ON_MAX_RETRY" ] = fail_on_max_retry
161+ if fail_on_max_retry is not None :
162+ os .environ ["EP_FAIL_ON_MAX_RETRY" ] = fail_on_max_retry
125163
126164 # Allow ad-hoc overrides of input params via CLI flags
127165 try :
@@ -153,7 +191,8 @@ def pytest_configure(config) -> None:
153191 if reasoning_effort :
154192 # Always place under extra_body to avoid LiteLLM rejecting top-level params
155193 eb = merged .setdefault ("extra_body" , {})
156- eb ["reasoning_effort" ] = str (reasoning_effort )
194+ # Convert "none" string to None value for API compatibility
195+ eb ["reasoning_effort" ] = None if reasoning_effort .lower () == "none" else str (reasoning_effort )
157196 if merged :
158197 os .environ ["EP_INPUT_PARAMS_JSON" ] = _json .dumps (merged )
159198 except Exception :
0 commit comments