Skip to content

Commit 3dbcd59

Browse files
author
Dylan Huang
committed
test
1 parent 4b71ddb commit 3dbcd59

6 files changed

Lines changed: 30 additions & 186 deletions

File tree

eval_protocol/cli.py

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -80,11 +80,6 @@ def _configure_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParse
8080
"--env-file",
8181
help="Path to .env file containing secrets to upload (default: .env in current directory)",
8282
)
83-
upload_parser.add_argument(
84-
"--force",
85-
action="store_true",
86-
help="Overwrite existing evaluator with the same ID",
87-
)
8883

8984
# Auto-generate flags from SDK Fireworks().evaluators.create() signature
9085
create_evaluator_fn = create_fireworks_client().evaluators.create
@@ -136,7 +131,6 @@ def _configure_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParse
136131

137132
rft_parser.add_argument("--yes", "-y", action="store_true", help="Non-interactive mode")
138133
rft_parser.add_argument("--dry-run", action="store_true", help="Print planned SDK call without sending")
139-
rft_parser.add_argument("--force", action="store_true", help="Overwrite existing evaluator with the same ID")
140134
rft_parser.add_argument("--skip-validation", action="store_true", help="Skip local dataset/evaluator validation")
141135
rft_parser.add_argument(
142136
"--ignore-docker",

eval_protocol/cli_commands/create_rft.py

Lines changed: 28 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -567,37 +567,35 @@ def _upload_and_ensure_evaluator(
567567
evaluator_resource_name: str,
568568
api_key: str,
569569
api_base: str,
570-
force: bool,
571570
) -> bool:
572571
"""Ensure the evaluator exists and is ACTIVE, uploading it if needed."""
573-
# Optional short-circuit: if evaluator already exists and not forcing, skip upload path
574-
if not force:
575-
try:
576-
headers = {
577-
"Authorization": f"Bearer {api_key}",
578-
"Content-Type": "application/json",
579-
"User-Agent": get_user_agent(),
580-
}
581-
resp = requests.get(f"{api_base}/v1/{evaluator_resource_name}", headers=headers, timeout=10)
582-
if resp.ok:
583-
state = resp.json().get("state", "STATE_UNSPECIFIED")
584-
print(f"✓ Evaluator exists (state: {state}). Skipping upload (use --force to overwrite).")
585-
# Poll for ACTIVE before proceeding
586-
print(f"Waiting for evaluator '{evaluator_id}' to become ACTIVE...")
587-
if not _poll_evaluator_status(
588-
evaluator_resource_name=evaluator_resource_name,
589-
api_key=api_key,
590-
api_base=api_base,
591-
timeout_minutes=10,
592-
):
593-
dashboard_url = _build_evaluator_dashboard_url(evaluator_id)
594-
print("\n❌ Evaluator is not ready within the timeout period.")
595-
print(f"📊 Please check the evaluator status at: {dashboard_url}")
596-
print(" Wait for it to become ACTIVE, then run 'eval-protocol create rft' again.")
597-
return False
598-
return True
599-
except requests.exceptions.RequestException:
600-
pass
572+
# Check if evaluator already exists
573+
try:
574+
headers = {
575+
"Authorization": f"Bearer {api_key}",
576+
"Content-Type": "application/json",
577+
"User-Agent": get_user_agent(),
578+
}
579+
resp = requests.get(f"{api_base}/v1/{evaluator_resource_name}", headers=headers, timeout=10)
580+
if resp.ok:
581+
state = resp.json().get("state", "STATE_UNSPECIFIED")
582+
print(f"✓ Evaluator exists (state: {state}). Skipping upload.")
583+
# Poll for ACTIVE before proceeding
584+
print(f"Waiting for evaluator '{evaluator_id}' to become ACTIVE...")
585+
if not _poll_evaluator_status(
586+
evaluator_resource_name=evaluator_resource_name,
587+
api_key=api_key,
588+
api_base=api_base,
589+
timeout_minutes=10,
590+
):
591+
dashboard_url = _build_evaluator_dashboard_url(evaluator_id)
592+
print("\n❌ Evaluator is not ready within the timeout period.")
593+
print(f"📊 Please check the evaluator status at: {dashboard_url}")
594+
print(" Wait for it to become ACTIVE, then run 'eval-protocol create rft' again.")
595+
return False
596+
return True
597+
except requests.exceptions.RequestException:
598+
pass
601599

602600
# Ensure evaluator exists by invoking the upload flow programmatically
603601
try:
@@ -622,14 +620,10 @@ def _upload_and_ensure_evaluator(
622620
id=evaluator_id,
623621
display_name=None,
624622
description=None,
625-
force=force, # Pass through the --force flag
626623
yes=True,
627-
env_file=None, # Add the new env_file parameter
624+
env_file=None,
628625
)
629626

630-
if force:
631-
print(f"🔄 Force flag enabled - will overwrite existing evaluator '{evaluator_id}'")
632-
633627
rc = upload_command(upload_args)
634628
if rc == 0:
635629
print(f"✓ Uploaded/ensured evaluator: {evaluator_id}")
@@ -738,7 +732,6 @@ def create_rft_command(args) -> int:
738732
evaluator_arg: Optional[str] = getattr(args, "evaluator", None)
739733
non_interactive: bool = bool(getattr(args, "yes", False))
740734
dry_run: bool = bool(getattr(args, "dry_run", False))
741-
force: bool = bool(getattr(args, "force", False))
742735
skip_validation: bool = bool(getattr(args, "skip_validation", False))
743736
ignore_docker: bool = bool(getattr(args, "ignore_docker", False))
744737
docker_build_extra: str = getattr(args, "docker_build_extra", "") or ""
@@ -816,7 +809,6 @@ def create_rft_command(args) -> int:
816809
evaluator_resource_name=evaluator_resource_name,
817810
api_key=api_key,
818811
api_base=api_base,
819-
force=force,
820812
):
821813
return 1
822814

eval_protocol/cli_commands/upload.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,6 @@ def upload_command(args: argparse.Namespace) -> int:
289289
base_id = getattr(args, "id", None)
290290
display_name = getattr(args, "display_name", None)
291291
description = getattr(args, "description", None)
292-
force = bool(getattr(args, "force", False))
293292
env_file = getattr(args, "env_file", None)
294293

295294
# Load secrets from .env file and ensure they're available on Fireworks
@@ -382,7 +381,6 @@ def upload_command(args: argparse.Namespace) -> int:
382381
evaluator_id=evaluator_id,
383382
display_name=display_name or evaluator_id,
384383
description=description or f"Evaluator for {qualname}",
385-
force=force,
386384
entry_point=entry_point,
387385
)
388386
name = result.get("name", evaluator_id) if isinstance(result, dict) else evaluator_id

eval_protocol/evaluation.py

Lines changed: 2 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ def _create_tar_gz_with_ignores(output_path: str, source_dir: str) -> int:
155155
logger.info(f"Created {output_path} ({size_bytes:,} bytes)")
156156
return size_bytes
157157

158-
def create(self, evaluator_id, display_name=None, description=None, force=False):
158+
def create(self, evaluator_id, display_name=None, description=None):
159159
auth_token = self.api_key or get_fireworks_api_key()
160160
account_id = self.account_id or get_fireworks_account_id()
161161
if not account_id and auth_token:
@@ -203,22 +203,6 @@ def create(self, evaluator_id, display_name=None, description=None, force=False)
203203
logger.info(f"Creating evaluator '{evaluator_id}' for account '{account_id}'...")
204204

205205
try:
206-
if force:
207-
try:
208-
logger.info("Checking if evaluator exists")
209-
existing_evaluator = client.evaluators.get(evaluator_id=evaluator_id)
210-
if existing_evaluator:
211-
logger.info(f"Evaluator '{evaluator_id}' already exists, deleting and recreating...")
212-
try:
213-
client.evaluators.delete(evaluator_id=evaluator_id)
214-
logger.info(f"Successfully deleted evaluator '{evaluator_id}'")
215-
except fireworks.NotFoundError:
216-
logger.info(f"Evaluator '{evaluator_id}' not found, creating...")
217-
except fireworks.APIError as e:
218-
logger.warning(f"Error deleting evaluator: {str(e)}")
219-
except fireworks.NotFoundError:
220-
logger.info(f"Evaluator '{evaluator_id}' does not exist, creating...")
221-
222206
# Create evaluator using SDK
223207
result = client.evaluators.create(
224208
evaluator_id=evaluator_id,
@@ -387,7 +371,6 @@ def create_evaluation(
387371
evaluator_id: str,
388372
display_name: Optional[str] = None,
389373
description: Optional[str] = None,
390-
force: bool = False,
391374
account_id: Optional[str] = None,
392375
api_key: Optional[str] = None,
393376
entry_point: Optional[str] = None,
@@ -399,7 +382,6 @@ def create_evaluation(
399382
evaluator_id: Unique identifier for the evaluator
400383
display_name: Display name for the evaluator
401384
description: Description for the evaluator
402-
force: If True, delete and recreate if evaluator exists
403385
account_id: Optional Fireworks account ID
404386
api_key: Optional Fireworks API key
405387
entry_point: Optional entry point (module::function or path::function)
@@ -410,4 +392,4 @@ def create_evaluation(
410392
entry_point=entry_point,
411393
)
412394

413-
return evaluator.create(evaluator_id, display_name, description, force)
395+
return evaluator.create(evaluator_id, display_name, description)

tests/test_cli_create_rft.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,6 @@ def _fake_run_evaluator_test(project_root, pytest_target, ignore_docker, docker_
239239
evaluator=None,
240240
yes=True,
241241
dry_run=True,
242-
force=False,
243242
env_file=None,
244243
dataset=None,
245244
dataset_jsonl=str(ds_path),
@@ -299,7 +298,6 @@ def _fake_run_evaluator_test(project_root, pytest_target, ignore_docker, docker_
299298
evaluator=None,
300299
yes=True,
301300
dry_run=True,
302-
force=False,
303301
env_file=None,
304302
dataset=None,
305303
dataset_jsonl=str(ds_path),
@@ -351,7 +349,6 @@ def _fake_run_evaluator_test(project_root, pytest_target, ignore_docker, docker_
351349
evaluator="my-evaluator",
352350
yes=True,
353351
dry_run=True,
354-
force=False,
355352
env_file=None,
356353
dataset=None,
357354
dataset_jsonl=str(ds_path),
@@ -401,7 +398,6 @@ def _fake_run_evaluator_test(project_root, pytest_target, ignore_docker, docker_
401398
evaluator="my-evaluator",
402399
yes=True,
403400
dry_run=True,
404-
force=False,
405401
env_file=None,
406402
dataset=None,
407403
dataset_jsonl=str(ds_path),
@@ -462,7 +458,6 @@ def _fake_create_dataset_from_jsonl(account_id, api_key, api_base, dataset_id, d
462458
setattr(args, "evaluator", None)
463459
setattr(args, "yes", True)
464460
setattr(args, "dry_run", False)
465-
setattr(args, "force", False)
466461
setattr(args, "env_file", None)
467462
setattr(args, "dataset", None)
468463
setattr(args, "dataset_jsonl", str(ds_path))
@@ -530,7 +525,6 @@ def _fake_create_dataset_from_jsonl(account_id, api_key, api_base, dataset_id, d
530525
evaluator=cr._normalize_evaluator_id("foo_eval-test_bar_evaluation"),
531526
yes=True,
532527
dry_run=False,
533-
force=False,
534528
env_file=None,
535529
dataset=None,
536530
dataset_jsonl=str(ds_path),
@@ -600,7 +594,6 @@ def test_create_rft_interactive_selector_single_test(rft_test_harness, monkeypat
600594
evaluator=None,
601595
yes=True,
602596
dry_run=False,
603-
force=False,
604597
env_file=None,
605598
dataset=None,
606599
dataset_jsonl=str(ds_path),
@@ -674,7 +667,6 @@ def raise_for_status(self):
674667
evaluator="some-eval",
675668
yes=True,
676669
dry_run=False,
677-
force=False,
678670
env_file=None,
679671
dataset=None,
680672
dataset_jsonl=str(ds_path),
@@ -727,7 +719,6 @@ def _raise(*a, **k):
727719
evaluator="some-eval",
728720
yes=True,
729721
dry_run=False,
730-
force=False,
731722
env_file=None,
732723
dataset=None,
733724
dataset_jsonl=str(project / "dataset.jsonl"),
@@ -789,7 +780,6 @@ def _fake_create_dataset_from_jsonl(account_id, api_key, api_base, dataset_id, d
789780
evaluator=None,
790781
yes=True,
791782
dry_run=False,
792-
force=False,
793783
env_file=None,
794784
dataset=None,
795785
dataset_jsonl=None,
@@ -850,7 +840,6 @@ def _fake_create_dataset_from_jsonl(account_id, api_key, api_base, dataset_id, d
850840
evaluator=None,
851841
yes=True,
852842
dry_run=False,
853-
force=False,
854843
env_file=None,
855844
dataset=None,
856845
dataset_jsonl=None,
@@ -912,7 +901,6 @@ def _fake_create_dataset_from_jsonl(account_id, api_key, api_base, dataset_id, d
912901
evaluator=None,
913902
yes=True,
914903
dry_run=False,
915-
force=False,
916904
env_file=None,
917905
dataset=None,
918906
dataset_jsonl=None,
@@ -1007,7 +995,6 @@ def _fake_create_dataset_from_jsonl(account_id, api_key, api_base, dataset_id, d
1007995
evaluator=eval_id,
1008996
yes=True,
1009997
dry_run=False,
1010-
force=False,
1011998
env_file=None,
1012999
dataset=None,
10131000
dataset_jsonl=None,
@@ -1175,7 +1162,6 @@ def _fake_create_dataset_from_jsonl(account_id, api_key, api_base, dataset_id, d
11751162
evaluator=None,
11761163
yes=True,
11771164
dry_run=False,
1178-
force=False,
11791165
env_file=None,
11801166
dataset=None,
11811167
dataset_jsonl=str(explicit_jsonl),
@@ -1266,7 +1252,6 @@ def _fake_create_dataset_from_jsonl(account_id, api_key, api_base, dataset_id, d
12661252
evaluator=None,
12671253
yes=True,
12681254
dry_run=False,
1269-
force=False,
12701255
env_file=None,
12711256
dataset=None,
12721257
dataset_jsonl=None,

0 commit comments

Comments
 (0)