test

Dylan Huang · Dylan Huang · commit 3dbcd5980210 · 2026-01-08T15:20:04.000-08:00
diff --git a/eval_protocol/cli.py b/eval_protocol/cli.py
@@ -80,11 +80,6 @@ def _configure_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParse
         "--env-file",
         help="Path to .env file containing secrets to upload (default: .env in current directory)",
     )
-    upload_parser.add_argument(
-        "--force",
-        action="store_true",
-        help="Overwrite existing evaluator with the same ID",
-    )
 
     # Auto-generate flags from SDK Fireworks().evaluators.create() signature
     create_evaluator_fn = create_fireworks_client().evaluators.create
@@ -136,7 +131,6 @@ def _configure_parser(parser: argparse.ArgumentParser) -> argparse.ArgumentParse
 
     rft_parser.add_argument("--yes", "-y", action="store_true", help="Non-interactive mode")
     rft_parser.add_argument("--dry-run", action="store_true", help="Print planned SDK call without sending")
-    rft_parser.add_argument("--force", action="store_true", help="Overwrite existing evaluator with the same ID")
     rft_parser.add_argument("--skip-validation", action="store_true", help="Skip local dataset/evaluator validation")
     rft_parser.add_argument(
         "--ignore-docker",
diff --git a/eval_protocol/cli_commands/create_rft.py b/eval_protocol/cli_commands/create_rft.py
@@ -567,37 +567,35 @@ def _upload_and_ensure_evaluator(
     evaluator_resource_name: str,
     api_key: str,
     api_base: str,
-    force: bool,
 ) -> bool:
     """Ensure the evaluator exists and is ACTIVE, uploading it if needed."""
-    # Optional short-circuit: if evaluator already exists and not forcing, skip upload path
-    if not force:
-        try:
-            headers = {
-                "Authorization": f"Bearer {api_key}",
-                "Content-Type": "application/json",
-                "User-Agent": get_user_agent(),
-            }
-            resp = requests.get(f"{api_base}/v1/{evaluator_resource_name}", headers=headers, timeout=10)
-            if resp.ok:
-                state = resp.json().get("state", "STATE_UNSPECIFIED")
-                print(f"✓ Evaluator exists (state: {state}). Skipping upload (use --force to overwrite).")
-                # Poll for ACTIVE before proceeding
-                print(f"Waiting for evaluator '{evaluator_id}' to become ACTIVE...")
-                if not _poll_evaluator_status(
-                    evaluator_resource_name=evaluator_resource_name,
-                    api_key=api_key,
-                    api_base=api_base,
-                    timeout_minutes=10,
-                ):
-                    dashboard_url = _build_evaluator_dashboard_url(evaluator_id)
-                    print("\n❌ Evaluator is not ready within the timeout period.")
-                    print(f"📊 Please check the evaluator status at: {dashboard_url}")
-                    print("   Wait for it to become ACTIVE, then run 'eval-protocol create rft' again.")
-                    return False
-                return True
-        except requests.exceptions.RequestException:
-            pass
+    # Check if evaluator already exists
+    try:
+        headers = {
+            "Authorization": f"Bearer {api_key}",
+            "Content-Type": "application/json",
+            "User-Agent": get_user_agent(),
+        }
+        resp = requests.get(f"{api_base}/v1/{evaluator_resource_name}", headers=headers, timeout=10)
+        if resp.ok:
+            state = resp.json().get("state", "STATE_UNSPECIFIED")
+            print(f"✓ Evaluator exists (state: {state}). Skipping upload.")
+            # Poll for ACTIVE before proceeding
+            print(f"Waiting for evaluator '{evaluator_id}' to become ACTIVE...")
+            if not _poll_evaluator_status(
+                evaluator_resource_name=evaluator_resource_name,
+                api_key=api_key,
+                api_base=api_base,
+                timeout_minutes=10,
+            ):
+                dashboard_url = _build_evaluator_dashboard_url(evaluator_id)
+                print("\n❌ Evaluator is not ready within the timeout period.")
+                print(f"📊 Please check the evaluator status at: {dashboard_url}")
+                print("   Wait for it to become ACTIVE, then run 'eval-protocol create rft' again.")
+                return False
+            return True
+    except requests.exceptions.RequestException:
+        pass
 
     # Ensure evaluator exists by invoking the upload flow programmatically
     try:
@@ -622,14 +620,10 @@ def _upload_and_ensure_evaluator(
             id=evaluator_id,
             display_name=None,
             description=None,
-            force=force,  # Pass through the --force flag
             yes=True,
-            env_file=None,  # Add the new env_file parameter
+            env_file=None,
         )
 
-        if force:
-            print(f"🔄 Force flag enabled - will overwrite existing evaluator '{evaluator_id}'")
-
         rc = upload_command(upload_args)
         if rc == 0:
             print(f"✓ Uploaded/ensured evaluator: {evaluator_id}")
@@ -738,7 +732,6 @@ def create_rft_command(args) -> int:
     evaluator_arg: Optional[str] = getattr(args, "evaluator", None)
     non_interactive: bool = bool(getattr(args, "yes", False))
     dry_run: bool = bool(getattr(args, "dry_run", False))
-    force: bool = bool(getattr(args, "force", False))
     skip_validation: bool = bool(getattr(args, "skip_validation", False))
     ignore_docker: bool = bool(getattr(args, "ignore_docker", False))
     docker_build_extra: str = getattr(args, "docker_build_extra", "") or ""
@@ -816,7 +809,6 @@ def create_rft_command(args) -> int:
         evaluator_resource_name=evaluator_resource_name,
         api_key=api_key,
         api_base=api_base,
-        force=force,
     ):
         return 1
 
diff --git a/eval_protocol/cli_commands/upload.py b/eval_protocol/cli_commands/upload.py
@@ -289,7 +289,6 @@ def upload_command(args: argparse.Namespace) -> int:
     base_id = getattr(args, "id", None)
     display_name = getattr(args, "display_name", None)
     description = getattr(args, "description", None)
-    force = bool(getattr(args, "force", False))
     env_file = getattr(args, "env_file", None)
 
     # Load secrets from .env file and ensure they're available on Fireworks
@@ -382,7 +381,6 @@ def upload_command(args: argparse.Namespace) -> int:
                 evaluator_id=evaluator_id,
                 display_name=display_name or evaluator_id,
                 description=description or f"Evaluator for {qualname}",
-                force=force,
                 entry_point=entry_point,
             )
             name = result.get("name", evaluator_id) if isinstance(result, dict) else evaluator_id
diff --git a/eval_protocol/evaluation.py b/eval_protocol/evaluation.py
@@ -155,7 +155,7 @@ def _create_tar_gz_with_ignores(output_path: str, source_dir: str) -> int:
         logger.info(f"Created {output_path} ({size_bytes:,} bytes)")
         return size_bytes
 
-    def create(self, evaluator_id, display_name=None, description=None, force=False):
+    def create(self, evaluator_id, display_name=None, description=None):
         auth_token = self.api_key or get_fireworks_api_key()
         account_id = self.account_id or get_fireworks_account_id()
         if not account_id and auth_token:
@@ -203,22 +203,6 @@ def create(self, evaluator_id, display_name=None, description=None, force=False)
         logger.info(f"Creating evaluator '{evaluator_id}' for account '{account_id}'...")
 
         try:
-            if force:
-                try:
-                    logger.info("Checking if evaluator exists")
-                    existing_evaluator = client.evaluators.get(evaluator_id=evaluator_id)
-                    if existing_evaluator:
-                        logger.info(f"Evaluator '{evaluator_id}' already exists, deleting and recreating...")
-                        try:
-                            client.evaluators.delete(evaluator_id=evaluator_id)
-                            logger.info(f"Successfully deleted evaluator '{evaluator_id}'")
-                        except fireworks.NotFoundError:
-                            logger.info(f"Evaluator '{evaluator_id}' not found, creating...")
-                        except fireworks.APIError as e:
-                            logger.warning(f"Error deleting evaluator: {str(e)}")
-                except fireworks.NotFoundError:
-                    logger.info(f"Evaluator '{evaluator_id}' does not exist, creating...")
-
             # Create evaluator using SDK
             result = client.evaluators.create(
                 evaluator_id=evaluator_id,
@@ -387,7 +371,6 @@ def create_evaluation(
     evaluator_id: str,
     display_name: Optional[str] = None,
     description: Optional[str] = None,
-    force: bool = False,
     account_id: Optional[str] = None,
     api_key: Optional[str] = None,
     entry_point: Optional[str] = None,
@@ -399,7 +382,6 @@ def create_evaluation(
         evaluator_id: Unique identifier for the evaluator
         display_name: Display name for the evaluator
         description: Description for the evaluator
-        force: If True, delete and recreate if evaluator exists
         account_id: Optional Fireworks account ID
         api_key: Optional Fireworks API key
         entry_point: Optional entry point (module::function or path::function)
@@ -410,4 +392,4 @@ def create_evaluation(
         entry_point=entry_point,
     )
 
-    return evaluator.create(evaluator_id, display_name, description, force)
+    return evaluator.create(evaluator_id, display_name, description)
diff --git a/tests/test_cli_create_rft.py b/tests/test_cli_create_rft.py
@@ -239,7 +239,6 @@ def _fake_run_evaluator_test(project_root, pytest_target, ignore_docker, docker_
         evaluator=None,
         yes=True,
         dry_run=True,
-        force=False,
         env_file=None,
         dataset=None,
         dataset_jsonl=str(ds_path),
@@ -299,7 +298,6 @@ def _fake_run_evaluator_test(project_root, pytest_target, ignore_docker, docker_
         evaluator=None,
         yes=True,
         dry_run=True,
-        force=False,
         env_file=None,
         dataset=None,
         dataset_jsonl=str(ds_path),
@@ -351,7 +349,6 @@ def _fake_run_evaluator_test(project_root, pytest_target, ignore_docker, docker_
         evaluator="my-evaluator",
         yes=True,
         dry_run=True,
-        force=False,
         env_file=None,
         dataset=None,
         dataset_jsonl=str(ds_path),
@@ -401,7 +398,6 @@ def _fake_run_evaluator_test(project_root, pytest_target, ignore_docker, docker_
         evaluator="my-evaluator",
         yes=True,
         dry_run=True,
-        force=False,
         env_file=None,
         dataset=None,
         dataset_jsonl=str(ds_path),
@@ -462,7 +458,6 @@ def _fake_create_dataset_from_jsonl(account_id, api_key, api_base, dataset_id, d
     setattr(args, "evaluator", None)
     setattr(args, "yes", True)
     setattr(args, "dry_run", False)
-    setattr(args, "force", False)
     setattr(args, "env_file", None)
     setattr(args, "dataset", None)
     setattr(args, "dataset_jsonl", str(ds_path))
@@ -530,7 +525,6 @@ def _fake_create_dataset_from_jsonl(account_id, api_key, api_base, dataset_id, d
         evaluator=cr._normalize_evaluator_id("foo_eval-test_bar_evaluation"),
         yes=True,
         dry_run=False,
-        force=False,
         env_file=None,
         dataset=None,
         dataset_jsonl=str(ds_path),
@@ -600,7 +594,6 @@ def test_create_rft_interactive_selector_single_test(rft_test_harness, monkeypat
         evaluator=None,
         yes=True,
         dry_run=False,
-        force=False,
         env_file=None,
         dataset=None,
         dataset_jsonl=str(ds_path),
@@ -674,7 +667,6 @@ def raise_for_status(self):
         evaluator="some-eval",
         yes=True,
         dry_run=False,
-        force=False,
         env_file=None,
         dataset=None,
         dataset_jsonl=str(ds_path),
@@ -727,7 +719,6 @@ def _raise(*a, **k):
         evaluator="some-eval",
         yes=True,
         dry_run=False,
-        force=False,
         env_file=None,
         dataset=None,
         dataset_jsonl=str(project / "dataset.jsonl"),
@@ -789,7 +780,6 @@ def _fake_create_dataset_from_jsonl(account_id, api_key, api_base, dataset_id, d
         evaluator=None,
         yes=True,
         dry_run=False,
-        force=False,
         env_file=None,
         dataset=None,
         dataset_jsonl=None,
@@ -850,7 +840,6 @@ def _fake_create_dataset_from_jsonl(account_id, api_key, api_base, dataset_id, d
         evaluator=None,
         yes=True,
         dry_run=False,
-        force=False,
         env_file=None,
         dataset=None,
         dataset_jsonl=None,
@@ -912,7 +901,6 @@ def _fake_create_dataset_from_jsonl(account_id, api_key, api_base, dataset_id, d
         evaluator=None,
         yes=True,
         dry_run=False,
-        force=False,
         env_file=None,
         dataset=None,
         dataset_jsonl=None,
@@ -1007,7 +995,6 @@ def _fake_create_dataset_from_jsonl(account_id, api_key, api_base, dataset_id, d
         evaluator=eval_id,
         yes=True,
         dry_run=False,
-        force=False,
         env_file=None,
         dataset=None,
         dataset_jsonl=None,
@@ -1175,7 +1162,6 @@ def _fake_create_dataset_from_jsonl(account_id, api_key, api_base, dataset_id, d
         evaluator=None,
         yes=True,
         dry_run=False,
-        force=False,
         env_file=None,
         dataset=None,
         dataset_jsonl=str(explicit_jsonl),
@@ -1266,7 +1252,6 @@ def _fake_create_dataset_from_jsonl(account_id, api_key, api_base, dataset_id, d
         evaluator=None,
         yes=True,
         dry_run=False,
-        force=False,
         env_file=None,
         dataset=None,
         dataset_jsonl=None,
diff --git a/tests/test_ep_upload_e2e.py b/tests/test_ep_upload_e2e.py