From d0190d08e4ab201eb65b9e900a2024eec6f02509 Mon Sep 17 00:00:00 2001
From: Qiuyang Mang <joyemang33@gmail.com>
Date: Tue, 2 Jun 2026 16:58:05 -0400
Subject: [PATCH] Configure ANN query concurrency

---
 2.0/problems/vector_db_ann/config.yaml        |  6 ++
 2.0/problems/vector_db_ann/evaluator.py       | 60 ++++++++++++++++---
 .../vector_db_ann/harbor/app/README.md        |  3 +-
 2.0/problems/vector_db_ann/readme             |  6 +-
 4 files changed, 63 insertions(+), 12 deletions(-)

diff --git a/2.0/problems/vector_db_ann/config.yaml b/2.0/problems/vector_db_ann/config.yaml
index 56f99c37..24dc2aac 100644
--- a/2.0/problems/vector_db_ann/config.yaml
+++ b/2.0/problems/vector_db_ann/config.yaml
@@ -26,6 +26,12 @@ environment:
   memory_mb: 16384
   storage_mb: 8192
   build_timeout_seconds: 3600
+evaluation:
+  # The judge drives the search service with `query_concurrency` concurrent
+  # workers, each issuing `queries_per_worker` timed queries. Keep concurrency
+  # aligned with the CPU budget unless deliberately benchmarking higher concurrency.
+  query_concurrency: 8
+  queries_per_worker: 64
 submission:
   kind: directory
   path: /app
diff --git a/2.0/problems/vector_db_ann/evaluator.py b/2.0/problems/vector_db_ann/evaluator.py
index edf3ea40..687c7eb1 100644
--- a/2.0/problems/vector_db_ann/evaluator.py
+++ b/2.0/problems/vector_db_ann/evaluator.py
@@ -19,19 +19,61 @@
 
 import numpy as np
 
+
+def _read_evaluation_config() -> dict[str, int]:  # evaluation: ints from config.yaml
+    config_path = Path(__file__).with_name("config.yaml")  # sibling of this file
+    if not config_path.exists():
+        return {}
+    # Line-based scan, not a YAML parser; only the two known keys are read.
+    values: dict[str, int] = {}
+    in_evaluation = False  # True while under the top-level `evaluation:` key
+    for raw_line in config_path.read_text(encoding="utf-8").splitlines():
+        line = raw_line.split("#", 1)[0].rstrip()  # strip comment and trailing space
+        if not line:
+            continue
+        if not raw_line.startswith((" ", "\t")):  # unindented: top-level key
+            in_evaluation = line == "evaluation:"
+            continue
+        if not in_evaluation:
+            continue
+        stripped = line.strip()
+        if ":" not in stripped:
+            continue
+        key, value = stripped.split(":", 1)
+        key = key.strip()
+        value = value.strip()
+        if key in {"query_concurrency", "queries_per_worker"} and value:
+            values[key] = int(value)  # non-integer text raises ValueError
+    return values
+
+
+def _config_int(name: str, default: int) -> int:  # env var as int, else default
+    return int(os.environ.get(name, str(default)))  # ValueError on non-integer text
+
+
+_EVALUATION_CONFIG = _read_evaluation_config()  # read once at import; {} if no file
+CONFIG_CONCURRENCY = int(_EVALUATION_CONFIG.get("query_concurrency", 8))
+CONFIG_QUERIES_PER_WORKER = int(_EVALUATION_CONFIG.get("queries_per_worker", 64))
+
+
 DIM = 128
-N_BASE = int(os.environ.get("FRONTIER_VECTOR_DB_N", "1000000"))
-N_QUERIES = int(os.environ.get("FRONTIER_VECTOR_DB_Q", "256"))
-TOP_K = int(os.environ.get("FRONTIER_VECTOR_DB_TOP_K", "10"))
-SEED = int(os.environ.get("FRONTIER_VECTOR_DB_SEED", "20260528"))
+N_BASE = _config_int("FRONTIER_VECTOR_DB_N", 1000000)
+CONCURRENCY = _config_int("FRONTIER_VECTOR_DB_CONCURRENCY", CONFIG_CONCURRENCY)
+QUERIES_PER_WORKER = _config_int(
+    "FRONTIER_VECTOR_DB_QUERIES_PER_WORKER", CONFIG_QUERIES_PER_WORKER
+)  # env var, when set, wins over the config.yaml-derived value
+N_QUERIES = _config_int(
+    "FRONTIER_VECTOR_DB_Q", CONCURRENCY * QUERIES_PER_WORKER
+)  # NOTE(review): env override may not equal the product; confirm callers cope
+TOP_K = _config_int("FRONTIER_VECTOR_DB_TOP_K", 10)
+SEED = _config_int("FRONTIER_VECTOR_DB_SEED", 20260528)
 TARGET_RECALL = float(os.environ.get("FRONTIER_VECTOR_DB_TARGET_RECALL", "0.95"))
 QUERY_NOISE = float(os.environ.get("FRONTIER_VECTOR_DB_QUERY_NOISE", "0.02"))
-BUILD_TIMEOUT_SECONDS = int(os.environ.get("FRONTIER_VECTOR_DB_BUILD_TIMEOUT", "600"))
-LOAD_TIMEOUT_SECONDS = int(os.environ.get("FRONTIER_VECTOR_DB_LOAD_TIMEOUT", "900"))
+BUILD_TIMEOUT_SECONDS = _config_int("FRONTIER_VECTOR_DB_BUILD_TIMEOUT", 600)
+LOAD_TIMEOUT_SECONDS = _config_int("FRONTIER_VECTOR_DB_LOAD_TIMEOUT", 900)
 LOAD_PENALTY_WEIGHT = float(os.environ.get("FRONTIER_VECTOR_DB_LOAD_PENALTY", "0.01"))
-BATCH_SIZE = int(os.environ.get("FRONTIER_VECTOR_DB_BATCH_SIZE", "1000"))
-CONCURRENCY = int(os.environ.get("FRONTIER_VECTOR_DB_CONCURRENCY", "4"))
-WARMUP = int(os.environ.get("FRONTIER_VECTOR_DB_WARMUP", "32"))
+BATCH_SIZE = _config_int("FRONTIER_VECTOR_DB_BATCH_SIZE", 1000)
+WARMUP = _config_int("FRONTIER_VECTOR_DB_WARMUP", 32)
 CACHE_DIR = Path(os.environ.get("FRONTIER_VECTOR_DB_CACHE", "/tmp/frontier_vector_db_ann"))
 
 _BENCHMARK: "Benchmark | None" = None
diff --git a/2.0/problems/vector_db_ann/harbor/app/README.md b/2.0/problems/vector_db_ann/harbor/app/README.md
index 06c74801..eb0aa6b5 100644
--- a/2.0/problems/vector_db_ann/harbor/app/README.md
+++ b/2.0/problems/vector_db_ann/harbor/app/README.md
@@ -25,7 +25,8 @@ The Harbor task provides the following resource budget:
 ```text
 vCPUs: 8
 memory: 16 GiB
-query concurrency: 4
+query concurrency: 8
+timed queries per worker: 64
 ```
 
 ## Attribution
diff --git a/2.0/problems/vector_db_ann/readme b/2.0/problems/vector_db_ann/readme
index 4fb77cc0..40c3dff4 100644
--- a/2.0/problems/vector_db_ann/readme
+++ b/2.0/problems/vector_db_ann/readme
@@ -42,7 +42,8 @@ parallel search and indexing strategy for this budget:
 ```text
 vCPUs: 8
 memory: 16 GiB
-query concurrency: 4
+query concurrency: 8
+timed queries per worker: 64
 ```
 
 The service must listen on `PORT` and implement these endpoints:
@@ -118,7 +119,8 @@ trial while still letting different machines measure their own local baseline.
 
 Each submission is then timed independently. The load phase includes all
 `/bulk_insert` calls and any index construction performed by the service before
-queries begin. The query phase measures only `/search` throughput:
+queries begin. The query phase uses 8 concurrent workers, each issuing 64
+queries, and measures only `/search` throughput:
 
 ```text
 candidate_qps