From d0190d08e4ab201eb65b9e900a2024eec6f02509 Mon Sep 17 00:00:00 2001
From: Qiuyang Mang
Date: Tue, 2 Jun 2026 16:58:05 -0400
Subject: [PATCH] Configure ANN query concurrency

---
 2.0/problems/vector_db_ann/config.yaml        |  6 ++
 2.0/problems/vector_db_ann/evaluator.py       | 60 ++++++++++++++++---
 .../vector_db_ann/harbor/app/README.md        |  3 +-
 2.0/problems/vector_db_ann/readme             |  6 +-
 4 files changed, 63 insertions(+), 12 deletions(-)

diff --git a/2.0/problems/vector_db_ann/config.yaml b/2.0/problems/vector_db_ann/config.yaml
index 56f99c37..24dc2aac 100644
--- a/2.0/problems/vector_db_ann/config.yaml
+++ b/2.0/problems/vector_db_ann/config.yaml
@@ -26,6 +26,12 @@ environment:
   memory_mb: 16384
   storage_mb: 8192
   build_timeout_seconds: 3600
+evaluation:
+  # The judge drives the search service with this many concurrent workers.
+  # Keep this aligned with the CPU budget unless the task is intentionally
+  # changed into a higher-concurrency service benchmark.
+  query_concurrency: 8
+  queries_per_worker: 64
 submission:
   kind: directory
   path: /app
diff --git a/2.0/problems/vector_db_ann/evaluator.py b/2.0/problems/vector_db_ann/evaluator.py
index edf3ea40..687c7eb1 100644
--- a/2.0/problems/vector_db_ann/evaluator.py
+++ b/2.0/problems/vector_db_ann/evaluator.py
@@ -19,19 +19,61 @@ import numpy as np
 
 
 
+
+def _read_evaluation_config() -> dict[str, int]:
+    config_path = Path(__file__).with_name("config.yaml")
+    if not config_path.exists():
+        return {}
+
+    values: dict[str, int] = {}
+    in_evaluation = False
+    for raw_line in config_path.read_text(encoding="utf-8").splitlines():
+        line = raw_line.split("#", 1)[0].rstrip()
+        if not line:
+            continue
+        if not raw_line.startswith((" ", "\t")):
+            in_evaluation = line == "evaluation:"
+            continue
+        if not in_evaluation:
+            continue
+        stripped = line.strip()
+        if ":" not in stripped:
+            continue
+        key, value = stripped.split(":", 1)
+        key = key.strip()
+        value = value.strip()
+        if key in {"query_concurrency", "queries_per_worker"} and value:
+            values[key] = int(value)
+    return values
+
+
+def _config_int(name: str, default: int) -> int:
+    return int(os.environ.get(name, str(default)))
+
+
+_EVALUATION_CONFIG = _read_evaluation_config()
+CONFIG_CONCURRENCY = int(_EVALUATION_CONFIG.get("query_concurrency", 8))
+CONFIG_QUERIES_PER_WORKER = int(_EVALUATION_CONFIG.get("queries_per_worker", 64))
+
+
 DIM = 128
-N_BASE = int(os.environ.get("FRONTIER_VECTOR_DB_N", "1000000"))
-N_QUERIES = int(os.environ.get("FRONTIER_VECTOR_DB_Q", "256"))
-TOP_K = int(os.environ.get("FRONTIER_VECTOR_DB_TOP_K", "10"))
-SEED = int(os.environ.get("FRONTIER_VECTOR_DB_SEED", "20260528"))
+N_BASE = _config_int("FRONTIER_VECTOR_DB_N", 1000000)
+CONCURRENCY = _config_int("FRONTIER_VECTOR_DB_CONCURRENCY", CONFIG_CONCURRENCY)
+QUERIES_PER_WORKER = _config_int(
+    "FRONTIER_VECTOR_DB_QUERIES_PER_WORKER", CONFIG_QUERIES_PER_WORKER
+)
+N_QUERIES = _config_int(
+    "FRONTIER_VECTOR_DB_Q", CONCURRENCY * QUERIES_PER_WORKER
+)
+TOP_K = _config_int("FRONTIER_VECTOR_DB_TOP_K", 10)
+SEED = _config_int("FRONTIER_VECTOR_DB_SEED", 20260528)
 TARGET_RECALL = float(os.environ.get("FRONTIER_VECTOR_DB_TARGET_RECALL", "0.95"))
 QUERY_NOISE = float(os.environ.get("FRONTIER_VECTOR_DB_QUERY_NOISE", "0.02"))
-BUILD_TIMEOUT_SECONDS = int(os.environ.get("FRONTIER_VECTOR_DB_BUILD_TIMEOUT", "600"))
-LOAD_TIMEOUT_SECONDS = int(os.environ.get("FRONTIER_VECTOR_DB_LOAD_TIMEOUT", "900"))
+BUILD_TIMEOUT_SECONDS = _config_int("FRONTIER_VECTOR_DB_BUILD_TIMEOUT", 600)
+LOAD_TIMEOUT_SECONDS = _config_int("FRONTIER_VECTOR_DB_LOAD_TIMEOUT", 900)
 LOAD_PENALTY_WEIGHT = float(os.environ.get("FRONTIER_VECTOR_DB_LOAD_PENALTY", "0.01"))
-BATCH_SIZE = int(os.environ.get("FRONTIER_VECTOR_DB_BATCH_SIZE", "1000"))
-CONCURRENCY = int(os.environ.get("FRONTIER_VECTOR_DB_CONCURRENCY", "4"))
-WARMUP = int(os.environ.get("FRONTIER_VECTOR_DB_WARMUP", "32"))
+BATCH_SIZE = _config_int("FRONTIER_VECTOR_DB_BATCH_SIZE", 1000)
+WARMUP = _config_int("FRONTIER_VECTOR_DB_WARMUP", 32)
 CACHE_DIR = Path(os.environ.get("FRONTIER_VECTOR_DB_CACHE", "/tmp/frontier_vector_db_ann"))
 
 _BENCHMARK: "Benchmark | None" = None
diff --git a/2.0/problems/vector_db_ann/harbor/app/README.md b/2.0/problems/vector_db_ann/harbor/app/README.md
index 06c74801..eb0aa6b5 100644
--- a/2.0/problems/vector_db_ann/harbor/app/README.md
+++ b/2.0/problems/vector_db_ann/harbor/app/README.md
@@ -25,7 +25,8 @@ The Harbor task provides the following resource budget:
 ```text
 vCPUs: 8
 memory: 16 GiB
-query concurrency: 4
+query concurrency: 8
+timed queries per worker: 64
 ```
 
 ## Attribution
diff --git a/2.0/problems/vector_db_ann/readme b/2.0/problems/vector_db_ann/readme
index 4fb77cc0..40c3dff4 100644
--- a/2.0/problems/vector_db_ann/readme
+++ b/2.0/problems/vector_db_ann/readme
@@ -42,7 +42,8 @@ parallel search and indexing strategy for this budget:
 ```text
 vCPUs: 8
 memory: 16 GiB
-query concurrency: 4
+query concurrency: 8
+timed queries per worker: 64
 ```
 
 The service must listen on `PORT` and implement these endpoints:
@@ -118,7 +119,8 @@ trial while still letting different machines measure their own local baseline.
 
 Each submission is then timed independently. The load phase includes all
 `/bulk_insert` calls and any index construction performed by the service before
-queries begin. The query phase measures only `/search` throughput:
+queries begin. The query phase uses 8 concurrent workers, each issuing 64
+queries, and measures only `/search` throughput:
 
 ```text
 candidate_qps