Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions 2.0/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,11 @@ The demo variant uses the same interface and scoring rule with only `N = 10`
points. Its problem ID is `erdos_demo`. It is intended as a quick visual sanity
check for Harborized agent workflows before running the larger
`erdos_unit_distance` task.

## Vector DB ANN

This systems problem asks agents to build a Rust approximate nearest-neighbor
vector search service for a hidden SIFT1M-scale benchmark. Its problem ID is
`vector_db_ann`. Submissions are whole `/app` projects served through Harbor,
and the objective is to maximize effective QPS subject to `recall@10 >= 0.95`;
the score includes query throughput plus a small load/index-build penalty.
32 changes: 32 additions & 0 deletions 2.0/problems/vector_db_ann/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Problem configuration for vector_db_ann.
# NOTE(review): nesting is not visible in this flattened view; the comments
# below follow key order only -- confirm against the real indentation.
tag: systems
runtime:
language: rust
# 10800 s = 3 hours.
timeout_seconds: 10800
environment: "Rust project; hidden ANN benchmark; Python/NumPy judge"
apt_packages:
- build-essential
- cargo
- git
- rustc
# Same apt package set that evaluate.sh installs when its toolchain check fails.
judge_apt_packages:
- build-essential
- cargo
- rustc
- python3-pip
- python3-numpy
# evaluate.sh likewise installs faiss-cpu with pip3 when `import faiss` fails.
judge_pip_packages:
- faiss-cpu
docker:
image: ubuntu:24.04
# NOTE(review): second `environment` key. This one is a mapping, the earlier one
# is a string, so they presumably sit at different nesting levels -- confirm.
environment:
cpus: 4
# 8192 = 8 * 1024 for both memory and storage.
memory_mb: 8192
storage_mb: 8192
# 3600 s = 1 hour.
build_timeout_seconds: 3600
submission:
kind: directory
path: /app
# Entries left out of the submitted directory (presumably relative to `path`).
exclude:
- target
- .git
- .frontier-cs
38 changes: 38 additions & 0 deletions 2.0/problems/vector_db_ann/evaluate.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/usr/bin/env bash
#
# Evaluation entry point for the vector_db_ann problem.
#
# Usage:
#   evaluate.sh ARGS...   Forward ARGS unchanged to evaluator.py.
#   evaluate.sh           Copy harbor/app into a scratch directory, drop the
#                         solution file in as src/db.rs, and run evaluator.py
#                         on that directory.
#
# Environment (each one is defaulted only when unset or empty):
#   FRONTIER_VECTOR_DB_N, FRONTIER_VECTOR_DB_Q, FRONTIER_VECTOR_DB_WARMUP,
#   FRONTIER_VECTOR_DB_BATCH_SIZE, FRONTIER_VECTOR_DB_CACHE
set -euo pipefail

SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

# With arguments this script is a thin wrapper: the shell is replaced by the
# Python evaluator and every argument is passed through untouched.
if (( $# > 0 )); then
  exec python3 "$SCRIPT_DIR/evaluator.py" "$@"
fi

SOLUTION="/work/execution_env/solution_env/solution.rs"
[[ -f "$SOLUTION" ]] || {
  echo "Error: Missing $SOLUTION" >&2
  exit 1
}

# Succeeds only when cargo is on PATH and python3 can import numpy and faiss.
toolchain_ready() {
  command -v cargo >/dev/null 2>&1 \
    && python3 -c 'import numpy, faiss' >/dev/null 2>&1
}

if ! toolchain_ready; then
  export DEBIAN_FRONTEND=noninteractive
  apt_pkgs=(build-essential cargo rustc python3-pip python3-numpy)
  apt-get update -qq
  apt-get install -y -qq --no-install-recommends "${apt_pkgs[@]}" >/dev/null
  # faiss comes from pip, and only when it still cannot be imported.
  if ! python3 -c 'import faiss' >/dev/null 2>&1; then
    pip3 install --break-system-packages -q faiss-cpu
  fi
fi

# Scratch project directory; removed on every exit path.
WORKDIR=$(mktemp -d)
cleanup() { rm -rf "$WORKDIR"; }
trap cleanup EXIT

cp -R "$SCRIPT_DIR/harbor/app/." "$WORKDIR/"
cp "$SOLUTION" "$WORKDIR/src/db.rs"

# Small defaults: this path only has to show the repository validator that the
# evaluator runs end to end. The full 1M-vector benchmark goes through Harbor.
: "${FRONTIER_VECTOR_DB_N:=5000}"
: "${FRONTIER_VECTOR_DB_Q:=16}"
: "${FRONTIER_VECTOR_DB_WARMUP:=4}"
: "${FRONTIER_VECTOR_DB_BATCH_SIZE:=500}"
: "${FRONTIER_VECTOR_DB_CACHE:=/tmp/frontier_vector_db_ann_ci}"
export FRONTIER_VECTOR_DB_N FRONTIER_VECTOR_DB_Q FRONTIER_VECTOR_DB_WARMUP \
  FRONTIER_VECTOR_DB_BATCH_SIZE FRONTIER_VECTOR_DB_CACHE

python3 "$SCRIPT_DIR/evaluator.py" "$WORKDIR"
Loading
Loading