# docker-compose.cli.yml
# Compose definition of the `taskbench-cli` service, which runs the
# `taskbench` command-line tool inside a container.
services:
  taskbench-cli:
    # Build the image from the Dockerfile located next to this Compose file.
    build:
      context: .
      dockerfile: Dockerfile
    # Load variables from .env into the container environment.
    env_file:
      - .env
    # Explicit container environment. Each value is interpolated by Compose;
    # the `${VAR:-default}` form falls back to `default` when VAR is unset or
    # empty. OPENROUTER_API_KEY has no fallback.
    environment:
      - OPENROUTER_API_KEY=${OPENROUTER_API_KEY}
      - GENERAL_TASK_LLM=${GENERAL_TASK_LLM:-anthropic/claude-sonnet-4.5}
      - TASKBENCH_MAX_CONCURRENCY=${TASKBENCH_MAX_CONCURRENCY:-5}
      - TASKBENCH_USE_GENERATION_LOOKUP=${TASKBENCH_USE_GENERATION_LOOKUP:-true}
      - TASKBENCH_MAX_TOKENS=${TASKBENCH_MAX_TOKENS:-4000}
      - TASKBENCH_TEMPERATURE=${TASKBENCH_TEMPERATURE:-0.7}
      - TASKBENCH_CHUNK_CHARS=${TASKBENCH_CHUNK_CHARS:-20000}
      - TASKBENCH_CHUNK_OVERLAP=${TASKBENCH_CHUNK_OVERLAP:-500}
      - TASKBENCH_TIMEOUT=${TASKBENCH_TIMEOUT:-120.0}
      - TASKBENCH_RATE_LIMIT=${TASKBENCH_RATE_LIMIT:-60}
      - TASKBENCH_RETRY_MAX_DELAY=${TASKBENCH_RETRY_MAX_DELAY:-60.0}
      - TASKBENCH_JUDGE_MAX_TOKENS=${TASKBENCH_JUDGE_MAX_TOKENS:-2000}
      - TASKBENCH_INPUT_PREVIEW_LEN=${TASKBENCH_INPUT_PREVIEW_LEN:-5000}
    # Bind-mount host directories (relative to this file) under /app so the
    # container reads and writes them directly on the host.
    volumes:
      - ./tasks:/app/tasks
      - ./config:/app/config
      - ./results:/app/results
      - ./tests/fixtures:/app/tests/fixtures
      - ./sample-usecases:/app/sample-usecases
      - ./usecases:/app/usecases
      - ./.cache:/app/.cache
    working_dir: /app
    # `taskbench` is always the executable; `--help` is only the default
    # argument list and is replaced by any arguments given to
    # `docker compose run`.
    entrypoint: ["taskbench"]
    command: ["--help"]
# Usage Examples:
#
# Build the CLI image:
#   docker compose -f docker-compose.cli.yml build
#
# List available use cases:
#   docker compose -f docker-compose.cli.yml run --rm taskbench-cli list-usecases
#
# Run a folder-based use case (recommended):
#   docker compose -f docker-compose.cli.yml run --rm taskbench-cli run \
#     sample-usecases/00-lecture-concept-extraction \
#     --models anthropic/claude-sonnet-4,openai/gpt-4o \
#     --skip-judge
#
# Run with full judge evaluation:
#   docker compose -f docker-compose.cli.yml run --rm taskbench-cli run \
#     sample-usecases/00-lecture-concept-extraction \
#     --models anthropic/claude-sonnet-4
#
# Legacy YAML-based evaluation:
#   docker compose -f docker-compose.cli.yml run --rm taskbench-cli evaluate \
#     tasks/lecture_analysis.yaml \
#     --models anthropic/claude-sonnet-4,openai/gpt-4o \
#     --input-file tests/fixtures/sample_transcript.txt
#
# Environment Variables:
#   Required: OPENROUTER_API_KEY (set in the .env file).
#   See CLAUDE.md for all configurable environment variables.