Skip to content

Commit 043f5b1

Browse files
committed
Several minor tweaks to the vLLM benchmark workflow
Signed-off-by: Huy Do <huydhn@gmail.com>
1 parent 0ab78c7 commit 043f5b1

File tree

2 files changed

+8
-5
lines changed

2 files changed

+8
-5
lines changed

.github/scripts/generate_vllm_benchmark_matrix.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919
"linux.rocm.gpu.gfx942.1",
2020
"linux.24xl.spr-metal",
2121
"linux.24xl.gnr",
22-
"linux.arm64.m7g.4xlarge",
22+
# "linux.arm64.m7g.4xlarge", # TODO (huydhn): This is not working yet
2323
"linux.dgx.b200",
2424
"linux.hpu.gaudi3.8",
2525
],
@@ -60,7 +60,7 @@
6060
"linux.rocm.gpu.gfx942.8": "rocm",
6161
"linux.24xl.spr-metal": "cpu",
6262
"linux.24xl.gnr": "cpu",
63-
"linux.arm64.m7g.4xlarge": "arm64-cpu",
63+
# "linux.arm64.m7g.4xlarge": "arm64-cpu", # TODO (huydhn): This is not working yet
6464
"linux.hpu.gaudi3.8": "hpu",
6565
}
6666

@@ -254,7 +254,10 @@ def generate_benchmark_matrix(
254254
# Gather all possible benchmarks
255255
for platform in sorted(platforms):
256256
selected_models = []
257-
for file in glob.glob(f"{benchmark_configs_dir}/{platform}/*.json"):
257+
# Only need to parse serving config because all models need it and it
258+
# always has the tensor_parallel_size field that is used to find the
259+
# right runner
260+
for file in glob.glob(f"{benchmark_configs_dir}/{platform}/*serving*.json"):
258261
with open(file) as f:
259262
try:
260263
configs = json.load(f)

.github/workflows/vllm-benchmark.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,8 @@ name: vLLM Benchmark
22

33
on:
44
schedule:
5-
# Run every 12 hours
6-
- cron: '0 */12 * * *'
5+
# Run daily at 09:15 UTC (1:15 AM PST / 2:15 AM PDT)
6+
- cron: '15 9 * * *'
77
workflow_dispatch:
88
inputs:
99
vllm_branch:

0 commit comments

Comments
 (0)