From c4dcf9fca5e2d3ef26e8f3fd821dcc0ec801a27e Mon Sep 17 00:00:00 2001 From: Huy Do Date: Sat, 13 Dec 2025 00:27:26 -0800 Subject: [PATCH 1/2] [no ci] Several minor tweaks to vllm benchmark workflow Signed-off-by: Huy Do --- .github/scripts/generate_vllm_benchmark_matrix.py | 9 ++++++--- .github/workflows/vllm-benchmark.yml | 4 ++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/scripts/generate_vllm_benchmark_matrix.py b/.github/scripts/generate_vllm_benchmark_matrix.py index 2720ffe..288f83d 100755 --- a/.github/scripts/generate_vllm_benchmark_matrix.py +++ b/.github/scripts/generate_vllm_benchmark_matrix.py @@ -19,7 +19,7 @@ "linux.rocm.gpu.gfx942.1", "linux.24xl.spr-metal", "linux.24xl.gnr", - "linux.arm64.m7g.4xlarge", + # "linux.arm64.m7g.4xlarge", # TODO (huydhn): This is not working yet "linux.dgx.b200", "linux.hpu.gaudi3.8", ], @@ -60,7 +60,7 @@ "linux.rocm.gpu.gfx942.8": "rocm", "linux.24xl.spr-metal": "cpu", "linux.24xl.gnr": "cpu", - "linux.arm64.m7g.4xlarge": "arm64-cpu", + # "linux.arm64.m7g.4xlarge": "arm64-cpu", # TODO (huydhn): This is not working yet "linux.hpu.gaudi3.8": "hpu", } @@ -254,7 +254,10 @@ def generate_benchmark_matrix( # Gather all possible benchmarks for platform in sorted(platforms): selected_models = [] - for file in glob.glob(f"{benchmark_configs_dir}/{platform}/*.json"): + # Only need to parse serving config because all models need it and it + # always has the tensor_parallel_size field that is used to find the + # right runner + for file in glob.glob(f"{benchmark_configs_dir}/{platform}/*serving*.json"): with open(file) as f: try: configs = json.load(f) diff --git a/.github/workflows/vllm-benchmark.yml b/.github/workflows/vllm-benchmark.yml index 126424a..718d42a 100644 --- a/.github/workflows/vllm-benchmark.yml +++ b/.github/workflows/vllm-benchmark.yml @@ -2,8 +2,8 @@ name: vLLM Benchmark on: schedule: - # Run every 12 hours - - cron: '0 */12 * * *' + # Run daily at 1:15 AM PST + - cron: '15 9 * * *' 
workflow_dispatch: inputs: vllm_branch: From 7ba6367a37efd86b90272a4d37779644181c4e00 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Sat, 13 Dec 2025 00:27:26 -0800 Subject: [PATCH 2/2] [no ci] Several minor tweaks to vllm benchmark workflow Signed-off-by: Huy Do --- .github/scripts/generate_vllm_benchmark_matrix.py | 9 ++++++--- .github/workflows/vllm-benchmark.yml | 4 ++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/scripts/generate_vllm_benchmark_matrix.py b/.github/scripts/generate_vllm_benchmark_matrix.py index 2720ffe..008ee83 100755 --- a/.github/scripts/generate_vllm_benchmark_matrix.py +++ b/.github/scripts/generate_vllm_benchmark_matrix.py @@ -19,7 +19,7 @@ "linux.rocm.gpu.gfx942.1", "linux.24xl.spr-metal", "linux.24xl.gnr", - "linux.arm64.m7g.4xlarge", + # "linux.arm64.m7g.4xlarge", # TODO (huydhn): This is not working yet "linux.dgx.b200", "linux.hpu.gaudi3.8", ], @@ -60,7 +60,7 @@ "linux.rocm.gpu.gfx942.8": "rocm", "linux.24xl.spr-metal": "cpu", "linux.24xl.gnr": "cpu", - "linux.arm64.m7g.4xlarge": "arm64-cpu", + # "linux.arm64.m7g.4xlarge": "arm64-cpu", # TODO (huydhn): This is not working yet "linux.hpu.gaudi3.8": "hpu", } @@ -254,7 +254,10 @@ def generate_benchmark_matrix( # Gather all possible benchmarks for platform in sorted(platforms): selected_models = [] - for file in glob.glob(f"{benchmark_configs_dir}/{platform}/*.json"): + # Only need to parse serving config because it has all the models and + # their tensor_parallel_size field. 
The latter is used to find the runner + # with the right capacity + for file in glob.glob(f"{benchmark_configs_dir}/{platform}/*serving*.json"): with open(file) as f: try: configs = json.load(f) diff --git a/.github/workflows/vllm-benchmark.yml b/.github/workflows/vllm-benchmark.yml index 126424a..718d42a 100644 --- a/.github/workflows/vllm-benchmark.yml +++ b/.github/workflows/vllm-benchmark.yml @@ -2,8 +2,8 @@ name: vLLM Benchmark on: schedule: - # Run every 12 hours - - cron: '0 */12 * * *' + # Run daily at 1:15 AM PST + - cron: '15 9 * * *' workflow_dispatch: inputs: vllm_branch: