From c4dcf9fca5e2d3ef26e8f3fd821dcc0ec801a27e Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Sat, 13 Dec 2025 00:27:26 -0800
Subject: [PATCH 1/2] [no ci] Several minor tweaks to vllm benchmark workflow

Signed-off-by: Huy Do <huydhn@gmail.com>
---
 .github/scripts/generate_vllm_benchmark_matrix.py | 9 ++++++---
 .github/workflows/vllm-benchmark.yml              | 4 ++--
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/.github/scripts/generate_vllm_benchmark_matrix.py b/.github/scripts/generate_vllm_benchmark_matrix.py
index 2720ffe..288f83d 100755
--- a/.github/scripts/generate_vllm_benchmark_matrix.py
+++ b/.github/scripts/generate_vllm_benchmark_matrix.py
@@ -19,7 +19,7 @@
         "linux.rocm.gpu.gfx942.1",
         "linux.24xl.spr-metal",
         "linux.24xl.gnr",
-        "linux.arm64.m7g.4xlarge",
+        # "linux.arm64.m7g.4xlarge",  # TODO (huydhn): This is not working yet
         "linux.dgx.b200",
         "linux.hpu.gaudi3.8",
     ],
@@ -60,7 +60,7 @@
     "linux.rocm.gpu.gfx942.8": "rocm",
     "linux.24xl.spr-metal": "cpu",
     "linux.24xl.gnr": "cpu",
-    "linux.arm64.m7g.4xlarge": "arm64-cpu",
+    # "linux.arm64.m7g.4xlarge": "arm64-cpu",  # TODO (huydhn): This is not working yet
     "linux.hpu.gaudi3.8": "hpu",
 }
 
@@ -254,7 +254,10 @@ def generate_benchmark_matrix(
     # Gather all possible benchmarks
     for platform in sorted(platforms):
         selected_models = []
-        for file in glob.glob(f"{benchmark_configs_dir}/{platform}/*.json"):
+        # Only the serving configs need to be parsed: every model has one, and
+        # it always carries the tensor_parallel_size field that is used to find
+        # the right runner
+        for file in glob.glob(f"{benchmark_configs_dir}/{platform}/*serving*.json"):
             with open(file) as f:
                 try:
                     configs = json.load(f)
diff --git a/.github/workflows/vllm-benchmark.yml b/.github/workflows/vllm-benchmark.yml
index 126424a..718d42a 100644
--- a/.github/workflows/vllm-benchmark.yml
+++ b/.github/workflows/vllm-benchmark.yml
@@ -2,8 +2,8 @@ name: vLLM Benchmark
 
 on:
   schedule:
-    # Run every 12 hours
-    - cron: '0 */12 * * *'
+    # Run daily at 09:15 UTC (1:15 AM PST / 2:15 AM PDT)
+    - cron: '15 9 * * *'
   workflow_dispatch:
     inputs:
       vllm_branch:

From 7ba6367a37efd86b90272a4d37779644181c4e00 Mon Sep 17 00:00:00 2001
From: Huy Do <huydhn@gmail.com>
Date: Sat, 13 Dec 2025 00:27:26 -0800
Subject: [PATCH 2/2] [no ci] Several minor tweaks to vllm benchmark workflow

Signed-off-by: Huy Do <huydhn@gmail.com>
---
 .github/scripts/generate_vllm_benchmark_matrix.py | 9 ++++++---
 .github/workflows/vllm-benchmark.yml              | 4 ++--
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/.github/scripts/generate_vllm_benchmark_matrix.py b/.github/scripts/generate_vllm_benchmark_matrix.py
index 2720ffe..008ee83 100755
--- a/.github/scripts/generate_vllm_benchmark_matrix.py
+++ b/.github/scripts/generate_vllm_benchmark_matrix.py
@@ -19,7 +19,7 @@
         "linux.rocm.gpu.gfx942.1",
         "linux.24xl.spr-metal",
         "linux.24xl.gnr",
-        "linux.arm64.m7g.4xlarge",
+        # "linux.arm64.m7g.4xlarge",  # TODO (huydhn): This is not working yet
         "linux.dgx.b200",
         "linux.hpu.gaudi3.8",
     ],
@@ -60,7 +60,7 @@
     "linux.rocm.gpu.gfx942.8": "rocm",
     "linux.24xl.spr-metal": "cpu",
     "linux.24xl.gnr": "cpu",
-    "linux.arm64.m7g.4xlarge": "arm64-cpu",
+    # "linux.arm64.m7g.4xlarge": "arm64-cpu",  # TODO (huydhn): This is not working yet
     "linux.hpu.gaudi3.8": "hpu",
 }
 
@@ -254,7 +254,10 @@ def generate_benchmark_matrix(
     # Gather all possible benchmarks
     for platform in sorted(platforms):
         selected_models = []
-        for file in glob.glob(f"{benchmark_configs_dir}/{platform}/*.json"):
+        # Only the serving configs need to be parsed: they list every model
+        # along with its tensor_parallel_size, which is used to find a runner
+        # with the right capacity
+        for file in glob.glob(f"{benchmark_configs_dir}/{platform}/*serving*.json"):
             with open(file) as f:
                 try:
                     configs = json.load(f)
diff --git a/.github/workflows/vllm-benchmark.yml b/.github/workflows/vllm-benchmark.yml
index 126424a..718d42a 100644
--- a/.github/workflows/vllm-benchmark.yml
+++ b/.github/workflows/vllm-benchmark.yml
@@ -2,8 +2,8 @@ name: vLLM Benchmark
 
 on:
   schedule:
-    # Run every 12 hours
-    - cron: '0 */12 * * *'
+    # Run daily at 09:15 UTC (1:15 AM PST / 2:15 AM PDT)
+    - cron: '15 9 * * *'
   workflow_dispatch:
     inputs:
       vllm_branch: