pytorch · huydhn · Dec 13, 2025 · Dec 13, 2025 · Dec 13, 2025
diff --git a/.github/scripts/generate_vllm_benchmark_matrix.py b/.github/scripts/generate_vllm_benchmark_matrix.py
@@ -19,7 +19,7 @@
         "linux.rocm.gpu.gfx942.1",
         "linux.24xl.spr-metal",
         "linux.24xl.gnr",
-        "linux.arm64.m7g.4xlarge",
+        # "linux.arm64.m7g.4xlarge",  # TODO (huydhn): This is not working yet
         "linux.dgx.b200",
         "linux.hpu.gaudi3.8",
     ],
@@ -60,7 +60,7 @@
     "linux.rocm.gpu.gfx942.8": "rocm",
     "linux.24xl.spr-metal": "cpu",
     "linux.24xl.gnr": "cpu",
-    "linux.arm64.m7g.4xlarge": "arm64-cpu",
+    # "linux.arm64.m7g.4xlarge": "arm64-cpu",  # TODO (huydhn): This is not working yet
     "linux.hpu.gaudi3.8": "hpu",
 }
 
@@ -254,7 +254,10 @@ def generate_benchmark_matrix(
     # Gather all possible benchmarks
     for platform in sorted(platforms):
         selected_models = []
-        for file in glob.glob(f"{benchmark_configs_dir}/{platform}/*.json"):
+        # Only the serving configs need to be parsed because they contain all the
+        # models and their tensor_parallel_size field. The latter is used to find
+        # the runner with the right capacity
+        for file in glob.glob(f"{benchmark_configs_dir}/{platform}/*serving*.json"):
             with open(file) as f:
                 try:
                     configs = json.load(f)

diff --git a/.github/workflows/vllm-benchmark.yml b/.github/workflows/vllm-benchmark.yml
@@ -2,8 +2,8 @@ name: vLLM Benchmark
 
 on:
   schedule:
-    # Run every 12 hours
-    - cron: '0 */12 * * *'
+    # Run daily at 09:15 UTC (1:15 AM PST / 2:15 AM PDT); cron schedules are in UTC
+    - cron: '15 9 * * *'
   workflow_dispatch:
     inputs:
       vllm_branch: