Skip to content

Commit beaad7c

Browse files
committed
Re-enable Arm CPU vLLM HUD Benchmarks
- Re-enables the benchmarks disabled in #114; related to vllm-project/vllm#26494 (not sure which needs to go in first).
- Uses the default block_size in serving benchmarks (i.e. 128, instead of setting it to 16).

Signed-off-by: Fadi Arafeh <fadi.arafeh@arm.com>
1 parent a069415 commit beaad7c

File tree

4 files changed

+15
-25
lines changed

4 files changed

+15
-25
lines changed

.github/scripts/generate_vllm_benchmark_matrix.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
"linux.rocm.gpu.gfx942.1",
2020
"linux.24xl.spr-metal",
2121
"linux.24xl.gnr",
22-
# "linux.arm64.m7g.4xlarge", # TODO (huydhn): This is not working yet
22+
"linux.arm64.m8g.4xlarge",
2323
"linux.dgx.b200",
2424
"linux.hpu.gaudi3.8",
2525
],
@@ -60,7 +60,7 @@
6060
"linux.rocm.gpu.gfx942.8": "rocm",
6161
"linux.24xl.spr-metal": "cpu",
6262
"linux.24xl.gnr": "cpu",
63-
# "linux.arm64.m7g.4xlarge": "arm64-cpu", # TODO (huydhn): This is not working yet
63+
"linux.arm64.m8g.4xlarge": "arm64-cpu",
6464
"linux.hpu.gaudi3.8": "hpu",
6565
}
6666

.github/scripts/test_generate_vllm_benchmark_matrix.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def test_generate_benchmark_matrix():
2222
{
2323
"include": [
2424
{
25-
"runner": "linux.arm64.m7g.4xlarge",
25+
"runner": "linux.arm64.m8g.4xlarge",
2626
"models": "meta-llama/meta-llama-3.1-8b-instruct"
2727
},
2828
{
@@ -209,7 +209,7 @@ def test_generate_benchmark_matrix():
209209
{
210210
"include": [
211211
{
212-
"runner": "linux.arm64.m7g.4xlarge",
212+
"runner": "linux.arm64.m8g.4xlarge",
213213
"models": "meta-llama/meta-llama-3.1-8b-instruct"
214214
},
215215
{
@@ -247,7 +247,7 @@ def test_generate_benchmark_matrix():
247247
{
248248
"include": [
249249
{
250-
"runner": "linux.arm64.m7g.4xlarge",
250+
"runner": "linux.arm64.m8g.4xlarge",
251251
"models": "meta-llama/meta-llama-3.1-8b-instruct"
252252
},
253253
{
@@ -286,7 +286,7 @@ def test_generate_benchmark_matrix():
286286
{
287287
"include": [
288288
{
289-
"runner": "linux.arm64.m7g.4xlarge",
289+
"runner": "linux.arm64.m8g.4xlarge",
290290
"models": "meta-llama/meta-llama-3.1-8b-instruct"
291291
},
292292
{
@@ -321,7 +321,7 @@ def test_generate_benchmark_matrix():
321321
{
322322
"include": [
323323
{
324-
"runner": "linux.arm64.m7g.4xlarge",
324+
"runner": "linux.arm64.m8g.4xlarge",
325325
"models": "meta-llama/meta-llama-3.1-8b-instruct"
326326
},
327327
{
@@ -409,7 +409,7 @@ def test_generate_benchmark_matrix():
409409

410410
# Select multiple runners
411411
models = []
412-
runners = ["h100", "spr", "m7g"]
412+
runners = ["h100", "spr", "m8g"]
413413
output = json.dumps(
414414
generate_benchmark_matrix(BENCHMARK_CONFIG_DIRS, models, runners), indent=2
415415
)
@@ -419,7 +419,7 @@ def test_generate_benchmark_matrix():
419419
{
420420
"include": [
421421
{
422-
"runner": "linux.arm64.m7g.4xlarge",
422+
"runner": "linux.arm64.m8g.4xlarge",
423423
"models": "meta-llama/meta-llama-3.1-8b-instruct"
424424
},
425425
{
@@ -624,7 +624,7 @@ def test_generate_benchmark_matrix():
624624
"meta-llama/meta-llama-3.1-8b-instruct",
625625
"mistralai/mixtral-8x7b-instruct-v0.1",
626626
]
627-
runners = ["rocm", "spr", "m7g"]
627+
runners = ["rocm", "spr", "m8g"]
628628
output = json.dumps(
629629
generate_benchmark_matrix(BENCHMARK_CONFIG_DIRS, models, runners), indent=2
630630
)
@@ -634,7 +634,7 @@ def test_generate_benchmark_matrix():
634634
{
635635
"include": [
636636
{
637-
"runner": "linux.arm64.m7g.4xlarge",
637+
"runner": "linux.arm64.m8g.4xlarge",
638638
"models": "meta-llama/meta-llama-3.1-8b-instruct"
639639
},
640640
{

.github/workflows/vllm-benchmark.yml

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,7 @@ on:
2525
A comma-separated list of runners from .github/scripts/generate_vllm_benchmark_matrix.py to run the benchmark (optional, default to run everything)
2626
required: true
2727
type: string
28-
# TODO (huydhn): Remove aarch64 CPU benchmark running on m7g until the change
29-
# from https://github.com/vllm-project/vllm/pull/26494#issuecomment-3537415441
30-
# is resolved and merged
31-
default: h100,rocm,spr,gnr,b200,gaudi3
28+
default: h100,rocm,spr,gnr,b200,m8g,gaudi3
3229
pull_request:
3330
paths:
3431
- .github/workflows/vllm-benchmark.yml
@@ -306,13 +303,11 @@ jobs:
306303
run: |
307304
set -eux
308305
309-
ON_ARM64_CPU=0
310306
ON_CPU=0
311307
312-
case "$DEVICE_NAME" in
313-
cpu) ON_CPU=1 ;;
314-
arm64-cpu) ON_ARM64_CPU=1 ;;
315-
esac
308+
if [ "$DEVICE_NAME" == "cpu" ] || [ "$DEVICE_NAME" == "arm64-cpu" ]; then
309+
ON_CPU=1
310+
fi
316311
317312
container_name=$(docker run \
318313
${GPU_FLAG:-} \
@@ -325,7 +320,6 @@ jobs:
325320
-e ENGINE_VERSION \
326321
-e SAVE_TO_PYTORCH_BENCHMARK_FORMAT \
327322
-e ON_CPU="${ON_CPU}" \
328-
-e ON_ARM64_CPU="${ON_ARM64_CPU}" \
329323
--ipc=host \
330324
--tty \
331325
--detach \

vllm-benchmarks/benchmarks/arm64-cpu/serving-tests-arm64-cpu.json

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
"device": "cpu",
1515
"dtype": "bfloat16",
1616
"distributed_executor_backend": "mp",
17-
"block_size": 16,
1817
"trust_remote_code": "",
1918
"disable_log_stats": "",
2019
"disable_log_requests": "",
@@ -43,7 +42,6 @@
4342
"device": "cpu",
4443
"dtype": "bfloat16",
4544
"distributed_executor_backend": "mp",
46-
"block_size": 16,
4745
"trust_remote_code": "",
4846
"disable_log_stats": "",
4947
"disable_log_requests": "",
@@ -72,7 +70,6 @@
7270
"device": "cpu",
7371
"dtype": "bfloat16",
7472
"distributed_executor_backend": "mp",
75-
"block_size": 16,
7673
"trust_remote_code": "",
7774
"disable_log_stats": "",
7875
"disable_log_requests": "",
@@ -101,7 +98,6 @@
10198
"device": "cpu",
10299
"dtype": "bfloat16",
103100
"distributed_executor_backend": "mp",
104-
"block_size": 16,
105101
"trust_remote_code": "",
106102
"enable_chunked_prefill": "",
107103
"disable_log_stats": "",

0 commit comments

Comments (0)