diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index f4570fd2c..01455b9db 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -3250,7 +3250,7 @@ gptoss-fp4-h100-vllm: - { tp: 8, conc-start: 4, conc-end: 16 } minimaxm2.5-fp8-h100-vllm: - image: vllm/vllm-openai:v0.16.0 + image: vllm/vllm-openai:v0.18.0 model: MiniMaxAI/MiniMax-M2.5 model-prefix: minimaxm2.5 runner: h100 @@ -3532,7 +3532,7 @@ gptoss-fp4-h200-vllm: - { tp: 8, conc-start: 4, conc-end: 32 } minimaxm2.5-fp8-h200-vllm: - image: vllm/vllm-openai:v0.16.0 + image: vllm/vllm-openai:v0.18.0 model: MiniMaxAI/MiniMax-M2.5 model-prefix: minimaxm2.5 runner: h200 diff --git a/benchmarks/single_node/minimaxm2.5_fp8_h100.sh b/benchmarks/single_node/minimaxm2.5_fp8_h100.sh index 90f5bd772..dc0540fee 100755 --- a/benchmarks/single_node/minimaxm2.5_fp8_h100.sh +++ b/benchmarks/single_node/minimaxm2.5_fp8_h100.sh @@ -42,7 +42,6 @@ $EP \ --gpu-memory-utilization 0.90 \ --max-model-len $MAX_MODEL_LEN \ --max-num-seqs 256 \ ---disable-log-requests \ --trust-remote-code \ --compilation-config '{"cudagraph_mode":"PIECEWISE"}' > $SERVER_LOG 2>&1 & diff --git a/benchmarks/single_node/minimaxm2.5_fp8_h200.sh b/benchmarks/single_node/minimaxm2.5_fp8_h200.sh index 4b613d88e..f152ff98d 100755 --- a/benchmarks/single_node/minimaxm2.5_fp8_h200.sh +++ b/benchmarks/single_node/minimaxm2.5_fp8_h200.sh @@ -37,7 +37,6 @@ vllm serve $MODEL --port $PORT \ $EP \ --gpu-memory-utilization 0.95 \ --max-model-len $MAX_MODEL_LEN \ ---disable-log-requests \ --trust-remote-code > $SERVER_LOG 2>&1 & SERVER_PID=$! 
diff --git a/perf-changelog.yaml b/perf-changelog.yaml index b85245458..3eb1cf228 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1,3 +1,10 @@ +- config-keys: + - minimaxm2.5-fp8-h100-vllm + - minimaxm2.5-fp8-h200-vllm + description: + - "Update vLLM image from v0.16.0 to v0.18.0 for the MiniMax-M2.5 FP8 H100 and H200 configs, and drop the --disable-log-requests flag from both benchmark scripts" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/XXX + - config-keys: - dsr1-fp8-b200-dynamo-trt - dsr1-fp8-h200-dynamo-trt