diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml
index f4570fd2c..d990b3ae1 100644
--- a/.github/configs/nvidia-master.yaml
+++ b/.github/configs/nvidia-master.yaml
@@ -3197,7 +3197,7 @@ gptoss-fp4-b200-vllm:
       - { tp: 8, conc-start: 4, conc-end: 4 }
 
 minimaxm2.5-fp8-b200-vllm:
-  image: vllm/vllm-openai:v0.17.0-cu130
+  image: vllm/vllm-openai:v0.18.0-cu130
   model: MiniMaxAI/MiniMax-M2.5
   model-prefix: minimaxm2.5
   runner: b200
@@ -3208,18 +3208,21 @@ minimaxm2.5-fp8-b200-vllm:
   - isl: 1024
     osl: 1024
     search-space:
-      - { tp: 2, conc-start: 4, conc-end: 64 }
-      - { tp: 4, conc-start: 4, conc-end: 64 }
+      - { tp: 2, conc-start: 4, conc-end: 256 }
+      - { tp: 4, conc-start: 4, conc-end: 256 }
+      - { tp: 4, ep: 4, conc-start: 32, conc-end: 256 }
   - isl: 1024
     osl: 8192
     search-space:
-      - { tp: 2, conc-start: 4, conc-end: 64 }
-      - { tp: 4, conc-start: 4, conc-end: 64 }
+      - { tp: 2, conc-start: 4, conc-end: 256 }
+      - { tp: 4, conc-start: 4, conc-end: 256 }
+      - { tp: 4, ep: 4, conc-start: 32, conc-end: 256 }
   - isl: 8192
     osl: 1024
     search-space:
-      - { tp: 2, conc-start: 4, conc-end: 64 }
-      - { tp: 4, conc-start: 4, conc-end: 64 }
+      - { tp: 2, conc-start: 4, conc-end: 256 }
+      - { tp: 4, conc-start: 4, conc-end: 256 }
+      - { tp: 4, ep: 4, conc-start: 32, conc-end: 256 }
 
 gptoss-fp4-h100-vllm:
   image: vllm/vllm-openai:v0.15.1
diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 03fb6e082..afce65264 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -1068,3 +1068,10 @@
     - "dsr1-fp8-h200-sglang: v0.5.9-cu129-amd64 → v0.5.9-cu130"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/943
 
+- config-keys:
+    - minimaxm2.5-fp8-b200-vllm
+  description:
+    - "Update vLLM image from v0.17.0 to v0.18.0 for MiniMax-M2.5 FP8 B200"
+    - "Add tp4 ep4 search-space entries (conc 32-256) for all seq-len configs"
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/947
+