Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions benchmarks/single_node/kimik2.5_fp4_b200.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ vllm serve $MODEL --host 0.0.0.0 --port $PORT \
--reasoning-parser kimi_k2 \
--tool-call-parser kimi_k2 \
--compilation_config.pass_config.fuse_allreduce_rms true \
+ --no-enable-prefix-caching \
--trust-remote-code > $SERVER_LOG 2>&1 &

SERVER_PID=$!
Expand Down
1 change: 1 addition & 0 deletions benchmarks/single_node/kimik2.5_fp4_mi355x.sh
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ vllm serve $MODEL --port $PORT \
--block-size=64 \
--disable-log-requests \
--trust-remote-code \
+ --no-enable-prefix-caching \
--mm-encoder-tp-mode data > $SERVER_LOG 2>&1 &

SERVER_PID=$!
Expand Down
1 change: 1 addition & 0 deletions benchmarks/single_node/kimik2.5_int4_h200.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ vllm serve $MODEL --host 0.0.0.0 --port $PORT \
--tool-call-parser kimi_k2 \
--compilation_config.pass_config.fuse_allreduce_rms true \
--trust-remote-code \
+ --no-enable-prefix-caching \
--disable-log-requests > $SERVER_LOG 2>&1 &

SERVER_PID=$!
Expand Down
1 change: 1 addition & 0 deletions benchmarks/single_node/kimik2.5_int4_mi325x.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ vllm serve $MODEL --port $PORT \
--block-size=64 \
--disable-log-requests \
--trust-remote-code \
+ --no-enable-prefix-caching \
--mm-encoder-tp-mode data > $SERVER_LOG 2>&1 &

SERVER_PID=$!
Expand Down
1 change: 1 addition & 0 deletions benchmarks/single_node/kimik2.5_int4_mi355x.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ vllm serve $MODEL --port $PORT \
--block-size=64 \
--disable-log-requests \
--trust-remote-code \
+ --no-enable-prefix-caching \
--mm-encoder-tp-mode data > $SERVER_LOG 2>&1 &

SERVER_PID=$!
Expand Down
10 changes: 10 additions & 0 deletions perf-changelog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1055,3 +1055,13 @@
- "Enable VLLM_USE_FLASHINFER_MOE_INT4=1 for Kimi K2.5 INT4 B200 benchmark"
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/935

- config-keys:
    - kimik2.5-int4-mi325x-vllm
    - kimik2.5-int4-mi355x-vllm
    - kimik2.5-int4-h200-vllm
    - kimik2.5-fp4-mi355x-vllm
    - kimik2.5-fp4-b200-vllm
  description:
    - "Disable prefix caching (--no-enable-prefix-caching) for all Kimi K2.5 benchmarks using random datasets"
  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/926

2 changes: 1 addition & 1 deletion runners/launch_h200-nb.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/bash

-export HF_HUB_CACHE_MOUNT="/home/gharunner/gharunners/hf-hub-cache/"
+export HF_HUB_CACHE_MOUNT="/mnt/data/gharunners/hf-hub-cache/"
export PORT=8888

MODEL_CODE="${EXP_NAME%%_*}"
Expand Down
Loading