From d689db81726ad35754e8a33a639a56197c67ab90 Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Sun, 22 Mar 2026 20:59:11 -0700 Subject: [PATCH 1/3] init --- benchmarks/single_node/kimik2.5_fp4_b200.sh | 1 + benchmarks/single_node/kimik2.5_fp4_mi355x.sh | 1 + benchmarks/single_node/kimik2.5_int4_b200.sh | 1 + benchmarks/single_node/kimik2.5_int4_h200.sh | 1 + benchmarks/single_node/kimik2.5_int4_mi325x.sh | 1 + benchmarks/single_node/kimik2.5_int4_mi355x.sh | 1 + perf-changelog.yaml | 11 +++++++++++ 7 files changed, 17 insertions(+) diff --git a/benchmarks/single_node/kimik2.5_fp4_b200.sh b/benchmarks/single_node/kimik2.5_fp4_b200.sh index 422a74950..d08e23bb2 100644 --- a/benchmarks/single_node/kimik2.5_fp4_b200.sh +++ b/benchmarks/single_node/kimik2.5_fp4_b200.sh @@ -38,6 +38,7 @@ vllm serve $MODEL --host 0.0.0.0 --port $PORT \ --reasoning-parser kimi_k2 \ --tool-call-parser kimi_k2 \ --compilation_config.pass_config.fuse_allreduce_rms true \ +--no-enable-prefix-caching \ --trust-remote-code > $SERVER_LOG 2>&1 & SERVER_PID=$! diff --git a/benchmarks/single_node/kimik2.5_fp4_mi355x.sh b/benchmarks/single_node/kimik2.5_fp4_mi355x.sh index bb522b396..8f4e86512 100755 --- a/benchmarks/single_node/kimik2.5_fp4_mi355x.sh +++ b/benchmarks/single_node/kimik2.5_fp4_mi355x.sh @@ -49,6 +49,7 @@ vllm serve $MODEL --port $PORT \ --block-size=64 \ --disable-log-requests \ --trust-remote-code \ +--no-enable-prefix-caching \ --mm-encoder-tp-mode data > $SERVER_LOG 2>&1 & SERVER_PID=$! diff --git a/benchmarks/single_node/kimik2.5_int4_b200.sh b/benchmarks/single_node/kimik2.5_int4_b200.sh index b5880a67f..be6219459 100755 --- a/benchmarks/single_node/kimik2.5_int4_b200.sh +++ b/benchmarks/single_node/kimik2.5_int4_b200.sh @@ -38,6 +38,7 @@ vllm serve $MODEL --host 0.0.0.0 --port $PORT \ --tool-call-parser kimi_k2 \ --compilation_config.pass_config.fuse_allreduce_rms true \ --trust-remote-code \ +--no-enable-prefix-caching \ --disable-log-requests > $SERVER_LOG 2>&1 & SERVER_PID=$! diff --git a/benchmarks/single_node/kimik2.5_int4_h200.sh b/benchmarks/single_node/kimik2.5_int4_h200.sh index 37281f61e..473a1bd73 100755 --- a/benchmarks/single_node/kimik2.5_int4_h200.sh +++ b/benchmarks/single_node/kimik2.5_int4_h200.sh @@ -40,6 +40,7 @@ vllm serve $MODEL --host 0.0.0.0 --port $PORT \ --tool-call-parser kimi_k2 \ --compilation_config.pass_config.fuse_allreduce_rms true \ --trust-remote-code \ +--no-enable-prefix-caching \ --disable-log-requests > $SERVER_LOG 2>&1 & SERVER_PID=$! diff --git a/benchmarks/single_node/kimik2.5_int4_mi325x.sh b/benchmarks/single_node/kimik2.5_int4_mi325x.sh index e6b7629ea..0baf27e72 100755 --- a/benchmarks/single_node/kimik2.5_int4_mi325x.sh +++ b/benchmarks/single_node/kimik2.5_int4_mi325x.sh @@ -40,6 +40,7 @@ vllm serve $MODEL --port $PORT \ --block-size=64 \ --disable-log-requests \ --trust-remote-code \ +--no-enable-prefix-caching \ --mm-encoder-tp-mode data > $SERVER_LOG 2>&1 & SERVER_PID=$! diff --git a/benchmarks/single_node/kimik2.5_int4_mi355x.sh b/benchmarks/single_node/kimik2.5_int4_mi355x.sh index e03c6abc2..510aeaf3f 100755 --- a/benchmarks/single_node/kimik2.5_int4_mi355x.sh +++ b/benchmarks/single_node/kimik2.5_int4_mi355x.sh @@ -37,6 +37,7 @@ vllm serve $MODEL --port $PORT \ --block-size=64 \ --disable-log-requests \ --trust-remote-code \ +--no-enable-prefix-caching \ --mm-encoder-tp-mode data > $SERVER_LOG 2>&1 & SERVER_PID=$! diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 99699236c..8e2f2335f 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -994,3 +994,14 @@ - "EAGLE speculative decoding: num-steps 3, draft-tokens 4, topk 1" - "New script: benchmarks/single_node/qwen3.5_fp8_b200_mtp.sh" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/898 + +- config-keys: + - kimik2.5-int4-mi325x-vllm + - kimik2.5-int4-mi355x-vllm + - kimik2.5-int4-b200-vllm + - kimik2.5-int4-h200-vllm + - kimik2.5-fp4-mi355x-vllm + - kimik2.5-fp4-b200-vllm + description: + - "Disable prefix caching (--no-enable-prefix-caching) for all Kimi K2.5 benchmarks using random datasets" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/926 From 097040eb54ecd5e3bd7bb4e45c00d81a566244a4 Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Sun, 22 Mar 2026 21:04:22 -0700 Subject: [PATCH 2/3] fix cache dir --- runners/launch_h200-nb.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runners/launch_h200-nb.sh b/runners/launch_h200-nb.sh index 170a1bdc3..9d157a858 100644 --- a/runners/launch_h200-nb.sh +++ b/runners/launch_h200-nb.sh @@ -1,6 +1,6 @@ #!/usr/bin/bash -export HF_HUB_CACHE_MOUNT="/home/gharunner/gharunners/hf-hub-cache/" +export HF_HUB_CACHE_MOUNT="/mnt/data/gharunners/hf-hub-cache/" export PORT=8888 MODEL_CODE="${EXP_NAME%%_*}" From a586515285b42f658d7f172208693e9152cb0c84 Mon Sep 17 00:00:00 2001 From: Bryan Shan <58582368+Oseltamivir@users.noreply.github.com> Date: Fri, 27 Mar 2026 09:52:45 -0700 Subject: [PATCH 3/3] Fix newline at end of file in perf-changelog.yaml --- perf-changelog.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index abc009f60..3e7db64e5 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -1117,4 +1117,4 @@ - kimik2.5-fp4-b200-vllm description: - "Disable prefix caching (--no-enable-prefix-caching) for all Kimi K2.5 benchmarks using random datasets" - pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/926 \ No newline at end of file + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/926