From 84f819f65994cc0fe5ceffec938cd9b62dff40d8 Mon Sep 17 00:00:00 2001 From: ganyi Date: Tue, 28 Apr 2026 06:27:33 +0000 Subject: [PATCH 1/2] prefill gdr kernel enablement Signed-off-by: ganyi --- atom/plugin/vllm/attention_backend/attention_gdn.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/atom/plugin/vllm/attention_backend/attention_gdn.py b/atom/plugin/vllm/attention_backend/attention_gdn.py index 619964abf..c4227e2cb 100644 --- a/atom/plugin/vllm/attention_backend/attention_gdn.py +++ b/atom/plugin/vllm/attention_backend/attention_gdn.py @@ -358,12 +358,13 @@ def forward( # 2.2: Process the remaining part if attn_metadata.num_prefills > 0: + from aiter.ops.triton.gated_delta_net.gated_delta_rule import chunk_gated_delta_rule_opt_vk initial_state = ssm_state[non_spec_state_indices_tensor].contiguous() initial_state[~has_initial_state, ...] = 0 ( core_attn_out_non_spec, last_recurrent_state, - ) = self.chunk_gated_delta_rule( + ) = chunk_gated_delta_rule_opt_vk( q=query_non_spec, k=key_non_spec, v=value_non_spec, From cebbb6e510dcb2cbc8b3f27d29f79b2f16f8486c Mon Sep 17 00:00:00 2001 From: ganyi Date: Tue, 28 Apr 2026 12:39:48 +0000 Subject: [PATCH 2/2] format Signed-off-by: ganyi --- atom/plugin/vllm/attention_backend/attention_gdn.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/atom/plugin/vllm/attention_backend/attention_gdn.py b/atom/plugin/vllm/attention_backend/attention_gdn.py index c4227e2cb..bc1230665 100644 --- a/atom/plugin/vllm/attention_backend/attention_gdn.py +++ b/atom/plugin/vllm/attention_backend/attention_gdn.py @@ -358,7 +358,10 @@ def forward( # 2.2: Process the remaining part if attn_metadata.num_prefills > 0: - from aiter.ops.triton.gated_delta_net.gated_delta_rule import chunk_gated_delta_rule_opt_vk + from aiter.ops.triton.gated_delta_net.gated_delta_rule import ( + chunk_gated_delta_rule_opt_vk, + ) + initial_state = ssm_state[non_spec_state_indices_tensor].contiguous() initial_state[~has_initial_state, ...] = 0 (