diff --git a/onnxruntime/core/mlas/lib/kleidiai/convolve_kleidiai.cpp b/onnxruntime/core/mlas/lib/kleidiai/convolve_kleidiai.cpp index cca4f5a19c417..8ff4a2c4ad3c7 100644 --- a/onnxruntime/core/mlas/lib/kleidiai/convolve_kleidiai.cpp +++ b/onnxruntime/core/mlas/lib/kleidiai/convolve_kleidiai.cpp @@ -447,6 +447,9 @@ static std::unique_ptr LhsPackImageDataSme(const size_t ci, const s const auto lhs_size = kai_get_lhs_packed_size_lhs_imatmul_pack_x32p2vlx1_x32p_sme(m,kh*kw,ci); auto lhs = std::make_unique(lhs_size); + // Some ukernel packing paths may not overwrite every byte in the packed buffer for partial tiles. + // Start from a deterministic zero state so stale heap contents cannot influence later math. + std::fill_n(lhs.get(), lhs_size, std::byte{0}); std::unique_ptr nhwc_holder; const float* activation_src = nullptr; diff --git a/onnxruntime/test/mlas/unittest/test_conv2d.h b/onnxruntime/test/mlas/unittest/test_conv2d.h index ef65da4adb031..a95ed1a1d9018 100644 --- a/onnxruntime/test/mlas/unittest/test_conv2d.h +++ b/onnxruntime/test/mlas/unittest/test_conv2d.h @@ -716,12 +716,16 @@ class MlasConv2DTest : public MlasTestBase { // // This sequence forces pad-buffer growth by running a smaller-CI convolution followed by a larger-CI // convolution (with padding to ensure pad pointers are used), then runs the smaller-CI convolution again. + // Execute both orders to validate that results are stable regardless of invocation order. // Repeat a few times to increase the likelihood of triggering a reallocation and verify the path. // for (int i = 0; i < 4; ++i) { Test(1, 1, 64, 11, 11, 32, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1); // smaller CI Test(1, 1, 320, 11, 11, 32, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1); // larger CI forces pad buffer growth Test(1, 1, 64, 11, 11, 32, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1); // sanity: back to smaller CI after growth + + Test(1, 1, 320, 11, 11, 32, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1); // reversed order start + Test(1, 1, 64, 11, 11, 32, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1); // reversed order end } }