diff --git a/Android.bp b/Android.bp index 39ba145195..f97978deaa 100644 --- a/Android.bp +++ b/Android.bp @@ -462,6 +462,7 @@ cc_library_static { "src/cpu/kernels/CpuScatterKernel.cpp", "src/cpu/kernels/CpuSoftmaxKernel.cpp", "src/cpu/kernels/CpuSubKernel.cpp", + "src/cpu/kernels/CpuTopKVKernel.cpp", "src/cpu/kernels/CpuTransposeKernel.cpp", "src/cpu/kernels/CpuWeightsReshapeKernel.cpp", "src/cpu/kernels/CpuWinogradConv2dKernel.cpp", @@ -611,6 +612,10 @@ cc_library_static { "src/cpu/kernels/sub/neon/qasymm8.cpp", "src/cpu/kernels/sub/neon/qasymm8_signed.cpp", "src/cpu/kernels/sub/neon/qsymm16.cpp", + "src/cpu/kernels/topkv/generic/neon/fp16.cpp", + "src/cpu/kernels/topkv/generic/neon/fp32.cpp", + "src/cpu/kernels/topkv/generic/neon/qasymm8.cpp", + "src/cpu/kernels/topkv/generic/neon/qasymm8_signed.cpp", "src/cpu/operators/CpuActivation.cpp", "src/cpu/operators/CpuAdd.cpp", "src/cpu/operators/CpuAddMulAdd.cpp", @@ -649,6 +654,7 @@ cc_library_static { "src/cpu/operators/CpuScatter.cpp", "src/cpu/operators/CpuSoftmax.cpp", "src/cpu/operators/CpuSub.cpp", + "src/cpu/operators/CpuTopKV.cpp", "src/cpu/operators/CpuTranspose.cpp", "src/cpu/operators/CpuWinogradConv2d.cpp", "src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp", @@ -978,6 +984,7 @@ cc_library_static { "src/runtime/NEON/functions/NEStackLayer.cpp", "src/runtime/NEON/functions/NEStridedSlice.cpp", "src/runtime/NEON/functions/NETile.cpp", + "src/runtime/NEON/functions/NETopKV.cpp", "src/runtime/NEON/functions/NETranspose.cpp", "src/runtime/NEON/functions/NEUnstack.cpp", "src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp", diff --git a/arm_compute/runtime/NEON/NEFunctions.h b/arm_compute/runtime/NEON/NEFunctions.h index 49a6e9fefb..82388da2d7 100644 --- a/arm_compute/runtime/NEON/NEFunctions.h +++ b/arm_compute/runtime/NEON/NEFunctions.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2025 Arm Limited. + * Copyright (c) 2016-2026 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -112,6 +112,7 @@ #include "arm_compute/runtime/NEON/functions/NEStackLayer.h" #include "arm_compute/runtime/NEON/functions/NEStridedSlice.h" #include "arm_compute/runtime/NEON/functions/NETile.h" +#include "arm_compute/runtime/NEON/functions/NETopKV.h" #include "arm_compute/runtime/NEON/functions/NETranspose.h" #include "arm_compute/runtime/NEON/functions/NEUnstack.h" #include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h" diff --git a/arm_compute/runtime/NEON/functions/NETopKV.h b/arm_compute/runtime/NEON/functions/NETopKV.h new file mode 100644 index 0000000000..8f63b2a62e --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NETopKV.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NETOPKV_H
+#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NETOPKV_H
+
+/** @file
+ * @publicapi
+ */
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+class ITensorInfo;
+
+/** Basic function to run cpu::kernels::CpuTopKVKernel
+ *
+ * @note For each batch element, the function reports whether the target class is among the top-k prediction scores.
+ */
+class NETopKV : public IFunction
+{
+public:
+    /** Constructor */
+    NETopKV();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NETopKV(const NETopKV &) = delete;
+    /** Default move constructor */
+    NETopKV(NETopKV &&);
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NETopKV &operator=(const NETopKV &) = delete;
+    /** Default move assignment operator */
+    NETopKV &operator=(NETopKV &&);
+    /** Destructor */
+    ~NETopKV();
+    /** Set the input and output of the kernel.
+     *
+     * @param[in]  predictions A batch_size x classes tensor. Data types supported: F16/F32/QASYMM8/QASYMM8_SIGNED
+     * @param[in]  targets     A batch_size 1D tensor of class ids. Data types supported: U32
+     * @param[out] output      Computed precision at @p k as a bool 1D tensor. Data types supported: U8
+     * @param[in]  k           Number of top elements to look at for computing precision.
+     */
+    void configure(const ITensor *predictions, const ITensor *targets, ITensor *output, const unsigned int k);
+
+    /** Static function to check if given info will lead to a valid configuration.
+ * + * Similar to @ref NETopKV::configure() + * + * @return a status + */ + static Status + validate(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k); + + // Inherited methods overridden + void run() override; + +private: + struct Impl; + std::unique_ptr _impl; +}; +} // namespace arm_compute +#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NETOPKV_H diff --git a/filelist.json b/filelist.json index 9cf4b72eb1..ae92ef763a 100644 --- a/filelist.json +++ b/filelist.json @@ -2453,6 +2453,26 @@ ] } }, + "TopKV": { + "files": { + "common": [ + "src/cpu/kernels/CpuTopKVKernel.cpp", + "src/cpu/operators/CpuTopKV.cpp", + "src/runtime/NEON/functions/NETopKV.cpp" + ], + "neon": { + "fp16": [ "src/cpu/kernels/topkv/generic/neon/fp16.cpp" ], + "fp32": [ "src/cpu/kernels/topkv/generic/neon/fp32.cpp" ], + "qasymm8": [ + "src/cpu/kernels/topkv/generic/neon/qasymm8.cpp" + ], + "qasymm8_signed": [ + "src/cpu/kernels/topkv/generic/neon/qasymm8_signed.cpp" + ] + } + + } + }, "Transpose": { "files": { "common": [ diff --git a/src/BUILD.bazel b/src/BUILD.bazel index 3c0b83369a..6eb9570cd8 100644 --- a/src/BUILD.bazel +++ b/src/BUILD.bazel @@ -738,6 +738,7 @@ filegroup( "cpu/kernels/CpuScatterKernel.cpp", "cpu/kernels/CpuSoftmaxKernel.cpp", "cpu/kernels/CpuSubKernel.cpp", + "cpu/kernels/CpuTopKVKernel.cpp", "cpu/kernels/CpuTransposeKernel.cpp", "cpu/kernels/CpuWeightsReshapeKernel.cpp", "cpu/kernels/CpuWinogradConv2dKernel.cpp", @@ -848,6 +849,9 @@ filegroup( "cpu/kernels/sub/neon/qasymm8.cpp", "cpu/kernels/sub/neon/qasymm8_signed.cpp", "cpu/kernels/sub/neon/qsymm16.cpp", + "cpu/kernels/topkv/generic/neon/fp32.cpp", + "cpu/kernels/topkv/generic/neon/qasymm8.cpp", + "cpu/kernels/topkv/generic/neon/qasymm8_signed.cpp", "cpu/operators/CpuActivation.cpp", "cpu/operators/CpuAdd.cpp", "cpu/operators/CpuAddMulAdd.cpp", @@ -886,6 +890,7 @@ filegroup( "cpu/operators/CpuScatter.cpp", "cpu/operators/CpuSoftmax.cpp", "cpu/operators/CpuSub.cpp", 
+ "cpu/operators/CpuTopKV.cpp", "cpu/operators/CpuTranspose.cpp", "cpu/operators/CpuWinogradConv2d.cpp", "cpu/operators/internal/CpuGemmAssemblyDispatch.cpp", @@ -994,6 +999,7 @@ filegroup( "runtime/NEON/functions/NEStackLayer.cpp", "runtime/NEON/functions/NEStridedSlice.cpp", "runtime/NEON/functions/NETile.cpp", + "runtime/NEON/functions/NETopKV.cpp", "runtime/NEON/functions/NETranspose.cpp", "runtime/NEON/functions/NEUnstack.cpp", "runtime/NEON/functions/NEWinogradConvolutionLayer.cpp", @@ -1109,7 +1115,8 @@ filegroup( "cpu/kernels/scatter/generic/neon/fp16.cpp", "cpu/kernels/select/generic/neon/fp16.cpp", "cpu/kernels/softmax/generic/neon/fp16.cpp", - "cpu/kernels/sub/neon/fp16.cpp"] + + "cpu/kernels/sub/neon/fp16.cpp", + "cpu/kernels/topkv/generic/neon/fp16.cpp"] + glob(["**/*.h", "**/*.hpp", "**/*.inl"]), diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 74847f6878..55e3c075e9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -732,6 +732,7 @@ target_sources( cpu/kernels/CpuScatterKernel.cpp cpu/kernels/CpuSoftmaxKernel.cpp cpu/kernels/CpuSubKernel.cpp + cpu/kernels/CpuTopKVKernel.cpp cpu/kernels/CpuTransposeKernel.cpp cpu/kernels/CpuWeightsReshapeKernel.cpp cpu/kernels/CpuWinogradConv2dKernel.cpp @@ -842,6 +843,9 @@ target_sources( cpu/kernels/sub/neon/qasymm8.cpp cpu/kernels/sub/neon/qasymm8_signed.cpp cpu/kernels/sub/neon/qsymm16.cpp + cpu/kernels/topkv/generic/neon/fp32.cpp + cpu/kernels/topkv/generic/neon/qasymm8.cpp + cpu/kernels/topkv/generic/neon/qasymm8_signed.cpp cpu/operators/CpuActivation.cpp cpu/operators/CpuAdd.cpp cpu/operators/CpuAddMulAdd.cpp @@ -880,6 +884,7 @@ target_sources( cpu/operators/CpuScatter.cpp cpu/operators/CpuSoftmax.cpp cpu/operators/CpuSub.cpp + cpu/operators/CpuTopKV.cpp cpu/operators/CpuTranspose.cpp cpu/operators/CpuWinogradConv2d.cpp cpu/operators/internal/CpuGemmAssemblyDispatch.cpp @@ -988,6 +993,7 @@ target_sources( runtime/NEON/functions/NEStackLayer.cpp runtime/NEON/functions/NEStridedSlice.cpp 
runtime/NEON/functions/NETile.cpp + runtime/NEON/functions/NETopKV.cpp runtime/NEON/functions/NETranspose.cpp runtime/NEON/functions/NEUnstack.cpp runtime/NEON/functions/NEWinogradConvolutionLayer.cpp @@ -1109,4 +1115,5 @@ target_sources( cpu/kernels/select/generic/neon/fp16.cpp cpu/kernels/softmax/generic/neon/fp16.cpp cpu/kernels/sub/neon/fp16.cpp + cpu/kernels/topkv/generic/neon/fp16.cpp ) \ No newline at end of file diff --git a/src/cpu/kernels/CpuTopKVKernel.cpp b/src/cpu/kernels/CpuTopKVKernel.cpp new file mode 100644 index 0000000000..e0e17c3d32 --- /dev/null +++ b/src/cpu/kernels/CpuTopKVKernel.cpp @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "src/cpu/kernels/CpuTopKVKernel.h" + +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" + +#include "src/common/utils/profile/acl_profile.h" +#include "src/core/common/Registrars.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" +#include "src/cpu/kernels/topkv/list.h" + +#include +#include + +namespace arm_compute +{ +namespace cpu +{ + +namespace kernels +{ +namespace +{ + +static const std::vector available_kernels = { + {"neon_fp32_topkv", [](const CpuTopKVKernelDataTypeISASelectorData &data) { return (data.dt == DataType::F32); }, + REGISTER_FP32_NEON(arm_compute::cpu::topkv_fp32_neon)}, + {"neon_fp16_topkv", + [](const CpuTopKVKernelDataTypeISASelectorData &data) { return (data.dt == DataType::F16) && data.isa.fp16; }, + REGISTER_FP16_NEON(arm_compute::cpu::topkv_fp16_neon)}, + {"neon_qu8_topkv", [](const CpuTopKVKernelDataTypeISASelectorData &data) { return (data.dt == DataType::QASYMM8); }, + REGISTER_QASYMM8_NEON(arm_compute::cpu::topkv_qasymm8_neon)}, + {"neon_qs8_topkv", + [](const CpuTopKVKernelDataTypeISASelectorData &data) { return (data.dt == DataType::QASYMM8_SIGNED); }, + REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::topkv_qasymm8_signed_neon)}}; + +Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, const ITensorInfo &dst, uint32_t k) +{ + ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&src0); + + // src0: predictions + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&src0, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, + DataType::F16, DataType::F32); + + // src1: targets (class indices) + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&src1, 1, DataType::U32); + + // Basic dimensionality expectations: + // predictions: [C, N] (2D), targets: [N] (1D) + ARM_COMPUTE_RETURN_ERROR_ON(src0.num_dimensions() < 2); + 
ARM_COMPUTE_RETURN_ERROR_ON(src1.num_dimensions() < 1); + + const unsigned int C = src0.tensor_shape()[0]; // classes + const unsigned int N = src0.tensor_shape()[1]; // batch + + // k constraints + ARM_COMPUTE_RETURN_ERROR_ON_MSG(k == 0, "k must be > 0"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(C == 0, "predictions classes dimension must be > 0"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(k > C, "k must be <= number of classes (C)"); + + // targets must match batch + ARM_COMPUTE_RETURN_ERROR_ON_MSG(src1.tensor_shape()[0] != N, + "targets dimension must match predictions batch dimension (N)"); + + // Output is one byte per batch element + const TensorShape out_shape(N); + + // If dst is already configured, validate it + if (dst.total_size() != 0) + { + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&dst, 1, DataType::U8); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(dst.tensor_shape() != out_shape, "dst shape must be [N]"); + } + + const auto uk = CpuTopKVKernel::get_implementation( + CpuTopKVKernelDataTypeISASelectorData{src0.data_type(), CPUInfo::get().get_isa()}); + + ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr); + + return Status{}; +} +} // namespace + +void CpuTopKVKernel::configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst, uint32_t k) +{ + ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuTopKVKernel::configure"); + ARM_COMPUTE_UNUSED(src1); // compiler error on v7a + ARM_COMPUTE_ERROR_ON_NULLPTR(src0, src1, dst); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*src0, *src1, *dst, k)); + + const auto uk = CpuTopKVKernel::get_implementation( + CpuTopKVKernelDataTypeISASelectorData{src0->data_type(), CPUInfo::get().get_isa()}); + + ARM_COMPUTE_ERROR_ON_NULLPTR(uk); + + _run_method = uk->ukernel; + _name = std::string("CpuTopKVKernel").append("/").append(uk->name); + _k = k; + // Auto initialize dst if not initialized + auto_init_if_empty(*dst, TensorShape(src0->dimension(1)), 1U, DataType::U8); + // 
Configure kernel window + Window win = calculate_max_window(*dst, Steps()); + ICpuKernel::configure(win); +} +size_t CpuTopKVKernel::get_mws(const CPUInfo &platform, size_t thread_count) const +{ + ARM_COMPUTE_UNUSED(thread_count); + ARM_COMPUTE_UNUSED(platform); + + return 1024u; +} + +Status CpuTopKVKernel::validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst, uint32_t k) +{ + ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuTopKVKernel::validate"); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src0, src1, dst); + + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*src0, *src1, *dst, k)); + + return Status{}; +} + +void CpuTopKVKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) +{ + ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuTopKVKernel::run_op"); + ARM_COMPUTE_UNUSED(info); + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window); + + ARM_COMPUTE_ERROR_ON(tensors.empty()); + ARM_COMPUTE_ERROR_ON(_run_method == nullptr); + + const ITensor *src0 = tensors.get_const_tensor(TensorType::ACL_SRC_0); + const ITensor *src1 = tensors.get_const_tensor(TensorType::ACL_SRC_1); + ITensor *dst = tensors.get_tensor(TensorType::ACL_DST); + _run_method(src0, src1, dst, _k, window); +} + +const char *CpuTopKVKernel::name() const +{ + return _name.c_str(); +} + +const std::vector &CpuTopKVKernel::get_available_kernels() +{ + return available_kernels; +} + +} // namespace kernels +} // namespace cpu +} // namespace arm_compute diff --git a/src/cpu/kernels/CpuTopKVKernel.h b/src/cpu/kernels/CpuTopKVKernel.h new file mode 100644 index 0000000000..ebcddbcb36 --- /dev/null +++ b/src/cpu/kernels/CpuTopKVKernel.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2026 Arm Limited. 
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_SRC_CPU_KERNELS_CPUTOPKVKERNEL_H
+#define ACL_SRC_CPU_KERNELS_CPUTOPKVKERNEL_H
+
+#include "src/core/common/Macros.h"
+#include "src/cpu/ICpuKernel.h"
+
+namespace arm_compute
+{
+namespace cpu
+{
+namespace kernels
+{
+using CpuTopKVKernelDataTypeISASelectorData    = DataTypeISASelectorData;
+using CpuTopKVKernelDataTypeISASelectorDataPtr = DataTypeISASelectorPtr;
+
+/** Interface for the kernel that computes, per batch element, whether the target class is within the top-k predictions */
+class CpuTopKVKernel : public ICpuKernel<CpuTopKVKernel>
+{
+private:
+    using TopKVKernelPtr =
+        std::add_pointer<void(const ITensor *, const ITensor *, ITensor *, uint32_t, const Window &)>::type;
+
+public:
+    struct TopKVKernel
+    {
+        const char                                    *name;
+        const CpuTopKVKernelDataTypeISASelectorDataPtr is_selected;
+        TopKVKernelPtr                                 ukernel;
+    };
+
+    CpuTopKVKernel() = default;
+    ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuTopKVKernel);
+    /** Initialise the kernel's inputs and dst.
+     *
+     * @param[in]  src0 First input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+     * @param[in]  src1 Second input tensor info. Data types supported: U32
+     * @param[out] dst  The dst tensor info. Data types supported: U8
+     * @param[in]  k    Number of top elements to look at for computing precision.
+     */
+    void configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst, uint32_t k);
+    /** Static function to check if given info will lead to a valid configuration
+     *
+     * Similar to CpuTopKVKernel::configure()
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst, uint32_t k);
+
+    // Inherited methods overridden:
+    void        run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
+    const char *name() const override;
+
+    /** Return minimum workload size of the relevant kernel
+     *
+     * @param[in] platform     The CPU platform used to create the context.
+     * @param[in] thread_count Number of threads in the execution.
+     *
+     * @return[out] small_network_mws Minimum workload size for requested configuration.
+     */
+    size_t get_mws(const CPUInfo &platform, size_t thread_count) const override;
+
+    static const std::vector<TopKVKernel> &get_available_kernels();
+
+private:
+    uint32_t       _k{};
+    TopKVKernelPtr _run_method{nullptr};
+    std::string    _name{};
+};
+} // namespace kernels
+} // namespace cpu
+} // namespace arm_compute
+#endif // ACL_SRC_CPU_KERNELS_CPUTOPKVKERNEL_H
diff --git a/src/cpu/kernels/topkv/generic/neon/fp16.cpp b/src/cpu/kernels/topkv/generic/neon/fp16.cpp
new file mode 100644
index 0000000000..14c5d9f4a3
--- /dev/null
+++ b/src/cpu/kernels/topkv/generic/neon/fp16.cpp
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2026 Arm Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) + +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include "src/common/utils/profile/acl_profile.h" +#include "src/core/helpers/WindowHelpers.h" + +#include +#include + +namespace arm_compute +{ +namespace cpu +{ +void topkv_fp16_neon(const ITensor *in1, const ITensor *in2, ITensor *out, uint32_t k, const Window &win) +{ + // predictions: [C, N] F16 + // targets: [N] U32 + // output: [N] U8 + + const auto &pred_info = *in1->info(); + const unsigned int C = pred_info.tensor_shape()[0]; + + // Match FP32 reference behaviour: threshold = target_val + epsilon (in float) + const float eps = std::numeric_limits::epsilon(); + + // Assume classes are contiguous for each sample so we can vector load. + ARM_COMPUTE_ERROR_ON(pred_info.strides_in_bytes()[0] != sizeof(float16_t)); + + execute_window_loop( + win, + [&](const Coordinates &id) + { + const int n = id.x(); // scheduled over output [N] + + const uint32_t t = *reinterpret_cast(in2->ptr_to_element(Coordinates{n})); + + // Pointer to predictions[0, n] + const float16_t *base = reinterpret_cast(in1->ptr_to_element(Coordinates{0, n})); + + // Read target value in FP16, compute threshold in FP32, then convert to FP16 for compares + const float target_val_f32 = static_cast(base[t]); + const float threshold_f32 = target_val_f32 + eps; + + // Convert threshold to fp16 and broadcast + const float16x8_t thr = vdupq_n_f16(static_cast(threshold_f32)); + + unsigned int c = 0; + uint16x8_t acc = vdupq_n_u16(0); + + // Process 8 classes at a time (8 x fp16 lanes) + for (; c + 8 <= C; c += 8) + { + // Load 8 consecutive FP16 values + const float16x8_t v = vld1q_f16(reinterpret_cast(base + c)); + + // Compare v > threshold (lane-wise), returns 0xFFFF/0x0000 per lane + const uint16x8_t m = vcgtq_f16(v, thr); + + // Convert mask to 0/1 by 
shifting MSB into LSB, accumulate in u16 lanes + acc = vaddq_u16(acc, vshrq_n_u16(m, 15)); + } + + // Reduce accumulator to scalar + uint32_t rank = 0; + +#if defined(__aarch64__) + // Sum all 8 lanes + rank += static_cast(vaddvq_u16(acc)); +#else + // ARMv7: manual horizontal sum + uint16x4_t s0 = vadd_u16(vget_low_u16(acc), vget_high_u16(acc)); + s0 = vpadd_u16(s0, s0); + s0 = vpadd_u16(s0, s0); + rank += vget_lane_u16(s0, 0); +#endif + + // Tail + for (; c < C; ++c) + { + const float v = static_cast(base[c]); + rank += (v > threshold_f32) ? 1u : 0u; + } + + *reinterpret_cast(out->ptr_to_element(Coordinates{n})) = static_cast(rank < k); + }); +} +} // namespace cpu +} // namespace arm_compute +#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */ diff --git a/src/cpu/kernels/topkv/generic/neon/fp32.cpp b/src/cpu/kernels/topkv/generic/neon/fp32.cpp new file mode 100644 index 0000000000..3d78e9fe9e --- /dev/null +++ b/src/cpu/kernels/topkv/generic/neon/fp32.cpp @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include "src/common/utils/profile/acl_profile.h" +#include "src/core/helpers/WindowHelpers.h" + +#include +#include + +namespace arm_compute +{ +namespace cpu +{ +void topkv_fp32_neon(const ITensor *in1, const ITensor *in2, ITensor *out, uint32_t k, const Window &win) +{ + ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "topkv_fp32_neon"); + + // predictions: [C, N] F32 + // targets: [N] U32 + // output: [N] U8 + + const auto &pred_info = *in1->info(); + const unsigned int C = pred_info.tensor_shape()[0]; + const float eps = std::numeric_limits::epsilon(); + + // We assume classes are stored contiguously for each sample: + // stride[0] must equal sizeof(float) so we can use vld1q_f32 safely. + ARM_COMPUTE_ERROR_ON(pred_info.strides_in_bytes()[0] != sizeof(float)); + + execute_window_loop(win, + [&](const Coordinates &id) + { + // Window is scheduled over output, so DimX corresponds to batch index (n) + const int n = id.x(); + + // Load target class index for sample n + const uint32_t t = *reinterpret_cast(in2->ptr_to_element(Coordinates{n})); + + // Pointer to predictions[0, n], i.e. 
the first class score for sample n + const float *base = reinterpret_cast(in1->ptr_to_element(Coordinates{0, n})); + + // Read prediction score for the target class + const float target_val = base[t]; + + // Threshold used for strict comparison: + // equivalent to (v - target_val) > epsilon in the reference + const float threshold = target_val + eps; + + // Broadcast threshold into a NEON vector + const float32x4_t thr = vdupq_n_f32(threshold); + + unsigned int c = 0; + + // Accumulator holding the count of values greater than threshold + // Each lane accumulates 0 or 1 per comparison + uint32x4_t acc = vdupq_n_u32(0); + + // Process 4 class scores at a time + for (; c + 4 <= C; c += 4) + { + // Load 4 consecutive prediction values: + // v = { base[c], base[c+1], base[c+2], base[c+3] } + const float32x4_t v = vld1q_f32(base + c); + + // Compare v > threshold elementwise + // Result is a mask: 0xFFFFFFFF where true, 0x00000000 where false + const uint32x4_t m = vcgtq_f32(v, thr); + + // Convert mask to {0,1} by shifting MSB into LSB position + // Then accumulate into acc + acc = vaddq_u32(acc, vshrq_n_u32(m, 31)); + } + + // Horizontal reduction of acc into scalar rank + uint32_t rank = 0; + +#if defined(__aarch64__) + // AArch64 has a single instruction to sum all lanes + rank += vaddvq_u32(acc); +#else + // ARMv7: pairwise add manually + uint32x2_t acc2 = vadd_u32(vget_low_u32(acc), vget_high_u32(acc)); + acc2 = vpadd_u32(acc2, acc2); + rank += vget_lane_u32(acc2, 0); +#endif + + // Handle remaining classes (tail) scalar + for (; c < C; ++c) + { + rank += (base[c] > threshold) ? 
1u : 0u; + } + + // Output 1 if target class is in top-k (rank < k), else 0 + *reinterpret_cast(out->ptr_to_element(Coordinates{n})) = + static_cast(rank < k); + }); +} + +} // namespace cpu +} // namespace arm_compute diff --git a/src/cpu/kernels/topkv/generic/neon/qasymm8.cpp b/src/cpu/kernels/topkv/generic/neon/qasymm8.cpp new file mode 100644 index 0000000000..48b35e5901 --- /dev/null +++ b/src/cpu/kernels/topkv/generic/neon/qasymm8.cpp @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include "src/common/utils/profile/acl_profile.h" +#include "src/core/helpers/WindowHelpers.h" + +#include + +namespace arm_compute +{ +namespace cpu +{ +void topkv_qasymm8_neon(const ITensor *in1, const ITensor *in2, ITensor *out, uint32_t k, const Window &win) +{ + // predictions: [C, N] QASYMM8 (U8) + // targets: [N] U32 + // output: [N] U8 + + const auto &pred_info = *in1->info(); + const unsigned int C = pred_info.tensor_shape()[0]; + + // Assume classes are contiguous for each sample + ARM_COMPUTE_ERROR_ON(pred_info.strides_in_bytes()[0] != sizeof(uint8_t)); + + execute_window_loop( + win, + [&](const Coordinates &id) + { + const int n = id.x(); // scheduled over output [N] + + const uint32_t t = *reinterpret_cast(in2->ptr_to_element(Coordinates{n})); + + const uint8_t *base = reinterpret_cast(in1->ptr_to_element(Coordinates{0, n})); + + const uint8_t target_val = base[t]; + const uint8x16_t thr = vdupq_n_u8(target_val); + + unsigned int c = 0; + + // Accumulate counts in u16 lanes to avoid overflow + uint16x8_t acc0 = vdupq_n_u16(0); + uint16x8_t acc1 = vdupq_n_u16(0); + + for (; c + 16 <= C; c += 16) + { + // Load 16 u8 scores + const uint8x16_t v = vld1q_u8(base + c); + + // Compare v > target_val -> mask bytes are 0xFF (true) or 0x00 (false) + const uint8x16_t m = vcgtq_u8(v, thr); + + // Turn 0xFF/0x00 into 1/0 cheaply: mask & 1 + const uint8x16_t ones = vandq_u8(m, vdupq_n_u8(1)); + + // Widen low/high halves to u16 and accumulate + acc0 = vaddq_u16(acc0, vmovl_u8(vget_low_u8(ones))); + acc1 = vaddq_u16(acc1, vmovl_u8(vget_high_u8(ones))); + } + + uint32_t rank = 0; + +#if defined(__aarch64__) + rank += vaddvq_u16(acc0); + rank += vaddvq_u16(acc1); +#else + // ARMv7 horizontal sum for acc0 + { + uint16x4_t s = vadd_u16(vget_low_u16(acc0), vget_high_u16(acc0)); + s = vpadd_u16(s, s); + s 
= vpadd_u16(s, s); + rank += vget_lane_u16(s, 0); + } + // ARMv7 horizontal sum for acc1 + { + uint16x4_t s = vadd_u16(vget_low_u16(acc1), vget_high_u16(acc1)); + s = vpadd_u16(s, s); + s = vpadd_u16(s, s); + rank += vget_lane_u16(s, 0); + } +#endif + + // Tail + for (; c < C; ++c) + { + rank += (base[c] > target_val) ? 1u : 0u; + } + + *reinterpret_cast(out->ptr_to_element(Coordinates{n})) = static_cast(rank < k); + }); +} +} // namespace cpu +} // namespace arm_compute diff --git a/src/cpu/kernels/topkv/generic/neon/qasymm8_signed.cpp b/src/cpu/kernels/topkv/generic/neon/qasymm8_signed.cpp new file mode 100644 index 0000000000..223094f4ba --- /dev/null +++ b/src/cpu/kernels/topkv/generic/neon/qasymm8_signed.cpp @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include "src/common/utils/profile/acl_profile.h" +#include "src/core/helpers/WindowHelpers.h" + +#include + +namespace arm_compute +{ +namespace cpu +{ + +void topkv_qasymm8_signed_neon(const ITensor *in1, const ITensor *in2, ITensor *out, uint32_t k, const Window &win) +{ + // predictions: [C, N] QASYMM8_SIGNED (S8) + // targets: [N] U32 + // output: [N] U8 + + const auto &pred_info = *in1->info(); + const unsigned int C = pred_info.tensor_shape()[0]; + + // Assume classes are contiguous for each sample + ARM_COMPUTE_ERROR_ON(pred_info.strides_in_bytes()[0] != sizeof(int8_t)); + + execute_window_loop( + win, + [&](const Coordinates &id) + { + const int n = id.x(); // scheduled over output [N] + + const uint32_t t = *reinterpret_cast(in2->ptr_to_element(Coordinates{n})); + + const int8_t *base = reinterpret_cast(in1->ptr_to_element(Coordinates{0, n})); + + const int8_t target_val = base[t]; + const int8x16_t thr = vdupq_n_s8(target_val); + + unsigned int c = 0; + uint16x8_t acc0 = vdupq_n_u16(0); + uint16x8_t acc1 = vdupq_n_u16(0); + + for (; c + 16 <= C; c += 16) + { + const int8x16_t v = vld1q_s8(base + c); + + // Signed compare v > thr. + // NOTE: vcgtq_s8 returns a uint8x16_t mask (0xFF/0x00 per lane) on AArch64. 
+ const uint8x16_t m = vcgtq_s8(v, thr); + + // Convert mask to 1/0: mask & 1 + const uint8x16_t ones = vandq_u8(m, vdupq_n_u8(1)); + + // Widen and accumulate + acc0 = vaddq_u16(acc0, vmovl_u8(vget_low_u8(ones))); + acc1 = vaddq_u16(acc1, vmovl_u8(vget_high_u8(ones))); + } + + uint32_t rank = 0; + +#if defined(__aarch64__) + rank += vaddvq_u16(acc0); + rank += vaddvq_u16(acc1); +#else + { + uint16x4_t s = vadd_u16(vget_low_u16(acc0), vget_high_u16(acc0)); + s = vpadd_u16(s, s); + s = vpadd_u16(s, s); + rank += vget_lane_u16(s, 0); + } + { + uint16x4_t s = vadd_u16(vget_low_u16(acc1), vget_high_u16(acc1)); + s = vpadd_u16(s, s); + s = vpadd_u16(s, s); + rank += vget_lane_u16(s, 0); + } +#endif + + for (; c < C; ++c) + { + rank += (base[c] > target_val) ? 1u : 0u; + } + + *reinterpret_cast(out->ptr_to_element(Coordinates{n})) = static_cast(rank < k); + }); +} + +} // namespace cpu +} // namespace arm_compute diff --git a/src/cpu/kernels/topkv/list.h b/src/cpu/kernels/topkv/list.h new file mode 100644 index 0000000000..b3488ae94c --- /dev/null +++ b/src/cpu/kernels/topkv/list.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_SRC_CPU_KERNELS_TOPKV_LIST_H +#define ACL_SRC_CPU_KERNELS_TOPKV_LIST_H + +namespace arm_compute +{ +namespace cpu +{ +#define DECLARE_TOPKV_KERNEL(func_name) \ + void func_name(const ITensor *in1, const ITensor *in2, ITensor *out, uint32_t k, const Window &win) + +DECLARE_TOPKV_KERNEL(topkv_qasymm8_neon); +DECLARE_TOPKV_KERNEL(topkv_qasymm8_signed_neon); +DECLARE_TOPKV_KERNEL(topkv_fp16_neon); +DECLARE_TOPKV_KERNEL(topkv_fp32_neon); + +#undef DECLARE_TOPKV_KERNEL +} // namespace cpu +} // namespace arm_compute + +#endif // ACL_SRC_CPU_KERNELS_TOPKV_LIST_H diff --git a/src/cpu/operators/CpuTopKV.cpp b/src/cpu/operators/CpuTopKV.cpp new file mode 100644 index 0000000000..a509b29c34 --- /dev/null +++ b/src/cpu/operators/CpuTopKV.cpp @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "src/cpu/operators/CpuTopKV.h" + +#include "arm_compute/runtime/NEON/NEScheduler.h" + +#include "src/common/IOperator.h" +#include "src/common/utils/LegacySupport.h" +#include "src/common/utils/Log.h" +#include "src/common/utils/profile/acl_profile.h" +#include "src/cpu/kernels/CpuTopKVKernel.h" + +namespace arm_compute +{ +namespace cpu +{ +void CpuTopKV::configure(const ITensorInfo *predictions, + const ITensorInfo *targets, + ITensorInfo *output, + const unsigned int k) +{ + ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuTopKV::configure"); + ARM_COMPUTE_LOG_PARAMS(predictions, targets, output, k); + + auto kernel = std::make_unique(); + kernel->configure(predictions, targets, output, k); + _kernel = std::move(kernel); +} + +Status CpuTopKV::validate(const ITensorInfo *predictions, + const ITensorInfo *targets, + ITensorInfo *output, + const unsigned int k) +{ + ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuTopKV::validate"); + return kernels::CpuTopKVKernel::validate(predictions, targets, output, k); +} + +void CpuTopKV::run(ITensorPack &tensors) +{ + ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "CpuTopKV::run"); + ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided"); + + NEScheduler::get().schedule_op(_kernel.get(), Window::DimX, _kernel->window(), tensors); +} +} // namespace cpu +} // namespace arm_compute diff --git a/src/cpu/operators/CpuTopKV.h 
b/src/cpu/operators/CpuTopKV.h new file mode 100644 index 0000000000..859c36e8c9 --- /dev/null +++ b/src/cpu/operators/CpuTopKV.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_SRC_CPU_OPERATORS_CPUTOPKV_H +#define ACL_SRC_CPU_OPERATORS_CPUTOPKV_H + +#include "src/cpu/ICpuOperator.h" + +namespace arm_compute +{ +namespace cpu +{ +/** Basic function to run @ref kernels::CpuActivationKernel */ +class CpuTopKV : public ICpuOperator +{ +public: + /** Set the input and output of the kernel. + * + * @param[in] predictions A classes x batches tensor. Data types supported: F16/F32/QASYMM8/QASYMM8_SIGNED + * @param[in] targets A batch_size 1D tensor of class ids. Data types supported: U32 + * @param[out] output Computed precision at @p k as a bool 1D tensor. 
Data types supported: U8 + * @param[in] k Number of top elements to look at for computing precision. + */ + void + configure(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k); + + /** Static function to check if given info will lead to a valid configuration. + * + * Similar to @ref CpuTopKV::configure() + * + * @return a status + */ + static Status + validate(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k); + + // Inherited methods overridden: + void run(ITensorPack &tensors) override; +}; +} // namespace cpu +} // namespace arm_compute +#endif // ACL_SRC_CPU_OPERATORS_CPUTOPKV_H diff --git a/src/runtime/NEON/functions/NETopKV.cpp b/src/runtime/NEON/functions/NETopKV.cpp new file mode 100644 index 0000000000..7a6eb1d692 --- /dev/null +++ b/src/runtime/NEON/functions/NETopKV.cpp @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NETopKV.h" + +#include "arm_compute/core/Validate.h" + +#include "src/common/utils/profile/acl_profile.h" +#include "src/cpu/operators/CpuTopKV.h" + +namespace arm_compute +{ +struct NETopKV::Impl +{ + const ITensor *predictions{nullptr}; + const ITensor *targets{nullptr}; + ITensor *dst{nullptr}; + std::unique_ptr op{nullptr}; +}; + +NETopKV::NETopKV() : _impl(std::make_unique()) +{ +} +NETopKV::NETopKV(NETopKV &&) = default; +NETopKV &NETopKV::operator=(NETopKV &&) = default; +NETopKV::~NETopKV() = default; + +void NETopKV::configure(const ITensor *predictions, const ITensor *targets, ITensor *output, const unsigned int k) +{ + ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NETopKV::configure"); + _impl->predictions = predictions; + _impl->targets = targets; + _impl->dst = output; + + ARM_COMPUTE_ERROR_ON_NULLPTR(_impl->predictions, _impl->targets, _impl->dst); + + _impl->op = std::make_unique(); + _impl->op->configure(_impl->predictions->info(), _impl->targets->info(), _impl->dst->info(), k); +} + +Status +NETopKV::validate(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k) +{ + ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NETopKV::validate"); + ARM_COMPUTE_RETURN_ERROR_ON_DYNAMIC_SHAPE(predictions, targets, output); + return cpu::CpuTopKV::validate(predictions, targets, output, k); +} + +void NETopKV::run() +{ + ARM_COMPUTE_TRACE_EVENT(ARM_COMPUTE_PROF_CAT_CPU, ARM_COMPUTE_PROF_LVL_CPU, "NETopKV::run"); + ITensorPack pack; + pack.add_tensor(TensorType::ACL_SRC_0, _impl->predictions); + pack.add_tensor(TensorType::ACL_SRC_1, 
_impl->targets); + pack.add_tensor(TensorType::ACL_DST, _impl->dst); + _impl->op->run(pack); +} +} // namespace arm_compute diff --git a/tests/datasets/ShapeDatasets.h b/tests/datasets/ShapeDatasets.h index 83ef352a75..14a62cd65c 100644 --- a/tests/datasets/ShapeDatasets.h +++ b/tests/datasets/ShapeDatasets.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2025 Arm Limited. + * Copyright (c) 2017-2026 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -1078,6 +1078,25 @@ class LogisticSMEStressShapesFp32 final : public ShapeDataset { } }; + +/** Data set containing small 2D tensor shapes for TopKV operator. */ +class SmallTopKV final : public ShapeDataset +{ +public: + SmallTopKV() : ShapeDataset("Shape", {TensorShape{8U, 7U}, TensorShape{15U, 13U}, TensorShape{32U, 64U}}) + { + } +}; + +/** Data set containing small 2D tensor shapes for TopKV operator. */ +class LargeTopKV final : public ShapeDataset +{ +public: + LargeTopKV() + : ShapeDataset("Shape", {TensorShape{1000U, 64U}, TensorShape{1500U, 128U}, TensorShape{1000U, 32000U}}) + { + } +}; } // namespace datasets } // namespace test } // namespace arm_compute diff --git a/tests/validation/NEON/TopKV.cpp b/tests/validation/NEON/TopKV.cpp new file mode 100644 index 0000000000..b794edb034 --- /dev/null +++ b/tests/validation/NEON/TopKV.cpp @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2026 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/Acl.hpp" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/Traits.h" +#include "arm_compute/core/utils/StringUtils.h" +#include "arm_compute/runtime/CPP/functions/CPPTopKV.h" +#include "arm_compute/runtime/NEON/functions/NETopKV.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "support/AclRequires.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/framework/Macros.h" +#include "tests/NEON/Accessor.h" +#include "tests/validation/fixtures/TopKVLayerFixture.h" +#include "tests/validation/Validation.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +using framework::dataset::make; + +const auto small_dataset_topkv = combine(datasets::SmallTopKV(), make("K", 3, 5)); +const auto large_dataset_topkv = combine(datasets::LargeTopKV(), make("K", 3, 5)); +const auto f32_small_dataset = combine(small_dataset_topkv, make("DataType", DataType::F32)); +const auto f32_large_dataset = combine(large_dataset_topkv, make("DataType", DataType::F32)); +const auto f16_small_dataset = combine(small_dataset_topkv, make("DataType", DataType::F16)); +const auto f16_large_dataset = combine(large_dataset_topkv, make("DataType", DataType::F16)); +const auto qu8_small_dataset = combine(small_dataset_topkv, make("DataType", DataType::QASYMM8)); +const auto qu8_large_dataset = combine(large_dataset_topkv, make("DataType", DataType::QASYMM8)); +const auto qs8_small_dataset = combine(small_dataset_topkv, make("DataType", DataType::QASYMM8_SIGNED)); +const auto qs8_large_dataset = combine(large_dataset_topkv, make("DataType", DataType::QASYMM8_SIGNED)); + +TEST_SUITE(NEON) +TEST_SUITE(TopKVLayer) +// clang-format on +// *INDENT-ON* +template +using NETopKVFixture = TopKVValidationFixture; + +TEST_SUITE(Quantized) +TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunSmall, 
NETopKVFixture, framework::DatasetMode::ALL, qu8_small_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, AbsoluteTolerance(0)); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NETopKVFixture, framework::DatasetMode::NIGHTLY, qu8_large_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, AbsoluteTolerance(0)); +} + +TEST_SUITE_END() // QASYMM8 + +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(RunSmall, NETopKVFixture, framework::DatasetMode::ALL, qs8_small_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, AbsoluteTolerance(0)); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NETopKVFixture, framework::DatasetMode::NIGHTLY, qs8_large_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, AbsoluteTolerance(0)); +} + +TEST_SUITE_END() // QASYMM8_SIGNED +TEST_SUITE_END() // Quantized + +TEST_SUITE(Float) +#ifdef ARM_COMPUTE_ENABLE_FP16 +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, NETopKVFixture, framework::DatasetMode::ALL, f16_small_dataset) +{ + if (CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, AbsoluteTolerance(0)); + } + else + { + ARM_COMPUTE_TEST_WARNING("Device does not support fp16 vector operations. Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_WARNING(); + } +} +FIXTURE_DATA_TEST_CASE(RunLarge, NETopKVFixture, framework::DatasetMode::NIGHTLY, f16_large_dataset) +{ + if (CPUInfo::get().has_fp16()) + { + // Validate output + validate(Accessor(_target), _reference, AbsoluteTolerance(0)); + } + else + { + ARM_COMPUTE_TEST_WARNING("Device does not support fp16 vector operations. 
Test SKIPPED."); + framework::ARM_COMPUTE_PRINT_WARNING(); + } +} + +TEST_SUITE_END() // FP16 +#endif /* ARM_COMPUTE_ENABLE_FP16 */ + +TEST_SUITE(FP32) + +FIXTURE_DATA_TEST_CASE(RunSmall, NETopKVFixture, framework::DatasetMode::ALL, f32_small_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, AbsoluteTolerance(0)); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, NETopKVFixture, framework::DatasetMode::NIGHTLY, f32_large_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, AbsoluteTolerance(0)); +} + +TEST_SUITE_END() // FP32 +TEST_SUITE_END() // Float + +TEST_SUITE_END() // TopKVLayer +TEST_SUITE_END() // Neon + +TEST_SUITE(CPP) +TEST_SUITE(TopKVLayer) + +template +using CPPTopKVLayerFixture = TopKVValidationFixture; + +TEST_SUITE(Float) +TEST_SUITE(FP32) +// Used only for benchmarking +FIXTURE_DATA_TEST_CASE(RunLarge, CPPTopKVLayerFixture, framework::DatasetMode::DISABLED, f32_large_dataset) +{ + // Validate output + validate(Accessor(_target), _reference, AbsoluteTolerance(0)); +} +TEST_SUITE_END() // FP32 +TEST_SUITE_END() // Float + +TEST_SUITE_END() // TopKVLayer +TEST_SUITE_END() // Neon + +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/fixtures/TopKVLayerFixture.h b/tests/validation/fixtures/TopKVLayerFixture.h new file mode 100644 index 0000000000..c2d13efa37 --- /dev/null +++ b/tests/validation/fixtures/TopKVLayerFixture.h @@ -0,0 +1,216 @@ +/* + * Copyright (c) 2026 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ACL_TESTS_VALIDATION_FIXTURES_TOPKVLAYERFIXTURE_H +#define ACL_TESTS_VALIDATION_FIXTURES_TOPKVLAYERFIXTURE_H + +#include "tests/AssetsLibrary.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" +#include "tests/Globals.h" +#include "tests/validation/reference/TopKV.h" + +#include +#include +#include +#include + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template +class TopKVValidationFixture : public framework::Fixture +{ +public: + void setup(TensorShape predictions_shape, uint32_t k, DataType input_data_type) + { + if (std::is_same::value && // Cpu + input_data_type == DataType::F16 && !CPUInfo::get().has_fp16()) + { + return; + } + const TensorShape targets_shape(predictions_shape[1]); + _data_type = input_data_type; + _target = compute_target(predictions_shape, targets_shape, k); + _reference = compute_reference(predictions_shape, targets_shape, k); + } + +protected: + // Fills prediction scores with small random noise, assigns a random target class to each sample, + // and slightly boosts the target score so some targets fall inside the top-K and others do not. 
+ template + void fill(U &&predictions, W &&targets, int32_t i, uint32_t C) + { + std::mt19937 gen(0); + + // 1) Fill targets with valid class indices in [0, C-1] + std::uniform_int_distribution class_dist(0, C - 1); + library->fill(targets, class_dist, i); + + const unsigned int N = targets.shape()[0]; + + // 2) Fill predictions with small noise + 3) add a per-sample boost to the target class + if (_data_type == DataType::F32 || _data_type == DataType::F16) + { + std::normal_distribution noise(0.f, 0.1f); + library->fill(predictions, noise, i); + + // Mixture of boosts so output is not always 1 + std::uniform_real_distribution u01(0.f, 1.f); + std::normal_distribution strong_boost(0.8f, 0.15f); + std::normal_distribution medium_boost(0.35f, 0.15f); + std::normal_distribution weak_boost(0.05f, 0.10f); + + for (unsigned int n = 0; n < N; ++n) + { + const uint32_t target = *reinterpret_cast(targets(Coordinates{static_cast(n)})); + + float boost = 0.f; + const float r = u01(gen); + if (r < 0.35f) + { + boost = strong_boost(gen); + } + else if (r < 0.80f) + { + boost = medium_boost(gen); + } + else + { + boost = weak_boost(gen); + } + boost = std::max(-0.2f, std::min(boost, 1.5f)); + + const Coordinates pc{static_cast(target), static_cast(n)}; + + if (_data_type == DataType::F32) + { + *reinterpret_cast(predictions(pc)) += boost; + } + else // F16 + { + auto *p = reinterpret_cast(predictions(pc)); + *p = static_cast(static_cast(*p) + boost); + } + } + return; + } + + if (_data_type == DataType::QASYMM8 || _data_type == DataType::QASYMM8_SIGNED) + { + const bool is_signed = (_data_type == DataType::QASYMM8_SIGNED); + + // Small integer noise + if (is_signed) + { + std::uniform_int_distribution noise(-6, 6); + library->fill(predictions, noise, i); + } + else + { + std::uniform_int_distribution noise(0, 12); + library->fill(predictions, noise, i); + } + + // Small, variable boost in quantized units + std::uniform_int_distribution boost_dist(0, 18); + + for (unsigned int n 
= 0; n < N; ++n) + { + const uint32_t target = *reinterpret_cast(targets(Coordinates{static_cast(n)})); + + const int boost = boost_dist(gen); + const Coordinates pc{static_cast(target), static_cast(n)}; + + if (is_signed) + { + auto *p = reinterpret_cast(predictions(pc)); + const int v = static_cast(*p) + boost; + *p = static_cast(std::max(-128, std::min(127, v))); + } + else + { + auto *p = reinterpret_cast(predictions(pc)); + const int v = static_cast(*p) + boost; + *p = static_cast(std::max(0, std::min(255, v))); + } + } + return; + } + } + + TensorType compute_target(const TensorShape &pred_shape, const TensorShape &targets_shape, uint32_t k) + { + // Create tensors + TensorType pred = create_tensor(pred_shape, _data_type, 1, QuantizationInfo()); + TensorType targets = create_tensor(targets_shape, DataType::U32, 1, QuantizationInfo()); + TensorType output; + + // Create and configure function + FunctionType topkv_layer; + topkv_layer.configure(&pred, &targets, &output, k); + + ARM_COMPUTE_ASSERT(pred.info()->is_resizable()); + ARM_COMPUTE_ASSERT(targets.info()->is_resizable()); + + // Allocate tensors + pred.allocator()->allocate(); + targets.allocator()->allocate(); + output.allocator()->allocate(); + + ARM_COMPUTE_ASSERT(!pred.info()->is_resizable()); + + // Fill tensors + const auto C{pred_shape[0]}; + fill(AccessorType(pred), AccessorType(targets), 0, C); + + topkv_layer.run(); + + return output; + } + + SimpleTensor compute_reference(const TensorShape &pred_shape, const TensorShape &targets_shape, uint32_t k) + { + // Create reference + SimpleTensor pred{pred_shape, _data_type, 1, QuantizationInfo()}; + SimpleTensor targets{targets_shape, DataType::U32, 1, QuantizationInfo()}; + + const auto C{pred_shape[0]}; + fill(pred, targets, 0, C); + + return reference::topkv(pred, targets, k); + } + +protected: + TensorType _target{}; + SimpleTensor _reference{}; + DataType _data_type{}; +}; + +} // namespace validation +} // namespace test +} // namespace 
arm_compute +#endif // ACL_TESTS_VALIDATION_FIXTURES_TOPKVLAYERFIXTURE_H diff --git a/tests/validation/reference/TopKV.cpp b/tests/validation/reference/TopKV.cpp new file mode 100644 index 0000000000..7cb3a0fb93 --- /dev/null +++ b/tests/validation/reference/TopKV.cpp @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "TopKV.h" + +#include "arm_compute/core/Coordinates.h" +#include "arm_compute/core/CoreTypes.h" +#include "arm_compute/core/TensorShape.h" + +#include "tests/SimpleTensor.h" + +#include +#include + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace reference +{ + +template +SimpleTensor topkv(SimpleTensor &predictions, SimpleTensor &targets, uint32_t k) +{ + const TensorShape &ps = predictions.shape(); + const unsigned int C = ps[0]; // classes + const unsigned int N = ps[1]; // batch + + SimpleTensor expected(TensorShape(N), DataType::U8); + + const float eps = std::numeric_limits::epsilon(); + + for (unsigned int i = 0; i < N; ++i) + { + // targets[i] (U32) + const uint32_t target_class = *reinterpret_cast(targets(Coordinates{static_cast(i)})); + + // Read predictions[target_class, i] as T, then promote to float + const T target_t = + *reinterpret_cast(predictions(Coordinates{static_cast(target_class), static_cast(i)})); + const float target_val = static_cast(target_t); + + unsigned int rank = 0; + for (unsigned int c = 0; c < C; ++c) + { + const T vt = + *reinterpret_cast(predictions(Coordinates{static_cast(c), static_cast(i)})); + const float v = static_cast(vt); + + if ((v - target_val) > eps) + { + ++rank; + } + } + + expected[i] = static_cast(rank < k); + } + + return expected; +} + +template SimpleTensor topkv(SimpleTensor &, SimpleTensor &, uint32_t); +template SimpleTensor topkv(SimpleTensor &, SimpleTensor &, uint32_t); +template SimpleTensor topkv(SimpleTensor &, SimpleTensor &, uint32_t); +template SimpleTensor topkv(SimpleTensor &, SimpleTensor &, uint32_t); + +} // namespace reference +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/reference/TopKV.h b/tests/validation/reference/TopKV.h new file mode 100644 index 0000000000..5cb1772c12 --- /dev/null +++ b/tests/validation/reference/TopKV.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2026 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_TESTS_VALIDATION_REFERENCE_TOPKV_H +#define ACL_TESTS_VALIDATION_REFERENCE_TOPKV_H + +#include "tests/SimpleTensor.h" +#include "tests/validation/Helpers.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace reference +{ +template +SimpleTensor topkv(SimpleTensor &predictions, SimpleTensor &targets, uint32_t k); +} // namespace reference +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_VALIDATION_REFERENCE_TOPKV_H