diff --git a/include/boost/crypt/utility/config.hpp b/include/boost/crypt/utility/config.hpp index a46640c6..4e0a1606 100644 --- a/include/boost/crypt/utility/config.hpp +++ b/include/boost/crypt/utility/config.hpp @@ -127,7 +127,7 @@ // ----- Has something ----- // ----- Unreachable ----- -#if defined(__GNUC__) || defined(__clang__) +#if defined(__GNUC__) || defined(__clang__) || defined(BOOST_CRYPT_HAS_CUDA) # define BOOST_CRYPT_UNREACHABLE __builtin_unreachable() #elif defined(_MSC_VER) # define BOOST_CRYPT_UNREACHABLE __assume(0) diff --git a/include/boost/crypt2/drbg/detail/hash_drbg.hpp b/include/boost/crypt2/drbg/detail/hash_drbg.hpp index 285f769b..a751e9fe 100644 --- a/include/boost/crypt2/drbg/detail/hash_drbg.hpp +++ b/include/boost/crypt2/drbg/detail/hash_drbg.hpp @@ -6,6 +6,7 @@ #define BOOST_CRYPT2_DRBG_HASH_DRBG_HPP #include +#include #include #include #include @@ -61,7 +62,7 @@ class hash_drbg static constexpr compat::uint64_t reseed_interval {281474976710656ULL}; // 2^48 compat::array constant_ {}; - compat::span constant_span_ {constant_}; + compat::span constant_span_ {constant_}; compat::array value_ {}; compat::span value_span_ {value_}; @@ -108,7 +109,7 @@ class hash_drbg compat::span personalization = compat::span{}) noexcept -> state; template , concepts::sized_range SizedRange3 = compat::span> BOOST_CRYPT_GPU_ENABLED auto init(SizedRange1&& entropy, SizedRange2&& nonce = compat::span {}, diff --git a/include/boost/crypt2/drbg/detail/hmac_drbg.hpp b/include/boost/crypt2/drbg/detail/hmac_drbg.hpp index 8a7c985a..74be5218 100644 --- a/include/boost/crypt2/drbg/detail/hmac_drbg.hpp +++ b/include/boost/crypt2/drbg/detail/hmac_drbg.hpp @@ -135,7 +135,7 @@ BOOST_CRYPT_GPU_ENABLED_CONSTEXPR auto hmac_drbg storage_gap {std::byte{0x00}}; + compat::array storage_gap {compat::byte{0x00}}; compat::span storage_gap_span {storage_gap}; HMACType hmac; diff --git a/test/nvcc_jamfile b/test/nvcc_jamfile index 7bdbdccc..37a9be9f 100644 --- a/test/nvcc_jamfile +++ b/test/nvcc_jamfile @@ -24,3 +24,6 @@ run test_shake128_nvcc.cu ; run test_shake256_nvcc.cu ; run test_hmac.cu ; + +run test_hmac_drbg.cu ; +run test_hash_drbg.cu ; diff --git a/test/test_hash_drbg.cu b/test/test_hash_drbg.cu new file mode 100644 index 00000000..e3589405 --- /dev/null +++ b/test/test_hash_drbg.cu @@ -0,0 +1,119 @@ +// Copyright Matt Borland 2024 +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" +#include "generate_random_strings.hpp" +#include +#include +#include +#include +#include + +using digest_type = typename cuda::std::array; + +// The kernel function +__global__ void cuda_test(char** in, digest_type* out, int numElements) +{ + int i = blockIdx.x * blockDim.x + threadIdx.x; + + if (i < numElements) + { + boost::crypt::sha1_hash_drbg drbg; + cuda::std::span in_span {in[i], static_cast(64)}; + drbg.init(in_span); + drbg.generate(out[i], 640U); + } +} + +int main() +{ + try + { + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + constexpr int numElements = 50000; + constexpr std::size_t elementSize = 64; + + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + char** input_vector1; + cudaMallocManaged(&input_vector1, numElements * sizeof(char*)); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + for (int i = 0; i < numElements; ++i) + { + cudaMallocManaged(&input_vector1[i], elementSize * sizeof(char)); + if (input_vector1[i] == nullptr) + { + throw std::runtime_error("Failed to allocate memory for input_vector1"); + } + boost::crypt::generate_random_string(input_vector1[i], elementSize); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1, output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + digest_type out; + boost::crypt::sha1_hash_drbg drbg; + std::span in_span(input_vector1[i], static_cast(64)); + drbg.init(in_span); + drbg.generate(out, 640U); + results.emplace_back(out); + } + double t = w.elapsed(); + + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i][0] != results[i][0]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + // Cleanup all the memory we allocated + for (int i = 0; i < numElements; ++i) + { + cudaFree(input_vector1[i]); + } + cudaFree(input_vector1); + } + catch (const std::exception& e) + { + std::cerr << "Terminated with exception: " << e.what() << std::endl; + } +} diff --git a/test/test_hmac_drbg.cu b/test/test_hmac_drbg.cu new file mode 100644 index 00000000..41a479cc --- /dev/null +++ b/test/test_hmac_drbg.cu @@ -0,0 +1,119 @@ +// Copyright Matt Borland 2024 +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" +#include "generate_random_strings.hpp" +#include +#include +#include +#include +#include + +using digest_type = typename cuda::std::array; + +// The kernel function +__global__ void cuda_test(char** in, digest_type* out, int numElements) +{ + int i = blockIdx.x * blockDim.x + threadIdx.x; + + if (i < numElements) + { + boost::crypt::sha1_hmac_drbg drbg; + cuda::std::span in_span {in[i], static_cast(64)}; + drbg.init(in_span); + drbg.generate(out[i], 640U); + } +} + +int main() +{ + try + { + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + constexpr int numElements = 50000; + constexpr std::size_t elementSize = 64; + + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector A + char** input_vector1; + cudaMallocManaged(&input_vector1, numElements * sizeof(char*)); + + // Allocate the managed output vector C + cuda_managed_ptr output_vector(numElements); + + for (int i = 0; i < numElements; ++i) + { + cudaMallocManaged(&input_vector1[i], elementSize * sizeof(char)); + if (input_vector1[i] == nullptr) + { + throw std::runtime_error("Failed to allocate memory for input_vector1"); + } + boost::crypt::generate_random_string(input_vector1[i], elementSize); + } + + // Launch the Vector Add CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + cuda_test<<>>(input_vector1, output_vector.get(), numElements); + cudaDeviceSynchronize(); + std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + if (err != cudaSuccess) + { + std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + std::vector results; + results.reserve(numElements); + w.reset(); + for(int i = 0; i < numElements; ++i) + { + digest_type out; + boost::crypt::sha1_hmac_drbg drbg; + std::span in_span(input_vector1[i], static_cast(64)); + drbg.init(in_span); + drbg.generate(out, 640U); + results.emplace_back(out); + } + double t = w.elapsed(); + + // check the results + for(int i = 0; i < numElements; ++i) + { + if (output_vector[i][0] != results[i][0]) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + + std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + // Cleanup all the memory we allocated + for (int i = 0; i < numElements; ++i) + { + cudaFree(input_vector1[i]); + } + cudaFree(input_vector1); + } + catch (const std::exception& e) + { + std::cerr << "Terminated with exception: " << e.what() << std::endl; + } +}