From b5625e98b3bbff1116137cbca19d687fd42d6ee4 Mon Sep 17 00:00:00 2001
From: acd1034 <48613285+acd1034@users.noreply.github.com>
Date: Thu, 5 Mar 2026 17:44:49 +0900
Subject: [PATCH 1/3] =?UTF-8?q?=E2=9C=A8=20Implement=20custom=20tuple?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 include/gpu_array.hpp | 419 ++++++++++++++++++++++++++++++------------
 test/test.cpp         | 194 +++++++++----------
 2 files changed, 399 insertions(+), 214 deletions(-)
diff --git a/include/gpu_array.hpp b/include/gpu_array.hpp
index aa56b8a..fc78c80 100644
--- a/include/gpu_array.hpp
+++ b/include/gpu_array.hpp
@@ -48,6 +48,187 @@
 #define SIGSEGV_DEPRECATED [[deprecated("Cannot access GPU memory directly")]]
 #endif
 
+namespace gpu_array::detail
+{
+    // Custom implementation of tuple for device code
+
+    template <std::size_t I, class T>  // I: position of the element, T: type stored at that position
+    struct tuple_leaf  // holds one tuple element; the index keeps leaves of equal T distinct base classes
+    {
+        using type = T;  // element type, read back by the rvalue get() overloads
+        T value;  // the stored element
+    };
+
+    template <std::size_t I, class T>
+    tuple_leaf<I, T> at_index(const tuple_leaf<I, T>&);  // never defined; used in decltype to name leaf I
+
+    template <class Seq, class... Ts>  // Seq: index sequence that pairs every Ts with its position
+    struct tuple_impl;
+
+    template <std::size_t... Is, class... Ts>
+    struct tuple_impl<std::index_sequence<Is...>, Ts...> : tuple_leaf<Is, Ts>...  // one leaf base per element
+    {
+        tuple_impl()  // defaulted; offered only when every element type is default-initializable
+        requires (std::default_initializable<Ts> && ...)
+        = default;
+        template <class... Us>
+        requires (sizeof...(Us) == sizeof...(Ts))  // exactly one argument per element
+        __host__ __device__ tuple_impl(Us&&... us) : tuple_leaf<Is, Ts>(std::forward<Us>(us))...
+        {  // each argument is forwarded into the leaf at the same position
+        }
+    };
+
+    template <class... Ts>
+    struct tuple  // tuple replacement whose user-provided members are all __host__ __device__
+    {
+        tuple()  // defaulted; offered only when every element type is default-initializable
+        requires (std::default_initializable<Ts> && ...)
+        = default;
+
+        template <class... Us>
+        struct is_single_tuple : std::false_type  // true only when the argument list is exactly one detail::tuple
+        {
+        };
+        template <class... Us>
+        struct is_single_tuple<detail::tuple<Us...>> : std::true_type
+        {
+        };
+        template <class... Us>  // element-wise constructor; steps aside when the sole argument is itself a tuple
+        requires (sizeof...(Us) == sizeof...(Ts) && !is_single_tuple<std::remove_cvref_t<Us>...>::value)
+        __host__ __device__ tuple(Us&&... us) : base_{std::forward<Us>(us)...}
+        {
+        }
+
+        template <class... Us>  // converting constructor: copies each element of t into the matching Ts
+        requires (sizeof...(Us) == sizeof...(Ts))
+        __host__ __device__ tuple(const detail::tuple<Us...>& t)
+            : tuple{tuple_convert(t, std::index_sequence_for<Ts...>{})}
+        {
+        }
+        template <class... Us>  // converting constructor: forwards each element out of the rvalue t
+        requires (sizeof...(Us) == sizeof...(Ts))
+        __host__ __device__ tuple(detail::tuple<Us...>&& t)
+            : tuple{tuple_convert(std::move(t), std::index_sequence_for<Ts...>{})}
+        {
+        }
+        template <class... Us>  // converting assignment: converts t to tuple<Ts...> first, then assigns the result
+        requires (sizeof...(Us) == sizeof...(Ts))
+        __host__ __device__ auto& operator=(const detail::tuple<Us...>& t)
+        {
+            *this = tuple_convert(t, std::index_sequence_for<Ts...>{});
+            return *this;
+        }
+        template <class... Us>  // same as above, taking the elements from an rvalue tuple
+        requires (sizeof...(Us) == sizeof...(Ts))
+        __host__ __device__ auto& operator=(detail::tuple<Us...>&& t)
+        {
+            *this = tuple_convert(std::move(t), std::index_sequence_for<Ts...>{});
+            return *this;
+        }
+
+        template <std::size_t I, class... Us>  // the get() overloads are friends because they read base_
+        __host__ __device__ friend auto& get(detail::tuple<Us...>&);
+        template <std::size_t I, class... Us>
+        __host__ __device__ friend const auto& get(const detail::tuple<Us...>&);
+        template <std::size_t I, class... Us>
+        __host__ __device__ friend auto&& get(detail::tuple<Us...>&&);
+        template <std::size_t I, class... Us>
+        __host__ __device__ friend const auto&& get(const detail::tuple<Us...>&&);
+
+    private:  // helpers carry __host__ __device__ because the __host__ __device__ members above call them
+        template <class... Us, std::size_t... Is>
+        __host__ __device__ static auto tuple_convert(const detail::tuple<Us...>& t, std::index_sequence<Is...>)
+        {
+            return detail::tuple<Ts...>{get<Is>(t)...};
+        }
+        template <class... Us, std::size_t... Is>
+        __host__ __device__ static auto tuple_convert(detail::tuple<Us...>&& t, std::index_sequence<Is...>)
+        {
+            return detail::tuple<Ts...>{get<Is>(std::move(t))...};  // rvalue get() keeps T& elements as lvalues
+        }
+
+        using base = tuple_impl<std::index_sequence_for<Ts...>, Ts...>;
+        base base_;  // storage: one tuple_leaf per element
+    };
+
+    template <class... Ts>
+    tuple(Ts...) -> tuple<Ts...>;  // by-value deduction guide: element types are deduced decayed
+
+    template <std::size_t I, class... Us>
+    __host__ __device__ auto& get(detail::tuple<Us...>& t)
+    {
+        auto& node = static_cast<decltype(at_index<I>(t.base_))&>(t.base_);  // upcast to the leaf with index I
+        return node.value;
+    }
+    template <std::size_t I, class... Us>
+    __host__ __device__ const auto& get(const detail::tuple<Us...>& t)
+    {
+        const auto& node = static_cast<const decltype(at_index<I>(t.base_))&>(t.base_);  // const view of leaf I
+        return node.value;
+    }
+    template <std::size_t I, class... Us>
+    __host__ __device__ auto&& get(detail::tuple<Us...>&& t)
+    {
+        auto& node = static_cast<decltype(at_index<I>(t.base_))&>(t.base_);  // upcast to the leaf with index I
+        return static_cast<typename std::remove_reference_t<decltype(node)>::type&&>(node.value);
+    }
+    template <std::size_t I, class... Us>
+    __host__ __device__ const auto&& get(const detail::tuple<Us...>&& t)
+    {
+        const auto& node = static_cast<const decltype(at_index<I>(t.base_))&>(t.base_);  // const view of leaf I
+        return static_cast<const typename std::remove_cvref_t<decltype(node)>::type&&>(node.value);
+    }
+
+    template <class... Ts, class... Us, std::size_t... Is>
+    __host__ __device__ bool tuple_equal_impl(const detail::tuple<Ts...>& lhs, const detail::tuple<Us...>& rhs,
+                                              std::index_sequence<Is...>)
+    {
+        return ((detail::get<Is>(lhs) == detail::get<Is>(rhs)) && ...);  // fold over &&: true for an empty pack
+    }
+    template <class... Ts, class... Us>
+    requires (sizeof...(Ts) == sizeof...(Us))
+    __host__ __device__ bool operator==(const detail::tuple<Ts...>& lhs, const detail::tuple<Us...>& rhs)
+    {
+        return tuple_equal_impl(lhs, rhs, std::make_index_sequence<sizeof...(Ts)>{});  // element-wise comparison
+    }
+
+    template <class F, class Tuple, std::size_t... Is>
+    requires requires { std::declval<F>()(detail::get<Is>(std::declval<Tuple>())...); }
+    __host__ __device__ decltype(auto) apply_impl(F&& fn, Tuple&& tup, std::index_sequence<Is...>)
+    {
+        return std::forward<F>(fn)(detail::get<Is>(std::forward<Tuple>(tup))...);  // unpack elements as arguments
+    }
+    template <class F, class Tuple>  // calls f with the elements of t; Tuple needs a std::tuple_size specialization
+    requires requires { std::tuple_size<std::remove_reference_t<Tuple>>::value; }
+    __host__ __device__ decltype(auto) apply(F&& f, Tuple&& t)
+    {  // the constraint names ::value directly so a non-tuple-like Tuple is rejected by the constraint itself
+        return apply_impl(std::forward<F>(f), std::forward<Tuple>(t),
+                          std::make_index_sequence<std::tuple_size_v<std::remove_reference_t<Tuple>>>{});
+    }
+}  // namespace gpu_array::detail
+
+template <class... Ts>  // reports the element count of detail::tuple through the standard trait
+struct std::tuple_size<gpu_array::detail::tuple<Ts...>> : std::integral_constant<std::size_t, sizeof...(Ts)>
+{
+};
+template <std::size_t I, class... Ts>  // element I has the same type it would have in std::tuple<Ts...>
+struct std::tuple_element<I, gpu_array::detail::tuple<Ts...>> : std::tuple_element<I, std::tuple<Ts...>>
+{
+};
+template <class... TTypes, class... UTypes>  // element-wise common type of two detail::tuple types
+requires requires { typename gpu_array::detail::tuple<std::common_type_t<TTypes, UTypes>...>; }
+struct std::common_type<gpu_array::detail::tuple<TTypes...>, gpu_array::detail::tuple<UTypes...>>
+{
+    using type = gpu_array::detail::tuple<std::common_type_t<TTypes, UTypes>...>;  // pairs TTypes[k] with UTypes[k]
+};
+template <class... TTypes, class... UTypes, template <class> class TQual, template <class> class UQual>
+requires requires { typename gpu_array::detail::tuple<std::common_reference_t<TQual<TTypes>, UQual<UTypes>>...>; }
+struct std::basic_common_reference<gpu_array::detail::tuple<TTypes...>, gpu_array::detail::tuple<UTypes...>, TQual,
+                                   UQual>
+{  // element-wise common reference; TQual/UQual are applied to each element type before it is computed
+    using type = gpu_array::detail::tuple<std::common_reference_t<TQual<TTypes>, UQual<UTypes>>...>;
+};
+
 namespace gpu_array
 {
 #if defined(GPU_USE_32BIT_SIZE_TYPE_DEFAULT)
@@ -120,11 +301,11 @@ namespace gpu_array
 
         protected:
             size_type size_ = 0U;
-            std::tuple<ValueTypes*...> data_;
+            detail::tuple<ValueTypes*...> data_;
             std::uint32_t* ref_count_ = nullptr;  // reference counter, not used on GPU
 
             template <std::size_t N>
-            using element_type = std::tuple_element_t<N, std::tuple<ValueTypes...>>;
+            using element_type = std::tuple_element_t<N, detail::tuple<ValueTypes...>>;
 
             __host__ __device__ void init()
             {
@@ -255,11 +436,11 @@ namespace gpu_array
 
             __host__ __device__ void tuple_for_each(auto&& f) const
             {
-                std::apply([&f](const auto&... args) { (f(args), ...); }, data_);
+                detail::apply([&f](const auto&... args) { (f(args), ...); }, data_);
             }
             __host__ __device__ void tuple_for_each(auto&& f)
             {
-                std::apply([&f](auto&... args) { (f(args), ...); }, data_);
+                detail::apply([&f](auto&... args) { (f(args), ...); }, data_);
             }
         };
 
@@ -427,8 +608,8 @@ namespace gpu_array
         {
             return *(data() + base::size_ - 1);
         }
-        __host__ __device__ pointer data() noexcept { return std::get<0>(base::data_); }
-        __host__ __device__ const_pointer data() const noexcept { return std::get<0>(base::data_); }
+        __host__ __device__ pointer data() noexcept { return detail::get<0>(base::data_); }
+        __host__ __device__ const_pointer data() const noexcept { return detail::get<0>(base::data_); }
 
         array() = default;
         __host__ __device__ array(const array& r) : base(r) {}
@@ -438,8 +619,8 @@ namespace gpu_array
         {
             if (base::size_ == 0) return;
             auto buf = std::make_unique<value_type[]>(base::size_);
-            GPU_CHECK_ERROR(
-                api::gpuMalloc(reinterpret_cast<void**>(&std::get<0>(base::data_)), sizeof(value_type) * base::size_));
+            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&detail::get<0>(base::data_)),
+                                           sizeof(value_type) * base::size_));
             assert(data() != nullptr);
             GPU_CHECK_ERROR(api::gpuMemcpy(data(), buf.get(), sizeof(value_type) * base::size_, gpuMemcpyHostToDevice));
         }
@@ -447,8 +628,8 @@ namespace gpu_array
         __host__ array(std::size_t size, default_init_tag) : base(size)
         {
             if (base::size_ == 0) return;
-            GPU_CHECK_ERROR(
-                api::gpuMalloc(reinterpret_cast<void**>(&std::get<0>(base::data_)), sizeof(value_type) * base::size_));
+            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&detail::get<0>(base::data_)),
+                                           sizeof(value_type) * base::size_));
             assert(data() != nullptr);
             if constexpr (!std::is_trivially_default_constructible_v<value_type>)
             {
@@ -465,8 +646,8 @@ namespace gpu_array
             auto al = std::allocator<value_type>();
             auto buf = al.allocate(base::size_);
             std::ranges::uninitialized_fill(buf, buf + base::size_, value);
-            GPU_CHECK_ERROR(
-                api::gpuMalloc(reinterpret_cast<void**>(&std::get<0>(base::data_)), sizeof(value_type) * base::size_));
+            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&detail::get<0>(base::data_)),
+                                           sizeof(value_type) * base::size_));
             assert(data() != nullptr);
             GPU_CHECK_ERROR(api::gpuMemcpy(data(), buf, sizeof(value_type) * base::size_, gpuMemcpyHostToDevice));
             al.deallocate(buf, base::size_);
@@ -481,8 +662,8 @@ namespace gpu_array
         {
             if (base::size_ == 0) return;
 
-            GPU_CHECK_ERROR(
-                api::gpuMalloc(reinterpret_cast<void**>(&std::get<0>(base::data_)), sizeof(value_type) * base::size_));
+            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&detail::get<0>(base::data_)),
+                                           sizeof(value_type) * base::size_));
             assert(data() != nullptr);
             GPU_CHECK_ERROR(
                 api::gpuMemcpy(data(), std::ranges::data(r), sizeof(value_type) * base::size_, gpuMemcpyHostToDevice));
@@ -499,8 +680,8 @@ namespace gpu_array
             auto buf = al.allocate(base::size_);
             for (auto i = std::size_t{0}; const auto& v : r) std::ranges::construct_at(buf + i++, v);
 
-            GPU_CHECK_ERROR(
-                api::gpuMalloc(reinterpret_cast<void**>(&std::get<0>(base::data_)), sizeof(value_type) * base::size_));
+            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&detail::get<0>(base::data_)),
+                                           sizeof(value_type) * base::size_));
             assert(data() != nullptr);
             GPU_CHECK_ERROR(api::gpuMemcpy(data(), buf, sizeof(value_type) * base::size_, gpuMemcpyHostToDevice));
             al.deallocate(buf, base::size_);
@@ -509,8 +690,8 @@ namespace gpu_array
         __host__ array(std::initializer_list<value_type> r) : base(std::ranges::size(r))
         {
             if (base::size_ == 0) return;
-            GPU_CHECK_ERROR(
-                api::gpuMalloc(reinterpret_cast<void**>(&std::get<0>(base::data_)), sizeof(value_type) * base::size_));
+            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&detail::get<0>(base::data_)),
+                                           sizeof(value_type) * base::size_));
             assert(data() != nullptr);
             GPU_CHECK_ERROR(
                 api::gpuMemcpy(data(), std::ranges::data(r), sizeof(ValueType) * base::size_, gpuMemcpyHostToDevice));
@@ -710,7 +891,7 @@ namespace gpu_array
             {
                 throw std::runtime_error("pointer type mismatch: expected device memory pointer");
             }
-            std::get<0>(base::data_) = ptr;
+            detail::get<0>(base::data_) = ptr;
         }
 #endif
 #if defined(GPU_OVERLOAD_DEVICE)
@@ -756,7 +937,7 @@ namespace gpu_array
                                    [](auto acc, const auto& r) { return acc + std::ranges::size(r); }))
         {
             if (base::size_ == 0) return;
-            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&std::get<0>(base::data_)),
+            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&detail::get<0>(base::data_)),
                                                   sizeof(ValueType) * base::size_));
             assert(data() != nullptr);
 
@@ -812,8 +993,8 @@ namespace gpu_array
         __host__ __device__ const_reference front() const noexcept { return *begin(); }
         __host__ __device__ reference back() noexcept { return *(data() + base::size_ - 1); }
         __host__ __device__ const_reference back() const noexcept { return *(data() + base::size_ - 1); }
-        __host__ __device__ pointer data() noexcept { return std::get<0>(base::data_); }
-        __host__ __device__ const_pointer data() const noexcept { return std::get<0>(base::data_); }
+        __host__ __device__ pointer data() noexcept { return detail::get<0>(base::data_); }
+        __host__ __device__ const_pointer data() const noexcept { return detail::get<0>(base::data_); }
 
         managed_array() = default;
         __host__ __device__ managed_array(const managed_array& r) : base(r) {}
@@ -822,7 +1003,7 @@ namespace gpu_array
         __host__ explicit managed_array(std::size_t size) : base(size)
         {
             if (base::size_ == 0) return;
-            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&std::get<0>(base::data_)),
+            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&detail::get<0>(base::data_)),
                                                   sizeof(value_type) * base::size_));
             assert(data() != nullptr);
             std::ranges::uninitialized_value_construct(*this);
@@ -831,7 +1012,7 @@ namespace gpu_array
         __host__ explicit managed_array(std::size_t size, default_init_tag) : base(size)
         {
             if (base::size_ == 0) return;
-            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&std::get<0>(base::data_)),
+            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&detail::get<0>(base::data_)),
                                                   sizeof(value_type) * base::size_));
             assert(data() != nullptr);
             std::ranges::uninitialized_default_construct(*this);
@@ -840,7 +1021,7 @@ namespace gpu_array
         __host__ managed_array(std::size_t size, const value_type& value) : base(size)
         {
             if (base::size_ == 0) return;
-            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&std::get<0>(base::data_)),
+            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&detail::get<0>(base::data_)),
                                                   sizeof(value_type) * base::size_));
             assert(data() != nullptr);
             std::ranges::uninitialized_fill(*this, value);
@@ -851,7 +1032,7 @@ namespace gpu_array
         __host__ explicit managed_array(const T& r) : base(std::ranges::size(r))
         {
             if (base::size_ == 0) return;
-            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&std::get<0>(base::data_)),
+            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&detail::get<0>(base::data_)),
                                                   sizeof(value_type) * base::size_));
             assert(data() != nullptr);
 
@@ -864,7 +1045,7 @@ namespace gpu_array
         __host__ managed_array(std::initializer_list<value_type> r) : base(std::ranges::size(r))
         {
             if (base::size_ == 0) return;
-            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&std::get<0>(base::data_)),
+            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&detail::get<0>(base::data_)),
                                                   sizeof(value_type) * base::size_));
             assert(data() != nullptr);
             for (auto i = std::size_t{0}; const auto& v : r) std::ranges::construct_at(data() + i++, v);
@@ -1070,7 +1251,7 @@ namespace gpu_array
             {
                 throw std::runtime_error("pointer type mismatch: expected managed memory pointer");
             }
-            std::get<0>(base::data_) = ptr;
+            detail::get<0>(base::data_) = ptr;
         }
 #endif
 #if defined(GPU_OVERLOAD_DEVICE)
@@ -1125,7 +1306,7 @@ namespace gpu_array
 
         __host__ explicit value(default_init_tag) : base(1)
         {
-            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&std::get<0>(base::data_)), sizeof(value_type)));
+            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&detail::get<0>(base::data_)), sizeof(value_type)));
             assert(get() != nullptr);
             if constexpr (!std::is_trivially_default_constructible_v<value_type>)
             {
@@ -1136,7 +1317,7 @@ namespace gpu_array
 
         __host__ explicit value(const value_type& r) : base(1)
         {
-            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&std::get<0>(base::data_)), sizeof(value_type)));
+            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&detail::get<0>(base::data_)), sizeof(value_type)));
             assert(get() != nullptr);
             GPU_CHECK_ERROR(api::gpuMemcpy(get(), &r, sizeof(value_type), gpuMemcpyHostToDevice));
         }
@@ -1146,7 +1327,7 @@ namespace gpu_array
         __host__ explicit value(Args&&... args) : base(1)
         {
             auto temp = value_type(std::forward<Args>(args)...);
-            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&std::get<0>(base::data_)), sizeof(value_type)));
+            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&detail::get<0>(base::data_)), sizeof(value_type)));
             assert(get() != nullptr);
             GPU_CHECK_ERROR(api::gpuMemcpy(get(), &temp, sizeof(value_type), gpuMemcpyHostToDevice));
         }
@@ -1162,7 +1343,7 @@ namespace gpu_array
             {
                 throw std::runtime_error("pointer type mismatch: expected device memory pointer");
             }
-            std::get<0>(base::data_) = ptr;
+            detail::get<0>(base::data_) = ptr;
         }
 #else
             : base(ptr, ptr == nullptr ? 0 : 1)
@@ -1181,8 +1362,8 @@ namespace gpu_array
             return *this;
         }
 
-        __host__ __device__ pointer get() const noexcept { return std::get<0>(base::data_); }
-        __host__ __device__ explicit operator bool() const noexcept { return std::get<0>(base::data_) != nullptr; }
+        __host__ __device__ pointer get() const noexcept { return detail::get<0>(base::data_); }
+        __host__ __device__ explicit operator bool() const noexcept { return detail::get<0>(base::data_) != nullptr; }
 
 #if defined(GPU_OVERLOAD_DEVICE)
         __device__ reference operator*() const noexcept
@@ -1252,7 +1433,7 @@ namespace gpu_array
         __host__ explicit managed_value(default_init_tag) : base(1)
         {
             GPU_CHECK_ERROR(
-                api::gpuMallocManaged(reinterpret_cast<void**>(&std::get<0>(base::data_)), sizeof(ValueType)));
+                api::gpuMallocManaged(reinterpret_cast<void**>(&detail::get<0>(base::data_)), sizeof(ValueType)));
             assert(get() != nullptr);
             std::ranges::uninitialized_default_construct_n(get(), 1);
         }
@@ -1260,7 +1441,7 @@ namespace gpu_array
         __host__ explicit managed_value(const ValueType& r) : base(1)
         {
             GPU_CHECK_ERROR(
-                api::gpuMallocManaged(reinterpret_cast<void**>(&std::get<0>(base::data_)), sizeof(ValueType)));
+                api::gpuMallocManaged(reinterpret_cast<void**>(&detail::get<0>(base::data_)), sizeof(ValueType)));
             assert(get() != nullptr);
             std::ranges::construct_at(get(), r);
         }
@@ -1268,7 +1449,7 @@ namespace gpu_array
         __host__ explicit managed_value(ValueType&& r) : base(1)
         {
             GPU_CHECK_ERROR(
-                api::gpuMallocManaged(reinterpret_cast<void**>(&std::get<0>(base::data_)), sizeof(ValueType)));
+                api::gpuMallocManaged(reinterpret_cast<void**>(&detail::get<0>(base::data_)), sizeof(ValueType)));
             assert(get() != nullptr);
             std::ranges::construct_at(get(), std::move(r));
         }
@@ -1278,7 +1459,7 @@ namespace gpu_array
         __host__ explicit managed_value(Args&&... args) : base(1)
         {
             GPU_CHECK_ERROR(
-                api::gpuMallocManaged(reinterpret_cast<void**>(&std::get<0>(base::data_)), sizeof(ValueType)));
+                api::gpuMallocManaged(reinterpret_cast<void**>(&detail::get<0>(base::data_)), sizeof(ValueType)));
             assert(get() != nullptr);
             std::ranges::construct_at(get(), std::forward<Args>(args)...);
         }
@@ -1294,7 +1475,7 @@ namespace gpu_array
             {
                 throw std::runtime_error("pointer type mismatch: expected managed memory pointer");
             }
-            std::get<0>(base::data_) = ptr;
+            detail::get<0>(base::data_) = ptr;
         }
 #else
             : base(ptr, ptr == nullptr ? 0 : 1)
@@ -1324,8 +1505,8 @@ namespace gpu_array
             return get();
         }
 
-        __host__ __device__ pointer get() const noexcept { return std::get<0>(base::data_); }
-        __host__ __device__ explicit operator bool() const noexcept { return std::get<0>(base::data_) != nullptr; }
+        __host__ __device__ pointer get() const noexcept { return detail::get<0>(base::data_); }
+        __host__ __device__ explicit operator bool() const noexcept { return detail::get<0>(base::data_) != nullptr; }
 
         __host__ void prefetch(int device_id, api::gpuStream_t stream = 0, bool recursive = true) const
         {
@@ -1376,10 +1557,10 @@ namespace gpu_array
         template <std::size_t N, typename Tuple, typename... Ts>
         constexpr bool assignable_to_tuple_helper_n()
         {
-            return requires(const Tuple& t1, std::tuple<Ts...>& t2) {
-                std::get<N>(t1);
-                std::get<N>(t2);
-                requires std::assignable_from<decltype(std::get<N>(t2)), decltype(std::get<N>(t1))>;
+            return requires(const Tuple& t1, detail::tuple<Ts...>& t2) {
+                detail::get<N>(t1);
+                detail::get<N>(t2);
+                requires std::assignable_from<decltype(detail::get<N>(t2)), decltype(detail::get<N>(t1))>;
             };
         }
         template <typename Tuple, typename... Ts>
@@ -1396,7 +1577,7 @@ namespace gpu_array
     template <template <typename...> typename Tuple, typename... Ts>
     class structure_of_arrays_iterator
     {
-        std::tuple<Ts*...> ptrs_;
+        detail::tuple<Ts*...> ptrs_;
 
     public:
         using difference_type = std::ptrdiff_t;
@@ -1411,15 +1592,15 @@ namespace gpu_array
         structure_of_arrays_iterator& operator=(const structure_of_arrays_iterator&) = default;
         structure_of_arrays_iterator& operator=(structure_of_arrays_iterator&&) noexcept = default;
 
-        __host__ __device__ explicit structure_of_arrays_iterator(std::tuple<Ts*...> ptrs) : ptrs_(ptrs) {}
+        __host__ __device__ explicit structure_of_arrays_iterator(detail::tuple<Ts*...> ptrs) : ptrs_(ptrs) {}
 
         __host__ __device__ Tuple<Ts&...> operator*() const
         {
-            return std::apply([](auto*... ptrs) { return Tuple<Ts&...>(*ptrs...); }, ptrs_);
+            return detail::apply([](auto*... ptrs) { return Tuple<Ts&...>(*ptrs...); }, ptrs_);
         }
         __host__ __device__ Tuple<Ts&...> operator[](size_type n) const
         {
-            return std::apply([n](auto*... ptrs) { return Tuple<Ts&...>(ptrs[n]...); }, ptrs_);
+            return detail::apply([n](auto*... ptrs) { return Tuple<Ts&...>(ptrs[n]...); }, ptrs_);
         }
         __host__ __device__ auto operator->() const
         {
@@ -1432,7 +1613,7 @@ namespace gpu_array
         }
         __host__ __device__ structure_of_arrays_iterator& operator++()
         {
-            std::apply([](auto*&... ptrs) { (++ptrs, ...); }, ptrs_);
+            detail::apply([](auto*&... ptrs) { (++ptrs, ...); }, ptrs_);
             return *this;
         }
         __host__ __device__ structure_of_arrays_iterator operator++(int)
@@ -1443,12 +1624,12 @@ namespace gpu_array
         }
         __host__ __device__ structure_of_arrays_iterator& operator+=(difference_type n)
         {
-            std::apply([n](auto*&... ptrs) { ((ptrs += n), ...); }, ptrs_);
+            detail::apply([n](auto*&... ptrs) { ((ptrs += n), ...); }, ptrs_);
             return *this;
         }
         __host__ __device__ structure_of_arrays_iterator& operator--()
         {
-            std::apply([](auto*&... ptrs) { (--ptrs, ...); }, ptrs_);
+            detail::apply([](auto*&... ptrs) { (--ptrs, ...); }, ptrs_);
             return *this;
         }
         __host__ __device__ structure_of_arrays_iterator operator--(int)
@@ -1459,20 +1640,20 @@ namespace gpu_array
         }
         __host__ __device__ structure_of_arrays_iterator& operator-=(difference_type n)
         {
-            std::apply([n](auto*&... ptrs) { ((ptrs -= n), ...); }, ptrs_);
+            detail::apply([n](auto*&... ptrs) { ((ptrs -= n), ...); }, ptrs_);
             return *this;
         }
 
         __host__ __device__ friend difference_type operator-(const structure_of_arrays_iterator& lhs,
                                                              const structure_of_arrays_iterator& rhs)
         {
-            return std::get<0>(lhs.ptrs_) - std::get<0>(rhs.ptrs_);
+            return detail::get<0>(lhs.ptrs_) - detail::get<0>(rhs.ptrs_);
         }
         __host__ __device__ friend structure_of_arrays_iterator operator+(const structure_of_arrays_iterator& lhs,
                                                                           difference_type n)
         {
             return structure_of_arrays_iterator(
-                std::apply([n](auto*... ptrs) { return std::tuple{ptrs + n...}; }, lhs.ptrs_));
+                detail::apply([n](auto*... ptrs) { return detail::tuple{ptrs + n...}; }, lhs.ptrs_));
         }
         __host__ __device__ friend structure_of_arrays_iterator operator+(structure_of_arrays_iterator&& lhs,
                                                                           difference_type n)
@@ -1494,7 +1675,7 @@ namespace gpu_array
                                                                           difference_type n)
         {
             return structure_of_arrays_iterator(
-                std::apply([n](auto*... ptrs) { return std::tuple{ptrs - n...}; }, lhs.ptrs_));
+                detail::apply([n](auto*... ptrs) { return detail::tuple{ptrs - n...}; }, lhs.ptrs_));
         }
         __host__ __device__ friend structure_of_arrays_iterator operator-(structure_of_arrays_iterator&& lhs,
                                                                           difference_type n)
@@ -1506,16 +1687,16 @@ namespace gpu_array
         __host__ __device__ friend bool operator==(const structure_of_arrays_iterator& lhs,
                                                    const structure_of_arrays_iterator& rhs)
         {
-            return std::get<0>(lhs.ptrs_) == std::get<0>(rhs.ptrs_);
+            return detail::get<0>(lhs.ptrs_) == detail::get<0>(rhs.ptrs_);
         }
         __host__ __device__ friend std::strong_ordering operator<=>(const structure_of_arrays_iterator& lhs,
                                                                     const structure_of_arrays_iterator& rhs)
         {
-            return std::get<0>(lhs.ptrs_) <=> std::get<0>(rhs.ptrs_);
+            return detail::get<0>(lhs.ptrs_) <=> detail::get<0>(rhs.ptrs_);
         }
         __host__ __device__ friend auto iter_move(const structure_of_arrays_iterator& x)
         {
-            return std::apply(
+            return detail::apply(
                 [](auto*... ptrs) {
                     using RetType = std::remove_cvref_t<decltype(x)>::value_type;
                     return RetType(std::move(*ptrs)...);
@@ -1527,15 +1708,15 @@ namespace gpu_array
         {
             constexpr std::size_t size = std::tuple_size_v<std::remove_cvref_t<decltype(lhs.ptrs_)>>;
             [&lhs, &rhs]<std::size_t... N>(std::index_sequence<N...>) {
-                (std::swap(*std::get<N>(lhs.ptrs_), *std::get<N>(rhs.ptrs_)), ...);
+                (std::swap(*detail::get<N>(lhs.ptrs_), *detail::get<N>(rhs.ptrs_)), ...);
             }(std::make_index_sequence<size>());
         }
     };
 
     template <typename... Ts>
-    class structure_of_arrays : public structure_of_arrays<std::tuple<Ts...>, size_type_default>
+    class structure_of_arrays : public structure_of_arrays<detail::tuple<Ts...>, size_type_default>
     {
-        using base = structure_of_arrays<std::tuple<Ts...>, size_type_default>;
+        using base = structure_of_arrays<detail::tuple<Ts...>, size_type_default>;
         using base::base;
 
     public:
@@ -1567,9 +1748,9 @@ namespace gpu_array
         static constexpr auto num_arrays = sizeof...(Ts);
         using base = detail::base<false, SizeType, Ts...>;
 
-        using tuple_value_type = std::tuple<Ts...>;
-        using tuple_pointer_type = std::tuple<Ts*...>;
-        using tuple_const_pointer_type = std::tuple<const Ts*...>;
+        using tuple_value_type = detail::tuple<Ts...>;
+        using tuple_pointer_type = detail::tuple<Ts*...>;
+        using tuple_const_pointer_type = detail::tuple<const Ts*...>;
         using ret_tuple_value_type = Tuple<Ts...>;
         using ret_tuple_reference_type = Tuple<Ts&...>;
         using ret_tuple_const_reference_type = Tuple<const Ts&...>;
@@ -1584,18 +1765,18 @@ namespace gpu_array
         SIGSEGV_DEPRECATED __host__ __device__ auto begin() noexcept { return iterator_type(base::data_); }
         SIGSEGV_DEPRECATED __host__ __device__ auto end() noexcept
         {
-            return std::apply(
+            return detail::apply(
                 [this](auto&... ptrs) { return iterator_type(tuple_pointer_type{(ptrs + base::size_)...}); },
                 base::data_);
         }
         SIGSEGV_DEPRECATED __host__ __device__ auto begin() const noexcept
         {
-            return std::apply([](auto&... ptrs) { return const_iterator_type(tuple_const_pointer_type{(ptrs)...}); },
-                              base::data_);
+            return detail::apply([](auto&... ptrs) { return const_iterator_type(tuple_const_pointer_type{(ptrs)...}); },
+                                 base::data_);
         }
         SIGSEGV_DEPRECATED __host__ __device__ auto end() const noexcept
         {
-            return std::apply(
+            return detail::apply(
                 [this](auto&... ptrs) {
                     return const_iterator_type(tuple_const_pointer_type{(ptrs + base::size_)...});
                 },
@@ -1604,28 +1785,28 @@ namespace gpu_array
         SIGSEGV_DEPRECATED __host__ __device__ auto operator[](size_type i) &
         {
             assert(i < base::size_);
-            return std::apply([i](auto&... ptrs) { return ret_tuple_reference_type{*(ptrs + i)...}; }, base::data_);
+            return detail::apply([i](auto&... ptrs) { return ret_tuple_reference_type{*(ptrs + i)...}; }, base::data_);
         }
         SIGSEGV_DEPRECATED __host__ __device__ auto operator[](size_type i) const&
         {
             assert(i < base::size_);
-            return std::apply([i](auto&... ptrs) { return ret_tuple_const_reference_type{*(ptrs + i)...}; },
-                              base::data_);
+            return detail::apply([i](auto&... ptrs) { return ret_tuple_const_reference_type{*(ptrs + i)...}; },
+                                 base::data_);
         }
         SIGSEGV_DEPRECATED __host__ __device__ auto operator[](size_type i) &&
         {
             assert(i < base::size_);
-            return std::apply([i](auto&... ptrs) { return ret_tuple_value_type{*(ptrs + i)...}; }, base::data_);
+            return detail::apply([i](auto&... ptrs) { return ret_tuple_value_type{*(ptrs + i)...}; }, base::data_);
         }
         template <std::size_t N>
         __host__ __device__ auto* data() noexcept
         {
-            return std::get<N>(base::data_);
+            return detail::get<N>(base::data_);
         }
         template <std::size_t N>
         __host__ __device__ const auto* data() const noexcept
         {
-            return std::get<N>(base::data_);
+            return detail::get<N>(base::data_);
         }
 
         structure_of_arrays() = default;
@@ -1672,7 +1853,7 @@ namespace gpu_array
             };
 
             [this, &value, alloc_ptr]<std::size_t... N>(std::index_sequence<N...>) {
-                (alloc_ptr(std::get<N>(base::data_), std::get<N>(value)), ...);
+                (alloc_ptr(detail::get<N>(base::data_), detail::get<N>(value)), ...);
             }(std::make_index_sequence<num_arrays>());
         }
 
@@ -1691,8 +1872,8 @@ namespace gpu_array
             };
 
             [this, &array, alloc_ptr]<std::size_t... N>(std::index_sequence<N...>) {
-                (alloc_ptr(std::get<N>(base::data_),
-                           array | std::views::transform([](const auto& v) { return (std::get<N>(v)); })),
+                (alloc_ptr(detail::get<N>(base::data_),
+                           array | std::views::transform([](const auto& v) { return (detail::get<N>(v)); })),
                  ...);
             }(std::make_index_sequence<num_arrays>());
         }
@@ -1710,15 +1891,15 @@ namespace gpu_array
             };
 
             [this, &list, alloc_ptr]<std::size_t... N>(std::index_sequence<N...>) {
-                (alloc_ptr(std::get<N>(base::data_),
-                           list | std::views::transform([](const auto& v) { return (std::get<N>(v)); })),
+                (alloc_ptr(detail::get<N>(base::data_),
+                           list | std::views::transform([](const auto& v) { return (detail::get<N>(v)); })),
                  ...);
             }(std::make_index_sequence<num_arrays>());
         }
 
         template <detail::array_convertible_for_copy... Ranges>
         requires (sizeof...(Ranges) == num_arrays) &&
-                 detail::assignable_to_tuple<std::tuple<std::ranges::range_value_t<Ranges>...>, Ts...>
+                 detail::assignable_to_tuple<detail::tuple<std::ranges::range_value_t<Ranges>...>, Ts...>
         __host__ explicit structure_of_arrays(const Ranges&... arrays) : base(std::max({std::ranges::size(arrays)...}))
         {
             if (base::size_ == 0) return;
@@ -1737,9 +1918,9 @@ namespace gpu_array
                 GPU_CHECK_ERROR(api::gpuMemcpy(ptr, buf.get(), sizeof(T) * base::size_, gpuMemcpyHostToDevice));
             };
 
-            auto arrays_tuple = std::tuple<const Ranges&...>(arrays...);
+            auto arrays_tuple = detail::tuple<const Ranges&...>(arrays...);
             [this, &arrays_tuple, alloc_ptr]<std::size_t... N>(std::index_sequence<N...>) {
-                (alloc_ptr(std::get<N>(base::data_), std::get<N>(arrays_tuple)), ...);
+                (alloc_ptr(detail::get<N>(base::data_), detail::get<N>(arrays_tuple)), ...);
             }(std::make_index_sequence<num_arrays>());
         }
 
@@ -1762,9 +1943,9 @@ namespace gpu_array
                 GPU_CHECK_ERROR(api::gpuMemcpy(ptr, buf.get(), sizeof(T) * base::size_, gpuMemcpyHostToDevice));
             };
 
-            auto arrays_tuple = std::tuple<std::initializer_list<Ts>...>(lists...);
+            auto arrays_tuple = detail::tuple<std::initializer_list<Ts>...>(lists...);
             [this, &arrays_tuple, alloc_ptr]<std::size_t... N>(std::index_sequence<N...>) {
-                (alloc_ptr(std::get<N>(base::data_), std::get<N>(arrays_tuple)), ...);
+                (alloc_ptr(detail::get<N>(base::data_), detail::get<N>(arrays_tuple)), ...);
             }(std::make_index_sequence<num_arrays>());
         }
 
@@ -1796,9 +1977,9 @@ namespace gpu_array
                 return buf;
             };
 
-            const auto tmp_bufs = std::apply(
+            const auto tmp_bufs = detail::apply(
                 [copy_buffer](const auto*... ptrs) {
-                    return std::tuple<std::unique_ptr<Ts[]>...>(copy_buffer(ptrs)...);
+                    return detail::tuple<std::unique_ptr<Ts[]>...>(copy_buffer(ptrs)...);
                 },
                 base::data_);
 
@@ -1814,7 +1995,7 @@ namespace gpu_array
 
                 for (size_type i = 0; i < base::size_; ++i)
                 {
-                    std::apply([&result, i](const auto&... bufs) { result.push_back(U{bufs[i]...}); }, tmp_bufs);
+                    detail::apply([&result, i](const auto&... bufs) { result.push_back(U{bufs[i]...}); }, tmp_bufs);
                 }
 
                 return result;
@@ -1826,7 +2007,7 @@ namespace gpu_array
 
                 for (size_type i = 0; i < base::size_; ++i)
                 {
-                    std::apply([&result, i](const auto&... bufs) { result[i] = U{bufs[i]...}; }, tmp_bufs);
+                    detail::apply([&result, i](const auto&... bufs) { result[i] = U{bufs[i]...}; }, tmp_bufs);
                 }
 
                 return result;
@@ -1852,7 +2033,7 @@ namespace gpu_array
             if (base::size_ > 0) assert(ptr != nullptr);
 
             // reset specified pointer only
-            std::get<N>(base::data_) = base::size_ == 0 ? nullptr : ptr;
+            detail::get<N>(base::data_) = base::size_ == 0 ? nullptr : ptr;
         }
 
         template <std::size_t N, gpu_array_ptr T>
@@ -1872,9 +2053,9 @@ namespace gpu_array
     };
 
     template <typename... Ts>
-    class managed_structure_of_arrays : public managed_structure_of_arrays<std::tuple<Ts...>, size_type_default>
+    class managed_structure_of_arrays : public managed_structure_of_arrays<detail::tuple<Ts...>, size_type_default>
     {
-        using base = managed_structure_of_arrays<std::tuple<Ts...>, size_type_default>;
+        using base = managed_structure_of_arrays<detail::tuple<Ts...>, size_type_default>;
         using base::base;
 
     public:
@@ -1907,9 +2088,9 @@ namespace gpu_array
         static constexpr auto num_arrays = sizeof...(Ts);
         using base = detail::base<true, SizeType, Ts...>;
 
-        using tuple_value_type = std::tuple<Ts...>;
-        using tuple_pointer_type = std::tuple<Ts*...>;
-        using tuple_const_pointer_type = std::tuple<const Ts*...>;
+        using tuple_value_type = detail::tuple<Ts...>;
+        using tuple_pointer_type = detail::tuple<Ts*...>;
+        using tuple_const_pointer_type = detail::tuple<const Ts*...>;
         using ret_tuple_value_type = Tuple<Ts...>;
         using ret_tuple_reference_type = Tuple<Ts&...>;
         using ret_tuple_const_reference_type = Tuple<const Ts&...>;
@@ -1944,7 +2125,8 @@ namespace gpu_array
             };
 
             [this, &alloc_ptr]<std::size_t... N>(std::index_sequence<N...>) {
-                (alloc_ptr(std::get<N>(base::data_), [](const auto& v) -> const auto& { return (std::get<N>(v)); }),
+                (alloc_ptr(detail::get<N>(base::data_),
+                           [](const auto& v) -> const auto& { return (detail::get<N>(v)); }),
                  ...);
             }(std::make_index_sequence<num_arrays>());
         }
@@ -1957,18 +2139,18 @@ namespace gpu_array
         __host__ __device__ auto begin() noexcept { return iterator_type(base::data_); }
         __host__ __device__ auto end() noexcept
         {
-            return std::apply(
+            return detail::apply(
                 [this](auto&... ptrs) { return iterator_type(tuple_pointer_type{(ptrs + base::size_)...}); },
                 base::data_);
         }
         __host__ __device__ auto begin() const noexcept
         {
-            return std::apply([](auto&... ptrs) { return const_iterator_type(tuple_const_pointer_type{(ptrs)...}); },
-                              base::data_);
+            return detail::apply([](auto&... ptrs) { return const_iterator_type(tuple_const_pointer_type{(ptrs)...}); },
+                                 base::data_);
         }
         __host__ __device__ auto end() const noexcept
         {
-            return std::apply(
+            return detail::apply(
                 [this](auto&... ptrs) {
                     return const_iterator_type(tuple_const_pointer_type{(ptrs + base::size_)...});
                 },
@@ -1977,28 +2159,28 @@ namespace gpu_array
         __host__ __device__ auto operator[](size_type i) &
         {
             assert(i < base::size_);
-            return std::apply([i](auto&... ptrs) { return ret_tuple_reference_type{*(ptrs + i)...}; }, base::data_);
+            return detail::apply([i](auto&... ptrs) { return ret_tuple_reference_type{*(ptrs + i)...}; }, base::data_);
         }
         __host__ __device__ auto operator[](size_type i) const&
         {
             assert(i < base::size_);
-            return std::apply([i](auto&... ptrs) { return ret_tuple_const_reference_type{*(ptrs + i)...}; },
-                              base::data_);
+            return detail::apply([i](auto&... ptrs) { return ret_tuple_const_reference_type{*(ptrs + i)...}; },
+                                 base::data_);
         }
         __host__ __device__ auto operator[](size_type i) &&
         {
             assert(i < base::size_);
-            return std::apply([i](auto&... ptrs) { return ret_tuple_value_type{*(ptrs + i)...}; }, base::data_);
+            return detail::apply([i](auto&... ptrs) { return ret_tuple_value_type{*(ptrs + i)...}; }, base::data_);
         }
         template <std::size_t N>
         __host__ __device__ auto* data() noexcept
         {
-            return std::get<N>(base::data_);
+            return detail::get<N>(base::data_);
         }
         template <std::size_t N>
         __host__ __device__ const auto* data() const noexcept
         {
-            return std::get<N>(base::data_);
+            return detail::get<N>(base::data_);
         }
 
         managed_structure_of_arrays() = default;
@@ -2039,7 +2221,8 @@ namespace gpu_array
             });
 
             [this, &value]<std::size_t... N>(std::index_sequence<N...>) {
-                (std::ranges::uninitialized_fill_n(std::get<N>(base::data_), base::size_, std::get<N>(value)), ...);
+                (std::ranges::uninitialized_fill_n(detail::get<N>(base::data_), base::size_, detail::get<N>(value)),
+                 ...);
             }(std::make_index_sequence<num_arrays>());
         }
 
@@ -2060,7 +2243,8 @@ namespace gpu_array
             };
 
             [this, &alloc_ptr]<std::size_t... N>(std::index_sequence<N...>) {
-                (alloc_ptr(std::get<N>(base::data_), [](const auto& e) -> const auto& { return (std::get<N>(e)); }),
+                (alloc_ptr(detail::get<N>(base::data_),
+                           [](const auto& e) -> const auto& { return (detail::get<N>(e)); }),
                  ...);
             }(std::make_index_sequence<num_arrays>());
         }
@@ -2081,14 +2265,15 @@ namespace gpu_array
             };
 
             [this, &alloc_ptr]<std::size_t... N>(std::index_sequence<N...>) {
-                (alloc_ptr(std::get<N>(base::data_), [](const auto& e) -> const auto& { return (std::get<N>(e)); }),
+                (alloc_ptr(detail::get<N>(base::data_),
+                           [](const auto& e) -> const auto& { return (detail::get<N>(e)); }),
                  ...);
             }(std::make_index_sequence<num_arrays>());
         }
 
         template <detail::array_convertible_for_copy... Ranges>
         requires (sizeof...(Ranges) == num_arrays) &&
-                 detail::assignable_to_tuple<std::tuple<std::ranges::range_value_t<Ranges>...>, Ts...>
+                 detail::assignable_to_tuple<detail::tuple<std::ranges::range_value_t<Ranges>...>, Ts...>
         __host__ explicit managed_structure_of_arrays(const Ranges&... arrays)
             : base(std::max({std::ranges::size(arrays)...}))
         {
@@ -2106,9 +2291,9 @@ namespace gpu_array
                 for (auto i = std::size_t{0}; const auto& v : range) std::ranges::construct_at(ptr + i++, v);
             };
 
-            auto arrays_tuple = std::tuple<const Ranges&...>(arrays...);
+            auto arrays_tuple = detail::tuple<const Ranges&...>(arrays...);
             [this, &arrays_tuple, alloc_ptr]<std::size_t... N>(std::index_sequence<N...>) {
-                (alloc_ptr(std::get<N>(base::data_), std::get<N>(arrays_tuple)), ...);
+                (alloc_ptr(detail::get<N>(base::data_), detail::get<N>(arrays_tuple)), ...);
             }(std::make_index_sequence<num_arrays>());
         }
 
@@ -2129,9 +2314,9 @@ namespace gpu_array
                 for (auto i = std::size_t{0}; const auto& v : range) std::ranges::construct_at(ptr + i++, v);
             };
 
-            auto arrays_tuple = std::tuple<std::initializer_list<Ts>...>(lists...);
+            auto arrays_tuple = detail::tuple<std::initializer_list<Ts>...>(lists...);
             [this, &arrays_tuple, alloc_ptr]<std::size_t... N>(std::index_sequence<N...>) {
-                (alloc_ptr(std::get<N>(base::data_), std::get<N>(arrays_tuple)), ...);
+                (alloc_ptr(detail::get<N>(base::data_), detail::get<N>(arrays_tuple)), ...);
             }(std::make_index_sequence<num_arrays>());
         }
 
@@ -2258,7 +2443,7 @@ namespace gpu_array
 
                 for (size_type i = 0; i < base::size_; ++i)
                 {
-                    std::apply([&result, i](const auto&... bufs) { result.push_back(U{bufs[i]...}); }, base::data_);
+                    detail::apply([&result, i](const auto&... bufs) { result.push_back(U{bufs[i]...}); }, base::data_);
                 }
 
                 return result;
@@ -2270,7 +2455,7 @@ namespace gpu_array
 
                 for (size_type i = 0; i < base::size_; ++i)
                 {
-                    std::apply([&result, i](const auto&... bufs) { result[i] = U{bufs[i]...}; }, base::data_);
+                    detail::apply([&result, i](const auto&... bufs) { result[i] = U{bufs[i]...}; }, base::data_);
                 }
 
                 return result;
@@ -2296,7 +2481,7 @@ namespace gpu_array
             if (base::size_ > 0) assert(ptr != nullptr);
 
             // reset specified pointer only
-            std::get<N>(base::data_) = base::size_ == 0 ? nullptr : ptr;
+            detail::get<N>(base::data_) = base::size_ == 0 ? nullptr : ptr;
         }
 
         template <std::size_t N, gpu_array_ptr T>
diff --git a/test/test.cpp b/test/test.cpp
index 13cbc77..c8745ac 100644
--- a/test/test.cpp
+++ b/test/test.cpp
@@ -6,19 +6,19 @@
 // NOLINTBEGIN
 using namespace gpu_array;
 
-// Example of custom tuple type derived from std::tuple
+// Example of a custom tuple type derived from gpu_array::detail::tuple
 // You may need to specialize std::common_type and std::basic_common_reference to satisfy range concepts
 template <typename... Ts>
-class custom_tuple_example : public std::tuple<Ts...>
+class custom_tuple_example : public detail::tuple<Ts...>
 {
-    using base = std::tuple<Ts...>;
+    using base = detail::tuple<Ts...>;
     using base::base;
 
 public:
     template <std::size_t N>
     __host__ decltype(auto) get_string() const
     {
-        return std::to_string(std::get<N>(*this));
+        return std::to_string(detail::get<N>(*this));
     }
     using base::operator=;
     template <typename... Us>
@@ -1038,7 +1038,7 @@ TEST(StructureOfArrays, Construction)
 {
     using tuple_elem_type0 = int;
     using tuple_elem_type1 = double;
-    using tuple_type = std::tuple<tuple_elem_type0, tuple_elem_type1>;
+    using tuple_type = detail::tuple<tuple_elem_type0, tuple_elem_type1>;
     using custom_tuple_type = custom_tuple_example<tuple_elem_type0, tuple_elem_type1>;
 
     // initizalization
@@ -1052,18 +1052,18 @@ TEST(StructureOfArrays, Construction)
         auto vec = soa.to<std::vector>();
         for (std::size_t i = 0; i < vec.size(); ++i)
         {
-            EXPECT_EQ(std::get<0>(vec[i]), 0);
-            EXPECT_EQ(std::get<1>(vec[i]), 0.0);
+            EXPECT_EQ(detail::get<0>(vec[i]), 0);
+            EXPECT_EQ(detail::get<1>(vec[i]), 0.0);
         }
     }
     {
-        auto soa = structure_of_arrays<tuple_elem_type0, tuple_elem_type1>(10, std::tuple(1, 2.5));
+        auto soa = structure_of_arrays<tuple_elem_type0, tuple_elem_type1>(10, detail::tuple(1, 2.5));
         EXPECT_EQ(soa.size(), 10);
         auto vec = soa.to<std::vector>();
         for (std::size_t i = 0; i < vec.size(); ++i)
         {
-            EXPECT_EQ(std::get<0>(vec[i]), 1);
-            EXPECT_EQ(std::get<1>(vec[i]), 2.5);
+            EXPECT_EQ(detail::get<0>(vec[i]), 1);
+            EXPECT_EQ(detail::get<1>(vec[i]), 2.5);
         }
     }
     {
@@ -1080,18 +1080,18 @@ TEST(StructureOfArrays, Construction)
         auto vec = soa.to<std::vector>();
         for (std::size_t i = 0; i < vec.size(); ++i)
         {
-            EXPECT_EQ(std::get<0>(vec[i]), 0);
-            EXPECT_EQ(std::get<1>(vec[i]), 0.0);
+            EXPECT_EQ(detail::get<0>(vec[i]), 0);
+            EXPECT_EQ(detail::get<1>(vec[i]), 0.0);
         }
     }
     {
-        auto soa = structure_of_arrays<tuple_type>(10, std::tuple(1, 2.5));
+        auto soa = structure_of_arrays<tuple_type>(10, detail::tuple(1, 2.5));
         EXPECT_EQ(soa.size(), 10);
         auto vec = soa.to<std::vector>();
         for (std::size_t i = 0; i < vec.size(); ++i)
         {
-            EXPECT_EQ(std::get<0>(vec[i]), 1);
-            EXPECT_EQ(std::get<1>(vec[i]), 2.5);
+            EXPECT_EQ(detail::get<0>(vec[i]), 1);
+            EXPECT_EQ(detail::get<1>(vec[i]), 2.5);
         }
     }
     {
@@ -1108,8 +1108,8 @@ TEST(StructureOfArrays, Construction)
         auto vec = soa.to<std::vector>();
         for (std::size_t i = 0; i < vec.size(); ++i)
         {
-            EXPECT_EQ(std::get<0>(vec[i]), 0);
-            EXPECT_EQ(std::get<1>(vec[i]), 0.0);
+            EXPECT_EQ(detail::get<0>(vec[i]), 0);
+            EXPECT_EQ(detail::get<1>(vec[i]), 0.0);
         }
     }
     {
@@ -1118,8 +1118,8 @@ TEST(StructureOfArrays, Construction)
         auto vec = soa.to<std::vector>();
         for (std::size_t i = 0; i < vec.size(); ++i)
         {
-            EXPECT_EQ(std::get<0>(vec[i]), 1);
-            EXPECT_EQ(std::get<1>(vec[i]), 2.5);
+            EXPECT_EQ(detail::get<0>(vec[i]), 1);
+            EXPECT_EQ(detail::get<1>(vec[i]), 2.5);
         }
     }
     {
@@ -1158,7 +1158,7 @@ TEST(StructureOfArrays, Construction)
         EXPECT_EQ(arr1.use_count(), 1);
     }
 
-    // construction from range of std::tuple
+    // construction from range of detail::tuple
     {
         auto vec = std::vector<tuple_type>();
         for (std::size_t i = 0; i < 10; ++i)
@@ -1171,17 +1171,17 @@ TEST(StructureOfArrays, Construction)
         EXPECT_EQ(soa_vec, vec);
     }
 
-    // construction from initializer_list of std::tuple
+    // construction from initializer_list of detail::tuple
     {
         auto soa = structure_of_arrays<tuple_type>({{0, 0.5}, {1, 1.5}, {2, 2.5}});
         EXPECT_EQ(soa.size(), 3);
         const auto soa_vec = soa.to<std::vector>();
-        EXPECT_EQ(std::get<0>(soa_vec[0]), 0);
-        EXPECT_EQ(std::get<1>(soa_vec[0]), 0.5);
-        EXPECT_EQ(std::get<0>(soa_vec[1]), 1);
-        EXPECT_EQ(std::get<1>(soa_vec[1]), 1.5);
-        EXPECT_EQ(std::get<0>(soa_vec[2]), 2);
-        EXPECT_EQ(std::get<1>(soa_vec[2]), 2.5);
+        EXPECT_EQ(detail::get<0>(soa_vec[0]), 0);
+        EXPECT_EQ(detail::get<1>(soa_vec[0]), 0.5);
+        EXPECT_EQ(detail::get<0>(soa_vec[1]), 1);
+        EXPECT_EQ(detail::get<1>(soa_vec[1]), 1.5);
+        EXPECT_EQ(detail::get<0>(soa_vec[2]), 2);
+        EXPECT_EQ(detail::get<1>(soa_vec[2]), 2.5);
     }
 
     // construction from multiple ranges
@@ -1198,8 +1198,8 @@ TEST(StructureOfArrays, Construction)
         const auto soa_vec = soa.to<std::vector>();
         for (std::size_t i = 0; i < soa_vec.size(); ++i)
         {
-            EXPECT_EQ(std::get<0>(soa_vec[i]), vec0[i]);
-            EXPECT_EQ(std::get<1>(soa_vec[i]), vec1[i]);
+            EXPECT_EQ(detail::get<0>(soa_vec[i]), vec0[i]);
+            EXPECT_EQ(detail::get<1>(soa_vec[i]), vec1[i]);
         }
     }
 
@@ -1208,12 +1208,12 @@ TEST(StructureOfArrays, Construction)
         auto soa = structure_of_arrays<tuple_elem_type0, tuple_elem_type1>({0, 1, 2}, {0.5, 1.5, 2.5});
         EXPECT_EQ(soa.size(), 3);
         const auto soa_vec = soa.to<std::vector>();
-        EXPECT_EQ(std::get<0>(soa_vec[0]), 0);
-        EXPECT_EQ(std::get<1>(soa_vec[0]), 0.5);
-        EXPECT_EQ(std::get<0>(soa_vec[1]), 1);
-        EXPECT_EQ(std::get<1>(soa_vec[1]), 1.5);
-        EXPECT_EQ(std::get<0>(soa_vec[2]), 2);
-        EXPECT_EQ(std::get<1>(soa_vec[2]), 2.5);
+        EXPECT_EQ(detail::get<0>(soa_vec[0]), 0);
+        EXPECT_EQ(detail::get<1>(soa_vec[0]), 0.5);
+        EXPECT_EQ(detail::get<0>(soa_vec[1]), 1);
+        EXPECT_EQ(detail::get<1>(soa_vec[1]), 1.5);
+        EXPECT_EQ(detail::get<0>(soa_vec[2]), 2);
+        EXPECT_EQ(detail::get<1>(soa_vec[2]), 2.5);
     }
 
     // construction from range of custom tuple
@@ -1228,9 +1228,9 @@ TEST(StructureOfArrays, Construction)
         const auto soa_vec = soa.to<std::vector>();
         for (std::size_t i = 0; i < soa_vec.size(); ++i)
         {
-            EXPECT_EQ(std::get<0>(soa_vec[i]), static_cast<tuple_elem_type0>(i));
+            EXPECT_EQ(detail::get<0>(soa_vec[i]), static_cast<tuple_elem_type0>(i));
             EXPECT_EQ(soa_vec[i].get_string<0>(), std::to_string(i));
-            EXPECT_EQ(std::get<1>(soa_vec[i]), static_cast<tuple_elem_type1>(i) + 0.5);
+            EXPECT_EQ(detail::get<1>(soa_vec[i]), static_cast<tuple_elem_type1>(i) + 0.5);
             EXPECT_EQ(soa_vec[i].get_string<1>(), std::to_string(i + 0.5));
         }
     }
@@ -1252,7 +1252,7 @@ TEST(StructureOfArrays, Export)
 {
     using tuple_elem_type0 = int;
     using tuple_elem_type1 = double;
-    using tuple_type = std::tuple<tuple_elem_type0, tuple_elem_type1>;
+    using tuple_type = detail::tuple<tuple_elem_type0, tuple_elem_type1>;
 
     // export to range with same value type
     {
@@ -1278,7 +1278,7 @@ TEST(StructureOfArrays, Export)
 
 TEST(StructureOfArrays, RangeInterface)
 {
-    using tuple_type = std::tuple<int, double>;
+    using tuple_type = detail::tuple<int, double>;
     using soa_type1 = structure_of_arrays<int, double>;
     using soa_type2 = structure_of_arrays<tuple_type>;
     using soa_type3 = structure_of_arrays<custom_tuple_example<int, double>>;
@@ -1350,20 +1350,20 @@ TEST(StructureOfArrays, RangeInterface)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
 #endif
-        static_assert(std::same_as<decltype(soa[0]), std::tuple<decltype(*soa.data<0>()), decltype(*soa.data<1>())>>);
+        static_assert(std::same_as<decltype(soa[0]), detail::tuple<decltype(*soa.data<0>()), decltype(*soa.data<1>())>>);
         static_assert(
-            std::same_as<decltype(*soa.begin()), std::tuple<decltype(*soa.data<0>()), decltype(*soa.data<1>())>>);
+            std::same_as<decltype(*soa.begin()), detail::tuple<decltype(*soa.data<0>()), decltype(*soa.data<1>())>>);
         static_assert(std::same_as<decltype(*(soa.end() - 1)),
-                                   std::tuple<decltype(*(soa.data<0>() + 9)), decltype(*(soa.data<1>() + 9))>>);
+                                   detail::tuple<decltype(*(soa.data<0>() + 9)), decltype(*(soa.data<1>() + 9))>>);
 #ifdef __GNUC__
 #pragma GCC diagnostic pop
 #endif
-        soa = {std::tuple(0, 0.0), std::tuple(1, 1.0), std::tuple(2, 2.0), std::tuple(3, 3.0), std::tuple(4, 4.0)};
+        soa = {detail::tuple(0, 0.0), detail::tuple(1, 1.0), detail::tuple(2, 2.0), detail::tuple(3, 3.0), detail::tuple(4, 4.0)};
         auto vec = soa.to<std::vector>();
         for (std::size_t i = 0; i < vec.size(); ++i)
         {
-            EXPECT_EQ(std::get<0>(vec[i]), static_cast<int>(i));
-            EXPECT_EQ(std::get<1>(vec[i]), static_cast<double>(i));
+            EXPECT_EQ(detail::get<0>(vec[i]), static_cast<int>(i));
+            EXPECT_EQ(detail::get<1>(vec[i]), static_cast<double>(i));
         }
     }
 }
@@ -1385,7 +1385,7 @@ TEST(ManagedStructureOfArrays, Construction)
 {
     using tuple_elem_type0 = int;
     using tuple_elem_type1 = double;
-    using tuple_type = std::tuple<tuple_elem_type0, tuple_elem_type1>;
+    using tuple_type = detail::tuple<tuple_elem_type0, tuple_elem_type1>;
     using custom_tuple_type = custom_tuple_example<tuple_elem_type0, tuple_elem_type1>;
 
     // initizalization
@@ -1398,17 +1398,17 @@ TEST(ManagedStructureOfArrays, Construction)
         EXPECT_EQ(soa.size(), 10);
         for (const auto& v : soa)
         {
-            EXPECT_EQ(std::get<0>(v), 0);
-            EXPECT_EQ(std::get<1>(v), 0.0);
+            EXPECT_EQ(detail::get<0>(v), 0);
+            EXPECT_EQ(detail::get<1>(v), 0.0);
         }
     }
     {
-        auto soa = managed_structure_of_arrays<tuple_elem_type0, tuple_elem_type1>(10, std::tuple(1, 2.5));
+        auto soa = managed_structure_of_arrays<tuple_elem_type0, tuple_elem_type1>(10, detail::tuple(1, 2.5));
         EXPECT_EQ(soa.size(), 10);
         for (const auto& v : soa)
         {
-            EXPECT_EQ(std::get<0>(v), 1);
-            EXPECT_EQ(std::get<1>(v), 2.5);
+            EXPECT_EQ(detail::get<0>(v), 1);
+            EXPECT_EQ(detail::get<1>(v), 2.5);
         }
     }
     {
@@ -1424,17 +1424,17 @@ TEST(ManagedStructureOfArrays, Construction)
         EXPECT_EQ(soa.size(), 10);
         for (const auto& v : soa)
         {
-            EXPECT_EQ(std::get<0>(v), 0);
-            EXPECT_EQ(std::get<1>(v), 0.0);
+            EXPECT_EQ(detail::get<0>(v), 0);
+            EXPECT_EQ(detail::get<1>(v), 0.0);
         }
     }
     {
-        auto soa = managed_structure_of_arrays<tuple_type>(10, std::tuple(1, 2.5));
+        auto soa = managed_structure_of_arrays<tuple_type>(10, detail::tuple(1, 2.5));
         EXPECT_EQ(soa.size(), 10);
         for (const auto& v : soa)
         {
-            EXPECT_EQ(std::get<0>(v), 1);
-            EXPECT_EQ(std::get<1>(v), 2.5);
+            EXPECT_EQ(detail::get<0>(v), 1);
+            EXPECT_EQ(detail::get<1>(v), 2.5);
         }
     }
     {
@@ -1450,8 +1450,8 @@ TEST(ManagedStructureOfArrays, Construction)
         EXPECT_EQ(soa.size(), 10);
         for (const auto& v : soa)
         {
-            EXPECT_EQ(std::get<0>(v), 0);
-            EXPECT_EQ(std::get<1>(v), 0.0);
+            EXPECT_EQ(detail::get<0>(v), 0);
+            EXPECT_EQ(detail::get<1>(v), 0.0);
         }
     }
     {
@@ -1459,8 +1459,8 @@ TEST(ManagedStructureOfArrays, Construction)
         EXPECT_EQ(soa.size(), 10);
         for (const auto& v : soa)
         {
-            EXPECT_EQ(std::get<0>(v), 1);
-            EXPECT_EQ(std::get<1>(v), 2.5);
+            EXPECT_EQ(detail::get<0>(v), 1);
+            EXPECT_EQ(detail::get<1>(v), 2.5);
         }
     }
     {
@@ -1499,7 +1499,7 @@ TEST(ManagedStructureOfArrays, Construction)
         EXPECT_EQ(arr1.use_count(), 1);
     }
 
-    // construction from range of std::tuple
+    // construction from range of detail::tuple
     {
         auto vec = std::vector<tuple_type>();
         for (std::size_t i = 0; i < 10; ++i)
@@ -1510,21 +1510,21 @@ TEST(ManagedStructureOfArrays, Construction)
         EXPECT_EQ(soa.size(), 10);
         for (std::size_t i = 0; i < 10; ++i)
         {
-            EXPECT_EQ(std::get<0>(soa[i]), std::get<0>(vec[i]));
-            EXPECT_EQ(std::get<1>(soa[i]), std::get<1>(vec[i]));
+            EXPECT_EQ(detail::get<0>(soa[i]), detail::get<0>(vec[i]));
+            EXPECT_EQ(detail::get<1>(soa[i]), detail::get<1>(vec[i]));
         }
     }
 
-    // construction from initializer_list of std::tuple
+    // construction from initializer_list of detail::tuple
     {
         auto soa = managed_structure_of_arrays<tuple_type>({{0, 0.5}, {1, 1.5}, {2, 2.5}});
         EXPECT_EQ(soa.size(), 3);
-        EXPECT_EQ(std::get<0>(soa[0]), 0);
-        EXPECT_EQ(std::get<1>(soa[0]), 0.5);
-        EXPECT_EQ(std::get<0>(soa[1]), 1);
-        EXPECT_EQ(std::get<1>(soa[1]), 1.5);
-        EXPECT_EQ(std::get<0>(soa[2]), 2);
-        EXPECT_EQ(std::get<1>(soa[2]), 2.5);
+        EXPECT_EQ(detail::get<0>(soa[0]), 0);
+        EXPECT_EQ(detail::get<1>(soa[0]), 0.5);
+        EXPECT_EQ(detail::get<0>(soa[1]), 1);
+        EXPECT_EQ(detail::get<1>(soa[1]), 1.5);
+        EXPECT_EQ(detail::get<0>(soa[2]), 2);
+        EXPECT_EQ(detail::get<1>(soa[2]), 2.5);
     }
 
     // construction from multiple ranges
@@ -1540,8 +1540,8 @@ TEST(ManagedStructureOfArrays, Construction)
         EXPECT_EQ(soa.size(), 10);
         for (std::size_t i = 0; const auto& v : soa)
         {
-            EXPECT_EQ(std::get<0>(v), vec0[i]);
-            EXPECT_EQ(std::get<1>(v), vec1[i]);
+            EXPECT_EQ(detail::get<0>(v), vec0[i]);
+            EXPECT_EQ(detail::get<1>(v), vec1[i]);
             ++i;
         }
     }
@@ -1551,12 +1551,12 @@ TEST(ManagedStructureOfArrays, Construction)
         auto soa = managed_structure_of_arrays<tuple_elem_type0, tuple_elem_type1>({0, 1, 2}, {0.5, 1.5, 2.5});
         EXPECT_EQ(soa.size(), 3);
         const auto soa_vec = soa.to<std::vector>();
-        EXPECT_EQ(std::get<0>(soa_vec[0]), 0);
-        EXPECT_EQ(std::get<1>(soa_vec[0]), 0.5);
-        EXPECT_EQ(std::get<0>(soa_vec[1]), 1);
-        EXPECT_EQ(std::get<1>(soa_vec[1]), 1.5);
-        EXPECT_EQ(std::get<0>(soa_vec[2]), 2);
-        EXPECT_EQ(std::get<1>(soa_vec[2]), 2.5);
+        EXPECT_EQ(detail::get<0>(soa_vec[0]), 0);
+        EXPECT_EQ(detail::get<1>(soa_vec[0]), 0.5);
+        EXPECT_EQ(detail::get<0>(soa_vec[1]), 1);
+        EXPECT_EQ(detail::get<1>(soa_vec[1]), 1.5);
+        EXPECT_EQ(detail::get<0>(soa_vec[2]), 2);
+        EXPECT_EQ(detail::get<1>(soa_vec[2]), 2.5);
     }
 
     // construction from range of custom tuple
@@ -1570,9 +1570,9 @@ TEST(ManagedStructureOfArrays, Construction)
         EXPECT_EQ(soa.size(), 10);
         for (std::size_t i = 0; i < soa.size(); ++i)
         {
-            EXPECT_EQ(std::get<0>(soa[i]), static_cast<tuple_elem_type0>(i));
+            EXPECT_EQ(detail::get<0>(soa[i]), static_cast<tuple_elem_type0>(i));
             EXPECT_EQ(soa[i].get_string<0>(), std::to_string(i));
-            EXPECT_EQ(std::get<1>(soa[i]), static_cast<tuple_elem_type1>(i) + 0.5);
+            EXPECT_EQ(detail::get<1>(soa[i]), static_cast<tuple_elem_type1>(i) + 0.5);
             EXPECT_EQ(soa[i].get_string<1>(), std::to_string(i + 0.5));
         }
     }
@@ -1595,7 +1595,7 @@ TEST(ManagedStructureOfArrays, Export)
 {
     using tuple_elem_type0 = int;
     using tuple_elem_type1 = double;
-    using tuple_type = std::tuple<tuple_elem_type0, tuple_elem_type1>;
+    using tuple_type = detail::tuple<tuple_elem_type0, tuple_elem_type1>;
 
     // export to range with same value type
     {
@@ -1621,7 +1621,7 @@ TEST(ManagedStructureOfArrays, Export)
 
 TEST(ManagedStructureOfArrays, RangeInterface)
 {
-    using tuple_type = std::tuple<int, double>;
+    using tuple_type = detail::tuple<int, double>;
     using soa_type1 = managed_structure_of_arrays<int, double>;
     using soa_type2 = managed_structure_of_arrays<tuple_type>;
     using soa_type3 = managed_structure_of_arrays<custom_tuple_example<int, double>>;
@@ -1682,15 +1682,15 @@ TEST(ManagedStructureOfArrays, RangeInterface)
         EXPECT_NE(soa.data<1>(), nullptr);
         EXPECT_EQ(soa.size(), 10);
         EXPECT_FALSE(soa.empty());
-        EXPECT_EQ(soa[0], std::tuple(*soa.data<0>(), *soa.data<1>()));
-        EXPECT_EQ(*soa.begin(), std::tuple(*soa.data<0>(), *soa.data<1>()));
-        EXPECT_EQ(*(soa.end() - 1), std::tuple(*(soa.data<0>() + 9), *(soa.data<1>() + 9)));
+        EXPECT_EQ(soa[0], detail::tuple(*soa.data<0>(), *soa.data<1>()));
+        EXPECT_EQ(*soa.begin(), detail::tuple(*soa.data<0>(), *soa.data<1>()));
+        EXPECT_EQ(*(soa.end() - 1), detail::tuple(*(soa.data<0>() + 9), *(soa.data<1>() + 9)));
 
-        soa = {std::tuple(0, 0.0), std::tuple(1, 1.0), std::tuple(2, 2.0), std::tuple(3, 3.0), std::tuple(4, 4.0)};
+        soa = {detail::tuple(0, 0.0), detail::tuple(1, 1.0), detail::tuple(2, 2.0), detail::tuple(3, 3.0), detail::tuple(4, 4.0)};
         for (std::size_t i = 0; i < soa.size(); ++i)
         {
-            EXPECT_EQ(std::get<0>(soa[i]), static_cast<int>(i));
-            EXPECT_EQ(std::get<1>(soa[i]), static_cast<double>(i));
+            EXPECT_EQ(detail::get<0>(soa[i]), static_cast<int>(i));
+            EXPECT_EQ(detail::get<1>(soa[i]), static_cast<double>(i));
         }
     }
 }
@@ -1711,7 +1711,7 @@ TEST(ManagedStructureOfArrays, SmartPointerInterface)
 TEST(ManagedStructureOfArrays, MemoryManagement)
 {
     // create nested managed_array
-    auto elms = std::vector<std::tuple<int, managed_array<double>>>();
+    auto elms = std::vector<detail::tuple<int, managed_array<double>>>();
     for (std::size_t i = 0; i < 10; ++i) elms.emplace_back(i, managed_array<double>(i, 99));
     auto soa = managed_structure_of_arrays(elms);
 
@@ -1826,7 +1826,7 @@ TEST(JaggedArray, Construction)
 
     // construction from nested containers for jagged structure of arrays
     {
-        using tuple_type = std::tuple<int, double>;
+        using tuple_type = detail::tuple<int, double>;
         auto vec_tpl = std::vector<std::vector<tuple_type>>{
             std::vector<tuple_type>(1, {0, 0.5}), std::vector<tuple_type>(2, {1, 1.5}),
             std::vector<tuple_type>(3, {2, 2.5}), std::vector<tuple_type>(4, {3, 3.5}),
@@ -1891,12 +1891,12 @@ TEST(JaggedArray, Construction)
 
     // construction from container of sizes and flat range (structure of arrays)
     {
-        auto flat_range = std::vector<std::tuple<int, double>>{
+        auto flat_range = std::vector<detail::tuple<int, double>>{
             {0, 0.5}, {1, 1.5}, {2, 2.5},   {3, 3.5},   {4, 4.5},   {5, 5.5},   {6, 6.5},  {7, 7.5},
             {8, 8.5}, {9, 9.5}, {10, 10.5}, {11, 11.5}, {12, 12.5}, {13, 13.5}, {14, 14.5}};
         auto sizes = std::vector<std::uint32_t>{1, 2, 3, 4, 5};
 
-        auto jagged_arr = jagged_array<managed_structure_of_arrays<std::tuple<int, double>>>(
+        auto jagged_arr = jagged_array<managed_structure_of_arrays<detail::tuple<int, double>>>(
             sizes, flat_range);  // no deduction guide
 
         EXPECT_EQ(jagged_arr.size(), 15);
@@ -1905,7 +1905,7 @@ TEST(JaggedArray, Construction)
             EXPECT_EQ(jagged_arr.size(i), sizes[i]);
             for (const auto& v : jagged_arr.row(i))
             {
-                EXPECT_EQ(v, (std::tuple<int, double>(j, static_cast<double>(j) + 0.5)));
+                EXPECT_EQ(v, (detail::tuple<int, double>(j, static_cast<double>(j) + 0.5)));
                 ++j;
             }
         }
@@ -1926,7 +1926,7 @@ TEST(JaggedArray, Construction)
 
     // construction from nested initializer_list (managed structure of arrays)
     {
-        using tuple_type = std::tuple<int, double>;
+        using tuple_type = detail::tuple<int, double>;
         auto jagged_arr = jagged_array<managed_structure_of_arrays<tuple_type>>{
             {{0, 0.5}},
             {{1, 1.5}, {1, 1.5}},
@@ -1967,7 +1967,7 @@ TEST(JaggedArray, Construction)
 
     // wrap managed structure of arrays with sizes
     {
-        using tuple_type = std::tuple<int, double>;
+        using tuple_type = detail::tuple<int, double>;
         auto sizes = std::vector<std::uint32_t>{3, 1, 4, 1, 5};
         auto arr = managed_structure_of_arrays<tuple_type>(14, tuple_type(42, 99.0));
         auto jagged_arr_wrap = jagged_array(sizes, arr);
@@ -1985,8 +1985,8 @@ TEST(JaggedArray, Construction)
             EXPECT_EQ(jagged_arr_wrap.size(i), sizes[i]);
             for (const auto& v : jagged_arr_wrap.row(i))
             {
-                EXPECT_EQ(std::get<0>(v), 42);
-                EXPECT_EQ(std::get<1>(v), 99.0);
+                EXPECT_EQ(detail::get<0>(v), 42);
+                EXPECT_EQ(detail::get<1>(v), 99.0);
             }
         }
     }

From b28d3e2879b7955b89ef032210d9318dae7d53a8 Mon Sep 17 00:00:00 2001
From: acd1034 <48613285+acd1034@users.noreply.github.com>
Date: Thu, 5 Mar 2026 17:49:52 +0900
Subject: [PATCH 2/3] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Fix=20namespace?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 include/gpu_array.hpp | 501 +++++++++++++++++++++---------------------
 test/test.cpp         | 194 ++++++++--------
 2 files changed, 349 insertions(+), 346 deletions(-)

diff --git a/include/gpu_array.hpp b/include/gpu_array.hpp
index fc78c80..eed0202 100644
--- a/include/gpu_array.hpp
+++ b/include/gpu_array.hpp
@@ -48,185 +48,188 @@
 #define SIGSEGV_DEPRECATED [[deprecated("Cannot access GPU memory directly")]]
 #endif
 
-namespace gpu_array::detail
+namespace gpu_array
 {
     // Custom implementation of tuple for device code
 
-    template <std::size_t I, class T>
-    struct tuple_leaf
+    namespace detail
     {
-        using type = T;
-        T value;
-    };
+        template <std::size_t I, class T>
+        struct tuple_leaf
+        {
+            using type = T;
+            T value;
+        };
 
-    template <std::size_t I, class T>
-    tuple_leaf<I, T> at_index(const tuple_leaf<I, T>&);  // undefined
+        template <std::size_t I, class T>
+        tuple_leaf<I, T> at_index(const tuple_leaf<I, T>&);  // undefined
 
-    template <class Seq, class... Ts>
-    struct tuple_impl;
+        template <class Seq, class... Ts>
+        struct tuple_impl;
 
-    template <std::size_t... Is, class... Ts>
-    struct tuple_impl<std::index_sequence<Is...>, Ts...> : tuple_leaf<Is, Ts>...
-    {
-        tuple_impl()
-        requires (std::default_initializable<Ts> && ...)
-        = default;
-        template <class... Us>
-        requires (sizeof...(Us) == sizeof...(Ts))
-        __host__ __device__ tuple_impl(Us&&... us) : tuple_leaf<Is, Ts>(std::forward<Us>(us))...
+        template <std::size_t... Is, class... Ts>
+        struct tuple_impl<std::index_sequence<Is...>, Ts...> : tuple_leaf<Is, Ts>...
         {
-        }
-    };
-
-    template <class... Ts>
-    struct tuple
-    {
-        tuple()
-        requires (std::default_initializable<Ts> && ...)
-        = default;
+            tuple_impl()
+            requires (std::default_initializable<Ts> && ...)
+            = default;
+            template <class... Us>
+            requires (sizeof...(Us) == sizeof...(Ts))
+            __host__ __device__ tuple_impl(Us&&... us) : tuple_leaf<Is, Ts>(std::forward<Us>(us))...
+            {
+            }
+        };
 
-        template <class... Us>
-        struct is_single_tuple : std::false_type
+        template <class... Ts>
+        struct tuple
         {
+            tuple()
+            requires (std::default_initializable<Ts> && ...)
+            = default;
+
+            template <class... Us>
+            struct is_single_tuple : std::false_type
+            {
+            };
+            template <class... Us>
+            struct is_single_tuple<tuple<Us...>> : std::true_type
+            {
+            };
+            template <class... Us>
+            requires (sizeof...(Us) == sizeof...(Ts) && !is_single_tuple<std::remove_cvref_t<Us>...>::value)
+            __host__ __device__ tuple(Us&&... us) : base_{std::forward<Us>(us)...}
+            {
+            }
+
+            template <class... Us>
+            requires (sizeof...(Us) == sizeof...(Ts))
+            __host__ __device__ tuple(const tuple<Us...>& t) : tuple{tuple_convert(t, std::index_sequence_for<Ts...>{})}
+            {
+            }
+            template <class... Us>
+            requires (sizeof...(Us) == sizeof...(Ts))
+            __host__ __device__ tuple(tuple<Us...>&& t)
+                : tuple{tuple_convert(std::move(t), std::index_sequence_for<Ts...>{})}
+            {
+            }
+            template <class... Us>
+            requires (sizeof...(Us) == sizeof...(Ts))
+            __host__ __device__ auto& operator=(const tuple<Us...>& t)
+            {
+                *this = tuple_convert(t, std::index_sequence_for<Ts...>{});
+                return *this;
+            }
+            template <class... Us>
+            requires (sizeof...(Us) == sizeof...(Ts))
+            __host__ __device__ auto& operator=(tuple<Us...>&& t)
+            {
+                *this = tuple_convert(std::move(t), std::index_sequence_for<Ts...>{});
+                return *this;
+            }
+
+            template <std::size_t I, class... Us>
+            __host__ __device__ friend auto& get(tuple<Us...>&);
+            template <std::size_t I, class... Us>
+            __host__ __device__ friend const auto& get(const tuple<Us...>&);
+            template <std::size_t I, class... Us>
+            __host__ __device__ friend auto&& get(tuple<Us...>&&);
+            template <std::size_t I, class... Us>
+            __host__ __device__ friend const auto&& get(const tuple<Us...>&&);
+
+        private:
+            template <class... Us, std::size_t... Is>
+            static auto tuple_convert(const tuple<Us...>& t, std::index_sequence<Is...>)
+            {
+                return tuple<Ts...>{get<Is>(t)...};
+            }
+            template <class... Us, std::size_t... Is>
+            static auto tuple_convert(tuple<Us...>&& t, std::index_sequence<Is...>)
+            {
+                return tuple<Ts...>{std::move(get<Is>(t))...};
+            }
+
+            using base = tuple_impl<std::index_sequence_for<Ts...>, Ts...>;
+            base base_;
         };
-        template <class... Us>
-        struct is_single_tuple<detail::tuple<Us...>> : std::true_type
+
+        template <class... Ts>
+        tuple(Ts...) -> tuple<Ts...>;
+
+        template <std::size_t I, class... Us>
+        __host__ __device__ auto& get(tuple<Us...>& t)
         {
-        };
-        template <class... Us>
-        requires (sizeof...(Us) == sizeof...(Ts) && !is_single_tuple<std::remove_cvref_t<Us>...>::value)
-        __host__ __device__ tuple(Us&&... us) : base_{std::forward<Us>(us)...}
+            using leaf = decltype(at_index<I>(t.base_));
+            return static_cast<leaf&>(t.base_).value;
+        }
+        template <std::size_t I, class... Us>
+        __host__ __device__ const auto& get(const tuple<Us...>& t)
         {
+            using leaf = decltype(at_index<I>(t.base_));
+            return static_cast<const leaf&>(t.base_).value;
         }
-
-        template <class... Us>
-        requires (sizeof...(Us) == sizeof...(Ts))
-        __host__ __device__ tuple(const detail::tuple<Us...>& t)
-            : tuple{tuple_convert(t, std::index_sequence_for<Ts...>{})}
+        template <std::size_t I, class... Us>
+        __host__ __device__ auto&& get(tuple<Us...>&& t)
         {
+            using leaf = decltype(at_index<I>(t.base_));
+            return static_cast<typename leaf::type&&>(static_cast<leaf&>(t.base_).value);
         }
-        template <class... Us>
-        requires (sizeof...(Us) == sizeof...(Ts))
-        __host__ __device__ tuple(detail::tuple<Us...>&& t)
-            : tuple{tuple_convert(std::move(t), std::index_sequence_for<Ts...>{})}
+        template <std::size_t I, class... Us>
+        __host__ __device__ const auto&& get(const tuple<Us...>&& t)
         {
+            using leaf = decltype(at_index<I>(t.base_));
+            return static_cast<const typename leaf::type&&>(static_cast<const leaf&>(t.base_).value);
         }
-        template <class... Us>
-        requires (sizeof...(Us) == sizeof...(Ts))
-        __host__ __device__ auto& operator=(const detail::tuple<Us...>& t)
+
+        template <class... Ts, class... Us, std::size_t... Is>
+        __host__ __device__ bool tuple_equal_impl(const tuple<Ts...>& t, const tuple<Us...>& u,
+                                                  std::index_sequence<Is...>)
         {
-            *this = tuple_convert(t, std::index_sequence_for<Ts...>{});
-            return *this;
+            return ((get<Is>(t) == get<Is>(u)) && ...);
         }
-        template <class... Us>
-        requires (sizeof...(Us) == sizeof...(Ts))
-        __host__ __device__ auto& operator=(detail::tuple<Us...>&& t)
+        template <class... Ts, class... Us>
+        requires (sizeof...(Ts) == sizeof...(Us))
+        __host__ __device__ bool operator==(const tuple<Ts...>& t, const tuple<Us...>& u)
         {
-            *this = tuple_convert(std::move(t), std::index_sequence_for<Ts...>{});
-            return *this;
+            return tuple_equal_impl(t, u, std::index_sequence_for<Ts...>{});
         }
 
-        template <std::size_t I, class... Us>
-        __host__ __device__ friend auto& get(detail::tuple<Us...>&);
-        template <std::size_t I, class... Us>
-        __host__ __device__ friend const auto& get(const detail::tuple<Us...>&);
-        template <std::size_t I, class... Us>
-        __host__ __device__ friend auto&& get(detail::tuple<Us...>&&);
-        template <std::size_t I, class... Us>
-        __host__ __device__ friend const auto&& get(const detail::tuple<Us...>&&);
-
-    private:
-        template <class... Us, std::size_t... Is>
-        static auto tuple_convert(const detail::tuple<Us...>& t, std::index_sequence<Is...>)
+        template <class F, class Tuple, std::size_t... Is>
+        requires requires { std::declval<F>()(get<Is>(std::declval<Tuple>())...); }
+        __host__ __device__ decltype(auto) apply_impl(F&& f, Tuple&& t, std::index_sequence<Is...>)
         {
-            return detail::tuple<Ts...>{get<Is>(t)...};
+            return std::forward<F>(f)(get<Is>(std::forward<Tuple>(t))...);
         }
-        template <class... Us, std::size_t... Is>
-        static auto tuple_convert(detail::tuple<Us...>&& t, std::index_sequence<Is...>)
+        template <class F, class Tuple>
+        requires requires { std::tuple_size_v<std::remove_reference_t<Tuple>>; }
+        __host__ __device__ decltype(auto) apply(F&& f, Tuple&& t)
         {
-            return detail::tuple<Ts...>{std::move(get<Is>(t))...};
+            return apply_impl(std::forward<F>(f), std::forward<Tuple>(t),
+                              std::make_index_sequence<std::tuple_size_v<std::remove_reference_t<Tuple>>>{});
         }
+    }  // namespace detail
 
-        using base = tuple_impl<std::index_sequence_for<Ts...>, Ts...>;
-        base base_;
-    };
-
-    template <class... Ts>
-    tuple(Ts...) -> tuple<Ts...>;
-
-    template <std::size_t I, class... Us>
-    __host__ __device__ auto& get(detail::tuple<Us...>& t)
-    {
-        using leaf = decltype(at_index<I>(t.base_));
-        return static_cast<leaf&>(t.base_).value;
-    }
-    template <std::size_t I, class... Us>
-    __host__ __device__ const auto& get(const detail::tuple<Us...>& t)
-    {
-        using leaf = decltype(at_index<I>(t.base_));
-        return static_cast<const leaf&>(t.base_).value;
-    }
-    template <std::size_t I, class... Us>
-    __host__ __device__ auto&& get(detail::tuple<Us...>&& t)
-    {
-        using leaf = decltype(at_index<I>(t.base_));
-        return static_cast<typename leaf::type&&>(static_cast<leaf&>(t.base_).value);
-    }
-    template <std::size_t I, class... Us>
-    __host__ __device__ const auto&& get(const detail::tuple<Us...>&& t)
-    {
-        using leaf = decltype(at_index<I>(t.base_));
-        return static_cast<const typename leaf::type&&>(static_cast<const leaf&>(t.base_).value);
-    }
-
-    template <class... Ts, class... Us, std::size_t... Is>
-    __host__ __device__ bool tuple_equal_impl(const detail::tuple<Ts...>& t, const detail::tuple<Us...>& u,
-                                              std::index_sequence<Is...>)
-    {
-        return ((detail::get<Is>(t) == detail::get<Is>(u)) && ...);
-    }
-    template <class... Ts, class... Us>
-    requires (sizeof...(Ts) == sizeof...(Us))
-    __host__ __device__ bool operator==(const detail::tuple<Ts...>& t, const detail::tuple<Us...>& u)
-    {
-        return tuple_equal_impl(t, u, std::index_sequence_for<Ts...>{});
-    }
-
-    template <class F, class Tuple, std::size_t... Is>
-    requires requires { std::declval<F>()(detail::get<Is>(std::declval<Tuple>())...); }
-    __host__ __device__ decltype(auto) apply_impl(F&& f, Tuple&& t, std::index_sequence<Is...>)
-    {
-        return std::forward<F>(f)(detail::get<Is>(std::forward<Tuple>(t))...);
-    }
-    template <class F, class Tuple>
-    requires requires { std::tuple_size_v<std::remove_reference_t<Tuple>>; }
-    __host__ __device__ decltype(auto) apply(F&& f, Tuple&& t)
-    {
-        return apply_impl(std::forward<F>(f), std::forward<Tuple>(t),
-                          std::make_index_sequence<std::tuple_size_v<std::remove_reference_t<Tuple>>>{});
-    }
-}  // namespace gpu_array::detail
+    using detail::tuple, detail::get, detail::apply;
+}  // namespace gpu_array
 
 template <class... Ts>
-struct std::tuple_size<gpu_array::detail::tuple<Ts...>> : std::integral_constant<std::size_t, sizeof...(Ts)>
+struct std::tuple_size<gpu_array::tuple<Ts...>> : std::integral_constant<std::size_t, sizeof...(Ts)>
 {
 };
 template <std::size_t I, class... Ts>
-struct std::tuple_element<I, gpu_array::detail::tuple<Ts...>> : std::tuple_element<I, std::tuple<Ts...>>
+struct std::tuple_element<I, gpu_array::tuple<Ts...>> : std::tuple_element<I, std::tuple<Ts...>>
 {
 };
 template <class... TTypes, class... UTypes>
-requires requires { typename gpu_array::detail::tuple<std::common_type_t<TTypes, UTypes>...>; }
-struct std::common_type<gpu_array::detail::tuple<TTypes...>, gpu_array::detail::tuple<UTypes...>>
+requires requires { typename gpu_array::tuple<std::common_type_t<TTypes, UTypes>...>; }
+struct std::common_type<gpu_array::tuple<TTypes...>, gpu_array::tuple<UTypes...>>
 {
-    using type = gpu_array::detail::tuple<std::common_type_t<TTypes, UTypes>...>;
+    using type = gpu_array::tuple<std::common_type_t<TTypes, UTypes>...>;
 };
 template <class... TTypes, class... UTypes, template <class> class TQual, template <class> class UQual>
-requires requires { typename gpu_array::detail::tuple<std::common_reference_t<TQual<TTypes>, UQual<UTypes>>...>; }
-struct std::basic_common_reference<gpu_array::detail::tuple<TTypes...>, gpu_array::detail::tuple<UTypes...>, TQual,
-                                   UQual>
+requires requires { typename gpu_array::tuple<std::common_reference_t<TQual<TTypes>, UQual<UTypes>>...>; }
+struct std::basic_common_reference<gpu_array::tuple<TTypes...>, gpu_array::tuple<UTypes...>, TQual, UQual>
 {
-    using type = gpu_array::detail::tuple<std::common_reference_t<TQual<TTypes>, UQual<UTypes>>...>;
+    using type = gpu_array::tuple<std::common_reference_t<TQual<TTypes>, UQual<UTypes>>...>;
 };
 
 namespace gpu_array
@@ -301,11 +304,11 @@ namespace gpu_array
 
         protected:
             size_type size_ = 0U;
-            detail::tuple<ValueTypes*...> data_;
+            gpu_array::tuple<ValueTypes*...> data_;
             std::uint32_t* ref_count_ = nullptr;  // reference counter, not used on GPU
 
             template <std::size_t N>
-            using element_type = std::tuple_element_t<N, detail::tuple<ValueTypes...>>;
+            using element_type = std::tuple_element_t<N, gpu_array::tuple<ValueTypes...>>;
 
             __host__ __device__ void init()
             {
@@ -436,11 +439,11 @@ namespace gpu_array
 
             __host__ __device__ void tuple_for_each(auto&& f) const
             {
-                detail::apply([&f](const auto&... args) { (f(args), ...); }, data_);
+                gpu_array::apply([&f](const auto&... args) { (f(args), ...); }, data_);
             }
             __host__ __device__ void tuple_for_each(auto&& f)
             {
-                detail::apply([&f](auto&... args) { (f(args), ...); }, data_);
+                gpu_array::apply([&f](auto&... args) { (f(args), ...); }, data_);
             }
         };
 
@@ -608,8 +611,8 @@ namespace gpu_array
         {
             return *(data() + base::size_ - 1);
         }
-        __host__ __device__ pointer data() noexcept { return detail::get<0>(base::data_); }
-        __host__ __device__ const_pointer data() const noexcept { return detail::get<0>(base::data_); }
+        __host__ __device__ pointer data() noexcept { return gpu_array::get<0>(base::data_); }
+        __host__ __device__ const_pointer data() const noexcept { return gpu_array::get<0>(base::data_); }
 
         array() = default;
         __host__ __device__ array(const array& r) : base(r) {}
@@ -619,7 +622,7 @@ namespace gpu_array
         {
             if (base::size_ == 0) return;
             auto buf = std::make_unique<value_type[]>(base::size_);
-            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&detail::get<0>(base::data_)),
+            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)),
                                            sizeof(value_type) * base::size_));
             assert(data() != nullptr);
             GPU_CHECK_ERROR(api::gpuMemcpy(data(), buf.get(), sizeof(value_type) * base::size_, gpuMemcpyHostToDevice));
@@ -628,7 +631,7 @@ namespace gpu_array
         __host__ array(std::size_t size, default_init_tag) : base(size)
         {
             if (base::size_ == 0) return;
-            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&detail::get<0>(base::data_)),
+            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)),
                                            sizeof(value_type) * base::size_));
             assert(data() != nullptr);
             if constexpr (!std::is_trivially_default_constructible_v<value_type>)
@@ -646,7 +649,7 @@ namespace gpu_array
             auto al = std::allocator<value_type>();
             auto buf = al.allocate(base::size_);
             std::ranges::uninitialized_fill(buf, buf + base::size_, value);
-            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&detail::get<0>(base::data_)),
+            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)),
                                            sizeof(value_type) * base::size_));
             assert(data() != nullptr);
             GPU_CHECK_ERROR(api::gpuMemcpy(data(), buf, sizeof(value_type) * base::size_, gpuMemcpyHostToDevice));
@@ -662,7 +665,7 @@ namespace gpu_array
         {
             if (base::size_ == 0) return;
 
-            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&detail::get<0>(base::data_)),
+            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)),
                                            sizeof(value_type) * base::size_));
             assert(data() != nullptr);
             GPU_CHECK_ERROR(
@@ -680,7 +683,7 @@ namespace gpu_array
             auto buf = al.allocate(base::size_);
             for (auto i = std::size_t{0}; const auto& v : r) std::ranges::construct_at(buf + i++, v);
 
-            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&detail::get<0>(base::data_)),
+            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)),
                                            sizeof(value_type) * base::size_));
             assert(data() != nullptr);
             GPU_CHECK_ERROR(api::gpuMemcpy(data(), buf, sizeof(value_type) * base::size_, gpuMemcpyHostToDevice));
@@ -690,7 +693,7 @@ namespace gpu_array
         __host__ array(std::initializer_list<value_type> r) : base(std::ranges::size(r))
         {
             if (base::size_ == 0) return;
-            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&detail::get<0>(base::data_)),
+            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)),
                                            sizeof(value_type) * base::size_));
             assert(data() != nullptr);
             GPU_CHECK_ERROR(
@@ -891,7 +894,7 @@ namespace gpu_array
             {
                 throw std::runtime_error("pointer type mismatch: expected device memory pointer");
             }
-            detail::get<0>(base::data_) = ptr;
+            gpu_array::get<0>(base::data_) = ptr;
         }
 #endif
 #if defined(GPU_OVERLOAD_DEVICE)
@@ -937,7 +940,7 @@ namespace gpu_array
                                    [](auto acc, const auto& r) { return acc + std::ranges::size(r); }))
         {
             if (base::size_ == 0) return;
-            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&detail::get<0>(base::data_)),
+            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)),
                                                   sizeof(ValueType) * base::size_));
             assert(data() != nullptr);
 
@@ -993,8 +996,8 @@ namespace gpu_array
         __host__ __device__ const_reference front() const noexcept { return *begin(); }
         __host__ __device__ reference back() noexcept { return *(data() + base::size_ - 1); }
         __host__ __device__ const_reference back() const noexcept { return *(data() + base::size_ - 1); }
-        __host__ __device__ pointer data() noexcept { return detail::get<0>(base::data_); }
-        __host__ __device__ const_pointer data() const noexcept { return detail::get<0>(base::data_); }
+        __host__ __device__ pointer data() noexcept { return gpu_array::get<0>(base::data_); }
+        __host__ __device__ const_pointer data() const noexcept { return gpu_array::get<0>(base::data_); }
 
         managed_array() = default;
         __host__ __device__ managed_array(const managed_array& r) : base(r) {}
@@ -1003,7 +1006,7 @@ namespace gpu_array
         __host__ explicit managed_array(std::size_t size) : base(size)
         {
             if (base::size_ == 0) return;
-            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&detail::get<0>(base::data_)),
+            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)),
                                                   sizeof(value_type) * base::size_));
             assert(data() != nullptr);
             std::ranges::uninitialized_value_construct(*this);
@@ -1012,7 +1015,7 @@ namespace gpu_array
         __host__ explicit managed_array(std::size_t size, default_init_tag) : base(size)
         {
             if (base::size_ == 0) return;
-            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&detail::get<0>(base::data_)),
+            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)),
                                                   sizeof(value_type) * base::size_));
             assert(data() != nullptr);
             std::ranges::uninitialized_default_construct(*this);
@@ -1021,7 +1024,7 @@ namespace gpu_array
         __host__ managed_array(std::size_t size, const value_type& value) : base(size)
         {
             if (base::size_ == 0) return;
-            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&detail::get<0>(base::data_)),
+            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)),
                                                   sizeof(value_type) * base::size_));
             assert(data() != nullptr);
             std::ranges::uninitialized_fill(*this, value);
@@ -1032,7 +1035,7 @@ namespace gpu_array
         __host__ explicit managed_array(const T& r) : base(std::ranges::size(r))
         {
             if (base::size_ == 0) return;
-            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&detail::get<0>(base::data_)),
+            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)),
                                                   sizeof(value_type) * base::size_));
             assert(data() != nullptr);
 
@@ -1045,7 +1048,7 @@ namespace gpu_array
         __host__ managed_array(std::initializer_list<value_type> r) : base(std::ranges::size(r))
         {
             if (base::size_ == 0) return;
-            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&detail::get<0>(base::data_)),
+            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)),
                                                   sizeof(value_type) * base::size_));
             assert(data() != nullptr);
             for (auto i = std::size_t{0}; const auto& v : r) std::ranges::construct_at(data() + i++, v);
@@ -1251,7 +1254,7 @@ namespace gpu_array
             {
                 throw std::runtime_error("pointer type mismatch: expected managed memory pointer");
             }
-            detail::get<0>(base::data_) = ptr;
+            gpu_array::get<0>(base::data_) = ptr;
         }
 #endif
 #if defined(GPU_OVERLOAD_DEVICE)
@@ -1306,7 +1309,7 @@ namespace gpu_array
 
         __host__ explicit value(default_init_tag) : base(1)
         {
-            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&detail::get<0>(base::data_)), sizeof(value_type)));
+            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)), sizeof(value_type)));
             assert(get() != nullptr);
             if constexpr (!std::is_trivially_default_constructible_v<value_type>)
             {
@@ -1317,7 +1320,7 @@ namespace gpu_array
 
         __host__ explicit value(const value_type& r) : base(1)
         {
-            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&detail::get<0>(base::data_)), sizeof(value_type)));
+            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)), sizeof(value_type)));
             assert(get() != nullptr);
             GPU_CHECK_ERROR(api::gpuMemcpy(get(), &r, sizeof(value_type), gpuMemcpyHostToDevice));
         }
@@ -1327,7 +1330,7 @@ namespace gpu_array
         __host__ explicit value(Args&&... args) : base(1)
         {
             auto temp = value_type(std::forward<Args>(args)...);
-            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&detail::get<0>(base::data_)), sizeof(value_type)));
+            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)), sizeof(value_type)));
             assert(get() != nullptr);
             GPU_CHECK_ERROR(api::gpuMemcpy(get(), &temp, sizeof(value_type), gpuMemcpyHostToDevice));
         }
@@ -1343,7 +1346,7 @@ namespace gpu_array
             {
                 throw std::runtime_error("pointer type mismatch: expected device memory pointer");
             }
-            detail::get<0>(base::data_) = ptr;
+            gpu_array::get<0>(base::data_) = ptr;
         }
 #else
             : base(ptr, ptr == nullptr ? 0 : 1)
@@ -1362,8 +1365,8 @@ namespace gpu_array
             return *this;
         }
 
-        __host__ __device__ pointer get() const noexcept { return detail::get<0>(base::data_); }
-        __host__ __device__ explicit operator bool() const noexcept { return detail::get<0>(base::data_) != nullptr; }
+        __host__ __device__ pointer get() const noexcept { return gpu_array::get<0>(base::data_); }
+        __host__ __device__ explicit operator bool() const noexcept { return gpu_array::get<0>(base::data_) != nullptr; }
 
 #if defined(GPU_OVERLOAD_DEVICE)
         __device__ reference operator*() const noexcept
@@ -1433,7 +1436,7 @@ namespace gpu_array
         __host__ explicit managed_value(default_init_tag) : base(1)
         {
             GPU_CHECK_ERROR(
-                api::gpuMallocManaged(reinterpret_cast<void**>(&detail::get<0>(base::data_)), sizeof(ValueType)));
+                api::gpuMallocManaged(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)), sizeof(ValueType)));
             assert(get() != nullptr);
             std::ranges::uninitialized_default_construct_n(get(), 1);
         }
@@ -1441,7 +1444,7 @@ namespace gpu_array
         __host__ explicit managed_value(const ValueType& r) : base(1)
         {
             GPU_CHECK_ERROR(
-                api::gpuMallocManaged(reinterpret_cast<void**>(&detail::get<0>(base::data_)), sizeof(ValueType)));
+                api::gpuMallocManaged(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)), sizeof(ValueType)));
             assert(get() != nullptr);
             std::ranges::construct_at(get(), r);
         }
@@ -1449,7 +1452,7 @@ namespace gpu_array
         __host__ explicit managed_value(ValueType&& r) : base(1)
         {
             GPU_CHECK_ERROR(
-                api::gpuMallocManaged(reinterpret_cast<void**>(&detail::get<0>(base::data_)), sizeof(ValueType)));
+                api::gpuMallocManaged(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)), sizeof(ValueType)));
             assert(get() != nullptr);
             std::ranges::construct_at(get(), std::move(r));
         }
@@ -1459,7 +1462,7 @@ namespace gpu_array
         __host__ explicit managed_value(Args&&... args) : base(1)
         {
             GPU_CHECK_ERROR(
-                api::gpuMallocManaged(reinterpret_cast<void**>(&detail::get<0>(base::data_)), sizeof(ValueType)));
+                api::gpuMallocManaged(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)), sizeof(ValueType)));
             assert(get() != nullptr);
             std::ranges::construct_at(get(), std::forward<Args>(args)...);
         }
@@ -1475,7 +1478,7 @@ namespace gpu_array
             {
                 throw std::runtime_error("pointer type mismatch: expected managed memory pointer");
             }
-            detail::get<0>(base::data_) = ptr;
+            gpu_array::get<0>(base::data_) = ptr;
         }
 #else
             : base(ptr, ptr == nullptr ? 0 : 1)
@@ -1505,8 +1508,8 @@ namespace gpu_array
             return get();
         }
 
-        __host__ __device__ pointer get() const noexcept { return detail::get<0>(base::data_); }
-        __host__ __device__ explicit operator bool() const noexcept { return detail::get<0>(base::data_) != nullptr; }
+        __host__ __device__ pointer get() const noexcept { return gpu_array::get<0>(base::data_); }
+        __host__ __device__ explicit operator bool() const noexcept { return gpu_array::get<0>(base::data_) != nullptr; }
 
         __host__ void prefetch(int device_id, api::gpuStream_t stream = 0, bool recursive = true) const
         {
@@ -1557,10 +1560,10 @@ namespace gpu_array
         template <std::size_t N, typename Tuple, typename... Ts>
         constexpr bool assignable_to_tuple_helper_n()
         {
-            return requires(const Tuple& t1, detail::tuple<Ts...>& t2) {
-                detail::get<N>(t1);
-                detail::get<N>(t2);
-                requires std::assignable_from<decltype(detail::get<N>(t2)), decltype(detail::get<N>(t1))>;
+            return requires(const Tuple& t1, gpu_array::tuple<Ts...>& t2) {
+                gpu_array::get<N>(t1);
+                gpu_array::get<N>(t2);
+                requires std::assignable_from<decltype(gpu_array::get<N>(t2)), decltype(gpu_array::get<N>(t1))>;
             };
         }
         template <typename Tuple, typename... Ts>
@@ -1577,7 +1580,7 @@ namespace gpu_array
     template <template <typename...> typename Tuple, typename... Ts>
     class structure_of_arrays_iterator
     {
-        detail::tuple<Ts*...> ptrs_;
+        gpu_array::tuple<Ts*...> ptrs_;
 
     public:
         using difference_type = std::ptrdiff_t;
@@ -1592,15 +1595,15 @@ namespace gpu_array
         structure_of_arrays_iterator& operator=(const structure_of_arrays_iterator&) = default;
         structure_of_arrays_iterator& operator=(structure_of_arrays_iterator&&) noexcept = default;
 
-        __host__ __device__ explicit structure_of_arrays_iterator(detail::tuple<Ts*...> ptrs) : ptrs_(ptrs) {}
+        __host__ __device__ explicit structure_of_arrays_iterator(gpu_array::tuple<Ts*...> ptrs) : ptrs_(ptrs) {}
 
         __host__ __device__ Tuple<Ts&...> operator*() const
         {
-            return detail::apply([](auto*... ptrs) { return Tuple<Ts&...>(*ptrs...); }, ptrs_);
+            return gpu_array::apply([](auto*... ptrs) { return Tuple<Ts&...>(*ptrs...); }, ptrs_);
         }
         __host__ __device__ Tuple<Ts&...> operator[](size_type n) const
         {
-            return detail::apply([n](auto*... ptrs) { return Tuple<Ts&...>(ptrs[n]...); }, ptrs_);
+            return gpu_array::apply([n](auto*... ptrs) { return Tuple<Ts&...>(ptrs[n]...); }, ptrs_);
         }
         __host__ __device__ auto operator->() const
         {
@@ -1613,7 +1616,7 @@ namespace gpu_array
         }
         __host__ __device__ structure_of_arrays_iterator& operator++()
         {
-            detail::apply([](auto*&... ptrs) { (++ptrs, ...); }, ptrs_);
+            gpu_array::apply([](auto*&... ptrs) { (++ptrs, ...); }, ptrs_);
             return *this;
         }
         __host__ __device__ structure_of_arrays_iterator operator++(int)
@@ -1624,12 +1627,12 @@ namespace gpu_array
         }
         __host__ __device__ structure_of_arrays_iterator& operator+=(difference_type n)
         {
-            detail::apply([n](auto*&... ptrs) { ((ptrs += n), ...); }, ptrs_);
+            gpu_array::apply([n](auto*&... ptrs) { ((ptrs += n), ...); }, ptrs_);
             return *this;
         }
         __host__ __device__ structure_of_arrays_iterator& operator--()
         {
-            detail::apply([](auto*&... ptrs) { (--ptrs, ...); }, ptrs_);
+            gpu_array::apply([](auto*&... ptrs) { (--ptrs, ...); }, ptrs_);
             return *this;
         }
         __host__ __device__ structure_of_arrays_iterator operator--(int)
@@ -1640,20 +1643,20 @@ namespace gpu_array
         }
         __host__ __device__ structure_of_arrays_iterator& operator-=(difference_type n)
         {
-            detail::apply([n](auto*&... ptrs) { ((ptrs -= n), ...); }, ptrs_);
+            gpu_array::apply([n](auto*&... ptrs) { ((ptrs -= n), ...); }, ptrs_);
             return *this;
         }
 
         __host__ __device__ friend difference_type operator-(const structure_of_arrays_iterator& lhs,
                                                              const structure_of_arrays_iterator& rhs)
         {
-            return detail::get<0>(lhs.ptrs_) - detail::get<0>(rhs.ptrs_);
+            return gpu_array::get<0>(lhs.ptrs_) - gpu_array::get<0>(rhs.ptrs_);
         }
         __host__ __device__ friend structure_of_arrays_iterator operator+(const structure_of_arrays_iterator& lhs,
                                                                           difference_type n)
         {
             return structure_of_arrays_iterator(
-                detail::apply([n](auto*... ptrs) { return detail::tuple{ptrs + n...}; }, lhs.ptrs_));
+                gpu_array::apply([n](auto*... ptrs) { return gpu_array::tuple{ptrs + n...}; }, lhs.ptrs_));
         }
         __host__ __device__ friend structure_of_arrays_iterator operator+(structure_of_arrays_iterator&& lhs,
                                                                           difference_type n)
@@ -1675,7 +1678,7 @@ namespace gpu_array
                                                                           difference_type n)
         {
             return structure_of_arrays_iterator(
-                detail::apply([n](auto*... ptrs) { return detail::tuple{ptrs - n...}; }, lhs.ptrs_));
+                gpu_array::apply([n](auto*... ptrs) { return gpu_array::tuple{ptrs - n...}; }, lhs.ptrs_));
         }
         __host__ __device__ friend structure_of_arrays_iterator operator-(structure_of_arrays_iterator&& lhs,
                                                                           difference_type n)
@@ -1687,16 +1690,16 @@ namespace gpu_array
         __host__ __device__ friend bool operator==(const structure_of_arrays_iterator& lhs,
                                                    const structure_of_arrays_iterator& rhs)
         {
-            return detail::get<0>(lhs.ptrs_) == detail::get<0>(rhs.ptrs_);
+            return gpu_array::get<0>(lhs.ptrs_) == gpu_array::get<0>(rhs.ptrs_);
         }
         __host__ __device__ friend std::strong_ordering operator<=>(const structure_of_arrays_iterator& lhs,
                                                                     const structure_of_arrays_iterator& rhs)
         {
-            return detail::get<0>(lhs.ptrs_) <=> detail::get<0>(rhs.ptrs_);
+            return gpu_array::get<0>(lhs.ptrs_) <=> gpu_array::get<0>(rhs.ptrs_);
         }
         __host__ __device__ friend auto iter_move(const structure_of_arrays_iterator& x)
         {
-            return detail::apply(
+            return gpu_array::apply(
                 [](auto*... ptrs) {
                     using RetType = std::remove_cvref_t<decltype(x)>::value_type;
                     return RetType(std::move(*ptrs)...);
@@ -1708,15 +1711,15 @@ namespace gpu_array
         {
             constexpr std::size_t size = std::tuple_size_v<std::remove_cvref_t<decltype(lhs.ptrs_)>>;
             [&lhs, &rhs]<std::size_t... N>(std::index_sequence<N...>) {
-                (std::swap(*detail::get<N>(lhs.ptrs_), *detail::get<N>(rhs.ptrs_)), ...);
+                (std::swap(*gpu_array::get<N>(lhs.ptrs_), *gpu_array::get<N>(rhs.ptrs_)), ...);
             }(std::make_index_sequence<size>());
         }
     };
 
     template <typename... Ts>
-    class structure_of_arrays : public structure_of_arrays<detail::tuple<Ts...>, size_type_default>
+    class structure_of_arrays : public structure_of_arrays<gpu_array::tuple<Ts...>, size_type_default>
     {
-        using base = structure_of_arrays<detail::tuple<Ts...>, size_type_default>;
+        using base = structure_of_arrays<gpu_array::tuple<Ts...>, size_type_default>;
         using base::base;
 
     public:
@@ -1748,9 +1751,9 @@ namespace gpu_array
         static constexpr auto num_arrays = sizeof...(Ts);
         using base = detail::base<false, SizeType, Ts...>;
 
-        using tuple_value_type = detail::tuple<Ts...>;
-        using tuple_pointer_type = detail::tuple<Ts*...>;
-        using tuple_const_pointer_type = detail::tuple<const Ts*...>;
+        using tuple_value_type = gpu_array::tuple<Ts...>;
+        using tuple_pointer_type = gpu_array::tuple<Ts*...>;
+        using tuple_const_pointer_type = gpu_array::tuple<const Ts*...>;
         using ret_tuple_value_type = Tuple<Ts...>;
         using ret_tuple_reference_type = Tuple<Ts&...>;
         using ret_tuple_const_reference_type = Tuple<const Ts&...>;
@@ -1765,18 +1768,18 @@ namespace gpu_array
         SIGSEGV_DEPRECATED __host__ __device__ auto begin() noexcept { return iterator_type(base::data_); }
         SIGSEGV_DEPRECATED __host__ __device__ auto end() noexcept
         {
-            return detail::apply(
+            return gpu_array::apply(
                 [this](auto&... ptrs) { return iterator_type(tuple_pointer_type{(ptrs + base::size_)...}); },
                 base::data_);
         }
         SIGSEGV_DEPRECATED __host__ __device__ auto begin() const noexcept
         {
-            return detail::apply([](auto&... ptrs) { return const_iterator_type(tuple_const_pointer_type{(ptrs)...}); },
+            return gpu_array::apply([](auto&... ptrs) { return const_iterator_type(tuple_const_pointer_type{(ptrs)...}); },
                                  base::data_);
         }
         SIGSEGV_DEPRECATED __host__ __device__ auto end() const noexcept
         {
-            return detail::apply(
+            return gpu_array::apply(
                 [this](auto&... ptrs) {
                     return const_iterator_type(tuple_const_pointer_type{(ptrs + base::size_)...});
                 },
@@ -1785,28 +1788,28 @@ namespace gpu_array
         SIGSEGV_DEPRECATED __host__ __device__ auto operator[](size_type i) &
         {
             assert(i < base::size_);
-            return detail::apply([i](auto&... ptrs) { return ret_tuple_reference_type{*(ptrs + i)...}; }, base::data_);
+            return gpu_array::apply([i](auto&... ptrs) { return ret_tuple_reference_type{*(ptrs + i)...}; }, base::data_);
         }
         SIGSEGV_DEPRECATED __host__ __device__ auto operator[](size_type i) const&
         {
             assert(i < base::size_);
-            return detail::apply([i](auto&... ptrs) { return ret_tuple_const_reference_type{*(ptrs + i)...}; },
+            return gpu_array::apply([i](auto&... ptrs) { return ret_tuple_const_reference_type{*(ptrs + i)...}; },
                                  base::data_);
         }
         SIGSEGV_DEPRECATED __host__ __device__ auto operator[](size_type i) &&
         {
             assert(i < base::size_);
-            return detail::apply([i](auto&... ptrs) { return ret_tuple_value_type{*(ptrs + i)...}; }, base::data_);
+            return gpu_array::apply([i](auto&... ptrs) { return ret_tuple_value_type{*(ptrs + i)...}; }, base::data_);
         }
         template <std::size_t N>
         __host__ __device__ auto* data() noexcept
         {
-            return detail::get<N>(base::data_);
+            return gpu_array::get<N>(base::data_);
         }
         template <std::size_t N>
         __host__ __device__ const auto* data() const noexcept
         {
-            return detail::get<N>(base::data_);
+            return gpu_array::get<N>(base::data_);
         }
 
         structure_of_arrays() = default;
@@ -1853,7 +1856,7 @@ namespace gpu_array
             };
 
             [this, &value, alloc_ptr]<std::size_t... N>(std::index_sequence<N...>) {
-                (alloc_ptr(detail::get<N>(base::data_), detail::get<N>(value)), ...);
+                (alloc_ptr(gpu_array::get<N>(base::data_), gpu_array::get<N>(value)), ...);
             }(std::make_index_sequence<num_arrays>());
         }
 
@@ -1872,8 +1875,8 @@ namespace gpu_array
             };
 
             [this, &array, alloc_ptr]<std::size_t... N>(std::index_sequence<N...>) {
-                (alloc_ptr(detail::get<N>(base::data_),
-                           array | std::views::transform([](const auto& v) { return (detail::get<N>(v)); })),
+                (alloc_ptr(gpu_array::get<N>(base::data_),
+                           array | std::views::transform([](const auto& v) { return (gpu_array::get<N>(v)); })),
                  ...);
             }(std::make_index_sequence<num_arrays>());
         }
@@ -1891,15 +1894,15 @@ namespace gpu_array
             };
 
             [this, &list, alloc_ptr]<std::size_t... N>(std::index_sequence<N...>) {
-                (alloc_ptr(detail::get<N>(base::data_),
-                           list | std::views::transform([](const auto& v) { return (detail::get<N>(v)); })),
+                (alloc_ptr(gpu_array::get<N>(base::data_),
+                           list | std::views::transform([](const auto& v) { return (gpu_array::get<N>(v)); })),
                  ...);
             }(std::make_index_sequence<num_arrays>());
         }
 
         template <detail::array_convertible_for_copy... Ranges>
         requires (sizeof...(Ranges) == num_arrays) &&
-                 detail::assignable_to_tuple<detail::tuple<std::ranges::range_value_t<Ranges>...>, Ts...>
+                 detail::assignable_to_tuple<gpu_array::tuple<std::ranges::range_value_t<Ranges>...>, Ts...>
         __host__ explicit structure_of_arrays(const Ranges&... arrays) : base(std::max({std::ranges::size(arrays)...}))
         {
             if (base::size_ == 0) return;
@@ -1918,9 +1921,9 @@ namespace gpu_array
                 GPU_CHECK_ERROR(api::gpuMemcpy(ptr, buf.get(), sizeof(T) * base::size_, gpuMemcpyHostToDevice));
             };
 
-            auto arrays_tuple = detail::tuple<const Ranges&...>(arrays...);
+            auto arrays_tuple = gpu_array::tuple<const Ranges&...>(arrays...);
             [this, &arrays_tuple, alloc_ptr]<std::size_t... N>(std::index_sequence<N...>) {
-                (alloc_ptr(detail::get<N>(base::data_), detail::get<N>(arrays_tuple)), ...);
+                (alloc_ptr(gpu_array::get<N>(base::data_), gpu_array::get<N>(arrays_tuple)), ...);
             }(std::make_index_sequence<num_arrays>());
         }
 
@@ -1943,9 +1946,9 @@ namespace gpu_array
                 GPU_CHECK_ERROR(api::gpuMemcpy(ptr, buf.get(), sizeof(T) * base::size_, gpuMemcpyHostToDevice));
             };
 
-            auto arrays_tuple = detail::tuple<std::initializer_list<Ts>...>(lists...);
+            auto arrays_tuple = gpu_array::tuple<std::initializer_list<Ts>...>(lists...);
             [this, &arrays_tuple, alloc_ptr]<std::size_t... N>(std::index_sequence<N...>) {
-                (alloc_ptr(detail::get<N>(base::data_), detail::get<N>(arrays_tuple)), ...);
+                (alloc_ptr(gpu_array::get<N>(base::data_), gpu_array::get<N>(arrays_tuple)), ...);
             }(std::make_index_sequence<num_arrays>());
         }
 
@@ -1977,9 +1980,9 @@ namespace gpu_array
                 return buf;
             };
 
-            const auto tmp_bufs = detail::apply(
+            const auto tmp_bufs = gpu_array::apply(
                 [copy_buffer](const auto*... ptrs) {
-                    return detail::tuple<std::unique_ptr<Ts[]>...>(copy_buffer(ptrs)...);
+                    return gpu_array::tuple<std::unique_ptr<Ts[]>...>(copy_buffer(ptrs)...);
                 },
                 base::data_);
 
@@ -1995,7 +1998,7 @@ namespace gpu_array
 
                 for (size_type i = 0; i < base::size_; ++i)
                 {
-                    detail::apply([&result, i](const auto&... bufs) { result.push_back(U{bufs[i]...}); }, tmp_bufs);
+                    gpu_array::apply([&result, i](const auto&... bufs) { result.push_back(U{bufs[i]...}); }, tmp_bufs);
                 }
 
                 return result;
@@ -2007,7 +2010,7 @@ namespace gpu_array
 
                 for (size_type i = 0; i < base::size_; ++i)
                 {
-                    detail::apply([&result, i](const auto&... bufs) { result[i] = U{bufs[i]...}; }, tmp_bufs);
+                    gpu_array::apply([&result, i](const auto&... bufs) { result[i] = U{bufs[i]...}; }, tmp_bufs);
                 }
 
                 return result;
@@ -2033,7 +2036,7 @@ namespace gpu_array
             if (base::size_ > 0) assert(ptr != nullptr);
 
             // reset specified pointer only
-            detail::get<N>(base::data_) = base::size_ == 0 ? nullptr : ptr;
+            gpu_array::get<N>(base::data_) = base::size_ == 0 ? nullptr : ptr;
         }
 
         template <std::size_t N, gpu_array_ptr T>
@@ -2053,9 +2056,9 @@ namespace gpu_array
     };
 
     template <typename... Ts>
-    class managed_structure_of_arrays : public managed_structure_of_arrays<detail::tuple<Ts...>, size_type_default>
+    class managed_structure_of_arrays : public managed_structure_of_arrays<gpu_array::tuple<Ts...>, size_type_default>
     {
-        using base = managed_structure_of_arrays<detail::tuple<Ts...>, size_type_default>;
+        using base = managed_structure_of_arrays<gpu_array::tuple<Ts...>, size_type_default>;
         using base::base;
 
     public:
@@ -2088,9 +2091,9 @@ namespace gpu_array
         static constexpr auto num_arrays = sizeof...(Ts);
         using base = detail::base<true, SizeType, Ts...>;
 
-        using tuple_value_type = detail::tuple<Ts...>;
-        using tuple_pointer_type = detail::tuple<Ts*...>;
-        using tuple_const_pointer_type = detail::tuple<const Ts*...>;
+        using tuple_value_type = gpu_array::tuple<Ts...>;
+        using tuple_pointer_type = gpu_array::tuple<Ts*...>;
+        using tuple_const_pointer_type = gpu_array::tuple<const Ts*...>;
         using ret_tuple_value_type = Tuple<Ts...>;
         using ret_tuple_reference_type = Tuple<Ts&...>;
         using ret_tuple_const_reference_type = Tuple<const Ts&...>;
@@ -2125,8 +2128,8 @@ namespace gpu_array
             };
 
             [this, &alloc_ptr]<std::size_t... N>(std::index_sequence<N...>) {
-                (alloc_ptr(detail::get<N>(base::data_),
-                           [](const auto& v) -> const auto& { return (detail::get<N>(v)); }),
+                (alloc_ptr(gpu_array::get<N>(base::data_),
+                           [](const auto& v) -> const auto& { return (gpu_array::get<N>(v)); }),
                  ...);
             }(std::make_index_sequence<num_arrays>());
         }
@@ -2139,18 +2142,18 @@ namespace gpu_array
         __host__ __device__ auto begin() noexcept { return iterator_type(base::data_); }
         __host__ __device__ auto end() noexcept
         {
-            return detail::apply(
+            return gpu_array::apply(
                 [this](auto&... ptrs) { return iterator_type(tuple_pointer_type{(ptrs + base::size_)...}); },
                 base::data_);
         }
         __host__ __device__ auto begin() const noexcept
         {
-            return detail::apply([](auto&... ptrs) { return const_iterator_type(tuple_const_pointer_type{(ptrs)...}); },
+            return gpu_array::apply([](auto&... ptrs) { return const_iterator_type(tuple_const_pointer_type{(ptrs)...}); },
                                  base::data_);
         }
         __host__ __device__ auto end() const noexcept
         {
-            return detail::apply(
+            return gpu_array::apply(
                 [this](auto&... ptrs) {
                     return const_iterator_type(tuple_const_pointer_type{(ptrs + base::size_)...});
                 },
@@ -2159,28 +2162,28 @@ namespace gpu_array
         __host__ __device__ auto operator[](size_type i) &
         {
             assert(i < base::size_);
-            return detail::apply([i](auto&... ptrs) { return ret_tuple_reference_type{*(ptrs + i)...}; }, base::data_);
+            return gpu_array::apply([i](auto&... ptrs) { return ret_tuple_reference_type{*(ptrs + i)...}; }, base::data_);
         }
         __host__ __device__ auto operator[](size_type i) const&
         {
             assert(i < base::size_);
-            return detail::apply([i](auto&... ptrs) { return ret_tuple_const_reference_type{*(ptrs + i)...}; },
+            return gpu_array::apply([i](auto&... ptrs) { return ret_tuple_const_reference_type{*(ptrs + i)...}; },
                                  base::data_);
         }
         __host__ __device__ auto operator[](size_type i) &&
         {
             assert(i < base::size_);
-            return detail::apply([i](auto&... ptrs) { return ret_tuple_value_type{*(ptrs + i)...}; }, base::data_);
+            return gpu_array::apply([i](auto&... ptrs) { return ret_tuple_value_type{*(ptrs + i)...}; }, base::data_);
         }
         template <std::size_t N>
         __host__ __device__ auto* data() noexcept
         {
-            return detail::get<N>(base::data_);
+            return gpu_array::get<N>(base::data_);
         }
         template <std::size_t N>
         __host__ __device__ const auto* data() const noexcept
         {
-            return detail::get<N>(base::data_);
+            return gpu_array::get<N>(base::data_);
         }
 
         managed_structure_of_arrays() = default;
@@ -2221,7 +2224,7 @@ namespace gpu_array
             });
 
             [this, &value]<std::size_t... N>(std::index_sequence<N...>) {
-                (std::ranges::uninitialized_fill_n(detail::get<N>(base::data_), base::size_, detail::get<N>(value)),
+                (std::ranges::uninitialized_fill_n(gpu_array::get<N>(base::data_), base::size_, gpu_array::get<N>(value)),
                  ...);
             }(std::make_index_sequence<num_arrays>());
         }
@@ -2243,8 +2246,8 @@ namespace gpu_array
             };
 
             [this, &alloc_ptr]<std::size_t... N>(std::index_sequence<N...>) {
-                (alloc_ptr(detail::get<N>(base::data_),
-                           [](const auto& e) -> const auto& { return (detail::get<N>(e)); }),
+                (alloc_ptr(gpu_array::get<N>(base::data_),
+                           [](const auto& e) -> const auto& { return (gpu_array::get<N>(e)); }),
                  ...);
             }(std::make_index_sequence<num_arrays>());
         }
@@ -2265,15 +2268,15 @@ namespace gpu_array
             };
 
             [this, &alloc_ptr]<std::size_t... N>(std::index_sequence<N...>) {
-                (alloc_ptr(detail::get<N>(base::data_),
-                           [](const auto& e) -> const auto& { return (detail::get<N>(e)); }),
+                (alloc_ptr(gpu_array::get<N>(base::data_),
+                           [](const auto& e) -> const auto& { return (gpu_array::get<N>(e)); }),
                  ...);
             }(std::make_index_sequence<num_arrays>());
         }
 
         template <detail::array_convertible_for_copy... Ranges>
         requires (sizeof...(Ranges) == num_arrays) &&
-                 detail::assignable_to_tuple<detail::tuple<std::ranges::range_value_t<Ranges>...>, Ts...>
+                 detail::assignable_to_tuple<gpu_array::tuple<std::ranges::range_value_t<Ranges>...>, Ts...>
         __host__ explicit managed_structure_of_arrays(const Ranges&... arrays)
             : base(std::max({std::ranges::size(arrays)...}))
         {
@@ -2291,9 +2294,9 @@ namespace gpu_array
                 for (auto i = std::size_t{0}; const auto& v : range) std::ranges::construct_at(ptr + i++, v);
             };
 
-            auto arrays_tuple = detail::tuple<const Ranges&...>(arrays...);
+            auto arrays_tuple = gpu_array::tuple<const Ranges&...>(arrays...);
             [this, &arrays_tuple, alloc_ptr]<std::size_t... N>(std::index_sequence<N...>) {
-                (alloc_ptr(detail::get<N>(base::data_), detail::get<N>(arrays_tuple)), ...);
+                (alloc_ptr(gpu_array::get<N>(base::data_), gpu_array::get<N>(arrays_tuple)), ...);
             }(std::make_index_sequence<num_arrays>());
         }
 
@@ -2314,9 +2317,9 @@ namespace gpu_array
                 for (auto i = std::size_t{0}; const auto& v : range) std::ranges::construct_at(ptr + i++, v);
             };
 
-            auto arrays_tuple = detail::tuple<std::initializer_list<Ts>...>(lists...);
+            auto arrays_tuple = gpu_array::tuple<std::initializer_list<Ts>...>(lists...);
             [this, &arrays_tuple, alloc_ptr]<std::size_t... N>(std::index_sequence<N...>) {
-                (alloc_ptr(detail::get<N>(base::data_), detail::get<N>(arrays_tuple)), ...);
+                (alloc_ptr(gpu_array::get<N>(base::data_), gpu_array::get<N>(arrays_tuple)), ...);
             }(std::make_index_sequence<num_arrays>());
         }
 
@@ -2443,7 +2446,7 @@ namespace gpu_array
 
                 for (size_type i = 0; i < base::size_; ++i)
                 {
-                    detail::apply([&result, i](const auto&... bufs) { result.push_back(U{bufs[i]...}); }, base::data_);
+                    gpu_array::apply([&result, i](const auto&... bufs) { result.push_back(U{bufs[i]...}); }, base::data_);
                 }
 
                 return result;
@@ -2455,7 +2458,7 @@ namespace gpu_array
 
                 for (size_type i = 0; i < base::size_; ++i)
                 {
-                    detail::apply([&result, i](const auto&... bufs) { result[i] = U{bufs[i]...}; }, base::data_);
+                    gpu_array::apply([&result, i](const auto&... bufs) { result[i] = U{bufs[i]...}; }, base::data_);
                 }
 
                 return result;
@@ -2481,7 +2484,7 @@ namespace gpu_array
             if (base::size_ > 0) assert(ptr != nullptr);
 
             // reset specified pointer only
-            detail::get<N>(base::data_) = base::size_ == 0 ? nullptr : ptr;
+            gpu_array::get<N>(base::data_) = base::size_ == 0 ? nullptr : ptr;
         }
 
         template <std::size_t N, gpu_array_ptr T>
diff --git a/test/test.cpp b/test/test.cpp
index c8745ac..19a280a 100644
--- a/test/test.cpp
+++ b/test/test.cpp
@@ -6,19 +6,19 @@
 // NOLINTBEGIN
 using namespace gpu_array;
 
-// Example of custom tuple type derived from detail::tuple
+// Example of custom tuple type derived from gpu_array::tuple
 // You may need to specialize std::common_type and std::basic_common_reference to satisfy range concepts
 template <typename... Ts>
-class custom_tuple_example : public detail::tuple<Ts...>
+class custom_tuple_example : public gpu_array::tuple<Ts...>
 {
-    using base = detail::tuple<Ts...>;
+    using base = gpu_array::tuple<Ts...>;
     using base::base;
 
 public:
     template <std::size_t N>
     __host__ decltype(auto) get_string() const
     {
-        return std::to_string(detail::get<N>(*this));
+        return std::to_string(gpu_array::get<N>(*this));
     }
     using base::operator=;
     template <typename... Us>
@@ -1038,7 +1038,7 @@ TEST(StructureOfArrays, Construction)
 {
     using tuple_elem_type0 = int;
     using tuple_elem_type1 = double;
-    using tuple_type = detail::tuple<tuple_elem_type0, tuple_elem_type1>;
+    using tuple_type = gpu_array::tuple<tuple_elem_type0, tuple_elem_type1>;
     using custom_tuple_type = custom_tuple_example<tuple_elem_type0, tuple_elem_type1>;
 
     // initizalization
@@ -1052,18 +1052,18 @@ TEST(StructureOfArrays, Construction)
         auto vec = soa.to<std::vector>();
         for (std::size_t i = 0; i < vec.size(); ++i)
         {
-            EXPECT_EQ(detail::get<0>(vec[i]), 0);
-            EXPECT_EQ(detail::get<1>(vec[i]), 0.0);
+            EXPECT_EQ(gpu_array::get<0>(vec[i]), 0);
+            EXPECT_EQ(gpu_array::get<1>(vec[i]), 0.0);
         }
     }
     {
-        auto soa = structure_of_arrays<tuple_elem_type0, tuple_elem_type1>(10, detail::tuple(1, 2.5));
+        auto soa = structure_of_arrays<tuple_elem_type0, tuple_elem_type1>(10, gpu_array::tuple(1, 2.5));
         EXPECT_EQ(soa.size(), 10);
         auto vec = soa.to<std::vector>();
         for (std::size_t i = 0; i < vec.size(); ++i)
         {
-            EXPECT_EQ(detail::get<0>(vec[i]), 1);
-            EXPECT_EQ(detail::get<1>(vec[i]), 2.5);
+            EXPECT_EQ(gpu_array::get<0>(vec[i]), 1);
+            EXPECT_EQ(gpu_array::get<1>(vec[i]), 2.5);
         }
     }
     {
@@ -1080,18 +1080,18 @@ TEST(StructureOfArrays, Construction)
         auto vec = soa.to<std::vector>();
         for (std::size_t i = 0; i < vec.size(); ++i)
         {
-            EXPECT_EQ(detail::get<0>(vec[i]), 0);
-            EXPECT_EQ(detail::get<1>(vec[i]), 0.0);
+            EXPECT_EQ(gpu_array::get<0>(vec[i]), 0);
+            EXPECT_EQ(gpu_array::get<1>(vec[i]), 0.0);
         }
     }
     {
-        auto soa = structure_of_arrays<tuple_type>(10, detail::tuple(1, 2.5));
+        auto soa = structure_of_arrays<tuple_type>(10, gpu_array::tuple(1, 2.5));
         EXPECT_EQ(soa.size(), 10);
         auto vec = soa.to<std::vector>();
         for (std::size_t i = 0; i < vec.size(); ++i)
         {
-            EXPECT_EQ(detail::get<0>(vec[i]), 1);
-            EXPECT_EQ(detail::get<1>(vec[i]), 2.5);
+            EXPECT_EQ(gpu_array::get<0>(vec[i]), 1);
+            EXPECT_EQ(gpu_array::get<1>(vec[i]), 2.5);
         }
     }
     {
@@ -1108,8 +1108,8 @@ TEST(StructureOfArrays, Construction)
         auto vec = soa.to<std::vector>();
         for (std::size_t i = 0; i < vec.size(); ++i)
         {
-            EXPECT_EQ(detail::get<0>(vec[i]), 0);
-            EXPECT_EQ(detail::get<1>(vec[i]), 0.0);
+            EXPECT_EQ(gpu_array::get<0>(vec[i]), 0);
+            EXPECT_EQ(gpu_array::get<1>(vec[i]), 0.0);
         }
     }
     {
@@ -1118,8 +1118,8 @@ TEST(StructureOfArrays, Construction)
         auto vec = soa.to<std::vector>();
         for (std::size_t i = 0; i < vec.size(); ++i)
         {
-            EXPECT_EQ(detail::get<0>(vec[i]), 1);
-            EXPECT_EQ(detail::get<1>(vec[i]), 2.5);
+            EXPECT_EQ(gpu_array::get<0>(vec[i]), 1);
+            EXPECT_EQ(gpu_array::get<1>(vec[i]), 2.5);
         }
     }
     {
@@ -1158,7 +1158,7 @@ TEST(StructureOfArrays, Construction)
         EXPECT_EQ(arr1.use_count(), 1);
     }
 
-    // construction from range of detail::tuple
+    // construction from range of gpu_array::tuple
     {
         auto vec = std::vector<tuple_type>();
         for (std::size_t i = 0; i < 10; ++i)
@@ -1171,17 +1171,17 @@ TEST(StructureOfArrays, Construction)
         EXPECT_EQ(soa_vec, vec);
     }
 
-    // construction from initializer_list of detail::tuple
+    // construction from initializer_list of gpu_array::tuple
     {
         auto soa = structure_of_arrays<tuple_type>({{0, 0.5}, {1, 1.5}, {2, 2.5}});
         EXPECT_EQ(soa.size(), 3);
         const auto soa_vec = soa.to<std::vector>();
-        EXPECT_EQ(detail::get<0>(soa_vec[0]), 0);
-        EXPECT_EQ(detail::get<1>(soa_vec[0]), 0.5);
-        EXPECT_EQ(detail::get<0>(soa_vec[1]), 1);
-        EXPECT_EQ(detail::get<1>(soa_vec[1]), 1.5);
-        EXPECT_EQ(detail::get<0>(soa_vec[2]), 2);
-        EXPECT_EQ(detail::get<1>(soa_vec[2]), 2.5);
+        EXPECT_EQ(gpu_array::get<0>(soa_vec[0]), 0);
+        EXPECT_EQ(gpu_array::get<1>(soa_vec[0]), 0.5);
+        EXPECT_EQ(gpu_array::get<0>(soa_vec[1]), 1);
+        EXPECT_EQ(gpu_array::get<1>(soa_vec[1]), 1.5);
+        EXPECT_EQ(gpu_array::get<0>(soa_vec[2]), 2);
+        EXPECT_EQ(gpu_array::get<1>(soa_vec[2]), 2.5);
     }
 
     // construction from multiple ranges
@@ -1198,8 +1198,8 @@ TEST(StructureOfArrays, Construction)
         const auto soa_vec = soa.to<std::vector>();
         for (std::size_t i = 0; i < soa_vec.size(); ++i)
         {
-            EXPECT_EQ(detail::get<0>(soa_vec[i]), vec0[i]);
-            EXPECT_EQ(detail::get<1>(soa_vec[i]), vec1[i]);
+            EXPECT_EQ(gpu_array::get<0>(soa_vec[i]), vec0[i]);
+            EXPECT_EQ(gpu_array::get<1>(soa_vec[i]), vec1[i]);
         }
     }
 
@@ -1208,12 +1208,12 @@ TEST(StructureOfArrays, Construction)
         auto soa = structure_of_arrays<tuple_elem_type0, tuple_elem_type1>({0, 1, 2}, {0.5, 1.5, 2.5});
         EXPECT_EQ(soa.size(), 3);
         const auto soa_vec = soa.to<std::vector>();
-        EXPECT_EQ(detail::get<0>(soa_vec[0]), 0);
-        EXPECT_EQ(detail::get<1>(soa_vec[0]), 0.5);
-        EXPECT_EQ(detail::get<0>(soa_vec[1]), 1);
-        EXPECT_EQ(detail::get<1>(soa_vec[1]), 1.5);
-        EXPECT_EQ(detail::get<0>(soa_vec[2]), 2);
-        EXPECT_EQ(detail::get<1>(soa_vec[2]), 2.5);
+        EXPECT_EQ(gpu_array::get<0>(soa_vec[0]), 0);
+        EXPECT_EQ(gpu_array::get<1>(soa_vec[0]), 0.5);
+        EXPECT_EQ(gpu_array::get<0>(soa_vec[1]), 1);
+        EXPECT_EQ(gpu_array::get<1>(soa_vec[1]), 1.5);
+        EXPECT_EQ(gpu_array::get<0>(soa_vec[2]), 2);
+        EXPECT_EQ(gpu_array::get<1>(soa_vec[2]), 2.5);
     }
 
     // construction from range of custom tuple
@@ -1228,9 +1228,9 @@ TEST(StructureOfArrays, Construction)
         const auto soa_vec = soa.to<std::vector>();
         for (std::size_t i = 0; i < soa_vec.size(); ++i)
         {
-            EXPECT_EQ(detail::get<0>(soa_vec[i]), static_cast<tuple_elem_type0>(i));
+            EXPECT_EQ(gpu_array::get<0>(soa_vec[i]), static_cast<tuple_elem_type0>(i));
             EXPECT_EQ(soa_vec[i].get_string<0>(), std::to_string(i));
-            EXPECT_EQ(detail::get<1>(soa_vec[i]), static_cast<tuple_elem_type1>(i) + 0.5);
+            EXPECT_EQ(gpu_array::get<1>(soa_vec[i]), static_cast<tuple_elem_type1>(i) + 0.5);
             EXPECT_EQ(soa_vec[i].get_string<1>(), std::to_string(i + 0.5));
         }
     }
@@ -1252,7 +1252,7 @@ TEST(StructureOfArrays, Export)
 {
     using tuple_elem_type0 = int;
     using tuple_elem_type1 = double;
-    using tuple_type = detail::tuple<tuple_elem_type0, tuple_elem_type1>;
+    using tuple_type = gpu_array::tuple<tuple_elem_type0, tuple_elem_type1>;
 
     // export to range with same value type
     {
@@ -1278,7 +1278,7 @@ TEST(StructureOfArrays, Export)
 
 TEST(StructureOfArrays, RangeInterface)
 {
-    using tuple_type = detail::tuple<int, double>;
+    using tuple_type = gpu_array::tuple<int, double>;
     using soa_type1 = structure_of_arrays<int, double>;
     using soa_type2 = structure_of_arrays<tuple_type>;
     using soa_type3 = structure_of_arrays<custom_tuple_example<int, double>>;
@@ -1350,20 +1350,20 @@ TEST(StructureOfArrays, RangeInterface)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
 #endif
-        static_assert(std::same_as<decltype(soa[0]), detail::tuple<decltype(*soa.data<0>()), decltype(*soa.data<1>())>>);
+        static_assert(std::same_as<decltype(soa[0]), gpu_array::tuple<decltype(*soa.data<0>()), decltype(*soa.data<1>())>>);
         static_assert(
-            std::same_as<decltype(*soa.begin()), detail::tuple<decltype(*soa.data<0>()), decltype(*soa.data<1>())>>);
+            std::same_as<decltype(*soa.begin()), gpu_array::tuple<decltype(*soa.data<0>()), decltype(*soa.data<1>())>>);
         static_assert(std::same_as<decltype(*(soa.end() - 1)),
-                                   detail::tuple<decltype(*(soa.data<0>() + 9)), decltype(*(soa.data<1>() + 9))>>);
+                                   gpu_array::tuple<decltype(*(soa.data<0>() + 9)), decltype(*(soa.data<1>() + 9))>>);
 #ifdef __GNUC__
 #pragma GCC diagnostic pop
 #endif
-        soa = {detail::tuple(0, 0.0), detail::tuple(1, 1.0), detail::tuple(2, 2.0), detail::tuple(3, 3.0), detail::tuple(4, 4.0)};
+        soa = {gpu_array::tuple(0, 0.0), gpu_array::tuple(1, 1.0), gpu_array::tuple(2, 2.0), gpu_array::tuple(3, 3.0), gpu_array::tuple(4, 4.0)};
         auto vec = soa.to<std::vector>();
         for (std::size_t i = 0; i < vec.size(); ++i)
         {
-            EXPECT_EQ(detail::get<0>(vec[i]), static_cast<int>(i));
-            EXPECT_EQ(detail::get<1>(vec[i]), static_cast<double>(i));
+            EXPECT_EQ(gpu_array::get<0>(vec[i]), static_cast<int>(i));
+            EXPECT_EQ(gpu_array::get<1>(vec[i]), static_cast<double>(i));
         }
     }
 }
@@ -1385,7 +1385,7 @@ TEST(ManagedStructureOfArrays, Construction)
 {
     using tuple_elem_type0 = int;
     using tuple_elem_type1 = double;
-    using tuple_type = detail::tuple<tuple_elem_type0, tuple_elem_type1>;
+    using tuple_type = gpu_array::tuple<tuple_elem_type0, tuple_elem_type1>;
     using custom_tuple_type = custom_tuple_example<tuple_elem_type0, tuple_elem_type1>;
 
     // initizalization
@@ -1398,17 +1398,17 @@ TEST(ManagedStructureOfArrays, Construction)
         EXPECT_EQ(soa.size(), 10);
         for (const auto& v : soa)
         {
-            EXPECT_EQ(detail::get<0>(v), 0);
-            EXPECT_EQ(detail::get<1>(v), 0.0);
+            EXPECT_EQ(gpu_array::get<0>(v), 0);
+            EXPECT_EQ(gpu_array::get<1>(v), 0.0);
         }
     }
     {
-        auto soa = managed_structure_of_arrays<tuple_elem_type0, tuple_elem_type1>(10, detail::tuple(1, 2.5));
+        auto soa = managed_structure_of_arrays<tuple_elem_type0, tuple_elem_type1>(10, gpu_array::tuple(1, 2.5));
         EXPECT_EQ(soa.size(), 10);
         for (const auto& v : soa)
         {
-            EXPECT_EQ(detail::get<0>(v), 1);
-            EXPECT_EQ(detail::get<1>(v), 2.5);
+            EXPECT_EQ(gpu_array::get<0>(v), 1);
+            EXPECT_EQ(gpu_array::get<1>(v), 2.5);
         }
     }
     {
@@ -1424,17 +1424,17 @@ TEST(ManagedStructureOfArrays, Construction)
         EXPECT_EQ(soa.size(), 10);
         for (const auto& v : soa)
         {
-            EXPECT_EQ(detail::get<0>(v), 0);
-            EXPECT_EQ(detail::get<1>(v), 0.0);
+            EXPECT_EQ(gpu_array::get<0>(v), 0);
+            EXPECT_EQ(gpu_array::get<1>(v), 0.0);
         }
     }
     {
-        auto soa = managed_structure_of_arrays<tuple_type>(10, detail::tuple(1, 2.5));
+        auto soa = managed_structure_of_arrays<tuple_type>(10, gpu_array::tuple(1, 2.5));
         EXPECT_EQ(soa.size(), 10);
         for (const auto& v : soa)
         {
-            EXPECT_EQ(detail::get<0>(v), 1);
-            EXPECT_EQ(detail::get<1>(v), 2.5);
+            EXPECT_EQ(gpu_array::get<0>(v), 1);
+            EXPECT_EQ(gpu_array::get<1>(v), 2.5);
         }
     }
     {
@@ -1450,8 +1450,8 @@ TEST(ManagedStructureOfArrays, Construction)
         EXPECT_EQ(soa.size(), 10);
         for (const auto& v : soa)
         {
-            EXPECT_EQ(detail::get<0>(v), 0);
-            EXPECT_EQ(detail::get<1>(v), 0.0);
+            EXPECT_EQ(gpu_array::get<0>(v), 0);
+            EXPECT_EQ(gpu_array::get<1>(v), 0.0);
         }
     }
     {
@@ -1459,8 +1459,8 @@ TEST(ManagedStructureOfArrays, Construction)
         EXPECT_EQ(soa.size(), 10);
         for (const auto& v : soa)
         {
-            EXPECT_EQ(detail::get<0>(v), 1);
-            EXPECT_EQ(detail::get<1>(v), 2.5);
+            EXPECT_EQ(gpu_array::get<0>(v), 1);
+            EXPECT_EQ(gpu_array::get<1>(v), 2.5);
         }
     }
     {
@@ -1499,7 +1499,7 @@ TEST(ManagedStructureOfArrays, Construction)
         EXPECT_EQ(arr1.use_count(), 1);
     }
 
-    // construction from range of detail::tuple
+    // construction from range of gpu_array::tuple
     {
         auto vec = std::vector<tuple_type>();
         for (std::size_t i = 0; i < 10; ++i)
@@ -1510,21 +1510,21 @@ TEST(ManagedStructureOfArrays, Construction)
         EXPECT_EQ(soa.size(), 10);
         for (std::size_t i = 0; i < 10; ++i)
         {
-            EXPECT_EQ(detail::get<0>(soa[i]), detail::get<0>(vec[i]));
-            EXPECT_EQ(detail::get<1>(soa[i]), detail::get<1>(vec[i]));
+            EXPECT_EQ(gpu_array::get<0>(soa[i]), gpu_array::get<0>(vec[i]));
+            EXPECT_EQ(gpu_array::get<1>(soa[i]), gpu_array::get<1>(vec[i]));
         }
     }
 
-    // construction from initializer_list of detail::tuple
+    // construction from initializer_list of gpu_array::tuple
     {
         auto soa = managed_structure_of_arrays<tuple_type>({{0, 0.5}, {1, 1.5}, {2, 2.5}});
         EXPECT_EQ(soa.size(), 3);
-        EXPECT_EQ(detail::get<0>(soa[0]), 0);
-        EXPECT_EQ(detail::get<1>(soa[0]), 0.5);
-        EXPECT_EQ(detail::get<0>(soa[1]), 1);
-        EXPECT_EQ(detail::get<1>(soa[1]), 1.5);
-        EXPECT_EQ(detail::get<0>(soa[2]), 2);
-        EXPECT_EQ(detail::get<1>(soa[2]), 2.5);
+        EXPECT_EQ(gpu_array::get<0>(soa[0]), 0);
+        EXPECT_EQ(gpu_array::get<1>(soa[0]), 0.5);
+        EXPECT_EQ(gpu_array::get<0>(soa[1]), 1);
+        EXPECT_EQ(gpu_array::get<1>(soa[1]), 1.5);
+        EXPECT_EQ(gpu_array::get<0>(soa[2]), 2);
+        EXPECT_EQ(gpu_array::get<1>(soa[2]), 2.5);
     }
 
     // construction from multiple ranges
@@ -1540,8 +1540,8 @@ TEST(ManagedStructureOfArrays, Construction)
         EXPECT_EQ(soa.size(), 10);
         for (std::size_t i = 0; const auto& v : soa)
         {
-            EXPECT_EQ(detail::get<0>(v), vec0[i]);
-            EXPECT_EQ(detail::get<1>(v), vec1[i]);
+            EXPECT_EQ(gpu_array::get<0>(v), vec0[i]);
+            EXPECT_EQ(gpu_array::get<1>(v), vec1[i]);
             ++i;
         }
     }
@@ -1551,12 +1551,12 @@ TEST(ManagedStructureOfArrays, Construction)
         auto soa = managed_structure_of_arrays<tuple_elem_type0, tuple_elem_type1>({0, 1, 2}, {0.5, 1.5, 2.5});
         EXPECT_EQ(soa.size(), 3);
         const auto soa_vec = soa.to<std::vector>();
-        EXPECT_EQ(detail::get<0>(soa_vec[0]), 0);
-        EXPECT_EQ(detail::get<1>(soa_vec[0]), 0.5);
-        EXPECT_EQ(detail::get<0>(soa_vec[1]), 1);
-        EXPECT_EQ(detail::get<1>(soa_vec[1]), 1.5);
-        EXPECT_EQ(detail::get<0>(soa_vec[2]), 2);
-        EXPECT_EQ(detail::get<1>(soa_vec[2]), 2.5);
+        EXPECT_EQ(gpu_array::get<0>(soa_vec[0]), 0);
+        EXPECT_EQ(gpu_array::get<1>(soa_vec[0]), 0.5);
+        EXPECT_EQ(gpu_array::get<0>(soa_vec[1]), 1);
+        EXPECT_EQ(gpu_array::get<1>(soa_vec[1]), 1.5);
+        EXPECT_EQ(gpu_array::get<0>(soa_vec[2]), 2);
+        EXPECT_EQ(gpu_array::get<1>(soa_vec[2]), 2.5);
     }
 
     // construction from range of custom tuple
@@ -1570,9 +1570,9 @@ TEST(ManagedStructureOfArrays, Construction)
         EXPECT_EQ(soa.size(), 10);
         for (std::size_t i = 0; i < soa.size(); ++i)
         {
-            EXPECT_EQ(detail::get<0>(soa[i]), static_cast<tuple_elem_type0>(i));
+            EXPECT_EQ(gpu_array::get<0>(soa[i]), static_cast<tuple_elem_type0>(i));
             EXPECT_EQ(soa[i].get_string<0>(), std::to_string(i));
-            EXPECT_EQ(detail::get<1>(soa[i]), static_cast<tuple_elem_type1>(i) + 0.5);
+            EXPECT_EQ(gpu_array::get<1>(soa[i]), static_cast<tuple_elem_type1>(i) + 0.5);
             EXPECT_EQ(soa[i].get_string<1>(), std::to_string(i + 0.5));
         }
     }
@@ -1595,7 +1595,7 @@ TEST(ManagedStructureOfArrays, Export)
 {
     using tuple_elem_type0 = int;
     using tuple_elem_type1 = double;
-    using tuple_type = detail::tuple<tuple_elem_type0, tuple_elem_type1>;
+    using tuple_type = gpu_array::tuple<tuple_elem_type0, tuple_elem_type1>;
 
     // export to range with same value type
     {
@@ -1621,7 +1621,7 @@ TEST(ManagedStructureOfArrays, Export)
 
 TEST(ManagedStructureOfArrays, RangeInterface)
 {
-    using tuple_type = detail::tuple<int, double>;
+    using tuple_type = gpu_array::tuple<int, double>;
     using soa_type1 = managed_structure_of_arrays<int, double>;
     using soa_type2 = managed_structure_of_arrays<tuple_type>;
     using soa_type3 = managed_structure_of_arrays<custom_tuple_example<int, double>>;
@@ -1682,15 +1682,15 @@ TEST(ManagedStructureOfArrays, RangeInterface)
         EXPECT_NE(soa.data<1>(), nullptr);
         EXPECT_EQ(soa.size(), 10);
         EXPECT_FALSE(soa.empty());
-        EXPECT_EQ(soa[0], detail::tuple(*soa.data<0>(), *soa.data<1>()));
-        EXPECT_EQ(*soa.begin(), detail::tuple(*soa.data<0>(), *soa.data<1>()));
-        EXPECT_EQ(*(soa.end() - 1), detail::tuple(*(soa.data<0>() + 9), *(soa.data<1>() + 9)));
+        EXPECT_EQ(soa[0], gpu_array::tuple(*soa.data<0>(), *soa.data<1>()));
+        EXPECT_EQ(*soa.begin(), gpu_array::tuple(*soa.data<0>(), *soa.data<1>()));
+        EXPECT_EQ(*(soa.end() - 1), gpu_array::tuple(*(soa.data<0>() + 9), *(soa.data<1>() + 9)));
 
-        soa = {detail::tuple(0, 0.0), detail::tuple(1, 1.0), detail::tuple(2, 2.0), detail::tuple(3, 3.0), detail::tuple(4, 4.0)};
+        soa = {gpu_array::tuple(0, 0.0), gpu_array::tuple(1, 1.0), gpu_array::tuple(2, 2.0), gpu_array::tuple(3, 3.0), gpu_array::tuple(4, 4.0)};
         for (std::size_t i = 0; i < soa.size(); ++i)
         {
-            EXPECT_EQ(detail::get<0>(soa[i]), static_cast<int>(i));
-            EXPECT_EQ(detail::get<1>(soa[i]), static_cast<double>(i));
+            EXPECT_EQ(gpu_array::get<0>(soa[i]), static_cast<int>(i));
+            EXPECT_EQ(gpu_array::get<1>(soa[i]), static_cast<double>(i));
         }
     }
 }
@@ -1711,7 +1711,7 @@ TEST(ManagedStructureOfArrays, SmartPointerInterface)
 TEST(ManagedStructureOfArrays, MemoryManagement)
 {
     // create nested managed_array
-    auto elms = std::vector<detail::tuple<int, managed_array<double>>>();
+    auto elms = std::vector<gpu_array::tuple<int, managed_array<double>>>();
     for (std::size_t i = 0; i < 10; ++i) elms.emplace_back(i, managed_array<double>(i, 99));
     auto soa = managed_structure_of_arrays(elms);
 
@@ -1826,7 +1826,7 @@ TEST(JaggedArray, Construction)
 
     // construction from nested containers for jagged structure of arrays
     {
-        using tuple_type = detail::tuple<int, double>;
+        using tuple_type = gpu_array::tuple<int, double>;
         auto vec_tpl = std::vector<std::vector<tuple_type>>{
             std::vector<tuple_type>(1, {0, 0.5}), std::vector<tuple_type>(2, {1, 1.5}),
             std::vector<tuple_type>(3, {2, 2.5}), std::vector<tuple_type>(4, {3, 3.5}),
@@ -1891,12 +1891,12 @@ TEST(JaggedArray, Construction)
 
     // construction from container of sizes and flat range (structure of arrays)
     {
-        auto flat_range = std::vector<detail::tuple<int, double>>{
+        auto flat_range = std::vector<gpu_array::tuple<int, double>>{
             {0, 0.5}, {1, 1.5}, {2, 2.5},   {3, 3.5},   {4, 4.5},   {5, 5.5},   {6, 6.5},  {7, 7.5},
             {8, 8.5}, {9, 9.5}, {10, 10.5}, {11, 11.5}, {12, 12.5}, {13, 13.5}, {14, 14.5}};
         auto sizes = std::vector<std::uint32_t>{1, 2, 3, 4, 5};
 
-        auto jagged_arr = jagged_array<managed_structure_of_arrays<detail::tuple<int, double>>>(
+        auto jagged_arr = jagged_array<managed_structure_of_arrays<gpu_array::tuple<int, double>>>(
             sizes, flat_range);  // no deduction guide
 
         EXPECT_EQ(jagged_arr.size(), 15);
@@ -1905,7 +1905,7 @@ TEST(JaggedArray, Construction)
             EXPECT_EQ(jagged_arr.size(i), sizes[i]);
             for (const auto& v : jagged_arr.row(i))
             {
-                EXPECT_EQ(v, (detail::tuple<int, double>(j, static_cast<double>(j) + 0.5)));
+                EXPECT_EQ(v, (gpu_array::tuple<int, double>(j, static_cast<double>(j) + 0.5)));
                 ++j;
             }
         }
@@ -1926,7 +1926,7 @@ TEST(JaggedArray, Construction)
 
     // construction from nested initializer_list (managed structure of arrays)
     {
-        using tuple_type = detail::tuple<int, double>;
+        using tuple_type = gpu_array::tuple<int, double>;
         auto jagged_arr = jagged_array<managed_structure_of_arrays<tuple_type>>{
             {{0, 0.5}},
             {{1, 1.5}, {1, 1.5}},
@@ -1967,7 +1967,7 @@ TEST(JaggedArray, Construction)
 
     // wrap managed structure of arrays with sizes
     {
-        using tuple_type = detail::tuple<int, double>;
+        using tuple_type = gpu_array::tuple<int, double>;
         auto sizes = std::vector<std::uint32_t>{3, 1, 4, 1, 5};
         auto arr = managed_structure_of_arrays<tuple_type>(14, tuple_type(42, 99.0));
         auto jagged_arr_wrap = jagged_array(sizes, arr);
@@ -1985,8 +1985,8 @@ TEST(JaggedArray, Construction)
             EXPECT_EQ(jagged_arr_wrap.size(i), sizes[i]);
             for (const auto& v : jagged_arr_wrap.row(i))
             {
-                EXPECT_EQ(detail::get<0>(v), 42);
-                EXPECT_EQ(detail::get<1>(v), 99.0);
+                EXPECT_EQ(gpu_array::get<0>(v), 42);
+                EXPECT_EQ(gpu_array::get<1>(v), 99.0);
             }
         }
     }

From 1df3e301a7e226b57a347d3317ed690f8e6a0a25 Mon Sep 17 00:00:00 2001
From: acd1034 <48613285+acd1034@users.noreply.github.com>
Date: Thu, 5 Mar 2026 18:03:29 +0900
Subject: [PATCH 3/3] =?UTF-8?q?=F0=9F=90=9B=20Add=20constructor=20for=20tu?=
 =?UTF-8?q?ple=5Fleaf?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 include/gpu_array.hpp | 52 ++++++++++++++++++++++++++++++-------------
 1 file changed, 36 insertions(+), 16 deletions(-)

diff --git a/include/gpu_array.hpp b/include/gpu_array.hpp
index eed0202..e9fd8a8 100644
--- a/include/gpu_array.hpp
+++ b/include/gpu_array.hpp
@@ -57,6 +57,13 @@ namespace gpu_array
         template <std::size_t I, class T>
         struct tuple_leaf
         {
+            tuple_leaf()
+            requires std::default_initializable<T>
+            = default;
+            // Converting ctor; constrained so it cannot hijack copy/move construction of tuple_leaf itself.
+            template <class U>
+            requires (!std::same_as<std::remove_cvref_t<U>, tuple_leaf> && std::constructible_from<T, U>)
+            __host__ __device__ tuple_leaf(U&& u) : value(std::forward<U>(u)) {}
             using type = T;
             T value;
         };
@@ -97,7 +104,7 @@ namespace gpu_array
             };
             template <class... Us>
             requires (sizeof...(Us) == sizeof...(Ts) && !is_single_tuple<std::remove_cvref_t<Us>...>::value)
-            __host__ __device__ tuple(Us&&... us) : base_{std::forward<Us>(us)...}
+            __host__ __device__ tuple(Us&&... us) : base_(std::forward<Us>(us)...)
             {
             }
 
@@ -1309,7 +1316,8 @@ namespace gpu_array
 
         __host__ explicit value(default_init_tag) : base(1)
         {
-            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)), sizeof(value_type)));
+            GPU_CHECK_ERROR(
+                api::gpuMalloc(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)), sizeof(value_type)));
             assert(get() != nullptr);
             if constexpr (!std::is_trivially_default_constructible_v<value_type>)
             {
@@ -1320,7 +1328,8 @@ namespace gpu_array
 
         __host__ explicit value(const value_type& r) : base(1)
         {
-            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)), sizeof(value_type)));
+            GPU_CHECK_ERROR(
+                api::gpuMalloc(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)), sizeof(value_type)));
             assert(get() != nullptr);
             GPU_CHECK_ERROR(api::gpuMemcpy(get(), &r, sizeof(value_type), gpuMemcpyHostToDevice));
         }
@@ -1330,7 +1339,8 @@ namespace gpu_array
         __host__ explicit value(Args&&... args) : base(1)
         {
             auto temp = value_type(std::forward<Args>(args)...);
-            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)), sizeof(value_type)));
+            GPU_CHECK_ERROR(
+                api::gpuMalloc(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)), sizeof(value_type)));
             assert(get() != nullptr);
             GPU_CHECK_ERROR(api::gpuMemcpy(get(), &temp, sizeof(value_type), gpuMemcpyHostToDevice));
         }
@@ -1366,7 +1376,10 @@ namespace gpu_array
         }
 
         __host__ __device__ pointer get() const noexcept { return gpu_array::get<0>(base::data_); }
-        __host__ __device__ explicit operator bool() const noexcept { return gpu_array::get<0>(base::data_) != nullptr; }
+        __host__ __device__ explicit operator bool() const noexcept
+        {
+            return gpu_array::get<0>(base::data_) != nullptr;
+        }
 
 #if defined(GPU_OVERLOAD_DEVICE)
         __device__ reference operator*() const noexcept
@@ -1509,7 +1522,10 @@ namespace gpu_array
         }
 
         __host__ __device__ pointer get() const noexcept { return gpu_array::get<0>(base::data_); }
-        __host__ __device__ explicit operator bool() const noexcept { return gpu_array::get<0>(base::data_) != nullptr; }
+        __host__ __device__ explicit operator bool() const noexcept
+        {
+            return gpu_array::get<0>(base::data_) != nullptr;
+        }
 
         __host__ void prefetch(int device_id, api::gpuStream_t stream = 0, bool recursive = true) const
         {
@@ -1774,8 +1790,8 @@ namespace gpu_array
         }
         SIGSEGV_DEPRECATED __host__ __device__ auto begin() const noexcept
         {
-            return gpu_array::apply([](auto&... ptrs) { return const_iterator_type(tuple_const_pointer_type{(ptrs)...}); },
-                                 base::data_);
+            return gpu_array::apply(
+                [](auto&... ptrs) { return const_iterator_type(tuple_const_pointer_type{(ptrs)...}); }, base::data_);
         }
         SIGSEGV_DEPRECATED __host__ __device__ auto end() const noexcept
         {
@@ -1788,13 +1804,14 @@ namespace gpu_array
         SIGSEGV_DEPRECATED __host__ __device__ auto operator[](size_type i) &
         {
             assert(i < base::size_);
-            return gpu_array::apply([i](auto&... ptrs) { return ret_tuple_reference_type{*(ptrs + i)...}; }, base::data_);
+            return gpu_array::apply([i](auto&... ptrs) { return ret_tuple_reference_type{*(ptrs + i)...}; },
+                                    base::data_);
         }
         SIGSEGV_DEPRECATED __host__ __device__ auto operator[](size_type i) const&
         {
             assert(i < base::size_);
             return gpu_array::apply([i](auto&... ptrs) { return ret_tuple_const_reference_type{*(ptrs + i)...}; },
-                                 base::data_);
+                                    base::data_);
         }
         SIGSEGV_DEPRECATED __host__ __device__ auto operator[](size_type i) &&
         {
@@ -2148,8 +2165,8 @@ namespace gpu_array
         }
         __host__ __device__ auto begin() const noexcept
         {
-            return gpu_array::apply([](auto&... ptrs) { return const_iterator_type(tuple_const_pointer_type{(ptrs)...}); },
-                                 base::data_);
+            return gpu_array::apply(
+                [](auto&... ptrs) { return const_iterator_type(tuple_const_pointer_type{(ptrs)...}); }, base::data_);
         }
         __host__ __device__ auto end() const noexcept
         {
@@ -2162,13 +2179,14 @@ namespace gpu_array
         __host__ __device__ auto operator[](size_type i) &
         {
             assert(i < base::size_);
-            return gpu_array::apply([i](auto&... ptrs) { return ret_tuple_reference_type{*(ptrs + i)...}; }, base::data_);
+            return gpu_array::apply([i](auto&... ptrs) { return ret_tuple_reference_type{*(ptrs + i)...}; },
+                                    base::data_);
         }
         __host__ __device__ auto operator[](size_type i) const&
         {
             assert(i < base::size_);
             return gpu_array::apply([i](auto&... ptrs) { return ret_tuple_const_reference_type{*(ptrs + i)...}; },
-                                 base::data_);
+                                    base::data_);
         }
         __host__ __device__ auto operator[](size_type i) &&
         {
@@ -2224,7 +2242,8 @@ namespace gpu_array
             });
 
             [this, &value]<std::size_t... N>(std::index_sequence<N...>) {
-                (std::ranges::uninitialized_fill_n(gpu_array::get<N>(base::data_), base::size_, gpu_array::get<N>(value)),
+                (std::ranges::uninitialized_fill_n(gpu_array::get<N>(base::data_), base::size_,
+                                                   gpu_array::get<N>(value)),
                  ...);
             }(std::make_index_sequence<num_arrays>());
         }
@@ -2446,7 +2465,8 @@ namespace gpu_array
 
                 for (size_type i = 0; i < base::size_; ++i)
                 {
-                    gpu_array::apply([&result, i](const auto&... bufs) { result.push_back(U{bufs[i]...}); }, base::data_);
+                    gpu_array::apply([&result, i](const auto&... bufs) { result.push_back(U{bufs[i]...}); },
+                                     base::data_);
                 }
 
                 return result;