From c9f9cee50d3ff0d7f61151c79e7d84314d7055d1 Mon Sep 17 00:00:00 2001
From: acd1034 <48613285+acd1034@users.noreply.github.com>
Date: Fri, 13 Feb 2026 19:59:19 +0900
Subject: [PATCH 01/19] =?UTF-8?q?=E2=9C=A8=20Create=20enumerate=5Fview=20f?=
 =?UTF-8?q?rom=20stride=5Fview?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 include/gpu_array.hpp | 215 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 215 insertions(+)
diff --git a/include/gpu_array.hpp b/include/gpu_array.hpp
index aa56b8a..11e0d3b 100644
--- a/include/gpu_array.hpp
+++ b/include/gpu_array.hpp
@@ -2831,6 +2831,221 @@ namespace gpu_array
                 return self(range);
             }
         };
+
+        template <std::ranges::random_access_range Range>
+        requires std::is_lvalue_reference_v<Range&&> && std::ranges::sized_range<Range>
+        class enumerate_sentinel;
+
+        template <std::ranges::random_access_range Range>
+        requires std::is_lvalue_reference_v<Range&&> && std::ranges::sized_range<Range>
+        class enumerate_iterator_base
+        {
+            template <typename T>
+            __host__ __device__ friend bool operator==(const enumerate_iterator_base<T>& it,
+                                                       const enumerate_sentinel<T>& se) noexcept;
+
+        public:
+            enumerate_iterator_base() = default;
+            __host__ __device__ std::ranges::range_reference_t<Range> operator*() const noexcept
+            {
+                return (*pointer_)[index_];
+            }
+
+        protected:
+            __host__ __device__ explicit enumerate_iterator_base(Range&& r,
+                                                                 std::ranges::range_size_t<Range> index) noexcept
+                : pointer_(&r), index_(index)
+            {
+            }
+
+            std::remove_reference_t<Range>* pointer_ = nullptr;
+            std::ranges::range_size_t<Range> index_ = 0;
+        };
+
+        template <std::ranges::random_access_range Range>
+        requires std::is_lvalue_reference_v<Range&&> && std::ranges::sized_range<Range>
+        class enumerate_sentinel
+        {
+            template <typename T>
+            __host__ __device__ friend bool operator==(const enumerate_iterator_base<T>& it,
+                                                       const enumerate_sentinel<T>& se) noexcept;
+
+        public:
+            enumerate_sentinel() = default;
+            __host__ __device__ explicit enumerate_sentinel(Range&& r) noexcept : end_(r.size()) {}
+
+        protected:
+            std::ranges::range_size_t<Range> end_ = 0;
+        };
+
+        template <typename T>
+        __host__ __device__ inline bool operator==(const enumerate_iterator_base<T>& it,
+                                                   const enumerate_sentinel<T>& se) noexcept
+        {
+            return it.index_ >= se.end_;
+        }
+
+        template <Stride StrideType, std::ranges::random_access_range Range>
+        requires std::is_lvalue_reference_v<Range&&>
+        class enumerate_iterator : public enumerate_iterator_base<Range>
+        {
+            using base = enumerate_iterator_base<Range>;
+
+            __host__ __device__ static auto get_initial_index() noexcept
+            {
+#if defined(GPU_DEVICE_COMPILE)
+                using namespace cooperative_groups;  // NOLINT
+                if constexpr (StrideType == Stride::BlockThread)
+                {
+                    return this_thread_block().thread_rank();
+                }
+                else if constexpr (StrideType == Stride::GridThread)
+                {
+                    return this_grid().thread_rank();
+                }
+#if defined(ENABLE_HIP)
+                else if constexpr (StrideType == Stride::GridBlock)
+                {
+                    return (static_cast<unsigned long long>(blockIdx.z) * gridDim.y * gridDim.x) +  // NOLINT
+                           (static_cast<unsigned long long>(blockIdx.y) * gridDim.x) +              // NOLINT
+                           static_cast<unsigned long long>(blockIdx.x);                             // NOLINT
+                }
+#else
+                else if constexpr (StrideType == Stride::GridBlock)
+                {
+                    return this_grid().block_rank();
+                }
+#endif
+#if defined(_CG_HAS_CLUSTER_GROUP)
+                else if constexpr (StrideType == Stride::ClusterThread)
+                {
+                    return this_cluster().thread_rank();
+                }
+                else if constexpr (StrideType == Stride::ClusterBlock)
+                {
+                    return this_cluster().block_rank();
+                }
+                else if constexpr (StrideType == Stride::GridCluster)
+                {
+                    return this_grid().cluster_rank();
+                }
+#endif
+                else
+                {
+                    static_assert([]() { return false; }(), "invalid StrideType");
+                }
+#else
+                return 0;
+#endif
+            }
+
+            __host__ __device__ static auto get_enumerate() noexcept
+            {
+#if defined(GPU_DEVICE_COMPILE)
+                using namespace cooperative_groups;  // NOLINT
+                if constexpr (StrideType == Stride::BlockThread)
+                {
+                    return this_thread_block().size();
+                }
+                else if constexpr (StrideType == Stride::GridThread)
+                {
+                    return this_grid().size();
+                }
+#if defined(ENABLE_HIP)
+                else if constexpr (StrideType == Stride::GridBlock)
+                {
+                    return static_cast<unsigned long long>(gridDim.x) * (gridDim.y * gridDim.z);  // NOLINT
+                }
+#else
+                else if constexpr (StrideType == Stride::GridBlock)
+                {
+                    return this_grid().num_blocks();
+                }
+#endif
+#if defined(_CG_HAS_CLUSTER_GROUP)
+                else if constexpr (StrideType == Stride::ClusterThread)
+                {
+                    return this_cluster().size();
+                }
+                else if constexpr (StrideType == Stride::ClusterBlock)
+                {
+                    return this_cluster().num_blocks();
+                }
+                else if constexpr (StrideType == Stride::GridCluster)
+                {
+                    return this_grid().num_clusters();
+                }
+#endif
+                else
+                {
+                    static_assert([]() { return false; }(), "invalid StrideType");
+                }
+#else
+                return 1;
+#endif
+            }
+
+        public:
+            using iterator_category = std::forward_iterator_tag;
+            using value_type = std::ranges::range_value_t<Range>;
+            using difference_type = std::make_signed_t<std::ranges::range_size_t<Range>>;
+
+            __host__ __device__ explicit enumerate_iterator(Range&& r) noexcept
+                : base(std::forward<Range>(r), get_initial_index())
+            {
+            }
+            __host__ __device__ enumerate_iterator& operator++() noexcept
+            {
+                base::index_ += get_enumerate();
+                return *this;
+            }
+            __host__ __device__ enumerate_iterator operator++(int) noexcept
+            {
+                auto res = *this;
+                ++(*this);
+                return res;
+            }
+            __host__ __device__ bool operator==(const enumerate_iterator& it) const noexcept
+            {
+                return base::index_ == it.index_;
+            }
+        };
+
+        template <Stride StrideType, std::ranges::random_access_range Range>
+        requires std::is_lvalue_reference_v<Range&&>
+        class enumerate_view : public std::ranges::view_interface<enumerate_view<StrideType, Range>>
+        {
+        public:
+            enumerate_view() = default;
+            __host__ __device__ explicit enumerate_view(Range&& r) noexcept : pointer_(&r) {}
+            [[nodiscard]] __host__ __device__ auto begin() const noexcept
+            {
+                return enumerate_iterator<StrideType, Range>(*pointer_);
+            }
+            [[nodiscard]] __host__ __device__ auto end() const noexcept { return enumerate_sentinel<Range>(*pointer_); }
+
+        private:
+            std::remove_reference_t<Range>* pointer_ = nullptr;
+        };
+
+        template <Stride StrideType>
+        struct enumerate_adapter
+        {
+            template <std::ranges::random_access_range Range>
+            requires std::ranges::sized_range<Range>
+            [[nodiscard]] constexpr auto operator()(Range& r) const noexcept
+            {
+                return enumerate_view<StrideType, Range&>(r);
+            }
+
+            template <std::ranges::random_access_range Range>
+            requires std::ranges::sized_range<Range>
+            [[nodiscard]] friend constexpr std::ranges::view auto operator|(Range& range,
+                                                                            const enumerate_adapter& self) noexcept
+            {
+                return self(range);
+            }
+        };
     }  // namespace detail
 
 #if !defined(ENABLE_HIP)

From 144b3485dc737e3dae7ce6e9b65eb8d1de8b7c22 Mon Sep 17 00:00:00 2001
From: acd1034 <48613285+acd1034@users.noreply.github.com>
Date: Fri, 13 Feb 2026 20:27:43 +0900
Subject: [PATCH 02/19] =?UTF-8?q?=E2=9C=A8=20Implement=20enumerate=5Fview?=
 =?UTF-8?q?=20logic?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 include/gpu_array.hpp | 31 ++++++++++++++++++++++++-------
 test/test.cpp         | 26 ++++++++++++++++++++++++++
 2 files changed, 50 insertions(+), 7 deletions(-)

diff --git a/include/gpu_array.hpp b/include/gpu_array.hpp
index 11e0d3b..62bb906 100644
--- a/include/gpu_array.hpp
+++ b/include/gpu_array.hpp
@@ -2832,6 +2832,8 @@ namespace gpu_array
             }
         };
 
+        // enumerate_view
+
         template <std::ranges::random_access_range Range>
         requires std::is_lvalue_reference_v<Range&&> && std::ranges::sized_range<Range>
         class enumerate_sentinel;
@@ -2846,9 +2848,10 @@ namespace gpu_array
 
         public:
             enumerate_iterator_base() = default;
-            __host__ __device__ std::ranges::range_reference_t<Range> operator*() const noexcept
+            __host__ __device__ std::pair<std::ranges::range_size_t<Range>, std::ranges::range_reference_t<Range>>
+            operator*() const noexcept
             {
-                return (*pointer_)[index_];
+                return {index_, (*pointer_)[index_]};
             }
 
         protected:
@@ -2939,7 +2942,7 @@ namespace gpu_array
 #endif
             }
 
-            __host__ __device__ static auto get_enumerate() noexcept
+            __host__ __device__ static auto get_stride() noexcept
             {
 #if defined(GPU_DEVICE_COMPILE)
                 using namespace cooperative_groups;  // NOLINT
@@ -2996,7 +2999,7 @@ namespace gpu_array
             }
             __host__ __device__ enumerate_iterator& operator++() noexcept
             {
-                base::index_ += get_enumerate();
+                base::index_ += get_stride();
                 return *this;
             }
             __host__ __device__ enumerate_iterator operator++(int) noexcept
@@ -3033,15 +3036,15 @@ namespace gpu_array
         {
             template <std::ranges::random_access_range Range>
             requires std::ranges::sized_range<Range>
-            [[nodiscard]] constexpr auto operator()(Range& r) const noexcept
+            [[nodiscard]] __host__ __device__ auto operator()(Range& r) const noexcept
             {
                 return enumerate_view<StrideType, Range&>(r);
             }
 
             template <std::ranges::random_access_range Range>
             requires std::ranges::sized_range<Range>
-            [[nodiscard]] friend constexpr std::ranges::view auto operator|(Range& range,
-                                                                            const enumerate_adapter& self) noexcept
+            [[nodiscard]] __host__ __device__ friend std::ranges::view auto operator|(
+                Range& range, const enumerate_adapter& self) noexcept
             {
                 return self(range);
             }
@@ -3064,6 +3067,20 @@ namespace gpu_array
     using cluster_block_stride_view = detail::stride_view<detail::Stride::ClusterBlock, Range>;
     template <std::ranges::random_access_range Range>
     using grid_cluster_stride_view = detail::stride_view<detail::Stride::GridCluster, Range>;
+
+    template <std::ranges::random_access_range Range>
+    using block_thread_enumerate_view = detail::enumerate_view<detail::Stride::BlockThread, Range>;
+    template <std::ranges::random_access_range Range>
+    using grid_thread_enumerate_view = detail::enumerate_view<detail::Stride::GridThread, Range>;
+    template <std::ranges::random_access_range Range>
+    using grid_block_enumerate_view = detail::enumerate_view<detail::Stride::GridBlock, Range>;
+
+    template <std::ranges::random_access_range Range>
+    using cluster_thread_enumerate_view = detail::enumerate_view<detail::Stride::ClusterThread, Range>;
+    template <std::ranges::random_access_range Range>
+    using cluster_block_enumerate_view = detail::enumerate_view<detail::Stride::ClusterBlock, Range>;
+    template <std::ranges::random_access_range Range>
+    using grid_cluster_enumerate_view = detail::enumerate_view<detail::Stride::GridCluster, Range>;
 #endif
 
     namespace views
diff --git a/test/test.cpp b/test/test.cpp
index 13cbc77..91e0736 100644
--- a/test/test.cpp
+++ b/test/test.cpp
@@ -2121,5 +2121,31 @@ TEST(StrideView, AliasTemplate)
     for (const auto& inner_array : nested_array)
         for (const auto& v : inner_array) EXPECT_EQ(v, 3);
 }
+
+template <std::ranges::input_range T>  // NOTE(review): the enumerate view aliases used below require random_access_range
+requires std::ranges::input_range<std::ranges::range_value_t<T>>
+__global__ void kernel_enumerate(T array)  // test kernel: stores i * 100 + j into element j of row i
+{
+    for (auto&& [i, xs] : grid_block_enumerate_view(array))  // (row index, row) pairs via Stride::GridBlock
+        for (auto&& [j, x] : block_thread_enumerate_view(xs)) x = i * 100 + j;  // Stride::BlockThread inside a row
+}
+
+TEST(EnumerateView, HowToUse)  // launches kernel_enumerate and checks every element on the host
+{
+    auto vec_vec = std::vector(32, std::vector<int>(64, 0));  // 32 rows of 64 zero-initialized ints
+    auto nested_array = managed_array(vec_vec);
+
+    kernel_enumerate<<<32, 64>>>(nested_array);  // 32 blocks x 64 threads, same extents as the data
+    api::gpuDeviceSynchronize();  // NOTE(review): result is discarded; presumably an error code - confirm and check it
+    for (int i = 0; const auto& xs : nested_array)
+    {
+        for (int j = 0; const auto& x : xs)
+        {
+            EXPECT_EQ(x, i * 100 + j);  // same expression kernel_enumerate assigns for row i, column j
+            ++j;
+        }
+        ++i;
+    }
+}
 #endif
 // NOLINTEND

From b15593ba8033765df7de485d4d0ecc7f8aa16133 Mon Sep 17 00:00:00 2001
From: acd1034 <48613285+acd1034@users.noreply.github.com>
Date: Fri, 13 Feb 2026 20:35:29 +0900
Subject: [PATCH 03/19] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Refactor?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 include/gpu_array.hpp | 195 ++++++++----------------------------------
 1 file changed, 37 insertions(+), 158 deletions(-)

diff --git a/include/gpu_array.hpp b/include/gpu_array.hpp
index 62bb906..a62948a 100644
--- a/include/gpu_array.hpp
+++ b/include/gpu_array.hpp
@@ -2676,6 +2676,7 @@ namespace gpu_array
         {
             using base = stride_iterator_base<Range>;
 
+        public:
             __host__ __device__ static auto get_initial_index() noexcept
             {
 #if defined(GPU_DEVICE_COMPILE)
@@ -2770,7 +2771,6 @@ namespace gpu_array
 #endif
             }
 
-        public:
             using iterator_category = std::forward_iterator_tag;
             using value_type = std::ranges::range_value_t<Range>;
             using difference_type = std::make_signed_t<std::ranges::range_size_t<Range>>;
@@ -2832,174 +2832,29 @@ namespace gpu_array
             }
         };
 
-        // enumerate_view
-
-        template <std::ranges::random_access_range Range>
-        requires std::is_lvalue_reference_v<Range&&> && std::ranges::sized_range<Range>
-        class enumerate_sentinel;
-
-        template <std::ranges::random_access_range Range>
-        requires std::is_lvalue_reference_v<Range&&> && std::ranges::sized_range<Range>
-        class enumerate_iterator_base
-        {
-            template <typename T>
-            __host__ __device__ friend bool operator==(const enumerate_iterator_base<T>& it,
-                                                       const enumerate_sentinel<T>& se) noexcept;
-
-        public:
-            enumerate_iterator_base() = default;
-            __host__ __device__ std::pair<std::ranges::range_size_t<Range>, std::ranges::range_reference_t<Range>>
-            operator*() const noexcept
-            {
-                return {index_, (*pointer_)[index_]};
-            }
-
-        protected:
-            __host__ __device__ explicit enumerate_iterator_base(Range&& r,
-                                                                 std::ranges::range_size_t<Range> index) noexcept
-                : pointer_(&r), index_(index)
-            {
-            }
-
-            std::remove_reference_t<Range>* pointer_ = nullptr;
-            std::ranges::range_size_t<Range> index_ = 0;
-        };
-
-        template <std::ranges::random_access_range Range>
-        requires std::is_lvalue_reference_v<Range&&> && std::ranges::sized_range<Range>
-        class enumerate_sentinel
-        {
-            template <typename T>
-            __host__ __device__ friend bool operator==(const enumerate_iterator_base<T>& it,
-                                                       const enumerate_sentinel<T>& se) noexcept;
-
-        public:
-            enumerate_sentinel() = default;
-            __host__ __device__ explicit enumerate_sentinel(Range&& r) noexcept : end_(r.size()) {}
-
-        protected:
-            std::ranges::range_size_t<Range> end_ = 0;
-        };
-
-        template <typename T>
-        __host__ __device__ inline bool operator==(const enumerate_iterator_base<T>& it,
-                                                   const enumerate_sentinel<T>& se) noexcept
-        {
-            return it.index_ >= se.end_;
-        }
-
         template <Stride StrideType, std::ranges::random_access_range Range>
         requires std::is_lvalue_reference_v<Range&&>
-        class enumerate_iterator : public enumerate_iterator_base<Range>
+        class enumerate_iterator
         {
-            using base = enumerate_iterator_base<Range>;
-
-            __host__ __device__ static auto get_initial_index() noexcept
-            {
-#if defined(GPU_DEVICE_COMPILE)
-                using namespace cooperative_groups;  // NOLINT
-                if constexpr (StrideType == Stride::BlockThread)
-                {
-                    return this_thread_block().thread_rank();
-                }
-                else if constexpr (StrideType == Stride::GridThread)
-                {
-                    return this_grid().thread_rank();
-                }
-#if defined(ENABLE_HIP)
-                else if constexpr (StrideType == Stride::GridBlock)
-                {
-                    return (static_cast<unsigned long long>(blockIdx.z) * gridDim.y * gridDim.x) +  // NOLINT
-                           (static_cast<unsigned long long>(blockIdx.y) * gridDim.x) +              // NOLINT
-                           static_cast<unsigned long long>(blockIdx.x);                             // NOLINT
-                }
-#else
-                else if constexpr (StrideType == Stride::GridBlock)
-                {
-                    return this_grid().block_rank();
-                }
-#endif
-#if defined(_CG_HAS_CLUSTER_GROUP)
-                else if constexpr (StrideType == Stride::ClusterThread)
-                {
-                    return this_cluster().thread_rank();
-                }
-                else if constexpr (StrideType == Stride::ClusterBlock)
-                {
-                    return this_cluster().block_rank();
-                }
-                else if constexpr (StrideType == Stride::GridCluster)
-                {
-                    return this_grid().cluster_rank();
-                }
-#endif
-                else
-                {
-                    static_assert([]() { return false; }(), "invalid StrideType");
-                }
-#else
-                return 0;
-#endif
-            }
-
-            __host__ __device__ static auto get_stride() noexcept
-            {
-#if defined(GPU_DEVICE_COMPILE)
-                using namespace cooperative_groups;  // NOLINT
-                if constexpr (StrideType == Stride::BlockThread)
-                {
-                    return this_thread_block().size();
-                }
-                else if constexpr (StrideType == Stride::GridThread)
-                {
-                    return this_grid().size();
-                }
-#if defined(ENABLE_HIP)
-                else if constexpr (StrideType == Stride::GridBlock)
-                {
-                    return static_cast<unsigned long long>(gridDim.x) * (gridDim.y * gridDim.z);  // NOLINT
-                }
-#else
-                else if constexpr (StrideType == Stride::GridBlock)
-                {
-                    return this_grid().num_blocks();
-                }
-#endif
-#if defined(_CG_HAS_CLUSTER_GROUP)
-                else if constexpr (StrideType == Stride::ClusterThread)
-                {
-                    return this_cluster().size();
-                }
-                else if constexpr (StrideType == Stride::ClusterBlock)
-                {
-                    return this_cluster().num_blocks();
-                }
-                else if constexpr (StrideType == Stride::GridCluster)
-                {
-                    return this_grid().num_clusters();
-                }
-#endif
-                else
-                {
-                    static_assert([]() { return false; }(), "invalid StrideType");
-                }
-#else
-                return 1;
-#endif
-            }
-
         public:
             using iterator_category = std::forward_iterator_tag;
-            using value_type = std::ranges::range_value_t<Range>;
+            using value_type = std::pair<std::ranges::range_size_t<Range>, std::ranges::range_value_t<Range>>;  // (index, element), same shape as operator*
             using difference_type = std::make_signed_t<std::ranges::range_size_t<Range>>;
 
+            enumerate_iterator() = default;
             __host__ __device__ explicit enumerate_iterator(Range&& r) noexcept
-                : base(std::forward<Range>(r), get_initial_index())
+                : pointer_(&r), index_(stride_iterator<StrideType, Range>::get_initial_index())
+            {
+            }
+            __host__ __device__ std::ranges::range_size_t<Range> index() const noexcept { return index_; }
+            __host__ __device__ std::pair<std::ranges::range_size_t<Range>, std::ranges::range_reference_t<Range>>
+            operator*() const noexcept
             {
+                return {index_, (*pointer_)[index_]};
             }
             __host__ __device__ enumerate_iterator& operator++() noexcept
             {
-                base::index_ += get_stride();
+                index_ += stride_iterator<StrideType, Range>::get_stride();
                 return *this;
             }
             __host__ __device__ enumerate_iterator operator++(int) noexcept
@@ -3010,8 +2865,29 @@ namespace gpu_array
             }
             __host__ __device__ bool operator==(const enumerate_iterator& it) const noexcept
             {
-                return base::index_ == it.index_;
+                return index_ == it.index_;
             }
+
+        private:
+            std::remove_reference_t<Range>* pointer_ = nullptr;
+            std::ranges::range_size_t<Range> index_ = 0;
+        };
+
+        template <Stride StrideType, std::ranges::random_access_range Range>
+        requires std::is_lvalue_reference_v<Range&&> && std::ranges::sized_range<Range>
+        class enumerate_sentinel  // end marker compared against enumerate_iterator<StrideType, Range>
+        {
+        public:
+            enumerate_sentinel() = default;
+            __host__ __device__ explicit enumerate_sentinel(Range&& r) noexcept : end_(std::ranges::size(r)) {}  // ranges::size is what sized_range guarantees; member size() is not
+            __host__ __device__ friend bool operator==(const enumerate_iterator<StrideType, Range>& it,
+                                                       const enumerate_sentinel& se) noexcept
+            {
+                return it.index() >= se.end_;  // >= rather than ==: operator++ adds a stride, so the index can step past the size
+            }
+
+        private:
+            std::ranges::range_size_t<Range> end_ = 0;  // number of elements, captured at construction
         };
 
         template <Stride StrideType, std::ranges::random_access_range Range>
@@ -3025,7 +2901,10 @@ namespace gpu_array
             {
                 return enumerate_iterator<StrideType, Range>(*pointer_);
             }
-            [[nodiscard]] __host__ __device__ auto end() const noexcept { return enumerate_sentinel<Range>(*pointer_); }
+            [[nodiscard]] __host__ __device__ auto end() const noexcept
+            {
+                return enumerate_sentinel<StrideType, Range>(*pointer_);
+            }
 
         private:
             std::remove_reference_t<Range>* pointer_ = nullptr;

From 87db19a9ebec5dcae1c98d0ecb51b1e075ebd7c2 Mon Sep 17 00:00:00 2001
From: acd1034 <48613285+acd1034@users.noreply.github.com>
Date: Fri, 13 Feb 2026 21:03:41 +0900
Subject: [PATCH 04/19] =?UTF-8?q?=E2=9C=A8=20Add=20block=5Fthread=5Fenumer?=
 =?UTF-8?q?ate,=20etc.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 include/gpu_array.hpp | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/include/gpu_array.hpp b/include/gpu_array.hpp
index a62948a..4181f2a 100644
--- a/include/gpu_array.hpp
+++ b/include/gpu_array.hpp
@@ -2983,6 +2983,26 @@ namespace gpu_array
         inline constexpr detail::stride_adapter<Stride::ClusterBlock> cluster_block_stride;
         inline constexpr detail::stride_adapter<Stride::GridCluster> grid_cluster_stride;
 #endif
+#endif
+
+#ifdef GPU_CHECK_ERROR
+        __device__ static constexpr detail::enumerate_adapter<Stride::BlockThread> block_thread_enumerate;
+        __device__ static constexpr detail::enumerate_adapter<Stride::GridThread> grid_thread_enumerate;
+        __device__ static constexpr detail::enumerate_adapter<Stride::GridBlock> grid_block_enumerate;
+#if defined(_CG_HAS_CLUSTER_GROUP)
+        __device__ static constexpr detail::enumerate_adapter<Stride::ClusterThread> cluster_thread_enumerate;
+        __device__ static constexpr detail::enumerate_adapter<Stride::ClusterBlock> cluster_block_enumerate;
+        __device__ static constexpr detail::enumerate_adapter<Stride::GridCluster> grid_cluster_enumerate;
+#endif
+#else
+        inline constexpr detail::enumerate_adapter<Stride::BlockThread> block_thread_enumerate;
+        inline constexpr detail::enumerate_adapter<Stride::GridThread> grid_thread_enumerate;
+        inline constexpr detail::enumerate_adapter<Stride::GridBlock> grid_block_enumerate;
+#if defined(_CG_HAS_CLUSTER_GROUP)
+        inline constexpr detail::enumerate_adapter<Stride::ClusterThread> cluster_thread_enumerate;
+        inline constexpr detail::enumerate_adapter<Stride::ClusterBlock> cluster_block_enumerate;
+        inline constexpr detail::enumerate_adapter<Stride::GridCluster> grid_cluster_enumerate;
+#endif
 #endif
     }  // namespace views
 }  // namespace gpu_array

From 92931af828daa2947dde00228c3bec4cb3cbd8b9 Mon Sep 17 00:00:00 2001
From: acd1034 <48613285+acd1034@users.noreply.github.com>
Date: Mon, 2 Mar 2026 11:55:57 +0900
Subject: [PATCH 05/19] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Refactor?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 include/gpu_array.hpp | 82 ++++++++++++++++++++++++-------------------
 1 file changed, 45 insertions(+), 37 deletions(-)

diff --git a/include/gpu_array.hpp b/include/gpu_array.hpp
index 4181f2a..f432179 100644
--- a/include/gpu_array.hpp
+++ b/include/gpu_array.hpp
@@ -2831,7 +2831,52 @@ namespace gpu_array
                 return self(range);
             }
         };
+    }  // namespace detail
+
+#if !defined(ENABLE_HIP)
+    // The following three alias templates are also disabled in HIP because HIP does not support alias template argument
+    // deduction.
+    template <std::ranges::random_access_range Range>
+    using block_thread_stride_view = detail::stride_view<detail::Stride::BlockThread, Range>;
+    template <std::ranges::random_access_range Range>
+    using grid_thread_stride_view = detail::stride_view<detail::Stride::GridThread, Range>;
+    template <std::ranges::random_access_range Range>
+    using grid_block_stride_view = detail::stride_view<detail::Stride::GridBlock, Range>;
+
+    template <std::ranges::random_access_range Range>
+    using cluster_thread_stride_view = detail::stride_view<detail::Stride::ClusterThread, Range>;
+    template <std::ranges::random_access_range Range>
+    using cluster_block_stride_view = detail::stride_view<detail::Stride::ClusterBlock, Range>;
+    template <std::ranges::random_access_range Range>
+    using grid_cluster_stride_view = detail::stride_view<detail::Stride::GridCluster, Range>;
+#endif
+
+    namespace views
+    {
+        using detail::Stride;
+#ifdef GPU_CHECK_ERROR
+        __device__ static constexpr detail::stride_adapter<Stride::BlockThread> block_thread_stride;
+        __device__ static constexpr detail::stride_adapter<Stride::GridThread> grid_thread_stride;
+        __device__ static constexpr detail::stride_adapter<Stride::GridBlock> grid_block_stride;
+#if defined(_CG_HAS_CLUSTER_GROUP)
+        __device__ static constexpr detail::stride_adapter<Stride::ClusterThread> cluster_thread_stride;
+        __device__ static constexpr detail::stride_adapter<Stride::ClusterBlock> cluster_block_stride;
+        __device__ static constexpr detail::stride_adapter<Stride::GridCluster> grid_cluster_stride;
+#endif
+#else
+        inline constexpr detail::stride_adapter<Stride::BlockThread> block_thread_stride;
+        inline constexpr detail::stride_adapter<Stride::GridThread> grid_thread_stride;
+        inline constexpr detail::stride_adapter<Stride::GridBlock> grid_block_stride;
+#if defined(_CG_HAS_CLUSTER_GROUP)
+        inline constexpr detail::stride_adapter<Stride::ClusterThread> cluster_thread_stride;
+        inline constexpr detail::stride_adapter<Stride::ClusterBlock> cluster_block_stride;
+        inline constexpr detail::stride_adapter<Stride::GridCluster> grid_cluster_stride;
+#endif
+#endif
+    }  // namespace views
 
+    namespace detail
+    {
         template <Stride StrideType, std::ranges::random_access_range Range>
         requires std::is_lvalue_reference_v<Range&&>
         class enumerate_iterator
@@ -2931,22 +2976,6 @@ namespace gpu_array
     }  // namespace detail
 
 #if !defined(ENABLE_HIP)
-    // The following three alias templates are also disabled in HIP because HIP does not support alias template argument
-    // deduction.
-    template <std::ranges::random_access_range Range>
-    using block_thread_stride_view = detail::stride_view<detail::Stride::BlockThread, Range>;
-    template <std::ranges::random_access_range Range>
-    using grid_thread_stride_view = detail::stride_view<detail::Stride::GridThread, Range>;
-    template <std::ranges::random_access_range Range>
-    using grid_block_stride_view = detail::stride_view<detail::Stride::GridBlock, Range>;
-
-    template <std::ranges::random_access_range Range>
-    using cluster_thread_stride_view = detail::stride_view<detail::Stride::ClusterThread, Range>;
-    template <std::ranges::random_access_range Range>
-    using cluster_block_stride_view = detail::stride_view<detail::Stride::ClusterBlock, Range>;
-    template <std::ranges::random_access_range Range>
-    using grid_cluster_stride_view = detail::stride_view<detail::Stride::GridCluster, Range>;
-
     template <std::ranges::random_access_range Range>
     using block_thread_enumerate_view = detail::enumerate_view<detail::Stride::BlockThread, Range>;
     template <std::ranges::random_access_range Range>
@@ -2964,27 +2993,6 @@ namespace gpu_array
 
     namespace views
     {
-        using detail::Stride;
-#ifdef GPU_CHECK_ERROR
-        __device__ static constexpr detail::stride_adapter<Stride::BlockThread> block_thread_stride;
-        __device__ static constexpr detail::stride_adapter<Stride::GridThread> grid_thread_stride;
-        __device__ static constexpr detail::stride_adapter<Stride::GridBlock> grid_block_stride;
-#if defined(_CG_HAS_CLUSTER_GROUP)
-        __device__ static constexpr detail::stride_adapter<Stride::ClusterThread> cluster_thread_stride;
-        __device__ static constexpr detail::stride_adapter<Stride::ClusterBlock> cluster_block_stride;
-        __device__ static constexpr detail::stride_adapter<Stride::GridCluster> grid_cluster_stride;
-#endif
-#else
-        inline constexpr detail::stride_adapter<Stride::BlockThread> block_thread_stride;
-        inline constexpr detail::stride_adapter<Stride::GridThread> grid_thread_stride;
-        inline constexpr detail::stride_adapter<Stride::GridBlock> grid_block_stride;
-#if defined(_CG_HAS_CLUSTER_GROUP)
-        inline constexpr detail::stride_adapter<Stride::ClusterThread> cluster_thread_stride;
-        inline constexpr detail::stride_adapter<Stride::ClusterBlock> cluster_block_stride;
-        inline constexpr detail::stride_adapter<Stride::GridCluster> grid_cluster_stride;
-#endif
-#endif
-
 #ifdef GPU_CHECK_ERROR
         __device__ static constexpr detail::enumerate_adapter<Stride::BlockThread> block_thread_enumerate;
         __device__ static constexpr detail::enumerate_adapter<Stride::GridThread> grid_thread_enumerate;

From f97667d3354044f7cc6ebe9f5bfe1e2a79e2fceb Mon Sep 17 00:00:00 2001
From: acd1034 <48613285+acd1034@users.noreply.github.com>
Date: Mon, 2 Mar 2026 12:07:09 +0900
Subject: [PATCH 06/19] =?UTF-8?q?=E2=9C=A8=20Create=20zip=5Fview=20from=20?=
 =?UTF-8?q?enumerate=5Fview?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 include/gpu_array.hpp | 136 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 136 insertions(+)

diff --git a/include/gpu_array.hpp b/include/gpu_array.hpp
index f432179..c626820 100644
--- a/include/gpu_array.hpp
+++ b/include/gpu_array.hpp
@@ -3011,6 +3011,142 @@ namespace gpu_array
         inline constexpr detail::enumerate_adapter<Stride::ClusterBlock> cluster_block_enumerate;
         inline constexpr detail::enumerate_adapter<Stride::GridCluster> grid_cluster_enumerate;
 #endif
+#endif
+    }  // namespace views
+
+    namespace detail
+    {
+        template <Stride StrideType, std::ranges::random_access_range Range>
+        requires std::is_lvalue_reference_v<Range&&>
+        class zip_iterator
+        {
+        public:
+            using iterator_category = std::forward_iterator_tag;
+            using value_type = std::ranges::range_value_t<Range>;
+            using difference_type = std::make_signed_t<std::ranges::range_size_t<Range>>;
+
+            zip_iterator() = default;
+            __host__ __device__ explicit zip_iterator(Range&& r) noexcept
+                : pointer_(&r), index_(stride_iterator<StrideType, Range>::get_initial_index())
+            {
+            }
+            __host__ __device__ std::ranges::range_size_t<Range> index() const noexcept { return index_; }
+            __host__ __device__ std::pair<std::ranges::range_size_t<Range>, std::ranges::range_reference_t<Range>>
+            operator*() const noexcept
+            {
+                return {index_, (*pointer_)[index_]};
+            }
+            __host__ __device__ zip_iterator& operator++() noexcept
+            {
+                index_ += stride_iterator<StrideType, Range>::get_stride();
+                return *this;
+            }
+            __host__ __device__ zip_iterator operator++(int) noexcept
+            {
+                auto res = *this;
+                ++(*this);
+                return res;
+            }
+            __host__ __device__ bool operator==(const zip_iterator& it) const noexcept { return index_ == it.index_; }
+
+        private:
+            std::remove_reference_t<Range>* pointer_ = nullptr;
+            std::ranges::range_size_t<Range> index_ = 0;
+        };
+
+        template <Stride StrideType, std::ranges::random_access_range Range>
+        requires std::is_lvalue_reference_v<Range&&> && std::ranges::sized_range<Range>
+        class zip_sentinel
+        {
+        public:
+            zip_sentinel() = default;
+            __host__ __device__ explicit zip_sentinel(Range&& r) noexcept : end_(r.size()) {}
+            __host__ __device__ friend bool operator==(const zip_iterator<StrideType, Range>& it,
+                                                       const zip_sentinel& se) noexcept
+            {
+                return it.index() >= se.end_;
+            }
+
+        private:
+            std::ranges::range_size_t<Range> end_ = 0;
+        };
+
+        template <Stride StrideType, std::ranges::random_access_range Range>
+        requires std::is_lvalue_reference_v<Range&&>
+        class zip_view : public std::ranges::view_interface<zip_view<StrideType, Range>>
+        {
+        public:
+            zip_view() = default;
+            __host__ __device__ explicit zip_view(Range&& r) noexcept : pointer_(&r) {}
+            [[nodiscard]] __host__ __device__ auto begin() const noexcept
+            {
+                return zip_iterator<StrideType, Range>(*pointer_);
+            }
+            [[nodiscard]] __host__ __device__ auto end() const noexcept
+            {
+                return zip_sentinel<StrideType, Range>(*pointer_);
+            }
+
+        private:
+            std::remove_reference_t<Range>* pointer_ = nullptr;
+        };
+
+        template <Stride StrideType>
+        struct zip_adapter
+        {
+            template <std::ranges::random_access_range Range>
+            requires std::ranges::sized_range<Range>
+            [[nodiscard]] __host__ __device__ auto operator()(Range& r) const noexcept
+            {
+                return zip_view<StrideType, Range&>(r);
+            }
+
+            template <std::ranges::random_access_range Range>
+            requires std::ranges::sized_range<Range>
+            [[nodiscard]] __host__ __device__ friend std::ranges::view auto operator|(Range& range,
+                                                                                      const zip_adapter& self) noexcept
+            {
+                return self(range);
+            }
+        };
+    }  // namespace detail
+
+#if !defined(ENABLE_HIP)
+    template <std::ranges::random_access_range Range>
+    using block_thread_zip_view = detail::zip_view<detail::Stride::BlockThread, Range>;
+    template <std::ranges::random_access_range Range>
+    using grid_thread_zip_view = detail::zip_view<detail::Stride::GridThread, Range>;
+    template <std::ranges::random_access_range Range>
+    using grid_block_zip_view = detail::zip_view<detail::Stride::GridBlock, Range>;
+
+    template <std::ranges::random_access_range Range>
+    using cluster_thread_zip_view = detail::zip_view<detail::Stride::ClusterThread, Range>;
+    template <std::ranges::random_access_range Range>
+    using cluster_block_zip_view = detail::zip_view<detail::Stride::ClusterBlock, Range>;
+    template <std::ranges::random_access_range Range>
+    using grid_cluster_zip_view = detail::zip_view<detail::Stride::GridCluster, Range>;
+#endif
+
+    namespace views
+    {
+#ifdef GPU_CHECK_ERROR
+        __device__ static constexpr detail::zip_adapter<Stride::BlockThread> block_thread_zip;
+        __device__ static constexpr detail::zip_adapter<Stride::GridThread> grid_thread_zip;
+        __device__ static constexpr detail::zip_adapter<Stride::GridBlock> grid_block_zip;
+#if defined(_CG_HAS_CLUSTER_GROUP)
+        __device__ static constexpr detail::zip_adapter<Stride::ClusterThread> cluster_thread_zip;
+        __device__ static constexpr detail::zip_adapter<Stride::ClusterBlock> cluster_block_zip;
+        __device__ static constexpr detail::zip_adapter<Stride::GridCluster> grid_cluster_zip;
+#endif
+#else
+        inline constexpr detail::zip_adapter<Stride::BlockThread> block_thread_zip;
+        inline constexpr detail::zip_adapter<Stride::GridThread> grid_thread_zip;
+        inline constexpr detail::zip_adapter<Stride::GridBlock> grid_block_zip;
+#if defined(_CG_HAS_CLUSTER_GROUP)
+        inline constexpr detail::zip_adapter<Stride::ClusterThread> cluster_thread_zip;
+        inline constexpr detail::zip_adapter<Stride::ClusterBlock> cluster_block_zip;
+        inline constexpr detail::zip_adapter<Stride::GridCluster> grid_cluster_zip;
+#endif
 #endif
     }  // namespace views
 }  // namespace gpu_array

From ea84e03bf3e5b98a4615bbc7bbbe47c56b311df0 Mon Sep 17 00:00:00 2001
From: acd1034 <48613285+acd1034@users.noreply.github.com>
Date: Mon, 2 Mar 2026 14:07:05 +0900
Subject: [PATCH 07/19] =?UTF-8?q?=E2=9C=A8=20Implement=20zip=5Fview=20logi?=
 =?UTF-8?q?c?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 include/gpu_array.hpp | 103 ++++++++++++++++++++++--------------------
 test/test.cpp         |  50 +++++++++++++++++++-
 2 files changed, 102 insertions(+), 51 deletions(-)

diff --git a/include/gpu_array.hpp b/include/gpu_array.hpp
index c626820..caa6ab1 100644
--- a/include/gpu_array.hpp
+++ b/include/gpu_array.hpp
@@ -3016,29 +3016,38 @@ namespace gpu_array
 
     namespace detail
     {
-        template <Stride StrideType, std::ranges::random_access_range Range>
-        requires std::is_lvalue_reference_v<Range&&>
+        template <class... Ranges>
+        using first_t = std::tuple_element_t<0, std::tuple<Ranges...>>;
+
+        template <Stride StrideType, std::ranges::random_access_range... Ranges>
+        requires (std::is_lvalue_reference_v<Ranges &&> && ...)
         class zip_iterator
         {
         public:
             using iterator_category = std::forward_iterator_tag;
-            using value_type = std::ranges::range_value_t<Range>;
-            using difference_type = std::make_signed_t<std::ranges::range_size_t<Range>>;
+            using value_type = std::tuple<std::ranges::range_value_t<Ranges>...>;
+            using difference_type = std::common_type_t<std::make_signed_t<std::ranges::range_size_t<Ranges>>...>;
 
             zip_iterator() = default;
-            __host__ __device__ explicit zip_iterator(Range&& r) noexcept
-                : pointer_(&r), index_(stride_iterator<StrideType, Range>::get_initial_index())
+            __host__ __device__ explicit zip_iterator(Ranges&&... rs) noexcept
+                : pointers_(&rs...), index_(stride_iterator<StrideType, first_t<Ranges...>>::get_initial_index())
             {
             }
-            __host__ __device__ std::ranges::range_size_t<Range> index() const noexcept { return index_; }
-            __host__ __device__ std::pair<std::ranges::range_size_t<Range>, std::ranges::range_reference_t<Range>>
-            operator*() const noexcept
+            __host__ __device__ std::common_type_t<std::ranges::range_size_t<Ranges>...> index() const noexcept
             {
-                return {index_, (*pointer_)[index_]};
+                return index_;
+            }
+            __host__ __device__ auto operator*() const noexcept
+            {
+                return std::apply(
+                    [this](auto&... pointers) {
+                        return std::tuple<std::ranges::range_reference_t<Ranges>...>((*pointers)[index_]...);
+                    },
+                    pointers_);
             }
             __host__ __device__ zip_iterator& operator++() noexcept
             {
-                index_ += stride_iterator<StrideType, Range>::get_stride();
+                index_ += stride_iterator<StrideType, first_t<Ranges...>>::get_stride();
                 return *this;
             }
             __host__ __device__ zip_iterator operator++(int) noexcept
@@ -3050,81 +3059,75 @@ namespace gpu_array
             __host__ __device__ bool operator==(const zip_iterator& it) const noexcept { return index_ == it.index_; }
 
         private:
-            std::remove_reference_t<Range>* pointer_ = nullptr;
-            std::ranges::range_size_t<Range> index_ = 0;
+            std::tuple<std::remove_reference_t<Ranges>*...> pointers_{};
+            std::common_type_t<std::ranges::range_size_t<Ranges>...> index_ = 0;
         };
 
-        template <Stride StrideType, std::ranges::random_access_range Range>
-        requires std::is_lvalue_reference_v<Range&&> && std::ranges::sized_range<Range>
+        template <Stride StrideType, std::ranges::random_access_range... Ranges>
+        requires (std::is_lvalue_reference_v<Ranges &&> && ...) && (std::ranges::sized_range<Ranges> && ...)
         class zip_sentinel
         {
         public:
             zip_sentinel() = default;
-            __host__ __device__ explicit zip_sentinel(Range&& r) noexcept : end_(r.size()) {}
-            __host__ __device__ friend bool operator==(const zip_iterator<StrideType, Range>& it,
+            __host__ __device__ explicit zip_sentinel(Ranges&&... rs) noexcept : end_(std::min({static_cast<decltype(end_)>(rs.size())...})) {}  // cast to the common size type so ranges with different size_type still compile
+            __host__ __device__ friend bool operator==(const zip_iterator<StrideType, Ranges...>& it,
                                                        const zip_sentinel& se) noexcept
             {
                 return it.index() >= se.end_;
             }
 
         private:
-            std::ranges::range_size_t<Range> end_ = 0;
+            std::common_type_t<std::ranges::range_size_t<Ranges>...> end_ = 0;
         };
 
-        template <Stride StrideType, std::ranges::random_access_range Range>
-        requires std::is_lvalue_reference_v<Range&&>
-        class zip_view : public std::ranges::view_interface<zip_view<StrideType, Range>>
+        template <Stride StrideType, std::ranges::random_access_range... Ranges>
+        requires (std::is_lvalue_reference_v<Ranges &&> && ...)
+        class zip_view : public std::ranges::view_interface<zip_view<StrideType, Ranges...>>
         {
         public:
             zip_view() = default;
-            __host__ __device__ explicit zip_view(Range&& r) noexcept : pointer_(&r) {}
+            __host__ __device__ explicit zip_view(Ranges&&... rs) noexcept : pointers_(&rs...) {}
             [[nodiscard]] __host__ __device__ auto begin() const noexcept
             {
-                return zip_iterator<StrideType, Range>(*pointer_);
+                return std::apply(
+                    [this](auto&... pointers) { return zip_iterator<StrideType, Ranges...>(*pointers...); }, pointers_);
             }
             [[nodiscard]] __host__ __device__ auto end() const noexcept
             {
-                return zip_sentinel<StrideType, Range>(*pointer_);
+                return std::apply(
+                    [this](auto&... pointers) { return zip_sentinel<StrideType, Ranges...>(*pointers...); }, pointers_);
             }
 
         private:
-            std::remove_reference_t<Range>* pointer_ = nullptr;
+            std::tuple<std::remove_reference_t<Ranges>*...> pointers_{};
         };
 
         template <Stride StrideType>
         struct zip_adapter
         {
-            template <std::ranges::random_access_range Range>
-            requires std::ranges::sized_range<Range>
-            [[nodiscard]] __host__ __device__ auto operator()(Range& r) const noexcept
-            {
-                return zip_view<StrideType, Range&>(r);
-            }
-
-            template <std::ranges::random_access_range Range>
-            requires std::ranges::sized_range<Range>
-            [[nodiscard]] __host__ __device__ friend std::ranges::view auto operator|(Range& range,
-                                                                                      const zip_adapter& self) noexcept
+            template <std::ranges::random_access_range... Ranges>
+            requires (std::ranges::sized_range<Ranges> && ...)
+            [[nodiscard]] __host__ __device__ auto operator()(Ranges&... rs) const noexcept
             {
-                return self(range);
+                return zip_view<StrideType, Ranges&...>(rs...);
             }
         };
     }  // namespace detail
 
 #if !defined(ENABLE_HIP)
-    template <std::ranges::random_access_range Range>
-    using block_thread_zip_view = detail::zip_view<detail::Stride::BlockThread, Range>;
-    template <std::ranges::random_access_range Range>
-    using grid_thread_zip_view = detail::zip_view<detail::Stride::GridThread, Range>;
-    template <std::ranges::random_access_range Range>
-    using grid_block_zip_view = detail::zip_view<detail::Stride::GridBlock, Range>;
-
-    template <std::ranges::random_access_range Range>
-    using cluster_thread_zip_view = detail::zip_view<detail::Stride::ClusterThread, Range>;
-    template <std::ranges::random_access_range Range>
-    using cluster_block_zip_view = detail::zip_view<detail::Stride::ClusterBlock, Range>;
-    template <std::ranges::random_access_range Range>
-    using grid_cluster_zip_view = detail::zip_view<detail::Stride::GridCluster, Range>;
+    template <std::ranges::random_access_range Ranges>
+    using block_thread_zip_view = detail::zip_view<detail::Stride::BlockThread, Ranges>;
+    template <std::ranges::random_access_range Ranges>
+    using grid_thread_zip_view = detail::zip_view<detail::Stride::GridThread, Ranges>;
+    template <std::ranges::random_access_range Ranges>
+    using grid_block_zip_view = detail::zip_view<detail::Stride::GridBlock, Ranges>;
+
+    template <std::ranges::random_access_range Ranges>
+    using cluster_thread_zip_view = detail::zip_view<detail::Stride::ClusterThread, Ranges>;
+    template <std::ranges::random_access_range Ranges>
+    using cluster_block_zip_view = detail::zip_view<detail::Stride::ClusterBlock, Ranges>;
+    template <std::ranges::random_access_range Ranges>
+    using grid_cluster_zip_view = detail::zip_view<detail::Stride::GridCluster, Ranges>;
 #endif
 
     namespace views
diff --git a/test/test.cpp b/test/test.cpp
index 91e0736..22e4f7e 100644
--- a/test/test.cpp
+++ b/test/test.cpp
@@ -2130,7 +2130,7 @@ __global__ void kernel_enumerate(T array)
         for (auto&& [j, x] : block_thread_enumerate_view(xs)) x = i * 100 + j;
 }
 
-TEST(EnumerateView, HowToUse)
+TEST(EnumerateView, Simple)
 {
     auto vec_vec = std::vector(32, std::vector<int>(64, 0));
     auto nested_array = managed_array(vec_vec);
@@ -2147,5 +2147,53 @@ TEST(EnumerateView, HowToUse)
         ++i;
     }
 }
+
+template <std::ranges::input_range T>
+requires std::ranges::input_range<std::ranges::range_value_t<T>>
+__global__ void zip_test_init(T array, int coeff)
+{
+    for (auto&& [i, xs] : grid_block_enumerate_view(array))
+        for (auto&& [j, x] : block_thread_enumerate_view(xs)) x = (i * xs.size() + j) * coeff;
+}
+
+template <std::ranges::input_range T, std::ranges::input_range U>
+requires std::ranges::input_range<std::ranges::range_value_t<T>> &&
+         std::ranges::input_range<std::ranges::range_value_t<U>>
+__global__ void kernel_zip(T array1, const U array2)
+{
+    for (auto&& [xs, ys] : detail::zip_adapter<detail::Stride::GridBlock>{}(array1, array2))
+        for (auto&& [x, y] : detail::zip_adapter<detail::Stride::BlockThread>{}(xs, ys)) x = x + y;
+}
+
+TEST(ZipView, Simple)
+{
+    auto vec_vec = std::vector(10, std::vector<int>(20, 0));
+    auto array1 = managed_array(vec_vec);
+    auto array2 = managed_array(vec_vec);
+    zip_test_init<<<10, 20>>>(array1, 1);
+    zip_test_init<<<10, 20>>>(array2, 1000);
+    api::gpuDeviceSynchronize();
+    for (int i = 0; const auto& xs : array1)
+    {
+        for (int j = 0; const auto& x : xs)
+        {
+            EXPECT_EQ(x, i * 20 + j);
+            ++j;
+        }
+        ++i;
+    }
+
+    kernel_zip<<<10, 20>>>(array1, array2);
+    api::gpuDeviceSynchronize();
+    for (int i = 0; const auto& xs : array1)
+    {
+        for (int j = 0; const auto& x : xs)
+        {
+            EXPECT_EQ(x, (i * 20 + j) * 1001);
+            ++j;
+        }
+        ++i;
+    }
+}
 #endif
 // NOLINTEND

From 742725866226d2f8aa2a7411287dd09532b0c55e Mon Sep 17 00:00:00 2001
From: acd1034 <48613285+acd1034@users.noreply.github.com>
Date: Mon, 2 Mar 2026 14:37:39 +0900
Subject: [PATCH 08/19] =?UTF-8?q?=E2=9C=A8=20Add=20block=5Fthread=5Fzip=5F?=
 =?UTF-8?q?view,=20etc.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 include/gpu_array.hpp | 32 ++++++++++++++++++--------------
 test/test.cpp         | 29 ++++++++++++++++++++++++++---
 2 files changed, 44 insertions(+), 17 deletions(-)

diff --git a/include/gpu_array.hpp b/include/gpu_array.hpp
index caa6ab1..b15d252 100644
--- a/include/gpu_array.hpp
+++ b/include/gpu_array.hpp
@@ -3114,20 +3114,24 @@ namespace gpu_array
         };
     }  // namespace detail
 
-#if !defined(ENABLE_HIP)
-    template <std::ranges::random_access_range Ranges>
-    using block_thread_zip_view = detail::zip_view<detail::Stride::BlockThread, Ranges>;
-    template <std::ranges::random_access_range Ranges>
-    using grid_thread_zip_view = detail::zip_view<detail::Stride::GridThread, Ranges>;
-    template <std::ranges::random_access_range Ranges>
-    using grid_block_zip_view = detail::zip_view<detail::Stride::GridBlock, Ranges>;
-
-    template <std::ranges::random_access_range Ranges>
-    using cluster_thread_zip_view = detail::zip_view<detail::Stride::ClusterThread, Ranges>;
-    template <std::ranges::random_access_range Ranges>
-    using cluster_block_zip_view = detail::zip_view<detail::Stride::ClusterBlock, Ranges>;
-    template <std::ranges::random_access_range Ranges>
-    using grid_cluster_zip_view = detail::zip_view<detail::Stride::GridCluster, Ranges>;
+#ifdef GPU_CHECK_ERROR
+    __device__ static constexpr detail::zip_adapter<detail::Stride::BlockThread> block_thread_zip_view;
+    __device__ static constexpr detail::zip_adapter<detail::Stride::GridThread> grid_thread_zip_view;
+    __device__ static constexpr detail::zip_adapter<detail::Stride::GridBlock> grid_block_zip_view;
+#if defined(_CG_HAS_CLUSTER_GROUP)
+    __device__ static constexpr detail::zip_adapter<detail::Stride::ClusterThread> cluster_thread_zip_view;
+    __device__ static constexpr detail::zip_adapter<detail::Stride::ClusterBlock> cluster_block_zip_view;
+    __device__ static constexpr detail::zip_adapter<detail::Stride::GridCluster> grid_cluster_zip_view;
+#endif
+#else
+    inline constexpr detail::zip_adapter<detail::Stride::BlockThread> block_thread_zip_view;  // qualified as in the GPU_CHECK_ERROR branch
+    inline constexpr detail::zip_adapter<detail::Stride::GridThread> grid_thread_zip_view;
+    inline constexpr detail::zip_adapter<detail::Stride::GridBlock> grid_block_zip_view;
+#if defined(_CG_HAS_CLUSTER_GROUP)
+    inline constexpr detail::zip_adapter<detail::Stride::ClusterThread> cluster_thread_zip_view;
+    inline constexpr detail::zip_adapter<detail::Stride::ClusterBlock> cluster_block_zip_view;
+    inline constexpr detail::zip_adapter<detail::Stride::GridCluster> grid_cluster_zip_view;
+#endif
 #endif
 
     namespace views
diff --git a/test/test.cpp b/test/test.cpp
index 22e4f7e..3b071cd 100644
--- a/test/test.cpp
+++ b/test/test.cpp
@@ -2161,8 +2161,17 @@ requires std::ranges::input_range<std::ranges::range_value_t<T>> &&
          std::ranges::input_range<std::ranges::range_value_t<U>>
 __global__ void kernel_zip(T array1, const U array2)
 {
-    for (auto&& [xs, ys] : detail::zip_adapter<detail::Stride::GridBlock>{}(array1, array2))
-        for (auto&& [x, y] : detail::zip_adapter<detail::Stride::BlockThread>{}(xs, ys)) x = x + y;
+    for (auto&& [xs, ys] : views::grid_block_zip(array1, array2))
+        for (auto&& [x, y] : views::block_thread_zip(xs, ys)) x = x + y;
+}
+
+template <std::ranges::input_range T, std::ranges::input_range U>
+requires std::ranges::input_range<std::ranges::range_value_t<T>> &&
+         std::ranges::input_range<std::ranges::range_value_t<U>>
+__global__ void kernel_zip2(T array1, const U array2)
+{
+    for (auto&& [xs, ys] : grid_block_zip_view(array1, array2))
+        for (auto&& [x, y] : block_thread_zip_view(xs, ys)) x = x + y;
 }
 
 TEST(ZipView, Simple)
@@ -2171,7 +2180,6 @@ TEST(ZipView, Simple)
     auto array1 = managed_array(vec_vec);
     auto array2 = managed_array(vec_vec);
     zip_test_init<<<10, 20>>>(array1, 1);
-    zip_test_init<<<10, 20>>>(array2, 1000);
     api::gpuDeviceSynchronize();
     for (int i = 0; const auto& xs : array1)
     {
@@ -2183,6 +2191,7 @@ TEST(ZipView, Simple)
         ++i;
     }
 
+    zip_test_init<<<10, 20>>>(array2, 1000);
     kernel_zip<<<10, 20>>>(array1, array2);
     api::gpuDeviceSynchronize();
     for (int i = 0; const auto& xs : array1)
@@ -2194,6 +2203,20 @@ TEST(ZipView, Simple)
         }
         ++i;
     }
+
+    zip_test_init<<<10, 20>>>(array1, 1);
+    zip_test_init<<<10, 20>>>(array2, 2000);
+    kernel_zip2<<<10, 20>>>(array1, array2);
+    api::gpuDeviceSynchronize();
+    for (int i = 0; const auto& xs : array1)
+    {
+        for (int j = 0; const auto& x : xs)
+        {
+            EXPECT_EQ(x, (i * 20 + j) * 2001);
+            ++j;
+        }
+        ++i;
+    }
 }
 #endif
 // NOLINTEND

From e94b9870d51b11cd748b05c6eb24aa82aa2cf0d7 Mon Sep 17 00:00:00 2001
From: acd1034 <48613285+acd1034@users.noreply.github.com>
Date: Mon, 2 Mar 2026 17:32:53 +0900
Subject: [PATCH 09/19] =?UTF-8?q?=F0=9F=90=9B=20Make=20stride=5Fview=20sat?=
 =?UTF-8?q?isfy=20forward=5Frange?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 include/gpu_array.hpp | 1 +
 test/test.cpp         | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/include/gpu_array.hpp b/include/gpu_array.hpp
index b15d252..6182790 100644
--- a/include/gpu_array.hpp
+++ b/include/gpu_array.hpp
@@ -2775,6 +2775,7 @@ namespace gpu_array
             using value_type = std::ranges::range_value_t<Range>;
             using difference_type = std::make_signed_t<std::ranges::range_size_t<Range>>;
 
+            stride_iterator() = default;
             __host__ __device__ explicit stride_iterator(Range&& r) noexcept
                 : base(std::forward<Range>(r), get_initial_index())
             {
diff --git a/test/test.cpp b/test/test.cpp
index 3b071cd..c2494ba 100644
--- a/test/test.cpp
+++ b/test/test.cpp
@@ -2071,6 +2071,8 @@ TEST(JaggedArray, MemoryManagement)
 }
 
 #if !defined(ENABLE_HIP)
+static_assert(std::ranges::forward_range<detail::stride_view<detail::Stride::GridBlock, managed_array<int>&>>);
+
 template <std::ranges::input_range T>
 requires std::ranges::input_range<std::ranges::range_value_t<T>>
 __global__ void kernel_stride(T array)

From 9cb3387b785d2270e83b3117247eeea9358f2e26 Mon Sep 17 00:00:00 2001
From: acd1034 <48613285+acd1034@users.noreply.github.com>
Date: Mon, 2 Mar 2026 17:52:59 +0900
Subject: [PATCH 10/19] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Refactor=20enumerate?=
 =?UTF-8?q?=5Fiterator=20and=20enumerate=5Fview?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 include/gpu_array.hpp | 190 +++++++++++++++++++++++++++++++-----------
 1 file changed, 142 insertions(+), 48 deletions(-)

diff --git a/include/gpu_array.hpp b/include/gpu_array.hpp
index 6182790..5ce8f0b 100644
--- a/include/gpu_array.hpp
+++ b/include/gpu_array.hpp
@@ -2878,20 +2878,17 @@ namespace gpu_array
 
     namespace detail
     {
-        template <Stride StrideType, std::ranges::random_access_range Range>
+        template <std::ranges::random_access_range Range>
         requires std::is_lvalue_reference_v<Range&&>
         class enumerate_iterator
         {
         public:
-            using iterator_category = std::forward_iterator_tag;
+            using iterator_category = std::random_access_iterator_tag;
             using value_type = std::ranges::range_value_t<Range>;
             using difference_type = std::make_signed_t<std::ranges::range_size_t<Range>>;
 
             enumerate_iterator() = default;
-            __host__ __device__ explicit enumerate_iterator(Range&& r) noexcept
-                : pointer_(&r), index_(stride_iterator<StrideType, Range>::get_initial_index())
-            {
-            }
+            __host__ __device__ explicit enumerate_iterator(Range&& r) noexcept : pointer_(&r), index_(0) {}
             __host__ __device__ std::ranges::range_size_t<Range> index() const noexcept { return index_; }
             __host__ __device__ std::pair<std::ranges::range_size_t<Range>, std::ranges::range_reference_t<Range>>
             operator*() const noexcept
@@ -2900,7 +2897,7 @@ namespace gpu_array
             }
             __host__ __device__ enumerate_iterator& operator++() noexcept
             {
-                index_ += stride_iterator<StrideType, Range>::get_stride();
+                ++index_;
                 return *this;
             }
             __host__ __device__ enumerate_iterator operator++(int) noexcept
@@ -2909,61 +2906,145 @@ namespace gpu_array
                 ++(*this);
                 return res;
             }
-            __host__ __device__ bool operator==(const enumerate_iterator& it) const noexcept
+            __host__ __device__ enumerate_iterator& operator--() noexcept
+            {
+                --index_;
+                return *this;
+            }
+            __host__ __device__ enumerate_iterator operator--(int) noexcept
+            {
+                auto res = *this;
+                --(*this);
+                return res;
+            }
+            __host__ __device__ enumerate_iterator& operator+=(difference_type n)
+            {
+                index_ += n;
+                return *this;
+            }
+            __host__ __device__ enumerate_iterator& operator-=(difference_type n)
+            {
+                index_ -= n;
+                return *this;
+            }
+            __host__ __device__ std::pair<std::ranges::range_size_t<Range>, std::ranges::range_reference_t<Range>>
+            operator[](difference_type n) const
+            {
+                return *(*this + n);
+            }
+
+            __host__ __device__ bool operator==(const enumerate_iterator& it) const& noexcept
             {
                 return index_ == it.index_;
             }
+            __host__ __device__ bool operator<(const enumerate_iterator& it) const& noexcept
+            {
+                return index_ < it.index_;
+            }
+            __host__ __device__ bool operator>(const enumerate_iterator& it) const& noexcept
+            {
+                return index_ > it.index_;
+            }
+            __host__ __device__ bool operator<=(const enumerate_iterator& it) const& noexcept
+            {
+                return index_ <= it.index_;
+            }
+            __host__ __device__ bool operator>=(const enumerate_iterator& it) const& noexcept
+            {
+                return index_ >= it.index_;
+            }
+
+            __host__ __device__ friend enumerate_iterator operator+(enumerate_iterator x, difference_type n)
+            {
+                x += n;
+                return x;
+            }
+            __host__ __device__ friend enumerate_iterator operator+(difference_type n, enumerate_iterator x)
+            {
+                x += n;
+                return x;
+            }
+            __host__ __device__ friend enumerate_iterator operator-(enumerate_iterator x, difference_type n)
+            {
+                x -= n;
+                return x;
+            }
+            __host__ __device__ friend difference_type operator-(const enumerate_iterator& x,
+                                                                 const enumerate_iterator& y)
+            {
+                return x.index() - y.index();
+            }
+
+            __host__ __device__ friend std::pair<std::ranges::range_size_t<Range>,
+                                                 std::ranges::range_rvalue_reference_t<Range>>
+            iter_move(const enumerate_iterator& x)  // yields (index, rvalue reference to the element)
+            {
+                return {x.index(), std::move((*x).second)};  // no operator-> is declared; go through operator*
+            }
 
         private:
             std::remove_reference_t<Range>* pointer_ = nullptr;
             std::ranges::range_size_t<Range> index_ = 0;
         };
 
-        template <Stride StrideType, std::ranges::random_access_range Range>
+        template <std::ranges::random_access_range Range>
         requires std::is_lvalue_reference_v<Range&&> && std::ranges::sized_range<Range>
         class enumerate_sentinel
         {
+            using difference_type = std::make_signed_t<std::ranges::range_size_t<Range>>;
+
         public:
             enumerate_sentinel() = default;
             __host__ __device__ explicit enumerate_sentinel(Range&& r) noexcept : end_(r.size()) {}
-            __host__ __device__ friend bool operator==(const enumerate_iterator<StrideType, Range>& it,
+            __host__ __device__ friend bool operator==(const enumerate_iterator<Range>& it,
                                                        const enumerate_sentinel& se) noexcept
             {
                 return it.index() >= se.end_;
             }
 
+            __host__ __device__ friend difference_type operator-(const enumerate_iterator<Range>& it,
+                                                                 const enumerate_sentinel& se) noexcept
+            {
+                return it.index() - se.end_;
+            }
+            __host__ __device__ friend difference_type operator-(const enumerate_sentinel& se,
+                                                                 const enumerate_iterator<Range>& it) noexcept
+            {
+                return se.end_ - it.index();
+            }
+
         private:
             std::ranges::range_size_t<Range> end_ = 0;
         };
 
-        template <Stride StrideType, std::ranges::random_access_range Range>
+        template <std::ranges::random_access_range Range>
         requires std::is_lvalue_reference_v<Range&&>
-        class enumerate_view : public std::ranges::view_interface<enumerate_view<StrideType, Range>>
+        class enumerate_view : public std::ranges::view_interface<enumerate_view<Range>>
         {
         public:
             enumerate_view() = default;
             __host__ __device__ explicit enumerate_view(Range&& r) noexcept : pointer_(&r) {}
             [[nodiscard]] __host__ __device__ auto begin() const noexcept
             {
-                return enumerate_iterator<StrideType, Range>(*pointer_);
-            }
-            [[nodiscard]] __host__ __device__ auto end() const noexcept
-            {
-                return enumerate_sentinel<StrideType, Range>(*pointer_);
+                return enumerate_iterator<Range>(*pointer_);
             }
+            [[nodiscard]] __host__ __device__ auto end() const noexcept { return enumerate_sentinel<Range>(*pointer_); }
+            [[nodiscard]] __host__ __device__ auto size() const noexcept { return pointer_->size(); }
 
         private:
             std::remove_reference_t<Range>* pointer_ = nullptr;
         };
 
-        template <Stride StrideType>
+        template <class Range>
+        enumerate_view(Range&) -> enumerate_view<Range&>;
+
         struct enumerate_adapter
         {
             template <std::ranges::random_access_range Range>
             requires std::ranges::sized_range<Range>
             [[nodiscard]] __host__ __device__ auto operator()(Range& r) const noexcept
             {
-                return enumerate_view<StrideType, Range&>(r);
+                return enumerate_view<Range&>(r);
             }
 
             template <std::ranges::random_access_range Range>
@@ -2977,44 +3058,57 @@ namespace gpu_array
     }  // namespace detail
 
 #if !defined(ENABLE_HIP)
-    template <std::ranges::random_access_range Range>
-    using block_thread_enumerate_view = detail::enumerate_view<detail::Stride::BlockThread, Range>;
-    template <std::ranges::random_access_range Range>
-    using grid_thread_enumerate_view = detail::enumerate_view<detail::Stride::GridThread, Range>;
-    template <std::ranges::random_access_range Range>
-    using grid_block_enumerate_view = detail::enumerate_view<detail::Stride::GridBlock, Range>;
-
-    template <std::ranges::random_access_range Range>
-    using cluster_thread_enumerate_view = detail::enumerate_view<detail::Stride::ClusterThread, Range>;
-    template <std::ranges::random_access_range Range>
-    using cluster_block_enumerate_view = detail::enumerate_view<detail::Stride::ClusterBlock, Range>;
-    template <std::ranges::random_access_range Range>
-    using grid_cluster_enumerate_view = detail::enumerate_view<detail::Stride::GridCluster, Range>;
+    using detail::enumerate_view;
 #endif
 
     namespace views
     {
 #ifdef GPU_CHECK_ERROR
-        __device__ static constexpr detail::enumerate_adapter<Stride::BlockThread> block_thread_enumerate;
-        __device__ static constexpr detail::enumerate_adapter<Stride::GridThread> grid_thread_enumerate;
-        __device__ static constexpr detail::enumerate_adapter<Stride::GridBlock> grid_block_enumerate;
-#if defined(_CG_HAS_CLUSTER_GROUP)
-        __device__ static constexpr detail::enumerate_adapter<Stride::ClusterThread> cluster_thread_enumerate;
-        __device__ static constexpr detail::enumerate_adapter<Stride::ClusterBlock> cluster_block_enumerate;
-        __device__ static constexpr detail::enumerate_adapter<Stride::GridCluster> grid_cluster_enumerate;
-#endif
+        __device__ static constexpr detail::enumerate_adapter enumerate;
 #else
-        inline constexpr detail::enumerate_adapter<Stride::BlockThread> block_thread_enumerate;
-        inline constexpr detail::enumerate_adapter<Stride::GridThread> grid_thread_enumerate;
-        inline constexpr detail::enumerate_adapter<Stride::GridBlock> grid_block_enumerate;
-#if defined(_CG_HAS_CLUSTER_GROUP)
-        inline constexpr detail::enumerate_adapter<Stride::ClusterThread> cluster_thread_enumerate;
-        inline constexpr detail::enumerate_adapter<Stride::ClusterBlock> cluster_block_enumerate;
-        inline constexpr detail::enumerate_adapter<Stride::GridCluster> grid_cluster_enumerate;
-#endif
+        inline constexpr detail::enumerate_adapter enumerate;
 #endif
     }  // namespace views
 
+    // #if !defined(ENABLE_HIP)
+    //     template <std::ranges::random_access_range Range>
+    //     using block_thread_enumerate_view = detail::enumerate_view<detail::Stride::BlockThread, Range>;
+    //     template <std::ranges::random_access_range Range>
+    //     using grid_thread_enumerate_view = detail::enumerate_view<detail::Stride::GridThread, Range>;
+    //     template <std::ranges::random_access_range Range>
+    //     using grid_block_enumerate_view = detail::enumerate_view<detail::Stride::GridBlock, Range>;
+
+    //     template <std::ranges::random_access_range Range>
+    //     using cluster_thread_enumerate_view = detail::enumerate_view<detail::Stride::ClusterThread, Range>;
+    //     template <std::ranges::random_access_range Range>
+    //     using cluster_block_enumerate_view = detail::enumerate_view<detail::Stride::ClusterBlock, Range>;
+    //     template <std::ranges::random_access_range Range>
+    //     using grid_cluster_enumerate_view = detail::enumerate_view<detail::Stride::GridCluster, Range>;
+    // #endif
+
+    //     namespace views
+    //     {
+    // #ifdef GPU_CHECK_ERROR
+    //         __device__ static constexpr detail::enumerate_adapter<Stride::BlockThread> block_thread_enumerate;
+    //         __device__ static constexpr detail::enumerate_adapter<Stride::GridThread> grid_thread_enumerate;
+    //         __device__ static constexpr detail::enumerate_adapter<Stride::GridBlock> grid_block_enumerate;
+    // #if defined(_CG_HAS_CLUSTER_GROUP)
+    //         __device__ static constexpr detail::enumerate_adapter<Stride::ClusterThread> cluster_thread_enumerate;
+    //         __device__ static constexpr detail::enumerate_adapter<Stride::ClusterBlock> cluster_block_enumerate;
+    //         __device__ static constexpr detail::enumerate_adapter<Stride::GridCluster> grid_cluster_enumerate;
+    // #endif
+    // #else
+    //         inline constexpr detail::enumerate_adapter<Stride::BlockThread> block_thread_enumerate;
+    //         inline constexpr detail::enumerate_adapter<Stride::GridThread> grid_thread_enumerate;
+    //         inline constexpr detail::enumerate_adapter<Stride::GridBlock> grid_block_enumerate;
+    // #if defined(_CG_HAS_CLUSTER_GROUP)
+    //         inline constexpr detail::enumerate_adapter<Stride::ClusterThread> cluster_thread_enumerate;
+    //         inline constexpr detail::enumerate_adapter<Stride::ClusterBlock> cluster_block_enumerate;
+    //         inline constexpr detail::enumerate_adapter<Stride::GridCluster> grid_cluster_enumerate;
+    // #endif
+    // #endif
+    //     }  // namespace views
+
     namespace detail
     {
         template <class... Ranges>

From 77ee080020f6b01171f71504394854d484c241fb Mon Sep 17 00:00:00 2001
From: acd1034 <48613285+acd1034@users.noreply.github.com>
Date: Mon, 2 Mar 2026 19:05:38 +0900
Subject: [PATCH 11/19] =?UTF-8?q?=E2=9C=A8=20Make=20stride=5Fview=20satisf?=
 =?UTF-8?q?y=20ranges::view?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 include/gpu_array.hpp |  52 +++++++++------
 test/test.cpp         | 147 +++++++++++++++++++++---------------------
 2 files changed, 105 insertions(+), 94 deletions(-)

diff --git a/include/gpu_array.hpp b/include/gpu_array.hpp
index 5ce8f0b..a0fad72 100644
--- a/include/gpu_array.hpp
+++ b/include/gpu_array.hpp
@@ -2618,11 +2618,11 @@ namespace gpu_array
         };
 
         template <std::ranges::random_access_range Range>
-        requires std::is_lvalue_reference_v<Range&&> && std::ranges::sized_range<Range>
+        requires std::ranges::view<Range> && std::ranges::sized_range<Range>
         class stride_sentinel;
 
         template <std::ranges::random_access_range Range>
-        requires std::is_lvalue_reference_v<Range&&> && std::ranges::sized_range<Range>
+        requires std::ranges::view<Range> && std::ranges::sized_range<Range>
         class stride_iterator_base
         {
             template <typename T>
@@ -2637,18 +2637,17 @@ namespace gpu_array
             }
 
         protected:
-            __host__ __device__ explicit stride_iterator_base(Range&& r,
-                                                              std::ranges::range_size_t<Range> index) noexcept
+            __host__ __device__ explicit stride_iterator_base(Range& r, std::ranges::range_size_t<Range> index) noexcept
                 : pointer_(&r), index_(index)
             {
             }
 
-            std::remove_reference_t<Range>* pointer_ = nullptr;
+            Range* pointer_ = nullptr;
             std::ranges::range_size_t<Range> index_ = 0;
         };
 
         template <std::ranges::random_access_range Range>
-        requires std::is_lvalue_reference_v<Range&&> && std::ranges::sized_range<Range>
+        requires std::ranges::view<Range> && std::ranges::sized_range<Range>
         class stride_sentinel
         {
             template <typename T>
@@ -2657,7 +2656,7 @@ namespace gpu_array
 
         public:
             stride_sentinel() = default;
-            __host__ __device__ explicit stride_sentinel(Range&& r) noexcept : end_(r.size()) {}
+            __host__ __device__ explicit stride_sentinel(const Range& r) noexcept : end_(r.size()) {}
 
         protected:
             std::ranges::range_size_t<Range> end_ = 0;
@@ -2671,7 +2670,7 @@ namespace gpu_array
         }
 
         template <Stride StrideType, std::ranges::random_access_range Range>
-        requires std::is_lvalue_reference_v<Range&&>
+        requires std::ranges::view<Range>
         class stride_iterator : public stride_iterator_base<Range>
         {
             using base = stride_iterator_base<Range>;
@@ -2776,10 +2775,7 @@ namespace gpu_array
             using difference_type = std::make_signed_t<std::ranges::range_size_t<Range>>;
 
             stride_iterator() = default;
-            __host__ __device__ explicit stride_iterator(Range&& r) noexcept
-                : base(std::forward<Range>(r), get_initial_index())
-            {
-            }
+            __host__ __device__ explicit stride_iterator(Range& r) noexcept : base(r, get_initial_index()) {}
             __host__ __device__ stride_iterator& operator++() noexcept
             {
                 base::index_ += get_stride();
@@ -2798,20 +2794,30 @@ namespace gpu_array
         };
 
         template <Stride StrideType, std::ranges::random_access_range Range>
-        requires std::is_lvalue_reference_v<Range&&>
+        requires std::ranges::view<Range>
         class stride_view : public std::ranges::view_interface<stride_view<StrideType, Range>>
         {
         public:
             stride_view() = default;
-            __host__ __device__ explicit stride_view(Range&& r) noexcept : pointer_(&r) {}
+            __host__ __device__ explicit stride_view(Range r) noexcept : range_(std::move(r)) {}  // sink: move in
+            [[nodiscard]] __host__ __device__ auto begin() noexcept
+            {
+                return stride_iterator<StrideType, Range>(range_);
+            }
             [[nodiscard]] __host__ __device__ auto begin() const noexcept
+            requires std::is_const_v<Range>
             {
-                return stride_iterator<StrideType, Range>(*pointer_);
+                return stride_iterator<StrideType, Range>(range_);
+            }
+            [[nodiscard]] __host__ __device__ auto end() noexcept { return stride_sentinel<Range>(range_); }
+            [[nodiscard]] __host__ __device__ auto end() const noexcept
+            requires std::is_const_v<Range>
+            {
+                return stride_sentinel<Range>(range_);
             }
-            [[nodiscard]] __host__ __device__ auto end() const noexcept { return stride_sentinel<Range>(*pointer_); }
 
         private:
-            std::remove_reference_t<Range>* pointer_ = nullptr;
+            Range range_{};
         };
 
         template <Stride StrideType>
@@ -2819,17 +2825,17 @@ namespace gpu_array
         {
             template <std::ranges::random_access_range Range>
             requires std::ranges::sized_range<Range>
-            [[nodiscard]] constexpr auto operator()(Range& r) const noexcept
+            [[nodiscard]] constexpr auto operator()(Range&& r) const noexcept
             {
-                return stride_view<StrideType, Range&>(r);
+                return stride_view<StrideType, std::remove_reference_t<Range>>(std::forward<Range>(r));
             }
 
             template <std::ranges::random_access_range Range>
             requires std::ranges::sized_range<Range>
-            [[nodiscard]] friend constexpr std::ranges::view auto operator|(Range& range,
+            [[nodiscard]] friend constexpr std::ranges::view auto operator|(Range&& r,
                                                                             const stride_adapter& self) noexcept
             {
-                return self(range);
+                return self(std::forward<Range>(r));
             }
         };
     }  // namespace detail
@@ -3278,6 +3284,10 @@ inline constexpr bool std::ranges::enable_borrowed_range<gpu_array::jagged_array
 #endif
 template <typename... Ts>
 inline constexpr bool std::ranges::enable_borrowed_range<gpu_array::detail::subrange<Ts...>> = true;
+template <gpu_array::detail::Stride StrideType, std::ranges::random_access_range Range>
+inline constexpr bool std::ranges::enable_view<gpu_array::detail::stride_view<StrideType, Range>> = true;
+template <std::ranges::random_access_range Range>  // the gpu_array:: alias is absent under ENABLE_HIP
+inline constexpr bool std::ranges::enable_view<gpu_array::detail::enumerate_view<Range>> = true;
 
 #undef SIGSEGV_DEPRECATED
 #undef INCR_GPU_MEMORY_USAGE
diff --git a/test/test.cpp b/test/test.cpp
index c2494ba..a109597 100644
--- a/test/test.cpp
+++ b/test/test.cpp
@@ -2071,7 +2071,8 @@ TEST(JaggedArray, MemoryManagement)
 }
 
 #if !defined(ENABLE_HIP)
-static_assert(std::ranges::forward_range<detail::stride_view<detail::Stride::GridBlock, managed_array<int>&>>);
+static_assert(std::ranges::forward_range<detail::stride_view<detail::Stride::GridBlock, managed_array<int>>>);
+static_assert(std::ranges::view<detail::stride_view<detail::Stride::GridBlock, managed_array<int>>>);
 
 template <std::ranges::input_range T>
 requires std::ranges::input_range<std::ranges::range_value_t<T>>
@@ -2128,8 +2129,8 @@ template <std::ranges::input_range T>
 requires std::ranges::input_range<std::ranges::range_value_t<T>>
 __global__ void kernel_enumerate(T array)
 {
-    for (auto&& [i, xs] : grid_block_enumerate_view(array))
-        for (auto&& [j, x] : block_thread_enumerate_view(xs)) x = i * 100 + j;
+    for (auto&& [i, xs] : enumerate_view(array) | views::grid_block_stride)
+        for (auto&& [j, x] : enumerate_view(xs) | views::block_thread_stride) x = i * 100 + j;
 }
 
 TEST(EnumerateView, Simple)
@@ -2150,75 +2151,75 @@ TEST(EnumerateView, Simple)
     }
 }
 
-template <std::ranges::input_range T>
-requires std::ranges::input_range<std::ranges::range_value_t<T>>
-__global__ void zip_test_init(T array, int coeff)
-{
-    for (auto&& [i, xs] : grid_block_enumerate_view(array))
-        for (auto&& [j, x] : block_thread_enumerate_view(xs)) x = (i * xs.size() + j) * coeff;
-}
-
-template <std::ranges::input_range T, std::ranges::input_range U>
-requires std::ranges::input_range<std::ranges::range_value_t<T>> &&
-         std::ranges::input_range<std::ranges::range_value_t<U>>
-__global__ void kernel_zip(T array1, const U array2)
-{
-    for (auto&& [xs, ys] : views::grid_block_zip(array1, array2))
-        for (auto&& [x, y] : views::block_thread_zip(xs, ys)) x = x + y;
-}
-
-template <std::ranges::input_range T, std::ranges::input_range U>
-requires std::ranges::input_range<std::ranges::range_value_t<T>> &&
-         std::ranges::input_range<std::ranges::range_value_t<U>>
-__global__ void kernel_zip2(T array1, const U array2)
-{
-    for (auto&& [xs, ys] : grid_block_zip_view(array1, array2))
-        for (auto&& [x, y] : block_thread_zip_view(xs, ys)) x = x + y;
-}
-
-TEST(ZipView, Simple)
-{
-    auto vec_vec = std::vector(10, std::vector<int>(20, 0));
-    auto array1 = managed_array(vec_vec);
-    auto array2 = managed_array(vec_vec);
-    zip_test_init<<<10, 20>>>(array1, 1);
-    api::gpuDeviceSynchronize();
-    for (int i = 0; const auto& xs : array1)
-    {
-        for (int j = 0; const auto& x : xs)
-        {
-            EXPECT_EQ(x, i * 20 + j);
-            ++j;
-        }
-        ++i;
-    }
-
-    zip_test_init<<<10, 20>>>(array2, 1000);
-    kernel_zip<<<10, 20>>>(array1, array2);
-    api::gpuDeviceSynchronize();
-    for (int i = 0; const auto& xs : array1)
-    {
-        for (int j = 0; const auto& x : xs)
-        {
-            EXPECT_EQ(x, (i * 20 + j) * 1001);
-            ++j;
-        }
-        ++i;
-    }
-
-    zip_test_init<<<10, 20>>>(array1, 1);
-    zip_test_init<<<10, 20>>>(array2, 2000);
-    kernel_zip2<<<10, 20>>>(array1, array2);
-    api::gpuDeviceSynchronize();
-    for (int i = 0; const auto& xs : array1)
-    {
-        for (int j = 0; const auto& x : xs)
-        {
-            EXPECT_EQ(x, (i * 20 + j) * 2001);
-            ++j;
-        }
-        ++i;
-    }
-}
+// template <std::ranges::input_range T>
+// requires std::ranges::input_range<std::ranges::range_value_t<T>>
+// __global__ void zip_test_init(T array, int coeff)
+// {
+//     for (auto&& [i, xs] : grid_block_enumerate_view(array))
+//         for (auto&& [j, x] : block_thread_enumerate_view(xs)) x = (i * xs.size() + j) * coeff;
+// }
+
+// template <std::ranges::input_range T, std::ranges::input_range U>
+// requires std::ranges::input_range<std::ranges::range_value_t<T>> &&
+//          std::ranges::input_range<std::ranges::range_value_t<U>>
+// __global__ void kernel_zip(T array1, const U array2)
+// {
+//     for (auto&& [xs, ys] : views::grid_block_zip(array1, array2))
+//         for (auto&& [x, y] : views::block_thread_zip(xs, ys)) x = x + y;
+// }
+
+// template <std::ranges::input_range T, std::ranges::input_range U>
+// requires std::ranges::input_range<std::ranges::range_value_t<T>> &&
+//          std::ranges::input_range<std::ranges::range_value_t<U>>
+// __global__ void kernel_zip2(T array1, const U array2)
+// {
+//     for (auto&& [xs, ys] : grid_block_zip_view(array1, array2))
+//         for (auto&& [x, y] : block_thread_zip_view(xs, ys)) x = x + y;
+// }
+
+// TEST(ZipView, Simple)
+// {
+//     auto vec_vec = std::vector(10, std::vector<int>(20, 0));
+//     auto array1 = managed_array(vec_vec);
+//     auto array2 = managed_array(vec_vec);
+//     zip_test_init<<<10, 20>>>(array1, 1);
+//     api::gpuDeviceSynchronize();
+//     for (int i = 0; const auto& xs : array1)
+//     {
+//         for (int j = 0; const auto& x : xs)
+//         {
+//             EXPECT_EQ(x, i * 20 + j);
+//             ++j;
+//         }
+//         ++i;
+//     }
+
+//     zip_test_init<<<10, 20>>>(array2, 1000);
+//     kernel_zip<<<10, 20>>>(array1, array2);
+//     api::gpuDeviceSynchronize();
+//     for (int i = 0; const auto& xs : array1)
+//     {
+//         for (int j = 0; const auto& x : xs)
+//         {
+//             EXPECT_EQ(x, (i * 20 + j) * 1001);
+//             ++j;
+//         }
+//         ++i;
+//     }
+
+//     zip_test_init<<<10, 20>>>(array1, 1);
+//     zip_test_init<<<10, 20>>>(array2, 2000);
+//     kernel_zip2<<<10, 20>>>(array1, array2);
+//     api::gpuDeviceSynchronize();
+//     for (int i = 0; const auto& xs : array1)
+//     {
+//         for (int j = 0; const auto& x : xs)
+//         {
+//             EXPECT_EQ(x, (i * 20 + j) * 2001);
+//             ++j;
+//         }
+//         ++i;
+//     }
+// }
 #endif
 // NOLINTEND

From f279a2f82d8c26354041f33f458f1518f2f2951b Mon Sep 17 00:00:00 2001
From: acd1034 <48613285+acd1034@users.noreply.github.com>
Date: Mon, 2 Mar 2026 23:04:28 +0900
Subject: [PATCH 12/19] =?UTF-8?q?=E2=9C=A8=20Make=20enumerate=5Fview=20sat?=
 =?UTF-8?q?isfy=20ranges::view?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 include/gpu_array.hpp | 266 ++++++++++++++++++++++++++++--------------
 test/test.cpp         |   4 +
 2 files changed, 182 insertions(+), 88 deletions(-)

diff --git a/include/gpu_array.hpp b/include/gpu_array.hpp
index a0fad72..f5dd90a 100644
--- a/include/gpu_array.hpp
+++ b/include/gpu_array.hpp
@@ -48,6 +48,25 @@
 #define SIGSEGV_DEPRECATED [[deprecated("Cannot access GPU memory directly")]]
 #endif
 
+#if !defined(__cpp_lib_tuple_like) || __cpp_lib_tuple_like < 202207L
+template <class T1, class T2, class U1, class U2, template <class> class TQual, template <class> class UQual>
+requires requires {
+    typename std::pair<std::common_reference_t<TQual<T1>, UQual<U1>>, std::common_reference_t<TQual<T2>, UQual<U2>>>;
+}
+struct std::basic_common_reference<std::pair<T1, T2>, std::pair<U1, U2>, TQual, UQual>
+{
+    using type =
+        std::pair<std::common_reference_t<TQual<T1>, UQual<U1>>, std::common_reference_t<TQual<T2>, UQual<U2>>>;
+};
+
+template <class T1, class T2, class U1, class U2>
+requires requires { typename std::pair<std::common_type_t<T1, U1>, std::common_type_t<T2, U2>>; }
+struct std::common_type<std::pair<T1, T2>, std::pair<U1, U2>>
+{
+    using type = std::pair<std::common_type_t<T1, U1>, std::common_type_t<T2, U2>>;
+};
+#endif
+
 namespace gpu_array
 {
 #if defined(GPU_USE_32BIT_SIZE_TYPE_DEFAULT)
@@ -2605,6 +2624,112 @@ namespace gpu_array
 
     namespace detail
     {
+        // WORKAROUND: std::common_reference_with does not work correctly in C++20, so this concept accepts any type
+        template <class Range>
+        concept RandomAccessRange = true;
+
+        template <class Derived>
+        requires std::is_class_v<Derived> && std::same_as<Derived, std::remove_cv_t<Derived>>
+        class ViewInterface
+        {
+            __host__ __device__ Derived& derived() noexcept { return static_cast<Derived&>(*this); }
+
+            __host__ __device__ Derived const& derived() const noexcept { return static_cast<Derived const&>(*this); }
+
+            template <class V>
+            __host__ __device__ static auto to_unsigned(V v)
+            {
+                return static_cast<std::make_unsigned_t<V>>(v);
+            }
+
+        public:
+            template <class D2 = Derived>
+            [[nodiscard]] __host__ __device__ bool empty()
+            requires std::ranges::sized_range<D2> || RandomAccessRange<D2>
+            {
+                if constexpr (std::ranges::sized_range<D2>)
+                {
+                    return derived().size() == 0;
+                }
+                else
+                {
+                    return derived().begin() == derived().end();
+                }
+            }
+
+            template <class D2 = Derived>
+            [[nodiscard]] __host__ __device__ bool empty() const
+            requires std::ranges::sized_range<const D2> || RandomAccessRange<const D2>
+            {
+                if constexpr (std::ranges::sized_range<const D2>)
+                {
+                    return derived().size() == 0;
+                }
+                else
+                {
+                    return derived().begin() == derived().end();
+                }
+            }
+
+            template <class D2 = Derived>
+            __host__ __device__ explicit operator bool()
+            requires requires(D2& t) { std::ranges::empty(t); }
+            {
+                return !std::ranges::empty(derived());
+            }
+
+            template <class D2 = Derived>
+            __host__ __device__ explicit operator bool() const
+            requires requires(const D2& t) { std::ranges::empty(t); }
+            {
+                return !std::ranges::empty(derived());
+            }
+
+            template <class D2 = Derived>
+            [[nodiscard]] __host__ __device__ auto size()
+            requires RandomAccessRange<D2> &&
+                     std::sized_sentinel_for<std::ranges::sentinel_t<D2>, std::ranges::iterator_t<D2>>
+            {
+                return to_unsigned(derived().end() - derived().begin());
+            }
+
+            template <class D2 = Derived>
+            [[nodiscard]] __host__ __device__ auto size() const
+            requires RandomAccessRange<const D2> &&
+                     std::sized_sentinel_for<std::ranges::sentinel_t<const D2>, std::ranges::iterator_t<const D2>>
+            {
+                return to_unsigned(derived().end() - derived().begin());
+            }
+
+            template <class D2 = Derived>
+            [[nodiscard]] __host__ __device__ decltype(auto) front()
+            requires RandomAccessRange<D2>
+            {
+                return *derived().begin();
+            }
+
+            template <class D2 = Derived>
+            [[nodiscard]] __host__ __device__ decltype(auto) front() const
+            requires RandomAccessRange<const D2>
+            {
+                return *derived().begin();
+            }
+
+            template <class D2 = Derived>
+            [[nodiscard]] __host__ __device__ decltype(auto) operator[](std::ranges::range_difference_t<D2> index)
+            requires RandomAccessRange<D2>
+            {
+                return derived().begin()[index];
+            }
+
+            template <class D2 = const Derived>
+            [[nodiscard]] __host__ __device__ decltype(auto) operator[](std::ranges::range_difference_t<D2> index) const
+            requires RandomAccessRange<const D2>
+            {
+                return derived().begin()[index];
+            }
+        };
+
         enum class Stride : std::uint8_t
         {
             BlockThread,
@@ -2617,11 +2742,11 @@ namespace gpu_array
 #endif
         };
 
-        template <std::ranges::random_access_range Range>
+        template <RandomAccessRange Range>
         requires std::ranges::view<Range> && std::ranges::sized_range<Range>
         class stride_sentinel;
 
-        template <std::ranges::random_access_range Range>
+        template <RandomAccessRange Range>
         requires std::ranges::view<Range> && std::ranges::sized_range<Range>
         class stride_iterator_base
         {
@@ -2646,7 +2771,7 @@ namespace gpu_array
             std::ranges::range_size_t<Range> index_ = 0;
         };
 
-        template <std::ranges::random_access_range Range>
+        template <RandomAccessRange Range>
         requires std::ranges::view<Range> && std::ranges::sized_range<Range>
         class stride_sentinel
         {
@@ -2669,7 +2794,7 @@ namespace gpu_array
             return it.index_ >= se.end_;
         }
 
-        template <Stride StrideType, std::ranges::random_access_range Range>
+        template <Stride StrideType, RandomAccessRange Range>
         requires std::ranges::view<Range>
         class stride_iterator : public stride_iterator_base<Range>
         {
@@ -2793,9 +2918,9 @@ namespace gpu_array
             }
         };
 
-        template <Stride StrideType, std::ranges::random_access_range Range>
+        template <Stride StrideType, RandomAccessRange Range>
         requires std::ranges::view<Range>
-        class stride_view : public std::ranges::view_interface<stride_view<StrideType, Range>>
+        class stride_view : public ViewInterface<stride_view<StrideType, Range>>
         {
         public:
             stride_view() = default;
@@ -2823,17 +2948,17 @@ namespace gpu_array
         template <Stride StrideType>
         struct stride_adapter
         {
-            template <std::ranges::random_access_range Range>
+            template <RandomAccessRange Range>
             requires std::ranges::sized_range<Range>
-            [[nodiscard]] constexpr auto operator()(Range&& r) const noexcept
+            [[nodiscard]] __host__ __device__ auto operator()(Range&& r) const noexcept
             {
                 return stride_view<StrideType, std::remove_reference_t<Range>>(std::forward<Range>(r));
             }
 
-            template <std::ranges::random_access_range Range>
+            template <RandomAccessRange Range>
             requires std::ranges::sized_range<Range>
-            [[nodiscard]] friend constexpr std::ranges::view auto operator|(Range&& r,
-                                                                            const stride_adapter& self) noexcept
+            [[nodiscard]] __host__ __device__ friend std::ranges::view auto operator|(
+                Range&& r, const stride_adapter& self) noexcept
             {
                 return self(std::forward<Range>(r));
             }
@@ -2843,18 +2968,18 @@ namespace gpu_array
 #if !defined(ENABLE_HIP)
     // The following three alias templates are also disabled in HIP because HIP does not support alias template argument
     // deduction.
-    template <std::ranges::random_access_range Range>
+    template <detail::RandomAccessRange Range>
     using block_thread_stride_view = detail::stride_view<detail::Stride::BlockThread, Range>;
-    template <std::ranges::random_access_range Range>
+    template <detail::RandomAccessRange Range>
     using grid_thread_stride_view = detail::stride_view<detail::Stride::GridThread, Range>;
-    template <std::ranges::random_access_range Range>
+    template <detail::RandomAccessRange Range>
     using grid_block_stride_view = detail::stride_view<detail::Stride::GridBlock, Range>;
 
-    template <std::ranges::random_access_range Range>
+    template <detail::RandomAccessRange Range>
     using cluster_thread_stride_view = detail::stride_view<detail::Stride::ClusterThread, Range>;
-    template <std::ranges::random_access_range Range>
+    template <detail::RandomAccessRange Range>
     using cluster_block_stride_view = detail::stride_view<detail::Stride::ClusterBlock, Range>;
-    template <std::ranges::random_access_range Range>
+    template <detail::RandomAccessRange Range>
     using grid_cluster_stride_view = detail::stride_view<detail::Stride::GridCluster, Range>;
 #endif
 
@@ -2884,17 +3009,17 @@ namespace gpu_array
 
     namespace detail
     {
-        template <std::ranges::random_access_range Range>
-        requires std::is_lvalue_reference_v<Range&&>
+        template <RandomAccessRange Range>
+        requires std::ranges::view<Range>
         class enumerate_iterator
         {
         public:
             using iterator_category = std::random_access_iterator_tag;
-            using value_type = std::ranges::range_value_t<Range>;
+            using value_type = std::pair<std::ranges::range_size_t<Range>, std::ranges::range_value_t<Range>>;
             using difference_type = std::make_signed_t<std::ranges::range_size_t<Range>>;
 
             enumerate_iterator() = default;
-            __host__ __device__ explicit enumerate_iterator(Range&& r) noexcept : pointer_(&r), index_(0) {}
+            __host__ __device__ explicit enumerate_iterator(Range& r) noexcept : pointer_(&r), index_(0) {}
             __host__ __device__ std::ranges::range_size_t<Range> index() const noexcept { return index_; }
             __host__ __device__ std::pair<std::ranges::range_size_t<Range>, std::ranges::range_reference_t<Range>>
             operator*() const noexcept
@@ -2989,19 +3114,19 @@ namespace gpu_array
             }
 
         private:
-            std::remove_reference_t<Range>* pointer_ = nullptr;
+            Range* pointer_ = nullptr;
             std::ranges::range_size_t<Range> index_ = 0;
         };
 
-        template <std::ranges::random_access_range Range>
-        requires std::is_lvalue_reference_v<Range&&> && std::ranges::sized_range<Range>
+        template <RandomAccessRange Range>
+        requires std::ranges::view<Range> && std::ranges::sized_range<Range>
         class enumerate_sentinel
         {
             using difference_type = std::make_signed_t<std::ranges::range_size_t<Range>>;
 
         public:
             enumerate_sentinel() = default;
-            __host__ __device__ explicit enumerate_sentinel(Range&& r) noexcept : end_(r.size()) {}
+            __host__ __device__ explicit enumerate_sentinel(Range& r) noexcept : end_(r.size()) {}
             __host__ __device__ friend bool operator==(const enumerate_iterator<Range>& it,
                                                        const enumerate_sentinel& se) noexcept
             {
@@ -3023,42 +3148,46 @@ namespace gpu_array
             std::ranges::range_size_t<Range> end_ = 0;
         };
 
-        template <std::ranges::random_access_range Range>
-        requires std::is_lvalue_reference_v<Range&&>
-        class enumerate_view : public std::ranges::view_interface<enumerate_view<Range>>
+        template <RandomAccessRange Range>
+        requires std::ranges::view<Range>
+        class enumerate_view : public ViewInterface<enumerate_view<Range>>
         {
         public:
             enumerate_view() = default;
-            __host__ __device__ explicit enumerate_view(Range&& r) noexcept : pointer_(&r) {}
+            __host__ __device__ explicit enumerate_view(Range r) noexcept : range_(r) {}
+            [[nodiscard]] __host__ __device__ auto begin() noexcept { return enumerate_iterator<Range>(range_); }
             [[nodiscard]] __host__ __device__ auto begin() const noexcept
+            requires std::is_const_v<Range>
             {
-                return enumerate_iterator<Range>(*pointer_);
+                return enumerate_iterator<Range>(range_);
             }
-            [[nodiscard]] __host__ __device__ auto end() const noexcept { return enumerate_sentinel<Range>(*pointer_); }
-            [[nodiscard]] __host__ __device__ auto size() const noexcept { return pointer_->size(); }
+            [[nodiscard]] __host__ __device__ auto end() noexcept { return enumerate_sentinel<Range>(range_); }
+            [[nodiscard]] __host__ __device__ auto end() const noexcept
+            requires std::is_const_v<Range>
+            {
+                return enumerate_sentinel<Range>(range_);
+            }
+            [[nodiscard]] __host__ __device__ auto size() const noexcept { return range_.size(); }
 
         private:
-            std::remove_reference_t<Range>* pointer_ = nullptr;
+            Range range_{};
         };
 
-        template <class Range>
-        enumerate_view(Range&) -> enumerate_view<Range&>;
-
         struct enumerate_adapter
         {
-            template <std::ranges::random_access_range Range>
+            template <RandomAccessRange Range>
             requires std::ranges::sized_range<Range>
-            [[nodiscard]] __host__ __device__ auto operator()(Range& r) const noexcept
+            [[nodiscard]] __host__ __device__ auto operator()(Range&& r) const noexcept
             {
-                return enumerate_view<Range&>(r);
+                return enumerate_view<std::remove_reference_t<Range>>(std::forward<Range>(r));
             }
 
-            template <std::ranges::random_access_range Range>
+            template <RandomAccessRange Range>
             requires std::ranges::sized_range<Range>
             [[nodiscard]] __host__ __device__ friend std::ranges::view auto operator|(
-                Range& range, const enumerate_adapter& self) noexcept
+                Range&& r, const enumerate_adapter& self) noexcept
             {
-                return self(range);
+                return self(std::forward<Range>(r));
             }
         };
     }  // namespace detail
@@ -3076,51 +3205,12 @@ namespace gpu_array
 #endif
     }  // namespace views
 
-    // #if !defined(ENABLE_HIP)
-    //     template <std::ranges::random_access_range Range>
-    //     using block_thread_enumerate_view = detail::enumerate_view<detail::Stride::BlockThread, Range>;
-    //     template <std::ranges::random_access_range Range>
-    //     using grid_thread_enumerate_view = detail::enumerate_view<detail::Stride::GridThread, Range>;
-    //     template <std::ranges::random_access_range Range>
-    //     using grid_block_enumerate_view = detail::enumerate_view<detail::Stride::GridBlock, Range>;
-
-    //     template <std::ranges::random_access_range Range>
-    //     using cluster_thread_enumerate_view = detail::enumerate_view<detail::Stride::ClusterThread, Range>;
-    //     template <std::ranges::random_access_range Range>
-    //     using cluster_block_enumerate_view = detail::enumerate_view<detail::Stride::ClusterBlock, Range>;
-    //     template <std::ranges::random_access_range Range>
-    //     using grid_cluster_enumerate_view = detail::enumerate_view<detail::Stride::GridCluster, Range>;
-    // #endif
-
-    //     namespace views
-    //     {
-    // #ifdef GPU_CHECK_ERROR
-    //         __device__ static constexpr detail::enumerate_adapter<Stride::BlockThread> block_thread_enumerate;
-    //         __device__ static constexpr detail::enumerate_adapter<Stride::GridThread> grid_thread_enumerate;
-    //         __device__ static constexpr detail::enumerate_adapter<Stride::GridBlock> grid_block_enumerate;
-    // #if defined(_CG_HAS_CLUSTER_GROUP)
-    //         __device__ static constexpr detail::enumerate_adapter<Stride::ClusterThread> cluster_thread_enumerate;
-    //         __device__ static constexpr detail::enumerate_adapter<Stride::ClusterBlock> cluster_block_enumerate;
-    //         __device__ static constexpr detail::enumerate_adapter<Stride::GridCluster> grid_cluster_enumerate;
-    // #endif
-    // #else
-    //         inline constexpr detail::enumerate_adapter<Stride::BlockThread> block_thread_enumerate;
-    //         inline constexpr detail::enumerate_adapter<Stride::GridThread> grid_thread_enumerate;
-    //         inline constexpr detail::enumerate_adapter<Stride::GridBlock> grid_block_enumerate;
-    // #if defined(_CG_HAS_CLUSTER_GROUP)
-    //         inline constexpr detail::enumerate_adapter<Stride::ClusterThread> cluster_thread_enumerate;
-    //         inline constexpr detail::enumerate_adapter<Stride::ClusterBlock> cluster_block_enumerate;
-    //         inline constexpr detail::enumerate_adapter<Stride::GridCluster> grid_cluster_enumerate;
-    // #endif
-    // #endif
-    //     }  // namespace views
-
     namespace detail
     {
         template <class... Ranges>
         using first_t = std::tuple_element_t<0, std::tuple<Ranges...>>;
 
-        template <Stride StrideType, std::ranges::random_access_range... Ranges>
+        template <Stride StrideType, RandomAccessRange... Ranges>
         requires (std::is_lvalue_reference_v<Ranges &&> && ...)
         class zip_iterator
         {
@@ -3164,7 +3254,7 @@ namespace gpu_array
             std::common_type_t<std::ranges::range_size_t<Ranges>...> index_ = 0;
         };
 
-        template <Stride StrideType, std::ranges::random_access_range... Ranges>
+        template <Stride StrideType, RandomAccessRange... Ranges>
         requires (std::is_lvalue_reference_v<Ranges &&> && ...) && (std::ranges::sized_range<Ranges> && ...)
         class zip_sentinel
         {
@@ -3181,9 +3271,9 @@ namespace gpu_array
             std::common_type_t<std::ranges::range_size_t<Ranges>...> end_ = 0;
         };
 
-        template <Stride StrideType, std::ranges::random_access_range... Ranges>
+        template <Stride StrideType, RandomAccessRange... Ranges>
         requires (std::is_lvalue_reference_v<Ranges &&> && ...)
-        class zip_view : public std::ranges::view_interface<zip_view<StrideType, Ranges...>>
+        class zip_view : public ViewInterface<zip_view<StrideType, Ranges...>>
         {
         public:
             zip_view() = default;
@@ -3206,7 +3296,7 @@ namespace gpu_array
         template <Stride StrideType>
         struct zip_adapter
         {
-            template <std::ranges::random_access_range... Ranges>
+            template <RandomAccessRange... Ranges>
             requires (std::ranges::sized_range<Ranges> && ...)
             [[nodiscard]] __host__ __device__ auto operator()(Ranges&... rs) const noexcept
             {
@@ -3284,9 +3374,9 @@ inline constexpr bool std::ranges::enable_borrowed_range<gpu_array::jagged_array
 #endif
 template <typename... Ts>
 inline constexpr bool std::ranges::enable_borrowed_range<gpu_array::detail::subrange<Ts...>> = true;
-template <gpu_array::detail::Stride StrideType, std::ranges::random_access_range Range>
+template <gpu_array::detail::Stride StrideType, gpu_array::detail::RandomAccessRange Range>
 inline constexpr bool std::ranges::enable_view<gpu_array::detail::stride_view<StrideType, Range>> = true;
-template <std::ranges::random_access_range Range>
+template <gpu_array::detail::RandomAccessRange Range>
 inline constexpr bool std::ranges::enable_view<gpu_array::enumerate_view<Range>> = true;
 
 #undef SIGSEGV_DEPRECATED
diff --git a/test/test.cpp b/test/test.cpp
index a109597..8a06cd0 100644
--- a/test/test.cpp
+++ b/test/test.cpp
@@ -2125,6 +2125,10 @@ TEST(StrideView, AliasTemplate)
         for (const auto& v : inner_array) EXPECT_EQ(v, 3);
 }
 
+static_assert(detail::RandomAccessRange<enumerate_view<managed_array<int>>>);
+static_assert(std::ranges::sized_range<enumerate_view<managed_array<int>>>);
+static_assert(std::ranges::view<enumerate_view<managed_array<int>>>);
+
 template <std::ranges::input_range T>
 requires std::ranges::input_range<std::ranges::range_value_t<T>>
 __global__ void kernel_enumerate(T array)

From 993a3d1e7198cf2c15e6c8e289a2cc78b296aa6b Mon Sep 17 00:00:00 2001
From: acd1034 <48613285+acd1034@users.noreply.github.com>
Date: Tue, 3 Mar 2026 00:02:41 +0900
Subject: [PATCH 13/19] =?UTF-8?q?=E2=9C=A8=20Make=20zip=5Fview=20satisfy?=
 =?UTF-8?q?=20ranges::view?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 include/gpu_array.hpp | 174 ++++++++++++++++++++++++++----------------
 test/test.cpp         |  14 ++--
 2 files changed, 116 insertions(+), 72 deletions(-)

diff --git a/include/gpu_array.hpp b/include/gpu_array.hpp
index f5dd90a..bbe0831 100644
--- a/include/gpu_array.hpp
+++ b/include/gpu_array.hpp
@@ -2923,7 +2923,9 @@ namespace gpu_array
         class stride_view : public ViewInterface<stride_view<StrideType, Range>>
         {
         public:
-            stride_view() = default;
+            stride_view()
+            requires std::default_initializable<Range>
+            = default;
             __host__ __device__ explicit stride_view(Range r) noexcept : range_(r) {}
             [[nodiscard]] __host__ __device__ auto begin() noexcept
             {
@@ -3153,7 +3155,9 @@ namespace gpu_array
         class enumerate_view : public ViewInterface<enumerate_view<Range>>
         {
         public:
-            enumerate_view() = default;
+            enumerate_view()
+            requires std::default_initializable<Range>
+            = default;
             __host__ __device__ explicit enumerate_view(Range r) noexcept : range_(r) {}
             [[nodiscard]] __host__ __device__ auto begin() noexcept { return enumerate_iterator<Range>(range_); }
             [[nodiscard]] __host__ __device__ auto begin() const noexcept
@@ -3192,9 +3196,7 @@ namespace gpu_array
         };
     }  // namespace detail
 
-#if !defined(ENABLE_HIP)
     using detail::enumerate_view;
-#endif
 
     namespace views
     {
@@ -3207,11 +3209,8 @@ namespace gpu_array
 
     namespace detail
     {
-        template <class... Ranges>
-        using first_t = std::tuple_element_t<0, std::tuple<Ranges...>>;
-
-        template <Stride StrideType, RandomAccessRange... Ranges>
-        requires (std::is_lvalue_reference_v<Ranges &&> && ...)
+        template <RandomAccessRange... Ranges>
+        requires (std::ranges::view<Ranges> && ...)
         class zip_iterator
         {
         public:
@@ -3220,10 +3219,7 @@ namespace gpu_array
             using difference_type = std::common_type_t<std::make_signed_t<std::ranges::range_size_t<Ranges>>...>;
 
             zip_iterator() = default;
-            __host__ __device__ explicit zip_iterator(Ranges&&... rs) noexcept
-                : pointers_(&rs...), index_(stride_iterator<StrideType, first_t<Ranges...>>::get_initial_index())
-            {
-            }
+            __host__ __device__ explicit zip_iterator(Ranges&... rs) noexcept : pointers_(&rs...), index_(0) {}
             __host__ __device__ std::common_type_t<std::ranges::range_size_t<Ranges>...> index() const noexcept
             {
                 return index_;
@@ -3238,7 +3234,7 @@ namespace gpu_array
             }
             __host__ __device__ zip_iterator& operator++() noexcept
             {
-                index_ += stride_iterator<StrideType, first_t<Ranges...>>::get_stride();
+                ++index_;
                 return *this;
             }
             __host__ __device__ zip_iterator operator++(int) noexcept
@@ -3247,21 +3243,82 @@ namespace gpu_array
                 ++(*this);
                 return res;
             }
+            __host__ __device__ zip_iterator& operator--() noexcept
+            {
+                --index_;
+                return *this;
+            }
+            __host__ __device__ zip_iterator operator--(int) noexcept
+            {
+                auto res = *this;
+                --(*this);
+                return res;
+            }
+            __host__ __device__ zip_iterator& operator+=(difference_type n)
+            {
+                index_ += n;
+                return *this;
+            }
+            __host__ __device__ zip_iterator& operator-=(difference_type n)
+            {
+                index_ -= n;
+                return *this;
+            }
+            __host__ __device__ std::tuple<std::ranges::range_reference_t<Ranges>...> operator[](
+                difference_type n) const
+            {
+                return *(*this + n);
+            }
+
             __host__ __device__ bool operator==(const zip_iterator& it) const noexcept { return index_ == it.index_; }
+            __host__ __device__ bool operator<(const zip_iterator& it) const& noexcept { return index_ < it.index_; }
+            __host__ __device__ bool operator>(const zip_iterator& it) const& noexcept { return index_ > it.index_; }
+            __host__ __device__ bool operator<=(const zip_iterator& it) const& noexcept { return index_ <= it.index_; }
+            __host__ __device__ bool operator>=(const zip_iterator& it) const& noexcept { return index_ >= it.index_; }
+
+            __host__ __device__ friend zip_iterator operator+(zip_iterator x, difference_type n)
+            {
+                x += n;
+                return x;
+            }
+            __host__ __device__ friend zip_iterator operator+(difference_type n, zip_iterator x)
+            {
+                x += n;
+                return x;
+            }
+            __host__ __device__ friend zip_iterator operator-(zip_iterator x, difference_type n)
+            {
+                x -= n;
+                return x;
+            }
+            __host__ __device__ friend difference_type operator-(const zip_iterator& x, const zip_iterator& y)
+            {
+                return x.index() - y.index();
+            }
+
+            __host__ __device__ friend auto iter_move(const zip_iterator& x)
+            {
+                return std::apply(
+                    [&x](auto&... pointers) {
+                        return std::tuple<std::ranges::range_rvalue_reference_t<Ranges>...>(
+                            std::move((*pointers)[x.index()])...);
+                    },
+                    x.pointers_);
+            }
 
         private:
-            std::tuple<std::remove_reference_t<Ranges>*...> pointers_{};
+            std::tuple<Ranges*...> pointers_{};
             std::common_type_t<std::ranges::range_size_t<Ranges>...> index_ = 0;
         };
 
-        template <Stride StrideType, RandomAccessRange... Ranges>
-        requires (std::is_lvalue_reference_v<Ranges &&> && ...) && (std::ranges::sized_range<Ranges> && ...)
+        template <RandomAccessRange... Ranges>
+        requires (std::ranges::view<Ranges> && ...) && (std::ranges::sized_range<Ranges> && ...)
         class zip_sentinel
         {
         public:
             zip_sentinel() = default;
-            __host__ __device__ explicit zip_sentinel(Ranges&&... rs) noexcept : end_(std::min({rs.size()...})) {}
-            __host__ __device__ friend bool operator==(const zip_iterator<StrideType, Ranges...>& it,
+            __host__ __device__ explicit zip_sentinel(Ranges&... rs) noexcept : end_(std::min({rs.size()...})) {}
+            __host__ __device__ friend bool operator==(const zip_iterator<Ranges...>& it,
                                                        const zip_sentinel& se) noexcept
             {
                 return it.index() >= se.end_;
@@ -3271,80 +3328,61 @@ namespace gpu_array
             std::common_type_t<std::ranges::range_size_t<Ranges>...> end_ = 0;
         };
 
-        template <Stride StrideType, RandomAccessRange... Ranges>
-        requires (std::is_lvalue_reference_v<Ranges &&> && ...)
-        class zip_view : public ViewInterface<zip_view<StrideType, Ranges...>>
+        template <RandomAccessRange... Ranges>
+        requires (std::ranges::view<Ranges> && ...)
+        class zip_view : public ViewInterface<zip_view<Ranges...>>
         {
         public:
-            zip_view() = default;
-            __host__ __device__ explicit zip_view(Ranges&&... rs) noexcept : pointers_(&rs...) {}
+            zip_view()
+            requires (std::default_initializable<Ranges> && ...)
+            = default;
+            __host__ __device__ explicit zip_view(Ranges... rs) noexcept : ranges_(rs...) {}
+            [[nodiscard]] __host__ __device__ auto begin() noexcept
+            {
+                return std::apply([](auto&... ranges) { return zip_iterator<Ranges...>(ranges...); }, ranges_);
+            }
             [[nodiscard]] __host__ __device__ auto begin() const noexcept
+            requires (std::is_const_v<Ranges> && ...)
             {
-                return std::apply(
-                    [this](auto&... pointers) { return zip_iterator<StrideType, Ranges...>(*pointers...); }, pointers_);
+                return std::apply([](auto&... ranges) { return zip_iterator<Ranges...>(ranges...); }, ranges_);
+            }
+            [[nodiscard]] __host__ __device__ auto end() noexcept
+            {
+                return std::apply([](auto&... ranges) { return zip_sentinel<Ranges...>(ranges...); }, ranges_);
             }
             [[nodiscard]] __host__ __device__ auto end() const noexcept
+            requires (std::is_const_v<Ranges> && ...)
             {
-                return std::apply(
-                    [this](auto&... pointers) { return zip_sentinel<StrideType, Ranges...>(*pointers...); }, pointers_);
+                return std::apply([](auto&... ranges) { return zip_sentinel<Ranges...>(ranges...); }, ranges_);
+            }
+            [[nodiscard]] __host__ __device__ auto size() const noexcept
+            {
+                return std::apply([](auto&... ranges) { return std::min({ranges.size()...}); }, ranges_);
             }
 
         private:
-            std::tuple<std::remove_reference_t<Ranges>*...> pointers_{};
+            std::tuple<Ranges...> ranges_{};
         };
 
-        template <Stride StrideType>
         struct zip_adapter
         {
             template <RandomAccessRange... Ranges>
             requires (std::ranges::sized_range<Ranges> && ...)
-            [[nodiscard]] __host__ __device__ auto operator()(Ranges&... rs) const noexcept
+            [[nodiscard]] __host__ __device__ auto operator()(Ranges&&... rs) const noexcept
             {
-                return zip_view<StrideType, Ranges&...>(rs...);
+                return zip_view<std::remove_reference_t<Ranges>...>(std::forward<Ranges>(rs)...);
             }
         };
     }  // namespace detail
 
-#ifdef GPU_CHECK_ERROR
-    __device__ static constexpr detail::zip_adapter<detail::Stride::BlockThread> block_thread_zip_view;
-    __device__ static constexpr detail::zip_adapter<detail::Stride::GridThread> grid_thread_zip_view;
-    __device__ static constexpr detail::zip_adapter<detail::Stride::GridBlock> grid_block_zip_view;
-#if defined(_CG_HAS_CLUSTER_GROUP)
-    __device__ static constexpr detail::zip_adapter<detail::Stride::ClusterThread> cluster_thread_zip_view;
-    __device__ static constexpr detail::zip_adapter<detail::Stride::ClusterBlock> cluster_block_zip_view;
-    __device__ static constexpr detail::zip_adapter<detail::Stride::GridCluster> grid_cluster_zip_view;
-#endif
-#else
-    inline constexpr detail::zip_adapter<Stride::BlockThread> block_thread_zip_view;
-    inline constexpr detail::zip_adapter<Stride::GridThread> grid_thread_zip_view;
-    inline constexpr detail::zip_adapter<Stride::GridBlock> grid_block_zip_view;
-#if defined(_CG_HAS_CLUSTER_GROUP)
-    inline constexpr detail::zip_adapter<Stride::ClusterThread> cluster_thread_zip_view;
-    inline constexpr detail::zip_adapter<Stride::ClusterBlock> cluster_block_zip_view;
-    inline constexpr detail::zip_adapter<Stride::GridCluster> grid_cluster_zip_view;
-#endif
-#endif
+    using detail::zip_view;
 
     namespace views
     {
 #ifdef GPU_CHECK_ERROR
-        __device__ static constexpr detail::zip_adapter<Stride::BlockThread> block_thread_zip;
-        __device__ static constexpr detail::zip_adapter<Stride::GridThread> grid_thread_zip;
-        __device__ static constexpr detail::zip_adapter<Stride::GridBlock> grid_block_zip;
-#if defined(_CG_HAS_CLUSTER_GROUP)
-        __device__ static constexpr detail::zip_adapter<Stride::ClusterThread> cluster_thread_zip;
-        __device__ static constexpr detail::zip_adapter<Stride::ClusterBlock> cluster_block_zip;
-        __device__ static constexpr detail::zip_adapter<Stride::GridCluster> grid_cluster_zip;
-#endif
+        __device__ static constexpr detail::zip_adapter zip;
 #else
-        inline constexpr detail::zip_adapter<Stride::BlockThread> block_thread_zip;
-        inline constexpr detail::zip_adapter<Stride::GridThread> grid_thread_zip;
-        inline constexpr detail::zip_adapter<Stride::GridBlock> grid_block_zip;
-#if defined(_CG_HAS_CLUSTER_GROUP)
-        inline constexpr detail::zip_adapter<Stride::ClusterThread> cluster_thread_zip;
-        inline constexpr detail::zip_adapter<Stride::ClusterBlock> cluster_block_zip;
-        inline constexpr detail::zip_adapter<Stride::GridCluster> grid_cluster_zip;
-#endif
+        inline constexpr detail::zip_adapter zip;
 #endif
     }  // namespace views
 }  // namespace gpu_array
@@ -3378,6 +3416,8 @@ template <gpu_array::detail::Stride StrideType, gpu_array::detail::RandomAccessR
 inline constexpr bool std::ranges::enable_view<gpu_array::detail::stride_view<StrideType, Range>> = true;
 template <gpu_array::detail::RandomAccessRange Range>
 inline constexpr bool std::ranges::enable_view<gpu_array::enumerate_view<Range>> = true;
+template <gpu_array::detail::RandomAccessRange... Ranges>
+inline constexpr bool std::ranges::enable_view<gpu_array::zip_view<Ranges...>> = true;
 
 #undef SIGSEGV_DEPRECATED
 #undef INCR_GPU_MEMORY_USAGE
diff --git a/test/test.cpp b/test/test.cpp
index 8a06cd0..c360986 100644
--- a/test/test.cpp
+++ b/test/test.cpp
@@ -2155,21 +2155,25 @@ TEST(EnumerateView, Simple)
     }
 }
 
+static_assert(detail::RandomAccessRange<zip_view<managed_array<int>>>);
+static_assert(std::ranges::sized_range<zip_view<managed_array<int>>>);
+static_assert(std::ranges::view<zip_view<managed_array<int>>>);
+
 // template <std::ranges::input_range T>
 // requires std::ranges::input_range<std::ranges::range_value_t<T>>
 // __global__ void zip_test_init(T array, int coeff)
 // {
-//     for (auto&& [i, xs] : grid_block_enumerate_view(array))
-//         for (auto&& [j, x] : block_thread_enumerate_view(xs)) x = (i * xs.size() + j) * coeff;
+//     for (auto&& [i, xs] : enumerate_view(array) | views::grid_block_stride)
+//         for (auto&& [j, x] : enumerate_view(xs) | views::block_thread_stride) x = (i * xs.size() + j) * coeff;
 // }
 
 // template <std::ranges::input_range T, std::ranges::input_range U>
 // requires std::ranges::input_range<std::ranges::range_value_t<T>> &&
 //          std::ranges::input_range<std::ranges::range_value_t<U>>
-// __global__ void kernel_zip(T array1, const U array2)
+// __global__ void kernel_zip(T array1,  U array2)
 // {
-//     for (auto&& [xs, ys] : views::grid_block_zip(array1, array2))
-//         for (auto&& [x, y] : views::block_thread_zip(xs, ys)) x = x + y;
+//     for (auto&& [xs, ys] : views::zip(array1, array2) | views::grid_block_stride)
+//         for (auto&& [x, y] : views::zip(xs, ys) | views::block_thread_stride) x = x + y;
 // }
 
 // template <std::ranges::input_range T, std::ranges::input_range U>

From 7dd1c2882a96ed4f79bef80c4b6a5e52aaa15912 Mon Sep 17 00:00:00 2001
From: acd1034 <48613285+acd1034@users.noreply.github.com>
Date: Tue, 3 Mar 2026 23:19:47 +0900
Subject: [PATCH 14/19] =?UTF-8?q?=F0=9F=90=9B=20Fix=20kernel=20crash=20bug?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 include/gpu_array.hpp | 118 +++++++++++++++++++++++++++++++++++++-----
 test/test.cpp         | 117 +++++++++++++++++------------------------
 2 files changed, 152 insertions(+), 83 deletions(-)

diff --git a/include/gpu_array.hpp b/include/gpu_array.hpp
index bbe0831..87c1f68 100644
--- a/include/gpu_array.hpp
+++ b/include/gpu_array.hpp
@@ -48,6 +48,83 @@
 #define SIGSEGV_DEPRECATED [[deprecated("Cannot access GPU memory directly")]]
 #endif
 
+namespace gpu_array::detail
+{
+    // Custom implementation of tuple for device code
+
+    template <std::size_t I, class T>
+    struct tuple_leaf
+    {
+        using type = T;
+        T value;
+    };
+
+    template <std::size_t I, class T>
+    tuple_leaf<I, T> at_index(const tuple_leaf<I, T>&);  // undefined
+
+    template <class Seq, class... Ts>
+    struct tuple_impl;
+
+    template <std::size_t... Is, class... Ts>
+    struct tuple_impl<std::index_sequence<Is...>, Ts...> : tuple_leaf<Is, Ts>...
+    {
+    };
+
+    template <class... Ts>
+    struct tuple
+    {
+        __host__ __device__ tuple()
+        requires (std::default_initializable<Ts> && ...)
+        = default;
+        __host__ __device__ tuple(Ts... ts) : base_{std::forward<Ts>(ts)...} {}
+        template <std::size_t I, class... Us>
+        __host__ __device__ friend auto& get(detail::tuple<Us...>&);
+        template <std::size_t I, class... Us>
+        __host__ __device__ friend const auto& get(const detail::tuple<Us...>&);
+        template <std::size_t I, class... Us>
+        __host__ __device__ friend auto&& get(detail::tuple<Us...>&&);
+        template <std::size_t I, class... Us>
+        __host__ __device__ friend const auto&& get(const detail::tuple<Us...>&&);
+
+    private:
+        using base = tuple_impl<std::index_sequence_for<Ts...>, Ts...>;
+        base base_;
+    };
+
+    template <std::size_t I, class... Us>
+    __host__ __device__ auto& get(detail::tuple<Us...>& t)
+    {
+        using leaf = decltype(at_index<I>(t.base_));
+        return static_cast<leaf&>(t.base_).value;
+    }
+    template <std::size_t I, class... Us>
+    __host__ __device__ const auto& get(const detail::tuple<Us...>& t)
+    {
+        using leaf = decltype(at_index<I>(t.base_));
+        return static_cast<const leaf&>(t.base_).value;
+    }
+    template <std::size_t I, class... Us>
+    __host__ __device__ auto&& get(detail::tuple<Us...>&& t)
+    {
+        using leaf = decltype(at_index<I>(t.base_));
+        return static_cast<typename leaf::type&&>(static_cast<leaf&>(t.base_).value);
+    }
+    template <std::size_t I, class... Us>
+    __host__ __device__ const auto&& get(const detail::tuple<Us...>&& t)
+    {
+        using leaf = decltype(at_index<I>(t.base_));
+        return static_cast<const typename leaf::type&&>(static_cast<const leaf&>(t.base_).value);
+    }
+}  // namespace gpu_array::detail
+
+template <class... Ts>
+struct std::tuple_size<gpu_array::detail::tuple<Ts...>> : std::integral_constant<std::size_t, sizeof...(Ts)>
+{
+};
+template <std::size_t I, class... Ts>
+struct std::tuple_element<I, gpu_array::detail::tuple<Ts...>> : std::tuple_element<I, std::tuple<Ts...>>
+{
+};
 #if !defined(__cpp_lib_tuple_like) || __cpp_lib_tuple_like < 202207L
 template <class T1, class T2, class U1, class U2, template <class> class TQual, template <class> class UQual>
 requires requires {
@@ -3209,13 +3286,28 @@ namespace gpu_array
 
     namespace detail
     {
+        template <class F, class Tuple, std::size_t... Is>
+        __host__ __device__ auto apply_impl(F&& f, Tuple&& t, std::index_sequence<Is...>)
+            -> decltype(std::forward<F>(f)(detail::get<Is>(std::forward<Tuple>(t))...))
+        {
+            return std::forward<F>(f)(detail::get<Is>(std::forward<Tuple>(t))...);
+        }
+
+        template <class F, class Tuple>
+        requires requires { std::tuple_size_v<std::remove_reference_t<Tuple>>; }
+        __host__ __device__ decltype(auto) apply(F&& f, Tuple&& t)
+        {
+            return apply_impl(std::forward<F>(f), std::forward<Tuple>(t),
+                              std::make_index_sequence<std::tuple_size_v<std::remove_reference_t<Tuple>>>{});
+        }
+
         template <RandomAccessRange... Ranges>
         requires (std::ranges::view<Ranges> && ...)
         class zip_iterator
         {
         public:
             using iterator_category = std::forward_iterator_tag;
-            using value_type = std::tuple<std::ranges::range_value_t<Ranges>...>;
+            using value_type = detail::tuple<std::ranges::range_value_t<Ranges>...>;
             using difference_type = std::common_type_t<std::make_signed_t<std::ranges::range_size_t<Ranges>>...>;
 
             zip_iterator() = default;
@@ -3226,9 +3318,9 @@ namespace gpu_array
             }
             __host__ __device__ auto operator*() const noexcept
             {
-                return std::apply(
+                return detail::apply(
                     [this](auto&... pointers) {
-                        return std::tuple<std::ranges::range_reference_t<Ranges>...>((*pointers)[index_]...);
+                        return detail::tuple<std::ranges::range_reference_t<Ranges>...>((*pointers)[index_]...);
                     },
                     pointers_);
             }
@@ -3264,7 +3356,7 @@ namespace gpu_array
                 index_ -= n;
                 return *this;
             }
-            __host__ __device__ std::tuple<std::ranges::range_reference_t<Ranges>...> operator[](
+            __host__ __device__ detail::tuple<std::ranges::range_reference_t<Ranges>...> operator[](
                 difference_type n) const
             {
                 return *(*this + n);
@@ -3298,16 +3390,16 @@ namespace gpu_array
 
             __host__ __device__ friend auto iter_move(const zip_iterator& x)
             {
-                return std::apply(
+                return detail::apply(
                     [&x](auto&... pointers) {
-                        return std::tuple<std::ranges::range_rvalue_reference_t<Ranges>...>(
+                        return detail::tuple<std::ranges::range_rvalue_reference_t<Ranges>...>(
                             std::move((*pointers)[x.index()])...);
                     },
                     x.pointers_);
             }
 
         private:
-            std::tuple<Ranges*...> pointers_{};
+            detail::tuple<Ranges*...> pointers_{};
             std::common_type_t<std::ranges::range_size_t<Ranges>...> index_ = 0;
         };
 
@@ -3339,29 +3431,29 @@ namespace gpu_array
             __host__ __device__ explicit zip_view(Ranges... rs) noexcept : ranges_(rs...) {}
             [[nodiscard]] __host__ __device__ auto begin() noexcept
             {
-                return std::apply([](auto&... ranges) { return zip_iterator<Ranges...>(ranges...); }, ranges_);
+                return detail::apply([](auto&... ranges) { return zip_iterator<Ranges...>(ranges...); }, ranges_);
             }
             [[nodiscard]] __host__ __device__ auto begin() const noexcept
             requires (std::is_const_v<Ranges> && ...)
             {
-                return std::apply([](auto&... ranges) { return zip_iterator<Ranges...>(ranges...); }, ranges_);
+                return detail::apply([](auto&... ranges) { return zip_iterator<Ranges...>(ranges...); }, ranges_);
             }
             [[nodiscard]] __host__ __device__ auto end() noexcept
             {
-                return std::apply([](auto&... ranges) { return zip_sentinel<Ranges...>(ranges...); }, ranges_);
+                return detail::apply([](auto&... ranges) { return zip_sentinel<Ranges...>(ranges...); }, ranges_);
             }
             [[nodiscard]] __host__ __device__ auto end() const noexcept
             requires (std::is_const_v<Ranges> && ...)
             {
-                return std::apply([](auto&... ranges) { return zip_sentinel<Ranges...>(ranges...); }, ranges_);
+                return detail::apply([](auto&... ranges) { return zip_sentinel<Ranges...>(ranges...); }, ranges_);
             }
             [[nodiscard]] __host__ __device__ auto size() const noexcept
             {
-                return std::apply([](auto&... ranges) { return std::min({ranges.size()...}); }, ranges_);
+                return detail::apply([](auto&... ranges) { return std::min({ranges.size()...}); }, ranges_);
             }
 
         private:
-            std::tuple<Ranges...> ranges_{};
+            detail::tuple<Ranges...> ranges_{};
         };
 
         struct zip_adapter
diff --git a/test/test.cpp b/test/test.cpp
index c360986..940ee0f 100644
--- a/test/test.cpp
+++ b/test/test.cpp
@@ -2159,75 +2159,52 @@ static_assert(detail::RandomAccessRange<zip_view<managed_array<int>>>);
 static_assert(std::ranges::sized_range<zip_view<managed_array<int>>>);
 static_assert(std::ranges::view<zip_view<managed_array<int>>>);
 
-// template <std::ranges::input_range T>
-// requires std::ranges::input_range<std::ranges::range_value_t<T>>
-// __global__ void zip_test_init(T array, int coeff)
-// {
-//     for (auto&& [i, xs] : enumerate_view(array) | views::grid_block_stride)
-//         for (auto&& [j, x] : enumerate_view(xs) | views::block_thread_stride) x = (i * xs.size() + j) * coeff;
-// }
-
-// template <std::ranges::input_range T, std::ranges::input_range U>
-// requires std::ranges::input_range<std::ranges::range_value_t<T>> &&
-//          std::ranges::input_range<std::ranges::range_value_t<U>>
-// __global__ void kernel_zip(T array1,  U array2)
-// {
-//     for (auto&& [xs, ys] : views::zip(array1, array2) | views::grid_block_stride)
-//         for (auto&& [x, y] : views::zip(xs, ys) | views::block_thread_stride) x = x + y;
-// }
-
-// template <std::ranges::input_range T, std::ranges::input_range U>
-// requires std::ranges::input_range<std::ranges::range_value_t<T>> &&
-//          std::ranges::input_range<std::ranges::range_value_t<U>>
-// __global__ void kernel_zip2(T array1, const U array2)
-// {
-//     for (auto&& [xs, ys] : grid_block_zip_view(array1, array2))
-//         for (auto&& [x, y] : block_thread_zip_view(xs, ys)) x = x + y;
-// }
-
-// TEST(ZipView, Simple)
-// {
-//     auto vec_vec = std::vector(10, std::vector<int>(20, 0));
-//     auto array1 = managed_array(vec_vec);
-//     auto array2 = managed_array(vec_vec);
-//     zip_test_init<<<10, 20>>>(array1, 1);
-//     api::gpuDeviceSynchronize();
-//     for (int i = 0; const auto& xs : array1)
-//     {
-//         for (int j = 0; const auto& x : xs)
-//         {
-//             EXPECT_EQ(x, i * 20 + j);
-//             ++j;
-//         }
-//         ++i;
-//     }
-
-//     zip_test_init<<<10, 20>>>(array2, 1000);
-//     kernel_zip<<<10, 20>>>(array1, array2);
-//     api::gpuDeviceSynchronize();
-//     for (int i = 0; const auto& xs : array1)
-//     {
-//         for (int j = 0; const auto& x : xs)
-//         {
-//             EXPECT_EQ(x, (i * 20 + j) * 1001);
-//             ++j;
-//         }
-//         ++i;
-//     }
-
-//     zip_test_init<<<10, 20>>>(array1, 1);
-//     zip_test_init<<<10, 20>>>(array2, 2000);
-//     kernel_zip2<<<10, 20>>>(array1, array2);
-//     api::gpuDeviceSynchronize();
-//     for (int i = 0; const auto& xs : array1)
-//     {
-//         for (int j = 0; const auto& x : xs)
-//         {
-//             EXPECT_EQ(x, (i * 20 + j) * 2001);
-//             ++j;
-//         }
-//         ++i;
-//     }
-// }
+template <std::ranges::input_range T>
+requires std::ranges::input_range<std::ranges::range_value_t<T>>
+__global__ void zip_test_init(T array, int coeff)
+{
+    for (auto&& [i, xs] : enumerate_view(array) | views::grid_block_stride)
+        for (auto&& [j, x] : enumerate_view(xs) | views::block_thread_stride) x = (i * xs.size() + j) * coeff;
+}
+
+template <std::ranges::input_range T, std::ranges::input_range U>
+requires std::ranges::input_range<std::ranges::range_value_t<T>> &&
+         std::ranges::input_range<std::ranges::range_value_t<U>>
+__global__ void kernel_zip(T array1, U array2)
+{
+    for (auto&& [xs, ys] : zip_view(array1, array2) | views::grid_block_stride)
+        for (auto&& [x, y] : zip_view(xs, ys) | views::block_thread_stride) x = x + y;
+}
+
+TEST(ZipView, Simple)
+{
+    auto vec_vec = std::vector(10, std::vector<int>(20, 0));
+    auto array1 = managed_array(vec_vec);
+    auto array2 = managed_array(vec_vec);
+    zip_test_init<<<10, 20>>>(array1, 1);
+    api::gpuDeviceSynchronize();
+    for (int i = 0; const auto& xs : array1)
+    {
+        for (int j = 0; const auto& x : xs)
+        {
+            EXPECT_EQ(x, i * 20 + j);
+            ++j;
+        }
+        ++i;
+    }
+
+    zip_test_init<<<10, 20>>>(array2, 1000);
+    kernel_zip<<<10, 20>>>(array1, array2);
+    api::gpuDeviceSynchronize();
+    for (int i = 0; const auto& xs : array1)
+    {
+        for (int j = 0; const auto& x : xs)
+        {
+            EXPECT_EQ(x, (i * 20 + j) * 1001);
+            ++j;
+        }
+        ++i;
+    }
+}
 #endif
 // NOLINTEND

From 9f37ae3c3b3fd8e293bc0ef16ee453891eb14b76 Mon Sep 17 00:00:00 2001
From: acd1034 <48613285+acd1034@users.noreply.github.com>
Date: Tue, 3 Mar 2026 23:28:43 +0900
Subject: [PATCH 15/19] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Use=20custom=20tuple?=
 =?UTF-8?q?=20instead=20of=20std::pair=20in=20enumerate=5Fview?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 include/gpu_array.hpp | 35 +++++++++++++++--------------------
 1 file changed, 15 insertions(+), 20 deletions(-)

diff --git a/include/gpu_array.hpp b/include/gpu_array.hpp
index 87c1f68..3e11d19 100644
--- a/include/gpu_array.hpp
+++ b/include/gpu_array.hpp
@@ -73,7 +73,7 @@ namespace gpu_array::detail
     template <class... Ts>
     struct tuple
     {
-        __host__ __device__ tuple()
+        tuple()
         requires (std::default_initializable<Ts> && ...)
         = default;
         __host__ __device__ tuple(Ts... ts) : base_{std::forward<Ts>(ts)...} {}
@@ -125,24 +125,19 @@ template <std::size_t I, class... Ts>
 struct std::tuple_element<I, gpu_array::detail::tuple<Ts...>> : std::tuple_element<I, std::tuple<Ts...>>
 {
 };
-#if !defined(__cpp_lib_tuple_like) || __cpp_lib_tuple_like < 202207L
-template <class T1, class T2, class U1, class U2, template <class> class TQual, template <class> class UQual>
-requires requires {
-    typename std::pair<std::common_reference_t<TQual<T1>, UQual<U1>>, std::common_reference_t<TQual<T2>, UQual<U2>>>;
-}
-struct std::basic_common_reference<std::pair<T1, T2>, std::pair<U1, U2>, TQual, UQual>
+template <class... TTypes, class... UTypes>
+requires requires { typename gpu_array::detail::tuple<std::common_type_t<TTypes, UTypes>...>; }
+struct std::common_type<gpu_array::detail::tuple<TTypes...>, gpu_array::detail::tuple<UTypes...>>
 {
-    using type =
-        std::pair<std::common_reference_t<TQual<T1>, UQual<U1>>, std::common_reference_t<TQual<T2>, UQual<U2>>>;
+    using type = gpu_array::detail::tuple<std::common_type_t<TTypes, UTypes>...>;
 };
-
-template <class T1, class T2, class U1, class U2>
-requires requires { typename std::pair<std::common_type_t<T1, U1>, std::common_type_t<T2, U2>>; }
-struct std::common_type<std::pair<T1, T2>, std::pair<U1, U2>>
+template <class... TTypes, class... UTypes, template <class> class TQual, template <class> class UQual>
+requires requires { typename gpu_array::detail::tuple<std::common_reference_t<TQual<TTypes>, UQual<UTypes>>...>; }
+struct std::basic_common_reference<gpu_array::detail::tuple<TTypes...>, gpu_array::detail::tuple<UTypes...>, TQual,
+                                   UQual>
 {
-    using type = std::pair<std::common_type_t<T1, U1>, std::common_type_t<T2, U2>>;
+    using type = gpu_array::detail::tuple<std::common_reference_t<TQual<TTypes>, UQual<UTypes>>...>;
 };
-#endif
 
 namespace gpu_array
 {
@@ -3094,13 +3089,13 @@ namespace gpu_array
         {
         public:
             using iterator_category = std::random_access_iterator_tag;
-            using value_type = std::pair<std::ranges::range_size_t<Range>, std::ranges::range_value_t<Range>>;
+            using value_type = detail::tuple<std::ranges::range_size_t<Range>, std::ranges::range_value_t<Range>>;
             using difference_type = std::make_signed_t<std::ranges::range_size_t<Range>>;
 
             enumerate_iterator() = default;
             __host__ __device__ explicit enumerate_iterator(Range& r) noexcept : pointer_(&r), index_(0) {}
             __host__ __device__ std::ranges::range_size_t<Range> index() const noexcept { return index_; }
-            __host__ __device__ std::pair<std::ranges::range_size_t<Range>, std::ranges::range_reference_t<Range>>
+            __host__ __device__ detail::tuple<std::ranges::range_size_t<Range>, std::ranges::range_reference_t<Range>>
             operator*() const noexcept
             {
                 return {index_, (*pointer_)[index_]};
@@ -3137,7 +3132,7 @@ namespace gpu_array
                 index_ -= n;
                 return *this;
             }
-            __host__ __device__ std::pair<std::ranges::range_size_t<Range>, std::ranges::range_reference_t<Range>>
+            __host__ __device__ detail::tuple<std::ranges::range_size_t<Range>, std::ranges::range_reference_t<Range>>
             operator[](difference_type n) const
             {
                 return *(*this + n);
@@ -3185,8 +3180,8 @@ namespace gpu_array
                 return x.index() - y.index();
             }
 
-            __host__ __device__ friend std::pair<std::ranges::range_size_t<Range>,
-                                                 std::ranges::range_rvalue_reference_t<Range>>
+            __host__ __device__ friend detail::tuple<std::ranges::range_size_t<Range>,
+                                                     std::ranges::range_rvalue_reference_t<Range>>
             iter_move(const enumerate_iterator& x)
             {
                 return {x.index(), std::move(x->second)};

From 978b72d2f49333b5beceffaa28fda2fbff8b89b7 Mon Sep 17 00:00:00 2001
From: acd1034 <48613285+acd1034@users.noreply.github.com>
Date: Wed, 4 Mar 2026 00:03:10 +0900
Subject: [PATCH 16/19] =?UTF-8?q?=E2=9C=85=20Add=20tests=20combining=20enu?=
 =?UTF-8?q?merate=20and=20zip?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 include/gpu_array.hpp |  9 +++-----
 test/test.cpp         | 52 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/include/gpu_array.hpp b/include/gpu_array.hpp
index 3e11d19..4702637 100644
--- a/include/gpu_array.hpp
+++ b/include/gpu_array.hpp
@@ -3385,12 +3385,9 @@ namespace gpu_array
 
             __host__ __device__ friend auto iter_move(const zip_iterator& x)
             {
-                return detail::apply(
-                    [&x](auto&... pointers) {
-                        return detail::tuple<std::ranges::range_rvalue_reference_t<Ranges>...>(
-                            std::move((*pointers)[x.index()])...);
-                    },
-                    x.pointers_);
+                using Tuple = detail::tuple<std::ranges::range_rvalue_reference_t<Ranges>...>;
+                return detail::apply([&x](auto&... pointers) { return Tuple(std::move((*pointers)[x.index()])...); },
+                                     x.pointers_);
             }
 
         private:
diff --git a/test/test.cpp b/test/test.cpp
index 940ee0f..1d83728 100644
--- a/test/test.cpp
+++ b/test/test.cpp
@@ -2170,7 +2170,7 @@ __global__ void zip_test_init(T array, int coeff)
 template <std::ranges::input_range T, std::ranges::input_range U>
 requires std::ranges::input_range<std::ranges::range_value_t<T>> &&
          std::ranges::input_range<std::ranges::range_value_t<U>>
-__global__ void kernel_zip(T array1, U array2)
+__global__ void kernel_zip(T array1, const U array2)
 {
     for (auto&& [xs, ys] : zip_view(array1, array2) | views::grid_block_stride)
         for (auto&& [x, y] : zip_view(xs, ys) | views::block_thread_stride) x = x + y;
@@ -2206,5 +2206,55 @@ TEST(ZipView, Simple)
         ++i;
     }
 }
+
+template <std::ranges::input_range Ts, std::ranges::input_range Us>
+__global__ void kernel_zip_enumerate(Ts ts, const Us us)
+{
+    for (auto&& [i, zipped] : zip_view(ts, us) | views::enumerate | views::grid_thread_stride)
+    {
+        auto&& [t, u] = zipped;
+        t = t * 100 + u * (i + 1);
+    }
+}
+
+TEST(ZipView, WithEnumerate)
+{
+    auto vec1 = std::vector<int>{19, 70, 86, 69};
+    auto vec2 = std::vector<int>{16, 6, 14, 17};
+    auto array1 = managed_array(vec1);
+    auto array2 = managed_array(vec2);
+    kernel_zip_enumerate<<<1, 2>>>(array1, array2);
+    api::gpuDeviceSynchronize();
+    for (int i = 0; const auto& t : array1)
+    {
+        EXPECT_EQ(t, vec1[i] * 100 + vec2[i] * (i + 1));
+        ++i;
+    }
+}
+
+template <std::ranges::input_range Ts, std::ranges::input_range Us>
+__global__ void kernel_enumerate_zip(Ts ts, const Us us)
+{
+    for (auto&& [enumerated, u] : zip_view(enumerate_view(ts), us) | views::grid_thread_stride)
+    {
+        auto&& [i, t] = enumerated;
+        t = t * 100 + u * (i + 1);
+    }
+}
+
+TEST(EnumerateView, WithZip)
+{
+    auto vec1 = std::vector<int>{19, 70, 86, 69};
+    auto vec2 = std::vector<int>{16, 6, 14, 17};
+    auto array1 = managed_array(vec1);
+    auto array2 = managed_array(vec2);
+    kernel_enumerate_zip<<<1, 2>>>(array1, array2);
+    api::gpuDeviceSynchronize();
+    for (int i = 0; const auto& t : array1)
+    {
+        EXPECT_EQ(t, vec1[i] * 100 + vec2[i] * (i + 1));
+        ++i;
+    }
+}
 #endif
 // NOLINTEND

From 48e57b4857cfa8bab78f3f151c86b4f7f941dbb3 Mon Sep 17 00:00:00 2001
From: acd1034 <48613285+acd1034@users.noreply.github.com>
Date: Wed, 4 Mar 2026 12:20:24 +0900
Subject: [PATCH 17/19] =?UTF-8?q?=F0=9F=9A=9A=20Rename=20tests?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 test/test.cpp | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/test/test.cpp b/test/test.cpp
index 1d83728..8447e6b 100644
--- a/test/test.cpp
+++ b/test/test.cpp
@@ -2131,18 +2131,18 @@ static_assert(std::ranges::view<enumerate_view<managed_array<int>>>);
 
 template <std::ranges::input_range T>
 requires std::ranges::input_range<std::ranges::range_value_t<T>>
-__global__ void kernel_enumerate(T array)
+__global__ void kernel_enumerate_stride(T array)
 {
     for (auto&& [i, xs] : enumerate_view(array) | views::grid_block_stride)
         for (auto&& [j, x] : enumerate_view(xs) | views::block_thread_stride) x = i * 100 + j;
 }
 
-TEST(EnumerateView, Simple)
+TEST(EnumerateView, WithStride)
 {
     auto vec_vec = std::vector(32, std::vector<int>(64, 0));
     auto nested_array = managed_array(vec_vec);
 
-    kernel_enumerate<<<32, 64>>>(nested_array);
+    kernel_enumerate_stride<<<32, 64>>>(nested_array);
     api::gpuDeviceSynchronize();
     for (int i = 0; const auto& xs : nested_array)
     {
@@ -2170,13 +2170,13 @@ __global__ void zip_test_init(T array, int coeff)
 template <std::ranges::input_range T, std::ranges::input_range U>
 requires std::ranges::input_range<std::ranges::range_value_t<T>> &&
          std::ranges::input_range<std::ranges::range_value_t<U>>
-__global__ void kernel_zip(T array1, const U array2)
+__global__ void kernel_zip_stride(T array1, const U array2)
 {
     for (auto&& [xs, ys] : zip_view(array1, array2) | views::grid_block_stride)
         for (auto&& [x, y] : zip_view(xs, ys) | views::block_thread_stride) x = x + y;
 }
 
-TEST(ZipView, Simple)
+TEST(ZipView, WithStride)
 {
     auto vec_vec = std::vector(10, std::vector<int>(20, 0));
     auto array1 = managed_array(vec_vec);
@@ -2194,7 +2194,7 @@ TEST(ZipView, Simple)
     }
 
     zip_test_init<<<10, 20>>>(array2, 1000);
-    kernel_zip<<<10, 20>>>(array1, array2);
+    kernel_zip_stride<<<10, 20>>>(array1, array2);
     api::gpuDeviceSynchronize();
     for (int i = 0; const auto& xs : array1)
     {
@@ -2208,7 +2208,7 @@ TEST(ZipView, Simple)
 }
 
 template <std::ranges::input_range Ts, std::ranges::input_range Us>
-__global__ void kernel_zip_enumerate(Ts ts, const Us us)
+__global__ void kernel_zip_enumerate_stride(Ts ts, const Us us)
 {
     for (auto&& [i, zipped] : zip_view(ts, us) | views::enumerate | views::grid_thread_stride)
     {
@@ -2217,13 +2217,13 @@ __global__ void kernel_zip_enumerate(Ts ts, const Us us)
     }
 }
 
-TEST(ZipView, WithEnumerate)
+TEST(ZipView, WithEnumerateStride)
 {
     auto vec1 = std::vector<int>{19, 70, 86, 69};
     auto vec2 = std::vector<int>{16, 6, 14, 17};
     auto array1 = managed_array(vec1);
     auto array2 = managed_array(vec2);
-    kernel_zip_enumerate<<<1, 2>>>(array1, array2);
+    kernel_zip_enumerate_stride<<<1, 2>>>(array1, array2);
     api::gpuDeviceSynchronize();
     for (int i = 0; const auto& t : array1)
     {
@@ -2233,7 +2233,7 @@ TEST(ZipView, WithEnumerate)
 }
 
 template <std::ranges::input_range Ts, std::ranges::input_range Us>
-__global__ void kernel_enumerate_zip(Ts ts, const Us us)
+__global__ void kernel_enumerate_zip_stride(Ts ts, const Us us)
 {
     for (auto&& [enumerated, u] : zip_view(enumerate_view(ts), us) | views::grid_thread_stride)
     {
@@ -2242,13 +2242,13 @@ __global__ void kernel_enumerate_zip(Ts ts, const Us us)
     }
 }
 
-TEST(EnumerateView, WithZip)
+TEST(EnumerateView, WithZipStride)
 {
     auto vec1 = std::vector<int>{19, 70, 86, 69};
     auto vec2 = std::vector<int>{16, 6, 14, 17};
     auto array1 = managed_array(vec1);
     auto array2 = managed_array(vec2);
-    kernel_enumerate_zip<<<1, 2>>>(array1, array2);
+    kernel_enumerate_zip_stride<<<1, 2>>>(array1, array2);
     api::gpuDeviceSynchronize();
     for (int i = 0; const auto& t : array1)
     {

From f6c576b4e7d3ff2bec770e8ed42c2632b80c0b93 Mon Sep 17 00:00:00 2001
From: acd1034 <48613285+acd1034@users.noreply.github.com>
Date: Wed, 4 Mar 2026 12:29:45 +0900
Subject: [PATCH 18/19] =?UTF-8?q?=E2=9C=85=20Add=20test=20for=20enumerate?=
 =?UTF-8?q?=20and=20zip?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 test/test.cpp | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/test/test.cpp b/test/test.cpp
index 8447e6b..bd81477 100644
--- a/test/test.cpp
+++ b/test/test.cpp
@@ -2129,6 +2129,35 @@ static_assert(detail::RandomAccessRange<enumerate_view<managed_array<int>>>);
 static_assert(std::ranges::sized_range<enumerate_view<managed_array<int>>>);
 static_assert(std::ranges::view<enumerate_view<managed_array<int>>>);
 
+template <std::ranges::range Ts, std::ranges::range Uss>
+requires std::ranges::input_range<std::ranges::range_value_t<Uss>>
+__global__ void kernel_enumerate(const Ts ts, Uss uss)
+{
+    auto i = cooperative_groups::this_thread_block().thread_rank();
+    for (auto&& [j, t] : enumerate_view(ts))
+    {
+        uss[i][j] = t * (j + 1);
+    }
+}
+
+TEST(EnumerateView, Simple)
+{
+    auto vec1 = std::vector<int>{16, 6, 14, 17};
+    auto vec2 = std::vector(10, std::vector<int>(4, 0));
+    auto array1 = managed_array(vec1);
+    auto array2 = managed_array(vec2);
+    kernel_enumerate<<<1, 10>>>(array1, array2);
+    api::gpuDeviceSynchronize();
+    for (const auto& us : array2)
+    {
+        for (int j = 0; const auto& u : us)
+        {
+            EXPECT_EQ(u, vec1[j] * (j + 1));
+            ++j;
+        }
+    }
+}
+
 template <std::ranges::input_range T>
 requires std::ranges::input_range<std::ranges::range_value_t<T>>
 __global__ void kernel_enumerate_stride(T array)
@@ -2159,6 +2188,30 @@ static_assert(detail::RandomAccessRange<zip_view<managed_array<int>>>);
 static_assert(std::ranges::sized_range<zip_view<managed_array<int>>>);
 static_assert(std::ranges::view<zip_view<managed_array<int>>>);
 
+template <std::ranges::range Ts, std::ranges::range Us>
+__global__ void kernel_zip(Ts ts, const Us us)
+{
+    for (auto&& [t, u] : zip_view(ts, us))
+    {
+        t = t + u;
+    }
+}
+
+TEST(ZipView, Simple)
+{
+    auto vec1 = std::vector<int>{19, 70, 86, 69};
+    auto vec2 = std::vector<int>{16, 6, 14, 17};
+    auto array1 = managed_array(vec1);
+    auto array2 = managed_array(vec2);
+    kernel_zip<<<1, 2>>>(array1, array2);
+    api::gpuDeviceSynchronize();
+    for (int i = 0; const auto& t : array1)
+    {
+        EXPECT_EQ(t, vec1[i] + vec2[i]);
+        ++i;
+    }
+}
+
 template <std::ranges::input_range T>
 requires std::ranges::input_range<std::ranges::range_value_t<T>>
 __global__ void zip_test_init(T array, int coeff)

From 5406178c0021b197192bc793992b3d80e19f612b Mon Sep 17 00:00:00 2001
From: acd1034 <48613285+acd1034@users.noreply.github.com>
Date: Thu, 5 Mar 2026 18:28:35 +0900
Subject: [PATCH 19/19] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Replace=20std::tuple?=
 =?UTF-8?q?=20with=20custom=20tuple?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 include/gpu_array.hpp | 536 +++++++++++++++++++++++++-----------------
 test/test.cpp         | 197 ++++++++--------
 2 files changed, 420 insertions(+), 313 deletions(-)

diff --git a/include/gpu_array.hpp b/include/gpu_array.hpp
index 4702637..24aabee 100644
--- a/include/gpu_array.hpp
+++ b/include/gpu_array.hpp
@@ -48,95 +48,195 @@
 #define SIGSEGV_DEPRECATED [[deprecated("Cannot access GPU memory directly")]]
 #endif
 
-namespace gpu_array::detail
+namespace gpu_array
 {
     // Custom implementation of tuple for device code
 
-    template <std::size_t I, class T>
-    struct tuple_leaf
+    namespace detail
     {
-        using type = T;
-        T value;
-    };
+        template <std::size_t I, class T>
+        struct tuple_leaf  // Storage for one tuple element; the index I makes each leaf a distinct base type.
+        {
+            tuple_leaf()  // Default construction is only offered when T itself is default-initializable.
+            requires std::default_initializable<T>
+            = default;
+            template <class U>
+            __host__ __device__ tuple_leaf(U&& u) : value(std::forward<U>(u))  // Perfect-forwards the initializer into the stored value.
+            {
+            }
+            using type = T;  // Element type, read back by the rvalue get() overloads.
+            T value;  // The stored element.
+        };
 
-    template <std::size_t I, class T>
-    tuple_leaf<I, T> at_index(const tuple_leaf<I, T>&);  // undefined
+        template <std::size_t I, class T>
+        tuple_leaf<I, T> at_index(const tuple_leaf<I, T>&);  // undefined
 
-    template <class Seq, class... Ts>
-    struct tuple_impl;
+        template <class Seq, class... Ts>
+        struct tuple_impl;
 
-    template <std::size_t... Is, class... Ts>
-    struct tuple_impl<std::index_sequence<Is...>, Ts...> : tuple_leaf<Is, Ts>...
-    {
-    };
+        template <std::size_t... Is, class... Ts>
+        struct tuple_impl<std::index_sequence<Is...>, Ts...> : tuple_leaf<Is, Ts>...  // One tuple_leaf base per (index, type) pair.
+        {
+            tuple_impl()  // Available only when every element type is default-initializable.
+            requires (std::default_initializable<Ts> && ...)
+            = default;
+            template <class... Us>
+            requires (sizeof...(Us) == sizeof...(Ts))  // Exactly one initializer per element.
+            __host__ __device__ tuple_impl(Us&&... us) : tuple_leaf<Is, Ts>(std::forward<Us>(us))...  // Forwards the i-th argument to the i-th leaf.
+            {
+            }
+        };
+
+        template <class... Ts>
+        struct tuple  // Tuple usable from host and device code; elements live in the tuple_impl member `base_`.
+        {
+            tuple()  // Default construction requires every element type to be default-initializable.
+            requires (std::default_initializable<Ts> && ...)
+            = default;
+
+            template <class... Us>
+            struct is_single_tuple : std::false_type  // Primary: the argument pack is not exactly one tuple.
+            {
+            };
+            template <class... Us>
+            struct is_single_tuple<tuple<Us...>> : std::true_type  // Matches when the pack is exactly one tuple<...>.
+            {
+            };
+            template <class... Us>
+            requires (sizeof...(Us) == sizeof...(Ts) && !is_single_tuple<std::remove_cvref_t<Us>...>::value)  // Excludes a lone tuple argument so the converting constructors below handle it.
+            __host__ __device__ tuple(Us&&... us) : base_(std::forward<Us>(us)...)  // Element-wise construction from one argument per element.
+            {
+            }
+
+            template <class... Us>
+            requires (sizeof...(Us) == sizeof...(Ts))
+            __host__ __device__ tuple(const tuple<Us...>& t) : tuple{tuple_convert(t, std::index_sequence_for<Ts...>{})}  // Converting copy: builds a tuple<Ts...> from t's elements, then delegates.
+            {
+            }
+            template <class... Us>
+            requires (sizeof...(Us) == sizeof...(Ts))
+            __host__ __device__ tuple(tuple<Us...>&& t)  // Converting move: same as above but moves each element out of t.
+                : tuple{tuple_convert(std::move(t), std::index_sequence_for<Ts...>{})}
+            {
+            }
+            template <class... Us>
+            requires (sizeof...(Us) == sizeof...(Ts))
+            __host__ __device__ auto& operator=(const tuple<Us...>& t)  // Converting copy assignment.
+            {
+                *this = tuple_convert(t, std::index_sequence_for<Ts...>{});  // NOTE(review): relies on tuple<Ts...>'s implicit assignment; if that is deleted (e.g. reference elements) this presumably resolves back to a template overload -- confirm.
+                return *this;
+            }
+            template <class... Us>
+            requires (sizeof...(Us) == sizeof...(Ts))
+            __host__ __device__ auto& operator=(tuple<Us...>&& t)  // Converting move assignment.
+            {
+                *this = tuple_convert(std::move(t), std::index_sequence_for<Ts...>{});  // Same caveat as the copy-assignment overload above.
+                return *this;
+            }
+
+            template <std::size_t I, class... Us>
+            __host__ __device__ friend auto& get(tuple<Us...>&);  // The get() overloads are friends because they read the private base_.
+            template <std::size_t I, class... Us>
+            __host__ __device__ friend const auto& get(const tuple<Us...>&);
+            template <std::size_t I, class... Us>
+            __host__ __device__ friend auto&& get(tuple<Us...>&&);
+            template <std::size_t I, class... Us>
+            __host__ __device__ friend const auto&& get(const tuple<Us...>&&);
+
+        private:
+            template <class... Us, std::size_t... Is>
+            __host__ __device__ static auto tuple_convert(const tuple<Us...>& t, std::index_sequence<Is...>)  // Called from __host__ __device__ members, so it needs the same execution space.
+            {
+                return tuple<Ts...>{get<Is>(t)...};  // Copy-converts each element of t into the matching Ts.
+            }
+            template <class... Us, std::size_t... Is>
+            __host__ __device__ static auto tuple_convert(tuple<Us...>&& t, std::index_sequence<Is...>)  // Rvalue counterpart of the overload above.
+            {
+                return tuple<Ts...>{std::move(get<Is>(t))...};  // Moves each element of t into the matching Ts.
+            }
+
+            using base = tuple_impl<std::index_sequence_for<Ts...>, Ts...>;  // Indexed leaf storage for the element pack.
+            base base_;
+        };
+
+        template <class... Ts>
+        tuple(Ts...) -> tuple<Ts...>;
 
-    template <class... Ts>
-    struct tuple
-    {
-        tuple()
-        requires (std::default_initializable<Ts> && ...)
-        = default;
-        __host__ __device__ tuple(Ts... ts) : base_{std::forward<Ts>(ts)...} {}
         template <std::size_t I, class... Us>
-        __host__ __device__ friend auto& get(detail::tuple<Us...>&);
+        __host__ __device__ auto& get(tuple<Us...>& t)
+        {
+            using leaf = decltype(at_index<I>(t.base_));
+            return static_cast<leaf&>(t.base_).value;
+        }
         template <std::size_t I, class... Us>
-        __host__ __device__ friend const auto& get(const detail::tuple<Us...>&);
+        __host__ __device__ const auto& get(const tuple<Us...>& t)
+        {
+            using leaf = decltype(at_index<I>(t.base_));
+            return static_cast<const leaf&>(t.base_).value;
+        }
         template <std::size_t I, class... Us>
-        __host__ __device__ friend auto&& get(detail::tuple<Us...>&&);
+        __host__ __device__ auto&& get(tuple<Us...>&& t)
+        {
+            using leaf = decltype(at_index<I>(t.base_));
+            return static_cast<typename leaf::type&&>(static_cast<leaf&>(t.base_).value);
+        }
         template <std::size_t I, class... Us>
-        __host__ __device__ friend const auto&& get(const detail::tuple<Us...>&&);
+        __host__ __device__ const auto&& get(const tuple<Us...>&& t)
+        {
+            using leaf = decltype(at_index<I>(t.base_));
+            return static_cast<const typename leaf::type&&>(static_cast<const leaf&>(t.base_).value);
+        }
 
-    private:
-        using base = tuple_impl<std::index_sequence_for<Ts...>, Ts...>;
-        base base_;
-    };
+        template <class... Ts, class... Us, std::size_t... Is>
+        __host__ __device__ bool tuple_equal_impl(const tuple<Ts...>& t, const tuple<Us...>& u,  // Helper for operator==: compares the elements at indices Is.
+                                                  std::index_sequence<Is...>)
+        {
+            return ((get<Is>(t) == get<Is>(u)) && ...);  // Fold over &&: true only if every compared pair is equal; short-circuits on the first mismatch.
+        }
+        template <class... Ts, class... Us>
+        requires (sizeof...(Ts) == sizeof...(Us))  // Only tuples with the same number of elements are comparable.
+        __host__ __device__ bool operator==(const tuple<Ts...>& t, const tuple<Us...>& u)  // Element-wise equality of two tuples.
+        {
+            return tuple_equal_impl(t, u, std::index_sequence_for<Ts...>{});  // Supplies indices 0..sizeof...(Ts)-1 to the helper.
+        }
 
-    template <std::size_t I, class... Us>
-    __host__ __device__ auto& get(detail::tuple<Us...>& t)
-    {
-        using leaf = decltype(at_index<I>(t.base_));
-        return static_cast<leaf&>(t.base_).value;
-    }
-    template <std::size_t I, class... Us>
-    __host__ __device__ const auto& get(const detail::tuple<Us...>& t)
-    {
-        using leaf = decltype(at_index<I>(t.base_));
-        return static_cast<const leaf&>(t.base_).value;
-    }
-    template <std::size_t I, class... Us>
-    __host__ __device__ auto&& get(detail::tuple<Us...>&& t)
-    {
-        using leaf = decltype(at_index<I>(t.base_));
-        return static_cast<typename leaf::type&&>(static_cast<leaf&>(t.base_).value);
-    }
-    template <std::size_t I, class... Us>
-    __host__ __device__ const auto&& get(const detail::tuple<Us...>&& t)
-    {
-        using leaf = decltype(at_index<I>(t.base_));
-        return static_cast<const typename leaf::type&&>(static_cast<const leaf&>(t.base_).value);
-    }
-}  // namespace gpu_array::detail
+        template <class F, class Tuple, std::size_t... Is>
+        requires requires { std::declval<F>()(get<Is>(std::declval<Tuple>())...); }  // f must be callable with the unpacked elements.
+        __host__ __device__ decltype(auto) apply_impl(F&& f, Tuple&& t, std::index_sequence<Is...>)  // Helper for apply(): unpacks t at indices Is.
+        {
+            return std::forward<F>(f)(get<Is>(std::forward<Tuple>(t))...);  // Forwards t so each get() keeps the tuple's value category.
+        }
+        template <class F, class Tuple>
+        requires requires { std::tuple_size_v<std::remove_reference_t<Tuple>>; }  // Tuple must have a std::tuple_size specialisation.
+        __host__ __device__ decltype(auto) apply(F&& f, Tuple&& t)  // Calls f with the elements of t as separate arguments and returns f's result.
+        {
+            return apply_impl(std::forward<F>(f), std::forward<Tuple>(t),
+                              std::make_index_sequence<std::tuple_size_v<std::remove_reference_t<Tuple>>>{});  // One index per element of t.
+        }
+    }  // namespace detail
+
+    using detail::tuple, detail::get, detail::apply;
+}  // namespace gpu_array
 
 template <class... Ts>
-struct std::tuple_size<gpu_array::detail::tuple<Ts...>> : std::integral_constant<std::size_t, sizeof...(Ts)>
+struct std::tuple_size<gpu_array::tuple<Ts...>> : std::integral_constant<std::size_t, sizeof...(Ts)>
 {
 };
 template <std::size_t I, class... Ts>
-struct std::tuple_element<I, gpu_array::detail::tuple<Ts...>> : std::tuple_element<I, std::tuple<Ts...>>
+struct std::tuple_element<I, gpu_array::tuple<Ts...>> : std::tuple_element<I, std::tuple<Ts...>>
 {
 };
 template <class... TTypes, class... UTypes>
-requires requires { typename gpu_array::detail::tuple<std::common_type_t<TTypes, UTypes>...>; }
-struct std::common_type<gpu_array::detail::tuple<TTypes...>, gpu_array::detail::tuple<UTypes...>>
+requires requires { typename gpu_array::tuple<std::common_type_t<TTypes, UTypes>...>; }
+struct std::common_type<gpu_array::tuple<TTypes...>, gpu_array::tuple<UTypes...>>
 {
-    using type = gpu_array::detail::tuple<std::common_type_t<TTypes, UTypes>...>;
+    using type = gpu_array::tuple<std::common_type_t<TTypes, UTypes>...>;
 };
 template <class... TTypes, class... UTypes, template <class> class TQual, template <class> class UQual>
-requires requires { typename gpu_array::detail::tuple<std::common_reference_t<TQual<TTypes>, UQual<UTypes>>...>; }
-struct std::basic_common_reference<gpu_array::detail::tuple<TTypes...>, gpu_array::detail::tuple<UTypes...>, TQual,
-                                   UQual>
+requires requires { typename gpu_array::tuple<std::common_reference_t<TQual<TTypes>, UQual<UTypes>>...>; }
+struct std::basic_common_reference<gpu_array::tuple<TTypes...>, gpu_array::tuple<UTypes...>, TQual, UQual>
 {
-    using type = gpu_array::detail::tuple<std::common_reference_t<TQual<TTypes>, UQual<UTypes>>...>;
+    using type = gpu_array::tuple<std::common_reference_t<TQual<TTypes>, UQual<UTypes>>...>;
 };
 
 namespace gpu_array
@@ -211,11 +311,11 @@ namespace gpu_array
 
         protected:
             size_type size_ = 0U;
-            std::tuple<ValueTypes*...> data_;
+            gpu_array::tuple<ValueTypes*...> data_;
             std::uint32_t* ref_count_ = nullptr;  // reference counter, not used on GPU
 
             template <std::size_t N>
-            using element_type = std::tuple_element_t<N, std::tuple<ValueTypes...>>;
+            using element_type = std::tuple_element_t<N, gpu_array::tuple<ValueTypes...>>;
 
             __host__ __device__ void init()
             {
@@ -346,11 +446,11 @@ namespace gpu_array
 
             __host__ __device__ void tuple_for_each(auto&& f) const
             {
-                std::apply([&f](const auto&... args) { (f(args), ...); }, data_);
+                gpu_array::apply([&f](const auto&... args) { (f(args), ...); }, data_);
             }
             __host__ __device__ void tuple_for_each(auto&& f)
             {
-                std::apply([&f](auto&... args) { (f(args), ...); }, data_);
+                gpu_array::apply([&f](auto&... args) { (f(args), ...); }, data_);
             }
         };
 
@@ -518,8 +618,8 @@ namespace gpu_array
         {
             return *(data() + base::size_ - 1);
         }
-        __host__ __device__ pointer data() noexcept { return std::get<0>(base::data_); }
-        __host__ __device__ const_pointer data() const noexcept { return std::get<0>(base::data_); }
+        __host__ __device__ pointer data() noexcept { return gpu_array::get<0>(base::data_); }
+        __host__ __device__ const_pointer data() const noexcept { return gpu_array::get<0>(base::data_); }
 
         array() = default;
         __host__ __device__ array(const array& r) : base(r) {}
@@ -529,8 +629,8 @@ namespace gpu_array
         {
             if (base::size_ == 0) return;
             auto buf = std::make_unique<value_type[]>(base::size_);
-            GPU_CHECK_ERROR(
-                api::gpuMalloc(reinterpret_cast<void**>(&std::get<0>(base::data_)), sizeof(value_type) * base::size_));
+            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)),
+                                           sizeof(value_type) * base::size_));
             assert(data() != nullptr);
             GPU_CHECK_ERROR(api::gpuMemcpy(data(), buf.get(), sizeof(value_type) * base::size_, gpuMemcpyHostToDevice));
         }
@@ -538,8 +638,8 @@ namespace gpu_array
         __host__ array(std::size_t size, default_init_tag) : base(size)
         {
             if (base::size_ == 0) return;
-            GPU_CHECK_ERROR(
-                api::gpuMalloc(reinterpret_cast<void**>(&std::get<0>(base::data_)), sizeof(value_type) * base::size_));
+            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)),
+                                           sizeof(value_type) * base::size_));
             assert(data() != nullptr);
             if constexpr (!std::is_trivially_default_constructible_v<value_type>)
             {
@@ -556,8 +656,8 @@ namespace gpu_array
             auto al = std::allocator<value_type>();
             auto buf = al.allocate(base::size_);
             std::ranges::uninitialized_fill(buf, buf + base::size_, value);
-            GPU_CHECK_ERROR(
-                api::gpuMalloc(reinterpret_cast<void**>(&std::get<0>(base::data_)), sizeof(value_type) * base::size_));
+            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)),
+                                           sizeof(value_type) * base::size_));
             assert(data() != nullptr);
             GPU_CHECK_ERROR(api::gpuMemcpy(data(), buf, sizeof(value_type) * base::size_, gpuMemcpyHostToDevice));
             al.deallocate(buf, base::size_);
@@ -572,8 +672,8 @@ namespace gpu_array
         {
             if (base::size_ == 0) return;
 
-            GPU_CHECK_ERROR(
-                api::gpuMalloc(reinterpret_cast<void**>(&std::get<0>(base::data_)), sizeof(value_type) * base::size_));
+            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)),
+                                           sizeof(value_type) * base::size_));
             assert(data() != nullptr);
             GPU_CHECK_ERROR(
                 api::gpuMemcpy(data(), std::ranges::data(r), sizeof(value_type) * base::size_, gpuMemcpyHostToDevice));
@@ -590,8 +690,8 @@ namespace gpu_array
             auto buf = al.allocate(base::size_);
             for (auto i = std::size_t{0}; const auto& v : r) std::ranges::construct_at(buf + i++, v);
 
-            GPU_CHECK_ERROR(
-                api::gpuMalloc(reinterpret_cast<void**>(&std::get<0>(base::data_)), sizeof(value_type) * base::size_));
+            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)),
+                                           sizeof(value_type) * base::size_));
             assert(data() != nullptr);
             GPU_CHECK_ERROR(api::gpuMemcpy(data(), buf, sizeof(value_type) * base::size_, gpuMemcpyHostToDevice));
             al.deallocate(buf, base::size_);
@@ -600,8 +700,8 @@ namespace gpu_array
         __host__ array(std::initializer_list<value_type> r) : base(std::ranges::size(r))
         {
             if (base::size_ == 0) return;
-            GPU_CHECK_ERROR(
-                api::gpuMalloc(reinterpret_cast<void**>(&std::get<0>(base::data_)), sizeof(value_type) * base::size_));
+            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)),
+                                           sizeof(value_type) * base::size_));
             assert(data() != nullptr);
             GPU_CHECK_ERROR(
                 api::gpuMemcpy(data(), std::ranges::data(r), sizeof(ValueType) * base::size_, gpuMemcpyHostToDevice));
@@ -801,7 +901,7 @@ namespace gpu_array
             {
                 throw std::runtime_error("pointer type mismatch: expected device memory pointer");
             }
-            std::get<0>(base::data_) = ptr;
+            gpu_array::get<0>(base::data_) = ptr;
         }
 #endif
 #if defined(GPU_OVERLOAD_DEVICE)
@@ -847,7 +947,7 @@ namespace gpu_array
                                    [](auto acc, const auto& r) { return acc + std::ranges::size(r); }))
         {
             if (base::size_ == 0) return;
-            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&std::get<0>(base::data_)),
+            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)),
                                                   sizeof(ValueType) * base::size_));
             assert(data() != nullptr);
 
@@ -903,8 +1003,8 @@ namespace gpu_array
         __host__ __device__ const_reference front() const noexcept { return *begin(); }
         __host__ __device__ reference back() noexcept { return *(data() + base::size_ - 1); }
         __host__ __device__ const_reference back() const noexcept { return *(data() + base::size_ - 1); }
-        __host__ __device__ pointer data() noexcept { return std::get<0>(base::data_); }
-        __host__ __device__ const_pointer data() const noexcept { return std::get<0>(base::data_); }
+        __host__ __device__ pointer data() noexcept { return gpu_array::get<0>(base::data_); }
+        __host__ __device__ const_pointer data() const noexcept { return gpu_array::get<0>(base::data_); }
 
         managed_array() = default;
         __host__ __device__ managed_array(const managed_array& r) : base(r) {}
@@ -913,7 +1013,7 @@ namespace gpu_array
         __host__ explicit managed_array(std::size_t size) : base(size)
         {
             if (base::size_ == 0) return;
-            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&std::get<0>(base::data_)),
+            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)),
                                                   sizeof(value_type) * base::size_));
             assert(data() != nullptr);
             std::ranges::uninitialized_value_construct(*this);
@@ -922,7 +1022,7 @@ namespace gpu_array
         __host__ explicit managed_array(std::size_t size, default_init_tag) : base(size)
         {
             if (base::size_ == 0) return;
-            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&std::get<0>(base::data_)),
+            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)),
                                                   sizeof(value_type) * base::size_));
             assert(data() != nullptr);
             std::ranges::uninitialized_default_construct(*this);
@@ -931,7 +1031,7 @@ namespace gpu_array
         __host__ managed_array(std::size_t size, const value_type& value) : base(size)
         {
             if (base::size_ == 0) return;
-            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&std::get<0>(base::data_)),
+            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)),
                                                   sizeof(value_type) * base::size_));
             assert(data() != nullptr);
             std::ranges::uninitialized_fill(*this, value);
@@ -942,7 +1042,7 @@ namespace gpu_array
         __host__ explicit managed_array(const T& r) : base(std::ranges::size(r))
         {
             if (base::size_ == 0) return;
-            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&std::get<0>(base::data_)),
+            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)),
                                                   sizeof(value_type) * base::size_));
             assert(data() != nullptr);
 
@@ -955,7 +1055,7 @@ namespace gpu_array
         __host__ managed_array(std::initializer_list<value_type> r) : base(std::ranges::size(r))
         {
             if (base::size_ == 0) return;
-            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&std::get<0>(base::data_)),
+            GPU_CHECK_ERROR(api::gpuMallocManaged(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)),
                                                   sizeof(value_type) * base::size_));
             assert(data() != nullptr);
             for (auto i = std::size_t{0}; const auto& v : r) std::ranges::construct_at(data() + i++, v);
@@ -1161,7 +1261,7 @@ namespace gpu_array
             {
                 throw std::runtime_error("pointer type mismatch: expected managed memory pointer");
             }
-            std::get<0>(base::data_) = ptr;
+            gpu_array::get<0>(base::data_) = ptr;
         }
 #endif
 #if defined(GPU_OVERLOAD_DEVICE)
@@ -1216,7 +1316,8 @@ namespace gpu_array
 
         __host__ explicit value(default_init_tag) : base(1)
         {
-            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&std::get<0>(base::data_)), sizeof(value_type)));
+            GPU_CHECK_ERROR(
+                api::gpuMalloc(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)), sizeof(value_type)));
             assert(get() != nullptr);
             if constexpr (!std::is_trivially_default_constructible_v<value_type>)
             {
@@ -1227,7 +1328,8 @@ namespace gpu_array
 
         __host__ explicit value(const value_type& r) : base(1)
         {
-            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&std::get<0>(base::data_)), sizeof(value_type)));
+            GPU_CHECK_ERROR(
+                api::gpuMalloc(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)), sizeof(value_type)));
             assert(get() != nullptr);
             GPU_CHECK_ERROR(api::gpuMemcpy(get(), &r, sizeof(value_type), gpuMemcpyHostToDevice));
         }
@@ -1237,7 +1339,8 @@ namespace gpu_array
         __host__ explicit value(Args&&... args) : base(1)
         {
             auto temp = value_type(std::forward<Args>(args)...);
-            GPU_CHECK_ERROR(api::gpuMalloc(reinterpret_cast<void**>(&std::get<0>(base::data_)), sizeof(value_type)));
+            GPU_CHECK_ERROR(
+                api::gpuMalloc(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)), sizeof(value_type)));
             assert(get() != nullptr);
             GPU_CHECK_ERROR(api::gpuMemcpy(get(), &temp, sizeof(value_type), gpuMemcpyHostToDevice));
         }
@@ -1253,7 +1356,7 @@ namespace gpu_array
             {
                 throw std::runtime_error("pointer type mismatch: expected device memory pointer");
             }
-            std::get<0>(base::data_) = ptr;
+            gpu_array::get<0>(base::data_) = ptr;
         }
 #else
             : base(ptr, ptr == nullptr ? 0 : 1)
@@ -1272,8 +1375,11 @@ namespace gpu_array
             return *this;
         }
 
-        __host__ __device__ pointer get() const noexcept { return std::get<0>(base::data_); }
-        __host__ __device__ explicit operator bool() const noexcept { return std::get<0>(base::data_) != nullptr; }
+        __host__ __device__ pointer get() const noexcept { return gpu_array::get<0>(base::data_); }
+        __host__ __device__ explicit operator bool() const noexcept
+        {
+            return gpu_array::get<0>(base::data_) != nullptr;
+        }
 
 #if defined(GPU_OVERLOAD_DEVICE)
         __device__ reference operator*() const noexcept
@@ -1343,7 +1449,7 @@ namespace gpu_array
         __host__ explicit managed_value(default_init_tag) : base(1)
         {
             GPU_CHECK_ERROR(
-                api::gpuMallocManaged(reinterpret_cast<void**>(&std::get<0>(base::data_)), sizeof(ValueType)));
+                api::gpuMallocManaged(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)), sizeof(ValueType)));
             assert(get() != nullptr);
             std::ranges::uninitialized_default_construct_n(get(), 1);
         }
@@ -1351,7 +1457,7 @@ namespace gpu_array
         __host__ explicit managed_value(const ValueType& r) : base(1)
         {
             GPU_CHECK_ERROR(
-                api::gpuMallocManaged(reinterpret_cast<void**>(&std::get<0>(base::data_)), sizeof(ValueType)));
+                api::gpuMallocManaged(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)), sizeof(ValueType)));
             assert(get() != nullptr);
             std::ranges::construct_at(get(), r);
         }
@@ -1359,7 +1465,7 @@ namespace gpu_array
         __host__ explicit managed_value(ValueType&& r) : base(1)
         {
             GPU_CHECK_ERROR(
-                api::gpuMallocManaged(reinterpret_cast<void**>(&std::get<0>(base::data_)), sizeof(ValueType)));
+                api::gpuMallocManaged(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)), sizeof(ValueType)));
             assert(get() != nullptr);
             std::ranges::construct_at(get(), std::move(r));
         }
@@ -1369,7 +1475,7 @@ namespace gpu_array
         __host__ explicit managed_value(Args&&... args) : base(1)
         {
             GPU_CHECK_ERROR(
-                api::gpuMallocManaged(reinterpret_cast<void**>(&std::get<0>(base::data_)), sizeof(ValueType)));
+                api::gpuMallocManaged(reinterpret_cast<void**>(&gpu_array::get<0>(base::data_)), sizeof(ValueType)));
             assert(get() != nullptr);
             std::ranges::construct_at(get(), std::forward<Args>(args)...);
         }
@@ -1385,7 +1491,7 @@ namespace gpu_array
             {
                 throw std::runtime_error("pointer type mismatch: expected managed memory pointer");
             }
-            std::get<0>(base::data_) = ptr;
+            gpu_array::get<0>(base::data_) = ptr;
         }
 #else
             : base(ptr, ptr == nullptr ? 0 : 1)
@@ -1415,8 +1521,11 @@ namespace gpu_array
             return get();
         }
 
-        __host__ __device__ pointer get() const noexcept { return std::get<0>(base::data_); }
-        __host__ __device__ explicit operator bool() const noexcept { return std::get<0>(base::data_) != nullptr; }
+        __host__ __device__ pointer get() const noexcept { return gpu_array::get<0>(base::data_); }
+        __host__ __device__ explicit operator bool() const noexcept
+        {
+            return gpu_array::get<0>(base::data_) != nullptr;
+        }
 
         __host__ void prefetch(int device_id, api::gpuStream_t stream = 0, bool recursive = true) const
         {
@@ -1467,10 +1576,10 @@ namespace gpu_array
         template <std::size_t N, typename Tuple, typename... Ts>
         constexpr bool assignable_to_tuple_helper_n()
         {
-            return requires(const Tuple& t1, std::tuple<Ts...>& t2) {
-                std::get<N>(t1);
-                std::get<N>(t2);
-                requires std::assignable_from<decltype(std::get<N>(t2)), decltype(std::get<N>(t1))>;
+            return requires(const Tuple& t1, gpu_array::tuple<Ts...>& t2) {
+                gpu_array::get<N>(t1);
+                gpu_array::get<N>(t2);
+                requires std::assignable_from<decltype(gpu_array::get<N>(t2)), decltype(gpu_array::get<N>(t1))>;
             };
         }
         template <typename Tuple, typename... Ts>
@@ -1487,7 +1596,7 @@ namespace gpu_array
     template <template <typename...> typename Tuple, typename... Ts>
     class structure_of_arrays_iterator
     {
-        std::tuple<Ts*...> ptrs_;
+        gpu_array::tuple<Ts*...> ptrs_;
 
     public:
         using difference_type = std::ptrdiff_t;
@@ -1502,15 +1611,15 @@ namespace gpu_array
         structure_of_arrays_iterator& operator=(const structure_of_arrays_iterator&) = default;
         structure_of_arrays_iterator& operator=(structure_of_arrays_iterator&&) noexcept = default;
 
-        __host__ __device__ explicit structure_of_arrays_iterator(std::tuple<Ts*...> ptrs) : ptrs_(ptrs) {}
+        __host__ __device__ explicit structure_of_arrays_iterator(gpu_array::tuple<Ts*...> ptrs) : ptrs_(ptrs) {}
 
         __host__ __device__ Tuple<Ts&...> operator*() const
         {
-            return std::apply([](auto*... ptrs) { return Tuple<Ts&...>(*ptrs...); }, ptrs_);
+            return gpu_array::apply([](auto*... ptrs) { return Tuple<Ts&...>(*ptrs...); }, ptrs_);
         }
         __host__ __device__ Tuple<Ts&...> operator[](size_type n) const
         {
-            return std::apply([n](auto*... ptrs) { return Tuple<Ts&...>(ptrs[n]...); }, ptrs_);
+            return gpu_array::apply([n](auto*... ptrs) { return Tuple<Ts&...>(ptrs[n]...); }, ptrs_);
         }
         __host__ __device__ auto operator->() const
         {
@@ -1523,7 +1632,7 @@ namespace gpu_array
         }
         __host__ __device__ structure_of_arrays_iterator& operator++()
         {
-            std::apply([](auto*&... ptrs) { (++ptrs, ...); }, ptrs_);
+            gpu_array::apply([](auto*&... ptrs) { (++ptrs, ...); }, ptrs_);
             return *this;
         }
         __host__ __device__ structure_of_arrays_iterator operator++(int)
@@ -1534,12 +1643,12 @@ namespace gpu_array
         }
         __host__ __device__ structure_of_arrays_iterator& operator+=(difference_type n)
         {
-            std::apply([n](auto*&... ptrs) { ((ptrs += n), ...); }, ptrs_);
+            gpu_array::apply([n](auto*&... ptrs) { ((ptrs += n), ...); }, ptrs_);
             return *this;
         }
         __host__ __device__ structure_of_arrays_iterator& operator--()
         {
-            std::apply([](auto*&... ptrs) { (--ptrs, ...); }, ptrs_);
+            gpu_array::apply([](auto*&... ptrs) { (--ptrs, ...); }, ptrs_);
             return *this;
         }
         __host__ __device__ structure_of_arrays_iterator operator--(int)
@@ -1550,20 +1659,20 @@ namespace gpu_array
         }
         __host__ __device__ structure_of_arrays_iterator& operator-=(difference_type n)
         {
-            std::apply([n](auto*&... ptrs) { ((ptrs -= n), ...); }, ptrs_);
+            gpu_array::apply([n](auto*&... ptrs) { ((ptrs -= n), ...); }, ptrs_);
             return *this;
         }
 
         __host__ __device__ friend difference_type operator-(const structure_of_arrays_iterator& lhs,
                                                              const structure_of_arrays_iterator& rhs)
         {
-            return std::get<0>(lhs.ptrs_) - std::get<0>(rhs.ptrs_);
+            return gpu_array::get<0>(lhs.ptrs_) - gpu_array::get<0>(rhs.ptrs_);
         }
         __host__ __device__ friend structure_of_arrays_iterator operator+(const structure_of_arrays_iterator& lhs,
                                                                           difference_type n)
         {
             return structure_of_arrays_iterator(
-                std::apply([n](auto*... ptrs) { return std::tuple{ptrs + n...}; }, lhs.ptrs_));
+                gpu_array::apply([n](auto*... ptrs) { return gpu_array::tuple{ptrs + n...}; }, lhs.ptrs_));
         }
         __host__ __device__ friend structure_of_arrays_iterator operator+(structure_of_arrays_iterator&& lhs,
                                                                           difference_type n)
@@ -1585,7 +1694,7 @@ namespace gpu_array
                                                                           difference_type n)
         {
             return structure_of_arrays_iterator(
-                std::apply([n](auto*... ptrs) { return std::tuple{ptrs - n...}; }, lhs.ptrs_));
+                gpu_array::apply([n](auto*... ptrs) { return gpu_array::tuple{ptrs - n...}; }, lhs.ptrs_));
         }
         __host__ __device__ friend structure_of_arrays_iterator operator-(structure_of_arrays_iterator&& lhs,
                                                                           difference_type n)
@@ -1597,16 +1706,16 @@ namespace gpu_array
         __host__ __device__ friend bool operator==(const structure_of_arrays_iterator& lhs,
                                                    const structure_of_arrays_iterator& rhs)
         {
-            return std::get<0>(lhs.ptrs_) == std::get<0>(rhs.ptrs_);
+            return gpu_array::get<0>(lhs.ptrs_) == gpu_array::get<0>(rhs.ptrs_);
         }
         __host__ __device__ friend std::strong_ordering operator<=>(const structure_of_arrays_iterator& lhs,
                                                                     const structure_of_arrays_iterator& rhs)
         {
-            return std::get<0>(lhs.ptrs_) <=> std::get<0>(rhs.ptrs_);
+            return gpu_array::get<0>(lhs.ptrs_) <=> gpu_array::get<0>(rhs.ptrs_);
         }
         __host__ __device__ friend auto iter_move(const structure_of_arrays_iterator& x)
         {
-            return std::apply(
+            return gpu_array::apply(
                 [](auto*... ptrs) {
                     using RetType = std::remove_cvref_t<decltype(x)>::value_type;
                     return RetType(std::move(*ptrs)...);
@@ -1618,15 +1727,15 @@ namespace gpu_array
         {
             constexpr std::size_t size = std::tuple_size_v<std::remove_cvref_t<decltype(lhs.ptrs_)>>;
             [&lhs, &rhs]<std::size_t... N>(std::index_sequence<N...>) {
-                (std::swap(*std::get<N>(lhs.ptrs_), *std::get<N>(rhs.ptrs_)), ...);
+                (std::swap(*gpu_array::get<N>(lhs.ptrs_), *gpu_array::get<N>(rhs.ptrs_)), ...);
             }(std::make_index_sequence<size>());
         }
     };
 
     template <typename... Ts>
-    class structure_of_arrays : public structure_of_arrays<std::tuple<Ts...>, size_type_default>
+    class structure_of_arrays : public structure_of_arrays<gpu_array::tuple<Ts...>, size_type_default>
     {
-        using base = structure_of_arrays<std::tuple<Ts...>, size_type_default>;
+        using base = structure_of_arrays<gpu_array::tuple<Ts...>, size_type_default>;
         using base::base;
 
     public:
@@ -1658,9 +1767,9 @@ namespace gpu_array
         static constexpr auto num_arrays = sizeof...(Ts);
         using base = detail::base<false, SizeType, Ts...>;
 
-        using tuple_value_type = std::tuple<Ts...>;
-        using tuple_pointer_type = std::tuple<Ts*...>;
-        using tuple_const_pointer_type = std::tuple<const Ts*...>;
+        using tuple_value_type = gpu_array::tuple<Ts...>;
+        using tuple_pointer_type = gpu_array::tuple<Ts*...>;
+        using tuple_const_pointer_type = gpu_array::tuple<const Ts*...>;
         using ret_tuple_value_type = Tuple<Ts...>;
         using ret_tuple_reference_type = Tuple<Ts&...>;
         using ret_tuple_const_reference_type = Tuple<const Ts&...>;
@@ -1675,18 +1784,18 @@ namespace gpu_array
         SIGSEGV_DEPRECATED __host__ __device__ auto begin() noexcept { return iterator_type(base::data_); }
         SIGSEGV_DEPRECATED __host__ __device__ auto end() noexcept
         {
-            return std::apply(
+            return gpu_array::apply(
                 [this](auto&... ptrs) { return iterator_type(tuple_pointer_type{(ptrs + base::size_)...}); },
                 base::data_);
         }
         SIGSEGV_DEPRECATED __host__ __device__ auto begin() const noexcept
         {
-            return std::apply([](auto&... ptrs) { return const_iterator_type(tuple_const_pointer_type{(ptrs)...}); },
-                              base::data_);
+            return gpu_array::apply(
+                [](auto&... ptrs) { return const_iterator_type(tuple_const_pointer_type{(ptrs)...}); }, base::data_);
         }
         SIGSEGV_DEPRECATED __host__ __device__ auto end() const noexcept
         {
-            return std::apply(
+            return gpu_array::apply(
                 [this](auto&... ptrs) {
                     return const_iterator_type(tuple_const_pointer_type{(ptrs + base::size_)...});
                 },
@@ -1695,28 +1804,29 @@ namespace gpu_array
         SIGSEGV_DEPRECATED __host__ __device__ auto operator[](size_type i) &
         {
             assert(i < base::size_);
-            return std::apply([i](auto&... ptrs) { return ret_tuple_reference_type{*(ptrs + i)...}; }, base::data_);
+            return gpu_array::apply([i](auto&... ptrs) { return ret_tuple_reference_type{*(ptrs + i)...}; },
+                                    base::data_);
         }
         SIGSEGV_DEPRECATED __host__ __device__ auto operator[](size_type i) const&
         {
             assert(i < base::size_);
-            return std::apply([i](auto&... ptrs) { return ret_tuple_const_reference_type{*(ptrs + i)...}; },
-                              base::data_);
+            return gpu_array::apply([i](auto&... ptrs) { return ret_tuple_const_reference_type{*(ptrs + i)...}; },
+                                    base::data_);
         }
         SIGSEGV_DEPRECATED __host__ __device__ auto operator[](size_type i) &&
         {
             assert(i < base::size_);
-            return std::apply([i](auto&... ptrs) { return ret_tuple_value_type{*(ptrs + i)...}; }, base::data_);
+            return gpu_array::apply([i](auto&... ptrs) { return ret_tuple_value_type{*(ptrs + i)...}; }, base::data_);
         }
         template <std::size_t N>
         __host__ __device__ auto* data() noexcept
         {
-            return std::get<N>(base::data_);
+            return gpu_array::get<N>(base::data_);
         }
         template <std::size_t N>
         __host__ __device__ const auto* data() const noexcept
         {
-            return std::get<N>(base::data_);
+            return gpu_array::get<N>(base::data_);
         }
 
         structure_of_arrays() = default;
@@ -1763,7 +1873,7 @@ namespace gpu_array
             };
 
             [this, &value, alloc_ptr]<std::size_t... N>(std::index_sequence<N...>) {
-                (alloc_ptr(std::get<N>(base::data_), std::get<N>(value)), ...);
+                (alloc_ptr(gpu_array::get<N>(base::data_), gpu_array::get<N>(value)), ...);
             }(std::make_index_sequence<num_arrays>());
         }
 
@@ -1782,8 +1892,8 @@ namespace gpu_array
             };
 
             [this, &array, alloc_ptr]<std::size_t... N>(std::index_sequence<N...>) {
-                (alloc_ptr(std::get<N>(base::data_),
-                           array | std::views::transform([](const auto& v) { return (std::get<N>(v)); })),
+                (alloc_ptr(gpu_array::get<N>(base::data_),
+                           array | std::views::transform([](const auto& v) { return (gpu_array::get<N>(v)); })),
                  ...);
             }(std::make_index_sequence<num_arrays>());
         }
@@ -1801,15 +1911,15 @@ namespace gpu_array
             };
 
             [this, &list, alloc_ptr]<std::size_t... N>(std::index_sequence<N...>) {
-                (alloc_ptr(std::get<N>(base::data_),
-                           list | std::views::transform([](const auto& v) { return (std::get<N>(v)); })),
+                (alloc_ptr(gpu_array::get<N>(base::data_),
+                           list | std::views::transform([](const auto& v) { return (gpu_array::get<N>(v)); })),
                  ...);
             }(std::make_index_sequence<num_arrays>());
         }
 
         template <detail::array_convertible_for_copy... Ranges>
         requires (sizeof...(Ranges) == num_arrays) &&
-                 detail::assignable_to_tuple<std::tuple<std::ranges::range_value_t<Ranges>...>, Ts...>
+                 detail::assignable_to_tuple<gpu_array::tuple<std::ranges::range_value_t<Ranges>...>, Ts...>
         __host__ explicit structure_of_arrays(const Ranges&... arrays) : base(std::max({std::ranges::size(arrays)...}))
         {
             if (base::size_ == 0) return;
@@ -1828,9 +1938,9 @@ namespace gpu_array
                 GPU_CHECK_ERROR(api::gpuMemcpy(ptr, buf.get(), sizeof(T) * base::size_, gpuMemcpyHostToDevice));
             };
 
-            auto arrays_tuple = std::tuple<const Ranges&...>(arrays...);
+            auto arrays_tuple = gpu_array::tuple<const Ranges&...>(arrays...);
             [this, &arrays_tuple, alloc_ptr]<std::size_t... N>(std::index_sequence<N...>) {
-                (alloc_ptr(std::get<N>(base::data_), std::get<N>(arrays_tuple)), ...);
+                (alloc_ptr(gpu_array::get<N>(base::data_), gpu_array::get<N>(arrays_tuple)), ...);
             }(std::make_index_sequence<num_arrays>());
         }
 
@@ -1853,9 +1963,9 @@ namespace gpu_array
                 GPU_CHECK_ERROR(api::gpuMemcpy(ptr, buf.get(), sizeof(T) * base::size_, gpuMemcpyHostToDevice));
             };
 
-            auto arrays_tuple = std::tuple<std::initializer_list<Ts>...>(lists...);
+            auto arrays_tuple = gpu_array::tuple<std::initializer_list<Ts>...>(lists...);
             [this, &arrays_tuple, alloc_ptr]<std::size_t... N>(std::index_sequence<N...>) {
-                (alloc_ptr(std::get<N>(base::data_), std::get<N>(arrays_tuple)), ...);
+                (alloc_ptr(gpu_array::get<N>(base::data_), gpu_array::get<N>(arrays_tuple)), ...);
             }(std::make_index_sequence<num_arrays>());
         }
 
@@ -1887,9 +1997,9 @@ namespace gpu_array
                 return buf;
             };
 
-            const auto tmp_bufs = std::apply(
+            const auto tmp_bufs = gpu_array::apply(
                 [copy_buffer](const auto*... ptrs) {
-                    return std::tuple<std::unique_ptr<Ts[]>...>(copy_buffer(ptrs)...);
+                    return gpu_array::tuple<std::unique_ptr<Ts[]>...>(copy_buffer(ptrs)...);
                 },
                 base::data_);
 
@@ -1905,7 +2015,7 @@ namespace gpu_array
 
                 for (size_type i = 0; i < base::size_; ++i)
                 {
-                    std::apply([&result, i](const auto&... bufs) { result.push_back(U{bufs[i]...}); }, tmp_bufs);
+                    gpu_array::apply([&result, i](const auto&... bufs) { result.push_back(U{bufs[i]...}); }, tmp_bufs);
                 }
 
                 return result;
@@ -1917,7 +2027,7 @@ namespace gpu_array
 
                 for (size_type i = 0; i < base::size_; ++i)
                 {
-                    std::apply([&result, i](const auto&... bufs) { result[i] = U{bufs[i]...}; }, tmp_bufs);
+                    gpu_array::apply([&result, i](const auto&... bufs) { result[i] = U{bufs[i]...}; }, tmp_bufs);
                 }
 
                 return result;
@@ -1943,7 +2053,7 @@ namespace gpu_array
             if (base::size_ > 0) assert(ptr != nullptr);
 
             // reset specified pointer only
-            std::get<N>(base::data_) = base::size_ == 0 ? nullptr : ptr;
+            gpu_array::get<N>(base::data_) = base::size_ == 0 ? nullptr : ptr;
         }
 
         template <std::size_t N, gpu_array_ptr T>
@@ -1963,9 +2073,9 @@ namespace gpu_array
     };
 
     template <typename... Ts>
-    class managed_structure_of_arrays : public managed_structure_of_arrays<std::tuple<Ts...>, size_type_default>
+    class managed_structure_of_arrays : public managed_structure_of_arrays<gpu_array::tuple<Ts...>, size_type_default>
     {
-        using base = managed_structure_of_arrays<std::tuple<Ts...>, size_type_default>;
+        using base = managed_structure_of_arrays<gpu_array::tuple<Ts...>, size_type_default>;
         using base::base;
 
     public:
@@ -1998,9 +2108,9 @@ namespace gpu_array
         static constexpr auto num_arrays = sizeof...(Ts);
         using base = detail::base<true, SizeType, Ts...>;
 
-        using tuple_value_type = std::tuple<Ts...>;
-        using tuple_pointer_type = std::tuple<Ts*...>;
-        using tuple_const_pointer_type = std::tuple<const Ts*...>;
+        using tuple_value_type = gpu_array::tuple<Ts...>;
+        using tuple_pointer_type = gpu_array::tuple<Ts*...>;
+        using tuple_const_pointer_type = gpu_array::tuple<const Ts*...>;
         using ret_tuple_value_type = Tuple<Ts...>;
         using ret_tuple_reference_type = Tuple<Ts&...>;
         using ret_tuple_const_reference_type = Tuple<const Ts&...>;
@@ -2035,7 +2145,8 @@ namespace gpu_array
             };
 
             [this, &alloc_ptr]<std::size_t... N>(std::index_sequence<N...>) {
-                (alloc_ptr(std::get<N>(base::data_), [](const auto& v) -> const auto& { return (std::get<N>(v)); }),
+                (alloc_ptr(gpu_array::get<N>(base::data_),
+                           [](const auto& v) -> const auto& { return (gpu_array::get<N>(v)); }),
                  ...);
             }(std::make_index_sequence<num_arrays>());
         }
@@ -2048,18 +2159,18 @@ namespace gpu_array
         __host__ __device__ auto begin() noexcept { return iterator_type(base::data_); }
         __host__ __device__ auto end() noexcept
         {
-            return std::apply(
+            return gpu_array::apply(
                 [this](auto&... ptrs) { return iterator_type(tuple_pointer_type{(ptrs + base::size_)...}); },
                 base::data_);
         }
         __host__ __device__ auto begin() const noexcept
         {
-            return std::apply([](auto&... ptrs) { return const_iterator_type(tuple_const_pointer_type{(ptrs)...}); },
-                              base::data_);
+            return gpu_array::apply(
+                [](auto&... ptrs) { return const_iterator_type(tuple_const_pointer_type{(ptrs)...}); }, base::data_);
         }
         __host__ __device__ auto end() const noexcept
         {
-            return std::apply(
+            return gpu_array::apply(
                 [this](auto&... ptrs) {
                     return const_iterator_type(tuple_const_pointer_type{(ptrs + base::size_)...});
                 },
@@ -2068,28 +2179,29 @@ namespace gpu_array
         __host__ __device__ auto operator[](size_type i) &
         {
             assert(i < base::size_);
-            return std::apply([i](auto&... ptrs) { return ret_tuple_reference_type{*(ptrs + i)...}; }, base::data_);
+            return gpu_array::apply([i](auto&... ptrs) { return ret_tuple_reference_type{*(ptrs + i)...}; },
+                                    base::data_);
         }
         __host__ __device__ auto operator[](size_type i) const&
         {
             assert(i < base::size_);
-            return std::apply([i](auto&... ptrs) { return ret_tuple_const_reference_type{*(ptrs + i)...}; },
-                              base::data_);
+            return gpu_array::apply([i](auto&... ptrs) { return ret_tuple_const_reference_type{*(ptrs + i)...}; },
+                                    base::data_);
         }
         __host__ __device__ auto operator[](size_type i) &&
         {
             assert(i < base::size_);
-            return std::apply([i](auto&... ptrs) { return ret_tuple_value_type{*(ptrs + i)...}; }, base::data_);
+            return gpu_array::apply([i](auto&... ptrs) { return ret_tuple_value_type{*(ptrs + i)...}; }, base::data_);
         }
         template <std::size_t N>
         __host__ __device__ auto* data() noexcept
         {
-            return std::get<N>(base::data_);
+            return gpu_array::get<N>(base::data_);
         }
         template <std::size_t N>
         __host__ __device__ const auto* data() const noexcept
         {
-            return std::get<N>(base::data_);
+            return gpu_array::get<N>(base::data_);
         }
 
         managed_structure_of_arrays() = default;
@@ -2130,7 +2242,9 @@ namespace gpu_array
             });
 
             [this, &value]<std::size_t... N>(std::index_sequence<N...>) {
-                (std::ranges::uninitialized_fill_n(std::get<N>(base::data_), base::size_, std::get<N>(value)), ...);
+                (std::ranges::uninitialized_fill_n(gpu_array::get<N>(base::data_), base::size_,
+                                                   gpu_array::get<N>(value)),
+                 ...);
             }(std::make_index_sequence<num_arrays>());
         }
 
@@ -2151,7 +2265,8 @@ namespace gpu_array
             };
 
             [this, &alloc_ptr]<std::size_t... N>(std::index_sequence<N...>) {
-                (alloc_ptr(std::get<N>(base::data_), [](const auto& e) -> const auto& { return (std::get<N>(e)); }),
+                (alloc_ptr(gpu_array::get<N>(base::data_),
+                           [](const auto& e) -> const auto& { return (gpu_array::get<N>(e)); }),
                  ...);
             }(std::make_index_sequence<num_arrays>());
         }
@@ -2172,14 +2287,15 @@ namespace gpu_array
             };
 
             [this, &alloc_ptr]<std::size_t... N>(std::index_sequence<N...>) {
-                (alloc_ptr(std::get<N>(base::data_), [](const auto& e) -> const auto& { return (std::get<N>(e)); }),
+                (alloc_ptr(gpu_array::get<N>(base::data_),
+                           [](const auto& e) -> const auto& { return (gpu_array::get<N>(e)); }),
                  ...);
             }(std::make_index_sequence<num_arrays>());
         }
 
         template <detail::array_convertible_for_copy... Ranges>
         requires (sizeof...(Ranges) == num_arrays) &&
-                 detail::assignable_to_tuple<std::tuple<std::ranges::range_value_t<Ranges>...>, Ts...>
+                 detail::assignable_to_tuple<gpu_array::tuple<std::ranges::range_value_t<Ranges>...>, Ts...>
         __host__ explicit managed_structure_of_arrays(const Ranges&... arrays)
             : base(std::max({std::ranges::size(arrays)...}))
         {
@@ -2197,9 +2313,9 @@ namespace gpu_array
                 for (auto i = std::size_t{0}; const auto& v : range) std::ranges::construct_at(ptr + i++, v);
             };
 
-            auto arrays_tuple = std::tuple<const Ranges&...>(arrays...);
+            auto arrays_tuple = gpu_array::tuple<const Ranges&...>(arrays...);
             [this, &arrays_tuple, alloc_ptr]<std::size_t... N>(std::index_sequence<N...>) {
-                (alloc_ptr(std::get<N>(base::data_), std::get<N>(arrays_tuple)), ...);
+                (alloc_ptr(gpu_array::get<N>(base::data_), gpu_array::get<N>(arrays_tuple)), ...);
             }(std::make_index_sequence<num_arrays>());
         }
 
@@ -2220,9 +2336,9 @@ namespace gpu_array
                 for (auto i = std::size_t{0}; const auto& v : range) std::ranges::construct_at(ptr + i++, v);
             };
 
-            auto arrays_tuple = std::tuple<std::initializer_list<Ts>...>(lists...);
+            auto arrays_tuple = gpu_array::tuple<std::initializer_list<Ts>...>(lists...);
             [this, &arrays_tuple, alloc_ptr]<std::size_t... N>(std::index_sequence<N...>) {
-                (alloc_ptr(std::get<N>(base::data_), std::get<N>(arrays_tuple)), ...);
+                (alloc_ptr(gpu_array::get<N>(base::data_), gpu_array::get<N>(arrays_tuple)), ...);
             }(std::make_index_sequence<num_arrays>());
         }
 
@@ -2349,7 +2465,8 @@ namespace gpu_array
 
                 for (size_type i = 0; i < base::size_; ++i)
                 {
-                    std::apply([&result, i](const auto&... bufs) { result.push_back(U{bufs[i]...}); }, base::data_);
+                    gpu_array::apply([&result, i](const auto&... bufs) { result.push_back(U{bufs[i]...}); },
+                                     base::data_);
                 }
 
                 return result;
@@ -2361,7 +2478,7 @@ namespace gpu_array
 
                 for (size_type i = 0; i < base::size_; ++i)
                 {
-                    std::apply([&result, i](const auto&... bufs) { result[i] = U{bufs[i]...}; }, base::data_);
+                    gpu_array::apply([&result, i](const auto&... bufs) { result[i] = U{bufs[i]...}; }, base::data_);
                 }
 
                 return result;
@@ -2387,7 +2504,7 @@ namespace gpu_array
             if (base::size_ > 0) assert(ptr != nullptr);
 
             // reset specified pointer only
-            std::get<N>(base::data_) = base::size_ == 0 ? nullptr : ptr;
+            gpu_array::get<N>(base::data_) = base::size_ == 0 ? nullptr : ptr;
         }
 
         template <std::size_t N, gpu_array_ptr T>
@@ -3089,14 +3206,15 @@ namespace gpu_array
         {
         public:
             using iterator_category = std::random_access_iterator_tag;
-            using value_type = detail::tuple<std::ranges::range_size_t<Range>, std::ranges::range_value_t<Range>>;
+            using value_type = gpu_array::tuple<std::ranges::range_size_t<Range>, std::ranges::range_value_t<Range>>;
             using difference_type = std::make_signed_t<std::ranges::range_size_t<Range>>;
 
             enumerate_iterator() = default;
             __host__ __device__ explicit enumerate_iterator(Range& r) noexcept : pointer_(&r), index_(0) {}
             __host__ __device__ std::ranges::range_size_t<Range> index() const noexcept { return index_; }
-            __host__ __device__ detail::tuple<std::ranges::range_size_t<Range>, std::ranges::range_reference_t<Range>>
-            operator*() const noexcept
+            __host__ __device__
+                gpu_array::tuple<std::ranges::range_size_t<Range>, std::ranges::range_reference_t<Range>>
+                operator*() const noexcept
             {
                 return {index_, (*pointer_)[index_]};
             }
@@ -3132,8 +3250,9 @@ namespace gpu_array
                 index_ -= n;
                 return *this;
             }
-            __host__ __device__ detail::tuple<std::ranges::range_size_t<Range>, std::ranges::range_reference_t<Range>>
-            operator[](difference_type n) const
+            __host__ __device__
+                gpu_array::tuple<std::ranges::range_size_t<Range>, std::ranges::range_reference_t<Range>>
+                operator[](difference_type n) const
             {
                 return *(*this + n);
             }
@@ -3180,8 +3299,8 @@ namespace gpu_array
                 return x.index() - y.index();
             }
 
-            __host__ __device__ friend detail::tuple<std::ranges::range_size_t<Range>,
-                                                     std::ranges::range_rvalue_reference_t<Range>>
+            __host__ __device__ friend gpu_array::tuple<std::ranges::range_size_t<Range>,
+                                                        std::ranges::range_rvalue_reference_t<Range>>
             iter_move(const enumerate_iterator& x)
             {
                 return {x.index(), std::move(x->second)};
@@ -3281,28 +3400,13 @@ namespace gpu_array
 
     namespace detail
     {
-        template <class F, class Tuple, std::size_t... Is>
-        __host__ __device__ auto apply_impl(F&& f, Tuple&& t, std::index_sequence<Is...>)
-            -> decltype(std::forward<F>(f)(detail::get<Is>(std::forward<Tuple>(t))...))
-        {
-            return std::forward<F>(f)(detail::get<Is>(std::forward<Tuple>(t))...);
-        }
-
-        template <class F, class Tuple>
-        requires requires { std::tuple_size_v<std::remove_reference_t<Tuple>>; }
-        __host__ __device__ decltype(auto) apply(F&& f, Tuple&& t)
-        {
-            return apply_impl(std::forward<F>(f), std::forward<Tuple>(t),
-                              std::make_index_sequence<std::tuple_size_v<std::remove_reference_t<Tuple>>>{});
-        }
-
         template <RandomAccessRange... Ranges>
         requires (std::ranges::view<Ranges> && ...)
         class zip_iterator
         {
         public:
             using iterator_category = std::forward_iterator_tag;
-            using value_type = detail::tuple<std::ranges::range_value_t<Ranges>...>;
+            using value_type = gpu_array::tuple<std::ranges::range_value_t<Ranges>...>;
             using difference_type = std::common_type_t<std::make_signed_t<std::ranges::range_size_t<Ranges>>...>;
 
             zip_iterator() = default;
@@ -3313,9 +3417,9 @@ namespace gpu_array
             }
             __host__ __device__ auto operator*() const noexcept
             {
-                return detail::apply(
+                return gpu_array::apply(
                     [this](auto&... pointers) {
-                        return detail::tuple<std::ranges::range_reference_t<Ranges>...>((*pointers)[index_]...);
+                        return gpu_array::tuple<std::ranges::range_reference_t<Ranges>...>((*pointers)[index_]...);
                     },
                     pointers_);
             }
@@ -3351,7 +3455,7 @@ namespace gpu_array
                 index_ -= n;
                 return *this;
             }
-            __host__ __device__ detail::tuple<std::ranges::range_reference_t<Ranges>...> operator[](
+            __host__ __device__ gpu_array::tuple<std::ranges::range_reference_t<Ranges>...> operator[](
                 difference_type n) const
             {
                 return *(*this + n);
@@ -3385,13 +3489,13 @@ namespace gpu_array
 
             __host__ __device__ friend auto iter_move(const zip_iterator& x)
             {
-                using Tuple = detail::tuple<std::ranges::range_rvalue_reference_t<Ranges>...>;
-                return detail::apply([&x](auto&... pointers) { return Tuple(std::move((*pointers)[x.index()])...); },
-                                     x.pointers_);
+                using Tuple = gpu_array::tuple<std::ranges::range_rvalue_reference_t<Ranges>...>;
+                return gpu_array::apply([&x](auto&... pointers) { return Tuple(std::move((*pointers)[x.index()])...); },
+                                        x.pointers_);
             }
 
         private:
-            detail::tuple<Ranges*...> pointers_{};
+            gpu_array::tuple<Ranges*...> pointers_{};
             std::common_type_t<std::ranges::range_size_t<Ranges>...> index_ = 0;
         };
 
@@ -3423,29 +3527,29 @@ namespace gpu_array
             __host__ __device__ explicit zip_view(Ranges... rs) noexcept : ranges_(rs...) {}
             [[nodiscard]] __host__ __device__ auto begin() noexcept
             {
-                return detail::apply([](auto&... ranges) { return zip_iterator<Ranges...>(ranges...); }, ranges_);
+                return gpu_array::apply([](auto&... ranges) { return zip_iterator<Ranges...>(ranges...); }, ranges_);
             }
             [[nodiscard]] __host__ __device__ auto begin() const noexcept
             requires (std::is_const_v<Ranges> && ...)
             {
-                return detail::apply([](auto&... ranges) { return zip_iterator<Ranges...>(ranges...); }, ranges_);
+                return gpu_array::apply([](auto&... ranges) { return zip_iterator<Ranges...>(ranges...); }, ranges_);
             }
             [[nodiscard]] __host__ __device__ auto end() noexcept
             {
-                return detail::apply([](auto&... ranges) { return zip_sentinel<Ranges...>(ranges...); }, ranges_);
+                return gpu_array::apply([](auto&... ranges) { return zip_sentinel<Ranges...>(ranges...); }, ranges_);
             }
             [[nodiscard]] __host__ __device__ auto end() const noexcept
             requires (std::is_const_v<Ranges> && ...)
             {
-                return detail::apply([](auto&... ranges) { return zip_sentinel<Ranges...>(ranges...); }, ranges_);
+                return gpu_array::apply([](auto&... ranges) { return zip_sentinel<Ranges...>(ranges...); }, ranges_);
             }
             [[nodiscard]] __host__ __device__ auto size() const noexcept
             {
-                return detail::apply([](auto&... ranges) { return std::min({ranges.size()...}); }, ranges_);
+                return gpu_array::apply([](auto&... ranges) { return std::min({ranges.size()...}); }, ranges_);
             }
 
         private:
-            detail::tuple<Ranges...> ranges_{};
+            gpu_array::tuple<Ranges...> ranges_{};
         };
 
         struct zip_adapter
diff --git a/test/test.cpp b/test/test.cpp
index bd81477..8c0c1d3 100644
--- a/test/test.cpp
+++ b/test/test.cpp
@@ -6,19 +6,19 @@
 // NOLINTBEGIN
 using namespace gpu_array;
 
-// Example of custom tuple type derived from std::tuple
+// Example of custom tuple type derived from gpu_array::tuple
 // You may need to specialize std::common_type and std::basic_common_reference to satisfy range concepts
 template <typename... Ts>
-class custom_tuple_example : public std::tuple<Ts...>
+class custom_tuple_example : public gpu_array::tuple<Ts...>
 {
-    using base = std::tuple<Ts...>;
+    using base = gpu_array::tuple<Ts...>;
     using base::base;
 
 public:
     template <std::size_t N>
     __host__ decltype(auto) get_string() const
     {
-        return std::to_string(std::get<N>(*this));
+        return std::to_string(gpu_array::get<N>(*this));
     }
     using base::operator=;
     template <typename... Us>
@@ -1038,7 +1038,7 @@ TEST(StructureOfArrays, Construction)
 {
     using tuple_elem_type0 = int;
     using tuple_elem_type1 = double;
-    using tuple_type = std::tuple<tuple_elem_type0, tuple_elem_type1>;
+    using tuple_type = gpu_array::tuple<tuple_elem_type0, tuple_elem_type1>;
     using custom_tuple_type = custom_tuple_example<tuple_elem_type0, tuple_elem_type1>;
 
     // initizalization
@@ -1052,18 +1052,18 @@ TEST(StructureOfArrays, Construction)
         auto vec = soa.to<std::vector>();
         for (std::size_t i = 0; i < vec.size(); ++i)
         {
-            EXPECT_EQ(std::get<0>(vec[i]), 0);
-            EXPECT_EQ(std::get<1>(vec[i]), 0.0);
+            EXPECT_EQ(gpu_array::get<0>(vec[i]), 0);
+            EXPECT_EQ(gpu_array::get<1>(vec[i]), 0.0);
         }
     }
     {
-        auto soa = structure_of_arrays<tuple_elem_type0, tuple_elem_type1>(10, std::tuple(1, 2.5));
+        auto soa = structure_of_arrays<tuple_elem_type0, tuple_elem_type1>(10, gpu_array::tuple(1, 2.5));
         EXPECT_EQ(soa.size(), 10);
         auto vec = soa.to<std::vector>();
         for (std::size_t i = 0; i < vec.size(); ++i)
         {
-            EXPECT_EQ(std::get<0>(vec[i]), 1);
-            EXPECT_EQ(std::get<1>(vec[i]), 2.5);
+            EXPECT_EQ(gpu_array::get<0>(vec[i]), 1);
+            EXPECT_EQ(gpu_array::get<1>(vec[i]), 2.5);
         }
     }
     {
@@ -1080,18 +1080,18 @@ TEST(StructureOfArrays, Construction)
         auto vec = soa.to<std::vector>();
         for (std::size_t i = 0; i < vec.size(); ++i)
         {
-            EXPECT_EQ(std::get<0>(vec[i]), 0);
-            EXPECT_EQ(std::get<1>(vec[i]), 0.0);
+            EXPECT_EQ(gpu_array::get<0>(vec[i]), 0);
+            EXPECT_EQ(gpu_array::get<1>(vec[i]), 0.0);
         }
     }
     {
-        auto soa = structure_of_arrays<tuple_type>(10, std::tuple(1, 2.5));
+        auto soa = structure_of_arrays<tuple_type>(10, gpu_array::tuple(1, 2.5));
         EXPECT_EQ(soa.size(), 10);
         auto vec = soa.to<std::vector>();
         for (std::size_t i = 0; i < vec.size(); ++i)
         {
-            EXPECT_EQ(std::get<0>(vec[i]), 1);
-            EXPECT_EQ(std::get<1>(vec[i]), 2.5);
+            EXPECT_EQ(gpu_array::get<0>(vec[i]), 1);
+            EXPECT_EQ(gpu_array::get<1>(vec[i]), 2.5);
         }
     }
     {
@@ -1108,8 +1108,8 @@ TEST(StructureOfArrays, Construction)
         auto vec = soa.to<std::vector>();
         for (std::size_t i = 0; i < vec.size(); ++i)
         {
-            EXPECT_EQ(std::get<0>(vec[i]), 0);
-            EXPECT_EQ(std::get<1>(vec[i]), 0.0);
+            EXPECT_EQ(gpu_array::get<0>(vec[i]), 0);
+            EXPECT_EQ(gpu_array::get<1>(vec[i]), 0.0);
         }
     }
     {
@@ -1118,8 +1118,8 @@ TEST(StructureOfArrays, Construction)
         auto vec = soa.to<std::vector>();
         for (std::size_t i = 0; i < vec.size(); ++i)
         {
-            EXPECT_EQ(std::get<0>(vec[i]), 1);
-            EXPECT_EQ(std::get<1>(vec[i]), 2.5);
+            EXPECT_EQ(gpu_array::get<0>(vec[i]), 1);
+            EXPECT_EQ(gpu_array::get<1>(vec[i]), 2.5);
         }
     }
     {
@@ -1158,7 +1158,7 @@ TEST(StructureOfArrays, Construction)
         EXPECT_EQ(arr1.use_count(), 1);
     }
 
-    // construction from range of std::tuple
+    // construction from range of gpu_array::tuple
     {
         auto vec = std::vector<tuple_type>();
         for (std::size_t i = 0; i < 10; ++i)
@@ -1171,17 +1171,17 @@ TEST(StructureOfArrays, Construction)
         EXPECT_EQ(soa_vec, vec);
     }
 
-    // construction from initializer_list of std::tuple
+    // construction from initializer_list of gpu_array::tuple
     {
         auto soa = structure_of_arrays<tuple_type>({{0, 0.5}, {1, 1.5}, {2, 2.5}});
         EXPECT_EQ(soa.size(), 3);
         const auto soa_vec = soa.to<std::vector>();
-        EXPECT_EQ(std::get<0>(soa_vec[0]), 0);
-        EXPECT_EQ(std::get<1>(soa_vec[0]), 0.5);
-        EXPECT_EQ(std::get<0>(soa_vec[1]), 1);
-        EXPECT_EQ(std::get<1>(soa_vec[1]), 1.5);
-        EXPECT_EQ(std::get<0>(soa_vec[2]), 2);
-        EXPECT_EQ(std::get<1>(soa_vec[2]), 2.5);
+        EXPECT_EQ(gpu_array::get<0>(soa_vec[0]), 0);
+        EXPECT_EQ(gpu_array::get<1>(soa_vec[0]), 0.5);
+        EXPECT_EQ(gpu_array::get<0>(soa_vec[1]), 1);
+        EXPECT_EQ(gpu_array::get<1>(soa_vec[1]), 1.5);
+        EXPECT_EQ(gpu_array::get<0>(soa_vec[2]), 2);
+        EXPECT_EQ(gpu_array::get<1>(soa_vec[2]), 2.5);
     }
 
     // construction from multiple ranges
@@ -1198,8 +1198,8 @@ TEST(StructureOfArrays, Construction)
         const auto soa_vec = soa.to<std::vector>();
         for (std::size_t i = 0; i < soa_vec.size(); ++i)
         {
-            EXPECT_EQ(std::get<0>(soa_vec[i]), vec0[i]);
-            EXPECT_EQ(std::get<1>(soa_vec[i]), vec1[i]);
+            EXPECT_EQ(gpu_array::get<0>(soa_vec[i]), vec0[i]);
+            EXPECT_EQ(gpu_array::get<1>(soa_vec[i]), vec1[i]);
         }
     }
 
@@ -1208,12 +1208,12 @@ TEST(StructureOfArrays, Construction)
         auto soa = structure_of_arrays<tuple_elem_type0, tuple_elem_type1>({0, 1, 2}, {0.5, 1.5, 2.5});
         EXPECT_EQ(soa.size(), 3);
         const auto soa_vec = soa.to<std::vector>();
-        EXPECT_EQ(std::get<0>(soa_vec[0]), 0);
-        EXPECT_EQ(std::get<1>(soa_vec[0]), 0.5);
-        EXPECT_EQ(std::get<0>(soa_vec[1]), 1);
-        EXPECT_EQ(std::get<1>(soa_vec[1]), 1.5);
-        EXPECT_EQ(std::get<0>(soa_vec[2]), 2);
-        EXPECT_EQ(std::get<1>(soa_vec[2]), 2.5);
+        EXPECT_EQ(gpu_array::get<0>(soa_vec[0]), 0);
+        EXPECT_EQ(gpu_array::get<1>(soa_vec[0]), 0.5);
+        EXPECT_EQ(gpu_array::get<0>(soa_vec[1]), 1);
+        EXPECT_EQ(gpu_array::get<1>(soa_vec[1]), 1.5);
+        EXPECT_EQ(gpu_array::get<0>(soa_vec[2]), 2);
+        EXPECT_EQ(gpu_array::get<1>(soa_vec[2]), 2.5);
     }
 
     // construction from range of custom tuple
@@ -1228,9 +1228,9 @@ TEST(StructureOfArrays, Construction)
         const auto soa_vec = soa.to<std::vector>();
         for (std::size_t i = 0; i < soa_vec.size(); ++i)
         {
-            EXPECT_EQ(std::get<0>(soa_vec[i]), static_cast<tuple_elem_type0>(i));
+            EXPECT_EQ(gpu_array::get<0>(soa_vec[i]), static_cast<tuple_elem_type0>(i));
             EXPECT_EQ(soa_vec[i].get_string<0>(), std::to_string(i));
-            EXPECT_EQ(std::get<1>(soa_vec[i]), static_cast<tuple_elem_type1>(i) + 0.5);
+            EXPECT_EQ(gpu_array::get<1>(soa_vec[i]), static_cast<tuple_elem_type1>(i) + 0.5);
             EXPECT_EQ(soa_vec[i].get_string<1>(), std::to_string(i + 0.5));
         }
     }
@@ -1252,7 +1252,7 @@ TEST(StructureOfArrays, Export)
 {
     using tuple_elem_type0 = int;
     using tuple_elem_type1 = double;
-    using tuple_type = std::tuple<tuple_elem_type0, tuple_elem_type1>;
+    using tuple_type = gpu_array::tuple<tuple_elem_type0, tuple_elem_type1>;
 
     // export to range with same value type
     {
@@ -1278,7 +1278,7 @@ TEST(StructureOfArrays, Export)
 
 TEST(StructureOfArrays, RangeInterface)
 {
-    using tuple_type = std::tuple<int, double>;
+    using tuple_type = gpu_array::tuple<int, double>;
     using soa_type1 = structure_of_arrays<int, double>;
     using soa_type2 = structure_of_arrays<tuple_type>;
     using soa_type3 = structure_of_arrays<custom_tuple_example<int, double>>;
@@ -1350,20 +1350,22 @@ TEST(StructureOfArrays, RangeInterface)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
 #endif
-        static_assert(std::same_as<decltype(soa[0]), std::tuple<decltype(*soa.data<0>()), decltype(*soa.data<1>())>>);
         static_assert(
-            std::same_as<decltype(*soa.begin()), std::tuple<decltype(*soa.data<0>()), decltype(*soa.data<1>())>>);
+            std::same_as<decltype(soa[0]), gpu_array::tuple<decltype(*soa.data<0>()), decltype(*soa.data<1>())>>);
+        static_assert(
+            std::same_as<decltype(*soa.begin()), gpu_array::tuple<decltype(*soa.data<0>()), decltype(*soa.data<1>())>>);
         static_assert(std::same_as<decltype(*(soa.end() - 1)),
-                                   std::tuple<decltype(*(soa.data<0>() + 9)), decltype(*(soa.data<1>() + 9))>>);
+                                   gpu_array::tuple<decltype(*(soa.data<0>() + 9)), decltype(*(soa.data<1>() + 9))>>);
 #ifdef __GNUC__
 #pragma GCC diagnostic pop
 #endif
-        soa = {std::tuple(0, 0.0), std::tuple(1, 1.0), std::tuple(2, 2.0), std::tuple(3, 3.0), std::tuple(4, 4.0)};
+        soa = {gpu_array::tuple(0, 0.0), gpu_array::tuple(1, 1.0), gpu_array::tuple(2, 2.0), gpu_array::tuple(3, 3.0),
+               gpu_array::tuple(4, 4.0)};
         auto vec = soa.to<std::vector>();
         for (std::size_t i = 0; i < vec.size(); ++i)
         {
-            EXPECT_EQ(std::get<0>(vec[i]), static_cast<int>(i));
-            EXPECT_EQ(std::get<1>(vec[i]), static_cast<double>(i));
+            EXPECT_EQ(gpu_array::get<0>(vec[i]), static_cast<int>(i));
+            EXPECT_EQ(gpu_array::get<1>(vec[i]), static_cast<double>(i));
         }
     }
 }
@@ -1385,7 +1387,7 @@ TEST(ManagedStructureOfArrays, Construction)
 {
     using tuple_elem_type0 = int;
     using tuple_elem_type1 = double;
-    using tuple_type = std::tuple<tuple_elem_type0, tuple_elem_type1>;
+    using tuple_type = gpu_array::tuple<tuple_elem_type0, tuple_elem_type1>;
     using custom_tuple_type = custom_tuple_example<tuple_elem_type0, tuple_elem_type1>;
 
     // initizalization
@@ -1398,17 +1400,17 @@ TEST(ManagedStructureOfArrays, Construction)
         EXPECT_EQ(soa.size(), 10);
         for (const auto& v : soa)
         {
-            EXPECT_EQ(std::get<0>(v), 0);
-            EXPECT_EQ(std::get<1>(v), 0.0);
+            EXPECT_EQ(gpu_array::get<0>(v), 0);
+            EXPECT_EQ(gpu_array::get<1>(v), 0.0);
         }
     }
     {
-        auto soa = managed_structure_of_arrays<tuple_elem_type0, tuple_elem_type1>(10, std::tuple(1, 2.5));
+        auto soa = managed_structure_of_arrays<tuple_elem_type0, tuple_elem_type1>(10, gpu_array::tuple(1, 2.5));
         EXPECT_EQ(soa.size(), 10);
         for (const auto& v : soa)
         {
-            EXPECT_EQ(std::get<0>(v), 1);
-            EXPECT_EQ(std::get<1>(v), 2.5);
+            EXPECT_EQ(gpu_array::get<0>(v), 1);
+            EXPECT_EQ(gpu_array::get<1>(v), 2.5);
         }
     }
     {
@@ -1424,17 +1426,17 @@ TEST(ManagedStructureOfArrays, Construction)
         EXPECT_EQ(soa.size(), 10);
         for (const auto& v : soa)
         {
-            EXPECT_EQ(std::get<0>(v), 0);
-            EXPECT_EQ(std::get<1>(v), 0.0);
+            EXPECT_EQ(gpu_array::get<0>(v), 0);
+            EXPECT_EQ(gpu_array::get<1>(v), 0.0);
         }
     }
     {
-        auto soa = managed_structure_of_arrays<tuple_type>(10, std::tuple(1, 2.5));
+        auto soa = managed_structure_of_arrays<tuple_type>(10, gpu_array::tuple(1, 2.5));
         EXPECT_EQ(soa.size(), 10);
         for (const auto& v : soa)
         {
-            EXPECT_EQ(std::get<0>(v), 1);
-            EXPECT_EQ(std::get<1>(v), 2.5);
+            EXPECT_EQ(gpu_array::get<0>(v), 1);
+            EXPECT_EQ(gpu_array::get<1>(v), 2.5);
         }
     }
     {
@@ -1450,8 +1452,8 @@ TEST(ManagedStructureOfArrays, Construction)
         EXPECT_EQ(soa.size(), 10);
         for (const auto& v : soa)
         {
-            EXPECT_EQ(std::get<0>(v), 0);
-            EXPECT_EQ(std::get<1>(v), 0.0);
+            EXPECT_EQ(gpu_array::get<0>(v), 0);
+            EXPECT_EQ(gpu_array::get<1>(v), 0.0);
         }
     }
     {
@@ -1459,8 +1461,8 @@ TEST(ManagedStructureOfArrays, Construction)
         EXPECT_EQ(soa.size(), 10);
         for (const auto& v : soa)
         {
-            EXPECT_EQ(std::get<0>(v), 1);
-            EXPECT_EQ(std::get<1>(v), 2.5);
+            EXPECT_EQ(gpu_array::get<0>(v), 1);
+            EXPECT_EQ(gpu_array::get<1>(v), 2.5);
         }
     }
     {
@@ -1499,7 +1501,7 @@ TEST(ManagedStructureOfArrays, Construction)
         EXPECT_EQ(arr1.use_count(), 1);
     }
 
-    // construction from range of std::tuple
+    // construction from range of gpu_array::tuple
     {
         auto vec = std::vector<tuple_type>();
         for (std::size_t i = 0; i < 10; ++i)
@@ -1510,21 +1512,21 @@ TEST(ManagedStructureOfArrays, Construction)
         EXPECT_EQ(soa.size(), 10);
         for (std::size_t i = 0; i < 10; ++i)
         {
-            EXPECT_EQ(std::get<0>(soa[i]), std::get<0>(vec[i]));
-            EXPECT_EQ(std::get<1>(soa[i]), std::get<1>(vec[i]));
+            EXPECT_EQ(gpu_array::get<0>(soa[i]), gpu_array::get<0>(vec[i]));
+            EXPECT_EQ(gpu_array::get<1>(soa[i]), gpu_array::get<1>(vec[i]));
         }
     }
 
-    // construction from initializer_list of std::tuple
+    // construction from initializer_list of gpu_array::tuple
     {
         auto soa = managed_structure_of_arrays<tuple_type>({{0, 0.5}, {1, 1.5}, {2, 2.5}});
         EXPECT_EQ(soa.size(), 3);
-        EXPECT_EQ(std::get<0>(soa[0]), 0);
-        EXPECT_EQ(std::get<1>(soa[0]), 0.5);
-        EXPECT_EQ(std::get<0>(soa[1]), 1);
-        EXPECT_EQ(std::get<1>(soa[1]), 1.5);
-        EXPECT_EQ(std::get<0>(soa[2]), 2);
-        EXPECT_EQ(std::get<1>(soa[2]), 2.5);
+        EXPECT_EQ(gpu_array::get<0>(soa[0]), 0);
+        EXPECT_EQ(gpu_array::get<1>(soa[0]), 0.5);
+        EXPECT_EQ(gpu_array::get<0>(soa[1]), 1);
+        EXPECT_EQ(gpu_array::get<1>(soa[1]), 1.5);
+        EXPECT_EQ(gpu_array::get<0>(soa[2]), 2);
+        EXPECT_EQ(gpu_array::get<1>(soa[2]), 2.5);
     }
 
     // construction from multiple ranges
@@ -1540,8 +1542,8 @@ TEST(ManagedStructureOfArrays, Construction)
         EXPECT_EQ(soa.size(), 10);
         for (std::size_t i = 0; const auto& v : soa)
         {
-            EXPECT_EQ(std::get<0>(v), vec0[i]);
-            EXPECT_EQ(std::get<1>(v), vec1[i]);
+            EXPECT_EQ(gpu_array::get<0>(v), vec0[i]);
+            EXPECT_EQ(gpu_array::get<1>(v), vec1[i]);
             ++i;
         }
     }
@@ -1551,12 +1553,12 @@ TEST(ManagedStructureOfArrays, Construction)
         auto soa = managed_structure_of_arrays<tuple_elem_type0, tuple_elem_type1>({0, 1, 2}, {0.5, 1.5, 2.5});
         EXPECT_EQ(soa.size(), 3);
         const auto soa_vec = soa.to<std::vector>();
-        EXPECT_EQ(std::get<0>(soa_vec[0]), 0);
-        EXPECT_EQ(std::get<1>(soa_vec[0]), 0.5);
-        EXPECT_EQ(std::get<0>(soa_vec[1]), 1);
-        EXPECT_EQ(std::get<1>(soa_vec[1]), 1.5);
-        EXPECT_EQ(std::get<0>(soa_vec[2]), 2);
-        EXPECT_EQ(std::get<1>(soa_vec[2]), 2.5);
+        EXPECT_EQ(gpu_array::get<0>(soa_vec[0]), 0);
+        EXPECT_EQ(gpu_array::get<1>(soa_vec[0]), 0.5);
+        EXPECT_EQ(gpu_array::get<0>(soa_vec[1]), 1);
+        EXPECT_EQ(gpu_array::get<1>(soa_vec[1]), 1.5);
+        EXPECT_EQ(gpu_array::get<0>(soa_vec[2]), 2);
+        EXPECT_EQ(gpu_array::get<1>(soa_vec[2]), 2.5);
     }
 
     // construction from range of custom tuple
@@ -1570,9 +1572,9 @@ TEST(ManagedStructureOfArrays, Construction)
         EXPECT_EQ(soa.size(), 10);
         for (std::size_t i = 0; i < soa.size(); ++i)
         {
-            EXPECT_EQ(std::get<0>(soa[i]), static_cast<tuple_elem_type0>(i));
+            EXPECT_EQ(gpu_array::get<0>(soa[i]), static_cast<tuple_elem_type0>(i));
             EXPECT_EQ(soa[i].get_string<0>(), std::to_string(i));
-            EXPECT_EQ(std::get<1>(soa[i]), static_cast<tuple_elem_type1>(i) + 0.5);
+            EXPECT_EQ(gpu_array::get<1>(soa[i]), static_cast<tuple_elem_type1>(i) + 0.5);
             EXPECT_EQ(soa[i].get_string<1>(), std::to_string(i + 0.5));
         }
     }
@@ -1595,7 +1597,7 @@ TEST(ManagedStructureOfArrays, Export)
 {
     using tuple_elem_type0 = int;
     using tuple_elem_type1 = double;
-    using tuple_type = std::tuple<tuple_elem_type0, tuple_elem_type1>;
+    using tuple_type = gpu_array::tuple<tuple_elem_type0, tuple_elem_type1>;
 
     // export to range with same value type
     {
@@ -1621,7 +1623,7 @@ TEST(ManagedStructureOfArrays, Export)
 
 TEST(ManagedStructureOfArrays, RangeInterface)
 {
-    using tuple_type = std::tuple<int, double>;
+    using tuple_type = gpu_array::tuple<int, double>;
     using soa_type1 = managed_structure_of_arrays<int, double>;
     using soa_type2 = managed_structure_of_arrays<tuple_type>;
     using soa_type3 = managed_structure_of_arrays<custom_tuple_example<int, double>>;
@@ -1682,15 +1684,16 @@ TEST(ManagedStructureOfArrays, RangeInterface)
         EXPECT_NE(soa.data<1>(), nullptr);
         EXPECT_EQ(soa.size(), 10);
         EXPECT_FALSE(soa.empty());
-        EXPECT_EQ(soa[0], std::tuple(*soa.data<0>(), *soa.data<1>()));
-        EXPECT_EQ(*soa.begin(), std::tuple(*soa.data<0>(), *soa.data<1>()));
-        EXPECT_EQ(*(soa.end() - 1), std::tuple(*(soa.data<0>() + 9), *(soa.data<1>() + 9)));
+        EXPECT_EQ(soa[0], gpu_array::tuple(*soa.data<0>(), *soa.data<1>()));
+        EXPECT_EQ(*soa.begin(), gpu_array::tuple(*soa.data<0>(), *soa.data<1>()));
+        EXPECT_EQ(*(soa.end() - 1), gpu_array::tuple(*(soa.data<0>() + 9), *(soa.data<1>() + 9)));
 
-        soa = {std::tuple(0, 0.0), std::tuple(1, 1.0), std::tuple(2, 2.0), std::tuple(3, 3.0), std::tuple(4, 4.0)};
+        soa = {gpu_array::tuple(0, 0.0), gpu_array::tuple(1, 1.0), gpu_array::tuple(2, 2.0), gpu_array::tuple(3, 3.0),
+               gpu_array::tuple(4, 4.0)};
         for (std::size_t i = 0; i < soa.size(); ++i)
         {
-            EXPECT_EQ(std::get<0>(soa[i]), static_cast<int>(i));
-            EXPECT_EQ(std::get<1>(soa[i]), static_cast<double>(i));
+            EXPECT_EQ(gpu_array::get<0>(soa[i]), static_cast<int>(i));
+            EXPECT_EQ(gpu_array::get<1>(soa[i]), static_cast<double>(i));
         }
     }
 }
@@ -1711,7 +1714,7 @@ TEST(ManagedStructureOfArrays, SmartPointerInterface)
 TEST(ManagedStructureOfArrays, MemoryManagement)
 {
     // create nested managed_array
-    auto elms = std::vector<std::tuple<int, managed_array<double>>>();
+    auto elms = std::vector<gpu_array::tuple<int, managed_array<double>>>();
     for (std::size_t i = 0; i < 10; ++i) elms.emplace_back(i, managed_array<double>(i, 99));
     auto soa = managed_structure_of_arrays(elms);
 
@@ -1826,7 +1829,7 @@ TEST(JaggedArray, Construction)
 
     // construction from nested containers for jagged structure of arrays
     {
-        using tuple_type = std::tuple<int, double>;
+        using tuple_type = gpu_array::tuple<int, double>;
         auto vec_tpl = std::vector<std::vector<tuple_type>>{
             std::vector<tuple_type>(1, {0, 0.5}), std::vector<tuple_type>(2, {1, 1.5}),
             std::vector<tuple_type>(3, {2, 2.5}), std::vector<tuple_type>(4, {3, 3.5}),
@@ -1891,12 +1894,12 @@ TEST(JaggedArray, Construction)
 
     // construction from container of sizes and flat range (structure of arrays)
     {
-        auto flat_range = std::vector<std::tuple<int, double>>{
+        auto flat_range = std::vector<gpu_array::tuple<int, double>>{
             {0, 0.5}, {1, 1.5}, {2, 2.5},   {3, 3.5},   {4, 4.5},   {5, 5.5},   {6, 6.5},  {7, 7.5},
             {8, 8.5}, {9, 9.5}, {10, 10.5}, {11, 11.5}, {12, 12.5}, {13, 13.5}, {14, 14.5}};
         auto sizes = std::vector<std::uint32_t>{1, 2, 3, 4, 5};
 
-        auto jagged_arr = jagged_array<managed_structure_of_arrays<std::tuple<int, double>>>(
+        auto jagged_arr = jagged_array<managed_structure_of_arrays<gpu_array::tuple<int, double>>>(
             sizes, flat_range);  // no deduction guide
 
         EXPECT_EQ(jagged_arr.size(), 15);
@@ -1905,7 +1908,7 @@ TEST(JaggedArray, Construction)
             EXPECT_EQ(jagged_arr.size(i), sizes[i]);
             for (const auto& v : jagged_arr.row(i))
             {
-                EXPECT_EQ(v, (std::tuple<int, double>(j, static_cast<double>(j) + 0.5)));
+                EXPECT_EQ(v, (gpu_array::tuple<int, double>(j, static_cast<double>(j) + 0.5)));
                 ++j;
             }
         }
@@ -1926,7 +1929,7 @@ TEST(JaggedArray, Construction)
 
     // construction from nested initializer_list (managed structure of arrays)
     {
-        using tuple_type = std::tuple<int, double>;
+        using tuple_type = gpu_array::tuple<int, double>;
         auto jagged_arr = jagged_array<managed_structure_of_arrays<tuple_type>>{
             {{0, 0.5}},
             {{1, 1.5}, {1, 1.5}},
@@ -1967,7 +1970,7 @@ TEST(JaggedArray, Construction)
 
     // wrap managed structure of arrays with sizes
     {
-        using tuple_type = std::tuple<int, double>;
+        using tuple_type = gpu_array::tuple<int, double>;
         auto sizes = std::vector<std::uint32_t>{3, 1, 4, 1, 5};
         auto arr = managed_structure_of_arrays<tuple_type>(14, tuple_type(42, 99.0));
         auto jagged_arr_wrap = jagged_array(sizes, arr);
@@ -1985,8 +1988,8 @@ TEST(JaggedArray, Construction)
             EXPECT_EQ(jagged_arr_wrap.size(i), sizes[i]);
             for (const auto& v : jagged_arr_wrap.row(i))
             {
-                EXPECT_EQ(std::get<0>(v), 42);
-                EXPECT_EQ(std::get<1>(v), 99.0);
+                EXPECT_EQ(gpu_array::get<0>(v), 42);
+                EXPECT_EQ(gpu_array::get<1>(v), 99.0);
             }
         }
     }