From 0e4a4df3b7df7972364110ddbb8561a9b610ed13 Mon Sep 17 00:00:00 2001
From: Adrian Riedl
Date: Thu, 12 Mar 2026 15:33:09 +0100
Subject: [PATCH] x86 avx512: add more reduction functions for 32-bit and 64-bit integers, floating-point, and bitwise operations

---
 simde/x86/avx512/reduce.h | 260 ++++++++++++++
 test/x86/avx512/reduce.c  | 725 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 985 insertions(+)

diff --git a/simde/x86/avx512/reduce.h b/simde/x86/avx512/reduce.h
index c007572e2..501603071 100644
--- a/simde/x86/avx512/reduce.h
+++ b/simde/x86/avx512/reduce.h
@@ -349,6 +349,266 @@ simde_mm512_reduce_min_ps(simde__m512 a) {
 #  define _mm512_reduce_min_ps(a) simde_mm512_reduce_min_ps((a))
 #endif
 
+SIMDE_FUNCTION_ATTRIBUTES
+int32_t
+simde_mm512_reduce_add_epi32(simde__m512i a) {
+  #if defined(SIMDE_X86_AVX512F_NATIVE)
+    return _mm512_reduce_add_epi32(a);
+  #else /* portable fallback: wrapping sum of every 32-bit lane of a */
+    simde__m512i_private a_;
+    uint32_t r; /* unsigned accumulator: overflow wraps modulo 2^32 instead of being signed-overflow UB */
+    a_ = simde__m512i_to_private(a);
+
+    r = UINT32_C(0);
+    SIMDE_VECTORIZE_REDUCTION(+:r)
+    for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
+      r += HEDLEY_STATIC_CAST(uint32_t, a_.i32[i]);
+    }
+    return HEDLEY_STATIC_CAST(int32_t, r); /* reinterpret the wrapped sum as signed */
+  #endif
+}
+#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_reduce_add_epi32(a) simde_mm512_reduce_add_epi32((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+int64_t
+simde_mm512_reduce_add_epi64(simde__m512i a) {
+  #if defined(SIMDE_X86_AVX512F_NATIVE)
+    return _mm512_reduce_add_epi64(a);
+  #else /* portable fallback: wrapping sum of every 64-bit lane of a */
+    simde__m512i_private a_;
+    uint64_t r; /* unsigned accumulator: overflow wraps modulo 2^64 instead of being signed-overflow UB */
+    a_ = simde__m512i_to_private(a);
+
+    r = UINT64_C(0);
+    SIMDE_VECTORIZE_REDUCTION(+:r)
+    for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
+      r += HEDLEY_STATIC_CAST(uint64_t, a_.i64[i]);
+    }
+    return HEDLEY_STATIC_CAST(int64_t, r); /* reinterpret the wrapped sum as signed */
+  #endif
+}
+#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_reduce_add_epi64(a) simde_mm512_reduce_add_epi64((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float32
+simde_mm512_reduce_add_ps(simde__m512 a) {
+  #if defined(SIMDE_X86_AVX512F_NATIVE)
+    return _mm512_reduce_add_ps(a);
+  #else /* portable fallback: sum of the 16 float32 lanes in a fixed pairwise order */
+    simde__m512_private a_;
+    simde_float32 r;
+    a_ = simde__m512_to_private(a);
+
+    /* pairwise tree reduction matching Intel's implementation:
+     * _mm256_add_ps(lo256, hi256) -> _mm_add_ps(lo128, hi128)
+     * -> _mm_movehl_ps -> _mm_add_ss */
+    simde_float32 t[8], u[4], v[2];
+    for (size_t i = 0 ; i < 8 ; i++) {
+      t[i] = a_.f32[i] + a_.f32[i + 8];
+    }
+    for (size_t i = 0 ; i < 4 ; i++) {
+      u[i] = t[i] + t[i + 4];
+    }
+    v[0] = u[0] + u[2];
+    v[1] = u[1] + u[3];
+    r = v[0] + v[1];
+    return r;
+  #endif
+}
+#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_reduce_add_ps(a) simde_mm512_reduce_add_ps((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float64
+simde_mm512_reduce_add_pd(simde__m512d a) {
+  #if defined(SIMDE_X86_AVX512F_NATIVE)
+    return _mm512_reduce_add_pd(a);
+  #else /* portable fallback: sum of the 8 float64 lanes in a fixed pairwise order */
+    simde__m512d_private a_;
+    simde_float64 r;
+    a_ = simde__m512d_to_private(a);
+
+    /* pairwise tree reduction, same association order as
+     * simde_mm512_reduce_add_ps: lo256 + hi256, then lo128 + hi128,
+     * then the final two lanes (no 0.0 seed, no reassociation) */
+    r = ((a_.f64[0] + a_.f64[4]) + (a_.f64[2] + a_.f64[6])) +
+        ((a_.f64[1] + a_.f64[5]) + (a_.f64[3] + a_.f64[7]));
+    return r;
+  #endif
+}
+#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_reduce_add_pd(a) simde_mm512_reduce_add_pd((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float16
+simde_mm512_reduce_add_ph(simde__m512h a) {
+  #if defined(SIMDE_X86_AVX512FP16_NATIVE)
+    return _mm512_reduce_add_ph(a);
+  #else /* portable fallback: sum of the 32 float16 lanes in a fixed pairwise order */
+    simde__m512h_private a_;
+    simde_float16 r;
+    a_ = simde__m512h_to_private(a);
+
+    /* pairwise tree reduction, each step via float32 to simulate float16 add
+     * (upcasts to float32, adds, rounds back to float16) */
+    simde_float16 t[16], u[8], v[4], w[2];
+    for (size_t i = 0 ; i < 16 ; i++) {
+      t[i] = simde_float16_from_float32(simde_float16_to_float32(a_.f16[i]) + simde_float16_to_float32(a_.f16[i + 16]));
+    }
+    for (size_t i = 0 ; i < 8 ; i++) {
+      u[i] = simde_float16_from_float32(simde_float16_to_float32(t[i]) + simde_float16_to_float32(t[i + 8]));
+    }
+    for (size_t i = 0 ; i < 4 ; i++) {
+      v[i] = simde_float16_from_float32(simde_float16_to_float32(u[i]) + simde_float16_to_float32(u[i + 4]));
+    }
+    w[0] = simde_float16_from_float32(simde_float16_to_float32(v[0]) + simde_float16_to_float32(v[2]));
+    w[1] = simde_float16_from_float32(simde_float16_to_float32(v[1]) + simde_float16_to_float32(v[3]));
+    r = simde_float16_from_float32(simde_float16_to_float32(w[0]) + simde_float16_to_float32(w[1]));
+    return r;
+  #endif
+}
+#if defined(SIMDE_X86_AVX512FP16_ENABLE_NATIVE_ALIASES)
+#  define _mm512_reduce_add_ph(a) simde_mm512_reduce_add_ph((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+int32_t
+simde_mm512_reduce_and_epi32(simde__m512i a) {
+  #if defined(SIMDE_X86_AVX512F_NATIVE)
+    return _mm512_reduce_and_epi32(a);
+  #else /* portable fallback: bitwise AND of every 32-bit lane of a */
+    simde__m512i_private a_;
+    int32_t r;
+    a_ = simde__m512i_to_private(a);
+
+    r = ~INT32_C(0); /* all bits set: identity element for AND */
+    SIMDE_VECTORIZE_REDUCTION(&:r)
+    for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
+      r &= a_.i32[i];
+    }
+    return r;
+  #endif
+}
+#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_reduce_and_epi32(a) simde_mm512_reduce_and_epi32((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+int64_t
+simde_mm512_reduce_and_epi64(simde__m512i a) {
+  #if defined(SIMDE_X86_AVX512F_NATIVE)
+    return _mm512_reduce_and_epi64(a);
+  #else /* portable fallback: bitwise AND of every 64-bit lane of a */
+    simde__m512i_private a_;
+    int64_t r;
+    a_ = simde__m512i_to_private(a);
+
+    r = ~INT64_C(0); /* all bits set: identity element for AND */
+    SIMDE_VECTORIZE_REDUCTION(&:r)
+    for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
+      r &= a_.i64[i];
+    }
+    return r;
+  #endif
+}
+#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_reduce_and_epi64(a) simde_mm512_reduce_and_epi64((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+int32_t
+simde_mm512_reduce_mul_epi32(simde__m512i a) {
+  #if defined(SIMDE_X86_AVX512F_NATIVE)
+    return _mm512_reduce_mul_epi32(a);
+  #else /* portable fallback: wrapping product of every 32-bit lane of a */
+    simde__m512i_private a_;
+    uint32_t r; /* unsigned accumulator: overflow wraps modulo 2^32 instead of being signed-overflow UB */
+    a_ = simde__m512i_to_private(a);
+
+    r = UINT32_C(1);
+    SIMDE_VECTORIZE_REDUCTION(*:r)
+    for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
+      r *= HEDLEY_STATIC_CAST(uint32_t, a_.i32[i]);
+    }
+    return HEDLEY_STATIC_CAST(int32_t, r); /* low 32 bits of the product, reinterpreted as signed */
+  #endif
+}
+#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_reduce_mul_epi32(a) simde_mm512_reduce_mul_epi32((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+int64_t
+simde_mm512_reduce_mul_epi64(simde__m512i a) {
+  #if defined(SIMDE_X86_AVX512F_NATIVE)
+    return _mm512_reduce_mul_epi64(a);
+  #else /* portable fallback: wrapping product of every 64-bit lane of a */
+    simde__m512i_private a_;
+    uint64_t r; /* unsigned accumulator: overflow wraps modulo 2^64 instead of being signed-overflow UB */
+    a_ = simde__m512i_to_private(a);
+
+    r = UINT64_C(1);
+    SIMDE_VECTORIZE_REDUCTION(*:r)
+    for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
+      r *= HEDLEY_STATIC_CAST(uint64_t, a_.i64[i]);
+    }
+    return HEDLEY_STATIC_CAST(int64_t, r); /* low 64 bits of the product, reinterpreted as signed */
+  #endif
+}
+#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_reduce_mul_epi64(a) simde_mm512_reduce_mul_epi64((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+int32_t
+simde_mm512_reduce_or_epi32(simde__m512i a) {
+  #if defined(SIMDE_X86_AVX512F_NATIVE)
+    return _mm512_reduce_or_epi32(a);
+  #else /* portable fallback: bitwise OR of every 32-bit lane of a */
+    simde__m512i_private a_;
+    int32_t r;
+    a_ = simde__m512i_to_private(a);
+
+    r = INT32_C(0); /* no bits set: identity element for OR */
+    SIMDE_VECTORIZE_REDUCTION(|:r)
+    for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) {
+      r |= a_.i32[i];
+    }
+    return r;
+  #endif
+}
+#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_reduce_or_epi32(a) simde_mm512_reduce_or_epi32((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+int64_t
+simde_mm512_reduce_or_epi64(simde__m512i a) {
+  #if defined(SIMDE_X86_AVX512F_NATIVE)
+    return _mm512_reduce_or_epi64(a);
+  #else /* portable fallback: bitwise OR of every 64-bit lane of a */
+    simde__m512i_private a_;
+    int64_t r;
+    a_ = simde__m512i_to_private(a);
+
+    r = INT64_C(0); /* no bits set: identity element for OR */
+    SIMDE_VECTORIZE_REDUCTION(|:r)
+    for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) {
+      r |= a_.i64[i];
+    }
+    return r;
+  #endif
+}
+#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
+#  define _mm512_reduce_or_epi64(a) simde_mm512_reduce_or_epi64((a))
+#endif
+
 SIMDE_END_DECLS_
 HEDLEY_DIAGNOSTIC_POP
 
diff --git a/test/x86/avx512/reduce.c b/test/x86/avx512/reduce.c
index ecd5af890..5af83445e
100644
--- a/test/x86/avx512/reduce.c
+++ b/test/x86/avx512/reduce.c
@@ -915,7 +915,728 @@ test_simde_mm512_reduce_min_pd (SIMDE_MUNIT_TEST_ARGS) {
 #endif
 }
 
+static int
+test_simde_mm512_reduce_add_epi32(SIMDE_MUNIT_TEST_ARGS) /* checks simde_mm512_reduce_add_epi32 against the precomputed vectors below */
+{
+#if 1
+  static const struct
+  {
+    const int32_t a[16];
+    int32_t r; /* expected wrapped (modulo 2^32) sum of a[] */
+  } test_vec[] = {
+    {
+      {
+        INT32_C(224523122), -INT32_C(505464467), INT32_C(1927752136), -INT32_C(243233513), -INT32_C(1000192152),
+        INT32_C(596138238), INT32_C(47503433), INT32_C(1398022112),
+        INT32_C(2137044242), -INT32_C(1218363154), -INT32_C(2128000919), INT32_C(1047701974), -INT32_C(301738768),
+        INT32_C(1964149548), INT32_C(1148773987), INT32_C(412601349)
+      },
+      INT32_C(1212249872)
+    },
+    {
+      {
+        INT32_C(1889007489), -INT32_C(1591216073), INT32_C(388124737), -INT32_C(380267271), -INT32_C(1780983703),
+        INT32_C(1191963363), -INT32_C(695499824), -INT32_C(789700017),
+        INT32_C(1363182873), -INT32_C(1074632834), -INT32_C(1328147273), INT32_C(295316392), INT32_C(1739027076),
+        INT32_C(749646428), -INT32_C(2080229067), INT32_C(1968500827)
+      },
+      -INT32_C(135906877)
+    },
+    {
+      {
+        -INT32_C(188312202), -INT32_C(1263290116), INT32_C(1952745932), INT32_C(948371124), -INT32_C(861917840),
+        INT32_C(351882975), -INT32_C(476513400), INT32_C(1649994988),
+        INT32_C(2119573121), -INT32_C(1573778986), INT32_C(1175950994), INT32_C(92249493), -INT32_C(1445847094),
+        -INT32_C(172110995), -INT32_C(1294379578), -INT32_C(1022086847)
+      },
+      -INT32_C(7468431)
+    },
+    {
+      {
+        INT32_C(641821264), INT32_C(97088371), -INT32_C(1639129079), INT32_C(1201982333), INT32_C(1475376874),
+        INT32_C(122531137), INT32_C(1136272898), -INT32_C(1492726185),
+        -INT32_C(1395767495), -INT32_C(994928710), -INT32_C(194838921), -INT32_C(1287977271), -INT32_C(1123406980),
+        -INT32_C(624601128), -INT32_C(719487107), -INT32_C(2038684595)
+      },
+      INT32_C(1753459998)
+    },
+    {
+      {
+        INT32_C(640895595), INT32_C(1491789281), -INT32_C(1404285725), -INT32_C(815823021), -INT32_C(1970443854),
+        INT32_C(1046762177), INT32_C(504595153), INT32_C(296063142),
+        -INT32_C(1137190694), -INT32_C(1609293379), -INT32_C(1051958930), -INT32_C(1685017624), -INT32_C(718988012),
+        INT32_C(1075087983), -INT32_C(1302386932), -INT32_C(1832713033)
+      },
+      INT32_C(117026719)
+    },
+    {
+      {
+        -INT32_C(1722876964), -INT32_C(1975950820), -INT32_C(1404336701), INT32_C(1145560112), INT32_C(1763339513),
+        INT32_C(44642038), INT32_C(229902421), -INT32_C(392202228),
+        -INT32_C(1887310477), INT32_C(320518735), INT32_C(1874814271), INT32_C(1001653825), INT32_C(1772408434),
+        INT32_C(1365986812), INT32_C(1650335830), INT32_C(189529496)
+      },
+      -INT32_C(318952999)
+    },
+    {
+      {
+        INT32_C(983223530), -INT32_C(967986041), INT32_C(1530268698), -INT32_C(2070484462), -INT32_C(1259521352),
+        -INT32_C(570074744), INT32_C(289432697), INT32_C(1276939105),
+        -INT32_C(544819624), -INT32_C(2052730005), -INT32_C(236921889), INT32_C(2104850373), INT32_C(959538097),
+        INT32_C(890714044), -INT32_C(62498661), INT32_C(994599651)
+      },
+      INT32_C(1264529417)
+    },
+    {
+      {
+        -INT32_C(2095395304), -INT32_C(2146910047), INT32_C(1634855323), INT32_C(299820896), INT32_C(105582666),
+        -INT32_C(499424697), -INT32_C(1629584966), -INT32_C(69654557),
+        -INT32_C(1753221899), INT32_C(1326942132), -INT32_C(793736848), -INT32_C(1159557264), -INT32_C(423613025),
+        INT32_C(1254685583), INT32_C(1609082492), -INT32_C(1017462323)
+      },
+      -INT32_C(1062624542)
+    },
+  };
+
+  for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++)
+  {
+    simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a);
+    int32_t r = simde_mm512_reduce_add_epi32(a); /* keep the result in its native integer type */
+    simde_assert_equal_i32(r, test_vec[i].r); /* exact integer comparison, as in the and_epi32 test */
+  }
+
+  return 0;
+#else /* disabled generator: prints fresh random vectors with their reductions */
+  fputc('\n', stdout);
+
+  for (int i = 0; i < 8; i++)
+  {
+    simde__m512i a = simde_test_x86_random_i32x16();
+    int32_t r = simde_mm512_reduce_add_epi32(a);
+
+    simde_test_x86_write_i32x16(2, a, SIMDE_TEST_VEC_POS_FIRST);
+    simde_test_codegen_write_i32(2, r, SIMDE_TEST_VEC_POS_LAST);
+  }
+  return 1;
+#endif
+}
+
+static int
+test_simde_mm512_reduce_add_epi64 (SIMDE_MUNIT_TEST_ARGS) { /* checks simde_mm512_reduce_add_epi64 against the precomputed vectors below with an exact int64 comparison; the #else branch is a disabled generator that prints fresh random vectors */ +#if 1 + static const struct { + const int64_t a[8]; + int64_t r; + } test_vec[] = { + { { -INT64_C( 5257623150159040282), -INT64_C( 3893687459107244638), INT64_C( 754871992350975544), -INT64_C( 2689737544661339285), + INT64_C( 6909294498031178385), INT64_C( 8529548582696871859), -INT64_C( 7291668440160315471), -INT64_C( 1257492354557723714) }, + -INT64_C( 4196493875566637602) }, + { { -INT64_C( 5535772761573998321), -INT64_C( 137456212239629981), -INT64_C( 1024127791219382017), INT64_C( 2889110941799259085), + -INT64_C( 7988223355701087632), INT64_C( 1562266482538343779), -INT64_C( 3398513458910301774), INT64_C( 1794986442502076841) }, + INT64_C( 6609014360904831596) }, + { { INT64_C( 7134736452118240836), INT64_C( 3550738108239787290), INT64_C( 4685692600812210739), INT64_C( 6279684817709891685), + INT64_C( 8515969739548896971), INT64_C( 3869414525373923586), INT64_C( 1824364342228535415), INT64_C( 854360394295563377) }, + -INT64_C( 178527167092053333) }, + { { INT64_C( 8416106688483337466), -INT64_C( 2991049450304736699), -INT64_C( 3729483680605410960), -INT64_C( 6064611805587544694), + -INT64_C( 597345214373188145), -INT64_C( 4362373600993367488), INT64_C( 2003018338500369061), INT64_C( 1650777377093328011) }, + -INT64_C( 5674961347787213448) }, + { { -INT64_C( 643437816183264918), -INT64_C( 5491001690753183305), INT64_C( 7355081724162978917), INT64_C( 2304026297684457627), + INT64_C( 1450807092519325812), INT64_C( 2981641079463196831), INT64_C( 3449811824027833502), -INT64_C( 9007469670729415894) }, + INT64_C( 2399458840191928572) }, + { { INT64_C( 2795838750027823707), INT64_C( 8247556374730631561), -INT64_C( 1149882699886127800), -INT64_C( 9088317759337522098), + -INT64_C( 1821981624838288119), INT64_C( 447167800271932143), INT64_C( 4248280842660618301), -INT64_C( 7156090532872666104) }, + -INT64_C( 3477428849243598409) }, + { { -INT64_C( 2430003232157865470), INT64_C( 6860061000186558830), 
INT64_C( 728272534145013237), INT64_C( 2976010909966250268), + -INT64_C( 3701166842614023724), INT64_C( 1065994220205589321), -INT64_C( 6679389625545445822), INT64_C( 5047584598147696827) }, + INT64_C( 3867363562333773467) }, + { { -INT64_C( 5817899316348873447), -INT64_C( 6190789877792359442), -INT64_C( 3945931665080187884), INT64_C( 5295309779096080580), + INT64_C( 2333429969423664717), INT64_C( 8525099014010928189), INT64_C( 4474131042018540864), -INT64_C( 3938413838286339106) }, + INT64_C( 734935107041454471) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); + int64_t r = simde_mm512_reduce_add_epi64(a); + simde_assert_equal_i64(r, test_vec[i].r); + } + + return 0; +#else + fputc('\n', stdout); + + for (int i = 0; i < 8; i++) { + simde__m512i a = simde_test_x86_random_i64x8(); + int64_t r = simde_mm512_reduce_add_epi64(a); + + simde_test_x86_write_i64x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_i64(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_mm512_reduce_add_ps (SIMDE_MUNIT_TEST_ARGS) { /* checks simde_mm512_reduce_add_ps against the precomputed vectors below; simde_assert_equal_f32 receives 1 as its third argument (NOTE(review): presumably a precision setting, confirm against the test harness) */ +#if 1 + static const struct { + const simde_float32 a[16]; + simde_float32 r; + } test_vec[] = { + { { SIMDE_FLOAT32_C( -25.16), SIMDE_FLOAT32_C( -263.31), SIMDE_FLOAT32_C( 735.36), SIMDE_FLOAT32_C( 466.14), + SIMDE_FLOAT32_C( -795.07), SIMDE_FLOAT32_C( -82.98), SIMDE_FLOAT32_C( -273.33), SIMDE_FLOAT32_C( -643.76), + SIMDE_FLOAT32_C( -540.23), SIMDE_FLOAT32_C( -638.93), SIMDE_FLOAT32_C( 170.25), SIMDE_FLOAT32_C( 279.35), + SIMDE_FLOAT32_C( 43.64), SIMDE_FLOAT32_C( 368.86), SIMDE_FLOAT32_C( 814.71), SIMDE_FLOAT32_C( -770.02) }, + SIMDE_FLOAT32_C( -1154.48) }, + { { SIMDE_FLOAT32_C( 670.38), SIMDE_FLOAT32_C( -286.26), SIMDE_FLOAT32_C( -44.17), SIMDE_FLOAT32_C( -449.83), + SIMDE_FLOAT32_C( 796.32), SIMDE_FLOAT32_C( -708.62), SIMDE_FLOAT32_C( 962.79), SIMDE_FLOAT32_C( 430.49), + SIMDE_FLOAT32_C( -204.37), SIMDE_FLOAT32_C( 906.50), 
SIMDE_FLOAT32_C( -335.55), SIMDE_FLOAT32_C( 20.70), + SIMDE_FLOAT32_C( -867.84), SIMDE_FLOAT32_C( -888.88), SIMDE_FLOAT32_C( 691.44), SIMDE_FLOAT32_C( 107.00) }, + SIMDE_FLOAT32_C( 800.10) }, + { { SIMDE_FLOAT32_C( -152.19), SIMDE_FLOAT32_C( 426.80), SIMDE_FLOAT32_C( -426.86), SIMDE_FLOAT32_C( 52.74), + SIMDE_FLOAT32_C( -656.18), SIMDE_FLOAT32_C( 299.82), SIMDE_FLOAT32_C( 408.98), SIMDE_FLOAT32_C( -196.41), + SIMDE_FLOAT32_C( 660.88), SIMDE_FLOAT32_C( -420.77), SIMDE_FLOAT32_C( -917.06), SIMDE_FLOAT32_C( -295.48), + SIMDE_FLOAT32_C( 948.09), SIMDE_FLOAT32_C( 897.65), SIMDE_FLOAT32_C( -65.50), SIMDE_FLOAT32_C( 618.47) }, + SIMDE_FLOAT32_C( 1182.98) }, + { { SIMDE_FLOAT32_C( -388.61), SIMDE_FLOAT32_C( 890.33), SIMDE_FLOAT32_C( -831.35), SIMDE_FLOAT32_C( -592.28), + SIMDE_FLOAT32_C( -818.29), SIMDE_FLOAT32_C( -868.57), SIMDE_FLOAT32_C( 838.20), SIMDE_FLOAT32_C( -22.66), + SIMDE_FLOAT32_C( -962.07), SIMDE_FLOAT32_C( -497.35), SIMDE_FLOAT32_C( 998.04), SIMDE_FLOAT32_C( -829.91), + SIMDE_FLOAT32_C( -386.23), SIMDE_FLOAT32_C( 689.49), SIMDE_FLOAT32_C( 277.09), SIMDE_FLOAT32_C( 461.58) }, + SIMDE_FLOAT32_C( -2042.59) }, + { { SIMDE_FLOAT32_C( 116.29), SIMDE_FLOAT32_C( 850.23), SIMDE_FLOAT32_C( -485.68), SIMDE_FLOAT32_C( 460.11), + SIMDE_FLOAT32_C( 150.04), SIMDE_FLOAT32_C( 923.30), SIMDE_FLOAT32_C( -736.30), SIMDE_FLOAT32_C( -189.07), + SIMDE_FLOAT32_C( -497.47), SIMDE_FLOAT32_C( -653.36), SIMDE_FLOAT32_C( 515.44), SIMDE_FLOAT32_C( -549.38), + SIMDE_FLOAT32_C( -755.71), SIMDE_FLOAT32_C( -550.06), SIMDE_FLOAT32_C( -930.91), SIMDE_FLOAT32_C( -144.32) }, + SIMDE_FLOAT32_C( -2476.85) }, + { { SIMDE_FLOAT32_C( -659.73), SIMDE_FLOAT32_C( -762.27), SIMDE_FLOAT32_C( 263.40), SIMDE_FLOAT32_C( -478.02), + SIMDE_FLOAT32_C( -630.83), SIMDE_FLOAT32_C( 101.60), SIMDE_FLOAT32_C( 499.32), SIMDE_FLOAT32_C( -592.90), + SIMDE_FLOAT32_C( 604.25), SIMDE_FLOAT32_C( 497.36), SIMDE_FLOAT32_C( -422.81), SIMDE_FLOAT32_C( -781.98), + SIMDE_FLOAT32_C( 186.85), SIMDE_FLOAT32_C( 854.27), 
SIMDE_FLOAT32_C( 679.61), SIMDE_FLOAT32_C( -696.87) }, + SIMDE_FLOAT32_C( -1338.75) }, + { { SIMDE_FLOAT32_C( 704.50), SIMDE_FLOAT32_C( -806.08), SIMDE_FLOAT32_C( 763.24), SIMDE_FLOAT32_C( -145.46), + SIMDE_FLOAT32_C( -882.78), SIMDE_FLOAT32_C( -973.06), SIMDE_FLOAT32_C( 665.47), SIMDE_FLOAT32_C( -380.25), + SIMDE_FLOAT32_C( -626.42), SIMDE_FLOAT32_C( 180.91), SIMDE_FLOAT32_C( 70.37), SIMDE_FLOAT32_C( -382.14), + SIMDE_FLOAT32_C( 630.86), SIMDE_FLOAT32_C( 139.46), SIMDE_FLOAT32_C( 473.54), SIMDE_FLOAT32_C( 971.13) }, + SIMDE_FLOAT32_C( 403.29) }, + { { SIMDE_FLOAT32_C( 377.19), SIMDE_FLOAT32_C( -263.06), SIMDE_FLOAT32_C( -506.90), SIMDE_FLOAT32_C( 746.36), + SIMDE_FLOAT32_C( 838.54), SIMDE_FLOAT32_C( 992.42), SIMDE_FLOAT32_C( -846.54), SIMDE_FLOAT32_C( 442.79), + SIMDE_FLOAT32_C( 489.78), SIMDE_FLOAT32_C( -269.36), SIMDE_FLOAT32_C( 660.81), SIMDE_FLOAT32_C( -323.37), + SIMDE_FLOAT32_C( -415.08), SIMDE_FLOAT32_C( 340.42), SIMDE_FLOAT32_C( -20.24), SIMDE_FLOAT32_C( -710.58) }, + SIMDE_FLOAT32_C( 1533.18) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde__m512 a = simde_mm512_loadu_ps(test_vec[i].a); + simde_float32 r = simde_mm512_reduce_add_ps(a); + simde_assert_equal_f32(r, test_vec[i].r, 1); + } + + return 0; +#else + fputc('\n', stdout); + + for (int i = 0; i < 8; i++) { + simde__m512 a = simde_test_x86_random_f32x16(SIMDE_FLOAT32_C(-1000.0), SIMDE_FLOAT32_C(1000.0)); + simde_float32 r = simde_mm512_reduce_add_ps(a); + + simde_test_x86_write_f32x16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_f32(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_mm512_reduce_add_pd (SIMDE_MUNIT_TEST_ARGS) { /* checks simde_mm512_reduce_add_pd against the precomputed vectors below; simde_assert_equal_f64 receives 1 as its third argument (NOTE(review): presumably a precision setting, confirm against the test harness) */ +#if 1 + static const struct { + const simde_float64 a[8]; + simde_float64 r; + } test_vec[] = { + { { SIMDE_FLOAT64_C( 317.91), SIMDE_FLOAT64_C( -618.37), SIMDE_FLOAT64_C( -933.77), SIMDE_FLOAT64_C( 276.99), + SIMDE_FLOAT64_C( 408.53), SIMDE_FLOAT64_C( 144.70), SIMDE_FLOAT64_C( 
2.20), SIMDE_FLOAT64_C( -416.37) }, + SIMDE_FLOAT64_C( -818.18) }, + { { SIMDE_FLOAT64_C( -732.26), SIMDE_FLOAT64_C( 364.98), SIMDE_FLOAT64_C( 316.28), SIMDE_FLOAT64_C( -357.42), + SIMDE_FLOAT64_C( -484.06), SIMDE_FLOAT64_C( -986.19), SIMDE_FLOAT64_C( -239.40), SIMDE_FLOAT64_C( -641.83) }, + SIMDE_FLOAT64_C( -2759.90) }, + { { SIMDE_FLOAT64_C( 410.58), SIMDE_FLOAT64_C( -325.61), SIMDE_FLOAT64_C( -465.30), SIMDE_FLOAT64_C( -999.28), + SIMDE_FLOAT64_C( -932.67), SIMDE_FLOAT64_C( 276.19), SIMDE_FLOAT64_C( -540.53), SIMDE_FLOAT64_C( 486.53) }, + SIMDE_FLOAT64_C( -2090.09) }, + { { SIMDE_FLOAT64_C( -866.27), SIMDE_FLOAT64_C( 980.71), SIMDE_FLOAT64_C( 914.39), SIMDE_FLOAT64_C( 522.80), + SIMDE_FLOAT64_C( -856.04), SIMDE_FLOAT64_C( -12.87), SIMDE_FLOAT64_C( -518.29), SIMDE_FLOAT64_C( 461.87) }, + SIMDE_FLOAT64_C( 626.30) }, + { { SIMDE_FLOAT64_C( 368.76), SIMDE_FLOAT64_C( -452.06), SIMDE_FLOAT64_C( -261.14), SIMDE_FLOAT64_C( -222.71), + SIMDE_FLOAT64_C( 692.64), SIMDE_FLOAT64_C( 741.07), SIMDE_FLOAT64_C( 360.92), SIMDE_FLOAT64_C( 960.38) }, + SIMDE_FLOAT64_C( 2187.86) }, + { { SIMDE_FLOAT64_C( 106.05), SIMDE_FLOAT64_C( -322.80), SIMDE_FLOAT64_C( -397.04), SIMDE_FLOAT64_C( 621.99), + SIMDE_FLOAT64_C( -308.99), SIMDE_FLOAT64_C( 363.56), SIMDE_FLOAT64_C( 980.16), SIMDE_FLOAT64_C( -898.41) }, + SIMDE_FLOAT64_C( 144.52) }, + { { SIMDE_FLOAT64_C( -962.04), SIMDE_FLOAT64_C( -485.14), SIMDE_FLOAT64_C( -897.69), SIMDE_FLOAT64_C( -894.71), + SIMDE_FLOAT64_C( 791.05), SIMDE_FLOAT64_C( -438.22), SIMDE_FLOAT64_C( 591.82), SIMDE_FLOAT64_C( 924.78) }, + SIMDE_FLOAT64_C( -1370.15) }, + { { SIMDE_FLOAT64_C( -457.51), SIMDE_FLOAT64_C( 506.22), SIMDE_FLOAT64_C( 447.58), SIMDE_FLOAT64_C( -313.54), + SIMDE_FLOAT64_C( -506.66), SIMDE_FLOAT64_C( 929.29), SIMDE_FLOAT64_C( -851.67), SIMDE_FLOAT64_C( 862.10) }, + SIMDE_FLOAT64_C( 615.81) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde__m512d a = simde_mm512_loadu_pd(test_vec[i].a); + simde_float64 r = 
simde_mm512_reduce_add_pd(a); + simde_assert_equal_f64(r, test_vec[i].r, 1); + } + + return 0; +#else + fputc('\n', stdout); + + for (int i = 0; i < 8; i++) { + simde__m512d a = simde_test_x86_random_f64x8(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_float64 r = simde_mm512_reduce_add_pd(a); + + simde_test_x86_write_f64x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_f64(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +#if defined(SIMDE_FLOAT16_IS_SCALAR) +static int +test_simde_mm512_reduce_add_ph (SIMDE_MUNIT_TEST_ARGS) { /* checks simde_mm512_reduce_add_ph against the precomputed vectors below; only compiled when SIMDE_FLOAT16_IS_SCALAR is defined */ +#if 1 + static const struct { + const simde_float16 a[32]; + simde_float16 r; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 614.50), SIMDE_FLOAT16_VALUE( 461.50), SIMDE_FLOAT16_VALUE( -280.25), SIMDE_FLOAT16_VALUE( 406.50), + SIMDE_FLOAT16_VALUE( 220.12), SIMDE_FLOAT16_VALUE( 533.00), SIMDE_FLOAT16_VALUE( -839.50), SIMDE_FLOAT16_VALUE( 442.75), + SIMDE_FLOAT16_VALUE( -545.50), SIMDE_FLOAT16_VALUE( -253.00), SIMDE_FLOAT16_VALUE( -800.50), SIMDE_FLOAT16_VALUE( 761.00), + SIMDE_FLOAT16_VALUE( 915.50), SIMDE_FLOAT16_VALUE( 362.75), SIMDE_FLOAT16_VALUE( -995.50), SIMDE_FLOAT16_VALUE( -38.22), + SIMDE_FLOAT16_VALUE( -436.50), SIMDE_FLOAT16_VALUE( 532.00), SIMDE_FLOAT16_VALUE( 657.50), SIMDE_FLOAT16_VALUE( 461.75), + SIMDE_FLOAT16_VALUE( 968.50), SIMDE_FLOAT16_VALUE( -463.50), SIMDE_FLOAT16_VALUE( -257.50), SIMDE_FLOAT16_VALUE( 346.75), + SIMDE_FLOAT16_VALUE( -615.50), SIMDE_FLOAT16_VALUE( -96.31), SIMDE_FLOAT16_VALUE( 463.25), SIMDE_FLOAT16_VALUE( 658.00), + SIMDE_FLOAT16_VALUE( 372.75), SIMDE_FLOAT16_VALUE( -841.50), SIMDE_FLOAT16_VALUE( -798.00), SIMDE_FLOAT16_VALUE( -12.53) }, + SIMDE_FLOAT16_VALUE( 1903.00) }, + { { SIMDE_FLOAT16_VALUE( 620.00), SIMDE_FLOAT16_VALUE( -78.00), SIMDE_FLOAT16_VALUE( -606.00), SIMDE_FLOAT16_VALUE( -159.88), + SIMDE_FLOAT16_VALUE( -545.00), SIMDE_FLOAT16_VALUE( -445.75), SIMDE_FLOAT16_VALUE( -717.00), SIMDE_FLOAT16_VALUE( -90.38), + SIMDE_FLOAT16_VALUE( 301.25), 
SIMDE_FLOAT16_VALUE( -517.50), SIMDE_FLOAT16_VALUE( -329.25), SIMDE_FLOAT16_VALUE( 216.62), + SIMDE_FLOAT16_VALUE( 845.00), SIMDE_FLOAT16_VALUE( -324.50), SIMDE_FLOAT16_VALUE( -821.50), SIMDE_FLOAT16_VALUE( -591.50), + SIMDE_FLOAT16_VALUE( -792.50), SIMDE_FLOAT16_VALUE( 836.00), SIMDE_FLOAT16_VALUE( 870.00), SIMDE_FLOAT16_VALUE( -824.00), + SIMDE_FLOAT16_VALUE( -627.50), SIMDE_FLOAT16_VALUE( -387.25), SIMDE_FLOAT16_VALUE( 522.50), SIMDE_FLOAT16_VALUE( -242.75), + SIMDE_FLOAT16_VALUE( 516.50), SIMDE_FLOAT16_VALUE( -14.14), SIMDE_FLOAT16_VALUE( -584.50), SIMDE_FLOAT16_VALUE( -110.75), + SIMDE_FLOAT16_VALUE( 144.25), SIMDE_FLOAT16_VALUE( -382.50), SIMDE_FLOAT16_VALUE( 876.50), SIMDE_FLOAT16_VALUE( -235.75) }, + SIMDE_FLOAT16_VALUE( -3680.00) }, + { { SIMDE_FLOAT16_VALUE( 539.50), SIMDE_FLOAT16_VALUE( -729.50), SIMDE_FLOAT16_VALUE( 604.50), SIMDE_FLOAT16_VALUE( 995.00), + SIMDE_FLOAT16_VALUE( -175.00), SIMDE_FLOAT16_VALUE( 887.00), SIMDE_FLOAT16_VALUE( -95.62), SIMDE_FLOAT16_VALUE( -873.50), + SIMDE_FLOAT16_VALUE( -630.50), SIMDE_FLOAT16_VALUE( 575.00), SIMDE_FLOAT16_VALUE( 343.00), SIMDE_FLOAT16_VALUE( -785.50), + SIMDE_FLOAT16_VALUE( -749.50), SIMDE_FLOAT16_VALUE( 521.50), SIMDE_FLOAT16_VALUE( -377.00), SIMDE_FLOAT16_VALUE( -541.50), + SIMDE_FLOAT16_VALUE( 357.25), SIMDE_FLOAT16_VALUE( -506.75), SIMDE_FLOAT16_VALUE( -365.75), SIMDE_FLOAT16_VALUE( 730.00), + SIMDE_FLOAT16_VALUE( 106.12), SIMDE_FLOAT16_VALUE( -843.00), SIMDE_FLOAT16_VALUE( -513.00), SIMDE_FLOAT16_VALUE( -377.50), + SIMDE_FLOAT16_VALUE( 142.75), SIMDE_FLOAT16_VALUE( -97.44), SIMDE_FLOAT16_VALUE( 511.75), SIMDE_FLOAT16_VALUE( -713.00), + SIMDE_FLOAT16_VALUE( 520.00), SIMDE_FLOAT16_VALUE( 388.50), SIMDE_FLOAT16_VALUE( 51.19), SIMDE_FLOAT16_VALUE( 59.72) }, + SIMDE_FLOAT16_VALUE( -1039.00) }, + { { SIMDE_FLOAT16_VALUE( 659.00), SIMDE_FLOAT16_VALUE( -344.50), SIMDE_FLOAT16_VALUE( 54.50), SIMDE_FLOAT16_VALUE( -516.00), + SIMDE_FLOAT16_VALUE( -457.25), SIMDE_FLOAT16_VALUE( 959.00), SIMDE_FLOAT16_VALUE( 
-389.50), SIMDE_FLOAT16_VALUE( -87.81), + SIMDE_FLOAT16_VALUE( 534.00), SIMDE_FLOAT16_VALUE( 953.50), SIMDE_FLOAT16_VALUE( 126.62), SIMDE_FLOAT16_VALUE( 785.00), + SIMDE_FLOAT16_VALUE( 474.75), SIMDE_FLOAT16_VALUE( 749.50), SIMDE_FLOAT16_VALUE( -757.00), SIMDE_FLOAT16_VALUE( -168.00), + SIMDE_FLOAT16_VALUE( -757.00), SIMDE_FLOAT16_VALUE( -122.69), SIMDE_FLOAT16_VALUE( -438.00), SIMDE_FLOAT16_VALUE( 349.00), + SIMDE_FLOAT16_VALUE( 34.19), SIMDE_FLOAT16_VALUE( 49.09), SIMDE_FLOAT16_VALUE( 971.50), SIMDE_FLOAT16_VALUE( -823.00), + SIMDE_FLOAT16_VALUE( 951.50), SIMDE_FLOAT16_VALUE( 483.50), SIMDE_FLOAT16_VALUE( -536.00), SIMDE_FLOAT16_VALUE( 471.75), + SIMDE_FLOAT16_VALUE( -128.00), SIMDE_FLOAT16_VALUE( 515.00), SIMDE_FLOAT16_VALUE( -468.50), SIMDE_FLOAT16_VALUE( -468.75) }, + SIMDE_FLOAT16_VALUE( 2660.00) }, + { { SIMDE_FLOAT16_VALUE( -829.50), SIMDE_FLOAT16_VALUE( 586.00), SIMDE_FLOAT16_VALUE( 15.40), SIMDE_FLOAT16_VALUE( -286.75), + SIMDE_FLOAT16_VALUE( 545.00), SIMDE_FLOAT16_VALUE( 626.00), SIMDE_FLOAT16_VALUE( 625.50), SIMDE_FLOAT16_VALUE( 79.06), + SIMDE_FLOAT16_VALUE( 579.50), SIMDE_FLOAT16_VALUE( -248.00), SIMDE_FLOAT16_VALUE( -136.12), SIMDE_FLOAT16_VALUE( 53.97), + SIMDE_FLOAT16_VALUE( -498.25), SIMDE_FLOAT16_VALUE( 107.00), SIMDE_FLOAT16_VALUE( 886.00), SIMDE_FLOAT16_VALUE( -255.25), + SIMDE_FLOAT16_VALUE( 984.50), SIMDE_FLOAT16_VALUE( -552.00), SIMDE_FLOAT16_VALUE( -906.00), SIMDE_FLOAT16_VALUE( 18.55), + SIMDE_FLOAT16_VALUE( 497.00), SIMDE_FLOAT16_VALUE( -934.50), SIMDE_FLOAT16_VALUE( 195.50), SIMDE_FLOAT16_VALUE( 448.75), + SIMDE_FLOAT16_VALUE( 549.00), SIMDE_FLOAT16_VALUE( 659.50), SIMDE_FLOAT16_VALUE( -79.56), SIMDE_FLOAT16_VALUE( -579.00), + SIMDE_FLOAT16_VALUE( 174.50), SIMDE_FLOAT16_VALUE( 452.00), SIMDE_FLOAT16_VALUE( -47.78), SIMDE_FLOAT16_VALUE( 345.00) }, + SIMDE_FLOAT16_VALUE( 3076.00) }, + { { SIMDE_FLOAT16_VALUE( 37.97), SIMDE_FLOAT16_VALUE( 967.50), SIMDE_FLOAT16_VALUE( -941.50), SIMDE_FLOAT16_VALUE( -417.00), + SIMDE_FLOAT16_VALUE( 593.50), 
SIMDE_FLOAT16_VALUE( 684.00), SIMDE_FLOAT16_VALUE( 662.00), SIMDE_FLOAT16_VALUE( 172.75), + SIMDE_FLOAT16_VALUE( -564.00), SIMDE_FLOAT16_VALUE( -474.25), SIMDE_FLOAT16_VALUE( -773.00), SIMDE_FLOAT16_VALUE( -62.44), + SIMDE_FLOAT16_VALUE( 633.00), SIMDE_FLOAT16_VALUE( -887.00), SIMDE_FLOAT16_VALUE( 682.50), SIMDE_FLOAT16_VALUE( 617.00), + SIMDE_FLOAT16_VALUE( -439.25), SIMDE_FLOAT16_VALUE( 776.00), SIMDE_FLOAT16_VALUE( -364.25), SIMDE_FLOAT16_VALUE( -942.50), + SIMDE_FLOAT16_VALUE( 841.50), SIMDE_FLOAT16_VALUE( 831.00), SIMDE_FLOAT16_VALUE( 506.50), SIMDE_FLOAT16_VALUE( 390.50), + SIMDE_FLOAT16_VALUE( 490.50), SIMDE_FLOAT16_VALUE( -573.00), SIMDE_FLOAT16_VALUE( 811.50), SIMDE_FLOAT16_VALUE( -335.00), + SIMDE_FLOAT16_VALUE( 879.00), SIMDE_FLOAT16_VALUE( -236.25), SIMDE_FLOAT16_VALUE( -990.00), SIMDE_FLOAT16_VALUE( -83.25) }, + SIMDE_FLOAT16_VALUE( 2494.00) }, + { { SIMDE_FLOAT16_VALUE( -268.75), SIMDE_FLOAT16_VALUE( -931.50), SIMDE_FLOAT16_VALUE( 499.75), SIMDE_FLOAT16_VALUE( -675.00), + SIMDE_FLOAT16_VALUE( 752.00), SIMDE_FLOAT16_VALUE( 161.62), SIMDE_FLOAT16_VALUE( 497.75), SIMDE_FLOAT16_VALUE( -812.00), + SIMDE_FLOAT16_VALUE( 687.50), SIMDE_FLOAT16_VALUE( 724.50), SIMDE_FLOAT16_VALUE( 125.62), SIMDE_FLOAT16_VALUE( 320.25), + SIMDE_FLOAT16_VALUE( 837.00), SIMDE_FLOAT16_VALUE( -192.12), SIMDE_FLOAT16_VALUE( -62.56), SIMDE_FLOAT16_VALUE( -602.00), + SIMDE_FLOAT16_VALUE( -416.00), SIMDE_FLOAT16_VALUE( 573.00), SIMDE_FLOAT16_VALUE( -544.50), SIMDE_FLOAT16_VALUE( -574.50), + SIMDE_FLOAT16_VALUE( 404.25), SIMDE_FLOAT16_VALUE( 962.00), SIMDE_FLOAT16_VALUE( 816.00), SIMDE_FLOAT16_VALUE( -105.00), + SIMDE_FLOAT16_VALUE( -611.00), SIMDE_FLOAT16_VALUE( 627.50), SIMDE_FLOAT16_VALUE( 560.00), SIMDE_FLOAT16_VALUE( -732.50), + SIMDE_FLOAT16_VALUE( -609.00), SIMDE_FLOAT16_VALUE( 570.00), SIMDE_FLOAT16_VALUE( 184.38), SIMDE_FLOAT16_VALUE( 122.62) }, + SIMDE_FLOAT16_VALUE( 2290.00) }, + { { SIMDE_FLOAT16_VALUE( 638.50), SIMDE_FLOAT16_VALUE( -316.00), SIMDE_FLOAT16_VALUE( 447.50), 
SIMDE_FLOAT16_VALUE( 391.00), + SIMDE_FLOAT16_VALUE( 845.50), SIMDE_FLOAT16_VALUE( -54.88), SIMDE_FLOAT16_VALUE( 579.00), SIMDE_FLOAT16_VALUE( 533.00), + SIMDE_FLOAT16_VALUE( -330.50), SIMDE_FLOAT16_VALUE( -295.50), SIMDE_FLOAT16_VALUE( -146.75), SIMDE_FLOAT16_VALUE( -493.25), + SIMDE_FLOAT16_VALUE( 512.50), SIMDE_FLOAT16_VALUE( 790.50), SIMDE_FLOAT16_VALUE( -95.38), SIMDE_FLOAT16_VALUE( -903.50), + SIMDE_FLOAT16_VALUE( 364.00), SIMDE_FLOAT16_VALUE( 360.25), SIMDE_FLOAT16_VALUE( -478.25), SIMDE_FLOAT16_VALUE( -231.75), + SIMDE_FLOAT16_VALUE( 322.25), SIMDE_FLOAT16_VALUE( -662.00), SIMDE_FLOAT16_VALUE( 663.00), SIMDE_FLOAT16_VALUE( 711.00), + SIMDE_FLOAT16_VALUE( 965.50), SIMDE_FLOAT16_VALUE( 223.25), SIMDE_FLOAT16_VALUE( 978.50), SIMDE_FLOAT16_VALUE( -643.50), + SIMDE_FLOAT16_VALUE( -206.50), SIMDE_FLOAT16_VALUE( 163.12), SIMDE_FLOAT16_VALUE( 479.25), SIMDE_FLOAT16_VALUE( -568.00) }, + SIMDE_FLOAT16_VALUE( 4544.00) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde__m512h a = simde_mm512_loadu_ph(test_vec[i].a); + simde_float16 r = simde_mm512_reduce_add_ph(a); + simde_assert_equal_f16(r, test_vec[i].r, 1); + } + + return 0; +#else + fputc('\n', stdout); + + for (int i = 0; i < 8; i++) { + simde__m512h a = simde_test_x86_random_f16x32(SIMDE_FLOAT16_VALUE(-1000.0), SIMDE_FLOAT16_VALUE(1000.0)); + simde_float16 r = simde_mm512_reduce_add_ph(a); + + simde_test_x86_write_f16x32(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_f16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} +#endif + +static int +test_simde_mm512_reduce_and_epi32 (SIMDE_MUNIT_TEST_ARGS) { /* checks simde_mm512_reduce_and_epi32 against the precomputed vectors below; NOTE(review): every expected value here is 0, so these vectors only weakly exercise the AND reduction */ +#if 1 + static const struct { + const int32_t a[16]; + int32_t r; + } test_vec[] = { + { { INT32_C( 911118265), -INT32_C( 1667092070), -INT32_C( 180272939), -INT32_C( 1252899561), -INT32_C( 233116269), INT32_C( 1163392418), -INT32_C( 20035201), -INT32_C( 510343384), + -INT32_C( 266804394), -INT32_C( 376587756), INT32_C( 316591866), -INT32_C( 
1496895469), -INT32_C( 1063722722), INT32_C( 1443295446), INT32_C( 1632949305), -INT32_C( 247273061) }, + INT32_C( 0) }, + { { -INT32_C( 522036532), INT32_C( 264859413), INT32_C( 1361159997), -INT32_C( 168302377), -INT32_C( 1598648118), -INT32_C( 1175012224), INT32_C( 723143568), INT32_C( 1858612), + -INT32_C( 857669961), -INT32_C( 1411667603), INT32_C( 670891344), -INT32_C( 1356991515), INT32_C( 55628675), INT32_C( 532432527), -INT32_C( 984951151), -INT32_C( 356161997) }, + INT32_C( 0) }, + { { -INT32_C( 759782043), -INT32_C( 1635937970), INT32_C( 1959164302), -INT32_C( 266083475), INT32_C( 1173648310), INT32_C( 1264890042), -INT32_C( 1173246329), INT32_C( 2040845844), + -INT32_C( 900965764), INT32_C( 2053753323), -INT32_C( 1326567613), -INT32_C( 929033966), INT32_C( 1057854597), -INT32_C( 863342267), INT32_C( 864459551), -INT32_C( 290641294) }, + INT32_C( 0) }, + { { INT32_C( 1874393476), INT32_C( 99164610), INT32_C( 1656084304), INT32_C( 1848268521), INT32_C( 799881194), -INT32_C( 939837528), INT32_C( 1174110675), INT32_C( 791914667), + INT32_C( 1671424929), INT32_C( 1567197197), INT32_C( 1237327456), INT32_C( 1605888628), -INT32_C( 896637918), INT32_C( 1855097499), -INT32_C( 1212969717), -INT32_C( 689510859) }, + INT32_C( 0) }, + { { -INT32_C( 549878318), INT32_C( 1849467406), INT32_C( 901250241), INT32_C( 160722663), INT32_C( 1842553810), -INT32_C( 1193515603), INT32_C( 661622770), INT32_C( 1224562294), + -INT32_C( 383305765), -INT32_C( 1705548839), INT32_C( 1204817504), INT32_C( 1313891452), INT32_C( 884745095), INT32_C( 2079168649), -INT32_C( 1650238425), -INT32_C( 1897553742) }, + INT32_C( 0) }, + { { -INT32_C( 1317597993), -INT32_C( 800338064), INT32_C( 1511463901), INT32_C( 128477056), INT32_C( 322725002), INT32_C( 613362172), INT32_C( 952185477), -INT32_C( 1429821486), + INT32_C( 593182131), -INT32_C( 353130996), INT32_C( 1111755458), -INT32_C( 62264207), INT32_C( 1292862801), INT32_C( 879861422), -INT32_C( 1553189936), -INT32_C( 1924320550) }, + 
INT32_C( 0) }, + { { INT32_C( 2092017775), INT32_C( 275162190), INT32_C( 542288559), -INT32_C( 417555306), -INT32_C( 801821663), -INT32_C( 1694193974), -INT32_C( 1287753511), INT32_C( 289508258), + -INT32_C( 2121403853), INT32_C( 1167258518), INT32_C( 879158429), -INT32_C( 1575255424), INT32_C( 2037534894), -INT32_C( 803965193), -INT32_C( 2004593946), INT32_C( 278513117) }, + INT32_C( 0) }, + { { INT32_C( 1301358519), -INT32_C( 1198316774), -INT32_C( 1997735672), INT32_C( 707397499), INT32_C( 1336122456), -INT32_C( 132139246), -INT32_C( 427777271), INT32_C( 536222312), + INT32_C( 1533904705), -INT32_C( 1290600277), INT32_C( 1950089209), INT32_C( 1604216327), INT32_C( 363741442), INT32_C( 17681912), -INT32_C( 639136143), -INT32_C( 369500760) }, + INT32_C( 0) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); + int32_t r = simde_mm512_reduce_and_epi32(a); + simde_assert_equal_i32(r, test_vec[i].r); + } + + return 0; +#else + fputc('\n', stdout); + + for (int i = 0; i < 8; i++) { + simde__m512i a = simde_test_x86_random_i32x16(); + int32_t r = simde_mm512_reduce_and_epi32(a); + + simde_test_x86_write_i32x16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_i32(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_mm512_reduce_and_epi64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + const int64_t a[8]; + int64_t r; + } test_vec[] = { + { { -INT64_C( 6796415253357073778), -INT64_C( 6937918192989542337), INT64_C( 8716418624655951018), -INT64_C( 2440524041695220227), + INT64_C( 4784671099518696302), -INT64_C( 4268994520076154375), -INT64_C( 1871395275130406656), -INT64_C( 1273520890085442780) }, + INT64_C( 0) }, + { { -INT64_C( 4499912104635845049), -INT64_C( 3566387122549796190), -INT64_C( 7030851870619008354), -INT64_C( 6138857762423908512), + INT64_C( 4334012716532662684), INT64_C( 5580094833067362771), -INT64_C( 
7812828141917450884), -INT64_C( 6402076827678693862) }, + INT64_C( 0) }, + { { INT64_C( 8113289280631806914), -INT64_C( 8703344767203634886), INT64_C( 7499918683838155855), -INT64_C( 8401169569185167178), + -INT64_C( 2290784481186450136), INT64_C( 1847603710411818769), -INT64_C( 1674019435438202611), -INT64_C( 2410530028117120712) }, + INT64_C( 0) }, + { { INT64_C( 220633441440592776), -INT64_C( 1800660456501618400), -INT64_C( 8791582494424916507), INT64_C( 4919077293378676912), + -INT64_C( 2343135422244984955), INT64_C( 4908021303711734619), INT64_C( 1530196266785136233), INT64_C( 6485957338111656450) }, + INT64_C( 0) }, + { { INT64_C( 5928231308975306150), INT64_C( 4795669331856264937), INT64_C( 4934877851019765762), INT64_C( 6640281358734391287), + INT64_C( 3585998107214739834), -INT64_C( 1067344384607237356), INT64_C( 6230400635550312944), -INT64_C( 658289171434932021) }, + INT64_C( 0) }, + { { -INT64_C( 5063404266544652733), INT64_C( 3267317812907423095), INT64_C( 5671955540954033921), INT64_C( 7332049458773874765), + -INT64_C( 3771294831638271925), INT64_C( 3047398907546063328), -INT64_C( 3815885346193102535), -INT64_C( 3269519290162357735) }, + INT64_C( 0) }, + { { INT64_C( 1989583035762380669), -INT64_C( 7993744920756564292), -INT64_C( 831379380708206036), -INT64_C( 2482381280240530637), + INT64_C( 8553544852802784862), -INT64_C( 7205078327451980775), -INT64_C( 2605968276332261370), INT64_C( 3532829623514891465) }, + INT64_C( 1152921504606846976) }, + { { INT64_C( 1918390505449370991), INT64_C( 7815590775752489579), INT64_C( 4927538856226307550), -INT64_C( 8634961680621766874), + -INT64_C( 2430729443305458390), INT64_C( 4331670500355461195), -INT64_C( 5572038453492552979), -INT64_C( 5134713193370019556) }, + INT64_C( 0) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); + int64_t r = simde_mm512_reduce_and_epi64(a); + simde_assert_equal_i64(r, test_vec[i].r); + } + 
+ return 0; +#else + fputc('\n', stdout); + + for (int i = 0; i < 8; i++) { + simde__m512i a = simde_test_x86_random_i64x8(); + int64_t r = simde_mm512_reduce_and_epi64(a); + + simde_test_x86_write_i64x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_i64(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_mm512_reduce_mul_epi32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + const int32_t a[16]; + int32_t r; + } test_vec[] = { + { { -INT32_C( 1865536518), INT32_C( 376613919), -INT32_C( 820505661), INT32_C( 1134478853), -INT32_C( 1086512462), INT32_C( 1499270912), INT32_C( 1159178392), INT32_C( 1089429574), + -INT32_C( 1412383348), INT32_C( 767640425), INT32_C( 1543297622), INT32_C( 1402903200), -INT32_C( 1139614532), -INT32_C( 2095746837), INT32_C( 1590242328), INT32_C( 413055116) }, + INT32_C( 1610612736) }, + { { -INT32_C( 557617547), INT32_C( 134972849), -INT32_C( 10287265), INT32_C( 1565655713), -INT32_C( 921082402), -INT32_C( 313708843), -INT32_C( 414509478), INT32_C( 1140845006), + INT32_C( 153207639), -INT32_C( 1492046520), -INT32_C( 693734348), INT32_C( 1412692342), INT32_C( 857623902), -INT32_C( 702518405), INT32_C( 1337813889), -INT32_C( 1416446892) }, + -INT32_C( 633098240) }, + { { -INT32_C( 944458881), INT32_C( 359581152), -INT32_C( 1326770886), INT32_C( 1812274702), -INT32_C( 409001109), INT32_C( 264093582), INT32_C( 2120120874), -INT32_C( 1238765514), + -INT32_C( 2072125789), -INT32_C( 577114973), INT32_C( 260998145), INT32_C( 242979746), INT32_C( 1156913846), INT32_C( 55816921), INT32_C( 1652666668), INT32_C( 1142467233) }, + INT32_C( 766001152) }, + { { INT32_C( 751343240), -INT32_C( 2096537214), -INT32_C( 2003658779), -INT32_C( 527037142), INT32_C( 2394919), INT32_C( 1761834813), -INT32_C( 909409240), -INT32_C( 1223760850), + -INT32_C( 52177286), INT32_C( 494922808), -INT32_C( 1364913788), INT32_C( 1167014686), INT32_C( 54899654), INT32_C( 1399605291), -INT32_C( 98748212), -INT32_C( 
1766773988) }, + -INT32_C( 803209216) }, + { { INT32_C( 965907457), INT32_C( 89592193), INT32_C( 1085537314), -INT32_C( 24821192), INT32_C( 553831158), -INT32_C( 562794990), -INT32_C( 1025994330), -INT32_C( 1101493572), + -INT32_C( 1611142626), INT32_C( 497307387), -INT32_C( 2091034549), -INT32_C( 1870536038), -INT32_C( 1095662676), -INT32_C( 1734596878), INT32_C( 1952150967), INT32_C( 489862143) }, + INT32_C( 1484931072) }, + { { -INT32_C( 1715656290), -INT32_C( 1011392136), INT32_C( 1413878969), -INT32_C( 1545287689), INT32_C( 1029871179), INT32_C( 1943404220), INT32_C( 1927753843), -INT32_C( 2104485404), + -INT32_C( 1155838653), INT32_C( 1753141934), -INT32_C( 558054169), -INT32_C( 696147829), -INT32_C( 233577674), INT32_C( 1432807906), -INT32_C( 37204711), -INT32_C( 1434494873) }, + -INT32_C( 1239422976) }, + { { INT32_C( 1399167909), INT32_C( 1421599597), INT32_C( 842168231), INT32_C( 1309258776), INT32_C( 2034310295), INT32_C( 533702405), INT32_C( 1528600564), -INT32_C( 1811571473), + -INT32_C( 1528337865), -INT32_C( 184966323), INT32_C( 841427738), INT32_C( 2004955359), INT32_C( 1391510092), INT32_C( 1567735657), INT32_C( 1169788246), INT32_C( 1624882729) }, + -INT32_C( 183166464) }, + { { INT32_C( 1980088617), INT32_C( 2121006692), INT32_C( 162632233), INT32_C( 243282626), INT32_C( 1566601460), -INT32_C( 2034577104), -INT32_C( 1999867041), INT32_C( 1542038834), + -INT32_C( 892211610), INT32_C( 357121260), -INT32_C( 1877017906), INT32_C( 547331628), INT32_C( 1065221903), INT32_C( 818231249), -INT32_C( 575106645), -INT32_C( 1657233097) }, + -INT32_C( 590413824) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); + int32_t r = simde_mm512_reduce_mul_epi32(a); + simde_assert_equal_i32(r, test_vec[i].r); + } + + return 0; +#else + fputc('\n', stdout); + + for (int i = 0; i < 8; i++) { + simde__m512i a = simde_test_x86_random_i32x16(); + int32_t r = 
simde_mm512_reduce_mul_epi32(a); + + simde_test_x86_write_i32x16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_i32(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_mm512_reduce_mul_epi64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + const int64_t a[8]; + int64_t r; + } test_vec[] = { + { { INT64_C( 3819084342353035751), INT64_C( 2784757011166408095), -INT64_C( 4785617923635265167), INT64_C( 4334229024563278427), + INT64_C( 1726521846286080967), INT64_C( 3668200320869814275), INT64_C( 5685890131966354001), INT64_C( 4475167871027523976) }, + INT64_C( 8212044715028596856) }, + { { -INT64_C( 3897597442253721512), -INT64_C( 5388993030692056253), -INT64_C( 3056033967646837163), -INT64_C( 4418086852369355270), + -INT64_C( 1996084601204527683), -INT64_C( 6924250023539910706), -INT64_C( 8271003023452944106), INT64_C( 8437404533641898731) }, + INT64_C( 2046926549093215424) }, + { { -INT64_C( 801283229234910629), -INT64_C( 70070873580189155), INT64_C( 6338703691467648727), -INT64_C( 22020317783460476), + INT64_C( 975272245175495278), -INT64_C( 7741833180323991898), INT64_C( 1094081740453900650), INT64_C( 976714452131633899) }, + INT64_C( 1915637826888418144) }, + { { INT64_C( 910898540948064679), -INT64_C( 132526796882306857), INT64_C( 627972999451256605), -INT64_C( 6518919785205071139), + INT64_C( 3035109841992165805), INT64_C( 255552781283146489), INT64_C( 4554373296974159631), -INT64_C( 7275490506879350172) }, + INT64_C( 7879463648566061996) }, + { { INT64_C( 2227535953213146220), -INT64_C( 7030059028677154937), -INT64_C( 2218305949451805803), -INT64_C( 6894365389765742136), + INT64_C( 6112309167620292752), INT64_C( 5268421790400610032), -INT64_C( 5932797730299451572), -INT64_C( 3945796240441807512) }, + -INT64_C( 7381453079303684096) }, + { { -INT64_C( 3037587902449148745), INT64_C( 1642813078684080591), -INT64_C( 1303077936808653480), -INT64_C( 4793581496813596809), + INT64_C( 1665516334949267299), INT64_C( 
4221393348702400948), INT64_C( 8495586594327590004), -INT64_C( 7244991938399211343) }, + -INT64_C( 7049936629195568256) }, + { { -INT64_C( 8794402171465255275), INT64_C( 7534408876779867762), INT64_C( 1909605858004764429), -INT64_C( 2843102581648939592), + -INT64_C( 2138460575389880580), -INT64_C( 318410947406899913), -INT64_C( 697910344784874035), INT64_C( 7153529747180757291) }, + INT64_C( 1615836996182104640) }, + { { -INT64_C( 2652195063329653793), -INT64_C( 234017654958561065), INT64_C( 8629885676855412368), -INT64_C( 401335925920650844), + -INT64_C( 529883099894800858), -INT64_C( 3022902410478768926), INT64_C( 7352240477047111492), INT64_C( 4622346991273438737) }, + INT64_C( 2054176044052524032) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); + int64_t r = simde_mm512_reduce_mul_epi64(a); + simde_assert_equal_i64(r, test_vec[i].r); + } + + return 0; +#else + fputc('\n', stdout); + + for (int i = 0; i < 8; i++) { + simde__m512i a = simde_test_x86_random_i64x8(); + int64_t r = simde_mm512_reduce_mul_epi64(a); + + simde_test_x86_write_i64x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_i64(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_mm512_reduce_or_epi32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + const int32_t a[16]; + int32_t r; + } test_vec[] = { + { { -INT32_C( 473995446), -INT32_C( 1766483412), -INT32_C( 1473866883), INT32_C( 469849587), -INT32_C( 2066623672), INT32_C( 971718768), -INT32_C( 610200945), -INT32_C( 1026162569), + -INT32_C( 1817864601), -INT32_C( 1389798864), -INT32_C( 514502674), -INT32_C( 318941532), -INT32_C( 1703883222), -INT32_C( 1630315761), -INT32_C( 445025170), -INT32_C( 609791884) }, + -INT32_C( 1) }, + { { INT32_C( 376327654), -INT32_C( 1782343513), -INT32_C( 1938417177), -INT32_C( 1720159377), INT32_C( 1362422082), -INT32_C( 1292957884), -INT32_C( 258447236), -INT32_C( 
1630781512), + INT32_C( 884226700), -INT32_C( 1161201454), INT32_C( 4604049), -INT32_C( 174408013), -INT32_C( 330903897), INT32_C( 1369322965), INT32_C( 1447179934), INT32_C( 49614454) }, + -INT32_C( 1) }, + { { INT32_C( 456567112), -INT32_C( 1294663647), -INT32_C( 206365888), -INT32_C( 2115416615), -INT32_C( 261279973), INT32_C( 54659941), -INT32_C( 1202092990), -INT32_C( 625324398), + INT32_C( 435548664), INT32_C( 835504881), -INT32_C( 1088127003), -INT32_C( 415232564), -INT32_C( 1562923715), -INT32_C( 89843272), INT32_C( 800259741), INT32_C( 1158311245) }, + -INT32_C( 1) }, + { { INT32_C( 1331625822), -INT32_C( 1350554934), INT32_C( 1970185385), -INT32_C( 279138638), INT32_C( 345060443), -INT32_C( 351390131), -INT32_C( 2128953036), -INT32_C( 1933171666), + -INT32_C( 287627996), -INT32_C( 107127985), -INT32_C( 1318187777), INT32_C( 379636666), INT32_C( 1277833727), -INT32_C( 1674102681), INT32_C( 656233209), -INT32_C( 1699486602) }, + -INT32_C( 1) }, + { { INT32_C( 1485344265), -INT32_C( 380557591), -INT32_C( 325402574), -INT32_C( 1979565173), -INT32_C( 724161428), INT32_C( 1584402021), -INT32_C( 695825056), INT32_C( 2054240625), + -INT32_C( 1311508024), INT32_C( 1369056287), INT32_C( 1866348004), -INT32_C( 587644816), -INT32_C( 760164243), INT32_C( 1043341534), INT32_C( 521451182), -INT32_C( 1197832720) }, + -INT32_C( 1) }, + { { -INT32_C( 1637257857), INT32_C( 1995441297), -INT32_C( 1444598215), -INT32_C( 611983506), -INT32_C( 1918028113), INT32_C( 80469590), -INT32_C( 2077958252), -INT32_C( 465781147), + -INT32_C( 1132288469), -INT32_C( 483233110), INT32_C( 260839584), -INT32_C( 1494609417), -INT32_C( 1640785849), INT32_C( 178454134), INT32_C( 1116718813), -INT32_C( 1356412028) }, + -INT32_C( 1) }, + { { INT32_C( 460106097), -INT32_C( 1124164069), -INT32_C( 1379169610), -INT32_C( 481053284), -INT32_C( 1014921651), INT32_C( 1657676932), INT32_C( 1873042922), -INT32_C( 1709257944), + -INT32_C( 1883927948), -INT32_C( 548686807), -INT32_C( 628287938), INT32_C( 
431939788), -INT32_C( 337821849), INT32_C( 1313712995), INT32_C( 817754632), INT32_C( 835378109) }, + -INT32_C( 1) }, + { { -INT32_C( 1883209626), INT32_C( 1919814708), -INT32_C( 296879326), INT32_C( 1124600796), -INT32_C( 1372658358), -INT32_C( 1728283504), INT32_C( 734574958), -INT32_C( 94596204), + INT32_C( 1200167955), INT32_C( 1270478888), -INT32_C( 818346253), INT32_C( 1544700177), -INT32_C( 1240841946), INT32_C( 726533821), INT32_C( 1398151103), -INT32_C( 1118915926) }, + -INT32_C( 1) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde__m512i a = simde_mm512_loadu_epi32(test_vec[i].a); + int32_t r = simde_mm512_reduce_or_epi32(a); + simde_assert_equal_i32(r, test_vec[i].r); + } + + return 0; +#else + fputc('\n', stdout); + + for (int i = 0; i < 8; i++) { + simde__m512i a = simde_test_x86_random_i32x16(); + int32_t r = simde_mm512_reduce_or_epi32(a); + + simde_test_x86_write_i32x16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_i32(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_mm512_reduce_or_epi64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + const int64_t a[8]; + int64_t r; + } test_vec[] = { + { { -INT64_C( 2498594667079982833), INT64_C( 249387630721685344), INT64_C( 8885684221867711730), -INT64_C( 8287679410898430424), + -INT64_C( 7664758002762907808), INT64_C( 2154021532583144230), INT64_C( 986037514647870331), INT64_C( 7186169414489457585) }, + -INT64_C( 1) }, + { { INT64_C( 3112206165109814630), INT64_C( 8375551780960557554), -INT64_C( 3135255059661125750), -INT64_C( 5296307588806023143), + INT64_C( 6890931211990050161), INT64_C( 2758095905008495834), INT64_C( 6824664571418439360), -INT64_C( 1872660217138071300) }, + -INT64_C( 1) }, + { { -INT64_C( 5878325801436556386), INT64_C( 7020205414633498793), INT64_C( 1393857586272666293), INT64_C( 7569814069551550899), + INT64_C( 1168366784818211252), INT64_C( 3660231866550957768), -INT64_C( 
4833062746760409148), INT64_C( 7028132944378350677) }, + -INT64_C( 1) }, + { { -INT64_C( 6570435348350239397), -INT64_C( 6014880660457642026), -INT64_C( 8453691675493551384), -INT64_C( 2391355862054956351), + -INT64_C( 5787000344137385681), INT64_C( 9164131001054707352), -INT64_C( 1514864061816130502), INT64_C( 437738493546374321) }, + -INT64_C( 1) }, + { { -INT64_C( 4797807890485038110), INT64_C( 3429609816677313665), -INT64_C( 2795721445842417376), -INT64_C( 7630186611341358274), + INT64_C( 8337776933073923796), INT64_C( 9172746531966350119), -INT64_C( 4309668293288334546), INT64_C( 3733753880448155270) }, + -INT64_C( 1) }, + { { INT64_C( 326759308348563748), INT64_C( 7472153724687750356), -INT64_C( 242651814414157224), -INT64_C( 7729013205561345237), + -INT64_C( 7059055097666672344), -INT64_C( 1231870191121323705), -INT64_C( 4796213127357422337), -INT64_C( 2959703133748942872) }, + -INT64_C( 1) }, + { { INT64_C( 1177661382621632494), -INT64_C( 6011610964912232178), -INT64_C( 5590214212759495396), -INT64_C( 6359653621465107656), + -INT64_C( 8400981880901429105), -INT64_C( 8270085072268297760), INT64_C( 2709943137248959262), INT64_C( 3295166658624857690) }, + -INT64_C( 4611686018427387905) }, + { { -INT64_C( 5662731371781154207), INT64_C( 8639726359813265640), INT64_C( 2514693906107474388), INT64_C( 1904723271366341026), + -INT64_C( 3707199286980054006), INT64_C( 1858311288114236665), -INT64_C( 3956483573567378899), INT64_C( 5081318480436415664) }, + -INT64_C( 1) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde__m512i a = simde_mm512_loadu_epi64(test_vec[i].a); + int64_t r = simde_mm512_reduce_or_epi64(a); + simde_assert_equal_i64(r, test_vec[i].r); + } + + return 0; +#else + fputc('\n', stdout); + + for (int i = 0; i < 8; i++) { + simde__m512i a = simde_test_x86_random_i64x8(); + int64_t r = simde_mm512_reduce_or_epi64(a); + + simde_test_x86_write_i64x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_i64(2, 
r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + SIMDE_TEST_FUNC_LIST_BEGIN + SIMDE_TEST_FUNC_LIST_ENTRY(mm512_reduce_add_epi32) + SIMDE_TEST_FUNC_LIST_ENTRY(mm512_reduce_add_epi64) + SIMDE_TEST_FUNC_LIST_ENTRY(mm512_reduce_add_ps) + SIMDE_TEST_FUNC_LIST_ENTRY(mm512_reduce_add_pd) + #if defined(SIMDE_FLOAT16_IS_SCALAR) + SIMDE_TEST_FUNC_LIST_ENTRY(mm512_reduce_add_ph) + #endif + SIMDE_TEST_FUNC_LIST_ENTRY(mm512_reduce_and_epi32) + SIMDE_TEST_FUNC_LIST_ENTRY(mm512_reduce_and_epi64) #if defined(SIMDE_FLOAT16_IS_SCALAR) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_reduce_max_ph) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_reduce_min_ph) @@ -932,6 +1653,10 @@ SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(mm512_reduce_min_epu64) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_reduce_min_ps) SIMDE_TEST_FUNC_LIST_ENTRY(mm512_reduce_min_pd) + SIMDE_TEST_FUNC_LIST_ENTRY(mm512_reduce_mul_epi32) + SIMDE_TEST_FUNC_LIST_ENTRY(mm512_reduce_mul_epi64) + SIMDE_TEST_FUNC_LIST_ENTRY(mm512_reduce_or_epi32) + SIMDE_TEST_FUNC_LIST_ENTRY(mm512_reduce_or_epi64) SIMDE_TEST_FUNC_LIST_END