Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 26 additions & 12 deletions simde/x86/avx.h
Original file line number Diff line number Diff line change
Expand Up @@ -3823,7 +3823,7 @@ simde__m128i
simde_mm256_cvttpd_epi32 (simde__m256d a) {
#if defined(SIMDE_X86_AVX_NATIVE)
return _mm256_cvttpd_epi32(a);
#elif defined(SIMDE_LOONGARCH_LASX_NATIVE)
#elif defined(SIMDE_LOONGARCH_LASX_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE)
simde__m256i_private a_;
a_.i256 = __lasx_xvftintrz_w_d(a, a);
a_.i256 = __lasx_xvpermi_d(a_.i256, 0xd8);
Expand All @@ -3832,13 +3832,20 @@ simde_mm256_cvttpd_epi32 (simde__m256d a) {
simde__m128i_private r_;
simde__m256d_private a_ = simde__m256d_to_private(a);

#if defined(simde_math_trunc)
#if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
r_.m64[0] = simde_mm_cvttpd_pi32(a_.m128d[0]);
r_.m64[1] = simde_mm_cvttpd_pi32(a_.m128d[1]);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) {
r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_trunc(a_.f64[i]));
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
simde_float64 v = simde_math_trunc(a_.f64[i]);
#if defined(SIMDE_FAST_CONVERSION_RANGE)
r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v);
#else
r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ?
SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN;
#endif
}
#else
HEDLEY_UNREACHABLE();
#endif

return simde__m128i_from_private(r_);
Expand All @@ -3854,19 +3861,26 @@ simde__m256i
simde_mm256_cvttps_epi32 (simde__m256 a) {
#if defined(SIMDE_X86_AVX_NATIVE)
return _mm256_cvttps_epi32(a);
#elif defined(SIMDE_LOONGARCH_LASX_NATIVE)
#elif defined(SIMDE_LOONGARCH_LASX_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE)
return __lasx_xvftintrz_w_s(a);
#else
simde__m256i_private r_;
simde__m256_private a_ = simde__m256_to_private(a);

#if defined(simde_math_truncf)
#if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
r_.m128i[0] = simde_mm_cvttps_epi32(a_.m128[0]);
r_.m128i[1] = simde_mm_cvttps_epi32(a_.m128[1]);
#else
SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) {
r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_truncf(a_.f32[i]));
for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
simde_float32 v = simde_math_truncf(a_.f32[i]);
#if defined(SIMDE_FAST_CONVERSION_RANGE)
r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v);
#else
r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ?
SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN;
#endif
}
#else
HEDLEY_UNREACHABLE();
#endif

return simde__m256i_from_private(r_);
Expand Down
40 changes: 34 additions & 6 deletions test/x86/avx.c
Original file line number Diff line number Diff line change
Expand Up @@ -7125,9 +7125,9 @@ test_simde_mm256_cvtpd_epi32(SIMDE_MUNIT_TEST_ARGS) {
#endif
#if !defined(SIMDE_FAST_CONVERSION_RANGE)
{ simde_mm256_set_pd(
HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1),
HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1),
HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) - 100),
HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1),
HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1),
HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) + 100)),
simde_mm_set_epi32(
INT32_MIN, INT32_C(2147483547), INT32_MIN, -INT32_C(2147483548)) },
Expand Down Expand Up @@ -7218,9 +7218,9 @@ test_simde_mm256_cvtps_epi32(SIMDE_MUNIT_TEST_ARGS) {
#endif
#if !defined(SIMDE_FAST_CONVERSION_RANGE)
{ simde_mm256_set_ps(
HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1),
HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1),
HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) - 100),
HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1),
HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1),
HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) + 100),
0.f, 0.f, 0.f, 0.f),
simde_mm256_set_epi32(
Expand Down Expand Up @@ -7436,7 +7436,20 @@ test_simde_mm256_cvttpd_epi32(SIMDE_MUNIT_TEST_ARGS) {
const struct {
simde__m256d a;
simde__m128i r;
} test_vec[8] = {
} test_vec[] = {
#if !defined(SIMDE_FAST_NANS)
{ simde_mm256_set_pd(SIMDE_MATH_NAN, -SIMDE_MATH_NAN, 0.0, 0.0),
simde_mm_set_epi32( INT32_MIN, INT32_MIN, 0, 0) },
#endif
#if !defined(SIMDE_FAST_CONVERSION_RANGE)
{ simde_mm256_set_pd(
HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1),
HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) - 100),
HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1),
HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) + 100)),
simde_mm_set_epi32(
INT32_MIN, INT32_C(2147483547), INT32_MIN, -INT32_C(2147483548)) },
#endif
{ simde_mm256_set_pd(SIMDE_FLOAT64_C( -175.82), SIMDE_FLOAT64_C( -91.19),
SIMDE_FLOAT64_C( -855.64), SIMDE_FLOAT64_C(-1000.00)),
simde_mm_set_epi32(INT32_C(-175), INT32_C( -91), INT32_C(-855), INT32_C(-1000)) },
Expand Down Expand Up @@ -7476,7 +7489,22 @@ test_simde_mm256_cvttps_epi32(SIMDE_MUNIT_TEST_ARGS) {
const struct {
simde__m256 a;
simde__m256i r;
} test_vec[8] = {
} test_vec[] = {
#if !defined(SIMDE_FAST_NANS)
{ simde_mm256_set_ps(SIMDE_MATH_NAN, -SIMDE_MATH_NAN, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f),
simde_mm256_set_epi32( INT32_MIN, INT32_MIN, 0, 0, 0, 0, 0, 0) },
#endif
#if !defined(SIMDE_FAST_CONVERSION_RANGE)
{ simde_mm256_set_ps(
HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) + 1),
HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MAX) - 100),
HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) - 1),
HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(int64_t, INT32_MIN) + 100),
0.f, 0.f, 0.f, 0.f),
simde_mm256_set_epi32(
INT32_MIN, INT32_C(2147483520), INT32_MIN, -INT32_C(2147483520),
0, 0, 0, 0) },
#endif
{ simde_mm256_set_ps(SIMDE_FLOAT32_C( -135.75), SIMDE_FLOAT32_C( 534.39),
SIMDE_FLOAT32_C( -81.93), SIMDE_FLOAT32_C( -234.94),
SIMDE_FLOAT32_C( -390.94), SIMDE_FLOAT32_C( -625.05),
Expand Down