From bdd25a6f860dc69ee2ed506b3a789ce9d2ad3aab Mon Sep 17 00:00:00 2001 From: "Ivan A. Melnikov" Date: Wed, 21 Jan 2026 20:49:43 +0400 Subject: [PATCH 1/3] test: Avoid -ffinite-math-only on floating point comparisons With `-ffinite-math-only` (implied by `-ffast-math` and -Ofast), GCC considers all comparisons against infinities to be false and can optimize them away. This breaks several tests that rely on infinities being compared correctly. To avoid this, we add a GCC-specific optimize attribute that disables `-ffinite-math-only` optimization for floating point comparisons used in assertions. Signed-off-by: Ivan A. Melnikov --- test/test.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/test.h b/test/test.h index 3e1b3de78..287171bfa 100644 --- a/test/test.h +++ b/test/test.h @@ -126,6 +126,10 @@ simde_test_debug_printf_(const char* format, ...) { HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ +#if defined(SIMDE_FAST_MATH) && defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,4,0) +__attribute__((optimize("-fno-finite-math-only"))) +__attribute__((noinline)) +#endif static int simde_test_equal_f32(simde_float32 a, simde_float32 b, simde_float32 slop) { if (simde_math_isnan(a)) { @@ -156,6 +160,10 @@ simde_test_equal_f16(simde_float16 a, simde_float16 b, simde_float16 slop) { return simde_test_equal_f32(af, bf, slopf); } +#if defined(SIMDE_FAST_MATH) && defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,4,0) +__attribute__((optimize("-fno-finite-math-only"))) +__attribute__((noinline)) +#endif static int simde_test_equal_f64(simde_float64 a, simde_float64 b, simde_float64 slop) { if (simde_math_isnan(a)) { From f8f5abd3241c43e5f27c85519212cd5011604a36 Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" Date: Thu, 22 Jan 2026 13:41:10 +0100 Subject: [PATCH 2/3] gh-actions: test -Ofast instead of -ffast-math From the GCC manual page: > Disregard strict standards compliance. -Ofast enables all -O3 optimizations. 
> It also enables optimizations that are not valid for all standard-compliant > programs. It turns on -ffast-math, -fallow-store-data-races. > It turns off -fsemantic-interposition. On recent clang, use "-O3 -ffast-math" due to > error: argument '-Ofast' is deprecated; use '-O3 -ffast-math' for the same > behavior, or '-O3' to enable only conforming optimizations > [-Werror,-Wdeprecated-ofast] --- .github/workflows/ci.yml | 44 ++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e9c908763..f930dc90d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -340,40 +340,40 @@ jobs: distro: ubuntu-24.04 - version: 10 distro: ubuntu-24.04 - arch_flags: -ffast-math + arch_flags: -Ofast - version: 11 distro: ubuntu-24.04 - version: 11 distro: ubuntu-24.04 - arch_flags: -ffast-math + arch_flags: -Ofast - version: 11 distro: ubuntu-24.04-arm - version: 12 distro: ubuntu-24.04 - version: 12 distro: ubuntu-24.04 - arch_flags: -ffast-math + arch_flags: -Ofast - version: 12 distro: ubuntu-24.04-arm - version: 13 distro: ubuntu-24.04 - version: 13 distro: ubuntu-24.04 - arch_flags: -ffast-math + arch_flags: -Ofast - version: 13 distro: ubuntu-24.04-arm - version: 14 distro: ubuntu-24.04 - version: 14 distro: ubuntu-24.04 - arch_flags: -ffast-math + arch_flags: -Ofast - version: 14 distro: ubuntu-24.04-arm - version: 15 distro: ubuntu-24.04 - version: 15 distro: ubuntu-24.04 - arch_flags: -ffast-math + arch_flags: -Ofast - version: 15 distro: ubuntu-24.04-arm runs-on: ${{ matrix.distro }} @@ -739,52 +739,52 @@ jobs: distro: ubuntu-22.04 - version: "12" distro: ubuntu-22.04 - arch_flags: -ffast-math + arch_flags: -Ofast - version: "13" distro: ubuntu-22.04 - version: "13" distro: ubuntu-22.04 - arch_flags: -ffast-math + arch_flags: -Ofast - version: "14" distro: ubuntu-24.04 - version: "14" distro: ubuntu-24.04 - arch_flags: -ffast-math + arch_flags: -Ofast 
- version: "14" distro: ubuntu-24.04-arm arch_flags: -march=armv8-a+fp+aes+sha2 # grep Features < /proc/cpuinfo | head -n 1 | awk '-F: ' '{print $2}' | sed 's/asimd //;s/evtstrm //;s/pmull //;s/sha1 //;s/crc32 //;s/atomics //;s/fphp //;s/asimd.. //g;s/cpuid //;s/asimd... //g;s/jscvt //;s/fcma //;s/lrcpc //;s/dcpop //;s/sm3 //;s/sha512 //;s/uscat //;s/ilrcpc //;s/pac. //g;s/dcpodp //;s/sveaes //;s/svebitperm //;s/svesha3 //;s/svesm4 //;s/flagm2 //;s/frint //;s/svei8mm //;s/svebf16 //' | tr ' ' '+' - version: "14" distro: ubuntu-24.04-arm - arch_flags: -ffast-math -march=armv8-a+fp+aes+sha2 + arch_flags: -Ofast -march=armv8-a+fp+aes+sha2 - version: "15" distro: ubuntu-24.04 - version: "15" distro: ubuntu-24.04 - arch_flags: -ffast-math + arch_flags: -Ofast - version: "15" distro: ubuntu-24.04-arm - version: "15" distro: ubuntu-24.04-arm - arch_flags: -ffast-math + arch_flags: -Ofast - version: "16" distro: ubuntu-24.04 arch_flags: -Wno-unsafe-buffer-usage - version: "16" distro: ubuntu-24.04 - arch_flags: -ffast-math -Wno-unsafe-buffer-usage + arch_flags: -Ofast -Wno-unsafe-buffer-usage - version: "16" distro: ubuntu-24.04-arm arch_flags: -Wno-unsafe-buffer-usage - version: "16" distro: ubuntu-24.04-arm - arch_flags: -ffast-math -Wno-unsafe-buffer-usage + arch_flags: -Ofast -Wno-unsafe-buffer-usage - version: "17" distro: ubuntu-24.04 arch_flags: -Wno-unsafe-buffer-usage - version: "17" distro: ubuntu-24.04 - arch_flags: -ffast-math -Wno-unsafe-buffer-usage + arch_flags: -Ofast -Wno-unsafe-buffer-usage - version: "17" distro: ubuntu-24.04 arch_flags: -Wno-unsafe-buffer-usage -O2 @@ -793,7 +793,7 @@ jobs: arch_flags: -Wno-unsafe-buffer-usage - version: "17" distro: ubuntu-24.04-arm - arch_flags: -ffast-math -Wno-unsafe-buffer-usage + arch_flags: -Ofast -Wno-unsafe-buffer-usage - version: "17" distro: ubuntu-24.04-arm arch_flags: -Wno-unsafe-buffer-usage -O2 @@ -802,7 +802,7 @@ jobs: arch_flags: -Wno-unsafe-buffer-usage -Wno-switch-default - version: "18" distro: 
ubuntu-24.04 - arch_flags: -ffast-math -Wno-unsafe-buffer-usage -Wno-switch-default -Wno-nan-infinity-disabled + arch_flags: -Ofast -Wno-unsafe-buffer-usage -Wno-switch-default -Wno-nan-infinity-disabled - version: "18" distro: ubuntu-24.04 arch_flags: -Wno-unsafe-buffer-usage -Wno-switch-default -O2 @@ -811,7 +811,7 @@ jobs: arch_flags: -Wno-unsafe-buffer-usage -Wno-switch-default - version: "18" distro: ubuntu-24.04-arm - arch_flags: -ffast-math -Wno-unsafe-buffer-usage -Wno-switch-default -Wno-nan-infinity-disabled + arch_flags: -Ofast -Wno-unsafe-buffer-usage -Wno-switch-default -Wno-nan-infinity-disabled - version: "18" distro: ubuntu-24.04-arm arch_flags: -Wno-unsafe-buffer-usage -Wno-switch-default -O2 @@ -820,7 +820,7 @@ jobs: arch_flags: -Wno-unsafe-buffer-usage -Wno-switch-default - version: "19" distro: ubuntu-24.04 - arch_flags: -ffast-math -Wno-unsafe-buffer-usage -Wno-switch-default -Wno-nan-infinity-disabled + arch_flags: -O3 -ffast-math -Wno-unsafe-buffer-usage -Wno-switch-default -Wno-nan-infinity-disabled - version: "19" distro: ubuntu-24.04 arch_flags: -Wno-unsafe-buffer-usage -Wno-switch-default -O2 @@ -829,7 +829,7 @@ jobs: arch_flags: -Wno-unsafe-buffer-usage -Wno-switch-default - version: "19" distro: ubuntu-24.04-arm - arch_flags: -ffast-math -Wno-unsafe-buffer-usage -Wno-switch-default -Wno-nan-infinity-disabled + arch_flags: -O3 -ffast-math -Wno-unsafe-buffer-usage -Wno-switch-default -Wno-nan-infinity-disabled - version: "19" distro: ubuntu-24.04-arm arch_flags: -Wno-unsafe-buffer-usage -Wno-switch-default -O2 @@ -838,7 +838,7 @@ jobs: arch_flags: -Wno-unsafe-buffer-usage -Wno-switch-default - version: "20" distro: ubuntu-24.04 - arch_flags: -ffast-math -Wno-unsafe-buffer-usage -Wno-switch-default -Wno-nan-infinity-disabled + arch_flags: -O3 -ffast-math -Wno-unsafe-buffer-usage -Wno-switch-default -Wno-nan-infinity-disabled - version: "20" distro: ubuntu-24.04 arch_flags: -Wno-unsafe-buffer-usage -Wno-switch-default -O2 @@ -847,7 +847,7 
@@ jobs: arch_flags: -Wno-unsafe-buffer-usage -Wno-switch-default - version: "20" distro: ubuntu-24.04-arm - arch_flags: -ffast-math -Wno-unsafe-buffer-usage -Wno-switch-default -Wno-nan-infinity-disabled + arch_flags: -O3 -ffast-math -Wno-unsafe-buffer-usage -Wno-switch-default -Wno-nan-infinity-disabled - version: "20" distro: ubuntu-24.04-arm arch_flags: -Wno-unsafe-buffer-usage -Wno-switch-default -O2 From cea5dd8f7b47c945419cb4ecede61efd74135d27 Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" Date: Thu, 22 Jan 2026 15:03:23 +0100 Subject: [PATCH 3/3] arm neon ext: small adjustment to reduce risk of -Werror=maybe-uninitialized --- simde/arm/neon/ext.h | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/simde/arm/neon/ext.h b/simde/arm/neon/ext.h index 67e03099c..0fed2a8d1 100644 --- a/simde/arm/neon/ext.h +++ b/simde/arm/neon/ext.h @@ -53,9 +53,10 @@ simde_vext_f16(simde_float16x4_t a, simde_float16x4_t b, const int n) r_.sv64 = __riscv_vslideup_vx_f16m1(a_.sv64, b_.sv64, 4-n, 4); #else const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + const size_t len = sizeof(r_.values) / sizeof(r_.values[0]); + for (size_t i = 0 ; i < len ; i++) { size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 3]; + r_.values[i] = (src < len) ? 
a_.values[src] : b_.values[src & 3]; } #endif return simde_float16x4_from_private(r_); @@ -500,9 +501,10 @@ simde_vextq_f16(simde_float16x8_t a, simde_float16x8_t b, const int n) r_.sv128 = __riscv_vslideup_vx_f16m1(a_.sv128, b_.sv128, 8-n, 8); #else const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + const size_t len = sizeof(r_.values) / sizeof(r_.values[0]); + for (size_t i = 0 ; i < len ; i++) { size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 7]; + r_.values[i] = (src < len) ? a_.values[src] : b_.values[src & 7]; } #endif return simde_float16x8_from_private(r_); @@ -1106,9 +1108,10 @@ simde_vextq_p8(simde_poly8x16_t a, simde_poly8x16_t b, const int n) b_ = simde_poly8x16_to_private(b), r_ = a_; const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + const size_t len = sizeof(r_.values) / sizeof(r_.values[0]); + for (size_t i = 0 ; i < len ; i++) { size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 15]; + r_.values[i] = (src < len) ? a_.values[src] : b_.values[src & 15]; } return simde_poly8x16_from_private(r_); #endif @@ -1132,9 +1135,10 @@ simde_vextq_p16(simde_poly16x8_t a, simde_poly16x8_t b, const int n) b_ = simde_poly16x8_to_private(b), r_ = a_; const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + const size_t len = sizeof(r_.values) / sizeof(r_.values[0]); + for (size_t i = 0 ; i < len ; i++) { size_t src = i + n_; - r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 7]; + r_.values[i] = (src < len) ? a_.values[src] : b_.values[src & 7]; } return simde_poly16x8_from_private(r_); #endif