From bdd25a6f860dc69ee2ed506b3a789ce9d2ad3aab Mon Sep 17 00:00:00 2001
From: "Ivan A. Melnikov" <iv@altlinux.org>
Date: Wed, 21 Jan 2026 20:49:43 +0400
Subject: [PATCH 1/3] test: Avoid -ffinite-math-only on floating point
 comparisons

With `-ffinite-math-only` (implied by `-ffast-math` and `-Ofast`),
GCC considers all comparisons against infinities to be false
and can optimize them away. This breaks several tests that rely
on infinities being compared correctly. To avoid this, we add a
GCC-specific `optimize` attribute that disables the
`-ffinite-math-only` optimization for the floating point
comparisons used in assertions.

Signed-off-by: Ivan A. Melnikov <iv@altlinux.org>
---
 test/test.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/test/test.h b/test/test.h
index 3e1b3de78..287171bfa 100644
--- a/test/test.h
+++ b/test/test.h
@@ -126,6 +126,10 @@ simde_test_debug_printf_(const char* format, ...) {
 HEDLEY_DIAGNOSTIC_PUSH
 SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_
 
+#if defined(SIMDE_FAST_MATH) && defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,4,0)
+__attribute__((optimize("-fno-finite-math-only")))
+__attribute__((noinline))
+#endif
 static int
 simde_test_equal_f32(simde_float32 a, simde_float32 b, simde_float32 slop) {
   if (simde_math_isnan(a)) {
@@ -156,6 +160,10 @@ simde_test_equal_f16(simde_float16 a, simde_float16 b, simde_float16 slop) {
   return simde_test_equal_f32(af, bf, slopf);
 }
 
+#if defined(SIMDE_FAST_MATH) && defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,4,0)
+__attribute__((optimize("-fno-finite-math-only")))
+__attribute__((noinline))
+#endif
 static int
 simde_test_equal_f64(simde_float64 a, simde_float64 b, simde_float64 slop) {
   if (simde_math_isnan(a)) {

From f8f5abd3241c43e5f27c85519212cd5011604a36 Mon Sep 17 00:00:00 2001
From: "Michael R. Crusoe" <michael.crusoe@gmail.com>
Date: Thu, 22 Jan 2026 13:41:10 +0100
Subject: [PATCH 2/3] gh-actions: test -Ofast instead of -ffast-math

From the GCC manual page:

> Disregard strict standards compliance. -Ofast enables all -O3 optimizations.
> It also enables optimizations that are not valid for all standard-compliant
> programs. It turns on -ffast-math, -fallow-store-data-races.
> It turns off -fsemantic-interposition.

On recent clang (version 19 and later in this matrix), use
"-O3 -ffast-math" instead, because -Ofast now fails with:
> error: argument '-Ofast' is deprecated; use '-O3 -ffast-math' for the same
> behavior, or '-O3' to enable only conforming optimizations
> [-Werror,-Wdeprecated-ofast]
---
 .github/workflows/ci.yml | 44 ++++++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index e9c908763..f930dc90d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -340,40 +340,40 @@ jobs:
           distro: ubuntu-24.04
         - version: 10
           distro: ubuntu-24.04
-          arch_flags: -ffast-math
+          arch_flags: -Ofast
         - version: 11
           distro: ubuntu-24.04
         - version: 11
           distro: ubuntu-24.04
-          arch_flags: -ffast-math
+          arch_flags: -Ofast
         - version: 11
           distro:  ubuntu-24.04-arm
         - version: 12
           distro: ubuntu-24.04
         - version: 12
           distro: ubuntu-24.04
-          arch_flags: -ffast-math
+          arch_flags: -Ofast
         - version: 12
           distro: ubuntu-24.04-arm
         - version: 13
           distro: ubuntu-24.04
         - version: 13
           distro: ubuntu-24.04
-          arch_flags: -ffast-math
+          arch_flags: -Ofast
         - version: 13
           distro: ubuntu-24.04-arm
         - version: 14
           distro: ubuntu-24.04
         - version: 14
           distro: ubuntu-24.04
-          arch_flags: -ffast-math
+          arch_flags: -Ofast
         - version: 14
           distro: ubuntu-24.04-arm
         - version: 15
           distro: ubuntu-24.04
         - version: 15
           distro: ubuntu-24.04
-          arch_flags: -ffast-math
+          arch_flags: -Ofast
         - version: 15
           distro: ubuntu-24.04-arm
     runs-on: ${{ matrix.distro }}
@@ -739,52 +739,52 @@ jobs:
           distro: ubuntu-22.04
         - version: "12"
           distro: ubuntu-22.04
-          arch_flags: -ffast-math
+          arch_flags: -Ofast
         - version: "13"
           distro: ubuntu-22.04
         - version: "13"
           distro: ubuntu-22.04
-          arch_flags: -ffast-math
+          arch_flags: -Ofast
         - version: "14"
           distro: ubuntu-24.04
         - version: "14"
           distro: ubuntu-24.04
-          arch_flags: -ffast-math
+          arch_flags: -Ofast
         - version: "14"
           distro: ubuntu-24.04-arm
           arch_flags: -march=armv8-a+fp+aes+sha2
           # grep Features < /proc/cpuinfo | head -n 1 | awk '-F: ' '{print $2}' | sed 's/asimd //;s/evtstrm //;s/pmull //;s/sha1 //;s/crc32 //;s/atomics //;s/fphp //;s/asimd.. //g;s/cpuid //;s/asimd... //g;s/jscvt //;s/fcma //;s/lrcpc //;s/dcpop //;s/sm3 //;s/sha512 //;s/uscat //;s/ilrcpc //;s/pac. //g;s/dcpodp //;s/sveaes //;s/svebitperm //;s/svesha3 //;s/svesm4 //;s/flagm2 //;s/frint //;s/svei8mm //;s/svebf16 //' | tr ' ' '+'
         - version: "14"
           distro: ubuntu-24.04-arm
-          arch_flags: -ffast-math -march=armv8-a+fp+aes+sha2
+          arch_flags: -Ofast -march=armv8-a+fp+aes+sha2
         - version: "15"
           distro: ubuntu-24.04
         - version: "15"
           distro: ubuntu-24.04
-          arch_flags: -ffast-math
+          arch_flags: -Ofast
         - version: "15"
           distro: ubuntu-24.04-arm
         - version: "15"
           distro: ubuntu-24.04-arm
-          arch_flags: -ffast-math
+          arch_flags: -Ofast
         - version: "16"
           distro: ubuntu-24.04
           arch_flags: -Wno-unsafe-buffer-usage
         - version: "16"
           distro: ubuntu-24.04
-          arch_flags: -ffast-math -Wno-unsafe-buffer-usage
+          arch_flags: -Ofast -Wno-unsafe-buffer-usage
         - version: "16"
           distro: ubuntu-24.04-arm
           arch_flags: -Wno-unsafe-buffer-usage
         - version: "16"
           distro: ubuntu-24.04-arm
-          arch_flags: -ffast-math -Wno-unsafe-buffer-usage
+          arch_flags: -Ofast -Wno-unsafe-buffer-usage
         - version: "17"
           distro: ubuntu-24.04
           arch_flags: -Wno-unsafe-buffer-usage
         - version: "17"
           distro: ubuntu-24.04
-          arch_flags: -ffast-math -Wno-unsafe-buffer-usage
+          arch_flags: -Ofast -Wno-unsafe-buffer-usage
         - version: "17"
           distro: ubuntu-24.04
           arch_flags: -Wno-unsafe-buffer-usage -O2
@@ -793,7 +793,7 @@ jobs:
           arch_flags: -Wno-unsafe-buffer-usage
         - version: "17"
           distro: ubuntu-24.04-arm
-          arch_flags: -ffast-math -Wno-unsafe-buffer-usage
+          arch_flags: -Ofast -Wno-unsafe-buffer-usage
         - version: "17"
           distro: ubuntu-24.04-arm
           arch_flags: -Wno-unsafe-buffer-usage -O2
@@ -802,7 +802,7 @@ jobs:
           arch_flags: -Wno-unsafe-buffer-usage -Wno-switch-default
         - version: "18"
           distro: ubuntu-24.04
-          arch_flags: -ffast-math -Wno-unsafe-buffer-usage -Wno-switch-default -Wno-nan-infinity-disabled
+          arch_flags: -Ofast -Wno-unsafe-buffer-usage -Wno-switch-default -Wno-nan-infinity-disabled
         - version: "18"
           distro: ubuntu-24.04
           arch_flags: -Wno-unsafe-buffer-usage -Wno-switch-default -O2
@@ -811,7 +811,7 @@ jobs:
           arch_flags: -Wno-unsafe-buffer-usage -Wno-switch-default
         - version: "18"
           distro: ubuntu-24.04-arm
-          arch_flags: -ffast-math -Wno-unsafe-buffer-usage -Wno-switch-default -Wno-nan-infinity-disabled
+          arch_flags: -Ofast -Wno-unsafe-buffer-usage -Wno-switch-default -Wno-nan-infinity-disabled
         - version: "18"
           distro: ubuntu-24.04-arm
           arch_flags: -Wno-unsafe-buffer-usage -Wno-switch-default -O2
@@ -820,7 +820,7 @@ jobs:
           arch_flags: -Wno-unsafe-buffer-usage -Wno-switch-default
         - version: "19"
           distro: ubuntu-24.04
-          arch_flags: -ffast-math -Wno-unsafe-buffer-usage -Wno-switch-default -Wno-nan-infinity-disabled
+          arch_flags: -O3 -ffast-math -Wno-unsafe-buffer-usage -Wno-switch-default -Wno-nan-infinity-disabled
         - version: "19"
           distro: ubuntu-24.04
           arch_flags: -Wno-unsafe-buffer-usage -Wno-switch-default -O2
@@ -829,7 +829,7 @@ jobs:
           arch_flags: -Wno-unsafe-buffer-usage -Wno-switch-default
         - version: "19"
           distro: ubuntu-24.04-arm
-          arch_flags: -ffast-math -Wno-unsafe-buffer-usage -Wno-switch-default -Wno-nan-infinity-disabled
+          arch_flags: -O3 -ffast-math -Wno-unsafe-buffer-usage -Wno-switch-default -Wno-nan-infinity-disabled
         - version: "19"
           distro: ubuntu-24.04-arm
           arch_flags: -Wno-unsafe-buffer-usage -Wno-switch-default -O2
@@ -838,7 +838,7 @@ jobs:
           arch_flags: -Wno-unsafe-buffer-usage -Wno-switch-default
         - version: "20"
           distro: ubuntu-24.04
-          arch_flags: -ffast-math -Wno-unsafe-buffer-usage -Wno-switch-default -Wno-nan-infinity-disabled
+          arch_flags: -O3 -ffast-math -Wno-unsafe-buffer-usage -Wno-switch-default -Wno-nan-infinity-disabled
         - version: "20"
           distro: ubuntu-24.04
           arch_flags: -Wno-unsafe-buffer-usage -Wno-switch-default -O2
@@ -847,7 +847,7 @@ jobs:
           arch_flags: -Wno-unsafe-buffer-usage -Wno-switch-default
         - version: "20"
           distro: ubuntu-24.04-arm
-          arch_flags: -ffast-math -Wno-unsafe-buffer-usage -Wno-switch-default -Wno-nan-infinity-disabled
+          arch_flags: -O3 -ffast-math -Wno-unsafe-buffer-usage -Wno-switch-default -Wno-nan-infinity-disabled
         - version: "20"
           distro: ubuntu-24.04-arm
           arch_flags: -Wno-unsafe-buffer-usage -Wno-switch-default -O2

From cea5dd8f7b47c945419cb4ecede61efd74135d27 Mon Sep 17 00:00:00 2001
From: "Michael R. Crusoe" <michael.crusoe@gmail.com>
Date: Thu, 22 Jan 2026 15:03:23 +0100
Subject: [PATCH 3/3] arm neon ext: small adjustment to reduce risk of
 -Werror=maybe-uninitialized

---
 simde/arm/neon/ext.h | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/simde/arm/neon/ext.h b/simde/arm/neon/ext.h
index 67e03099c..0fed2a8d1 100644
--- a/simde/arm/neon/ext.h
+++ b/simde/arm/neon/ext.h
@@ -53,9 +53,10 @@ simde_vext_f16(simde_float16x4_t a, simde_float16x4_t b, const int n)
       r_.sv64 = __riscv_vslideup_vx_f16m1(a_.sv64, b_.sv64, 4-n, 4);
     #else
       const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
-      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
+      const size_t len = sizeof(r_.values) / sizeof(r_.values[0]);
+      for (size_t i = 0 ; i < len ; i++) {
         size_t src = i + n_;
-        r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 3];
+        r_.values[i] = (src < len) ? a_.values[src] : b_.values[src & 3];
       }
     #endif
     return simde_float16x4_from_private(r_);
@@ -500,9 +501,10 @@ simde_vextq_f16(simde_float16x8_t a, simde_float16x8_t b, const int n)
       r_.sv128 = __riscv_vslideup_vx_f16m1(a_.sv128, b_.sv128, 8-n, 8);
     #else
       const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
-      for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
+      const size_t len = sizeof(r_.values) / sizeof(r_.values[0]);
+      for (size_t i = 0 ; i < len ; i++) {
         size_t src = i + n_;
-        r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 7];
+        r_.values[i] = (src < len) ? a_.values[src] : b_.values[src & 7];
       }
     #endif
     return simde_float16x8_from_private(r_);
@@ -1106,9 +1108,10 @@ simde_vextq_p8(simde_poly8x16_t a, simde_poly8x16_t b, const int n)
       b_ = simde_poly8x16_to_private(b),
       r_ = a_;
     const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
-    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
+    const size_t len = sizeof(r_.values) / sizeof(r_.values[0]);
+    for (size_t i = 0 ; i < len ; i++) {
       size_t src = i + n_;
-      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 15];
+      r_.values[i] = (src < len) ? a_.values[src] : b_.values[src & 15];
     }
     return simde_poly8x16_from_private(r_);
   #endif
@@ -1132,9 +1135,10 @@ simde_vextq_p16(simde_poly16x8_t a, simde_poly16x8_t b, const int n)
       b_ = simde_poly16x8_to_private(b),
       r_ = a_;
     const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
-    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
+    const size_t len = sizeof(r_.values) / sizeof(r_.values[0]);
+    for (size_t i = 0 ; i < len ; i++) {
       size_t src = i + n_;
-      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 7];
+      r_.values[i] = (src < len) ? a_.values[src] : b_.values[src & 7];
     }
     return simde_poly16x8_from_private(r_);
   #endif