diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index e7e9f3899..a87bed913 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -495,6 +495,12 @@ jobs:
           arch_gnu: loongarch64
           arch_deb: loong64
           distro: ubuntu-24.04
+        - extra: -fastmath  # fast-math variant of the version-14 loongarch64 cross build
+          version: 14
+          cross: loongarch64
+          arch_gnu: loongarch64
+          arch_deb: loong64
+          distro: ubuntu-24.04
         # - version: 14
         #   cross: mips64el
         #   arch_gnu: mips64el
@@ -681,6 +687,19 @@ jobs:
           distro: ubuntu-24.04
         - version: 21
           cross: loongarch64
+          extra: -fastmath
+          arch_deb: loong64
+          arch_gnu: loongarch64
+          distro: ubuntu-24.04
+        - version: 20
+          cross: loongarch64
+          extra: -fastmath
+          arch_deb: loong64
+          arch_gnu: loongarch64
+          distro: ubuntu-24.04
+        - version: 22
+          cross: loongarch64
+          extra: -fastmath
           arch_deb: loong64
           arch_gnu: loongarch64
           distro: ubuntu-24.04
diff --git a/docker/cross-files/loongarch64-clang-20-fastmath-ccache.cross b/docker/cross-files/loongarch64-clang-20-fastmath-ccache.cross
new file mode 100644
index 000000000..380b8e5d5
--- /dev/null
+++ b/docker/cross-files/loongarch64-clang-20-fastmath-ccache.cross
@@ -0,0 +1,23 @@
+# Meson cross file: clang-20 (through ccache) targeting loongarch64-linux-gnu with LSX/LASX
+# enabled and -O3 -ffast-math; built executables are run via qemu-loongarch64-static (la464 CPU).
+[binaries]
+c = ['ccache', 'clang-20']
+cpp = ['ccache', 'clang++-20']
+ar = 'llvm-ar-20'
+strip = 'llvm-strip-20'
+objcopy = 'llvm-objcopy-20'
+c_ld = 'lld'
+cpp_ld = 'lld'
+exe_wrapper = ['qemu-loongarch64-static', '-L', '/usr/loongarch64-linux-gnu/', '-cpu', 'la464']
+
+[properties]
+c_args = ['--target=loongarch64-linux-gnu', '-march=la464', '-isystem=/usr/loongarch64-linux-gnu/include', '-Wextra', '-Werror', '-mlsx', '-mlasx', '-O3', '-ffast-math', '-Wno-nan-infinity-disabled']
+cpp_args = ['--target=loongarch64-linux-gnu', '-march=la464', '-isystem=/usr/loongarch64-linux-gnu/include', '-Wextra', '-Werror', '-mlsx', '-mlasx', '-O3', '-ffast-math', '-Wno-nan-infinity-disabled']
+c_link_args = ['--target=loongarch64-linux-gnu']
+cpp_link_args = ['--target=loongarch64-linux-gnu']
+
+[host_machine]
+system = 'linux'
+cpu_family = 'loongarch64'
+cpu = 'la464'
+endian = 'little'
diff --git a/docker/cross-files/loongarch64-clang-21-fastmath-ccache.cross b/docker/cross-files/loongarch64-clang-21-fastmath-ccache.cross
new file mode 100644
index 000000000..371482c1e
--- /dev/null
+++ b/docker/cross-files/loongarch64-clang-21-fastmath-ccache.cross
@@ -0,0 +1,23 @@
+# Meson cross file: clang-21 (through ccache) targeting loongarch64-linux-gnu with LSX/LASX
+# enabled and -O3 -ffast-math; built executables are run via qemu-loongarch64-static (la464 CPU).
+[binaries]
+c = ['ccache', 'clang-21']
+cpp = ['ccache', 'clang++-21']
+ar = 'llvm-ar-21'
+strip = 'llvm-strip-21'
+objcopy = 'llvm-objcopy-21'
+c_ld = 'lld'
+cpp_ld = 'lld'
+exe_wrapper = ['qemu-loongarch64-static', '-L', '/usr/loongarch64-linux-gnu/', '-cpu', 'la464']
+
+[properties]
+c_args = ['--target=loongarch64-linux-gnu', '-march=la464', '-isystem=/usr/loongarch64-linux-gnu/include', '-Wextra', '-Werror', '-mlsx', '-mlasx', '-O3', '-ffast-math', '-Wno-nan-infinity-disabled']
+cpp_args = ['--target=loongarch64-linux-gnu', '-march=la464', '-isystem=/usr/loongarch64-linux-gnu/include', '-Wextra', '-Werror', '-mlsx', '-mlasx', '-O3', '-ffast-math', '-Wno-nan-infinity-disabled']
+c_link_args = ['--target=loongarch64-linux-gnu']
+cpp_link_args = ['--target=loongarch64-linux-gnu']
+
+[host_machine]
+system = 'linux'
+cpu_family = 'loongarch64'
+cpu = 'la464'
+endian = 'little'
diff --git a/docker/cross-files/loongarch64-clang-22-fastmath-ccache.cross b/docker/cross-files/loongarch64-clang-22-fastmath-ccache.cross
new file mode 100644
index 000000000..1a8145c03
--- /dev/null
+++ b/docker/cross-files/loongarch64-clang-22-fastmath-ccache.cross
@@ -0,0 +1,23 @@
+# Meson cross file: clang-22 (through ccache) targeting loongarch64-linux-gnu with LSX/LASX
+# enabled and -O3 -ffast-math; built executables are run via qemu-loongarch64-static (la464 CPU).
+[binaries]
+c = ['ccache', 'clang-22']
+cpp = ['ccache', 'clang++-22']
+ar = 'llvm-ar-22'
+strip = 'llvm-strip-22'
+objcopy = 'llvm-objcopy-22'
+c_ld = 'lld'
+cpp_ld = 'lld'
+exe_wrapper = ['qemu-loongarch64-static', '-L', '/usr/loongarch64-linux-gnu/', '-cpu', 'la464']
+
+[properties]
+c_args = ['--target=loongarch64-linux-gnu', '-march=la464', '-isystem=/usr/loongarch64-linux-gnu/include', '-Wextra', '-Werror', '-mlsx', '-mlasx', '-O3', '-ffast-math', '-Wno-nan-infinity-disabled']
+cpp_args = ['--target=loongarch64-linux-gnu', '-march=la464', '-isystem=/usr/loongarch64-linux-gnu/include', '-Wextra', '-Werror', '-mlsx', '-mlasx', '-O3', '-ffast-math', '-Wno-nan-infinity-disabled']
+c_link_args = ['--target=loongarch64-linux-gnu']
+cpp_link_args = ['--target=loongarch64-linux-gnu']
+
+[host_machine]
+system = 'linux'
+cpu_family = 'loongarch64'
+cpu = 'la464'
+endian = 'little'
diff --git a/docker/cross-files/loongarch64-gcc-14-fastmath-ccache.cross b/docker/cross-files/loongarch64-gcc-14-fastmath-ccache.cross
new file mode 100644
index 000000000..1892193d6
--- /dev/null
+++ b/docker/cross-files/loongarch64-gcc-14-fastmath-ccache.cross
@@ -0,0 +1,23 @@
+# Meson cross file: loongarch64-linux-gnu GCC 14 (through ccache) with LSX/LASX enabled and
+# -Ofast (which turns on -ffast-math in GCC); built executables are run via qemu-loongarch64-static.
+[binaries]
+c = ['ccache', 'loongarch64-linux-gnu-gcc-14']
+cpp = ['ccache', 'loongarch64-linux-gnu-g++-14']
+ar = 'loongarch64-linux-gnu-gcc-ar-14'
+strip = 'loongarch64-linux-gnu-strip'
+objcopy = 'loongarch64-linux-gnu-objcopy'
+ld = 'loongarch64-linux-gnu-ld'
+exe_wrapper = ['qemu-loongarch64-static', '-L', '/usr/loongarch64-linux-gnu/', '-cpu', 'la464']
+
+[properties]
+c_args = ['-march=loongarch64', '-Wextra', '-Werror', '-mlsx', '-mlasx', '-Ofast']
+cpp_args = ['-march=loongarch64', '-Wextra', '-Werror', '-mlsx', '-mlasx', '-Ofast']
+# Disabled alternative: target the la464 core via -march (no explicit SIMD or fast-math flags).
+#c_args = ['-march=la464', '-Wextra', '-Werror']
+#cpp_args = ['-march=la464', '-Wextra', '-Werror']
+
+[host_machine]
+system = 'linux'
+cpu_family = 'loongarch64'
+cpu = 'loongarch64'
+endian = 'little'
diff --git a/test/test.h b/test/test.h
index 6cc88ab85..33a30380e 100644
--- a/test/test.h
+++ b/test/test.h
@@ -126,6 +126,10 @@ simde_test_debug_printf_(const char* format, ...) {
 HEDLEY_DIAGNOSTIC_PUSH
 SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_
 
+#if defined(SIMDE_FAST_MATH) && defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,4,0)
+__attribute__((optimize("-fno-finite-math-only"))) /* fast-math builds: compile this function without finite-math-only so the NaN check in its body is not assumed away */
+__attribute__((noinline)) /* keep it a separate function rather than inlined into its callers */
+#endif /* SIMDE_FAST_MATH on GCC >= 4.4.0 */
 static int
 simde_test_equal_f32(simde_float32 a, simde_float32 b, simde_float32 slop) {
   if (simde_math_isnan(a)) {
@@ -156,6 +160,10 @@ simde_test_equal_f16(simde_float16 a, simde_float16 b, simde_float16 slop) {
   return simde_test_equal_f32(af, bf, slopf);
 }
 
+#if defined(SIMDE_FAST_MATH) && defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,4,0)
+__attribute__((optimize("-fno-finite-math-only"))) /* fast-math builds: compile this function without finite-math-only so the NaN check in its body is not assumed away */
+__attribute__((noinline)) /* keep it a separate function rather than inlined into its callers */
+#endif /* SIMDE_FAST_MATH on GCC >= 4.4.0 */
 static int
 simde_test_equal_f64(simde_float64 a, simde_float64 b, simde_float64 slop) {
   if (simde_math_isnan(a)) {