From 978690223d2963d04f3eca178dffa6f2ba086272 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Thu, 11 Sep 2025 10:06:04 +0800 Subject: [PATCH] loongarch: Sync SIMD intrinsics with C --- .../src/loongarch64/lasx/generated.rs | 166 ++++++++++- .../core_arch/src/loongarch64/lasx/tests.rs | 281 ++++++++++++++++++ .../src/loongarch64/lsx/generated.rs | 4 +- crates/stdarch-gen-loongarch/lasx.spec | 92 +++++- crates/stdarch-gen-loongarch/lasxintrin.h | 164 +++++++++- crates/stdarch-gen-loongarch/lsx.spec | 2 +- crates/stdarch-gen-loongarch/lsxintrin.h | 8 +- crates/stdarch-gen-loongarch/src/main.rs | 6 +- 8 files changed, 708 insertions(+), 15 deletions(-) diff --git a/crates/core_arch/src/loongarch64/lasx/generated.rs b/crates/core_arch/src/loongarch64/lasx/generated.rs index cda0ebec67..1d9d4e8248 100644 --- a/crates/core_arch/src/loongarch64/lasx/generated.rs +++ b/crates/core_arch/src/loongarch64/lasx/generated.rs @@ -7,7 +7,7 @@ // ``` use crate::mem::transmute; -use super::types::*; +use super::super::*; #[allow(improper_ctypes)] unsafe extern "unadjusted" { @@ -980,7 +980,7 @@ unsafe extern "unadjusted" { #[link_name = "llvm.loongarch.lasx.xvssrln.w.d"] fn __lasx_xvssrln_w_d(a: __v4i64, b: __v4i64) -> __v8i32; #[link_name = "llvm.loongarch.lasx.xvorn.v"] - fn __lasx_xvorn_v(a: __v32i8, b: __v32i8) -> __v32i8; + fn __lasx_xvorn_v(a: __v32u8, b: __v32u8) -> __v32u8; #[link_name = "llvm.loongarch.lasx.xvldi"] fn __lasx_xvldi(a: i32) -> __v4i64; #[link_name = "llvm.loongarch.lasx.xvldx"] @@ -1491,6 +1491,42 @@ unsafe extern "unadjusted" { fn __lasx_xvrepli_h(a: i32) -> __v16i16; #[link_name = "llvm.loongarch.lasx.xvrepli.w"] fn __lasx_xvrepli_w(a: i32) -> __v8i32; + #[link_name = "llvm.loongarch.lasx.cast.128.s"] + fn __lasx_cast_128_s(a: __v4f32) -> __v8f32; + #[link_name = "llvm.loongarch.lasx.cast.128.d"] + fn __lasx_cast_128_d(a: __v2f64) -> __v4f64; + #[link_name = "llvm.loongarch.lasx.cast.128"] + fn __lasx_cast_128(a: __v2i64) -> __v4i64; + #[link_name = "llvm.loongarch.lasx.concat.128.s"] + fn __lasx_concat_128_s(a: __v4f32, b: __v4f32) -> __v8f32; + #[link_name = "llvm.loongarch.lasx.concat.128.d"] + fn __lasx_concat_128_d(a: __v2f64, b: __v2f64) -> __v4f64; + #[link_name = "llvm.loongarch.lasx.concat.128"] + fn __lasx_concat_128(a: __v2i64, b: __v2i64) -> __v4i64; + #[link_name = "llvm.loongarch.lasx.extract.128.lo.s"] + fn __lasx_extract_128_lo_s(a: __v8f32) -> __v4f32; + #[link_name = "llvm.loongarch.lasx.extract.128.hi.s"] + fn __lasx_extract_128_hi_s(a: __v8f32) -> __v4f32; + #[link_name = "llvm.loongarch.lasx.extract.128.lo.d"] + fn __lasx_extract_128_lo_d(a: __v4f64) -> __v2f64; + #[link_name = "llvm.loongarch.lasx.extract.128.hi.d"] + fn __lasx_extract_128_hi_d(a: __v4f64) -> __v2f64; + #[link_name = "llvm.loongarch.lasx.extract.128.lo"] + fn __lasx_extract_128_lo(a: __v4i64) -> __v2i64; + #[link_name = "llvm.loongarch.lasx.extract.128.hi"] + fn __lasx_extract_128_hi(a: __v4i64) -> __v2i64; + #[link_name = "llvm.loongarch.lasx.insert.128.lo.s"] + fn __lasx_insert_128_lo_s(a: __v8f32, b: __v4f32) -> __v8f32; + #[link_name = "llvm.loongarch.lasx.insert.128.hi.s"] + fn __lasx_insert_128_hi_s(a: __v8f32, b: __v4f32) -> __v8f32; + #[link_name = "llvm.loongarch.lasx.insert.128.lo.d"] + fn __lasx_insert_128_lo_d(a: __v4f64, b: __v2f64) -> __v4f64; + #[link_name = "llvm.loongarch.lasx.insert.128.hi.d"] + fn __lasx_insert_128_hi_d(a: __v4f64, b: __v2f64) -> __v4f64; + #[link_name = "llvm.loongarch.lasx.insert.128.lo"] + fn __lasx_insert_128_lo(a: __v4i64, b: __v2i64) -> __v4i64; + #[link_name = "llvm.loongarch.lasx.insert.128.hi"] + fn __lasx_insert_128_hi(a: __v4i64, b: __v2i64) -> __v4i64; } #[inline] @@ -7062,3 +7098,129 @@ pub fn lasx_xvrepli_w() -> m256i { static_assert_simm_bits!(IMM_S10, 10); unsafe { transmute(__lasx_xvrepli_w(IMM_S10)) } } + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub fn lasx_cast_128_s(a: m128) -> m256 { + unsafe { transmute(__lasx_cast_128_s(transmute(a))) } +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub fn lasx_cast_128_d(a: m128d) -> m256d { + unsafe { transmute(__lasx_cast_128_d(transmute(a))) } +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub fn lasx_cast_128(a: m128i) -> m256i { + unsafe { transmute(__lasx_cast_128(transmute(a))) } +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub fn lasx_concat_128_s(a: m128, b: m128) -> m256 { + unsafe { transmute(__lasx_concat_128_s(transmute(a), transmute(b))) } +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub fn lasx_concat_128_d(a: m128d, b: m128d) -> m256d { + unsafe { transmute(__lasx_concat_128_d(transmute(a), transmute(b))) } +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub fn lasx_concat_128(a: m128i, b: m128i) -> m256i { + unsafe { transmute(__lasx_concat_128(transmute(a), transmute(b))) } +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub fn lasx_extract_128_lo_s(a: m256) -> m128 { + unsafe { transmute(__lasx_extract_128_lo_s(transmute(a))) } +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub fn lasx_extract_128_hi_s(a: m256) -> m128 { + unsafe { transmute(__lasx_extract_128_hi_s(transmute(a))) } +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub fn lasx_extract_128_lo_d(a: m256d) -> m128d { + unsafe { transmute(__lasx_extract_128_lo_d(transmute(a))) } +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub fn lasx_extract_128_hi_d(a: m256d) -> m128d { + unsafe { transmute(__lasx_extract_128_hi_d(transmute(a))) } +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub fn lasx_extract_128_lo(a: m256i) -> m128i { + unsafe { transmute(__lasx_extract_128_lo(transmute(a))) } +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub fn lasx_extract_128_hi(a: m256i) -> m128i { + unsafe { transmute(__lasx_extract_128_hi(transmute(a))) } +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub fn lasx_insert_128_lo_s(a: m256, b: m128) -> m256 { + unsafe { transmute(__lasx_insert_128_lo_s(transmute(a), transmute(b))) } +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub fn lasx_insert_128_hi_s(a: m256, b: m128) -> m256 { + unsafe { transmute(__lasx_insert_128_hi_s(transmute(a), transmute(b))) } +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub fn lasx_insert_128_lo_d(a: m256d, b: m128d) -> m256d { + unsafe { transmute(__lasx_insert_128_lo_d(transmute(a), transmute(b))) } +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub fn lasx_insert_128_hi_d(a: m256d, b: m128d) -> m256d { + unsafe { transmute(__lasx_insert_128_hi_d(transmute(a), transmute(b))) } +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub fn lasx_insert_128_lo(a: m256i, b: m128i) -> m256i { + unsafe { transmute(__lasx_insert_128_lo(transmute(a), transmute(b))) } +} + +#[inline] +#[target_feature(enable = "lasx")] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub fn lasx_insert_128_hi(a: m256i, b: m128i) -> m256i { + unsafe { transmute(__lasx_insert_128_hi(transmute(a), transmute(b))) } +} diff --git a/crates/core_arch/src/loongarch64/lasx/tests.rs b/crates/core_arch/src/loongarch64/lasx/tests.rs index 54771d7b51..319ce7cf98 100644 --- a/crates/core_arch/src/loongarch64/lasx/tests.rs +++ b/crates/core_arch/src/loongarch64/lasx/tests.rs @@ -14756,3 +14756,284 @@ unsafe fn test_lasx_xvrepli_w() { assert_eq!(r, transmute(lasx_xvrepli_w::<-388>())); } + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_cast_128_s() { + let a = u32x4::new(1031165056, 1051966120, 1060984374, 1062536919); + let r = i64x4::new(4518160082931176576, 4563561318958585398, 1966080, 1966080); + + assert_eq!( + r.as_array()[0..2], + transmute::<_, i64x4>(lasx_cast_128_s(transmute(a))).as_array()[0..2] + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_cast_128_d() { + let a = u64x2::new(4604694967937271251, 4600904075476555984); + let r = i64x4::new( + 4604694967937271251, + 4600904075476555984, + 2910860781861170785, + 8314045306847701346, + ); + + assert_eq!( + r.as_array()[0..2], + transmute::<_, i64x4>(lasx_cast_128_d(transmute(a))).as_array()[0..2] + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_cast_128() { + let a = i64x2::new(-5333716211868108402, 2442107533729495827); + let r = i64x4::new( + -5333716211868108402, + 2442107533729495827, + -1115824375586394527, + 8314045306157170687, + ); + + assert_eq!( + r.as_array()[0..2], + transmute::<_, i64x4>(lasx_cast_128(transmute(a))).as_array()[0..2] + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_concat_128_s() { + let a = u32x4::new(1032255272, 1059413818, 1058434362, 1041454056); + let b = u32x4::new(1047296252, 1059191602, 1051282752, 1026847376); + let r = i64x4::new( + 4550147702272751400, + 4473011111864986938, + 4549193291835144444, + 4410275898954698048, + ); + + assert_eq!(r, transmute(lasx_concat_128_s(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_concat_128_d() { + let a = u64x2::new(4602341404117999960, 4599751584045405722); + let b = u64x2::new(4595947342927040984, 4600308396523102002); + let r = i64x4::new( + 4602341404117999960, + 4599751584045405722, + 4595947342927040984, + 4600308396523102002, + ); + + assert_eq!(r, transmute(lasx_concat_128_d(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_concat_128() { + let a = i64x2::new(3302609705743394573, 8438855426868306143); + let b = i64x2::new(8632034656150002181, 7751541408133090748); + let r = i64x4::new( + 3302609705743394573, + 8438855426868306143, + 8632034656150002181, + 7751541408133090748, + ); + + assert_eq!(r, transmute(lasx_concat_128(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_extract_128_lo_s() { + let a = u32x8::new( + 1038279272, 1053426270, 1062315532, 1055361088, 1061380448, 1052007748, 1063816577, + 1061671114, + ); + let r = i64x2::new(4524431379435545192, 4532741359493293580); + + assert_eq!(r, transmute(lasx_extract_128_lo_s(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_extract_128_hi_s() { + let a = u32x8::new( + 1059517342, 1052723820, 1053176244, 1060336354, 1058221022, 1064684502, 1061072013, + 1059238420, + ); + let r = i64x2::new(4572785117706267614, 4549394373627784333); + + assert_eq!(r, transmute(lasx_extract_128_hi_s(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_extract_128_lo_d() { + let a = u64x4::new( + 4606487981487128637, + 4592443779247846248, + 4605637448543526041, + 4604126872543611047, + ); + let r = i64x2::new(4606487981487128637, 4592443779247846248); + + assert_eq!(r, transmute(lasx_extract_128_lo_d(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_extract_128_hi_d() { + let a = u64x4::new( + 4595075050683709816, + 4603388454656549851, + 4603881047625519227, + 4604218419306666352, + ); + let r = i64x2::new(4603881047625519227, 4604218419306666352); + + assert_eq!(r, transmute(lasx_extract_128_hi_d(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_extract_128_lo() { + let a = i64x4::new( + 1690990426210778543, + -1056924033489771427, + 1791197928200737608, + 2648792885519901423, + ); + let r = i64x2::new(1690990426210778543, -1056924033489771427); + + assert_eq!(r, transmute(lasx_extract_128_lo(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_extract_128_hi() { + let a = i64x4::new( + 1400282616691463341, + 6677577875527300174, + -1903780563362068813, + -7449796170151383489, + ); + let r = i64x2::new(-1903780563362068813, -7449796170151383489); + + assert_eq!(r, transmute(lasx_extract_128_hi(transmute(a)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_insert_128_lo_s() { + let a = u32x8::new( + 1063338913, 1017815328, 1065051130, 1040694156, 1059596680, 1048796526, 1058020845, + 1057822131, + ); + let b = u32x4::new(1052930766, 1021556992, 1050709482, 1059704809); + let r = i64x4::new( + 4387553872693064398, + 4551397499119635946, + 4504546780388010376, + 4543311458688048621, + ); + + assert_eq!( + r, + transmute(lasx_insert_128_lo_s(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_insert_128_hi_s() { + let a = u32x8::new( + 1018863744, 1064221149, 1048659080, 1057450774, 1049935896, 1034170664, 1059759433, + 1057849762, + ); + let b = u32x4::new(1060332648, 1063149600, 1051087106, 1060582348); + let r = i64x4::new( + 4570795031685406848, + 4541716492508546184, + 4566192763815814248, + 4555166500425978114, + ); + + assert_eq!( + r, + transmute(lasx_insert_128_hi_s(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_insert_128_lo_d() { + let a = u64x4::new( + 4601319519422109044, + 4601506273633970188, + 4605118087882201940, + 4605125059076454256, + ); + let b = u64x2::new(4587489919640425888, 4591909120489567808); + let r = i64x4::new( + 4587489919640425888, + 4591909120489567808, + 4605118087882201940, + 4605125059076454256, + ); + + assert_eq!( + r, + transmute(lasx_insert_128_lo_d(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_insert_128_hi_d() { + let a = u64x4::new( + 4604690660177752777, + 4593824994203592700, + 4599958775071728504, + 4604125324674373728, + ); + let b = u64x2::new(4601718173474385938, 4591758028383494760); + let r = i64x4::new( + 4604690660177752777, + 4593824994203592700, + 4601718173474385938, + 4591758028383494760, + ); + + assert_eq!( + r, + transmute(lasx_insert_128_hi_d(transmute(a), transmute(b))) + ); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_insert_128_lo() { + let a = i64x4::new( + 8159968186698006293, + 5648210958959948409, + 603295919044368378, + -4396186135186039276, + ); + let b = i64x2::new(-6258666140812668387, 5822982556977506382); + let r = i64x4::new( + -6258666140812668387, + 5822982556977506382, + 603295919044368378, + -4396186135186039276, + ); + + assert_eq!(r, transmute(lasx_insert_128_lo(transmute(a), transmute(b)))); +} + +#[simd_test(enable = "lasx")] +unsafe fn test_lasx_insert_128_hi() { + let a = i64x4::new( + 2981835982487038158, + 5258378092714202875, + 5115371338527125146, + -6993491475145500537, + ); + let b = i64x2::new(1176776599938765863, -7502655081590988207); + let r = i64x4::new( + 2981835982487038158, + 5258378092714202875, + 1176776599938765863, + -7502655081590988207, + ); + + assert_eq!(r, transmute(lasx_insert_128_hi(transmute(a), transmute(b)))); +} diff --git a/crates/core_arch/src/loongarch64/lsx/generated.rs b/crates/core_arch/src/loongarch64/lsx/generated.rs index 764e69ca05..25efaadb42 100644 --- a/crates/core_arch/src/loongarch64/lsx/generated.rs +++ b/crates/core_arch/src/loongarch64/lsx/generated.rs @@ -7,7 +7,7 @@ // ``` use crate::mem::transmute; -use super::types::*; +use super::super::*; #[allow(improper_ctypes)] unsafe extern "unadjusted" { @@ -1324,7 +1324,7 @@ unsafe extern "unadjusted" { #[link_name = "llvm.loongarch.lsx.vssrln.w.d"] fn __lsx_vssrln_w_d(a: __v2i64, b: __v2i64) -> __v4i32; #[link_name = "llvm.loongarch.lsx.vorn.v"] - fn __lsx_vorn_v(a: __v16i8, b: __v16i8) -> __v16i8; + fn __lsx_vorn_v(a: __v16u8, b: __v16u8) -> __v16u8; #[link_name = "llvm.loongarch.lsx.vldi"] fn __lsx_vldi(a: i32) -> __v2i64; #[link_name = "llvm.loongarch.lsx.vshuf.b"] diff --git a/crates/stdarch-gen-loongarch/lasx.spec b/crates/stdarch-gen-loongarch/lasx.spec index e3bdfcb5e9..ac4203a03f 100644 --- a/crates/stdarch-gen-loongarch/lasx.spec +++ b/crates/stdarch-gen-loongarch/lasx.spec @@ -2426,7 +2426,7 @@ data-types = V8SI, V4DI, V4DI /// lasx_xvorn_v name = lasx_xvorn_v asm-fmts = xd, xj, xk -data-types = V32QI, V32QI, V32QI +data-types = UV32QI, UV32QI, UV32QI /// lasx_xvldi name = lasx_xvldi @@ -3703,3 +3703,93 @@ name = lasx_xvrepli_w asm-fmts = xd, si10 data-types = V8SI, HI +/// lasx_cast_128_s +name = lasx_cast_128_s +asm-fmts = xd, vj +data-types = V8SF, V4SF + +/// lasx_cast_128_d +name = lasx_cast_128_d +asm-fmts = xd, vj +data-types = V4DF, V2DF + +/// lasx_cast_128 +name = lasx_cast_128 +asm-fmts = xd, vj +data-types = V4DI, V2DI + +/// lasx_concat_128_s +name = lasx_concat_128_s +asm-fmts = xd, vj, vk +data-types = V8SF, V4SF, V4SF + +/// lasx_concat_128_d +name = lasx_concat_128_d +asm-fmts = xd, vj, vk +data-types = V4DF, V2DF, V2DF + +/// lasx_concat_128 +name = lasx_concat_128 +asm-fmts = xd, vj, vk +data-types = V4DI, V2DI, V2DI + +/// lasx_extract_128_lo_s +name = lasx_extract_128_lo_s +asm-fmts = vd, xj +data-types = V4SF, V8SF + +/// lasx_extract_128_hi_s +name = lasx_extract_128_hi_s +asm-fmts = vd, xj +data-types = V4SF, V8SF + +/// lasx_extract_128_lo_d +name = lasx_extract_128_lo_d +asm-fmts = vd, xj +data-types = V2DF, V4DF + +/// lasx_extract_128_hi_d +name = lasx_extract_128_hi_d +asm-fmts = vd, xj +data-types = V2DF, V4DF + +/// lasx_extract_128_lo +name = lasx_extract_128_lo +asm-fmts = vd, xj +data-types = V2DI, V4DI + +/// lasx_extract_128_hi +name = lasx_extract_128_hi +asm-fmts = vd, xj +data-types = V2DI, V4DI + +/// lasx_insert_128_lo_s +name = lasx_insert_128_lo_s +asm-fmts = xd, xj, vk +data-types = V8SF, V8SF, V4SF + +/// lasx_insert_128_hi_s +name = lasx_insert_128_hi_s +asm-fmts = xd, xj, vk +data-types = V8SF, V8SF, V4SF + +/// lasx_insert_128_lo_d +name = lasx_insert_128_lo_d +asm-fmts = xd, xj, vk +data-types = V4DF, V4DF, V2DF + +/// lasx_insert_128_hi_d +name = lasx_insert_128_hi_d +asm-fmts = xd, xj, vk +data-types = V4DF, V4DF, V2DF + +/// lasx_insert_128_lo +name = lasx_insert_128_lo +asm-fmts = xd, xj, vk +data-types = V4DI, V4DI, V2DI + +/// lasx_insert_128_hi +name = lasx_insert_128_hi +asm-fmts = xd, xj, vk +data-types = V4DI, V4DI, V2DI + diff --git a/crates/stdarch-gen-loongarch/lasxintrin.h b/crates/stdarch-gen-loongarch/lasxintrin.h index c525b6106b..02bb97918d 100644 --- a/crates/stdarch-gen-loongarch/lasxintrin.h +++ b/crates/stdarch-gen-loongarch/lasxintrin.h @@ -1,10 +1,10 @@ /* - * https://gcc.gnu.org/git/?p=gcc.git;a=blob_plain;f=gcc/config/loongarch/lasxintrin.h;hb=61f1001f2f4ab9128e5eb6e9a4adbbb0f9f0bc75 + * https://gcc.gnu.org/git/?p=gcc.git;a=blob_plain;f=gcc/config/loongarch/lasxintrin.h;hb=c2013267642fea4a6e89b826940c8aa80a76089d */ /* LARCH Loongson ASX intrinsics include file. - Copyright (C) 2018-2024 Free Software Foundation, Inc. + Copyright (C) 2018-2025 Free Software Foundation, Inc. This file is part of GCC. @@ -27,6 +27,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ +#include + #ifndef _GCC_LOONGSON_ASXINTRIN_H #define _GCC_LOONGSON_ASXINTRIN_H 1 @@ -3568,11 +3570,11 @@ __m256i __lasx_xvssrln_w_d (__m256i _1, __m256i _2) } /* Assembly instruction format: xd, xj, xk. */ -/* Data types in instruction templates: V32QI, V32QI, V32QI. */ +/* Data types in instruction templates: UV32QI, UV32QI, UV32QI. */ extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvorn_v (__m256i _1, __m256i _2) { - return (__m256i)__builtin_lasx_xvorn_v ((v32i8)_1, (v32i8)_2); + return (__m256i)__builtin_lasx_xvorn_v ((v32u8)_1, (v32u8)_2); } /* Assembly instruction format: xd, i13. */ @@ -5372,5 +5374,159 @@ __m256i __lasx_xvfcmp_sun_s (__m256 _1, __m256 _2) #define __lasx_xvrepli_w(/*si10*/ _1) \ ((__m256i)__builtin_lasx_xvrepli_w ((_1))) +#if defined (__loongarch_asx_sx_conv) +/* Add builtin interfaces for 128 and 256 vector conversions. + For the assembly instruction format of some functions of the following vector + conversion, it is not described exactly in accordance with the format of the + generated assembly instruction. + In the front end of the Rust language, different built-in functions are called + by analyzing the format of assembly instructions. The data types of instructions + are all defined based on the interfaces of the defined functions, in the + following order: output, input... . */ +/* Assembly instruction format: xd, vj. */ +/* Data types in instruction templates: V8SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_cast_128_s (__m128 _1) +{ + return (__m256)__builtin_lasx_cast_128_s ((v4f32)_1); +} + +/* Assembly instruction format: xd, vj. */ +/* Data types in instruction templates: V4DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_cast_128_d (__m128d _1) +{ + return (__m256d)__builtin_lasx_cast_128_d ((v2f64)_1); +} + +/* Assembly instruction format: xd, vj. */ +/* Data types in instruction templates: V4DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_cast_128 (__m128i _1) +{ + return (__m256i)__builtin_lasx_cast_128 ((v2i64)_1); +} + +/* Assembly instruction format: xd, vj, vk. */ +/* Data types in instruction templates: V8SF, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_concat_128_s (__m128 _1, __m128 _2) +{ + return (__m256)__builtin_lasx_concat_128_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: xd, vj, vk. */ +/* Data types in instruction templates: V4DF, V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_concat_128_d (__m128d _1, __m128d _2) +{ + return (__m256d)__builtin_lasx_concat_128_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: xd, vj, vk. */ +/* Data types in instruction templates: V4DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_concat_128 (__m128i _1, __m128i _2) +{ + return (__m256i)__builtin_lasx_concat_128 ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, xj. */ +/* Data types in instruction templates: V4SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lasx_extract_128_lo_s (__m256 _1) +{ + return (__m128)__builtin_lasx_extract_128_lo_s ((v8f32)_1); +} + +/* Assembly instruction format: vd, xj. */ +/* Data types in instruction templates: V4SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lasx_extract_128_hi_s (__m256 _1) +{ + return (__m128)__builtin_lasx_extract_128_hi_s ((v8f32)_1); +} + +/* Assembly instruction format: vd, xj. */ +/* Data types in instruction templates: V2DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lasx_extract_128_lo_d (__m256d _1) +{ + return (__m128d)__builtin_lasx_extract_128_lo_d ((v4f64)_1); +} + +/* Assembly instruction format: vd, xj. */ +/* Data types in instruction templates: V2DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lasx_extract_128_hi_d (__m256d _1) +{ + return (__m128d)__builtin_lasx_extract_128_hi_d ((v4f64)_1); +} + +/* Assembly instruction format: vd, xj. */ +/* Data types in instruction templates: V2DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lasx_extract_128_lo (__m256i _1) +{ + return (__m128i)__builtin_lasx_extract_128_lo ((v4i64)_1); +} + +/* Assembly instruction format: vd, xj. */ +/* Data types in instruction templates: V2DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lasx_extract_128_hi (__m256i _1) +{ + return (__m128i)__builtin_lasx_extract_128_hi ((v4i64)_1); +} + +/* Assembly instruction format: xd, xj, vk. */ +/* Data types in instruction templates: V8SF, V8SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_insert_128_lo_s (__m256 _1, __m128 _2) +{ + return (__m256)__builtin_lasx_insert_128_lo_s ((v8f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: xd, xj, vk. */ +/* Data types in instruction templates: V8SF, V8SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_insert_128_hi_s (__m256 _1, __m128 _2) +{ + return (__m256)__builtin_lasx_insert_128_hi_s ((v8f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: xd, xj, vk. */ +/* Data types in instruction templates: V4DF, V4DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_insert_128_lo_d (__m256d _1, __m128d _2) +{ + return (__m256d)__builtin_lasx_insert_128_lo_d ((v4f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: xd, xj, vk. */ +/* Data types in instruction templates: V4DF, V4DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_insert_128_hi_d (__m256d _1, __m128d _2) +{ + return (__m256d)__builtin_lasx_insert_128_hi_d ((v4f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: xd, xj, vk. */ +/* Data types in instruction templates: V4DI, V4DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_insert_128_lo (__m256i _1, __m128i _2) +{ + return (__m256i)__builtin_lasx_insert_128_lo ((v4i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: xd, xj, vk. */ +/* Data types in instruction templates: V4DI, V4DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_insert_128_hi (__m256i _1, __m128i _2) +{ + return (__m256i)__builtin_lasx_insert_128_hi ((v4i64)_1, (v2i64)_2); +} + +#endif /* defined(__loongarch_asx_sx_conv). */ #endif /* defined(__loongarch_asx). */ #endif /* _GCC_LOONGSON_ASXINTRIN_H. */ diff --git a/crates/stdarch-gen-loongarch/lsx.spec b/crates/stdarch-gen-loongarch/lsx.spec index dc835770d5..b5497b6e62 100644 --- a/crates/stdarch-gen-loongarch/lsx.spec +++ b/crates/stdarch-gen-loongarch/lsx.spec @@ -3286,7 +3286,7 @@ data-types = V4SI, V2DI, V2DI /// lsx_vorn_v name = lsx_vorn_v asm-fmts = vd, vj, vk -data-types = V16QI, V16QI, V16QI +data-types = UV16QI, UV16QI, UV16QI /// lsx_vldi name = lsx_vldi diff --git a/crates/stdarch-gen-loongarch/lsxintrin.h b/crates/stdarch-gen-loongarch/lsxintrin.h index 943f2df913..66b7c7e218 100644 --- a/crates/stdarch-gen-loongarch/lsxintrin.h +++ b/crates/stdarch-gen-loongarch/lsxintrin.h @@ -1,10 +1,10 @@ /* - * https://gcc.gnu.org/git/?p=gcc.git;a=blob_plain;f=gcc/config/loongarch/lsxintrin.h;hb=61f1001f2f4ab9128e5eb6e9a4adbbb0f9f0bc75 + * https://gcc.gnu.org/git/?p=gcc.git;a=blob_plain;f=gcc/config/loongarch/lsxintrin.h;hb=6441eb6dc020faae0672ea724dfdb38c6a9bf6a1 */ /* LARCH Loongson SX intrinsics include file. - Copyright (C) 2018-2024 Free Software Foundation, Inc. + Copyright (C) 2018-2025 Free Software Foundation, Inc. This file is part of GCC. @@ -4749,11 +4749,11 @@ __m128i __lsx_vssrln_w_d (__m128i _1, __m128i _2) } /* Assembly instruction format: vd, vj, vk. */ -/* Data types in instruction templates: V16QI, V16QI, V16QI. */ +/* Data types in instruction templates: UV16QI, UV16QI, UV16QI. */ extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vorn_v (__m128i _1, __m128i _2) { - return (__m128i)__builtin_lsx_vorn_v ((v16i8)_1, (v16i8)_2); + return (__m128i)__builtin_lsx_vorn_v ((v16u8)_1, (v16u8)_2); } /* Assembly instruction format: vd, i13. */ diff --git a/crates/stdarch-gen-loongarch/src/main.rs b/crates/stdarch-gen-loongarch/src/main.rs index 5076064ffc..10b87c70e9 100644 --- a/crates/stdarch-gen-loongarch/src/main.rs +++ b/crates/stdarch-gen-loongarch/src/main.rs @@ -157,7 +157,7 @@ fn gen_bind(in_file: String, ext_name: &str) -> io::Result<()> { // ``` use crate::mem::transmute; -use super::types::*; +use super::super::*; "# )); @@ -1551,6 +1551,10 @@ fn gen_test_body( format!( " printf(\"\\n {current_name}{as_params};\\n assert_eq!(r, transmute(o));\\n\"{as_args});" ) + } else if current_name.starts_with("lasx_cast_128") { + format!( + " printf(\"\\n assert_eq!(r.as_array()[0..2], transmute::<_, i64x4>({current_name}{as_params}).as_array()[0..2]);\\n\"{as_args});" + ) } else { format!( " printf(\"\\n assert_eq!(r, transmute({current_name}{as_params}));\\n\"{as_args});"