From a1f02ccfa851f0e0e48a0b617e84e3af548d6d32 Mon Sep 17 00:00:00 2001 From: Jeff Muizelaar Date: Sat, 9 Mar 2024 03:37:00 +0000 Subject: [PATCH 1/2] Bug 1884279 - compute_precache always returns true so drop the return value. r=gfx-reviewers,nical Differential Revision: https://phabricator.services.mozilla.com/D203986 [ghsync] From https://hg.mozilla.org/mozilla-central/rev/183b81d91b61bdff27a15f3d9cc8e8b52f6bd21c --- src/transform.rs | 21 +++++++++------------ src/transform_util.rs | 3 +-- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/src/transform.rs b/src/transform.rs index 6b51f09..8a281a0 100644 --- a/src/transform.rs +++ b/src/transform.rs @@ -1174,30 +1174,27 @@ pub extern "C" fn qcms_profile_precache_output_transform(profile: &mut Profile) } if profile.output_table_r.is_none() { let mut output_table_r = precache_create(); - if compute_precache( + compute_precache( profile.redTRC.as_deref().unwrap(), &mut Arc::get_mut(&mut output_table_r).unwrap().data, - ) { - profile.output_table_r = Some(output_table_r); - } + ); + profile.output_table_r = Some(output_table_r); } if profile.output_table_g.is_none() { let mut output_table_g = precache_create(); - if compute_precache( + compute_precache( profile.greenTRC.as_deref().unwrap(), &mut Arc::get_mut(&mut output_table_g).unwrap().data, - ) { - profile.output_table_g = Some(output_table_g); - } + ); + profile.output_table_g = Some(output_table_g); } if profile.output_table_b.is_none() { let mut output_table_b = precache_create(); - if compute_precache( + compute_precache( profile.blueTRC.as_deref().unwrap(), &mut Arc::get_mut(&mut output_table_b).unwrap().data, - ) { - profile.output_table_b = Some(output_table_b); - } + ); + profile.output_table_b = Some(output_table_b); }; } /* Replace the current transformation with a LUT transformation using a given number of sample points */ diff --git a/src/transform_util.rs b/src/transform_util.rs index 75fd2ca..f341ae7 100644 --- a/src/transform_util.rs +++ b/src/transform_util.rs @@ -471,7 +471,7 @@ pub fn compute_precache_linear(output: &mut [u8; PRECACHE_OUTPUT_SIZE]) { output[v] = (v / (PRECACHE_OUTPUT_SIZE / 256)) as u8; } } -pub(crate) fn compute_precache(trc: &curveType, output: &mut [u8; PRECACHE_OUTPUT_SIZE]) -> bool { +pub(crate) fn compute_precache(trc: &curveType, output: &mut [u8; PRECACHE_OUTPUT_SIZE]) { match trc { curveType::Parametric(params) => { let mut gamma_table_uint: [u16; 256] = [0; 256]; @@ -512,7 +512,6 @@ pub(crate) fn compute_precache(trc: &curveType, output: &mut [u8; PRECACHE_OUTPU } } } - true } fn build_linear_table(length: usize) -> Vec { let mut output = Vec::with_capacity(length); From 0b04075e45c6d54faa75eda2d010294b16ee15b1 Mon Sep 17 00:00:00 2001 From: Jeff Muizelaar Date: Mon, 11 Mar 2024 21:22:16 +0000 Subject: [PATCH 2/2] Bug 1884381 - Use a single allocation for precache_output. r=gfx-reviewers,gw We always want to either have all of these or none of these so just put them all together. This will also make it easier for us to store the output matrix along side. Differential Revision: https://phabricator.services.mozilla.com/D204070 [ghsync] From https://hg.mozilla.org/mozilla-central/rev/09b0c2bfb164164e8c87a8ec05085c2966d2f148 --- src/iccread.rs | 4 +-- src/transform.rs | 68 ++++++++++++++++++------------------------- src/transform_avx.rs | 12 ++++---- src/transform_neon.rs | 14 ++++----- src/transform_sse2.rs | 12 ++++---- 5 files changed, 49 insertions(+), 61 deletions(-) diff --git a/src/iccread.rs b/src/iccread.rs index ad45a58..ba2b5a3 100644 --- a/src/iccread.rs +++ b/src/iccread.rs @@ -61,9 +61,7 @@ pub struct Profile { pub(crate) mAB: Option>, pub(crate) mBA: Option>, pub(crate) chromaticAdaption: Option, - pub(crate) output_table_r: Option>, - pub(crate) output_table_g: Option>, - pub(crate) output_table_b: Option>, + pub(crate) precache_output: Option>, is_srgb: bool, } diff --git a/src/transform.rs b/src/transform.rs index 8a281a0..54f8986 100644 --- a/src/transform.rs +++ b/src/transform.rs @@ -73,13 +73,17 @@ pub struct PrecacheOuput { * improve startup performance and reduce memory usage. ColorSync on * 10.5 uses 4097 which is perhaps because they use a fixed point * representation where 1. is represented by 0x1000. */ - pub data: [u8; PRECACHE_OUTPUT_SIZE], + pub lut_r: [u8; PRECACHE_OUTPUT_SIZE], + pub lut_g: [u8; PRECACHE_OUTPUT_SIZE], + pub lut_b: [u8; PRECACHE_OUTPUT_SIZE], } impl Default for PrecacheOuput { fn default() -> PrecacheOuput { PrecacheOuput { - data: [0; PRECACHE_OUTPUT_SIZE], + lut_r: [0; PRECACHE_OUTPUT_SIZE], + lut_g: [0; PRECACHE_OUTPUT_SIZE], + lut_b: [0; PRECACHE_OUTPUT_SIZE], } } } @@ -113,9 +117,7 @@ pub struct qcms_transform { pub output_gamma_lut_g_length: usize, pub output_gamma_lut_b_length: usize, pub output_gamma_lut_gray_length: usize, - pub output_table_r: Option>, - pub output_table_g: Option>, - pub output_table_b: Option>, + pub precache_output: Option>, pub transform_fn: transform_fn_t, } @@ -481,9 +483,10 @@ unsafe fn qcms_transform_data_gray_template_precache( length: usize, ) { let components: u32 = if F::kAIndex == 0xff { 3 } else { 4 } as u32; - let output_table_r = transform.output_table_r.as_deref().unwrap(); - let output_table_g = transform.output_table_g.as_deref().unwrap(); - let output_table_b = transform.output_table_b.as_deref().unwrap(); + let precache_output = transform.precache_output.as_deref().unwrap(); + let output_r = &precache_output.lut_r; + let output_g = &precache_output.lut_g; + let output_b = &precache_output.lut_b; let input_gamma_table_gray = transform .input_gamma_table_gray @@ -506,9 +509,9 @@ unsafe fn qcms_transform_data_gray_template_precache( let linear: f32 = *input_gamma_table_gray.offset(device as isize); /* we could round here... */ let gray: u16 = (linear * PRECACHE_OUTPUT_MAX as f32) as u16; - *dest.add(F::kRIndex) = (output_table_r).data[gray as usize]; - *dest.add(F::kGIndex) = (output_table_g).data[gray as usize]; - *dest.add(F::kBIndex) = (output_table_b).data[gray as usize]; + *dest.add(F::kRIndex) = output_r[gray as usize]; + *dest.add(F::kGIndex) = output_g[gray as usize]; + *dest.add(F::kBIndex) = output_b[gray as usize]; if F::kAIndex != 0xff { *dest.add(F::kAIndex) = alpha } @@ -563,9 +566,9 @@ unsafe fn qcms_transform_data_template_lut_precache( length: usize, ) { let components: u32 = if F::kAIndex == 0xff { 3 } else { 4 } as u32; - let output_table_r = transform.output_table_r.as_deref().unwrap(); - let output_table_g = transform.output_table_g.as_deref().unwrap(); - let output_table_b = transform.output_table_b.as_deref().unwrap(); + let output_table_r = &transform.precache_output.as_deref().unwrap().lut_r; + let output_table_g = &transform.precache_output.as_deref().unwrap().lut_g; + let output_table_b = &transform.precache_output.as_deref().unwrap().lut_b; let input_gamma_table_r = transform.input_gamma_table_r.as_ref().unwrap().as_ptr(); let input_gamma_table_g = transform.input_gamma_table_g.as_ref().unwrap().as_ptr(); let input_gamma_table_b = transform.input_gamma_table_b.as_ref().unwrap().as_ptr(); @@ -596,9 +599,9 @@ unsafe fn qcms_transform_data_template_lut_precache( let r: u16 = (out_linear_r * PRECACHE_OUTPUT_MAX as f32) as u16; let g: u16 = (out_linear_g * PRECACHE_OUTPUT_MAX as f32) as u16; let b: u16 = (out_linear_b * PRECACHE_OUTPUT_MAX as f32) as u16; - *dest.add(F::kRIndex) = (output_table_r).data[r as usize]; - *dest.add(F::kGIndex) = (output_table_g).data[g as usize]; - *dest.add(F::kBIndex) = (output_table_b).data[b as usize]; + *dest.add(F::kRIndex) = output_table_r[r as usize]; + *dest.add(F::kGIndex) = output_table_g[g as usize]; + *dest.add(F::kBIndex) = output_table_b[b as usize]; if F::kAIndex != 0xff { *dest.add(F::kAIndex) = alpha } @@ -1172,30 +1175,22 @@ pub extern "C" fn qcms_profile_precache_output_transform(profile: &mut Profile) if profile.redTRC.is_none() || profile.greenTRC.is_none() || profile.blueTRC.is_none() { return; } - if profile.output_table_r.is_none() { - let mut output_table_r = precache_create(); + if profile.precache_output.is_none() { + let mut precache = precache_create(); compute_precache( profile.redTRC.as_deref().unwrap(), - &mut Arc::get_mut(&mut output_table_r).unwrap().data, + &mut Arc::get_mut(&mut precache).unwrap().lut_r, ); - profile.output_table_r = Some(output_table_r); - } - if profile.output_table_g.is_none() { - let mut output_table_g = precache_create(); compute_precache( profile.greenTRC.as_deref().unwrap(), - &mut Arc::get_mut(&mut output_table_g).unwrap().data, + &mut Arc::get_mut(&mut precache).unwrap().lut_g, ); - profile.output_table_g = Some(output_table_g); - } - if profile.output_table_b.is_none() { - let mut output_table_b = precache_create(); compute_precache( profile.blueTRC.as_deref().unwrap(), - &mut Arc::get_mut(&mut output_table_b).unwrap().data, + &mut Arc::get_mut(&mut precache).unwrap().lut_b, ); - profile.output_table_b = Some(output_table_b); - }; + profile.precache_output = Some(precache); + } } /* Replace the current transformation with a LUT transformation using a given number of sample points */ fn transform_precacheLUT_float( @@ -1300,10 +1295,7 @@ pub fn transform_create( } let mut transform: Box = Box::new(Default::default()); let mut precache: bool = false; - if output.output_table_r.is_some() - && output.output_table_g.is_some() - && output.output_table_b.is_some() - { + if output.precache_output.is_some() { precache = true } // This precache assumes RGB_SIGNATURE (fails on GRAY_SIGNATURE, for instance) @@ -1327,9 +1319,7 @@ pub fn transform_create( return result; } if precache { - transform.output_table_r = Some(Arc::clone(output.output_table_r.as_ref().unwrap())); - transform.output_table_g = Some(Arc::clone(output.output_table_g.as_ref().unwrap())); - transform.output_table_b = Some(Arc::clone(output.output_table_b.as_ref().unwrap())); + transform.precache_output = Some(Arc::clone(output.precache_output.as_ref().unwrap())); } else { if output.redTRC.is_none() || output.greenTRC.is_none() || output.blueTRC.is_none() { return None; diff --git a/src/transform_avx.rs b/src/transform_avx.rs index 66224a8..04ca752 100644 --- a/src/transform_avx.rs +++ b/src/transform_avx.rs @@ -35,22 +35,22 @@ unsafe extern "C" fn qcms_transform_data_template_lut_avx( let igtbl_b: *const f32 = transform.input_gamma_table_b.as_ref().unwrap().as_ptr(); /* deref *transform now to avoid it in loop */ let otdata_r: *const u8 = transform - .output_table_r + .precache_output .as_deref() .unwrap() - .data + .lut_r .as_ptr(); let otdata_g: *const u8 = (*transform) - .output_table_g + .precache_output .as_deref() .unwrap() - .data + .lut_g .as_ptr(); let otdata_b: *const u8 = (*transform) - .output_table_b + .precache_output .as_deref() .unwrap() - .data + .lut_b .as_ptr(); /* input matrix values never change */ let mat0: __m256 = _mm256_broadcast_ps(&*((*mat.offset(0isize)).as_ptr() as *const __m128)); diff --git a/src/transform_neon.rs b/src/transform_neon.rs index b14b671..121b52f 100644 --- a/src/transform_neon.rs +++ b/src/transform_neon.rs @@ -25,23 +25,23 @@ unsafe fn qcms_transform_data_template_lut_neon( let igtbl_g: *const f32 = (*transform).input_gamma_table_g.as_ref().unwrap().as_ptr(); let igtbl_b: *const f32 = (*transform).input_gamma_table_b.as_ref().unwrap().as_ptr(); /* deref *transform now to avoid it in loop */ - let otdata_r: *const u8 = (*transform) - .output_table_r + let otdata_r: *const u8 = transform + .precache_output .as_deref() .unwrap() - .data + .lut_r .as_ptr(); let otdata_g: *const u8 = (*transform) - .output_table_g + .precache_output .as_deref() .unwrap() - .data + .lut_g .as_ptr(); let otdata_b: *const u8 = (*transform) - .output_table_b + .precache_output .as_deref() .unwrap() - .data + .lut_b .as_ptr(); /* input matrix values never change */ let mat0: float32x4_t = vld1q_f32((*mat.offset(0isize)).as_ptr()); diff --git a/src/transform_sse2.rs b/src/transform_sse2.rs index f6bccaa..865a01b 100644 --- a/src/transform_sse2.rs +++ b/src/transform_sse2.rs @@ -30,22 +30,22 @@ unsafe extern "C" fn qcms_transform_data_template_lut_sse2( let igtbl_b: *const f32 = (*transform).input_gamma_table_b.as_ref().unwrap().as_ptr(); /* deref *transform now to avoid it in loop */ let otdata_r: *const u8 = (*transform) - .output_table_r + .precache_output .as_deref() .unwrap() - .data + .lut_r .as_ptr(); let otdata_g: *const u8 = (*transform) - .output_table_g + .precache_output .as_deref() .unwrap() - .data + .lut_g .as_ptr(); let otdata_b: *const u8 = (*transform) - .output_table_b + .precache_output .as_deref() .unwrap() - .data + .lut_b .as_ptr(); /* input matrix values never change */ let mat0: __m128 = _mm_load_ps((*mat.offset(0isize)).as_ptr());