Vendor dependencies for 0.3.0 release

2025-09-27 10:29:08 -05:00
parent 0c8d39d483
commit 82ab7f317b
26803 changed files with 16134934 additions and 0 deletions

vendor/moxcms/src/chad.rs vendored Normal file

@@ -0,0 +1,172 @@
/*
* // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::matrix::{Matrix3f, Vector3f, Xyz};
use crate::{Chromaticity, Matrix3d, Vector3d, XyY};
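// Bradford cone response matrix: maps XYZ tristimulus values into a
// sharpened cone-response space in which von Kries-style white point
// scaling is performed.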
pub(crate) const BRADFORD_D: Matrix3d = Matrix3d {
v: [
[0.8951, 0.2664, -0.1614],
[-0.7502, 1.7135, 0.0367],
[0.0389, -0.0685, 1.0296],
],
};
pub(crate) const BRADFORD_F: Matrix3f = BRADFORD_D.to_f32();
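// Chromatic adaptation as a von Kries transform: convert both white points
// into the cone space, scale each channel by the ratio of destination to
// source response, then return to XYZ, i.e. chad⁻¹ · diag(dest/src) · chad.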
#[inline]
pub(crate) const fn compute_chromatic_adaption(
source_white_point: Xyz,
dest_white_point: Xyz,
chad: Matrix3f,
) -> Matrix3f {
let cone_source_xyz = Vector3f {
v: [
source_white_point.x,
source_white_point.y,
source_white_point.z,
],
};
let cone_source_rgb = chad.mul_vector(cone_source_xyz);
let cone_dest_xyz = Vector3f {
v: [dest_white_point.x, dest_white_point.y, dest_white_point.z],
};
let cone_dest_rgb = chad.mul_vector(cone_dest_xyz);
let cone = Matrix3f {
v: [
[cone_dest_rgb.v[0] / cone_source_rgb.v[0], 0., 0.],
[0., cone_dest_rgb.v[1] / cone_source_rgb.v[1], 0.],
[0., 0., cone_dest_rgb.v[2] / cone_source_rgb.v[2]],
],
};
let chad_inv = chad.inverse();
let p0 = cone.mat_mul_const(chad);
chad_inv.mat_mul_const(p0)
}
#[inline]
pub(crate) const fn compute_chromatic_adaption_d(
source_white_point: Xyz,
dest_white_point: Xyz,
chad: Matrix3d,
) -> Matrix3d {
let cone_source_xyz = Vector3d {
v: [
source_white_point.x as f64,
source_white_point.y as f64,
source_white_point.z as f64,
],
};
let cone_source_rgb = chad.mul_vector(cone_source_xyz);
let cone_dest_xyz = Vector3d {
v: [
dest_white_point.x as f64,
dest_white_point.y as f64,
dest_white_point.z as f64,
],
};
let cone_dest_rgb = chad.mul_vector(cone_dest_xyz);
let cone = Matrix3d {
v: [
[cone_dest_rgb.v[0] / cone_source_rgb.v[0], 0., 0.],
[0., cone_dest_rgb.v[1] / cone_source_rgb.v[1], 0.],
[0., 0., cone_dest_rgb.v[2] / cone_source_rgb.v[2]],
],
};
let chad_inv = chad.inverse();
let p0 = cone.mat_mul_const(chad);
chad_inv.mat_mul_const(p0)
}
pub const fn adaption_matrix(source_illumination: Xyz, target_illumination: Xyz) -> Matrix3f {
compute_chromatic_adaption(source_illumination, target_illumination, BRADFORD_F)
}
pub const fn adaption_matrix_d(source_illumination: Xyz, target_illumination: Xyz) -> Matrix3d {
compute_chromatic_adaption_d(source_illumination, target_illumination, BRADFORD_D)
}
pub const fn adapt_to_d50(r: Matrix3f, source_white_pt: XyY) -> Matrix3f {
adapt_to_illuminant(r, source_white_pt, Chromaticity::D50.to_xyz())
}
pub const fn adapt_to_d50_d(r: Matrix3d, source_white_pt: XyY) -> Matrix3d {
adapt_to_illuminant_d(r, source_white_pt, Chromaticity::D50.to_xyz())
}
pub const fn adapt_to_illuminant(
r: Matrix3f,
source_white_pt: XyY,
illuminant_xyz: Xyz,
) -> Matrix3f {
let bradford = adaption_matrix(source_white_pt.to_xyz(), illuminant_xyz);
bradford.mat_mul_const(r)
}
pub const fn adapt_to_illuminant_d(
r: Matrix3d,
source_white_pt: XyY,
illuminant_xyz: Xyz,
) -> Matrix3d {
let bradford = adaption_matrix_d(source_white_pt.to_xyz(), illuminant_xyz);
bradford.mat_mul_const(r)
}
pub const fn adapt_to_illuminant_xyz(
r: Matrix3f,
source_white_pt: Xyz,
illuminant_xyz: Xyz,
) -> Matrix3f {
if source_white_pt.y == 0.0 {
return r;
}
let bradford = adaption_matrix(source_white_pt, illuminant_xyz);
bradford.mat_mul_const(r)
}
pub const fn adapt_to_illuminant_xyz_d(
r: Matrix3d,
source_white_pt: Xyz,
illuminant_xyz: Xyz,
) -> Matrix3d {
if source_white_pt.y == 0.0 {
return r;
}
let bradford = adaption_matrix_d(source_white_pt, illuminant_xyz);
bradford.mat_mul_const(r)
}
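// A minimal sanity-check sketch (not part of upstream moxcms): adapting a
// white point to itself should yield approximately the identity matrix.
#[cfg(test)]
mod chad_tests {
use super::*;
#[test]
fn adapting_white_point_to_itself_is_identity() {
let wp = Chromaticity::D65.to_xyz();
let m = adaption_matrix(wp, wp);
for (i, row) in m.v.iter().enumerate() {
for (j, &value) in row.iter().enumerate() {
let expected = if i == j { 1.0 } else { 0.0 };
assert!((value - expected).abs() < 1e-4);
}
}
}
}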

vendor/moxcms/src/chromaticity.rs vendored Normal file

@@ -0,0 +1,143 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::{CmsError, XyY, XyYRepresentable, Xyz, Xyzd};
#[derive(Clone, Debug, Copy)]
#[repr(C)]
pub struct Chromaticity {
pub x: f32,
pub y: f32,
}
impl Chromaticity {
#[inline]
pub const fn new(x: f32, y: f32) -> Self {
Self { x, y }
}
/// Converts this chromaticity (`x`, `y`) to a tristimulus [`Xyz`] value,
/// normalized such that `Y = 1.0`.
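///
/// For example, the D65 chromaticity (`x = 0.31272`, `y = 0.32903`) maps to
/// approximately `X ≈ 0.9504, Y = 1.0, Z ≈ 1.0888`.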
#[inline]
pub const fn to_xyz(&self) -> Xyz {
let reciprocal = if self.y != 0. { 1. / self.y } else { 0. };
Xyz {
x: self.x * reciprocal,
y: 1f32,
z: (1f32 - self.x - self.y) * reciprocal,
}
}
/// Get the color representation with component sum `1`.
///
/// In contrast to the XYZ representation defined through setting `Y` to a known
/// value (such as `1` in [`Self::to_xyz`]) this representation can be uniquely
/// derived from the `xy` coordinates with no ambiguities. It is scaled from the
/// original XYZ color by dividing by `X + Y + Z`. Note that, in particular, this
/// method is well-defined even if the original color had pure chromatic
/// information with no luminance (Y = `0`) and will preserve that information,
/// whereas [`Self::to_xyz`] is ill-defined and returns an incorrect value.
#[inline]
pub const fn to_scaled_xyzd(&self) -> Xyzd {
let z = 1.0 - self.x as f64 - self.y as f64;
Xyzd::new(self.x as f64, self.y as f64, z)
}
/// Get the color representation with component sum `1`.
///
/// In contrast to the XYZ representation defined through setting `Y` to a known
/// value (such as `1` in [`Self::to_xyz`]) this representation can be uniquely
/// derived from the `xy` coordinates with no ambiguities. It is scaled from the
/// original XYZ color by dividing by `X + Y + Z`. Note that, in particular, this
/// method is well-defined even if the original color had pure chromatic
/// information with no luminance (Y = `0`) and will preserve that information,
/// whereas [`Self::to_xyz`] is ill-defined and returns an incorrect value.
#[inline]
pub const fn to_scaled_xyz(&self) -> Xyz {
let z = 1.0 - self.x - self.y;
Xyz::new(self.x, self.y, z)
}
#[inline]
pub const fn to_xyzd(&self) -> Xyzd {
let reciprocal = if self.y != 0. { 1. / self.y } else { 0. };
Xyzd {
x: self.x as f64 * reciprocal as f64,
y: 1f64,
z: (1f64 - self.x as f64 - self.y as f64) * reciprocal as f64,
}
}
#[inline]
pub const fn to_xyyb(&self) -> XyY {
XyY {
x: self.x as f64,
y: self.y as f64,
yb: 1.,
}
}
pub const D65: Chromaticity = Chromaticity {
x: 0.31272,
y: 0.32903,
};
pub const D50: Chromaticity = Chromaticity {
x: 0.34567,
y: 0.35850,
};
}
impl XyYRepresentable for Chromaticity {
fn to_xyy(self) -> XyY {
self.to_xyyb()
}
}
impl TryFrom<Xyz> for Chromaticity {
type Error = CmsError;
#[inline]
fn try_from(xyz: Xyz) -> Result<Self, Self::Error> {
let sum = xyz.x + xyz.y + xyz.z;
// Avoid division by zero or invalid XYZ values
if sum == 0.0 {
return Err(CmsError::DivisionByZero);
}
let rec = 1f32 / sum;
let chromaticity_x = xyz.x * rec;
let chromaticity_y = xyz.y * rec;
Ok(Chromaticity {
x: chromaticity_x,
y: chromaticity_y,
})
}
}
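// A round-trip sketch (not part of upstream moxcms): xy -> XYZ -> xy should
// reproduce the original chromaticity, since `to_xyz` only rescales by 1/y.
#[cfg(test)]
mod chromaticity_round_trip_tests {
use super::*;
#[test]
fn xyz_round_trip_preserves_chromaticity() {
let original = Chromaticity::D65;
let recovered = match Chromaticity::try_from(original.to_xyz()) {
Ok(c) => c,
Err(_) => panic!("conversion should succeed for a valid white point"),
};
assert!((recovered.x - original.x).abs() < 1e-5);
assert!((recovered.y - original.y).abs() < 1e-5);
}
}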

vendor/moxcms/src/cicp.rs vendored Normal file

@@ -0,0 +1,642 @@
/*
* // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::gamma::{
bt1361_to_linear, hlg_to_linear, iec61966_to_linear, log100_sqrt10_to_linear, log100_to_linear,
pq_to_linear, smpte240_to_linear, smpte428_to_linear,
};
use crate::{
Chromaticity, ColorProfile, Matrix3d, Matrix3f, XyYRepresentable,
err::CmsError,
trc::{ToneReprCurve, build_trc_table, curve_from_gamma},
};
use std::convert::TryFrom;
/// See [Rec. ITU-T H.273 (12/2016)](https://www.itu.int/rec/T-REC-H.273-201612-I/en) Table 2
/// Values 0, 3, 13–21 and 23–255 are all reserved so all map to the same variant
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum CicpColorPrimaries {
/// For future use by ITU-T | ISO/IEC
Reserved,
/// Rec. ITU-R BT.709-6<br />
/// Rec. ITU-R BT.1361-0 conventional colour gamut system and extended colour gamut system (historical)<br />
/// IEC 61966-2-1 sRGB or sYCC IEC 61966-2-4<br />
/// Society of Motion Picture and Television Engineers (SMPTE) RP 177 (1993) Annex B<br />
Bt709 = 1,
/// Unspecified<br />
/// Image characteristics are unknown or are determined by the application.
Unspecified = 2,
/// Rec. ITU-R BT.470-6 System M (historical)<br />
/// United States National Television System Committee 1953 Recommendation for transmission standards for color television<br />
/// United States Federal Communications Commission (2003) Title 47 Code of Federal Regulations 73.682 (a) (20)<br />
Bt470M = 4,
/// Rec. ITU-R BT.470-6 System B, G (historical) Rec. ITU-R BT.601-7 625<br />
/// Rec. ITU-R BT.1358-0 625 (historical)<br />
/// Rec. ITU-R BT.1700-0 625 PAL and 625 SECAM<br />
Bt470Bg = 5,
/// Rec. ITU-R BT.601-7 525<br />
/// Rec. ITU-R BT.1358-1 525 or 625 (historical) Rec. ITU-R BT.1700-0 NTSC<br />
/// SMPTE 170M (2004)<br />
/// (functionally the same as the value 7)<br />
Bt601 = 6,
/// SMPTE 240M (1999) (historical) (functionally the same as the value 6)<br />
Smpte240 = 7,
/// Generic film (colour filters using Illuminant C)<br />
GenericFilm = 8,
/// Rec. ITU-R BT.2020-2<br />
/// Rec. ITU-R BT.2100-0<br />
Bt2020 = 9,
/// SMPTE ST 428-1<br />
/// (CIE 1931 XYZ as in ISO 11664-1)<br />
Xyz = 10,
/// SMPTE RP 431-2 (2011)<br />
Smpte431 = 11,
/// SMPTE EG 432-1 (2010)<br />
Smpte432 = 12,
/// EBU Tech. 3213-E (1975)<br />
Ebu3213 = 22,
}
impl TryFrom<u8> for CicpColorPrimaries {
type Error = CmsError;
#[allow(unreachable_patterns)]
fn try_from(value: u8) -> Result<Self, Self::Error> {
match value {
// Values 0, 3, 13..=21 and 23..=255 are all reserved so all map to the
// same variant.
0 | 3 | 13..=21 | 23..=255 => Ok(Self::Reserved),
1 => Ok(Self::Bt709),
2 => Ok(Self::Unspecified),
4 => Ok(Self::Bt470M),
5 => Ok(Self::Bt470Bg),
6 => Ok(Self::Bt601),
7 => Ok(Self::Smpte240),
8 => Ok(Self::GenericFilm),
9 => Ok(Self::Bt2020),
10 => Ok(Self::Xyz),
11 => Ok(Self::Smpte431),
12 => Ok(Self::Smpte432),
22 => Ok(Self::Ebu3213),
_ => Err(CmsError::InvalidCicp),
}
}
}
#[derive(Clone, Copy, Debug)]
#[repr(C)]
pub struct ColorPrimaries {
pub red: Chromaticity,
pub green: Chromaticity,
pub blue: Chromaticity,
}
/// See [Rec. ITU-T H.273 (12/2016)](https://www.itu.int/rec/T-REC-H.273-201612-I/en) Table 2.
impl ColorPrimaries {
/// [ACEScg](https://en.wikipedia.org/wiki/Academy_Color_Encoding_System#ACEScg).
pub const ACES_CG: ColorPrimaries = ColorPrimaries {
red: Chromaticity { x: 0.713, y: 0.293 },
green: Chromaticity { x: 0.165, y: 0.830 },
blue: Chromaticity { x: 0.128, y: 0.044 },
};
/// [ACES2065-1](https://en.wikipedia.org/wiki/Academy_Color_Encoding_System#ACES2065-1).
pub const ACES_2065_1: ColorPrimaries = ColorPrimaries {
red: Chromaticity {
x: 0.7347,
y: 0.2653,
},
green: Chromaticity {
x: 0.0000,
y: 1.0000,
},
blue: Chromaticity {
x: 0.0001,
y: -0.0770,
},
};
/// [Adobe RGB](https://en.wikipedia.org/wiki/Adobe_RGB_color_space) (1998).
pub const ADOBE_RGB: ColorPrimaries = ColorPrimaries {
red: Chromaticity { x: 0.64, y: 0.33 },
green: Chromaticity { x: 0.21, y: 0.71 },
blue: Chromaticity { x: 0.15, y: 0.06 },
};
/// [DCI P3](https://en.wikipedia.org/wiki/DCI-P3#DCI_P3).
///
/// This is the same as [`DISPLAY_P3`](Self::DISPLAY_P3),
/// [`SMPTE_431`](Self::SMPTE_431) and [`SMPTE_432`](Self::SMPTE_432).
pub const DCI_P3: ColorPrimaries = ColorPrimaries {
red: Chromaticity { x: 0.680, y: 0.320 },
green: Chromaticity { x: 0.265, y: 0.690 },
blue: Chromaticity { x: 0.150, y: 0.060 },
};
/// [Display P3](https://en.wikipedia.org/wiki/DCI-P3#Display_P3).
///
/// This is the same as [`DCI_P3`](Self::DCI_P3),
/// [`SMPTE_431`](Self::SMPTE_431) and [`SMPTE_432`](Self::SMPTE_432).
pub const DISPLAY_P3: ColorPrimaries = Self::DCI_P3;
/// SMPTE RP 431-2 (2011).
///
/// This is the same as [`DCI_P3`](Self::DCI_P3),
/// [`DISPLAY_P3`](Self::DISPLAY_P3) and [`SMPTE_432`](Self::SMPTE_432).
pub const SMPTE_431: ColorPrimaries = Self::DCI_P3;
/// SMPTE EG 432-1 (2010).
///
/// This is the same as [`DCI_P3`](Self::DCI_P3),
/// [`DISPLAY_P3`](Self::DISPLAY_P3) and [`SMPTE_431`](Self::SMPTE_431).
pub const SMPTE_432: ColorPrimaries = Self::DCI_P3;
/// [ProPhoto RGB](https://en.wikipedia.org/wiki/ProPhoto_RGB_color_space).
pub const PRO_PHOTO_RGB: ColorPrimaries = ColorPrimaries {
red: Chromaticity {
x: 0.734699,
y: 0.265301,
},
green: Chromaticity {
x: 0.159597,
y: 0.840403,
},
blue: Chromaticity {
x: 0.036598,
y: 0.000105,
},
};
/// Rec. ITU-R BT.709-6
///
/// Rec. ITU-R BT.1361-0 conventional colour gamut system and extended
/// colour gamut system (historical).
///
/// IEC 61966-2-1 sRGB or sYCC IEC 61966-2-4).
///
/// Society of Motion Picture and Television Engineers (SMPTE) RP 177 (1993) Annex B.
pub const BT_709: ColorPrimaries = ColorPrimaries {
red: Chromaticity { x: 0.64, y: 0.33 },
green: Chromaticity { x: 0.30, y: 0.60 },
blue: Chromaticity { x: 0.15, y: 0.06 },
};
/// Rec. ITU-R BT.470-6 System M (historical).
///
/// United States National Television System Committee 1953 Recommendation
/// for transmission standards for color television.
///
/// United States Federal Communications Commission (2003) Title 47 Code of
/// Federal Regulations 73.682 (a) (20).
pub const BT_470M: ColorPrimaries = ColorPrimaries {
red: Chromaticity { x: 0.67, y: 0.33 },
green: Chromaticity { x: 0.21, y: 0.71 },
blue: Chromaticity { x: 0.14, y: 0.08 },
};
/// Rec. ITU-R BT.470-6 System B, G (historical) Rec. ITU-R BT.601-7 625.
///
/// Rec. ITU-R BT.1358-0 625 (historical).
/// Rec. ITU-R BT.1700-0 625 PAL and 625 SECAM.
pub const BT_470BG: ColorPrimaries = ColorPrimaries {
red: Chromaticity { x: 0.64, y: 0.33 },
green: Chromaticity { x: 0.29, y: 0.60 },
blue: Chromaticity { x: 0.15, y: 0.06 },
};
/// Rec. ITU-R BT.601-7 525.
///
/// Rec. ITU-R BT.1358-1 525 or 625 (historical) Rec. ITU-R BT.1700-0 NTSC.
///
/// SMPTE 170M (2004) (functionally the same as [`SMPTE_240`](Self::SMPTE_240)).
pub const BT_601: ColorPrimaries = ColorPrimaries {
red: Chromaticity { x: 0.630, y: 0.340 },
green: Chromaticity { x: 0.310, y: 0.595 },
blue: Chromaticity { x: 0.155, y: 0.070 },
};
/// SMPTE 240M (1999) (historical) (functionally the same as [`BT_601`](Self::BT_601)).
pub const SMPTE_240: ColorPrimaries = Self::BT_601;
/// Generic film (colour filters using Illuminant C).
pub const GENERIC_FILM: ColorPrimaries = ColorPrimaries {
red: Chromaticity { x: 0.681, y: 0.319 },
green: Chromaticity { x: 0.243, y: 0.692 },
blue: Chromaticity { x: 0.145, y: 0.049 },
};
/// Rec. ITU-R BT.2020-2.
///
/// Rec. ITU-R BT.2100-0.
pub const BT_2020: ColorPrimaries = ColorPrimaries {
red: Chromaticity { x: 0.708, y: 0.292 },
green: Chromaticity { x: 0.170, y: 0.797 },
blue: Chromaticity { x: 0.131, y: 0.046 },
};
/// SMPTE ST 428-1 (CIE 1931 XYZ as in ISO 11664-1).
pub const XYZ: ColorPrimaries = ColorPrimaries {
red: Chromaticity { x: 1.0, y: 0.0 },
green: Chromaticity { x: 0.0, y: 1.0 },
blue: Chromaticity { x: 0.0, y: 0.0 },
};
/// EBU Tech. 3213-E (1975).
pub const EBU_3213: ColorPrimaries = ColorPrimaries {
red: Chromaticity { x: 0.630, y: 0.340 },
green: Chromaticity { x: 0.295, y: 0.605 },
blue: Chromaticity { x: 0.155, y: 0.077 },
};
}
impl ColorPrimaries {
/// Returns RGB -> XYZ conversion matrix
///
/// # Arguments
///
/// * `white_point`: [Chromaticity] or [crate::XyY] or any item conforming [XyYRepresentable]
///
/// returns: [Matrix3d]
pub fn transform_to_xyz_d(self, white_point: impl XyYRepresentable) -> Matrix3d {
let red_xyz = self.red.to_scaled_xyzd();
let green_xyz = self.green.to_scaled_xyzd();
let blue_xyz = self.blue.to_scaled_xyzd();
let xyz_matrix = Matrix3d {
v: [
[red_xyz.x, green_xyz.x, blue_xyz.x],
[red_xyz.y, green_xyz.y, blue_xyz.y],
[red_xyz.z, green_xyz.z, blue_xyz.z],
],
};
ColorProfile::rgb_to_xyz_d(xyz_matrix, white_point.to_xyy().to_xyzd())
}
/// Returns RGB -> XYZ conversion matrix
///
/// # Arguments
///
/// * `white_point`: [Chromaticity] or [crate::XyY] or any item conforming [XyYRepresentable]
///
/// returns: [Matrix3f]
pub fn transform_to_xyz(self, white_point: impl XyYRepresentable) -> Matrix3f {
let red_xyz = self.red.to_scaled_xyz();
let green_xyz = self.green.to_scaled_xyz();
let blue_xyz = self.blue.to_scaled_xyz();
let xyz_matrix = Matrix3f {
v: [
[red_xyz.x, green_xyz.x, blue_xyz.x],
[red_xyz.y, green_xyz.y, blue_xyz.y],
[red_xyz.z, green_xyz.z, blue_xyz.z],
],
};
ColorProfile::rgb_to_xyz_static(xyz_matrix, white_point.to_xyy().to_xyz())
}
}
/// See [Rec. ITU-T H.273 (12/2016)](https://www.itu.int/rec/T-REC-H.273-201612-I/en) Table 3
/// Values 0, 3 and 19–255 are all reserved so all map to the same variant
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum TransferCharacteristics {
/// For future use by ITU-T | ISO/IEC
Reserved,
/// Rec. ITU-R BT.709-6<br />
/// Rec. ITU-R BT.1361-0 conventional colour gamut system (historical)<br />
/// (functionally the same as the values 6, 14 and 15) <br />
Bt709 = 1,
/// Image characteristics are unknown or are determined by the application.<br />
Unspecified = 2,
/// Rec. ITU-R BT.470-6 System M (historical)<br />
/// United States National Television System Committee 1953 Recommendation for transmission standards for color television<br />
/// United States Federal Communications Commission (2003) Title 47 Code of Federal Regulations 73.682 (a) (20)<br />
/// Rec. ITU-R BT.1700-0 625 PAL and 625 SECAM<br />
Bt470M = 4,
/// Rec. ITU-R BT.470-6 System B, G (historical)<br />
Bt470Bg = 5,
/// Rec. ITU-R BT.601-7 525 or 625<br />
/// Rec. ITU-R BT.1358-1 525 or 625 (historical)<br />
/// Rec. ITU-R BT.1700-0 NTSC SMPTE 170M (2004)<br />
/// (functionally the same as the values 1, 14 and 15)<br />
Bt601 = 6,
/// SMPTE 240M (1999) (historical)<br />
Smpte240 = 7,
/// Linear transfer characteristics<br />
Linear = 8,
/// Logarithmic transfer characteristic (100:1 range)<br />
Log100 = 9,
/// Logarithmic transfer characteristic (100 * sqrt(10) : 1 range)<br />
Log100sqrt10 = 10,
/// IEC 61966-2-4<br />
Iec61966 = 11,
/// Rec. ITU-R BT.1361-0 extended colour gamut system (historical)<br />
Bt1361 = 12,
/// IEC 61966-2-1 sRGB or sYCC<br />
Srgb = 13,
/// Rec. ITU-R BT.2020-2 (10-bit system)<br />
/// (functionally the same as the values 1, 6 and 15)<br />
Bt202010bit = 14,
/// Rec. ITU-R BT.2020-2 (12-bit system)<br />
/// (functionally the same as the values 1, 6 and 14)<br />
Bt202012bit = 15,
/// SMPTE ST 2084 for 10-, 12-, 14- and 16-bit systems<br />
/// Rec. ITU-R BT.2100-0 perceptual quantization (PQ) system<br />
Smpte2084 = 16,
/// SMPTE ST 428-1<br />
Smpte428 = 17,
/// ARIB STD-B67<br />
/// Rec. ITU-R BT.2100-0 hybrid log-gamma (HLG) system<br />
Hlg = 18,
}
impl TryFrom<u8> for TransferCharacteristics {
type Error = CmsError;
#[allow(unreachable_patterns)]
fn try_from(value: u8) -> Result<Self, Self::Error> {
match value {
0 | 3 | 19..=255 => Ok(Self::Reserved),
1 => Ok(Self::Bt709),
2 => Ok(Self::Unspecified),
4 => Ok(Self::Bt470M),
5 => Ok(Self::Bt470Bg),
6 => Ok(Self::Bt601),
7 => Ok(Self::Smpte240), // unimplemented
8 => Ok(Self::Linear),
9 => Ok(Self::Log100),
10 => Ok(Self::Log100sqrt10),
11 => Ok(Self::Iec61966), // unimplemented
12 => Ok(Self::Bt1361), // unimplemented
13 => Ok(Self::Srgb),
14 => Ok(Self::Bt202010bit),
15 => Ok(Self::Bt202012bit),
16 => Ok(Self::Smpte2084),
17 => Ok(Self::Smpte428), // unimplemented
18 => Ok(Self::Hlg),
_ => Err(CmsError::InvalidCicp),
}
}
}
impl CicpColorPrimaries {
pub(crate) const fn has_chromaticity(self) -> bool {
self as u8 != Self::Reserved as u8 && self as u8 != Self::Unspecified as u8
}
pub(crate) const fn white_point(self) -> Result<Chromaticity, CmsError> {
Ok(match self {
Self::Reserved => return Err(CmsError::UnsupportedColorPrimaries(self as u8)),
Self::Bt709
| Self::Bt470Bg
| Self::Bt601
| Self::Smpte240
| Self::Bt2020
| Self::Smpte432
| Self::Ebu3213 => Chromaticity::D65,
Self::Unspecified => return Err(CmsError::UnsupportedColorPrimaries(self as u8)),
Self::Bt470M => Chromaticity { x: 0.310, y: 0.316 },
Self::GenericFilm => Chromaticity { x: 0.310, y: 0.316 },
Self::Xyz => Chromaticity {
x: 1. / 3.,
y: 1. / 3.,
},
Self::Smpte431 => Chromaticity { x: 0.314, y: 0.351 },
})
}
}
impl TryFrom<CicpColorPrimaries> for ColorPrimaries {
type Error = CmsError;
fn try_from(value: CicpColorPrimaries) -> Result<Self, Self::Error> {
match value {
CicpColorPrimaries::Reserved => Err(CmsError::UnsupportedColorPrimaries(value as u8)),
CicpColorPrimaries::Bt709 => Ok(ColorPrimaries::BT_709),
CicpColorPrimaries::Unspecified => {
Err(CmsError::UnsupportedColorPrimaries(value as u8))
}
CicpColorPrimaries::Bt470M => Ok(ColorPrimaries::BT_470M),
CicpColorPrimaries::Bt470Bg => Ok(ColorPrimaries::BT_470BG),
CicpColorPrimaries::Bt601 | CicpColorPrimaries::Smpte240 => Ok(ColorPrimaries::BT_601),
CicpColorPrimaries::GenericFilm => Ok(ColorPrimaries::GENERIC_FILM),
CicpColorPrimaries::Bt2020 => Ok(ColorPrimaries::BT_2020),
CicpColorPrimaries::Xyz => Ok(ColorPrimaries::XYZ),
// These two share primaries, but have distinct white points
CicpColorPrimaries::Smpte431 | CicpColorPrimaries::Smpte432 => {
Ok(ColorPrimaries::SMPTE_431)
}
CicpColorPrimaries::Ebu3213 => Ok(ColorPrimaries::EBU_3213),
}
}
}
impl TransferCharacteristics {
pub(crate) fn has_transfer_curve(self) -> bool {
self != Self::Reserved && self != Self::Unspecified
}
}
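// Evaluating the constants below with α = 1.09929682680944 and
// β = 0.018053968510807 gives approximately:
// g ≈ 2.2222, a ≈ 0.9097, b ≈ 0.0903, c ≈ 0.2222, d ≈ 0.0812.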
pub(crate) fn create_rec709_parametric() -> [f32; 5] {
const POW_EXP: f32 = 0.45;
const G: f32 = 1. / POW_EXP;
const B: f32 = (0.09929682680944f64 / 1.09929682680944f64) as f32;
const C: f32 = 1f32 / 4.5f32;
const D: f32 = (4.5f64 * 0.018053968510807f64) as f32;
const A: f32 = (1. / 1.09929682680944f64) as f32;
[G, A, B, C, D]
}
impl TryFrom<TransferCharacteristics> for ToneReprCurve {
type Error = CmsError;
/// See [ICC.1:2010](https://www.color.org/specification/ICC1v43_2010-12.pdf)
/// See [Rec. ITU-R BT.2100-2](https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.2100-2-201807-I!!PDF-E.pdf)
fn try_from(value: TransferCharacteristics) -> Result<Self, Self::Error> {
const NUM_TRC_TABLE_ENTRIES: i32 = 1024;
Ok(match value {
TransferCharacteristics::Reserved => {
return Err(CmsError::UnsupportedTrc(value as u8));
}
TransferCharacteristics::Bt709
| TransferCharacteristics::Bt601
| TransferCharacteristics::Bt202010bit
| TransferCharacteristics::Bt202012bit => {
// The opto-electronic transfer characteristic function (OETF)
// as defined in ITU-T H.273 table 3, row 1:
//
// V = α * Lc^0.45 - (α - 1) for 1 >= Lc >= β
// V = 4.500 * Lc for β > Lc >= 0
//
// Inverting gives the electro-optical transfer characteristic
// function (EOTF) which can be represented as ICC
// parametricCurveType with 4 parameters (ICC.1:2010 Table 5).
// Converting between the two (Lc ↔︎ Y, V ↔︎ X):
//
// Y = (a * X + b)^g for (X >= d)
// Y = c * X for (X < d)
//
// g, a, b, c, d can then be defined in terms of α and β:
//
// g = 1 / 0.45
// a = 1 / α
// b = 1 - 1 / α
// c = 1 / 4.500
// d = 4.500 * β
//
// α and β are determined by solving the piecewise equations to
// ensure continuity of both value and slope at the value β.
// We use the values specified for 10-bit systems in
// https://www.itu.int/rec/R-REC-BT.2020-2-201510-I Table 4
// since this results in values similar to those in available ICC
// profiles after converting to s15Fixed16Number, providing us
// good test coverage.
ToneReprCurve::Parametric(create_rec709_parametric().to_vec())
}
TransferCharacteristics::Unspecified => {
return Err(CmsError::UnsupportedTrc(value as u8));
}
TransferCharacteristics::Bt470M => curve_from_gamma(2.2),
TransferCharacteristics::Bt470Bg => curve_from_gamma(2.8),
TransferCharacteristics::Smpte240 => {
let table = build_trc_table(NUM_TRC_TABLE_ENTRIES, smpte240_to_linear);
ToneReprCurve::Lut(table)
}
TransferCharacteristics::Linear => curve_from_gamma(1.),
TransferCharacteristics::Log100 => {
let table = build_trc_table(NUM_TRC_TABLE_ENTRIES, log100_to_linear);
ToneReprCurve::Lut(table)
}
TransferCharacteristics::Log100sqrt10 => {
let table = build_trc_table(NUM_TRC_TABLE_ENTRIES, log100_sqrt10_to_linear);
ToneReprCurve::Lut(table)
}
TransferCharacteristics::Iec61966 => {
let table = build_trc_table(NUM_TRC_TABLE_ENTRIES, iec61966_to_linear);
ToneReprCurve::Lut(table)
}
TransferCharacteristics::Bt1361 => {
let table = build_trc_table(NUM_TRC_TABLE_ENTRIES, bt1361_to_linear);
ToneReprCurve::Lut(table)
}
TransferCharacteristics::Srgb => {
ToneReprCurve::Parametric(vec![2.4, 1. / 1.055, 0.055 / 1.055, 1. / 12.92, 0.04045])
}
TransferCharacteristics::Smpte2084 => {
let table = build_trc_table(NUM_TRC_TABLE_ENTRIES, pq_to_linear);
ToneReprCurve::Lut(table)
}
TransferCharacteristics::Smpte428 => {
let table = build_trc_table(NUM_TRC_TABLE_ENTRIES, smpte428_to_linear);
ToneReprCurve::Lut(table)
}
TransferCharacteristics::Hlg => {
let table = build_trc_table(NUM_TRC_TABLE_ENTRIES, hlg_to_linear);
ToneReprCurve::Lut(table)
}
})
}
}
/// Matrix Coefficients Enum (from ISO/IEC 23091-4 / MPEG CICP)
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub enum MatrixCoefficients {
Identity = 0, // RGB (Identity matrix)
Bt709 = 1, // Rec. 709
Unspecified = 2, // Unspecified
Reserved = 3, // Reserved
Fcc = 4, // FCC
Bt470Bg = 5, // BT.470BG / BT.601-625
Smpte170m = 6, // SMPTE 170M / BT.601-525
Smpte240m = 7, // SMPTE 240M
YCgCo = 8, // YCgCo
Bt2020Ncl = 9, // BT.2020 (non-constant luminance)
Bt2020Cl = 10, // BT.2020 (constant luminance)
Smpte2085 = 11, // SMPTE ST 2085
ChromaticityDerivedNCL = 12, // Chromaticity-derived non-constant luminance
ChromaticityDerivedCL = 13, // Chromaticity-derived constant luminance
ICtCp = 14, // ICtCp
}
impl TryFrom<u8> for MatrixCoefficients {
type Error = CmsError;
fn try_from(value: u8) -> Result<Self, CmsError> {
match value {
0 => Ok(MatrixCoefficients::Identity),
1 => Ok(MatrixCoefficients::Bt709),
2 => Ok(MatrixCoefficients::Unspecified),
3 => Ok(MatrixCoefficients::Reserved),
4 => Ok(MatrixCoefficients::Fcc),
5 => Ok(MatrixCoefficients::Bt470Bg),
6 => Ok(MatrixCoefficients::Smpte170m),
7 => Ok(MatrixCoefficients::Smpte240m),
8 => Ok(MatrixCoefficients::YCgCo),
9 => Ok(MatrixCoefficients::Bt2020Ncl),
10 => Ok(MatrixCoefficients::Bt2020Cl),
11 => Ok(MatrixCoefficients::Smpte2085),
12 => Ok(MatrixCoefficients::ChromaticityDerivedNCL),
13 => Ok(MatrixCoefficients::ChromaticityDerivedCL),
14 => Ok(MatrixCoefficients::ICtCp),
_ => Err(CmsError::InvalidCicp),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::WHITE_POINT_D65;
#[test]
fn test_to_xyz_using_absolute_coordinates() {
let conversion_matrix = ColorPrimaries::BT_709.transform_to_xyz_d(WHITE_POINT_D65);
assert!((conversion_matrix.v[0][0] - 0.4121524015214193).abs() < 1e-14);
assert!((conversion_matrix.v[1][1] - 0.7153537403945436).abs() < 1e-14);
assert!((conversion_matrix.v[2][2] - 0.9497138466283235).abs() < 1e-14);
}
#[test]
fn test_to_xyz_using_absolute_coordinates_xyz() {
let conversion_matrix = ColorPrimaries::XYZ.transform_to_xyz_d(WHITE_POINT_D65);
assert!((conversion_matrix.v[0][0] - 0.95015469385536477).abs() < 1e-14);
assert!((conversion_matrix.v[1][1] - 1.0).abs() < 1e-14);
assert!((conversion_matrix.v[2][2] - 1.0882590676722474).abs() < 1e-14);
}
#[test]
fn test_to_xyz_using_absolute_coordinates_f() {
let conversion_matrix = ColorPrimaries::BT_709.transform_to_xyz(WHITE_POINT_D65);
assert!((conversion_matrix.v[0][0] - 0.4121524015214193).abs() < 1e-5);
assert!((conversion_matrix.v[1][1] - 0.7153537403945436).abs() < 1e-5);
assert!((conversion_matrix.v[2][2] - 0.9497138466283235).abs() < 1e-5);
}
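// A small CICP mapping sketch (not part of upstream moxcms): reserved
// code points collapse to Reserved, and out-of-range matrix
// coefficients are rejected.
#[test]
fn reserved_and_invalid_cicp_values() {
assert!(matches!(
TransferCharacteristics::try_from(3u8),
Ok(TransferCharacteristics::Reserved)
));
assert!(matches!(MatrixCoefficients::try_from(15u8), Err(_)));
}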
}


@@ -0,0 +1,237 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::avx::cube::CubeAvxFma;
use crate::conversions::avx::interpolator::AvxVectorSse;
use crate::{CmsError, DataColorSpace, InPlaceStage, InterpolationMethod};
use std::arch::x86_64::*;
pub(crate) struct ACurves3AvxFma<'a, const DEPTH: usize> {
pub(crate) curve0: Box<[f32; 65536]>,
pub(crate) curve1: Box<[f32; 65536]>,
pub(crate) curve2: Box<[f32; 65536]>,
pub(crate) clut: &'a [f32],
pub(crate) grid_size: [u8; 3],
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) pcs: DataColorSpace,
}
pub(crate) struct ACurves3OptimizedAvxFma<'a> {
pub(crate) clut: &'a [f32],
pub(crate) grid_size: [u8; 3],
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) pcs: DataColorSpace,
}
pub(crate) struct ACurves3InverseAvxFma<'a, const DEPTH: usize> {
pub(crate) curve0: Box<[f32; 65536]>,
pub(crate) curve1: Box<[f32; 65536]>,
pub(crate) curve2: Box<[f32; 65536]>,
pub(crate) clut: &'a [f32],
pub(crate) grid_size: [u8; 3],
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) pcs: DataColorSpace,
}
impl<const DEPTH: usize> ACurves3AvxFma<'_, DEPTH> {
#[allow(unused_unsafe)]
#[target_feature(enable = "avx2", enable = "fma")]
unsafe fn transform_impl<Fetch: Fn(f32, f32, f32) -> AvxVectorSse>(
&self,
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
unsafe {
let scale_value = (DEPTH - 1) as f32;
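// Quantize each normalized channel to a LUT index in [0, DEPTH - 1] and
// apply the per-channel "A" curves before the CLUT interpolation below.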
for dst in dst.chunks_exact_mut(3) {
let a0 = (dst[0] * scale_value).round().min(scale_value) as u16;
let a1 = (dst[1] * scale_value).round().min(scale_value) as u16;
let a2 = (dst[2] * scale_value).round().min(scale_value) as u16;
let b0 = self.curve0[a0 as usize];
let b1 = self.curve1[a1 as usize];
let b2 = self.curve2[a2 as usize];
let v = fetch(b0, b1, b2).v;
dst[0] = f32::from_bits(_mm_extract_ps::<0>(v) as u32);
dst[1] = f32::from_bits(_mm_extract_ps::<1>(v) as u32);
dst[2] = f32::from_bits(_mm_extract_ps::<2>(v) as u32);
}
}
Ok(())
}
}
impl ACurves3OptimizedAvxFma<'_> {
#[allow(unused_unsafe)]
#[target_feature(enable = "avx2", enable = "fma")]
unsafe fn transform_impl<Fetch: Fn(f32, f32, f32) -> AvxVectorSse>(
&self,
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
unsafe {
for dst in dst.chunks_exact_mut(3) {
let a0 = dst[0];
let a1 = dst[1];
let a2 = dst[2];
let v = fetch(a0, a1, a2).v;
dst[0] = f32::from_bits(_mm_extract_ps::<0>(v) as u32);
dst[1] = f32::from_bits(_mm_extract_ps::<1>(v) as u32);
dst[2] = f32::from_bits(_mm_extract_ps::<2>(v) as u32);
}
}
Ok(())
}
}
impl<const DEPTH: usize> InPlaceStage for ACurves3AvxFma<'_, DEPTH> {
fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
let lut = CubeAvxFma::new(self.clut, self.grid_size, 3);
unsafe {
// If PCS is Lab or XYZ, linear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(dst, |x, y, z| lut.tetra_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(dst, |x, y, z| lut.pyramid_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(dst, |x, y, z| lut.prism_vec3(x, y, z))?;
}
InterpolationMethod::Linear => {
self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z))?;
}
}
}
Ok(())
}
}
impl InPlaceStage for ACurves3OptimizedAvxFma<'_> {
fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
let lut = CubeAvxFma::new(self.clut, self.grid_size, 3);
unsafe {
// If PCS is Lab or XYZ, linear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(dst, |x, y, z| lut.tetra_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(dst, |x, y, z| lut.pyramid_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(dst, |x, y, z| lut.prism_vec3(x, y, z))?;
}
InterpolationMethod::Linear => {
self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z))?;
}
}
}
Ok(())
}
}
impl<const DEPTH: usize> ACurves3InverseAvxFma<'_, DEPTH> {
#[allow(unused_unsafe)]
#[target_feature(enable = "avx2", enable = "fma")]
unsafe fn transform_impl<Fetch: Fn(f32, f32, f32) -> AvxVectorSse>(
&self,
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
unsafe {
let v_scale_value = _mm_set1_ps((DEPTH as u32 - 1u32) as f32);
for dst in dst.chunks_exact_mut(3) {
let mut v = fetch(dst[0], dst[1], dst[2]).v;
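// Scale the interpolated result to [0, DEPTH - 1], clamp, and convert to
// integer lane indices for the inverse per-channel curve lookup.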
v = _mm_mul_ps(v, v_scale_value);
v = _mm_min_ps(v, v_scale_value);
v = _mm_max_ps(v, _mm_setzero_ps());
let c = _mm_cvtps_epi32(v);
let a0 = _mm_extract_epi32::<0>(c) as u16;
let a1 = _mm_extract_epi32::<1>(c) as u16;
let a2 = _mm_extract_epi32::<2>(c) as u16;
let b0 = self.curve0[a0 as usize];
let b1 = self.curve1[a1 as usize];
let b2 = self.curve2[a2 as usize];
dst[0] = b0;
dst[1] = b1;
dst[2] = b2;
}
}
Ok(())
}
}
impl<const DEPTH: usize> InPlaceStage for ACurves3InverseAvxFma<'_, DEPTH> {
fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
let lut = CubeAvxFma::new(self.clut, self.grid_size, 3);
unsafe {
// If PCS is Lab or XYZ, linear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(dst, |x, y, z| lut.tetra_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(dst, |x, y, z| lut.pyramid_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(dst, |x, y, z| lut.prism_vec3(x, y, z))?;
}
InterpolationMethod::Linear => {
self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z))?;
}
}
}
Ok(())
}
}


@@ -0,0 +1,182 @@
// /*
// * // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
// * //
// * // Redistribution and use in source and binary forms, with or without modification,
// * // are permitted provided that the following conditions are met:
// * //
// * // 1. Redistributions of source code must retain the above copyright notice, this
// * // list of conditions and the following disclaimer.
// * //
// * // 2. Redistributions in binary form must reproduce the above copyright notice,
// * // this list of conditions and the following disclaimer in the documentation
// * // and/or other materials provided with the distribution.
// * //
// * // 3. Neither the name of the copyright holder nor the names of its
// * // contributors may be used to endorse or promote products derived from
// * // this software without specific prior written permission.
// * //
// * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// */
use crate::conversions::avx::hypercube::HypercubeAvx;
use crate::conversions::avx::interpolator::AvxVectorSse;
use crate::{CmsError, DataColorSpace, InterpolationMethod, Stage};
use std::arch::x86_64::*;
pub(crate) struct ACurves4x3AvxFma<'a, const DEPTH: usize> {
pub(crate) curve0: Box<[f32; 65536]>,
pub(crate) curve1: Box<[f32; 65536]>,
pub(crate) curve2: Box<[f32; 65536]>,
pub(crate) curve3: Box<[f32; 65536]>,
pub(crate) clut: &'a [f32],
pub(crate) grid_size: [u8; 4],
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) pcs: DataColorSpace,
}
pub(crate) struct ACurves4x3AvxFmaOptimized<'a> {
pub(crate) clut: &'a [f32],
pub(crate) grid_size: [u8; 4],
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) pcs: DataColorSpace,
}
impl<const DEPTH: usize> ACurves4x3AvxFma<'_, DEPTH> {
#[allow(unused_unsafe)]
#[target_feature(enable = "avx2", enable = "fma")]
unsafe fn transform_impl<Fetch: Fn(f32, f32, f32, f32) -> AvxVectorSse>(
&self,
src: &[f32],
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
let scale_value = (DEPTH - 1) as f32;
assert_eq!(src.len() / 4, dst.len() / 3);
unsafe {
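// Quantize each CMYK channel to a LUT index in [0, DEPTH - 1] and apply
// the per-channel "A" curves before the 4D CLUT interpolation.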
for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(3)) {
let a0 = (src[0] * scale_value).round().min(scale_value) as u16;
let a1 = (src[1] * scale_value).round().min(scale_value) as u16;
let a2 = (src[2] * scale_value).round().min(scale_value) as u16;
let a3 = (src[3] * scale_value).round().min(scale_value) as u16;
let c = self.curve0[a0 as usize];
let m = self.curve1[a1 as usize];
let y = self.curve2[a2 as usize];
let k = self.curve3[a3 as usize];
let v = fetch(c, m, y, k).v;
dst[0] = f32::from_bits(_mm_extract_ps::<0>(v) as u32);
dst[1] = f32::from_bits(_mm_extract_ps::<1>(v) as u32);
dst[2] = f32::from_bits(_mm_extract_ps::<2>(v) as u32);
}
}
Ok(())
}
}
impl ACurves4x3AvxFmaOptimized<'_> {
#[allow(unused_unsafe)]
#[target_feature(enable = "avx2", enable = "fma")]
unsafe fn transform_impl<Fetch: Fn(f32, f32, f32, f32) -> AvxVectorSse>(
&self,
src: &[f32],
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
assert_eq!(src.len() / 4, dst.len() / 3);
unsafe {
for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(3)) {
let c = src[0];
let m = src[1];
let y = src[2];
let k = src[3];
let v = fetch(c, m, y, k).v;
dst[0] = f32::from_bits(_mm_extract_ps::<0>(v) as u32);
dst[1] = f32::from_bits(_mm_extract_ps::<1>(v) as u32);
dst[2] = f32::from_bits(_mm_extract_ps::<2>(v) as u32);
}
}
Ok(())
}
}
impl<const DEPTH: usize> Stage for ACurves4x3AvxFma<'_, DEPTH> {
fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> {
let lut = HypercubeAvx::new(self.clut, self.grid_size, 3);
assert!(std::arch::is_x86_feature_detected!("avx2"));
assert!(std::arch::is_x86_feature_detected!("fma"));
unsafe {
// If PCS is Lab or XYZ, linear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(src, dst, |x, y, z, w| lut.quadlinear_vec3(x, y, z, w));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(src, dst, |x, y, z, w| lut.tetra_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(src, dst, |x, y, z, w| lut.pyramid_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(src, dst, |x, y, z, w| lut.prism_vec3(x, y, z, w))?;
}
InterpolationMethod::Linear => {
self.transform_impl(src, dst, |x, y, z, w| lut.quadlinear_vec3(x, y, z, w))?;
}
}
}
Ok(())
}
}
impl Stage for ACurves4x3AvxFmaOptimized<'_> {
fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> {
let lut = HypercubeAvx::new(self.clut, self.grid_size, 3);
assert!(std::arch::is_x86_feature_detected!("avx2"));
assert!(std::arch::is_x86_feature_detected!("fma"));
unsafe {
// If PCS is Lab or XYZ, linear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(src, dst, |x, y, z, w| lut.quadlinear_vec3(x, y, z, w));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(src, dst, |x, y, z, w| lut.tetra_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(src, dst, |x, y, z, w| lut.pyramid_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(src, dst, |x, y, z, w| lut.prism_vec3(x, y, z, w))?;
}
InterpolationMethod::Linear => {
self.transform_impl(src, dst, |x, y, z, w| lut.quadlinear_vec3(x, y, z, w))?;
}
}
}
Ok(())
}
}


@@ -0,0 +1,445 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::avx::interpolator::AvxVectorSse;
use crate::math::{FusedMultiplyAdd, FusedMultiplyNegAdd};
use std::arch::x86_64::*;
use std::ops::{Add, Mul, Sub};
/// 3D CLUT AVX helper.
///
/// Represents a hexahedron.
pub(crate) struct CubeAvxFma<'a> {
array: &'a [f32],
x_stride: u32,
y_stride: u32,
grid_size: [u8; 3],
}
struct HexahedronFetch3<'a> {
array: &'a [f32],
x_stride: u32,
y_stride: u32,
}
trait CubeFetch<T> {
fn fetch(&self, x: i32, y: i32, z: i32) -> T;
}
impl CubeFetch<AvxVectorSse> for HexahedronFetch3<'_> {
#[inline(always)]
fn fetch(&self, x: i32, y: i32, z: i32) -> AvxVectorSse {
let start = (x as u32 * self.x_stride + y as u32 * self.y_stride + z as u32) as usize * 3;
unsafe {
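// Load three consecutive f32 values without reading past the end of the
// slice: 8 bytes via a 64-bit load, then the third lane inserted separately.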
let k = self.array.get_unchecked(start..);
let lo = _mm_loadu_si64(k.as_ptr() as *const _);
let hi = _mm_insert_epi32::<2>(
lo,
k.get_unchecked(2..).as_ptr().read_unaligned().to_bits() as i32,
);
AvxVectorSse {
v: _mm_castsi128_ps(hi),
}
}
}
}
impl<'a> CubeAvxFma<'a> {
pub(crate) fn new(arr: &'a [f32], grid: [u8; 3], components: usize) -> Self {
// Safety precondition: the array length must equal the full grid size
// times the component count; the unchecked fetches below rely on this.
assert_eq!(
grid[0] as usize * grid[1] as usize * grid[2] as usize * components,
arr.len()
);
let y_stride = grid[1] as u32;
let x_stride = y_stride * grid[0] as u32;
CubeAvxFma {
array: arr,
x_stride,
y_stride,
grid_size: grid,
}
}
#[inline(always)]
fn trilinear<
T: Copy
+ From<f32>
+ Sub<T, Output = T>
+ Mul<T, Output = T>
+ Add<T, Output = T>
+ FusedMultiplyNegAdd<T>
+ FusedMultiplyAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
fetch: impl CubeFetch<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let x_d = T::from(lin_x * scale_x - x as f32);
let y_d = T::from(lin_y * scale_y - y as f32);
let z_d = T::from(lin_z * scale_z - z as f32);
let c000 = fetch.fetch(x, y, z);
let c100 = fetch.fetch(x_n, y, z);
let c010 = fetch.fetch(x, y_n, z);
let c110 = fetch.fetch(x_n, y_n, z);
let c001 = fetch.fetch(x, y, z_n);
let c101 = fetch.fetch(x_n, y, z_n);
let c011 = fetch.fetch(x, y_n, z_n);
let c111 = fetch.fetch(x_n, y_n, z_n);
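// Collapse the cube one axis at a time; each pair is blended as
// c * (1 - t) + d * t, where c.neg_mla(c, t) computes c - c * t = c * (1 - t)
// and .mla(d, t) adds d * t via FMA.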
let c00 = c000.neg_mla(c000, x_d).mla(c100, x_d);
let c10 = c010.neg_mla(c010, x_d).mla(c110, x_d);
let c01 = c001.neg_mla(c001, x_d).mla(c101, x_d);
let c11 = c011.neg_mla(c011, x_d).mla(c111, x_d);
let c0 = c00.neg_mla(c00, y_d).mla(c10, y_d);
let c1 = c01.neg_mla(c01, y_d).mla(c11, y_d);
c0.neg_mla(c0, z_d).mla(c1, z_d)
}
#[cfg(feature = "options")]
#[inline]
fn pyramid<
T: Copy
+ From<f32>
+ Sub<T, Output = T>
+ Mul<T, Output = T>
+ Add<T, Output = T>
+ FusedMultiplyAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
fetch: impl CubeFetch<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let dr = lin_x * scale_x - x as f32;
let dg = lin_y * scale_y - y as f32;
let db = lin_z * scale_z - z as f32;
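// Pyramidal interpolation: the comparisons below split the unit cube into
// three regions and interpolate within the one containing the sample point.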
let c0 = fetch.fetch(x, y, z);
if dr > db && dg > db {
let x0 = fetch.fetch(x_n, y_n, z_n);
let x1 = fetch.fetch(x_n, y_n, z);
let x2 = fetch.fetch(x_n, y, z);
let x3 = fetch.fetch(x, y_n, z);
let c1 = x0 - x1;
let c2 = x2 - c0;
let c3 = x3 - c0;
let c4 = c0 - x3 - x2 + x1;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dr * dg))
} else if db > dr && dg > dr {
let x0 = fetch.fetch(x, y, z_n);
let x1 = fetch.fetch(x_n, y_n, z_n);
let x2 = fetch.fetch(x, y_n, z_n);
let x3 = fetch.fetch(x, y_n, z);
let c1 = x0 - c0;
let c2 = x1 - x2;
let c3 = x3 - c0;
let c4 = c0 - x3 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dg * db))
} else {
let x0 = fetch.fetch(x, y, z_n);
let x1 = fetch.fetch(x_n, y, z);
let x2 = fetch.fetch(x_n, y, z_n);
let x3 = fetch.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - c0;
let c3 = x3 - x2;
let c4 = c0 - x1 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(db * dr))
}
}
#[cfg(feature = "options")]
#[inline]
fn tetra<
T: Copy
+ From<f32>
+ Sub<T, Output = T>
+ Mul<T, Output = T>
+ Add<T, Output = T>
+ FusedMultiplyAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
fetch: impl CubeFetch<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let rx = lin_x * scale_x - x as f32;
let ry = lin_y * scale_y - y as f32;
let rz = lin_z * scale_z - z as f32;
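// Tetrahedral interpolation: the ordering of the fractional offsets
// (rx, ry, rz) selects one of six tetrahedra partitioning the unit cube;
// the result is c0 plus three edge differences weighted by the offsets.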
let c0 = fetch.fetch(x, y, z);
let c2;
let c1;
let c3;
if rx >= ry {
if ry >= rz {
//rx >= ry && ry >= rz
c1 = fetch.fetch(x_n, y, z) - c0;
c2 = fetch.fetch(x_n, y_n, z) - fetch.fetch(x_n, y, z);
c3 = fetch.fetch(x_n, y_n, z_n) - fetch.fetch(x_n, y_n, z);
} else if rx >= rz {
//rx >= rz && rz >= ry
c1 = fetch.fetch(x_n, y, z) - c0;
c2 = fetch.fetch(x_n, y_n, z_n) - fetch.fetch(x_n, y, z_n);
c3 = fetch.fetch(x_n, y, z_n) - fetch.fetch(x_n, y, z);
} else {
//rz > rx && rx >= ry
c1 = fetch.fetch(x_n, y, z_n) - fetch.fetch(x, y, z_n);
c2 = fetch.fetch(x_n, y_n, z_n) - fetch.fetch(x_n, y, z_n);
c3 = fetch.fetch(x, y, z_n) - c0;
}
} else if rx >= rz {
//ry > rx && rx >= rz
c1 = fetch.fetch(x_n, y_n, z) - fetch.fetch(x, y_n, z);
c2 = fetch.fetch(x, y_n, z) - c0;
c3 = fetch.fetch(x_n, y_n, z_n) - fetch.fetch(x_n, y_n, z);
} else if ry >= rz {
//ry >= rz && rz > rx
c1 = fetch.fetch(x_n, y_n, z_n) - fetch.fetch(x, y_n, z_n);
c2 = fetch.fetch(x, y_n, z) - c0;
c3 = fetch.fetch(x, y_n, z_n) - fetch.fetch(x, y_n, z);
} else {
//rz > ry && ry > rx
c1 = fetch.fetch(x_n, y_n, z_n) - fetch.fetch(x, y_n, z_n);
c2 = fetch.fetch(x, y_n, z_n) - fetch.fetch(x, y, z_n);
c3 = fetch.fetch(x, y, z_n) - c0;
}
let s0 = c0.mla(c1, T::from(rx));
let s1 = s0.mla(c2, T::from(ry));
s1.mla(c3, T::from(rz))
}
#[cfg(feature = "options")]
#[inline]
fn prism<
T: Copy
+ From<f32>
+ Sub<T, Output = T>
+ Mul<T, Output = T>
+ Add<T, Output = T>
+ FusedMultiplyAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
fetch: impl CubeFetch<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let dr = lin_x * scale_x - x as f32;
let dg = lin_y * scale_y - y as f32;
let db = lin_z * scale_z - z as f32;
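// Prism interpolation: the db >= dr comparison splits the cube into two
// triangular prisms, each interpolated from c0 plus five corner fetches.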
let c0 = fetch.fetch(x, y, z);
if db >= dr {
let x0 = fetch.fetch(x, y, z_n);
let x1 = fetch.fetch(x_n, y, z_n);
let x2 = fetch.fetch(x, y_n, z);
let x3 = fetch.fetch(x, y_n, z_n);
let x4 = fetch.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - x0;
let c3 = x2 - c0;
let c4 = c0 - x2 - x0 + x3;
let c5 = x0 - x3 - x1 + x4;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
} else {
let x0 = fetch.fetch(x_n, y, z);
let x1 = fetch.fetch(x_n, y, z_n);
let x2 = fetch.fetch(x, y_n, z);
let x3 = fetch.fetch(x_n, y_n, z);
let x4 = fetch.fetch(x_n, y_n, z_n);
let c1 = x1 - x0;
let c2 = x0 - c0;
let c3 = x2 - c0;
let c4 = x0 - x3 - x1 + x4;
let c5 = c0 - x2 - x0 + x3;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
}
}
#[inline]
pub(crate) fn trilinear_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> AvxVectorSse {
self.trilinear(
lin_x,
lin_y,
lin_z,
HexahedronFetch3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
},
)
}
#[cfg(feature = "options")]
#[inline]
pub(crate) fn prism_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> AvxVectorSse {
self.prism(
lin_x,
lin_y,
lin_z,
HexahedronFetch3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
},
)
}
#[cfg(feature = "options")]
#[inline]
pub(crate) fn pyramid_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> AvxVectorSse {
self.pyramid(
lin_x,
lin_y,
lin_z,
HexahedronFetch3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
},
)
}
#[cfg(feature = "options")]
#[inline]
pub(crate) fn tetra_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> AvxVectorSse {
self.tetra(
lin_x,
lin_y,
lin_z,
HexahedronFetch3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
},
)
}
}

@@ -0,0 +1,644 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::avx::interpolator::AvxVectorSse;
use crate::math::{FusedMultiplyAdd, FusedMultiplyNegAdd};
use crate::nd_array::lerp;
use std::arch::x86_64::*;
use std::ops::{Add, Mul, Sub};
/// 4D CLUT helper.
///
/// Represents a hypercube sampled on a regular 4D grid.
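///
/// A minimal usage sketch (the grid extent and component count below are
/// illustrative, not prescribed; inputs are expected in `[0, 1]`):
///
/// ```ignore
/// let cube = HypercubeAvx::new(&clut, [17, 17, 17, 17], 3);
/// let rgb = cube.quadlinear_vec3(c, m, y, k);
/// ```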
pub(crate) struct HypercubeAvx<'a> {
array: &'a [f32],
x_stride: u32,
y_stride: u32,
z_stride: u32,
grid_size: [u8; 4],
}
trait Fetcher4<T> {
fn fetch(&self, x: i32, y: i32, z: i32, w: i32) -> T;
}
struct Fetch4Vec3<'a> {
array: &'a [f32],
x_stride: u32,
y_stride: u32,
z_stride: u32,
}
impl Fetcher4<AvxVectorSse> for Fetch4Vec3<'_> {
#[inline(always)]
fn fetch(&self, x: i32, y: i32, z: i32, w: i32) -> AvxVectorSse {
let start = (x as u32 * self.x_stride
+ y as u32 * self.y_stride
+ z as u32 * self.z_stride
+ w as u32) as usize
* 3;
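// Load three consecutive f32 values as [v0, v1, v2, 0]: the 64-bit load
// fills the two low lanes (upper lanes zeroed), then the third value's
// bits are inserted into lane 2.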
unsafe {
let k = self.array.get_unchecked(start..);
let lo = _mm_loadu_si64(k.as_ptr() as *const _);
let hi = _mm_insert_epi32::<2>(
lo,
k.get_unchecked(2..).as_ptr().read_unaligned().to_bits() as i32,
);
AvxVectorSse {
v: _mm_castsi128_ps(hi),
}
}
}
}
impl<'a> HypercubeAvx<'a> {
pub(crate) fn new(arr: &'a [f32], grid: [u8; 4], components: usize) -> Self {
// Safety precondition: the array length must equal the full grid volume
// multiplied by the component count. Keep this check in sync with every
// place a hypercube is constructed.
assert_eq!(
grid[0] as usize * grid[1] as usize * grid[2] as usize * grid[3] as usize * components,
arr.len()
);
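// Note: the stride math below assumes a uniform grid (the call sites in
// this crate pass the same extent for every axis).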
let z_stride = grid[2] as u32;
let y_stride = z_stride * grid[1] as u32;
let x_stride = y_stride * grid[0] as u32;
HypercubeAvx {
array: arr,
x_stride,
y_stride,
z_stride,
grid_size: grid,
}
}
#[inline(always)]
fn quadlinear<
T: From<f32>
+ Add<T, Output = T>
+ Mul<T, Output = T>
+ FusedMultiplyAdd<T>
+ Sub<T, Output = T>
+ Copy
+ FusedMultiplyNegAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
r: impl Fetcher4<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let lin_w = lin_w.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let scale_w = (self.grid_size[3] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let w = (lin_w * scale_w).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let w_n = (lin_w * scale_w).ceil() as i32;
let x_d = T::from(lin_x * scale_x - x as f32);
let y_d = T::from(lin_y * scale_y - y as f32);
let z_d = T::from(lin_z * scale_z - z as f32);
let w_d = T::from(lin_w * scale_w - w as f32);
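// Full 4D linear interpolation: two trilinear interpolations, one on the
// `w` slice and one on the `w_n` slice, followed by a final lerp along w.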
let r_x1 = lerp(r.fetch(x, y, z, w), r.fetch(x_n, y, z, w), x_d);
let r_x2 = lerp(r.fetch(x, y_n, z, w), r.fetch(x_n, y_n, z, w), x_d);
let r_y1 = lerp(r_x1, r_x2, y_d);
let r_x3 = lerp(r.fetch(x, y, z_n, w), r.fetch(x_n, y, z_n, w), x_d);
let r_x4 = lerp(r.fetch(x, y_n, z_n, w), r.fetch(x_n, y_n, z_n, w), x_d);
let r_y2 = lerp(r_x3, r_x4, y_d);
let r_z1 = lerp(r_y1, r_y2, z_d);
let r_x1 = lerp(r.fetch(x, y, z, w_n), r.fetch(x_n, y, z, w_n), x_d);
let r_x2 = lerp(r.fetch(x, y_n, z, w_n), r.fetch(x_n, y_n, z, w_n), x_d);
let r_y1 = lerp(r_x1, r_x2, y_d);
let r_x3 = lerp(r.fetch(x, y, z_n, w_n), r.fetch(x_n, y, z_n, w_n), x_d);
let r_x4 = lerp(r.fetch(x, y_n, z_n, w_n), r.fetch(x_n, y_n, z_n, w_n), x_d);
let r_y2 = lerp(r_x3, r_x4, y_d);
let r_z2 = lerp(r_y1, r_y2, z_d);
lerp(r_z1, r_z2, w_d)
}
#[inline(always)]
pub(crate) fn quadlinear_vec3(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
) -> AvxVectorSse {
self.quadlinear(
lin_x,
lin_y,
lin_z,
lin_w,
Fetch4Vec3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
z_stride: self.z_stride,
},
)
}
#[cfg(feature = "options")]
#[inline(always)]
fn pyramid<
T: From<f32>
+ Add<T, Output = T>
+ Mul<T, Output = T>
+ FusedMultiplyAdd<T>
+ Sub<T, Output = T>
+ Copy
+ FusedMultiplyNegAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
r: impl Fetcher4<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let lin_w = lin_w.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let scale_w = (self.grid_size[3] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let w = (lin_w * scale_w).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let w_n = (lin_w * scale_w).ceil() as i32;
let dr = lin_x * scale_x - x as f32;
let dg = lin_y * scale_y - y as f32;
let db = lin_z * scale_z - z as f32;
let dw = lin_w * scale_w - w as f32;
let c0 = r.fetch(x, y, z, w);
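// Pyramidal interpolation, evaluated on the `w` slice: the unit cell is
// split into three square pyramids chosen by which fractional offset
// dominates; the same selection is repeated on the `w_n` slice below and
// the two results are blended along w.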
let w0 = if dr > db && dg > db {
let x0 = r.fetch(x_n, y_n, z_n, w);
let x1 = r.fetch(x_n, y_n, z, w);
let x2 = r.fetch(x_n, y, z, w);
let x3 = r.fetch(x, y_n, z, w);
let c1 = x0 - x1;
let c2 = x2 - c0;
let c3 = x3 - c0;
let c4 = c0 - x3 - x2 + x1;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dr * dg))
} else if db > dr && dg > dr {
let x0 = r.fetch(x, y, z_n, w);
let x1 = r.fetch(x_n, y_n, z_n, w);
let x2 = r.fetch(x, y_n, z_n, w);
let x3 = r.fetch(x, y_n, z, w);
let c1 = x0 - c0;
let c2 = x1 - x2;
let c3 = x3 - c0;
let c4 = c0 - x3 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dg * db))
} else {
let x0 = r.fetch(x, y, z_n, w);
let x1 = r.fetch(x_n, y, z, w);
let x2 = r.fetch(x_n, y, z_n, w);
let x3 = r.fetch(x_n, y_n, z_n, w);
let c1 = x0 - c0;
let c2 = x1 - c0;
let c3 = x3 - x2;
let c4 = c0 - x1 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(db * dr))
};
let c0 = r.fetch(x, y, z, w_n);
let w1 = if dr > db && dg > db {
let x0 = r.fetch(x_n, y_n, z_n, w_n);
let x1 = r.fetch(x_n, y_n, z, w_n);
let x2 = r.fetch(x_n, y, z, w_n);
let x3 = r.fetch(x, y_n, z, w_n);
let c1 = x0 - x1;
let c2 = x2 - c0;
let c3 = x3 - c0;
let c4 = c0 - x3 - x2 + x1;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dr * dg))
} else if db > dr && dg > dr {
let x0 = r.fetch(x, y, z_n, w_n);
let x1 = r.fetch(x_n, y_n, z_n, w_n);
let x2 = r.fetch(x, y_n, z_n, w_n);
let x3 = r.fetch(x, y_n, z, w_n);
let c1 = x0 - c0;
let c2 = x1 - x2;
let c3 = x3 - c0;
let c4 = c0 - x3 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dg * db))
} else {
let x0 = r.fetch(x, y, z_n, w_n);
let x1 = r.fetch(x_n, y, z, w_n);
let x2 = r.fetch(x_n, y, z_n, w_n);
let x3 = r.fetch(x_n, y_n, z_n, w_n);
let c1 = x0 - c0;
let c2 = x1 - c0;
let c3 = x3 - x2;
let c4 = c0 - x1 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(db * dr))
};
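// neg_mla(w0, w0, dw) computes w0 - w0 * dw, so the result is the lerp
// w0 * (1 - dw) + w1 * dw along the fourth axis.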
w0.neg_mla(w0, T::from(dw)).mla(w1, T::from(dw))
}
#[cfg(feature = "options")]
#[inline(always)]
pub(crate) fn pyramid_vec3(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
) -> AvxVectorSse {
self.pyramid(
lin_x,
lin_y,
lin_z,
lin_w,
Fetch4Vec3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
z_stride: self.z_stride,
},
)
}
#[cfg(feature = "options")]
#[inline(always)]
fn prism<
T: From<f32>
+ Add<T, Output = T>
+ Mul<T, Output = T>
+ FusedMultiplyAdd<T>
+ Sub<T, Output = T>
+ Copy
+ FusedMultiplyNegAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
r: impl Fetcher4<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let lin_w = lin_w.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let scale_w = (self.grid_size[3] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let w = (lin_w * scale_w).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let w_n = (lin_w * scale_w).ceil() as i32;
let dr = lin_x * scale_x - x as f32;
let dg = lin_y * scale_y - y as f32;
let db = lin_z * scale_z - z as f32;
let dw = lin_w * scale_w - w as f32;
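// Prismatic interpolation in 4D: the 3D prism reconstruction (as in the
// cube case) is evaluated on the `w` slice and again on the `w_n` slice,
// then the two results are blended along w.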
let c0 = r.fetch(x, y, z, w);
let w0 = if db >= dr {
let x0 = r.fetch(x, y, z_n, w);
let x1 = r.fetch(x_n, y, z_n, w);
let x2 = r.fetch(x, y_n, z, w);
let x3 = r.fetch(x, y_n, z_n, w);
let x4 = r.fetch(x_n, y_n, z_n, w);
let c1 = x0 - c0;
let c2 = x1 - x0;
let c3 = x2 - c0;
let c4 = c0 - x2 - x0 + x3;
let c5 = x0 - x3 - x1 + x4;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
} else {
let x0 = r.fetch(x_n, y, z, w);
let x1 = r.fetch(x_n, y, z_n, w);
let x2 = r.fetch(x, y_n, z, w);
let x3 = r.fetch(x_n, y_n, z, w);
let x4 = r.fetch(x_n, y_n, z_n, w);
let c1 = x1 - x0;
let c2 = x0 - c0;
let c3 = x2 - c0;
let c4 = x0 - x3 - x1 + x4;
let c5 = c0 - x2 - x0 + x3;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
};
let c0 = r.fetch(x, y, z, w_n);
let w1 = if db >= dr {
let x0 = r.fetch(x, y, z_n, w_n);
let x1 = r.fetch(x_n, y, z_n, w_n);
let x2 = r.fetch(x, y_n, z, w_n);
let x3 = r.fetch(x, y_n, z_n, w_n);
let x4 = r.fetch(x_n, y_n, z_n, w_n);
let c1 = x0 - c0;
let c2 = x1 - x0;
let c3 = x2 - c0;
let c4 = c0 - x2 - x0 + x3;
let c5 = x0 - x3 - x1 + x4;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
} else {
let x0 = r.fetch(x_n, y, z, w_n);
let x1 = r.fetch(x_n, y, z_n, w_n);
let x2 = r.fetch(x, y_n, z, w_n);
let x3 = r.fetch(x_n, y_n, z, w_n);
let x4 = r.fetch(x_n, y_n, z_n, w_n);
let c1 = x1 - x0;
let c2 = x0 - c0;
let c3 = x2 - c0;
let c4 = x0 - x3 - x1 + x4;
let c5 = c0 - x2 - x0 + x3;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
};
w0.neg_mla(w0, T::from(dw)).mla(w1, T::from(dw))
}
#[cfg(feature = "options")]
#[inline(always)]
pub(crate) fn prism_vec3(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
) -> AvxVectorSse {
self.prism(
lin_x,
lin_y,
lin_z,
lin_w,
Fetch4Vec3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
z_stride: self.z_stride,
},
)
}
#[cfg(feature = "options")]
#[inline(always)]
fn tetra<
T: From<f32>
+ Add<T, Output = T>
+ Mul<T, Output = T>
+ FusedMultiplyAdd<T>
+ Sub<T, Output = T>
+ Copy
+ FusedMultiplyNegAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
r: impl Fetcher4<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let lin_w = lin_w.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let scale_w = (self.grid_size[3] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let w = (lin_w * scale_w).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let w_n = (lin_w * scale_w).ceil() as i32;
let rx = lin_x * scale_x - x as f32;
let ry = lin_y * scale_y - y as f32;
let rz = lin_z * scale_z - z as f32;
let rw = lin_w * scale_w - w as f32;
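// Tetrahedral interpolation in 4D: the same six-tetrahedra selection as
// the 3D case, run on the `w` slice here and repeated on the `w_n` slice
// below, with the two results blended along rw.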
let c0 = r.fetch(x, y, z, w);
let c2;
let c1;
let c3;
if rx >= ry {
if ry >= rz {
//rx >= ry && ry >= rz
c1 = r.fetch(x_n, y, z, w) - c0;
c2 = r.fetch(x_n, y_n, z, w) - r.fetch(x_n, y, z, w);
c3 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x_n, y_n, z, w);
} else if rx >= rz {
//rx >= rz && rz >= ry
c1 = r.fetch(x_n, y, z, w) - c0;
c2 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x_n, y, z_n, w);
c3 = r.fetch(x_n, y, z_n, w) - r.fetch(x_n, y, z, w);
} else {
//rz > rx && rx >= ry
c1 = r.fetch(x_n, y, z_n, w) - r.fetch(x, y, z_n, w);
c2 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x_n, y, z_n, w);
c3 = r.fetch(x, y, z_n, w) - c0;
}
} else if rx >= rz {
//ry > rx && rx >= rz
c1 = r.fetch(x_n, y_n, z, w) - r.fetch(x, y_n, z, w);
c2 = r.fetch(x, y_n, z, w) - c0;
c3 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x_n, y_n, z, w);
} else if ry >= rz {
//ry >= rz && rz > rx
c1 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x, y_n, z_n, w);
c2 = r.fetch(x, y_n, z, w) - c0;
c3 = r.fetch(x, y_n, z_n, w) - r.fetch(x, y_n, z, w);
} else {
//rz > ry && ry > rx
c1 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x, y_n, z_n, w);
c2 = r.fetch(x, y_n, z_n, w) - r.fetch(x, y, z_n, w);
c3 = r.fetch(x, y, z_n, w) - c0;
}
let s0 = c0.mla(c1, T::from(rx));
let s1 = s0.mla(c2, T::from(ry));
let w0 = s1.mla(c3, T::from(rz));
let c0 = r.fetch(x, y, z, w_n);
let c2;
let c1;
let c3;
if rx >= ry {
if ry >= rz {
//rx >= ry && ry >= rz
c1 = r.fetch(x_n, y, z, w_n) - c0;
c2 = r.fetch(x_n, y_n, z, w_n) - r.fetch(x_n, y, z, w_n);
c3 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x_n, y_n, z, w_n);
} else if rx >= rz {
//rx >= rz && rz >= ry
c1 = r.fetch(x_n, y, z, w_n) - c0;
c2 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x_n, y, z_n, w_n);
c3 = r.fetch(x_n, y, z_n, w_n) - r.fetch(x_n, y, z, w_n);
} else {
//rz > rx && rx >= ry
c1 = r.fetch(x_n, y, z_n, w_n) - r.fetch(x, y, z_n, w_n);
c2 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x_n, y, z_n, w_n);
c3 = r.fetch(x, y, z_n, w_n) - c0;
}
} else if rx >= rz {
//ry > rx && rx >= rz
c1 = r.fetch(x_n, y_n, z, w_n) - r.fetch(x, y_n, z, w_n);
c2 = r.fetch(x, y_n, z, w_n) - c0;
c3 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x_n, y_n, z, w_n);
} else if ry >= rz {
//ry >= rz && rz > rx
c1 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x, y_n, z_n, w_n);
c2 = r.fetch(x, y_n, z, w_n) - c0;
c3 = r.fetch(x, y_n, z_n, w_n) - r.fetch(x, y_n, z, w_n);
} else {
//rz > ry && ry > rx
c1 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x, y_n, z_n, w_n);
c2 = r.fetch(x, y_n, z_n, w_n) - r.fetch(x, y, z_n, w_n);
c3 = r.fetch(x, y, z_n, w_n) - c0;
}
let s0 = c0.mla(c1, T::from(rx));
let s1 = s0.mla(c2, T::from(ry));
let w1 = s1.mla(c3, T::from(rz));
w0.neg_mla(w0, T::from(rw)).mla(w1, T::from(rw))
}
#[cfg(feature = "options")]
#[inline(always)]
pub(crate) fn tetra_vec3(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
) -> AvxVectorSse {
self.tetra(
lin_x,
lin_y,
lin_z,
lin_w,
Fetch4Vec3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
z_stride: self.z_stride,
},
)
}
}

File diff suppressed because it is too large

File diff suppressed because it is too large

@@ -0,0 +1,327 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::avx::interpolator::*;
use crate::conversions::avx::interpolator_q0_15::AvxAlignedI16;
use crate::conversions::avx::lut4_to_3_q0_15::TransformLut4To3AvxQ0_15;
use crate::conversions::interpolator::BarycentricWeight;
use crate::conversions::lut_transforms::Lut4x3Factory;
use crate::transform::PointeeSizeExpressible;
use crate::{
BarycentricWeightScale, CmsError, DataColorSpace, InterpolationMethod, Layout,
TransformExecutor, TransformOptions,
};
use num_traits::AsPrimitive;
use std::arch::x86_64::*;
use std::marker::PhantomData;
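/// AVX 4-to-3 LUT transform (e.g. CMYK -> RGB): the fourth input channel
/// selects two adjacent 3D CLUT slices, each is interpolated in the
/// remaining three channels, and the results are blended.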
struct TransformLut4To3Avx<
T,
U,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> {
lut: Vec<SseAlignedF32>,
_phantom: PhantomData<T>,
_phantom1: PhantomData<U>,
interpolation_method: InterpolationMethod,
weights: Box<[BarycentricWeight<f32>; BINS]>,
color_space: DataColorSpace,
is_linear: bool,
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformLut4To3Avx<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
#[allow(unused_unsafe)]
#[target_feature(enable = "avx2", enable = "fma")]
unsafe fn transform_chunk<'b, Interpolator: AvxMdInterpolationDouble<'b, GRID_SIZE>>(
&'b self,
src: &[T],
dst: &mut [T],
) {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
let grid_size = GRID_SIZE as i32;
let grid_size3 = grid_size * grid_size * grid_size;
let value_scale = unsafe { _mm_set1_ps(((1 << BIT_DEPTH) - 1) as f32) };
let max_value = ((1 << BIT_DEPTH) - 1u32).as_();
for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(channels)) {
let c = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[0],
);
let m = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[1],
);
let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[2],
);
let k = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[3],
);
let k_weights = self.weights[k.as_()];
let w: i32 = k_weights.x;
let w_n: i32 = k_weights.x_n;
let t: f32 = k_weights.w;
let table1 = &self.lut[(w * grid_size3) as usize..];
let table2 = &self.lut[(w_n * grid_size3) as usize..];
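// `table1` and `table2` are the two 3D CLUT slices bracketing the K
// coordinate; each is interpolated in (c, m, y) and the pair is blended
// by `t` below.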
let interpolator = Interpolator::new(table1, table2);
let v = interpolator.inter3_sse(c, m, y, &self.weights);
let (a0, b0) = (v.0.v, v.1.v);
if T::FINITE {
unsafe {
let t0 = _mm_set1_ps(t);
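// _mm_fnmadd_ps(a0, t0, a0) = a0 - a0 * t, so v = a0 * (1 - t) + b0 * t.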
let hp = _mm_fnmadd_ps(a0, t0, a0);
let mut v = _mm_fmadd_ps(b0, t0, hp);
v = _mm_max_ps(v, _mm_setzero_ps());
v = _mm_mul_ps(v, value_scale);
v = _mm_min_ps(v, value_scale);
let jvz = _mm_cvtps_epi32(v);
let x = _mm_extract_epi32::<0>(jvz);
let y = _mm_extract_epi32::<1>(jvz);
let z = _mm_extract_epi32::<2>(jvz);
dst[cn.r_i()] = (x as u32).as_();
dst[cn.g_i()] = (y as u32).as_();
dst[cn.b_i()] = (z as u32).as_();
}
} else {
unsafe {
let t0 = _mm_set1_ps(t);
let hp = _mm_fnmadd_ps(a0, t0, a0);
let v = _mm_fmadd_ps(b0, t0, hp);
dst[cn.r_i()] = f32::from_bits(_mm_extract_ps::<0>(v) as u32).as_();
dst[cn.g_i()] = f32::from_bits(_mm_extract_ps::<1>(v) as u32).as_();
dst[cn.b_i()] = f32::from_bits(_mm_extract_ps::<2>(v) as u32).as_();
}
}
if channels == 4 {
dst[cn.a_i()] = max_value;
}
}
}
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformExecutor<T>
for TransformLut4To3Avx<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
if src.len() % 4 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / 4;
let dst_chunks = dst.len() / channels;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
unsafe {
if self.color_space == DataColorSpace::Lab
|| (self.is_linear && self.color_space == DataColorSpace::Rgb)
|| self.color_space == DataColorSpace::Xyz
{
self.transform_chunk::<TrilinearAvxFmaDouble<GRID_SIZE>>(src, dst);
} else {
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_chunk::<TetrahedralAvxFmaDouble<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_chunk::<PyramidAvxFmaDouble<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_chunk::<PrismaticAvxFmaDouble<GRID_SIZE>>(src, dst);
}
InterpolationMethod::Linear => {
self.transform_chunk::<TrilinearAvxFmaDouble<GRID_SIZE>>(src, dst);
}
}
}
}
Ok(())
}
}
pub(crate) struct AvxLut4x3Factory {}
impl Lut4x3Factory for AvxLut4x3Factory {
fn make_transform_4x3<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible + 'static + Send + Sync,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
>(
lut: Vec<f32>,
options: TransformOptions,
color_space: DataColorSpace,
is_linear: bool,
) -> Box<dyn TransformExecutor<T> + Send + Sync>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, u8>,
(): LutBarycentricReduction<T, u16>,
{
if options.prefer_fixed_point && BIT_DEPTH < 16 {
let q: f32 = if T::FINITE {
((1i32 << BIT_DEPTH as i32) - 1) as f32
} else {
((1i32 << 14i32) - 1) as f32
};
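// Quantize the f32 CLUT into i16 at scale `q`; a fourth zero lane pads
// each entry to four lanes for SIMD-friendly loads.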
let lut = lut
.chunks_exact(3)
.map(|x| {
AvxAlignedI16([
(x[0] * q).round() as i16,
(x[1] * q).round() as i16,
(x[2] * q).round() as i16,
0,
])
})
.collect::<Vec<_>>();
return match options.barycentric_weight_scale {
BarycentricWeightScale::Low => Box::new(TransformLut4To3AvxQ0_15::<
T,
u8,
LAYOUT,
GRID_SIZE,
BIT_DEPTH,
256,
256,
> {
lut,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<i16>::create_ranged_256::<GRID_SIZE>(),
_phantom: PhantomData,
_phantom1: PhantomData,
color_space,
is_linear,
}),
#[cfg(feature = "options")]
BarycentricWeightScale::High => Box::new(TransformLut4To3AvxQ0_15::<
T,
u16,
LAYOUT,
GRID_SIZE,
BIT_DEPTH,
65536,
65536,
> {
lut,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<i16>::create_binned::<GRID_SIZE, 65536>(),
_phantom: PhantomData,
_phantom1: PhantomData,
color_space,
is_linear,
}),
};
}
assert!(
std::arch::is_x86_feature_detected!("fma"),
"Internal configuration error: this must not be called without `fma` support"
);
let lut = lut
.chunks_exact(3)
.map(|x| SseAlignedF32([x[0], x[1], x[2], 0f32]))
.collect::<Vec<_>>();
match options.barycentric_weight_scale {
BarycentricWeightScale::Low => {
Box::new(
TransformLut4To3Avx::<T, u8, LAYOUT, GRID_SIZE, BIT_DEPTH, 256, 256> {
lut,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_ranged_256::<GRID_SIZE>(),
_phantom: PhantomData,
_phantom1: PhantomData,
color_space,
is_linear,
},
)
}
#[cfg(feature = "options")]
BarycentricWeightScale::High => {
Box::new(
TransformLut4To3Avx::<T, u16, LAYOUT, GRID_SIZE, BIT_DEPTH, 65536, 65536> {
lut,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_binned::<GRID_SIZE, 65536>(),
_phantom: PhantomData,
_phantom1: PhantomData,
color_space,
is_linear,
},
)
}
}
}
}

@@ -0,0 +1,207 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::avx::interpolator_q0_15::*;
use crate::conversions::interpolator::BarycentricWeight;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, DataColorSpace, InterpolationMethod, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::x86_64::*;
use std::marker::PhantomData;
pub(crate) struct TransformLut4To3AvxQ0_15<
T,
U,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> {
pub(crate) lut: Vec<AvxAlignedI16>,
pub(crate) _phantom: PhantomData<T>,
pub(crate) _phantom1: PhantomData<U>,
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) weights: Box<[BarycentricWeight<i16>; BINS]>,
pub(crate) color_space: DataColorSpace,
pub(crate) is_linear: bool,
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformLut4To3AvxQ0_15<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
#[allow(unused_unsafe)]
#[target_feature(enable = "avx2")]
unsafe fn transform_chunk<'b, Interpolator: AvxMdInterpolationQ0_15Double<'b, GRID_SIZE>>(
&'b self,
src: &[T],
dst: &mut [T],
) {
unsafe {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
let grid_size = GRID_SIZE as i32;
let grid_size3 = grid_size * grid_size * grid_size;
let f_value_scale = _mm_set1_ps(1. / ((1 << 14i32) - 1) as f32);
let max_value = ((1u32 << BIT_DEPTH) - 1).as_();
let v_max_scale = if T::FINITE {
_mm_set1_epi16(((1i32 << BIT_DEPTH) - 1) as i16)
} else {
_mm_set1_epi16(((1i32 << 14i32) - 1) as i16)
};
for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(channels)) {
let c = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[0],
);
let m = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[1],
);
let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[2],
);
let k = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[3],
);
let k_weights = self.weights[k.as_()];
let w: i32 = k_weights.x;
let w_n: i32 = k_weights.x_n;
const Q: i16 = ((1i32 << 15) - 1) as i16;
let t: i16 = k_weights.w;
let t_n: i16 = Q - t;
let table1 = &self.lut[(w * grid_size3) as usize..];
let table2 = &self.lut[(w_n * grid_size3) as usize..];
let interpolator = Interpolator::new(table1, table2);
let v = interpolator.inter3_sse(c, m, y, &self.weights);
let (a0, b0) = (v.0.v, v.1.v);
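// Blend the two K slices in Q0.15 fixed point: _mm_mulhrs_epi16 is a
// rounded (x * y) >> 15, so v approximates a0 * (1 - t) + b0 * t.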
let hp = _mm_mulhrs_epi16(_mm_set1_epi16(t_n), a0);
let v = _mm_add_epi16(hp, _mm_mulhrs_epi16(b0, _mm_set1_epi16(t)));
if T::FINITE {
let mut o = _mm_max_epi16(v, _mm_setzero_si128());
o = _mm_min_epi16(o, v_max_scale);
let x = _mm_extract_epi16::<0>(o);
let y = _mm_extract_epi16::<1>(o);
let z = _mm_extract_epi16::<2>(o);
dst[cn.r_i()] = (x as u32).as_();
dst[cn.g_i()] = (y as u32).as_();
dst[cn.b_i()] = (z as u32).as_();
} else {
let mut r = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(v));
r = _mm_mul_ps(r, f_value_scale);
dst[cn.r_i()] = f32::from_bits(_mm_extract_ps::<0>(r) as u32).as_();
dst[cn.g_i()] = f32::from_bits(_mm_extract_ps::<1>(r) as u32).as_();
dst[cn.b_i()] = f32::from_bits(_mm_extract_ps::<2>(r) as u32).as_();
}
if channels == 4 {
dst[cn.a_i()] = max_value;
}
}
}
}
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformExecutor<T>
for TransformLut4To3AvxQ0_15<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
if src.len() % 4 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / 4;
let dst_chunks = dst.len() / channels;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
unsafe {
if self.color_space == DataColorSpace::Lab
|| (self.is_linear && self.color_space == DataColorSpace::Rgb)
|| self.color_space == DataColorSpace::Xyz
{
self.transform_chunk::<TrilinearAvxQ0_15Double<GRID_SIZE>>(src, dst);
} else {
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_chunk::<TetrahedralAvxQ0_15Double<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_chunk::<PyramidAvxFmaQ0_15Double<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_chunk::<PrismaticAvxQ0_15Double<GRID_SIZE>>(src, dst);
}
InterpolationMethod::Linear => {
self.transform_chunk::<TrilinearAvxQ0_15Double<GRID_SIZE>>(src, dst);
}
}
}
}
Ok(())
}
}

@@ -0,0 +1,53 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
mod a_curves3;
mod a_curves4x3;
mod cube;
mod hypercube;
mod interpolator;
mod interpolator_q0_15;
mod lut4_to_3;
mod lut4_to_3_q0_15;
mod preheat_lut4x3;
mod rgb_xyz;
mod rgb_xyz_opt;
mod rgb_xyz_q2_13;
mod rgb_xyz_q2_13_opt;
mod t_lut3_to_3;
mod t_lut3_to_3_q0_15;
pub(crate) use a_curves3::{ACurves3AvxFma, ACurves3InverseAvxFma, ACurves3OptimizedAvxFma};
pub(crate) use a_curves4x3::{ACurves4x3AvxFma, ACurves4x3AvxFmaOptimized};
pub(crate) use lut4_to_3::AvxLut4x3Factory;
pub(crate) use preheat_lut4x3::Lut4x3AvxFma;
pub(crate) use rgb_xyz::TransformShaperRgbAvx;
pub(crate) use rgb_xyz_opt::TransformShaperRgbOptAvx;
pub(crate) use rgb_xyz_q2_13::TransformShaperRgbQ2_13Avx;
pub(crate) use rgb_xyz_q2_13_opt::TransformShaperRgbQ2_13OptAvx;
pub(crate) use t_lut3_to_3::AvxLut3x3Factory;

@@ -0,0 +1,135 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::avx::hypercube::HypercubeAvx;
use crate::conversions::avx::interpolator::AvxVectorSse;
use crate::trc::{lut_interp_linear_float, lut_interp_linear_float_clamped};
use crate::{CmsError, DataColorSpace, InterpolationMethod, Stage};
use std::arch::x86_64::*;
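/// 4-to-3 LUT pipeline: per-channel input linearization curves, a 4D CLUT,
/// then per-channel output curves.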
#[derive(Default)]
pub(crate) struct Lut4x3AvxFma {
pub(crate) linearization: [Vec<f32>; 4],
pub(crate) clut: Vec<f32>,
pub(crate) grid_size: u8,
pub(crate) output: [Vec<f32>; 3],
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) pcs: DataColorSpace,
}
impl Lut4x3AvxFma {
#[allow(unused_unsafe)]
#[target_feature(enable = "avx2", enable = "fma")]
unsafe fn transform_impl<Fetch: Fn(f32, f32, f32, f32) -> AvxVectorSse>(
&self,
src: &[f32],
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
let linearization_0 = &self.linearization[0];
let linearization_1 = &self.linearization[1];
let linearization_2 = &self.linearization[2];
let linearization_3 = &self.linearization[3];
unsafe {
let ones = _mm_set1_ps(1.);
for (dest, src) in dst.chunks_exact_mut(3).zip(src.chunks_exact(4)) {
debug_assert!(self.grid_size as i32 >= 1);
let linear_x = lut_interp_linear_float(src[0], linearization_0);
let linear_y = lut_interp_linear_float(src[1], linearization_1);
let linear_z = lut_interp_linear_float(src[2], linearization_2);
let linear_w = lut_interp_linear_float(src[3], linearization_3);
let mut v = fetch(linear_x, linear_y, linear_z, linear_w).v;
v = _mm_max_ps(v, _mm_setzero_ps());
v = _mm_min_ps(v, ones);
let pcs_x = lut_interp_linear_float_clamped(
f32::from_bits(_mm_extract_ps::<0>(v) as u32),
&self.output[0],
);
let pcs_y = lut_interp_linear_float_clamped(
f32::from_bits(_mm_extract_ps::<1>(v) as u32),
&self.output[1],
);
let pcs_z = lut_interp_linear_float_clamped(
f32::from_bits(_mm_extract_ps::<2>(v) as u32),
&self.output[2],
);
dest[0] = pcs_x;
dest[1] = pcs_y;
dest[2] = pcs_z;
}
}
Ok(())
}
}
impl Stage for Lut4x3AvxFma {
fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> {
let l_tbl = HypercubeAvx::new(
&self.clut,
[
self.grid_size,
self.grid_size,
self.grid_size,
self.grid_size,
],
3,
);
assert!(std::arch::is_x86_feature_detected!("avx2"));
assert!(std::arch::is_x86_feature_detected!("fma"));
unsafe {
// If the source PCS is Lab, multilinear (quadlinear) interpolation should be used
if self.pcs == DataColorSpace::Lab {
return self
.transform_impl(src, dst, |x, y, z, w| l_tbl.quadlinear_vec3(x, y, z, w));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(src, dst, |x, y, z, w| l_tbl.tetra_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(src, dst, |x, y, z, w| l_tbl.pyramid_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(src, dst, |x, y, z, w| l_tbl.prism_vec3(x, y, z, w))?
}
InterpolationMethod::Linear => {
self.transform_impl(src, dst, |x, y, z, w| l_tbl.quadlinear_vec3(x, y, z, w))?
}
}
}
Ok(())
}
}

@@ -0,0 +1,325 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::TransformMatrixShaper;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::x86_64::*;
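/// 32-byte-aligned scratch buffer so 256-bit stores can use the aligned
/// `_mm256_store_si256` path.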
#[repr(align(32), C)]
#[derive(Debug)]
pub(crate) struct AvxAlignedU16(pub(crate) [u16; 16]);
pub(crate) struct TransformShaperRgbAvx<
T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> {
pub(crate) profile: TransformMatrixShaper<T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
impl<
T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> TransformShaperRgbAvx<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT>
where
u32: AsPrimitive<T>,
{
#[inline(always)]
unsafe fn transform_impl<const FMA: bool>(
&self,
src: &[T],
dst: &mut [T],
) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
let mut temporary0 = AvxAlignedU16([0; 16]);
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let t = self.profile.adaptation_matrix.transpose();
let scale = (GAMMA_LUT - 1) as f32;
let max_colors: T = ((1 << self.bit_depth) - 1).as_();
unsafe {
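// Each 256-bit register holds one matrix row duplicated across both
// 128-bit lanes, so two pixels are transformed per loop iteration.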
let m0 = _mm256_setr_ps(
t.v[0][0], t.v[0][1], t.v[0][2], 0., t.v[0][0], t.v[0][1], t.v[0][2], 0.,
);
let m1 = _mm256_setr_ps(
t.v[1][0], t.v[1][1], t.v[1][2], 0., t.v[1][0], t.v[1][1], t.v[1][2], 0.,
);
let m2 = _mm256_setr_ps(
t.v[2][0], t.v[2][1], t.v[2][2], 0., t.v[2][0], t.v[2][1], t.v[2][2], 0.,
);
let zeros = _mm_setzero_ps();
let v_scale = _mm256_set1_ps(scale);
let mut src = src;
let mut dst = dst;
let mut src_iter = src.chunks_exact(src_channels * 2);
let dst_iter = dst.chunks_exact_mut(dst_channels * 2);
let (mut r0, mut g0, mut b0, mut a0);
let (mut r1, mut g1, mut b1, mut a1);
if let Some(src) = src_iter.next() {
r0 = _mm_broadcast_ss(&self.profile.r_linear[src[src_cn.r_i()]._as_usize()]);
g0 = _mm_broadcast_ss(&self.profile.g_linear[src[src_cn.g_i()]._as_usize()]);
b0 = _mm_broadcast_ss(&self.profile.b_linear[src[src_cn.b_i()]._as_usize()]);
r1 = _mm_broadcast_ss(
&self.profile.r_linear[src[src_cn.r_i() + src_channels]._as_usize()],
);
g1 = _mm_broadcast_ss(
&self.profile.g_linear[src[src_cn.g_i() + src_channels]._as_usize()],
);
b1 = _mm_broadcast_ss(
&self.profile.b_linear[src[src_cn.b_i() + src_channels]._as_usize()],
);
a0 = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src[src_cn.a_i() + src_channels]
} else {
max_colors
};
} else {
r0 = _mm_setzero_ps();
g0 = _mm_setzero_ps();
b0 = _mm_setzero_ps();
a0 = max_colors;
r1 = _mm_setzero_ps();
g1 = _mm_setzero_ps();
b1 = _mm_setzero_ps();
a1 = max_colors;
}
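// The main loop is software-pipelined: the linearized values of the next
// pixel pair are gathered while the matrix result of the current pair is
// stored, hiding the table-lookup latency.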
for (src, dst) in src_iter.zip(dst_iter) {
let r = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(r0), r1);
let g = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(g0), g1);
let b = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(b0), b1);
let mut v = if FMA {
let v0 = _mm256_mul_ps(r, m0);
let v1 = _mm256_fmadd_ps(g, m1, v0);
_mm256_fmadd_ps(b, m2, v1)
} else {
let v0 = _mm256_mul_ps(r, m0);
let v1 = _mm256_mul_ps(g, m1);
let v2 = _mm256_mul_ps(b, m2);
_mm256_add_ps(_mm256_add_ps(v0, v1), v2)
};
v = _mm256_max_ps(v, _mm256_setzero_ps());
v = _mm256_mul_ps(v, v_scale);
v = _mm256_min_ps(v, v_scale);
let zx = _mm256_cvtps_epi32(v);
_mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, zx);
r0 = _mm_broadcast_ss(&self.profile.r_linear[src[src_cn.r_i()]._as_usize()]);
g0 = _mm_broadcast_ss(&self.profile.g_linear[src[src_cn.g_i()]._as_usize()]);
b0 = _mm_broadcast_ss(&self.profile.b_linear[src[src_cn.b_i()]._as_usize()]);
r1 = _mm_broadcast_ss(
&self.profile.r_linear[src[src_cn.r_i() + src_channels]._as_usize()],
);
g1 = _mm_broadcast_ss(
&self.profile.g_linear[src[src_cn.g_i() + src_channels]._as_usize()],
);
b1 = _mm_broadcast_ss(
&self.profile.b_linear[src[src_cn.b_i() + src_channels]._as_usize()],
);
dst[dst_cn.r_i()] = self.profile.r_gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.g_gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.b_gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a0;
}
dst[dst_cn.r_i() + dst_channels] = self.profile.r_gamma[temporary0.0[8] as usize];
dst[dst_cn.g_i() + dst_channels] = self.profile.g_gamma[temporary0.0[10] as usize];
dst[dst_cn.b_i() + dst_channels] = self.profile.b_gamma[temporary0.0[12] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i() + dst_channels] = a1;
}
a0 = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src[src_cn.a_i() + src_channels]
} else {
max_colors
};
}
if let Some(dst) = dst.chunks_exact_mut(dst_channels * 2).last() {
let r = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(r0), r1);
let g = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(g0), g1);
let b = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(b0), b1);
let mut v = if FMA {
let v0 = _mm256_mul_ps(r, m0);
let v1 = _mm256_fmadd_ps(g, m1, v0);
_mm256_fmadd_ps(b, m2, v1)
} else {
let v0 = _mm256_mul_ps(r, m0);
let v1 = _mm256_mul_ps(g, m1);
let v2 = _mm256_mul_ps(b, m2);
_mm256_add_ps(_mm256_add_ps(v0, v1), v2)
};
v = _mm256_max_ps(v, _mm256_setzero_ps());
v = _mm256_mul_ps(v, v_scale);
v = _mm256_min_ps(v, v_scale);
let zx = _mm256_cvtps_epi32(v);
_mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, zx);
dst[dst_cn.r_i()] = self.profile.r_gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.g_gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.b_gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a0;
}
dst[dst_cn.r_i() + dst_channels] = self.profile.r_gamma[temporary0.0[8] as usize];
dst[dst_cn.g_i() + dst_channels] = self.profile.g_gamma[temporary0.0[10] as usize];
dst[dst_cn.b_i() + dst_channels] = self.profile.b_gamma[temporary0.0[12] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i() + dst_channels] = a1;
}
}
src = src.chunks_exact(src_channels * 2).remainder();
dst = dst.chunks_exact_mut(dst_channels * 2).into_remainder();
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let r = _mm_broadcast_ss(&self.profile.r_linear[src[src_cn.r_i()]._as_usize()]);
let g = _mm_broadcast_ss(&self.profile.g_linear[src[src_cn.g_i()]._as_usize()]);
let b = _mm_broadcast_ss(&self.profile.b_linear[src[src_cn.b_i()]._as_usize()]);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let mut v = if FMA {
let v0 = _mm_mul_ps(r, _mm256_castps256_ps128(m0));
let v1 = _mm_fmadd_ps(g, _mm256_castps256_ps128(m1), v0);
_mm_fmadd_ps(b, _mm256_castps256_ps128(m2), v1)
} else {
let v0 = _mm_mul_ps(r, _mm256_castps256_ps128(m0));
let v1 = _mm_mul_ps(g, _mm256_castps256_ps128(m1));
let v2 = _mm_mul_ps(b, _mm256_castps256_ps128(m2));
_mm_add_ps(_mm_add_ps(v0, v1), v2)
};
v = _mm_max_ps(v, zeros);
v = _mm_mul_ps(v, _mm256_castps256_ps128(v_scale));
v = _mm_min_ps(v, _mm256_castps256_ps128(v_scale));
let zx = _mm_cvtps_epi32(v);
_mm_store_si128(temporary0.0.as_mut_ptr() as *mut _, zx);
dst[dst_cn.r_i()] = self.profile.r_gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.g_gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.b_gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
Ok(())
}
#[target_feature(enable = "avx2", enable = "fma")]
unsafe fn transform_fma(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
unsafe { self.transform_impl::<true>(src, dst) }
}
#[target_feature(enable = "avx2")]
unsafe fn transform_avx(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
unsafe { self.transform_impl::<false>(src, dst) }
}
}
impl<
T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> TransformExecutor<T> for TransformShaperRgbAvx<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
unsafe {
if std::arch::is_x86_feature_detected!("fma") {
self.transform_fma(src, dst)
} else {
self.transform_avx(src, dst)
}
}
}
}

@@ -0,0 +1,323 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::TransformMatrixShaperOptimized;
use crate::conversions::avx::rgb_xyz::AvxAlignedU16;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::x86_64::*;
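/// Matrix-shaper transform variant for profiles where all three channels
/// share a single linearization curve and a single gamma table
/// (`profile.linear` / `profile.gamma`).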
pub(crate) struct TransformShaperRgbOptAvx<
T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> {
pub(crate) profile: TransformMatrixShaperOptimized<T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
impl<
T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> TransformShaperRgbOptAvx<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT>
where
u32: AsPrimitive<T>,
{
#[inline(always)]
unsafe fn transform_impl<const FMA: bool>(
&self,
src: &[T],
dst: &mut [T],
) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
let mut temporary0 = AvxAlignedU16([0; 16]);
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let t = self.profile.adaptation_matrix.transpose();
let scale = (GAMMA_LUT - 1) as f32;
let max_colors: T = ((1 << self.bit_depth) - 1).as_();
unsafe {
let m0 = _mm256_setr_ps(
t.v[0][0], t.v[0][1], t.v[0][2], 0., t.v[0][0], t.v[0][1], t.v[0][2], 0.,
);
let m1 = _mm256_setr_ps(
t.v[1][0], t.v[1][1], t.v[1][2], 0., t.v[1][0], t.v[1][1], t.v[1][2], 0.,
);
let m2 = _mm256_setr_ps(
t.v[2][0], t.v[2][1], t.v[2][2], 0., t.v[2][0], t.v[2][1], t.v[2][2], 0.,
);
let zeros = _mm_setzero_ps();
let v_scale = _mm256_set1_ps(scale);
let mut src = src;
let mut dst = dst;
let mut src_iter = src.chunks_exact(src_channels * 2);
let dst_iter = dst.chunks_exact_mut(dst_channels * 2);
let (mut r0, mut g0, mut b0, mut a0);
let (mut r1, mut g1, mut b1, mut a1);
if let Some(src) = src_iter.next() {
r0 = _mm_broadcast_ss(&self.profile.linear[src[src_cn.r_i()]._as_usize()]);
g0 = _mm_broadcast_ss(&self.profile.linear[src[src_cn.g_i()]._as_usize()]);
b0 = _mm_broadcast_ss(&self.profile.linear[src[src_cn.b_i()]._as_usize()]);
r1 = _mm_broadcast_ss(
&self.profile.linear[src[src_cn.r_i() + src_channels]._as_usize()],
);
g1 = _mm_broadcast_ss(
&self.profile.linear[src[src_cn.g_i() + src_channels]._as_usize()],
);
b1 = _mm_broadcast_ss(
&self.profile.linear[src[src_cn.b_i() + src_channels]._as_usize()],
);
a0 = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src[src_cn.a_i() + src_channels]
} else {
max_colors
};
} else {
r0 = _mm_setzero_ps();
g0 = _mm_setzero_ps();
b0 = _mm_setzero_ps();
a0 = max_colors;
r1 = _mm_setzero_ps();
g1 = _mm_setzero_ps();
b1 = _mm_setzero_ps();
a1 = max_colors;
}
for (src, dst) in src_iter.zip(dst_iter) {
let r = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(r0), r1);
let g = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(g0), g1);
let b = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(b0), b1);
let mut v = if FMA {
let v0 = _mm256_mul_ps(r, m0);
let v1 = _mm256_fmadd_ps(g, m1, v0);
_mm256_fmadd_ps(b, m2, v1)
} else {
let v0 = _mm256_mul_ps(r, m0);
let v1 = _mm256_mul_ps(g, m1);
let v2 = _mm256_mul_ps(b, m2);
_mm256_add_ps(_mm256_add_ps(v0, v1), v2)
};
v = _mm256_max_ps(v, _mm256_setzero_ps());
v = _mm256_mul_ps(v, v_scale);
v = _mm256_min_ps(v, v_scale);
let zx = _mm256_cvtps_epi32(v);
_mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, zx);
r0 = _mm_broadcast_ss(&self.profile.linear[src[src_cn.r_i()]._as_usize()]);
g0 = _mm_broadcast_ss(&self.profile.linear[src[src_cn.g_i()]._as_usize()]);
b0 = _mm_broadcast_ss(&self.profile.linear[src[src_cn.b_i()]._as_usize()]);
r1 = _mm_broadcast_ss(
&self.profile.linear[src[src_cn.r_i() + src_channels]._as_usize()],
);
g1 = _mm_broadcast_ss(
&self.profile.linear[src[src_cn.g_i() + src_channels]._as_usize()],
);
b1 = _mm_broadcast_ss(
&self.profile.linear[src[src_cn.b_i() + src_channels]._as_usize()],
);
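// The i32 lane results were stored through a u16 view, so on little-endian
// x86 the low halves live at even indices: 0, 2, 4 for pixel 0 and 8, 10, 12
// for pixel 1.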
dst[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a0;
}
dst[dst_cn.r_i() + dst_channels] = self.profile.gamma[temporary0.0[8] as usize];
dst[dst_cn.g_i() + dst_channels] = self.profile.gamma[temporary0.0[10] as usize];
dst[dst_cn.b_i() + dst_channels] = self.profile.gamma[temporary0.0[12] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i() + dst_channels] = a1;
}
a0 = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src[src_cn.a_i() + src_channels]
} else {
max_colors
};
}
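// Pipeline epilogue: the last pre-loaded pair still has to be transformed and
// written to the final two-pixel chunk.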
if let Some(dst) = dst.chunks_exact_mut(dst_channels * 2).last() {
let r = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(r0), r1);
let g = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(g0), g1);
let b = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(b0), b1);
let mut v = if FMA {
let v0 = _mm256_mul_ps(r, m0);
let v1 = _mm256_fmadd_ps(g, m1, v0);
_mm256_fmadd_ps(b, m2, v1)
} else {
let v0 = _mm256_mul_ps(r, m0);
let v1 = _mm256_mul_ps(g, m1);
let v2 = _mm256_mul_ps(b, m2);
_mm256_add_ps(_mm256_add_ps(v0, v1), v2)
};
v = _mm256_max_ps(v, _mm256_setzero_ps());
v = _mm256_mul_ps(v, v_scale);
v = _mm256_min_ps(v, v_scale);
let zx = _mm256_cvtps_epi32(v);
_mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, zx);
dst[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a0;
}
dst[dst_cn.r_i() + dst_channels] = self.profile.gamma[temporary0.0[8] as usize];
dst[dst_cn.g_i() + dst_channels] = self.profile.gamma[temporary0.0[10] as usize];
dst[dst_cn.b_i() + dst_channels] = self.profile.gamma[temporary0.0[12] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i() + dst_channels] = a1;
}
}
src = src.chunks_exact(src_channels * 2).remainder();
dst = dst.chunks_exact_mut(dst_channels * 2).into_remainder();
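// Scalar tail: at most one pixel remains; process it with 128-bit ops and the
// low halves of the duplicated matrix rows.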
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let r = _mm_broadcast_ss(&self.profile.linear[src[src_cn.r_i()]._as_usize()]);
let g = _mm_broadcast_ss(&self.profile.linear[src[src_cn.g_i()]._as_usize()]);
let b = _mm_broadcast_ss(&self.profile.linear[src[src_cn.b_i()]._as_usize()]);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let mut v = if FMA {
let v0 = _mm_mul_ps(r, _mm256_castps256_ps128(m0));
let v1 = _mm_fmadd_ps(g, _mm256_castps256_ps128(m1), v0);
_mm_fmadd_ps(b, _mm256_castps256_ps128(m2), v1)
} else {
let v0 = _mm_mul_ps(r, _mm256_castps256_ps128(m0));
let v1 = _mm_mul_ps(g, _mm256_castps256_ps128(m1));
let v2 = _mm_mul_ps(b, _mm256_castps256_ps128(m2));
_mm_add_ps(_mm_add_ps(v0, v1), v2)
};
v = _mm_max_ps(v, zeros);
v = _mm_mul_ps(v, _mm256_castps256_ps128(v_scale));
v = _mm_min_ps(v, _mm256_castps256_ps128(v_scale));
let zx = _mm_cvtps_epi32(v);
_mm_store_si128(temporary0.0.as_mut_ptr() as *mut _, zx);
dst[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
Ok(())
}
#[target_feature(enable = "avx2", enable = "fma")]
unsafe fn transform_fma(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
unsafe { self.transform_impl::<true>(src, dst) }
}
#[target_feature(enable = "avx2")]
unsafe fn transform_avx(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
unsafe { self.transform_impl::<false>(src, dst) }
}
}
impl<
T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> TransformExecutor<T>
for TransformShaperRgbOptAvx<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
unsafe {
if std::arch::is_x86_feature_detected!("fma") {
self.transform_fma(src, dst)
} else {
self.transform_avx(src, dst)
}
}
}
}


@@ -0,0 +1,304 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::avx::rgb_xyz::AvxAlignedU16;
use crate::conversions::rgbxyz_fixed::TransformMatrixShaperFixedPoint;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::x86_64::*;
pub(crate) struct TransformShaperRgbQ2_13Avx<
T: Copy,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> {
pub(crate) profile: TransformMatrixShaperFixedPoint<i32, T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
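// Broadcasts a single i32 from memory into all lanes by reinterpreting it as
// f32, so the memory-operand form of `vbroadcastss` can be used; only the bit
// pattern matters, no float conversion takes place.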
#[inline(always)]
pub(crate) unsafe fn _xmm_broadcast_epi32(f: &i32) -> __m128i {
let float_ref: &f32 = unsafe { &*(f as *const i32 as *const f32) };
unsafe { _mm_castps_si128(_mm_broadcast_ss(float_ref)) }
}
impl<
T: Copy + PointeeSizeExpressible + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> TransformShaperRgbQ2_13Avx<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT, PRECISION>
where
u32: AsPrimitive<T>,
{
#[target_feature(enable = "avx2")]
unsafe fn transform_avx2(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
let mut temporary0 = AvxAlignedU16([0; 16]);
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let t = self.profile.adaptation_matrix.transpose();
let max_colors = ((1 << self.bit_depth) - 1).as_();
unsafe {
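// Fixed-point evaluation via `madd_epi16`: m0 carries the r/g matrix terms as
// interleaved i16 pairs (t[0][j], t[1][j]) and m2 the b terms paired with 1.
// Each input lane packs r | (g << 16), so one madd yields r*t0j + g*t1j; the
// second madd adds b*t2j plus the rounding constant riding in the high half
// of `rnd` (x86 is little-endian, so the `to_ne_bytes` dance lands
// 1 << (PRECISION - 1) in the upper 16 bits of every 32-bit lane).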
let m0 = _mm256_setr_epi16(
t.v[0][0], t.v[1][0], t.v[0][1], t.v[1][1], t.v[0][2], t.v[1][2], 0, 0, t.v[0][0],
t.v[1][0], t.v[0][1], t.v[1][1], t.v[0][2], t.v[1][2], 0, 0,
);
let m2 = _mm256_setr_epi16(
t.v[2][0], 1, t.v[2][1], 1, t.v[2][2], 1, 0, 0, t.v[2][0], 1, t.v[2][1], 1,
t.v[2][2], 1, 0, 0,
);
let rnd_val = ((1i32 << (PRECISION - 1)) as i16).to_ne_bytes();
let rnd = _mm256_set1_epi32(i32::from_ne_bytes([0, 0, rnd_val[0], rnd_val[1]]));
let zeros = _mm256_setzero_si256();
let v_max_value = _mm256_set1_epi32(GAMMA_LUT as i32 - 1);
let mut src = src;
let mut dst = dst;
let mut src_iter = src.chunks_exact(src_channels * 2);
let dst_iter = dst.chunks_exact_mut(dst_channels * 2);
let (mut r0, mut g0, mut b0, mut a0);
let (mut r1, mut g1, mut b1, mut a1);
if let Some(src) = src_iter.next() {
r0 = _xmm_broadcast_epi32(&self.profile.r_linear[src[src_cn.r_i()]._as_usize()]);
g0 = _xmm_broadcast_epi32(&self.profile.g_linear[src[src_cn.g_i()]._as_usize()]);
b0 = _xmm_broadcast_epi32(&self.profile.b_linear[src[src_cn.b_i()]._as_usize()]);
r1 = _xmm_broadcast_epi32(
&self.profile.r_linear[src[src_cn.r_i() + src_channels]._as_usize()],
);
g1 = _xmm_broadcast_epi32(
&self.profile.g_linear[src[src_cn.g_i() + src_channels]._as_usize()],
);
b1 = _xmm_broadcast_epi32(
&self.profile.b_linear[src[src_cn.b_i() + src_channels]._as_usize()],
);
a0 = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src[src_cn.a_i() + src_channels]
} else {
max_colors
};
} else {
r0 = _mm_setzero_si128();
g0 = _mm_setzero_si128();
b0 = _mm_setzero_si128();
a0 = max_colors;
r1 = _mm_setzero_si128();
g1 = _mm_setzero_si128();
b1 = _mm_setzero_si128();
a1 = max_colors;
}
for (src, dst) in src_iter.zip(dst_iter) {
let zr0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(r0), r1);
let mut zg0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(g0), g1);
let zb0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(b0), b1);
zg0 = _mm256_slli_epi32::<16>(zg0);
let zrg0 = _mm256_or_si256(zr0, zg0);
let zbz0 = _mm256_or_si256(zb0, rnd);
let va0 = _mm256_madd_epi16(zrg0, m0);
let va1 = _mm256_madd_epi16(zbz0, m2);
let mut v0 = _mm256_add_epi32(va0, va1);
v0 = _mm256_srai_epi32::<PRECISION>(v0);
v0 = _mm256_max_epi32(v0, zeros);
v0 = _mm256_min_epi32(v0, v_max_value);
_mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, v0);
r0 = _xmm_broadcast_epi32(&self.profile.r_linear[src[src_cn.r_i()]._as_usize()]);
g0 = _xmm_broadcast_epi32(&self.profile.g_linear[src[src_cn.g_i()]._as_usize()]);
b0 = _xmm_broadcast_epi32(&self.profile.b_linear[src[src_cn.b_i()]._as_usize()]);
r1 = _xmm_broadcast_epi32(
&self.profile.r_linear[src[src_cn.r_i() + src_channels]._as_usize()],
);
g1 = _xmm_broadcast_epi32(
&self.profile.g_linear[src[src_cn.g_i() + src_channels]._as_usize()],
);
b1 = _xmm_broadcast_epi32(
&self.profile.b_linear[src[src_cn.b_i() + src_channels]._as_usize()],
);
dst[dst_cn.r_i()] = self.profile.r_gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.g_gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.b_gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a0;
}
dst[dst_cn.r_i() + dst_channels] = self.profile.r_gamma[temporary0.0[8] as usize];
dst[dst_cn.g_i() + dst_channels] = self.profile.g_gamma[temporary0.0[10] as usize];
dst[dst_cn.b_i() + dst_channels] = self.profile.b_gamma[temporary0.0[12] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i() + dst_channels] = a1;
}
a0 = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src[src_cn.a_i() + src_channels]
} else {
max_colors
};
}
if let Some(dst) = dst.chunks_exact_mut(dst_channels * 2).last() {
let zr0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(r0), r1);
let mut zg0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(g0), g1);
let zb0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(b0), b1);
zg0 = _mm256_slli_epi32::<16>(zg0);
let zrg0 = _mm256_or_si256(zr0, zg0);
let zbz0 = _mm256_or_si256(zb0, rnd);
let va0 = _mm256_madd_epi16(zrg0, m0);
let va1 = _mm256_madd_epi16(zbz0, m2);
let mut v0 = _mm256_add_epi32(va0, va1);
v0 = _mm256_srai_epi32::<PRECISION>(v0);
v0 = _mm256_max_epi32(v0, zeros);
v0 = _mm256_min_epi32(v0, v_max_value);
_mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, v0);
dst[dst_cn.r_i()] = self.profile.r_gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.g_gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.b_gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a0;
}
dst[dst_cn.r_i() + dst_channels] = self.profile.r_gamma[temporary0.0[8] as usize];
dst[dst_cn.g_i() + dst_channels] = self.profile.g_gamma[temporary0.0[10] as usize];
dst[dst_cn.b_i() + dst_channels] = self.profile.b_gamma[temporary0.0[12] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i() + dst_channels] = a1;
}
}
src = src.chunks_exact(src_channels * 2).remainder();
dst = dst.chunks_exact_mut(dst_channels * 2).into_remainder();
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let r = _xmm_broadcast_epi32(&self.profile.r_linear[src[src_cn.r_i()]._as_usize()]);
let mut g =
_xmm_broadcast_epi32(&self.profile.g_linear[src[src_cn.g_i()]._as_usize()]);
let b = _xmm_broadcast_epi32(&self.profile.b_linear[src[src_cn.b_i()]._as_usize()]);
g = _mm_slli_epi32::<16>(g);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let zrg0 = _mm_or_si128(r, g);
let zbz0 = _mm_or_si128(b, _mm256_castsi256_si128(rnd));
let v0 = _mm_madd_epi16(zrg0, _mm256_castsi256_si128(m0));
let v1 = _mm_madd_epi16(zbz0, _mm256_castsi256_si128(m2));
let mut v = _mm_add_epi32(v0, v1);
v = _mm_srai_epi32::<PRECISION>(v);
v = _mm_max_epi32(v, _mm_setzero_si128());
v = _mm_min_epi32(v, _mm256_castsi256_si128(v_max_value));
_mm_store_si128(temporary0.0.as_mut_ptr() as *mut _, v);
dst[dst_cn.r_i()] = self.profile.r_gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.g_gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.b_gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
Ok(())
}
}
impl<
T: Copy + PointeeSizeExpressible + 'static + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> TransformExecutor<T>
for TransformShaperRgbQ2_13Avx<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT, PRECISION>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
unsafe { self.transform_avx2(src, dst) }
}
}


@@ -0,0 +1,298 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::avx::rgb_xyz::AvxAlignedU16;
use crate::conversions::avx::rgb_xyz_q2_13::_xmm_broadcast_epi32;
use crate::conversions::rgbxyz_fixed::TransformMatrixShaperFixedPointOpt;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::x86_64::*;
pub(crate) struct TransformShaperRgbQ2_13OptAvx<
T: Copy,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> {
pub(crate) profile: TransformMatrixShaperFixedPointOpt<i32, i16, T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
impl<
T: Copy + PointeeSizeExpressible + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> TransformShaperRgbQ2_13OptAvx<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT, PRECISION>
where
u32: AsPrimitive<T>,
{
#[target_feature(enable = "avx2")]
unsafe fn transform_avx2(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
let mut temporary0 = AvxAlignedU16([0; 16]);
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let t = self.profile.adaptation_matrix.transpose();
let max_colors = ((1 << self.bit_depth) - 1).as_();
unsafe {
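// Same interleaved Q2.13 madd scheme as the per-channel AVX variant above,
// but with a single shared linear LUT and gamma LUT.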
let m0 = _mm256_setr_epi16(
t.v[0][0], t.v[1][0], t.v[0][1], t.v[1][1], t.v[0][2], t.v[1][2], 0, 0, t.v[0][0],
t.v[1][0], t.v[0][1], t.v[1][1], t.v[0][2], t.v[1][2], 0, 0,
);
let m2 = _mm256_setr_epi16(
t.v[2][0], 1, t.v[2][1], 1, t.v[2][2], 1, 0, 0, t.v[2][0], 1, t.v[2][1], 1,
t.v[2][2], 1, 0, 0,
);
let rnd_val = ((1i32 << (PRECISION - 1)) as i16).to_ne_bytes();
let rnd = _mm256_set1_epi32(i32::from_ne_bytes([0, 0, rnd_val[0], rnd_val[1]]));
let zeros = _mm256_setzero_si256();
let v_max_value = _mm256_set1_epi32(GAMMA_LUT as i32 - 1);
let (mut r0, mut g0, mut b0, mut a0);
let (mut r1, mut g1, mut b1, mut a1);
let mut src_iter = src.chunks_exact(src_channels * 2);
if let Some(src0) = src_iter.next() {
r0 = _xmm_broadcast_epi32(&self.profile.linear[src0[src_cn.r_i()]._as_usize()]);
g0 = _xmm_broadcast_epi32(&self.profile.linear[src0[src_cn.g_i()]._as_usize()]);
b0 = _xmm_broadcast_epi32(&self.profile.linear[src0[src_cn.b_i()]._as_usize()]);
r1 = _xmm_broadcast_epi32(
&self.profile.linear[src0[src_cn.r_i() + src_channels]._as_usize()],
);
g1 = _xmm_broadcast_epi32(
&self.profile.linear[src0[src_cn.g_i() + src_channels]._as_usize()],
);
b1 = _xmm_broadcast_epi32(
&self.profile.linear[src0[src_cn.b_i() + src_channels]._as_usize()],
);
a0 = if src_channels == 4 {
src0[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src0[src_cn.a_i() + src_channels]
} else {
max_colors
};
} else {
r0 = _mm_setzero_si128();
g0 = _mm_setzero_si128();
b0 = _mm_setzero_si128();
a0 = max_colors;
r1 = _mm_setzero_si128();
g1 = _mm_setzero_si128();
b1 = _mm_setzero_si128();
a1 = max_colors;
}
for (src, dst) in src_iter.zip(dst.chunks_exact_mut(dst_channels * 2)) {
let zr0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(r0), r1);
let mut zg0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(g0), g1);
let zb0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(b0), b1);
zg0 = _mm256_slli_epi32::<16>(zg0);
let zrg0 = _mm256_or_si256(zr0, zg0);
let zbz0 = _mm256_or_si256(zb0, rnd);
let va0 = _mm256_madd_epi16(zrg0, m0);
let va1 = _mm256_madd_epi16(zbz0, m2);
let mut v0 = _mm256_add_epi32(va0, va1);
v0 = _mm256_srai_epi32::<PRECISION>(v0);
v0 = _mm256_max_epi32(v0, zeros);
v0 = _mm256_min_epi32(v0, v_max_value);
_mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, v0);
r0 = _xmm_broadcast_epi32(&self.profile.linear[src[src_cn.r_i()]._as_usize()]);
g0 = _xmm_broadcast_epi32(&self.profile.linear[src[src_cn.g_i()]._as_usize()]);
b0 = _xmm_broadcast_epi32(&self.profile.linear[src[src_cn.b_i()]._as_usize()]);
r1 = _xmm_broadcast_epi32(
&self.profile.linear[src[src_cn.r_i() + src_channels]._as_usize()],
);
g1 = _xmm_broadcast_epi32(
&self.profile.linear[src[src_cn.g_i() + src_channels]._as_usize()],
);
b1 = _xmm_broadcast_epi32(
&self.profile.linear[src[src_cn.b_i() + src_channels]._as_usize()],
);
dst[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a0;
}
dst[dst_cn.r_i() + dst_channels] = self.profile.gamma[temporary0.0[8] as usize];
dst[dst_cn.g_i() + dst_channels] = self.profile.gamma[temporary0.0[10] as usize];
dst[dst_cn.b_i() + dst_channels] = self.profile.gamma[temporary0.0[12] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i() + dst_channels] = a1;
}
a0 = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src[src_cn.a_i() + src_channels]
} else {
max_colors
};
}
if let Some(dst) = dst.chunks_exact_mut(dst_channels * 2).last() {
let zr0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(r0), r1);
let mut zg0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(g0), g1);
let zb0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(b0), b1);
zg0 = _mm256_slli_epi32::<16>(zg0);
let zrg0 = _mm256_or_si256(zr0, zg0);
let zbz0 = _mm256_or_si256(zb0, rnd);
let va0 = _mm256_madd_epi16(zrg0, m0);
let va1 = _mm256_madd_epi16(zbz0, m2);
let mut v0 = _mm256_add_epi32(va0, va1);
v0 = _mm256_srai_epi32::<PRECISION>(v0);
v0 = _mm256_max_epi32(v0, zeros);
v0 = _mm256_min_epi32(v0, v_max_value);
_mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, v0);
dst[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a0;
}
dst[dst_cn.r_i() + dst_channels] = self.profile.gamma[temporary0.0[8] as usize];
dst[dst_cn.g_i() + dst_channels] = self.profile.gamma[temporary0.0[10] as usize];
dst[dst_cn.b_i() + dst_channels] = self.profile.gamma[temporary0.0[12] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i() + dst_channels] = a1;
}
}
let src = src.chunks_exact(src_channels * 2).remainder();
let dst = dst.chunks_exact_mut(dst_channels * 2).into_remainder();
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let r = _xmm_broadcast_epi32(&self.profile.linear[src[src_cn.r_i()]._as_usize()]);
let mut g =
_xmm_broadcast_epi32(&self.profile.linear[src[src_cn.g_i()]._as_usize()]);
let b = _xmm_broadcast_epi32(&self.profile.linear[src[src_cn.b_i()]._as_usize()]);
g = _mm_slli_epi32::<16>(g);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let zrg0 = _mm_or_si128(r, g);
let zbz0 = _mm_or_si128(b, _mm256_castsi256_si128(rnd));
let v0 = _mm_madd_epi16(zrg0, _mm256_castsi256_si128(m0));
let v1 = _mm_madd_epi16(zbz0, _mm256_castsi256_si128(m2));
let mut v = _mm_add_epi32(v0, v1);
v = _mm_srai_epi32::<PRECISION>(v);
v = _mm_max_epi32(v, _mm_setzero_si128());
v = _mm_min_epi32(v, _mm256_castsi256_si128(v_max_value));
_mm_store_si128(temporary0.0.as_mut_ptr() as *mut _, v);
dst[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
Ok(())
}
}
impl<
T: Copy + PointeeSizeExpressible + 'static + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> TransformExecutor<T>
for TransformShaperRgbQ2_13OptAvx<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT, PRECISION>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
unsafe { self.transform_avx2(src, dst) }
}
}


@@ -0,0 +1,344 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::avx::interpolator::*;
use crate::conversions::avx::interpolator_q0_15::AvxAlignedI16;
use crate::conversions::avx::t_lut3_to_3_q0_15::TransformLut3x3AvxQ0_15;
use crate::conversions::interpolator::BarycentricWeight;
use crate::conversions::lut_transforms::Lut3x3Factory;
use crate::transform::PointeeSizeExpressible;
use crate::{
BarycentricWeightScale, CmsError, DataColorSpace, InterpolationMethod, Layout,
TransformExecutor, TransformOptions,
};
use num_traits::AsPrimitive;
use std::arch::x86_64::*;
use std::marker::PhantomData;
struct TransformLut3x3AvxFma<
T,
U,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> {
lut: Vec<SseAlignedF32>,
_phantom: PhantomData<T>,
_phantom2: PhantomData<U>,
interpolation_method: InterpolationMethod,
weights: Box<[BarycentricWeight<f32>; BINS]>,
color_space: DataColorSpace,
is_linear: bool,
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformLut3x3AvxFma<T, U, SRC_LAYOUT, DST_LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
#[allow(unused_unsafe)]
#[target_feature(enable = "avx2", enable = "fma")]
unsafe fn transform_chunk<'b, Interpolator: AvxMdInterpolation<'b, GRID_SIZE>>(
&'b self,
src: &[T],
dst: &mut [T],
) {
let src_cn = Layout::from(SRC_LAYOUT);
let src_channels = src_cn.channels();
let dst_cn = Layout::from(DST_LAYOUT);
let dst_channels = dst_cn.channels();
let value_scale = unsafe { _mm_set1_ps(((1 << BIT_DEPTH) - 1) as f32) };
let max_value = ((1u32 << BIT_DEPTH) - 1).as_();
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let x = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.r_i()],
);
let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.g_i()],
);
let z = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.b_i()],
);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_value
};
let tetrahedral = Interpolator::new(&self.lut);
let v = tetrahedral.inter3_sse(x, y, z, &self.weights);
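// Integer sample types are rescaled and clamped to the target bit depth;
// float outputs are extracted bit-exact from the vector lanes instead.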
if T::FINITE {
unsafe {
let mut r = _mm_mul_ps(v.v, value_scale);
r = _mm_max_ps(r, _mm_setzero_ps());
r = _mm_min_ps(r, value_scale);
let jvz = _mm_cvtps_epi32(r);
let x = _mm_extract_epi32::<0>(jvz);
let y = _mm_extract_epi32::<1>(jvz);
let z = _mm_extract_epi32::<2>(jvz);
dst[dst_cn.r_i()] = (x as u32).as_();
dst[dst_cn.g_i()] = (y as u32).as_();
dst[dst_cn.b_i()] = (z as u32).as_();
}
} else {
unsafe {
dst[dst_cn.r_i()] = f32::from_bits(_mm_extract_ps::<0>(v.v) as u32).as_();
dst[dst_cn.g_i()] = f32::from_bits(_mm_extract_ps::<1>(v.v) as u32).as_();
dst[dst_cn.b_i()] = f32::from_bits(_mm_extract_ps::<2>(v.v) as u32).as_();
}
}
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformExecutor<T>
for TransformLut3x3AvxFma<
T,
U,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
BINS,
BARYCENTRIC_BINS,
>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let src_channels = src_cn.channels();
let dst_cn = Layout::from(DST_LAYOUT);
let dst_channels = dst_cn.channels();
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / src_channels;
let dst_chunks = dst.len() / dst_channels;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
unsafe {
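// Lab, XYZ and linear RGB tables are always sampled with trilinear
// interpolation; the tetrahedral/pyramid/prism variants are only offered for
// the remaining color spaces.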
if self.color_space == DataColorSpace::Lab
|| (self.is_linear && self.color_space == DataColorSpace::Rgb)
|| self.color_space == DataColorSpace::Xyz
{
self.transform_chunk::<TrilinearAvxFma<GRID_SIZE>>(src, dst);
} else {
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_chunk::<TetrahedralAvxFma<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_chunk::<PyramidalAvxFma<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_chunk::<PrismaticAvxFma<GRID_SIZE>>(src, dst);
}
InterpolationMethod::Linear => {
self.transform_chunk::<TrilinearAvxFma<GRID_SIZE>>(src, dst);
}
}
}
}
Ok(())
}
}
pub(crate) struct AvxLut3x3Factory {}
impl Lut3x3Factory for AvxLut3x3Factory {
fn make_transform_3x3<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible + 'static + Send + Sync,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
>(
lut: Vec<f32>,
options: TransformOptions,
color_space: DataColorSpace,
is_linear: bool,
) -> Box<dyn TransformExecutor<T> + Send + Sync>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, u8>,
(): LutBarycentricReduction<T, u16>,
{
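// Fixed-point route: quantize the f32 LUT into i16 lattice entries. Finite
// integer sample types scale by their max code value; float samples use a
// 14-bit scale (presumably to leave headroom in the i16 arithmetic).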
if options.prefer_fixed_point && BIT_DEPTH < 16 {
let q: f32 = if T::FINITE {
((1i32 << BIT_DEPTH as i32) - 1) as f32
} else {
((1i32 << 14i32) - 1) as f32
};
let lut = lut
.chunks_exact(3)
.map(|x| {
AvxAlignedI16([
(x[0] * q).round() as i16,
(x[1] * q).round() as i16,
(x[2] * q).round() as i16,
0,
])
})
.collect::<Vec<_>>();
return match options.barycentric_weight_scale {
BarycentricWeightScale::Low => Box::new(TransformLut3x3AvxQ0_15::<
T,
u8,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
256,
256,
> {
lut,
_phantom: PhantomData,
_phantom2: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<i16>::create_ranged_256::<GRID_SIZE>(),
color_space,
is_linear,
}),
#[cfg(feature = "options")]
BarycentricWeightScale::High => Box::new(TransformLut3x3AvxQ0_15::<
T,
u16,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
65536,
65536,
> {
lut,
_phantom: PhantomData,
_phantom2: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<i16>::create_binned::<GRID_SIZE, 65536>(),
color_space,
is_linear,
}),
};
}
assert!(
std::arch::is_x86_feature_detected!("fma"),
"Internal configuration error: this must not be called without the `fma` feature"
);
let lut = lut
.chunks_exact(3)
.map(|x| SseAlignedF32([x[0], x[1], x[2], 0f32]))
.collect::<Vec<_>>();
match options.barycentric_weight_scale {
BarycentricWeightScale::Low => Box::new(TransformLut3x3AvxFma::<
T,
u8,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
256,
256,
> {
lut,
_phantom: PhantomData,
_phantom2: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_ranged_256::<GRID_SIZE>(),
color_space,
is_linear,
}),
#[cfg(feature = "options")]
BarycentricWeightScale::High => Box::new(TransformLut3x3AvxFma::<
T,
u16,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
65536,
65536,
> {
lut,
_phantom: PhantomData,
_phantom2: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_binned::<GRID_SIZE, 65536>(),
color_space,
is_linear,
}),
}
}
}


@@ -0,0 +1,222 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::avx::interpolator_q0_15::*;
use crate::conversions::interpolator::BarycentricWeight;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, DataColorSpace, InterpolationMethod, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::x86_64::*;
use std::marker::PhantomData;
pub(crate) struct TransformLut3x3AvxQ0_15<
T,
U,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> {
pub(crate) lut: Vec<AvxAlignedI16>,
pub(crate) _phantom: PhantomData<T>,
pub(crate) _phantom2: PhantomData<U>,
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) weights: Box<[BarycentricWeight<i16>; BINS]>,
pub(crate) color_space: DataColorSpace,
pub(crate) is_linear: bool,
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
>
TransformLut3x3AvxQ0_15<
T,
U,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
BINS,
BARYCENTRIC_BINS,
>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
#[allow(unused_unsafe)]
#[target_feature(enable = "avx2")]
unsafe fn transform_chunk<'b, Interpolator: AvxMdInterpolationQ0_15<'b, GRID_SIZE>>(
&'b self,
src: &[T],
dst: &mut [T],
) {
unsafe {
let src_cn = Layout::from(SRC_LAYOUT);
let src_channels = src_cn.channels();
let dst_cn = Layout::from(DST_LAYOUT);
let dst_channels = dst_cn.channels();
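// The fixed-point lattice is scaled by the max code value for finite sample
// types, or by (1 << 14) - 1 for float samples (matching the quantization in
// the factory); outputs are clamped or rescaled accordingly below.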
let f_value_scale = _mm_set1_ps(1. / ((1 << 14i32) - 1) as f32);
let max_value = ((1u32 << BIT_DEPTH) - 1).as_();
let v_max_scale = if T::FINITE {
_mm_set1_epi16(((1i32 << BIT_DEPTH) - 1) as i16)
} else {
_mm_set1_epi16(((1i32 << 14i32) - 1) as i16)
};
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let x = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.r_i()],
);
let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.g_i()],
);
let z = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.b_i()],
);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_value
};
let tetrahedral = Interpolator::new(&self.lut);
let v = tetrahedral.inter3_sse(x, y, z, &self.weights);
if T::FINITE {
let mut o = _mm_max_epi16(v.v, _mm_setzero_si128());
o = _mm_min_epi16(o, v_max_scale);
let x = _mm_extract_epi16::<0>(o);
let y = _mm_extract_epi16::<1>(o);
let z = _mm_extract_epi16::<2>(o);
dst[dst_cn.r_i()] = (x as u32).as_();
dst[dst_cn.g_i()] = (y as u32).as_();
dst[dst_cn.b_i()] = (z as u32).as_();
} else {
let mut r = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(v.v));
r = _mm_mul_ps(r, f_value_scale);
dst[dst_cn.r_i()] = f32::from_bits(_mm_extract_ps::<0>(r) as u32).as_();
dst[dst_cn.g_i()] = f32::from_bits(_mm_extract_ps::<1>(r) as u32).as_();
dst[dst_cn.b_i()] = f32::from_bits(_mm_extract_ps::<2>(r) as u32).as_();
}
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
}
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformExecutor<T>
for TransformLut3x3AvxQ0_15<
T,
U,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
BINS,
BARYCENTRIC_BINS,
>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let src_channels = src_cn.channels();
let dst_cn = Layout::from(DST_LAYOUT);
let dst_channels = dst_cn.channels();
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / src_channels;
let dst_chunks = dst.len() / dst_channels;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
unsafe {
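// Same dispatch policy as the FMA variant: trilinear for Lab/XYZ/linear RGB.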
if self.color_space == DataColorSpace::Lab
|| (self.is_linear && self.color_space == DataColorSpace::Rgb)
|| self.color_space == DataColorSpace::Xyz
{
self.transform_chunk::<TrilinearAvxQ0_15<GRID_SIZE>>(src, dst);
} else {
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_chunk::<TetrahedralAvxQ0_15<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_chunk::<PyramidalAvxQ0_15<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_chunk::<PrismaticAvxQ0_15<GRID_SIZE>>(src, dst);
}
InterpolationMethod::Linear => {
self.transform_chunk::<TrilinearAvxQ0_15<GRID_SIZE>>(src, dst);
}
}
}
}
Ok(())
}
}


@@ -0,0 +1,33 @@
/*
* // Copyright (c) Radzivon Bartoshyk 5/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
mod rgb_xyz_opt;
mod rgb_xyz_q2_13_opt;
pub(crate) use rgb_xyz_opt::TransformShaperRgbOptAvx512;
pub(crate) use rgb_xyz_q2_13_opt::TransformShaperRgbQ2_13OptAvx512;


@@ -0,0 +1,420 @@
/*
* // Copyright (c) Radzivon Bartoshyk 5/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::TransformMatrixShaperOptimized;
use crate::conversions::avx512::rgb_xyz_q2_13_opt::{
AvxAlignedU16, split_by_twos, split_by_twos_mut,
};
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::x86_64::*;
pub(crate) struct TransformShaperRgbOptAvx512<
T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> {
pub(crate) profile: TransformMatrixShaperOptimized<T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
impl<
T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> TransformShaperRgbOptAvx512<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT>
where
u32: AsPrimitive<T>,
{
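// Processes four pixels per iteration across two independent two-pixel
// streams (see `split_by_twos`). The body sticks to 256-bit intrinsics; the
// AVX-512VL requirement appears to be for the EVEX encodings and the larger
// register file rather than for 512-bit vectors.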
#[target_feature(enable = "avx512bw", enable = "avx512vl", enable = "fma")]
unsafe fn transform_impl(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let t = self.profile.adaptation_matrix.transpose();
let scale = (GAMMA_LUT - 1) as f32;
let max_colors: T = ((1 << self.bit_depth) - 1).as_();
let (src_chunks, src_remainder) = split_by_twos(src, src_channels);
let (dst_chunks, dst_remainder) = split_by_twos_mut(dst, dst_channels);
let mut temporary0 = AvxAlignedU16([0; 16]);
let mut temporary1 = AvxAlignedU16([0; 16]);
unsafe {
let m0 = _mm256_setr_ps(
t.v[0][0], t.v[0][1], t.v[0][2], 0f32, t.v[0][0], t.v[0][1], t.v[0][2], 0f32,
);
let m1 = _mm256_setr_ps(
t.v[1][0], t.v[1][1], t.v[1][2], 0f32, t.v[1][0], t.v[1][1], t.v[1][2], 0f32,
);
let m2 = _mm256_setr_ps(
t.v[2][0], t.v[2][1], t.v[2][2], 0f32, t.v[2][0], t.v[2][1], t.v[2][2], 0f32,
);
let zeros = _mm_setzero_ps();
let v_scale = _mm256_set1_ps(scale);
if !src_chunks.is_empty() {
let (src0, src1) = src_chunks.split_at(src_chunks.len() / 2);
let (dst0, dst1) = dst_chunks.split_at_mut(dst_chunks.len() / 2);
let mut src_iter0 = src0.chunks_exact(src_channels * 2);
let mut src_iter1 = src1.chunks_exact(src_channels * 2);
let (mut r0, mut g0, mut b0, mut a0);
let (mut r1, mut g1, mut b1, mut a1);
let (mut r2, mut g2, mut b2, mut a2);
let (mut r3, mut g3, mut b3, mut a3);
if let (Some(src0), Some(src1)) = (src_iter0.next(), src_iter1.next()) {
r0 = _mm_broadcast_ss(&self.profile.linear[src0[src_cn.r_i()]._as_usize()]);
g0 = _mm_broadcast_ss(&self.profile.linear[src0[src_cn.g_i()]._as_usize()]);
b0 = _mm_broadcast_ss(&self.profile.linear[src0[src_cn.b_i()]._as_usize()]);
r1 = _mm_broadcast_ss(
&self.profile.linear[src0[src_cn.r_i() + src_channels]._as_usize()],
);
g1 = _mm_broadcast_ss(
&self.profile.linear[src0[src_cn.g_i() + src_channels]._as_usize()],
);
b1 = _mm_broadcast_ss(
&self.profile.linear[src0[src_cn.b_i() + src_channels]._as_usize()],
);
r2 = _mm_broadcast_ss(&self.profile.linear[src1[src_cn.r_i()]._as_usize()]);
g2 = _mm_broadcast_ss(&self.profile.linear[src1[src_cn.g_i()]._as_usize()]);
b2 = _mm_broadcast_ss(&self.profile.linear[src1[src_cn.b_i()]._as_usize()]);
r3 = _mm_broadcast_ss(
&self.profile.linear[src1[src_cn.r_i() + src_channels]._as_usize()],
);
g3 = _mm_broadcast_ss(
&self.profile.linear[src1[src_cn.g_i() + src_channels]._as_usize()],
);
b3 = _mm_broadcast_ss(
&self.profile.linear[src1[src_cn.b_i() + src_channels]._as_usize()],
);
a0 = if src_channels == 4 {
src0[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src0[src_cn.a_i() + src_channels]
} else {
max_colors
};
a2 = if src_channels == 4 {
src1[src_cn.a_i()]
} else {
max_colors
};
a3 = if src_channels == 4 {
src1[src_cn.a_i() + src_channels]
} else {
max_colors
};
} else {
r0 = _mm_setzero_ps();
g0 = _mm_setzero_ps();
b0 = _mm_setzero_ps();
a0 = max_colors;
r1 = _mm_setzero_ps();
g1 = _mm_setzero_ps();
b1 = _mm_setzero_ps();
a1 = max_colors;
r2 = _mm_setzero_ps();
g2 = _mm_setzero_ps();
b2 = _mm_setzero_ps();
a2 = max_colors;
r3 = _mm_setzero_ps();
g3 = _mm_setzero_ps();
b3 = _mm_setzero_ps();
a3 = max_colors;
}
for (((src0, src1), dst0), dst1) in src_iter0
.zip(src_iter1)
.zip(dst0.chunks_exact_mut(dst_channels * 2))
.zip(dst1.chunks_exact_mut(dst_channels * 2))
{
let rz0 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(r0), r1);
let gz0 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(g0), g1);
let bz0 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(b0), b1);
let rz1 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(r2), r3);
let gz1 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(g2), g3);
let bz1 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(b2), b3);
let v0 = _mm256_mul_ps(rz0, m0);
let v1 = _mm256_fmadd_ps(gz0, m1, v0);
let mut vz0 = _mm256_fmadd_ps(bz0, m2, v1);
let v2 = _mm256_mul_ps(rz1, m0);
let v3 = _mm256_fmadd_ps(gz1, m1, v2);
let mut vz1 = _mm256_fmadd_ps(bz1, m2, v3);
vz0 = _mm256_max_ps(vz0, _mm256_setzero_ps());
vz0 = _mm256_mul_ps(vz0, v_scale);
vz0 = _mm256_min_ps(vz0, v_scale);
vz1 = _mm256_max_ps(vz1, _mm256_setzero_ps());
vz1 = _mm256_mul_ps(vz1, v_scale);
vz1 = _mm256_min_ps(vz1, v_scale);
let zx0 = _mm256_cvtps_epi32(vz0);
let zx1 = _mm256_cvtps_epi32(vz1);
_mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, zx0);
_mm256_store_si256(temporary1.0.as_mut_ptr() as *mut _, zx1);
r0 = _mm_broadcast_ss(&self.profile.linear[src0[src_cn.r_i()]._as_usize()]);
g0 = _mm_broadcast_ss(&self.profile.linear[src0[src_cn.g_i()]._as_usize()]);
b0 = _mm_broadcast_ss(&self.profile.linear[src0[src_cn.b_i()]._as_usize()]);
r1 = _mm_broadcast_ss(
&self.profile.linear[src0[src_cn.r_i() + src_channels]._as_usize()],
);
g1 = _mm_broadcast_ss(
&self.profile.linear[src0[src_cn.g_i() + src_channels]._as_usize()],
);
b1 = _mm_broadcast_ss(
&self.profile.linear[src0[src_cn.b_i() + src_channels]._as_usize()],
);
r2 = _mm_broadcast_ss(&self.profile.linear[src1[src_cn.r_i()]._as_usize()]);
g2 = _mm_broadcast_ss(&self.profile.linear[src1[src_cn.g_i()]._as_usize()]);
b2 = _mm_broadcast_ss(&self.profile.linear[src1[src_cn.b_i()]._as_usize()]);
r3 = _mm_broadcast_ss(
&self.profile.linear[src1[src_cn.r_i() + src_channels]._as_usize()],
);
g3 = _mm_broadcast_ss(
&self.profile.linear[src1[src_cn.g_i() + src_channels]._as_usize()],
);
b3 = _mm_broadcast_ss(
&self.profile.linear[src1[src_cn.b_i() + src_channels]._as_usize()],
);
dst0[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
dst0[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
dst0[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i()] = a0;
}
dst0[dst_cn.r_i() + dst_channels] =
self.profile.gamma[temporary0.0[8] as usize];
dst0[dst_cn.g_i() + dst_channels] =
self.profile.gamma[temporary0.0[10] as usize];
dst0[dst_cn.b_i() + dst_channels] =
self.profile.gamma[temporary0.0[12] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i() + dst_channels] = a1;
}
dst1[dst_cn.r_i()] = self.profile.gamma[temporary1.0[0] as usize];
dst1[dst_cn.g_i()] = self.profile.gamma[temporary1.0[2] as usize];
dst1[dst_cn.b_i()] = self.profile.gamma[temporary1.0[4] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i()] = a2;
}
dst1[dst_cn.r_i() + dst_channels] =
self.profile.gamma[temporary1.0[8] as usize];
dst1[dst_cn.g_i() + dst_channels] =
self.profile.gamma[temporary1.0[10] as usize];
dst1[dst_cn.b_i() + dst_channels] =
self.profile.gamma[temporary1.0[12] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i() + dst_channels] = a3;
}
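// `src0`/`src1` are the loop's current chunks; the outer `src` slice must not
// be indexed here, or the first pixel's alpha would be replayed for every pair.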
a0 = if src_channels == 4 {
src0[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src0[src_cn.a_i() + src_channels]
} else {
max_colors
};
a2 = if src_channels == 4 {
src1[src_cn.a_i()]
} else {
max_colors
};
a3 = if src_channels == 4 {
src1[src_cn.a_i() + src_channels]
} else {
max_colors
};
}
if let (Some(dst0), Some(dst1)) = (
dst0.chunks_exact_mut(dst_channels * 2).last(),
dst1.chunks_exact_mut(dst_channels * 2).last(),
) {
let rz0 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(r0), r1);
let gz0 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(g0), g1);
let bz0 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(b0), b1);
let rz1 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(r2), r3);
let gz1 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(g2), g3);
let bz1 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(b2), b3);
let v0 = _mm256_mul_ps(rz0, m0);
let v1 = _mm256_fmadd_ps(gz0, m1, v0);
let mut vz0 = _mm256_fmadd_ps(bz0, m2, v1);
let v2 = _mm256_mul_ps(rz1, m0);
let v3 = _mm256_fmadd_ps(gz1, m1, v2);
let mut vz1 = _mm256_fmadd_ps(bz1, m2, v3);
vz0 = _mm256_max_ps(vz0, _mm256_setzero_ps());
vz0 = _mm256_mul_ps(vz0, v_scale);
vz0 = _mm256_min_ps(vz0, v_scale);
vz1 = _mm256_max_ps(vz1, _mm256_setzero_ps());
vz1 = _mm256_mul_ps(vz1, v_scale);
vz1 = _mm256_min_ps(vz1, v_scale);
let zx0 = _mm256_cvtps_epi32(vz0);
let zx1 = _mm256_cvtps_epi32(vz1);
_mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, zx0);
_mm256_store_si256(temporary1.0.as_mut_ptr() as *mut _, zx1);
dst0[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
dst0[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
dst0[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i()] = a0;
}
dst0[dst_cn.r_i() + dst_channels] =
self.profile.gamma[temporary0.0[8] as usize];
dst0[dst_cn.g_i() + dst_channels] =
self.profile.gamma[temporary0.0[10] as usize];
dst0[dst_cn.b_i() + dst_channels] =
self.profile.gamma[temporary0.0[12] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i() + dst_channels] = a1;
}
dst1[dst_cn.r_i()] = self.profile.gamma[temporary1.0[0] as usize];
dst1[dst_cn.g_i()] = self.profile.gamma[temporary1.0[2] as usize];
dst1[dst_cn.b_i()] = self.profile.gamma[temporary1.0[4] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i()] = a2;
}
dst1[dst_cn.r_i() + dst_channels] =
self.profile.gamma[temporary1.0[8] as usize];
dst1[dst_cn.g_i() + dst_channels] =
self.profile.gamma[temporary1.0[10] as usize];
dst1[dst_cn.b_i() + dst_channels] =
self.profile.gamma[temporary1.0[12] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i() + dst_channels] = a3;
}
}
}
for (src, dst) in src_remainder
.chunks_exact(src_channels)
.zip(dst_remainder.chunks_exact_mut(dst_channels))
{
let r = _mm_broadcast_ss(&self.profile.linear[src[src_cn.r_i()]._as_usize()]);
let g = _mm_broadcast_ss(&self.profile.linear[src[src_cn.g_i()]._as_usize()]);
let b = _mm_broadcast_ss(&self.profile.linear[src[src_cn.b_i()]._as_usize()]);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let v0 = _mm_mul_ps(r, _mm256_castps256_ps128(m0));
let v1 = _mm_fmadd_ps(g, _mm256_castps256_ps128(m1), v0);
let mut v = _mm_fmadd_ps(b, _mm256_castps256_ps128(m2), v1);
v = _mm_max_ps(v, zeros);
v = _mm_mul_ps(v, _mm256_castps256_ps128(v_scale));
v = _mm_min_ps(v, _mm256_castps256_ps128(v_scale));
let zx = _mm_cvtps_epi32(v);
_mm_store_si128(temporary0.0.as_mut_ptr() as *mut _, zx);
dst[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
Ok(())
}
}
impl<
T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> TransformExecutor<T>
for TransformShaperRgbOptAvx512<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
unsafe { self.transform_impl(src, dst) }
}
}


@@ -0,0 +1,476 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::rgbxyz_fixed::TransformMatrixShaperFixedPointOpt;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::x86_64::*;
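/// Matrix-shaper RGB transform specialized for AVX-512, using Q2.13
/// fixed-point arithmetic: linearization LUT -> i16 3x3 matrix -> gamma LUT.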
pub(crate) struct TransformShaperRgbQ2_13OptAvx512<
T: Copy,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> {
pub(crate) profile: TransformMatrixShaperFixedPointOpt<i32, i16, T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
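/// Broadcasts a single `i32` across all lanes of an `__m128i`. The value is
/// reinterpreted as `f32` only so `vbroadcastss` can be used; this is a pure
/// bit-cast, no numeric conversion happens.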
#[inline(always)]
pub(crate) unsafe fn _xmm_broadcast_epi32(f: &i32) -> __m128i {
let float_ref: &f32 = unsafe { &*(f as *const i32 as *const f32) };
unsafe { _mm_castps_si128(_mm_broadcast_ss(float_ref)) }
}
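/// 32-byte-aligned scratch buffer so `_mm256_store_si256` can write to it
/// without alignment faults.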
#[repr(align(32), C)]
#[derive(Debug)]
pub(crate) struct AvxAlignedU16(pub(crate) [u16; 16]);
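/// Splits `data` into a head whose length is a whole multiple of four pixels
/// (consumed by the two-pixels-per-half SIMD loop) and a scalar remainder.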
#[inline]
pub(crate) fn split_by_twos<T: Copy>(data: &[T], channels: usize) -> (&[T], &[T]) {
let len = data.len() / (channels * 4);
let split_point = len * 4;
data.split_at(split_point * channels)
}
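/// Mutable counterpart of [`split_by_twos`].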
#[inline]
pub(crate) fn split_by_twos_mut<T: Copy>(data: &mut [T], channels: usize) -> (&mut [T], &mut [T]) {
let len = data.len() / (channels * 4);
let split_point = len * 4;
data.split_at_mut(split_point * channels)
}
impl<
T: Copy + PointeeSizeExpressible + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> TransformShaperRgbQ2_13OptAvx512<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT, PRECISION>
where
u32: AsPrimitive<T>,
{
#[target_feature(enable = "avx512bw", enable = "avx512vl")]
unsafe fn transform_avx512(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let t = self.profile.adaptation_matrix.transpose();
let max_colors = ((1 << self.bit_depth) - 1).as_();
        // If the precision is changed anywhere else, it must be changed here as well.
assert_eq!(PRECISION, 13);
let (src_chunks, src_remainder) = split_by_twos(src, src_channels);
let (dst_chunks, dst_remainder) = split_by_twos_mut(dst, dst_channels);
let mut temporary0 = AvxAlignedU16([0; 16]);
let mut temporary1 = AvxAlignedU16([0; 16]);
unsafe {
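            // m0 packs the transposed matrix so one `_mm256_madd_epi16` over
            // interleaved (r, g) i16 pairs yields r*t[0][k] + g*t[1][k] in
            // 32-bit lane k; m2 pairs the b coefficient with a unit weight so
            // the same madd also folds in the Q2.13 rounding term, which `rnd`
            // holds in the high 16 bits of every 32-bit lane.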
let m0 = _mm256_set_epi16(
0, 0, t.v[1][2], t.v[0][2], t.v[1][1], t.v[0][1], t.v[1][0], t.v[0][0], 0, 0,
t.v[1][2], t.v[0][2], t.v[1][1], t.v[0][1], t.v[1][0], t.v[0][0],
);
let m2 = _mm256_set_epi16(
0, 0, 1, t.v[2][2], 1, t.v[2][1], 1, t.v[2][0], 0, 0, 1, t.v[2][2], 1, t.v[2][1],
1, t.v[2][0],
);
let rnd_val = ((1i32 << (PRECISION - 1)) as i16).to_ne_bytes();
let rnd = _mm256_set1_epi32(i32::from_ne_bytes([0, 0, rnd_val[0], rnd_val[1]]));
let zeros = _mm256_setzero_si256();
let v_max_value = _mm256_set1_epi32(GAMMA_LUT as i32 - 1);
let (mut r0, mut g0, mut b0, mut a0);
let (mut r1, mut g1, mut b1, mut a1);
let (mut r2, mut g2, mut b2, mut a2);
let (mut r3, mut g3, mut b3, mut a3);
if !src_chunks.is_empty() {
let (src0, src1) = src_chunks.split_at(src_chunks.len() / 2);
let (dst0, dst1) = dst_chunks.split_at_mut(dst_chunks.len() / 2);
let mut src_iter0 = src0.chunks_exact(src_channels * 2);
let mut src_iter1 = src1.chunks_exact(src_channels * 2);
if let (Some(src0), Some(src1)) = (src_iter0.next(), src_iter1.next()) {
r0 = _xmm_broadcast_epi32(&self.profile.linear[src0[src_cn.r_i()]._as_usize()]);
g0 = _xmm_broadcast_epi32(&self.profile.linear[src0[src_cn.g_i()]._as_usize()]);
b0 = _xmm_broadcast_epi32(&self.profile.linear[src0[src_cn.b_i()]._as_usize()]);
r1 = _xmm_broadcast_epi32(
&self.profile.linear[src0[src_cn.r_i() + src_channels]._as_usize()],
);
g1 = _xmm_broadcast_epi32(
&self.profile.linear[src0[src_cn.g_i() + src_channels]._as_usize()],
);
b1 = _xmm_broadcast_epi32(
&self.profile.linear[src0[src_cn.b_i() + src_channels]._as_usize()],
);
r2 = _xmm_broadcast_epi32(&self.profile.linear[src1[src_cn.r_i()]._as_usize()]);
g2 = _xmm_broadcast_epi32(&self.profile.linear[src1[src_cn.g_i()]._as_usize()]);
b2 = _xmm_broadcast_epi32(&self.profile.linear[src1[src_cn.b_i()]._as_usize()]);
r3 = _xmm_broadcast_epi32(
&self.profile.linear[src1[src_cn.r_i() + src_channels]._as_usize()],
);
g3 = _xmm_broadcast_epi32(
&self.profile.linear[src1[src_cn.g_i() + src_channels]._as_usize()],
);
b3 = _xmm_broadcast_epi32(
&self.profile.linear[src1[src_cn.b_i() + src_channels]._as_usize()],
);
a0 = if src_channels == 4 {
src0[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src0[src_cn.a_i() + src_channels]
} else {
max_colors
};
a2 = if src_channels == 4 {
src1[src_cn.a_i()]
} else {
max_colors
};
a3 = if src_channels == 4 {
src1[src_cn.a_i() + src_channels]
} else {
max_colors
};
} else {
r0 = _mm_setzero_si128();
g0 = _mm_setzero_si128();
b0 = _mm_setzero_si128();
a0 = max_colors;
r1 = _mm_setzero_si128();
g1 = _mm_setzero_si128();
b1 = _mm_setzero_si128();
a1 = max_colors;
r2 = _mm_setzero_si128();
g2 = _mm_setzero_si128();
b2 = _mm_setzero_si128();
a2 = max_colors;
r3 = _mm_setzero_si128();
g3 = _mm_setzero_si128();
b3 = _mm_setzero_si128();
a3 = max_colors;
}
for (((src0, src1), dst0), dst1) in src_iter0
.zip(src_iter1)
.zip(dst0.chunks_exact_mut(dst_channels * 2))
.zip(dst1.chunks_exact_mut(dst_channels * 2))
{
let zr0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(r0), r1);
let mut zg0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(g0), g1);
let zb0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(b0), b1);
zg0 = _mm256_slli_epi32::<16>(zg0);
let zr1 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(r2), r3);
let mut zg1 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(g2), g3);
let zb1 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(b2), b3);
zg1 = _mm256_slli_epi32::<16>(zg1);
let zrg0 = _mm256_or_si256(zr0, zg0);
let zbz0 = _mm256_or_si256(zb0, rnd);
let zrg1 = _mm256_or_si256(zr1, zg1);
let zbz1 = _mm256_or_si256(zb1, rnd);
let va0 = _mm256_madd_epi16(zrg0, m0);
let va1 = _mm256_madd_epi16(zbz0, m2);
let va2 = _mm256_madd_epi16(zrg1, m0);
let va3 = _mm256_madd_epi16(zbz1, m2);
let mut v0 = _mm256_add_epi32(va0, va1);
let mut v1 = _mm256_add_epi32(va2, va3);
v0 = _mm256_srai_epi32::<PRECISION>(v0);
v0 = _mm256_max_epi32(v0, zeros);
v0 = _mm256_min_epi32(v0, v_max_value);
v1 = _mm256_srai_epi32::<PRECISION>(v1);
v1 = _mm256_max_epi32(v1, zeros);
v1 = _mm256_min_epi32(v1, v_max_value);
_mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, v0);
_mm256_store_si256(temporary1.0.as_mut_ptr() as *mut _, v1);
r0 = _xmm_broadcast_epi32(&self.profile.linear[src0[src_cn.r_i()]._as_usize()]);
g0 = _xmm_broadcast_epi32(&self.profile.linear[src0[src_cn.g_i()]._as_usize()]);
b0 = _xmm_broadcast_epi32(&self.profile.linear[src0[src_cn.b_i()]._as_usize()]);
r1 = _xmm_broadcast_epi32(
&self.profile.linear[src0[src_cn.r_i() + src_channels]._as_usize()],
);
g1 = _xmm_broadcast_epi32(
&self.profile.linear[src0[src_cn.g_i() + src_channels]._as_usize()],
);
b1 = _xmm_broadcast_epi32(
&self.profile.linear[src0[src_cn.b_i() + src_channels]._as_usize()],
);
r2 = _xmm_broadcast_epi32(&self.profile.linear[src1[src_cn.r_i()]._as_usize()]);
g2 = _xmm_broadcast_epi32(&self.profile.linear[src1[src_cn.g_i()]._as_usize()]);
b2 = _xmm_broadcast_epi32(&self.profile.linear[src1[src_cn.b_i()]._as_usize()]);
r3 = _xmm_broadcast_epi32(
&self.profile.linear[src1[src_cn.r_i() + src_channels]._as_usize()],
);
g3 = _xmm_broadcast_epi32(
&self.profile.linear[src1[src_cn.g_i() + src_channels]._as_usize()],
);
b3 = _xmm_broadcast_epi32(
&self.profile.linear[src1[src_cn.b_i() + src_channels]._as_usize()],
);
dst0[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
dst0[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
dst0[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i()] = a0;
}
dst0[dst_cn.r_i() + dst_channels] =
self.profile.gamma[temporary0.0[8] as usize];
dst0[dst_cn.g_i() + dst_channels] =
self.profile.gamma[temporary0.0[10] as usize];
dst0[dst_cn.b_i() + dst_channels] =
self.profile.gamma[temporary0.0[12] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i() + dst_channels] = a1;
}
dst1[dst_cn.r_i()] = self.profile.gamma[temporary1.0[0] as usize];
dst1[dst_cn.g_i()] = self.profile.gamma[temporary1.0[2] as usize];
dst1[dst_cn.b_i()] = self.profile.gamma[temporary1.0[4] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i()] = a2;
}
dst1[dst_cn.r_i() + dst_channels] =
self.profile.gamma[temporary1.0[8] as usize];
dst1[dst_cn.g_i() + dst_channels] =
self.profile.gamma[temporary1.0[10] as usize];
dst1[dst_cn.b_i() + dst_channels] =
self.profile.gamma[temporary1.0[12] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i() + dst_channels] = a3;
}
a0 = if src_channels == 4 {
src0[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src0[src_cn.a_i() + src_channels]
} else {
max_colors
};
a2 = if src_channels == 4 {
src1[src_cn.a_i()]
} else {
max_colors
};
a3 = if src_channels == 4 {
src1[src_cn.a_i() + src_channels]
} else {
max_colors
};
}
if let (Some(dst0), Some(dst1)) = (
dst0.chunks_exact_mut(dst_channels * 2).last(),
dst1.chunks_exact_mut(dst_channels * 2).last(),
) {
let zr0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(r0), r1);
let mut zg0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(g0), g1);
let zb0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(b0), b1);
zg0 = _mm256_slli_epi32::<16>(zg0);
let zr1 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(r2), r3);
let mut zg1 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(g2), g3);
let zb1 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(b2), b3);
zg1 = _mm256_slli_epi32::<16>(zg1);
let zrg0 = _mm256_or_si256(zr0, zg0);
let zbz0 = _mm256_or_si256(zb0, rnd);
let zrg1 = _mm256_or_si256(zr1, zg1);
let zbz1 = _mm256_or_si256(zb1, rnd);
let va0 = _mm256_madd_epi16(zrg0, m0);
let va1 = _mm256_madd_epi16(zbz0, m2);
let va2 = _mm256_madd_epi16(zrg1, m0);
let va3 = _mm256_madd_epi16(zbz1, m2);
let mut v0 = _mm256_add_epi32(va0, va1);
let mut v1 = _mm256_add_epi32(va2, va3);
v0 = _mm256_srai_epi32::<PRECISION>(v0);
v0 = _mm256_max_epi32(v0, zeros);
v0 = _mm256_min_epi32(v0, v_max_value);
v1 = _mm256_srai_epi32::<PRECISION>(v1);
v1 = _mm256_max_epi32(v1, zeros);
v1 = _mm256_min_epi32(v1, v_max_value);
_mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, v0);
_mm256_store_si256(temporary1.0.as_mut_ptr() as *mut _, v1);
dst0[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
dst0[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
dst0[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i()] = a0;
}
dst0[dst_cn.r_i() + dst_channels] =
self.profile.gamma[temporary0.0[8] as usize];
dst0[dst_cn.g_i() + dst_channels] =
self.profile.gamma[temporary0.0[10] as usize];
dst0[dst_cn.b_i() + dst_channels] =
self.profile.gamma[temporary0.0[12] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i() + dst_channels] = a1;
}
dst1[dst_cn.r_i()] = self.profile.gamma[temporary1.0[0] as usize];
dst1[dst_cn.g_i()] = self.profile.gamma[temporary1.0[2] as usize];
dst1[dst_cn.b_i()] = self.profile.gamma[temporary1.0[4] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i()] = a2;
}
dst1[dst_cn.r_i() + dst_channels] =
self.profile.gamma[temporary1.0[8] as usize];
dst1[dst_cn.g_i() + dst_channels] =
self.profile.gamma[temporary1.0[10] as usize];
dst1[dst_cn.b_i() + dst_channels] =
self.profile.gamma[temporary1.0[12] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i() + dst_channels] = a3;
}
}
}
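            // Tail: remaining (< 4) pixels, handled per pixel on the 128-bit
            // lower halves of the 256-bit constants.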
for (src, dst) in src_remainder
.chunks_exact(src_channels)
.zip(dst_remainder.chunks_exact_mut(dst_channels))
{
let r = _xmm_broadcast_epi32(&self.profile.linear[src[src_cn.r_i()]._as_usize()]);
let mut g =
_xmm_broadcast_epi32(&self.profile.linear[src[src_cn.g_i()]._as_usize()]);
let b = _xmm_broadcast_epi32(&self.profile.linear[src[src_cn.b_i()]._as_usize()]);
g = _mm_slli_epi32::<16>(g);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let zrg0 = _mm_or_si128(r, g);
let zbz0 = _mm_or_si128(b, _mm256_castsi256_si128(rnd));
let v0 = _mm_madd_epi16(zrg0, _mm256_castsi256_si128(m0));
let v1 = _mm_madd_epi16(zbz0, _mm256_castsi256_si128(m2));
let mut v = _mm_add_epi32(v0, v1);
v = _mm_srai_epi32::<PRECISION>(v);
v = _mm_max_epi32(v, _mm_setzero_si128());
v = _mm_min_epi32(v, _mm256_castsi256_si128(v_max_value));
_mm_store_si128(temporary0.0.as_mut_ptr() as *mut _, v);
dst[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
Ok(())
}
}
impl<
T: Copy + PointeeSizeExpressible + 'static + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> TransformExecutor<T>
for TransformShaperRgbQ2_13OptAvx512<
T,
SRC_LAYOUT,
DST_LAYOUT,
LINEAR_CAP,
GAMMA_LUT,
PRECISION,
>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
unsafe { self.transform_avx512(src, dst) }
}
}
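
For reference, a scalar sketch of the Q2.13 arithmetic that `transform_avx512`
vectorizes above; the table names and sizes are illustrative assumptions, not
the vendored types:

fn q2_13_pixel(
    rgb: [u8; 3],
    linear: &[i16; 256],   // device value -> linear light, Q2.13
    matrix: [[i16; 3]; 3], // adaptation matrix, Q2.13
    gamma: &[u8; 8192],    // linear light -> device value (8192 = 1.0 in Q2.13)
) -> [u8; 3] {
    const PRECISION: i32 = 13;
    const ROUNDING: i32 = 1 << (PRECISION - 1);
    let (r, g, b) = (
        linear[rgb[0] as usize] as i32,
        linear[rgb[1] as usize] as i32,
        linear[rgb[2] as usize] as i32,
    );
    let mut out = [0u8; 3];
    for (o, row) in out.iter_mut().zip(matrix.iter()) {
        // i16 * i16 products fit in i32; round before the arithmetic shift,
        // then clamp into the gamma table like the SIMD max/min pair does.
        let acc = r * row[0] as i32 + g * row[1] as i32 + b * row[2] as i32 + ROUNDING;
        let idx = (acc >> PRECISION).clamp(0, gamma.len() as i32 - 1);
        *o = gamma[idx as usize];
    }
    out
}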

121
vendor/moxcms/src/conversions/bpc.rs vendored Normal file

@@ -0,0 +1,121 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
//
// use crate::conversions::interpolator::{MultidimensionalInterpolation, Tetrahedral};
// use crate::conversions::transform_lut4_to_4::{NonFiniteVector3fLerp, Vector3fCmykLerp};
// use crate::mlaf::mlaf;
// use crate::{Chromaticity, ColorProfile, DataColorSpace, Lab, Xyz};
//
// impl ColorProfile {
// #[inline]
// pub(crate) fn detect_black_point<const GRID_SIZE: usize>(&self, lut: &[f32]) -> Option<Xyz> {
// if self.color_space == DataColorSpace::Cmyk {
// // if let Some(mut bp) = self.black_point {
// // if let Some(wp) = self.media_white_point.map(|x| x.normalize()) {
// // if wp != Chromaticity::D50.to_xyz() {
// // let ad = adaption_matrix(wp, Chromaticity::D50.to_xyz());
// // let v = ad.mul_vector(bp.to_vector());
// // bp = Xyz {
// // x: v.v[0],
// // y: v.v[1],
// // z: v.v[2],
// // };
// // }
// // }
// // let mut lab = Lab::from_xyz(bp);
// // lab.a = 0.;
// // lab.b = 0.;
// // if lab.l > 50. {
// // lab.l = 50.;
// // }
// // bp = lab.to_xyz();
// // return Some(bp);
// // }
// let c = 65535;
// let m = 65535;
// let y = 65535;
// let k = 65535;
//
// let linear_k: f32 = k as f32 * (1. / 65535.);
// let w: i32 = k * (GRID_SIZE as i32 - 1) / 65535;
// let w_n: i32 = (w + 1).min(GRID_SIZE as i32 - 1);
// let t: f32 = linear_k * (GRID_SIZE as i32 - 1) as f32 - w as f32;
//
// let grid_size = GRID_SIZE as i32;
// let grid_size3 = grid_size * grid_size * grid_size;
//
// let table1 = &lut[(w * grid_size3 * 3) as usize..];
// let table2 = &lut[(w_n * grid_size3 * 3) as usize..];
//
// let tetrahedral1 = Tetrahedral::<GRID_SIZE>::new(table1);
// let tetrahedral2 = Tetrahedral::<GRID_SIZE>::new(table2);
// let r1 = tetrahedral1.inter3(c, m, y);
// let r2 = tetrahedral2.inter3(c, m, y);
// let r = NonFiniteVector3fLerp::interpolate(r1, r2, t, 1.0);
//
// let mut lab = Lab::from_xyz(Xyz {
// x: r.v[0],
// y: r.v[1],
// z: r.v[2],
// });
// lab.a = 0.;
// lab.b = 0.;
// if lab.l > 50. {
// lab.l = 50.;
// }
// let bp = lab.to_xyz();
//
// return Some(bp);
// }
// if self.color_space == DataColorSpace::Rgb {
// return Some(Xyz::new(0.0, 0.0, 0.0));
// }
// None
// }
// }
//
// pub(crate) fn compensate_bpc_in_lut(lut_xyz: &mut [f32], src_bp: Xyz, dst_bp: Xyz) {
// const WP_50: Xyz = Chromaticity::D50.to_xyz();
// let tx = src_bp.x - WP_50.x;
// let ty = src_bp.y - WP_50.y;
// let tz = src_bp.z - WP_50.z;
// let ax = (dst_bp.x - WP_50.x) / tx;
// let ay = (dst_bp.y - WP_50.y) / ty;
// let az = (dst_bp.z - WP_50.z) / tz;
//
// let bx = -WP_50.x * (dst_bp.x - src_bp.x) / tx;
// let by = -WP_50.y * (dst_bp.y - src_bp.y) / ty;
// let bz = -WP_50.z * (dst_bp.z - src_bp.z) / tz;
//
// for dst in lut_xyz.chunks_exact_mut(3) {
// dst[0] = mlaf(bx, dst[0], ax);
// dst[1] = mlaf(by, dst[1], ay);
// dst[2] = mlaf(bz, dst[2], az);
// }
// }


@@ -0,0 +1,416 @@
/*
* // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
#[derive(Clone)]
struct TransformGray2RgbFusedExecutor<
T,
const SRC_LAYOUT: u8,
const DEST_LAYOUT: u8,
const BUCKET: usize,
> {
fused_gamma: Box<[T; BUCKET]>,
bit_depth: usize,
}
pub(crate) fn make_gray_to_x<
T: Copy + Default + PointeeSizeExpressible + 'static + Send + Sync,
const BUCKET: usize,
>(
src_layout: Layout,
dst_layout: Layout,
gray_linear: &[f32; BUCKET],
gray_gamma: &[T; 65536],
bit_depth: usize,
gamma_lut: usize,
) -> Result<Box<dyn TransformExecutor<T> + Sync + Send>, CmsError>
where
u32: AsPrimitive<T>,
{
if src_layout != Layout::Gray && src_layout != Layout::GrayAlpha {
return Err(CmsError::UnsupportedProfileConnection);
}
let mut fused_gamma = Box::new([T::default(); BUCKET]);
let max_lut_size = (gamma_lut - 1) as f32;
for (&src, dst) in gray_linear.iter().zip(fused_gamma.iter_mut()) {
let possible_value = ((src * max_lut_size).round() as u32).min(max_lut_size as u32) as u16;
*dst = gray_gamma[possible_value as usize];
}
match src_layout {
Layout::Gray => match dst_layout {
Layout::Rgb => Ok(Box::new(TransformGray2RgbFusedExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::Rgb as u8 },
BUCKET,
> {
fused_gamma,
bit_depth,
})),
Layout::Rgba => Ok(Box::new(TransformGray2RgbFusedExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::Rgba as u8 },
BUCKET,
> {
fused_gamma,
bit_depth,
})),
Layout::Gray => Ok(Box::new(TransformGray2RgbFusedExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::Gray as u8 },
BUCKET,
> {
fused_gamma,
bit_depth,
})),
Layout::GrayAlpha => Ok(Box::new(TransformGray2RgbFusedExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::GrayAlpha as u8 },
BUCKET,
> {
fused_gamma,
bit_depth,
})),
            _ => Err(CmsError::UnsupportedProfileConnection),
},
Layout::GrayAlpha => match dst_layout {
Layout::Rgb => Ok(Box::new(TransformGray2RgbFusedExecutor::<
T,
                { Layout::GrayAlpha as u8 },
                { Layout::Rgb as u8 },
BUCKET,
> {
fused_gamma,
bit_depth,
})),
Layout::Rgba => Ok(Box::new(TransformGray2RgbFusedExecutor::<
T,
                { Layout::GrayAlpha as u8 },
{ Layout::Rgba as u8 },
BUCKET,
> {
fused_gamma,
bit_depth,
})),
Layout::Gray => Ok(Box::new(TransformGray2RgbFusedExecutor::<
T,
                { Layout::GrayAlpha as u8 },
{ Layout::Gray as u8 },
BUCKET,
> {
fused_gamma,
bit_depth,
})),
Layout::GrayAlpha => Ok(Box::new(TransformGray2RgbFusedExecutor::<
T,
{ Layout::GrayAlpha as u8 },
{ Layout::GrayAlpha as u8 },
BUCKET,
> {
fused_gamma,
bit_depth,
})),
            _ => Err(CmsError::UnsupportedProfileConnection),
},
_ => Err(CmsError::UnsupportedProfileConnection),
}
}
impl<
T: Copy + Default + PointeeSizeExpressible + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const BUCKET: usize,
> TransformExecutor<T> for TransformGray2RgbFusedExecutor<T, SRC_LAYOUT, DST_LAYOUT, BUCKET>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let is_gray_alpha = src_cn == Layout::GrayAlpha;
let max_value: T = ((1u32 << self.bit_depth as u32) - 1u32).as_();
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let g = self.fused_gamma[src[0]._as_usize()];
let a = if is_gray_alpha { src[1] } else { max_value };
dst[0] = g;
if dst_cn == Layout::GrayAlpha {
dst[1] = a;
} else if dst_cn == Layout::Rgb {
dst[1] = g;
dst[2] = g;
} else if dst_cn == Layout::Rgba {
dst[1] = g;
dst[2] = g;
dst[3] = a;
}
}
Ok(())
}
}
#[derive(Clone)]
struct TransformGrayToRgbExecutor<
T,
const SRC_LAYOUT: u8,
const DEST_LAYOUT: u8,
const BUCKET: usize,
> {
gray_linear: Box<[f32; BUCKET]>,
red_gamma: Box<[T; 65536]>,
green_gamma: Box<[T; 65536]>,
blue_gamma: Box<[T; 65536]>,
bit_depth: usize,
gamma_lut: usize,
}
#[allow(clippy::too_many_arguments)]
pub(crate) fn make_gray_to_unfused<
T: Copy + Default + PointeeSizeExpressible + 'static + Send + Sync,
const BUCKET: usize,
>(
src_layout: Layout,
dst_layout: Layout,
gray_linear: Box<[f32; BUCKET]>,
red_gamma: Box<[T; 65536]>,
green_gamma: Box<[T; 65536]>,
blue_gamma: Box<[T; 65536]>,
bit_depth: usize,
gamma_lut: usize,
) -> Result<Box<dyn TransformExecutor<T> + Sync + Send>, CmsError>
where
u32: AsPrimitive<T>,
{
if src_layout != Layout::Gray && src_layout != Layout::GrayAlpha {
return Err(CmsError::UnsupportedProfileConnection);
}
if dst_layout != Layout::Rgb && dst_layout != Layout::Rgba {
return Err(CmsError::UnsupportedProfileConnection);
}
match src_layout {
Layout::Gray => match dst_layout {
Layout::Rgb => Ok(Box::new(TransformGrayToRgbExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::Rgb as u8 },
BUCKET,
> {
gray_linear,
red_gamma,
green_gamma,
blue_gamma,
bit_depth,
gamma_lut,
})),
Layout::Rgba => Ok(Box::new(TransformGrayToRgbExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::Rgba as u8 },
BUCKET,
> {
gray_linear,
red_gamma,
green_gamma,
blue_gamma,
bit_depth,
gamma_lut,
})),
Layout::Gray => Ok(Box::new(TransformGrayToRgbExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::Gray as u8 },
BUCKET,
> {
gray_linear,
red_gamma,
green_gamma,
blue_gamma,
bit_depth,
gamma_lut,
})),
Layout::GrayAlpha => Ok(Box::new(TransformGrayToRgbExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::GrayAlpha as u8 },
BUCKET,
> {
gray_linear,
red_gamma,
green_gamma,
blue_gamma,
bit_depth,
gamma_lut,
})),
_ => Err(CmsError::UnsupportedProfileConnection),
},
Layout::GrayAlpha => match dst_layout {
Layout::Rgb => Ok(Box::new(TransformGrayToRgbExecutor::<
T,
                { Layout::GrayAlpha as u8 },
                { Layout::Rgb as u8 },
BUCKET,
> {
gray_linear,
red_gamma,
green_gamma,
blue_gamma,
bit_depth,
gamma_lut,
})),
Layout::Rgba => Ok(Box::new(TransformGrayToRgbExecutor::<
T,
                { Layout::GrayAlpha as u8 },
{ Layout::Rgba as u8 },
BUCKET,
> {
gray_linear,
red_gamma,
green_gamma,
blue_gamma,
bit_depth,
gamma_lut,
})),
Layout::Gray => Ok(Box::new(TransformGrayToRgbExecutor::<
T,
                { Layout::GrayAlpha as u8 },
{ Layout::Gray as u8 },
BUCKET,
> {
gray_linear,
red_gamma,
green_gamma,
blue_gamma,
bit_depth,
gamma_lut,
})),
Layout::GrayAlpha => Ok(Box::new(TransformGrayToRgbExecutor::<
T,
{ Layout::GrayAlpha as u8 },
{ Layout::GrayAlpha as u8 },
BUCKET,
> {
gray_linear,
red_gamma,
green_gamma,
blue_gamma,
bit_depth,
gamma_lut,
})),
_ => Err(CmsError::UnsupportedProfileConnection),
},
_ => Err(CmsError::UnsupportedProfileConnection),
}
}
impl<
T: Copy + Default + PointeeSizeExpressible + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const BUCKET: usize,
> TransformExecutor<T> for TransformGrayToRgbExecutor<T, SRC_LAYOUT, DST_LAYOUT, BUCKET>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let is_gray_alpha = src_cn == Layout::GrayAlpha;
let max_value: T = ((1u32 << self.bit_depth as u32) - 1u32).as_();
let max_lut_size = (self.gamma_lut - 1) as f32;
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let g = self.gray_linear[src[0]._as_usize()];
let a = if is_gray_alpha { src[1] } else { max_value };
let possible_value = ((g * max_lut_size).round() as u16) as usize;
let red_value = self.red_gamma[possible_value];
let green_value = self.green_gamma[possible_value];
let blue_value = self.blue_gamma[possible_value];
if dst_cn == Layout::Rgb {
dst[0] = red_value;
dst[1] = green_value;
dst[2] = blue_value;
} else if dst_cn == Layout::Rgba {
dst[0] = red_value;
dst[1] = green_value;
dst[2] = blue_value;
dst[3] = a;
} else {
return Err(CmsError::UnsupportedProfileConnection);
}
}
Ok(())
}
}
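
The fused executor above folds the linearization and encoding tables into one
gray -> device LUT at construction time; a minimal sketch of that composition
(slice-based signature for illustration, not the vendored generics):

fn fuse_gray_lut(gray_linear: &[f32], gamma: &[u8]) -> Vec<u8> {
    let max_index = (gamma.len() - 1) as f32;
    gray_linear
        .iter()
        .map(|&lin| {
            // Round first, then clamp so values marginally above 1.0 stay in range.
            let idx = (lin * max_index).round().clamp(0.0, max_index) as usize;
            gamma[idx]
        })
        .collect()
}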


@@ -0,0 +1,383 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::transform::PointeeSizeExpressible;
use crate::trc::ToneCurveEvaluator;
use crate::{CmsError, Layout, Rgb, TransformExecutor};
use num_traits::AsPrimitive;
use std::marker::PhantomData;
struct TransformGrayOneToOneExecutor<T, const SRC_LAYOUT: u8, const DEST_LAYOUT: u8> {
linear_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
gamma_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
_phantom: PhantomData<T>,
bit_depth: usize,
}
pub(crate) fn make_gray_to_one_trc_extended<
T: Copy + Default + PointeeSizeExpressible + 'static + Send + Sync + AsPrimitive<f32>,
>(
src_layout: Layout,
dst_layout: Layout,
linear_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
gamma_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
bit_depth: usize,
) -> Result<Box<dyn TransformExecutor<T> + Sync + Send>, CmsError>
where
u32: AsPrimitive<T>,
f32: AsPrimitive<T>,
{
if src_layout != Layout::Gray && src_layout != Layout::GrayAlpha {
return Err(CmsError::UnsupportedProfileConnection);
}
match src_layout {
Layout::Gray => match dst_layout {
Layout::Rgb => Ok(Box::new(TransformGrayOneToOneExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::Rgb as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
Layout::Rgba => Ok(Box::new(TransformGrayOneToOneExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::Rgba as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
Layout::Gray => Ok(Box::new(TransformGrayOneToOneExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::Gray as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
Layout::GrayAlpha => Ok(Box::new(TransformGrayOneToOneExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::GrayAlpha as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
            _ => Err(CmsError::UnsupportedProfileConnection),
},
Layout::GrayAlpha => match dst_layout {
Layout::Rgb => Ok(Box::new(TransformGrayOneToOneExecutor::<
T,
                { Layout::GrayAlpha as u8 },
                { Layout::Rgb as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
Layout::Rgba => Ok(Box::new(TransformGrayOneToOneExecutor::<
T,
                { Layout::GrayAlpha as u8 },
{ Layout::Rgba as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
Layout::Gray => Ok(Box::new(TransformGrayOneToOneExecutor::<
T,
                { Layout::GrayAlpha as u8 },
{ Layout::Gray as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
Layout::GrayAlpha => Ok(Box::new(TransformGrayOneToOneExecutor::<
T,
{ Layout::GrayAlpha as u8 },
{ Layout::GrayAlpha as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
            _ => Err(CmsError::UnsupportedProfileConnection),
},
_ => Err(CmsError::UnsupportedProfileConnection),
}
}
impl<
T: Copy + Default + PointeeSizeExpressible + 'static + AsPrimitive<f32>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
> TransformExecutor<T> for TransformGrayOneToOneExecutor<T, SRC_LAYOUT, DST_LAYOUT>
where
u32: AsPrimitive<T>,
f32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let is_gray_alpha = src_cn == Layout::GrayAlpha;
let max_value: T = ((1u32 << self.bit_depth as u32) - 1u32).as_();
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let linear_value = self.linear_eval.evaluate_value(src[0].as_());
let g = self.gamma_eval.evaluate_value(linear_value).as_();
let a = if is_gray_alpha { src[1] } else { max_value };
dst[0] = g;
if dst_cn == Layout::GrayAlpha {
dst[1] = a;
} else if dst_cn == Layout::Rgb {
dst[1] = g;
dst[2] = g;
} else if dst_cn == Layout::Rgba {
dst[1] = g;
dst[2] = g;
dst[3] = a;
}
}
Ok(())
}
}
struct TransformGrayToRgbExtendedExecutor<T, const SRC_LAYOUT: u8, const DEST_LAYOUT: u8> {
linear_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
gamma_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
_phantom: PhantomData<T>,
bit_depth: usize,
}
pub(crate) fn make_gray_to_rgb_extended<
T: Copy + Default + PointeeSizeExpressible + 'static + Send + Sync + AsPrimitive<f32>,
>(
src_layout: Layout,
dst_layout: Layout,
linear_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
gamma_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
bit_depth: usize,
) -> Result<Box<dyn TransformExecutor<T> + Sync + Send>, CmsError>
where
u32: AsPrimitive<T>,
f32: AsPrimitive<T>,
{
if src_layout != Layout::Gray && src_layout != Layout::GrayAlpha {
return Err(CmsError::UnsupportedProfileConnection);
}
if dst_layout != Layout::Rgb && dst_layout != Layout::Rgba {
return Err(CmsError::UnsupportedProfileConnection);
}
match src_layout {
Layout::Gray => match dst_layout {
Layout::Rgb => Ok(Box::new(TransformGrayToRgbExtendedExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::Rgb as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
Layout::Rgba => Ok(Box::new(TransformGrayToRgbExtendedExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::Rgba as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
Layout::Gray => Ok(Box::new(TransformGrayToRgbExtendedExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::Gray as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
Layout::GrayAlpha => Ok(Box::new(TransformGrayToRgbExtendedExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::GrayAlpha as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
_ => Err(CmsError::UnsupportedProfileConnection),
},
Layout::GrayAlpha => match dst_layout {
Layout::Rgb => Ok(Box::new(TransformGrayToRgbExtendedExecutor::<
T,
                { Layout::GrayAlpha as u8 },
                { Layout::Rgb as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
Layout::Rgba => Ok(Box::new(TransformGrayToRgbExtendedExecutor::<
T,
                { Layout::GrayAlpha as u8 },
{ Layout::Rgba as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
Layout::Gray => Ok(Box::new(TransformGrayToRgbExtendedExecutor::<
T,
                { Layout::GrayAlpha as u8 },
{ Layout::Gray as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
Layout::GrayAlpha => Ok(Box::new(TransformGrayToRgbExtendedExecutor::<
T,
{ Layout::GrayAlpha as u8 },
{ Layout::GrayAlpha as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
_ => Err(CmsError::UnsupportedProfileConnection),
},
_ => Err(CmsError::UnsupportedProfileConnection),
}
}
impl<
T: Copy + Default + PointeeSizeExpressible + 'static + AsPrimitive<f32>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
> TransformExecutor<T> for TransformGrayToRgbExtendedExecutor<T, SRC_LAYOUT, DST_LAYOUT>
where
u32: AsPrimitive<T>,
f32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let is_gray_alpha = src_cn == Layout::GrayAlpha;
let max_value: T = ((1u32 << self.bit_depth as u32) - 1u32).as_();
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let linear_value = self.linear_eval.evaluate_value(src[0].as_());
let a = if is_gray_alpha { src[1] } else { max_value };
let tristimulus = self.gamma_eval.evaluate_tristimulus(Rgb::new(
linear_value,
linear_value,
linear_value,
));
let red_value = tristimulus.r.as_();
let green_value = tristimulus.g.as_();
let blue_value = tristimulus.b.as_();
if dst_cn == Layout::Rgb {
dst[0] = red_value;
dst[1] = green_value;
dst[2] = blue_value;
} else if dst_cn == Layout::Rgba {
dst[0] = red_value;
dst[1] = green_value;
dst[2] = blue_value;
dst[3] = a;
} else {
return Err(CmsError::UnsupportedProfileConnection);
}
}
Ok(())
}
}


@@ -0,0 +1,645 @@
/*
* // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#![allow(dead_code)]
use crate::conversions::lut_transforms::LUT_SAMPLING;
use crate::math::{FusedMultiplyAdd, FusedMultiplyNegAdd};
use crate::{Vector3f, Vector4f};
use num_traits::AsPrimitive;
use std::ops::{Add, Mul, Sub};
#[cfg(feature = "options")]
pub(crate) struct Tetrahedral<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [f32],
}
#[cfg(feature = "options")]
pub(crate) struct Pyramidal<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [f32],
}
#[cfg(feature = "options")]
pub(crate) struct Prismatic<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [f32],
}
pub(crate) struct Trilinear<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [f32],
}
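/// Precomputed lattice coordinates for one input channel: `x` is the lower
/// grid index, `x_n` the clamped upper neighbour, and `w` the fractional
/// distance between them. For example (assuming LUT_SAMPLING == 255), with
/// GRID_SIZE = 17 an input of 200 yields x = 200 * 16 / 255 = 12 and
/// w = 200 * (16 / 255) - 12 ≈ 0.549.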
#[derive(Debug, Copy, Clone, Default)]
pub(crate) struct BarycentricWeight<V> {
pub x: i32,
pub x_n: i32,
pub w: V,
}
impl BarycentricWeight<f32> {
pub(crate) fn create_ranged_256<const GRID_SIZE: usize>() -> Box<[BarycentricWeight<f32>; 256]>
{
let mut weights = Box::new([BarycentricWeight::default(); 256]);
for (index, weight) in weights.iter_mut().enumerate() {
const SCALE: f32 = 1.0 / LUT_SAMPLING as f32;
let x: i32 = index as i32 * (GRID_SIZE as i32 - 1) / LUT_SAMPLING as i32;
let x_n: i32 = (x + 1).min(GRID_SIZE as i32 - 1);
let scale = (GRID_SIZE as i32 - 1) as f32 * SCALE;
let dr = index as f32 * scale - x as f32;
*weight = BarycentricWeight { x, x_n, w: dr };
}
weights
}
#[cfg(feature = "options")]
pub(crate) fn create_binned<const GRID_SIZE: usize, const BINS: usize>()
-> Box<[BarycentricWeight<f32>; 65536]> {
let mut weights = Box::new([BarycentricWeight::<f32>::default(); 65536]);
let b_scale: f32 = 1.0 / (BINS - 1) as f32;
for (index, weight) in weights.iter_mut().enumerate().take(BINS) {
let x: i32 = (index as f32 * (GRID_SIZE as i32 - 1) as f32 * b_scale).floor() as i32;
let x_n: i32 = (x + 1).min(GRID_SIZE as i32 - 1);
let scale = (GRID_SIZE as i32 - 1) as f32 * b_scale;
let dr = index as f32 * scale - x as f32;
*weight = BarycentricWeight { x, x_n, w: dr };
}
weights
}
}
#[allow(dead_code)]
impl BarycentricWeight<i16> {
pub(crate) fn create_ranged_256<const GRID_SIZE: usize>() -> Box<[BarycentricWeight<i16>; 256]>
{
let mut weights = Box::new([BarycentricWeight::default(); 256]);
for (index, weight) in weights.iter_mut().enumerate() {
const SCALE: f32 = 1.0 / LUT_SAMPLING as f32;
let x: i32 = index as i32 * (GRID_SIZE as i32 - 1) / LUT_SAMPLING as i32;
let x_n: i32 = (x + 1).min(GRID_SIZE as i32 - 1);
let scale = (GRID_SIZE as i32 - 1) as f32 * SCALE;
const Q: f32 = ((1i32 << 15) - 1) as f32;
let dr = ((index as f32 * scale - x as f32) * Q)
.round()
.min(i16::MAX as f32)
.max(-i16::MAX as f32) as i16;
*weight = BarycentricWeight { x, x_n, w: dr };
}
weights
}
#[cfg(feature = "options")]
pub(crate) fn create_binned<const GRID_SIZE: usize, const BINS: usize>()
-> Box<[BarycentricWeight<i16>; 65536]> {
let mut weights = Box::new([BarycentricWeight::<i16>::default(); 65536]);
let b_scale: f32 = 1.0 / (BINS - 1) as f32;
for (index, weight) in weights.iter_mut().enumerate().take(BINS) {
let x: i32 = (index as f32 * (GRID_SIZE as i32 - 1) as f32 * b_scale).floor() as i32;
let x_n: i32 = (x + 1).min(GRID_SIZE as i32 - 1);
let scale = (GRID_SIZE as i32 - 1) as f32 * b_scale;
const Q: f32 = ((1i32 << 15) - 1) as f32;
let dr = ((index as f32 * scale - x as f32) * Q)
.round()
.min(i16::MAX as f32)
.max(-i16::MAX as f32) as i16;
*weight = BarycentricWeight { x, x_n, w: dr };
}
weights
}
}
trait Fetcher<T> {
fn fetch(&self, x: i32, y: i32, z: i32) -> T;
}
struct TetrahedralFetchVector3f<'a, const GRID_SIZE: usize> {
cube: &'a [f32],
}
pub(crate) trait MultidimensionalInterpolation<'a, const GRID_SIZE: usize> {
fn new(table: &'a [f32]) -> Self;
fn inter3<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
) -> Vector3f;
fn inter4<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
) -> Vector4f;
}
impl<const GRID_SIZE: usize> Fetcher<Vector3f> for TetrahedralFetchVector3f<'_, GRID_SIZE> {
#[inline(always)]
fn fetch(&self, x: i32, y: i32, z: i32) -> Vector3f {
let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
+ y as u32 * GRID_SIZE as u32
+ z as u32) as usize
* 3;
let jx = &self.cube[offset..offset + 3];
Vector3f {
v: [jx[0], jx[1], jx[2]],
}
}
}
struct TetrahedralFetchVector4f<'a, const GRID_SIZE: usize> {
cube: &'a [f32],
}
impl<const GRID_SIZE: usize> Fetcher<Vector4f> for TetrahedralFetchVector4f<'_, GRID_SIZE> {
#[inline(always)]
fn fetch(&self, x: i32, y: i32, z: i32) -> Vector4f {
let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
+ y as u32 * GRID_SIZE as u32
+ z as u32) as usize
* 4;
let jx = &self.cube[offset..offset + 4];
Vector4f {
v: [jx[0], jx[1], jx[2], jx[3]],
}
}
}
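// Tetrahedral interpolation: the ordering of the fractional coordinates
// (rx, ry, rz) picks one of the six tetrahedra tiling the unit cube; each
// branch forms forward differences c1..c3 along that tetrahedron's edges so
// the result is c0 + c1*rx + c2*ry + c3*rz.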
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> Tetrahedral<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<
T: Copy
+ Sub<T, Output = T>
+ Mul<T, Output = T>
+ Mul<f32, Output = T>
+ Add<T, Output = T>
+ From<f32>
+ FusedMultiplyAdd<T>,
U: AsPrimitive<usize>,
const BINS: usize,
>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
r: impl Fetcher<T>,
) -> T {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let rx = lut_r.w;
let ry = lut_g.w;
let rz = lut_b.w;
let c0 = r.fetch(x, y, z);
let c2;
let c1;
let c3;
if rx >= ry {
if ry >= rz {
//rx >= ry && ry >= rz
c1 = r.fetch(x_n, y, z) - c0;
c2 = r.fetch(x_n, y_n, z) - r.fetch(x_n, y, z);
c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
} else if rx >= rz {
//rx >= rz && rz >= ry
c1 = r.fetch(x_n, y, z) - c0;
c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
c3 = r.fetch(x_n, y, z_n) - r.fetch(x_n, y, z);
} else {
//rz > rx && rx >= ry
c1 = r.fetch(x_n, y, z_n) - r.fetch(x, y, z_n);
c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
c3 = r.fetch(x, y, z_n) - c0;
}
} else if rx >= rz {
//ry > rx && rx >= rz
c1 = r.fetch(x_n, y_n, z) - r.fetch(x, y_n, z);
c2 = r.fetch(x, y_n, z) - c0;
c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
} else if ry >= rz {
//ry >= rz && rz > rx
c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
c2 = r.fetch(x, y_n, z) - c0;
c3 = r.fetch(x, y_n, z_n) - r.fetch(x, y_n, z);
} else {
//rz > ry && ry > rx
c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
c2 = r.fetch(x, y_n, z_n) - r.fetch(x, y, z_n);
c3 = r.fetch(x, y, z_n) - c0;
}
let s0 = c0.mla(c1, T::from(rx));
let s1 = s0.mla(c2, T::from(ry));
s1.mla(c3, T::from(rz))
}
}
macro_rules! define_md_inter {
($interpolator: ident) => {
impl<'a, const GRID_SIZE: usize> MultidimensionalInterpolation<'a, GRID_SIZE>
for $interpolator<'a, GRID_SIZE>
{
fn new(table: &'a [f32]) -> Self {
Self { cube: table }
}
fn inter3<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
) -> Vector3f {
self.interpolate::<Vector3f, U, BINS>(
in_r,
in_g,
in_b,
lut,
TetrahedralFetchVector3f::<GRID_SIZE> { cube: self.cube },
)
}
fn inter4<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
) -> Vector4f {
self.interpolate::<Vector4f, U, BINS>(
in_r,
in_g,
in_b,
lut,
TetrahedralFetchVector4f::<GRID_SIZE> { cube: self.cube },
)
}
}
};
}
#[cfg(feature = "options")]
define_md_inter!(Tetrahedral);
#[cfg(feature = "options")]
define_md_inter!(Pyramidal);
#[cfg(feature = "options")]
define_md_inter!(Prismatic);
define_md_inter!(Trilinear);
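// Pyramidal interpolation: the smallest fractional coordinate selects one of
// three square pyramids tiling the cube; a single bilinear cross term (c4)
// corrects the pyramid's base face.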
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> Pyramidal<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<
T: Copy
+ Sub<T, Output = T>
+ Mul<T, Output = T>
+ Mul<f32, Output = T>
+ Add<T, Output = T>
+ From<f32>
+ FusedMultiplyAdd<T>,
U: AsPrimitive<usize>,
const BINS: usize,
>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
r: impl Fetcher<T>,
) -> T {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let c0 = r.fetch(x, y, z);
if dr > db && dg > db {
let x0 = r.fetch(x_n, y_n, z_n);
let x1 = r.fetch(x_n, y_n, z);
let x2 = r.fetch(x_n, y, z);
let x3 = r.fetch(x, y_n, z);
let c1 = x0 - x1;
let c2 = x2 - c0;
let c3 = x3 - c0;
let c4 = c0 - x3 - x2 + x1;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dr * dg))
} else if db > dr && dg > dr {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y_n, z_n);
let x2 = r.fetch(x, y_n, z_n);
let x3 = r.fetch(x, y_n, z);
let c1 = x0 - c0;
let c2 = x1 - x2;
let c3 = x3 - c0;
let c4 = c0 - x3 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dg * db))
} else {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y, z);
let x2 = r.fetch(x_n, y, z_n);
let x3 = r.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - c0;
let c3 = x3 - x2;
let c4 = c0 - x1 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(db * dr))
}
}
}
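// Prismatic interpolation: the cube is cut into two triangular prisms along
// the db >= dr plane; two cross terms (c4, c5) handle the rectangular faces.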
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> Prismatic<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<
T: Copy
+ Sub<T, Output = T>
+ Mul<T, Output = T>
+ Mul<f32, Output = T>
+ Add<T, Output = T>
+ From<f32>
+ FusedMultiplyAdd<T>,
U: AsPrimitive<usize>,
const BINS: usize,
>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
r: impl Fetcher<T>,
) -> T {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let c0 = r.fetch(x, y, z);
if db >= dr {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y, z_n);
let x2 = r.fetch(x, y_n, z);
let x3 = r.fetch(x, y_n, z_n);
let x4 = r.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - x0;
let c3 = x2 - c0;
let c4 = c0 - x2 - x0 + x3;
let c5 = x0 - x3 - x1 + x4;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
} else {
let x0 = r.fetch(x_n, y, z);
let x1 = r.fetch(x_n, y, z_n);
let x2 = r.fetch(x, y_n, z);
let x3 = r.fetch(x_n, y_n, z);
let x4 = r.fetch(x_n, y_n, z_n);
let c1 = x1 - x0;
let c2 = x0 - c0;
let c3 = x2 - c0;
let c4 = x0 - x3 - x1 + x4;
let c5 = c0 - x2 - x0 + x3;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
}
}
}
impl<const GRID_SIZE: usize> Trilinear<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<
T: Copy
+ Sub<T, Output = T>
+ Mul<T, Output = T>
+ Mul<f32, Output = T>
+ Add<T, Output = T>
+ From<f32>
+ FusedMultiplyAdd<T>
+ FusedMultiplyNegAdd<T>,
U: AsPrimitive<usize>,
const BINS: usize,
>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
r: impl Fetcher<T>,
) -> T {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let w0 = T::from(dr);
let w1 = T::from(dg);
let w2 = T::from(db);
let c000 = r.fetch(x, y, z);
let c100 = r.fetch(x_n, y, z);
let c010 = r.fetch(x, y_n, z);
let c110 = r.fetch(x_n, y_n, z);
let c001 = r.fetch(x, y, z_n);
let c101 = r.fetch(x_n, y, z_n);
let c011 = r.fetch(x, y_n, z_n);
let c111 = r.fetch(x_n, y_n, z_n);
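        // Each neg_mla/mla pair below is a lerp along one axis, assuming
        // `a.neg_mla(b, t)` computes a - b*t: a.neg_mla(a, t).mla(c, t)
        // == a*(1 - t) + c*t. Three passes reduce 8 corners -> 4 -> 2 -> 1.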
let dx = T::from(dr);
let c00 = c000.neg_mla(c000, dx).mla(c100, w0);
let c10 = c010.neg_mla(c010, dx).mla(c110, w0);
let c01 = c001.neg_mla(c001, dx).mla(c101, w0);
let c11 = c011.neg_mla(c011, dx).mla(c111, w0);
let dy = T::from(dg);
let c0 = c00.neg_mla(c00, dy).mla(c10, w1);
let c1 = c01.neg_mla(c01, dy).mla(c11, w1);
let dz = T::from(db);
c0.neg_mla(c0, dz).mla(c1, w2)
}
}
pub(crate) trait LutBarycentricReduction<T, U> {
fn reduce<const SRC_BP: usize, const BINS: usize>(v: T) -> U;
}
impl LutBarycentricReduction<u8, u8> for () {
#[inline(always)]
fn reduce<const SRC_BP: usize, const BINS: usize>(v: u8) -> u8 {
v
}
}
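// Widening by byte replication: `u16::from_ne_bytes([v, v])` equals v * 257,
// spreading 0..=255 uniformly over 0..=65535 (the integer analogue of
// repeating the bit pattern).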
impl LutBarycentricReduction<u8, u16> for () {
#[inline(always)]
fn reduce<const SRC_BP: usize, const BINS: usize>(v: u8) -> u16 {
if BINS == 65536 {
return u16::from_ne_bytes([v, v]);
}
if BINS == 16384 {
return u16::from_ne_bytes([v, v]) >> 2;
}
unimplemented!()
}
}
impl LutBarycentricReduction<f32, u8> for () {
#[inline(always)]
fn reduce<const SRC_BP: usize, const BINS: usize>(v: f32) -> u8 {
(v * 255.).round().min(255.).max(0.) as u8
}
}
impl LutBarycentricReduction<f32, u16> for () {
#[inline(always)]
fn reduce<const SRC_BP: usize, const BINS: usize>(v: f32) -> u16 {
let scale = (BINS - 1) as f32;
(v * scale).round().min(scale).max(0.) as u16
}
}
impl LutBarycentricReduction<f64, u8> for () {
#[inline(always)]
fn reduce<const SRC_BP: usize, const BINS: usize>(v: f64) -> u8 {
(v * 255.).round().min(255.).max(0.) as u8
}
}
impl LutBarycentricReduction<f64, u16> for () {
#[inline(always)]
fn reduce<const SRC_BP: usize, const BINS: usize>(v: f64) -> u16 {
let scale = (BINS - 1) as f64;
(v * scale).round().min(scale).max(0.) as u16
}
}
impl LutBarycentricReduction<u16, u16> for () {
#[inline(always)]
fn reduce<const SRC_BP: usize, const BINS: usize>(v: u16) -> u16 {
let src_scale = 1. / ((1 << SRC_BP) - 1) as f32;
let scale = src_scale * (BINS - 1) as f32;
        (v as f32 * scale).round().min((BINS - 1) as f32).max(0.) as u16
}
}
impl LutBarycentricReduction<u16, u8> for () {
#[inline(always)]
fn reduce<const SRC_BP: usize, const BINS: usize>(v: u16) -> u8 {
let shift = SRC_BP as u16 - 8;
if SRC_BP == 16 {
(v >> 8) as u8
} else {
(v >> shift).min(255) as u8
}
}
}


@@ -0,0 +1,118 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::katana::KatanaPostFinalizationStage;
use crate::{CmsError, DataColorSpace, Layout, PointeeSizeExpressible};
use num_traits::AsPrimitive;
use std::marker::PhantomData;
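/// Post-finalization stage that writes an opaque alpha value into the
/// destination when the source had no alpha channel to carry over.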
pub(crate) struct InjectAlphaStage<I> {
pub(crate) dst_layout: Layout,
pub(crate) target_color_space: DataColorSpace,
pub(crate) _phantom: PhantomData<I>,
pub(crate) bit_depth: usize,
}
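/// Post-finalization stage that copies the source alpha channel into the
/// destination alpha channel across differing layouts.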
pub(crate) struct CopyAlphaStage<I> {
pub(crate) src_layout: Layout,
pub(crate) dst_layout: Layout,
pub(crate) target_color_space: DataColorSpace,
pub(crate) _phantom: PhantomData<I>,
}
impl<T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync>
KatanaPostFinalizationStage<T> for InjectAlphaStage<T>
where
f32: AsPrimitive<T>,
{
fn finalize(&self, _: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let norm_value: T = (if T::FINITE {
((1u32 << self.bit_depth) - 1) as f32
} else {
1.0
})
.as_();
if self.dst_layout == Layout::Rgba && self.target_color_space == DataColorSpace::Rgb {
for dst in dst.chunks_exact_mut(self.dst_layout.channels()) {
dst[3] = norm_value;
}
} else if self.dst_layout == Layout::GrayAlpha
&& self.target_color_space == DataColorSpace::Gray
{
for dst in dst.chunks_exact_mut(self.dst_layout.channels()) {
dst[1] = norm_value;
}
}
Ok(())
}
}
impl<T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync>
KatanaPostFinalizationStage<T> for CopyAlphaStage<T>
where
f32: AsPrimitive<T>,
{
fn finalize(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
if self.dst_layout == Layout::Rgba && self.target_color_space == DataColorSpace::Rgb {
if self.src_layout == Layout::Rgba {
for (src, dst) in src
.chunks_exact(self.src_layout.channels())
.zip(dst.chunks_exact_mut(self.dst_layout.channels()))
{
dst[3] = src[3];
}
} else if self.src_layout == Layout::GrayAlpha {
for (src, dst) in src
.chunks_exact(self.src_layout.channels())
.zip(dst.chunks_exact_mut(self.dst_layout.channels()))
{
dst[3] = src[1];
}
}
} else if self.dst_layout == Layout::GrayAlpha
&& self.target_color_space == DataColorSpace::Gray
{
if self.src_layout == Layout::Rgba {
for (src, dst) in src
.chunks_exact(self.src_layout.channels())
.zip(dst.chunks_exact_mut(self.dst_layout.channels()))
{
dst[1] = src[3];
}
} else if self.src_layout == Layout::GrayAlpha {
for (src, dst) in src
.chunks_exact(self.src_layout.channels())
.zip(dst.chunks_exact_mut(self.dst_layout.channels()))
{
dst[1] = src[1];
}
}
}
Ok(())
}
}
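// A minimal usage sketch (the surrounding pipeline wiring is an assumption):
// after color conversion into an RGBA destination, `InjectAlphaStage` fills
// dst[3] with the maximum value for the bit depth, whereas `CopyAlphaStage`
// forwards alpha from an RGBA or GrayAlpha source instead.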


@@ -0,0 +1,483 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::katana::{KatanaFinalStage, KatanaInitialStage};
use crate::mlaf::mlaf;
use crate::safe_math::SafeMul;
use crate::trc::lut_interp_linear_float;
use crate::{
CmsError, Cube, DataColorSpace, InterpolationMethod, LutMultidimensionalType, MalformedSize,
Matrix3d, Matrix3f, PointeeSizeExpressible, TransformOptions, Vector3d, Vector3f,
};
use num_traits::AsPrimitive;
use std::marker::PhantomData;
#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Debug)]
pub(crate) enum MultidimensionalDirection {
DeviceToPcs,
PcsToDevice,
}
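/// ICC multidimensional (mAB/mBA) element with three inputs and three
/// outputs: A-curves feed a 3D CLUT, M-curves pair with the matrix and bias,
/// and B-curves close the pipeline; the stages run in reverse order for the
/// PCS-to-device direction.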
struct Multidimensional3x3<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
> {
a_curves: Option<Box<[Vec<f32>; 3]>>,
m_curves: Option<Box<[Vec<f32>; 3]>>,
b_curves: Option<Box<[Vec<f32>; 3]>>,
clut: Option<Vec<f32>>,
matrix: Matrix3f,
bias: Vector3f,
direction: MultidimensionalDirection,
options: TransformOptions,
pcs: DataColorSpace,
grid_size: [u8; 3],
_phantom: PhantomData<T>,
bit_depth: usize,
}
impl<T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync>
Multidimensional3x3<T>
{
fn execute_matrix_stage(&self, dst: &mut [f32]) {
let m = self.matrix;
let b = self.bias;
if !m.test_equality(Matrix3f::IDENTITY) || !b.eq(&Vector3f::default()) {
for dst in dst.chunks_exact_mut(3) {
let x = dst[0];
let y = dst[1];
let z = dst[2];
dst[0] = mlaf(mlaf(mlaf(b.v[0], x, m.v[0][0]), y, m.v[0][1]), z, m.v[0][2]);
dst[1] = mlaf(mlaf(mlaf(b.v[1], x, m.v[1][0]), y, m.v[1][1]), z, m.v[1][2]);
dst[2] = mlaf(mlaf(mlaf(b.v[2], x, m.v[2][0]), y, m.v[2][1]), z, m.v[2][2]);
}
}
}
fn execute_simple_curves(&self, dst: &mut [f32], curves: &[Vec<f32>; 3]) {
let curve0 = &curves[0];
let curve1 = &curves[1];
let curve2 = &curves[2];
for dst in dst.chunks_exact_mut(3) {
let a0 = dst[0];
let a1 = dst[1];
let a2 = dst[2];
let b0 = lut_interp_linear_float(a0, curve0);
let b1 = lut_interp_linear_float(a1, curve1);
let b2 = lut_interp_linear_float(a2, curve2);
dst[0] = b0;
dst[1] = b1;
dst[2] = b2;
}
}
fn to_pcs_impl<Fetch: Fn(f32, f32, f32) -> Vector3f>(
&self,
input: &[T],
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
let norm_value = if T::FINITE {
1.0 / ((1u32 << self.bit_depth) - 1) as f32
} else {
1.0
};
assert_eq!(
self.direction,
MultidimensionalDirection::DeviceToPcs,
"PCS to device cannot be used on `to pcs` stage"
);
// A-curves stage: per-channel 1D curves feeding the 3D CLUT lookup
if let (Some(a_curves), Some(clut)) = (self.a_curves.as_ref(), self.clut.as_ref()) {
if !clut.is_empty() {
let curve0 = &a_curves[0];
let curve1 = &a_curves[1];
let curve2 = &a_curves[2];
for (src, dst) in input.chunks_exact(3).zip(dst.chunks_exact_mut(3)) {
let b0 = lut_interp_linear_float(src[0].as_() * norm_value, curve0);
let b1 = lut_interp_linear_float(src[1].as_() * norm_value, curve1);
let b2 = lut_interp_linear_float(src[2].as_() * norm_value, curve2);
let interpolated = fetch(b0, b1, b2);
dst[0] = interpolated.v[0];
dst[1] = interpolated.v[1];
dst[2] = interpolated.v[2];
}
} else {
for (src, dst) in input.chunks_exact(3).zip(dst.chunks_exact_mut(3)) {
dst[0] = src[0].as_() * norm_value;
dst[1] = src[1].as_() * norm_value;
dst[2] = src[2].as_() * norm_value;
}
}
} else {
for (src, dst) in input.chunks_exact(3).zip(dst.chunks_exact_mut(3)) {
dst[0] = src[0].as_() * norm_value;
dst[1] = src[1].as_() * norm_value;
dst[2] = src[2].as_() * norm_value;
}
}
// Matrix stage
if let Some(m_curves) = self.m_curves.as_ref() {
self.execute_simple_curves(dst, m_curves);
self.execute_matrix_stage(dst);
}
// B-curves are mandatory in the spec; None means they were all linear and got elided during construction
if let Some(b_curves) = &self.b_curves.as_ref() {
self.execute_simple_curves(dst, b_curves);
}
Ok(())
}
}
impl<T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync>
KatanaInitialStage<f32, T> for Multidimensional3x3<T>
{
fn to_pcs(&self, input: &[T]) -> Result<Vec<f32>, CmsError> {
if input.len() % 3 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let fixed_new_clut = Vec::new();
let new_clut = self.clut.as_ref().unwrap_or(&fixed_new_clut);
let lut = Cube::new_cube(new_clut, self.grid_size);
let mut new_dst = vec![0f32; input.len()];
// For Lab and XYZ PCS, multilinear interpolation is always used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
self.to_pcs_impl(input, &mut new_dst, |x, y, z| lut.trilinear_vec3(x, y, z))?;
return Ok(new_dst);
}
match self.options.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.to_pcs_impl(input, &mut new_dst, |x, y, z| lut.tetra_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.to_pcs_impl(input, &mut new_dst, |x, y, z| lut.pyramid_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.to_pcs_impl(input, &mut new_dst, |x, y, z| lut.prism_vec3(x, y, z))?;
}
InterpolationMethod::Linear => {
self.to_pcs_impl(input, &mut new_dst, |x, y, z| lut.trilinear_vec3(x, y, z))?;
}
}
Ok(new_dst)
}
}
impl<T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync>
Multidimensional3x3<T>
where
f32: AsPrimitive<T>,
{
fn to_output_impl<Fetch: Fn(f32, f32, f32) -> Vector3f>(
&self,
src: &mut [f32],
dst: &mut [T],
fetch: Fetch,
) -> Result<(), CmsError> {
let norm_value = if T::FINITE {
((1u32 << self.bit_depth) - 1) as f32
} else {
1.0
};
assert_eq!(
self.direction,
MultidimensionalDirection::PcsToDevice,
"Device to PCS cannot be used on `to output` stage"
);
if let Some(b_curves) = &self.b_curves.as_ref() {
self.execute_simple_curves(src, b_curves);
}
// Matrix stage
if let Some(m_curves) = self.m_curves.as_ref() {
self.execute_matrix_stage(src);
self.execute_simple_curves(src, m_curves);
}
if let (Some(a_curves), Some(clut)) = (self.a_curves.as_ref(), self.clut.as_ref()) {
if !clut.is_empty() {
let curve0 = &a_curves[0];
let curve1 = &a_curves[1];
let curve2 = &a_curves[2];
for (src, dst) in src.chunks_exact(3).zip(dst.chunks_exact_mut(3)) {
let b0 = lut_interp_linear_float(src[0], curve0);
let b1 = lut_interp_linear_float(src[1], curve1);
let b2 = lut_interp_linear_float(src[2], curve2);
let interpolated = fetch(b0, b1, b2);
if T::FINITE {
dst[0] = (interpolated.v[0] * norm_value)
.round()
.max(0.0)
.min(norm_value)
.as_();
dst[1] = (interpolated.v[1] * norm_value)
.round()
.max(0.0)
.min(norm_value)
.as_();
dst[2] = (interpolated.v[2] * norm_value)
.round()
.max(0.0)
.min(norm_value)
.as_();
} else {
dst[0] = interpolated.v[0].as_();
dst[1] = interpolated.v[1].as_();
dst[2] = interpolated.v[2].as_();
}
}
} else {
for (src, dst) in src.chunks_exact(3).zip(dst.chunks_exact_mut(3)) {
if T::FINITE {
dst[0] = (src[0] * norm_value).round().max(0.0).min(norm_value).as_();
dst[1] = (src[1] * norm_value).round().max(0.0).min(norm_value).as_();
dst[2] = (src[2] * norm_value).round().max(0.0).min(norm_value).as_();
} else {
dst[0] = src[0].as_();
dst[1] = src[1].as_();
dst[2] = src[2].as_();
}
}
}
} else {
for (src, dst) in src.chunks_exact(3).zip(dst.chunks_exact_mut(3)) {
if T::FINITE {
dst[0] = (src[0] * norm_value).round().max(0.0).min(norm_value).as_();
dst[1] = (src[1] * norm_value).round().max(0.0).min(norm_value).as_();
dst[2] = (src[2] * norm_value).round().max(0.0).min(norm_value).as_();
} else {
dst[0] = src[0].as_();
dst[1] = src[1].as_();
dst[2] = src[2].as_();
}
}
}
Ok(())
}
}
impl<T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync>
KatanaFinalStage<f32, T> for Multidimensional3x3<T>
where
f32: AsPrimitive<T>,
{
fn to_output(&self, src: &mut [f32], dst: &mut [T]) -> Result<(), CmsError> {
if src.len() % 3 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % 3 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if src.len() != dst.len() {
return Err(CmsError::LaneSizeMismatch);
}
let fixed_new_clut = Vec::new();
let new_clut = self.clut.as_ref().unwrap_or(&fixed_new_clut);
let lut = Cube::new_cube(new_clut, self.grid_size);
// For Lab and XYZ PCS, multilinear interpolation is always used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.to_output_impl(src, dst, |x, y, z| lut.trilinear_vec3(x, y, z));
}
match self.options.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.to_output_impl(src, dst, |x, y, z| lut.tetra_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.to_output_impl(src, dst, |x, y, z| lut.pyramid_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.to_output_impl(src, dst, |x, y, z| lut.prism_vec3(x, y, z))?;
}
InterpolationMethod::Linear => {
self.to_output_impl(src, dst, |x, y, z| lut.trilinear_vec3(x, y, z))?;
}
}
Ok(())
}
}
fn make_multidimensional_3x3<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
>(
mab: &LutMultidimensionalType,
options: TransformOptions,
pcs: DataColorSpace,
direction: MultidimensionalDirection,
bit_depth: usize,
) -> Result<Multidimensional3x3<T>, CmsError> {
// A 3x3 element must have exactly three input and three output channels
if mab.num_input_channels != 3 || mab.num_output_channels != 3 {
return Err(CmsError::UnsupportedProfileConnection);
}
if mab.b_curves.len() != 3 {
return Err(CmsError::InvalidAtoBLut);
}
let grid_size = [mab.grid_points[0], mab.grid_points[1], mab.grid_points[2]];
let clut: Option<Vec<f32>> = if mab.a_curves.len() == 3 && mab.clut.is_some() {
let clut = mab.clut.as_ref().map(|x| x.to_clut_f32()).unwrap();
let lut_grid = (mab.grid_points[0] as usize)
.safe_mul(mab.grid_points[1] as usize)?
.safe_mul(mab.grid_points[2] as usize)?
.safe_mul(mab.num_output_channels as usize)?;
if clut.len() != lut_grid {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: clut.len(),
expected: lut_grid,
}));
}
Some(clut)
} else {
None
};
let a_curves: Option<Box<[Vec<f32>; 3]>> = if mab.a_curves.len() == 3 && mab.clut.is_some() {
let mut arr = Box::<[Vec<f32>; 3]>::default();
for (a_curve, dst) in mab.a_curves.iter().zip(arr.iter_mut()) {
*dst = a_curve.to_clut()?;
}
Some(arr)
} else {
None
};
let b_curves: Option<Box<[Vec<f32>; 3]>> = if mab.b_curves.len() == 3 {
let mut arr = Box::<[Vec<f32>; 3]>::default();
let all_curves_linear = mab.b_curves.iter().all(|curve| curve.is_linear());
if all_curves_linear {
None
} else {
for (c_curve, dst) in mab.b_curves.iter().zip(arr.iter_mut()) {
*dst = c_curve.to_clut()?;
}
Some(arr)
}
} else {
return Err(CmsError::InvalidAtoBLut);
};
let matrix = mab.matrix.to_f32();
let m_curves: Option<Box<[Vec<f32>; 3]>> = if mab.m_curves.len() == 3 {
let all_curves_linear = mab.m_curves.iter().all(|curve| curve.is_linear());
if !all_curves_linear
|| !mab.matrix.test_equality(Matrix3d::IDENTITY)
|| mab.bias.ne(&Vector3d::default())
{
let mut arr = Box::<[Vec<f32>; 3]>::default();
for (curve, dst) in mab.m_curves.iter().zip(arr.iter_mut()) {
*dst = curve.to_clut()?;
}
Some(arr)
} else {
None
}
} else {
None
};
let bias = mab.bias.cast();
let transform = Multidimensional3x3::<T> {
a_curves,
b_curves,
m_curves,
matrix,
direction,
options,
clut,
pcs,
grid_size,
bias,
_phantom: PhantomData,
bit_depth,
};
Ok(transform)
}
pub(crate) fn multi_dimensional_3x3_to_pcs<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
>(
mab: &LutMultidimensionalType,
options: TransformOptions,
pcs: DataColorSpace,
bit_depth: usize,
) -> Result<Box<dyn KatanaInitialStage<f32, T> + Send + Sync>, CmsError> {
let transform = make_multidimensional_3x3::<T>(
mab,
options,
pcs,
MultidimensionalDirection::DeviceToPcs,
bit_depth,
)?;
Ok(Box::new(transform))
}
pub(crate) fn multi_dimensional_3x3_to_device<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
>(
mab: &LutMultidimensionalType,
options: TransformOptions,
pcs: DataColorSpace,
bit_depth: usize,
) -> Result<Box<dyn KatanaFinalStage<f32, T> + Send + Sync>, CmsError>
where
f32: AsPrimitive<T>,
{
let transform = make_multidimensional_3x3::<T>(
mab,
options,
pcs,
MultidimensionalDirection::PcsToDevice,
bit_depth,
)?;
Ok(Box::new(transform))
}
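// A minimal sketch of how these constructors plug into the Katana pipeline
// (the surrounding plumbing is an assumption; the signatures are from above):
//
// let stage = multi_dimensional_3x3_to_pcs::<u8>(&mab, options, pcs, 8)?;
// let pcs_values = stage.to_pcs(&device_samples)?; // interleaved f32 triples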


@@ -0,0 +1,321 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::katana::KatanaInitialStage;
use crate::conversions::katana::md3x3::MultidimensionalDirection;
use crate::mlaf::mlaf;
use crate::safe_math::SafeMul;
use crate::trc::lut_interp_linear_float;
use crate::{
CmsError, DataColorSpace, Hypercube, InterpolationMethod, LutMultidimensionalType,
MalformedSize, Matrix3d, Matrix3f, PointeeSizeExpressible, TransformOptions, Vector3d,
Vector3f,
};
use num_traits::AsPrimitive;
use std::marker::PhantomData;
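/// Applies three per-channel 1D curves in place over interleaved triples.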
pub(crate) fn execute_simple_curves3(dst: &mut [f32], curves: &[Vec<f32>; 3]) {
let curve0 = &curves[0];
let curve1 = &curves[1];
let curve2 = &curves[2];
for dst in dst.chunks_exact_mut(3) {
let a0 = dst[0];
let a1 = dst[1];
let a2 = dst[2];
let b0 = lut_interp_linear_float(a0, curve0);
let b1 = lut_interp_linear_float(a1, curve1);
let b2 = lut_interp_linear_float(a2, curve2);
dst[0] = b0;
dst[1] = b1;
dst[2] = b2;
}
}
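/// Applies a 3x3 matrix with bias in place, skipping the work entirely when
/// the matrix is identity and the bias is zero.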
pub(crate) fn execute_matrix_stage3(matrix: Matrix3f, bias: Vector3f, dst: &mut [f32]) {
let m = matrix;
let b = bias;
if !m.test_equality(Matrix3f::IDENTITY) || !b.eq(&Vector3f::default()) {
for dst in dst.chunks_exact_mut(3) {
let x = dst[0];
let y = dst[1];
let z = dst[2];
dst[0] = mlaf(mlaf(mlaf(b.v[0], x, m.v[0][0]), y, m.v[0][1]), z, m.v[0][2]);
dst[1] = mlaf(mlaf(mlaf(b.v[1], x, m.v[1][0]), y, m.v[1][1]), z, m.v[1][2]);
dst[2] = mlaf(mlaf(mlaf(b.v[2], x, m.v[2][0]), y, m.v[2][1]), z, m.v[2][2]);
}
}
}
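/// mAB element with four input channels (typically CMYK) mapped to a
/// three-channel PCS through a 4D CLUT; only the device-to-PCS direction is
/// implemented here.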
struct Multidimensional4x3<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
> {
a_curves: Option<Box<[Vec<f32>; 4]>>,
m_curves: Option<Box<[Vec<f32>; 3]>>,
b_curves: Option<Box<[Vec<f32>; 3]>>,
clut: Option<Vec<f32>>,
matrix: Matrix3f,
bias: Vector3f,
direction: MultidimensionalDirection,
options: TransformOptions,
pcs: DataColorSpace,
grid_size: [u8; 4],
_phantom: PhantomData<T>,
bit_depth: usize,
}
impl<T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync>
Multidimensional4x3<T>
{
fn to_pcs_impl<Fetch: Fn(f32, f32, f32, f32) -> Vector3f>(
&self,
input: &[T],
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
let norm_value = if T::FINITE {
1.0 / ((1u32 << self.bit_depth) - 1) as f32
} else {
1.0
};
assert_eq!(
self.direction,
MultidimensionalDirection::DeviceToPcs,
"PCS to device cannot be used on `to pcs` stage"
);
// A-curves stage: per-channel 1D curves feeding the 4D CLUT lookup
if let (Some(a_curves), Some(clut)) = (self.a_curves.as_ref(), self.clut.as_ref()) {
if !clut.is_empty() {
let curve0 = &a_curves[0];
let curve1 = &a_curves[1];
let curve2 = &a_curves[2];
let curve3 = &a_curves[3];
for (src, dst) in input.chunks_exact(4).zip(dst.chunks_exact_mut(3)) {
let b0 = lut_interp_linear_float(src[0].as_() * norm_value, curve0);
let b1 = lut_interp_linear_float(src[1].as_() * norm_value, curve1);
let b2 = lut_interp_linear_float(src[2].as_() * norm_value, curve2);
let b3 = lut_interp_linear_float(src[3].as_() * norm_value, curve3);
let interpolated = fetch(b0, b1, b2, b3);
dst[0] = interpolated.v[0];
dst[1] = interpolated.v[1];
dst[2] = interpolated.v[2];
}
}
} else {
return Err(CmsError::InvalidAtoBLut);
}
// Matrix stage
if let Some(m_curves) = self.m_curves.as_ref() {
execute_simple_curves3(dst, m_curves);
execute_matrix_stage3(self.matrix, self.bias, dst);
}
// B-curves are mandatory in the spec; None means they were all linear and got elided during construction
if let Some(b_curves) = &self.b_curves.as_ref() {
execute_simple_curves3(dst, b_curves);
}
Ok(())
}
}
impl<T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync>
KatanaInitialStage<f32, T> for Multidimensional4x3<T>
{
fn to_pcs(&self, input: &[T]) -> Result<Vec<f32>, CmsError> {
if input.len() % 4 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let fixed_new_clut = Vec::new();
let new_clut = self.clut.as_ref().unwrap_or(&fixed_new_clut);
let lut = Hypercube::new_hypercube(new_clut, self.grid_size);
let mut new_dst = vec![0f32; (input.len() / 4) * 3];
// For Lab and XYZ PCS, multilinear interpolation is always used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
self.to_pcs_impl(input, &mut new_dst, |x, y, z, w| {
lut.quadlinear_vec3(x, y, z, w)
})?;
return Ok(new_dst);
}
match self.options.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.to_pcs_impl(input, &mut new_dst, |x, y, z, w| lut.tetra_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.to_pcs_impl(input, &mut new_dst, |x, y, z, w| {
lut.pyramid_vec3(x, y, z, w)
})?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.to_pcs_impl(input, &mut new_dst, |x, y, z, w| lut.prism_vec3(x, y, z, w))?;
}
InterpolationMethod::Linear => {
self.to_pcs_impl(input, &mut new_dst, |x, y, z, w| {
lut.quadlinear_vec3(x, y, z, w)
})?;
}
}
Ok(new_dst)
}
}
fn make_multidimensional_4x3<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
>(
mab: &LutMultidimensionalType,
options: TransformOptions,
pcs: DataColorSpace,
direction: MultidimensionalDirection,
bit_depth: usize,
) -> Result<Multidimensional4x3<T>, CmsError> {
// A 4x3 element must have exactly four input and three output channels
if mab.num_input_channels != 4 || mab.num_output_channels != 3 {
return Err(CmsError::UnsupportedProfileConnection);
}
if mab.b_curves.len() != 3 {
return Err(CmsError::InvalidAtoBLut);
}
let grid_size = [
mab.grid_points[0],
mab.grid_points[1],
mab.grid_points[2],
mab.grid_points[3],
];
let clut: Option<Vec<f32>> = if mab.a_curves.len() == 4 && mab.clut.is_some() {
let clut = mab.clut.as_ref().map(|x| x.to_clut_f32()).unwrap();
let lut_grid = (mab.grid_points[0] as usize)
.safe_mul(mab.grid_points[1] as usize)?
.safe_mul(mab.grid_points[2] as usize)?
.safe_mul(mab.grid_points[3] as usize)?
.safe_mul(mab.num_output_channels as usize)?;
if clut.len() != lut_grid {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: clut.len(),
expected: lut_grid,
}));
}
Some(clut)
} else {
return Err(CmsError::InvalidAtoBLut);
};
let a_curves: Option<Box<[Vec<f32>; 4]>> = if mab.a_curves.len() == 4 && mab.clut.is_some() {
let mut arr = Box::<[Vec<f32>; 4]>::default();
for (a_curve, dst) in mab.a_curves.iter().zip(arr.iter_mut()) {
*dst = a_curve.to_clut()?;
}
Some(arr)
} else {
None
};
let b_curves: Option<Box<[Vec<f32>; 3]>> = if mab.b_curves.len() == 3 {
let mut arr = Box::<[Vec<f32>; 3]>::default();
let all_curves_linear = mab.b_curves.iter().all(|curve| curve.is_linear());
if all_curves_linear {
None
} else {
for (c_curve, dst) in mab.b_curves.iter().zip(arr.iter_mut()) {
*dst = c_curve.to_clut()?;
}
Some(arr)
}
} else {
return Err(CmsError::InvalidAtoBLut);
};
let matrix = mab.matrix.to_f32();
let m_curves: Option<Box<[Vec<f32>; 3]>> = if mab.m_curves.len() == 3 {
let all_curves_linear = mab.m_curves.iter().all(|curve| curve.is_linear());
if !all_curves_linear
|| !mab.matrix.test_equality(Matrix3d::IDENTITY)
|| mab.bias.ne(&Vector3d::default())
{
let mut arr = Box::<[Vec<f32>; 3]>::default();
for (curve, dst) in mab.m_curves.iter().zip(arr.iter_mut()) {
*dst = curve.to_clut()?;
}
Some(arr)
} else {
None
}
} else {
None
};
let bias = mab.bias.cast();
let transform = Multidimensional4x3::<T> {
a_curves,
b_curves,
m_curves,
matrix,
direction,
options,
clut,
pcs,
grid_size,
bias,
_phantom: PhantomData,
bit_depth,
};
Ok(transform)
}
pub(crate) fn multi_dimensional_4x3_to_pcs<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
>(
mab: &LutMultidimensionalType,
options: TransformOptions,
pcs: DataColorSpace,
bit_depth: usize,
) -> Result<Box<dyn KatanaInitialStage<f32, T> + Send + Sync>, CmsError> {
let transform = make_multidimensional_4x3::<T>(
mab,
options,
pcs,
MultidimensionalDirection::DeviceToPcs,
bit_depth,
)?;
Ok(Box::new(transform))
}


@@ -0,0 +1,284 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::katana::KatanaFinalStage;
use crate::conversions::katana::md3x3::MultidimensionalDirection;
use crate::conversions::katana::md4x3::{execute_matrix_stage3, execute_simple_curves3};
use crate::conversions::md_lut::{MultidimensionalLut, tetra_3i_to_any_vec};
use crate::safe_math::SafeMul;
use crate::trc::lut_interp_linear_float;
use crate::{
CmsError, DataColorSpace, Layout, LutMultidimensionalType, MalformedSize, Matrix3d, Matrix3f,
PointeeSizeExpressible, TransformOptions, Vector3d, Vector3f,
};
use num_traits::AsPrimitive;
use std::marker::PhantomData;
struct Multidimensional3xN<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
> {
a_curves: Option<Vec<Vec<f32>>>,
m_curves: Option<Box<[Vec<f32>; 3]>>,
b_curves: Option<Box<[Vec<f32>; 3]>>,
clut: Option<Vec<f32>>,
matrix: Matrix3f,
bias: Vector3f,
direction: MultidimensionalDirection,
grid_size: [u8; 16],
output_inks: usize,
_phantom: PhantomData<T>,
dst_layout: Layout,
bit_depth: usize,
}
impl<T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync>
Multidimensional3xN<T>
where
f32: AsPrimitive<T>,
{
fn to_output_impl(&self, src: &mut [f32], dst: &mut [T]) -> Result<(), CmsError> {
let norm_value = if T::FINITE {
((1u32 << self.bit_depth) - 1) as f32
} else {
1.0
};
assert_eq!(
self.direction,
MultidimensionalDirection::PcsToDevice,
"PCS to device cannot be used on `to pcs` stage"
);
// B-curves are mandatory in the spec; None means they were all linear and got elided during construction
if let Some(b_curves) = &self.b_curves.as_ref() {
execute_simple_curves3(src, b_curves);
}
// Matrix stage
if let Some(m_curves) = self.m_curves.as_ref() {
execute_matrix_stage3(self.matrix, self.bias, src);
execute_simple_curves3(src, m_curves);
}
if let (Some(a_curves), Some(clut)) = (self.a_curves.as_ref(), self.clut.as_ref()) {
let mut inks = vec![0.; self.output_inks];
if clut.is_empty() {
return Err(CmsError::InvalidAtoBLut);
}
let md_lut = MultidimensionalLut::new(self.grid_size, 3, self.output_inks);
for (src, dst) in src
.chunks_exact(3)
.zip(dst.chunks_exact_mut(self.dst_layout.channels()))
{
tetra_3i_to_any_vec(
&md_lut,
clut,
src[0],
src[1],
src[2],
&mut inks,
self.output_inks,
);
for (ink, curve) in inks.iter_mut().zip(a_curves.iter()) {
*ink = lut_interp_linear_float(*ink, curve);
}
if T::FINITE {
for (dst, ink) in dst.iter_mut().zip(inks.iter()) {
*dst = (*ink * norm_value).round().max(0.).min(norm_value).as_();
}
} else {
for (dst, ink) in dst.iter_mut().zip(inks.iter()) {
*dst = (*ink * norm_value).as_();
}
}
}
} else {
return Err(CmsError::InvalidAtoBLut);
}
Ok(())
}
}
impl<T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync>
KatanaFinalStage<f32, T> for Multidimensional3xN<T>
where
f32: AsPrimitive<T>,
{
fn to_output(&self, src: &mut [f32], dst: &mut [T]) -> Result<(), CmsError> {
if src.len() % 3 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % self.output_inks != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
self.to_output_impl(src, dst)?;
Ok(())
}
}
fn make_multidimensional_nx3<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
>(
dst_layout: Layout,
mab: &LutMultidimensionalType,
_: TransformOptions,
pcs: DataColorSpace,
direction: MultidimensionalDirection,
bit_depth: usize,
) -> Result<Multidimensional3xN<T>, CmsError> {
let real_inks = if pcs == DataColorSpace::Rgb {
3
} else {
dst_layout.channels()
};
if mab.num_output_channels != real_inks as u8 {
return Err(CmsError::UnsupportedProfileConnection);
}
if mab.b_curves.len() != 3 {
return Err(CmsError::InvalidAtoBLut);
}
let clut: Option<Vec<f32>> =
if mab.a_curves.len() == mab.num_output_channels as usize && mab.clut.is_some() {
let clut = mab.clut.as_ref().map(|x| x.to_clut_f32()).unwrap();
let mut lut_grid = 1usize;
for grid in mab.grid_points.iter().take(mab.num_input_channels as usize) {
lut_grid = lut_grid.safe_mul(*grid as usize)?;
}
let lut_grid = lut_grid.safe_mul(mab.num_output_channels as usize)?;
if clut.len() != lut_grid {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: clut.len(),
expected: lut_grid,
}));
}
Some(clut)
} else {
return Err(CmsError::InvalidAtoBLut);
};
let a_curves: Option<Vec<Vec<f32>>> =
if mab.a_curves.len() == mab.num_output_channels as usize && mab.clut.is_some() {
let mut arr = Vec::new();
for a_curve in mab.a_curves.iter() {
arr.push(a_curve.to_clut()?);
}
Some(arr)
} else {
None
};
let b_curves: Option<Box<[Vec<f32>; 3]>> = if mab.b_curves.len() == 3 {
let mut arr = Box::<[Vec<f32>; 3]>::default();
let all_curves_linear = mab.b_curves.iter().all(|curve| curve.is_linear());
if all_curves_linear {
None
} else {
for (c_curve, dst) in mab.b_curves.iter().zip(arr.iter_mut()) {
*dst = c_curve.to_clut()?;
}
Some(arr)
}
} else {
return Err(CmsError::InvalidAtoBLut);
};
let matrix = mab.matrix.to_f32();
let m_curves: Option<Box<[Vec<f32>; 3]>> = if mab.m_curves.len() == 3 {
let all_curves_linear = mab.m_curves.iter().all(|curve| curve.is_linear());
if !all_curves_linear
|| !mab.matrix.test_equality(Matrix3d::IDENTITY)
|| mab.bias.ne(&Vector3d::default())
{
let mut arr = Box::<[Vec<f32>; 3]>::default();
for (curve, dst) in mab.m_curves.iter().zip(arr.iter_mut()) {
*dst = curve.to_clut()?;
}
Some(arr)
} else {
None
}
} else {
None
};
let bias = mab.bias.cast();
let transform = Multidimensional3xN::<T> {
a_curves,
b_curves,
m_curves,
matrix,
direction,
clut,
grid_size: mab.grid_points,
bias,
dst_layout,
output_inks: real_inks,
_phantom: PhantomData,
bit_depth,
};
Ok(transform)
}
pub(crate) fn katana_multi_dimensional_3xn_to_device<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
>(
dst_layout: Layout,
mab: &LutMultidimensionalType,
options: TransformOptions,
pcs: DataColorSpace,
bit_depth: usize,
) -> Result<Box<dyn KatanaFinalStage<f32, T> + Send + Sync>, CmsError>
where
f32: AsPrimitive<T>,
{
if mab.num_input_channels == 0 {
return Err(CmsError::UnsupportedProfileConnection);
}
let transform = make_multidimensional_nx3::<T>(
dst_layout,
mab,
options,
pcs,
MultidimensionalDirection::PcsToDevice,
bit_depth,
)?;
Ok(Box::new(transform))
}


@@ -0,0 +1,296 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::katana::KatanaInitialStage;
use crate::conversions::katana::md3x3::MultidimensionalDirection;
use crate::conversions::katana::md4x3::{execute_matrix_stage3, execute_simple_curves3};
use crate::conversions::md_lut::{
MultidimensionalLut, NVector, linear_1i_vec3f, linear_2i_vec3f_direct, linear_3i_vec3f_direct,
linear_4i_vec3f, linear_5i_vec3f, linear_6i_vec3f, linear_7i_vec3f, linear_8i_vec3f,
linear_9i_vec3f, linear_10i_vec3f, linear_11i_vec3f, linear_12i_vec3f, linear_13i_vec3f,
linear_14i_vec3f, linear_15i_vec3f,
};
use crate::safe_math::SafeMul;
use crate::trc::lut_interp_linear_float;
use crate::{
CmsError, DataColorSpace, Layout, LutMultidimensionalType, MalformedSize, Matrix3d, Matrix3f,
PointeeSizeExpressible, TransformOptions, Vector3d, Vector3f,
};
use num_traits::AsPrimitive;
use std::marker::PhantomData;
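/// mAB element that collapses an arbitrary ink count (N inputs) to a
/// three-channel PCS; the CLUT is sampled by a fetcher chosen per input
/// layout.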
struct MultidimensionalNx3<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
const BIT_DEPTH: usize,
> {
a_curves: Option<Vec<Vec<f32>>>,
m_curves: Option<Box<[Vec<f32>; 3]>>,
b_curves: Option<Box<[Vec<f32>; 3]>>,
clut: Option<Vec<f32>>,
matrix: Matrix3f,
bias: Vector3f,
direction: MultidimensionalDirection,
grid_size: [u8; 16],
input_inks: usize,
_phantom: PhantomData<T>,
}
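/// Picks the multilinear CLUT fetcher whose input arity matches `layout`;
/// every fetcher interpolates down to a three-component vector.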
#[inline(never)]
pub(crate) fn interpolate_out_function(
layout: Layout,
) -> fn(lut: &MultidimensionalLut, arr: &[f32], inputs: &[f32]) -> NVector<f32, 3> {
const OUT: usize = 3;
match layout {
Layout::Rgb => linear_3i_vec3f_direct::<OUT>,
Layout::Rgba => linear_4i_vec3f::<OUT>,
Layout::Gray => linear_1i_vec3f::<OUT>,
Layout::GrayAlpha => linear_2i_vec3f_direct::<OUT>,
Layout::Inks5 => linear_5i_vec3f::<OUT>,
Layout::Inks6 => linear_6i_vec3f::<OUT>,
Layout::Inks7 => linear_7i_vec3f::<OUT>,
Layout::Inks8 => linear_8i_vec3f::<OUT>,
Layout::Inks9 => linear_9i_vec3f::<OUT>,
Layout::Inks10 => linear_10i_vec3f::<OUT>,
Layout::Inks11 => linear_11i_vec3f::<OUT>,
Layout::Inks12 => linear_12i_vec3f::<OUT>,
Layout::Inks13 => linear_13i_vec3f::<OUT>,
Layout::Inks14 => linear_14i_vec3f::<OUT>,
Layout::Inks15 => linear_15i_vec3f::<OUT>,
}
}
impl<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
const BIT_DEPTH: usize,
> MultidimensionalNx3<T, BIT_DEPTH>
{
fn to_pcs_impl(&self, input: &[T], dst: &mut [f32]) -> Result<(), CmsError> {
let norm_value = if T::FINITE {
1.0 / ((1u32 << BIT_DEPTH) - 1) as f32
} else {
1.0
};
assert_eq!(
self.direction,
MultidimensionalDirection::DeviceToPcs,
"PCS to device cannot be used on `to pcs` stage"
);
// A-curves stage: per-ink 1D curves feeding the N-dimensional CLUT lookup
if let (Some(a_curves), Some(clut)) = (self.a_curves.as_ref(), self.clut.as_ref()) {
let layout = Layout::from_inks(self.input_inks);
let mut inks = vec![0.; self.input_inks];
if clut.is_empty() {
return Err(CmsError::InvalidAtoBLut);
}
let fetcher = interpolate_out_function(layout);
let md_lut = MultidimensionalLut::new(self.grid_size, self.input_inks, 3);
for (src, dst) in input
.chunks_exact(layout.channels())
.zip(dst.chunks_exact_mut(3))
{
for ((ink, src_ink), curve) in inks.iter_mut().zip(src).zip(a_curves.iter()) {
*ink = lut_interp_linear_float(src_ink.as_() * norm_value, curve);
}
let interpolated = fetcher(&md_lut, clut, &inks);
dst[0] = interpolated.v[0];
dst[1] = interpolated.v[1];
dst[2] = interpolated.v[2];
}
} else {
return Err(CmsError::InvalidAtoBLut);
}
// Matrix stage
if let Some(m_curves) = self.m_curves.as_ref() {
execute_simple_curves3(dst, m_curves);
execute_matrix_stage3(self.matrix, self.bias, dst);
}
// B-curves are mandatory in the spec; None means they were all linear and got elided during construction
if let Some(b_curves) = &self.b_curves.as_ref() {
execute_simple_curves3(dst, b_curves);
}
Ok(())
}
}
impl<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
const BIT_DEPTH: usize,
> KatanaInitialStage<f32, T> for MultidimensionalNx3<T, BIT_DEPTH>
{
fn to_pcs(&self, input: &[T]) -> Result<Vec<f32>, CmsError> {
if input.len() % self.input_inks != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let mut new_dst = vec![0f32; (input.len() / self.input_inks) * 3];
self.to_pcs_impl(input, &mut new_dst)?;
Ok(new_dst)
}
}
fn make_multidimensional_nx3<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
const BIT_DEPTH: usize,
>(
mab: &LutMultidimensionalType,
_: TransformOptions,
_: DataColorSpace,
direction: MultidimensionalDirection,
) -> Result<MultidimensionalNx3<T, BIT_DEPTH>, CmsError> {
if mab.num_output_channels != 3 {
return Err(CmsError::UnsupportedProfileConnection);
}
if mab.b_curves.len() != 3 {
return Err(CmsError::InvalidAtoBLut);
}
let clut: Option<Vec<f32>> =
if mab.a_curves.len() == mab.num_input_channels as usize && mab.clut.is_some() {
let clut = mab.clut.as_ref().map(|x| x.to_clut_f32()).unwrap();
let mut lut_grid = 1usize;
for grid in mab.grid_points.iter().take(mab.num_input_channels as usize) {
lut_grid = lut_grid.safe_mul(*grid as usize)?;
}
let lut_grid = lut_grid.safe_mul(mab.num_output_channels as usize)?;
if clut.len() != lut_grid {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: clut.len(),
expected: lut_grid,
}));
}
Some(clut)
} else {
return Err(CmsError::InvalidAtoBLut);
};
let a_curves: Option<Vec<Vec<f32>>> =
if mab.a_curves.len() == mab.num_input_channels as usize && mab.clut.is_some() {
let mut arr = Vec::new();
for a_curve in mab.a_curves.iter() {
arr.push(a_curve.to_clut()?);
}
Some(arr)
} else {
None
};
let b_curves: Option<Box<[Vec<f32>; 3]>> = if mab.b_curves.len() == 3 {
let mut arr = Box::<[Vec<f32>; 3]>::default();
let all_curves_linear = mab.b_curves.iter().all(|curve| curve.is_linear());
if all_curves_linear {
None
} else {
for (c_curve, dst) in mab.b_curves.iter().zip(arr.iter_mut()) {
*dst = c_curve.to_clut()?;
}
Some(arr)
}
} else {
return Err(CmsError::InvalidAtoBLut);
};
let matrix = mab.matrix.to_f32();
let m_curves: Option<Box<[Vec<f32>; 3]>> = if mab.m_curves.len() == 3 {
let all_curves_linear = mab.m_curves.iter().all(|curve| curve.is_linear());
if !all_curves_linear
|| !mab.matrix.test_equality(Matrix3d::IDENTITY)
|| mab.bias.ne(&Vector3d::default())
{
let mut arr = Box::<[Vec<f32>; 3]>::default();
for (curve, dst) in mab.m_curves.iter().zip(arr.iter_mut()) {
*dst = curve.to_clut()?;
}
Some(arr)
} else {
None
}
} else {
None
};
let bias = mab.bias.cast();
let transform = MultidimensionalNx3::<T, BIT_DEPTH> {
a_curves,
b_curves,
m_curves,
matrix,
direction,
clut,
grid_size: mab.grid_points,
bias,
input_inks: mab.num_input_channels as usize,
_phantom: PhantomData,
};
Ok(transform)
}
pub(crate) fn katana_multi_dimensional_nx3_to_pcs<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
const BIT_DEPTH: usize,
>(
src_layout: Layout,
mab: &LutMultidimensionalType,
options: TransformOptions,
pcs: DataColorSpace,
) -> Result<Box<dyn KatanaInitialStage<f32, T> + Send + Sync>, CmsError> {
if pcs == DataColorSpace::Rgb {
if mab.num_input_channels != 3 {
return Err(CmsError::InvalidAtoBLut);
}
if src_layout != Layout::Rgba && src_layout != Layout::Rgb {
return Err(CmsError::InvalidInksCountForProfile);
}
} else if mab.num_input_channels != src_layout.channels() as u8 {
return Err(CmsError::InvalidInksCountForProfile);
}
let transform = make_multidimensional_nx3::<T, BIT_DEPTH>(
mab,
options,
pcs,
MultidimensionalDirection::DeviceToPcs,
)?;
Ok(Box::new(transform))
}


@@ -0,0 +1,393 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::katana::md_nx3::interpolate_out_function;
use crate::conversions::katana::{KatanaFinalStage, KatanaInitialStage};
use crate::conversions::md_lut::{MultidimensionalLut, tetra_3i_to_any_vec};
use crate::profile::LutDataType;
use crate::safe_math::{SafeMul, SafePowi};
use crate::trc::lut_interp_linear_float;
use crate::{
CmsError, DataColorSpace, Layout, MalformedSize, PointeeSizeExpressible, TransformOptions,
};
use num_traits::AsPrimitive;
use std::array::from_fn;
use std::marker::PhantomData;
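/// Legacy lut8/lut16 (`mft1`/`mft2`) pipeline, N device inks to a
/// three-channel PCS: per-ink input curves, a uniform-grid CLUT, then three
/// output curves.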
#[derive(Default)]
struct KatanaLutNx3<T> {
linearization: Vec<Vec<f32>>,
clut: Vec<f32>,
grid_size: u8,
input_inks: usize,
output: [Vec<f32>; 3],
_phantom: PhantomData<T>,
bit_depth: usize,
}
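/// The same legacy pipeline in the opposite direction: three PCS channels
/// in, N device inks out, with an opaque-alpha fill for RGBA destinations.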
struct KatanaLut3xN<T> {
linearization: [Vec<f32>; 3],
clut: Vec<f32>,
grid_size: u8,
output_inks: usize,
output: Vec<Vec<f32>>,
dst_layout: Layout,
target_color_space: DataColorSpace,
_phantom: PhantomData<T>,
bit_depth: usize,
}
impl<T: Copy + PointeeSizeExpressible + AsPrimitive<f32>> KatanaLutNx3<T> {
fn to_pcs_impl(&self, input: &[T]) -> Result<Vec<f32>, CmsError> {
if input.len() % self.input_inks != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let norm_value = if T::FINITE {
1.0 / ((1u32 << self.bit_depth) - 1) as f32
} else {
1.0
};
let grid_sizes: [u8; 16] = from_fn(|i| {
if i < self.input_inks {
self.grid_size
} else {
0
}
});
let md_lut = MultidimensionalLut::new(grid_sizes, self.input_inks, 3);
let layout = Layout::from_inks(self.input_inks);
let mut inks = vec![0.; self.input_inks];
let mut dst = vec![0.; (input.len() / layout.channels()) * 3];
let fetcher = interpolate_out_function(layout);
for (dest, src) in dst
.chunks_exact_mut(3)
.zip(input.chunks_exact(layout.channels()))
{
for ((ink, src_ink), curve) in inks.iter_mut().zip(src).zip(self.linearization.iter()) {
*ink = lut_interp_linear_float(src_ink.as_() * norm_value, curve);
}
let clut = fetcher(&md_lut, &self.clut, &inks);
let pcs_x = lut_interp_linear_float(clut.v[0], &self.output[0]);
let pcs_y = lut_interp_linear_float(clut.v[1], &self.output[1]);
let pcs_z = lut_interp_linear_float(clut.v[2], &self.output[2]);
dest[0] = pcs_x;
dest[1] = pcs_y;
dest[2] = pcs_z;
}
Ok(dst)
}
}
impl<T: Copy + PointeeSizeExpressible + AsPrimitive<f32>> KatanaInitialStage<f32, T>
for KatanaLutNx3<T>
{
fn to_pcs(&self, input: &[T]) -> Result<Vec<f32>, CmsError> {
if input.len() % self.input_inks != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
self.to_pcs_impl(input)
}
}
impl<T: Copy + PointeeSizeExpressible + AsPrimitive<f32>> KatanaFinalStage<f32, T>
for KatanaLut3xN<T>
where
f32: AsPrimitive<T>,
{
fn to_output(&self, src: &mut [f32], dst: &mut [T]) -> Result<(), CmsError> {
if src.len() % 3 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let grid_sizes: [u8; 16] = from_fn(|i| {
if i < self.output_inks {
self.grid_size
} else {
0
}
});
let md_lut = MultidimensionalLut::new(grid_sizes, 3, self.output_inks);
let scale_value = if T::FINITE {
((1u32 << self.bit_depth) - 1) as f32
} else {
1.0
};
let mut working = vec![0.; self.output_inks];
for (dest, src) in dst
.chunks_exact_mut(self.dst_layout.channels())
.zip(src.chunks_exact(3))
{
let x = lut_interp_linear_float(src[0], &self.linearization[0]);
let y = lut_interp_linear_float(src[1], &self.linearization[1]);
let z = lut_interp_linear_float(src[2], &self.linearization[2]);
tetra_3i_to_any_vec(&md_lut, &self.clut, x, y, z, &mut working, self.output_inks);
for (ink, curve) in working.iter_mut().zip(self.output.iter()) {
*ink = lut_interp_linear_float(*ink, curve);
}
if T::FINITE {
for (dst, ink) in dest.iter_mut().zip(working.iter()) {
*dst = (*ink * scale_value).round().max(0.).min(scale_value).as_();
}
} else {
for (dst, ink) in dest.iter_mut().zip(working.iter()) {
*dst = (*ink * scale_value).as_();
}
}
}
if self.dst_layout == Layout::Rgba && self.target_color_space == DataColorSpace::Rgb {
for dst in dst.chunks_exact_mut(self.dst_layout.channels()) {
dst[3] = scale_value.as_();
}
}
Ok(())
}
}
fn katana_make_lut_nx3<T: Copy + PointeeSizeExpressible + AsPrimitive<f32>>(
inks: usize,
lut: &LutDataType,
_: TransformOptions,
_: DataColorSpace,
bit_depth: usize,
) -> Result<KatanaLutNx3<T>, CmsError> {
if inks != lut.num_input_channels as usize {
return Err(CmsError::UnsupportedProfileConnection);
}
if lut.num_output_channels != 3 {
return Err(CmsError::UnsupportedProfileConnection);
}
let clut_length: usize = (lut.num_clut_grid_points as usize)
.safe_powi(lut.num_input_channels as u32)?
.safe_mul(lut.num_output_channels as usize)?;
let clut_table = lut.clut_table.to_clut_f32();
if clut_table.len() != clut_length {
return Err(CmsError::MalformedClut(MalformedSize {
size: clut_table.len(),
expected: clut_length,
}));
}
let linearization_table = lut.input_table.to_clut_f32();
if linearization_table.len() < lut.num_input_table_entries as usize * inks {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: linearization_table.len(),
expected: lut.num_input_table_entries as usize * inks,
}));
}
let linearization = (0..inks)
.map(|x| {
linearization_table[x * lut.num_input_table_entries as usize
..(x + 1) * lut.num_input_table_entries as usize]
.to_vec()
})
.collect::<_>();
let gamma_table = lut.output_table.to_clut_f32();
if gamma_table.len() < lut.num_output_table_entries as usize * 3 {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: gamma_table.len(),
expected: lut.num_output_table_entries as usize * 3,
}));
}
let gamma_curve0 = gamma_table[..lut.num_output_table_entries as usize].to_vec();
let gamma_curve1 = gamma_table
[lut.num_output_table_entries as usize..lut.num_output_table_entries as usize * 2]
.to_vec();
let gamma_curve2 = gamma_table
[lut.num_output_table_entries as usize * 2..lut.num_output_table_entries as usize * 3]
.to_vec();
let transform = KatanaLutNx3::<T> {
linearization,
clut: clut_table,
grid_size: lut.num_clut_grid_points,
output: [gamma_curve0, gamma_curve1, gamma_curve2],
input_inks: inks,
_phantom: PhantomData,
bit_depth,
};
Ok(transform)
}
fn katana_make_lut_3xn<T: Copy + PointeeSizeExpressible + AsPrimitive<f32>>(
inks: usize,
dst_layout: Layout,
lut: &LutDataType,
_: TransformOptions,
target_color_space: DataColorSpace,
bit_depth: usize,
) -> Result<KatanaLut3xN<T>, CmsError> {
if lut.num_input_channels as usize != 3 {
return Err(CmsError::UnsupportedProfileConnection);
}
if target_color_space == DataColorSpace::Rgb {
if lut.num_output_channels != 3 && lut.num_output_channels != 4 {
return Err(CmsError::InvalidInksCountForProfile);
}
if dst_layout != Layout::Rgb && dst_layout != Layout::Rgba {
return Err(CmsError::InvalidInksCountForProfile);
}
} else if lut.num_output_channels as usize != dst_layout.channels() {
return Err(CmsError::InvalidInksCountForProfile);
}
let clut_length: usize = (lut.num_clut_grid_points as usize)
.safe_powi(lut.num_input_channels as u32)?
.safe_mul(lut.num_output_channels as usize)?;
let clut_table = lut.clut_table.to_clut_f32();
if clut_table.len() != clut_length {
return Err(CmsError::MalformedClut(MalformedSize {
size: clut_table.len(),
expected: clut_length,
}));
}
let linearization_table = lut.input_table.to_clut_f32();
if linearization_table.len() < lut.num_input_table_entries as usize * 3 {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: linearization_table.len(),
expected: lut.num_input_table_entries as usize * 3,
}));
}
let linear_curve0 = linearization_table[..lut.num_input_table_entries as usize].to_vec();
let linear_curve1 = linearization_table
[lut.num_input_table_entries as usize..lut.num_input_table_entries as usize * 2]
.to_vec();
let linear_curve2 = linearization_table
[lut.num_input_table_entries as usize * 2..lut.num_input_table_entries as usize * 3]
.to_vec();
let gamma_table = lut.output_table.to_clut_f32();
if gamma_table.len() < lut.num_output_table_entries as usize * inks {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: gamma_table.len(),
expected: lut.num_output_table_entries as usize * inks,
}));
}
let gamma = (0..inks)
.map(|x| {
gamma_table[x * lut.num_output_table_entries as usize
..(x + 1) * lut.num_output_table_entries as usize]
.to_vec()
})
.collect::<_>();
let transform = KatanaLut3xN::<T> {
linearization: [linear_curve0, linear_curve1, linear_curve2],
clut: clut_table,
grid_size: lut.num_clut_grid_points,
output: gamma,
output_inks: inks,
_phantom: PhantomData,
target_color_space,
dst_layout,
bit_depth,
};
Ok(transform)
}
pub(crate) fn katana_input_make_lut_nx3<
T: Copy + PointeeSizeExpressible + AsPrimitive<f32> + Send + Sync,
>(
src_layout: Layout,
inks: usize,
lut: &LutDataType,
options: TransformOptions,
pcs: DataColorSpace,
bit_depth: usize,
) -> Result<Box<dyn KatanaInitialStage<f32, T> + Send + Sync>, CmsError> {
if pcs == DataColorSpace::Rgb {
if lut.num_input_channels != 3 {
return Err(CmsError::InvalidAtoBLut);
}
if src_layout != Layout::Rgba && src_layout != Layout::Rgb {
return Err(CmsError::InvalidInksCountForProfile);
}
} else if lut.num_input_channels != src_layout.channels() as u8 {
return Err(CmsError::InvalidInksCountForProfile);
}
let z0 = katana_make_lut_nx3::<T>(inks, lut, options, pcs, bit_depth)?;
Ok(Box::new(z0))
}
pub(crate) fn katana_output_make_lut_3xn<
T: Copy + PointeeSizeExpressible + AsPrimitive<f32> + Send + Sync,
>(
dst_layout: Layout,
lut: &LutDataType,
options: TransformOptions,
target_color_space: DataColorSpace,
bit_depth: usize,
) -> Result<Box<dyn KatanaFinalStage<f32, T> + Send + Sync>, CmsError>
where
f32: AsPrimitive<T>,
{
let real_inks = if target_color_space == DataColorSpace::Rgb {
3
} else {
dst_layout.channels()
};
let z0 = katana_make_lut_3xn::<T>(
real_inks,
dst_layout,
lut,
options,
target_color_space,
bit_depth,
)?;
Ok(Box::new(z0))
}


@@ -0,0 +1,56 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
mod finalizers;
mod md3x3;
mod md4x3;
mod md_3xn;
mod md_nx3;
mod md_pipeline;
mod pcs_stages;
mod rgb_xyz;
mod stages;
mod xyz_lab;
mod xyz_rgb;
pub(crate) use finalizers::{CopyAlphaStage, InjectAlphaStage};
pub(crate) use md_3xn::katana_multi_dimensional_3xn_to_device;
pub(crate) use md_nx3::katana_multi_dimensional_nx3_to_pcs;
pub(crate) use md_pipeline::{katana_input_make_lut_nx3, katana_output_make_lut_3xn};
pub(crate) use md3x3::{multi_dimensional_3x3_to_device, multi_dimensional_3x3_to_pcs};
pub(crate) use md4x3::multi_dimensional_4x3_to_pcs;
pub(crate) use pcs_stages::{
KatanaDefaultIntermediate, katana_pcs_lab_v2_to_v4, katana_pcs_lab_v4_to_v2,
};
pub(crate) use rgb_xyz::katana_create_rgb_lin_lut;
pub(crate) use stages::{
Katana, KatanaFinalStage, KatanaInitialStage, KatanaIntermediateStage,
KatanaPostFinalizationStage,
};
pub(crate) use xyz_lab::{KatanaStageLabToXyz, KatanaStageXyzToLab};
pub(crate) use xyz_rgb::katana_prepare_inverse_lut_rgb_xyz;


@@ -0,0 +1,100 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::katana::KatanaIntermediateStage;
use crate::conversions::katana::stages::BlackholeIntermediateStage;
use crate::mlaf::mlaf;
use crate::{CmsError, ColorProfile, DataColorSpace, Matrix3f, ProfileVersion};
use std::marker::PhantomData;
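/// Intermediate stage applying a sequence of 3x3 matrices to interleaved
/// PCS triples in place.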
pub(crate) struct KatanaMatrixStage {
pub(crate) matrices: Vec<Matrix3f>,
}
impl KatanaMatrixStage {
pub(crate) fn new(matrix: Matrix3f) -> Self {
Self {
matrices: vec![matrix],
}
}
}
pub(crate) type KatanaDefaultIntermediate = dyn KatanaIntermediateStage<f32> + Send + Sync;
impl KatanaIntermediateStage<f32> for KatanaMatrixStage {
fn stage(&self, input: &mut Vec<f32>) -> Result<Vec<f32>, CmsError> {
if input.len() % 3 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
for m in self.matrices.iter() {
for dst in input.chunks_exact_mut(3) {
let x = dst[0];
let y = dst[1];
let z = dst[2];
dst[0] = mlaf(mlaf(x * m.v[0][0], y, m.v[0][1]), z, m.v[0][2]);
dst[1] = mlaf(mlaf(x * m.v[1][0], y, m.v[1][1]), z, m.v[1][2]);
dst[2] = mlaf(mlaf(x * m.v[2][0], y, m.v[2][1]), z, m.v[2][2]);
}
}
Ok(std::mem::take(input))
}
}
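// A minimal sketch (test-style, assuming only items in this file) of the stage
// above: each interleaved XYZ triple is multiplied by every matrix in `matrices`,
// where mlaf(a, b, c) evaluates a + b * c, fused when the target supports it.
// An identity matrix therefore leaves the lane intact.
#[cfg(test)]
mod matrix_stage_sketch {
    use super::*;

    #[test]
    fn identity_matrix_is_a_no_op() {
        let identity = Matrix3f {
            v: [[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]],
        };
        let stage = KatanaMatrixStage::new(identity);
        let mut lane = vec![0.25f32, 0.5, 0.75];
        let out = stage.stage(&mut lane).expect("lane length is a multiple of 3");
        assert_eq!(out, vec![0.25, 0.5, 0.75]);
    }
}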
pub(crate) fn katana_pcs_lab_v4_to_v2(profile: &ColorProfile) -> Box<KatanaDefaultIntermediate> {
if profile.pcs == DataColorSpace::Lab && profile.version_internal <= ProfileVersion::V4_0 {
let v_mat = vec![Matrix3f {
v: [
[65280.0 / 65535.0, 0., 0.],
[0., 65280.0 / 65535.0, 0.],
[0., 0., 65280.0 / 65535.0],
],
}];
return Box::new(KatanaMatrixStage { matrices: v_mat });
}
Box::new(BlackholeIntermediateStage {
_phantom: PhantomData,
})
}
pub(crate) fn katana_pcs_lab_v2_to_v4(profile: &ColorProfile) -> Box<KatanaDefaultIntermediate> {
if profile.pcs == DataColorSpace::Lab && profile.version_internal <= ProfileVersion::V4_0 {
let v_mat = vec![Matrix3f {
v: [
[65535.0 / 65280.0, 0., 0.],
[0., 65535.0 / 65280.0, 0.],
[0., 0., 65535.0 / 65280.0],
],
}];
return Box::new(KatanaMatrixStage { matrices: v_mat });
}
Box::new(BlackholeIntermediateStage {
_phantom: PhantomData,
})
}
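// Why 65280/65535: the legacy 16-bit Lab PCS (ICC v2) tops out at 0xFF00 (65280)
// while v4 uses the full 0xFFFF (65535) range, so moving between the two
// encodings is a uniform per-channel rescale and the two stages above are exact
// inverses of each other. A minimal sketch of that claim:
#[cfg(test)]
mod lab_pcs_rescale_sketch {
    #[test]
    fn v4_to_v2_then_back_is_identity() {
        let to_v2 = 65280.0f64 / 65535.0;
        let to_v4 = 65535.0f64 / 65280.0;
        let l = 0.731; // any normalized PCS channel value
        assert!((l * to_v2 * to_v4 - l).abs() < 1e-12);
    }
}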


@@ -0,0 +1,161 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::katana::pcs_stages::KatanaMatrixStage;
use crate::conversions::katana::{KatanaInitialStage, KatanaIntermediateStage};
use crate::{CmsError, ColorProfile, Layout, Matrix3f, PointeeSizeExpressible, TransformOptions};
use num_traits::AsPrimitive;
use std::marker::PhantomData;
struct KatanaRgbLinearizationStage<T: Clone, const LAYOUT: u8, const LINEAR_CAP: usize> {
r_lin: Box<[f32; LINEAR_CAP]>,
g_lin: Box<[f32; LINEAR_CAP]>,
b_lin: Box<[f32; LINEAR_CAP]>,
linear_cap: usize,
bit_depth: usize,
_phantom: PhantomData<T>,
}
impl<
T: Clone + AsPrimitive<f32> + PointeeSizeExpressible,
const LAYOUT: u8,
const LINEAR_CAP: usize,
> KatanaInitialStage<f32, T> for KatanaRgbLinearizationStage<T, LAYOUT, LINEAR_CAP>
{
fn to_pcs(&self, input: &[T]) -> Result<Vec<f32>, CmsError> {
let src_layout = Layout::from(LAYOUT);
if input.len() % src_layout.channels() != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let mut dst = vec![0.; input.len() / src_layout.channels() * 3];
let scale = if T::FINITE {
(self.linear_cap as f32 - 1.) / ((1 << self.bit_depth) - 1) as f32
} else {
(T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32
};
let cap_value = if T::FINITE {
((1 << self.bit_depth) - 1) as f32
} else {
(T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32
};
for (src, dst) in input
.chunks_exact(src_layout.channels())
.zip(dst.chunks_exact_mut(3))
{
let j_r = src[0].as_() * scale;
let j_g = src[1].as_() * scale;
let j_b = src[2].as_() * scale;
dst[0] = self.r_lin[(j_r.round().min(cap_value).max(0.) as u16) as usize];
dst[1] = self.g_lin[(j_g.round().min(cap_value).max(0.) as u16) as usize];
dst[2] = self.b_lin[(j_b.round().min(cap_value).max(0.) as u16) as usize];
}
Ok(dst)
}
}
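// Worked example of the index math above, assuming nothing beyond core
// arithmetic: for a finite 8-bit input with LINEAR_CAP = 256 the scale is
// (256 - 1) / (2^8 - 1) = 1.0, so code values map one-to-one onto LUT slots;
// a larger LINEAR_CAP spreads the same codes across a finer table.
#[cfg(test)]
mod linearization_index_sketch {
    #[test]
    fn eight_bit_input_maps_one_to_one() {
        let linear_cap = 256usize;
        let bit_depth = 8usize;
        let scale = (linear_cap as f32 - 1.) / ((1u32 << bit_depth) - 1) as f32;
        assert_eq!(scale, 1.0);
        assert_eq!((200.0f32 * scale).round() as usize, 200);
    }
}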
pub(crate) struct KatanaRgbLinearizationState<T> {
pub(crate) stages: Vec<Box<dyn KatanaIntermediateStage<f32> + Send + Sync>>,
pub(crate) initial_stage: Box<dyn KatanaInitialStage<f32, T> + Send + Sync>,
}
pub(crate) fn katana_create_rgb_lin_lut<
T: Copy + Default + AsPrimitive<f32> + Send + Sync + AsPrimitive<usize> + PointeeSizeExpressible,
const BIT_DEPTH: usize,
const LINEAR_CAP: usize,
>(
layout: Layout,
source: &ColorProfile,
opts: TransformOptions,
) -> Result<KatanaRgbLinearizationState<T>, CmsError>
where
u32: AsPrimitive<T>,
f32: AsPrimitive<T>,
{
let lin_r =
source.build_r_linearize_table::<T, LINEAR_CAP, BIT_DEPTH>(opts.allow_use_cicp_transfer)?;
let lin_g =
source.build_g_linearize_table::<T, LINEAR_CAP, BIT_DEPTH>(opts.allow_use_cicp_transfer)?;
let lin_b =
source.build_b_linearize_table::<T, LINEAR_CAP, BIT_DEPTH>(opts.allow_use_cicp_transfer)?;
let lin_stage: Box<dyn KatanaInitialStage<f32, T> + Send + Sync> = match layout {
Layout::Rgb => {
Box::new(
KatanaRgbLinearizationStage::<T, { Layout::Rgb as u8 }, LINEAR_CAP> {
r_lin: lin_r,
g_lin: lin_g,
b_lin: lin_b,
bit_depth: BIT_DEPTH,
linear_cap: LINEAR_CAP,
_phantom: PhantomData,
},
)
}
Layout::Rgba => {
Box::new(
KatanaRgbLinearizationStage::<T, { Layout::Rgba as u8 }, LINEAR_CAP> {
r_lin: lin_r,
g_lin: lin_g,
b_lin: lin_b,
bit_depth: BIT_DEPTH,
linear_cap: LINEAR_CAP,
_phantom: PhantomData,
},
)
}
Layout::Gray => unimplemented!("Gray should not be called on Rgb/Rgba execution path"),
Layout::GrayAlpha => {
unimplemented!("GrayAlpha should not be called on Rgb/Rgba execution path")
}
_ => unreachable!(),
};
let xyz_to_rgb = source.rgb_to_xyz_matrix();
let matrices: Vec<Box<dyn KatanaIntermediateStage<f32> + Send + Sync>> =
vec![Box::new(KatanaMatrixStage {
matrices: vec![
xyz_to_rgb.to_f32(),
Matrix3f {
v: [
[32768.0 / 65535.0, 0.0, 0.0],
[0.0, 32768.0 / 65535.0, 0.0],
[0.0, 0.0, 32768.0 / 65535.0],
],
},
],
})];
Ok(KatanaRgbLinearizationState {
stages: matrices,
initial_stage: lin_stage,
})
}
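// The 32768/65535 matrix above folds the ICC 16-bit XYZ encoding into the
// pipeline: PCS XYZ stores 1.0 as 0x8000 out of 0xFFFF, so encoded =
// real * 32768 / 65535, and the inverse stage in xyz_rgb applies the reciprocal
// factor. A minimal sketch that the pair cancels exactly:
#[cfg(test)]
mod xyz_encoding_sketch {
    #[test]
    fn encode_then_decode_is_identity() {
        let encode = 32768.0f64 / 65535.0;
        let decode = 65535.0f64 / 32768.0;
        // 0.9642 is the D50 white point X, a typical PCS value.
        assert!((0.9642 * encode * decode - 0.9642).abs() < 1e-12);
    }
}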


@@ -0,0 +1,85 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::{CmsError, TransformExecutor};
use std::marker::PhantomData;
/// `W` - working (storage) data type.
/// `I` - input/output data type.
pub(crate) trait KatanaInitialStage<W, I> {
fn to_pcs(&self, input: &[I]) -> Result<Vec<W>, CmsError>;
}
/// `W` - working (storage) data type.
/// `I` - input/output data type.
pub(crate) trait KatanaFinalStage<W, I> {
fn to_output(&self, src: &mut [W], dst: &mut [I]) -> Result<(), CmsError>;
}
/// `W` - working (storage) data type.
pub(crate) trait KatanaIntermediateStage<W> {
fn stage(&self, input: &mut Vec<W>) -> Result<Vec<W>, CmsError>;
}
pub(crate) struct BlackholeIntermediateStage<W> {
pub(crate) _phantom: PhantomData<W>,
}
impl<W> KatanaIntermediateStage<W> for BlackholeIntermediateStage<W> {
fn stage(&self, input: &mut Vec<W>) -> Result<Vec<W>, CmsError> {
Ok(std::mem::take(input))
}
}
/// `I` - input/output data type.
pub(crate) trait KatanaPostFinalizationStage<I> {
fn finalize(&self, src: &[I], dst: &mut [I]) -> Result<(), CmsError>;
}
/// `W` - working (storage) data type.
/// `I` - input/output data type.
pub(crate) struct Katana<W, I> {
pub(crate) initial_stage: Box<dyn KatanaInitialStage<W, I> + Send + Sync>,
pub(crate) final_stage: Box<dyn KatanaFinalStage<W, I> + Sync + Send>,
pub(crate) stages: Vec<Box<dyn KatanaIntermediateStage<W> + Send + Sync>>,
pub(crate) post_finalization: Vec<Box<dyn KatanaPostFinalizationStage<I> + Send + Sync>>,
}
impl<W, I: Copy + Default> TransformExecutor<I> for Katana<W, I> {
fn transform(&self, src: &[I], dst: &mut [I]) -> Result<(), CmsError> {
let mut working_vec = self.initial_stage.to_pcs(src)?;
for stage in self.stages.iter() {
working_vec = stage.stage(&mut working_vec)?;
}
self.final_stage.to_output(&mut working_vec, dst)?;
for finalization in self.post_finalization.iter() {
finalization.finalize(src, dst)?;
}
Ok(())
}
}
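// Data-flow sketch for the pipeline above: the initial stage builds the working
// lane, every intermediate stage rewrites it (taking ownership via mem::take to
// avoid reallocation), the final stage scatters into `dst`, and post-finalization
// stages may copy channels such as alpha straight from `src`. A minimal check
// that the pass-through stage really is a no-op:
#[cfg(test)]
mod katana_flow_sketch {
    use super::*;

    #[test]
    fn blackhole_stage_passes_the_lane_through() {
        let stage = BlackholeIntermediateStage::<f32> {
            _phantom: PhantomData,
        };
        let mut lane = vec![0.1f32, 0.2, 0.3];
        assert_eq!(stage.stage(&mut lane).unwrap(), vec![0.1, 0.2, 0.3]);
    }
}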


@@ -0,0 +1,62 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::katana::KatanaIntermediateStage;
use crate::{CmsError, Lab, Xyz};
#[derive(Default)]
pub(crate) struct KatanaStageLabToXyz {}
impl KatanaIntermediateStage<f32> for KatanaStageLabToXyz {
fn stage(&self, input: &mut Vec<f32>) -> Result<Vec<f32>, CmsError> {
for dst in input.chunks_exact_mut(3) {
let lab = Lab::new(dst[0], dst[1], dst[2]);
let xyz = lab.to_pcs_xyz();
dst[0] = xyz.x;
dst[1] = xyz.y;
dst[2] = xyz.z;
}
Ok(std::mem::take(input))
}
}
#[derive(Default)]
pub(crate) struct KatanaStageXyzToLab {}
impl KatanaIntermediateStage<f32> for KatanaStageXyzToLab {
fn stage(&self, input: &mut Vec<f32>) -> Result<Vec<f32>, CmsError> {
for dst in input.chunks_exact_mut(3) {
let xyz = Xyz::new(dst[0], dst[1], dst[2]);
let lab = Lab::from_pcs_xyz(xyz);
dst[0] = lab.l;
dst[1] = lab.a;
dst[2] = lab.b;
}
Ok(std::mem::take(input))
}
}
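// Round-trip sketch: the two stages above are mutual inverses up to float noise,
// assuming Lab::from_pcs_xyz and Lab::to_pcs_xyz implement the forward and
// inverse CIE Lab transform against the PCS white point.
#[cfg(test)]
mod lab_xyz_roundtrip_sketch {
    use super::*;

    #[test]
    fn xyz_survives_a_lab_round_trip() {
        let xyz = Xyz::new(0.3127, 0.3290, 0.3583);
        let lab = Lab::from_pcs_xyz(xyz);
        let back = lab.to_pcs_xyz();
        assert!((back.x - xyz.x).abs() < 1e-4);
        assert!((back.y - xyz.y).abs() < 1e-4);
        assert!((back.z - xyz.z).abs() < 1e-4);
    }
}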


@@ -0,0 +1,223 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::katana::pcs_stages::KatanaMatrixStage;
use crate::conversions::katana::{
KatanaDefaultIntermediate, KatanaFinalStage, KatanaIntermediateStage,
};
use crate::mlaf::mlaf;
use crate::{
CmsError, ColorProfile, GammaLutInterpolate, Layout, Matrix3f, PointeeSizeExpressible,
RenderingIntent, Rgb, TransformOptions, filmlike_clip,
};
use num_traits::AsPrimitive;
pub(crate) struct KatanaXyzToRgbStage<T: Clone, const LAYOUT: u8> {
pub(crate) r_gamma: Box<[T; 65536]>,
pub(crate) g_gamma: Box<[T; 65536]>,
pub(crate) b_gamma: Box<[T; 65536]>,
pub(crate) intent: RenderingIntent,
pub(crate) bit_depth: usize,
pub(crate) gamma_lut: usize,
}
impl<T: Clone + AsPrimitive<f32> + PointeeSizeExpressible, const LAYOUT: u8>
KatanaFinalStage<f32, T> for KatanaXyzToRgbStage<T, LAYOUT>
where
u32: AsPrimitive<T>,
f32: AsPrimitive<T>,
{
fn to_output(&self, src: &mut [f32], dst: &mut [T]) -> Result<(), CmsError> {
let dst_cn = Layout::from(LAYOUT);
let dst_channels = dst_cn.channels();
if src.len() % 3 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / 3;
let dst_chunks = dst.len() / dst_channels;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
let max_colors: T = (if T::FINITE {
((1u32 << self.bit_depth) - 1) as f32
} else {
1.0
})
.as_();
let lut_cap = (self.gamma_lut - 1) as f32;
if self.intent != RenderingIntent::AbsoluteColorimetric {
for (src, dst) in src.chunks_exact(3).zip(dst.chunks_exact_mut(dst_channels)) {
let mut rgb = Rgb::new(src[0], src[1], src[2]);
if rgb.is_out_of_gamut() {
rgb = filmlike_clip(rgb);
}
let r = mlaf(0.5, rgb.r, lut_cap).min(lut_cap).max(0.) as u16;
let g = mlaf(0.5, rgb.g, lut_cap).min(lut_cap).max(0.) as u16;
let b = mlaf(0.5, rgb.b, lut_cap).min(lut_cap).max(0.) as u16;
dst[0] = self.r_gamma[r as usize];
dst[1] = self.g_gamma[g as usize];
dst[2] = self.b_gamma[b as usize];
if dst_cn == Layout::Rgba {
dst[3] = max_colors;
}
}
} else {
for (src, dst) in src.chunks_exact(3).zip(dst.chunks_exact_mut(dst_channels)) {
let rgb = Rgb::new(src[0], src[1], src[2]);
let r = mlaf(0.5, rgb.r, lut_cap).min(lut_cap).max(0.) as u16;
let g = mlaf(0.5, rgb.g, lut_cap).min(lut_cap).max(0.) as u16;
let b = mlaf(0.5, rgb.b, lut_cap).min(lut_cap).max(0.) as u16;
dst[0] = self.r_gamma[r as usize];
dst[1] = self.g_gamma[g as usize];
dst[2] = self.b_gamma[b as usize];
if dst_cn == Layout::Rgba {
dst[3] = max_colors;
}
}
}
Ok(())
}
}
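// The gamma lookup above rounds to the nearest LUT slot by biasing with 0.5
// before the integer cast: mlaf(0.5, v, lut_cap) == v * lut_cap + 0.5, clamped
// to [0, lut_cap]. A worked example with GAMMA_LUT = 65536:
#[cfg(test)]
mod gamma_index_sketch {
    #[test]
    fn half_bias_rounds_to_nearest_slot() {
        let lut_cap = 65535.0f32;
        let v = 0.5f32;
        let idx = (0.5 + v * lut_cap).min(lut_cap).max(0.) as u16;
        assert_eq!(idx, 32768); // 0.5 * 65535 + 0.5 = 32768.0
    }
}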
pub(crate) struct KatanaXyzRgbState<T> {
pub(crate) stages: Vec<Box<dyn KatanaIntermediateStage<f32> + Send + Sync>>,
pub(crate) final_stage: Box<dyn KatanaFinalStage<f32, T> + Send + Sync>,
}
pub(crate) fn katana_prepare_inverse_lut_rgb_xyz<
T: Copy
+ Default
+ AsPrimitive<f32>
+ Send
+ Sync
+ AsPrimitive<usize>
+ PointeeSizeExpressible
+ GammaLutInterpolate,
const BIT_DEPTH: usize,
const GAMMA_LUT: usize,
>(
dest: &ColorProfile,
dest_layout: Layout,
options: TransformOptions,
) -> Result<KatanaXyzRgbState<T>, CmsError>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
{
    // Disabled draft of an extended (non-finite) gamma path, kept here for reference:
    // if !T::FINITE {
// if let Some(extended_gamma) = dest.try_extended_gamma_evaluator() {
// let xyz_to_rgb = dest.rgb_to_xyz_matrix().inverse();
//
// let mut matrices = vec![Matrix3f {
// v: [
// [65535.0 / 32768.0, 0.0, 0.0],
// [0.0, 65535.0 / 32768.0, 0.0],
// [0.0, 0.0, 65535.0 / 32768.0],
// ],
// }];
//
// matrices.push(xyz_to_rgb.to_f32());
// let xyz_to_rgb_stage = XyzToRgbStageExtended::<T> {
// gamma_evaluator: extended_gamma,
// matrices,
// phantom_data: PhantomData,
// };
// xyz_to_rgb_stage.transform(lut)?;
// return Ok(());
// }
// }
let gamma_map_r = dest.build_gamma_table::<T, 65536, GAMMA_LUT, BIT_DEPTH>(
&dest.red_trc,
options.allow_use_cicp_transfer,
)?;
let gamma_map_g = dest.build_gamma_table::<T, 65536, GAMMA_LUT, BIT_DEPTH>(
&dest.green_trc,
options.allow_use_cicp_transfer,
)?;
let gamma_map_b = dest.build_gamma_table::<T, 65536, GAMMA_LUT, BIT_DEPTH>(
&dest.blue_trc,
options.allow_use_cicp_transfer,
)?;
let xyz_to_rgb = dest.rgb_to_xyz_matrix().inverse();
let mut matrices: Vec<Box<KatanaDefaultIntermediate>> =
vec![Box::new(KatanaMatrixStage::new(Matrix3f {
v: [
[65535.0 / 32768.0, 0.0, 0.0],
[0.0, 65535.0 / 32768.0, 0.0],
[0.0, 0.0, 65535.0 / 32768.0],
],
}))];
matrices.push(Box::new(KatanaMatrixStage::new(xyz_to_rgb.to_f32())));
match dest_layout {
Layout::Rgb => {
let xyz_to_rgb_stage = KatanaXyzToRgbStage::<T, { Layout::Rgb as u8 }> {
r_gamma: gamma_map_r,
g_gamma: gamma_map_g,
b_gamma: gamma_map_b,
intent: options.rendering_intent,
bit_depth: BIT_DEPTH,
gamma_lut: GAMMA_LUT,
};
Ok(KatanaXyzRgbState {
stages: matrices,
final_stage: Box::new(xyz_to_rgb_stage),
})
}
Layout::Rgba => {
let xyz_to_rgb_stage = KatanaXyzToRgbStage::<T, { Layout::Rgba as u8 }> {
r_gamma: gamma_map_r,
g_gamma: gamma_map_g,
b_gamma: gamma_map_b,
intent: options.rendering_intent,
bit_depth: BIT_DEPTH,
gamma_lut: GAMMA_LUT,
};
Ok(KatanaXyzRgbState {
stages: matrices,
final_stage: Box::new(xyz_to_rgb_stage),
})
}
Layout::Gray => unreachable!("Gray layout must not be called on Rgb/Rgba path"),
        Layout::GrayAlpha => unreachable!("GrayAlpha layout must not be called on Rgb/Rgba path"),
_ => unreachable!(
"layout {:?} should not be called on xyz->rgb path",
dest_layout
),
}
}

428
vendor/moxcms/src/conversions/lut3x3.rs vendored Normal file

@@ -0,0 +1,428 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::katana::{KatanaFinalStage, KatanaInitialStage};
use crate::err::MalformedSize;
use crate::profile::LutDataType;
use crate::safe_math::{SafeMul, SafePowi};
use crate::trc::lut_interp_linear_float;
use crate::{
CmsError, Cube, DataColorSpace, InterpolationMethod, PointeeSizeExpressible, Stage,
TransformOptions, Vector3f,
};
use num_traits::AsPrimitive;
#[derive(Default)]
struct Lut3x3 {
input: [Vec<f32>; 3],
clut: Vec<f32>,
grid_size: u8,
gamma: [Vec<f32>; 3],
interpolation_method: InterpolationMethod,
pcs: DataColorSpace,
}
#[derive(Default)]
struct KatanaLut3x3<T: Copy + Default> {
input: [Vec<f32>; 3],
clut: Vec<f32>,
grid_size: u8,
gamma: [Vec<f32>; 3],
interpolation_method: InterpolationMethod,
pcs: DataColorSpace,
_phantom: std::marker::PhantomData<T>,
bit_depth: usize,
}
fn make_lut_3x3(
lut: &LutDataType,
options: TransformOptions,
pcs: DataColorSpace,
) -> Result<Lut3x3, CmsError> {
let clut_length: usize = (lut.num_clut_grid_points as usize)
.safe_powi(lut.num_input_channels as u32)?
.safe_mul(lut.num_output_channels as usize)?;
let lin_table = lut.input_table.to_clut_f32();
if lin_table.len() < lut.num_input_table_entries as usize * 3 {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: lin_table.len(),
expected: lut.num_input_table_entries as usize * 3,
}));
}
let lin_curve0 = lin_table[..lut.num_input_table_entries as usize].to_vec();
let lin_curve1 = lin_table
[lut.num_input_table_entries as usize..lut.num_input_table_entries as usize * 2]
.to_vec();
let lin_curve2 = lin_table
[lut.num_input_table_entries as usize * 2..lut.num_input_table_entries as usize * 3]
.to_vec();
let clut_table = lut.clut_table.to_clut_f32();
if clut_table.len() != clut_length {
return Err(CmsError::MalformedClut(MalformedSize {
size: clut_table.len(),
expected: clut_length,
}));
}
let gamma_curves = lut.output_table.to_clut_f32();
if gamma_curves.len() < lut.num_output_table_entries as usize * 3 {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: gamma_curves.len(),
expected: lut.num_output_table_entries as usize * 3,
}));
}
let gamma_curve0 = gamma_curves[..lut.num_output_table_entries as usize].to_vec();
let gamma_curve1 = gamma_curves
[lut.num_output_table_entries as usize..lut.num_output_table_entries as usize * 2]
.to_vec();
let gamma_curve2 = gamma_curves
[lut.num_output_table_entries as usize * 2..lut.num_output_table_entries as usize * 3]
.to_vec();
let transform = Lut3x3 {
input: [lin_curve0, lin_curve1, lin_curve2],
gamma: [gamma_curve0, gamma_curve1, gamma_curve2],
interpolation_method: options.interpolation_method,
clut: clut_table,
grid_size: lut.num_clut_grid_points,
pcs,
};
Ok(transform)
}
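// CLUT sizing used above, as a worked example: entries =
// grid_points^input_channels * output_channels, which is exactly what the
// safe_powi/safe_mul chain computes with overflow checking. For a common
// 17-point RGB->RGB table that is 17^3 * 3 = 14739 f32 values.
#[cfg(test)]
mod clut_size_sketch {
    #[test]
    fn seventeen_point_3x3_table_length() {
        let grid_points = 17usize;
        assert_eq!(grid_points.pow(3) * 3, 14739);
    }
}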
fn stage_lut_3x3(
    lut: &LutDataType,
    options: TransformOptions,
    pcs: DataColorSpace,
) -> Result<Box<dyn Stage>, CmsError> {
    // `Lut3x3` already implements `Stage`, so the prepared LUT can be boxed directly.
    let lut = make_lut_3x3(lut, options, pcs)?;
    Ok(Box::new(lut))
}
pub(crate) fn katana_input_stage_lut_3x3<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
>(
lut: &LutDataType,
options: TransformOptions,
pcs: DataColorSpace,
bit_depth: usize,
) -> Result<Box<dyn KatanaInitialStage<f32, T> + Send + Sync>, CmsError>
where
f32: AsPrimitive<T>,
{
let lut = make_lut_3x3(lut, options, pcs)?;
let transform = KatanaLut3x3::<T> {
input: lut.input,
gamma: lut.gamma,
interpolation_method: lut.interpolation_method,
clut: lut.clut,
grid_size: lut.grid_size,
pcs: lut.pcs,
_phantom: std::marker::PhantomData,
bit_depth,
};
Ok(Box::new(transform))
}
pub(crate) fn katana_output_stage_lut_3x3<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
>(
lut: &LutDataType,
options: TransformOptions,
pcs: DataColorSpace,
bit_depth: usize,
) -> Result<Box<dyn KatanaFinalStage<f32, T> + Send + Sync>, CmsError>
where
f32: AsPrimitive<T>,
{
let lut = make_lut_3x3(lut, options, pcs)?;
let transform = KatanaLut3x3::<T> {
input: lut.input,
gamma: lut.gamma,
interpolation_method: lut.interpolation_method,
clut: lut.clut,
grid_size: lut.grid_size,
pcs: lut.pcs,
_phantom: std::marker::PhantomData,
bit_depth,
};
Ok(Box::new(transform))
}
impl Lut3x3 {
fn transform_impl<Fetch: Fn(f32, f32, f32) -> Vector3f>(
&self,
src: &[f32],
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
let linearization_0 = &self.input[0];
let linearization_1 = &self.input[1];
let linearization_2 = &self.input[2];
for (dest, src) in dst.chunks_exact_mut(3).zip(src.chunks_exact(3)) {
debug_assert!(self.grid_size as i32 >= 1);
let linear_x = lut_interp_linear_float(src[0], linearization_0);
let linear_y = lut_interp_linear_float(src[1], linearization_1);
let linear_z = lut_interp_linear_float(src[2], linearization_2);
let clut = fetch(linear_x, linear_y, linear_z);
let pcs_x = lut_interp_linear_float(clut.v[0], &self.gamma[0]);
let pcs_y = lut_interp_linear_float(clut.v[1], &self.gamma[1]);
let pcs_z = lut_interp_linear_float(clut.v[2], &self.gamma[2]);
dest[0] = pcs_x;
dest[1] = pcs_y;
dest[2] = pcs_z;
}
Ok(())
}
}
impl Stage for Lut3x3 {
fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> {
let l_tbl = Cube::new(&self.clut, self.grid_size as usize);
        // If the PCS is Lab or XYZ, trilinear interpolation must be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(src, dst, |x, y, z| l_tbl.trilinear_vec3(x, y, z));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(src, dst, |x, y, z| l_tbl.tetra_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(src, dst, |x, y, z| l_tbl.pyramid_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(src, dst, |x, y, z| l_tbl.prism_vec3(x, y, z))?;
}
InterpolationMethod::Linear => {
self.transform_impl(src, dst, |x, y, z| l_tbl.trilinear_vec3(x, y, z))?;
}
}
Ok(())
}
}
impl<T: Copy + Default + PointeeSizeExpressible + AsPrimitive<f32>> KatanaLut3x3<T>
where
f32: AsPrimitive<T>,
{
fn to_pcs_impl<Fetch: Fn(f32, f32, f32) -> Vector3f>(
&self,
input: &[T],
fetch: Fetch,
) -> Result<Vec<f32>, CmsError> {
if input.len() % 3 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let normalizing_value = if T::FINITE {
1.0 / ((1u32 << self.bit_depth) - 1) as f32
} else {
1.0
};
let mut dst = vec![0.; input.len()];
let linearization_0 = &self.input[0];
let linearization_1 = &self.input[1];
let linearization_2 = &self.input[2];
for (dest, src) in dst.chunks_exact_mut(3).zip(input.chunks_exact(3)) {
let linear_x =
lut_interp_linear_float(src[0].as_() * normalizing_value, linearization_0);
let linear_y =
lut_interp_linear_float(src[1].as_() * normalizing_value, linearization_1);
let linear_z =
lut_interp_linear_float(src[2].as_() * normalizing_value, linearization_2);
let clut = fetch(linear_x, linear_y, linear_z);
let pcs_x = lut_interp_linear_float(clut.v[0], &self.gamma[0]);
let pcs_y = lut_interp_linear_float(clut.v[1], &self.gamma[1]);
let pcs_z = lut_interp_linear_float(clut.v[2], &self.gamma[2]);
dest[0] = pcs_x;
dest[1] = pcs_y;
dest[2] = pcs_z;
}
Ok(dst)
}
fn to_output<Fetch: Fn(f32, f32, f32) -> Vector3f>(
&self,
src: &[f32],
dst: &mut [T],
fetch: Fetch,
) -> Result<(), CmsError> {
if src.len() % 3 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % 3 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() != src.len() {
return Err(CmsError::LaneSizeMismatch);
}
let norm_value = if T::FINITE {
((1u32 << self.bit_depth) - 1) as f32
} else {
1.0
};
let linearization_0 = &self.input[0];
let linearization_1 = &self.input[1];
let linearization_2 = &self.input[2];
for (dest, src) in dst.chunks_exact_mut(3).zip(src.chunks_exact(3)) {
let linear_x = lut_interp_linear_float(src[0], linearization_0);
let linear_y = lut_interp_linear_float(src[1], linearization_1);
let linear_z = lut_interp_linear_float(src[2], linearization_2);
let clut = fetch(linear_x, linear_y, linear_z);
let pcs_x = lut_interp_linear_float(clut.v[0], &self.gamma[0]);
let pcs_y = lut_interp_linear_float(clut.v[1], &self.gamma[1]);
let pcs_z = lut_interp_linear_float(clut.v[2], &self.gamma[2]);
if T::FINITE {
dest[0] = (pcs_x * norm_value).round().max(0.0).min(norm_value).as_();
dest[1] = (pcs_y * norm_value).round().max(0.0).min(norm_value).as_();
dest[2] = (pcs_z * norm_value).round().max(0.0).min(norm_value).as_();
} else {
dest[0] = pcs_x.as_();
dest[1] = pcs_y.as_();
dest[2] = pcs_z.as_();
}
}
Ok(())
}
}
impl<T: Copy + Default + PointeeSizeExpressible + AsPrimitive<f32>> KatanaInitialStage<f32, T>
for KatanaLut3x3<T>
where
f32: AsPrimitive<T>,
{
fn to_pcs(&self, input: &[T]) -> Result<Vec<f32>, CmsError> {
let l_tbl = Cube::new(&self.clut, self.grid_size as usize);
        // If the PCS is Lab or XYZ, trilinear interpolation must be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.to_pcs_impl(input, |x, y, z| l_tbl.trilinear_vec3(x, y, z));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.to_pcs_impl(input, |x, y, z| l_tbl.tetra_vec3(x, y, z))
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.to_pcs_impl(input, |x, y, z| l_tbl.pyramid_vec3(x, y, z))
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.to_pcs_impl(input, |x, y, z| l_tbl.prism_vec3(x, y, z))
}
InterpolationMethod::Linear => {
self.to_pcs_impl(input, |x, y, z| l_tbl.trilinear_vec3(x, y, z))
}
}
}
}
impl<T: Copy + Default + PointeeSizeExpressible + AsPrimitive<f32>> KatanaFinalStage<f32, T>
for KatanaLut3x3<T>
where
f32: AsPrimitive<T>,
{
    fn to_output(&self, src: &mut [f32], dst: &mut [T]) -> Result<(), CmsError> {
        let l_tbl = Cube::new(&self.clut, self.grid_size as usize);
        // Dispatches to the inherent three-argument `to_output` above;
        // if the PCS is Lab or XYZ, trilinear interpolation must be used.
        if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
            return self.to_output(src, dst, |x, y, z| l_tbl.trilinear_vec3(x, y, z));
        }
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.to_output(src, dst, |x, y, z| l_tbl.tetra_vec3(x, y, z))
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.to_output(src, dst, |x, y, z| l_tbl.pyramid_vec3(x, y, z))
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.to_output(src, dst, |x, y, z| l_tbl.prism_vec3(x, y, z))
}
InterpolationMethod::Linear => {
self.to_output(src, dst, |x, y, z| l_tbl.trilinear_vec3(x, y, z))
}
}
}
}
pub(crate) fn create_lut3x3(
lut: &LutDataType,
src: &[f32],
options: TransformOptions,
pcs: DataColorSpace,
) -> Result<Vec<f32>, CmsError> {
if lut.num_input_channels != 3 || lut.num_output_channels != 3 {
return Err(CmsError::UnsupportedProfileConnection);
}
let mut dest = vec![0.; src.len()];
let lut_stage = stage_lut_3x3(lut, options, pcs)?;
lut_stage.transform(src, &mut dest)?;
Ok(dest)
}

248
vendor/moxcms/src/conversions/lut3x4.rs vendored Normal file

@@ -0,0 +1,248 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::profile::LutDataType;
use crate::safe_math::{SafeMul, SafePowi};
use crate::trc::lut_interp_linear_float;
use crate::{
CmsError, Cube, DataColorSpace, InterpolationMethod, MalformedSize, Stage, TransformOptions,
Vector4f,
};
use num_traits::AsPrimitive;
#[derive(Default)]
struct Lut3x4 {
input: [Vec<f32>; 3],
clut: Vec<f32>,
grid_size: u8,
gamma: [Vec<f32>; 4],
interpolation_method: InterpolationMethod,
pcs: DataColorSpace,
}
fn make_lut_3x4(
lut: &LutDataType,
options: TransformOptions,
pcs: DataColorSpace,
) -> Result<Lut3x4, CmsError> {
let clut_length: usize = (lut.num_clut_grid_points as usize)
.safe_powi(lut.num_input_channels as u32)?
.safe_mul(lut.num_output_channels as usize)?;
let clut_table = lut.clut_table.to_clut_f32();
if clut_table.len() != clut_length {
return Err(CmsError::MalformedClut(MalformedSize {
size: clut_table.len(),
expected: clut_length,
}));
}
let linearization_table = lut.input_table.to_clut_f32();
if linearization_table.len() < lut.num_input_table_entries as usize * 3 {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: linearization_table.len(),
expected: lut.num_input_table_entries as usize * 3,
}));
}
let linear_curve0 = linearization_table[..lut.num_input_table_entries as usize].to_vec();
let linear_curve1 = linearization_table
[lut.num_input_table_entries as usize..lut.num_input_table_entries as usize * 2]
.to_vec();
let linear_curve2 = linearization_table
[lut.num_input_table_entries as usize * 2..lut.num_input_table_entries as usize * 3]
.to_vec();
let gamma_table = lut.output_table.to_clut_f32();
if gamma_table.len() < lut.num_output_table_entries as usize * 4 {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: gamma_table.len(),
expected: lut.num_output_table_entries as usize * 4,
}));
}
let gamma_curve0 = gamma_table[..lut.num_output_table_entries as usize].to_vec();
let gamma_curve1 = gamma_table
[lut.num_output_table_entries as usize..lut.num_output_table_entries as usize * 2]
.to_vec();
let gamma_curve2 = gamma_table
[lut.num_output_table_entries as usize * 2..lut.num_output_table_entries as usize * 3]
.to_vec();
let gamma_curve3 = gamma_table
[lut.num_output_table_entries as usize * 3..lut.num_output_table_entries as usize * 4]
.to_vec();
let transform = Lut3x4 {
input: [linear_curve0, linear_curve1, linear_curve2],
interpolation_method: options.interpolation_method,
clut: clut_table,
grid_size: lut.num_clut_grid_points,
pcs,
gamma: [gamma_curve0, gamma_curve1, gamma_curve2, gamma_curve3],
};
Ok(transform)
}
fn stage_lut_3x4(
    lut: &LutDataType,
    options: TransformOptions,
    pcs: DataColorSpace,
) -> Result<Box<dyn Stage>, CmsError> {
    // `Lut3x4` already implements `Stage`, so the prepared LUT can be boxed directly.
    let lut = make_lut_3x4(lut, options, pcs)?;
    Ok(Box::new(lut))
}
impl Lut3x4 {
fn transform_impl<Fetch: Fn(f32, f32, f32) -> Vector4f>(
&self,
src: &[f32],
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
let linearization_0 = &self.input[0];
let linearization_1 = &self.input[1];
let linearization_2 = &self.input[2];
for (dest, src) in dst.chunks_exact_mut(4).zip(src.chunks_exact(3)) {
debug_assert!(self.grid_size as i32 >= 1);
let linear_x = lut_interp_linear_float(src[0], linearization_0);
let linear_y = lut_interp_linear_float(src[1], linearization_1);
let linear_z = lut_interp_linear_float(src[2], linearization_2);
let clut = fetch(linear_x, linear_y, linear_z);
let pcs_x = lut_interp_linear_float(clut.v[0], &self.gamma[0]);
let pcs_y = lut_interp_linear_float(clut.v[1], &self.gamma[1]);
let pcs_z = lut_interp_linear_float(clut.v[2], &self.gamma[2]);
let pcs_w = lut_interp_linear_float(clut.v[3], &self.gamma[3]);
dest[0] = pcs_x;
dest[1] = pcs_y;
dest[2] = pcs_z;
dest[3] = pcs_w;
}
Ok(())
}
}
impl Stage for Lut3x4 {
fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> {
let l_tbl = Cube::new(&self.clut, self.grid_size as usize);
        // If the PCS is Lab or XYZ, trilinear interpolation must be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(src, dst, |x, y, z| l_tbl.trilinear_vec4(x, y, z));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(src, dst, |x, y, z| l_tbl.tetra_vec4(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(src, dst, |x, y, z| l_tbl.pyramid_vec4(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(src, dst, |x, y, z| l_tbl.prism_vec4(x, y, z))?;
}
InterpolationMethod::Linear => {
self.transform_impl(src, dst, |x, y, z| l_tbl.trilinear_vec4(x, y, z))?;
}
}
Ok(())
}
}
pub(crate) fn create_lut3_samples<T: Copy + 'static, const SAMPLES: usize>() -> Vec<T>
where
u32: AsPrimitive<T>,
{
let lut_size: u32 = (3 * SAMPLES * SAMPLES * SAMPLES) as u32;
assert!(SAMPLES >= 1);
let mut src = Vec::with_capacity(lut_size as usize);
for x in 0..SAMPLES as u32 {
for y in 0..SAMPLES as u32 {
for z in 0..SAMPLES as u32 {
src.push(x.as_());
src.push(y.as_());
src.push(z.as_());
}
}
}
src
}
pub(crate) fn create_lut3_samples_norm<const SAMPLES: usize>() -> Vec<f32> {
let lut_size: u32 = (3 * SAMPLES * SAMPLES * SAMPLES) as u32;
assert!(SAMPLES >= 1);
let scale = 1. / (SAMPLES as f32 - 1.0);
let mut src = Vec::with_capacity(lut_size as usize);
for x in 0..SAMPLES as u32 {
for y in 0..SAMPLES as u32 {
for z in 0..SAMPLES as u32 {
src.push(x as f32 * scale);
src.push(y as f32 * scale);
src.push(z as f32 * scale);
}
}
}
src
}
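// Ordering sketch for the sampler above: x is the outermost loop and z the
// innermost, so z varies fastest and the first SAMPLES triples all share
// x = y = 0. With SAMPLES = 2 the grid is the 8 corners of the unit cube:
#[cfg(test)]
mod lut3_sampling_sketch {
    use super::*;

    #[test]
    fn two_point_grid_visits_cube_corners_in_order() {
        let s = create_lut3_samples_norm::<2>();
        assert_eq!(s.len(), 24);
        assert_eq!(s[..6], [0., 0., 0., 0., 0., 1.]);
        assert_eq!(s[21..24], [1., 1., 1.]);
    }
}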
pub(crate) fn create_lut3x4(
lut: &LutDataType,
src: &[f32],
options: TransformOptions,
pcs: DataColorSpace,
) -> Result<Vec<f32>, CmsError> {
if lut.num_input_channels != 3 || lut.num_output_channels != 4 {
return Err(CmsError::UnsupportedProfileConnection);
}
let mut dest = vec![0.; (src.len() / 3) * 4];
let lut_stage = stage_lut_3x4(lut, options, pcs)?;
lut_stage.transform(src, &mut dest)?;
Ok(dest)
}

392
vendor/moxcms/src/conversions/lut4.rs vendored Normal file

@@ -0,0 +1,392 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::katana::KatanaInitialStage;
use crate::profile::LutDataType;
use crate::safe_math::{SafeMul, SafePowi};
use crate::trc::lut_interp_linear_float;
use crate::{
CmsError, DataColorSpace, Hypercube, InterpolationMethod, MalformedSize,
PointeeSizeExpressible, Stage, TransformOptions, Vector3f,
};
use num_traits::AsPrimitive;
use std::marker::PhantomData;
#[allow(unused)]
#[derive(Default)]
struct Lut4x3 {
linearization: [Vec<f32>; 4],
clut: Vec<f32>,
grid_size: u8,
output: [Vec<f32>; 3],
interpolation_method: InterpolationMethod,
pcs: DataColorSpace,
}
#[allow(unused)]
#[derive(Default)]
struct KatanaLut4x3<T: Copy + PointeeSizeExpressible + AsPrimitive<f32>> {
linearization: [Vec<f32>; 4],
clut: Vec<f32>,
grid_size: u8,
output: [Vec<f32>; 3],
interpolation_method: InterpolationMethod,
pcs: DataColorSpace,
_phantom: PhantomData<T>,
bit_depth: usize,
}
#[allow(unused)]
impl Lut4x3 {
fn transform_impl<Fetch: Fn(f32, f32, f32, f32) -> Vector3f>(
&self,
src: &[f32],
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
let linearization_0 = &self.linearization[0];
let linearization_1 = &self.linearization[1];
let linearization_2 = &self.linearization[2];
let linearization_3 = &self.linearization[3];
for (dest, src) in dst.chunks_exact_mut(3).zip(src.chunks_exact(4)) {
debug_assert!(self.grid_size as i32 >= 1);
let linear_x = lut_interp_linear_float(src[0], linearization_0);
let linear_y = lut_interp_linear_float(src[1], linearization_1);
let linear_z = lut_interp_linear_float(src[2], linearization_2);
let linear_w = lut_interp_linear_float(src[3], linearization_3);
let clut = fetch(linear_x, linear_y, linear_z, linear_w);
let pcs_x = lut_interp_linear_float(clut.v[0], &self.output[0]);
let pcs_y = lut_interp_linear_float(clut.v[1], &self.output[1]);
let pcs_z = lut_interp_linear_float(clut.v[2], &self.output[2]);
dest[0] = pcs_x;
dest[1] = pcs_y;
dest[2] = pcs_z;
}
Ok(())
}
}
macro_rules! define_lut4_dispatch {
($dispatcher: ident) => {
impl Stage for $dispatcher {
fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> {
let l_tbl = Hypercube::new(&self.clut, self.grid_size as usize);
                // If the source PCS is Lab or XYZ, quadlinear interpolation must be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self
.transform_impl(src, dst, |x, y, z, w| l_tbl.quadlinear_vec3(x, y, z, w));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(src, dst, |x, y, z, w| l_tbl.tetra_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(src, dst, |x, y, z, w| l_tbl.pyramid_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(src, dst, |x, y, z, w| l_tbl.prism_vec3(x, y, z, w))?
}
InterpolationMethod::Linear => {
self.transform_impl(src, dst, |x, y, z, w| {
l_tbl.quadlinear_vec3(x, y, z, w)
})?
}
}
Ok(())
}
}
};
}
impl<T: Copy + PointeeSizeExpressible + AsPrimitive<f32>> KatanaLut4x3<T> {
fn to_pcs_impl<Fetch: Fn(f32, f32, f32, f32) -> Vector3f>(
&self,
input: &[T],
fetch: Fetch,
) -> Result<Vec<f32>, CmsError> {
if input.len() % 4 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let norm_value = if T::FINITE {
1.0 / ((1u32 << self.bit_depth) - 1) as f32
} else {
1.0
};
let mut dst = vec![0.; (input.len() / 4) * 3];
let linearization_0 = &self.linearization[0];
let linearization_1 = &self.linearization[1];
let linearization_2 = &self.linearization[2];
let linearization_3 = &self.linearization[3];
for (dest, src) in dst.chunks_exact_mut(3).zip(input.chunks_exact(4)) {
let linear_x = lut_interp_linear_float(src[0].as_() * norm_value, linearization_0);
let linear_y = lut_interp_linear_float(src[1].as_() * norm_value, linearization_1);
let linear_z = lut_interp_linear_float(src[2].as_() * norm_value, linearization_2);
let linear_w = lut_interp_linear_float(src[3].as_() * norm_value, linearization_3);
let clut = fetch(linear_x, linear_y, linear_z, linear_w);
let pcs_x = lut_interp_linear_float(clut.v[0], &self.output[0]);
let pcs_y = lut_interp_linear_float(clut.v[1], &self.output[1]);
let pcs_z = lut_interp_linear_float(clut.v[2], &self.output[2]);
dest[0] = pcs_x;
dest[1] = pcs_y;
dest[2] = pcs_z;
}
Ok(dst)
}
}
impl<T: Copy + PointeeSizeExpressible + AsPrimitive<f32>> KatanaInitialStage<f32, T>
for KatanaLut4x3<T>
{
fn to_pcs(&self, input: &[T]) -> Result<Vec<f32>, CmsError> {
if input.len() % 4 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let l_tbl = Hypercube::new(&self.clut, self.grid_size as usize);
        // If the source PCS is Lab or XYZ, quadlinear interpolation must be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.to_pcs_impl(input, |x, y, z, w| l_tbl.quadlinear_vec3(x, y, z, w));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.to_pcs_impl(input, |x, y, z, w| l_tbl.tetra_vec3(x, y, z, w))
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.to_pcs_impl(input, |x, y, z, w| l_tbl.pyramid_vec3(x, y, z, w))
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.to_pcs_impl(input, |x, y, z, w| l_tbl.prism_vec3(x, y, z, w))
}
InterpolationMethod::Linear => {
self.to_pcs_impl(input, |x, y, z, w| l_tbl.quadlinear_vec3(x, y, z, w))
}
}
}
}
define_lut4_dispatch!(Lut4x3);
fn make_lut_4x3(
lut: &LutDataType,
options: TransformOptions,
pcs: DataColorSpace,
) -> Result<Lut4x3, CmsError> {
    // There are 4 possible cases:
    // - All curves are non-linear
    // - Linearization curves are non-linear, but gamma is linear
    // - Gamma curves are non-linear, but linearization is linear
    // - All curves are linear
let clut_length: usize = (lut.num_clut_grid_points as usize)
.safe_powi(lut.num_input_channels as u32)?
.safe_mul(lut.num_output_channels as usize)?;
let clut_table = lut.clut_table.to_clut_f32();
if clut_table.len() != clut_length {
return Err(CmsError::MalformedClut(MalformedSize {
size: clut_table.len(),
expected: clut_length,
}));
}
let linearization_table = lut.input_table.to_clut_f32();
if linearization_table.len() < lut.num_input_table_entries as usize * 4 {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: linearization_table.len(),
expected: lut.num_input_table_entries as usize * 4,
}));
}
let lin_curve0 = linearization_table[0..lut.num_input_table_entries as usize].to_vec();
let lin_curve1 = linearization_table
[lut.num_input_table_entries as usize..lut.num_input_table_entries as usize * 2]
.to_vec();
let lin_curve2 = linearization_table
[lut.num_input_table_entries as usize * 2..lut.num_input_table_entries as usize * 3]
.to_vec();
let lin_curve3 = linearization_table
[lut.num_input_table_entries as usize * 3..lut.num_input_table_entries as usize * 4]
.to_vec();
let gamma_table = lut.output_table.to_clut_f32();
if gamma_table.len() < lut.num_output_table_entries as usize * 3 {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: gamma_table.len(),
expected: lut.num_output_table_entries as usize * 3,
}));
}
let gamma_curve0 = gamma_table[..lut.num_output_table_entries as usize].to_vec();
let gamma_curve1 = gamma_table
[lut.num_output_table_entries as usize..lut.num_output_table_entries as usize * 2]
.to_vec();
let gamma_curve2 = gamma_table
[lut.num_output_table_entries as usize * 2..lut.num_output_table_entries as usize * 3]
.to_vec();
let transform = Lut4x3 {
linearization: [lin_curve0, lin_curve1, lin_curve2, lin_curve3],
interpolation_method: options.interpolation_method,
pcs,
clut: clut_table,
grid_size: lut.num_clut_grid_points,
output: [gamma_curve0, gamma_curve1, gamma_curve2],
};
Ok(transform)
}
fn stage_lut_4x3(
lut: &LutDataType,
options: TransformOptions,
pcs: DataColorSpace,
) -> Result<Box<dyn Stage>, CmsError> {
let lut = make_lut_4x3(lut, options, pcs)?;
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
{
use crate::conversions::neon::Lut4x3Neon;
let transform = Lut4x3Neon {
linearization: lut.linearization,
interpolation_method: lut.interpolation_method,
pcs: lut.pcs,
clut: lut.clut,
grid_size: lut.grid_size,
output: lut.output,
};
Ok(Box::new(transform))
}
#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
{
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
{
use crate::conversions::avx::Lut4x3AvxFma;
if std::arch::is_x86_feature_detected!("avx2")
&& std::arch::is_x86_feature_detected!("fma")
{
let transform = Lut4x3AvxFma {
linearization: lut.linearization,
interpolation_method: lut.interpolation_method,
pcs: lut.pcs,
clut: lut.clut,
grid_size: lut.grid_size,
output: lut.output,
};
return Ok(Box::new(transform));
}
}
let transform = Lut4x3 {
linearization: lut.linearization,
interpolation_method: lut.interpolation_method,
pcs: lut.pcs,
clut: lut.clut,
grid_size: lut.grid_size,
output: lut.output,
};
Ok(Box::new(transform))
}
}
pub(crate) fn katana_input_stage_lut_4x3<
T: Copy + PointeeSizeExpressible + AsPrimitive<f32> + Send + Sync,
>(
lut: &LutDataType,
options: TransformOptions,
pcs: DataColorSpace,
bit_depth: usize,
) -> Result<Box<dyn KatanaInitialStage<f32, T> + Send + Sync>, CmsError> {
    // There are 4 possible cases:
    // - All curves are non-linear
    // - Linearization curves are non-linear, but gamma is linear
    // - Gamma curves are non-linear, but linearization is linear
    // - All curves are linear
let lut = make_lut_4x3(lut, options, pcs)?;
let transform = KatanaLut4x3::<T> {
linearization: lut.linearization,
interpolation_method: lut.interpolation_method,
pcs: lut.pcs,
clut: lut.clut,
grid_size: lut.grid_size,
output: lut.output,
_phantom: PhantomData,
bit_depth,
};
Ok(Box::new(transform))
}
pub(crate) fn create_lut4_norm_samples<const SAMPLES: usize>() -> Vec<f32> {
let lut_size: u32 = (4 * SAMPLES * SAMPLES * SAMPLES * SAMPLES) as u32;
let mut src = Vec::with_capacity(lut_size as usize);
let recpeq = 1f32 / (SAMPLES - 1) as f32;
for k in 0..SAMPLES {
for c in 0..SAMPLES {
for m in 0..SAMPLES {
for y in 0..SAMPLES {
src.push(c as f32 * recpeq);
src.push(m as f32 * recpeq);
src.push(y as f32 * recpeq);
src.push(k as f32 * recpeq);
}
}
}
}
src
}
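// Ordering sketch for the CMYK sampler above: k is the outermost loop but each
// sample is pushed as (c, m, y, k), so k stays constant across the first
// SAMPLES^3 quadruples. With SAMPLES = 2 the 16 corners of the unit hypercube
// come out in that order:
#[cfg(test)]
mod lut4_sampling_sketch {
    use super::*;

    #[test]
    fn two_point_grid_holds_k_outermost() {
        let s = create_lut4_norm_samples::<2>();
        assert_eq!(s.len(), 64);
        assert_eq!(s[..4], [0., 0., 0., 0.]);
        assert_eq!(s[60..64], [1., 1., 1., 1.]);
    }
}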
pub(crate) fn create_lut4<const SAMPLES: usize>(
lut: &LutDataType,
options: TransformOptions,
pcs: DataColorSpace,
) -> Result<Vec<f32>, CmsError> {
    // The destination buffer below is sized for 3 output channels, so reject
    // anything that is not a 4->3 LUT.
    if lut.num_input_channels != 4 || lut.num_output_channels != 3 {
        return Err(CmsError::UnsupportedProfileConnection);
    }
let lut_size: u32 = (4 * SAMPLES * SAMPLES * SAMPLES * SAMPLES) as u32;
let src = create_lut4_norm_samples::<SAMPLES>();
let mut dest = vec![0.; (lut_size as usize) / 4 * 3];
let lut_stage = stage_lut_4x3(lut, options, pcs)?;
lut_stage.transform(&src, &mut dest)?;
Ok(dest)
}


@@ -0,0 +1,802 @@
/*
* // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::lut3x3::{
create_lut3x3, katana_input_stage_lut_3x3, katana_output_stage_lut_3x3,
};
use crate::conversions::lut3x4::{create_lut3_samples_norm, create_lut3x4};
use crate::conversions::lut4::{create_lut4, create_lut4_norm_samples, katana_input_stage_lut_4x3};
use crate::conversions::mab::{prepare_mab_3x3, prepare_mba_3x3};
use crate::conversions::transform_lut3_to_4::make_transform_3x4;
use crate::mlaf::mlaf;
use crate::{
CmsError, ColorProfile, DataColorSpace, InPlaceStage, Layout, LutWarehouse, Matrix3f,
ProfileVersion, TransformExecutor, TransformOptions,
};
use num_traits::AsPrimitive;
pub(crate) struct MatrixStage {
pub(crate) matrices: Vec<Matrix3f>,
}
impl InPlaceStage for MatrixStage {
    fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
        // Apply every matrix in storage order to each interleaved triple.
        for m in self.matrices.iter() {
            for dst in dst.chunks_exact_mut(3) {
                let x = dst[0];
                let y = dst[1];
                let z = dst[2];
                dst[0] = mlaf(mlaf(x * m.v[0][0], y, m.v[0][1]), z, m.v[0][2]);
                dst[1] = mlaf(mlaf(x * m.v[1][0], y, m.v[1][1]), z, m.v[1][2]);
                dst[2] = mlaf(mlaf(x * m.v[2][0], y, m.v[2][1]), z, m.v[2][2]);
            }
        }
        Ok(())
    }
}
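// Composition-order sketch: MatrixStage applies its matrices in storage order,
// so with matrices [A, B] every pixel ends up as B * (A * v). Two uniform
// scalings therefore compose multiplicatively:
#[cfg(test)]
mod matrix_stage_order_sketch {
    use super::*;

    #[test]
    fn two_scalings_compose_multiplicatively() {
        let scale = |s: f32| Matrix3f {
            v: [[s, 0., 0.], [0., s, 0.], [0., 0., s]],
        };
        let stage = MatrixStage {
            matrices: vec![scale(2.0), scale(3.0)],
        };
        let mut px = [1.0f32, 0.5, 0.25];
        stage.transform(&mut px).unwrap();
        assert_eq!(px, [6.0, 3.0, 1.5]);
    }
}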
pub(crate) const LUT_SAMPLING: u16 = 255;
pub(crate) trait Lut3x3Factory {
fn make_transform_3x3<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible + 'static + Send + Sync,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
>(
lut: Vec<f32>,
options: TransformOptions,
color_space: DataColorSpace,
is_linear: bool,
) -> Box<dyn TransformExecutor<T> + Send + Sync>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, u8>,
(): LutBarycentricReduction<T, u16>;
}
pub(crate) trait Lut4x3Factory {
fn make_transform_4x3<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible + 'static + Send + Sync,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
>(
lut: Vec<f32>,
options: TransformOptions,
color_space: DataColorSpace,
is_linear: bool,
) -> Box<dyn TransformExecutor<T> + Sync + Send>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, u8>,
(): LutBarycentricReduction<T, u16>;
}
fn pcs_lab_v4_to_v2(profile: &ColorProfile, lut: &mut [f32]) {
if profile.pcs == DataColorSpace::Lab
&& profile.version_internal <= ProfileVersion::V4_0
&& lut.len() % 3 == 0
{
        assert_eq!(
            lut.len() % 3,
            0,
            "LUT length {} is not a multiple of 3; this should never happen for Lab data",
            lut.len()
        );
let v_mat = vec![Matrix3f {
v: [
[65280.0 / 65535.0, 0f32, 0f32],
[0f32, 65280.0 / 65535.0, 0f32],
[0f32, 0f32, 65280.0 / 65535.0f32],
],
}];
let stage = MatrixStage { matrices: v_mat };
stage.transform(lut).unwrap();
}
}
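/// Inverse of [`pcs_lab_v4_to_v2`]: expands legacy v2 Lab PCS encoding
/// (peak 0xFF00) back to the full v4 range with the 65535/65280 factor.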
fn pcs_lab_v2_to_v4(profile: &ColorProfile, lut: &mut [f32]) {
if profile.pcs == DataColorSpace::Lab
&& profile.version_internal <= ProfileVersion::V4_0
&& lut.len() % 3 == 0
{
let v_mat = vec![Matrix3f {
v: [
[65535.0 / 65280.0f32, 0f32, 0f32],
[0f32, 65535.0f32 / 65280.0f32, 0f32],
[0f32, 0f32, 65535.0f32 / 65280.0f32],
],
}];
let stage = MatrixStage { matrices: v_mat };
stage.transform(lut).unwrap();
}
}
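// Expands to a dispatcher that routes the runtime (src, dst) layout pair to the
// const-generic `make_transform_3x3` of the given factory implementation.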
macro_rules! make_transform_3x3_fn {
($method_name: ident, $exec_impl: ident) => {
fn $method_name<
T: Copy
+ Default
+ AsPrimitive<f32>
+ Send
+ Sync
+ AsPrimitive<usize>
+ PointeeSizeExpressible,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
>(
src_layout: Layout,
dst_layout: Layout,
lut: Vec<f32>,
options: TransformOptions,
color_space: DataColorSpace,
is_linear: bool,
) -> Box<dyn TransformExecutor<T> + Send + Sync>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, u8>,
(): LutBarycentricReduction<T, u16>,
{
match src_layout {
Layout::Rgb => match dst_layout {
Layout::Rgb => $exec_impl::make_transform_3x3::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgb as u8 },
GRID_SIZE,
BIT_DEPTH,
>(lut, options, color_space, is_linear),
Layout::Rgba => $exec_impl::make_transform_3x3::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgba as u8 },
GRID_SIZE,
BIT_DEPTH,
>(lut, options, color_space, is_linear),
_ => unimplemented!(),
},
Layout::Rgba => match dst_layout {
Layout::Rgb => $exec_impl::make_transform_3x3::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgb as u8 },
GRID_SIZE,
BIT_DEPTH,
>(lut, options, color_space, is_linear),
Layout::Rgba => $exec_impl::make_transform_3x3::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgba as u8 },
GRID_SIZE,
BIT_DEPTH,
>(lut, options, color_space, is_linear),
_ => unimplemented!(),
},
_ => unimplemented!(),
}
}
};
}
macro_rules! make_transform_4x3_fn {
($method_name: ident, $exec_name: ident) => {
fn $method_name<
T: Copy
+ Default
+ AsPrimitive<f32>
+ Send
+ Sync
+ AsPrimitive<usize>
+ PointeeSizeExpressible,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
>(
dst_layout: Layout,
lut: Vec<f32>,
options: TransformOptions,
data_color_space: DataColorSpace,
is_linear: bool,
) -> Box<dyn TransformExecutor<T> + Send + Sync>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, u8>,
(): LutBarycentricReduction<T, u16>,
{
match dst_layout {
Layout::Rgb => $exec_name::make_transform_4x3::<
T,
{ Layout::Rgb as u8 },
GRID_SIZE,
BIT_DEPTH,
>(lut, options, data_color_space, is_linear),
Layout::Rgba => $exec_name::make_transform_4x3::<
T,
{ Layout::Rgba as u8 },
GRID_SIZE,
BIT_DEPTH,
>(lut, options, data_color_space, is_linear),
_ => unimplemented!(),
}
}
};
}
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
use crate::conversions::neon::NeonLut3x3Factory;
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
make_transform_3x3_fn!(make_transformer_3x3, NeonLut3x3Factory);
#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
use crate::conversions::transform_lut3_to_3::DefaultLut3x3Factory;
#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
make_transform_3x3_fn!(make_transformer_3x3, DefaultLut3x3Factory);
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
use crate::conversions::avx::AvxLut3x3Factory;
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
make_transform_3x3_fn!(make_transformer_3x3_avx_fma, AvxLut3x3Factory);
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
use crate::conversions::sse::SseLut3x3Factory;
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
make_transform_3x3_fn!(make_transformer_3x3_sse41, SseLut3x3Factory);
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
use crate::conversions::avx::AvxLut4x3Factory;
use crate::conversions::interpolator::LutBarycentricReduction;
use crate::conversions::katana::{
Katana, KatanaDefaultIntermediate, KatanaInitialStage, KatanaPostFinalizationStage,
KatanaStageLabToXyz, KatanaStageXyzToLab, katana_create_rgb_lin_lut, katana_pcs_lab_v2_to_v4,
katana_pcs_lab_v4_to_v2, katana_prepare_inverse_lut_rgb_xyz, multi_dimensional_3x3_to_device,
multi_dimensional_3x3_to_pcs, multi_dimensional_4x3_to_pcs,
};
use crate::conversions::mab4x3::prepare_mab_4x3;
use crate::conversions::mba3x4::prepare_mba_3x4;
use crate::conversions::md_luts_factory::{do_any_to_any, prepare_alpha_finalizer};
// use crate::conversions::bpc::compensate_bpc_in_lut;
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
make_transform_4x3_fn!(make_transformer_4x3_avx_fma, AvxLut4x3Factory);
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
use crate::conversions::sse::SseLut4x3Factory;
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
make_transform_4x3_fn!(make_transformer_4x3_sse41, SseLut4x3Factory);
#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
use crate::conversions::transform_lut4_to_3::DefaultLut4x3Factory;
#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
make_transform_4x3_fn!(make_transformer_4x3, DefaultLut4x3Factory);
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
use crate::conversions::neon::NeonLut4x3Factory;
use crate::conversions::prelude_lut_xyz_rgb::{create_rgb_lin_lut, prepare_inverse_lut_rgb_xyz};
use crate::conversions::xyz_lab::{StageLabToXyz, StageXyzToLab};
use crate::transform::PointeeSizeExpressible;
use crate::trc::GammaLutInterpolate;
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
make_transform_4x3_fn!(make_transformer_4x3, NeonLut4x3Factory);
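/// Builds a LUT-based transform executor between `source` and `dest` profiles.
/// Dispatches between 4 -> 3 (e.g. CMYK -> RGB), 3 -> 4 and 3 -> 3 pipelines,
/// falling back to the slower "katana" path when curve analysis requires it,
/// and to `do_any_to_any` for all remaining channel combinations.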
#[inline(never)]
#[cold]
pub(crate) fn make_lut_transform<
T: Copy
+ Default
+ AsPrimitive<f32>
+ Send
+ Sync
+ AsPrimitive<usize>
+ PointeeSizeExpressible
+ GammaLutInterpolate,
const BIT_DEPTH: usize,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
>(
src_layout: Layout,
source: &ColorProfile,
dst_layout: Layout,
dest: &ColorProfile,
options: TransformOptions,
) -> Result<Box<dyn TransformExecutor<T> + Send + Sync>, CmsError>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, u8>,
(): LutBarycentricReduction<T, u16>,
{
if (source.color_space == DataColorSpace::Cmyk || source.color_space == DataColorSpace::Color4)
&& (dest.color_space == DataColorSpace::Rgb || dest.color_space == DataColorSpace::Lab)
{
source.color_space.check_layout(src_layout)?;
dest.color_space.check_layout(dst_layout)?;
if source.pcs != DataColorSpace::Xyz && source.pcs != DataColorSpace::Lab {
return Err(CmsError::UnsupportedProfileConnection);
}
if dest.pcs != DataColorSpace::Lab && dest.pcs != DataColorSpace::Xyz {
return Err(CmsError::UnsupportedProfileConnection);
}
const GRID_SIZE: usize = 17;
let is_katana_required_for_source = source
.get_device_to_pcs(options.rendering_intent)
.ok_or(CmsError::UnsupportedLutRenderingIntent(
source.rendering_intent,
))
.map(|x| x.is_katana_required())?;
let is_katana_required_for_destination =
if dest.is_matrix_shaper() || dest.pcs == DataColorSpace::Xyz {
false
} else if dest.pcs == DataColorSpace::Lab {
dest.get_pcs_to_device(options.rendering_intent)
.ok_or(CmsError::UnsupportedProfileConnection)
.map(|x| x.is_katana_required())?
} else {
return Err(CmsError::UnsupportedProfileConnection);
};
if is_katana_required_for_source || is_katana_required_for_destination {
let initial_stage: Box<dyn KatanaInitialStage<f32, T> + Send + Sync> =
match source.get_device_to_pcs(options.rendering_intent).ok_or(
CmsError::UnsupportedLutRenderingIntent(source.rendering_intent),
)? {
LutWarehouse::Lut(lut) => {
katana_input_stage_lut_4x3::<T>(lut, options, source.pcs, BIT_DEPTH)?
}
LutWarehouse::Multidimensional(mab) => {
multi_dimensional_4x3_to_pcs::<T>(mab, options, source.pcs, BIT_DEPTH)?
}
};
let mut stages = Vec::new();
stages.push(katana_pcs_lab_v2_to_v4(source));
if source.pcs == DataColorSpace::Lab {
stages.push(Box::new(KatanaStageLabToXyz::default()));
}
if dest.pcs == DataColorSpace::Lab {
stages.push(Box::new(KatanaStageXyzToLab::default()));
}
stages.push(katana_pcs_lab_v4_to_v2(dest));
let final_stage = if dest.has_pcs_to_device_lut() {
let pcs_to_device = dest
.get_pcs_to_device(options.rendering_intent)
.ok_or(CmsError::UnsupportedProfileConnection)?;
match pcs_to_device {
LutWarehouse::Lut(lut) => {
katana_output_stage_lut_3x3::<T>(lut, options, dest.pcs, BIT_DEPTH)?
}
LutWarehouse::Multidimensional(mab) => {
multi_dimensional_3x3_to_device::<T>(mab, options, dest.pcs, BIT_DEPTH)?
}
}
} else if dest.is_matrix_shaper() {
let state = katana_prepare_inverse_lut_rgb_xyz::<T, BIT_DEPTH, GAMMA_LUT>(
dest, dst_layout, options,
)?;
stages.extend(state.stages);
state.final_stage
} else {
return Err(CmsError::UnsupportedProfileConnection);
};
let mut post_finalization: Vec<Box<dyn KatanaPostFinalizationStage<T> + Send + Sync>> =
Vec::new();
if let Some(stage) =
prepare_alpha_finalizer::<T>(src_layout, source, dst_layout, dest, BIT_DEPTH)
{
post_finalization.push(stage);
}
return Ok(Box::new(Katana::<f32, T> {
initial_stage,
final_stage,
stages,
post_finalization,
}));
}
let mut lut = match source.get_device_to_pcs(options.rendering_intent).ok_or(
CmsError::UnsupportedLutRenderingIntent(source.rendering_intent),
)? {
LutWarehouse::Lut(lut) => create_lut4::<GRID_SIZE>(lut, options, source.pcs)?,
LutWarehouse::Multidimensional(m_curves) => {
let mut samples = create_lut4_norm_samples::<GRID_SIZE>();
prepare_mab_4x3(m_curves, &mut samples, options, source.pcs)?
}
};
pcs_lab_v2_to_v4(source, &mut lut);
if source.pcs == DataColorSpace::Lab {
let lab_to_xyz_stage = StageLabToXyz::default();
lab_to_xyz_stage.transform(&mut lut)?;
}
// if source.color_space == DataColorSpace::Cmyk
// && (options.rendering_intent == RenderingIntent::Perceptual
// || options.rendering_intent == RenderingIntent::RelativeColorimetric)
// && options.black_point_compensation
// {
// if let (Some(src_bp), Some(dst_bp)) = (
// source.detect_black_point::<GRID_SIZE>(&lut),
// dest.detect_black_point::<GRID_SIZE>(&lut),
// ) {
// compensate_bpc_in_lut(&mut lut, src_bp, dst_bp);
// }
// }
        if dest.pcs == DataColorSpace::Lab {
            let xyz_to_lab_stage = StageXyzToLab::default();
            xyz_to_lab_stage.transform(&mut lut)?;
        }
pcs_lab_v4_to_v2(dest, &mut lut);
if dest.pcs == DataColorSpace::Xyz {
if dest.is_matrix_shaper() {
prepare_inverse_lut_rgb_xyz::<T, BIT_DEPTH, GAMMA_LUT>(dest, &mut lut, options)?;
} else {
return Err(CmsError::UnsupportedProfileConnection);
}
} else if dest.pcs == DataColorSpace::Lab {
let pcs_to_device = dest
.get_pcs_to_device(options.rendering_intent)
.ok_or(CmsError::UnsupportedProfileConnection)?;
match pcs_to_device {
LutWarehouse::Lut(lut_data_type) => {
lut = create_lut3x3(lut_data_type, &lut, options, dest.pcs)?
}
LutWarehouse::Multidimensional(mab) => {
prepare_mba_3x3(mab, &mut lut, options, dest.pcs)?
}
}
}
let is_dest_linear_profile = dest.color_space == DataColorSpace::Rgb
&& dest.is_matrix_shaper()
&& dest.is_linear_matrix_shaper();
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
if std::arch::is_x86_feature_detected!("avx2") && std::arch::is_x86_feature_detected!("fma")
{
return Ok(make_transformer_4x3_avx_fma::<T, GRID_SIZE, BIT_DEPTH>(
dst_layout,
lut,
options,
dest.color_space,
is_dest_linear_profile,
));
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
if std::arch::is_x86_feature_detected!("sse4.1") {
return Ok(make_transformer_4x3_sse41::<T, GRID_SIZE, BIT_DEPTH>(
dst_layout,
lut,
options,
dest.color_space,
is_dest_linear_profile,
));
}
Ok(make_transformer_4x3::<T, GRID_SIZE, BIT_DEPTH>(
dst_layout,
lut,
options,
dest.color_space,
is_dest_linear_profile,
))
} else if (source.color_space == DataColorSpace::Rgb
|| source.color_space == DataColorSpace::Lab)
&& (dest.color_space == DataColorSpace::Cmyk || dest.color_space == DataColorSpace::Color4)
{
source.color_space.check_layout(src_layout)?;
dest.color_space.check_layout(dst_layout)?;
if source.pcs != DataColorSpace::Xyz && source.pcs != DataColorSpace::Lab {
return Err(CmsError::UnsupportedProfileConnection);
}
const GRID_SIZE: usize = 33;
let mut lut: Vec<f32>;
if source.has_device_to_pcs_lut() {
let device_to_pcs = source
.get_device_to_pcs(options.rendering_intent)
.ok_or(CmsError::UnsupportedProfileConnection)?;
lut = create_lut3_samples_norm::<GRID_SIZE>();
match device_to_pcs {
LutWarehouse::Lut(lut_data_type) => {
lut = create_lut3x3(lut_data_type, &lut, options, source.pcs)?;
}
LutWarehouse::Multidimensional(mab) => {
prepare_mab_3x3(mab, &mut lut, options, source.pcs)?
}
}
} else if source.is_matrix_shaper() {
lut = create_rgb_lin_lut::<T, BIT_DEPTH, LINEAR_CAP, GRID_SIZE>(source, options)?;
} else {
return Err(CmsError::UnsupportedProfileConnection);
}
pcs_lab_v2_to_v4(source, &mut lut);
if source.pcs == DataColorSpace::Xyz && dest.pcs == DataColorSpace::Lab {
let xyz_to_lab = StageXyzToLab::default();
xyz_to_lab.transform(&mut lut)?;
} else if source.pcs == DataColorSpace::Lab && dest.pcs == DataColorSpace::Xyz {
let lab_to_xyz_stage = StageLabToXyz::default();
lab_to_xyz_stage.transform(&mut lut)?;
}
pcs_lab_v4_to_v2(dest, &mut lut);
let lut = match dest
.get_pcs_to_device(options.rendering_intent)
.ok_or(CmsError::UnsupportedProfileConnection)?
{
LutWarehouse::Lut(lut_type) => create_lut3x4(lut_type, &lut, options, dest.pcs)?,
LutWarehouse::Multidimensional(m_curves) => {
prepare_mba_3x4(m_curves, &mut lut, options, dest.pcs)?
}
};
let is_dest_linear_profile = dest.color_space == DataColorSpace::Rgb
&& dest.is_matrix_shaper()
&& dest.is_linear_matrix_shaper();
Ok(make_transform_3x4::<T, GRID_SIZE, BIT_DEPTH>(
src_layout,
lut,
options,
dest.color_space,
is_dest_linear_profile,
))
} else if (source.color_space.is_three_channels()) && (dest.color_space.is_three_channels()) {
source.color_space.check_layout(src_layout)?;
dest.color_space.check_layout(dst_layout)?;
const GRID_SIZE: usize = 33;
let is_katana_required_for_source = if source.is_matrix_shaper() {
false
} else {
source
.get_device_to_pcs(options.rendering_intent)
.ok_or(CmsError::UnsupportedLutRenderingIntent(
source.rendering_intent,
))
.map(|x| x.is_katana_required())?
};
let is_katana_required_for_destination =
if source.is_matrix_shaper() || dest.pcs == DataColorSpace::Xyz {
false
} else if dest.pcs == DataColorSpace::Lab {
dest.get_pcs_to_device(options.rendering_intent)
.ok_or(CmsError::UnsupportedProfileConnection)
.map(|x| x.is_katana_required())?
} else {
return Err(CmsError::UnsupportedProfileConnection);
};
let mut stages: Vec<Box<KatanaDefaultIntermediate>> = Vec::new();
// Slow and accurate fallback if anything not acceptable is detected by curve analysis
if is_katana_required_for_source || is_katana_required_for_destination {
let source_stage: Box<dyn KatanaInitialStage<f32, T> + Send + Sync> =
if source.is_matrix_shaper() {
let state = katana_create_rgb_lin_lut::<T, BIT_DEPTH, LINEAR_CAP>(
src_layout, source, options,
)?;
stages.extend(state.stages);
state.initial_stage
} else {
match source.get_device_to_pcs(options.rendering_intent).ok_or(
CmsError::UnsupportedLutRenderingIntent(source.rendering_intent),
)? {
LutWarehouse::Lut(lut) => {
katana_input_stage_lut_3x3::<T>(lut, options, source.pcs, BIT_DEPTH)?
}
LutWarehouse::Multidimensional(mab) => {
multi_dimensional_3x3_to_pcs::<T>(mab, options, source.pcs, BIT_DEPTH)?
}
}
};
stages.push(katana_pcs_lab_v2_to_v4(source));
if source.pcs == DataColorSpace::Lab {
stages.push(Box::new(KatanaStageLabToXyz::default()));
}
if dest.pcs == DataColorSpace::Lab {
stages.push(Box::new(KatanaStageXyzToLab::default()));
}
stages.push(katana_pcs_lab_v4_to_v2(dest));
let final_stage = if dest.has_pcs_to_device_lut() {
let pcs_to_device = dest
.get_pcs_to_device(options.rendering_intent)
.ok_or(CmsError::UnsupportedProfileConnection)?;
match pcs_to_device {
LutWarehouse::Lut(lut) => {
katana_output_stage_lut_3x3::<T>(lut, options, dest.pcs, BIT_DEPTH)?
}
LutWarehouse::Multidimensional(mab) => {
multi_dimensional_3x3_to_device::<T>(mab, options, dest.pcs, BIT_DEPTH)?
}
}
} else if dest.is_matrix_shaper() {
let state = katana_prepare_inverse_lut_rgb_xyz::<T, BIT_DEPTH, GAMMA_LUT>(
dest, dst_layout, options,
)?;
stages.extend(state.stages);
state.final_stage
} else {
return Err(CmsError::UnsupportedProfileConnection);
};
let mut post_finalization: Vec<Box<dyn KatanaPostFinalizationStage<T> + Send + Sync>> =
Vec::new();
if let Some(stage) =
prepare_alpha_finalizer::<T>(src_layout, source, dst_layout, dest, BIT_DEPTH)
{
post_finalization.push(stage);
}
return Ok(Box::new(Katana::<f32, T> {
initial_stage: source_stage,
final_stage,
stages,
post_finalization,
}));
}
let mut lut: Vec<f32>;
if source.has_device_to_pcs_lut() {
let device_to_pcs = source
.get_device_to_pcs(options.rendering_intent)
.ok_or(CmsError::UnsupportedProfileConnection)?;
lut = create_lut3_samples_norm::<GRID_SIZE>();
match device_to_pcs {
LutWarehouse::Lut(lut_data_type) => {
lut = create_lut3x3(lut_data_type, &lut, options, source.pcs)?;
}
LutWarehouse::Multidimensional(mab) => {
prepare_mab_3x3(mab, &mut lut, options, source.pcs)?
}
}
} else if source.is_matrix_shaper() {
lut = create_rgb_lin_lut::<T, BIT_DEPTH, LINEAR_CAP, GRID_SIZE>(source, options)?;
} else {
return Err(CmsError::UnsupportedProfileConnection);
}
pcs_lab_v2_to_v4(source, &mut lut);
if source.pcs == DataColorSpace::Xyz && dest.pcs == DataColorSpace::Lab {
let xyz_to_lab = StageXyzToLab::default();
xyz_to_lab.transform(&mut lut)?;
} else if source.pcs == DataColorSpace::Lab && dest.pcs == DataColorSpace::Xyz {
let lab_to_xyz_stage = StageLabToXyz::default();
lab_to_xyz_stage.transform(&mut lut)?;
}
pcs_lab_v4_to_v2(dest, &mut lut);
if dest.has_pcs_to_device_lut() {
let pcs_to_device = dest
.get_pcs_to_device(options.rendering_intent)
.ok_or(CmsError::UnsupportedProfileConnection)?;
match pcs_to_device {
LutWarehouse::Lut(lut_data_type) => {
lut = create_lut3x3(lut_data_type, &lut, options, dest.pcs)?;
}
LutWarehouse::Multidimensional(mab) => {
prepare_mba_3x3(mab, &mut lut, options, dest.pcs)?
}
}
} else if dest.is_matrix_shaper() {
prepare_inverse_lut_rgb_xyz::<T, BIT_DEPTH, GAMMA_LUT>(dest, &mut lut, options)?;
} else {
return Err(CmsError::UnsupportedProfileConnection);
}
let is_dest_linear_profile = dest.color_space == DataColorSpace::Rgb
&& dest.is_matrix_shaper()
&& dest.is_linear_matrix_shaper();
#[cfg(all(feature = "avx", target_arch = "x86_64"))]
if std::arch::is_x86_feature_detected!("avx2") && std::is_x86_feature_detected!("fma") {
return Ok(make_transformer_3x3_avx_fma::<T, GRID_SIZE, BIT_DEPTH>(
src_layout,
dst_layout,
lut,
options,
dest.color_space,
is_dest_linear_profile,
));
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
if std::arch::is_x86_feature_detected!("sse4.1") {
return Ok(make_transformer_3x3_sse41::<T, GRID_SIZE, BIT_DEPTH>(
src_layout,
dst_layout,
lut,
options,
dest.color_space,
is_dest_linear_profile,
));
}
Ok(make_transformer_3x3::<T, GRID_SIZE, BIT_DEPTH>(
src_layout,
dst_layout,
lut,
options,
dest.color_space,
is_dest_linear_profile,
))
} else {
do_any_to_any::<T, BIT_DEPTH, LINEAR_CAP, GAMMA_LUT>(
src_layout, source, dst_layout, dest, options,
)
}
}

730
vendor/moxcms/src/conversions/mab.rs vendored Normal file

@@ -0,0 +1,730 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::mlaf::mlaf;
use crate::safe_math::SafeMul;
use crate::{
CmsError, Cube, DataColorSpace, InPlaceStage, InterpolationMethod, LutMultidimensionalType,
MalformedSize, Matrix3d, Matrix3f, TransformOptions, Vector3d, Vector3f,
};
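/// ICC lutAtoB "A" curves stage for 3 channels: linearizes each channel through
/// its 65536-entry table, then samples the 3D CLUT.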
#[allow(unused)]
struct ACurves3<'a, const DEPTH: usize> {
curve0: Box<[f32; 65536]>,
curve1: Box<[f32; 65536]>,
curve2: Box<[f32; 65536]>,
clut: &'a [f32],
grid_size: [u8; 3],
interpolation_method: InterpolationMethod,
pcs: DataColorSpace,
}
#[allow(unused)]
struct ACurves3Optimized<'a> {
clut: &'a [f32],
grid_size: [u8; 3],
interpolation_method: InterpolationMethod,
pcs: DataColorSpace,
}
#[allow(unused)]
impl<const DEPTH: usize> ACurves3<'_, DEPTH> {
fn transform_impl<Fetch: Fn(f32, f32, f32) -> Vector3f>(
&self,
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
let scale_value = (DEPTH - 1) as f32;
for dst in dst.chunks_exact_mut(3) {
let a0 = (dst[0] * scale_value).round().min(scale_value) as u16;
let a1 = (dst[1] * scale_value).round().min(scale_value) as u16;
let a2 = (dst[2] * scale_value).round().min(scale_value) as u16;
let b0 = self.curve0[a0 as usize];
let b1 = self.curve1[a1 as usize];
let b2 = self.curve2[a2 as usize];
let interpolated = fetch(b0, b1, b2);
dst[0] = interpolated.v[0];
dst[1] = interpolated.v[1];
dst[2] = interpolated.v[2];
}
Ok(())
}
}
#[allow(unused)]
impl ACurves3Optimized<'_> {
fn transform_impl<Fetch: Fn(f32, f32, f32) -> Vector3f>(
&self,
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
for dst in dst.chunks_exact_mut(3) {
let a0 = dst[0];
let a1 = dst[1];
let a2 = dst[2];
let interpolated = fetch(a0, a1, a2);
dst[0] = interpolated.v[0];
dst[1] = interpolated.v[1];
dst[2] = interpolated.v[2];
}
Ok(())
}
}
impl<const DEPTH: usize> InPlaceStage for ACurves3<'_, DEPTH> {
fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
let lut = Cube::new_cube(self.clut, self.grid_size);
        // If PCS is LAB or XYZ then trilinear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(dst, |x, y, z| lut.tetra_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(dst, |x, y, z| lut.pyramid_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(dst, |x, y, z| lut.prism_vec3(x, y, z))?;
}
InterpolationMethod::Linear => {
self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z))?;
}
}
Ok(())
}
}
impl InPlaceStage for ACurves3Optimized<'_> {
fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
let lut = Cube::new_cube(self.clut, self.grid_size);
        // If PCS is LAB or XYZ then trilinear interpolation should be used
        if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(dst, |x, y, z| lut.tetra_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(dst, |x, y, z| lut.pyramid_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(dst, |x, y, z| lut.prism_vec3(x, y, z))?;
}
InterpolationMethod::Linear => {
self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z))?;
}
}
Ok(())
}
}
#[allow(unused)]
struct ACurves3Inverse<'a, const DEPTH: usize> {
curve0: Box<[f32; 65536]>,
curve1: Box<[f32; 65536]>,
curve2: Box<[f32; 65536]>,
clut: &'a [f32],
grid_size: [u8; 3],
interpolation_method: InterpolationMethod,
pcs: DataColorSpace,
}
#[allow(unused)]
impl<const DEPTH: usize> ACurves3Inverse<'_, DEPTH> {
fn transform_impl<Fetch: Fn(f32, f32, f32) -> Vector3f>(
&self,
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
let scale_value = (DEPTH as u32 - 1u32) as f32;
for dst in dst.chunks_exact_mut(3) {
let interpolated = fetch(dst[0], dst[1], dst[2]);
let a0 = (interpolated.v[0] * scale_value).round().min(scale_value) as u16;
let a1 = (interpolated.v[1] * scale_value).round().min(scale_value) as u16;
let a2 = (interpolated.v[2] * scale_value).round().min(scale_value) as u16;
let b0 = self.curve0[a0 as usize];
let b1 = self.curve1[a1 as usize];
let b2 = self.curve2[a2 as usize];
dst[0] = b0;
dst[1] = b1;
dst[2] = b2;
}
Ok(())
}
}
impl<const DEPTH: usize> InPlaceStage for ACurves3Inverse<'_, DEPTH> {
fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
let lut = Cube::new_cube(self.clut, self.grid_size);
        // If PCS is LAB or XYZ then trilinear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(dst, |x, y, z| lut.tetra_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(dst, |x, y, z| lut.pyramid_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(dst, |x, y, z| lut.prism_vec3(x, y, z))?;
}
InterpolationMethod::Linear => {
self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z))?;
}
}
Ok(())
}
}
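/// "M" curves with an associated matrix + bias stage; when `inverse` is true
/// the matrix is applied before the per-channel curves, otherwise after.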
pub(crate) struct MCurves3<const DEPTH: usize> {
pub(crate) curve0: Box<[f32; 65536]>,
pub(crate) curve1: Box<[f32; 65536]>,
pub(crate) curve2: Box<[f32; 65536]>,
pub(crate) matrix: Matrix3f,
pub(crate) bias: Vector3f,
pub(crate) inverse: bool,
}
impl<const DEPTH: usize> MCurves3<DEPTH> {
fn execute_matrix_stage(&self, dst: &mut [f32]) {
let m = self.matrix;
let b = self.bias;
if !m.test_equality(Matrix3f::IDENTITY) || !b.eq(&Vector3f::default()) {
for dst in dst.chunks_exact_mut(3) {
let x = dst[0];
let y = dst[1];
let z = dst[2];
dst[0] = mlaf(mlaf(mlaf(b.v[0], x, m.v[0][0]), y, m.v[0][1]), z, m.v[0][2]);
dst[1] = mlaf(mlaf(mlaf(b.v[1], x, m.v[1][0]), y, m.v[1][1]), z, m.v[1][2]);
dst[2] = mlaf(mlaf(mlaf(b.v[2], x, m.v[2][0]), y, m.v[2][1]), z, m.v[2][2]);
}
}
}
}
impl<const DEPTH: usize> InPlaceStage for MCurves3<DEPTH> {
fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
let scale_value = (DEPTH - 1) as f32;
if self.inverse {
self.execute_matrix_stage(dst);
}
for dst in dst.chunks_exact_mut(3) {
let a0 = (dst[0] * scale_value).round().min(scale_value) as u16;
let a1 = (dst[1] * scale_value).round().min(scale_value) as u16;
let a2 = (dst[2] * scale_value).round().min(scale_value) as u16;
let b0 = self.curve0[a0 as usize];
let b1 = self.curve1[a1 as usize];
let b2 = self.curve2[a2 as usize];
dst[0] = b0;
dst[1] = b1;
dst[2] = b2;
}
if !self.inverse {
self.execute_matrix_stage(dst);
}
Ok(())
}
}
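/// "B" curves: plain per-channel linearization through 65536-entry tables.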
pub(crate) struct BCurves3<const DEPTH: usize> {
pub(crate) curve0: Box<[f32; 65536]>,
pub(crate) curve1: Box<[f32; 65536]>,
pub(crate) curve2: Box<[f32; 65536]>,
}
impl<const DEPTH: usize> InPlaceStage for BCurves3<DEPTH> {
fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
let scale_value = (DEPTH - 1) as f32;
for dst in dst.chunks_exact_mut(3) {
let a0 = (dst[0] * scale_value).round().min(scale_value) as u16;
let a1 = (dst[1] * scale_value).round().min(scale_value) as u16;
let a2 = (dst[2] * scale_value).round().min(scale_value) as u16;
let b0 = self.curve0[a0 as usize];
let b1 = self.curve1[a1 as usize];
let b2 = self.curve2[a2 as usize];
dst[0] = b0;
dst[1] = b1;
dst[2] = b2;
}
Ok(())
}
}
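/// Applies a 3 -> 3 lutAtoB (mAB) pipeline to `lut` in place, in ICC order:
/// A curves + CLUT, then M curves with matrix/bias, then B curves.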
pub(crate) fn prepare_mab_3x3(
mab: &LutMultidimensionalType,
lut: &mut [f32],
options: TransformOptions,
pcs: DataColorSpace,
) -> Result<(), CmsError> {
const LERP_DEPTH: usize = 65536;
const BP: usize = 13;
const DEPTH: usize = 8192;
    // A 3x3 pipeline needs exactly 3 input and 3 output channels.
    if mab.num_input_channels != 3 || mab.num_output_channels != 3 {
        return Err(CmsError::UnsupportedProfileConnection);
    }
if mab.a_curves.len() == 3 && mab.clut.is_some() {
let clut = &mab.clut.as_ref().map(|x| x.to_clut_f32()).unwrap();
let lut_grid = (mab.grid_points[0] as usize)
.safe_mul(mab.grid_points[1] as usize)?
.safe_mul(mab.grid_points[2] as usize)?
.safe_mul(mab.num_output_channels as usize)?;
if clut.len() != lut_grid {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: clut.len(),
expected: lut_grid,
}));
}
let all_curves_linear = mab.a_curves.iter().all(|curve| curve.is_linear());
let grid_size = [mab.grid_points[0], mab.grid_points[1], mab.grid_points[2]];
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
if all_curves_linear {
use crate::conversions::neon::ACurves3OptimizedNeon;
let a_curves = ACurves3OptimizedNeon {
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
};
a_curves.transform(lut)?;
} else {
use crate::conversions::neon::ACurves3Neon;
let curves: Result<Vec<_>, _> = mab
.a_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
let a_curves = ACurves3Neon::<DEPTH> {
curve0,
curve1,
curve2,
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
};
a_curves.transform(lut)?;
}
#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
{
let mut execution_box: Option<Box<dyn InPlaceStage>> = None;
if all_curves_linear {
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
{
use crate::conversions::avx::ACurves3OptimizedAvxFma;
if std::arch::is_x86_feature_detected!("avx2")
&& std::arch::is_x86_feature_detected!("fma")
{
execution_box = Some(Box::new(ACurves3OptimizedAvxFma {
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
}));
}
}
if execution_box.is_none() {
execution_box = Some(Box::new(ACurves3Optimized {
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
}));
}
} else {
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
{
use crate::conversions::avx::ACurves3AvxFma;
if std::arch::is_x86_feature_detected!("avx2")
&& std::arch::is_x86_feature_detected!("fma")
{
let curves: Result<Vec<_>, _> = mab
.a_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
execution_box = Some(Box::new(ACurves3AvxFma::<DEPTH> {
curve0,
curve1,
curve2,
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
}));
}
}
if execution_box.is_none() {
let curves: Result<Vec<_>, _> = mab
.a_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
execution_box = Some(Box::new(ACurves3::<DEPTH> {
curve0,
curve1,
curve2,
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
}));
}
}
execution_box
.expect("LUT Sampler on Multidimensional 3x3 must be set")
.transform(lut)?;
}
}
if mab.m_curves.len() == 3 {
let all_curves_linear = mab.m_curves.iter().all(|curve| curve.is_linear());
if !all_curves_linear
|| !mab.matrix.test_equality(Matrix3d::IDENTITY)
|| mab.bias.ne(&Vector3d::default())
{
let curves: Result<Vec<_>, _> = mab
.m_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
let matrix = mab.matrix.to_f32();
let bias: Vector3f = mab.bias.cast();
let m_curves = MCurves3::<DEPTH> {
curve0,
curve1,
curve2,
matrix,
bias,
inverse: false,
};
m_curves.transform(lut)?;
}
}
if mab.b_curves.len() == 3 {
let all_curves_linear = mab.b_curves.iter().all(|curve| curve.is_linear());
if !all_curves_linear {
let curves: Result<Vec<_>, _> = mab
.b_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
let b_curves = BCurves3::<DEPTH> {
curve0,
curve1,
curve2,
};
b_curves.transform(lut)?;
}
} else {
return Err(CmsError::InvalidAtoBLut);
}
Ok(())
}
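/// Applies a 3 -> 3 lutBtoA (mBA) pipeline to `lut` in place, in ICC order:
/// B curves first, then matrix/bias with M curves, then CLUT + A curves.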
pub(crate) fn prepare_mba_3x3(
mab: &LutMultidimensionalType,
lut: &mut [f32],
options: TransformOptions,
pcs: DataColorSpace,
) -> Result<(), CmsError> {
    // A 3x3 pipeline needs exactly 3 input and 3 output channels.
    if mab.num_input_channels != 3 || mab.num_output_channels != 3 {
        return Err(CmsError::UnsupportedProfileConnection);
    }
const LERP_DEPTH: usize = 65536;
const BP: usize = 13;
const DEPTH: usize = 8192;
if mab.b_curves.len() == 3 {
let all_curves_linear = mab.b_curves.iter().all(|curve| curve.is_linear());
if !all_curves_linear {
let curves: Result<Vec<_>, _> = mab
.b_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
let b_curves = BCurves3::<DEPTH> {
curve0,
curve1,
curve2,
};
b_curves.transform(lut)?;
}
} else {
return Err(CmsError::InvalidAtoBLut);
}
if mab.m_curves.len() == 3 {
let all_curves_linear = mab.m_curves.iter().all(|curve| curve.is_linear());
if !all_curves_linear
|| !mab.matrix.test_equality(Matrix3d::IDENTITY)
|| mab.bias.ne(&Vector3d::default())
{
let curves: Result<Vec<_>, _> = mab
.m_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
let matrix = mab.matrix.to_f32();
let bias: Vector3f = mab.bias.cast();
let m_curves = MCurves3::<DEPTH> {
curve0,
curve1,
curve2,
matrix,
bias,
inverse: true,
};
m_curves.transform(lut)?;
}
}
if mab.a_curves.len() == 3 && mab.clut.is_some() {
let clut = &mab.clut.as_ref().map(|x| x.to_clut_f32()).unwrap();
let lut_grid = (mab.grid_points[0] as usize)
.safe_mul(mab.grid_points[1] as usize)?
.safe_mul(mab.grid_points[2] as usize)?
.safe_mul(mab.num_output_channels as usize)?;
if clut.len() != lut_grid {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: clut.len(),
expected: lut_grid,
}));
}
let all_curves_linear = mab.a_curves.iter().all(|curve| curve.is_linear());
let grid_size = [mab.grid_points[0], mab.grid_points[1], mab.grid_points[2]];
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
if all_curves_linear {
use crate::conversions::neon::ACurves3OptimizedNeon;
let a_curves = ACurves3OptimizedNeon {
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
};
a_curves.transform(lut)?;
} else {
use crate::conversions::neon::ACurves3InverseNeon;
let curves: Result<Vec<_>, _> = mab
.a_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
let a_curves = ACurves3InverseNeon::<DEPTH> {
curve0,
curve1,
curve2,
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
};
a_curves.transform(lut)?;
}
#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
{
let mut execution_box: Option<Box<dyn InPlaceStage>> = None;
if all_curves_linear {
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
{
use crate::conversions::avx::ACurves3OptimizedAvxFma;
if std::arch::is_x86_feature_detected!("avx2")
&& std::arch::is_x86_feature_detected!("fma")
{
execution_box = Some(Box::new(ACurves3OptimizedAvxFma {
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
}));
}
}
if execution_box.is_none() {
execution_box = Some(Box::new(ACurves3Optimized {
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
}));
}
} else {
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
{
use crate::conversions::avx::ACurves3InverseAvxFma;
if std::arch::is_x86_feature_detected!("avx2")
&& std::arch::is_x86_feature_detected!("fma")
{
let curves: Result<Vec<_>, _> = mab
.a_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
execution_box = Some(Box::new(ACurves3InverseAvxFma::<DEPTH> {
curve0,
curve1,
curve2,
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
}));
}
}
if execution_box.is_none() {
let curves: Result<Vec<_>, _> = mab
.a_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
execution_box = Some(Box::new(ACurves3Inverse::<DEPTH> {
curve0,
curve1,
curve2,
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
}));
}
}
execution_box
.expect("LUT Sampler on Multidimensional Inverse 3x3 must be set")
.transform(lut)?;
}
}
Ok(())
}

394
vendor/moxcms/src/conversions/mab4x3.rs vendored Normal file

@@ -0,0 +1,394 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::mab::{BCurves3, MCurves3};
use crate::safe_math::SafeMul;
use crate::{
CmsError, DataColorSpace, Hypercube, InPlaceStage, InterpolationMethod,
LutMultidimensionalType, MalformedSize, Matrix3d, Stage, TransformOptions, Vector3d, Vector3f,
};
#[allow(dead_code)]
struct ACurves4x3<'a, const DEPTH: usize> {
curve0: Box<[f32; 65536]>,
curve1: Box<[f32; 65536]>,
curve2: Box<[f32; 65536]>,
curve3: Box<[f32; 65536]>,
clut: &'a [f32],
grid_size: [u8; 4],
interpolation_method: InterpolationMethod,
pcs: DataColorSpace,
}
#[allow(dead_code)]
struct ACurves4x3Optimized<'a> {
clut: &'a [f32],
grid_size: [u8; 4],
interpolation_method: InterpolationMethod,
pcs: DataColorSpace,
}
#[allow(dead_code)]
impl<const DEPTH: usize> ACurves4x3<'_, DEPTH> {
fn transform_impl<Fetch: Fn(f32, f32, f32, f32) -> Vector3f>(
&self,
src: &[f32],
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
let scale_value = (DEPTH - 1) as f32;
assert_eq!(src.len() / 4, dst.len() / 3);
for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(3)) {
let a0 = (src[0] * scale_value).round().min(scale_value) as u16;
let a1 = (src[1] * scale_value).round().min(scale_value) as u16;
let a2 = (src[2] * scale_value).round().min(scale_value) as u16;
let a3 = (src[3] * scale_value).round().min(scale_value) as u16;
let c = self.curve0[a0 as usize];
let m = self.curve1[a1 as usize];
let y = self.curve2[a2 as usize];
let k = self.curve3[a3 as usize];
let r = fetch(c, m, y, k);
dst[0] = r.v[0];
dst[1] = r.v[1];
dst[2] = r.v[2];
}
Ok(())
}
}
#[allow(dead_code)]
impl ACurves4x3Optimized<'_> {
fn transform_impl<Fetch: Fn(f32, f32, f32, f32) -> Vector3f>(
&self,
src: &[f32],
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
assert_eq!(src.len() / 4, dst.len() / 3);
for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(3)) {
let c = src[0];
let m = src[1];
let y = src[2];
let k = src[3];
let r = fetch(c, m, y, k);
dst[0] = r.v[0];
dst[1] = r.v[1];
dst[2] = r.v[2];
}
Ok(())
}
}
impl<const DEPTH: usize> Stage for ACurves4x3<'_, DEPTH> {
fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> {
let lut = Hypercube::new_hypercube(self.clut, self.grid_size);
        // If PCS is LAB or XYZ then quadlinear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(src, dst, |x, y, z, w| lut.quadlinear_vec3(x, y, z, w));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(src, dst, |x, y, z, w| lut.tetra_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(src, dst, |x, y, z, w| lut.pyramid_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(src, dst, |x, y, z, w| lut.prism_vec3(x, y, z, w))?;
}
InterpolationMethod::Linear => {
self.transform_impl(src, dst, |x, y, z, w| lut.quadlinear_vec3(x, y, z, w))?;
}
}
Ok(())
}
}
impl Stage for ACurves4x3Optimized<'_> {
fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> {
let lut = Hypercube::new_hypercube(self.clut, self.grid_size);
        // If PCS is LAB or XYZ then quadlinear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(src, dst, |x, y, z, w| lut.quadlinear_vec3(x, y, z, w));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(src, dst, |x, y, z, w| lut.tetra_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(src, dst, |x, y, z, w| lut.pyramid_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(src, dst, |x, y, z, w| lut.prism_vec3(x, y, z, w))?;
}
InterpolationMethod::Linear => {
self.transform_impl(src, dst, |x, y, z, w| lut.quadlinear_vec3(x, y, z, w))?;
}
}
Ok(())
}
}
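/// Evaluates a 4 -> 3 lutAtoB (mAB) pipeline, e.g. CMYK device values to PCS:
/// A curves + a 4D CLUT produce a new 3-channel LUT, which then runs through
/// M curves with matrix/bias and B curves.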
pub(crate) fn prepare_mab_4x3(
mab: &LutMultidimensionalType,
lut: &mut [f32],
options: TransformOptions,
pcs: DataColorSpace,
) -> Result<Vec<f32>, CmsError> {
const LERP_DEPTH: usize = 65536;
const BP: usize = 13;
const DEPTH: usize = 8192;
    // A 4x3 pipeline needs exactly 4 input and 3 output channels.
    if mab.num_input_channels != 4 || mab.num_output_channels != 3 {
        return Err(CmsError::UnsupportedProfileConnection);
    }
let mut new_lut = vec![0f32; (lut.len() / 4) * 3];
if mab.a_curves.len() == 4 && mab.clut.is_some() {
let clut = &mab.clut.as_ref().map(|x| x.to_clut_f32()).unwrap();
let lut_grid = (mab.grid_points[0] as usize)
.safe_mul(mab.grid_points[1] as usize)?
.safe_mul(mab.grid_points[2] as usize)?
.safe_mul(mab.grid_points[3] as usize)?
.safe_mul(mab.num_output_channels as usize)?;
if clut.len() != lut_grid {
return Err(CmsError::MalformedClut(MalformedSize {
size: clut.len(),
expected: lut_grid,
}));
}
let all_curves_linear = mab.a_curves.iter().all(|curve| curve.is_linear());
let grid_size = [
mab.grid_points[0],
mab.grid_points[1],
mab.grid_points[2],
mab.grid_points[3],
];
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
if all_curves_linear {
use crate::conversions::neon::ACurves4x3NeonOptimizedNeon;
let a_curves = ACurves4x3NeonOptimizedNeon {
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
};
a_curves.transform(lut, &mut new_lut)?;
} else {
use crate::conversions::neon::ACurves4x3Neon;
let curves: Result<Vec<_>, _> = mab
.a_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2, curve3] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
let a_curves = ACurves4x3Neon::<DEPTH> {
curve0,
curve1,
curve2,
curve3,
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
};
a_curves.transform(lut, &mut new_lut)?;
}
#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
{
let mut execution_box: Option<Box<dyn Stage>> = None;
if all_curves_linear {
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
{
use crate::conversions::avx::ACurves4x3AvxFmaOptimized;
if std::arch::is_x86_feature_detected!("avx2")
&& std::arch::is_x86_feature_detected!("fma")
{
execution_box = Some(Box::new(ACurves4x3AvxFmaOptimized {
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
}));
}
}
if execution_box.is_none() {
execution_box = Some(Box::new(ACurves4x3Optimized {
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
}));
}
} else {
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
{
use crate::conversions::avx::ACurves4x3AvxFma;
if std::arch::is_x86_feature_detected!("avx2")
&& std::arch::is_x86_feature_detected!("fma")
{
let curves: Result<Vec<_>, _> = mab
.a_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2, curve3] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
execution_box = Some(Box::new(ACurves4x3AvxFma::<DEPTH> {
curve0,
curve1,
curve2,
curve3,
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
}));
}
}
if execution_box.is_none() {
let curves: Result<Vec<_>, _> = mab
.a_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2, curve3] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
execution_box = Some(Box::new(ACurves4x3::<DEPTH> {
curve0,
curve1,
curve2,
curve3,
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
}));
}
}
execution_box
.expect("Sampler for Multidimensional 4x3 must be set")
.transform(lut, &mut new_lut)?;
}
} else {
        // A 4 -> 3 lutAtoB without four A-curves and a CLUT is not supported.
return Err(CmsError::UnsupportedProfileConnection);
}
if mab.m_curves.len() == 3 {
let all_curves_linear = mab.m_curves.iter().all(|curve| curve.is_linear());
if !all_curves_linear
|| !mab.matrix.test_equality(Matrix3d::IDENTITY)
|| mab.bias.ne(&Vector3d::default())
{
let curves: Result<Vec<_>, _> = mab
.m_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
let matrix = mab.matrix.to_f32();
let bias: Vector3f = mab.bias.cast();
let m_curves = MCurves3::<DEPTH> {
curve0,
curve1,
curve2,
matrix,
bias,
inverse: false,
};
m_curves.transform(&mut new_lut)?;
}
}
if mab.b_curves.len() == 3 {
let all_curves_linear = mab.b_curves.iter().all(|curve| curve.is_linear());
if !all_curves_linear {
let curves: Result<Vec<_>, _> = mab
.b_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
let b_curves = BCurves3::<DEPTH> {
curve0,
curve1,
curve2,
};
b_curves.transform(&mut new_lut)?;
}
} else {
return Err(CmsError::InvalidAtoBLut);
}
Ok(new_lut)
}

298
vendor/moxcms/src/conversions/mba3x4.rs vendored Normal file

@@ -0,0 +1,298 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::mab::{BCurves3, MCurves3};
use crate::safe_math::SafeMul;
use crate::{
CmsError, Cube, DataColorSpace, InPlaceStage, InterpolationMethod, LutMultidimensionalType,
MalformedSize, Matrix3d, Stage, TransformOptions, Vector3d, Vector4f,
};
struct ACurves3x4Inverse<'a, const DEPTH: usize> {
curve0: Box<[f32; 65536]>,
curve1: Box<[f32; 65536]>,
curve2: Box<[f32; 65536]>,
curve3: Box<[f32; 65536]>,
clut: &'a [f32],
grid_size: [u8; 3],
interpolation_method: InterpolationMethod,
pcs: DataColorSpace,
}
struct ACurves3x4InverseOptimized<'a> {
clut: &'a [f32],
grid_size: [u8; 3],
interpolation_method: InterpolationMethod,
pcs: DataColorSpace,
}
impl<const DEPTH: usize> ACurves3x4Inverse<'_, DEPTH> {
fn transform_impl<Fetch: Fn(f32, f32, f32) -> Vector4f>(
&self,
src: &[f32],
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
let scale_value = (DEPTH as u32 - 1u32) as f32;
assert_eq!(src.len() / 3, dst.len() / 4);
for (src, dst) in src.chunks_exact(3).zip(dst.chunks_exact_mut(4)) {
let interpolated = fetch(src[0], src[1], src[2]);
let a0 = (interpolated.v[0] * scale_value).round().min(scale_value) as u16;
let a1 = (interpolated.v[1] * scale_value).round().min(scale_value) as u16;
let a2 = (interpolated.v[2] * scale_value).round().min(scale_value) as u16;
let a3 = (interpolated.v[3] * scale_value).round().min(scale_value) as u16;
let b0 = self.curve0[a0 as usize];
let b1 = self.curve1[a1 as usize];
let b2 = self.curve2[a2 as usize];
let b3 = self.curve3[a3 as usize];
dst[0] = b0;
dst[1] = b1;
dst[2] = b2;
dst[3] = b3;
}
Ok(())
}
}
impl ACurves3x4InverseOptimized<'_> {
fn transform_impl<Fetch: Fn(f32, f32, f32) -> Vector4f>(
&self,
src: &[f32],
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
assert_eq!(src.len() / 3, dst.len() / 4);
for (src, dst) in src.chunks_exact(3).zip(dst.chunks_exact_mut(4)) {
let interpolated = fetch(src[0], src[1], src[2]);
let b0 = interpolated.v[0];
let b1 = interpolated.v[1];
let b2 = interpolated.v[2];
let b3 = interpolated.v[3];
dst[0] = b0;
dst[1] = b1;
dst[2] = b2;
dst[3] = b3;
}
Ok(())
}
}
impl<const DEPTH: usize> Stage for ACurves3x4Inverse<'_, DEPTH> {
fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> {
let lut = Cube::new_cube(self.clut, self.grid_size);
        // If PCS is LAB or XYZ then trilinear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(src, dst, |x, y, z| lut.trilinear_vec4(x, y, z));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(src, dst, |x, y, z| lut.tetra_vec4(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(src, dst, |x, y, z| lut.pyramid_vec4(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(src, dst, |x, y, z| lut.prism_vec4(x, y, z))?;
}
InterpolationMethod::Linear => {
self.transform_impl(src, dst, |x, y, z| lut.trilinear_vec4(x, y, z))?;
}
}
Ok(())
}
}
impl Stage for ACurves3x4InverseOptimized<'_> {
fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> {
let lut = Cube::new_cube(self.clut, self.grid_size);
        // If PCS is LAB or XYZ then trilinear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(src, dst, |x, y, z| lut.trilinear_vec4(x, y, z));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(src, dst, |x, y, z| lut.tetra_vec4(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(src, dst, |x, y, z| lut.pyramid_vec4(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(src, dst, |x, y, z| lut.prism_vec4(x, y, z))?;
}
InterpolationMethod::Linear => {
self.transform_impl(src, dst, |x, y, z| lut.trilinear_vec4(x, y, z))?;
}
}
Ok(())
}
}
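/// Evaluates a 3 -> 4 lutBtoA (mBA) pipeline, e.g. PCS to CMYK: B curves and
/// matrix/bias with M curves run in place, then a 3D CLUT + A curves expand
/// the samples into a new 4-channel LUT.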
pub(crate) fn prepare_mba_3x4(
mab: &LutMultidimensionalType,
lut: &mut [f32],
options: TransformOptions,
pcs: DataColorSpace,
) -> Result<Vec<f32>, CmsError> {
    // A 3x4 pipeline needs exactly 3 input and 4 output channels.
    if mab.num_input_channels != 3 || mab.num_output_channels != 4 {
        return Err(CmsError::UnsupportedProfileConnection);
    }
const LERP_DEPTH: usize = 65536;
const BP: usize = 13;
const DEPTH: usize = 8192;
if mab.b_curves.len() == 3 {
let all_curves_linear = mab.b_curves.iter().all(|curve| curve.is_linear());
if !all_curves_linear {
let curves: Result<Vec<_>, _> = mab
.b_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
let b_curves = BCurves3::<DEPTH> {
curve0,
curve1,
curve2,
};
b_curves.transform(lut)?;
}
} else {
return Err(CmsError::InvalidAtoBLut);
}
if mab.m_curves.len() == 3 {
let all_curves_linear = mab.m_curves.iter().all(|curve| curve.is_linear());
if !all_curves_linear
|| !mab.matrix.test_equality(Matrix3d::IDENTITY)
|| mab.bias.ne(&Vector3d::default())
{
let curves: Result<Vec<_>, _> = mab
.m_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
let matrix = mab.matrix.to_f32();
let bias = mab.bias.cast();
let m_curves = MCurves3::<DEPTH> {
curve0,
curve1,
curve2,
matrix,
bias,
inverse: true,
};
m_curves.transform(lut)?;
}
}
let mut new_lut = vec![0f32; (lut.len() / 3) * 4];
if mab.a_curves.len() == 4 && mab.clut.is_some() {
let clut = &mab.clut.as_ref().map(|x| x.to_clut_f32()).unwrap();
let lut_grid = (mab.grid_points[0] as usize)
.safe_mul(mab.grid_points[1] as usize)?
.safe_mul(mab.grid_points[2] as usize)?
.safe_mul(mab.num_output_channels as usize)?;
if clut.len() != lut_grid {
return Err(CmsError::MalformedClut(MalformedSize {
size: clut.len(),
expected: lut_grid,
}));
}
let grid_size = [mab.grid_points[0], mab.grid_points[1], mab.grid_points[2]];
let all_curves_linear = mab.a_curves.iter().all(|curve| curve.is_linear());
if all_curves_linear {
let a_curves = ACurves3x4InverseOptimized {
clut,
                grid_size,
interpolation_method: options.interpolation_method,
pcs,
};
a_curves.transform(lut, &mut new_lut)?;
} else {
let curves: Result<Vec<_>, _> = mab
.a_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2, curve3] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
let a_curves = ACurves3x4Inverse::<DEPTH> {
curve0,
curve1,
curve2,
curve3,
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
};
a_curves.transform(lut, &mut new_lut)?;
}
} else {
return Err(CmsError::UnsupportedProfileConnection);
}
Ok(new_lut)
}
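// Editorial sketch of the flow above (not upstream documentation): for an
// mBA (PCS -> device) tag with 3 inputs and 4 outputs, `lut` arrives in PCS
// order and is processed as
//   1. B curves - three per-channel 1D tables applied in place,
//   2. M curves - per-channel tables plus 3x3 matrix and bias, inverse mode,
//   3. A curves - the 3D CLUT lookup that widens 3 channels to 4 into `new_lut`.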

vendor/moxcms/src/conversions/md_lut.rs vendored Normal file
@@ -0,0 +1,728 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::math::{FusedMultiplyAdd, FusedMultiplyNegAdd};
use crate::mlaf::{mlaf, neg_mlaf};
use crate::nd_array::{ArrayFetch, lerp};
use crate::{Vector3f, Vector3i};
use num_traits::MulAdd;
use std::array::from_fn;
use std::marker::PhantomData;
use std::ops::{Add, Mul, Neg, Sub};
pub(crate) struct MultidimensionalLut {
pub(crate) grid_strides: [u32; 16],
pub(crate) grid_filling_size: [u32; 16],
pub(crate) grid_scale: [f32; 16],
pub(crate) output_inks: usize,
}
struct FastCube<T, F: ArrayFetch<T>> {
fetch: F,
_phantom: PhantomData<T>,
}
struct ArrayFetchVectorN<'a> {
array: &'a [f32],
x_stride: u32,
y_stride: u32,
z_stride: u32,
output_inks: usize,
}
#[repr(transparent)]
#[derive(Copy, Clone, Debug)]
pub(crate) struct NVector<T, const N: usize> {
pub(crate) v: [T; N],
}
impl<T: Copy, const N: usize> NVector<T, N> {
pub(crate) fn from_slice(v: &[T; N]) -> Self {
Self { v: *v }
}
}
impl<T: Copy, const N: usize> From<T> for NVector<T, N> {
#[inline]
fn from(value: T) -> Self {
Self { v: [value; N] }
}
}
impl<T: Copy + Add<T, Output = T> + Mul<T, Output = T> + MulAdd<T, Output = T>, const N: usize>
FusedMultiplyAdd<NVector<T, N>> for NVector<T, N>
{
#[inline]
fn mla(&self, b: NVector<T, N>, c: NVector<T, N>) -> NVector<T, N> {
Self {
v: from_fn(|i| mlaf(self.v[i], b.v[i], c.v[i])),
}
}
}
impl<
T: Copy + Add<T, Output = T> + Mul<T, Output = T> + MulAdd<T, Output = T> + Neg<Output = T>,
const N: usize,
> FusedMultiplyNegAdd<NVector<T, N>> for NVector<T, N>
{
#[inline]
fn neg_mla(&self, b: NVector<T, N>, c: NVector<T, N>) -> NVector<T, N> {
Self {
v: from_fn(|i| neg_mlaf(self.v[i], b.v[i], c.v[i])),
}
}
}
impl<T: Sub<Output = T> + Default + Copy, const N: usize> Sub<NVector<T, N>> for NVector<T, N> {
type Output = Self;
#[inline]
fn sub(self, rhs: NVector<T, N>) -> Self::Output {
Self {
v: from_fn(|i| self.v[i] - rhs.v[i]),
}
}
}
impl<T: Add<Output = T> + Default + Copy, const N: usize> Add<NVector<T, N>> for NVector<T, N> {
type Output = Self;
#[inline]
fn add(self, rhs: NVector<T, N>) -> Self::Output {
Self {
v: from_fn(|i| self.v[i] + rhs.v[i]),
}
}
}
impl<T: Mul<Output = T> + Default + Copy, const N: usize> Mul<NVector<T, N>> for NVector<T, N> {
type Output = Self;
#[inline]
fn mul(self, rhs: NVector<T, N>) -> Self::Output {
Self {
v: from_fn(|i| self.v[i] * rhs.v[i]),
}
}
}
impl<const N: usize> ArrayFetch<NVector<f32, N>> for ArrayFetchVectorN<'_> {
#[inline(always)]
fn fetch(&self, x: i32, y: i32, z: i32) -> NVector<f32, N> {
let start = (x as u32 * self.x_stride + y as u32 * self.y_stride + z as u32 * self.z_stride)
as usize
* self.output_inks;
let k = &self.array[start..start + N];
NVector::<f32, N>::from_slice(k.try_into().unwrap())
}
}
impl<T, F: ArrayFetch<T>> FastCube<T, F>
where
T: Copy
+ From<f32>
+ Sub<T, Output = T>
+ Mul<T, Output = T>
+ Add<T, Output = T>
+ FusedMultiplyNegAdd<T>
+ FusedMultiplyAdd<T>,
{
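    // Editorial note: tetrahedral interpolation splits the unit cube into six
    // tetrahedra according to the ordering of the fractional offsets
    // (rx, ry, rz); the branch chain below selects the tetrahedron that
    // contains the sample point and accumulates three edge differences with
    // fused multiply-adds.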
#[inline(always)]
fn tetra(&self, src: Vector3i, src_next: Vector3i, w: Vector3f) -> T {
let x = src.v[0];
let y = src.v[1];
let z = src.v[2];
let x_n = src_next.v[0];
let y_n = src_next.v[1];
let z_n = src_next.v[2];
let rx = w.v[0];
let ry = w.v[1];
let rz = w.v[2];
let c0 = self.fetch.fetch(x, y, z);
let c2;
let c1;
let c3;
if rx >= ry {
if ry >= rz {
//rx >= ry && ry >= rz
c1 = self.fetch.fetch(x_n, y, z) - c0;
c2 = self.fetch.fetch(x_n, y_n, z) - self.fetch.fetch(x_n, y, z);
c3 = self.fetch.fetch(x_n, y_n, z_n) - self.fetch.fetch(x_n, y_n, z);
} else if rx >= rz {
//rx >= rz && rz >= ry
c1 = self.fetch.fetch(x_n, y, z) - c0;
c2 = self.fetch.fetch(x_n, y_n, z_n) - self.fetch.fetch(x_n, y, z_n);
c3 = self.fetch.fetch(x_n, y, z_n) - self.fetch.fetch(x_n, y, z);
} else {
//rz > rx && rx >= ry
c1 = self.fetch.fetch(x_n, y, z_n) - self.fetch.fetch(x, y, z_n);
c2 = self.fetch.fetch(x_n, y_n, z_n) - self.fetch.fetch(x_n, y, z_n);
c3 = self.fetch.fetch(x, y, z_n) - c0;
}
} else if rx >= rz {
//ry > rx && rx >= rz
c1 = self.fetch.fetch(x_n, y_n, z) - self.fetch.fetch(x, y_n, z);
c2 = self.fetch.fetch(x, y_n, z) - c0;
c3 = self.fetch.fetch(x_n, y_n, z_n) - self.fetch.fetch(x_n, y_n, z);
} else if ry >= rz {
//ry >= rz && rz > rx
c1 = self.fetch.fetch(x_n, y_n, z_n) - self.fetch.fetch(x, y_n, z_n);
c2 = self.fetch.fetch(x, y_n, z) - c0;
c3 = self.fetch.fetch(x, y_n, z_n) - self.fetch.fetch(x, y_n, z);
} else {
//rz > ry && ry > rx
c1 = self.fetch.fetch(x_n, y_n, z_n) - self.fetch.fetch(x, y_n, z_n);
c2 = self.fetch.fetch(x, y_n, z_n) - self.fetch.fetch(x, y, z_n);
c3 = self.fetch.fetch(x, y, z_n) - c0;
}
let s0 = c0.mla(c1, T::from(rx));
let s1 = s0.mla(c2, T::from(ry));
s1.mla(c3, T::from(rz))
}
}
impl MultidimensionalLut {
pub(crate) fn new(grid_size: [u8; 16], input_inks: usize, output_inks: usize) -> Self {
        assert!(input_inks >= 1 && input_inks <= 16);
let mut grid_strides = [1u32; 16];
let mut grid_filling_size = [1u32; 16];
for (ink, dst_stride) in grid_strides.iter_mut().take(input_inks - 1).enumerate() {
let mut stride = 1u32;
let how_many = input_inks.saturating_sub(ink).saturating_sub(1);
for &grid_stride in grid_size.iter().take(how_many) {
stride *= grid_stride as u32;
}
*dst_stride = stride;
}
for (ink, dst_stride) in grid_filling_size.iter_mut().take(input_inks).enumerate() {
let mut stride = output_inks as u32;
let how_many = input_inks.saturating_sub(ink).saturating_sub(1);
for &grid_stride in grid_size.iter().take(how_many) {
stride *= grid_stride as u32;
}
*dst_stride = stride;
}
let mut grid_strides_f = [0f32; 16];
for (dst, src) in grid_strides_f
.iter_mut()
.zip(grid_size.iter())
.take(input_inks)
{
*dst = (*src - 1) as f32;
}
Self {
grid_strides,
grid_scale: grid_strides_f,
grid_filling_size,
output_inks,
}
}
}
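// Worked example (editorial, values illustrative): for a CMYK -> RGB table
// with a uniform 9-point grid, `MultidimensionalLut::new([9; 16], 4, 3)`
// yields
//   grid_strides      = [729, 81, 9, 1, ...]      (first channel slowest),
//   grid_filling_size = [2187, 243, 27, 3, ...]   (strides in floats),
//   grid_scale        = [8.0, 8.0, 8.0, 8.0, ...],
// so the sample at grid index (c, m, y, k) starts at float offset
// (c * 729 + m * 81 + y * 9 + k) * 3.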
pub(crate) fn linear_4i_vec3f_direct<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
lx: f32,
ly: f32,
lz: f32,
lw: f32,
) -> NVector<f32, N> {
let lin_x = lx.max(0.0).min(1.0);
let lin_y = ly.max(0.0).min(1.0);
let lin_z = lz.max(0.0).min(1.0);
let lin_w = lw.max(0.0).min(1.0);
let scale_x = lut.grid_scale[0];
let scale_y = lut.grid_scale[1];
let scale_z = lut.grid_scale[2];
let scale_w = lut.grid_scale[3];
let lx = lin_x * scale_x;
let ly = lin_y * scale_y;
let lz = lin_z * scale_z;
let lw = lin_w * scale_w;
let x = lx.floor() as i32;
let y = ly.floor() as i32;
let z = lz.floor() as i32;
let w = lw.floor() as i32;
let src_x = Vector3i { v: [x, y, z] };
let x_n = lx.ceil() as i32;
let y_n = ly.ceil() as i32;
let z_n = lz.ceil() as i32;
let w_n = lw.ceil() as i32;
let src_next = Vector3i { v: [x_n, y_n, z_n] };
let x_w = lx - x as f32;
let y_w = ly - y as f32;
let z_w = lz - z as f32;
let w_w = lw - w as f32;
let weights = Vector3f { v: [x_w, y_w, z_w] };
let cube0 = &arr[(w as usize * lut.grid_filling_size[3] as usize)..];
let cube1 = &arr[(w_n as usize * lut.grid_filling_size[3] as usize)..];
let fast_cube0 = FastCube {
fetch: ArrayFetchVectorN {
array: cube0,
x_stride: lut.grid_strides[0],
y_stride: lut.grid_strides[1],
z_stride: lut.grid_strides[2],
output_inks: lut.output_inks,
},
_phantom: PhantomData,
};
let fast_cube1 = FastCube {
fetch: ArrayFetchVectorN {
array: cube1,
x_stride: lut.grid_strides[0],
y_stride: lut.grid_strides[1],
z_stride: lut.grid_strides[2],
output_inks: lut.output_inks,
},
_phantom: PhantomData,
};
let w0 = fast_cube0.tetra(src_x, src_next, weights);
let w1 = fast_cube1.tetra(src_x, src_next, weights);
lerp(w0, w1, NVector::<f32, N>::from(w_w))
}
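// Editorial note: the 4D lookup above handles the w axis by slicing the CLUT
// into the two adjacent 3D cubes at floor(w) and ceil(w), running tetrahedral
// interpolation in each, and lerping the two results by the fractional w
// weight.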
pub(crate) fn linear_3i_vec3f_direct<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
) -> NVector<f32, N> {
linear_3i_vec3f(lut, arr, inputs[0], inputs[1], inputs[2])
}
fn linear_3i_vec3f<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
x: f32,
y: f32,
z: f32,
) -> NVector<f32, N> {
let lin_x = x.max(0.0).min(1.0);
let lin_y = y.max(0.0).min(1.0);
let lin_z = z.max(0.0).min(1.0);
let scale_x = lut.grid_scale[0];
let scale_y = lut.grid_scale[1];
let scale_z = lut.grid_scale[2];
let lx = lin_x * scale_x;
let ly = lin_y * scale_y;
let lz = lin_z * scale_z;
let x = lx.floor() as i32;
let y = ly.floor() as i32;
let z = lz.floor() as i32;
let src_x = Vector3i { v: [x, y, z] };
let x_n = lx.ceil() as i32;
let y_n = ly.ceil() as i32;
let z_n = lz.ceil() as i32;
let src_next = Vector3i { v: [x_n, y_n, z_n] };
let x_w = lx - x as f32;
let y_w = ly - y as f32;
let z_w = lz - z as f32;
let weights = Vector3f { v: [x_w, y_w, z_w] };
let fast_cube = FastCube {
fetch: ArrayFetchVectorN {
array: arr,
x_stride: lut.grid_strides[0],
y_stride: lut.grid_strides[1],
z_stride: lut.grid_strides[2],
output_inks: lut.output_inks,
},
_phantom: PhantomData,
};
fast_cube.tetra(src_x, src_next, weights)
}
pub(crate) fn linear_1i_vec3f<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
) -> NVector<f32, N> {
let lin_x = inputs[0].max(0.0).min(1.0);
let scale_x = lut.grid_scale[0];
let lx = lin_x * scale_x;
let x = lx.floor() as i32;
let x_n = lx.ceil() as i32;
let x_w = lx - x as f32;
let x_stride = lut.grid_strides[0];
let offset = |xi: i32| -> usize { (xi as u32 * x_stride) as usize * lut.output_inks };
// Sample 2 corners
let a = NVector::<f32, N>::from_slice(&arr[offset(x)..][..N].try_into().unwrap());
let b = NVector::<f32, N>::from_slice(&arr[offset(x_n)..][..N].try_into().unwrap());
a * NVector::<f32, N>::from(1.0 - x_w) + b * NVector::<f32, N>::from(x_w)
}
pub(crate) fn linear_2i_vec3f_direct<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
) -> NVector<f32, N> {
linear_2i_vec3f(lut, arr, inputs[0], inputs[1])
}
fn linear_2i_vec3f<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
x: f32,
y: f32,
) -> NVector<f32, N> {
let lin_x = x.max(0.0).min(1.0);
let lin_y = y.max(0.0).min(1.0);
let scale_x = lut.grid_scale[0];
let scale_y = lut.grid_scale[1];
let lx = lin_x * scale_x;
let ly = lin_y * scale_y;
let x = lx.floor() as i32;
let y = ly.floor() as i32;
let x_n = lx.ceil() as i32;
let y_n = ly.ceil() as i32;
let x_w = lx - x as f32;
let y_w = ly - y as f32;
let x_stride = lut.grid_strides[0];
let y_stride = lut.grid_strides[1];
let offset = |xi: i32, yi: i32| -> usize {
(xi as u32 * x_stride + yi as u32 * y_stride) as usize * lut.output_inks
};
// Sample 4 corners
let a = NVector::<f32, N>::from_slice(&arr[offset(x, y)..][..N].try_into().unwrap());
let b = NVector::<f32, N>::from_slice(&arr[offset(x_n, y)..][..N].try_into().unwrap());
let c = NVector::<f32, N>::from_slice(&arr[offset(x, y_n)..][..N].try_into().unwrap());
let d = NVector::<f32, N>::from_slice(&arr[offset(x_n, y_n)..][..N].try_into().unwrap());
let ab = a * NVector::<f32, N>::from(1.0 - x_w) + b * NVector::<f32, N>::from(x_w);
let cd = c * NVector::<f32, N>::from(1.0 - x_w) + d * NVector::<f32, N>::from(x_w);
ab * NVector::<f32, N>::from(1.0 - y_w) + cd * NVector::<f32, N>::from(y_w)
}
pub(crate) fn linear_4i_vec3f<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
) -> NVector<f32, N> {
linear_4i_vec3f_direct(lut, arr, inputs[0], inputs[1], inputs[2], inputs[3])
}
type FHandle<const N: usize> = fn(&MultidimensionalLut, &[f32], &[f32]) -> NVector<f32, N>;
#[inline(never)]
pub(crate) fn linear_n_i_vec3f<
const N: usize,
const I: usize,
Handle: Fn(&MultidimensionalLut, &[f32], &[f32]) -> NVector<f32, N>,
>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
handle: Handle,
) -> NVector<f32, N> {
let lin_w = inputs[I];
let w_c = lin_w.max(0.).min(1.);
let scale_p = lut.grid_scale[I];
let wf = w_c * scale_p;
let w0 = wf.min(scale_p) as usize;
let w1 = (wf + 1.).min(scale_p) as usize;
let w = wf - w0 as f32;
let cube0 = &arr[(w0 * lut.grid_filling_size[I] as usize)..];
let cube1 = &arr[(w1 * lut.grid_filling_size[I] as usize)..];
let inputs_sliced = &inputs[0..I];
let w0 = handle(lut, cube0, inputs_sliced);
let w1 = handle(lut, cube1, inputs_sliced);
lerp(w0, w1, NVector::<f32, N>::from(w))
}
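// Editorial note: the ladder below peels one dimension per step. Each
// `linear_{K}i_vec3f` splits on input K-1, evaluates the two neighbouring
// (K-1)-dimensional sub-grids through the previous rung, and lerps the two
// results, bottoming out at the direct 4D path.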
#[inline(never)]
pub(crate) fn linear_5i_vec3f<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
) -> NVector<f32, N> {
let lin_w = inputs[4];
let w_c = lin_w.max(0.).min(1.);
let scale_p = lut.grid_scale[4];
let wf = w_c * scale_p;
let w0 = wf.min(scale_p) as usize;
let w1 = (wf + 1.).min(scale_p) as usize;
let w = wf - w0 as f32;
let cube0 = &arr[(w0 * lut.grid_filling_size[4] as usize)..];
let cube1 = &arr[(w1 * lut.grid_filling_size[4] as usize)..];
let w0 = linear_4i_vec3f_direct(lut, cube0, inputs[0], inputs[1], inputs[2], inputs[3]);
let w1 = linear_4i_vec3f_direct(lut, cube1, inputs[0], inputs[1], inputs[2], inputs[3]);
lerp(w0, w1, NVector::<f32, N>::from(w))
}
#[inline(never)]
pub(crate) fn linear_6i_vec3f<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
) -> NVector<f32, N> {
let f = linear_5i_vec3f::<N>;
linear_n_i_vec3f::<N, 5, FHandle<N>>(lut, arr, inputs, f)
}
#[inline(never)]
pub(crate) fn linear_7i_vec3f<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
) -> NVector<f32, N> {
let f = linear_6i_vec3f::<N>;
linear_n_i_vec3f::<N, 6, FHandle<N>>(lut, arr, inputs, f)
}
#[inline(never)]
pub(crate) fn linear_8i_vec3f<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
) -> NVector<f32, N> {
let f = linear_7i_vec3f::<N>;
linear_n_i_vec3f::<N, 7, FHandle<N>>(lut, arr, inputs, f)
}
#[inline(never)]
pub(crate) fn linear_9i_vec3f<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
) -> NVector<f32, N> {
let f = linear_8i_vec3f::<N>;
linear_n_i_vec3f::<N, 8, FHandle<N>>(lut, arr, inputs, f)
}
#[inline(never)]
pub(crate) fn linear_10i_vec3f<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
) -> NVector<f32, N> {
let f = linear_9i_vec3f::<N>;
linear_n_i_vec3f::<N, 9, FHandle<N>>(lut, arr, inputs, f)
}
#[inline(never)]
pub(crate) fn linear_11i_vec3f<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
) -> NVector<f32, N> {
let f = linear_10i_vec3f::<N>;
linear_n_i_vec3f::<N, 10, FHandle<N>>(lut, arr, inputs, f)
}
#[inline(never)]
pub(crate) fn linear_12i_vec3f<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
) -> NVector<f32, N> {
let f = linear_11i_vec3f::<N>;
linear_n_i_vec3f::<N, 11, FHandle<N>>(lut, arr, inputs, f)
}
#[inline(never)]
pub(crate) fn linear_13i_vec3f<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
) -> NVector<f32, N> {
let f = linear_12i_vec3f::<N>;
linear_n_i_vec3f::<N, 12, FHandle<N>>(lut, arr, inputs, f)
}
#[inline(never)]
pub(crate) fn linear_14i_vec3f<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
) -> NVector<f32, N> {
let f = linear_13i_vec3f::<N>;
linear_n_i_vec3f::<N, 13, FHandle<N>>(lut, arr, inputs, f)
}
#[inline(never)]
pub(crate) fn linear_15i_vec3f<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
) -> NVector<f32, N> {
let f = linear_14i_vec3f::<N>;
linear_n_i_vec3f::<N, 14, FHandle<N>>(lut, arr, inputs, f)
}
#[inline(never)]
pub(crate) fn tetra_3i_to_any_vec(
lut: &MultidimensionalLut,
arr: &[f32],
x: f32,
y: f32,
z: f32,
dst: &mut [f32],
inks: usize,
) {
    // Monomorphized dispatch over the runtime ink count; each arm instantiates
    // the const-generic lookup for its channel width.
    macro_rules! tetra_for_inks {
        ($($n:literal),+) => {
            match inks {
                $(
                    $n => {
                        let interpolated = linear_3i_vec3f::<$n>(lut, arr, x, y, z);
                        for (dst, src) in dst.iter_mut().zip(interpolated.v.iter()) {
                            *dst = *src;
                        }
                    }
                )+
                _ => unreachable!(),
            }
        };
    }
    tetra_for_inks!(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}

@@ -0,0 +1,190 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::katana::{
CopyAlphaStage, InjectAlphaStage, Katana, KatanaInitialStage, KatanaIntermediateStage,
KatanaPostFinalizationStage, KatanaStageLabToXyz, KatanaStageXyzToLab,
katana_create_rgb_lin_lut, katana_input_make_lut_nx3, katana_multi_dimensional_3xn_to_device,
katana_multi_dimensional_nx3_to_pcs, katana_output_make_lut_3xn, katana_pcs_lab_v2_to_v4,
katana_pcs_lab_v4_to_v2, katana_prepare_inverse_lut_rgb_xyz,
};
use crate::{
CmsError, ColorProfile, DataColorSpace, GammaLutInterpolate, Layout, LutWarehouse,
PointeeSizeExpressible, TransformExecutor, TransformOptions,
};
use num_traits::AsPrimitive;
pub(crate) fn do_any_to_any<
T: Copy
+ Default
+ AsPrimitive<f32>
+ Send
+ Sync
+ AsPrimitive<usize>
+ PointeeSizeExpressible
+ GammaLutInterpolate,
const BIT_DEPTH: usize,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
>(
src_layout: Layout,
source: &ColorProfile,
dst_layout: Layout,
dest: &ColorProfile,
options: TransformOptions,
) -> Result<Box<dyn TransformExecutor<T> + Send + Sync>, CmsError>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, u8>,
(): LutBarycentricReduction<T, u16>,
{
let mut stages: Vec<Box<dyn KatanaIntermediateStage<f32> + Send + Sync>> = Vec::new();
let initial_stage: Box<dyn KatanaInitialStage<f32, T> + Send + Sync> = match source
.is_matrix_shaper()
{
true => {
let state =
katana_create_rgb_lin_lut::<T, BIT_DEPTH, LINEAR_CAP>(src_layout, source, options)?;
stages.extend(state.stages);
state.initial_stage
}
false => match source.get_device_to_pcs(options.rendering_intent).ok_or(
CmsError::UnsupportedLutRenderingIntent(source.rendering_intent),
)? {
LutWarehouse::Lut(lut) => katana_input_make_lut_nx3::<T>(
src_layout,
src_layout.channels(),
lut,
options,
source.pcs,
BIT_DEPTH,
)?,
LutWarehouse::Multidimensional(mab) => {
katana_multi_dimensional_nx3_to_pcs::<T, BIT_DEPTH>(
src_layout, mab, options, source.pcs,
)?
}
},
};
stages.push(katana_pcs_lab_v2_to_v4(source));
if source.pcs == DataColorSpace::Lab {
stages.push(Box::new(KatanaStageLabToXyz::default()));
}
if dest.pcs == DataColorSpace::Lab {
stages.push(Box::new(KatanaStageXyzToLab::default()));
}
stages.push(katana_pcs_lab_v4_to_v2(dest));
let final_stage = if dest.has_pcs_to_device_lut() {
let pcs_to_device = dest
.get_pcs_to_device(options.rendering_intent)
.ok_or(CmsError::UnsupportedProfileConnection)?;
match pcs_to_device {
LutWarehouse::Lut(lut) => katana_output_make_lut_3xn::<T>(
dst_layout,
lut,
options,
dest.color_space,
BIT_DEPTH,
)?,
LutWarehouse::Multidimensional(mab) => katana_multi_dimensional_3xn_to_device::<T>(
dst_layout, mab, options, dest.pcs, BIT_DEPTH,
)?,
}
} else if dest.is_matrix_shaper() {
let state = katana_prepare_inverse_lut_rgb_xyz::<T, BIT_DEPTH, GAMMA_LUT>(
dest, dst_layout, options,
)?;
stages.extend(state.stages);
state.final_stage
} else {
return Err(CmsError::UnsupportedProfileConnection);
};
let mut post_finalization: Vec<Box<dyn KatanaPostFinalizationStage<T> + Send + Sync>> =
Vec::new();
if let Some(stage) =
prepare_alpha_finalizer::<T>(src_layout, source, dst_layout, dest, BIT_DEPTH)
{
post_finalization.push(stage);
}
Ok(Box::new(Katana::<f32, T> {
initial_stage,
final_stage,
stages,
post_finalization,
}))
}
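// Editorial sketch of the assembled pipeline: an input stage (matrix-shaper
// linearization or device->PCS LUT), optional Lab v2<->v4 and Lab<->XYZ PCS
// adjustment stages, an output stage (PCS->device LUT or inverse
// matrix-shaper), and finally an optional alpha copy/inject pass.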
pub(crate) fn prepare_alpha_finalizer<
T: Copy
+ Default
+ AsPrimitive<f32>
+ Send
+ Sync
+ AsPrimitive<usize>
+ PointeeSizeExpressible
+ GammaLutInterpolate,
>(
src_layout: Layout,
source: &ColorProfile,
dst_layout: Layout,
dest: &ColorProfile,
bit_depth: usize,
) -> Option<Box<dyn KatanaPostFinalizationStage<T> + Send + Sync>>
where
f32: AsPrimitive<T>,
{
if (dst_layout == Layout::GrayAlpha && dest.color_space == DataColorSpace::Gray)
        || (dst_layout == Layout::Rgba && dest.color_space == DataColorSpace::Rgb)
{
return if (src_layout == Layout::GrayAlpha && source.color_space == DataColorSpace::Gray)
            || (src_layout == Layout::Rgba && source.color_space == DataColorSpace::Rgb)
{
Some(Box::new(CopyAlphaStage {
src_layout,
dst_layout,
target_color_space: dest.color_space,
_phantom: Default::default(),
}))
} else {
Some(Box::new(InjectAlphaStage {
dst_layout,
target_color_space: dest.color_space,
_phantom: Default::default(),
bit_depth,
}))
};
}
None
}

vendor/moxcms/src/conversions/mod.rs vendored Normal file
@@ -0,0 +1,74 @@
/*
* // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
mod avx;
#[cfg(all(target_arch = "x86_64", feature = "avx512"))]
mod avx512;
mod bpc;
mod gray2rgb;
mod gray2rgb_extended;
mod interpolator;
mod katana;
mod lut3x3;
mod lut3x4;
mod lut4;
mod lut_transforms;
mod mab;
mod mab4x3;
mod mba3x4;
mod md_lut;
mod md_luts_factory;
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
mod neon;
mod prelude_lut_xyz_rgb;
mod rgb2gray;
mod rgb2gray_extended;
mod rgb_xyz_factory;
mod rgbxyz;
mod rgbxyz_fixed;
mod rgbxyz_float;
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
mod sse;
mod transform_lut3_to_3;
mod transform_lut3_to_4;
mod transform_lut4_to_3;
mod xyz_lab;
pub(crate) use gray2rgb::{make_gray_to_unfused, make_gray_to_x};
pub(crate) use gray2rgb_extended::{make_gray_to_one_trc_extended, make_gray_to_rgb_extended};
pub(crate) use interpolator::LutBarycentricReduction;
pub(crate) use lut_transforms::make_lut_transform;
pub(crate) use rgb_xyz_factory::{RgbXyzFactory, RgbXyzFactoryOpt};
pub(crate) use rgb2gray::{ToneReproductionRgbToGray, make_rgb_to_gray};
pub(crate) use rgb2gray_extended::make_rgb_to_gray_extended;
pub(crate) use rgbxyz::{TransformMatrixShaper, TransformMatrixShaperOptimized};
pub(crate) use rgbxyz_float::{
TransformShaperFloatInOut, TransformShaperRgbFloat, make_rgb_xyz_rgb_transform_float,
make_rgb_xyz_rgb_transform_float_in_out,
};

@@ -0,0 +1,225 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::neon::cube::CubeNeon;
use crate::conversions::neon::interpolator::NeonVector;
use crate::{CmsError, DataColorSpace, InPlaceStage, InterpolationMethod};
use std::arch::aarch64::*;
pub(crate) struct ACurves3Neon<'a, const DEPTH: usize> {
pub(crate) curve0: Box<[f32; 65536]>,
pub(crate) curve1: Box<[f32; 65536]>,
pub(crate) curve2: Box<[f32; 65536]>,
pub(crate) clut: &'a [f32],
pub(crate) grid_size: [u8; 3],
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) pcs: DataColorSpace,
}
pub(crate) struct ACurves3OptimizedNeon<'a> {
pub(crate) clut: &'a [f32],
pub(crate) grid_size: [u8; 3],
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) pcs: DataColorSpace,
}
pub(crate) struct ACurves3InverseNeon<'a, const DEPTH: usize> {
pub(crate) curve0: Box<[f32; 65536]>,
pub(crate) curve1: Box<[f32; 65536]>,
pub(crate) curve2: Box<[f32; 65536]>,
pub(crate) clut: &'a [f32],
pub(crate) grid_size: [u8; 3],
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) pcs: DataColorSpace,
}
impl<const DEPTH: usize> ACurves3Neon<'_, DEPTH> {
fn transform_impl<Fetch: Fn(f32, f32, f32) -> NeonVector>(
&self,
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
let scale_value = (DEPTH - 1) as f32;
for dst in dst.chunks_exact_mut(3) {
let a0 = (dst[0] * scale_value).round().min(scale_value) as u16;
let a1 = (dst[1] * scale_value).round().min(scale_value) as u16;
let a2 = (dst[2] * scale_value).round().min(scale_value) as u16;
let b0 = self.curve0[a0 as usize];
let b1 = self.curve1[a1 as usize];
let b2 = self.curve2[a2 as usize];
let v = fetch(b0, b1, b2).v;
unsafe {
dst[0] = vgetq_lane_f32::<0>(v);
dst[1] = vgetq_lane_f32::<1>(v);
dst[2] = vgetq_lane_f32::<2>(v);
}
}
Ok(())
}
}
impl ACurves3OptimizedNeon<'_> {
fn transform_impl<Fetch: Fn(f32, f32, f32) -> NeonVector>(
&self,
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
for dst in dst.chunks_exact_mut(3) {
let a0 = dst[0];
let a1 = dst[1];
let a2 = dst[2];
let v = fetch(a0, a1, a2).v;
unsafe {
dst[0] = vgetq_lane_f32::<0>(v);
dst[1] = vgetq_lane_f32::<1>(v);
dst[2] = vgetq_lane_f32::<2>(v);
}
}
Ok(())
}
}
impl<const DEPTH: usize> InPlaceStage for ACurves3Neon<'_, DEPTH> {
fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
let lut = CubeNeon::new(self.clut, self.grid_size, 3);
        // If PCS is LAB or XYZ then linear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(dst, |x, y, z| lut.tetra_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(dst, |x, y, z| lut.pyramid_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(dst, |x, y, z| lut.prism_vec3(x, y, z))?;
}
InterpolationMethod::Linear => {
self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z))?;
}
}
Ok(())
}
}
impl InPlaceStage for ACurves3OptimizedNeon<'_> {
fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
let lut = CubeNeon::new(self.clut, self.grid_size, 3);
        // If PCS is LAB or XYZ then linear interpolation should be used
        if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(dst, |x, y, z| lut.tetra_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(dst, |x, y, z| lut.pyramid_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(dst, |x, y, z| lut.prism_vec3(x, y, z))?;
}
InterpolationMethod::Linear => {
self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z))?;
}
}
Ok(())
}
}
impl<const DEPTH: usize> ACurves3InverseNeon<'_, DEPTH> {
fn transform_impl<Fetch: Fn(f32, f32, f32) -> NeonVector>(
&self,
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
let v_scale_value = unsafe { vdupq_n_f32((DEPTH as u32 - 1u32) as f32) };
unsafe {
for dst in dst.chunks_exact_mut(3) {
let mut v = fetch(dst[0], dst[1], dst[2]).v;
v = vmulq_f32(v, v_scale_value);
v = vminq_f32(v, v_scale_value);
let c = vcvtaq_u32_f32(v);
let a0 = vgetq_lane_u32::<0>(c) as u16;
let a1 = vgetq_lane_u32::<1>(c) as u16;
let a2 = vgetq_lane_u32::<2>(c) as u16;
let b0 = self.curve0[a0 as usize];
let b1 = self.curve1[a1 as usize];
let b2 = self.curve2[a2 as usize];
dst[0] = b0;
dst[1] = b1;
dst[2] = b2;
}
}
Ok(())
}
}
impl<const DEPTH: usize> InPlaceStage for ACurves3InverseNeon<'_, DEPTH> {
fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
let lut = CubeNeon::new(self.clut, self.grid_size, 3);
        // If PCS is LAB or XYZ then linear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(dst, |x, y, z| lut.tetra_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(dst, |x, y, z| lut.pyramid_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(dst, |x, y, z| lut.prism_vec3(x, y, z))?;
}
InterpolationMethod::Linear => {
self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z))?;
}
}
Ok(())
}
}

@@ -0,0 +1,168 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::neon::hypercube::HypercubeNeon;
use crate::conversions::neon::interpolator::NeonVector;
use crate::{CmsError, DataColorSpace, InterpolationMethod, Stage};
use std::arch::aarch64::vgetq_lane_f32;
pub(crate) struct ACurves4x3Neon<'a, const DEPTH: usize> {
pub(crate) curve0: Box<[f32; 65536]>,
pub(crate) curve1: Box<[f32; 65536]>,
pub(crate) curve2: Box<[f32; 65536]>,
pub(crate) curve3: Box<[f32; 65536]>,
pub(crate) clut: &'a [f32],
pub(crate) grid_size: [u8; 4],
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) pcs: DataColorSpace,
}
pub(crate) struct ACurves4x3NeonOptimizedNeon<'a> {
pub(crate) clut: &'a [f32],
pub(crate) grid_size: [u8; 4],
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) pcs: DataColorSpace,
}
impl<const DEPTH: usize> ACurves4x3Neon<'_, DEPTH> {
fn transform_impl<Fetch: Fn(f32, f32, f32, f32) -> NeonVector>(
&self,
src: &[f32],
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
let scale_value = (DEPTH - 1) as f32;
assert_eq!(src.len() / 4, dst.len() / 3);
for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(3)) {
let a0 = (src[0] * scale_value).round().min(scale_value) as u16;
let a1 = (src[1] * scale_value).round().min(scale_value) as u16;
let a2 = (src[2] * scale_value).round().min(scale_value) as u16;
let a3 = (src[3] * scale_value).round().min(scale_value) as u16;
let c = self.curve0[a0 as usize];
let m = self.curve1[a1 as usize];
let y = self.curve2[a2 as usize];
let k = self.curve3[a3 as usize];
let v = fetch(c, m, y, k).v;
unsafe {
dst[0] = vgetq_lane_f32::<0>(v);
dst[1] = vgetq_lane_f32::<1>(v);
dst[2] = vgetq_lane_f32::<2>(v);
}
}
Ok(())
}
}
impl ACurves4x3NeonOptimizedNeon<'_> {
fn transform_impl<Fetch: Fn(f32, f32, f32, f32) -> NeonVector>(
&self,
src: &[f32],
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
assert_eq!(src.len() / 4, dst.len() / 3);
for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(3)) {
let c = src[0];
let m = src[1];
let y = src[2];
let k = src[3];
let v = fetch(c, m, y, k).v;
unsafe {
dst[0] = vgetq_lane_f32::<0>(v);
dst[1] = vgetq_lane_f32::<1>(v);
dst[2] = vgetq_lane_f32::<2>(v);
}
}
Ok(())
}
}
impl<const DEPTH: usize> Stage for ACurves4x3Neon<'_, DEPTH> {
fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> {
let lut = HypercubeNeon::new(self.clut, self.grid_size, 3);
        // If PCS is LAB or XYZ then linear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(src, dst, |x, y, z, w| lut.quadlinear_vec3(x, y, z, w));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(src, dst, |x, y, z, w| lut.tetra_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(src, dst, |x, y, z, w| lut.pyramid_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(src, dst, |x, y, z, w| lut.prism_vec3(x, y, z, w))?;
}
InterpolationMethod::Linear => {
self.transform_impl(src, dst, |x, y, z, w| lut.quadlinear_vec3(x, y, z, w))?;
}
}
Ok(())
}
}
impl Stage for ACurves4x3NeonOptimizedNeon<'_> {
fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> {
let lut = HypercubeNeon::new(self.clut, self.grid_size, 3);
        // If PCS is LAB or XYZ then linear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(src, dst, |x, y, z, w| lut.quadlinear_vec3(x, y, z, w));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(src, dst, |x, y, z, w| lut.tetra_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(src, dst, |x, y, z, w| lut.pyramid_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(src, dst, |x, y, z, w| lut.prism_vec3(x, y, z, w))?;
}
InterpolationMethod::Linear => {
self.transform_impl(src, dst, |x, y, z, w| lut.quadlinear_vec3(x, y, z, w))?;
}
}
Ok(())
}
}

@@ -0,0 +1,442 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::neon::interpolator::NeonVector;
use crate::math::{FusedMultiplyAdd, FusedMultiplyNegAdd};
use std::arch::aarch64::*;
use std::ops::{Add, Mul, Sub};
/// 3D CLUT NEON helper.
///
/// Represents a hexahedron.
pub(crate) struct CubeNeon<'a> {
array: &'a [f32],
x_stride: u32,
y_stride: u32,
grid_size: [u8; 3],
}
struct HexahedronFetch3<'a> {
array: &'a [f32],
x_stride: u32,
y_stride: u32,
}
trait CubeFetch<T> {
fn fetch(&self, x: i32, y: i32, z: i32) -> T;
}
impl CubeFetch<NeonVector> for HexahedronFetch3<'_> {
#[inline(always)]
fn fetch(&self, x: i32, y: i32, z: i32) -> NeonVector {
let start = (x as u32 * self.x_stride + y as u32 * self.y_stride + z as u32) as usize * 3;
unsafe {
let k = self.array.get_unchecked(start..);
let lo = vld1_f32(k.as_ptr());
let hi = vld1_lane_f32::<0>(k.get_unchecked(2..).as_ptr(), vdup_n_f32(0.));
NeonVector {
v: vcombine_f32(lo, hi),
}
}
}
}
impl<'a> CubeNeon<'a> {
pub(crate) fn new(arr: &'a [f32], grid: [u8; 3], components: usize) -> Self {
        // Safety precondition: the array length must equal the full grid size
        // times the component count; the unchecked fetches below rely on it.
assert_eq!(
grid[0] as usize * grid[1] as usize * grid[2] as usize * components,
arr.len()
);
let y_stride = grid[1] as u32;
let x_stride = y_stride * grid[0] as u32;
CubeNeon {
array: arr,
x_stride,
y_stride,
grid_size: grid,
}
}
#[inline(always)]
fn trilinear<
T: Copy
+ From<f32>
+ Sub<T, Output = T>
+ Mul<T, Output = T>
+ Add<T, Output = T>
+ FusedMultiplyNegAdd<T>
+ FusedMultiplyAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
fetch: impl CubeFetch<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let x_d = T::from(lin_x * scale_x - x as f32);
let y_d = T::from(lin_y * scale_y - y as f32);
let z_d = T::from(lin_z * scale_z - z as f32);
let c000 = fetch.fetch(x, y, z);
let c100 = fetch.fetch(x_n, y, z);
let c010 = fetch.fetch(x, y_n, z);
let c110 = fetch.fetch(x_n, y_n, z);
let c001 = fetch.fetch(x, y, z_n);
let c101 = fetch.fetch(x_n, y, z_n);
let c011 = fetch.fetch(x, y_n, z_n);
let c111 = fetch.fetch(x_n, y_n, z_n);
let c00 = c000.neg_mla(c000, x_d).mla(c100, x_d);
let c10 = c010.neg_mla(c010, x_d).mla(c110, x_d);
let c01 = c001.neg_mla(c001, x_d).mla(c101, x_d);
let c11 = c011.neg_mla(c011, x_d).mla(c111, x_d);
let c0 = c00.neg_mla(c00, y_d).mla(c10, y_d);
let c1 = c01.neg_mla(c01, y_d).mla(c11, y_d);
c0.neg_mla(c0, z_d).mla(c1, z_d)
}
#[cfg(feature = "options")]
#[inline]
fn pyramid<
T: Copy
+ From<f32>
+ Sub<T, Output = T>
+ Mul<T, Output = T>
+ Add<T, Output = T>
+ FusedMultiplyAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
fetch: impl CubeFetch<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let dr = lin_x * scale_x - x as f32;
let dg = lin_y * scale_y - y as f32;
let db = lin_z * scale_z - z as f32;
let c0 = fetch.fetch(x, y, z);
if dr > db && dg > db {
let x0 = fetch.fetch(x_n, y_n, z_n);
let x1 = fetch.fetch(x_n, y_n, z);
let x2 = fetch.fetch(x_n, y, z);
let x3 = fetch.fetch(x, y_n, z);
let c1 = x0 - x1;
let c2 = x2 - c0;
let c3 = x3 - c0;
let c4 = c0 - x3 - x2 + x1;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dr * dg))
} else if db > dr && dg > dr {
let x0 = fetch.fetch(x, y, z_n);
let x1 = fetch.fetch(x_n, y_n, z_n);
let x2 = fetch.fetch(x, y_n, z_n);
let x3 = fetch.fetch(x, y_n, z);
let c1 = x0 - c0;
let c2 = x1 - x2;
let c3 = x3 - c0;
let c4 = c0 - x3 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dg * db))
} else {
let x0 = fetch.fetch(x, y, z_n);
let x1 = fetch.fetch(x_n, y, z);
let x2 = fetch.fetch(x_n, y, z_n);
let x3 = fetch.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - c0;
let c3 = x3 - x2;
let c4 = c0 - x1 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(db * dr))
}
}
#[cfg(feature = "options")]
#[inline]
fn tetra<
T: Copy
+ From<f32>
+ Sub<T, Output = T>
+ Mul<T, Output = T>
+ Add<T, Output = T>
+ FusedMultiplyAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
fetch: impl CubeFetch<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let rx = lin_x * scale_x - x as f32;
let ry = lin_y * scale_y - y as f32;
let rz = lin_z * scale_z - z as f32;
let c0 = fetch.fetch(x, y, z);
let c2;
let c1;
let c3;
if rx >= ry {
if ry >= rz {
//rx >= ry && ry >= rz
c1 = fetch.fetch(x_n, y, z) - c0;
c2 = fetch.fetch(x_n, y_n, z) - fetch.fetch(x_n, y, z);
c3 = fetch.fetch(x_n, y_n, z_n) - fetch.fetch(x_n, y_n, z);
} else if rx >= rz {
//rx >= rz && rz >= ry
c1 = fetch.fetch(x_n, y, z) - c0;
c2 = fetch.fetch(x_n, y_n, z_n) - fetch.fetch(x_n, y, z_n);
c3 = fetch.fetch(x_n, y, z_n) - fetch.fetch(x_n, y, z);
} else {
//rz > rx && rx >= ry
c1 = fetch.fetch(x_n, y, z_n) - fetch.fetch(x, y, z_n);
c2 = fetch.fetch(x_n, y_n, z_n) - fetch.fetch(x_n, y, z_n);
c3 = fetch.fetch(x, y, z_n) - c0;
}
} else if rx >= rz {
//ry > rx && rx >= rz
c1 = fetch.fetch(x_n, y_n, z) - fetch.fetch(x, y_n, z);
c2 = fetch.fetch(x, y_n, z) - c0;
c3 = fetch.fetch(x_n, y_n, z_n) - fetch.fetch(x_n, y_n, z);
} else if ry >= rz {
//ry >= rz && rz > rx
c1 = fetch.fetch(x_n, y_n, z_n) - fetch.fetch(x, y_n, z_n);
c2 = fetch.fetch(x, y_n, z) - c0;
c3 = fetch.fetch(x, y_n, z_n) - fetch.fetch(x, y_n, z);
} else {
//rz > ry && ry > rx
c1 = fetch.fetch(x_n, y_n, z_n) - fetch.fetch(x, y_n, z_n);
c2 = fetch.fetch(x, y_n, z_n) - fetch.fetch(x, y, z_n);
c3 = fetch.fetch(x, y, z_n) - c0;
}
let s0 = c0.mla(c1, T::from(rx));
let s1 = s0.mla(c2, T::from(ry));
s1.mla(c3, T::from(rz))
}
#[cfg(feature = "options")]
#[inline]
fn prism<
T: Copy
+ From<f32>
+ Sub<T, Output = T>
+ Mul<T, Output = T>
+ Add<T, Output = T>
+ FusedMultiplyAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
fetch: impl CubeFetch<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let dr = lin_x * scale_x - x as f32;
let dg = lin_y * scale_y - y as f32;
let db = lin_z * scale_z - z as f32;
let c0 = fetch.fetch(x, y, z);
if db >= dr {
let x0 = fetch.fetch(x, y, z_n);
let x1 = fetch.fetch(x_n, y, z_n);
let x2 = fetch.fetch(x, y_n, z);
let x3 = fetch.fetch(x, y_n, z_n);
let x4 = fetch.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - x0;
let c3 = x2 - c0;
let c4 = c0 - x2 - x0 + x3;
let c5 = x0 - x3 - x1 + x4;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
} else {
let x0 = fetch.fetch(x_n, y, z);
let x1 = fetch.fetch(x_n, y, z_n);
let x2 = fetch.fetch(x, y_n, z);
let x3 = fetch.fetch(x_n, y_n, z);
let x4 = fetch.fetch(x_n, y_n, z_n);
let c1 = x1 - x0;
let c2 = x0 - c0;
let c3 = x2 - c0;
let c4 = x0 - x3 - x1 + x4;
let c5 = c0 - x2 - x0 + x3;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
}
}
#[inline]
pub(crate) fn trilinear_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> NeonVector {
self.trilinear(
lin_x,
lin_y,
lin_z,
HexahedronFetch3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
},
)
}
#[cfg(feature = "options")]
#[inline]
pub(crate) fn prism_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> NeonVector {
self.prism(
lin_x,
lin_y,
lin_z,
HexahedronFetch3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
},
)
}
#[cfg(feature = "options")]
#[inline]
pub(crate) fn pyramid_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> NeonVector {
self.pyramid(
lin_x,
lin_y,
lin_z,
HexahedronFetch3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
},
)
}
#[cfg(feature = "options")]
#[inline]
pub(crate) fn tetra_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> NeonVector {
self.tetra(
lin_x,
lin_y,
lin_z,
HexahedronFetch3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
},
)
}
}
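// Minimal usage sketch (editorial; the type is crate-private and the values
// are illustrative, not from upstream docs):
//
//     let clut = vec![0f32; 17 * 17 * 17 * 3];
//     let cube = CubeNeon::new(&clut, [17, 17, 17], 3);
//     // The first three lanes of the returned NeonVector hold the result.
//     let rgb = cube.trilinear_vec3(0.25, 0.5, 0.75);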

@@ -0,0 +1,629 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::neon::interpolator::NeonVector;
use crate::math::{FusedMultiplyAdd, FusedMultiplyNegAdd};
use crate::nd_array::lerp;
use std::arch::aarch64::{vcombine_f32, vdup_n_f32, vld1_f32, vld1_lane_f32};
use std::ops::{Add, Mul, Sub};
/// 4D CLUT helper.
///
/// Represents a hypercube.
pub(crate) struct HypercubeNeon<'a> {
array: &'a [f32],
x_stride: u32,
y_stride: u32,
z_stride: u32,
grid_size: [u8; 4],
}
trait Fetcher4<T> {
fn fetch(&self, x: i32, y: i32, z: i32, w: i32) -> T;
}
struct Fetch4Vec3<'a> {
array: &'a [f32],
x_stride: u32,
y_stride: u32,
z_stride: u32,
}
impl Fetcher4<NeonVector> for Fetch4Vec3<'_> {
#[inline(always)]
fn fetch(&self, x: i32, y: i32, z: i32, w: i32) -> NeonVector {
let start = (x as u32 * self.x_stride
+ y as u32 * self.y_stride
+ z as u32 * self.z_stride
+ w as u32) as usize
* 3;
unsafe {
let k = self.array.get_unchecked(start..);
let lo = vld1_f32(k.as_ptr());
let hi = vld1_lane_f32::<0>(k.get_unchecked(2..).as_ptr(), vdup_n_f32(0.));
NeonVector {
v: vcombine_f32(lo, hi),
}
}
}
}
impl<'a> HypercubeNeon<'a> {
pub(crate) fn new(arr: &'a [f32], grid: [u8; 4], components: usize) -> Self {
        // Safety precondition: the array length must equal the full grid size
        // times the component count; the unchecked fetches below rely on it.
assert_eq!(
grid[0] as usize * grid[1] as usize * grid[2] as usize * grid[3] as usize * components,
arr.len()
);
let z_stride = grid[2] as u32;
let y_stride = z_stride * grid[1] as u32;
let x_stride = y_stride * grid[0] as u32;
HypercubeNeon {
array: arr,
x_stride,
y_stride,
z_stride,
grid_size: grid,
}
}
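    // Editorial note: quadlinear interpolation below runs two full trilinear
    // passes, one at w and one at w_n (16 fetches total), then lerps the two
    // results by the fractional w offset.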
#[inline(always)]
fn quadlinear<
T: From<f32>
+ Add<T, Output = T>
+ Mul<T, Output = T>
+ FusedMultiplyAdd<T>
+ Sub<T, Output = T>
+ Copy
+ FusedMultiplyNegAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
r: impl Fetcher4<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let lin_w = lin_w.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let scale_w = (self.grid_size[3] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let w = (lin_w * scale_w).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let w_n = (lin_w * scale_w).ceil() as i32;
let x_d = T::from(lin_x * scale_x - x as f32);
let y_d = T::from(lin_y * scale_y - y as f32);
let z_d = T::from(lin_z * scale_z - z as f32);
let w_d = T::from(lin_w * scale_w - w as f32);
let r_x1 = lerp(r.fetch(x, y, z, w), r.fetch(x_n, y, z, w), x_d);
let r_x2 = lerp(r.fetch(x, y_n, z, w), r.fetch(x_n, y_n, z, w), x_d);
let r_y1 = lerp(r_x1, r_x2, y_d);
let r_x3 = lerp(r.fetch(x, y, z_n, w), r.fetch(x_n, y, z_n, w), x_d);
let r_x4 = lerp(r.fetch(x, y_n, z_n, w), r.fetch(x_n, y_n, z_n, w), x_d);
let r_y2 = lerp(r_x3, r_x4, y_d);
let r_z1 = lerp(r_y1, r_y2, z_d);
let r_x1 = lerp(r.fetch(x, y, z, w_n), r.fetch(x_n, y, z, w_n), x_d);
let r_x2 = lerp(r.fetch(x, y_n, z, w_n), r.fetch(x_n, y_n, z, w_n), x_d);
let r_y1 = lerp(r_x1, r_x2, y_d);
let r_x3 = lerp(r.fetch(x, y, z_n, w_n), r.fetch(x_n, y, z_n, w_n), x_d);
let r_x4 = lerp(r.fetch(x, y_n, z_n, w_n), r.fetch(x_n, y_n, z_n, w_n), x_d);
let r_y2 = lerp(r_x3, r_x4, y_d);
let r_z2 = lerp(r_y1, r_y2, z_d);
lerp(r_z1, r_z2, w_d)
}
#[inline]
pub(crate) fn quadlinear_vec3(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
) -> NeonVector {
self.quadlinear(
lin_x,
lin_y,
lin_z,
lin_w,
Fetch4Vec3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
z_stride: self.z_stride,
},
)
}
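// The `pyramid`, `prism`, and `tetra` variants below cut each 3D sub-cube
// into pyramids, prisms, or tetrahedra chosen from the fractional offsets,
// needing fewer fetches than full quadlinear interpolation; the two 3D
// results at `w` and `w_n` are then lerped along the fourth axis.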
#[cfg(feature = "options")]
#[inline(always)]
fn pyramid<
T: From<f32>
+ Add<T, Output = T>
+ Mul<T, Output = T>
+ FusedMultiplyAdd<T>
+ Sub<T, Output = T>
+ Copy
+ FusedMultiplyNegAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
r: impl Fetcher4<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let lin_w = lin_w.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let scale_w = (self.grid_size[3] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let w = (lin_w * scale_w).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let w_n = (lin_w * scale_w).ceil() as i32;
let dr = lin_x * scale_x - x as f32;
let dg = lin_y * scale_y - y as f32;
let db = lin_z * scale_z - z as f32;
let dw = lin_w * scale_w - w as f32;
let c0 = r.fetch(x, y, z, w);
let w0 = if dr > db && dg > db {
let x0 = r.fetch(x_n, y_n, z_n, w);
let x1 = r.fetch(x_n, y_n, z, w);
let x2 = r.fetch(x_n, y, z, w);
let x3 = r.fetch(x, y_n, z, w);
let c1 = x0 - x1;
let c2 = x2 - c0;
let c3 = x3 - c0;
let c4 = c0 - x3 - x2 + x1;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dr * dg))
} else if db > dr && dg > dr {
let x0 = r.fetch(x, y, z_n, w);
let x1 = r.fetch(x_n, y_n, z_n, w);
let x2 = r.fetch(x, y_n, z_n, w);
let x3 = r.fetch(x, y_n, z, w);
let c1 = x0 - c0;
let c2 = x1 - x2;
let c3 = x3 - c0;
let c4 = c0 - x3 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dg * db))
} else {
let x0 = r.fetch(x, y, z_n, w);
let x1 = r.fetch(x_n, y, z, w);
let x2 = r.fetch(x_n, y, z_n, w);
let x3 = r.fetch(x_n, y_n, z_n, w);
let c1 = x0 - c0;
let c2 = x1 - c0;
let c3 = x3 - x2;
let c4 = c0 - x1 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(db * dr))
};
let c0 = r.fetch(x, y, z, w_n);
let w1 = if dr > db && dg > db {
let x0 = r.fetch(x_n, y_n, z_n, w_n);
let x1 = r.fetch(x_n, y_n, z, w_n);
let x2 = r.fetch(x_n, y, z, w_n);
let x3 = r.fetch(x, y_n, z, w_n);
let c1 = x0 - x1;
let c2 = x2 - c0;
let c3 = x3 - c0;
let c4 = c0 - x3 - x2 + x1;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dr * dg))
} else if db > dr && dg > dr {
let x0 = r.fetch(x, y, z_n, w_n);
let x1 = r.fetch(x_n, y_n, z_n, w_n);
let x2 = r.fetch(x, y_n, z_n, w_n);
let x3 = r.fetch(x, y_n, z, w_n);
let c1 = x0 - c0;
let c2 = x1 - x2;
let c3 = x3 - c0;
let c4 = c0 - x3 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dg * db))
} else {
let x0 = r.fetch(x, y, z_n, w_n);
let x1 = r.fetch(x_n, y, z, w_n);
let x2 = r.fetch(x_n, y, z_n, w_n);
let x3 = r.fetch(x_n, y_n, z_n, w_n);
let c1 = x0 - c0;
let c2 = x1 - c0;
let c3 = x3 - x2;
let c4 = c0 - x1 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(db * dr))
};
w0.neg_mla(w0, T::from(dw)).mla(w1, T::from(dw))
}
#[cfg(feature = "options")]
#[inline]
pub(crate) fn pyramid_vec3(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
) -> NeonVector {
self.pyramid(
lin_x,
lin_y,
lin_z,
lin_w,
Fetch4Vec3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
z_stride: self.z_stride,
},
)
}
#[cfg(feature = "options")]
#[inline(always)]
fn prism<
T: From<f32>
+ Add<T, Output = T>
+ Mul<T, Output = T>
+ FusedMultiplyAdd<T>
+ Sub<T, Output = T>
+ Copy
+ FusedMultiplyNegAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
r: impl Fetcher4<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let lin_w = lin_w.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let scale_w = (self.grid_size[3] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let w = (lin_w * scale_w).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let w_n = (lin_w * scale_w).ceil() as i32;
let dr = lin_x * scale_x - x as f32;
let dg = lin_y * scale_y - y as f32;
let db = lin_z * scale_z - z as f32;
let dw = lin_w * scale_w - w as f32;
let c0 = r.fetch(x, y, z, w);
let w0 = if db >= dr {
let x0 = r.fetch(x, y, z_n, w);
let x1 = r.fetch(x_n, y, z_n, w);
let x2 = r.fetch(x, y_n, z, w);
let x3 = r.fetch(x, y_n, z_n, w);
let x4 = r.fetch(x_n, y_n, z_n, w);
let c1 = x0 - c0;
let c2 = x1 - x0;
let c3 = x2 - c0;
let c4 = c0 - x2 - x0 + x3;
let c5 = x0 - x3 - x1 + x4;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
} else {
let x0 = r.fetch(x_n, y, z, w);
let x1 = r.fetch(x_n, y, z_n, w);
let x2 = r.fetch(x, y_n, z, w);
let x3 = r.fetch(x_n, y_n, z, w);
let x4 = r.fetch(x_n, y_n, z_n, w);
let c1 = x1 - x0;
let c2 = x0 - c0;
let c3 = x2 - c0;
let c4 = x0 - x3 - x1 + x4;
let c5 = c0 - x2 - x0 + x3;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
};
let c0 = r.fetch(x, y, z, w_n);
let w1 = if db >= dr {
let x0 = r.fetch(x, y, z_n, w_n);
let x1 = r.fetch(x_n, y, z_n, w_n);
let x2 = r.fetch(x, y_n, z, w_n);
let x3 = r.fetch(x, y_n, z_n, w_n);
let x4 = r.fetch(x_n, y_n, z_n, w_n);
let c1 = x0 - c0;
let c2 = x1 - x0;
let c3 = x2 - c0;
let c4 = c0 - x2 - x0 + x3;
let c5 = x0 - x3 - x1 + x4;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
} else {
let x0 = r.fetch(x_n, y, z, w_n);
let x1 = r.fetch(x_n, y, z_n, w_n);
let x2 = r.fetch(x, y_n, z, w_n);
let x3 = r.fetch(x_n, y_n, z, w_n);
let x4 = r.fetch(x_n, y_n, z_n, w_n);
let c1 = x1 - x0;
let c2 = x0 - c0;
let c3 = x2 - c0;
let c4 = x0 - x3 - x1 + x4;
let c5 = c0 - x2 - x0 + x3;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
};
w0.neg_mla(w0, T::from(dw)).mla(w1, T::from(dw))
}
#[cfg(feature = "options")]
#[inline]
pub(crate) fn prism_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32, lin_w: f32) -> NeonVector {
self.prism(
lin_x,
lin_y,
lin_z,
lin_w,
Fetch4Vec3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
z_stride: self.z_stride,
},
)
}
#[cfg(feature = "options")]
#[inline(always)]
fn tetra<
T: From<f32>
+ Add<T, Output = T>
+ Mul<T, Output = T>
+ FusedMultiplyAdd<T>
+ Sub<T, Output = T>
+ Copy
+ FusedMultiplyNegAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
r: impl Fetcher4<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let lin_w = lin_w.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let scale_w = (self.grid_size[3] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let w = (lin_w * scale_w).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let w_n = (lin_w * scale_w).ceil() as i32;
let rx = lin_x * scale_x - x as f32;
let ry = lin_y * scale_y - y as f32;
let rz = lin_z * scale_z - z as f32;
let rw = lin_w * scale_w - w as f32;
let c0 = r.fetch(x, y, z, w);
let c2;
let c1;
let c3;
if rx >= ry {
if ry >= rz {
//rx >= ry && ry >= rz
c1 = r.fetch(x_n, y, z, w) - c0;
c2 = r.fetch(x_n, y_n, z, w) - r.fetch(x_n, y, z, w);
c3 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x_n, y_n, z, w);
} else if rx >= rz {
//rx >= rz && rz >= ry
c1 = r.fetch(x_n, y, z, w) - c0;
c2 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x_n, y, z_n, w);
c3 = r.fetch(x_n, y, z_n, w) - r.fetch(x_n, y, z, w);
} else {
//rz > rx && rx >= ry
c1 = r.fetch(x_n, y, z_n, w) - r.fetch(x, y, z_n, w);
c2 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x_n, y, z_n, w);
c3 = r.fetch(x, y, z_n, w) - c0;
}
} else if rx >= rz {
//ry > rx && rx >= rz
c1 = r.fetch(x_n, y_n, z, w) - r.fetch(x, y_n, z, w);
c2 = r.fetch(x, y_n, z, w) - c0;
c3 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x_n, y_n, z, w);
} else if ry >= rz {
//ry >= rz && rz > rx
c1 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x, y_n, z_n, w);
c2 = r.fetch(x, y_n, z, w) - c0;
c3 = r.fetch(x, y_n, z_n, w) - r.fetch(x, y_n, z, w);
} else {
//rz > ry && ry > rx
c1 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x, y_n, z_n, w);
c2 = r.fetch(x, y_n, z_n, w) - r.fetch(x, y, z_n, w);
c3 = r.fetch(x, y, z_n, w) - c0;
}
let s0 = c0.mla(c1, T::from(rx));
let s1 = s0.mla(c2, T::from(ry));
let w0 = s1.mla(c3, T::from(rz));
let c0 = r.fetch(x, y, z, w_n);
let c2;
let c1;
let c3;
if rx >= ry {
if ry >= rz {
//rx >= ry && ry >= rz
c1 = r.fetch(x_n, y, z, w_n) - c0;
c2 = r.fetch(x_n, y_n, z, w_n) - r.fetch(x_n, y, z, w_n);
c3 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x_n, y_n, z, w_n);
} else if rx >= rz {
//rx >= rz && rz >= ry
c1 = r.fetch(x_n, y, z, w_n) - c0;
c2 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x_n, y, z_n, w_n);
c3 = r.fetch(x_n, y, z_n, w_n) - r.fetch(x_n, y, z, w_n);
} else {
//rz > rx && rx >= ry
c1 = r.fetch(x_n, y, z_n, w_n) - r.fetch(x, y, z_n, w_n);
c2 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x_n, y, z_n, w_n);
c3 = r.fetch(x, y, z_n, w_n) - c0;
}
} else if rx >= rz {
//ry > rx && rx >= rz
c1 = r.fetch(x_n, y_n, z, w_n) - r.fetch(x, y_n, z, w_n);
c2 = r.fetch(x, y_n, z, w_n) - c0;
c3 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x_n, y_n, z, w_n);
} else if ry >= rz {
//ry >= rz && rz > rx
c1 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x, y_n, z_n, w_n);
c2 = r.fetch(x, y_n, z, w_n) - c0;
c3 = r.fetch(x, y_n, z_n, w_n) - r.fetch(x, y_n, z, w_n);
} else {
//rz > ry && ry > rx
c1 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x, y_n, z_n, w_n);
c2 = r.fetch(x, y_n, z_n, w_n) - r.fetch(x, y, z_n, w_n);
c3 = r.fetch(x, y, z_n, w_n) - c0;
}
let s0 = c0.mla(c1, T::from(rx));
let s1 = s0.mla(c2, T::from(ry));
let w1 = s1.mla(c3, T::from(rz));
w0.neg_mla(w0, T::from(rw)).mla(w1, T::from(rw))
}
#[cfg(feature = "options")]
#[inline]
pub(crate) fn tetra_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32, lin_w: f32) -> NeonVector {
self.tetra(
lin_x,
lin_y,
lin_z,
lin_w,
Fetch4Vec3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
z_stride: self.z_stride,
},
)
}
}
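// --- Illustrative sketch (editorial addition, not upstream moxcms code). ---
// A scalar reference for the quadlinear path above: 4D multilinear
// interpolation is a cascade of 1D lerps, one axis at a time, and it
// reproduces any linear field exactly. The grid layout and values below are
// chosen for the example only.
#[cfg(test)]
mod quadlinear_reference {
    // Scalar analogue of the vectorized `lerp` used above.
    fn lerp(a: f32, b: f32, t: f32) -> f32 {
        a + (b - a) * t
    }

    // Reference 4D interpolation on a 2x2x2x2 grid stored as [x][y][z][w].
    fn quadlinear_ref(grid: &[f32; 16], x: f32, y: f32, z: f32, w: f32) -> f32 {
        let f = |xi: usize, yi: usize, zi: usize, wi: usize| grid[xi * 8 + yi * 4 + zi * 2 + wi];
        let mut slices = [0f32; 2];
        for (wi, slice) in slices.iter_mut().enumerate() {
            let x1 = lerp(f(0, 0, 0, wi), f(1, 0, 0, wi), x);
            let x2 = lerp(f(0, 1, 0, wi), f(1, 1, 0, wi), x);
            let y1 = lerp(x1, x2, y);
            let x3 = lerp(f(0, 0, 1, wi), f(1, 0, 1, wi), x);
            let x4 = lerp(f(0, 1, 1, wi), f(1, 1, 1, wi), x);
            let y2 = lerp(x3, x4, y);
            *slice = lerp(y1, y2, z);
        }
        lerp(slices[0], slices[1], w)
    }

    #[test]
    fn reproduces_linear_field_exactly() {
        let mut grid = [0f32; 16];
        for xi in 0..2 {
            for yi in 0..2 {
                for zi in 0..2 {
                    for wi in 0..2 {
                        grid[xi * 8 + yi * 4 + zi * 2 + wi] = (xi + yi + zi + wi) as f32;
                    }
                }
            }
        }
        let v = quadlinear_ref(&grid, 0.25, 0.5, 0.75, 0.1);
        assert!((v - (0.25 + 0.5 + 0.75 + 0.1)).abs() < 1e-6);
    }
}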


@@ -0,0 +1,905 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#![allow(dead_code)]
use crate::conversions::interpolator::BarycentricWeight;
use crate::conversions::neon::rgb_xyz::NeonAlignedF32;
use crate::math::{FusedMultiplyAdd, FusedMultiplyNegAdd};
use num_traits::AsPrimitive;
use std::arch::aarch64::*;
use std::ops::{Add, Mul, Sub};
pub(crate) struct TetrahedralNeon<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [NeonAlignedF32],
}
pub(crate) struct PyramidalNeon<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [NeonAlignedF32],
}
pub(crate) struct TrilinearNeon<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [NeonAlignedF32],
}
pub(crate) struct PyramidalNeonDouble<'a, const GRID_SIZE: usize> {
pub(crate) cube0: &'a [NeonAlignedF32],
pub(crate) cube1: &'a [NeonAlignedF32],
}
pub(crate) struct PrismaticNeonDouble<'a, const GRID_SIZE: usize> {
pub(crate) cube0: &'a [NeonAlignedF32],
pub(crate) cube1: &'a [NeonAlignedF32],
}
pub(crate) struct TrilinearNeonDouble<'a, const GRID_SIZE: usize> {
pub(crate) cube0: &'a [NeonAlignedF32],
pub(crate) cube1: &'a [NeonAlignedF32],
}
pub(crate) struct TetrahedralNeonDouble<'a, const GRID_SIZE: usize> {
pub(crate) cube0: &'a [NeonAlignedF32],
pub(crate) cube1: &'a [NeonAlignedF32],
}
pub(crate) struct PrismaticNeon<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [NeonAlignedF32],
}
trait Fetcher<T> {
fn fetch(&self, x: i32, y: i32, z: i32) -> T;
}
struct TetrahedralNeonFetchVector<'a, const GRID_SIZE: usize> {
cube: &'a [NeonAlignedF32],
}
struct TetrahedralNeonFetchVectorDouble<'a, const GRID_SIZE: usize> {
cube0: &'a [NeonAlignedF32],
cube1: &'a [NeonAlignedF32],
}
#[derive(Copy, Clone)]
pub(crate) struct NeonVector {
pub(crate) v: float32x4_t,
}
#[derive(Copy, Clone)]
pub(crate) struct NeonVectorDouble {
pub(crate) v0: float32x4_t,
pub(crate) v1: float32x4_t,
}
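// Two four-lane results carried side by side so a pair of LUT tables
// (`cube0`/`cube1`) can be interpolated in one instruction stream; `split`
// returns the halves as separate `NeonVector`s.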
impl From<f32> for NeonVector {
#[inline(always)]
fn from(v: f32) -> Self {
NeonVector {
v: unsafe { vdupq_n_f32(v) },
}
}
}
impl From<f32> for NeonVectorDouble {
#[inline(always)]
fn from(v: f32) -> Self {
NeonVectorDouble {
v0: unsafe { vdupq_n_f32(v) },
v1: unsafe { vdupq_n_f32(v) },
}
}
}
impl Sub<NeonVector> for NeonVector {
type Output = Self;
#[inline(always)]
fn sub(self, rhs: NeonVector) -> Self::Output {
NeonVector {
v: unsafe { vsubq_f32(self.v, rhs.v) },
}
}
}
impl Mul<NeonVector> for NeonVector {
type Output = Self;
#[inline(always)]
fn mul(self, rhs: NeonVector) -> Self::Output {
NeonVector {
v: unsafe { vmulq_f32(self.v, rhs.v) },
}
}
}
impl Sub<NeonVectorDouble> for NeonVectorDouble {
type Output = Self;
#[inline(always)]
fn sub(self, rhs: NeonVectorDouble) -> Self::Output {
NeonVectorDouble {
v0: unsafe { vsubq_f32(self.v0, rhs.v0) },
v1: unsafe { vsubq_f32(self.v1, rhs.v1) },
}
}
}
impl Mul<NeonVectorDouble> for NeonVectorDouble {
type Output = Self;
#[inline(always)]
fn mul(self, rhs: NeonVectorDouble) -> Self::Output {
NeonVectorDouble {
v0: unsafe { vmulq_f32(self.v0, rhs.v0) },
v1: unsafe { vmulq_f32(self.v1, rhs.v1) },
}
}
}
impl Add<NeonVector> for NeonVector {
type Output = Self;
#[inline(always)]
fn add(self, rhs: NeonVector) -> Self::Output {
NeonVector {
v: unsafe { vaddq_f32(self.v, rhs.v) },
}
}
}
impl Add<NeonVectorDouble> for NeonVectorDouble {
type Output = Self;
#[inline(always)]
fn add(self, rhs: NeonVectorDouble) -> Self::Output {
NeonVectorDouble {
v0: unsafe { vaddq_f32(self.v0, rhs.v0) },
v1: unsafe { vaddq_f32(self.v1, rhs.v1) },
}
}
}
impl FusedMultiplyAdd<NeonVector> for NeonVector {
#[inline(always)]
fn mla(&self, b: NeonVector, c: NeonVector) -> NeonVector {
NeonVector {
v: unsafe { vfmaq_f32(self.v, b.v, c.v) },
}
}
}
impl FusedMultiplyNegAdd<NeonVector> for NeonVector {
#[inline(always)]
fn neg_mla(&self, b: NeonVector, c: NeonVector) -> NeonVector {
NeonVector {
v: unsafe { vfmsq_f32(self.v, b.v, c.v) },
}
}
}
impl NeonVectorDouble {
#[inline(always)]
fn neg_mla(&self, b: NeonVectorDouble, c: NeonVectorDouble) -> NeonVectorDouble {
NeonVectorDouble {
v0: unsafe { vfmsq_f32(self.v0, b.v0, c.v0) },
v1: unsafe { vfmsq_f32(self.v1, b.v1, c.v1) },
}
}
}
impl NeonVectorDouble {
#[inline(always)]
fn mla(&self, b: NeonVectorDouble, c: NeonVector) -> NeonVectorDouble {
NeonVectorDouble {
v0: unsafe { vfmaq_f32(self.v0, b.v0, c.v) },
v1: unsafe { vfmaq_f32(self.v1, b.v1, c.v) },
}
}
#[inline(always)]
pub(crate) fn split(self) -> (NeonVector, NeonVector) {
(NeonVector { v: self.v0 }, NeonVector { v: self.v1 })
}
}
impl<const GRID_SIZE: usize> Fetcher<NeonVector> for TetrahedralNeonFetchVector<'_, GRID_SIZE> {
fn fetch(&self, x: i32, y: i32, z: i32) -> NeonVector {
let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
+ y as u32 * GRID_SIZE as u32
+ z as u32) as usize;
let jx = unsafe { self.cube.get_unchecked(offset..) };
NeonVector {
v: unsafe { vld1q_f32(jx.as_ptr() as *const f32) },
}
}
}
impl<const GRID_SIZE: usize> Fetcher<NeonVectorDouble>
for TetrahedralNeonFetchVectorDouble<'_, GRID_SIZE>
{
fn fetch(&self, x: i32, y: i32, z: i32) -> NeonVectorDouble {
let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
+ y as u32 * GRID_SIZE as u32
+ z as u32) as usize;
let jx0 = unsafe { self.cube0.get_unchecked(offset..) };
let jx1 = unsafe { self.cube1.get_unchecked(offset..) };
NeonVectorDouble {
v0: unsafe { vld1q_f32(jx0.as_ptr() as *const f32) },
v1: unsafe { vld1q_f32(jx1.as_ptr() as *const f32) },
}
}
}
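// Each `BarycentricWeight` entry precomputes, for one input bin, the lower
// and upper grid indices (`x`, `x_n`) and the fractional weight `w` between
// them, so the per-pixel loops below skip the floor/ceil arithmetic.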
pub(crate) trait NeonMdInterpolation<'a, const GRID_SIZE: usize> {
fn new(table: &'a [NeonAlignedF32]) -> Self;
fn inter3_neon<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
) -> NeonVector;
}
pub(crate) trait NeonMdInterpolationDouble<'a, const GRID_SIZE: usize> {
fn new(table0: &'a [NeonAlignedF32], table1: &'a [NeonAlignedF32]) -> Self;
fn inter3_neon<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
) -> (NeonVector, NeonVector);
}
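// Tetrahedral interpolation splits the unit cube into six tetrahedra by
// ordering the fractional offsets (rx, ry, rz); each branch below picks the
// tetrahedron containing the sample and evaluates c0 + c1*rx + c2*ry + c3*rz
// with the differences taken along that tetrahedron's edges.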
impl<const GRID_SIZE: usize> TetrahedralNeon<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
r: impl Fetcher<NeonVector>,
) -> NeonVector {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let rx = lut_r.w;
let ry = lut_g.w;
let rz = lut_b.w;
let c0 = r.fetch(x, y, z);
let c2;
let c1;
let c3;
if rx >= ry {
if ry >= rz {
//rx >= ry && ry >= rz
c1 = r.fetch(x_n, y, z) - c0;
c2 = r.fetch(x_n, y_n, z) - r.fetch(x_n, y, z);
c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
} else if rx >= rz {
//rx >= rz && rz >= ry
c1 = r.fetch(x_n, y, z) - c0;
c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
c3 = r.fetch(x_n, y, z_n) - r.fetch(x_n, y, z);
} else {
//rz > rx && rx >= ry
c1 = r.fetch(x_n, y, z_n) - r.fetch(x, y, z_n);
c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
c3 = r.fetch(x, y, z_n) - c0;
}
} else if rx >= rz {
//ry > rx && rx >= rz
c1 = r.fetch(x_n, y_n, z) - r.fetch(x, y_n, z);
c2 = r.fetch(x, y_n, z) - c0;
c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
} else if ry >= rz {
//ry >= rz && rz > rx
c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
c2 = r.fetch(x, y_n, z) - c0;
c3 = r.fetch(x, y_n, z_n) - r.fetch(x, y_n, z);
} else {
//rz > ry && ry > rx
c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
c2 = r.fetch(x, y_n, z_n) - r.fetch(x, y, z_n);
c3 = r.fetch(x, y, z_n) - c0;
}
let s0 = c0.mla(c1, NeonVector::from(rx));
let s1 = s0.mla(c2, NeonVector::from(ry));
s1.mla(c3, NeonVector::from(rz))
}
}
impl<const GRID_SIZE: usize> TetrahedralNeonDouble<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
r: impl Fetcher<NeonVectorDouble>,
) -> (NeonVector, NeonVector) {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let rx = lut_r.w;
let ry = lut_g.w;
let rz = lut_b.w;
let c0 = r.fetch(x, y, z);
let c2;
let c1;
let c3;
if rx >= ry {
if ry >= rz {
//rx >= ry && ry >= rz
c1 = r.fetch(x_n, y, z) - c0;
c2 = r.fetch(x_n, y_n, z) - r.fetch(x_n, y, z);
c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
} else if rx >= rz {
//rx >= rz && rz >= ry
c1 = r.fetch(x_n, y, z) - c0;
c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
c3 = r.fetch(x_n, y, z_n) - r.fetch(x_n, y, z);
} else {
//rz > rx && rx >= ry
c1 = r.fetch(x_n, y, z_n) - r.fetch(x, y, z_n);
c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
c3 = r.fetch(x, y, z_n) - c0;
}
} else if rx >= rz {
//ry > rx && rx >= rz
c1 = r.fetch(x_n, y_n, z) - r.fetch(x, y_n, z);
c2 = r.fetch(x, y_n, z) - c0;
c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
} else if ry >= rz {
//ry >= rz && rz > rx
c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
c2 = r.fetch(x, y_n, z) - c0;
c3 = r.fetch(x, y_n, z_n) - r.fetch(x, y_n, z);
} else {
//rz > ry && ry > rx
c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
c2 = r.fetch(x, y_n, z_n) - r.fetch(x, y, z_n);
c3 = r.fetch(x, y, z_n) - c0;
}
let s0 = c0.mla(c1, NeonVector::from(rx));
let s1 = s0.mla(c2, NeonVector::from(ry));
s1.mla(c3, NeonVector::from(rz)).split()
}
}
macro_rules! define_md_inter_neon {
($interpolator: ident) => {
impl<'a, const GRID_SIZE: usize> NeonMdInterpolation<'a, GRID_SIZE>
for $interpolator<'a, GRID_SIZE>
{
#[inline(always)]
fn new(table: &'a [NeonAlignedF32]) -> Self {
Self { cube: table }
}
#[inline(always)]
fn inter3_neon<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
) -> NeonVector {
self.interpolate(
in_r,
in_g,
in_b,
lut,
TetrahedralNeonFetchVector::<GRID_SIZE> { cube: self.cube },
)
}
}
};
}
macro_rules! define_md_inter_neon_d {
($interpolator: ident) => {
impl<'a, const GRID_SIZE: usize> NeonMdInterpolationDouble<'a, GRID_SIZE>
for $interpolator<'a, GRID_SIZE>
{
#[inline(always)]
fn new(table0: &'a [NeonAlignedF32], table1: &'a [NeonAlignedF32]) -> Self {
Self {
cube0: table0,
cube1: table1,
}
}
#[inline(always)]
fn inter3_neon<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
) -> (NeonVector, NeonVector) {
self.interpolate(
in_r,
in_g,
in_b,
lut,
TetrahedralNeonFetchVectorDouble::<GRID_SIZE> {
cube0: self.cube0,
cube1: self.cube1,
},
)
}
}
};
}
define_md_inter_neon!(TetrahedralNeon);
define_md_inter_neon!(PyramidalNeon);
define_md_inter_neon!(PrismaticNeon);
define_md_inter_neon!(TrilinearNeon);
define_md_inter_neon_d!(PrismaticNeonDouble);
define_md_inter_neon_d!(PyramidalNeonDouble);
define_md_inter_neon_d!(TetrahedralNeonDouble);
define_md_inter_neon_d!(TrilinearNeonDouble);
impl<const GRID_SIZE: usize> PyramidalNeon<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
r: impl Fetcher<NeonVector>,
) -> NeonVector {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let c0 = r.fetch(x, y, z);
if dr > db && dg > db {
let x0 = r.fetch(x_n, y_n, z_n);
let x1 = r.fetch(x_n, y_n, z);
let x2 = r.fetch(x_n, y, z);
let x3 = r.fetch(x, y_n, z);
let c1 = x0 - x1;
let c2 = x2 - c0;
let c3 = x3 - c0;
let c4 = c0 - x3 - x2 + x1;
let s0 = c0.mla(c1, NeonVector::from(db));
let s1 = s0.mla(c2, NeonVector::from(dr));
let s2 = s1.mla(c3, NeonVector::from(dg));
s2.mla(c4, NeonVector::from(dr * dg))
} else if db > dr && dg > dr {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y_n, z_n);
let x2 = r.fetch(x, y_n, z_n);
let x3 = r.fetch(x, y_n, z);
let c1 = x0 - c0;
let c2 = x1 - x2;
let c3 = x3 - c0;
let c4 = c0 - x3 - x0 + x2;
let s0 = c0.mla(c1, NeonVector::from(db));
let s1 = s0.mla(c2, NeonVector::from(dr));
let s2 = s1.mla(c3, NeonVector::from(dg));
s2.mla(c4, NeonVector::from(dg * db))
} else {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y, z);
let x2 = r.fetch(x_n, y, z_n);
let x3 = r.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - c0;
let c3 = x3 - x2;
let c4 = c0 - x1 - x0 + x2;
let s0 = c0.mla(c1, NeonVector::from(db));
let s1 = s0.mla(c2, NeonVector::from(dr));
let s2 = s1.mla(c3, NeonVector::from(dg));
s2.mla(c4, NeonVector::from(db * dr))
}
}
}
impl<const GRID_SIZE: usize> PyramidalNeonDouble<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
r: impl Fetcher<NeonVectorDouble>,
) -> (NeonVector, NeonVector) {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let c0 = r.fetch(x, y, z);
let w0 = NeonVector::from(db);
let w1 = NeonVector::from(dr);
let w2 = NeonVector::from(dg);
if dr > db && dg > db {
let x0 = r.fetch(x_n, y_n, z_n);
let x1 = r.fetch(x_n, y_n, z);
let x2 = r.fetch(x_n, y, z);
let x3 = r.fetch(x, y_n, z);
let c1 = x0 - x1;
let c2 = x2 - c0;
let c3 = x3 - c0;
let c4 = c0 - x3 - x2 + x1;
let w3 = NeonVector::from(dr * dg);
let s0 = c0.mla(c1, w0);
let s1 = s0.mla(c2, w1);
let s2 = s1.mla(c3, w2);
s2.mla(c4, w3).split()
} else if db > dr && dg > dr {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y_n, z_n);
let x2 = r.fetch(x, y_n, z_n);
let x3 = r.fetch(x, y_n, z);
let c1 = x0 - c0;
let c2 = x1 - x2;
let c3 = x3 - c0;
let c4 = c0 - x3 - x0 + x2;
let w3 = NeonVector::from(dg * db);
let s0 = c0.mla(c1, w0);
let s1 = s0.mla(c2, w1);
let s2 = s1.mla(c3, w2);
s2.mla(c4, w3).split()
} else {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y, z);
let x2 = r.fetch(x_n, y, z_n);
let x3 = r.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - c0;
let c3 = x3 - x2;
let c4 = c0 - x1 - x0 + x2;
let w3 = NeonVector::from(db * dr);
let s0 = c0.mla(c1, w0);
let s1 = s0.mla(c2, w1);
let s2 = s1.mla(c3, w2);
s2.mla(c4, w3).split()
}
}
}
impl<const GRID_SIZE: usize> PrismaticNeon<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
r: impl Fetcher<NeonVector>,
) -> NeonVector {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let c0 = r.fetch(x, y, z);
if db > dr {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y, z_n);
let x2 = r.fetch(x, y_n, z);
let x3 = r.fetch(x, y_n, z_n);
let x4 = r.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - x0;
let c3 = x2 - c0;
let c4 = c0 - x2 - x0 + x3;
let c5 = x0 - x3 - x1 + x4;
let s0 = c0.mla(c1, NeonVector::from(db));
let s1 = s0.mla(c2, NeonVector::from(dr));
let s2 = s1.mla(c3, NeonVector::from(dg));
let s3 = s2.mla(c4, NeonVector::from(dg * db));
s3.mla(c5, NeonVector::from(dr * dg))
} else {
let x0 = r.fetch(x_n, y, z);
let x1 = r.fetch(x_n, y, z_n);
let x2 = r.fetch(x, y_n, z);
let x3 = r.fetch(x_n, y_n, z);
let x4 = r.fetch(x_n, y_n, z_n);
let c1 = x1 - x0;
let c2 = x0 - c0;
let c3 = x2 - c0;
let c4 = x0 - x3 - x1 + x4;
let c5 = c0 - x2 - x0 + x3;
let s0 = c0.mla(c1, NeonVector::from(db));
let s1 = s0.mla(c2, NeonVector::from(dr));
let s2 = s1.mla(c3, NeonVector::from(dg));
let s3 = s2.mla(c4, NeonVector::from(dg * db));
s3.mla(c5, NeonVector::from(dr * dg))
}
}
}
impl<const GRID_SIZE: usize> PrismaticNeonDouble<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
rv: impl Fetcher<NeonVectorDouble>,
) -> (NeonVector, NeonVector) {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let c0 = rv.fetch(x, y, z);
let w0 = NeonVector::from(db);
let w1 = NeonVector::from(dr);
let w2 = NeonVector::from(dg);
let w3 = NeonVector::from(dg * db);
let w4 = NeonVector::from(dr * dg);
if db > dr {
let x0 = rv.fetch(x, y, z_n);
let x1 = rv.fetch(x_n, y, z_n);
let x2 = rv.fetch(x, y_n, z);
let x3 = rv.fetch(x, y_n, z_n);
let x4 = rv.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - x0;
let c3 = x2 - c0;
let c4 = c0 - x2 - x0 + x3;
let c5 = x0 - x3 - x1 + x4;
let s0 = c0.mla(c1, w0);
let s1 = s0.mla(c2, w1);
let s2 = s1.mla(c3, w2);
let s3 = s2.mla(c4, w3);
s3.mla(c5, w4).split()
} else {
let x0 = rv.fetch(x_n, y, z);
let x1 = rv.fetch(x_n, y, z_n);
let x2 = rv.fetch(x, y_n, z);
let x3 = rv.fetch(x_n, y_n, z);
let x4 = rv.fetch(x_n, y_n, z_n);
let c1 = x1 - x0;
let c2 = x0 - c0;
let c3 = x2 - c0;
let c4 = x0 - x3 - x1 + x4;
let c5 = c0 - x2 - x0 + x3;
let s0 = c0.mla(c1, w0);
let s1 = s0.mla(c2, w1);
let s2 = s1.mla(c3, w2);
let s3 = s2.mla(c4, w3);
s3.mla(c5, w4).split()
}
}
}
impl<const GRID_SIZE: usize> TrilinearNeonDouble<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
r: impl Fetcher<NeonVectorDouble>,
) -> (NeonVector, NeonVector) {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let w0 = NeonVector::from(dr);
let w1 = NeonVector::from(dg);
let w2 = NeonVector::from(db);
let c000 = r.fetch(x, y, z);
let c100 = r.fetch(x_n, y, z);
let c010 = r.fetch(x, y_n, z);
let c110 = r.fetch(x_n, y_n, z);
let c001 = r.fetch(x, y, z_n);
let c101 = r.fetch(x_n, y, z_n);
let c011 = r.fetch(x, y_n, z_n);
let c111 = r.fetch(x_n, y_n, z_n);
let dx = NeonVectorDouble::from(dr);
let c00 = c000.neg_mla(c000, dx).mla(c100, w0);
let c10 = c010.neg_mla(c010, dx).mla(c110, w0);
let c01 = c001.neg_mla(c001, dx).mla(c101, w0);
let c11 = c011.neg_mla(c011, dx).mla(c111, w0);
let dy = NeonVectorDouble::from(dg);
let c0 = c00.neg_mla(c00, dy).mla(c10, w1);
let c1 = c01.neg_mla(c01, dy).mla(c11, w1);
let dz = NeonVectorDouble::from(db);
c0.neg_mla(c0, dz).mla(c1, w2).split()
}
}
impl<const GRID_SIZE: usize> TrilinearNeon<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
r: impl Fetcher<NeonVector>,
) -> NeonVector {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let w0 = NeonVector::from(dr);
let w1 = NeonVector::from(dg);
let w2 = NeonVector::from(db);
let c000 = r.fetch(x, y, z);
let c100 = r.fetch(x_n, y, z);
let c010 = r.fetch(x, y_n, z);
let c110 = r.fetch(x_n, y_n, z);
let c001 = r.fetch(x, y, z_n);
let c101 = r.fetch(x_n, y, z_n);
let c011 = r.fetch(x, y_n, z_n);
let c111 = r.fetch(x_n, y_n, z_n);
let dx = NeonVector::from(dr);
let c00 = c000.neg_mla(c000, dx).mla(c100, w0);
let c10 = c010.neg_mla(c010, dx).mla(c110, w0);
let c01 = c001.neg_mla(c001, dx).mla(c101, w0);
let c11 = c011.neg_mla(c011, dx).mla(c111, w0);
let dy = NeonVector::from(dg);
let c0 = c00.neg_mla(c00, dy).mla(c10, w1);
let c1 = c01.neg_mla(c01, dy).mla(c11, w1);
let dz = NeonVector::from(db);
c0.neg_mla(c0, dz).mla(c1, w2)
}
}
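// --- Illustrative sketch (editorial addition, not upstream moxcms code). ---
// The trilinear paths above build each lerp from two fused operations:
// `a.neg_mla(a, t).mla(b, t)` computes a - a*t + b*t == a*(1 - t) + b*t.
#[cfg(test)]
mod fused_lerp_reference {
    #[test]
    fn fused_form_matches_plain_lerp() {
        let (a, b, t) = (0.25f32, 0.75f32, 0.4f32);
        let fused = (a - a * t) + b * t; // scalar analogue of neg_mla + mla
        let plain = a * (1.0 - t) + b * t;
        assert!((fused - plain).abs() < 1e-6);
    }
}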


@@ -0,0 +1,947 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::interpolator::BarycentricWeight;
use crate::math::FusedMultiplyAdd;
use num_traits::AsPrimitive;
use std::arch::aarch64::*;
use std::ops::{Add, Mul, Sub};
#[repr(align(8), C)]
pub(crate) struct NeonAlignedI16x4(pub(crate) [i16; 4]);
#[cfg(feature = "options")]
pub(crate) struct TetrahedralNeonQ0_15<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [NeonAlignedI16x4],
}
#[cfg(feature = "options")]
pub(crate) struct PyramidalNeonQ0_15<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [NeonAlignedI16x4],
}
pub(crate) struct TrilinearNeonQ0_15<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [NeonAlignedI16x4],
}
#[cfg(feature = "options")]
pub(crate) struct PyramidalNeonQ0_15Double<'a, const GRID_SIZE: usize> {
pub(crate) cube0: &'a [NeonAlignedI16x4],
pub(crate) cube1: &'a [NeonAlignedI16x4],
}
#[cfg(feature = "options")]
pub(crate) struct PrismaticNeonQ0_15Double<'a, const GRID_SIZE: usize> {
pub(crate) cube0: &'a [NeonAlignedI16x4],
pub(crate) cube1: &'a [NeonAlignedI16x4],
}
pub(crate) struct TrilinearNeonQ0_15Double<'a, const GRID_SIZE: usize> {
pub(crate) cube0: &'a [NeonAlignedI16x4],
pub(crate) cube1: &'a [NeonAlignedI16x4],
}
#[cfg(feature = "options")]
pub(crate) struct TetrahedralNeonQ0_15Double<'a, const GRID_SIZE: usize> {
pub(crate) cube0: &'a [NeonAlignedI16x4],
pub(crate) cube1: &'a [NeonAlignedI16x4],
}
#[cfg(feature = "options")]
pub(crate) struct PrismaticNeonQ0_15<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [NeonAlignedI16x4],
}
trait Fetcher<T> {
fn fetch(&self, x: i32, y: i32, z: i32) -> T;
}
struct TetrahedralNeonQ0_15FetchVector<'a, const GRID_SIZE: usize> {
cube: &'a [NeonAlignedI16x4],
}
struct TetrahedralNeonQ0_15FetchVectorDouble<'a, const GRID_SIZE: usize> {
cube0: &'a [NeonAlignedI16x4],
cube1: &'a [NeonAlignedI16x4],
}
#[derive(Copy, Clone)]
pub(crate) struct NeonVectorQ0_15 {
pub(crate) v: int16x4_t,
}
#[derive(Copy, Clone)]
pub(crate) struct NeonVectorQ0_15Double {
pub(crate) v: int16x8_t,
}
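// Q0.15 fixed point: 16-bit samples interpreted as values in [-1, 1) scaled
// by 2^15. `vqrdmulh[q]_s16` (saturating rounding doubling multiply returning
// high halves) yields approximately (a * b) >> 15 with rounding, i.e. a
// Q0.15 product, which is why it backs the `Mul` impls below.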
impl From<i16> for NeonVectorQ0_15 {
#[inline(always)]
fn from(v: i16) -> Self {
NeonVectorQ0_15 {
v: unsafe { vdup_n_s16(v) },
}
}
}
impl From<i16> for NeonVectorQ0_15Double {
#[inline(always)]
fn from(v: i16) -> Self {
NeonVectorQ0_15Double {
v: unsafe { vdupq_n_s16(v) },
}
}
}
impl Sub<NeonVectorQ0_15> for NeonVectorQ0_15 {
type Output = Self;
#[inline(always)]
fn sub(self, rhs: NeonVectorQ0_15) -> Self::Output {
NeonVectorQ0_15 {
v: unsafe { vsub_s16(self.v, rhs.v) },
}
}
}
impl Mul<NeonVectorQ0_15> for NeonVectorQ0_15 {
type Output = Self;
#[inline(always)]
fn mul(self, rhs: NeonVectorQ0_15) -> Self::Output {
NeonVectorQ0_15 {
v: unsafe { vqrdmulh_s16(self.v, rhs.v) },
}
}
}
impl Sub<NeonVectorQ0_15Double> for NeonVectorQ0_15Double {
type Output = Self;
#[inline(always)]
fn sub(self, rhs: NeonVectorQ0_15Double) -> Self::Output {
NeonVectorQ0_15Double {
v: unsafe { vsubq_s16(self.v, rhs.v) },
}
}
}
impl Mul<NeonVectorQ0_15Double> for NeonVectorQ0_15Double {
type Output = Self;
#[inline(always)]
fn mul(self, rhs: NeonVectorQ0_15Double) -> Self::Output {
NeonVectorQ0_15Double {
v: unsafe { vqrdmulhq_s16(self.v, rhs.v) },
}
}
}
impl Add<NeonVectorQ0_15> for NeonVectorQ0_15 {
type Output = Self;
#[inline(always)]
fn add(self, rhs: NeonVectorQ0_15) -> Self::Output {
NeonVectorQ0_15 {
v: unsafe { vadd_s16(self.v, rhs.v) },
}
}
}
impl Add<NeonVectorQ0_15Double> for NeonVectorQ0_15Double {
type Output = Self;
#[inline(always)]
fn add(self, rhs: NeonVectorQ0_15Double) -> Self::Output {
NeonVectorQ0_15Double {
v: unsafe { vaddq_s16(self.v, rhs.v) },
}
}
}
impl FusedMultiplyAdd<NeonVectorQ0_15> for NeonVectorQ0_15 {
#[inline(always)]
fn mla(&self, b: NeonVectorQ0_15, c: NeonVectorQ0_15) -> NeonVectorQ0_15 {
NeonVectorQ0_15 {
v: unsafe { vqrdmlah_s16(self.v, b.v, c.v) },
}
}
}
impl NeonVectorQ0_15 {
#[inline(always)]
fn neg_mla(&self, b: NeonVectorQ0_15, c: NeonVectorQ0_15) -> NeonVectorQ0_15 {
NeonVectorQ0_15 {
v: unsafe { vqrdmlsh_s16(self.v, b.v, c.v) },
}
}
}
impl NeonVectorQ0_15Double {
#[inline(always)]
fn neg_mla(&self, b: NeonVectorQ0_15Double, c: NeonVectorQ0_15Double) -> NeonVectorQ0_15Double {
NeonVectorQ0_15Double {
v: unsafe { vqrdmlshq_s16(self.v, b.v, c.v) },
}
}
}
impl NeonVectorQ0_15Double {
#[inline(always)]
fn mla(&self, b: NeonVectorQ0_15Double, c: NeonVectorQ0_15) -> NeonVectorQ0_15Double {
NeonVectorQ0_15Double {
v: unsafe { vqrdmlahq_s16(self.v, b.v, vcombine_s16(c.v, c.v)) },
}
}
#[inline(always)]
pub(crate) fn split(self) -> (NeonVectorQ0_15, NeonVectorQ0_15) {
unsafe {
(
NeonVectorQ0_15 {
v: vget_low_s16(self.v),
},
NeonVectorQ0_15 {
v: vget_high_s16(self.v),
},
)
}
}
}
impl<const GRID_SIZE: usize> Fetcher<NeonVectorQ0_15>
for TetrahedralNeonQ0_15FetchVector<'_, GRID_SIZE>
{
fn fetch(&self, x: i32, y: i32, z: i32) -> NeonVectorQ0_15 {
let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
+ y as u32 * GRID_SIZE as u32
+ z as u32) as usize;
let jx = unsafe { self.cube.get_unchecked(offset..) };
NeonVectorQ0_15 {
v: unsafe { vld1_s16(jx.as_ptr() as *const i16) },
}
}
}
impl<const GRID_SIZE: usize> Fetcher<NeonVectorQ0_15Double>
for TetrahedralNeonQ0_15FetchVectorDouble<'_, GRID_SIZE>
{
fn fetch(&self, x: i32, y: i32, z: i32) -> NeonVectorQ0_15Double {
let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
+ y as u32 * GRID_SIZE as u32
+ z as u32) as usize;
let jx0 = unsafe { self.cube0.get_unchecked(offset..) };
let jx1 = unsafe { self.cube1.get_unchecked(offset..) };
NeonVectorQ0_15Double {
v: unsafe {
vcombine_s16(
vld1_s16(jx0.as_ptr() as *const i16),
vld1_s16(jx1.as_ptr() as *const i16),
)
},
}
}
}
pub(crate) trait NeonMdInterpolationQ0_15<'a, const GRID_SIZE: usize> {
fn new(table: &'a [NeonAlignedI16x4]) -> Self;
fn inter3_neon<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
) -> NeonVectorQ0_15;
}
pub(crate) trait NeonMdInterpolationQ0_15Double<'a, const GRID_SIZE: usize> {
fn new(table0: &'a [NeonAlignedI16x4], table1: &'a [NeonAlignedI16x4]) -> Self;
fn inter3_neon<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
) -> (NeonVectorQ0_15, NeonVectorQ0_15);
}
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> TetrahedralNeonQ0_15<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
r: impl Fetcher<NeonVectorQ0_15>,
) -> NeonVectorQ0_15 {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let rx = lut_r.w;
let ry = lut_g.w;
let rz = lut_b.w;
let c0 = r.fetch(x, y, z);
let c2;
let c1;
let c3;
if rx >= ry {
if ry >= rz {
//rx >= ry && ry >= rz
c1 = r.fetch(x_n, y, z) - c0;
c2 = r.fetch(x_n, y_n, z) - r.fetch(x_n, y, z);
c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
} else if rx >= rz {
//rx >= rz && rz >= ry
c1 = r.fetch(x_n, y, z) - c0;
c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
c3 = r.fetch(x_n, y, z_n) - r.fetch(x_n, y, z);
} else {
//rz > rx && rx >= ry
c1 = r.fetch(x_n, y, z_n) - r.fetch(x, y, z_n);
c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
c3 = r.fetch(x, y, z_n) - c0;
}
} else if rx >= rz {
//ry > rx && rx >= rz
c1 = r.fetch(x_n, y_n, z) - r.fetch(x, y_n, z);
c2 = r.fetch(x, y_n, z) - c0;
c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
} else if ry >= rz {
//ry >= rz && rz > rx
c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
c2 = r.fetch(x, y_n, z) - c0;
c3 = r.fetch(x, y_n, z_n) - r.fetch(x, y_n, z);
} else {
//rz > ry && ry > rx
c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
c2 = r.fetch(x, y_n, z_n) - r.fetch(x, y, z_n);
c3 = r.fetch(x, y, z_n) - c0;
}
let s0 = c0.mla(c1, NeonVectorQ0_15::from(rx));
let s1 = s0.mla(c2, NeonVectorQ0_15::from(ry));
s1.mla(c3, NeonVectorQ0_15::from(rz))
}
}
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> TetrahedralNeonQ0_15Double<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
r: impl Fetcher<NeonVectorQ0_15Double>,
) -> (NeonVectorQ0_15, NeonVectorQ0_15) {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let rx = lut_r.w;
let ry = lut_g.w;
let rz = lut_b.w;
let c0 = r.fetch(x, y, z);
let c2;
let c1;
let c3;
if rx >= ry {
if ry >= rz {
//rx >= ry && ry >= rz
c1 = r.fetch(x_n, y, z) - c0;
c2 = r.fetch(x_n, y_n, z) - r.fetch(x_n, y, z);
c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
} else if rx >= rz {
//rx >= rz && rz >= ry
c1 = r.fetch(x_n, y, z) - c0;
c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
c3 = r.fetch(x_n, y, z_n) - r.fetch(x_n, y, z);
} else {
//rz > rx && rx >= ry
c1 = r.fetch(x_n, y, z_n) - r.fetch(x, y, z_n);
c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
c3 = r.fetch(x, y, z_n) - c0;
}
} else if rx >= rz {
//ry > rx && rx >= rz
c1 = r.fetch(x_n, y_n, z) - r.fetch(x, y_n, z);
c2 = r.fetch(x, y_n, z) - c0;
c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
} else if ry >= rz {
//ry >= rz && rz > rx
c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
c2 = r.fetch(x, y_n, z) - c0;
c3 = r.fetch(x, y_n, z_n) - r.fetch(x, y_n, z);
} else {
//rz > ry && ry > rx
c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
c2 = r.fetch(x, y_n, z_n) - r.fetch(x, y, z_n);
c3 = r.fetch(x, y, z_n) - c0;
}
let s0 = c0.mla(c1, NeonVectorQ0_15::from(rx));
let s1 = s0.mla(c2, NeonVectorQ0_15::from(ry));
s1.mla(c3, NeonVectorQ0_15::from(rz)).split()
}
}
macro_rules! define_md_inter_neon {
($interpolator: ident) => {
impl<'a, const GRID_SIZE: usize> NeonMdInterpolationQ0_15<'a, GRID_SIZE>
for $interpolator<'a, GRID_SIZE>
{
#[inline(always)]
fn new(table: &'a [NeonAlignedI16x4]) -> Self {
Self { cube: table }
}
#[inline(always)]
fn inter3_neon<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
) -> NeonVectorQ0_15 {
self.interpolate(
in_r,
in_g,
in_b,
lut,
TetrahedralNeonQ0_15FetchVector::<GRID_SIZE> { cube: self.cube },
)
}
}
};
}
macro_rules! define_md_inter_neon_d {
($interpolator: ident) => {
impl<'a, const GRID_SIZE: usize> NeonMdInterpolationQ0_15Double<'a, GRID_SIZE>
for $interpolator<'a, GRID_SIZE>
{
#[inline(always)]
fn new(table0: &'a [NeonAlignedI16x4], table1: &'a [NeonAlignedI16x4]) -> Self {
Self {
cube0: table0,
cube1: table1,
}
}
#[inline(always)]
fn inter3_neon<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
) -> (NeonVectorQ0_15, NeonVectorQ0_15) {
self.interpolate(
in_r,
in_g,
in_b,
lut,
TetrahedralNeonQ0_15FetchVectorDouble::<GRID_SIZE> {
cube0: self.cube0,
cube1: self.cube1,
},
)
}
}
};
}
#[cfg(feature = "options")]
define_md_inter_neon!(TetrahedralNeonQ0_15);
#[cfg(feature = "options")]
define_md_inter_neon!(PyramidalNeonQ0_15);
#[cfg(feature = "options")]
define_md_inter_neon!(PrismaticNeonQ0_15);
define_md_inter_neon!(TrilinearNeonQ0_15);
#[cfg(feature = "options")]
define_md_inter_neon_d!(PrismaticNeonQ0_15Double);
#[cfg(feature = "options")]
define_md_inter_neon_d!(PyramidalNeonQ0_15Double);
#[cfg(feature = "options")]
define_md_inter_neon_d!(TetrahedralNeonQ0_15Double);
define_md_inter_neon_d!(TrilinearNeonQ0_15Double);
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> PyramidalNeonQ0_15<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
r: impl Fetcher<NeonVectorQ0_15>,
) -> NeonVectorQ0_15 {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let c0 = r.fetch(x, y, z);
let w0 = NeonVectorQ0_15::from(db);
let w1 = NeonVectorQ0_15::from(dr);
let w2 = NeonVectorQ0_15::from(dg);
if dr > db && dg > db {
let x0 = r.fetch(x_n, y_n, z_n);
let x1 = r.fetch(x_n, y_n, z);
let x2 = r.fetch(x_n, y, z);
let x3 = r.fetch(x, y_n, z);
let w3 = w1 * w2;
let c1 = x0 - x1;
let c2 = x2 - c0;
let c3 = x3 - c0;
let c4 = c0 - x3 - x2 + x1;
let s0 = c0.mla(c1, w0);
let s1 = s0.mla(c2, w1);
let s2 = s1.mla(c3, w2);
s2.mla(c4, w3)
} else if db > dr && dg > dr {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y_n, z_n);
let x2 = r.fetch(x, y_n, z_n);
let x3 = r.fetch(x, y_n, z);
let w3 = w2 * w0;
let c1 = x0 - c0;
let c2 = x1 - x2;
let c3 = x3 - c0;
let c4 = c0 - x3 - x0 + x2;
let s0 = c0.mla(c1, w0);
let s1 = s0.mla(c2, w1);
let s2 = s1.mla(c3, w2);
s2.mla(c4, w3)
} else {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y, z);
let x2 = r.fetch(x_n, y, z_n);
let x3 = r.fetch(x_n, y_n, z_n);
let w3 = w0 * w1;
let c1 = x0 - c0;
let c2 = x1 - c0;
let c3 = x3 - x2;
let c4 = c0 - x1 - x0 + x2;
let s0 = c0.mla(c1, w0);
let s1 = s0.mla(c2, w1);
let s2 = s1.mla(c3, w2);
s2.mla(c4, w3)
}
}
}
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> PyramidalNeonQ0_15Double<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
r: impl Fetcher<NeonVectorQ0_15Double>,
) -> (NeonVectorQ0_15, NeonVectorQ0_15) {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let c0 = r.fetch(x, y, z);
let w0 = NeonVectorQ0_15::from(db);
let w1 = NeonVectorQ0_15::from(dr);
let w2 = NeonVectorQ0_15::from(dg);
if dr > db && dg > db {
let w3 = NeonVectorQ0_15::from(dr) * NeonVectorQ0_15::from(dg);
let x0 = r.fetch(x_n, y_n, z_n);
let x1 = r.fetch(x_n, y_n, z);
let x2 = r.fetch(x_n, y, z);
let x3 = r.fetch(x, y_n, z);
let c1 = x0 - x1;
let c2 = x2 - c0;
let c3 = x3 - c0;
let c4 = c0 - x3 - x2 + x1;
let s0 = c0.mla(c1, w0);
let s1 = s0.mla(c2, w1);
let s2 = s1.mla(c3, w2);
s2.mla(c4, w3).split()
} else if db > dr && dg > dr {
let w3 = NeonVectorQ0_15::from(dg) * NeonVectorQ0_15::from(db);
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y_n, z_n);
let x2 = r.fetch(x, y_n, z_n);
let x3 = r.fetch(x, y_n, z);
let c1 = x0 - c0;
let c2 = x1 - x2;
let c3 = x3 - c0;
let c4 = c0 - x3 - x0 + x2;
let s0 = c0.mla(c1, w0);
let s1 = s0.mla(c2, w1);
let s2 = s1.mla(c3, w2);
s2.mla(c4, w3).split()
} else {
let w3 = NeonVectorQ0_15::from(db) * NeonVectorQ0_15::from(dr);
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y, z);
let x2 = r.fetch(x_n, y, z_n);
let x3 = r.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - c0;
let c3 = x3 - x2;
let c4 = c0 - x1 - x0 + x2;
let s0 = c0.mla(c1, w0);
let s1 = s0.mla(c2, w1);
let s2 = s1.mla(c3, w2);
s2.mla(c4, w3).split()
}
}
}
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> PrismaticNeonQ0_15<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
r: impl Fetcher<NeonVectorQ0_15>,
) -> NeonVectorQ0_15 {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let c0 = r.fetch(x, y, z);
let w0 = NeonVectorQ0_15::from(db);
let w1 = NeonVectorQ0_15::from(dr);
let w2 = NeonVectorQ0_15::from(dg);
if db > dr {
let w3 = w2 * w0;
let w4 = w1 * w2;
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y, z_n);
let x2 = r.fetch(x, y_n, z);
let x3 = r.fetch(x, y_n, z_n);
let x4 = r.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - x0;
let c3 = x2 - c0;
let c4 = c0 - x2 - x0 + x3;
let c5 = x0 - x3 - x1 + x4;
let s0 = c0.mla(c1, w0);
let s1 = s0.mla(c2, w1);
let s2 = s1.mla(c3, w2);
let s3 = s2.mla(c4, w3);
s3.mla(c5, w4)
} else {
let w3 = w2 * w0;
let w4 = w1 * w2;
let x0 = r.fetch(x_n, y, z);
let x1 = r.fetch(x_n, y, z_n);
let x2 = r.fetch(x, y_n, z);
let x3 = r.fetch(x_n, y_n, z);
let x4 = r.fetch(x_n, y_n, z_n);
let c1 = x1 - x0;
let c2 = x0 - c0;
let c3 = x2 - c0;
let c4 = x0 - x3 - x1 + x4;
let c5 = c0 - x2 - x0 + x3;
let s0 = c0.mla(c1, w0);
let s1 = s0.mla(c2, w1);
let s2 = s1.mla(c3, w2);
let s3 = s2.mla(c4, w3);
s3.mla(c5, w4)
}
}
}
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> PrismaticNeonQ0_15Double<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
rv: impl Fetcher<NeonVectorQ0_15Double>,
) -> (NeonVectorQ0_15, NeonVectorQ0_15) {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let c0 = rv.fetch(x, y, z);
let w0 = NeonVectorQ0_15::from(db);
let w1 = NeonVectorQ0_15::from(dr);
let w2 = NeonVectorQ0_15::from(dg);
let w3 = NeonVectorQ0_15::from(dg) * NeonVectorQ0_15::from(db);
let w4 = NeonVectorQ0_15::from(dr) * NeonVectorQ0_15::from(dg);
if db > dr {
let x0 = rv.fetch(x, y, z_n);
let x1 = rv.fetch(x_n, y, z_n);
let x2 = rv.fetch(x, y_n, z);
let x3 = rv.fetch(x, y_n, z_n);
let x4 = rv.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - x0;
let c3 = x2 - c0;
let c4 = c0 - x2 - x0 + x3;
let c5 = x0 - x3 - x1 + x4;
let s0 = c0.mla(c1, w0);
let s1 = s0.mla(c2, w1);
let s2 = s1.mla(c3, w2);
let s3 = s2.mla(c4, w3);
s3.mla(c5, w4).split()
} else {
let x0 = rv.fetch(x_n, y, z);
let x1 = rv.fetch(x_n, y, z_n);
let x2 = rv.fetch(x, y_n, z);
let x3 = rv.fetch(x_n, y_n, z);
let x4 = rv.fetch(x_n, y_n, z_n);
let c1 = x1 - x0;
let c2 = x0 - c0;
let c3 = x2 - c0;
let c4 = x0 - x3 - x1 + x4;
let c5 = c0 - x2 - x0 + x3;
let s0 = c0.mla(c1, w0);
let s1 = s0.mla(c2, w1);
let s2 = s1.mla(c3, w2);
let s3 = s2.mla(c4, w3);
s3.mla(c5, w4).split()
}
}
}
impl<const GRID_SIZE: usize> TrilinearNeonQ0_15Double<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
r: impl Fetcher<NeonVectorQ0_15Double>,
) -> (NeonVectorQ0_15, NeonVectorQ0_15) {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let w0 = NeonVectorQ0_15::from(dr);
let w1 = NeonVectorQ0_15::from(dg);
let w2 = NeonVectorQ0_15::from(db);
let c000 = r.fetch(x, y, z);
let c100 = r.fetch(x_n, y, z);
let c010 = r.fetch(x, y_n, z);
let c110 = r.fetch(x_n, y_n, z);
let c001 = r.fetch(x, y, z_n);
let c101 = r.fetch(x_n, y, z_n);
let c011 = r.fetch(x, y_n, z_n);
let c111 = r.fetch(x_n, y_n, z_n);
let dx = NeonVectorQ0_15Double::from(dr);
let c00 = c000.neg_mla(c000, dx).mla(c100, w0);
let c10 = c010.neg_mla(c010, dx).mla(c110, w0);
let c01 = c001.neg_mla(c001, dx).mla(c101, w0);
let c11 = c011.neg_mla(c011, dx).mla(c111, w0);
let dy = NeonVectorQ0_15Double::from(dg);
let c0 = c00.neg_mla(c00, dy).mla(c10, w1);
let c1 = c01.neg_mla(c01, dy).mla(c11, w1);
let dz = NeonVectorQ0_15Double::from(db);
c0.neg_mla(c0, dz).mla(c1, w2).split()
}
}
impl<const GRID_SIZE: usize> TrilinearNeonQ0_15<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
r: impl Fetcher<NeonVectorQ0_15>,
) -> NeonVectorQ0_15 {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let w0 = NeonVectorQ0_15::from(dr);
let w1 = NeonVectorQ0_15::from(dg);
let w2 = NeonVectorQ0_15::from(db);
let c000 = r.fetch(x, y, z);
let c100 = r.fetch(x_n, y, z);
let c010 = r.fetch(x, y_n, z);
let c110 = r.fetch(x_n, y_n, z);
let c001 = r.fetch(x, y, z_n);
let c101 = r.fetch(x_n, y, z_n);
let c011 = r.fetch(x, y_n, z_n);
let c111 = r.fetch(x_n, y_n, z_n);
let dx = NeonVectorQ0_15::from(dr);
let c00 = c000.neg_mla(c000, dx).mla(c100, w0);
let c10 = c010.neg_mla(c010, dx).mla(c110, w0);
let c01 = c001.neg_mla(c001, dx).mla(c101, w0);
let c11 = c011.neg_mla(c011, dx).mla(c111, w0);
let dy = NeonVectorQ0_15::from(dg);
let c0 = c00.neg_mla(c00, dy).mla(c10, w1);
let c1 = c01.neg_mla(c01, dy).mla(c11, w1);
let dz = NeonVectorQ0_15::from(db);
c0.neg_mla(c0, dz).mla(c1, w2)
}
}

View File

@@ -0,0 +1,321 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::interpolator::BarycentricWeight;
use crate::conversions::lut_transforms::Lut4x3Factory;
use crate::conversions::neon::interpolator::*;
use crate::conversions::neon::interpolator_q0_15::NeonAlignedI16x4;
use crate::conversions::neon::lut4_to_3_q0_15::TransformLut4To3NeonQ0_15;
use crate::conversions::neon::rgb_xyz::NeonAlignedF32;
use crate::transform::PointeeSizeExpressible;
use crate::{
BarycentricWeightScale, CmsError, DataColorSpace, InterpolationMethod, Layout,
TransformExecutor, TransformOptions,
};
use num_traits::AsPrimitive;
use std::arch::aarch64::*;
use std::marker::PhantomData;
struct TransformLut4To3Neon<
T,
U,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> {
lut: Vec<NeonAlignedF32>,
_phantom: PhantomData<T>,
_phantom1: PhantomData<U>,
interpolation_method: InterpolationMethod,
weights: Box<[BarycentricWeight<f32>; BINS]>,
color_space: DataColorSpace,
is_linear: bool,
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformLut4To3Neon<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
#[allow(unused_unsafe)]
fn transform_chunk<'b, Interpolator: NeonMdInterpolationDouble<'b, GRID_SIZE>>(
&'b self,
src: &[T],
dst: &mut [T],
) {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
let grid_size = GRID_SIZE as i32;
let grid_size3 = grid_size * grid_size * grid_size;
let value_scale = unsafe { vdupq_n_f32(((1 << BIT_DEPTH) - 1) as f32) };
let max_value = ((1 << BIT_DEPTH) - 1u32).as_();
for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(channels)) {
let c = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[0],
);
let m = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[1],
);
let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[2],
);
let k = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[3],
);
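// The 4D lookup is reduced to two 3D interpolations on adjacent K slices;
// their results are blended below by the K fraction t.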
let k_weights = self.weights[k.as_()];
let w: i32 = k_weights.x;
let w_n: i32 = k_weights.x_n;
let t: f32 = k_weights.w;
let table1 = &self.lut[(w * grid_size3) as usize..];
let table2 = &self.lut[(w_n * grid_size3) as usize..];
let tetrahedral1 = Interpolator::new(table1, table2);
let (a0, b0) = tetrahedral1.inter3_neon(c, m, y, &self.weights);
let (a0, b0) = (a0.v, b0.v);
if T::FINITE {
unsafe {
let t0 = vdupq_n_f32(t);
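// Lerp between the K-slice results: v = a0 * (1 - t) + b0 * t, fused as (a0 - a0*t) + b0*t.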
let hp = vfmsq_f32(a0, a0, t0);
let mut v = vfmaq_f32(hp, b0, t0);
v = vmulq_f32(v, value_scale);
v = vminq_f32(v, value_scale);
let jvx = vcvtaq_u32_f32(v);
dst[cn.r_i()] = vgetq_lane_u32::<0>(jvx).as_();
dst[cn.g_i()] = vgetq_lane_u32::<1>(jvx).as_();
dst[cn.b_i()] = vgetq_lane_u32::<2>(jvx).as_();
}
} else {
unsafe {
let t0 = vdupq_n_f32(t);
let hp = vfmsq_f32(a0, a0, t0);
let v = vfmaq_f32(hp, b0, t0);
dst[cn.r_i()] = vgetq_lane_f32::<0>(v).as_();
dst[cn.g_i()] = vgetq_lane_f32::<1>(v).as_();
dst[cn.b_i()] = vgetq_lane_f32::<2>(v).as_();
}
}
if channels == 4 {
dst[cn.a_i()] = max_value;
}
}
}
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformExecutor<T>
for TransformLut4To3Neon<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
if src.len() % 4 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / 4;
let dst_chunks = dst.len() / channels;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
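// Lab, XYZ and linear-RGB tables always use trilinear interpolation,
// regardless of the configured method.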
if self.color_space == DataColorSpace::Lab
|| (self.is_linear && self.color_space == DataColorSpace::Rgb)
|| self.color_space == DataColorSpace::Xyz
{
self.transform_chunk::<TrilinearNeonDouble<GRID_SIZE>>(src, dst);
} else {
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_chunk::<TetrahedralNeonDouble<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_chunk::<PyramidalNeonDouble<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_chunk::<PrismaticNeonDouble<GRID_SIZE>>(src, dst);
}
InterpolationMethod::Linear => {
self.transform_chunk::<TrilinearNeonDouble<GRID_SIZE>>(src, dst);
}
}
}
Ok(())
}
}
pub(crate) struct NeonLut4x3Factory {}
impl Lut4x3Factory for NeonLut4x3Factory {
fn make_transform_4x3<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible + 'static + Send + Sync,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
>(
lut: Vec<f32>,
options: TransformOptions,
color_space: DataColorSpace,
is_linear: bool,
) -> Box<dyn TransformExecutor<T> + Sync + Send>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, u8>,
(): LutBarycentricReduction<T, u16>,
{
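// Prefer the Q0.15 fixed-point kernel when requested, the bit depth allows it and
// RDM is available; otherwise fall back to the f32 kernel. The barycentric scale
// selects u8 (256 bins) or u16 (65536 bins) reduction.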
if options.prefer_fixed_point
&& BIT_DEPTH < 16
&& std::arch::is_aarch64_feature_detected!("rdm")
{
let q: f32 = if T::FINITE {
((1i32 << BIT_DEPTH as i32) - 1) as f32
} else {
((1i32 << 14i32) - 1) as f32
};
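// Convert the f32 CLUT to i16: finite sample types scale to the full bit-depth
// range, float types to a 14-bit range; each RGB triple is padded to four lanes
// for aligned vector loads.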
let lut = lut
.chunks_exact(3)
.map(|x| {
NeonAlignedI16x4([
(x[0] * q).round() as i16,
(x[1] * q).round() as i16,
(x[2] * q).round() as i16,
0,
])
})
.collect::<Vec<_>>();
return match options.barycentric_weight_scale {
BarycentricWeightScale::Low => Box::new(TransformLut4To3NeonQ0_15::<
T,
u8,
LAYOUT,
GRID_SIZE,
BIT_DEPTH,
256,
256,
> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<i16>::create_ranged_256::<GRID_SIZE>(),
color_space,
is_linear,
}),
#[cfg(feature = "options")]
BarycentricWeightScale::High => Box::new(TransformLut4To3NeonQ0_15::<
T,
u16,
LAYOUT,
GRID_SIZE,
BIT_DEPTH,
65536,
65536,
> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<i16>::create_binned::<GRID_SIZE, 65536>(),
color_space,
is_linear,
}),
};
}
let lut = lut
.chunks_exact(3)
.map(|x| NeonAlignedF32([x[0], x[1], x[2], 0f32]))
.collect::<Vec<_>>();
match options.barycentric_weight_scale {
BarycentricWeightScale::Low => {
Box::new(
TransformLut4To3Neon::<T, u8, LAYOUT, GRID_SIZE, BIT_DEPTH, 256, 256> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_ranged_256::<GRID_SIZE>(),
color_space,
is_linear,
},
)
}
#[cfg(feature = "options")]
BarycentricWeightScale::High => {
Box::new(
TransformLut4To3Neon::<T, u16, LAYOUT, GRID_SIZE, BIT_DEPTH, 65536, 65536> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_binned::<GRID_SIZE, 65536>(),
color_space,
is_linear,
},
)
}
}
}
}

View File

@@ -0,0 +1,202 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::interpolator::BarycentricWeight;
use crate::conversions::neon::interpolator_q0_15::*;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, DataColorSpace, InterpolationMethod, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::aarch64::*;
use std::marker::PhantomData;
pub(crate) struct TransformLut4To3NeonQ0_15<
T,
U,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> {
pub(crate) lut: Vec<NeonAlignedI16x4>,
pub(crate) _phantom: PhantomData<T>,
pub(crate) _phantom1: PhantomData<U>,
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) weights: Box<[BarycentricWeight<i16>; BINS]>,
pub(crate) color_space: DataColorSpace,
pub(crate) is_linear: bool,
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformLut4To3NeonQ0_15<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
#[allow(unused_unsafe)]
#[target_feature(enable = "rdm")]
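// FEAT_RDM (SQRDMLAH/SQRDMLSH) backs the Q0.15 multiply-accumulates below;
// the factory checks the feature before dispatching here.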
unsafe fn transform_chunk<'b, Interpolator: NeonMdInterpolationQ0_15Double<'b, GRID_SIZE>>(
&'b self,
src: &[T],
dst: &mut [T],
) {
unsafe {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
let grid_size = GRID_SIZE as i32;
let grid_size3 = grid_size * grid_size * grid_size;
let f_value_scale = vdupq_n_f32(1. / ((1 << 14i32) - 1) as f32);
let max_value = ((1u32 << BIT_DEPTH) - 1).as_();
let v_max_scale = if T::FINITE {
vdup_n_s16(((1i32 << BIT_DEPTH) - 1) as i16)
} else {
vdup_n_s16(((1i32 << 14i32) - 1) as i16)
};
for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(channels)) {
let c = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[0],
);
let m = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[1],
);
let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[2],
);
let k = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[3],
);
let k_weights = self.weights[k.as_()];
let w: i32 = k_weights.x;
let w_n: i32 = k_weights.x_n;
let t: i16 = k_weights.w;
let table1 = &self.lut[(w * grid_size3) as usize..];
let table2 = &self.lut[(w_n * grid_size3) as usize..];
let tetrahedral1 = Interpolator::new(table1, table2);
let (a0, b0) = tetrahedral1.inter3_neon(c, m, y, &self.weights);
let (a0, b0) = (a0.v, b0.v);
let t0 = vdup_n_s16(t);
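// Q0.15 lerp between the K-slice results with rounding-doubling saturation:
// hp = a0 - a0*t, v = hp + b0*t.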
let hp = vqrdmlsh_s16(a0, a0, t0);
let mut v = vqrdmlah_s16(hp, b0, t0);
if T::FINITE {
v = vmax_s16(v, vdup_n_s16(0));
v = vmin_s16(v, v_max_scale);
dst[cn.r_i()] = (vget_lane_s16::<0>(v) as u32).as_();
dst[cn.g_i()] = (vget_lane_s16::<1>(v) as u32).as_();
dst[cn.b_i()] = (vget_lane_s16::<2>(v) as u32).as_();
} else {
let o = vcvtq_f32_s32(vmovl_s16(v));
let r = vmulq_f32(o, f_value_scale);
dst[cn.r_i()] = vgetq_lane_f32::<0>(r).as_();
dst[cn.g_i()] = vgetq_lane_f32::<1>(r).as_();
dst[cn.b_i()] = vgetq_lane_f32::<2>(r).as_();
}
if channels == 4 {
dst[cn.a_i()] = max_value;
}
}
}
}
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformExecutor<T>
for TransformLut4To3NeonQ0_15<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
if src.len() % 4 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / 4;
let dst_chunks = dst.len() / channels;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
unsafe {
if self.color_space == DataColorSpace::Lab
|| (self.is_linear && self.color_space == DataColorSpace::Rgb)
|| self.color_space == DataColorSpace::Xyz
{
self.transform_chunk::<TrilinearNeonQ0_15Double<GRID_SIZE>>(src, dst);
} else {
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_chunk::<TetrahedralNeonQ0_15Double<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_chunk::<PyramidalNeonQ0_15Double<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_chunk::<PrismaticNeonQ0_15Double<GRID_SIZE>>(src, dst);
}
InterpolationMethod::Linear => {
self.transform_chunk::<TrilinearNeonQ0_15Double<GRID_SIZE>>(src, dst);
}
}
}
}
Ok(())
}
}

View File

@@ -0,0 +1,55 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
mod a_curves3;
mod a_curves4x3;
mod cube;
mod hypercube;
mod interpolator;
mod interpolator_q0_15;
mod lut4_to_3;
mod lut4_to_3_q0_15;
mod preheat_lut4x3;
mod rgb_xyz;
mod rgb_xyz_opt;
mod rgb_xyz_q1_30_opt;
mod rgb_xyz_q2_13;
mod rgb_xyz_q2_13_opt;
mod t_lut3_to_3;
mod t_lut3_to_3_q0_15;
pub(crate) use a_curves3::{ACurves3InverseNeon, ACurves3Neon, ACurves3OptimizedNeon};
pub(crate) use a_curves4x3::{ACurves4x3Neon, ACurves4x3NeonOptimizedNeon};
pub(crate) use lut4_to_3::NeonLut4x3Factory;
pub(crate) use preheat_lut4x3::Lut4x3Neon;
pub(crate) use rgb_xyz::TransformShaperRgbNeon;
pub(crate) use rgb_xyz_opt::TransformShaperRgbOptNeon;
pub(crate) use rgb_xyz_q1_30_opt::TransformShaperQ1_30NeonOpt;
pub(crate) use rgb_xyz_q2_13::TransformShaperQ2_13Neon;
pub(crate) use rgb_xyz_q2_13_opt::TransformShaperQ2_13NeonOpt;
pub(crate) use t_lut3_to_3::NeonLut3x3Factory;

View File

@@ -0,0 +1,129 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::neon::hypercube::HypercubeNeon;
use crate::conversions::neon::interpolator::NeonVector;
use crate::trc::{lut_interp_linear_float, lut_interp_linear_float_clamped};
use crate::{CmsError, DataColorSpace, InterpolationMethod, Stage};
use std::arch::aarch64::{vdupq_n_f32, vgetq_lane_f32, vmaxq_f32, vminq_f32};
#[derive(Default)]
pub(crate) struct Lut4x3Neon {
pub(crate) linearization: [Vec<f32>; 4],
pub(crate) clut: Vec<f32>,
pub(crate) grid_size: u8,
pub(crate) output: [Vec<f32>; 3],
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) pcs: DataColorSpace,
}
impl Lut4x3Neon {
fn transform_impl<Fetch: Fn(f32, f32, f32, f32) -> NeonVector>(
&self,
src: &[f32],
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
let linearization_0 = &self.linearization[0];
let linearization_1 = &self.linearization[1];
let linearization_2 = &self.linearization[2];
let linearization_3 = &self.linearization[3];
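// Pipeline: 1D linearization per input channel -> 4D CLUT fetch -> clamp to [0, 1]
// -> 1D output curve per PCS channel.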
for (dest, src) in dst.chunks_exact_mut(3).zip(src.chunks_exact(4)) {
debug_assert!(self.grid_size as i32 >= 1);
let linear_x = lut_interp_linear_float(src[0], linearization_0);
let linear_y = lut_interp_linear_float(src[1], linearization_1);
let linear_z = lut_interp_linear_float(src[2], linearization_2);
let linear_w = lut_interp_linear_float(src[3], linearization_3);
unsafe {
let mut v = fetch(linear_x, linear_y, linear_z, linear_w).v;
v = vmaxq_f32(v, vdupq_n_f32(0.));
v = vminq_f32(v, vdupq_n_f32(1.));
let pcs_x =
lut_interp_linear_float_clamped(vgetq_lane_f32::<0>(v), &self.output[0]);
let pcs_y =
lut_interp_linear_float_clamped(vgetq_lane_f32::<1>(v), &self.output[1]);
let pcs_z =
lut_interp_linear_float_clamped(vgetq_lane_f32::<2>(v), &self.output[2]);
dest[0] = pcs_x;
dest[1] = pcs_y;
dest[2] = pcs_z;
}
}
Ok(())
}
}
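// Implements `Stage` by wrapping the CLUT in a 4D `HypercubeNeon` and dispatching
// on the configured interpolation method.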
macro_rules! dispatch_preheat {
($heater: ident) => {
impl Stage for $heater {
fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> {
let l_tbl = HypercubeNeon::new(
&self.clut,
[
self.grid_size,
self.grid_size,
self.grid_size,
self.grid_size,
],
3,
);
// If the source PCS is Lab, trilinear interpolation should be used
if self.pcs == DataColorSpace::Lab {
return self
.transform_impl(src, dst, |x, y, z, w| l_tbl.quadlinear_vec3(x, y, z, w));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(src, dst, |x, y, z, w| l_tbl.tetra_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(src, dst, |x, y, z, w| l_tbl.pyramid_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(src, dst, |x, y, z, w| l_tbl.prism_vec3(x, y, z, w))?
}
InterpolationMethod::Linear => {
self.transform_impl(src, dst, |x, y, z, w| {
l_tbl.quadlinear_vec3(x, y, z, w)
})?
}
}
Ok(())
}
}
};
}
dispatch_preheat!(Lut4x3Neon);

View File

@@ -0,0 +1,427 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::TransformMatrixShaper;
use crate::conversions::neon::rgb_xyz_q2_13::{split_by_twos, split_by_twos_mut};
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::aarch64::*;
#[repr(align(16), C)]
pub(crate) struct NeonAlignedU16(pub(crate) [u16; 8]);
#[repr(align(16), C)]
pub(crate) struct NeonAlignedF32(pub(crate) [f32; 4]);
pub(crate) struct TransformShaperRgbNeon<
T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> {
pub(crate) profile: TransformMatrixShaper<T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
impl<
T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> TransformExecutor<T> for TransformShaperRgbNeon<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
let mut temporary0 = NeonAlignedU16([0; 8]);
let mut temporary1 = NeonAlignedU16([0; 8]);
let mut temporary2 = NeonAlignedU16([0; 8]);
let mut temporary3 = NeonAlignedU16([0; 8]);
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let t = self.profile.adaptation_matrix.transpose();
let scale = (GAMMA_LUT - 1) as f32;
let max_colors: T = ((1 << self.bit_depth) - 1).as_();
let (src_chunks, src_remainder) = split_by_twos(src, src_channels);
let (dst_chunks, dst_remainder) = split_by_twos_mut(dst, dst_channels);
unsafe {
let m0 = vld1q_f32([t.v[0][0], t.v[0][1], t.v[0][2], 0.].as_ptr());
let m1 = vld1q_f32([t.v[1][0], t.v[1][1], t.v[1][2], 0.].as_ptr());
let m2 = vld1q_f32([t.v[2][0], t.v[2][1], t.v[2][2], 0.].as_ptr());
let v_scale = vdupq_n_f32(scale);
let rnd = vdupq_n_f32(0.5);
if !src_chunks.is_empty() {
let (src0, src1) = src_chunks.split_at(src_chunks.len() / 2);
let (dst0, dst1) = dst_chunks.split_at_mut(dst_chunks.len() / 2);
let mut src_iter0 = src0.chunks_exact(src_channels * 2);
let mut src_iter1 = src1.chunks_exact(src_channels * 2);
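// Software-pipelined loop: the matrix math for the current pixel pair overlaps the
// linear-LUT loads for the next pair. The block below primes the first loads; the
// `last()` block after the loop flushes the final pair.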
let (mut r0, mut g0, mut b0, mut a0);
let (mut r1, mut g1, mut b1, mut a1);
let (mut r2, mut g2, mut b2, mut a2);
let (mut r3, mut g3, mut b3, mut a3);
if let (Some(src0), Some(src1)) = (src_iter0.next(), src_iter1.next()) {
let r0p = &self.profile.r_linear[src0[src_cn.r_i()]._as_usize()];
let g0p = &self.profile.g_linear[src0[src_cn.g_i()]._as_usize()];
let b0p = &self.profile.b_linear[src0[src_cn.b_i()]._as_usize()];
let r1p = &self.profile.r_linear[src0[src_cn.r_i() + src_channels]._as_usize()];
let g1p = &self.profile.g_linear[src0[src_cn.g_i() + src_channels]._as_usize()];
let b1p = &self.profile.b_linear[src0[src_cn.b_i() + src_channels]._as_usize()];
let r2p = &self.profile.r_linear[src1[src_cn.r_i()]._as_usize()];
let g2p = &self.profile.g_linear[src1[src_cn.g_i()]._as_usize()];
let b2p = &self.profile.b_linear[src1[src_cn.b_i()]._as_usize()];
let r3p = &self.profile.r_linear[src1[src_cn.r_i() + src_channels]._as_usize()];
let g3p = &self.profile.g_linear[src1[src_cn.g_i() + src_channels]._as_usize()];
let b3p = &self.profile.b_linear[src1[src_cn.b_i() + src_channels]._as_usize()];
r0 = vld1q_dup_f32(r0p);
g0 = vld1q_dup_f32(g0p);
b0 = vld1q_dup_f32(b0p);
r1 = vld1q_dup_f32(r1p);
g1 = vld1q_dup_f32(g1p);
b1 = vld1q_dup_f32(b1p);
r2 = vld1q_dup_f32(r2p);
g2 = vld1q_dup_f32(g2p);
b2 = vld1q_dup_f32(b2p);
r3 = vld1q_dup_f32(r3p);
g3 = vld1q_dup_f32(g3p);
b3 = vld1q_dup_f32(b3p);
a0 = if src_channels == 4 {
src0[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src0[src_cn.a_i() + src_channels]
} else {
max_colors
};
a2 = if src_channels == 4 {
src1[src_cn.a_i()]
} else {
max_colors
};
a3 = if src_channels == 4 {
src1[src_cn.a_i() + src_channels]
} else {
max_colors
};
} else {
r0 = vdupq_n_f32(0.);
g0 = vdupq_n_f32(0.);
b0 = vdupq_n_f32(0.);
r1 = vdupq_n_f32(0.);
g1 = vdupq_n_f32(0.);
b1 = vdupq_n_f32(0.);
r2 = vdupq_n_f32(0.);
g2 = vdupq_n_f32(0.);
b2 = vdupq_n_f32(0.);
r3 = vdupq_n_f32(0.);
g3 = vdupq_n_f32(0.);
b3 = vdupq_n_f32(0.);
a0 = max_colors;
a1 = max_colors;
a2 = max_colors;
a3 = max_colors;
}
for (((src0, src1), dst0), dst1) in src_iter0
.zip(src_iter1)
.zip(dst0.chunks_exact_mut(dst_channels * 2))
.zip(dst1.chunks_exact_mut(dst_channels * 2))
{
let v0_0 = vmulq_f32(r0, m0);
let v0_1 = vmulq_f32(r1, m0);
let v0_2 = vmulq_f32(r2, m0);
let v0_3 = vmulq_f32(r3, m0);
let v1_0 = vfmaq_f32(v0_0, g0, m1);
let v1_1 = vfmaq_f32(v0_1, g1, m1);
let v1_2 = vfmaq_f32(v0_2, g2, m1);
let v1_3 = vfmaq_f32(v0_3, g3, m1);
let mut vr0 = vfmaq_f32(v1_0, b0, m2);
let mut vr1 = vfmaq_f32(v1_1, b1, m2);
let mut vr2 = vfmaq_f32(v1_2, b2, m2);
let mut vr3 = vfmaq_f32(v1_3, b3, m2);
vr0 = vfmaq_f32(rnd, vr0, v_scale);
vr1 = vfmaq_f32(rnd, vr1, v_scale);
vr2 = vfmaq_f32(rnd, vr2, v_scale);
vr3 = vfmaq_f32(rnd, vr3, v_scale);
vr0 = vminq_f32(vr0, v_scale);
vr1 = vminq_f32(vr1, v_scale);
vr2 = vminq_f32(vr2, v_scale);
vr3 = vminq_f32(vr3, v_scale);
let zx0 = vcvtq_u32_f32(vr0);
let zx1 = vcvtq_u32_f32(vr1);
let zx2 = vcvtq_u32_f32(vr2);
let zx3 = vcvtq_u32_f32(vr3);
vst1q_u32(temporary0.0.as_mut_ptr() as *mut _, zx0);
vst1q_u32(temporary1.0.as_mut_ptr() as *mut _, zx1);
vst1q_u32(temporary2.0.as_mut_ptr() as *mut _, zx2);
vst1q_u32(temporary3.0.as_mut_ptr() as *mut _, zx3);
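// The u32 stores alias the u16 scratch arrays: on little-endian aarch64 the low
// half of 32-bit lane k sits at u16 index 2*k, hence the 0/2/4 indices below.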
let r0p = &self.profile.r_linear[src0[src_cn.r_i()]._as_usize()];
let g0p = &self.profile.g_linear[src0[src_cn.g_i()]._as_usize()];
let b0p = &self.profile.b_linear[src0[src_cn.b_i()]._as_usize()];
let r1p = &self.profile.r_linear[src0[src_cn.r_i() + src_channels]._as_usize()];
let g1p = &self.profile.g_linear[src0[src_cn.g_i() + src_channels]._as_usize()];
let b1p = &self.profile.b_linear[src0[src_cn.b_i() + src_channels]._as_usize()];
let r2p = &self.profile.r_linear[src1[src_cn.r_i()]._as_usize()];
let g2p = &self.profile.g_linear[src1[src_cn.g_i()]._as_usize()];
let b2p = &self.profile.b_linear[src1[src_cn.b_i()]._as_usize()];
let r3p = &self.profile.r_linear[src1[src_cn.r_i() + src_channels]._as_usize()];
let g3p = &self.profile.g_linear[src1[src_cn.g_i() + src_channels]._as_usize()];
let b3p = &self.profile.b_linear[src1[src_cn.b_i() + src_channels]._as_usize()];
r0 = vld1q_dup_f32(r0p);
g0 = vld1q_dup_f32(g0p);
b0 = vld1q_dup_f32(b0p);
r1 = vld1q_dup_f32(r1p);
g1 = vld1q_dup_f32(g1p);
b1 = vld1q_dup_f32(b1p);
r2 = vld1q_dup_f32(r2p);
g2 = vld1q_dup_f32(g2p);
b2 = vld1q_dup_f32(b2p);
r3 = vld1q_dup_f32(r3p);
g3 = vld1q_dup_f32(g3p);
b3 = vld1q_dup_f32(b3p);
dst0[dst_cn.r_i()] = self.profile.r_gamma[temporary0.0[0] as usize];
dst0[dst_cn.g_i()] = self.profile.g_gamma[temporary0.0[2] as usize];
dst0[dst_cn.b_i()] = self.profile.b_gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i()] = a0;
}
dst0[dst_cn.r_i() + dst_channels] =
self.profile.r_gamma[temporary1.0[0] as usize];
dst0[dst_cn.g_i() + dst_channels] =
self.profile.g_gamma[temporary1.0[2] as usize];
dst0[dst_cn.b_i() + dst_channels] =
self.profile.b_gamma[temporary1.0[4] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i() + dst_channels] = a1;
}
dst1[dst_cn.r_i()] = self.profile.r_gamma[temporary2.0[0] as usize];
dst1[dst_cn.g_i()] = self.profile.g_gamma[temporary2.0[2] as usize];
dst1[dst_cn.b_i()] = self.profile.b_gamma[temporary2.0[4] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i()] = a2;
}
dst1[dst_cn.r_i() + dst_channels] =
self.profile.r_gamma[temporary3.0[0] as usize];
dst1[dst_cn.g_i() + dst_channels] =
self.profile.g_gamma[temporary3.0[2] as usize];
dst1[dst_cn.b_i() + dst_channels] =
self.profile.b_gamma[temporary3.0[4] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i() + dst_channels] = a3;
}
a0 = if src_channels == 4 {
src0[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src0[src_cn.a_i() + src_channels]
} else {
max_colors
};
a2 = if src_channels == 4 {
src1[src_cn.a_i()]
} else {
max_colors
};
a3 = if src_channels == 4 {
src1[src_cn.a_i() + src_channels]
} else {
max_colors
};
}
if let (Some(dst0), Some(dst1)) = (
dst0.chunks_exact_mut(dst_channels * 2).last(),
dst1.chunks_exact_mut(dst_channels * 2).last(),
) {
let v0_0 = vmulq_f32(r0, m0);
let v0_1 = vmulq_f32(r1, m0);
let v0_2 = vmulq_f32(r2, m0);
let v0_3 = vmulq_f32(r3, m0);
let v1_0 = vfmaq_f32(v0_0, g0, m1);
let v1_1 = vfmaq_f32(v0_1, g1, m1);
let v1_2 = vfmaq_f32(v0_2, g2, m1);
let v1_3 = vfmaq_f32(v0_3, g3, m1);
let mut vr0 = vfmaq_f32(v1_0, b0, m2);
let mut vr1 = vfmaq_f32(v1_1, b1, m2);
let mut vr2 = vfmaq_f32(v1_2, b2, m2);
let mut vr3 = vfmaq_f32(v1_3, b3, m2);
vr0 = vfmaq_f32(rnd, vr0, v_scale);
vr1 = vfmaq_f32(rnd, vr1, v_scale);
vr2 = vfmaq_f32(rnd, vr2, v_scale);
vr3 = vfmaq_f32(rnd, vr3, v_scale);
vr0 = vminq_f32(vr0, v_scale);
vr1 = vminq_f32(vr1, v_scale);
vr2 = vminq_f32(vr2, v_scale);
vr3 = vminq_f32(vr3, v_scale);
let zx0 = vcvtq_u32_f32(vr0);
let zx1 = vcvtq_u32_f32(vr1);
let zx2 = vcvtq_u32_f32(vr2);
let zx3 = vcvtq_u32_f32(vr3);
vst1q_u32(temporary0.0.as_mut_ptr() as *mut _, zx0);
vst1q_u32(temporary1.0.as_mut_ptr() as *mut _, zx1);
vst1q_u32(temporary2.0.as_mut_ptr() as *mut _, zx2);
vst1q_u32(temporary3.0.as_mut_ptr() as *mut _, zx3);
dst0[dst_cn.r_i()] = self.profile.r_gamma[temporary0.0[0] as usize];
dst0[dst_cn.g_i()] = self.profile.g_gamma[temporary0.0[2] as usize];
dst0[dst_cn.b_i()] = self.profile.b_gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i()] = a0;
}
dst0[dst_cn.r_i() + dst_channels] =
self.profile.r_gamma[temporary1.0[0] as usize];
dst0[dst_cn.g_i() + dst_channels] =
self.profile.g_gamma[temporary1.0[2] as usize];
dst0[dst_cn.b_i() + dst_channels] =
self.profile.b_gamma[temporary1.0[4] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i() + dst_channels] = a1;
}
dst1[dst_cn.r_i()] = self.profile.r_gamma[temporary2.0[0] as usize];
dst1[dst_cn.g_i()] = self.profile.g_gamma[temporary2.0[2] as usize];
dst1[dst_cn.b_i()] = self.profile.b_gamma[temporary2.0[4] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i()] = a2;
}
dst1[dst_cn.r_i() + dst_channels] =
self.profile.r_gamma[temporary3.0[0] as usize];
dst1[dst_cn.g_i() + dst_channels] =
self.profile.g_gamma[temporary3.0[2] as usize];
dst1[dst_cn.b_i() + dst_channels] =
self.profile.b_gamma[temporary3.0[4] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i() + dst_channels] = a3;
}
}
}
for (src, dst) in src_remainder
.chunks_exact(src_channels)
.zip(dst_remainder.chunks_exact_mut(dst_channels))
{
let rp = &self.profile.r_linear[src[src_cn.r_i()]._as_usize()];
let gp = &self.profile.g_linear[src[src_cn.g_i()]._as_usize()];
let bp = &self.profile.b_linear[src[src_cn.b_i()]._as_usize()];
let r = vld1q_dup_f32(rp);
let g = vld1q_dup_f32(gp);
let b = vld1q_dup_f32(bp);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let v0 = vmulq_f32(r, m0);
let v1 = vfmaq_f32(v0, g, m1);
let mut v = vfmaq_f32(v1, b, m2);
v = vfmaq_f32(rnd, v, v_scale);
v = vminq_f32(v, v_scale);
let zx = vcvtq_u32_f32(v);
vst1q_u32(temporary0.0.as_mut_ptr() as *mut _, zx);
dst[dst_cn.r_i()] = self.profile.r_gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.g_gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.b_gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
Ok(())
}
}

View File

@@ -0,0 +1,423 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::neon::rgb_xyz::NeonAlignedU16;
use crate::conversions::neon::rgb_xyz_q2_13::{split_by_twos, split_by_twos_mut};
use crate::conversions::rgbxyz::TransformMatrixShaperOptimized;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::aarch64::*;
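// Same shaper pipeline as TransformShaperRgbNeon, but for profiles where R, G and B
// share a single linearization table and a single gamma table.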
pub(crate) struct TransformShaperRgbOptNeon<
T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> {
pub(crate) profile: TransformMatrixShaperOptimized<T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
impl<
T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> TransformExecutor<T>
for TransformShaperRgbOptNeon<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
let mut temporary0 = NeonAlignedU16([0; 8]);
let mut temporary1 = NeonAlignedU16([0; 8]);
let mut temporary2 = NeonAlignedU16([0; 8]);
let mut temporary3 = NeonAlignedU16([0; 8]);
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let t = self.profile.adaptation_matrix.transpose();
let scale = (GAMMA_LUT - 1) as f32;
let max_colors: T = ((1 << self.bit_depth) - 1).as_();
let (src_chunks, src_remainder) = split_by_twos(src, src_channels);
let (dst_chunks, dst_remainder) = split_by_twos_mut(dst, dst_channels);
unsafe {
let m0 = vld1q_f32([t.v[0][0], t.v[0][1], t.v[0][2], 0.].as_ptr());
let m1 = vld1q_f32([t.v[1][0], t.v[1][1], t.v[1][2], 0.].as_ptr());
let m2 = vld1q_f32([t.v[2][0], t.v[2][1], t.v[2][2], 0.].as_ptr());
let v_scale = vdupq_n_f32(scale);
let rnd = vdupq_n_f32(0.5);
if !src_chunks.is_empty() {
let (src0, src1) = src_chunks.split_at(src_chunks.len() / 2);
let (dst0, dst1) = dst_chunks.split_at_mut(dst_chunks.len() / 2);
let mut src_iter0 = src0.chunks_exact(src_channels * 2);
let mut src_iter1 = src1.chunks_exact(src_channels * 2);
let (mut r0, mut g0, mut b0, mut a0);
let (mut r1, mut g1, mut b1, mut a1);
let (mut r2, mut g2, mut b2, mut a2);
let (mut r3, mut g3, mut b3, mut a3);
if let (Some(src0), Some(src1)) = (src_iter0.next(), src_iter1.next()) {
let r0p = &self.profile.linear[src0[src_cn.r_i()]._as_usize()];
let g0p = &self.profile.linear[src0[src_cn.g_i()]._as_usize()];
let b0p = &self.profile.linear[src0[src_cn.b_i()]._as_usize()];
let r1p = &self.profile.linear[src0[src_cn.r_i() + src_channels]._as_usize()];
let g1p = &self.profile.linear[src0[src_cn.g_i() + src_channels]._as_usize()];
let b1p = &self.profile.linear[src0[src_cn.b_i() + src_channels]._as_usize()];
let r2p = &self.profile.linear[src1[src_cn.r_i()]._as_usize()];
let g2p = &self.profile.linear[src1[src_cn.g_i()]._as_usize()];
let b2p = &self.profile.linear[src1[src_cn.b_i()]._as_usize()];
let r3p = &self.profile.linear[src1[src_cn.r_i() + src_channels]._as_usize()];
let g3p = &self.profile.linear[src1[src_cn.g_i() + src_channels]._as_usize()];
let b3p = &self.profile.linear[src1[src_cn.b_i() + src_channels]._as_usize()];
r0 = vld1q_dup_f32(r0p);
g0 = vld1q_dup_f32(g0p);
b0 = vld1q_dup_f32(b0p);
r1 = vld1q_dup_f32(r1p);
g1 = vld1q_dup_f32(g1p);
b1 = vld1q_dup_f32(b1p);
r2 = vld1q_dup_f32(r2p);
g2 = vld1q_dup_f32(g2p);
b2 = vld1q_dup_f32(b2p);
r3 = vld1q_dup_f32(r3p);
g3 = vld1q_dup_f32(g3p);
b3 = vld1q_dup_f32(b3p);
a0 = if src_channels == 4 {
src0[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src0[src_cn.a_i() + src_channels]
} else {
max_colors
};
a2 = if src_channels == 4 {
src1[src_cn.a_i()]
} else {
max_colors
};
a3 = if src_channels == 4 {
src1[src_cn.a_i() + src_channels]
} else {
max_colors
};
} else {
r0 = vdupq_n_f32(0.);
g0 = vdupq_n_f32(0.);
b0 = vdupq_n_f32(0.);
r1 = vdupq_n_f32(0.);
g1 = vdupq_n_f32(0.);
b1 = vdupq_n_f32(0.);
r2 = vdupq_n_f32(0.);
g2 = vdupq_n_f32(0.);
b2 = vdupq_n_f32(0.);
r3 = vdupq_n_f32(0.);
g3 = vdupq_n_f32(0.);
b3 = vdupq_n_f32(0.);
a0 = max_colors;
a1 = max_colors;
a2 = max_colors;
a3 = max_colors;
}
for (((src0, src1), dst0), dst1) in src_iter0
.zip(src_iter1)
.zip(dst0.chunks_exact_mut(dst_channels * 2))
.zip(dst1.chunks_exact_mut(dst_channels * 2))
{
let v0_0 = vmulq_f32(r0, m0);
let v0_1 = vmulq_f32(r1, m0);
let v0_2 = vmulq_f32(r2, m0);
let v0_3 = vmulq_f32(r3, m0);
let v1_0 = vfmaq_f32(v0_0, g0, m1);
let v1_1 = vfmaq_f32(v0_1, g1, m1);
let v1_2 = vfmaq_f32(v0_2, g2, m1);
let v1_3 = vfmaq_f32(v0_3, g3, m1);
let mut vr0 = vfmaq_f32(v1_0, b0, m2);
let mut vr1 = vfmaq_f32(v1_1, b1, m2);
let mut vr2 = vfmaq_f32(v1_2, b2, m2);
let mut vr3 = vfmaq_f32(v1_3, b3, m2);
vr0 = vfmaq_f32(rnd, vr0, v_scale);
vr1 = vfmaq_f32(rnd, vr1, v_scale);
vr2 = vfmaq_f32(rnd, vr2, v_scale);
vr3 = vfmaq_f32(rnd, vr3, v_scale);
vr0 = vminq_f32(vr0, v_scale);
vr1 = vminq_f32(vr1, v_scale);
vr2 = vminq_f32(vr2, v_scale);
vr3 = vminq_f32(vr3, v_scale);
let zx0 = vcvtq_u32_f32(vr0);
let zx1 = vcvtq_u32_f32(vr1);
let zx2 = vcvtq_u32_f32(vr2);
let zx3 = vcvtq_u32_f32(vr3);
vst1q_u32(temporary0.0.as_mut_ptr() as *mut _, zx0);
vst1q_u32(temporary1.0.as_mut_ptr() as *mut _, zx1);
vst1q_u32(temporary2.0.as_mut_ptr() as *mut _, zx2);
vst1q_u32(temporary3.0.as_mut_ptr() as *mut _, zx3);
let r0p = &self.profile.linear[src0[src_cn.r_i()]._as_usize()];
let g0p = &self.profile.linear[src0[src_cn.g_i()]._as_usize()];
let b0p = &self.profile.linear[src0[src_cn.b_i()]._as_usize()];
let r1p = &self.profile.linear[src0[src_cn.r_i() + src_channels]._as_usize()];
let g1p = &self.profile.linear[src0[src_cn.g_i() + src_channels]._as_usize()];
let b1p = &self.profile.linear[src0[src_cn.b_i() + src_channels]._as_usize()];
let r2p = &self.profile.linear[src1[src_cn.r_i()]._as_usize()];
let g2p = &self.profile.linear[src1[src_cn.g_i()]._as_usize()];
let b2p = &self.profile.linear[src1[src_cn.b_i()]._as_usize()];
let r3p = &self.profile.linear[src1[src_cn.r_i() + src_channels]._as_usize()];
let g3p = &self.profile.linear[src1[src_cn.g_i() + src_channels]._as_usize()];
let b3p = &self.profile.linear[src1[src_cn.b_i() + src_channels]._as_usize()];
r0 = vld1q_dup_f32(r0p);
g0 = vld1q_dup_f32(g0p);
b0 = vld1q_dup_f32(b0p);
r1 = vld1q_dup_f32(r1p);
g1 = vld1q_dup_f32(g1p);
b1 = vld1q_dup_f32(b1p);
r2 = vld1q_dup_f32(r2p);
g2 = vld1q_dup_f32(g2p);
b2 = vld1q_dup_f32(b2p);
r3 = vld1q_dup_f32(r3p);
g3 = vld1q_dup_f32(g3p);
b3 = vld1q_dup_f32(b3p);
dst0[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
dst0[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
dst0[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i()] = a0;
}
dst0[dst_cn.r_i() + dst_channels] =
self.profile.gamma[temporary1.0[0] as usize];
dst0[dst_cn.g_i() + dst_channels] =
self.profile.gamma[temporary1.0[2] as usize];
dst0[dst_cn.b_i() + dst_channels] =
self.profile.gamma[temporary1.0[4] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i() + dst_channels] = a1;
}
dst1[dst_cn.r_i()] = self.profile.gamma[temporary2.0[0] as usize];
dst1[dst_cn.g_i()] = self.profile.gamma[temporary2.0[2] as usize];
dst1[dst_cn.b_i()] = self.profile.gamma[temporary2.0[4] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i()] = a2;
}
dst1[dst_cn.r_i() + dst_channels] =
self.profile.gamma[temporary3.0[0] as usize];
dst1[dst_cn.g_i() + dst_channels] =
self.profile.gamma[temporary3.0[2] as usize];
dst1[dst_cn.b_i() + dst_channels] =
self.profile.gamma[temporary3.0[4] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i() + dst_channels] = a3;
}
a0 = if src_channels == 4 {
src0[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src0[src_cn.a_i() + src_channels]
} else {
max_colors
};
a2 = if src_channels == 4 {
src1[src_cn.a_i()]
} else {
max_colors
};
a3 = if src_channels == 4 {
src1[src_cn.a_i() + src_channels]
} else {
max_colors
};
}
if let (Some(dst0), Some(dst1)) = (
dst0.chunks_exact_mut(dst_channels * 2).last(),
dst1.chunks_exact_mut(dst_channels * 2).last(),
) {
let v0_0 = vmulq_f32(r0, m0);
let v0_1 = vmulq_f32(r1, m0);
let v0_2 = vmulq_f32(r2, m0);
let v0_3 = vmulq_f32(r3, m0);
let v1_0 = vfmaq_f32(v0_0, g0, m1);
let v1_1 = vfmaq_f32(v0_1, g1, m1);
let v1_2 = vfmaq_f32(v0_2, g2, m1);
let v1_3 = vfmaq_f32(v0_3, g3, m1);
let mut vr0 = vfmaq_f32(v1_0, b0, m2);
let mut vr1 = vfmaq_f32(v1_1, b1, m2);
let mut vr2 = vfmaq_f32(v1_2, b2, m2);
let mut vr3 = vfmaq_f32(v1_3, b3, m2);
vr0 = vfmaq_f32(rnd, vr0, v_scale);
vr1 = vfmaq_f32(rnd, vr1, v_scale);
vr2 = vfmaq_f32(rnd, vr2, v_scale);
vr3 = vfmaq_f32(rnd, vr3, v_scale);
vr0 = vminq_f32(vr0, v_scale);
vr1 = vminq_f32(vr1, v_scale);
vr2 = vminq_f32(vr2, v_scale);
vr3 = vminq_f32(vr3, v_scale);
let zx0 = vcvtq_u32_f32(vr0);
let zx1 = vcvtq_u32_f32(vr1);
let zx2 = vcvtq_u32_f32(vr2);
let zx3 = vcvtq_u32_f32(vr3);
vst1q_u32(temporary0.0.as_mut_ptr() as *mut _, zx0);
vst1q_u32(temporary1.0.as_mut_ptr() as *mut _, zx1);
vst1q_u32(temporary2.0.as_mut_ptr() as *mut _, zx2);
vst1q_u32(temporary3.0.as_mut_ptr() as *mut _, zx3);
dst0[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
dst0[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
dst0[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i()] = a0;
}
dst0[dst_cn.r_i() + dst_channels] =
self.profile.gamma[temporary1.0[0] as usize];
dst0[dst_cn.g_i() + dst_channels] =
self.profile.gamma[temporary1.0[2] as usize];
dst0[dst_cn.b_i() + dst_channels] =
self.profile.gamma[temporary1.0[4] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i() + dst_channels] = a1;
}
dst1[dst_cn.r_i()] = self.profile.gamma[temporary2.0[0] as usize];
dst1[dst_cn.g_i()] = self.profile.gamma[temporary2.0[2] as usize];
dst1[dst_cn.b_i()] = self.profile.gamma[temporary2.0[4] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i()] = a2;
}
dst1[dst_cn.r_i() + dst_channels] =
self.profile.gamma[temporary3.0[0] as usize];
dst1[dst_cn.g_i() + dst_channels] =
self.profile.gamma[temporary3.0[2] as usize];
dst1[dst_cn.b_i() + dst_channels] =
self.profile.gamma[temporary3.0[4] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i() + dst_channels] = a3;
}
}
}
for (src, dst) in src_remainder
.chunks_exact(src_channels)
.zip(dst_remainder.chunks_exact_mut(dst_channels))
{
let rp = &self.profile.linear[src[src_cn.r_i()]._as_usize()];
let gp = &self.profile.linear[src[src_cn.g_i()]._as_usize()];
let bp = &self.profile.linear[src[src_cn.b_i()]._as_usize()];
let r = vld1q_dup_f32(rp);
let g = vld1q_dup_f32(gp);
let b = vld1q_dup_f32(bp);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let v0 = vmulq_f32(r, m0);
let v1 = vfmaq_f32(v0, g, m1);
let mut v = vfmaq_f32(v1, b, m2);
v = vfmaq_f32(rnd, v, v_scale);
v = vminq_f32(v, v_scale);
let zx = vcvtq_u32_f32(v);
vst1q_u32(temporary0.0.as_mut_ptr() as *mut _, zx);
dst[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
Ok(())
}
}

View File

@@ -0,0 +1,437 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::neon::rgb_xyz_q2_13::{split_by_twos, split_by_twos_mut};
use crate::conversions::rgbxyz_fixed::TransformMatrixShaperFixedPointOpt;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::aarch64::*;
pub(crate) struct TransformShaperQ1_30NeonOpt<
T: Copy,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
const PRECISION: i32,
> {
pub(crate) profile: TransformMatrixShaperFixedPointOpt<i32, i32, T, LINEAR_CAP>,
}
impl<
T: Copy + PointeeSizeExpressible + 'static + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
const PRECISION: i32,
>
TransformShaperQ1_30NeonOpt<
T,
SRC_LAYOUT,
DST_LAYOUT,
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
PRECISION,
>
where
u32: AsPrimitive<T>,
{
#[target_feature(enable = "rdm")]
unsafe fn transform_impl(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let t = self.profile.adaptation_matrix.transpose();
let max_colors: T = ((1 << BIT_DEPTH) - 1).as_();
let (src_chunks, src_remainder) = split_by_twos(src, src_channels);
let (dst_chunks, dst_remainder) = split_by_twos_mut(dst, dst_channels);
unsafe {
let m0 = vld1q_s32([t.v[0][0], t.v[0][1], t.v[0][2], 0].as_ptr());
let m1 = vld1q_s32([t.v[1][0], t.v[1][1], t.v[1][2], 0].as_ptr());
let m2 = vld1q_s32([t.v[2][0], t.v[2][1], t.v[2][2], 0].as_ptr());
let v_max_value = vdup_n_u16((GAMMA_LUT - 1) as u16);
if !src_chunks.is_empty() {
let (src0, src1) = src_chunks.split_at(src_chunks.len() / 2);
let (dst0, dst1) = dst_chunks.split_at_mut(dst_chunks.len() / 2);
let mut src_iter0 = src0.chunks_exact(src_channels * 2);
let mut src_iter1 = src1.chunks_exact(src_channels * 2);
let (mut r0, mut g0, mut b0, mut a0);
let (mut r1, mut g1, mut b1, mut a1);
let (mut r2, mut g2, mut b2, mut a2);
let (mut r3, mut g3, mut b3, mut a3);
if let (Some(src0), Some(src1)) = (src_iter0.next(), src_iter1.next()) {
let r0p = &self.profile.linear[src0[src_cn.r_i()]._as_usize()];
let g0p = &self.profile.linear[src0[src_cn.g_i()]._as_usize()];
let b0p = &self.profile.linear[src0[src_cn.b_i()]._as_usize()];
let r1p = &self.profile.linear[src0[src_cn.r_i() + src_channels]._as_usize()];
let g1p = &self.profile.linear[src0[src_cn.g_i() + src_channels]._as_usize()];
let b1p = &self.profile.linear[src0[src_cn.b_i() + src_channels]._as_usize()];
let r2p = &self.profile.linear[src1[src_cn.r_i()]._as_usize()];
let g2p = &self.profile.linear[src1[src_cn.g_i()]._as_usize()];
let b2p = &self.profile.linear[src1[src_cn.b_i()]._as_usize()];
let r3p = &self.profile.linear[src1[src_cn.r_i() + src_channels]._as_usize()];
let g3p = &self.profile.linear[src1[src_cn.g_i() + src_channels]._as_usize()];
let b3p = &self.profile.linear[src1[src_cn.b_i() + src_channels]._as_usize()];
r0 = vld1q_dup_s32(r0p);
g0 = vld1q_dup_s32(g0p);
b0 = vld1q_dup_s32(b0p);
r1 = vld1q_dup_s32(r1p);
g1 = vld1q_dup_s32(g1p);
b1 = vld1q_dup_s32(b1p);
r2 = vld1q_dup_s32(r2p);
g2 = vld1q_dup_s32(g2p);
b2 = vld1q_dup_s32(b2p);
r3 = vld1q_dup_s32(r3p);
g3 = vld1q_dup_s32(g3p);
b3 = vld1q_dup_s32(b3p);
a0 = if src_channels == 4 {
src0[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src0[src_cn.a_i() + src_channels]
} else {
max_colors
};
a2 = if src_channels == 4 {
src1[src_cn.a_i()]
} else {
max_colors
};
a3 = if src_channels == 4 {
src1[src_cn.a_i() + src_channels]
} else {
max_colors
};
} else {
r0 = vdupq_n_s32(0);
g0 = vdupq_n_s32(0);
b0 = vdupq_n_s32(0);
r1 = vdupq_n_s32(0);
g1 = vdupq_n_s32(0);
b1 = vdupq_n_s32(0);
r2 = vdupq_n_s32(0);
g2 = vdupq_n_s32(0);
b2 = vdupq_n_s32(0);
r3 = vdupq_n_s32(0);
g3 = vdupq_n_s32(0);
b3 = vdupq_n_s32(0);
a0 = max_colors;
a1 = max_colors;
a2 = max_colors;
a3 = max_colors;
}
for (((src0, src1), dst0), dst1) in src_iter0
.zip(src_iter1)
.zip(dst0.chunks_exact_mut(dst_channels * 2))
.zip(dst1.chunks_exact_mut(dst_channels * 2))
{
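// Q1.30 fixed-point matrix multiply: vqrdmulhq_s32 yields the rounding-doubling
// high half, sat((2*a*b + 2^31) >> 32), and vqrdmlahq_s32 accumulates the same product.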
let v0_0 = vqrdmulhq_s32(r0, m0);
let v0_1 = vqrdmulhq_s32(r1, m0);
let v0_2 = vqrdmulhq_s32(r2, m0);
let v0_3 = vqrdmulhq_s32(r3, m0);
let v1_0 = vqrdmlahq_s32(v0_0, g0, m1);
let v1_1 = vqrdmlahq_s32(v0_1, g1, m1);
let v1_2 = vqrdmlahq_s32(v0_2, g2, m1);
let v1_3 = vqrdmlahq_s32(v0_3, g3, m1);
let vr0 = vqrdmlahq_s32(v1_0, b0, m2);
let vr1 = vqrdmlahq_s32(v1_1, b1, m2);
let vr2 = vqrdmlahq_s32(v1_2, b2, m2);
let vr3 = vqrdmlahq_s32(v1_3, b3, m2);
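// Saturating narrow s32 -> u16 clamps negative results to zero; bit depths below 16
// still need an explicit min against the gamma-LUT ceiling.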
let mut vr0 = vqmovun_s32(vr0);
let mut vr1 = vqmovun_s32(vr1);
let mut vr2 = vqmovun_s32(vr2);
let mut vr3 = vqmovun_s32(vr3);
if BIT_DEPTH != 16 {
vr0 = vmin_u16(vr0, v_max_value);
vr1 = vmin_u16(vr1, v_max_value);
vr2 = vmin_u16(vr2, v_max_value);
vr3 = vmin_u16(vr3, v_max_value);
}
let r0p = &self.profile.linear[src0[src_cn.r_i()]._as_usize()];
let g0p = &self.profile.linear[src0[src_cn.g_i()]._as_usize()];
let b0p = &self.profile.linear[src0[src_cn.b_i()]._as_usize()];
let r1p = &self.profile.linear[src0[src_cn.r_i() + src_channels]._as_usize()];
let g1p = &self.profile.linear[src0[src_cn.g_i() + src_channels]._as_usize()];
let b1p = &self.profile.linear[src0[src_cn.b_i() + src_channels]._as_usize()];
let r2p = &self.profile.linear[src1[src_cn.r_i()]._as_usize()];
let g2p = &self.profile.linear[src1[src_cn.g_i()]._as_usize()];
let b2p = &self.profile.linear[src1[src_cn.b_i()]._as_usize()];
let r3p = &self.profile.linear[src1[src_cn.r_i() + src_channels]._as_usize()];
let g3p = &self.profile.linear[src1[src_cn.g_i() + src_channels]._as_usize()];
let b3p = &self.profile.linear[src1[src_cn.b_i() + src_channels]._as_usize()];
r0 = vld1q_dup_s32(r0p);
g0 = vld1q_dup_s32(g0p);
b0 = vld1q_dup_s32(b0p);
r1 = vld1q_dup_s32(r1p);
g1 = vld1q_dup_s32(g1p);
b1 = vld1q_dup_s32(b1p);
r2 = vld1q_dup_s32(r2p);
g2 = vld1q_dup_s32(g2p);
b2 = vld1q_dup_s32(b2p);
r3 = vld1q_dup_s32(r3p);
g3 = vld1q_dup_s32(g3p);
b3 = vld1q_dup_s32(b3p);
dst0[dst_cn.r_i()] = self.profile.gamma[vget_lane_u16::<0>(vr0) as usize];
dst0[dst_cn.g_i()] = self.profile.gamma[vget_lane_u16::<1>(vr0) as usize];
dst0[dst_cn.b_i()] = self.profile.gamma[vget_lane_u16::<2>(vr0) as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i()] = a0;
}
dst0[dst_cn.r_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<0>(vr1) as usize];
dst0[dst_cn.g_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<1>(vr1) as usize];
dst0[dst_cn.b_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<2>(vr1) as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i() + dst_channels] = a1;
}
dst1[dst_cn.r_i()] = self.profile.gamma[vget_lane_u16::<0>(vr2) as usize];
dst1[dst_cn.g_i()] = self.profile.gamma[vget_lane_u16::<1>(vr2) as usize];
dst1[dst_cn.b_i()] = self.profile.gamma[vget_lane_u16::<2>(vr2) as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i()] = a2;
}
dst1[dst_cn.r_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<0>(vr3) as usize];
dst1[dst_cn.g_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<1>(vr3) as usize];
dst1[dst_cn.b_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<2>(vr3) as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i() + dst_channels] = a3;
}
a0 = if src_channels == 4 {
src0[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src0[src_cn.a_i() + src_channels]
} else {
max_colors
};
a2 = if src_channels == 4 {
src1[src_cn.a_i()]
} else {
max_colors
};
a3 = if src_channels == 4 {
src1[src_cn.a_i() + src_channels]
} else {
max_colors
};
}
if let (Some(dst0), Some(dst1)) = (
dst0.chunks_exact_mut(dst_channels * 2).last(),
dst1.chunks_exact_mut(dst_channels * 2).last(),
) {
let v0_0 = vqrdmulhq_s32(r0, m0);
let v0_1 = vqrdmulhq_s32(r1, m0);
let v0_2 = vqrdmulhq_s32(r2, m0);
let v0_3 = vqrdmulhq_s32(r3, m0);
let v1_0 = vqrdmlahq_s32(v0_0, g0, m1);
let v1_1 = vqrdmlahq_s32(v0_1, g1, m1);
let v1_2 = vqrdmlahq_s32(v0_2, g2, m1);
let v1_3 = vqrdmlahq_s32(v0_3, g3, m1);
let vr0 = vqrdmlahq_s32(v1_0, b0, m2);
let vr1 = vqrdmlahq_s32(v1_1, b1, m2);
let vr2 = vqrdmlahq_s32(v1_2, b2, m2);
let vr3 = vqrdmlahq_s32(v1_3, b3, m2);
let mut vr0 = vqmovun_s32(vr0);
let mut vr1 = vqmovun_s32(vr1);
let mut vr2 = vqmovun_s32(vr2);
let mut vr3 = vqmovun_s32(vr3);
if BIT_DEPTH != 16 {
vr0 = vmin_u16(vr0, v_max_value);
vr1 = vmin_u16(vr1, v_max_value);
vr2 = vmin_u16(vr2, v_max_value);
vr3 = vmin_u16(vr3, v_max_value);
}
dst0[dst_cn.r_i()] = self.profile.gamma[vget_lane_u16::<0>(vr0) as usize];
dst0[dst_cn.g_i()] = self.profile.gamma[vget_lane_u16::<1>(vr0) as usize];
dst0[dst_cn.b_i()] = self.profile.gamma[vget_lane_u16::<2>(vr0) as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i()] = a0;
}
dst0[dst_cn.r_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<0>(vr1) as usize];
dst0[dst_cn.g_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<1>(vr1) as usize];
dst0[dst_cn.b_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<2>(vr1) as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i() + dst_channels] = a1;
}
dst1[dst_cn.r_i()] = self.profile.gamma[vget_lane_u16::<0>(vr2) as usize];
dst1[dst_cn.g_i()] = self.profile.gamma[vget_lane_u16::<1>(vr2) as usize];
dst1[dst_cn.b_i()] = self.profile.gamma[vget_lane_u16::<2>(vr2) as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i()] = a2;
}
dst1[dst_cn.r_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<0>(vr3) as usize];
dst1[dst_cn.g_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<1>(vr3) as usize];
dst1[dst_cn.b_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<2>(vr3) as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i() + dst_channels] = a3;
}
}
}
for (src, dst) in src_remainder
.chunks_exact(src_channels)
.zip(dst_remainder.chunks_exact_mut(dst_channels))
{
let rp = &self.profile.linear[src[src_cn.r_i()]._as_usize()];
let gp = &self.profile.linear[src[src_cn.g_i()]._as_usize()];
let bp = &self.profile.linear[src[src_cn.b_i()]._as_usize()];
let r = vld1q_dup_s32(rp);
let g = vld1q_dup_s32(gp);
let b = vld1q_dup_s32(bp);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let v0 = vqrdmulhq_s32(r, m0);
let v1 = vqrdmlahq_s32(v0, g, m1);
let v = vqrdmlahq_s32(v1, b, m2);
let mut vr0 = vqmovun_s32(v);
if BIT_DEPTH != 16 {
vr0 = vmin_u16(vr0, v_max_value);
}
dst[dst_cn.r_i()] = self.profile.gamma[vget_lane_u16::<0>(vr0) as usize];
dst[dst_cn.g_i()] = self.profile.gamma[vget_lane_u16::<1>(vr0) as usize];
dst[dst_cn.b_i()] = self.profile.gamma[vget_lane_u16::<2>(vr0) as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
Ok(())
}
}
impl<
T: Copy + PointeeSizeExpressible + 'static + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
const PRECISION: i32,
> TransformExecutor<T>
for TransformShaperQ1_30NeonOpt<
T,
SRC_LAYOUT,
DST_LAYOUT,
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
PRECISION,
>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
unsafe { self.transform_impl(src, dst) }
}
}

@@ -0,0 +1,412 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::rgbxyz_fixed::TransformMatrixShaperFixedPoint;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::aarch64::*;
#[allow(dead_code)]
#[inline]
pub(crate) fn split_by_twos<T: Copy>(data: &[T], channels: usize) -> (&[T], &[T]) {
let len = data.len() / (channels * 4);
let split_point = len * 4;
data.split_at(split_point * channels)
}
#[allow(dead_code)]
#[inline]
pub(crate) fn split_by_twos_mut<T: Copy>(data: &mut [T], channels: usize) -> (&mut [T], &mut [T]) {
let len = data.len() / (channels * 4);
let split_point = len * 4;
data.split_at_mut(split_point * channels)
}
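// A minimal usage sketch with hypothetical sizes, assuming 3-channel RGB:
// fourteen pixels split into a twelve-pixel head (a multiple of four, so each
// half-lane below can be walked two pixels at a time) and a two-pixel tail
// for the scalar remainder loop.
//
// let data = vec![0u8; 14 * 3];
// let (head, tail) = split_by_twos(&data, 3);
// assert_eq!(head.len(), 12 * 3);
// assert_eq!(tail.len(), 2 * 3);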
pub(crate) struct TransformShaperQ2_13Neon<
T: Copy,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> {
pub(crate) profile: TransformMatrixShaperFixedPoint<i16, T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
impl<
T: Copy + PointeeSizeExpressible + 'static + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> TransformExecutor<T>
for TransformShaperQ2_13Neon<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT, PRECISION>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let t = self.profile.adaptation_matrix.transpose();
let max_colors: T = ((1 << self.bit_depth) - 1).as_();
let (src_chunks, src_remainder) = split_by_twos(src, src_channels);
let (dst_chunks, dst_remainder) = split_by_twos_mut(dst, dst_channels);
unsafe {
let m0 = vld1_s16([t.v[0][0], t.v[0][1], t.v[0][2], 0].as_ptr());
let m1 = vld1_s16([t.v[1][0], t.v[1][1], t.v[1][2], 0].as_ptr());
let m2 = vld1_s16([t.v[2][0], t.v[2][1], t.v[2][2], 0].as_ptr());
let v_max_value = vdup_n_u16((GAMMA_LUT - 1) as u16);
let rnd = vdupq_n_s32(1 << (PRECISION - 1));
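// `rnd` seeds every accumulator with 1 << (PRECISION - 1) so that the later
// `vqshrun_n_s32::<PRECISION>` narrowing shift rounds to nearest instead of
// truncating toward zero.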
if !src_chunks.is_empty() {
let (src0, src1) = src_chunks.split_at(src_chunks.len() / 2);
let (dst0, dst1) = dst_chunks.split_at_mut(dst_chunks.len() / 2);
let mut src_iter0 = src0.chunks_exact(src_channels * 2);
let mut src_iter1 = src1.chunks_exact(src_channels * 2);
let (mut r0, mut g0, mut b0, mut a0);
let (mut r1, mut g1, mut b1, mut a1);
let (mut r2, mut g2, mut b2, mut a2);
let (mut r3, mut g3, mut b3, mut a3);
if let (Some(src0), Some(src1)) = (src_iter0.next(), src_iter1.next()) {
let r0p = &self.profile.r_linear[src0[src_cn.r_i()]._as_usize()];
let g0p = &self.profile.g_linear[src0[src_cn.g_i()]._as_usize()];
let b0p = &self.profile.b_linear[src0[src_cn.b_i()]._as_usize()];
let r1p = &self.profile.r_linear[src0[src_cn.r_i() + src_channels]._as_usize()];
let g1p = &self.profile.g_linear[src0[src_cn.g_i() + src_channels]._as_usize()];
let b1p = &self.profile.b_linear[src0[src_cn.b_i() + src_channels]._as_usize()];
let r2p = &self.profile.r_linear[src1[src_cn.r_i()]._as_usize()];
let g2p = &self.profile.g_linear[src1[src_cn.g_i()]._as_usize()];
let b2p = &self.profile.b_linear[src1[src_cn.b_i()]._as_usize()];
let r3p = &self.profile.r_linear[src1[src_cn.r_i() + src_channels]._as_usize()];
let g3p = &self.profile.g_linear[src1[src_cn.g_i() + src_channels]._as_usize()];
let b3p = &self.profile.b_linear[src1[src_cn.b_i() + src_channels]._as_usize()];
r0 = vld1_dup_s16(r0p);
g0 = vld1_dup_s16(g0p);
b0 = vld1_dup_s16(b0p);
r1 = vld1_dup_s16(r1p);
g1 = vld1_dup_s16(g1p);
b1 = vld1_dup_s16(b1p);
r2 = vld1_dup_s16(r2p);
g2 = vld1_dup_s16(g2p);
b2 = vld1_dup_s16(b2p);
r3 = vld1_dup_s16(r3p);
g3 = vld1_dup_s16(g3p);
b3 = vld1_dup_s16(b3p);
a0 = if src_channels == 4 {
src0[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src0[src_cn.a_i() + src_channels]
} else {
max_colors
};
a2 = if src_channels == 4 {
src1[src_cn.a_i()]
} else {
max_colors
};
a3 = if src_channels == 4 {
src1[src_cn.a_i() + src_channels]
} else {
max_colors
};
} else {
r0 = vdup_n_s16(0);
g0 = vdup_n_s16(0);
b0 = vdup_n_s16(0);
r1 = vdup_n_s16(0);
g1 = vdup_n_s16(0);
b1 = vdup_n_s16(0);
r2 = vdup_n_s16(0);
g2 = vdup_n_s16(0);
b2 = vdup_n_s16(0);
r3 = vdup_n_s16(0);
g3 = vdup_n_s16(0);
b3 = vdup_n_s16(0);
a0 = max_colors;
a1 = max_colors;
a2 = max_colors;
a3 = max_colors;
}
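// The loop below is software-pipelined: the r/g/b vectors primed above are
// consumed while the table loads for the next pair of pixels are issued, and
// the last primed set is flushed by the trailing `if let` block once the
// iterators are exhausted.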
for (((src0, src1), dst0), dst1) in src_iter0
.zip(src_iter1)
.zip(dst0.chunks_exact_mut(dst_channels * 2))
.zip(dst1.chunks_exact_mut(dst_channels * 2))
{
let v0_0 = vmlal_s16(rnd, r0, m0);
let v0_1 = vmlal_s16(rnd, r1, m0);
let v0_2 = vmlal_s16(rnd, r2, m0);
let v0_3 = vmlal_s16(rnd, r3, m0);
let v1_0 = vmlal_s16(v0_0, g0, m1);
let v1_1 = vmlal_s16(v0_1, g1, m1);
let v1_2 = vmlal_s16(v0_2, g2, m1);
let v1_3 = vmlal_s16(v0_3, g3, m1);
let vr0 = vmlal_s16(v1_0, b0, m2);
let vr1 = vmlal_s16(v1_1, b1, m2);
let vr2 = vmlal_s16(v1_2, b2, m2);
let vr3 = vmlal_s16(v1_3, b3, m2);
let mut vr0 = vqshrun_n_s32::<PRECISION>(vr0);
let mut vr1 = vqshrun_n_s32::<PRECISION>(vr1);
let mut vr2 = vqshrun_n_s32::<PRECISION>(vr2);
let mut vr3 = vqshrun_n_s32::<PRECISION>(vr3);
vr0 = vmin_u16(vr0, v_max_value);
vr1 = vmin_u16(vr1, v_max_value);
vr2 = vmin_u16(vr2, v_max_value);
vr3 = vmin_u16(vr3, v_max_value);
let r0p = &self.profile.r_linear[src0[src_cn.r_i()]._as_usize()];
let g0p = &self.profile.g_linear[src0[src_cn.g_i()]._as_usize()];
let b0p = &self.profile.b_linear[src0[src_cn.b_i()]._as_usize()];
let r1p = &self.profile.r_linear[src0[src_cn.r_i() + src_channels]._as_usize()];
let g1p = &self.profile.g_linear[src0[src_cn.g_i() + src_channels]._as_usize()];
let b1p = &self.profile.b_linear[src0[src_cn.b_i() + src_channels]._as_usize()];
let r2p = &self.profile.r_linear[src1[src_cn.r_i()]._as_usize()];
let g2p = &self.profile.g_linear[src1[src_cn.g_i()]._as_usize()];
let b2p = &self.profile.b_linear[src1[src_cn.b_i()]._as_usize()];
let r3p = &self.profile.r_linear[src1[src_cn.r_i() + src_channels]._as_usize()];
let g3p = &self.profile.g_linear[src1[src_cn.g_i() + src_channels]._as_usize()];
let b3p = &self.profile.b_linear[src1[src_cn.b_i() + src_channels]._as_usize()];
r0 = vld1_dup_s16(r0p);
g0 = vld1_dup_s16(g0p);
b0 = vld1_dup_s16(b0p);
r1 = vld1_dup_s16(r1p);
g1 = vld1_dup_s16(g1p);
b1 = vld1_dup_s16(b1p);
r2 = vld1_dup_s16(r2p);
g2 = vld1_dup_s16(g2p);
b2 = vld1_dup_s16(b2p);
r3 = vld1_dup_s16(r3p);
g3 = vld1_dup_s16(g3p);
b3 = vld1_dup_s16(b3p);
dst0[dst_cn.r_i()] = self.profile.r_gamma[vget_lane_u16::<0>(vr0) as usize];
dst0[dst_cn.g_i()] = self.profile.g_gamma[vget_lane_u16::<1>(vr0) as usize];
dst0[dst_cn.b_i()] = self.profile.b_gamma[vget_lane_u16::<2>(vr0) as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i()] = a0;
}
dst0[dst_cn.r_i() + dst_channels] =
self.profile.r_gamma[vget_lane_u16::<0>(vr1) as usize];
dst0[dst_cn.g_i() + dst_channels] =
self.profile.g_gamma[vget_lane_u16::<1>(vr1) as usize];
dst0[dst_cn.b_i() + dst_channels] =
self.profile.b_gamma[vget_lane_u16::<2>(vr1) as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i() + dst_channels] = a1;
}
dst1[dst_cn.r_i()] = self.profile.r_gamma[vget_lane_u16::<0>(vr2) as usize];
dst1[dst_cn.g_i()] = self.profile.g_gamma[vget_lane_u16::<1>(vr2) as usize];
dst1[dst_cn.b_i()] = self.profile.b_gamma[vget_lane_u16::<2>(vr2) as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i()] = a2;
}
dst1[dst_cn.r_i() + dst_channels] =
self.profile.r_gamma[vget_lane_u16::<0>(vr3) as usize];
dst1[dst_cn.g_i() + dst_channels] =
self.profile.g_gamma[vget_lane_u16::<1>(vr3) as usize];
dst1[dst_cn.b_i() + dst_channels] =
self.profile.b_gamma[vget_lane_u16::<2>(vr3) as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i() + dst_channels] = a3;
}
a0 = if src_channels == 4 {
src0[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src0[src_cn.a_i() + src_channels]
} else {
max_colors
};
a2 = if src_channels == 4 {
src1[src_cn.a_i()]
} else {
max_colors
};
a3 = if src_channels == 4 {
src1[src_cn.a_i() + src_channels]
} else {
max_colors
};
}
if let (Some(dst0), Some(dst1)) = (
dst0.chunks_exact_mut(dst_channels * 2).last(),
dst1.chunks_exact_mut(dst_channels * 2).last(),
) {
let v0_0 = vmlal_s16(rnd, r0, m0);
let v0_1 = vmlal_s16(rnd, r1, m0);
let v0_2 = vmlal_s16(rnd, r2, m0);
let v0_3 = vmlal_s16(rnd, r3, m0);
let v1_0 = vmlal_s16(v0_0, g0, m1);
let v1_1 = vmlal_s16(v0_1, g1, m1);
let v1_2 = vmlal_s16(v0_2, g2, m1);
let v1_3 = vmlal_s16(v0_3, g3, m1);
let vr0 = vmlal_s16(v1_0, b0, m2);
let vr1 = vmlal_s16(v1_1, b1, m2);
let vr2 = vmlal_s16(v1_2, b2, m2);
let vr3 = vmlal_s16(v1_3, b3, m2);
let mut vr0 = vqshrun_n_s32::<PRECISION>(vr0);
let mut vr1 = vqshrun_n_s32::<PRECISION>(vr1);
let mut vr2 = vqshrun_n_s32::<PRECISION>(vr2);
let mut vr3 = vqshrun_n_s32::<PRECISION>(vr3);
vr0 = vmin_u16(vr0, v_max_value);
vr1 = vmin_u16(vr1, v_max_value);
vr2 = vmin_u16(vr2, v_max_value);
vr3 = vmin_u16(vr3, v_max_value);
dst0[dst_cn.r_i()] = self.profile.r_gamma[vget_lane_u16::<0>(vr0) as usize];
dst0[dst_cn.g_i()] = self.profile.g_gamma[vget_lane_u16::<1>(vr0) as usize];
dst0[dst_cn.b_i()] = self.profile.b_gamma[vget_lane_u16::<2>(vr0) as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i()] = a0;
}
dst0[dst_cn.r_i() + dst_channels] =
self.profile.r_gamma[vget_lane_u16::<0>(vr1) as usize];
dst0[dst_cn.g_i() + dst_channels] =
self.profile.g_gamma[vget_lane_u16::<1>(vr1) as usize];
dst0[dst_cn.b_i() + dst_channels] =
self.profile.b_gamma[vget_lane_u16::<2>(vr1) as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i() + dst_channels] = a1;
}
dst1[dst_cn.r_i()] = self.profile.r_gamma[vget_lane_u16::<0>(vr2) as usize];
dst1[dst_cn.g_i()] = self.profile.g_gamma[vget_lane_u16::<1>(vr2) as usize];
dst1[dst_cn.b_i()] = self.profile.b_gamma[vget_lane_u16::<2>(vr2) as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i()] = a2;
}
dst1[dst_cn.r_i() + dst_channels] =
self.profile.r_gamma[vget_lane_u16::<0>(vr3) as usize];
dst1[dst_cn.g_i() + dst_channels] =
self.profile.g_gamma[vget_lane_u16::<1>(vr3) as usize];
dst1[dst_cn.b_i() + dst_channels] =
self.profile.b_gamma[vget_lane_u16::<2>(vr3) as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i() + dst_channels] = a3;
}
}
}
for (src, dst) in src_remainder
.chunks_exact(src_channels)
.zip(dst_remainder.chunks_exact_mut(dst_channels))
{
let rp = &self.profile.r_linear[src[src_cn.r_i()]._as_usize()];
let gp = &self.profile.g_linear[src[src_cn.g_i()]._as_usize()];
let bp = &self.profile.b_linear[src[src_cn.b_i()]._as_usize()];
let r = vld1_dup_s16(rp);
let g = vld1_dup_s16(gp);
let b = vld1_dup_s16(bp);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let v0 = vmlal_s16(rnd, r, m0);
let v1 = vmlal_s16(v0, g, m1);
let v = vmlal_s16(v1, b, m2);
let mut vr0 = vqshrun_n_s32::<PRECISION>(v);
vr0 = vmin_u16(vr0, v_max_value);
dst[dst_cn.r_i()] = self.profile.r_gamma[vget_lane_u16::<0>(vr0) as usize];
dst[dst_cn.g_i()] = self.profile.g_gamma[vget_lane_u16::<1>(vr0) as usize];
dst[dst_cn.b_i()] = self.profile.b_gamma[vget_lane_u16::<2>(vr0) as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
Ok(())
}
}

@@ -0,0 +1,397 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::neon::rgb_xyz_q2_13::{split_by_twos, split_by_twos_mut};
use crate::conversions::rgbxyz_fixed::TransformMatrixShaperFixedPointOpt;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::aarch64::*;
pub(crate) struct TransformShaperQ2_13NeonOpt<
T: Copy,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> {
pub(crate) profile: TransformMatrixShaperFixedPointOpt<i16, i16, T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
impl<
T: Copy + PointeeSizeExpressible + 'static + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> TransformExecutor<T>
for TransformShaperQ2_13NeonOpt<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT, PRECISION>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let t = self.profile.adaptation_matrix.transpose();
let max_colors: T = ((1 << self.bit_depth) - 1).as_();
let (src_chunks, src_remainder) = split_by_twos(src, src_channels);
let (dst_chunks, dst_remainder) = split_by_twos_mut(dst, dst_channels);
unsafe {
let m0 = vld1_s16([t.v[0][0], t.v[0][1], t.v[0][2], 0].as_ptr());
let m1 = vld1_s16([t.v[1][0], t.v[1][1], t.v[1][2], 0].as_ptr());
let m2 = vld1_s16([t.v[2][0], t.v[2][1], t.v[2][2], 0].as_ptr());
let v_max_value = vdup_n_u16((GAMMA_LUT - 1) as u16);
let rnd = vdupq_n_s32(1 << (PRECISION - 1));
if !src_chunks.is_empty() {
let (src0, src1) = src_chunks.split_at(src_chunks.len() / 2);
let (dst0, dst1) = dst_chunks.split_at_mut(dst_chunks.len() / 2);
let mut src_iter0 = src0.chunks_exact(src_channels * 2);
let mut src_iter1 = src1.chunks_exact(src_channels * 2);
let (mut r0, mut g0, mut b0, mut a0);
let (mut r1, mut g1, mut b1, mut a1);
let (mut r2, mut g2, mut b2, mut a2);
let (mut r3, mut g3, mut b3, mut a3);
if let (Some(src0), Some(src1)) = (src_iter0.next(), src_iter1.next()) {
let r0p = &self.profile.linear[src0[src_cn.r_i()]._as_usize()];
let g0p = &self.profile.linear[src0[src_cn.g_i()]._as_usize()];
let b0p = &self.profile.linear[src0[src_cn.b_i()]._as_usize()];
let r1p = &self.profile.linear[src0[src_cn.r_i() + src_channels]._as_usize()];
let g1p = &self.profile.linear[src0[src_cn.g_i() + src_channels]._as_usize()];
let b1p = &self.profile.linear[src0[src_cn.b_i() + src_channels]._as_usize()];
let r2p = &self.profile.linear[src1[src_cn.r_i()]._as_usize()];
let g2p = &self.profile.linear[src1[src_cn.g_i()]._as_usize()];
let b2p = &self.profile.linear[src1[src_cn.b_i()]._as_usize()];
let r3p = &self.profile.linear[src1[src_cn.r_i() + src_channels]._as_usize()];
let g3p = &self.profile.linear[src1[src_cn.g_i() + src_channels]._as_usize()];
let b3p = &self.profile.linear[src1[src_cn.b_i() + src_channels]._as_usize()];
r0 = vld1_dup_s16(r0p);
g0 = vld1_dup_s16(g0p);
b0 = vld1_dup_s16(b0p);
r1 = vld1_dup_s16(r1p);
g1 = vld1_dup_s16(g1p);
b1 = vld1_dup_s16(b1p);
r2 = vld1_dup_s16(r2p);
g2 = vld1_dup_s16(g2p);
b2 = vld1_dup_s16(b2p);
r3 = vld1_dup_s16(r3p);
g3 = vld1_dup_s16(g3p);
b3 = vld1_dup_s16(b3p);
a0 = if src_channels == 4 {
src0[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src0[src_cn.a_i() + src_channels]
} else {
max_colors
};
a2 = if src_channels == 4 {
src1[src_cn.a_i()]
} else {
max_colors
};
a3 = if src_channels == 4 {
src1[src_cn.a_i() + src_channels]
} else {
max_colors
};
} else {
r0 = vdup_n_s16(0);
g0 = vdup_n_s16(0);
b0 = vdup_n_s16(0);
r1 = vdup_n_s16(0);
g1 = vdup_n_s16(0);
b1 = vdup_n_s16(0);
r2 = vdup_n_s16(0);
g2 = vdup_n_s16(0);
b2 = vdup_n_s16(0);
r3 = vdup_n_s16(0);
g3 = vdup_n_s16(0);
b3 = vdup_n_s16(0);
a0 = max_colors;
a1 = max_colors;
a2 = max_colors;
a3 = max_colors;
}
for (((src0, src1), dst0), dst1) in src_iter0
.zip(src_iter1)
.zip(dst0.chunks_exact_mut(dst_channels * 2))
.zip(dst1.chunks_exact_mut(dst_channels * 2))
{
let v0_0 = vmlal_s16(rnd, r0, m0);
let v0_1 = vmlal_s16(rnd, r1, m0);
let v0_2 = vmlal_s16(rnd, r2, m0);
let v0_3 = vmlal_s16(rnd, r3, m0);
let v1_0 = vmlal_s16(v0_0, g0, m1);
let v1_1 = vmlal_s16(v0_1, g1, m1);
let v1_2 = vmlal_s16(v0_2, g2, m1);
let v1_3 = vmlal_s16(v0_3, g3, m1);
let vr0 = vmlal_s16(v1_0, b0, m2);
let vr1 = vmlal_s16(v1_1, b1, m2);
let vr2 = vmlal_s16(v1_2, b2, m2);
let vr3 = vmlal_s16(v1_3, b3, m2);
let mut vr0 = vqshrun_n_s32::<PRECISION>(vr0);
let mut vr1 = vqshrun_n_s32::<PRECISION>(vr1);
let mut vr2 = vqshrun_n_s32::<PRECISION>(vr2);
let mut vr3 = vqshrun_n_s32::<PRECISION>(vr3);
vr0 = vmin_u16(vr0, v_max_value);
vr1 = vmin_u16(vr1, v_max_value);
vr2 = vmin_u16(vr2, v_max_value);
vr3 = vmin_u16(vr3, v_max_value);
let r0p = &self.profile.linear[src0[src_cn.r_i()]._as_usize()];
let g0p = &self.profile.linear[src0[src_cn.g_i()]._as_usize()];
let b0p = &self.profile.linear[src0[src_cn.b_i()]._as_usize()];
let r1p = &self.profile.linear[src0[src_cn.r_i() + src_channels]._as_usize()];
let g1p = &self.profile.linear[src0[src_cn.g_i() + src_channels]._as_usize()];
let b1p = &self.profile.linear[src0[src_cn.b_i() + src_channels]._as_usize()];
let r2p = &self.profile.linear[src1[src_cn.r_i()]._as_usize()];
let g2p = &self.profile.linear[src1[src_cn.g_i()]._as_usize()];
let b2p = &self.profile.linear[src1[src_cn.b_i()]._as_usize()];
let r3p = &self.profile.linear[src1[src_cn.r_i() + src_channels]._as_usize()];
let g3p = &self.profile.linear[src1[src_cn.g_i() + src_channels]._as_usize()];
let b3p = &self.profile.linear[src1[src_cn.b_i() + src_channels]._as_usize()];
r0 = vld1_dup_s16(r0p);
g0 = vld1_dup_s16(g0p);
b0 = vld1_dup_s16(b0p);
r1 = vld1_dup_s16(r1p);
g1 = vld1_dup_s16(g1p);
b1 = vld1_dup_s16(b1p);
r2 = vld1_dup_s16(r2p);
g2 = vld1_dup_s16(g2p);
b2 = vld1_dup_s16(b2p);
r3 = vld1_dup_s16(r3p);
g3 = vld1_dup_s16(g3p);
b3 = vld1_dup_s16(b3p);
dst0[dst_cn.r_i()] = self.profile.gamma[vget_lane_u16::<0>(vr0) as usize];
dst0[dst_cn.g_i()] = self.profile.gamma[vget_lane_u16::<1>(vr0) as usize];
dst0[dst_cn.b_i()] = self.profile.gamma[vget_lane_u16::<2>(vr0) as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i()] = a0;
}
dst0[dst_cn.r_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<0>(vr1) as usize];
dst0[dst_cn.g_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<1>(vr1) as usize];
dst0[dst_cn.b_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<2>(vr1) as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i() + dst_channels] = a1;
}
dst1[dst_cn.r_i()] = self.profile.gamma[vget_lane_u16::<0>(vr2) as usize];
dst1[dst_cn.g_i()] = self.profile.gamma[vget_lane_u16::<1>(vr2) as usize];
dst1[dst_cn.b_i()] = self.profile.gamma[vget_lane_u16::<2>(vr2) as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i()] = a2;
}
dst1[dst_cn.r_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<0>(vr3) as usize];
dst1[dst_cn.g_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<1>(vr3) as usize];
dst1[dst_cn.b_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<2>(vr3) as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i() + dst_channels] = a3;
}
a0 = if src_channels == 4 {
src0[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src0[src_cn.a_i() + src_channels]
} else {
max_colors
};
a2 = if src_channels == 4 {
src1[src_cn.a_i()]
} else {
max_colors
};
a3 = if src_channels == 4 {
src1[src_cn.a_i() + src_channels]
} else {
max_colors
};
}
if let (Some(dst0), Some(dst1)) = (
dst0.chunks_exact_mut(dst_channels * 2).last(),
dst1.chunks_exact_mut(dst_channels * 2).last(),
) {
let v0_0 = vmlal_s16(rnd, r0, m0);
let v0_1 = vmlal_s16(rnd, r1, m0);
let v0_2 = vmlal_s16(rnd, r2, m0);
let v0_3 = vmlal_s16(rnd, r3, m0);
let v1_0 = vmlal_s16(v0_0, g0, m1);
let v1_1 = vmlal_s16(v0_1, g1, m1);
let v1_2 = vmlal_s16(v0_2, g2, m1);
let v1_3 = vmlal_s16(v0_3, g3, m1);
let vr0 = vmlal_s16(v1_0, b0, m2);
let vr1 = vmlal_s16(v1_1, b1, m2);
let vr2 = vmlal_s16(v1_2, b2, m2);
let vr3 = vmlal_s16(v1_3, b3, m2);
let mut vr0 = vqshrun_n_s32::<PRECISION>(vr0);
let mut vr1 = vqshrun_n_s32::<PRECISION>(vr1);
let mut vr2 = vqshrun_n_s32::<PRECISION>(vr2);
let mut vr3 = vqshrun_n_s32::<PRECISION>(vr3);
vr0 = vmin_u16(vr0, v_max_value);
vr1 = vmin_u16(vr1, v_max_value);
vr2 = vmin_u16(vr2, v_max_value);
vr3 = vmin_u16(vr3, v_max_value);
dst0[dst_cn.r_i()] = self.profile.gamma[vget_lane_u16::<0>(vr0) as usize];
dst0[dst_cn.g_i()] = self.profile.gamma[vget_lane_u16::<1>(vr0) as usize];
dst0[dst_cn.b_i()] = self.profile.gamma[vget_lane_u16::<2>(vr0) as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i()] = a0;
}
dst0[dst_cn.r_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<0>(vr1) as usize];
dst0[dst_cn.g_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<1>(vr1) as usize];
dst0[dst_cn.b_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<2>(vr1) as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i() + dst_channels] = a1;
}
dst1[dst_cn.r_i()] = self.profile.gamma[vget_lane_u16::<0>(vr2) as usize];
dst1[dst_cn.g_i()] = self.profile.gamma[vget_lane_u16::<1>(vr2) as usize];
dst1[dst_cn.b_i()] = self.profile.gamma[vget_lane_u16::<2>(vr2) as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i()] = a2;
}
dst1[dst_cn.r_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<0>(vr3) as usize];
dst1[dst_cn.g_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<1>(vr3) as usize];
dst1[dst_cn.b_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<2>(vr3) as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i() + dst_channels] = a3;
}
}
}
for (src, dst) in src_remainder
.chunks_exact(src_channels)
.zip(dst_remainder.chunks_exact_mut(dst_channels))
{
let rp = &self.profile.linear[src[src_cn.r_i()]._as_usize()];
let gp = &self.profile.linear[src[src_cn.g_i()]._as_usize()];
let bp = &self.profile.linear[src[src_cn.b_i()]._as_usize()];
let r = vld1_dup_s16(rp);
let g = vld1_dup_s16(gp);
let b = vld1_dup_s16(bp);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let v0 = vmlal_s16(rnd, r, m0);
let v1 = vmlal_s16(v0, g, m1);
let v = vmlal_s16(v1, b, m2);
let mut vr0 = vqshrun_n_s32::<PRECISION>(v);
vr0 = vmin_u16(vr0, v_max_value);
dst[dst_cn.r_i()] = self.profile.gamma[vget_lane_u16::<0>(vr0) as usize];
dst[dst_cn.g_i()] = self.profile.gamma[vget_lane_u16::<1>(vr0) as usize];
dst[dst_cn.b_i()] = self.profile.gamma[vget_lane_u16::<2>(vr0) as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
Ok(())
}
}

@@ -0,0 +1,335 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::interpolator::BarycentricWeight;
use crate::conversions::lut_transforms::Lut3x3Factory;
use crate::conversions::neon::interpolator::*;
use crate::conversions::neon::interpolator_q0_15::NeonAlignedI16x4;
use crate::conversions::neon::rgb_xyz::NeonAlignedF32;
use crate::conversions::neon::t_lut3_to_3_q0_15::TransformLut3x3NeonQ0_15;
use crate::transform::PointeeSizeExpressible;
use crate::{
BarycentricWeightScale, CmsError, DataColorSpace, InterpolationMethod, Layout,
TransformExecutor, TransformOptions,
};
use num_traits::AsPrimitive;
use std::arch::aarch64::*;
use std::marker::PhantomData;
struct TransformLut3x3Neon<
T,
U,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> {
lut: Vec<NeonAlignedF32>,
_phantom: PhantomData<T>,
_phantom1: PhantomData<U>,
interpolation_method: InterpolationMethod,
weights: Box<[BarycentricWeight<f32>; BINS]>,
color_space: DataColorSpace,
is_linear: bool,
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformLut3x3Neon<T, U, SRC_LAYOUT, DST_LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
#[inline(always)]
fn transform_chunk<'b, Interpolator: NeonMdInterpolation<'b, GRID_SIZE>>(
&'b self,
src: &[T],
dst: &mut [T],
) {
unsafe {
let src_cn = Layout::from(SRC_LAYOUT);
let src_channels = src_cn.channels();
let dst_cn = Layout::from(DST_LAYOUT);
let dst_channels = dst_cn.channels();
let value_scale = vdupq_n_f32(((1 << BIT_DEPTH) - 1) as f32);
let max_value = ((1u32 << BIT_DEPTH) - 1).as_();
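// `value_scale` re-expands the normalized interpolation result to the output
// bit depth, while `max_value` substitutes for alpha when the source layout
// carries none.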
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let x = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.r_i()],
);
let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.g_i()],
);
let z = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.b_i()],
);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_value
};
let tetrahedral = Interpolator::new(&self.lut);
let v = tetrahedral.inter3_neon(x, y, z, &self.weights);
if T::FINITE {
let mut r = vfmaq_f32(vdupq_n_f32(0.5f32), v.v, value_scale);
r = vminq_f32(r, value_scale);
let jvx = vcvtaq_u32_f32(r);
dst[dst_cn.r_i()] = vgetq_lane_u32::<0>(jvx).as_();
dst[dst_cn.g_i()] = vgetq_lane_u32::<1>(jvx).as_();
dst[dst_cn.b_i()] = vgetq_lane_u32::<2>(jvx).as_();
} else {
dst[dst_cn.r_i()] = vgetq_lane_f32::<0>(v.v).as_();
dst[dst_cn.g_i()] = vgetq_lane_f32::<1>(v.v).as_();
dst[dst_cn.b_i()] = vgetq_lane_f32::<2>(v.v).as_();
}
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
}
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformExecutor<T>
for TransformLut3x3Neon<
T,
U,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
BINS,
BARYCENTRIC_BINS,
>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let src_channels = src_cn.channels();
let dst_cn = Layout::from(DST_LAYOUT);
let dst_channels = dst_cn.channels();
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / src_channels;
let dst_chunks = dst.len() / dst_channels;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
if self.color_space == DataColorSpace::Lab
|| (self.is_linear && self.color_space == DataColorSpace::Rgb)
|| self.color_space == DataColorSpace::Xyz
{
self.transform_chunk::<TrilinearNeon<GRID_SIZE>>(src, dst);
} else {
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_chunk::<TetrahedralNeon<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_chunk::<PyramidalNeon<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_chunk::<PrismaticNeon<GRID_SIZE>>(src, dst);
}
InterpolationMethod::Linear => {
self.transform_chunk::<TrilinearNeon<GRID_SIZE>>(src, dst);
}
}
}
Ok(())
}
}
pub(crate) struct NeonLut3x3Factory {}
impl Lut3x3Factory for NeonLut3x3Factory {
fn make_transform_3x3<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible + 'static + Send + Sync,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
>(
lut: Vec<f32>,
options: TransformOptions,
color_space: DataColorSpace,
is_linear: bool,
) -> Box<dyn TransformExecutor<T> + Send + Sync>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, u8>,
(): LutBarycentricReduction<T, u16>,
{
if options.prefer_fixed_point
&& BIT_DEPTH < 16
&& std::arch::is_aarch64_feature_detected!("rdm")
{
let q: f32 = if T::FINITE {
((1i32 << BIT_DEPTH as i32) - 1) as f32
} else {
((1i32 << 14i32) - 1) as f32
};
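// This fixed-point fast path is only taken when SQRDMLAH ("rdm") is available
// at runtime and BIT_DEPTH < 16. Integer sample types quantize the LUT to the
// full bit-depth range; float inputs are mapped onto a 14-bit scale, which
// presumably leaves signed headroom for the i16 interpolators.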
let lut = lut
.chunks_exact(3)
.map(|x| {
NeonAlignedI16x4([
(x[0] * q).round() as i16,
(x[1] * q).round() as i16,
(x[2] * q).round() as i16,
0,
])
})
.collect::<Vec<_>>();
return match options.barycentric_weight_scale {
BarycentricWeightScale::Low => Box::new(TransformLut3x3NeonQ0_15::<
T,
u8,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
256,
256,
> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<i16>::create_ranged_256::<GRID_SIZE>(),
color_space,
is_linear,
}),
#[cfg(feature = "options")]
BarycentricWeightScale::High => Box::new(TransformLut3x3NeonQ0_15::<
T,
u16,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
65536,
65536,
> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<i16>::create_binned::<GRID_SIZE, 65536>(),
color_space,
is_linear,
}),
};
}
let lut = lut
.chunks_exact(3)
.map(|x| NeonAlignedF32([x[0], x[1], x[2], 0f32]))
.collect::<Vec<_>>();
match options.barycentric_weight_scale {
BarycentricWeightScale::Low => Box::new(TransformLut3x3Neon::<
T,
u8,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
256,
256,
> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_ranged_256::<GRID_SIZE>(),
color_space,
is_linear,
}),
#[cfg(feature = "options")]
BarycentricWeightScale::High => Box::new(TransformLut3x3Neon::<
T,
u16,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
65536,
65536,
> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_binned::<GRID_SIZE, 65536>(),
color_space,
is_linear,
}),
}
}
}

@@ -0,0 +1,219 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::interpolator::BarycentricWeight;
use crate::conversions::neon::interpolator_q0_15::*;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, DataColorSpace, InterpolationMethod, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::aarch64::*;
use std::marker::PhantomData;
pub(crate) struct TransformLut3x3NeonQ0_15<
T,
U,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> {
pub(crate) lut: Vec<NeonAlignedI16x4>,
pub(crate) _phantom: PhantomData<T>,
pub(crate) _phantom1: PhantomData<U>,
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) weights: Box<[BarycentricWeight<i16>; BINS]>,
pub(crate) color_space: DataColorSpace,
pub(crate) is_linear: bool,
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
>
TransformLut3x3NeonQ0_15<
T,
U,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
BINS,
BARYCENTRIC_BINS,
>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
#[allow(unused_unsafe)]
#[target_feature(enable = "rdm")]
unsafe fn transform_chunk<'b, Interpolator: NeonMdInterpolationQ0_15<'b, GRID_SIZE>>(
&'b self,
src: &[T],
dst: &mut [T],
) {
unsafe {
let src_cn = Layout::from(SRC_LAYOUT);
let src_channels = src_cn.channels();
let dst_cn = Layout::from(DST_LAYOUT);
let dst_channels = dst_cn.channels();
let f_value_scale = vdupq_n_f32(1. / ((1 << 14i32) - 1) as f32);
let max_value = ((1u32 << BIT_DEPTH) - 1).as_();
let v_max_scale = if T::FINITE {
vdup_n_s16(((1i32 << BIT_DEPTH) - 1) as i16)
} else {
vdup_n_s16(((1i32 << 14i32) - 1) as i16)
};
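// Output handling per sample kind: finite integer types clamp the Q0.15
// result to [0, 2^BIT_DEPTH - 1] and cast, while float outputs are converted
// back to [0, 1] via `f_value_scale`.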
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let x = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.r_i()],
);
let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.g_i()],
);
let z = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.b_i()],
);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_value
};
let tetrahedral = Interpolator::new(&self.lut);
let v = tetrahedral.inter3_neon(x, y, z, &self.weights);
if T::FINITE {
let mut o = vmax_s16(v.v, vdup_n_s16(0));
o = vmin_s16(o, v_max_scale);
dst[dst_cn.r_i()] = (vget_lane_s16::<0>(o) as u32).as_();
dst[dst_cn.g_i()] = (vget_lane_s16::<1>(o) as u32).as_();
dst[dst_cn.b_i()] = (vget_lane_s16::<2>(o) as u32).as_();
} else {
let o = vcvtq_f32_s32(vmovl_s16(v.v));
let r = vmulq_f32(o, f_value_scale);
dst[dst_cn.r_i()] = vgetq_lane_f32::<0>(r).as_();
dst[dst_cn.g_i()] = vgetq_lane_f32::<1>(r).as_();
dst[dst_cn.b_i()] = vgetq_lane_f32::<2>(r).as_();
}
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
}
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformExecutor<T>
for TransformLut3x3NeonQ0_15<
T,
U,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
BINS,
BARYCENTRIC_BINS,
>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let src_channels = src_cn.channels();
let dst_cn = Layout::from(DST_LAYOUT);
let dst_channels = dst_cn.channels();
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / src_channels;
let dst_chunks = dst.len() / dst_channels;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
unsafe {
if self.color_space == DataColorSpace::Lab
|| (self.is_linear && self.color_space == DataColorSpace::Rgb)
|| self.color_space == DataColorSpace::Xyz
{
self.transform_chunk::<TrilinearNeonQ0_15<GRID_SIZE>>(src, dst);
} else {
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_chunk::<TetrahedralNeonQ0_15<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_chunk::<PyramidalNeonQ0_15<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_chunk::<PrismaticNeonQ0_15<GRID_SIZE>>(src, dst);
}
InterpolationMethod::Linear => {
self.transform_chunk::<TrilinearNeonQ0_15<GRID_SIZE>>(src, dst);
}
}
}
}
Ok(())
}
}

@@ -0,0 +1,327 @@
/*
* // Copyright (c) Radzivon Bartoshyk 4/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::lut3x4::create_lut3_samples;
use crate::mlaf::mlaf;
use crate::trc::ToneCurveEvaluator;
use crate::{
CmsError, ColorProfile, GammaLutInterpolate, InPlaceStage, Matrix3f, PointeeSizeExpressible,
RenderingIntent, Rgb, TransformOptions, filmlike_clip,
};
use num_traits::AsPrimitive;
use std::marker::PhantomData;
pub(crate) struct XyzToRgbStage<T: Clone> {
pub(crate) r_gamma: Box<[T; 65536]>,
pub(crate) g_gamma: Box<[T; 65536]>,
pub(crate) b_gamma: Box<[T; 65536]>,
pub(crate) matrices: Vec<Matrix3f>,
pub(crate) intent: RenderingIntent,
pub(crate) bit_depth: usize,
pub(crate) gamma_lut: usize,
}
impl<T: Clone + AsPrimitive<f32>> InPlaceStage for XyzToRgbStage<T> {
fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
assert!(self.bit_depth > 0);
if !self.matrices.is_empty() {
let m = self.matrices[0];
for dst in dst.chunks_exact_mut(3) {
let x = dst[0];
let y = dst[1];
let z = dst[2];
dst[0] = mlaf(mlaf(x * m.v[0][0], y, m.v[0][1]), z, m.v[0][2]);
dst[1] = mlaf(mlaf(x * m.v[1][0], y, m.v[1][1]), z, m.v[1][2]);
dst[2] = mlaf(mlaf(x * m.v[2][0], y, m.v[2][1]), z, m.v[2][2]);
}
}
for m in self.matrices.iter().skip(1) {
for dst in dst.chunks_exact_mut(3) {
let x = dst[0];
let y = dst[1];
let z = dst[2];
dst[0] = mlaf(mlaf(x * m.v[0][0], y, m.v[0][1]), z, m.v[0][2]);
dst[1] = mlaf(mlaf(x * m.v[1][0], y, m.v[1][1]), z, m.v[1][2]);
dst[2] = mlaf(mlaf(x * m.v[2][0], y, m.v[2][1]), z, m.v[2][2]);
}
}
let max_colors = (1 << self.bit_depth) - 1;
let color_scale = 1f32 / max_colors as f32;
let lut_cap = (self.gamma_lut - 1) as f32;
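// Encode step: `mlaf(0.5f32, x, lut_cap)` scales a linear value into the
// gamma LUT's index range with a +0.5 rounding bias, the clamp keeps the
// index in bounds, and `color_scale` renormalizes the table output to [0, 1].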
if self.intent != RenderingIntent::AbsoluteColorimetric {
for dst in dst.chunks_exact_mut(3) {
let mut rgb = Rgb::new(dst[0], dst[1], dst[2]);
if rgb.is_out_of_gamut() {
rgb = filmlike_clip(rgb);
}
let r = mlaf(0.5f32, rgb.r, lut_cap).min(lut_cap).max(0f32) as u16;
let g = mlaf(0.5f32, rgb.g, lut_cap).min(lut_cap).max(0f32) as u16;
let b = mlaf(0.5f32, rgb.b, lut_cap).min(lut_cap).max(0f32) as u16;
dst[0] = self.r_gamma[r as usize].as_() * color_scale;
dst[1] = self.g_gamma[g as usize].as_() * color_scale;
dst[2] = self.b_gamma[b as usize].as_() * color_scale;
}
} else {
for dst in dst.chunks_exact_mut(3) {
let rgb = Rgb::new(dst[0], dst[1], dst[2]);
let r = mlaf(0.5f32, rgb.r, lut_cap).min(lut_cap).max(0f32) as u16;
let g = mlaf(0.5f32, rgb.g, lut_cap).min(lut_cap).max(0f32) as u16;
let b = mlaf(0.5f32, rgb.b, lut_cap).min(lut_cap).max(0f32) as u16;
dst[0] = self.r_gamma[r as usize].as_() * color_scale;
dst[1] = self.g_gamma[g as usize].as_() * color_scale;
dst[2] = self.b_gamma[b as usize].as_() * color_scale;
}
}
Ok(())
}
}
pub(crate) struct XyzToRgbStageExtended<T: Clone> {
pub(crate) gamma_evaluator: Box<dyn ToneCurveEvaluator>,
pub(crate) matrices: Vec<Matrix3f>,
pub(crate) phantom_data: PhantomData<T>,
}
impl<T: Clone + AsPrimitive<f32>> InPlaceStage for XyzToRgbStageExtended<T> {
fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
if !self.matrices.is_empty() {
let m = self.matrices[0];
for dst in dst.chunks_exact_mut(3) {
let x = dst[0];
let y = dst[1];
let z = dst[2];
dst[0] = mlaf(mlaf(x * m.v[0][0], y, m.v[0][1]), z, m.v[0][2]);
dst[1] = mlaf(mlaf(x * m.v[1][0], y, m.v[1][1]), z, m.v[1][2]);
dst[2] = mlaf(mlaf(x * m.v[2][0], y, m.v[2][1]), z, m.v[2][2]);
}
}
for m in self.matrices.iter().skip(1) {
for dst in dst.chunks_exact_mut(3) {
let x = dst[0];
let y = dst[1];
let z = dst[2];
dst[0] = mlaf(mlaf(x * m.v[0][0], y, m.v[0][1]), z, m.v[0][2]);
dst[1] = mlaf(mlaf(x * m.v[1][0], y, m.v[1][1]), z, m.v[1][2]);
dst[2] = mlaf(mlaf(x * m.v[2][0], y, m.v[2][1]), z, m.v[2][2]);
}
}
for dst in dst.chunks_exact_mut(3) {
let mut rgb = Rgb::new(dst[0], dst[1], dst[2]);
rgb = self.gamma_evaluator.evaluate_tristimulus(rgb);
dst[0] = rgb.r.as_();
dst[1] = rgb.g.as_();
dst[2] = rgb.b.as_();
}
Ok(())
}
}
struct RgbLinearizationStage<T: Clone, const LINEAR_CAP: usize, const SAMPLES: usize> {
r_lin: Box<[f32; LINEAR_CAP]>,
g_lin: Box<[f32; LINEAR_CAP]>,
b_lin: Box<[f32; LINEAR_CAP]>,
_phantom: PhantomData<T>,
bit_depth: usize,
}
impl<
T: Clone + AsPrimitive<usize> + PointeeSizeExpressible,
const LINEAR_CAP: usize,
const SAMPLES: usize,
> RgbLinearizationStage<T, LINEAR_CAP, SAMPLES>
{
fn transform(&self, src: &[T], dst: &mut [f32]) -> Result<(), CmsError> {
if src.len() % 3 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % 3 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let scale = if T::FINITE {
((1 << self.bit_depth) - 1) as f32 / (SAMPLES as f32 - 1f32)
} else {
(T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32 / (SAMPLES as f32 - 1f32)
};
let capped_value = if T::FINITE {
(1 << self.bit_depth) - 1
} else {
T::NOT_FINITE_LINEAR_TABLE_SIZE - 1
};
for (src, dst) in src.chunks_exact(3).zip(dst.chunks_exact_mut(3)) {
let j_r = src[0].as_() as f32 * scale;
let j_g = src[1].as_() as f32 * scale;
let j_b = src[2].as_() as f32 * scale;
dst[0] = self.r_lin[(j_r.round().max(0.0).min(capped_value as f32) as u16) as usize];
dst[1] = self.g_lin[(j_g.round().max(0.0).min(capped_value as f32) as u16) as usize];
dst[2] = self.b_lin[(j_b.round().max(0.0).min(capped_value as f32) as u16) as usize];
}
Ok(())
}
}
pub(crate) fn create_rgb_lin_lut<
T: Copy + Default + AsPrimitive<f32> + Send + Sync + AsPrimitive<usize> + PointeeSizeExpressible,
const BIT_DEPTH: usize,
const LINEAR_CAP: usize,
const GRID_SIZE: usize,
>(
source: &ColorProfile,
opts: TransformOptions,
) -> Result<Vec<f32>, CmsError>
where
u32: AsPrimitive<T>,
f32: AsPrimitive<T>,
{
let lut_origins = create_lut3_samples::<T, GRID_SIZE>();
let lin_r =
source.build_r_linearize_table::<T, LINEAR_CAP, BIT_DEPTH>(opts.allow_use_cicp_transfer)?;
let lin_g =
source.build_g_linearize_table::<T, LINEAR_CAP, BIT_DEPTH>(opts.allow_use_cicp_transfer)?;
let lin_b =
source.build_b_linearize_table::<T, LINEAR_CAP, BIT_DEPTH>(opts.allow_use_cicp_transfer)?;
let lin_stage = RgbLinearizationStage::<T, LINEAR_CAP, GRID_SIZE> {
r_lin: lin_r,
g_lin: lin_g,
b_lin: lin_b,
_phantom: PhantomData,
bit_depth: BIT_DEPTH,
};
let mut lut = vec![0f32; lut_origins.len()];
lin_stage.transform(&lut_origins, &mut lut)?;
let xyz_to_rgb = source.rgb_to_xyz_matrix();
let matrices = vec![
xyz_to_rgb.to_f32(),
Matrix3f {
v: [
[32768.0 / 65535.0, 0.0, 0.0],
[0.0, 32768.0 / 65535.0, 0.0],
[0.0, 0.0, 32768.0 / 65535.0],
],
},
];
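// A note on the trailing diagonal matrix (an inference, not documented here):
// 32768.0 / 65535.0 looks like the ICC u1Fixed15 PCS convention where 0x8000
// encodes XYZ = 1.0 within a 16-bit range; `prepare_inverse_lut_rgb_xyz`
// below undoes it with the reciprocal 65535.0 / 32768.0.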
let matrix_stage = crate::conversions::lut_transforms::MatrixStage { matrices };
matrix_stage.transform(&mut lut)?;
Ok(lut)
}
pub(crate) fn prepare_inverse_lut_rgb_xyz<
T: Copy
+ Default
+ AsPrimitive<f32>
+ Send
+ Sync
+ AsPrimitive<usize>
+ PointeeSizeExpressible
+ GammaLutInterpolate,
const BIT_DEPTH: usize,
const GAMMA_LUT: usize,
>(
dest: &ColorProfile,
lut: &mut [f32],
options: TransformOptions,
) -> Result<(), CmsError>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
{
if !T::FINITE {
if let Some(extended_gamma) = dest.try_extended_gamma_evaluator() {
let xyz_to_rgb = dest.rgb_to_xyz_matrix().inverse();
let mut matrices = vec![Matrix3f {
v: [
[65535.0 / 32768.0, 0.0, 0.0],
[0.0, 65535.0 / 32768.0, 0.0],
[0.0, 0.0, 65535.0 / 32768.0],
],
}];
matrices.push(xyz_to_rgb.to_f32());
let xyz_to_rgb_stage = XyzToRgbStageExtended::<T> {
gamma_evaluator: extended_gamma,
matrices,
phantom_data: PhantomData,
};
xyz_to_rgb_stage.transform(lut)?;
return Ok(());
}
}
let gamma_map_r = dest.build_gamma_table::<T, 65536, GAMMA_LUT, BIT_DEPTH>(
&dest.red_trc,
options.allow_use_cicp_transfer,
)?;
let gamma_map_g = dest.build_gamma_table::<T, 65536, GAMMA_LUT, BIT_DEPTH>(
&dest.green_trc,
options.allow_use_cicp_transfer,
)?;
let gamma_map_b = dest.build_gamma_table::<T, 65536, GAMMA_LUT, BIT_DEPTH>(
&dest.blue_trc,
options.allow_use_cicp_transfer,
)?;
let xyz_to_rgb = dest.rgb_to_xyz_matrix().inverse();
let mut matrices = vec![Matrix3f {
v: [
[65535.0 / 32768.0, 0.0, 0.0],
[0.0, 65535.0 / 32768.0, 0.0],
[0.0, 0.0, 65535.0 / 32768.0],
],
}];
matrices.push(xyz_to_rgb.to_f32());
let xyz_to_rgb_stage = XyzToRgbStage::<T> {
r_gamma: gamma_map_r,
g_gamma: gamma_map_g,
b_gamma: gamma_map_b,
matrices,
intent: options.rendering_intent,
gamma_lut: GAMMA_LUT,
bit_depth: BIT_DEPTH,
};
xyz_to_rgb_stage.transform(lut)?;
Ok(())
}

@@ -0,0 +1,190 @@
/*
* // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::mlaf::mlaf;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor, Vector3f};
use num_traits::AsPrimitive;
#[derive(Clone)]
pub(crate) struct ToneReproductionRgbToGray<T, const BUCKET: usize> {
pub(crate) r_linear: Box<[f32; BUCKET]>,
pub(crate) g_linear: Box<[f32; BUCKET]>,
pub(crate) b_linear: Box<[f32; BUCKET]>,
pub(crate) gray_gamma: Box<[T; 65536]>,
}
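// A hedged sketch (an assumption, not this crate's API): the `weights`
// consumed by the executor below are typically the Y row of the profile's
// linear RGB -> XYZ matrix, since relative luminance is
// Y = wr*R + wg*G + wb*B; for BT.709 primaries with D65 white that is
// roughly (0.2126, 0.7152, 0.0722).
#[allow(dead_code)]
fn luminance_weights_sketch(rgb_to_xyz: [[f32; 3]; 3]) -> [f32; 3] {
    // The middle row of the matrix yields the Y (luminance) component.
    rgb_to_xyz[1]
}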
#[derive(Clone)]
struct TransformRgbToGrayExecutor<
T,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const BUCKET: usize,
const GAMMA_LUT: usize,
> {
trc_box: ToneReproductionRgbToGray<T, BUCKET>,
weights: Vector3f,
bit_depth: usize,
}
pub(crate) fn make_rgb_to_gray<
T: Copy + Default + PointeeSizeExpressible + Send + Sync + 'static,
const BUCKET: usize,
const BIT_DEPTH: usize,
const GAMMA_LUT: usize,
>(
src_layout: Layout,
dst_layout: Layout,
trc: ToneReproductionRgbToGray<T, BUCKET>,
weights: Vector3f,
) -> Box<dyn TransformExecutor<T> + Send + Sync>
where
u32: AsPrimitive<T>,
{
match src_layout {
Layout::Rgb => match dst_layout {
Layout::Rgb => unreachable!(),
Layout::Rgba => unreachable!(),
Layout::Gray => Box::new(TransformRgbToGrayExecutor::<
T,
{ Layout::Rgb as u8 },
{ Layout::Gray as u8 },
BUCKET,
GAMMA_LUT,
> {
trc_box: trc,
weights,
bit_depth: BIT_DEPTH,
}),
Layout::GrayAlpha => Box::new(TransformRgbToGrayExecutor::<
T,
{ Layout::Rgb as u8 },
{ Layout::GrayAlpha as u8 },
BUCKET,
GAMMA_LUT,
> {
trc_box: trc,
weights,
bit_depth: BIT_DEPTH,
}),
_ => unreachable!(),
},
Layout::Rgba => match dst_layout {
Layout::Rgb => unreachable!(),
Layout::Rgba => unreachable!(),
Layout::Gray => Box::new(TransformRgbToGrayExecutor::<
T,
{ Layout::Rgba as u8 },
{ Layout::Gray as u8 },
BUCKET,
GAMMA_LUT,
> {
trc_box: trc,
weights,
bit_depth: BIT_DEPTH,
}),
Layout::GrayAlpha => Box::new(TransformRgbToGrayExecutor::<
T,
{ Layout::Rgba as u8 },
{ Layout::GrayAlpha as u8 },
BUCKET,
GAMMA_LUT,
> {
trc_box: trc,
weights,
bit_depth: BIT_DEPTH,
}),
_ => unreachable!(),
},
Layout::Gray => unreachable!(),
Layout::GrayAlpha => unreachable!(),
_ => unreachable!(),
}
}
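// Illustrative sketch of the dispatch style used above: match the runtime
// layout once, then instantiate a monomorphized worker whose channel count is
// a const generic, so the per-pixel loop carries no layout branches. Names
// here are hypothetical.
#[allow(dead_code)]
fn sum_first_channel_sketch<const CHANNELS: usize>(pixels: &[f32]) -> f32 {
    // CHANNELS is known at compile time, so the stride is a constant.
    pixels.chunks_exact(CHANNELS).map(|px| px[0]).sum()
}

#[allow(dead_code)]
fn layout_dispatch_sketch(channels: usize, pixels: &[f32]) -> f32 {
    match channels {
        3 => sum_first_channel_sketch::<3>(pixels),
        4 => sum_first_channel_sketch::<4>(pixels),
        _ => unreachable!("unsupported layout"),
    }
}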
impl<
T: Copy + Default + PointeeSizeExpressible + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const BUCKET: usize,
const GAMMA_LUT: usize,
> TransformExecutor<T> for TransformRgbToGrayExecutor<T, SRC_LAYOUT, DST_LAYOUT, BUCKET, GAMMA_LUT>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let scale_value = (GAMMA_LUT - 1) as f32;
let max_value = ((1u32 << self.bit_depth) - 1).as_();
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let r = self.trc_box.r_linear[src[src_cn.r_i()]._as_usize()];
let g = self.trc_box.g_linear[src[src_cn.g_i()]._as_usize()];
let b = self.trc_box.b_linear[src[src_cn.b_i()]._as_usize()];
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_value
};
let grey = mlaf(
0.5,
mlaf(
mlaf(self.weights.v[0] * r, self.weights.v[1], g),
self.weights.v[2],
b,
)
.min(1.)
.max(0.),
scale_value,
);
dst[0] = self.trc_box.gray_gamma[(grey as u16) as usize];
if dst_channels == 2 {
dst[1] = a;
}
}
Ok(())
}
}
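// Illustrative scalar model of the rounding used above: `mlaf(0.5, x, s)`
// computes 0.5 + x * s, so the truncating `as u16` cast in the loop rounds to
// the nearest gamma-LUT index instead of flooring. A minimal sketch:
#[allow(dead_code)]
fn round_to_lut_index_sketch(x: f32, lut_len: usize) -> usize {
    let scaled = 0.5 + x.clamp(0.0, 1.0) * (lut_len - 1) as f32;
    scaled as usize // truncation after +0.5 acts as round-half-up
}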


@@ -0,0 +1,181 @@
/*
* // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::mlaf::mlaf;
use crate::transform::PointeeSizeExpressible;
use crate::trc::ToneCurveEvaluator;
use crate::{CmsError, Layout, Rgb, TransformExecutor, Vector3f};
use num_traits::AsPrimitive;
use std::marker::PhantomData;
struct TransformRgbToGrayExtendedExecutor<T, const SRC_LAYOUT: u8, const DST_LAYOUT: u8> {
linear_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
gamma_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
weights: Vector3f,
_phantom: PhantomData<T>,
bit_depth: usize,
}
pub(crate) fn make_rgb_to_gray_extended<
T: Copy + Default + PointeeSizeExpressible + Send + Sync + 'static + AsPrimitive<f32>,
>(
src_layout: Layout,
dst_layout: Layout,
linear_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
gamma_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
weights: Vector3f,
bit_depth: usize,
) -> Box<dyn TransformExecutor<T> + Send + Sync>
where
u32: AsPrimitive<T>,
f32: AsPrimitive<T>,
{
match src_layout {
Layout::Rgb => match dst_layout {
Layout::Rgb => unreachable!(),
Layout::Rgba => unreachable!(),
Layout::Gray => Box::new(TransformRgbToGrayExtendedExecutor::<
T,
{ Layout::Rgb as u8 },
{ Layout::Gray as u8 },
> {
linear_eval,
gamma_eval,
weights,
_phantom: PhantomData,
bit_depth,
}),
Layout::GrayAlpha => Box::new(TransformRgbToGrayExtendedExecutor::<
T,
{ Layout::Rgb as u8 },
{ Layout::GrayAlpha as u8 },
> {
linear_eval,
gamma_eval,
weights,
_phantom: PhantomData,
bit_depth,
}),
_ => unreachable!(),
},
Layout::Rgba => match dst_layout {
Layout::Rgb => unreachable!(),
Layout::Rgba => unreachable!(),
Layout::Gray => Box::new(TransformRgbToGrayExtendedExecutor::<
T,
{ Layout::Rgba as u8 },
{ Layout::Gray as u8 },
> {
linear_eval,
gamma_eval,
weights,
_phantom: PhantomData,
bit_depth,
}),
Layout::GrayAlpha => Box::new(TransformRgbToGrayExtendedExecutor::<
T,
{ Layout::Rgba as u8 },
{ Layout::GrayAlpha as u8 },
> {
linear_eval,
gamma_eval,
weights,
_phantom: PhantomData,
bit_depth,
}),
_ => unreachable!(),
},
Layout::Gray => unreachable!(),
Layout::GrayAlpha => unreachable!(),
_ => unreachable!(),
}
}
impl<
T: Copy + Default + PointeeSizeExpressible + 'static + AsPrimitive<f32>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
> TransformExecutor<T> for TransformRgbToGrayExtendedExecutor<T, SRC_LAYOUT, DST_LAYOUT>
where
u32: AsPrimitive<T>,
f32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let max_value = ((1u32 << self.bit_depth) - 1).as_();
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let in_tristimulus = Rgb::<f32>::new(
src[src_cn.r_i()].as_(),
src[src_cn.g_i()].as_(),
src[src_cn.b_i()].as_(),
);
let lin_tristimulus = self.linear_eval.evaluate_tristimulus(in_tristimulus);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_value
};
let grey = mlaf(
mlaf(
self.weights.v[0] * lin_tristimulus.r,
self.weights.v[1],
lin_tristimulus.g,
),
self.weights.v[2],
lin_tristimulus.b,
)
.min(1.)
.max(0.);
let gamma_value = self.gamma_eval.evaluate_value(grey);
dst[0] = gamma_value.as_();
if dst_channels == 2 {
dst[1] = a;
}
}
Ok(())
}
}
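// Illustrative note: unlike the LUT-driven executor, this extended variant
// evaluates tone curves per pixel, which is what makes it usable for
// non-finite sample types (f32/f64) whose domain cannot be enumerated into a
// table. A hedged sketch of such a per-pixel evaluation, using a hypothetical
// pure power-law curve:
#[allow(dead_code)]
fn per_pixel_gamma_sketch(linear: f32, gamma: f32) -> f32 {
    // One powf per sample: slower than a table lookup, but defined over the
    // whole floating-point range rather than a fixed set of table entries.
    linear.max(0.0).powf(1.0 / gamma)
}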


@@ -0,0 +1,437 @@
/*
* // Copyright (c) Radzivon Bartoshyk 4/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::TransformMatrixShaper;
use crate::conversions::rgbxyz::{
TransformMatrixShaperOptimized, make_rgb_xyz_rgb_transform, make_rgb_xyz_rgb_transform_opt,
};
use crate::conversions::rgbxyz_fixed::{make_rgb_xyz_q2_13, make_rgb_xyz_q2_13_opt};
use crate::{CmsError, Layout, TransformExecutor, TransformOptions};
use num_traits::AsPrimitive;
const FIXED_POINT_SCALE: i32 = 13; // Q2.13: 13 fractional bits
pub(crate) trait RgbXyzFactory<T: Clone + AsPrimitive<usize> + Default> {
fn make_transform<const LINEAR_CAP: usize, const GAMMA_LUT: usize, const BIT_DEPTH: usize>(
src_layout: Layout,
dst_layout: Layout,
profile: TransformMatrixShaper<T, LINEAR_CAP>,
transform_options: TransformOptions,
) -> Result<Box<dyn TransformExecutor<T> + Send + Sync>, CmsError>;
}
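// Illustrative sketch of the runtime-dispatch idiom the impls below follow:
// each SIMD path is compiled behind a cfg gate and selected at run time with
// `is_x86_feature_detected!`, with the portable path as the final fallback.
#[allow(dead_code)]
fn simd_dispatch_sketch() -> &'static str {
    #[cfg(target_arch = "x86_64")]
    {
        if std::arch::is_x86_feature_detected!("avx2") {
            return "avx2";
        }
        if std::arch::is_x86_feature_detected!("sse4.1") {
            return "sse4.1";
        }
    }
    "scalar"
}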
pub(crate) trait RgbXyzFactoryOpt<T: Clone + AsPrimitive<usize> + Default> {
fn make_optimized_transform<
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
>(
src_layout: Layout,
dst_layout: Layout,
profile: TransformMatrixShaperOptimized<T, LINEAR_CAP>,
transform_options: TransformOptions,
) -> Result<Box<dyn TransformExecutor<T> + Send + Sync>, CmsError>;
}
impl RgbXyzFactory<u16> for u16 {
fn make_transform<const LINEAR_CAP: usize, const GAMMA_LUT: usize, const BIT_DEPTH: usize>(
src_layout: Layout,
dst_layout: Layout,
profile: TransformMatrixShaper<u16, LINEAR_CAP>,
transform_options: TransformOptions,
) -> Result<Box<dyn TransformExecutor<u16> + Send + Sync>, CmsError> {
if BIT_DEPTH < 16 && transform_options.prefer_fixed_point {
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
{
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_avx2;
if std::arch::is_x86_feature_detected!("avx2") {
return make_rgb_xyz_q2_13_transform_avx2::<
u16,
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
{
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_sse_41;
if std::arch::is_x86_feature_detected!("sse4.1") {
return make_rgb_xyz_q2_13_transform_sse_41::<
u16,
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
{
return make_rgb_xyz_q2_13::<
u16,
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
make_rgb_xyz_rgb_transform::<u16, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH>(
src_layout, dst_layout, profile,
)
}
}
impl RgbXyzFactory<f32> for f32 {
fn make_transform<const LINEAR_CAP: usize, const GAMMA_LUT: usize, const BIT_DEPTH: usize>(
src_layout: Layout,
dst_layout: Layout,
profile: TransformMatrixShaper<f32, LINEAR_CAP>,
transform_options: TransformOptions,
) -> Result<Box<dyn TransformExecutor<f32> + Send + Sync>, CmsError> {
if transform_options.prefer_fixed_point {
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
{
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_avx2;
if std::arch::is_x86_feature_detected!("avx2") {
return make_rgb_xyz_q2_13_transform_avx2::<
f32,
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
{
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_sse_41;
if std::arch::is_x86_feature_detected!("sse4.1") {
return make_rgb_xyz_q2_13_transform_sse_41::<
f32,
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
{
return make_rgb_xyz_q2_13::<
f32,
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
make_rgb_xyz_rgb_transform::<f32, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH>(
src_layout, dst_layout, profile,
)
}
}
impl RgbXyzFactory<f64> for f64 {
fn make_transform<const LINEAR_CAP: usize, const GAMMA_LUT: usize, const BIT_DEPTH: usize>(
src_layout: Layout,
dst_layout: Layout,
profile: TransformMatrixShaper<f64, LINEAR_CAP>,
_: TransformOptions,
) -> Result<Box<dyn TransformExecutor<f64> + Send + Sync>, CmsError> {
make_rgb_xyz_rgb_transform::<f64, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH>(
src_layout, dst_layout, profile,
)
}
}
impl RgbXyzFactory<u8> for u8 {
fn make_transform<const LINEAR_CAP: usize, const GAMMA_LUT: usize, const BIT_DEPTH: usize>(
src_layout: Layout,
dst_layout: Layout,
profile: TransformMatrixShaper<u8, LINEAR_CAP>,
transform_options: TransformOptions,
) -> Result<Box<dyn TransformExecutor<u8> + Send + Sync>, CmsError> {
if transform_options.prefer_fixed_point {
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
{
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_avx2;
if std::arch::is_x86_feature_detected!("avx2") {
return make_rgb_xyz_q2_13_transform_avx2::<
u8,
LINEAR_CAP,
GAMMA_LUT,
8,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
{
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_sse_41;
if std::arch::is_x86_feature_detected!("sse4.1") {
return make_rgb_xyz_q2_13_transform_sse_41::<
u8,
LINEAR_CAP,
GAMMA_LUT,
8,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
make_rgb_xyz_q2_13::<u8, LINEAR_CAP, GAMMA_LUT, 8, FIXED_POINT_SCALE>(
src_layout, dst_layout, profile,
)
} else {
make_rgb_xyz_rgb_transform::<u8, LINEAR_CAP, GAMMA_LUT, 8>(
src_layout, dst_layout, profile,
)
}
}
}
// Optimized factories
impl RgbXyzFactoryOpt<u16> for u16 {
fn make_optimized_transform<
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
>(
src_layout: Layout,
dst_layout: Layout,
profile: TransformMatrixShaperOptimized<u16, LINEAR_CAP>,
transform_options: TransformOptions,
) -> Result<Box<dyn TransformExecutor<u16> + Send + Sync>, CmsError> {
if BIT_DEPTH >= 12 && transform_options.prefer_fixed_point {
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
{
if std::arch::is_aarch64_feature_detected!("rdm") {
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q1_30_opt;
return make_rgb_xyz_q1_30_opt::<u16, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH, 30>(
src_layout, dst_layout, profile,
);
}
}
}
if BIT_DEPTH < 16 && transform_options.prefer_fixed_point {
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
{
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_avx2_opt;
if std::arch::is_x86_feature_detected!("avx2") {
return make_rgb_xyz_q2_13_transform_avx2_opt::<
u16,
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
{
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_sse_41_opt;
if std::arch::is_x86_feature_detected!("sse4.1") {
return make_rgb_xyz_q2_13_transform_sse_41_opt::<
u16,
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
{
return make_rgb_xyz_q2_13_opt::<
u16,
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
make_rgb_xyz_rgb_transform_opt::<u16, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH>(
src_layout, dst_layout, profile,
)
}
}
impl RgbXyzFactoryOpt<f32> for f32 {
fn make_optimized_transform<
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
>(
src_layout: Layout,
dst_layout: Layout,
profile: TransformMatrixShaperOptimized<f32, LINEAR_CAP>,
transform_options: TransformOptions,
) -> Result<Box<dyn TransformExecutor<f32> + Send + Sync>, CmsError> {
if transform_options.prefer_fixed_point {
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
{
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_avx2_opt;
if std::arch::is_x86_feature_detected!("avx2") {
return make_rgb_xyz_q2_13_transform_avx2_opt::<
f32,
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
{
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_sse_41_opt;
if std::arch::is_x86_feature_detected!("sse4.1") {
return make_rgb_xyz_q2_13_transform_sse_41_opt::<
f32,
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
{
return if std::arch::is_aarch64_feature_detected!("rdm") {
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q1_30_opt;
make_rgb_xyz_q1_30_opt::<f32, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH, 30>(
src_layout, dst_layout, profile,
)
} else {
make_rgb_xyz_q2_13_opt::<f32, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH, FIXED_POINT_SCALE>(
src_layout, dst_layout, profile,
)
};
}
}
make_rgb_xyz_rgb_transform_opt::<f32, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH>(
src_layout, dst_layout, profile,
)
}
}
impl RgbXyzFactoryOpt<f64> for f64 {
fn make_optimized_transform<
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
>(
src_layout: Layout,
dst_layout: Layout,
profile: TransformMatrixShaperOptimized<f64, LINEAR_CAP>,
transform_options: TransformOptions,
) -> Result<Box<dyn TransformExecutor<f64> + Send + Sync>, CmsError> {
if transform_options.prefer_fixed_point {
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
{
if std::arch::is_aarch64_feature_detected!("rdm") {
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q1_30_opt;
return make_rgb_xyz_q1_30_opt::<f64, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH, 30>(
src_layout, dst_layout, profile,
);
}
}
}
make_rgb_xyz_rgb_transform_opt::<f64, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH>(
src_layout, dst_layout, profile,
)
}
}
impl RgbXyzFactoryOpt<u8> for u8 {
fn make_optimized_transform<
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
>(
src_layout: Layout,
dst_layout: Layout,
profile: TransformMatrixShaperOptimized<u8, LINEAR_CAP>,
transform_options: TransformOptions,
) -> Result<Box<dyn TransformExecutor<u8> + Send + Sync>, CmsError> {
if transform_options.prefer_fixed_point {
#[cfg(all(target_arch = "x86_64", feature = "avx512"))]
{
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_avx512_opt;
if std::arch::is_x86_feature_detected!("avx512bw")
&& std::arch::is_x86_feature_detected!("avx512vl")
{
return make_rgb_xyz_q2_13_transform_avx512_opt::<
u8,
LINEAR_CAP,
GAMMA_LUT,
8,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
{
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_avx2_opt;
if std::arch::is_x86_feature_detected!("avx2") {
return make_rgb_xyz_q2_13_transform_avx2_opt::<
u8,
LINEAR_CAP,
GAMMA_LUT,
8,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
{
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_sse_41_opt;
if std::arch::is_x86_feature_detected!("sse4.1") {
return make_rgb_xyz_q2_13_transform_sse_41_opt::<
u8,
LINEAR_CAP,
GAMMA_LUT,
8,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
make_rgb_xyz_q2_13_opt::<u8, LINEAR_CAP, GAMMA_LUT, 8, FIXED_POINT_SCALE>(
src_layout, dst_layout, profile,
)
} else {
make_rgb_xyz_rgb_transform_opt::<u8, LINEAR_CAP, GAMMA_LUT, 8>(
src_layout, dst_layout, profile,
)
}
}
}

701
vendor/moxcms/src/conversions/rgbxyz.rs vendored Normal file

@@ -0,0 +1,701 @@
/*
* // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::{CmsError, Layout, Matrix3, Matrix3f, TransformExecutor};
use num_traits::AsPrimitive;
pub(crate) struct TransformMatrixShaper<T: Clone, const BUCKET: usize> {
pub(crate) r_linear: Box<[f32; BUCKET]>,
pub(crate) g_linear: Box<[f32; BUCKET]>,
pub(crate) b_linear: Box<[f32; BUCKET]>,
pub(crate) r_gamma: Box<[T; 65536]>,
pub(crate) g_gamma: Box<[T; 65536]>,
pub(crate) b_gamma: Box<[T; 65536]>,
pub(crate) adaptation_matrix: Matrix3f,
}
/// Low-memory-footprint optimized routine for matrix shaper profiles whose
/// gamma and linear curves are identical across all channels.
pub(crate) struct TransformMatrixShaperOptimized<T: Clone, const BUCKET: usize> {
pub(crate) linear: Box<[f32; BUCKET]>,
pub(crate) gamma: Box<[T; 65536]>,
pub(crate) adaptation_matrix: Matrix3f,
}
impl<T: Clone + PointeeSizeExpressible, const BUCKET: usize> TransformMatrixShaper<T, BUCKET> {
pub(crate) fn to_q2_13_n<
R: Copy + 'static + Default,
const PRECISION: i32,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
>(
&self,
) -> TransformMatrixShaperFixedPoint<R, T, BUCKET>
where
f32: AsPrimitive<R>,
{
let linear_scale = if T::FINITE {
let lut_scale = (GAMMA_LUT - 1) as f32 / ((1 << BIT_DEPTH) - 1) as f32;
((1 << BIT_DEPTH) - 1) as f32 * lut_scale
} else {
let lut_scale = (GAMMA_LUT - 1) as f32 / (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32;
(T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32 * lut_scale
};
let mut new_box_r = Box::new([R::default(); BUCKET]);
let mut new_box_g = Box::new([R::default(); BUCKET]);
let mut new_box_b = Box::new([R::default(); BUCKET]);
for (dst, &src) in new_box_r.iter_mut().zip(self.r_linear.iter()) {
*dst = (src * linear_scale).round().as_();
}
for (dst, &src) in new_box_g.iter_mut().zip(self.g_linear.iter()) {
*dst = (src * linear_scale).round().as_();
}
for (dst, &src) in new_box_b.iter_mut().zip(self.b_linear.iter()) {
*dst = (src * linear_scale).round().as_();
}
let scale: f32 = (1i32 << PRECISION) as f32;
let source_matrix = self.adaptation_matrix;
let mut dst_matrix = Matrix3::<i16> { v: [[0i16; 3]; 3] };
for i in 0..3 {
for j in 0..3 {
dst_matrix.v[i][j] = (source_matrix.v[i][j] * scale) as i16;
}
}
TransformMatrixShaperFixedPoint {
r_linear: new_box_r,
g_linear: new_box_g,
b_linear: new_box_b,
r_gamma: self.r_gamma.clone(),
g_gamma: self.g_gamma.clone(),
b_gamma: self.b_gamma.clone(),
adaptation_matrix: dst_matrix,
}
}
}
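// Illustrative sketch of the Q2.13 encoding used above: a real value v is
// stored as v * 2^13 in an i16, giving range [-4.0, 4.0) with ~1.2e-4
// resolution. For example, 0.9505 becomes (0.9505 * 8192.0) as i16 == 7786;
// like the matrix conversion above, the cast truncates rather than rounds.
#[allow(dead_code)]
fn to_q2_13_sketch(v: f32) -> i16 {
    (v * (1i32 << 13) as f32) as i16
}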
impl<T: Clone + PointeeSizeExpressible, const BUCKET: usize>
TransformMatrixShaperOptimized<T, BUCKET>
{
pub(crate) fn to_q2_13_n<
R: Copy + 'static + Default,
const PRECISION: i32,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
>(
&self,
) -> TransformMatrixShaperFixedPointOpt<R, i16, T, BUCKET>
where
f32: AsPrimitive<R>,
{
let linear_scale = if T::FINITE {
let lut_scale = (GAMMA_LUT - 1) as f32 / ((1 << BIT_DEPTH) - 1) as f32;
((1 << BIT_DEPTH) - 1) as f32 * lut_scale
} else {
let lut_scale = (GAMMA_LUT - 1) as f32 / (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32;
(T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32 * lut_scale
};
let mut new_box_linear = Box::new([R::default(); BUCKET]);
for (dst, src) in new_box_linear.iter_mut().zip(self.linear.iter()) {
*dst = (*src * linear_scale).round().as_();
}
let scale: f32 = (1i32 << PRECISION) as f32;
let source_matrix = self.adaptation_matrix;
let mut dst_matrix = Matrix3::<i16> {
v: [[i16::default(); 3]; 3],
};
for i in 0..3 {
for j in 0..3 {
dst_matrix.v[i][j] = (source_matrix.v[i][j] * scale) as i16;
}
}
TransformMatrixShaperFixedPointOpt {
linear: new_box_linear,
gamma: self.gamma.clone(),
adaptation_matrix: dst_matrix,
}
}
#[allow(dead_code)]
pub(crate) fn to_q1_30_n<
R: Copy + 'static + Default,
const PRECISION: i32,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
>(
&self,
) -> TransformMatrixShaperFixedPointOpt<R, i32, T, BUCKET>
where
f32: AsPrimitive<R>,
f64: AsPrimitive<R>,
{
// Scale one extra bit to compensate for vqrdmlah's implicit doubling
// (Q0.31 semantics), since we use Q1.30 coefficients here.
let table_size = if T::FINITE {
(1 << BIT_DEPTH) - 1
} else {
T::NOT_FINITE_LINEAR_TABLE_SIZE - 1
};
let ext_bp = if T::FINITE {
BIT_DEPTH as u32 + 1
} else {
let bp = (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1).count_ones();
bp + 1
};
let linear_scale = {
let lut_scale = (GAMMA_LUT - 1) as f64 / table_size as f64;
((1u32 << ext_bp) - 1) as f64 * lut_scale
};
let mut new_box_linear = Box::new([R::default(); BUCKET]);
for (dst, &src) in new_box_linear.iter_mut().zip(self.linear.iter()) {
*dst = (src as f64 * linear_scale).round().as_();
}
let scale: f64 = (1i64 << PRECISION) as f64;
let source_matrix = self.adaptation_matrix;
let mut dst_matrix = Matrix3::<i32> {
v: [[i32::default(); 3]; 3],
};
for i in 0..3 {
for j in 0..3 {
dst_matrix.v[i][j] = (source_matrix.v[i][j] as f64 * scale) as i32;
}
}
TransformMatrixShaperFixedPointOpt {
linear: new_box_linear,
gamma: self.gamma.clone(),
adaptation_matrix: dst_matrix,
}
}
}
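// Illustrative scalar model of the NEON instruction the extra bit compensates
// for: sqrdmulh is a saturating rounding doubling multiply returning the high
// half, roughly saturate((2*a*b + 2^31) >> 32). The implicit doubling halves
// a product whose coefficient is stored as Q1.30 instead of Q0.31, so the
// linear table above is scaled one extra bit to cancel that.
#[allow(dead_code)]
fn sqrdmulh_s32_sketch(a: i32, b: i32) -> i32 {
    // (a*b + 2^30) >> 31 is algebraically equal to (2*a*b + 2^31) >> 32.
    let rounded = (a as i64) * (b as i64) + (1i64 << 30);
    (rounded >> 31).clamp(i32::MIN as i64, i32::MAX as i64) as i32
}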
#[allow(unused)]
struct TransformMatrixShaperScalar<
T: Clone,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
> {
pub(crate) profile: TransformMatrixShaper<T, LINEAR_CAP>,
}
#[allow(unused)]
struct TransformMatrixShaperOptScalar<
T: Clone,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
> {
pub(crate) profile: TransformMatrixShaperOptimized<T, LINEAR_CAP>,
}
#[cfg(any(
any(target_arch = "x86", target_arch = "x86_64"),
all(target_arch = "aarch64", target_feature = "neon")
))]
#[allow(unused)]
macro_rules! create_rgb_xyz_dependant_executor {
($dep_name: ident, $dependant: ident, $shaper: ident) => {
pub(crate) fn $dep_name<
T: Clone + Send + Sync + Default + PointeeSizeExpressible + Copy + 'static,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
>(
src_layout: Layout,
dst_layout: Layout,
profile: $shaper<T, LINEAR_CAP>,
) -> Result<Box<dyn TransformExecutor<T> + Send + Sync>, CmsError>
where
u32: AsPrimitive<T>,
{
if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgba) {
return Ok(Box::new($dependant::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgba as u8 },
LINEAR_CAP,
GAMMA_LUT,
> {
profile,
bit_depth: BIT_DEPTH,
}));
} else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgba) {
return Ok(Box::new($dependant::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgba as u8 },
LINEAR_CAP,
GAMMA_LUT,
> {
profile,
bit_depth: BIT_DEPTH,
}));
} else if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgb) {
return Ok(Box::new($dependant::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgb as u8 },
LINEAR_CAP,
GAMMA_LUT,
> {
profile,
bit_depth: BIT_DEPTH,
}));
} else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgb) {
return Ok(Box::new($dependant::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgb as u8 },
LINEAR_CAP,
GAMMA_LUT,
> {
profile,
bit_depth: BIT_DEPTH,
}));
}
Err(CmsError::UnsupportedProfileConnection)
}
};
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
use crate::conversions::sse::{TransformShaperRgbOptSse, TransformShaperRgbSse};
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
use crate::conversions::avx::{TransformShaperRgbAvx, TransformShaperRgbOptAvx};
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
create_rgb_xyz_dependant_executor!(
make_rgb_xyz_rgb_transform_sse_41,
TransformShaperRgbSse,
TransformMatrixShaper
);
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
create_rgb_xyz_dependant_executor!(
make_rgb_xyz_rgb_transform_sse_41_opt,
TransformShaperRgbOptSse,
TransformMatrixShaperOptimized
);
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
create_rgb_xyz_dependant_executor!(
make_rgb_xyz_rgb_transform_avx2,
TransformShaperRgbAvx,
TransformMatrixShaper
);
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
create_rgb_xyz_dependant_executor!(
make_rgb_xyz_rgb_transform_avx2_opt,
TransformShaperRgbOptAvx,
TransformMatrixShaperOptimized
);
#[cfg(all(target_arch = "x86_64", feature = "avx512"))]
use crate::conversions::avx512::TransformShaperRgbOptAvx512;
#[cfg(all(target_arch = "x86_64", feature = "avx512"))]
create_rgb_xyz_dependant_executor!(
make_rgb_xyz_rgb_transform_avx512_opt,
TransformShaperRgbOptAvx512,
TransformMatrixShaperOptimized
);
#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
pub(crate) fn make_rgb_xyz_rgb_transform<
T: Clone + Send + Sync + PointeeSizeExpressible + 'static + Copy + Default,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
>(
src_layout: Layout,
dst_layout: Layout,
profile: TransformMatrixShaper<T, LINEAR_CAP>,
) -> Result<Box<dyn TransformExecutor<T> + Send + Sync>, CmsError>
where
u32: AsPrimitive<T>,
{
#[cfg(all(feature = "avx", target_arch = "x86_64"))]
if std::arch::is_x86_feature_detected!("avx2") {
return make_rgb_xyz_rgb_transform_avx2::<T, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH>(
src_layout, dst_layout, profile,
);
}
#[cfg(all(feature = "sse", any(target_arch = "x86", target_arch = "x86_64")))]
if std::arch::is_x86_feature_detected!("sse4.1") {
return make_rgb_xyz_rgb_transform_sse_41::<T, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH>(
src_layout, dst_layout, profile,
);
}
if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgba) {
return Ok(Box::new(TransformMatrixShaperScalar::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgba as u8 },
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
> {
profile,
}));
} else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgba) {
return Ok(Box::new(TransformMatrixShaperScalar::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgba as u8 },
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
> {
profile,
}));
} else if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgb) {
return Ok(Box::new(TransformMatrixShaperScalar::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgb as u8 },
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
> {
profile,
}));
} else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgb) {
return Ok(Box::new(TransformMatrixShaperScalar::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgb as u8 },
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
> {
profile,
}));
}
Err(CmsError::UnsupportedProfileConnection)
}
#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
pub(crate) fn make_rgb_xyz_rgb_transform_opt<
T: Clone + Send + Sync + PointeeSizeExpressible + 'static + Copy + Default,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
>(
src_layout: Layout,
dst_layout: Layout,
profile: TransformMatrixShaperOptimized<T, LINEAR_CAP>,
) -> Result<Box<dyn TransformExecutor<T> + Send + Sync>, CmsError>
where
u32: AsPrimitive<T>,
{
#[cfg(all(feature = "avx512", target_arch = "x86_64"))]
if std::arch::is_x86_feature_detected!("avx512bw")
&& std::arch::is_x86_feature_detected!("avx512vl")
&& std::arch::is_x86_feature_detected!("fma")
{
return make_rgb_xyz_rgb_transform_avx512_opt::<T, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH>(
src_layout, dst_layout, profile,
);
}
#[cfg(all(feature = "avx", target_arch = "x86_64"))]
if std::arch::is_x86_feature_detected!("avx2") {
return make_rgb_xyz_rgb_transform_avx2_opt::<T, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH>(
src_layout, dst_layout, profile,
);
}
#[cfg(all(feature = "sse", any(target_arch = "x86", target_arch = "x86_64")))]
if std::arch::is_x86_feature_detected!("sse4.1") {
return make_rgb_xyz_rgb_transform_sse_41_opt::<T, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH>(
src_layout, dst_layout, profile,
);
}
if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgba) {
return Ok(Box::new(TransformMatrixShaperOptScalar::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgba as u8 },
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
> {
profile,
}));
} else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgba) {
return Ok(Box::new(TransformMatrixShaperOptScalar::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgba as u8 },
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
> {
profile,
}));
} else if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgb) {
return Ok(Box::new(TransformMatrixShaperOptScalar::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgb as u8 },
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
> {
profile,
}));
} else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgb) {
return Ok(Box::new(TransformMatrixShaperOptScalar::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgb as u8 },
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
> {
profile,
}));
}
Err(CmsError::UnsupportedProfileConnection)
}
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
use crate::conversions::neon::{TransformShaperRgbNeon, TransformShaperRgbOptNeon};
use crate::conversions::rgbxyz_fixed::{
TransformMatrixShaperFixedPoint, TransformMatrixShaperFixedPointOpt,
};
use crate::transform::PointeeSizeExpressible;
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
create_rgb_xyz_dependant_executor!(
make_rgb_xyz_rgb_transform,
TransformShaperRgbNeon,
TransformMatrixShaper
);
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
create_rgb_xyz_dependant_executor!(
make_rgb_xyz_rgb_transform_opt,
TransformShaperRgbOptNeon,
TransformMatrixShaperOptimized
);
#[allow(unused)]
impl<
T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
> TransformExecutor<T>
for TransformMatrixShaperScalar<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
use crate::mlaf::mlaf;
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let transform = self.profile.adaptation_matrix;
let scale = (GAMMA_LUT - 1) as f32;
let max_colors: T = ((1 << BIT_DEPTH) - 1).as_();
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let r = self.profile.r_linear[src[src_cn.r_i()]._as_usize()];
let g = self.profile.g_linear[src[src_cn.g_i()]._as_usize()];
let b = self.profile.b_linear[src[src_cn.b_i()]._as_usize()];
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let new_r = mlaf(
0.5f32,
mlaf(
mlaf(r * transform.v[0][0], g, transform.v[0][1]),
b,
transform.v[0][2],
)
.max(0f32)
.min(1f32),
scale,
);
let new_g = mlaf(
0.5f32,
mlaf(
mlaf(r * transform.v[1][0], g, transform.v[1][1]),
b,
transform.v[1][2],
)
.max(0f32)
.min(1f32),
scale,
);
let new_b = mlaf(
0.5f32,
mlaf(
mlaf(r * transform.v[2][0], g, transform.v[2][1]),
b,
transform.v[2][2],
)
.max(0f32)
.min(1f32),
scale,
);
dst[dst_cn.r_i()] = self.profile.r_gamma[(new_r as u16) as usize];
dst[dst_cn.g_i()] = self.profile.g_gamma[(new_g as u16) as usize];
dst[dst_cn.b_i()] = self.profile.b_gamma[(new_b as u16) as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
Ok(())
}
}
#[allow(unused)]
impl<
T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
> TransformExecutor<T>
for TransformMatrixShaperOptScalar<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
use crate::mlaf::mlaf;
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let transform = self.profile.adaptation_matrix;
let scale = (GAMMA_LUT - 1) as f32;
let max_colors: T = ((1 << BIT_DEPTH) - 1).as_();
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let r = self.profile.linear[src[src_cn.r_i()]._as_usize()];
let g = self.profile.linear[src[src_cn.g_i()]._as_usize()];
let b = self.profile.linear[src[src_cn.b_i()]._as_usize()];
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let new_r = mlaf(
0.5f32,
mlaf(
mlaf(r * transform.v[0][0], g, transform.v[0][1]),
b,
transform.v[0][2],
)
.max(0f32)
.min(1f32),
scale,
);
let new_g = mlaf(
0.5f32,
mlaf(
mlaf(r * transform.v[1][0], g, transform.v[1][1]),
b,
transform.v[1][2],
)
.max(0f32)
.min(1f32),
scale,
);
let new_b = mlaf(
0.5f32,
mlaf(
mlaf(r * transform.v[2][0], g, transform.v[2][1]),
b,
transform.v[2][2],
)
.max(0f32)
.min(1f32),
scale,
);
dst[dst_cn.r_i()] = self.profile.gamma[(new_r as u16) as usize];
dst[dst_cn.g_i()] = self.profile.gamma[(new_g as u16) as usize];
dst[dst_cn.b_i()] = self.profile.gamma[(new_b as u16) as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
Ok(())
}
}
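// Illustrative scalar model of `mlaf` as used in the scalar executors above:
// a multiply-add computing acc + a * b. The crate's actual `mlaf` may choose
// between fused and unfused forms depending on the target; this sketch always
// uses the fused one.
#[allow(dead_code)]
fn mlaf_sketch(acc: f32, a: f32, b: f32) -> f32 {
    a.mul_add(b, acc)
}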


@@ -0,0 +1,487 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::Layout;
use crate::conversions::TransformMatrixShaper;
use crate::matrix::Matrix3;
use crate::{CmsError, TransformExecutor};
use num_traits::AsPrimitive;
/// Fixed-point conversion in Q2.13 format.
pub(crate) struct TransformMatrixShaperFixedPoint<R, T, const LINEAR_CAP: usize> {
pub(crate) r_linear: Box<[R; LINEAR_CAP]>,
pub(crate) g_linear: Box<[R; LINEAR_CAP]>,
pub(crate) b_linear: Box<[R; LINEAR_CAP]>,
pub(crate) r_gamma: Box<[T; 65536]>,
pub(crate) g_gamma: Box<[T; 65536]>,
pub(crate) b_gamma: Box<[T; 65536]>,
pub(crate) adaptation_matrix: Matrix3<i16>,
}
/// Fixed-point conversion in Q2.13 format.
///
/// Optimized routine for matrix shapers where *all curves are the same*.
pub(crate) struct TransformMatrixShaperFixedPointOpt<R, W, T, const LINEAR_CAP: usize> {
pub(crate) linear: Box<[R; LINEAR_CAP]>,
pub(crate) gamma: Box<[T; 65536]>,
pub(crate) adaptation_matrix: Matrix3<W>,
}
#[allow(unused)]
struct TransformMatrixShaperQ2_13<
T: Copy,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> {
pub(crate) profile: TransformMatrixShaperFixedPoint<i16, T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
#[allow(unused)]
struct TransformMatrixShaperQ2_13Optimized<
T: Copy,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> {
pub(crate) profile: TransformMatrixShaperFixedPointOpt<i16, i16, T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
#[allow(unused)]
impl<
T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> TransformExecutor<T>
for TransformMatrixShaperQ2_13<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT, PRECISION>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let transform = self.profile.adaptation_matrix;
let max_colors: T = ((1 << self.bit_depth as u32) - 1u32).as_();
let rnd: i32 = 1i32 << (PRECISION - 1);
let v_gamma_max = GAMMA_LUT as i32 - 1;
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let r = self.profile.r_linear[src[src_cn.r_i()]._as_usize()];
let g = self.profile.g_linear[src[src_cn.g_i()]._as_usize()];
let b = self.profile.b_linear[src[src_cn.b_i()]._as_usize()];
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let new_r = r as i32 * transform.v[0][0] as i32
+ g as i32 * transform.v[0][1] as i32
+ b as i32 * transform.v[0][2] as i32
+ rnd;
let r_q2_13 = (new_r >> PRECISION).min(v_gamma_max).max(0) as u16;
let new_g = r as i32 * transform.v[1][0] as i32
+ g as i32 * transform.v[1][1] as i32
+ b as i32 * transform.v[1][2] as i32
+ rnd;
let g_q2_13 = (new_g >> PRECISION).min(v_gamma_max).max(0) as u16;
let new_b = r as i32 * transform.v[2][0] as i32
+ g as i32 * transform.v[2][1] as i32
+ b as i32 * transform.v[2][2] as i32
+ rnd;
let b_q2_13 = (new_b >> PRECISION).min(v_gamma_max).max(0) as u16;
dst[dst_cn.r_i()] = self.profile.r_gamma[r_q2_13 as usize];
dst[dst_cn.g_i()] = self.profile.g_gamma[g_q2_13 as usize];
dst[dst_cn.b_i()] = self.profile.b_gamma[b_q2_13 as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
Ok(())
}
}
#[allow(unused)]
impl<
T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> TransformExecutor<T>
for TransformMatrixShaperQ2_13Optimized<
T,
SRC_LAYOUT,
DST_LAYOUT,
LINEAR_CAP,
GAMMA_LUT,
PRECISION,
>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let transform = self.profile.adaptation_matrix;
let max_colors: T = ((1 << self.bit_depth as u32) - 1u32).as_();
let rnd: i32 = 1i32 << (PRECISION - 1);
let v_gamma_max = GAMMA_LUT as i32 - 1;
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let r = self.profile.linear[src[src_cn.r_i()]._as_usize()];
let g = self.profile.linear[src[src_cn.g_i()]._as_usize()];
let b = self.profile.linear[src[src_cn.b_i()]._as_usize()];
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let new_r = r as i32 * transform.v[0][0] as i32
+ g as i32 * transform.v[0][1] as i32
+ b as i32 * transform.v[0][2] as i32
+ rnd;
let r_q2_13 = (new_r >> PRECISION).min(v_gamma_max).max(0) as u16;
let new_g = r as i32 * transform.v[1][0] as i32
+ g as i32 * transform.v[1][1] as i32
+ b as i32 * transform.v[1][2] as i32
+ rnd;
let g_q2_13 = (new_g >> PRECISION).min(v_gamma_max).max(0) as u16;
let new_b = r as i32 * transform.v[2][0] as i32
+ g as i32 * transform.v[2][1] as i32
+ b as i32 * transform.v[2][2] as i32
+ rnd;
let b_q2_13 = (new_b >> PRECISION).min(v_gamma_max).max(0) as u16;
dst[dst_cn.r_i()] = self.profile.gamma[r_q2_13 as usize];
dst[dst_cn.g_i()] = self.profile.gamma[g_q2_13 as usize];
dst[dst_cn.b_i()] = self.profile.gamma[b_q2_13 as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
Ok(())
}
}
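// Illustrative sketch of the integer pipeline in the executors above: the
// pre-scaled linear values are multiplied by Q2.13 matrix coefficients in
// i32, the rounding term 2^(PRECISION-1) is added, the sum is shifted right
// by PRECISION, and the result is clamped to the gamma LUT's index range.
#[allow(dead_code)]
fn q2_13_row_sketch(rgb: [i16; 3], row: [i16; 3], gamma_lut_len: i32) -> u16 {
    const PRECISION: i32 = 13;
    let acc = rgb[0] as i32 * row[0] as i32
        + rgb[1] as i32 * row[1] as i32
        + rgb[2] as i32 * row[2] as i32
        + (1 << (PRECISION - 1)); // rounding term
    (acc >> PRECISION).clamp(0, gamma_lut_len - 1) as u16
}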
macro_rules! create_rgb_xyz_dependant_q2_13_executor {
($dep_name: ident, $dependant: ident, $resolution: ident, $shaper: ident) => {
pub(crate) fn $dep_name<
T: Clone + Send + Sync + AsPrimitive<usize> + Default + PointeeSizeExpressible,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
const PRECISION: i32,
>(
src_layout: Layout,
dst_layout: Layout,
profile: $shaper<T, LINEAR_CAP>,
) -> Result<Box<dyn TransformExecutor<T> + Send + Sync>, CmsError>
where
u32: AsPrimitive<T>,
{
let q2_13_profile =
profile.to_q2_13_n::<$resolution, PRECISION, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH>();
if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgba) {
return Ok(Box::new($dependant::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgba as u8 },
LINEAR_CAP,
GAMMA_LUT,
PRECISION,
> {
profile: q2_13_profile,
bit_depth: BIT_DEPTH,
}));
} else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgba) {
return Ok(Box::new($dependant::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgba as u8 },
LINEAR_CAP,
GAMMA_LUT,
PRECISION,
> {
profile: q2_13_profile,
bit_depth: BIT_DEPTH,
}));
} else if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgb) {
return Ok(Box::new($dependant::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgb as u8 },
LINEAR_CAP,
GAMMA_LUT,
PRECISION,
> {
profile: q2_13_profile,
bit_depth: BIT_DEPTH,
}));
} else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgb) {
return Ok(Box::new($dependant::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgb as u8 },
LINEAR_CAP,
GAMMA_LUT,
PRECISION,
> {
profile: q2_13_profile,
bit_depth: BIT_DEPTH,
}));
}
Err(CmsError::UnsupportedProfileConnection)
}
};
}
#[cfg(all(target_arch = "aarch64", feature = "neon"))]
macro_rules! create_rgb_xyz_dependant_q1_30_executor {
($dep_name: ident, $dependant: ident, $resolution: ident, $shaper: ident) => {
pub(crate) fn $dep_name<
T: Clone + Send + Sync + AsPrimitive<usize> + Default + PointeeSizeExpressible,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
const PRECISION: i32,
>(
src_layout: Layout,
dst_layout: Layout,
profile: $shaper<T, LINEAR_CAP>,
) -> Result<Box<dyn TransformExecutor<T> + Send + Sync>, CmsError>
where
u32: AsPrimitive<T>,
{
let q1_30_profile =
profile.to_q1_30_n::<$resolution, PRECISION, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH>();
if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgba) {
return Ok(Box::new($dependant::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgba as u8 },
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
PRECISION,
> {
profile: q1_30_profile,
}));
} else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgba) {
return Ok(Box::new($dependant::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgba as u8 },
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
PRECISION,
> {
profile: q1_30_profile,
}));
} else if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgb) {
return Ok(Box::new($dependant::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgb as u8 },
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
PRECISION,
> {
profile: q1_30_profile,
}));
} else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgb) {
return Ok(Box::new($dependant::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgb as u8 },
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
PRECISION,
> {
profile: q1_30_profile,
}));
}
Err(CmsError::UnsupportedProfileConnection)
}
};
}
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
use crate::conversions::neon::{
TransformShaperQ1_30NeonOpt, TransformShaperQ2_13Neon, TransformShaperQ2_13NeonOpt,
};
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
create_rgb_xyz_dependant_q2_13_executor!(
make_rgb_xyz_q2_13,
TransformShaperQ2_13Neon,
i16,
TransformMatrixShaper
);
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
create_rgb_xyz_dependant_q2_13_executor!(
make_rgb_xyz_q2_13_opt,
TransformShaperQ2_13NeonOpt,
i16,
TransformMatrixShaperOptimized
);
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
create_rgb_xyz_dependant_q1_30_executor!(
make_rgb_xyz_q1_30_opt,
TransformShaperQ1_30NeonOpt,
i32,
TransformMatrixShaperOptimized
);
#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
create_rgb_xyz_dependant_q2_13_executor!(
make_rgb_xyz_q2_13,
TransformMatrixShaperQ2_13,
i16,
TransformMatrixShaper
);
#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
create_rgb_xyz_dependant_q2_13_executor!(
make_rgb_xyz_q2_13_opt,
TransformMatrixShaperQ2_13Optimized,
i16,
TransformMatrixShaperOptimized
);
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
use crate::conversions::sse::{TransformShaperQ2_13OptSse, TransformShaperQ2_13Sse};
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
create_rgb_xyz_dependant_q2_13_executor!(
make_rgb_xyz_q2_13_transform_sse_41,
TransformShaperQ2_13Sse,
i32,
TransformMatrixShaper
);
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
create_rgb_xyz_dependant_q2_13_executor!(
make_rgb_xyz_q2_13_transform_sse_41_opt,
TransformShaperQ2_13OptSse,
i32,
TransformMatrixShaperOptimized
);
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
use crate::conversions::avx::{TransformShaperRgbQ2_13Avx, TransformShaperRgbQ2_13OptAvx};
use crate::conversions::rgbxyz::TransformMatrixShaperOptimized;
use crate::transform::PointeeSizeExpressible;
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
create_rgb_xyz_dependant_q2_13_executor!(
make_rgb_xyz_q2_13_transform_avx2,
TransformShaperRgbQ2_13Avx,
i32,
TransformMatrixShaper
);
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
create_rgb_xyz_dependant_q2_13_executor!(
make_rgb_xyz_q2_13_transform_avx2_opt,
TransformShaperRgbQ2_13OptAvx,
i32,
TransformMatrixShaperOptimized
);
#[cfg(all(target_arch = "x86_64", feature = "avx512"))]
use crate::conversions::avx512::TransformShaperRgbQ2_13OptAvx512;
#[cfg(all(target_arch = "x86_64", feature = "avx512"))]
create_rgb_xyz_dependant_q2_13_executor!(
make_rgb_xyz_q2_13_transform_avx512_opt,
TransformShaperRgbQ2_13OptAvx512,
i32,
TransformMatrixShaperOptimized
);
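// Note on the lane widths chosen above: the NEON Q2.13 executors keep their
// linear tables in i16 (presumably to pair with 16-bit multiply-accumulate
// lanes), while the SSE/AVX variants widen them to i32, which suits 32-bit
// integer multiplies; all of them index the same 16-bit-entry gamma tables.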


@@ -0,0 +1,332 @@
/*
* // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::trc::ToneCurveEvaluator;
use crate::{CmsError, Layout, Matrix3f, PointeeSizeExpressible, Rgb, TransformExecutor};
use num_traits::AsPrimitive;
use std::marker::PhantomData;
pub(crate) struct TransformShaperRgbFloat<T: Clone, const BUCKET: usize> {
pub(crate) r_linear: Box<[f32; BUCKET]>,
pub(crate) g_linear: Box<[f32; BUCKET]>,
pub(crate) b_linear: Box<[f32; BUCKET]>,
pub(crate) gamma_evaluator: Box<dyn ToneCurveEvaluator + Send + Sync>,
pub(crate) adaptation_matrix: Matrix3f,
pub(crate) phantom_data: PhantomData<T>,
}
pub(crate) struct TransformShaperFloatInOut<T: Clone> {
pub(crate) linear_evaluator: Box<dyn ToneCurveEvaluator + Send + Sync>,
pub(crate) gamma_evaluator: Box<dyn ToneCurveEvaluator + Send + Sync>,
pub(crate) adaptation_matrix: Matrix3f,
pub(crate) phantom_data: PhantomData<T>,
}
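// Two shaper flavors: TransformShaperRgbFloat linearizes through per-channel
// LUTs indexed by the integer source sample, while TransformShaperFloatInOut
// evaluates both tone curves directly for float-in/float-out pipelines. The
// wrappers below monomorphize them over the const source/destination layouts.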
struct TransformShaperFloatScalar<
T: Clone,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const BIT_DEPTH: usize,
> {
pub(crate) profile: TransformShaperRgbFloat<T, LINEAR_CAP>,
}
struct TransformShaperRgbFloatInOut<T: Clone, const SRC_LAYOUT: u8, const DST_LAYOUT: u8> {
pub(crate) profile: TransformShaperFloatInOut<T>,
pub(crate) bit_depth: usize,
}
pub(crate) fn make_rgb_xyz_rgb_transform_float<
T: Clone + Send + Sync + PointeeSizeExpressible + 'static + Copy + Default,
const LINEAR_CAP: usize,
const BIT_DEPTH: usize,
>(
src_layout: Layout,
dst_layout: Layout,
profile: TransformShaperRgbFloat<T, LINEAR_CAP>,
) -> Result<Box<dyn TransformExecutor<T> + Send + Sync>, CmsError>
where
u32: AsPrimitive<T>,
f32: AsPrimitive<T>,
{
if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgba) {
return Ok(Box::new(TransformShaperFloatScalar::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgba as u8 },
LINEAR_CAP,
BIT_DEPTH,
> {
profile,
}));
} else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgba) {
return Ok(Box::new(TransformShaperFloatScalar::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgba as u8 },
LINEAR_CAP,
BIT_DEPTH,
> {
profile,
}));
} else if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgb) {
return Ok(Box::new(TransformShaperFloatScalar::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgb as u8 },
LINEAR_CAP,
BIT_DEPTH,
> {
profile,
}));
} else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgb) {
return Ok(Box::new(TransformShaperFloatScalar::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgb as u8 },
LINEAR_CAP,
BIT_DEPTH,
> {
profile,
}));
}
Err(CmsError::UnsupportedProfileConnection)
}
pub(crate) fn make_rgb_xyz_rgb_transform_float_in_out<
T: Clone + Send + Sync + PointeeSizeExpressible + 'static + Copy + Default + AsPrimitive<f32>,
const BIT_DEPTH: usize,
>(
src_layout: Layout,
dst_layout: Layout,
profile: TransformShaperFloatInOut<T>,
) -> Result<Box<dyn TransformExecutor<T> + Send + Sync>, CmsError>
where
u32: AsPrimitive<T>,
f32: AsPrimitive<T>,
{
if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgba) {
return Ok(Box::new(TransformShaperRgbFloatInOut::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgba as u8 },
> {
profile,
bit_depth: BIT_DEPTH,
}));
} else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgba) {
return Ok(Box::new(TransformShaperRgbFloatInOut::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgba as u8 },
> {
profile,
bit_depth: BIT_DEPTH,
}));
} else if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgb) {
return Ok(Box::new(TransformShaperRgbFloatInOut::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgb as u8 },
> {
profile,
bit_depth: BIT_DEPTH,
}));
} else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgb) {
return Ok(Box::new(TransformShaperRgbFloatInOut::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgb as u8 },
> {
profile,
bit_depth: BIT_DEPTH,
}));
}
Err(CmsError::UnsupportedProfileConnection)
}
impl<
T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const BIT_DEPTH: usize,
> TransformExecutor<T>
for TransformShaperFloatScalar<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, BIT_DEPTH>
where
u32: AsPrimitive<T>,
f32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
use crate::mlaf::mlaf;
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let transform = self.profile.adaptation_matrix;
let max_colors: T = ((1 << BIT_DEPTH) - 1).as_();
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let r = self.profile.r_linear[src[src_cn.r_i()]._as_usize()];
let g = self.profile.g_linear[src[src_cn.g_i()]._as_usize()];
let b = self.profile.b_linear[src[src_cn.b_i()]._as_usize()];
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
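// One row of the 3x3 adaptation matrix per output channel, folded into
// fused multiply-adds: new_r = m00*r + m01*g + m02*b, and likewise for g, b.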
let new_r = mlaf(
mlaf(r * transform.v[0][0], g, transform.v[0][1]),
b,
transform.v[0][2],
);
let new_g = mlaf(
mlaf(r * transform.v[1][0], g, transform.v[1][1]),
b,
transform.v[1][2],
);
let new_b = mlaf(
mlaf(r * transform.v[2][0], g, transform.v[2][1]),
b,
transform.v[2][2],
);
let mut rgb = Rgb::new(new_r, new_g, new_b);
rgb = self.profile.gamma_evaluator.evaluate_tristimulus(rgb);
dst[dst_cn.r_i()] = rgb.r.as_();
dst[dst_cn.g_i()] = rgb.g.as_();
dst[dst_cn.b_i()] = rgb.b.as_();
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
Ok(())
}
}
impl<
T: Clone + PointeeSizeExpressible + Copy + Default + 'static + AsPrimitive<f32>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
> TransformExecutor<T> for TransformShaperRgbFloatInOut<T, SRC_LAYOUT, DST_LAYOUT>
where
u32: AsPrimitive<T>,
f32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
use crate::mlaf::mlaf;
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let transform = self.profile.adaptation_matrix;
let max_colors: T = ((1 << self.bit_depth) - 1).as_();
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let mut src_rgb = Rgb::new(
src[src_cn.r_i()].as_(),
src[src_cn.g_i()].as_(),
src[src_cn.b_i()].as_(),
);
src_rgb = self.profile.linear_evaluator.evaluate_tristimulus(src_rgb);
let r = src_rgb.r;
let g = src_rgb.g;
let b = src_rgb.b;
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let new_r = mlaf(
mlaf(r * transform.v[0][0], g, transform.v[0][1]),
b,
transform.v[0][2],
);
let new_g = mlaf(
mlaf(r * transform.v[1][0], g, transform.v[1][1]),
b,
transform.v[1][2],
);
let new_b = mlaf(
mlaf(r * transform.v[2][0], g, transform.v[2][1]),
b,
transform.v[2][2],
);
let mut rgb = Rgb::new(new_r, new_g, new_b);
rgb = self.profile.gamma_evaluator.evaluate_tristimulus(rgb);
dst[dst_cn.r_i()] = rgb.r.as_();
dst[dst_cn.g_i()] = rgb.g.as_();
dst[dst_cn.b_i()] = rgb.b.as_();
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
Ok(())
}
}

View File

@@ -0,0 +1,457 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::interpolator::BarycentricWeight;
use crate::math::FusedMultiplyAdd;
use num_traits::AsPrimitive;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::ops::{Add, Mul, Sub};
#[repr(align(16), C)]
pub(crate) struct SseAlignedF32(pub(crate) [f32; 4]);
#[cfg(feature = "options")]
pub(crate) struct TetrahedralSse<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [SseAlignedF32],
}
#[cfg(feature = "options")]
pub(crate) struct PyramidalSse<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [SseAlignedF32],
}
#[cfg(feature = "options")]
pub(crate) struct PrismaticSse<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [SseAlignedF32],
}
pub(crate) struct TrilinearSse<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [SseAlignedF32],
}
trait Fetcher<T> {
fn fetch(&self, x: i32, y: i32, z: i32) -> T;
}
#[derive(Copy, Clone)]
#[repr(transparent)]
pub(crate) struct SseVector {
pub(crate) v: __m128,
}
impl From<f32> for SseVector {
#[inline(always)]
fn from(v: f32) -> Self {
SseVector {
v: unsafe { _mm_set1_ps(v) },
}
}
}
impl Sub<SseVector> for SseVector {
type Output = Self;
#[inline(always)]
fn sub(self, rhs: SseVector) -> Self::Output {
SseVector {
v: unsafe { _mm_sub_ps(self.v, rhs.v) },
}
}
}
impl Add<SseVector> for SseVector {
type Output = Self;
#[inline(always)]
fn add(self, rhs: SseVector) -> Self::Output {
SseVector {
v: unsafe { _mm_add_ps(self.v, rhs.v) },
}
}
}
impl Mul<SseVector> for SseVector {
type Output = Self;
#[inline(always)]
fn mul(self, rhs: SseVector) -> Self::Output {
SseVector {
v: unsafe { _mm_mul_ps(self.v, rhs.v) },
}
}
}
impl FusedMultiplyAdd<SseVector> for SseVector {
#[inline(always)]
fn mla(&self, b: SseVector, c: SseVector) -> SseVector {
SseVector {
v: unsafe { _mm_add_ps(self.v, _mm_mul_ps(b.v, c.v)) },
}
}
}
struct TetrahedralSseFetchVector<'a, const GRID_SIZE: usize> {
cube: &'a [SseAlignedF32],
}
impl<const GRID_SIZE: usize> Fetcher<SseVector> for TetrahedralSseFetchVector<'_, GRID_SIZE> {
#[inline(always)]
fn fetch(&self, x: i32, y: i32, z: i32) -> SseVector {
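// Row-major flattening of the 3D grid: offset = x*G^2 + y*G + z.
// SseAlignedF32 guarantees 16-byte alignment, so the aligned load below is sound.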
let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
+ y as u32 * GRID_SIZE as u32
+ z as u32) as usize;
let jx = unsafe { self.cube.get_unchecked(offset..) };
SseVector {
v: unsafe { _mm_load_ps(jx.as_ptr() as *const _) },
}
}
}
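/// Samples a 3D LUT at (in_r, in_g, in_b) using the precomputed barycentric
/// weights and returns one RGBx SSE lane.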
pub(crate) trait SseMdInterpolation<'a, const GRID_SIZE: usize> {
fn new(table: &'a [SseAlignedF32]) -> Self;
fn inter3_sse<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
) -> SseVector;
}
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> TetrahedralSse<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
r: impl Fetcher<SseVector>,
) -> SseVector {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let rx = lut_r.w;
let ry = lut_g.w;
let rz = lut_b.w;
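// Tetrahedral interpolation: the ordering of the fractional offsets
// (rx, ry, rz) selects one of the six tetrahedra that partition the grid
// cell; c1..c3 are the value deltas along that tetrahedron's edges, so the
// result is c0 + c1*rx + c2*ry + c3*rz.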
let c0 = r.fetch(x, y, z);
let c1;
let c2;
let c3;
if rx >= ry {
if ry >= rz {
//rx >= ry && ry >= rz
c1 = r.fetch(x_n, y, z) - c0;
c2 = r.fetch(x_n, y_n, z) - r.fetch(x_n, y, z);
c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
} else if rx >= rz {
//rx >= rz && rz > ry
c1 = r.fetch(x_n, y, z) - c0;
c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
c3 = r.fetch(x_n, y, z_n) - r.fetch(x_n, y, z);
} else {
//rz > rx && rx >= ry
c1 = r.fetch(x_n, y, z_n) - r.fetch(x, y, z_n);
c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
c3 = r.fetch(x, y, z_n) - c0;
}
} else if rx >= rz {
//ry > rx && rx >= rz
c1 = r.fetch(x_n, y_n, z) - r.fetch(x, y_n, z);
c2 = r.fetch(x, y_n, z) - c0;
c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
} else if ry >= rz {
//ry >= rz && rz > rx
c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
c2 = r.fetch(x, y_n, z) - c0;
c3 = r.fetch(x, y_n, z_n) - r.fetch(x, y_n, z);
} else {
//rz > ry && ry > rx
c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
c2 = r.fetch(x, y_n, z_n) - r.fetch(x, y, z_n);
c3 = r.fetch(x, y, z_n) - c0;
}
let s0 = c0.mla(c1, SseVector::from(rx));
let s1 = s0.mla(c2, SseVector::from(ry));
s1.mla(c3, SseVector::from(rz))
}
}
macro_rules! define_inter_sse {
($interpolator: ident) => {
impl<'a, const GRID_SIZE: usize> SseMdInterpolation<'a, GRID_SIZE>
for $interpolator<'a, GRID_SIZE>
{
#[inline]
fn new(table: &'a [SseAlignedF32]) -> Self {
Self { cube: table }
}
#[inline(always)]
fn inter3_sse<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
) -> SseVector {
self.interpolate(
in_r,
in_g,
in_b,
lut,
TetrahedralSseFetchVector::<GRID_SIZE> { cube: self.cube },
)
}
}
};
}
#[cfg(feature = "options")]
define_inter_sse!(TetrahedralSse);
#[cfg(feature = "options")]
define_inter_sse!(PyramidalSse);
#[cfg(feature = "options")]
define_inter_sse!(PrismaticSse);
define_inter_sse!(TrilinearSse);
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> PyramidalSse<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
r: impl Fetcher<SseVector>,
) -> SseVector {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let c0 = r.fetch(x, y, z);
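// Pyramidal interpolation: the three branches pick the region where db, dr,
// or dg respectively is the smallest offset; the final term adds a bilinear
// cross-product of the two larger offsets (e.g. dr*dg).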
if dr > db && dg > db {
let x0 = r.fetch(x_n, y_n, z_n);
let x1 = r.fetch(x_n, y_n, z);
let x2 = r.fetch(x_n, y, z);
let x3 = r.fetch(x, y_n, z);
let c1 = x0 - x1;
let c2 = x2 - c0;
let c3 = x3 - c0;
let c4 = c0 - x3 - x2 + x1;
let s0 = c0.mla(c1, SseVector::from(db));
let s1 = s0.mla(c2, SseVector::from(dr));
let s2 = s1.mla(c3, SseVector::from(dg));
s2.mla(c4, SseVector::from(dr * dg))
} else if db > dr && dg > dr {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y_n, z_n);
let x2 = r.fetch(x, y_n, z_n);
let x3 = r.fetch(x, y_n, z);
let c1 = x0 - c0;
let c2 = x1 - x2;
let c3 = x3 - c0;
let c4 = c0 - x3 - x0 + x2;
let s0 = c0.mla(c1, SseVector::from(db));
let s1 = s0.mla(c2, SseVector::from(dr));
let s2 = s1.mla(c3, SseVector::from(dg));
s2.mla(c4, SseVector::from(dg * db))
} else {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y, z);
let x2 = r.fetch(x_n, y, z_n);
let x3 = r.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - c0;
let c3 = x3 - x2;
let c4 = c0 - x1 - x0 + x2;
let s0 = c0.mla(c1, SseVector::from(db));
let s1 = s0.mla(c2, SseVector::from(dr));
let s2 = s1.mla(c3, SseVector::from(dg));
s2.mla(c4, SseVector::from(db * dr))
}
}
}
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> PrismaticSse<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
r: impl Fetcher<SseVector>,
) -> SseVector {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let c0 = r.fetch(x, y, z);
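// Prismatic interpolation: the db > dr diagonal plane splits the cell into
// two triangular prisms; each prism's value combines five lattice deltas
// with the dg*db and dr*dg cross-terms.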
if db > dr {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y, z_n);
let x2 = r.fetch(x, y_n, z);
let x3 = r.fetch(x, y_n, z_n);
let x4 = r.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - x0;
let c3 = x2 - c0;
let c4 = c0 - x2 - x0 + x3;
let c5 = x0 - x3 - x1 + x4;
let s0 = c0.mla(c1, SseVector::from(db));
let s1 = s0.mla(c2, SseVector::from(dr));
let s2 = s1.mla(c3, SseVector::from(dg));
let s3 = s2.mla(c4, SseVector::from(dg * db));
s3.mla(c5, SseVector::from(dr * dg))
} else {
let x0 = r.fetch(x_n, y, z);
let x1 = r.fetch(x_n, y, z_n);
let x2 = r.fetch(x, y_n, z);
let x3 = r.fetch(x_n, y_n, z);
let x4 = r.fetch(x_n, y_n, z_n);
let c1 = x1 - x0;
let c2 = x0 - c0;
let c3 = x2 - c0;
let c4 = x0 - x3 - x1 + x4;
let c5 = c0 - x2 - x0 + x3;
let s0 = c0.mla(c1, SseVector::from(db));
let s1 = s0.mla(c2, SseVector::from(dr));
let s2 = s1.mla(c3, SseVector::from(dg));
let s3 = s2.mla(c4, SseVector::from(dg * db));
s3.mla(c5, SseVector::from(dr * dg))
}
}
}
impl<const GRID_SIZE: usize> TrilinearSse<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
r: impl Fetcher<SseVector>,
) -> SseVector {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let w0 = SseVector::from(dr);
let w1 = SseVector::from(dg);
let w2 = SseVector::from(db);
let c000 = r.fetch(x, y, z);
let c100 = r.fetch(x_n, y, z);
let c010 = r.fetch(x, y_n, z);
let c110 = r.fetch(x_n, y_n, z);
let c001 = r.fetch(x, y, z_n);
let c101 = r.fetch(x_n, y, z_n);
let c011 = r.fetch(x, y_n, z_n);
let c111 = r.fetch(x_n, y_n, z_n);
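// Standard trilinear lerp, factored per axis:
// c00 = c000*(1 - dr) + c100*dr, ..., then blend along y and finally z.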
let dx = SseVector::from(1.0 - dr);
let c00 = (c000 * dx).mla(c100, w0);
let c10 = (c010 * dx).mla(c110, w0);
let c01 = (c001 * dx).mla(c101, w0);
let c11 = (c011 * dx).mla(c111, w0);
let dy = SseVector::from(1.0 - dg);
let c0 = (c00 * dy).mla(c10, w1);
let c1 = (c01 * dy).mla(c11, w1);
let dz = SseVector::from(1.0 - db);
(c0 * dz).mla(c1, w2)
}
}

View File

@@ -0,0 +1,456 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::interpolator::BarycentricWeight;
use crate::math::FusedMultiplyAdd;
use num_traits::AsPrimitive;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::ops::{Add, Mul, Sub};
#[repr(align(8), C)]
pub(crate) struct SseAlignedI16x4(pub(crate) [i16; 4]);
#[cfg(feature = "options")]
pub(crate) struct TetrahedralSseQ0_15<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [SseAlignedI16x4],
}
#[cfg(feature = "options")]
pub(crate) struct PyramidalSseQ0_15<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [SseAlignedI16x4],
}
#[cfg(feature = "options")]
pub(crate) struct PrismaticSseQ0_15<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [SseAlignedI16x4],
}
pub(crate) struct TrilinearSseQ0_15<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [SseAlignedI16x4],
}
trait Fetcher<T> {
fn fetch(&self, x: i32, y: i32, z: i32) -> T;
}
#[derive(Copy, Clone)]
#[repr(transparent)]
pub(crate) struct SseVector {
pub(crate) v: __m128i,
}
impl From<i16> for SseVector {
#[inline(always)]
fn from(v: i16) -> Self {
SseVector {
v: unsafe { _mm_set1_epi16(v) },
}
}
}
impl Sub<SseVector> for SseVector {
type Output = Self;
#[inline(always)]
fn sub(self, rhs: SseVector) -> Self::Output {
SseVector {
v: unsafe { _mm_sub_epi16(self.v, rhs.v) },
}
}
}
impl Add<SseVector> for SseVector {
type Output = Self;
#[inline(always)]
fn add(self, rhs: SseVector) -> Self::Output {
SseVector {
v: unsafe { _mm_add_epi16(self.v, rhs.v) },
}
}
}
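// "Multiplication" here is _mm_mulhrs_epi16: (a*b + 0x4000) >> 15, i.e. a
// rounded Q0.15 fixed-point product, matching the Q0.15 weights in the LUT.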
impl Mul<SseVector> for SseVector {
type Output = Self;
#[inline(always)]
fn mul(self, rhs: SseVector) -> Self::Output {
SseVector {
v: unsafe { _mm_mulhrs_epi16(self.v, rhs.v) },
}
}
}
impl FusedMultiplyAdd<SseVector> for SseVector {
#[inline(always)]
fn mla(&self, b: SseVector, c: SseVector) -> SseVector {
SseVector {
v: unsafe { _mm_add_epi16(self.v, _mm_mulhrs_epi16(b.v, c.v)) },
}
}
}
struct TetrahedralSseQ0_15FetchVector<'a, const GRID_SIZE: usize> {
cube: &'a [SseAlignedI16x4],
}
impl<const GRID_SIZE: usize> Fetcher<SseVector> for TetrahedralSseQ0_15FetchVector<'_, GRID_SIZE> {
#[inline(always)]
fn fetch(&self, x: i32, y: i32, z: i32) -> SseVector {
let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
+ y as u32 * GRID_SIZE as u32
+ z as u32) as usize;
let jx = unsafe { self.cube.get_unchecked(offset..) };
SseVector {
v: unsafe { _mm_loadu_si64(jx.as_ptr() as *const _) },
}
}
}
pub(crate) trait SseMdInterpolationQ0_15<'a, const GRID_SIZE: usize> {
fn new(table: &'a [SseAlignedI16x4]) -> Self;
fn inter3_sse<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
) -> SseVector;
}
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> TetrahedralSseQ0_15<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
r: impl Fetcher<SseVector>,
) -> SseVector {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let rx = lut_r.w;
let ry = lut_g.w;
let rz = lut_b.w;
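// Same six-tetrahedra decomposition as the float interpolator, carried out
// in Q0.15 arithmetic.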
let c0 = r.fetch(x, y, z);
let c1;
let c2;
let c3;
if rx >= ry {
if ry >= rz {
//rx >= ry && ry >= rz
c1 = r.fetch(x_n, y, z) - c0;
c2 = r.fetch(x_n, y_n, z) - r.fetch(x_n, y, z);
c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
} else if rx >= rz {
//rx >= rz && rz > ry
c1 = r.fetch(x_n, y, z) - c0;
c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
c3 = r.fetch(x_n, y, z_n) - r.fetch(x_n, y, z);
} else {
//rz > rx && rx >= ry
c1 = r.fetch(x_n, y, z_n) - r.fetch(x, y, z_n);
c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
c3 = r.fetch(x, y, z_n) - c0;
}
} else if rx >= rz {
//ry > rx && rx >= rz
c1 = r.fetch(x_n, y_n, z) - r.fetch(x, y_n, z);
c2 = r.fetch(x, y_n, z) - c0;
c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
} else if ry >= rz {
//ry >= rz && rz > rx
c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
c2 = r.fetch(x, y_n, z) - c0;
c3 = r.fetch(x, y_n, z_n) - r.fetch(x, y_n, z);
} else {
//rz > ry && ry > rx
c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
c2 = r.fetch(x, y_n, z_n) - r.fetch(x, y, z_n);
c3 = r.fetch(x, y, z_n) - c0;
}
let s0 = c0.mla(c1, SseVector::from(rx));
let s1 = s0.mla(c2, SseVector::from(ry));
s1.mla(c3, SseVector::from(rz))
}
}
macro_rules! define_inter_sse {
($interpolator: ident) => {
impl<'a, const GRID_SIZE: usize> SseMdInterpolationQ0_15<'a, GRID_SIZE>
for $interpolator<'a, GRID_SIZE>
{
#[inline]
fn new(table: &'a [SseAlignedI16x4]) -> Self {
Self { cube: table }
}
#[inline(always)]
fn inter3_sse<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
) -> SseVector {
self.interpolate(
in_r,
in_g,
in_b,
lut,
TetrahedralSseQ0_15FetchVector::<GRID_SIZE> { cube: self.cube },
)
}
}
};
}
#[cfg(feature = "options")]
define_inter_sse!(TetrahedralSseQ0_15);
#[cfg(feature = "options")]
define_inter_sse!(PyramidalSseQ0_15);
#[cfg(feature = "options")]
define_inter_sse!(PrismaticSseQ0_15);
define_inter_sse!(TrilinearSseQ0_15);
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> PyramidalSseQ0_15<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
r: impl Fetcher<SseVector>,
) -> SseVector {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let c0 = r.fetch(x, y, z);
if dr > db && dg > db {
let x0 = r.fetch(x_n, y_n, z_n);
let x1 = r.fetch(x_n, y_n, z);
let x2 = r.fetch(x_n, y, z);
let x3 = r.fetch(x, y_n, z);
let c1 = x0 - x1;
let c2 = x2 - c0;
let c3 = x3 - c0;
let c4 = c0 - x3 - x2 + x1;
let s0 = c0.mla(c1, SseVector::from(db));
let s1 = s0.mla(c2, SseVector::from(dr));
let s2 = s1.mla(c3, SseVector::from(dg));
s2.mla(c4, SseVector::from(dr) * SseVector::from(dg))
} else if db > dr && dg > dr {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y_n, z_n);
let x2 = r.fetch(x, y_n, z_n);
let x3 = r.fetch(x, y_n, z);
let c1 = x0 - c0;
let c2 = x1 - x2;
let c3 = x3 - c0;
let c4 = c0 - x3 - x0 + x2;
let s0 = c0.mla(c1, SseVector::from(db));
let s1 = s0.mla(c2, SseVector::from(dr));
let s2 = s1.mla(c3, SseVector::from(dg));
s2.mla(c4, SseVector::from(dg) * SseVector::from(db))
} else {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y, z);
let x2 = r.fetch(x_n, y, z_n);
let x3 = r.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - c0;
let c3 = x3 - x2;
let c4 = c0 - x1 - x0 + x2;
let s0 = c0.mla(c1, SseVector::from(db));
let s1 = s0.mla(c2, SseVector::from(dr));
let s2 = s1.mla(c3, SseVector::from(dg));
s2.mla(c4, SseVector::from(db) * SseVector::from(dr))
}
}
}
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> PrismaticSseQ0_15<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
r: impl Fetcher<SseVector>,
) -> SseVector {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let c0 = r.fetch(x, y, z);
if db > dr {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y, z_n);
let x2 = r.fetch(x, y_n, z);
let x3 = r.fetch(x, y_n, z_n);
let x4 = r.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - x0;
let c3 = x2 - c0;
let c4 = c0 - x2 - x0 + x3;
let c5 = x0 - x3 - x1 + x4;
let s0 = c0.mla(c1, SseVector::from(db));
let s1 = s0.mla(c2, SseVector::from(dr));
let s2 = s1.mla(c3, SseVector::from(dg));
let s3 = s2.mla(c4, SseVector::from(dg) * SseVector::from(db));
s3.mla(c5, SseVector::from(dr) * SseVector::from(dg))
} else {
let x0 = r.fetch(x_n, y, z);
let x1 = r.fetch(x_n, y, z_n);
let x2 = r.fetch(x, y_n, z);
let x3 = r.fetch(x_n, y_n, z);
let x4 = r.fetch(x_n, y_n, z_n);
let c1 = x1 - x0;
let c2 = x0 - c0;
let c3 = x2 - c0;
let c4 = x0 - x3 - x1 + x4;
let c5 = c0 - x2 - x0 + x3;
let s0 = c0.mla(c1, SseVector::from(db));
let s1 = s0.mla(c2, SseVector::from(dr));
let s2 = s1.mla(c3, SseVector::from(dg));
let s3 = s2.mla(c4, SseVector::from(dg) * SseVector::from(db));
s3.mla(c5, SseVector::from(dr) * SseVector::from(dg))
}
}
}
impl<const GRID_SIZE: usize> TrilinearSseQ0_15<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
r: impl Fetcher<SseVector>,
) -> SseVector {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
const Q_MAX: i16 = ((1i32 << 15i32) - 1) as i16;
let q_max = SseVector::from(Q_MAX);
let w0 = SseVector::from(dr);
let w1 = SseVector::from(dg);
let w2 = SseVector::from(db);
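// Q0.15 complements: dx = Q_MAX - dr plays the role of (1 - dr), and so on.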
let dx = q_max - SseVector::from(dr);
let dy = q_max - SseVector::from(dg);
let dz = q_max - SseVector::from(db);
let c000 = r.fetch(x, y, z);
let c100 = r.fetch(x_n, y, z);
let c010 = r.fetch(x, y_n, z);
let c110 = r.fetch(x_n, y_n, z);
let c001 = r.fetch(x, y, z_n);
let c101 = r.fetch(x_n, y, z_n);
let c011 = r.fetch(x, y_n, z_n);
let c111 = r.fetch(x_n, y_n, z_n);
let c00 = (c000 * dx).mla(c100, w0);
let c10 = (c010 * dx).mla(c110, w0);
let c01 = (c001 * dx).mla(c101, w0);
let c11 = (c011 * dx).mla(c111, w0);
let c0 = (c00 * dy).mla(c10, w1);
let c1 = (c01 * dy).mla(c11, w1);
(c0 * dz).mla(c1, w2)
}
}

View File

@@ -0,0 +1,330 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::interpolator::BarycentricWeight;
use crate::conversions::lut_transforms::Lut4x3Factory;
use crate::conversions::sse::interpolator::*;
use crate::conversions::sse::interpolator_q0_15::SseAlignedI16x4;
use crate::conversions::sse::lut4_to_3_q0_15::TransformLut4To3SseQ0_15;
use crate::transform::PointeeSizeExpressible;
use crate::{
BarycentricWeightScale, CmsError, DataColorSpace, InterpolationMethod, Layout,
TransformExecutor, TransformOptions,
};
use num_traits::AsPrimitive;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::marker::PhantomData;
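/// 4-to-3 channel LUT transform (e.g. CMYK -> RGB): the fourth channel
/// selects two adjacent 3D slices of the 4D lattice, each interpolated with
/// the configured 3D method and blended linearly along that axis.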
struct TransformLut4To3Sse<
T,
U,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> {
lut: Vec<SseAlignedF32>,
_phantom: PhantomData<T>,
_phantom1: PhantomData<U>,
interpolation_method: InterpolationMethod,
weights: Box<[BarycentricWeight<f32>; BINS]>,
color_space: DataColorSpace,
is_linear: bool,
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformLut4To3Sse<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
#[allow(unused_unsafe)]
#[target_feature(enable = "sse4.1")]
unsafe fn transform_chunk<'b, Interpolator: SseMdInterpolation<'b, GRID_SIZE>>(
&'b self,
src: &[T],
dst: &mut [T],
) {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
let grid_size = GRID_SIZE as i32;
let grid_size3 = grid_size * grid_size * grid_size;
let value_scale = unsafe { _mm_set1_ps(((1 << BIT_DEPTH) - 1) as f32) };
let max_value = ((1 << BIT_DEPTH) - 1u32).as_();
for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(channels)) {
let c = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[0],
);
let m = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[1],
);
let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[2],
);
let k = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[3],
);
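// The fourth (K) channel picks two adjacent 3D slices of the 4D lattice;
// each slice is interpolated independently and the results are blended with
// weight t below.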
let k_weights = self.weights[k.as_()];
let w: i32 = k_weights.x;
let w_n: i32 = k_weights.x_n;
let t: f32 = k_weights.w;
let table1 = &self.lut[(w * grid_size3) as usize..];
let table2 = &self.lut[(w_n * grid_size3) as usize..];
let tetrahedral1 = Interpolator::new(table1);
let tetrahedral2 = Interpolator::new(table2);
let a0 = tetrahedral1.inter3_sse(c, m, y, &self.weights).v;
let b0 = tetrahedral2.inter3_sse(c, m, y, &self.weights).v;
if T::FINITE {
unsafe {
let t0 = _mm_set1_ps(t);
let ones = _mm_set1_ps(1f32);
let hp = _mm_mul_ps(a0, _mm_sub_ps(ones, t0));
let mut v = _mm_add_ps(_mm_mul_ps(b0, t0), hp);
v = _mm_max_ps(v, _mm_setzero_ps());
v = _mm_mul_ps(v, value_scale);
v = _mm_min_ps(v, value_scale);
let jvz = _mm_cvtps_epi32(v);
let x = _mm_extract_epi32::<0>(jvz);
let y = _mm_extract_epi32::<1>(jvz);
let z = _mm_extract_epi32::<2>(jvz);
dst[cn.r_i()] = (x as u32).as_();
dst[cn.g_i()] = (y as u32).as_();
dst[cn.b_i()] = (z as u32).as_();
}
} else {
unsafe {
let t0 = _mm_set1_ps(t);
let ones = _mm_set1_ps(1f32);
let hp = _mm_mul_ps(a0, _mm_sub_ps(ones, t0));
let v = _mm_add_ps(_mm_mul_ps(b0, t0), hp);
dst[cn.r_i()] = f32::from_bits(_mm_extract_ps::<0>(v) as u32).as_();
dst[cn.g_i()] = f32::from_bits(_mm_extract_ps::<1>(v) as u32).as_();
dst[cn.b_i()] = f32::from_bits(_mm_extract_ps::<2>(v) as u32).as_();
}
}
if channels == 4 {
dst[cn.a_i()] = max_value;
}
}
}
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformExecutor<T>
for TransformLut4To3Sse<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
if src.len() % 4 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / 4;
let dst_chunks = dst.len() / channels;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
unsafe {
if self.color_space == DataColorSpace::Lab
|| (self.is_linear && self.color_space == DataColorSpace::Rgb)
|| self.color_space == DataColorSpace::Xyz
{
self.transform_chunk::<TrilinearSse<GRID_SIZE>>(src, dst);
} else {
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_chunk::<TetrahedralSse<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_chunk::<PyramidalSse<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_chunk::<PrismaticSse<GRID_SIZE>>(src, dst);
}
InterpolationMethod::Linear => {
self.transform_chunk::<TrilinearSse<GRID_SIZE>>(src, dst);
}
}
}
}
Ok(())
}
}
pub(crate) struct SseLut4x3Factory {}
impl Lut4x3Factory for SseLut4x3Factory {
fn make_transform_4x3<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible + 'static + Send + Sync,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
>(
lut: Vec<f32>,
options: TransformOptions,
color_space: DataColorSpace,
is_linear: bool,
) -> Box<dyn TransformExecutor<T> + Sync + Send>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, u8>,
(): LutBarycentricReduction<T, u16>,
{
{
if options.prefer_fixed_point && BIT_DEPTH < 16 {
let q: f32 = if T::FINITE {
((1i32 << BIT_DEPTH as i32) - 1) as f32
} else {
((1i32 << 14i32) - 1) as f32
};
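// Quantize the f32 lattice to i16: q is the bit-depth maximum for integer
// sample types, or 2^14 - 1 for float samples, presumably to leave headroom
// in the Q0.15 mulhrs math; this matches the 1/(2^14 - 1) rescale in the
// Q0.15 transform.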
let lut = lut
.chunks_exact(3)
.map(|x| {
SseAlignedI16x4([
(x[0] * q).round() as i16,
(x[1] * q).round() as i16,
(x[2] * q).round() as i16,
0,
])
})
.collect::<Vec<_>>();
return match options.barycentric_weight_scale {
BarycentricWeightScale::Low => Box::new(TransformLut4To3SseQ0_15::<
T,
u8,
LAYOUT,
GRID_SIZE,
BIT_DEPTH,
256,
256,
> {
lut,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<i16>::create_ranged_256::<GRID_SIZE>(),
_phantom: PhantomData,
_phantom1: PhantomData,
color_space,
is_linear,
}),
#[cfg(feature = "options")]
BarycentricWeightScale::High => Box::new(TransformLut4To3SseQ0_15::<
T,
u16,
LAYOUT,
GRID_SIZE,
BIT_DEPTH,
65536,
65536,
> {
lut,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<i16>::create_binned::<GRID_SIZE, 65536>(),
_phantom: PhantomData,
_phantom1: PhantomData,
color_space,
is_linear,
}),
};
}
let lut = lut
.chunks_exact(3)
.map(|x| SseAlignedF32([x[0], x[1], x[2], 0f32]))
.collect::<Vec<_>>();
match options.barycentric_weight_scale {
BarycentricWeightScale::Low => {
Box::new(
TransformLut4To3Sse::<T, u8, LAYOUT, GRID_SIZE, BIT_DEPTH, 256, 256> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_ranged_256::<GRID_SIZE>(),
color_space,
is_linear,
},
)
}
#[cfg(feature = "options")]
BarycentricWeightScale::High => {
Box::new(
TransformLut4To3Sse::<T, u16, LAYOUT, GRID_SIZE, BIT_DEPTH, 65536, 65536> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_binned::<GRID_SIZE, 65536>(),
color_space,
is_linear,
},
)
}
}
}
}

View File

@@ -0,0 +1,212 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::interpolator::BarycentricWeight;
use crate::conversions::sse::interpolator_q0_15::*;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, DataColorSpace, InterpolationMethod, Layout, TransformExecutor};
use num_traits::AsPrimitive;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::marker::PhantomData;
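/// Q0.15 fixed-point variant of the 4-to-3 LUT transform; lattice values and
/// barycentric weights are both stored as i16.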
pub(crate) struct TransformLut4To3SseQ0_15<
T,
U,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> {
pub(crate) lut: Vec<SseAlignedI16x4>,
pub(crate) _phantom: PhantomData<T>,
pub(crate) _phantom1: PhantomData<U>,
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) weights: Box<[BarycentricWeight<i16>; BINS]>,
pub(crate) color_space: DataColorSpace,
pub(crate) is_linear: bool,
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformLut4To3SseQ0_15<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
#[allow(unused_unsafe)]
#[target_feature(enable = "sse4.1")]
unsafe fn transform_chunk<'b, Interpolator: SseMdInterpolationQ0_15<'b, GRID_SIZE>>(
&'b self,
src: &[T],
dst: &mut [T],
) {
unsafe {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
let grid_size = GRID_SIZE as i32;
let grid_size3 = grid_size * grid_size * grid_size;
let f_value_scale = _mm_set1_ps(1. / ((1 << 14i32) - 1) as f32);
let max_value = ((1u32 << BIT_DEPTH) - 1).as_();
let v_max_scale = if T::FINITE {
_mm_set1_epi16(((1i32 << BIT_DEPTH) - 1) as i16)
} else {
_mm_set1_epi16(((1i32 << 14i32) - 1) as i16)
};
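// Integer outputs are clamped to the bit-depth maximum; float outputs keep
// the Q(2^14 - 1) lattice scale and are converted back with f_value_scale.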
for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(channels)) {
let c = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[0],
);
let m = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[1],
);
let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[2],
);
let k = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[3],
);
let k_weights = self.weights[k.as_()];
let w: i32 = k_weights.x;
let w_n: i32 = k_weights.x_n;
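// K-axis blend factor in Q0.15: t is the fractional weight and t_n = Q - t
// its complement (roughly 1 - t).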
const Q: i16 = ((1i32 << 15) - 1) as i16;
let t: i16 = k_weights.w;
let t_n: i16 = Q - t;
let table1 = &self.lut[(w * grid_size3) as usize..];
let table2 = &self.lut[(w_n * grid_size3) as usize..];
let tetrahedral1 = Interpolator::new(table1);
let tetrahedral2 = Interpolator::new(table2);
let a0 = tetrahedral1.inter3_sse(c, m, y, &self.weights).v;
let b0 = tetrahedral2.inter3_sse(c, m, y, &self.weights).v;
let hp = _mm_mulhrs_epi16(_mm_set1_epi16(t_n), a0);
let v = _mm_add_epi16(hp, _mm_mulhrs_epi16(b0, _mm_set1_epi16(t)));
if T::FINITE {
let mut o = _mm_max_epi16(v, _mm_setzero_si128());
o = _mm_min_epi16(o, v_max_scale);
let x = _mm_extract_epi16::<0>(o);
let y = _mm_extract_epi16::<1>(o);
let z = _mm_extract_epi16::<2>(o);
dst[cn.r_i()] = (x as u32).as_();
dst[cn.g_i()] = (y as u32).as_();
dst[cn.b_i()] = (z as u32).as_();
} else {
let mut r = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(v));
r = _mm_mul_ps(r, f_value_scale);
dst[cn.r_i()] = f32::from_bits(_mm_extract_ps::<0>(r) as u32).as_();
dst[cn.g_i()] = f32::from_bits(_mm_extract_ps::<1>(r) as u32).as_();
dst[cn.b_i()] = f32::from_bits(_mm_extract_ps::<2>(r) as u32).as_();
}
if channels == 4 {
dst[cn.a_i()] = max_value;
}
}
}
}
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformExecutor<T>
for TransformLut4To3SseQ0_15<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
if src.len() % 4 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / 4;
let dst_chunks = dst.len() / channels;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
unsafe {
if self.color_space == DataColorSpace::Lab
|| (self.is_linear && self.color_space == DataColorSpace::Rgb)
|| self.color_space == DataColorSpace::Xyz
{
self.transform_chunk::<TrilinearSseQ0_15<GRID_SIZE>>(src, dst);
} else {
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_chunk::<TetrahedralSseQ0_15<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_chunk::<PyramidalSseQ0_15<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_chunk::<PrismaticSseQ0_15<GRID_SIZE>>(src, dst);
}
InterpolationMethod::Linear => {
self.transform_chunk::<TrilinearSseQ0_15<GRID_SIZE>>(src, dst);
}
}
}
}
Ok(())
}
}

View File

@@ -0,0 +1,45 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
mod interpolator;
mod interpolator_q0_15;
mod lut4_to_3;
mod lut4_to_3_q0_15;
mod rgb_xyz;
mod rgb_xyz_opt;
mod rgb_xyz_q2_13;
mod rgb_xyz_q2_13_opt;
mod t_lut3_to_3;
mod t_lut3_to_3_q0_15;
pub(crate) use lut4_to_3::SseLut4x3Factory;
pub(crate) use rgb_xyz::TransformShaperRgbSse;
pub(crate) use rgb_xyz_opt::TransformShaperRgbOptSse;
pub(crate) use rgb_xyz_q2_13::TransformShaperQ2_13Sse;
pub(crate) use rgb_xyz_q2_13_opt::TransformShaperQ2_13OptSse;
pub(crate) use t_lut3_to_3::SseLut3x3Factory;

View File

@@ -0,0 +1,154 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::TransformMatrixShaper;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
#[repr(align(16), C)]
pub(crate) struct SseAlignedU16(pub(crate) [u16; 8]);
pub(crate) struct TransformShaperRgbSse<
T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> {
pub(crate) profile: TransformMatrixShaper<T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
impl<
T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> TransformShaperRgbSse<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT>
where
u32: AsPrimitive<T>,
{
#[target_feature(enable = "sse4.1")]
unsafe fn transform_impl(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
let mut temporary = SseAlignedU16([0; 8]);
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let t = self.profile.adaptation_matrix.transpose();
let scale = (GAMMA_LUT - 1) as f32;
let max_colors: T = ((1 << self.bit_depth) - 1).as_();
unsafe {
let m0 = _mm_setr_ps(t.v[0][0], t.v[0][1], t.v[0][2], 0f32);
let m1 = _mm_setr_ps(t.v[1][0], t.v[1][1], t.v[1][2], 0f32);
let m2 = _mm_setr_ps(t.v[2][0], t.v[2][1], t.v[2][2], 0f32);
let zeros = _mm_setzero_ps();
let v_scale = _mm_set1_ps(scale);
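// The matrix was transposed above, so each broadcast input channel
// multiplies one of m0/m1/m2 (a column of the original matrix); summing the
// three products yields v = M * (r, g, b).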
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let rp = &self.profile.r_linear[src[src_cn.r_i()]._as_usize()];
let gp = &self.profile.g_linear[src[src_cn.g_i()]._as_usize()];
let bp = &self.profile.b_linear[src[src_cn.b_i()]._as_usize()];
let mut r = _mm_load_ss(rp);
let mut g = _mm_load_ss(gp);
let mut b = _mm_load_ss(bp);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
r = _mm_shuffle_ps::<0>(r, r);
g = _mm_shuffle_ps::<0>(g, g);
b = _mm_shuffle_ps::<0>(b, b);
let v0 = _mm_mul_ps(r, m0);
let v1 = _mm_mul_ps(g, m1);
let v2 = _mm_mul_ps(b, m2);
let mut v = _mm_add_ps(_mm_add_ps(v0, v1), v2);
v = _mm_max_ps(v, zeros);
v = _mm_mul_ps(v, v_scale);
v = _mm_min_ps(v, v_scale);
let zx = _mm_cvtps_epi32(v);
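// The four i32 lanes are spilled to a [u16; 8]; on little-endian x86 the low
// half of lane N lands at index 2*N, so indices 0/2/4 read back R/G/B (the
// values were clamped to GAMMA_LUT - 1, so they fit in 16 bits).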
_mm_store_si128(temporary.0.as_mut_ptr() as *mut _, zx);
dst[dst_cn.r_i()] = self.profile.r_gamma[temporary.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.g_gamma[temporary.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.b_gamma[temporary.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
Ok(())
}
}
impl<
T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> TransformExecutor<T> for TransformShaperRgbSse<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
unsafe { self.transform_impl(src, dst) }
}
}

View File

@@ -0,0 +1,153 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::TransformMatrixShaperOptimized;
use crate::conversions::sse::rgb_xyz::SseAlignedU16;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
pub(crate) struct TransformShaperRgbOptSse<
T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> {
pub(crate) profile: TransformMatrixShaperOptimized<T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
impl<
T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> TransformShaperRgbOptSse<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT>
where
u32: AsPrimitive<T>,
{
#[target_feature(enable = "sse4.1")]
unsafe fn transform_impl(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
let mut temporary = SseAlignedU16([0; 8]);
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let t = self.profile.adaptation_matrix.transpose();
let scale = (GAMMA_LUT - 1) as f32;
let max_colors: T = ((1 << self.bit_depth) - 1).as_();
unsafe {
let m0 = _mm_setr_ps(t.v[0][0], t.v[0][1], t.v[0][2], 0f32);
let m1 = _mm_setr_ps(t.v[1][0], t.v[1][1], t.v[1][2], 0f32);
let m2 = _mm_setr_ps(t.v[2][0], t.v[2][1], t.v[2][2], 0f32);
let zeros = _mm_setzero_ps();
let v_scale = _mm_set1_ps(scale);
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let rp = &self.profile.linear[src[src_cn.r_i()]._as_usize()];
let gp = &self.profile.linear[src[src_cn.g_i()]._as_usize()];
let bp = &self.profile.linear[src[src_cn.b_i()]._as_usize()];
let mut r = _mm_load_ss(rp);
let mut g = _mm_load_ss(gp);
let mut b = _mm_load_ss(bp);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
r = _mm_shuffle_ps::<0>(r, r);
g = _mm_shuffle_ps::<0>(g, g);
b = _mm_shuffle_ps::<0>(b, b);
let v0 = _mm_mul_ps(r, m0);
let v1 = _mm_mul_ps(g, m1);
let v2 = _mm_mul_ps(b, m2);
let mut v = _mm_add_ps(_mm_add_ps(v0, v1), v2);
v = _mm_max_ps(v, zeros);
v = _mm_mul_ps(v, v_scale);
v = _mm_min_ps(v, v_scale);
let zx = _mm_cvtps_epi32(v);
_mm_store_si128(temporary.0.as_mut_ptr() as *mut _, zx);
dst[dst_cn.r_i()] = self.profile.gamma[temporary.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.gamma[temporary.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.gamma[temporary.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
Ok(())
}
}
impl<
T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> TransformExecutor<T>
for TransformShaperRgbOptSse<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
unsafe { self.transform_impl(src, dst) }
}
}
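// Scalar reference for the float matrix-shaper above (a sketch with
// hypothetical names; the vendored loop performs the same three steps —
// linearize through a LUT, multiply by the adaptation matrix, clamp and
// re-encode through the gamma LUT — four lanes at a time in SSE):
#[allow(dead_code)]
fn shaper_reference(linear: &[f32], gamma: &[u16], m: [[f32; 3]; 3], rgb: [usize; 3]) -> [u16; 3] {
    let scale = (gamma.len() - 1) as f32;
    let lin = [linear[rgb[0]], linear[rgb[1]], linear[rgb[2]]];
    let mut out = [0u16; 3];
    for (i, o) in out.iter_mut().enumerate() {
        let v = m[i][0] * lin[0] + m[i][1] * lin[1] + m[i][2] * lin[2];
        // clamp to [0, scale] and round to the nearest gamma-LUT bin
        // (`_mm_cvtps_epi32` rounds to nearest in the SSE path)
        let idx = ((v.max(0.0) * scale).min(scale)).round() as usize;
        *o = gamma[idx];
    }
    out
}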

View File

@@ -0,0 +1,167 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::rgbxyz_fixed::TransformMatrixShaperFixedPoint;
use crate::conversions::sse::rgb_xyz::SseAlignedU16;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
pub(crate) struct TransformShaperQ2_13Sse<
T: Copy,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> {
pub(crate) profile: TransformMatrixShaperFixedPoint<i32, T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
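/// Loads a single `i32` into the low lane of an `__m128i`, zeroing the rest.
///
/// The reference is bit-cast to `f32` only so that `_mm_load_ss` (`movss`)
/// can perform the 4-byte load; `_mm_castps_si128` then reinterprets the
/// register with no conversion, so the integer bits pass through untouched.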
#[inline(always)]
pub(crate) unsafe fn _xmm_load_epi32(f: &i32) -> __m128i {
let float_ref: &f32 = unsafe { &*(f as *const i32 as *const f32) };
unsafe { _mm_castps_si128(_mm_load_ss(float_ref)) }
}
impl<
T: Copy + PointeeSizeExpressible + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> TransformShaperQ2_13Sse<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT, PRECISION>
where
u32: AsPrimitive<T>,
{
#[target_feature(enable = "sse4.1")]
unsafe fn transform_impl(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
let mut temporary = SseAlignedU16([0; 8]);
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let t = self.profile.adaptation_matrix.transpose();
let max_colors = ((1 << self.bit_depth) - 1).as_();
unsafe {
let m0 = _mm_setr_epi16(
t.v[0][0], t.v[1][0], t.v[0][1], t.v[1][1], t.v[0][2], t.v[1][2], 0, 0,
);
let m2 = _mm_setr_epi16(t.v[2][0], 1, t.v[2][1], 1, t.v[2][2], 1, 0, 0);
let rnd_val = ((1i32 << (PRECISION - 1)) as i16).to_ne_bytes();
let rnd = _mm_set1_epi32(i32::from_ne_bytes([0, 0, rnd_val[0], rnd_val[1]]));
let v_max_value = _mm_set1_epi32(GAMMA_LUT as i32 - 1);
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let rp = &self.profile.r_linear[src[src_cn.r_i()]._as_usize()];
let gp = &self.profile.g_linear[src[src_cn.g_i()]._as_usize()];
let bp = &self.profile.b_linear[src[src_cn.b_i()]._as_usize()];
let mut r = _xmm_load_epi32(rp);
let mut g = _xmm_load_epi32(gp);
let mut b = _xmm_load_epi32(bp);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
r = _mm_shuffle_epi32::<0>(r);
g = _mm_shuffle_epi32::<0>(g);
b = _mm_shuffle_epi32::<0>(b);
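// Interleave g into the high 16 bits of each lane so `_mm_madd_epi16` can
// form per-channel dot products: lane c of v0 is r*m[c][0] + g*m[c][1], and
// lane c of v1 is b*m[c][2] plus the fixed-point rounding bias folded into
// the high half of `rnd`.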
g = _mm_slli_epi32::<16>(g);
let zrg0 = _mm_or_si128(r, g);
let zbz0 = _mm_or_si128(b, rnd);
let v0 = _mm_madd_epi16(zrg0, m0);
let v1 = _mm_madd_epi16(zbz0, m2);
let mut v = _mm_add_epi32(v0, v1);
v = _mm_srai_epi32::<PRECISION>(v);
v = _mm_max_epi32(v, _mm_setzero_si128());
v = _mm_min_epi32(v, v_max_value);
_mm_store_si128(temporary.0.as_mut_ptr() as *mut _, v);
dst[dst_cn.r_i()] = self.profile.r_gamma[temporary.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.g_gamma[temporary.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.b_gamma[temporary.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
Ok(())
}
}
impl<
T: Copy + PointeeSizeExpressible + 'static + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> TransformExecutor<T>
for TransformShaperQ2_13Sse<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT, PRECISION>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
unsafe { self.transform_impl(src, dst) }
}
}
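// Scalar reference for the Q2.13 fixed-point arithmetic above (a sketch
// with hypothetical names): coefficients are signed 16-bit with PRECISION
// fractional bits, and the rounding bias is added before the shift.
#[allow(dead_code)]
fn fixed_point_dot(row: [i16; 3], rgb: [i16; 3], precision: i32) -> i32 {
    let acc = row[0] as i32 * rgb[0] as i32
        + row[1] as i32 * rgb[1] as i32
        + row[2] as i32 * rgb[2] as i32
        + (1i32 << (precision - 1)); // round-to-nearest bias
    acc >> precision // arithmetic shift, matching `_mm_srai_epi32`
}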

View File

@@ -0,0 +1,162 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::rgbxyz_fixed::TransformMatrixShaperFixedPointOpt;
use crate::conversions::sse::rgb_xyz::SseAlignedU16;
use crate::conversions::sse::rgb_xyz_q2_13::_xmm_load_epi32;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
pub(crate) struct TransformShaperQ2_13OptSse<
T: Copy,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> {
pub(crate) profile: TransformMatrixShaperFixedPointOpt<i32, i16, T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
impl<
T: Copy + PointeeSizeExpressible + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> TransformShaperQ2_13OptSse<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT, PRECISION>
where
u32: AsPrimitive<T>,
{
#[target_feature(enable = "sse4.1")]
unsafe fn transform_impl(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
let mut temporary = SseAlignedU16([0; 8]);
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let t = self.profile.adaptation_matrix.transpose();
let max_colors = ((1 << self.bit_depth) - 1).as_();
unsafe {
let m0 = _mm_setr_epi16(
t.v[0][0], t.v[1][0], t.v[0][1], t.v[1][1], t.v[0][2], t.v[1][2], 0, 0,
);
let m2 = _mm_setr_epi16(t.v[2][0], 1, t.v[2][1], 1, t.v[2][2], 1, 0, 0);
let rnd_val = ((1i32 << (PRECISION - 1)) as i16).to_ne_bytes();
let rnd = _mm_set1_epi32(i32::from_ne_bytes([0, 0, rnd_val[0], rnd_val[1]]));
let v_max_value = _mm_set1_epi32(GAMMA_LUT as i32 - 1);
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let rp = &self.profile.linear[src[src_cn.r_i()]._as_usize()];
let gp = &self.profile.linear[src[src_cn.g_i()]._as_usize()];
let bp = &self.profile.linear[src[src_cn.b_i()]._as_usize()];
let mut r = _xmm_load_epi32(rp);
let mut g = _xmm_load_epi32(gp);
let mut b = _xmm_load_epi32(bp);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
r = _mm_shuffle_epi32::<0>(r);
g = _mm_shuffle_epi32::<0>(g);
b = _mm_shuffle_epi32::<0>(b);
g = _mm_slli_epi32::<16>(g);
let zrg0 = _mm_or_si128(r, g);
let zbz0 = _mm_or_si128(b, rnd);
let v0 = _mm_madd_epi16(zrg0, m0);
let v1 = _mm_madd_epi16(zbz0, m2);
let mut v = _mm_add_epi32(v0, v1);
v = _mm_srai_epi32::<PRECISION>(v);
v = _mm_max_epi32(v, _mm_setzero_si128());
v = _mm_min_epi32(v, v_max_value);
_mm_store_si128(temporary.0.as_mut_ptr() as *mut _, v);
dst[dst_cn.r_i()] = self.profile.gamma[temporary.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.gamma[temporary.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.gamma[temporary.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
Ok(())
}
}
impl<
T: Copy + PointeeSizeExpressible + 'static + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> TransformExecutor<T>
for TransformShaperQ2_13OptSse<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT, PRECISION>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
unsafe { self.transform_impl(src, dst) }
}
}

View File

@@ -0,0 +1,343 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::interpolator::BarycentricWeight;
use crate::conversions::lut_transforms::Lut3x3Factory;
use crate::conversions::sse::interpolator::*;
use crate::conversions::sse::interpolator_q0_15::SseAlignedI16x4;
use crate::conversions::sse::t_lut3_to_3_q0_15::TransformLut3x3SseQ0_15;
use crate::transform::PointeeSizeExpressible;
use crate::{
BarycentricWeightScale, CmsError, DataColorSpace, InterpolationMethod, Layout,
TransformExecutor, TransformOptions,
};
use num_traits::AsPrimitive;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::marker::PhantomData;
struct TransformLut3x3Sse<
T,
U,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> {
lut: Vec<SseAlignedF32>,
_phantom: PhantomData<T>,
_phantom2: PhantomData<U>,
interpolation_method: InterpolationMethod,
weights: Box<[BarycentricWeight<f32>; BINS]>,
color_space: DataColorSpace,
is_linear: bool,
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformLut3x3Sse<T, U, SRC_LAYOUT, DST_LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
#[allow(unused_unsafe)]
#[target_feature(enable = "sse4.1")]
unsafe fn transform_chunk<'b, Interpolator: SseMdInterpolation<'b, GRID_SIZE>>(
&'b self,
src: &[T],
dst: &mut [T],
) {
let src_cn = Layout::from(SRC_LAYOUT);
let src_channels = src_cn.channels();
let dst_cn = Layout::from(DST_LAYOUT);
let dst_channels = dst_cn.channels();
let value_scale = unsafe { _mm_set1_ps(((1 << BIT_DEPTH) - 1) as f32) };
let max_value = ((1u32 << BIT_DEPTH) - 1).as_();
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let x = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.r_i()],
);
let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.g_i()],
);
let z = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.b_i()],
);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_value
};
let tetrahedral = Interpolator::new(&self.lut);
let v = tetrahedral.inter3_sse(x, y, z, &self.weights);
if T::FINITE {
unsafe {
let mut r = _mm_mul_ps(v.v, value_scale);
r = _mm_max_ps(r, _mm_setzero_ps());
r = _mm_min_ps(r, value_scale);
let jvz = _mm_cvtps_epi32(r);
let x = _mm_extract_epi32::<0>(jvz);
let y = _mm_extract_epi32::<1>(jvz);
let z = _mm_extract_epi32::<2>(jvz);
dst[dst_cn.r_i()] = (x as u32).as_();
dst[dst_cn.g_i()] = (y as u32).as_();
dst[dst_cn.b_i()] = (z as u32).as_();
}
} else {
unsafe {
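// `_mm_extract_ps` returns the lane's raw bit pattern as an i32, so
// the float is recovered with `f32::from_bits`, not a numeric cast.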
dst[dst_cn.r_i()] = f32::from_bits(_mm_extract_ps::<0>(v.v) as u32).as_();
dst[dst_cn.g_i()] = f32::from_bits(_mm_extract_ps::<1>(v.v) as u32).as_();
dst[dst_cn.b_i()] = f32::from_bits(_mm_extract_ps::<2>(v.v) as u32).as_();
}
}
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformExecutor<T>
for TransformLut3x3Sse<
T,
U,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
BINS,
BARYCENTRIC_BINS,
>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let src_channels = src_cn.channels();
let dst_cn = Layout::from(DST_LAYOUT);
let dst_channels = dst_cn.channels();
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / src_channels;
let dst_chunks = dst.len() / dst_channels;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
unsafe {
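// Lab, XYZ, and already-linearized RGB are always routed to trilinear
// interpolation; other color spaces honor the configured method.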
if self.color_space == DataColorSpace::Lab
|| (self.is_linear && self.color_space == DataColorSpace::Rgb)
|| self.color_space == DataColorSpace::Xyz
{
self.transform_chunk::<TrilinearSse<GRID_SIZE>>(src, dst);
} else {
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_chunk::<TetrahedralSse<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_chunk::<PyramidalSse<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_chunk::<PrismaticSse<GRID_SIZE>>(src, dst);
}
InterpolationMethod::Linear => {
self.transform_chunk::<TrilinearSse<GRID_SIZE>>(src, dst);
}
}
}
}
Ok(())
}
}
pub(crate) struct SseLut3x3Factory {}
impl Lut3x3Factory for SseLut3x3Factory {
fn make_transform_3x3<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible + 'static + Send + Sync,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
>(
lut: Vec<f32>,
options: TransformOptions,
color_space: DataColorSpace,
is_linear: bool,
) -> Box<dyn TransformExecutor<T> + Sync + Send>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, u8>,
(): LutBarycentricReduction<T, u16>,
{
if options.prefer_fixed_point && BIT_DEPTH < 16 {
let q: f32 = if T::FINITE {
((1i32 << BIT_DEPTH as i32) - 1) as f32
} else {
((1i32 << 14i32) - 1) as f32
};
let lut = lut
.chunks_exact(3)
.map(|x| {
SseAlignedI16x4([
(x[0] * q).round() as i16,
(x[1] * q).round() as i16,
(x[2] * q).round() as i16,
0,
])
})
.collect::<Vec<_>>();
return match options.barycentric_weight_scale {
BarycentricWeightScale::Low => Box::new(TransformLut3x3SseQ0_15::<
T,
u8,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
256,
256,
> {
lut,
_phantom: PhantomData,
_phantom2: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<i16>::create_ranged_256::<GRID_SIZE>(),
color_space,
is_linear,
}),
#[cfg(feature = "options")]
BarycentricWeightScale::High => Box::new(TransformLut3x3SseQ0_15::<
T,
u16,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
65536,
65536,
> {
lut,
_phantom: PhantomData,
_phantom2: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<i16>::create_binned::<GRID_SIZE, 65536>(),
color_space,
is_linear,
}),
};
}
let lut = lut
.chunks_exact(3)
.map(|x| SseAlignedF32([x[0], x[1], x[2], 0f32]))
.collect::<Vec<_>>();
match options.barycentric_weight_scale {
BarycentricWeightScale::Low => Box::new(TransformLut3x3Sse::<
T,
u8,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
256,
256,
> {
lut,
_phantom: PhantomData,
_phantom2: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_ranged_256::<GRID_SIZE>(),
color_space,
is_linear,
}),
#[cfg(feature = "options")]
BarycentricWeightScale::High => Box::new(TransformLut3x3Sse::<
T,
u16,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
65536,
65536,
> {
lut,
_phantom: PhantomData,
_phantom2: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_binned::<GRID_SIZE, 65536>(),
color_space,
is_linear,
}),
}
}
}
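// The fixed-point branch above rescales the f32 LUT into i16 entries before
// boxing a Q0.15 executor; a sketch of the per-entry quantization it applies
// (hypothetical helper, values assumed in [0, 1]):
#[allow(dead_code)]
fn quantize_lut_entry(x: f32, bit_depth: u32, finite: bool) -> i16 {
    // Finite integer pixel types scale by their own maximum; f32/f64
    // pipelines use the 14-bit working scale, as in the factory above.
    let q = if finite {
        ((1i32 << bit_depth) - 1) as f32
    } else {
        ((1i32 << 14) - 1) as f32
    };
    (x * q).round() as i16
}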

View File

@@ -0,0 +1,225 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::interpolator::BarycentricWeight;
use crate::conversions::sse::interpolator_q0_15::*;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, DataColorSpace, InterpolationMethod, Layout, TransformExecutor};
use num_traits::AsPrimitive;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::marker::PhantomData;
pub(crate) struct TransformLut3x3SseQ0_15<
T,
U,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> {
pub(crate) lut: Vec<SseAlignedI16x4>,
pub(crate) _phantom: PhantomData<T>,
pub(crate) _phantom2: PhantomData<U>,
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) weights: Box<[BarycentricWeight<i16>; BINS]>,
pub(crate) color_space: DataColorSpace,
pub(crate) is_linear: bool,
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
>
TransformLut3x3SseQ0_15<
T,
U,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
BINS,
BARYCENTRIC_BINS,
>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
#[allow(unused_unsafe)]
#[target_feature(enable = "sse4.1")]
unsafe fn transform_chunk<'b, Interpolator: SseMdInterpolationQ0_15<'b, GRID_SIZE>>(
&'b self,
src: &[T],
dst: &mut [T],
) {
unsafe {
let src_cn = Layout::from(SRC_LAYOUT);
let src_channels = src_cn.channels();
let dst_cn = Layout::from(DST_LAYOUT);
let dst_channels = dst_cn.channels();
let f_value_scale = _mm_set1_ps(1. / ((1 << 14i32) - 1) as f32);
let max_value = ((1u32 << BIT_DEPTH) - 1).as_();
let v_max_scale = if T::FINITE {
_mm_set1_epi16(((1i32 << BIT_DEPTH) - 1) as i16)
} else {
_mm_set1_epi16(((1i32 << 14i32) - 1) as i16)
};
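// Integer pixel types clamp against their own maximum below; float
// outputs stay in the 14-bit Q0.15 working range and are rescaled to
// [0, 1] through `f_value_scale`.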
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let x = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.r_i()],
);
let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.g_i()],
);
let z = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.b_i()],
);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_value
};
let tetrahedral = Interpolator::new(&self.lut);
let v = tetrahedral.inter3_sse(x, y, z, &self.weights);
if T::FINITE {
let mut o = _mm_max_epi16(v.v, _mm_setzero_si128());
o = _mm_min_epi16(o, v_max_scale);
let x = _mm_extract_epi16::<0>(o);
let y = _mm_extract_epi16::<1>(o);
let z = _mm_extract_epi16::<2>(o);
dst[dst_cn.r_i()] = (x as u32).as_();
dst[dst_cn.g_i()] = (y as u32).as_();
dst[dst_cn.b_i()] = (z as u32).as_();
} else {
let mut r = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(v.v));
r = _mm_mul_ps(r, f_value_scale);
dst[dst_cn.r_i()] = f32::from_bits(_mm_extract_ps::<0>(r) as u32).as_();
dst[dst_cn.g_i()] = f32::from_bits(_mm_extract_ps::<1>(r) as u32).as_();
dst[dst_cn.b_i()] = f32::from_bits(_mm_extract_ps::<2>(r) as u32).as_();
}
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
}
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformExecutor<T>
for TransformLut3x3SseQ0_15<
T,
U,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
BINS,
BARYCENTRIC_BINS,
>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let src_channels = src_cn.channels();
let dst_cn = Layout::from(DST_LAYOUT);
let dst_channels = dst_cn.channels();
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / src_channels;
let dst_chunks = dst.len() / dst_channels;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
unsafe {
if self.color_space == DataColorSpace::Lab
|| (self.is_linear && self.color_space == DataColorSpace::Rgb)
|| self.color_space == DataColorSpace::Xyz
{
self.transform_chunk::<TrilinearSseQ0_15<GRID_SIZE>>(src, dst);
} else {
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_chunk::<TetrahedralSseQ0_15<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_chunk::<PyramidalSseQ0_15<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_chunk::<PrismaticSseQ0_15<GRID_SIZE>>(src, dst);
}
InterpolationMethod::Linear => {
self.transform_chunk::<TrilinearSseQ0_15<GRID_SIZE>>(src, dst);
}
}
}
}
Ok(())
}
}

View File

@@ -0,0 +1,261 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#![allow(dead_code)]
use crate::conversions::LutBarycentricReduction;
use crate::conversions::interpolator::{BarycentricWeight, MultidimensionalInterpolation};
use crate::conversions::lut_transforms::Lut3x3Factory;
use crate::transform::PointeeSizeExpressible;
use crate::{
BarycentricWeightScale, CmsError, DataColorSpace, InterpolationMethod, Layout,
TransformExecutor, TransformOptions,
};
use num_traits::AsPrimitive;
use std::marker::PhantomData;
pub(crate) struct TransformLut3x3<
T,
U,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> {
pub(crate) lut: Vec<f32>,
pub(crate) _phantom: PhantomData<T>,
pub(crate) _phantom1: PhantomData<U>,
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) weights: Box<[BarycentricWeight<f32>; BINS]>,
pub(crate) color_space: DataColorSpace,
pub(crate) is_linear: bool,
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformLut3x3<T, U, SRC_LAYOUT, DST_LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
#[inline(always)]
fn transform_chunk<'b, Tetrahedral: MultidimensionalInterpolation<'b, GRID_SIZE>>(
&'b self,
src: &[T],
dst: &mut [T],
) {
let src_cn = Layout::from(SRC_LAYOUT);
let src_channels = src_cn.channels();
let dst_cn = Layout::from(DST_LAYOUT);
let dst_channels = dst_cn.channels();
let value_scale = ((1 << BIT_DEPTH) - 1) as f32;
let max_value = ((1u32 << BIT_DEPTH) - 1).as_();
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let x = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.r_i()],
);
let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.g_i()],
);
let z = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.b_i()],
);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_value
};
let tetrahedral = Tetrahedral::new(&self.lut);
let v = tetrahedral.inter3(x, y, z, &self.weights);
if T::FINITE {
let r = v * value_scale + 0.5;
dst[dst_cn.r_i()] = r.v[0].min(value_scale).max(0.).as_();
dst[dst_cn.g_i()] = r.v[1].min(value_scale).max(0.).as_();
dst[dst_cn.b_i()] = r.v[2].min(value_scale).max(0.).as_();
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
} else {
dst[dst_cn.r_i()] = v.v[0].as_();
dst[dst_cn.g_i()] = v.v[1].as_();
dst[dst_cn.b_i()] = v.v[2].as_();
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
}
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformExecutor<T>
for TransformLut3x3<T, U, SRC_LAYOUT, DST_LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let src_channels = src_cn.channels();
let dst_cn = Layout::from(DST_LAYOUT);
let dst_channels = dst_cn.channels();
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / src_channels;
let dst_chunks = dst.len() / dst_channels;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
if self.color_space == DataColorSpace::Lab
|| (self.is_linear && self.color_space == DataColorSpace::Rgb)
|| self.color_space == DataColorSpace::Xyz
{
use crate::conversions::interpolator::Trilinear;
self.transform_chunk::<Trilinear<GRID_SIZE>>(src, dst);
} else {
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
use crate::conversions::interpolator::Tetrahedral;
self.transform_chunk::<Tetrahedral<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
use crate::conversions::interpolator::Pyramidal;
self.transform_chunk::<Pyramidal<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
use crate::conversions::interpolator::Prismatic;
self.transform_chunk::<Prismatic<GRID_SIZE>>(src, dst);
}
InterpolationMethod::Linear => {
use crate::conversions::interpolator::Trilinear;
self.transform_chunk::<Trilinear<GRID_SIZE>>(src, dst);
}
}
}
Ok(())
}
}
pub(crate) struct DefaultLut3x3Factory {}
impl Lut3x3Factory for DefaultLut3x3Factory {
fn make_transform_3x3<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible + 'static + Send + Sync,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
>(
lut: Vec<f32>,
options: TransformOptions,
color_space: DataColorSpace,
is_linear: bool,
) -> Box<dyn TransformExecutor<T> + Send + Sync>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, u8>,
(): LutBarycentricReduction<T, u16>,
{
match options.barycentric_weight_scale {
BarycentricWeightScale::Low => Box::new(TransformLut3x3::<
T,
u8,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
256,
256,
> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_ranged_256::<GRID_SIZE>(),
color_space,
is_linear,
}),
#[cfg(feature = "options")]
BarycentricWeightScale::High => Box::new(TransformLut3x3::<
T,
u16,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
65536,
65536,
> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_binned::<GRID_SIZE, 65536>(),
color_space,
is_linear,
}),
}
}
}
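// One-channel sketch of the finite-type encode step used above: scale to
// the target bit depth, round half-up by adding 0.5, then clamp before the
// truncating cast (hypothetical helper):
#[allow(dead_code)]
fn encode_channel(v: f32, bit_depth: u32) -> u16 {
    let value_scale = ((1u32 << bit_depth) - 1) as f32;
    let r = v * value_scale + 0.5; // round half up
    r.min(value_scale).max(0.0) as u16 // clamp, then truncate
}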

View File

@@ -0,0 +1,269 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::interpolator::{BarycentricWeight, MultidimensionalInterpolation};
use crate::transform::PointeeSizeExpressible;
use crate::{
BarycentricWeightScale, CmsError, DataColorSpace, InterpolationMethod, Layout,
TransformExecutor, TransformOptions,
};
use num_traits::AsPrimitive;
use std::marker::PhantomData;
pub(crate) struct TransformLut3x4<
T,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> {
pub(crate) lut: Vec<f32>,
pub(crate) _phantom: PhantomData<T>,
pub(crate) _phantom1: PhantomData<U>,
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) weights: Box<[BarycentricWeight<f32>; BINS]>,
pub(crate) color_space: DataColorSpace,
pub(crate) is_linear: bool,
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformLut3x4<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
#[inline(always)]
fn transform_chunk<'b, Tetrahedral: MultidimensionalInterpolation<'b, GRID_SIZE>>(
&'b self,
src: &[T],
dst: &mut [T],
) {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
let value_scale = ((1 << BIT_DEPTH) - 1) as f32;
for (src, dst) in src.chunks_exact(channels).zip(dst.chunks_exact_mut(4)) {
let x = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[cn.r_i()],
);
let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[cn.g_i()],
);
let z = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[cn.b_i()],
);
let tetrahedral = Tetrahedral::new(&self.lut);
let v = tetrahedral.inter4(x, y, z, &self.weights);
if T::FINITE {
let r = v * value_scale + 0.5;
dst[0] = r.v[0].min(value_scale).max(0.).as_();
dst[1] = r.v[1].min(value_scale).max(0.).as_();
dst[2] = r.v[2].min(value_scale).max(0.).as_();
dst[3] = r.v[3].min(value_scale).max(0.).as_();
} else {
dst[0] = v.v[0].as_();
dst[1] = v.v[1].as_();
dst[2] = v.v[2].as_();
dst[3] = v.v[3].as_();
}
}
}
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformExecutor<T>
for TransformLut3x4<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
if src.len() % channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % 4 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / channels;
let dst_chunks = dst.len() / 4;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
if self.color_space == DataColorSpace::Lab
|| (self.is_linear && self.color_space == DataColorSpace::Rgb)
|| self.color_space == DataColorSpace::Xyz
{
use crate::conversions::interpolator::Trilinear;
self.transform_chunk::<Trilinear<GRID_SIZE>>(src, dst);
} else {
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
use crate::conversions::interpolator::Tetrahedral;
self.transform_chunk::<Tetrahedral<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
use crate::conversions::interpolator::Pyramidal;
self.transform_chunk::<Pyramidal<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
use crate::conversions::interpolator::Prismatic;
self.transform_chunk::<Prismatic<GRID_SIZE>>(src, dst);
}
InterpolationMethod::Linear => {
use crate::conversions::interpolator::Trilinear;
self.transform_chunk::<Trilinear<GRID_SIZE>>(src, dst);
}
}
}
Ok(())
}
}
pub(crate) fn make_transform_3x4<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible + 'static + Send + Sync,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
>(
layout: Layout,
lut: Vec<f32>,
options: TransformOptions,
color_space: DataColorSpace,
is_linear: bool,
) -> Box<dyn TransformExecutor<T> + Sync + Send>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, u8>,
(): LutBarycentricReduction<T, u16>,
{
match layout {
Layout::Rgb => match options.barycentric_weight_scale {
BarycentricWeightScale::Low => Box::new(TransformLut3x4::<
T,
u8,
{ Layout::Rgb as u8 },
GRID_SIZE,
BIT_DEPTH,
256,
256,
> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_ranged_256::<GRID_SIZE>(),
color_space,
is_linear,
}),
#[cfg(feature = "options")]
BarycentricWeightScale::High => Box::new(TransformLut3x4::<
T,
u16,
{ Layout::Rgb as u8 },
GRID_SIZE,
BIT_DEPTH,
65536,
65536,
> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_binned::<GRID_SIZE, 65536>(),
color_space,
is_linear,
}),
},
Layout::Rgba => match options.barycentric_weight_scale {
BarycentricWeightScale::Low => Box::new(TransformLut3x4::<
T,
u8,
{ Layout::Rgba as u8 },
GRID_SIZE,
BIT_DEPTH,
256,
256,
> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_ranged_256::<GRID_SIZE>(),
color_space,
is_linear,
}),
#[cfg(feature = "options")]
BarycentricWeightScale::High => Box::new(TransformLut3x4::<
T,
u16,
{ Layout::Rgba as u8 },
GRID_SIZE,
BIT_DEPTH,
65536,
65536,
> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_binned::<GRID_SIZE, 65536>(),
color_space,
is_linear,
}),
},
_ => unimplemented!(),
}
}
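// `make_transform_3x4` above converts a runtime `Layout` into a const
// generic (`{ Layout::Rgb as u8 }`) so each pixel loop is monomorphized per
// layout; the same dispatch idiom in miniature, with hypothetical names:
#[allow(dead_code)]
fn sum_first_channel<const CHANNELS: usize>(data: &[u8]) -> u32 {
    data.chunks_exact(CHANNELS).map(|px| px[0] as u32).sum()
}
#[allow(dead_code)]
fn sum_first_channel_dyn(data: &[u8], has_alpha: bool) -> u32 {
    // The runtime flag picks a fully monomorphized instance.
    if has_alpha {
        sum_first_channel::<4>(data)
    } else {
        sum_first_channel::<3>(data)
    }
}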

View File

@@ -0,0 +1,316 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::interpolator::*;
use crate::conversions::lut_transforms::Lut4x3Factory;
use crate::math::{FusedMultiplyAdd, FusedMultiplyNegAdd, m_clamp};
use crate::{
BarycentricWeightScale, CmsError, DataColorSpace, InterpolationMethod, Layout,
PointeeSizeExpressible, TransformExecutor, TransformOptions, Vector3f,
};
use num_traits::AsPrimitive;
use std::marker::PhantomData;
pub(crate) trait Vector3fCmykLerp {
fn interpolate(a: Vector3f, b: Vector3f, t: f32, scale: f32) -> Vector3f;
}
#[allow(unused)]
#[derive(Copy, Clone, Default)]
struct DefaultVector3fLerp;
impl Vector3fCmykLerp for DefaultVector3fLerp {
#[inline(always)]
fn interpolate(a: Vector3f, b: Vector3f, t: f32, scale: f32) -> Vector3f {
let t = Vector3f::from(t);
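// The two fused ops compose into a standard lerp, (1 - t)*a + t*b; the
// result is then scaled, biased by 0.5 for rounding, and clamped to
// [0, scale].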
let inter = a.neg_mla(a, t).mla(b, t);
let mut new_vec = Vector3f::from(0.5).mla(inter, Vector3f::from(scale));
new_vec.v[0] = m_clamp(new_vec.v[0], 0.0, scale);
new_vec.v[1] = m_clamp(new_vec.v[1], 0.0, scale);
new_vec.v[2] = m_clamp(new_vec.v[2], 0.0, scale);
new_vec
}
}
#[allow(unused)]
#[derive(Copy, Clone, Default)]
pub(crate) struct NonFiniteVector3fLerp;
impl Vector3fCmykLerp for NonFiniteVector3fLerp {
#[inline(always)]
fn interpolate(a: Vector3f, b: Vector3f, t: f32, _: f32) -> Vector3f {
let t = Vector3f::from(t);
a.neg_mla(a, t).mla(b, t)
}
}
#[allow(unused)]
#[derive(Copy, Clone, Default)]
pub(crate) struct NonFiniteVector3fLerpUnbound;
impl Vector3fCmykLerp for NonFiniteVector3fLerpUnbound {
#[inline(always)]
fn interpolate(a: Vector3f, b: Vector3f, t: f32, _: f32) -> Vector3f {
let t = Vector3f::from(t);
a.neg_mla(a, t).mla(b, t)
}
}
#[allow(unused)]
struct TransformLut4To3<
T,
U,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> {
lut: Vec<f32>,
_phantom: PhantomData<T>,
_phantom1: PhantomData<U>,
interpolation_method: InterpolationMethod,
weights: Box<[BarycentricWeight<f32>; BINS]>,
color_space: DataColorSpace,
is_linear: bool,
}
#[allow(unused)]
impl<
T: Copy + AsPrimitive<f32> + Default,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformLut4To3<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
#[inline(always)]
fn transform_chunk<
'k,
Tetrahedral: MultidimensionalInterpolation<'k, GRID_SIZE>,
Interpolation: Vector3fCmykLerp,
>(
&'k self,
src: &[T],
dst: &mut [T],
) {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
let grid_size = GRID_SIZE as i32;
let grid_size3 = grid_size * grid_size * grid_size;
let value_scale = ((1 << BIT_DEPTH) - 1) as f32;
let max_value = ((1 << BIT_DEPTH) - 1u32).as_();
for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(channels)) {
let c = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[0],
);
let m = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[1],
);
let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[2],
);
let k = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[3],
);
let k_weights = self.weights[k.as_()];
let w: i32 = k_weights.x;
let w_n: i32 = k_weights.x_n;
let t: f32 = k_weights.w;
let table1 = &self.lut[(w * grid_size3 * 3) as usize..];
let table2 = &self.lut[(w_n * grid_size3 * 3) as usize..];
let tetrahedral1 = Tetrahedral::new(table1);
let tetrahedral2 = Tetrahedral::new(table2);
let r1 = tetrahedral1.inter3(c, m, y, &self.weights);
let r2 = tetrahedral2.inter3(c, m, y, &self.weights);
let r = Interpolation::interpolate(r1, r2, t, value_scale);
dst[cn.r_i()] = r.v[0].as_();
dst[cn.g_i()] = r.v[1].as_();
dst[cn.b_i()] = r.v[2].as_();
if channels == 4 {
dst[cn.a_i()] = max_value;
}
}
}
}
#[allow(unused)]
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformExecutor<T>
for TransformLut4To3<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
if src.len() % 4 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / 4;
let dst_chunks = dst.len() / channels;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
if self.color_space == DataColorSpace::Lab
|| (self.is_linear && self.color_space == DataColorSpace::Rgb)
|| self.color_space == DataColorSpace::Xyz
{
if T::FINITE {
self.transform_chunk::<Trilinear<GRID_SIZE>, DefaultVector3fLerp>(src, dst);
} else {
self.transform_chunk::<Trilinear<GRID_SIZE>, NonFiniteVector3fLerp>(src, dst);
}
} else {
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
if T::FINITE {
self.transform_chunk::<Tetrahedral<GRID_SIZE>, DefaultVector3fLerp>(
src, dst,
);
} else {
self.transform_chunk::<Tetrahedral<GRID_SIZE>, NonFiniteVector3fLerp>(
src, dst,
);
}
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
if T::FINITE {
self.transform_chunk::<Pyramidal<GRID_SIZE>, DefaultVector3fLerp>(src, dst);
} else {
self.transform_chunk::<Pyramidal<GRID_SIZE>, NonFiniteVector3fLerp>(
src, dst,
);
}
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
if T::FINITE {
self.transform_chunk::<Prismatic<GRID_SIZE>, DefaultVector3fLerp>(src, dst);
} else {
self.transform_chunk::<Prismatic<GRID_SIZE>, NonFiniteVector3fLerp>(
src, dst,
);
}
}
InterpolationMethod::Linear => {
if T::FINITE {
self.transform_chunk::<Trilinear<GRID_SIZE>, DefaultVector3fLerp>(src, dst);
} else {
self.transform_chunk::<Trilinear<GRID_SIZE>, NonFiniteVector3fLerp>(
src, dst,
);
}
}
}
}
Ok(())
}
}
#[allow(dead_code)]
pub(crate) struct DefaultLut4x3Factory {}
#[allow(dead_code)]
impl Lut4x3Factory for DefaultLut4x3Factory {
fn make_transform_4x3<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible + 'static + Send + Sync,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
>(
lut: Vec<f32>,
options: TransformOptions,
color_space: DataColorSpace,
is_linear: bool,
) -> Box<dyn TransformExecutor<T> + Sync + Send>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, u8>,
(): LutBarycentricReduction<T, u16>,
{
match options.barycentric_weight_scale {
BarycentricWeightScale::Low => {
Box::new(
TransformLut4To3::<T, u8, LAYOUT, GRID_SIZE, BIT_DEPTH, 256, 256> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_ranged_256::<GRID_SIZE>(),
color_space,
is_linear,
},
)
}
#[cfg(feature = "options")]
BarycentricWeightScale::High => {
Box::new(
TransformLut4To3::<T, u16, LAYOUT, GRID_SIZE, BIT_DEPTH, 65536, 65536> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_binned::<GRID_SIZE, 65536>(),
color_space,
is_linear,
},
)
}
}
}
}
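// The 4D (CMYK) lookup above splits along K: two 3D lookups on adjacent
// K-slices of the packed LUT, blended by the K barycentric weight. A sketch
// of the slice arithmetic, assuming a packed [K][C][M][Y][3] f32 table
// (hypothetical helper; `k_hi` is the clamped neighbor index):
#[allow(dead_code)]
fn k_slices(lut: &[f32], grid_size: usize, k_lo: usize, k_hi: usize) -> (&[f32], &[f32]) {
    let cube = grid_size * grid_size * grid_size * 3; // one 3D K-slice
    (&lut[k_lo * cube..], &lut[k_hi * cube..])
}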

View File

@@ -0,0 +1,61 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::{CmsError, InPlaceStage, Lab, Xyz};
#[derive(Default)]
pub(crate) struct StageLabToXyz {}
impl InPlaceStage for StageLabToXyz {
fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
for dst in dst.chunks_exact_mut(3) {
let lab = Lab::new(dst[0], dst[1], dst[2]);
let xyz = lab.to_pcs_xyz();
dst[0] = xyz.x;
dst[1] = xyz.y;
dst[2] = xyz.z;
}
Ok(())
}
}
#[derive(Default)]
pub(crate) struct StageXyzToLab {}
impl InPlaceStage for StageXyzToLab {
fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
for dst in dst.chunks_exact_mut(3) {
let xyz = Xyz::new(dst[0], dst[1], dst[2]);
let lab = Lab::from_pcs_xyz(xyz);
dst[0] = lab.l;
dst[1] = lab.a;
dst[2] = lab.b;
}
Ok(())
}
}
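// Usage sketch for the in-place stages above: triples are rewritten in
// place, so chaining the two stages is a Lab -> XYZ -> Lab round trip up to
// floating-point error (hypothetical helper):
#[allow(dead_code)]
fn lab_xyz_roundtrip(buf: &mut [f32]) -> Result<(), CmsError> {
    StageLabToXyz::default().transform(buf)?;
    StageXyzToLab::default().transform(buf)
}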

154
vendor/moxcms/src/dat.rs vendored Normal file
View File

@@ -0,0 +1,154 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::CmsError;
use crate::writer::write_u16_be;
use std::time::{SystemTime, UNIX_EPOCH};
#[repr(C)]
#[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Default)]
pub struct ColorDateTime {
pub year: u16,
pub month: u16,
pub day_of_the_month: u16,
pub hours: u16,
pub minutes: u16,
pub seconds: u16,
}
fn is_leap(year: i32) -> bool {
(year % 4 == 0 && year % 100 != 0) || (year % 400 == 0)
}
fn days_in_month(year: i32, month: i32) -> i32 {
match month {
1 => 31,
2 => {
if is_leap(year) {
29
} else {
28
}
}
3 => 31,
4 => 30,
5 => 31,
6 => 30,
7 => 31,
8 => 31,
9 => 30,
10 => 31,
11 => 30,
12 => 31,
_ => unreachable!("Unknown month"),
}
}
impl ColorDateTime {
    /// Parses a 12-byte ICC `dateTimeNumber` slice
pub fn new_from_slice(slice: &[u8]) -> Result<ColorDateTime, CmsError> {
if slice.len() != 12 {
return Err(CmsError::InvalidProfile);
}
let year = u16::from_be_bytes([slice[0], slice[1]]);
let month = u16::from_be_bytes([slice[2], slice[3]]);
let day_of_the_month = u16::from_be_bytes([slice[4], slice[5]]);
let hours = u16::from_be_bytes([slice[6], slice[7]]);
let minutes = u16::from_be_bytes([slice[8], slice[9]]);
let seconds = u16::from_be_bytes([slice[10], slice[11]]);
Ok(ColorDateTime {
year,
month,
day_of_the_month,
hours,
minutes,
seconds,
})
}
/// Creates a new `ColorDateTime` from the current system time (UTC)
pub fn now() -> Self {
let now = match SystemTime::now().duration_since(UNIX_EPOCH) {
Ok(v) => v,
Err(_) => return Self::default(),
};
let mut days = (now.as_secs() / 86_400) as i64;
let secs_of_day = (now.as_secs() % 86_400) as i64;
let mut year = 1970;
loop {
let year_days = if is_leap(year) { 366 } else { 365 };
if days >= year_days {
days -= year_days;
year += 1;
} else {
break;
}
}
let mut month = 1;
loop {
let mdays = days_in_month(year, month);
if days >= mdays as i64 {
days -= mdays as i64;
month += 1;
} else {
break;
}
}
        let day = days + 1; // convert day from zero-based to one-based
let hour = secs_of_day / 3600;
let min = (secs_of_day % 3600) / 60;
let sec = secs_of_day % 60;
Self {
year: year as u16,
month: month as u16,
day_of_the_month: day as u16,
hours: hour as u16,
minutes: min as u16,
seconds: sec as u16,
}
}
#[inline]
pub(crate) fn encode(&self, into: &mut Vec<u8>) {
        write_u16_be(into, self.year);
        write_u16_be(into, self.month);
        write_u16_be(into, self.day_of_the_month);
        write_u16_be(into, self.hours);
        write_u16_be(into, self.minutes);
        write_u16_be(into, self.seconds);
}
}
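// Illustrative round-trip sketch: `encode` writes the six big-endian u16 fields
// of an ICC dateTimeNumber, and `new_from_slice` parses them back.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn date_time_roundtrip() {
        let dt = ColorDateTime::now();
        let mut buf = Vec::new();
        dt.encode(&mut buf);
        assert_eq!(buf.len(), 12);
        let parsed = ColorDateTime::new_from_slice(&buf).unwrap();
        assert_eq!(parsed, dt);
    }
}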

541
vendor/moxcms/src/defaults.rs vendored Normal file
View File

@@ -0,0 +1,541 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::chad::BRADFORD_D;
use crate::cicp::create_rec709_parametric;
use crate::trc::{ToneReprCurve, curve_from_gamma};
use crate::{
CicpColorPrimaries, CicpProfile, ColorPrimaries, ColorProfile, DataColorSpace,
LocalizableString, Matrix3d, MatrixCoefficients, ProfileClass, ProfileText, RenderingIntent,
TransferCharacteristics, XyY,
};
use pxfm::{copysignk, exp, floor, pow};
/// From lcms: `cmsWhitePointFromTemp`
/// `temp_k` must be > 4000 and <= 25000.
/// Invalid values of `temp_k` return
/// (x, y, Y) = (-1.0, -1.0, -1.0),
/// similar to argyll: `icx_DTEMP2XYZ()`
const fn white_point_from_temperature(temp_k: i32) -> XyY {
let mut white_point = XyY {
x: 0.,
y: 0.,
yb: 0.,
};
    // No optimization provided.
    let temp_k = temp_k as f64;
    let temp_k2 = temp_k * temp_k; // Square
    let temp_k3 = temp_k2 * temp_k; // Cube
// For correlated color temperature (T) between 4000K and 7000K:
let x = if temp_k > 4000.0 && temp_k <= 7000.0 {
-4.6070 * (1E9 / temp_k3) + 2.9678 * (1E6 / temp_k2) + 0.09911 * (1E3 / temp_k) + 0.244063
    } else if temp_k > 7000.0 && temp_k <= 25000.0 {
        // For correlated color temperature (T) between 7000K and 25000K:
        -2.0064 * (1E9 / temp_k3) + 1.9018 * (1E6 / temp_k2) + 0.24748 * (1E3 / temp_k) + 0.237040
    } else {
        // Invalid tempK
white_point.x = -1.0;
white_point.y = -1.0;
white_point.yb = -1.0;
debug_assert!(false, "invalid temp");
return white_point;
};
// Obtain y(x)
let y = -3.000 * (x * x) + 2.870 * x - 0.275;
    // wave factors (not used, but here for future extensions)
// let M1 = (-1.3515 - 1.7703*x + 5.9114 *y)/(0.0241 + 0.2562*x - 0.7341*y);
// let M2 = (0.0300 - 31.4424*x + 30.0717*y)/(0.0241 + 0.2562*x - 0.7341*y);
// Fill white_point struct
white_point.x = x;
white_point.y = y;
white_point.yb = 1.0;
white_point
}
pub const WHITE_POINT_D50: XyY = white_point_from_temperature(5003);
pub const WHITE_POINT_D60: XyY = white_point_from_temperature(6000);
pub const WHITE_POINT_D65: XyY = white_point_from_temperature(6504);
pub const WHITE_POINT_DCI_P3: XyY = white_point_from_temperature(6300);
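// Quick sanity sketch (illustrative; the tolerance is an assumption): the cubic
// fit above should land close to the canonical D65 chromaticity (0.3127, 0.3290).
#[cfg(test)]
mod white_point_tests {
    use super::*;

    #[test]
    fn d65_chromaticity_is_close() {
        assert!((WHITE_POINT_D65.x - 0.3127).abs() < 1e-3);
        assert!((WHITE_POINT_D65.y - 0.3290).abs() < 1e-3);
    }
}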
// https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.2100-2-201807-I!!PDF-F.pdf
// Perceptual Quantization / SMPTE standard ST.2084
#[inline]
const fn pq_curve(x: f64) -> f64 {
const M1: f64 = 2610.0 / 16384.0;
const M2: f64 = (2523.0 / 4096.0) * 128.0;
const C1: f64 = 3424.0 / 4096.0;
const C2: f64 = (2413.0 / 4096.0) * 32.0;
const C3: f64 = (2392.0 / 4096.0) * 32.0;
if x == 0.0 {
return 0.0;
}
let sign = x;
let x = x.abs();
let xpo = pow(x, 1.0 / M2);
let num = (xpo - C1).max(0.0);
let den = C2 - C3 * xpo;
let res = pow(num / den, 1.0 / M1);
copysignk(res, sign)
}
pub(crate) const fn build_trc_table_pq() -> [u16; 4096] {
let mut table = [0u16; 4096];
const NUM_ENTRIES: usize = 4096;
let mut i = 0usize;
while i < NUM_ENTRIES {
let x: f64 = i as f64 / (NUM_ENTRIES - 1) as f64;
let y: f64 = pq_curve(x);
        let mut output = y * 65535.0 + 0.5;
if output > 65535.0 {
output = 65535.0
}
if output < 0.0 {
output = 0.0
}
table[i] = floor(output) as u16;
i += 1;
}
table
}
pub(crate) const fn build_trc_table_hlg() -> [u16; 4096] {
let mut table = [0u16; 4096];
const NUM_ENTRIES: usize = 4096;
let mut i = 0usize;
while i < NUM_ENTRIES {
let x: f64 = i as f64 / (NUM_ENTRIES - 1) as f64;
let y: f64 = hlg_curve(x);
        let mut output = y * 65535.0 + 0.5;
if output > 65535.0 {
output = 65535.0
}
if output < 0.0 {
output = 0.0
}
table[i] = floor(output) as u16;
i += 1;
}
table
}
// https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.2100-2-201807-I!!PDF-F.pdf
// Hybrid Log-Gamma
const fn hlg_curve(x: f64) -> f64 {
const BETA: f64 = 0.04;
const RA: f64 = 5.591816309728916; // 1.0 / A where A = 0.17883277
const B: f64 = 0.28466892; // 1.0 - 4.0 * A
    const C: f64 = 0.5599107295; // 0.5 - a * ln(4 * a)
let e = (x * (1.0 - BETA) + BETA).max(0.0);
if e == 0.0 {
return 0.0;
}
    let sign = e; // e is already non-negative after the max(0.0) above
let res = if e <= 0.5 {
e * e / 3.0
} else {
(exp((e - C) * RA) + B) / 12.0
};
copysignk(res, sign)
}
/// Perceptual Quantizer Lookup table
pub const PQ_LUT_TABLE: [u16; 4096] = build_trc_table_pq();
/// Hybrid Log Gamma Lookup table
pub const HLG_LUT_TABLE: [u16; 4096] = build_trc_table_hlg();
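// Illustrative sanity sketch: both transfer LUTs should be monotonically
// non-decreasing, and the PQ table should span the full 16-bit range
// (pq_curve(0) = 0 and pq_curve(1) = 1 by construction of the constants above).
#[cfg(test)]
mod lut_tests {
    use super::*;

    #[test]
    fn luts_span_range_and_are_monotonic() {
        assert_eq!(PQ_LUT_TABLE[0], 0);
        assert_eq!(PQ_LUT_TABLE[4095], 65535);
        assert!(PQ_LUT_TABLE.windows(2).all(|w| w[1] >= w[0]));
        assert!(HLG_LUT_TABLE.windows(2).all(|w| w[1] >= w[0]));
    }
}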
impl ColorProfile {
const SRGB_COLORANTS: Matrix3d =
ColorProfile::colorants_matrix(WHITE_POINT_D65, ColorPrimaries::BT_709);
const DISPLAY_P3_COLORANTS: Matrix3d =
ColorProfile::colorants_matrix(WHITE_POINT_D65, ColorPrimaries::SMPTE_432);
const ADOBE_RGB_COLORANTS: Matrix3d =
ColorProfile::colorants_matrix(WHITE_POINT_D65, ColorPrimaries::ADOBE_RGB);
const DCI_P3_COLORANTS: Matrix3d =
ColorProfile::colorants_matrix(WHITE_POINT_DCI_P3, ColorPrimaries::DCI_P3);
const PRO_PHOTO_RGB_COLORANTS: Matrix3d =
ColorProfile::colorants_matrix(WHITE_POINT_D50, ColorPrimaries::PRO_PHOTO_RGB);
const BT2020_COLORANTS: Matrix3d =
ColorProfile::colorants_matrix(WHITE_POINT_D65, ColorPrimaries::BT_2020);
const ACES_2065_1_COLORANTS: Matrix3d =
ColorProfile::colorants_matrix(WHITE_POINT_D60, ColorPrimaries::ACES_2065_1);
const ACES_CG_COLORANTS: Matrix3d =
ColorProfile::colorants_matrix(WHITE_POINT_D60, ColorPrimaries::ACES_CG);
#[inline]
fn basic_rgb_profile() -> ColorProfile {
ColorProfile {
profile_class: ProfileClass::DisplayDevice,
rendering_intent: RenderingIntent::Perceptual,
color_space: DataColorSpace::Rgb,
pcs: DataColorSpace::Xyz,
chromatic_adaptation: Some(BRADFORD_D),
white_point: WHITE_POINT_D50.to_xyzd(),
..Default::default()
}
}
/// Creates new profile from CICP
pub fn new_from_cicp(cicp_color_primaries: CicpProfile) -> ColorProfile {
let mut basic = ColorProfile::basic_rgb_profile();
basic.update_rgb_colorimetry_from_cicp(cicp_color_primaries);
basic
}
/// Creates new sRGB profile
pub fn new_srgb() -> ColorProfile {
let mut profile = ColorProfile::basic_rgb_profile();
profile.update_colorants(ColorProfile::SRGB_COLORANTS);
let curve =
ToneReprCurve::Parametric(vec![2.4, 1. / 1.055, 0.055 / 1.055, 1. / 12.92, 0.04045]);
profile.red_trc = Some(curve.clone());
profile.blue_trc = Some(curve.clone());
profile.green_trc = Some(curve);
profile.media_white_point = Some(WHITE_POINT_D65.to_xyzd());
profile.cicp = Some(CicpProfile {
color_primaries: CicpColorPrimaries::Bt709,
transfer_characteristics: TransferCharacteristics::Srgb,
matrix_coefficients: MatrixCoefficients::Bt709,
full_range: false,
});
profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"sRGB IEC61966-2.1".to_string(),
)]));
profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Public Domain".to_string(),
)]));
profile
}
/// Creates new Adobe RGB profile
pub fn new_adobe_rgb() -> ColorProfile {
let mut profile = ColorProfile::basic_rgb_profile();
profile.update_colorants(ColorProfile::ADOBE_RGB_COLORANTS);
let curve = curve_from_gamma(2.19921875f32);
profile.red_trc = Some(curve.clone());
profile.blue_trc = Some(curve.clone());
profile.green_trc = Some(curve);
profile.media_white_point = Some(WHITE_POINT_D65.to_xyzd());
profile.white_point = WHITE_POINT_D50.to_xyzd();
profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Adobe RGB 1998".to_string(),
)]));
profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Public Domain".to_string(),
)]));
profile
}
/// Creates new Display P3 profile
pub fn new_display_p3() -> ColorProfile {
let mut profile = ColorProfile::basic_rgb_profile();
profile.update_colorants(ColorProfile::DISPLAY_P3_COLORANTS);
let curve =
ToneReprCurve::Parametric(vec![2.4, 1. / 1.055, 0.055 / 1.055, 1. / 12.92, 0.04045]);
profile.red_trc = Some(curve.clone());
profile.blue_trc = Some(curve.clone());
profile.green_trc = Some(curve);
profile.media_white_point = Some(WHITE_POINT_D65.to_xyzd());
profile.cicp = Some(CicpProfile {
color_primaries: CicpColorPrimaries::Smpte431,
transfer_characteristics: TransferCharacteristics::Srgb,
matrix_coefficients: MatrixCoefficients::Bt709,
full_range: false,
});
profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Display P3".to_string(),
)]));
profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Public Domain".to_string(),
)]));
profile
}
/// Creates new Display P3 PQ profile
pub fn new_display_p3_pq() -> ColorProfile {
let mut profile = ColorProfile::basic_rgb_profile();
profile.update_colorants(ColorProfile::DISPLAY_P3_COLORANTS);
let curve = ToneReprCurve::Lut(PQ_LUT_TABLE.to_vec());
profile.red_trc = Some(curve.clone());
profile.blue_trc = Some(curve.clone());
profile.green_trc = Some(curve);
profile.media_white_point = Some(WHITE_POINT_D65.to_xyzd());
profile.cicp = Some(CicpProfile {
color_primaries: CicpColorPrimaries::Smpte431,
transfer_characteristics: TransferCharacteristics::Smpte2084,
matrix_coefficients: MatrixCoefficients::Bt709,
full_range: false,
});
profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Display P3 PQ".to_string(),
)]));
profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Public Domain".to_string(),
)]));
profile
}
/// Creates new DCI P3 profile
pub fn new_dci_p3() -> ColorProfile {
let mut profile = ColorProfile::basic_rgb_profile();
profile.update_colorants(ColorProfile::DCI_P3_COLORANTS);
let curve = curve_from_gamma(2.6f32);
profile.red_trc = Some(curve.clone());
profile.blue_trc = Some(curve.clone());
profile.green_trc = Some(curve);
profile.media_white_point = Some(WHITE_POINT_DCI_P3.to_xyzd());
profile.cicp = Some(CicpProfile {
color_primaries: CicpColorPrimaries::Smpte432,
transfer_characteristics: TransferCharacteristics::Srgb,
matrix_coefficients: MatrixCoefficients::Bt709,
full_range: false,
});
profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"DCI P3".to_string(),
)]));
profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Public Domain".to_string(),
)]));
profile
}
/// Creates new ProPhoto RGB profile
pub fn new_pro_photo_rgb() -> ColorProfile {
let mut profile = ColorProfile::basic_rgb_profile();
profile.update_colorants(ColorProfile::PRO_PHOTO_RGB_COLORANTS);
let curve = curve_from_gamma(1.8f32);
profile.red_trc = Some(curve.clone());
profile.blue_trc = Some(curve.clone());
profile.green_trc = Some(curve);
profile.media_white_point = Some(WHITE_POINT_D50.to_xyzd());
profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"ProPhoto RGB".to_string(),
)]));
profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Public Domain".to_string(),
)]));
profile
}
/// Creates new Bt.2020 profile
pub fn new_bt2020() -> ColorProfile {
let mut profile = ColorProfile::basic_rgb_profile();
profile.update_colorants(ColorProfile::BT2020_COLORANTS);
let curve = ToneReprCurve::Parametric(create_rec709_parametric().to_vec());
profile.red_trc = Some(curve.clone());
profile.blue_trc = Some(curve.clone());
profile.green_trc = Some(curve);
profile.media_white_point = Some(WHITE_POINT_D65.to_xyzd());
profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Rec.2020".to_string(),
)]));
profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Public Domain".to_string(),
)]));
profile
}
/// Creates new Bt.2020 PQ profile
pub fn new_bt2020_pq() -> ColorProfile {
let mut profile = ColorProfile::basic_rgb_profile();
profile.update_colorants(ColorProfile::BT2020_COLORANTS);
let curve = ToneReprCurve::Lut(PQ_LUT_TABLE.to_vec());
profile.red_trc = Some(curve.clone());
profile.blue_trc = Some(curve.clone());
profile.green_trc = Some(curve);
profile.media_white_point = Some(WHITE_POINT_D65.to_xyzd());
profile.cicp = Some(CicpProfile {
color_primaries: CicpColorPrimaries::Bt2020,
transfer_characteristics: TransferCharacteristics::Smpte2084,
matrix_coefficients: MatrixCoefficients::Bt709,
full_range: false,
});
profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Rec.2020 PQ".to_string(),
)]));
profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Public Domain".to_string(),
)]));
profile
}
/// Creates new Bt.2020 HLG profile
pub fn new_bt2020_hlg() -> ColorProfile {
let mut profile = ColorProfile::basic_rgb_profile();
profile.update_colorants(ColorProfile::BT2020_COLORANTS);
let curve = ToneReprCurve::Lut(HLG_LUT_TABLE.to_vec());
profile.red_trc = Some(curve.clone());
profile.blue_trc = Some(curve.clone());
profile.green_trc = Some(curve);
profile.media_white_point = Some(WHITE_POINT_D65.to_xyzd());
profile.cicp = Some(CicpProfile {
color_primaries: CicpColorPrimaries::Bt2020,
transfer_characteristics: TransferCharacteristics::Hlg,
matrix_coefficients: MatrixCoefficients::Bt709,
full_range: false,
});
profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Rec.2020 HLG".to_string(),
)]));
profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Public Domain".to_string(),
)]));
profile
}
/// Creates new Monochrome profile
pub fn new_gray_with_gamma(gamma: f32) -> ColorProfile {
ColorProfile {
gray_trc: Some(curve_from_gamma(gamma)),
profile_class: ProfileClass::DisplayDevice,
rendering_intent: RenderingIntent::Perceptual,
color_space: DataColorSpace::Gray,
media_white_point: Some(WHITE_POINT_D65.to_xyzd()),
white_point: WHITE_POINT_D50.to_xyzd(),
chromatic_adaptation: Some(BRADFORD_D),
copyright: Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Public Domain".to_string(),
)])),
..Default::default()
}
}
/// Creates new ACES 2065-1/AP0 profile
pub fn new_aces_aces_2065_1_linear() -> ColorProfile {
let mut profile = ColorProfile::basic_rgb_profile();
profile.update_colorants(ColorProfile::ACES_2065_1_COLORANTS);
let curve = ToneReprCurve::Lut(vec![]);
profile.red_trc = Some(curve.clone());
profile.blue_trc = Some(curve.clone());
profile.green_trc = Some(curve);
profile.media_white_point = Some(WHITE_POINT_D60.to_xyzd());
profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"ACES 2065-1".to_string(),
)]));
profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Public Domain".to_string(),
)]));
profile
}
/// Creates new ACEScg profile
pub fn new_aces_cg_linear() -> ColorProfile {
let mut profile = ColorProfile::basic_rgb_profile();
profile.update_colorants(ColorProfile::ACES_CG_COLORANTS);
let curve = ToneReprCurve::Lut(vec![]);
profile.red_trc = Some(curve.clone());
profile.blue_trc = Some(curve.clone());
profile.green_trc = Some(curve);
profile.media_white_point = Some(WHITE_POINT_D60.to_xyzd());
profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"ACEScg/AP1".to_string(),
)]));
profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Public Domain".to_string(),
)]));
profile
}
}
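// Illustrative usage sketch: the constructors above return fully populated
// display profiles; the fields checked here follow the struct definitions in
// this crate.
#[cfg(test)]
mod default_profile_tests {
    use super::*;

    #[test]
    fn srgb_profile_shape() {
        let srgb = ColorProfile::new_srgb();
        assert!(matches!(srgb.color_space, DataColorSpace::Rgb));
        assert!(matches!(srgb.pcs, DataColorSpace::Xyz));
        assert!(srgb.red_trc.is_some());
        assert!(srgb.green_trc.is_some());
        assert!(srgb.blue_trc.is_some());
        assert!(srgb.cicp.is_some());
    }
}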

359
vendor/moxcms/src/dt_ucs.rs vendored Normal file
View File

@@ -0,0 +1,359 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::Xyz;
use crate::mlaf::mlaf;
use pxfm::{f_atan2f, f_powf, f_sincosf};
/// Darktable UCS JCH (Darktable Uniform Color Space)
#[derive(Copy, Clone, PartialOrd, PartialEq, Debug)]
pub struct DtUchJch {
pub j: f32,
pub c: f32,
pub h: f32,
}
/// Darktable UCS HSB (Darktable Uniform Color Space)
#[derive(Copy, Clone, PartialOrd, PartialEq, Debug)]
pub struct DtUchHsb {
pub h: f32,
pub s: f32,
pub b: f32,
}
/// Darktable UCS HCB (Darktable Uniform Color Space)
#[derive(Copy, Clone, PartialOrd, PartialEq, Debug)]
pub struct DtUchHcb {
pub h: f32,
pub c: f32,
pub b: f32,
}
const DT_UCS_L_STAR_RANGE: f32 = 2.098883786377;
#[inline]
fn y_to_dt_ucs_l_star(y: f32) -> f32 {
let y_hat = f_powf(y, 0.631651345306265);
DT_UCS_L_STAR_RANGE * y_hat / (y_hat + 1.12426773749357)
}
#[inline]
fn dt_ucs_l_star_to_y(x: f32) -> f32 {
f_powf(
1.12426773749357 * x / (DT_UCS_L_STAR_RANGE - x),
1.5831518565279648,
)
}
const L_WHITE: f32 = 0.98805060;
#[inline]
fn dt_ucs_luv_to_ucs_jch(
l_star: f32,
l_white: f32,
u_star_prime: f32,
v_star_prime: f32,
) -> DtUchJch {
let m2: f32 = mlaf(u_star_prime * u_star_prime, v_star_prime, v_star_prime); // square of colorfulness M
    // should be JCH[0] = powf(L_star / L_white, cz), but we treat only the case where cz = 1
let j = l_star / l_white;
let c =
15.932993652962535 * f_powf(l_star, 0.6523997524738018) * f_powf(m2, 0.6007557017508491)
/ l_white;
let h = f_atan2f(v_star_prime, u_star_prime);
DtUchJch::new(j, c, h)
}
#[inline]
fn dt_ucs_xy_to_uv(x: f32, y: f32) -> (f32, f32) {
const X_C: [f32; 3] = [-0.783941002840055, 0.745273540913283, 0.318707282433486];
const Y_C: [f32; 3] = [0.277512987809202, -0.205375866083878, 2.16743692732158];
const BIAS: [f32; 3] = [0.153836578598858, -0.165478376301988, 0.291320554395942];
let mut u_c = mlaf(mlaf(BIAS[0], Y_C[0], y), X_C[0], x);
let mut v_c = mlaf(mlaf(BIAS[1], Y_C[1], y), X_C[1], x);
let d_c = mlaf(mlaf(BIAS[2], Y_C[2], y), X_C[2], x);
    // Guard the division against zero: C's FLT_MIN corresponds to
    // f32::MIN_POSITIVE in Rust, not f32::MIN.
    let div = if d_c >= 0.0 {
        d_c.max(f32::MIN_POSITIVE)
    } else {
        d_c.min(-f32::MIN_POSITIVE)
    };
u_c /= div;
v_c /= div;
const STAR_C: [f32; 2] = [1.39656225667, 1.4513954287];
const STAR_HF_C: [f32; 2] = [1.49217352929, 1.52488637914];
let u_star = STAR_C[0] * u_c / (u_c.abs() + STAR_HF_C[0]);
let v_star = STAR_C[1] * v_c / (v_c.abs() + STAR_HF_C[1]);
// The following is equivalent to a 2D matrix product
let u_star_prime = mlaf(-1.124983854323892 * u_star, -0.980483721769325, v_star);
let v_star_prime = mlaf(1.86323315098672 * u_star, 1.971853092390862, v_star);
(u_star_prime, v_star_prime)
}
impl DtUchJch {
#[inline]
pub fn new(j: f32, c: f32, h: f32) -> DtUchJch {
DtUchJch { j, c, h }
}
#[inline]
pub fn from_xyz(xyz: Xyz) -> DtUchJch {
DtUchJch::from_xyy(xyz.to_xyy())
}
#[inline]
pub fn to_xyz(&self) -> Xyz {
let xyy = self.to_xyy();
Xyz::from_xyy(xyy)
}
#[inline]
pub fn from_xyy(xyy: [f32; 3]) -> DtUchJch {
let l_star = y_to_dt_ucs_l_star(xyy[2]);
// let l_white = y_to_dt_ucs_l_star(1.);
let (u_star_prime, v_star_prime) = dt_ucs_xy_to_uv(xyy[0], xyy[1]);
dt_ucs_luv_to_ucs_jch(l_star, L_WHITE, u_star_prime, v_star_prime)
}
#[inline]
pub fn to_xyy(&self) -> [f32; 3] {
// let l_white: f32 = y_to_dt_ucs_l_star(1.0);
let l_star = (self.j * L_WHITE).max(0.0).min(2.09885);
let m = if l_star != 0. {
f_powf(
self.c * L_WHITE / (15.932993652962535 * f_powf(l_star, 0.6523997524738018)),
0.8322850678616855,
)
} else {
0.
};
let sin_cos_h = f_sincosf(self.h);
let u_star_prime = m * sin_cos_h.1;
let v_star_prime = m * sin_cos_h.0;
// The following is equivalent to a 2D matrix product
let u_star = mlaf(
-5.037522385190711 * u_star_prime,
-2.504856328185843,
v_star_prime,
);
let v_star = mlaf(
4.760029407436461 * u_star_prime,
2.874012963239247,
v_star_prime,
);
const F: [f32; 2] = [1.39656225667, 1.4513954287];
const HF: [f32; 2] = [1.49217352929, 1.52488637914];
let u_c = -HF[0] * u_star / (u_star.abs() - F[0]);
let v_c = -HF[1] * v_star / (v_star.abs() - F[1]);
const U_C: [f32; 3] = [0.167171472114775, -0.150959086409163, 0.940254742367256];
const V_C: [f32; 3] = [0.141299802443708, -0.155185060382272, 1.000000000000000];
const BIAS: [f32; 3] = [
-0.00801531300850582,
-0.00843312433578007,
-0.0256325967652889,
];
let mut x = mlaf(mlaf(BIAS[0], V_C[0], v_c), U_C[0], u_c);
let mut y = mlaf(mlaf(BIAS[1], V_C[1], v_c), U_C[1], u_c);
let d = mlaf(mlaf(BIAS[2], V_C[2], v_c), U_C[2], u_c);
        // Guard the division against zero (f32::MIN_POSITIVE, the Rust
        // analogue of C's FLT_MIN).
        let div = if d >= 0.0 {
            d.max(f32::MIN_POSITIVE)
        } else {
            d.min(-f32::MIN_POSITIVE)
        };
x /= div;
y /= div;
let yb = dt_ucs_l_star_to_y(l_star);
[x, y, yb]
}
}
impl DtUchHsb {
#[inline]
pub fn new(h: f32, s: f32, b: f32) -> DtUchHsb {
DtUchHsb { h, s, b }
}
#[inline]
pub fn from_jch(jch: DtUchJch) -> DtUchHsb {
let b = jch.j * (f_powf(jch.c, 1.33654221029386) + 1.);
let s = if b > 0. { jch.c / b } else { 0. };
let h = jch.h;
DtUchHsb::new(h, s, b)
}
#[inline]
pub fn to_jch(&self) -> DtUchJch {
let h = self.h;
let c = self.s * self.b;
let j = self.b / (f_powf(c, 1.33654221029386) + 1.);
DtUchJch::new(j, c, h)
}
}
impl DtUchHcb {
#[inline]
pub fn new(h: f32, c: f32, b: f32) -> DtUchHcb {
DtUchHcb { h, c, b }
}
#[inline]
pub fn from_jch(jch: DtUchJch) -> DtUchHcb {
let b = jch.j * (f_powf(jch.c, 1.33654221029386) + 1.);
let c = jch.c;
let h = jch.h;
DtUchHcb::new(h, c, b)
}
#[inline]
pub fn to_jch(&self) -> DtUchJch {
let h = self.h;
let c = self.c;
let j = self.b / (f_powf(self.c, 1.33654221029386) + 1.);
DtUchJch::new(j, c, h)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_darktable_ucs_jch() {
let xyy = [0.4, 0.2, 0.5];
let ucs = DtUchJch::from_xyy(xyy);
let xyy_rev = ucs.to_xyy();
assert!(
(xyy[0] - xyy_rev[0]).abs() < 1e-5,
"Expected {}, got {}",
xyy[0],
xyy_rev[0]
);
assert!(
(xyy[1] - xyy_rev[1]).abs() < 1e-5,
"Expected {}, got {}",
xyy[1],
xyy_rev[1]
);
assert!(
(xyy[2] - xyy_rev[2]).abs() < 1e-5,
"Expected {}, got {}",
xyy[2],
xyy_rev[2]
);
}
#[test]
fn test_darktable_hsb() {
let jch = DtUchJch::new(0.3, 0.6, 0.4);
let hsb = DtUchHsb::from_jch(jch);
let r_jch = hsb.to_jch();
assert!(
(r_jch.j - jch.j).abs() < 1e-5,
"Expected {}, got {}",
jch.j,
r_jch.j
);
assert!(
(r_jch.c - jch.c).abs() < 1e-5,
"Expected {}, got {}",
jch.c,
r_jch.c
);
assert!(
(r_jch.h - jch.h).abs() < 1e-5,
"Expected {}, got {}",
jch.h,
r_jch.h
);
}
#[test]
fn test_darktable_hcb() {
let jch = DtUchJch::new(0.3, 0.6, 0.4);
let hcb = DtUchHcb::from_jch(jch);
let r_jch = hcb.to_jch();
assert!(
(r_jch.j - jch.j).abs() < 1e-5,
"Expected {}, got {}",
jch.j,
r_jch.j
);
assert!(
(r_jch.c - jch.c).abs() < 1e-5,
"Expected {}, got {}",
jch.c,
r_jch.c
);
assert!(
(r_jch.h - jch.h).abs() < 1e-5,
"Expected {}, got {}",
jch.h,
r_jch.h
);
}
#[test]
fn test_darktable_ucs_jch_from_xyz() {
let xyz = Xyz::new(0.4, 0.2, 0.5);
let ucs = DtUchJch::from_xyz(xyz);
        let xyz_rev = ucs.to_xyz();
        assert!(
            (xyz.x - xyz_rev.x).abs() < 1e-4,
            "Expected {}, got {}",
            xyz.x,
            xyz_rev.x
        );
        assert!(
            (xyz.y - xyz_rev.y).abs() < 1e-4,
            "Expected {}, got {}",
            xyz.y,
            xyz_rev.y
        );
        assert!(
            (xyz.z - xyz_rev.z).abs() < 1e-4,
            "Expected {}, got {}",
            xyz.z,
            xyz_rev.z
        );
}
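    // Illustrative sketch (tolerance is an assumption): the private L* pair
    // above is an exact analytic inverse, so a round trip should be tight.
    #[test]
    fn test_l_star_roundtrip() {
        let y = 0.18f32;
        let l = y_to_dt_ucs_l_star(y);
        let y_rev = dt_ucs_l_star_to_y(l);
        assert!((y - y_rev).abs() < 1e-4, "Expected {y}, got {y_rev}");
    }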
}

122
vendor/moxcms/src/err.rs vendored Normal file
View File

@@ -0,0 +1,122 @@
/*
* // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::RenderingIntent;
use std::error::Error;
use std::fmt::Display;
#[derive(Debug, Copy, Clone, PartialOrd, PartialEq)]
pub struct MalformedSize {
pub size: usize,
pub expected: usize,
}
#[derive(Debug, Clone, PartialOrd, PartialEq)]
pub enum CmsError {
LaneSizeMismatch,
LaneMultipleOfChannels,
InvalidProfile,
InvalidTrcCurve,
InvalidCicp,
CurveLutIsTooLarge,
ParametricCurveZeroDivision,
InvalidRenderingIntent,
DivisionByZero,
UnsupportedColorPrimaries(u8),
UnsupportedTrc(u8),
InvalidLayout,
UnsupportedProfileConnection,
BuildTransferFunction,
UnsupportedChannelConfiguration,
UnknownTag(u32),
UnknownTagTypeDefinition(u32),
UnsupportedLutRenderingIntent(RenderingIntent),
InvalidAtoBLut,
OverflowingError,
LUTTablesInvalidKind,
MalformedClut(MalformedSize),
MalformedCurveLutTable(MalformedSize),
InvalidInksCountForProfile,
MalformedTrcCurve(String),
}
impl Display for CmsError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
            CmsError::LaneSizeMismatch => f.write_str("Lane lengths must match"),
            CmsError::LaneMultipleOfChannels => {
                f.write_str("Lane length must be a multiple of the channel count")
}
CmsError::InvalidProfile => f.write_str("Invalid ICC profile"),
CmsError::InvalidCicp => {
f.write_str("Invalid Code Independent point (CICP) in ICC profile")
}
CmsError::InvalidTrcCurve => f.write_str("Invalid TRC curve"),
CmsError::CurveLutIsTooLarge => f.write_str("Curve Lut is too large"),
CmsError::ParametricCurveZeroDivision => {
f.write_str("Parametric Curve definition causes division by zero")
}
CmsError::InvalidRenderingIntent => f.write_str("Invalid rendering intent"),
CmsError::DivisionByZero => f.write_str("Division by zero"),
CmsError::UnsupportedColorPrimaries(value) => {
f.write_fmt(format_args!("Unsupported color primaries, {value}"))
}
CmsError::UnsupportedTrc(value) => f.write_fmt(format_args!("Unsupported TRC {value}")),
CmsError::InvalidLayout => f.write_str("Invalid layout"),
CmsError::UnsupportedProfileConnection => f.write_str("Unsupported profile connection"),
CmsError::BuildTransferFunction => f.write_str("Can't reconstruct transfer function"),
CmsError::UnsupportedChannelConfiguration => {
f.write_str("Can't reconstruct channel configuration")
}
CmsError::UnknownTag(t) => f.write_fmt(format_args!("Unknown tag: {t}")),
CmsError::UnknownTagTypeDefinition(t) => {
f.write_fmt(format_args!("Unknown tag type definition: {t}"))
}
CmsError::UnsupportedLutRenderingIntent(intent) => f.write_fmt(format_args!(
"Can't find LUT for rendering intent: {intent:?}"
)),
CmsError::InvalidAtoBLut => f.write_str("Invalid A to B Lut"),
CmsError::OverflowingError => {
f.write_str("Overflowing was happen, that is not allowed")
}
            CmsError::LUTTablesInvalidKind => f.write_str("All LUT curves must have the same kind"),
CmsError::MalformedClut(size) => {
f.write_fmt(format_args!("Invalid CLUT size: {size:?}"))
}
CmsError::MalformedCurveLutTable(size) => {
f.write_fmt(format_args!("Malformed curve LUT size: {size:?}"))
}
CmsError::InvalidInksCountForProfile => {
f.write_str("Invalid inks count for profile was provided")
}
CmsError::MalformedTrcCurve(str) => f.write_str(str),
}
}
}
impl Error for CmsError {}
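// Illustrative sketch: `CmsError` implements `Display`, so variants render
// directly to human-readable messages; the strings asserted here mirror the
// `Display` impl above.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn display_messages() {
        assert_eq!(CmsError::DivisionByZero.to_string(), "Division by zero");
        let clut = CmsError::MalformedClut(MalformedSize {
            size: 10,
            expected: 27,
        });
        assert!(clut.to_string().contains("Invalid CLUT size"));
    }
}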

1078
vendor/moxcms/src/gamma.rs vendored Normal file

File diff suppressed because it is too large Load Diff

66
vendor/moxcms/src/gamut.rs vendored Normal file
View File

@@ -0,0 +1,66 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::Rgb;
#[inline]
fn filmlike_clip_rgb_tone(r: &mut f32, g: &mut f32, b: &mut f32, l: f32) {
let new_r = r.min(l);
let new_b = b.min(l);
let new_g = new_b + ((new_r - new_b) * (*g - *b) / (*r - *b));
*r = new_r;
*g = new_g;
*b = new_b;
}
/// Softly clips out-of-bounds values with an S-curve
///
/// Works only on highlights; negative values are skipped
#[inline]
pub fn filmlike_clip(rgb: Rgb<f32>) -> Rgb<f32> {
const L: f32 = 1.;
let mut rgb = rgb;
if rgb.r >= rgb.g {
if rgb.g > rgb.b {
filmlike_clip_rgb_tone(&mut rgb.r, &mut rgb.g, &mut rgb.b, L);
} else if rgb.b > rgb.r {
filmlike_clip_rgb_tone(&mut rgb.b, &mut rgb.r, &mut rgb.g, L);
} else if rgb.b > rgb.g {
filmlike_clip_rgb_tone(&mut rgb.r, &mut rgb.b, &mut rgb.g, L);
} else {
            // r >= g == b here; the constructed value was previously dropped,
            // so assign the clipped result back.
            rgb = Rgb::new(rgb.r.min(L), rgb.g.min(L), rgb.b.min(L));
}
} else if rgb.r >= rgb.b {
filmlike_clip_rgb_tone(&mut rgb.g, &mut rgb.r, &mut rgb.b, L);
} else if rgb.b > rgb.g {
filmlike_clip_rgb_tone(&mut rgb.b, &mut rgb.g, &mut rgb.r, L);
} else {
filmlike_clip_rgb_tone(&mut rgb.g, &mut rgb.b, &mut rgb.r, L);
}
rgb
}
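// Illustrative sketch (tolerances are assumptions): in-gamut pixels should pass
// through unchanged, while out-of-range highlights are compressed to <= 1.0.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn clips_highlights_keeps_in_gamut() {
        let inside = filmlike_clip(Rgb::new(0.2f32, 0.3, 0.4));
        assert!((inside.r - 0.2).abs() < 1e-6);
        assert!((inside.g - 0.3).abs() < 1e-6);
        assert!((inside.b - 0.4).abs() < 1e-6);
        let clipped = filmlike_clip(Rgb::new(1.2f32, 0.6, 0.3));
        assert!(clipped.r <= 1.0 && clipped.g <= 1.0 && clipped.b <= 1.0);
    }
}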

223
vendor/moxcms/src/helpers.rs vendored Normal file
View File

@@ -0,0 +1,223 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::matan::{
does_curve_have_discontinuity, is_curve_ascending, is_curve_degenerated, is_curve_descending,
is_curve_linear8, is_curve_linear16, is_curve_monotonic,
};
use crate::reader::{
s15_fixed16_number_to_double, uint8_number_to_float_fast, uint16_number_to_float_fast,
};
use crate::{CmsError, LutStore, Matrix3d, ToneReprCurve, Vector3d};
impl LutStore {
pub fn to_clut_f32(&self) -> Vec<f32> {
match self {
LutStore::Store8(store) => store
.iter()
.map(|x| uint8_number_to_float_fast(*x))
.collect(),
LutStore::Store16(store) => store
.iter()
.map(|x| uint16_number_to_float_fast(*x as u32))
.collect(),
}
}
pub(crate) fn is_degenerated(&self, entries: usize, channel: usize) -> bool {
let start = entries * channel;
let end = start + entries;
match &self {
LutStore::Store8(v) => is_curve_degenerated(&v[start..end]),
LutStore::Store16(v) => is_curve_degenerated(&v[start..end]),
}
}
pub(crate) fn is_monotonic(&self, entries: usize, channel: usize) -> bool {
let start = entries * channel;
let end = start + entries;
match &self {
LutStore::Store8(v) => is_curve_monotonic(&v[start..end]),
LutStore::Store16(v) => is_curve_monotonic(&v[start..end]),
}
}
pub(crate) fn have_discontinuities(&self, entries: usize, channel: usize) -> bool {
let start = entries * channel;
let end = start + entries;
match &self {
LutStore::Store8(v) => does_curve_have_discontinuity(&v[start..end]),
LutStore::Store16(v) => does_curve_have_discontinuity(&v[start..end]),
}
}
#[allow(dead_code)]
pub(crate) fn is_linear(&self, entries: usize, channel: usize) -> bool {
let start = entries * channel;
let end = start + entries;
match &self {
LutStore::Store8(v) => is_curve_linear8(&v[start..end]),
LutStore::Store16(v) => is_curve_linear16(&v[start..end]),
}
}
#[allow(dead_code)]
pub(crate) fn is_descending(&self, entries: usize, channel: usize) -> bool {
let start = entries * channel;
let end = start + entries;
match &self {
LutStore::Store8(v) => is_curve_descending(&v[start..end]),
LutStore::Store16(v) => is_curve_descending(&v[start..end]),
}
}
#[allow(dead_code)]
pub(crate) fn is_ascending(&self, entries: usize, channel: usize) -> bool {
let start = entries * channel;
let end = start + entries;
match &self {
LutStore::Store8(v) => is_curve_ascending(&v[start..end]),
LutStore::Store16(v) => is_curve_ascending(&v[start..end]),
}
}
}
impl ToneReprCurve {
pub(crate) fn is_linear(&self) -> bool {
match &self {
ToneReprCurve::Lut(lut) => {
if lut.is_empty() {
return true;
}
if lut.len() == 1 {
let gamma = 1. / crate::trc::u8_fixed_8number_to_float(lut[0]);
if (gamma - 1.).abs() < 1e-4 {
return true;
}
}
is_curve_linear16(lut)
}
ToneReprCurve::Parametric(parametric) => {
if parametric.is_empty() {
return true;
}
if parametric.len() == 1 && parametric[0] == 1. {
return true;
}
false
}
}
}
pub(crate) fn is_monotonic(&self) -> bool {
match &self {
ToneReprCurve::Lut(lut) => is_curve_monotonic(lut),
ToneReprCurve::Parametric(_) => true,
}
}
pub(crate) fn is_degenerated(&self) -> bool {
match &self {
ToneReprCurve::Lut(lut) => is_curve_degenerated(lut),
ToneReprCurve::Parametric(_) => false,
}
}
pub(crate) fn have_discontinuities(&self) -> bool {
match &self {
ToneReprCurve::Lut(lut) => does_curve_have_discontinuity(lut),
ToneReprCurve::Parametric(_) => false,
}
}
}
pub(crate) fn read_matrix_3d(arr: &[u8]) -> Result<Matrix3d, CmsError> {
if arr.len() < 36 {
return Err(CmsError::InvalidProfile);
}
let m_tag = &arr[..36];
let e00 = i32::from_be_bytes([m_tag[0], m_tag[1], m_tag[2], m_tag[3]]);
let e01 = i32::from_be_bytes([m_tag[4], m_tag[5], m_tag[6], m_tag[7]]);
let e02 = i32::from_be_bytes([m_tag[8], m_tag[9], m_tag[10], m_tag[11]]);
let e10 = i32::from_be_bytes([m_tag[12], m_tag[13], m_tag[14], m_tag[15]]);
let e11 = i32::from_be_bytes([m_tag[16], m_tag[17], m_tag[18], m_tag[19]]);
let e12 = i32::from_be_bytes([m_tag[20], m_tag[21], m_tag[22], m_tag[23]]);
let e20 = i32::from_be_bytes([m_tag[24], m_tag[25], m_tag[26], m_tag[27]]);
let e21 = i32::from_be_bytes([m_tag[28], m_tag[29], m_tag[30], m_tag[31]]);
let e22 = i32::from_be_bytes([m_tag[32], m_tag[33], m_tag[34], m_tag[35]]);
Ok(Matrix3d {
v: [
[
s15_fixed16_number_to_double(e00),
s15_fixed16_number_to_double(e01),
s15_fixed16_number_to_double(e02),
],
[
s15_fixed16_number_to_double(e10),
s15_fixed16_number_to_double(e11),
s15_fixed16_number_to_double(e12),
],
[
s15_fixed16_number_to_double(e20),
s15_fixed16_number_to_double(e21),
s15_fixed16_number_to_double(e22),
],
],
})
}
pub(crate) fn read_vector_3d(arr: &[u8]) -> Result<Vector3d, CmsError> {
if arr.len() < 12 {
return Err(CmsError::InvalidProfile);
}
let m_tag = &arr[..12];
let b0 = i32::from_be_bytes([m_tag[0], m_tag[1], m_tag[2], m_tag[3]]);
let b1 = i32::from_be_bytes([m_tag[4], m_tag[5], m_tag[6], m_tag[7]]);
let b2 = i32::from_be_bytes([m_tag[8], m_tag[9], m_tag[10], m_tag[11]]);
Ok(Vector3d {
v: [
s15_fixed16_number_to_double(b0),
s15_fixed16_number_to_double(b1),
s15_fixed16_number_to_double(b2),
],
})
}
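// Illustrative sketch: s15Fixed16 stores 1.0 as 0x0001_0000, so an identity
// matrix serializes to nine big-endian words with that value on the diagonal.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn reads_identity_matrix() {
        let mut raw = Vec::with_capacity(36);
        for idx in 0..9 {
            // Indices 0, 4 and 8 are the diagonal of a row-major 3x3 matrix.
            let v: i32 = if idx % 4 == 0 { 0x0001_0000 } else { 0 };
            raw.extend_from_slice(&v.to_be_bytes());
        }
        let m = read_matrix_3d(&raw).unwrap();
        assert!((m.v[0][0] - 1.0).abs() < 1e-9);
        assert!((m.v[1][1] - 1.0).abs() < 1e-9);
        assert!(m.v[0][1].abs() < 1e-9);
    }
}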

192
vendor/moxcms/src/ictcp.rs vendored Normal file
View File

@@ -0,0 +1,192 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::gamma::{pq_from_linearf, pq_to_linearf};
use crate::{Matrix3f, Rgb, Vector3f, Xyz};
const CROSSTALK: Matrix3f = Matrix3f {
v: [[0.92, 0.04, 0.04], [0.04, 0.92, 0.04], [0.04, 0.04, 0.92]],
};
const HPE_LMS: Matrix3f = Matrix3f {
v: [
[0.4002, 0.7076, -0.0808],
[-0.2263, 1.1653, 0.0457],
[0f32, 0f32, 0.9182],
],
};
const XYZ_TO_LMS: Matrix3f = CROSSTALK.mat_mul_const(HPE_LMS);
const LMS_TO_XYZ: Matrix3f = XYZ_TO_LMS.inverse();
const L_LMS_TO_ICTCP: Matrix3f = Matrix3f {
v: [
[2048. / 4096., 2048. / 4096., 0.],
[6610. / 4096., -13613. / 4096., 7003. / 4096.],
[17933. / 4096., -17390. / 4096., -543. / 4096.],
],
};
const ICTCP_TO_L_LMS: Matrix3f = L_LMS_TO_ICTCP.inverse();
#[derive(Copy, Clone, Default, PartialOrd, PartialEq)]
pub struct ICtCp {
/// Lightness
pub i: f32,
    /// Tritan (blue-yellow chroma)
pub ct: f32,
    /// Protan (red-green chroma)
pub cp: f32,
}
impl ICtCp {
#[inline]
pub const fn new(i: f32, ct: f32, cp: f32) -> ICtCp {
ICtCp { i, ct, cp }
}
/// Converts XYZ D65 to ICtCp
#[inline]
pub fn from_xyz(xyz: Xyz) -> ICtCp {
let lms = XYZ_TO_LMS.mul_vector(xyz.to_vector());
let lin_l = pq_from_linearf(lms.v[0]);
let lin_m = pq_from_linearf(lms.v[1]);
let lin_s = pq_from_linearf(lms.v[2]);
let ictcp = L_LMS_TO_ICTCP.mul_vector(Vector3f {
v: [lin_l, lin_m, lin_s],
});
ICtCp {
i: ictcp.v[0],
ct: ictcp.v[1],
cp: ictcp.v[2],
}
}
/// Converts to [ICtCp] from linear light [Rgb]
///
    /// Precompute the forward matrix with [ICtCp::prepare_to_lms].
/// D65 white point is assumed.
#[inline]
pub fn from_linear_rgb(rgb: Rgb<f32>, matrix: Matrix3f) -> ICtCp {
let lms = matrix.mul_vector(rgb.to_vector());
let lin_l = pq_from_linearf(lms.v[0]);
let lin_m = pq_from_linearf(lms.v[1]);
let lin_s = pq_from_linearf(lms.v[2]);
let ictcp = L_LMS_TO_ICTCP.mul_vector(Vector3f {
v: [lin_l, lin_m, lin_s],
});
ICtCp {
i: ictcp.v[0],
ct: ictcp.v[1],
cp: ictcp.v[2],
}
}
/// Converts [ICtCp] to [Rgb]
///
    /// Precompute the forward matrix with [ICtCp::prepare_to_lms], then invert it
#[inline]
pub fn to_linear_rgb(&self, matrix: Matrix3f) -> Rgb<f32> {
let l_lms = ICTCP_TO_L_LMS.mul_vector(Vector3f {
v: [self.i, self.ct, self.cp],
});
let gamma_l = pq_to_linearf(l_lms.v[0]);
let gamma_m = pq_to_linearf(l_lms.v[1]);
let gamma_s = pq_to_linearf(l_lms.v[2]);
let lms = matrix.mul_vector(Vector3f {
v: [gamma_l, gamma_m, gamma_s],
});
Rgb {
r: lms.v[0],
g: lms.v[1],
b: lms.v[2],
}
}
/// Converts ICtCp to XYZ D65
#[inline]
pub fn to_xyz(&self) -> Xyz {
let l_lms = ICTCP_TO_L_LMS.mul_vector(Vector3f {
v: [self.i, self.ct, self.cp],
});
let gamma_l = pq_to_linearf(l_lms.v[0]);
let gamma_m = pq_to_linearf(l_lms.v[1]);
let gamma_s = pq_to_linearf(l_lms.v[2]);
let lms = LMS_TO_XYZ.mul_vector(Vector3f {
v: [gamma_l, gamma_m, gamma_s],
});
Xyz {
x: lms.v[0],
y: lms.v[1],
z: lms.v[2],
}
}
/// Prepares RGB->LMS matrix
#[inline]
pub const fn prepare_to_lms(rgb_to_xyz: Matrix3f) -> Matrix3f {
XYZ_TO_LMS.mat_mul_const(rgb_to_xyz)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn check_roundtrip() {
let xyz = Xyz::new(0.5, 0.4, 0.3);
let ictcp = ICtCp::from_xyz(xyz);
let r_xyz = ictcp.to_xyz();
assert!((r_xyz.x - xyz.x).abs() < 1e-4);
assert!((r_xyz.y - xyz.y).abs() < 1e-4);
assert!((r_xyz.z - xyz.z).abs() < 1e-4);
}
#[test]
fn check_roundtrip_rgb() {
let rgb_to_xyz = Matrix3f {
v: [
[0.67345345, 0.165661961, 0.125096574],
[0.27903071, 0.675341845, 0.045627553],
[-0.00193137419, 0.0299795717, 0.797140181],
],
};
let prepared_matrix = ICtCp::prepare_to_lms(rgb_to_xyz);
let inversed_matrix = prepared_matrix.inverse();
let rgb = Rgb::new(0.5, 0.4, 0.3);
let ictcp = ICtCp::from_linear_rgb(rgb, prepared_matrix);
let r_xyz = ictcp.to_linear_rgb(inversed_matrix);
assert!((r_xyz.r - rgb.r).abs() < 1e-4);
assert!((r_xyz.g - rgb.g).abs() < 1e-4);
assert!((r_xyz.b - rgb.b).abs() < 1e-4);
}
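    // Illustrative check (the tolerance is an assumption): the HPE matrix above
    // is D65-normalized, so D65 white should map to (near) zero Ct/Cp.
    #[test]
    fn d65_white_has_near_zero_chroma() {
        let ictcp = ICtCp::from_xyz(Xyz::new(0.95047, 1.0, 1.08883));
        assert!(ictcp.ct.abs() < 2e-3);
        assert!(ictcp.cp.abs() < 2e-3);
    }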
}

434
vendor/moxcms/src/jzazbz.rs vendored Normal file
View File

@@ -0,0 +1,434 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::Xyz;
use crate::jzczhz::Jzczhz;
use crate::mlaf::mlaf;
use num_traits::Pow;
use pxfm::{dirty_powf, f_cbrtf, f_powf};
use std::ops::{
Add, AddAssign, Div, DivAssign, Index, IndexMut, Mul, MulAssign, Neg, Sub, SubAssign,
};
#[inline]
fn perceptual_quantizer(x: f32) -> f32 {
if x <= 0. {
return 0.;
}
let xx = dirty_powf(x * 1e-4, 0.1593017578125);
let rs = dirty_powf(
mlaf(0.8359375, 18.8515625, xx) / mlaf(1., 18.6875, xx),
134.034375,
);
if rs.is_nan() {
return 0.;
}
rs
}
#[inline]
fn perceptual_quantizer_inverse(x: f32) -> f32 {
if x <= 0. {
return 0.;
}
let xx = dirty_powf(x, 7.460772656268214e-03);
let rs = 1e4
* dirty_powf(
(0.8359375 - xx) / mlaf(-18.8515625, 18.6875, xx),
6.277394636015326,
);
if rs.is_nan() {
return 0.;
}
rs
}
#[repr(C)]
#[derive(Debug, Copy, Clone, PartialOrd, PartialEq, Default)]
/// Represents Jzazbz
pub struct Jzazbz {
    /// Jz (lightness) is generally expected to be in `0.0..1.0`.
    pub jz: f32,
    /// Az is generally expected to be in `-0.5..0.5`.
    pub az: f32,
    /// Bz is generally expected to be in `-0.5..0.5`.
pub bz: f32,
}
impl Jzazbz {
/// Constructs new instance
#[inline]
pub fn new(jz: f32, az: f32, bz: f32) -> Jzazbz {
Jzazbz { jz, az, bz }
}
/// Creates new [Jzazbz] from CIE [Xyz].
///
    /// JzAzBz is defined for the D65 white point; adapt XYZ first if needed.
#[inline]
pub fn from_xyz(xyz: Xyz) -> Jzazbz {
Self::from_xyz_with_display_luminance(xyz, 200.)
}
/// Creates new [Jzazbz] from CIE [Xyz].
///
    /// JzAzBz is defined for the D65 white point; adapt XYZ first if needed.
#[inline]
pub fn from_xyz_with_display_luminance(xyz: Xyz, display_luminance: f32) -> Jzazbz {
let abs_xyz = xyz * display_luminance;
let lp = perceptual_quantizer(mlaf(
mlaf(0.674207838 * abs_xyz.x, 0.382799340, abs_xyz.y),
-0.047570458,
abs_xyz.z,
));
let mp = perceptual_quantizer(mlaf(
mlaf(0.149284160 * abs_xyz.x, 0.739628340, abs_xyz.y),
0.083327300,
abs_xyz.z,
));
let sp = perceptual_quantizer(mlaf(
mlaf(0.070941080 * abs_xyz.x, 0.174768000, abs_xyz.y),
0.670970020,
abs_xyz.z,
));
let iz = 0.5 * (lp + mp);
let az = mlaf(mlaf(3.524000 * lp, -4.066708, mp), 0.542708, sp);
let bz = mlaf(mlaf(0.199076 * lp, 1.096799, mp), -1.295875, sp);
let jz = (0.44 * iz) / mlaf(1., -0.56, iz) - 1.6295499532821566e-11;
Jzazbz::new(jz, az, bz)
}
/// Converts [Jzazbz] to [Xyz] D65
#[inline]
pub fn to_xyz(&self, display_luminance: f32) -> Xyz {
let jz = self.jz + 1.6295499532821566e-11;
let iz = jz / mlaf(0.44f32, 0.56, jz);
let l = perceptual_quantizer_inverse(mlaf(
mlaf(iz, 1.386050432715393e-1, self.az),
5.804731615611869e-2,
self.bz,
));
let m = perceptual_quantizer_inverse(mlaf(
mlaf(iz, -1.386050432715393e-1, self.az),
-5.804731615611891e-2,
self.bz,
));
let s = perceptual_quantizer_inverse(mlaf(
mlaf(iz, -9.601924202631895e-2, self.az),
-8.118918960560390e-1,
self.bz,
));
let x = mlaf(
mlaf(1.661373055774069e+00 * l, -9.145230923250668e-01, m),
2.313620767186147e-01,
s,
);
let y = mlaf(
mlaf(-3.250758740427037e-01 * l, 1.571847038366936e+00, m),
-2.182538318672940e-01,
s,
);
let z = mlaf(
mlaf(-9.098281098284756e-02 * l, -3.127282905230740e-01, m),
1.522766561305260e+00,
s,
);
let rel_luminance = 1f32 / display_luminance;
Xyz::new(x, y, z) * rel_luminance
}
/// Converts into *Jzczhz*
#[inline]
pub fn to_jzczhz(&self) -> Jzczhz {
Jzczhz::from_jzazbz(*self)
}
#[inline]
pub fn euclidean_distance(&self, other: Self) -> f32 {
let djz = self.jz - other.jz;
let daz = self.az - other.az;
let dbz = self.bz - other.bz;
(djz * djz + daz * daz + dbz * dbz).sqrt()
}
#[inline]
pub fn taxicab_distance(&self, other: Self) -> f32 {
let djz = self.jz - other.jz;
let daz = self.az - other.az;
let dbz = self.bz - other.bz;
djz.abs() + daz.abs() + dbz.abs()
}
}
impl Index<usize> for Jzazbz {
type Output = f32;
#[inline]
fn index(&self, index: usize) -> &f32 {
match index {
0 => &self.jz,
1 => &self.az,
2 => &self.bz,
_ => panic!("Index out of bounds for Jzazbz"),
}
}
}
impl IndexMut<usize> for Jzazbz {
#[inline]
fn index_mut(&mut self, index: usize) -> &mut f32 {
match index {
0 => &mut self.jz,
1 => &mut self.az,
2 => &mut self.bz,
_ => panic!("Index out of bounds for Jzazbz"),
}
}
}
impl Add<f32> for Jzazbz {
type Output = Jzazbz;
#[inline]
fn add(self, rhs: f32) -> Self::Output {
Jzazbz::new(self.jz + rhs, self.az + rhs, self.bz + rhs)
}
}
impl Sub<f32> for Jzazbz {
type Output = Jzazbz;
#[inline]
fn sub(self, rhs: f32) -> Self::Output {
Jzazbz::new(self.jz - rhs, self.az - rhs, self.bz - rhs)
}
}
impl Mul<f32> for Jzazbz {
type Output = Jzazbz;
#[inline]
fn mul(self, rhs: f32) -> Self::Output {
Jzazbz::new(self.jz * rhs, self.az * rhs, self.bz * rhs)
}
}
impl Div<f32> for Jzazbz {
type Output = Jzazbz;
#[inline]
fn div(self, rhs: f32) -> Self::Output {
Jzazbz::new(self.jz / rhs, self.az / rhs, self.bz / rhs)
}
}
impl Add<Jzazbz> for Jzazbz {
type Output = Jzazbz;
#[inline]
fn add(self, rhs: Jzazbz) -> Self::Output {
Jzazbz::new(self.jz + rhs.jz, self.az + rhs.az, self.bz + rhs.bz)
}
}
impl Sub<Jzazbz> for Jzazbz {
type Output = Jzazbz;
#[inline]
fn sub(self, rhs: Jzazbz) -> Self::Output {
Jzazbz::new(self.jz - rhs.jz, self.az - rhs.az, self.bz - rhs.bz)
}
}
impl Mul<Jzazbz> for Jzazbz {
type Output = Jzazbz;
#[inline]
fn mul(self, rhs: Jzazbz) -> Self::Output {
Jzazbz::new(self.jz * rhs.jz, self.az * rhs.az, self.bz * rhs.bz)
}
}
impl Div<Jzazbz> for Jzazbz {
type Output = Jzazbz;
#[inline]
fn div(self, rhs: Jzazbz) -> Self::Output {
Jzazbz::new(self.jz / rhs.jz, self.az / rhs.az, self.bz / rhs.bz)
}
}
impl AddAssign<Jzazbz> for Jzazbz {
#[inline]
fn add_assign(&mut self, rhs: Jzazbz) {
self.jz += rhs.jz;
self.az += rhs.az;
self.bz += rhs.bz;
}
}
impl SubAssign<Jzazbz> for Jzazbz {
#[inline]
fn sub_assign(&mut self, rhs: Jzazbz) {
self.jz -= rhs.jz;
self.az -= rhs.az;
self.bz -= rhs.bz;
}
}
impl MulAssign<Jzazbz> for Jzazbz {
#[inline]
fn mul_assign(&mut self, rhs: Jzazbz) {
self.jz *= rhs.jz;
self.az *= rhs.az;
self.bz *= rhs.bz;
}
}
impl DivAssign<Jzazbz> for Jzazbz {
#[inline]
fn div_assign(&mut self, rhs: Jzazbz) {
self.jz /= rhs.jz;
self.az /= rhs.az;
self.bz /= rhs.bz;
}
}
impl AddAssign<f32> for Jzazbz {
#[inline]
fn add_assign(&mut self, rhs: f32) {
self.jz += rhs;
self.az += rhs;
self.bz += rhs;
}
}
impl SubAssign<f32> for Jzazbz {
#[inline]
fn sub_assign(&mut self, rhs: f32) {
self.jz -= rhs;
self.az -= rhs;
self.bz -= rhs;
}
}
impl MulAssign<f32> for Jzazbz {
#[inline]
fn mul_assign(&mut self, rhs: f32) {
self.jz *= rhs;
self.az *= rhs;
self.bz *= rhs;
}
}
impl DivAssign<f32> for Jzazbz {
#[inline]
fn div_assign(&mut self, rhs: f32) {
self.jz /= rhs;
self.az /= rhs;
self.bz /= rhs;
}
}
impl Neg for Jzazbz {
type Output = Jzazbz;
#[inline]
fn neg(self) -> Self::Output {
Jzazbz::new(-self.jz, -self.az, -self.bz)
}
}
impl Jzazbz {
#[inline]
pub fn sqrt(&self) -> Jzazbz {
Jzazbz::new(self.jz.sqrt(), self.az.sqrt(), self.bz.sqrt())
}
#[inline]
pub fn cbrt(&self) -> Jzazbz {
Jzazbz::new(f_cbrtf(self.jz), f_cbrtf(self.az), f_cbrtf(self.bz))
}
}
impl Pow<f32> for Jzazbz {
type Output = Jzazbz;
#[inline]
fn pow(self, rhs: f32) -> Self::Output {
Jzazbz::new(
f_powf(self.jz, rhs),
f_powf(self.az, rhs),
f_powf(self.bz, rhs),
)
}
}
impl Pow<Jzazbz> for Jzazbz {
type Output = Jzazbz;
#[inline]
fn pow(self, rhs: Jzazbz) -> Self::Output {
Jzazbz::new(
f_powf(self.jz, rhs.jz),
f_powf(self.az, rhs.az),
f_powf(self.bz, rhs.bz),
)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn jzazbz_round() {
let xyz = Xyz::new(0.5, 0.4, 0.3);
let jzazbz = Jzazbz::from_xyz_with_display_luminance(xyz, 253f32);
let old_xyz = jzazbz.to_xyz(253f32);
assert!(
(xyz.x - old_xyz.x).abs() <= 1e-3,
"{:?} != {:?}",
xyz,
old_xyz
);
assert!(
(xyz.y - old_xyz.y).abs() <= 1e-3,
"{:?} != {:?}",
xyz,
old_xyz
);
assert!(
(xyz.z - old_xyz.z).abs() <= 1e-3,
"{:?} != {:?}",
xyz,
old_xyz
);
}
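// A hedged sketch of the two metrics above (values illustrative): the taxicab
// (L1) distance always dominates the euclidean (L2) distance, and both vanish
// for identical colors.
#[test]
fn jzazbz_distances() {
let a = Jzazbz::new(0.10, 0.02, -0.03);
let b = Jzazbz::new(0.20, -0.01, 0.04);
assert_eq!(a.euclidean_distance(a), 0.0);
assert!(a.taxicab_distance(b) >= a.euclidean_distance(b));
}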
}

vendor/moxcms/src/jzczhz.rs vendored Normal file
@@ -0,0 +1,375 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::Xyz;
use crate::jzazbz::Jzazbz;
use num_traits::Pow;
use pxfm::{f_atan2f, f_cbrtf, f_hypot3f, f_hypotf, f_powf, f_sincosf, f_sinf};
use std::ops::{
Add, AddAssign, Div, DivAssign, Index, IndexMut, Mul, MulAssign, Neg, Sub, SubAssign,
};
/// Represents Jzazbz in polar coordinates as Jzczhz
#[repr(C)]
#[derive(Debug, Copy, Clone, PartialOrd, PartialEq)]
pub struct Jzczhz {
/// Jz (lightness) is generally expected to lie in `0.0..1.0`.
pub jz: f32,
/// Cz (chroma) is non-negative, generally below `1.0`.
pub cz: f32,
/// Hz (hue angle, radians) lies in `-π..=π`.
pub hz: f32,
}
impl Jzczhz {
/// Creates a new instance of Jzczhz
#[inline]
pub fn new(jz: f32, cz: f32, hz: f32) -> Jzczhz {
Jzczhz { jz, cz, hz }
}
/// Converts Jzazbz to polar coordinates Jzczhz
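///
/// A minimal sketch (values illustrative): chroma is the hypotenuse of
/// `az`/`bz` and hue is their angle, so a pure-`bz` color lands at π/2.
///
/// ```
/// use moxcms::{Jzazbz, Jzczhz};
/// let polar = Jzczhz::from_jzazbz(Jzazbz::new(0.1, 0.0, 0.05));
/// assert!((polar.cz - 0.05).abs() < 1e-6);
/// assert!((polar.hz - std::f32::consts::FRAC_PI_2).abs() < 1e-6);
/// ```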
#[inline]
pub fn from_jzazbz(jzazbz: Jzazbz) -> Jzczhz {
let cz = f_hypotf(jzazbz.az, jzazbz.bz);
let hz = f_atan2f(jzazbz.bz, jzazbz.az);
Jzczhz::new(jzazbz.jz, cz, hz)
}
/// Converts Jzczhz into Jzazbz
#[inline]
pub fn to_jzazbz(&self) -> Jzazbz {
let sincos = f_sincosf(self.hz);
let az = self.cz * sincos.1;
let bz = self.cz * sincos.0;
Jzazbz::new(self.jz, az, bz)
}
/// Converts Jzczhz into Jzazbz (as written, identical in behavior to [`Self::to_jzazbz`])
#[inline]
pub fn to_jzazbz_with_luminance(&self) -> Jzazbz {
let sincos = f_sincosf(self.hz);
let az = self.cz * sincos.1;
let bz = self.cz * sincos.0;
Jzazbz::new(self.jz, az, bz)
}
/// Converts Jzczhz to *Xyz*
#[inline]
pub fn to_xyz(&self, display_luminance: f32) -> Xyz {
let jzazbz = self.to_jzazbz();
jzazbz.to_xyz(display_luminance)
}
/// Converts [Xyz] to [Jzczhz]
#[inline]
pub fn from_xyz(xyz: Xyz) -> Jzczhz {
let jzazbz = Jzazbz::from_xyz(xyz);
Jzczhz::from_jzazbz(jzazbz)
}
/// Converts [Xyz] to [Jzczhz]
#[inline]
pub fn from_xyz_with_display_luminance(xyz: Xyz, luminance: f32) -> Jzczhz {
let jzazbz = Jzazbz::from_xyz_with_display_luminance(xyz, luminance);
Jzczhz::from_jzazbz(jzazbz)
}
/// Computes distance for *Jzczhz*
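///
/// The hue term enters as the chord length `2 * sqrt(cz1 * cz2) * sin(dhz / 2)`
/// rather than the raw angle difference, mirroring how CIEDE-style metrics
/// weight hue in polar color spaces.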
#[inline]
pub fn distance(&self, other: Jzczhz) -> f32 {
let djz = self.jz - other.jz;
let dcz = self.cz - other.cz;
let dhz = self.hz - other.hz;
let dh = 2. * (self.cz * other.cz).sqrt() * f_sinf(dhz * 0.5);
f_hypot3f(djz, dcz, dh)
}
#[inline]
pub fn euclidean_distance(&self, other: Self) -> f32 {
let djz = self.jz - other.jz;
let dhz = self.hz - other.hz;
let dcz = self.cz - other.cz;
(djz * djz + dhz * dhz + dcz * dcz).sqrt()
}
#[inline]
pub fn taxicab_distance(&self, other: Self) -> f32 {
let djz = self.jz - other.jz;
let dhz = self.hz - other.hz;
let dcz = self.cz - other.cz;
djz.abs() + dhz.abs() + dcz.abs()
}
}
impl Index<usize> for Jzczhz {
type Output = f32;
#[inline]
fn index(&self, index: usize) -> &f32 {
match index {
0 => &self.jz,
1 => &self.cz,
2 => &self.hz,
_ => panic!("Index out of bounds for Jzczhz"),
}
}
}
impl IndexMut<usize> for Jzczhz {
#[inline]
fn index_mut(&mut self, index: usize) -> &mut f32 {
match index {
0 => &mut self.jz,
1 => &mut self.cz,
2 => &mut self.hz,
_ => panic!("Index out of bounds for Jzczhz"),
}
}
}
impl Add<f32> for Jzczhz {
type Output = Jzczhz;
#[inline]
fn add(self, rhs: f32) -> Self::Output {
Jzczhz::new(self.jz + rhs, self.cz + rhs, self.hz + rhs)
}
}
impl Sub<f32> for Jzczhz {
type Output = Jzczhz;
#[inline]
fn sub(self, rhs: f32) -> Self::Output {
Jzczhz::new(self.jz - rhs, self.cz - rhs, self.hz - rhs)
}
}
impl Mul<f32> for Jzczhz {
type Output = Jzczhz;
#[inline]
fn mul(self, rhs: f32) -> Self::Output {
Jzczhz::new(self.jz * rhs, self.cz * rhs, self.hz * rhs)
}
}
impl Div<f32> for Jzczhz {
type Output = Jzczhz;
#[inline]
fn div(self, rhs: f32) -> Self::Output {
Jzczhz::new(self.jz / rhs, self.cz / rhs, self.hz / rhs)
}
}
impl Add<Jzczhz> for Jzczhz {
type Output = Jzczhz;
#[inline]
fn add(self, rhs: Jzczhz) -> Self::Output {
Jzczhz::new(self.jz + rhs.jz, self.cz + rhs.cz, self.hz + rhs.hz)
}
}
impl Sub<Jzczhz> for Jzczhz {
type Output = Jzczhz;
#[inline]
fn sub(self, rhs: Jzczhz) -> Self::Output {
Jzczhz::new(self.jz - rhs.jz, self.cz - rhs.cz, self.hz - rhs.hz)
}
}
impl Mul<Jzczhz> for Jzczhz {
type Output = Jzczhz;
#[inline]
fn mul(self, rhs: Jzczhz) -> Self::Output {
Jzczhz::new(self.jz * rhs.jz, self.cz * rhs.cz, self.hz * rhs.hz)
}
}
impl Div<Jzczhz> for Jzczhz {
type Output = Jzczhz;
#[inline]
fn div(self, rhs: Jzczhz) -> Self::Output {
Jzczhz::new(self.jz / rhs.jz, self.cz / rhs.cz, self.hz / rhs.hz)
}
}
impl AddAssign<Jzczhz> for Jzczhz {
#[inline]
fn add_assign(&mut self, rhs: Jzczhz) {
self.jz += rhs.jz;
self.cz += rhs.cz;
self.hz += rhs.hz;
}
}
impl SubAssign<Jzczhz> for Jzczhz {
#[inline]
fn sub_assign(&mut self, rhs: Jzczhz) {
self.jz -= rhs.jz;
self.cz -= rhs.cz;
self.hz -= rhs.hz;
}
}
impl MulAssign<Jzczhz> for Jzczhz {
#[inline]
fn mul_assign(&mut self, rhs: Jzczhz) {
self.jz *= rhs.jz;
self.cz *= rhs.cz;
self.hz *= rhs.hz;
}
}
impl DivAssign<Jzczhz> for Jzczhz {
#[inline]
fn div_assign(&mut self, rhs: Jzczhz) {
self.jz /= rhs.jz;
self.cz /= rhs.cz;
self.hz /= rhs.hz;
}
}
impl AddAssign<f32> for Jzczhz {
#[inline]
fn add_assign(&mut self, rhs: f32) {
self.jz += rhs;
self.cz += rhs;
self.hz += rhs;
}
}
impl SubAssign<f32> for Jzczhz {
#[inline]
fn sub_assign(&mut self, rhs: f32) {
self.jz -= rhs;
self.cz -= rhs;
self.hz -= rhs;
}
}
impl MulAssign<f32> for Jzczhz {
#[inline]
fn mul_assign(&mut self, rhs: f32) {
self.jz *= rhs;
self.cz *= rhs;
self.hz *= rhs;
}
}
impl DivAssign<f32> for Jzczhz {
#[inline]
fn div_assign(&mut self, rhs: f32) {
self.jz /= rhs;
self.cz /= rhs;
self.hz /= rhs;
}
}
impl Jzczhz {
#[inline]
pub fn sqrt(&self) -> Jzczhz {
Jzczhz::new(self.jz.sqrt(), self.cz.sqrt(), self.hz.sqrt())
}
#[inline]
pub fn cbrt(&self) -> Jzczhz {
Jzczhz::new(f_cbrtf(self.jz), f_cbrtf(self.cz), f_cbrtf(self.hz))
}
}
impl Pow<f32> for Jzczhz {
type Output = Jzczhz;
#[inline]
fn pow(self, rhs: f32) -> Self::Output {
Jzczhz::new(
f_powf(self.jz, rhs),
f_powf(self.cz, rhs),
f_powf(self.hz, rhs),
)
}
}
impl Pow<Jzczhz> for Jzczhz {
type Output = Jzczhz;
#[inline]
fn pow(self, rhs: Jzczhz) -> Self::Output {
Jzczhz::new(
f_powf(self.jz, rhs.jz),
f_powf(self.cz, rhs.cz),
f_powf(self.hz, rhs.hz),
)
}
}
impl Neg for Jzczhz {
type Output = Jzczhz;
#[inline]
fn neg(self) -> Self::Output {
Jzczhz::new(-self.jz, -self.cz, -self.hz)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn jzczhz_round() {
let xyz = Xyz::new(0.5, 0.4, 0.3);
let jzczhz = Jzczhz::from_xyz_with_display_luminance(xyz, 253.);
let old_xyz = jzczhz.to_xyz(253f32);
assert!(
(xyz.x - old_xyz.x).abs() <= 1e-3,
"{:?} != {:?}",
xyz,
old_xyz
);
assert!(
(xyz.y - old_xyz.y).abs() <= 1e-3,
"{:?} != {:?}",
xyz,
old_xyz
);
assert!(
(xyz.z - old_xyz.z).abs() <= 1e-3,
"{:?} != {:?}",
xyz,
old_xyz
);
}
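// A hedged sketch (values illustrative): converting to polar and back should
// reproduce the rectangular form within float tolerance.
#[test]
fn jzczhz_polar_round() {
let jab = Jzazbz::new(0.2, 0.05, -0.03);
let back = Jzczhz::from_jzazbz(jab).to_jzazbz();
assert!((jab.az - back.az).abs() < 1e-5);
assert!((jab.bz - back.bz).abs() < 1e-5);
}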
}

vendor/moxcms/src/lab.rs vendored Normal file
@@ -0,0 +1,242 @@
/*
* // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::mlaf::{fmla, mlaf};
use crate::{Chromaticity, LCh, Xyz};
use pxfm::f_cbrtf;
/// Holds CIE LAB values
#[repr(C)]
#[derive(Copy, Clone, Debug, Default, PartialOrd, PartialEq)]
pub struct Lab {
/// `l`: lightness component (0 to 100)
pub l: f32,
/// `a`: green (negative) and red (positive) component.
pub a: f32,
/// `b`: blue (negative) and yellow (positive) component
pub b: f32,
}
impl Lab {
/// Create a new CIELAB color.
///
/// # Arguments
///
/// * `l`: lightness component (0 to 100).
/// * `a`: green (negative) and red (positive) component.
/// * `b`: blue (negative) and yellow (positive) component.
#[inline]
pub const fn new(l: f32, a: f32, b: f32) -> Self {
Self { l, a, b }
}
}
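// `f` and `f_1` below are the forward and inverse CIE L* transfer functions:
// `f(t)` is `cbrt(t)` above the `(24/116)^3` break point and the linear
// segment `(841/108) * t + 16/116` below it; `f_1` undoes that piecewise map
// on the f-domain, where the break point is `24/116`.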
#[inline(always)]
const fn f_1(t: f32) -> f32 {
if t <= 24.0 / 116.0 {
(108.0 / 841.0) * (t - 16.0 / 116.0)
} else {
t * t * t
}
}
#[inline(always)]
fn f(t: f32) -> f32 {
if t <= (24. / 116.) * (24. / 116.) * (24. / 116.) {
(841. / 108. * t) + 16. / 116.
} else {
f_cbrtf(t)
}
}
impl Lab {
/// Converts to CIE Lab from CIE XYZ for PCS encoding
#[inline]
pub fn from_pcs_xyz(xyz: Xyz) -> Self {
const WP: Xyz = Chromaticity::D50.to_xyz();
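// The `1.0 + 32767.0 / 32768.0` factor (= 65535/32768) appears to be the ICC
// 16-bit PCS XYZ encoding scale, where 1.0 maps to 0x8000 of the 0xFFFF range.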
let device_x = (xyz.x as f64 * (1.0f64 + 32767.0f64 / 32768.0f64) / WP.x as f64) as f32;
let device_y = (xyz.y as f64 * (1.0f64 + 32767.0f64 / 32768.0f64) / WP.y as f64) as f32;
let device_z = (xyz.z as f64 * (1.0f64 + 32767.0f64 / 32768.0f64) / WP.z as f64) as f32;
let fx = f(device_x);
let fy = f(device_y);
let fz = f(device_z);
let lb = mlaf(-16.0, 116.0, fy);
let a = 500.0 * (fx - fy);
let b = 200.0 * (fy - fz);
let l = lb / 100.0;
let a = (a + 128.0) / 255.0;
let b = (b + 128.0) / 255.0;
Self::new(l, a, b)
}
/// Converts to CIE Lab from CIE XYZ
#[inline]
pub fn from_xyz(xyz: Xyz) -> Self {
const WP: Xyz = Chromaticity::D50.to_xyz();
let device_x = (xyz.x as f64 * (1.0f64 + 32767.0f64 / 32768.0f64) / WP.x as f64) as f32;
let device_y = (xyz.y as f64 * (1.0f64 + 32767.0f64 / 32768.0f64) / WP.y as f64) as f32;
let device_z = (xyz.z as f64 * (1.0f64 + 32767.0f64 / 32768.0f64) / WP.z as f64) as f32;
let fx = f(device_x);
let fy = f(device_y);
let fz = f(device_z);
let lb = mlaf(-16.0, 116.0, fy);
let a = 500.0 * (fx - fy);
let b = 200.0 * (fy - fz);
Self::new(lb, a, b)
}
/// Converts CIE [Lab] into CIE [Xyz] for PCS encoding
#[inline]
pub fn to_pcs_xyz(self) -> Xyz {
let device_l = self.l * 100.0;
let device_a = fmla(self.a, 255.0, -128.0);
let device_b = fmla(self.b, 255.0, -128.0);
let y = (device_l + 16.0) / 116.0;
const WP: Xyz = Chromaticity::D50.to_xyz();
let x = f_1(mlaf(y, 0.002, device_a)) * WP.x;
let y1 = f_1(y) * WP.y;
let z = f_1(mlaf(y, -0.005, device_b)) * WP.z;
let x = (x as f64 / (1.0f64 + 32767.0f64 / 32768.0f64)) as f32;
let y = (y1 as f64 / (1.0f64 + 32767.0f64 / 32768.0f64)) as f32;
let z = (z as f64 / (1.0f64 + 32767.0f64 / 32768.0f64)) as f32;
Xyz::new(x, y, z)
}
/// Converts CIE [Lab] into CIE [Xyz]
#[inline]
pub fn to_xyz(self) -> Xyz {
let device_l = self.l;
let device_a = self.a;
let device_b = self.b;
let y = (device_l + 16.0) / 116.0;
const WP: Xyz = Chromaticity::D50.to_xyz();
let x = f_1(mlaf(y, 0.002, device_a)) * WP.x;
let y1 = f_1(y) * WP.y;
let z = f_1(mlaf(y, -0.005, device_b)) * WP.z;
let x = (x as f64 / (1.0f64 + 32767.0f64 / 32768.0f64)) as f32;
let y = (y1 as f64 / (1.0f64 + 32767.0f64 / 32768.0f64)) as f32;
let z = (z as f64 / (1.0f64 + 32767.0f64 / 32768.0f64)) as f32;
Xyz::new(x, y, z)
}
/// Desaturates out-of-gamut, PCS-encoded LAB
pub fn desaturate_pcs(self) -> Lab {
if self.l < 0. {
return Lab::new(0., 0., 0.);
}
let mut new_lab = self;
if new_lab.l > 1. {
new_lab.l = 1.;
}
let amax = 1.0;
let amin = 0.0;
let bmin = 0.0;
let bmax = 1.0;
if self.a < amin || self.a > amax || self.b < bmin || self.b > bmax {
if self.a == 0.0 {
// Is hue exactly 90?
// atan will not work, so clamp here
new_lab.b = if new_lab.b < bmin { bmin } else { bmax };
return new_lab;
}
let lch = LCh::from_lab(new_lab);
let slope = new_lab.b / new_lab.a;
let h = lch.h * (180.0 / std::f32::consts::PI);
// There are 4 zones
if (0. ..45.).contains(&h) || (315. ..=360.).contains(&h) {
// clip by amax
new_lab.a = amax;
new_lab.b = amax * slope;
} else if (45. ..135.).contains(&h) {
// clip by bmax
new_lab.b = bmax;
new_lab.a = bmax / slope;
} else if (135. ..225.).contains(&h) {
// clip by amin
new_lab.a = amin;
new_lab.b = amin * slope;
} else if (225. ..315.).contains(&h) {
// clip by bmin
new_lab.b = bmin;
new_lab.a = bmin / slope;
}
}
new_lab
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn round_trip() {
let xyz = Xyz::new(0.1, 0.2, 0.3);
let lab = Lab::from_xyz(xyz);
let rolled_back = lab.to_xyz();
let dx = (xyz.x - rolled_back.x).abs();
let dy = (xyz.y - rolled_back.y).abs();
let dz = (xyz.z - rolled_back.z).abs();
assert!(dx < 1e-5);
assert!(dy < 1e-5);
assert!(dz < 1e-5);
}
#[test]
fn round_pcs_trip() {
let xyz = Xyz::new(0.1, 0.2, 0.3);
let lab = Lab::from_pcs_xyz(xyz);
let rolled_back = lab.to_pcs_xyz();
let dx = (xyz.x - rolled_back.x).abs();
let dy = (xyz.y - rolled_back.y).abs();
let dz = (xyz.z - rolled_back.z).abs();
assert!(dx < 1e-5);
assert!(dy < 1e-5);
assert!(dz < 1e-5);
}
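// A hedged sketch (values illustrative): a negative PCS lightness should
// collapse to black via the early return in `desaturate_pcs`.
#[test]
fn desaturate_negative_lightness() {
let lab = Lab::new(-0.5, 0.5, 0.5);
assert_eq!(lab.desaturate_pcs(), Lab::new(0., 0., 0.));
}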
}

vendor/moxcms/src/lib.rs vendored Normal file
@@ -0,0 +1,133 @@
/*
* // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#![allow(clippy::manual_clamp, clippy::excessive_precision)]
#![cfg_attr(docsrs, feature(doc_cfg))]
#![deny(unreachable_pub)]
#![deny(
clippy::print_stdout,
clippy::print_stderr,
clippy::print_literal,
clippy::print_in_format_impl
)]
#![allow(stable_features)]
#![cfg_attr(
not(any(feature = "avx", feature = "sse", feature = "avx512", feature = "neon")),
forbid(unsafe_code)
)]
#![cfg_attr(all(feature = "avx512", target_arch = "x86_64"), feature(cfg_version))]
#![cfg_attr(
all(feature = "avx512", target_arch = "x86_64"),
feature(avx512_target_feature)
)]
#![cfg_attr(
all(feature = "avx512", target_arch = "x86_64"),
feature(stdarch_x86_avx512)
)]
mod chad;
mod cicp;
mod conversions;
mod dat;
mod defaults;
mod err;
mod gamma;
mod gamut;
mod ictcp;
mod jzazbz;
mod jzczhz;
mod lab;
mod luv;
/// One of the main intents is to provide fast math available in const context;
/// ULP of most methods is <= 0.5.
mod math;
mod matrix;
mod mlaf;
mod nd_array;
mod oklab;
mod oklch;
mod profile;
mod reader;
mod rgb;
mod safe_math;
mod tag;
mod transform;
mod trc;
mod writer;
mod yrg;
// Simple math analysis module
mod chromaticity;
mod dt_ucs;
mod helpers;
mod lut_hint;
mod matan;
mod srlab2;
mod xyy;
pub use chad::{
adapt_to_d50, adapt_to_d50_d, adapt_to_illuminant, adapt_to_illuminant_d,
adapt_to_illuminant_xyz, adapt_to_illuminant_xyz_d, adaption_matrix, adaption_matrix_d,
};
pub use chromaticity::Chromaticity;
pub use cicp::{CicpColorPrimaries, ColorPrimaries, MatrixCoefficients, TransferCharacteristics};
pub use dat::ColorDateTime;
pub use defaults::{
HLG_LUT_TABLE, PQ_LUT_TABLE, WHITE_POINT_D50, WHITE_POINT_D60, WHITE_POINT_D65,
WHITE_POINT_DCI_P3,
};
pub use dt_ucs::{DtUchHcb, DtUchHsb, DtUchJch};
pub use err::{CmsError, MalformedSize};
pub use gamut::filmlike_clip;
pub use ictcp::ICtCp;
pub use jzazbz::Jzazbz;
pub use jzczhz::Jzczhz;
pub use lab::Lab;
pub use luv::{LCh, Luv};
pub use math::rounding_div_ceil;
pub use matrix::{
BT2020_MATRIX, DISPLAY_P3_MATRIX, Matrix3, Matrix3d, Matrix3f, Matrix4f, SRGB_MATRIX, Vector3,
Vector3d, Vector3f, Vector3i, Vector3u, Vector4, Vector4d, Vector4f, Vector4i, Xyz, Xyzd,
};
pub use nd_array::{Cube, Hypercube};
pub use oklab::Oklab;
pub use oklch::Oklch;
pub use profile::{
CicpProfile, ColorProfile, DataColorSpace, DescriptionString, LocalizableString, LutDataType,
LutMultidimensionalType, LutStore, LutType, LutWarehouse, Measurement, MeasurementGeometry,
ParsingOptions, ProfileClass, ProfileSignature, ProfileText, ProfileVersion, RenderingIntent,
StandardIlluminant, StandardObserver, TechnologySignatures, ViewingConditions,
};
pub use rgb::{FusedExp, FusedExp2, FusedExp10, FusedLog, FusedLog2, FusedLog10, FusedPow, Rgb};
pub use srlab2::Srlab2;
pub use transform::{
BarycentricWeightScale, InPlaceStage, InterpolationMethod, Layout, PointeeSizeExpressible,
Stage, Transform8BitExecutor, Transform16BitExecutor, TransformExecutor,
TransformF32BitExecutor, TransformF64BitExecutor, TransformOptions,
};
pub use trc::{GammaLutInterpolate, ToneCurveEvaluator, ToneReprCurve, curve_from_gamma};
pub use xyy::{XyY, XyYRepresentable};
pub use yrg::{Ych, Yrg, cie_y_1931_to_cie_y_2006};
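// A hedged smoke test (not part of the vendored file, values illustrative):
// exercising a small slice of the surface re-exported above.
#[cfg(test)]
mod reexport_smoke {
use super::*;
#[test]
fn lab_round_trip_via_reexports() {
let xyz = Xyz::new(0.2, 0.3, 0.4);
let back = Lab::from_xyz(xyz).to_xyz();
assert!((xyz.y - back.y).abs() < 1e-4);
}
}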

vendor/moxcms/src/lut_hint.rs vendored Normal file
@@ -0,0 +1,106 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::LutWarehouse;
impl LutWarehouse {
/// Tests whether mathematical fusion of the LUT table is allowed.
/// If it is not, a full brute-force pass in [Katana] is required.
pub(crate) fn is_katana_required(&self) -> bool {
match self {
LutWarehouse::Lut(lut) => {
let input_entries = lut.num_input_channels as usize;
let output_entries = lut.num_output_channels as usize;
for i in 0..input_entries {
if lut.input_table.is_degenerated(input_entries, i) {
return true;
}
if !lut.input_table.is_monotonic(input_entries, i) {
return true;
}
if lut.input_table.have_discontinuities(input_entries, i) {
return true;
}
}
for i in 0..output_entries {
if lut.output_table.is_degenerated(output_entries, i) {
return true;
}
if !lut.output_table.is_monotonic(output_entries, i) {
return true;
}
if lut.output_table.have_discontinuities(output_entries, i) {
return true;
}
}
false
}
LutWarehouse::Multidimensional(mab) => {
for curve in mab.a_curves.iter() {
if curve.is_degenerated() {
return true;
}
if !curve.is_monotonic() {
return true;
}
if curve.have_discontinuities() {
return true;
}
}
for curve in mab.m_curves.iter() {
if curve.is_degenerated() {
return true;
}
if !curve.is_monotonic() {
return true;
}
if curve.have_discontinuities() {
return true;
}
}
for curve in mab.b_curves.iter() {
if curve.is_degenerated() {
return true;
}
if !curve.is_monotonic() {
return true;
}
if curve.have_discontinuities() {
return true;
}
}
false
}
}
}
}
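// A hedged sketch (not part of the vendored file): the three identical
// per-curve loops above share one predicate, which a small helper trait
// (hypothetical, named here purely for illustration) would collapse into a
// single `any` call per curve set.
#[allow(dead_code)]
trait CurveHealth {
fn is_degenerated(&self) -> bool;
fn is_monotonic(&self) -> bool;
fn have_discontinuities(&self) -> bool;
fn needs_katana(&self) -> bool {
self.is_degenerated() || !self.is_monotonic() || self.have_discontinuities()
}
}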
