Vendor dependencies for 0.3.0 release

2025-09-27 10:29:08 -05:00
parent 0c8d39d483
commit 82ab7f317b
26803 changed files with 16134934 additions and 0 deletions

1
vendor/moxcms/.cargo-checksum.json vendored Normal file

File diff suppressed because one or more lines are too long

183
vendor/moxcms/Cargo.lock generated vendored Normal file

@@ -0,0 +1,183 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "autocfg"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
[[package]]
name = "bitflags"
version = "2.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34efbcccd345379ca2868b2b2c9d3782e9cc58ba87bc7d79d5b53d9c9ae6f25d"
[[package]]
name = "cfg-if"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9"
[[package]]
name = "getrandom"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
dependencies = [
"cfg-if",
"libc",
"r-efi",
"wasi",
]
[[package]]
name = "libc"
version = "0.2.175"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543"
[[package]]
name = "moxcms"
version = "0.7.5"
dependencies = [
"num-traits",
"pxfm",
"rand",
]
[[package]]
name = "num-traits"
version = "0.2.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
dependencies = [
"autocfg",
]
[[package]]
name = "ppv-lite86"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
dependencies = [
"zerocopy",
]
[[package]]
name = "proc-macro2"
version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de"
dependencies = [
"unicode-ident",
]
[[package]]
name = "pxfm"
version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "069f3b41a7e17d18b8af925e597c8b2430591341415f98c5e1ecb2a245cea7ae"
dependencies = [
"num-traits",
]
[[package]]
name = "quote"
version = "1.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
dependencies = [
"proc-macro2",
]
[[package]]
name = "r-efi"
version = "5.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
[[package]]
name = "rand"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
dependencies = [
"rand_chacha",
"rand_core",
]
[[package]]
name = "rand_chacha"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
dependencies = [
"ppv-lite86",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38"
dependencies = [
"getrandom",
]
[[package]]
name = "syn"
version = "2.0.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
[[package]]
name = "wasi"
version = "0.14.2+wasi-0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3"
dependencies = [
"wit-bindgen-rt",
]
[[package]]
name = "wit-bindgen-rt"
version = "0.39.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
dependencies = [
"bitflags",
]
[[package]]
name = "zerocopy"
version = "0.8.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.8.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181"
dependencies = [
"proc-macro2",
"quote",
"syn",
]

79
vendor/moxcms/Cargo.toml vendored Normal file

@@ -0,0 +1,79 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.
[package]
edition = "2024"
rust-version = "1.85.0"
name = "moxcms"
version = "0.7.5"
authors = ["Radzivon Bartoshyk"]
build = false
exclude = [
"*.jpg",
"../../assets/*",
"*.png",
"*.icc",
"./assets/*",
]
autolib = false
autobins = false
autoexamples = false
autotests = false
autobenches = false
description = "Simple Color Management in Rust"
homepage = "https://github.com/awxkee/moxcms"
documentation = "https://github.com/awxkee/moxcms"
readme = "README.md"
keywords = [
"icc",
"cms",
"color",
"cmyk",
]
categories = ["multimedia::images"]
license = "BSD-3-Clause OR Apache-2.0"
repository = "https://github.com/awxkee/moxcms.git"
[package.metadata.docs.rs]
all-features = true
rustdoc-args = [
"--cfg",
"docsrs",
]
[features]
avx = []
avx512 = []
default = [
"avx",
"sse",
"neon",
]
neon = []
options = []
sse = []
[lib]
name = "moxcms"
path = "src/lib.rs"
[dependencies.num-traits]
version = "0.2"
[dependencies.pxfm]
version = "^0.1.1"
[dev-dependencies.rand]
version = "0.9"
[profile.profiling]
debug = 2
inherits = "release"

201
vendor/moxcms/LICENSE-APACHE.md vendored Normal file

@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2024 Radzivon Bartoshyk
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

26
vendor/moxcms/LICENSE.md vendored Normal file

@@ -0,0 +1,26 @@
Copyright (c) Radzivon Bartoshyk. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

74
vendor/moxcms/README.md vendored Normal file

@@ -0,0 +1,74 @@
# Rust ICC Management
Fast and safe conversion between ICC profiles, in pure Rust.
Supports CMYK⬌RGBX, RGBX⬌RGBX, RGBX⬌GRAY, LAB⬌RGBX, CMYK⬌LAB and GRAY⬌RGB, plus conversion from any 3/4-color profile to RGB and vice versa. Also supports almost any-to-any conversion between Display Class ICC profiles with up to 16 inks.
## Example
```rust
// Assumed imports (the original snippet omits them): `File`/`BufReader` from std,
// `JpegDecoder`/`ImageDecoder` from the `image` crate, the rest from moxcms.
use std::fs::File;
use std::io::BufReader;
use image::ImageDecoder;
use image::codecs::jpeg::JpegDecoder;
use moxcms::{ColorProfile, Layout, TransformOptions};

let f_str = "./assets/dci_p3_profile.jpeg";
let file = File::open(f_str).expect("Failed to open file");
let img = image::ImageReader::open(f_str).unwrap().decode().unwrap();
let rgb = img.to_rgb8();
let mut decoder = JpegDecoder::new(BufReader::new(file)).unwrap();
let icc = decoder.icc_profile().unwrap().unwrap();
let color_profile = ColorProfile::new_from_slice(&icc).unwrap();
let dest_profile = ColorProfile::new_srgb();
let transform = color_profile
.create_transform_8bit(&dest_profile, Layout::Rgb8, TransformOptions::default())
.unwrap();
let mut dst = vec![0u8; rgb.len()];
for (src, dst) in rgb
.chunks_exact(img.width() as usize * 3)
.zip(dst.chunks_exact_mut(img.dimensions().0 as usize * 3))
{
transform
.transform(
&src[..img.dimensions().0 as usize * 3],
&mut dst[..img.dimensions().0 as usize * 3],
)
.unwrap();
}
image::save_buffer(
"v1.jpg",
&dst,
img.dimensions().0,
img.dimensions().1,
image::ExtendedColorType::Rgb8,
)
.unwrap();
```
## Benchmarks
### ICC Transform 8-Bit
Tests were run on a 1997×1331 image.
| Conversion | Time (NEON) | Time (AVX2) |
|--------------------|:----------:|:----------:|
| moxcms RGB⮕RGB | 2.68ms | 4.52ms |
| moxcms LUT RGB⮕RGB | 6.03ms | 12.43ms |
| moxcms RGBA⮕RGBA | 2.96ms | 4.83ms |
| moxcms CMYK⮕RGBA | 9.74ms | 21.65ms |
| lcms2 RGB⮕RGB | 13.1ms | 27.73ms |
| lcms2 LUT RGB⮕RGB | 27.60ms | 58.26ms |
| lcms2 RGBA⮕RGBA | 21.97ms | 35.70ms |
| lcms2 CMYK⮕RGBA | 39.71ms | 79.40ms |
| qcms RGB⮕RGB | 6.47ms | 4.59ms |
| qcms LUT RGB⮕RGB | 26.72ms | 60.80ms |
| qcms RGBA⮕RGBA | 6.83ms | 4.99ms |
| qcms CMYK⮕RGBA | 25.97ms | 61.54ms |
## License
This project is licensed under either of
- BSD-3-Clause License (see [LICENSE](LICENSE.md))
- Apache License, Version 2.0 (see [LICENSE-APACHE](LICENSE-APACHE.md))
at your option.

172
vendor/moxcms/src/chad.rs vendored Normal file

@@ -0,0 +1,172 @@
/*
* // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::matrix::{Matrix3f, Vector3f, Xyz};
use crate::{Chromaticity, Matrix3d, Vector3d, XyY};
pub(crate) const BRADFORD_D: Matrix3d = Matrix3d {
v: [
[0.8951, 0.2664, -0.1614],
[-0.7502, 1.7135, 0.0367],
[0.0389, -0.0685, 1.0296],
],
};
pub(crate) const BRADFORD_F: Matrix3f = BRADFORD_D.to_f32();
#[inline]
pub(crate) const fn compute_chromatic_adaption(
source_white_point: Xyz,
dest_white_point: Xyz,
chad: Matrix3f,
) -> Matrix3f {
let cone_source_xyz = Vector3f {
v: [
source_white_point.x,
source_white_point.y,
source_white_point.z,
],
};
let cone_source_rgb = chad.mul_vector(cone_source_xyz);
let cone_dest_xyz = Vector3f {
v: [dest_white_point.x, dest_white_point.y, dest_white_point.z],
};
let cone_dest_rgb = chad.mul_vector(cone_dest_xyz);
let cone = Matrix3f {
v: [
[cone_dest_rgb.v[0] / cone_source_rgb.v[0], 0., 0.],
[0., cone_dest_rgb.v[1] / cone_source_rgb.v[1], 0.],
[0., 0., cone_dest_rgb.v[2] / cone_source_rgb.v[2]],
],
};
let chad_inv = chad.inverse();
let p0 = cone.mat_mul_const(chad);
chad_inv.mat_mul_const(p0)
}
#[inline]
pub(crate) const fn compute_chromatic_adaption_d(
source_white_point: Xyz,
dest_white_point: Xyz,
chad: Matrix3d,
) -> Matrix3d {
let cone_source_xyz = Vector3d {
v: [
source_white_point.x as f64,
source_white_point.y as f64,
source_white_point.z as f64,
],
};
let cone_source_rgb = chad.mul_vector(cone_source_xyz);
let cone_dest_xyz = Vector3d {
v: [
dest_white_point.x as f64,
dest_white_point.y as f64,
dest_white_point.z as f64,
],
};
let cone_dest_rgb = chad.mul_vector(cone_dest_xyz);
let cone = Matrix3d {
v: [
[cone_dest_rgb.v[0] / cone_source_rgb.v[0], 0., 0.],
[0., cone_dest_rgb.v[1] / cone_source_rgb.v[1], 0.],
[0., 0., cone_dest_rgb.v[2] / cone_source_rgb.v[2]],
],
};
let chad_inv = chad.inverse();
let p0 = cone.mat_mul_const(chad);
chad_inv.mat_mul_const(p0)
}
pub const fn adaption_matrix(source_illumination: Xyz, target_illumination: Xyz) -> Matrix3f {
compute_chromatic_adaption(source_illumination, target_illumination, BRADFORD_F)
}
pub const fn adaption_matrix_d(source_illumination: Xyz, target_illumination: Xyz) -> Matrix3d {
compute_chromatic_adaption_d(source_illumination, target_illumination, BRADFORD_D)
}
pub const fn adapt_to_d50(r: Matrix3f, source_white_pt: XyY) -> Matrix3f {
adapt_to_illuminant(r, source_white_pt, Chromaticity::D50.to_xyz())
}
pub const fn adapt_to_d50_d(r: Matrix3d, source_white_pt: XyY) -> Matrix3d {
adapt_to_illuminant_d(r, source_white_pt, Chromaticity::D50.to_xyz())
}
pub const fn adapt_to_illuminant(
r: Matrix3f,
source_white_pt: XyY,
illuminant_xyz: Xyz,
) -> Matrix3f {
let bradford = adaption_matrix(source_white_pt.to_xyz(), illuminant_xyz);
bradford.mat_mul_const(r)
}
pub const fn adapt_to_illuminant_d(
r: Matrix3d,
source_white_pt: XyY,
illuminant_xyz: Xyz,
) -> Matrix3d {
let bradford = adaption_matrix_d(source_white_pt.to_xyz(), illuminant_xyz);
bradford.mat_mul_const(r)
}
pub const fn adapt_to_illuminant_xyz(
r: Matrix3f,
source_white_pt: Xyz,
illuminant_xyz: Xyz,
) -> Matrix3f {
if source_white_pt.y == 0.0 {
return r;
}
let bradford = adaption_matrix(source_white_pt, illuminant_xyz);
bradford.mat_mul_const(r)
}
pub const fn adapt_to_illuminant_xyz_d(
r: Matrix3d,
source_white_pt: Xyz,
illuminant_xyz: Xyz,
) -> Matrix3d {
if source_white_pt.y == 0.0 {
return r;
}
let bradford = adaption_matrix_d(source_white_pt, illuminant_xyz);
bradford.mat_mul_const(r)
}
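The functions above implement the standard Bradford chromatic adaptation: project both white points into cone space with `chad`, scale by the ratio of the cone responses, and project back (`chad⁻¹ · diag(dest/src) · chad`). A minimal usage sketch, assuming `adapt_to_d50`, `Chromaticity`, and `Matrix3f` are re-exported at the crate root and that `Matrix3f`'s `v` field is public, as the constructions above suggest:

```rust
use moxcms::{adapt_to_d50, Chromaticity, Matrix3f};

fn main() {
    // Hypothetical D65-referenced RGB -> XYZ matrix (sRGB-like coefficients).
    let rgb_to_xyz_d65 = Matrix3f {
        v: [
            [0.4124, 0.3576, 0.1805],
            [0.2126, 0.7152, 0.0722],
            [0.0193, 0.1192, 0.9505],
        ],
    };
    // Bradford-adapt it to the ICC PCS illuminant, D50.
    let rgb_to_xyz_d50 = adapt_to_d50(rgb_to_xyz_d65, Chromaticity::D65.to_xyyb());
    // White (1, 1, 1) must still map to Y ≈ 1, so the middle row sums to ~1.
    let y_sum: f32 = rgb_to_xyz_d50.v[1].iter().sum();
    assert!((y_sum - 1.0).abs() < 1e-3);
}
```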

143
vendor/moxcms/src/chromaticity.rs vendored Normal file

@@ -0,0 +1,143 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::{CmsError, XyY, XyYRepresentable, Xyz, Xyzd};
#[derive(Clone, Debug, Copy)]
#[repr(C)]
pub struct Chromaticity {
pub x: f32,
pub y: f32,
}
impl Chromaticity {
#[inline]
pub const fn new(x: f32, y: f32) -> Self {
Self { x, y }
}
/// Converts this chromaticity (`x`, `y`) to a tristimulus [`Xyz`] value,
/// normalized such that `y = 1.0`.
#[inline]
pub const fn to_xyz(&self) -> Xyz {
let reciprocal = if self.y != 0. { 1. / self.y } else { 0. };
Xyz {
x: self.x * reciprocal,
y: 1f32,
z: (1f32 - self.x - self.y) * reciprocal,
}
}
/// Get the color representation with component sum `1`.
///
/// In contrast to the XYZ representation defined through setting `Y` to a known
/// value (such as `1` in [`Self::to_xyz`]) this representation can be uniquely
/// derived from the `xy` coordinates with no ambiguities. It is scaled from the
/// original XYZ color by dividing by `X + Y + Z`. Note that, in particular, this
/// method is well-defined even if the original color had pure chromatic
/// information with no luminance (Y = `0`) and will preserve that information,
/// whereas [`Self::to_xyz`] is ill-defined and returns an incorrect value.
#[inline]
pub const fn to_scaled_xyzd(&self) -> Xyzd {
let z = 1.0 - self.x as f64 - self.y as f64;
Xyzd::new(self.x as f64, self.y as f64, z)
}
/// Get the color representation with component sum `1`.
///
/// In contrast to the XYZ representation defined through setting `Y` to a known
/// value (such as `1` in [`Self::to_xyz`]) this representation can be uniquely
/// derived from the `xy` coordinates with no ambiguities. It is scaled from the
/// original XYZ color by dividing by `X + Y + Z`. Note that, in particular, this
/// method is well-defined even if the original color had pure chromatic
/// information with no luminance (Y = `0`) and will preserve that information,
/// whereas [`Self::to_xyz`] is ill-defined and returns an incorrect value.
#[inline]
pub const fn to_scaled_xyz(&self) -> Xyz {
let z = 1.0 - self.x - self.y;
Xyz::new(self.x, self.y, z)
}
#[inline]
pub const fn to_xyzd(&self) -> Xyzd {
let reciprocal = if self.y != 0. { 1. / self.y } else { 0. };
Xyzd {
x: self.x as f64 * reciprocal as f64,
y: 1f64,
z: (1f64 - self.x as f64 - self.y as f64) * reciprocal as f64,
}
}
#[inline]
pub const fn to_xyyb(&self) -> XyY {
XyY {
x: self.x as f64,
y: self.y as f64,
yb: 1.,
}
}
pub const D65: Chromaticity = Chromaticity {
x: 0.31272,
y: 0.32903,
};
pub const D50: Chromaticity = Chromaticity {
x: 0.34567,
y: 0.35850,
};
}
impl XyYRepresentable for Chromaticity {
fn to_xyy(self) -> XyY {
self.to_xyyb()
}
}
impl TryFrom<Xyz> for Chromaticity {
type Error = CmsError;
#[inline]
fn try_from(xyz: Xyz) -> Result<Self, Self::Error> {
let sum = xyz.x + xyz.y + xyz.z;
// Avoid division by zero or invalid XYZ values
if sum == 0.0 {
return Err(CmsError::DivisionByZero);
}
let rec = 1f32 / sum;
let chromaticity_x = xyz.x * rec;
let chromaticity_y = xyz.y * rec;
Ok(Chromaticity {
x: chromaticity_x,
y: chromaticity_y,
})
}
}
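To make the `y = 1` normalization in `to_xyz` concrete, here is a small sketch; it assumes `Chromaticity` is re-exported at the crate root and that `Xyz` has public fields, as the code above suggests:

```rust
use moxcms::Chromaticity;

fn main() {
    // D65 chromaticity: x = 0.31272, y = 0.32903.
    let xyz = Chromaticity::D65.to_xyz();
    // Y is pinned to 1.0; X = x / y and Z = (1 - x - y) / y follow from it.
    assert!((xyz.y - 1.0).abs() < 1e-6);
    assert!((xyz.x - 0.31272 / 0.32903).abs() < 1e-5);
    assert!((xyz.z - (1.0 - 0.31272 - 0.32903) / 0.32903).abs() < 1e-5);
}
```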

642
vendor/moxcms/src/cicp.rs vendored Normal file

@@ -0,0 +1,642 @@
/*
* // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::gamma::{
bt1361_to_linear, hlg_to_linear, iec61966_to_linear, log100_sqrt10_to_linear, log100_to_linear,
pq_to_linear, smpte240_to_linear, smpte428_to_linear,
};
use crate::{
Chromaticity, ColorProfile, Matrix3d, Matrix3f, XyYRepresentable,
err::CmsError,
trc::{ToneReprCurve, build_trc_table, curve_from_gamma},
};
use std::convert::TryFrom;
/// See [Rec. ITU-T H.273 (12/2016)](https://www.itu.int/rec/T-REC-H.273-201612-I/en) Table 2
/// Values 0, 3, 13–21, 23–255 are all reserved so all map to the same variant
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum CicpColorPrimaries {
/// For future use by ITU-T | ISO/IEC
Reserved,
/// Rec. ITU-R BT.709-6<br />
/// Rec. ITU-R BT.1361-0 conventional colour gamut system and extended colour gamut system (historical)<br />
/// IEC 61966-2-1 sRGB or sYCC, IEC 61966-2-4<br />
/// Society of Motion Picture and Television Engineers (SMPTE) RP 177 (1993) Annex B<br />
Bt709 = 1,
/// Unspecified<br />
/// Image characteristics are unknown or are determined by the application.
Unspecified = 2,
/// Rec. ITU-R BT.470-6 System M (historical)<br />
/// United States National Television System Committee 1953 Recommendation for transmission standards for color television<br />
/// United States Federal Communications Commission (2003) Title 47 Code of Federal Regulations 73.682 (a) (20)<br />
Bt470M = 4,
/// Rec. ITU-R BT.470-6 System B, G (historical) Rec. ITU-R BT.601-7 625<br />
/// Rec. ITU-R BT.1358-0 625 (historical)<br />
/// Rec. ITU-R BT.1700-0 625 PAL and 625 SECAM<br />
Bt470Bg = 5,
/// Rec. ITU-R BT.601-7 525<br />
/// Rec. ITU-R BT.1358-1 525 or 625 (historical) Rec. ITU-R BT.1700-0 NTSC<br />
/// SMPTE 170M (2004)<br />
/// (functionally the same as the value 7)<br />
Bt601 = 6,
/// SMPTE 240M (1999) (historical) (functionally the same as the value 6)<br />
Smpte240 = 7,
/// Generic film (colour filters using Illuminant C)<br />
GenericFilm = 8,
/// Rec. ITU-R BT.2020-2<br />
/// Rec. ITU-R BT.2100-0<br />
Bt2020 = 9,
/// SMPTE ST 428-1<br />
/// (CIE 1931 XYZ as in ISO 11664-1)<br />
Xyz = 10,
/// SMPTE RP 431-2 (2011)<br />
Smpte431 = 11,
/// SMPTE EG 432-1 (2010)<br />
Smpte432 = 12,
/// EBU Tech. 3213-E (1975)<br />
Ebu3213 = 22,
}
impl TryFrom<u8> for CicpColorPrimaries {
type Error = CmsError;
#[allow(unreachable_patterns)]
fn try_from(value: u8) -> Result<Self, Self::Error> {
match value {
// Values 0, 3, 13–21, 23–255 are all reserved so all map to the
// same variant.
0 | 3 | 13..=21 | 23..=255 => Ok(Self::Reserved),
1 => Ok(Self::Bt709),
2 => Ok(Self::Unspecified),
4 => Ok(Self::Bt470M),
5 => Ok(Self::Bt470Bg),
6 => Ok(Self::Bt601),
7 => Ok(Self::Smpte240),
8 => Ok(Self::GenericFilm),
9 => Ok(Self::Bt2020),
10 => Ok(Self::Xyz),
11 => Ok(Self::Smpte431),
12 => Ok(Self::Smpte432),
22 => Ok(Self::Ebu3213),
_ => Err(CmsError::InvalidCicp),
}
}
}
#[derive(Clone, Copy, Debug)]
#[repr(C)]
pub struct ColorPrimaries {
pub red: Chromaticity,
pub green: Chromaticity,
pub blue: Chromaticity,
}
/// See [Rec. ITU-T H.273 (12/2016)](https://www.itu.int/rec/T-REC-H.273-201612-I/en) Table 2.
impl ColorPrimaries {
/// [ACEScg](https://en.wikipedia.org/wiki/Academy_Color_Encoding_System#ACEScg).
pub const ACES_CG: ColorPrimaries = ColorPrimaries {
red: Chromaticity { x: 0.713, y: 0.293 },
green: Chromaticity { x: 0.165, y: 0.830 },
blue: Chromaticity { x: 0.128, y: 0.044 },
};
/// [ACES2065-1](https://en.wikipedia.org/wiki/Academy_Color_Encoding_System#ACES2065-1).
pub const ACES_2065_1: ColorPrimaries = ColorPrimaries {
red: Chromaticity {
x: 0.7347,
y: 0.2653,
},
green: Chromaticity {
x: 0.0000,
y: 1.0000,
},
blue: Chromaticity {
x: 0.0001,
y: -0.0770,
},
};
/// [Adobe RGB](https://en.wikipedia.org/wiki/Adobe_RGB_color_space) (1998).
pub const ADOBE_RGB: ColorPrimaries = ColorPrimaries {
red: Chromaticity { x: 0.64, y: 0.33 },
green: Chromaticity { x: 0.21, y: 0.71 },
blue: Chromaticity { x: 0.15, y: 0.06 },
};
/// [DCI P3](https://en.wikipedia.org/wiki/DCI-P3#DCI_P3).
///
/// This is the same as [`DISPLAY_P3`](Self::DISPLAY_P3),
/// [`SMPTE_431`](Self::SMPTE_431) and [`SMPTE_432`](Self::SMPTE_432).
pub const DCI_P3: ColorPrimaries = ColorPrimaries {
red: Chromaticity { x: 0.680, y: 0.320 },
green: Chromaticity { x: 0.265, y: 0.690 },
blue: Chromaticity { x: 0.150, y: 0.060 },
};
/// [Display P3](https://en.wikipedia.org/wiki/DCI-P3#Display_P3).
///
/// This is the same as [`DCI_P3`](Self::DCI_P3),
/// [`SMPTE_431`](Self::SMPTE_431) and [`SMPTE_432`](Self::SMPTE_432).
pub const DISPLAY_P3: ColorPrimaries = Self::DCI_P3;
/// SMPTE RP 431-2 (2011).
///
/// This is the same as [`DCI_P3`](Self::DCI_P3),
/// [`DISPLAY_P3`](Self::DISPLAY_P3) and [`SMPTE_432`](Self::SMPTE_432).
pub const SMPTE_431: ColorPrimaries = Self::DCI_P3;
/// SMPTE EG 432-1 (2010).
///
/// This is the same as [`DCI_P3`](Self::DCI_P3),
/// [`DISPLAY_P3`](Self::DISPLAY_P3) and [`SMPTE_431`](Self::SMPTE_431).
pub const SMPTE_432: ColorPrimaries = Self::DCI_P3;
/// [ProPhoto RGB](https://en.wikipedia.org/wiki/ProPhoto_RGB_color_space).
pub const PRO_PHOTO_RGB: ColorPrimaries = ColorPrimaries {
red: Chromaticity {
x: 0.734699,
y: 0.265301,
},
green: Chromaticity {
x: 0.159597,
y: 0.840403,
},
blue: Chromaticity {
x: 0.036598,
y: 0.000105,
},
};
/// Rec. ITU-R BT.709-6
///
/// Rec. ITU-R BT.1361-0 conventional colour gamut system and extended
/// colour gamut system (historical).
///
/// IEC 61966-2-1 sRGB or sYCC, IEC 61966-2-4.
///
/// Society of Motion Picture and Television Engineers (SMPTE) RP 177 (1993) Annex B.
pub const BT_709: ColorPrimaries = ColorPrimaries {
red: Chromaticity { x: 0.64, y: 0.33 },
green: Chromaticity { x: 0.30, y: 0.60 },
blue: Chromaticity { x: 0.15, y: 0.06 },
};
/// Rec. ITU-R BT.470-6 System M (historical).
///
/// United States National Television System Committee 1953 Recommendation
/// for transmission standards for color television.
///
/// United States Federal Communications Commission (2003) Title 47 Code of
/// Federal Regulations 73.682 (a) (20).
pub const BT_470M: ColorPrimaries = ColorPrimaries {
red: Chromaticity { x: 0.67, y: 0.33 },
green: Chromaticity { x: 0.21, y: 0.71 },
blue: Chromaticity { x: 0.14, y: 0.08 },
};
/// Rec. ITU-R BT.470-6 System B, G (historical) Rec. ITU-R BT.601-7 625.
///
/// Rec. ITU-R BT.1358-0 625 (historical).
/// Rec. ITU-R BT.1700-0 625 PAL and 625 SECAM.
pub const BT_470BG: ColorPrimaries = ColorPrimaries {
red: Chromaticity { x: 0.64, y: 0.33 },
green: Chromaticity { x: 0.29, y: 0.60 },
blue: Chromaticity { x: 0.15, y: 0.06 },
};
/// Rec. ITU-R BT.601-7 525.
///
/// Rec. ITU-R BT.1358-1 525 or 625 (historical) Rec. ITU-R BT.1700-0 NTSC.
///
/// SMPTE 170M (2004) (functionally the same as [`SMPTE_240`](Self::SMPTE_240)).
pub const BT_601: ColorPrimaries = ColorPrimaries {
red: Chromaticity { x: 0.630, y: 0.340 },
green: Chromaticity { x: 0.310, y: 0.595 },
blue: Chromaticity { x: 0.155, y: 0.070 },
};
/// SMPTE 240M (1999) (historical) (functionally the same as [`BT_601`](Self::BT_601)).
pub const SMPTE_240: ColorPrimaries = Self::BT_601;
/// Generic film (colour filters using Illuminant C).
pub const GENERIC_FILM: ColorPrimaries = ColorPrimaries {
red: Chromaticity { x: 0.681, y: 0.319 },
green: Chromaticity { x: 0.243, y: 0.692 },
blue: Chromaticity { x: 0.145, y: 0.049 },
};
/// Rec. ITU-R BT.2020-2.
///
/// Rec. ITU-R BT.2100-0.
pub const BT_2020: ColorPrimaries = ColorPrimaries {
red: Chromaticity { x: 0.708, y: 0.292 },
green: Chromaticity { x: 0.170, y: 0.797 },
blue: Chromaticity { x: 0.131, y: 0.046 },
};
/// SMPTE ST 428-1 (CIE 1931 XYZ as in ISO 11664-1).
pub const XYZ: ColorPrimaries = ColorPrimaries {
red: Chromaticity { x: 1.0, y: 0.0 },
green: Chromaticity { x: 0.0, y: 1.0 },
blue: Chromaticity { x: 0.0, y: 0.0 },
};
/// EBU Tech. 3213-E (1975).
pub const EBU_3213: ColorPrimaries = ColorPrimaries {
red: Chromaticity { x: 0.630, y: 0.340 },
green: Chromaticity { x: 0.295, y: 0.605 },
blue: Chromaticity { x: 0.155, y: 0.077 },
};
}
impl ColorPrimaries {
/// Returns RGB -> XYZ conversion matrix
///
/// # Arguments
///
/// * `white_point`: [Chromaticity] or [crate::XyY] or any item conforming [XyYRepresentable]
///
/// returns: [Matrix3d]
pub fn transform_to_xyz_d(self, white_point: impl XyYRepresentable) -> Matrix3d {
let red_xyz = self.red.to_scaled_xyzd();
let green_xyz = self.green.to_scaled_xyzd();
let blue_xyz = self.blue.to_scaled_xyzd();
let xyz_matrix = Matrix3d {
v: [
[red_xyz.x, green_xyz.x, blue_xyz.x],
[red_xyz.y, green_xyz.y, blue_xyz.y],
[red_xyz.z, green_xyz.z, blue_xyz.z],
],
};
ColorProfile::rgb_to_xyz_d(xyz_matrix, white_point.to_xyy().to_xyzd())
}
/// Returns RGB -> XYZ conversion matrix
///
/// # Arguments
///
/// * `white_point`: [Chromaticity] or [crate::XyY] or any item conforming [XyYRepresentable]
///
/// returns: [Matrix3f]
pub fn transform_to_xyz(self, white_point: impl XyYRepresentable) -> Matrix3f {
let red_xyz = self.red.to_scaled_xyz();
let green_xyz = self.green.to_scaled_xyz();
let blue_xyz = self.blue.to_scaled_xyz();
let xyz_matrix = Matrix3f {
v: [
[red_xyz.x, green_xyz.x, blue_xyz.x],
[red_xyz.y, green_xyz.y, blue_xyz.y],
[red_xyz.z, green_xyz.z, blue_xyz.z],
],
};
ColorProfile::rgb_to_xyz_static(xyz_matrix, white_point.to_xyy().to_xyz())
}
}
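A short sketch of `transform_to_xyz_d` in use, mirroring the tests at the bottom of this file; it assumes `ColorPrimaries` and the `WHITE_POINT_D65` constant used there are exported at the crate root, and that `Matrix3d`'s `v` field is public:

```rust
use moxcms::{ColorPrimaries, WHITE_POINT_D65};

fn main() {
    // BT.709 RGB -> XYZ matrix relative to a D65 white point.
    let m = ColorPrimaries::BT_709.transform_to_xyz_d(WHITE_POINT_D65);
    // The diagonal reproduces the familiar BT.709/sRGB coefficients
    // (reference values taken from the tests below).
    assert!((m.v[0][0] - 0.4121524015214193).abs() < 1e-12);
    assert!((m.v[1][1] - 0.7153537403945436).abs() < 1e-12);
}
```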
/// See [Rec. ITU-T H.273 (12/2016)](https://www.itu.int/rec/T-REC-H.273-201612-I/en) Table 3
/// Values 0, 3, 19–255 are all reserved so all map to the same variant
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum TransferCharacteristics {
/// For future use by ITU-T | ISO/IEC
Reserved,
/// Rec. ITU-R BT.709-6<br />
/// Rec. ITU-R BT.1361-0 conventional colour gamut system (historical)<br />
/// (functionally the same as the values 6, 14 and 15) <br />
Bt709 = 1,
/// Image characteristics are unknown or are determined by the application.<br />
Unspecified = 2,
/// Rec. ITU-R BT.470-6 System M (historical)<br />
/// United States National Television System Committee 1953 Recommendation for transmission standards for color television<br />
/// United States Federal Communications Commission (2003) Title 47 Code of Federal Regulations 73.682 (a) (20)<br />
/// Rec. ITU-R BT.1700-0 625 PAL and 625 SECAM<br />
Bt470M = 4,
/// Rec. ITU-R BT.470-6 System B, G (historical)<br />
Bt470Bg = 5,
/// Rec. ITU-R BT.601-7 525 or 625<br />
/// Rec. ITU-R BT.1358-1 525 or 625 (historical)<br />
/// Rec. ITU-R BT.1700-0 NTSC SMPTE 170M (2004)<br />
/// (functionally the same as the values 1, 14 and 15)<br />
Bt601 = 6,
/// SMPTE 240M (1999) (historical)<br />
Smpte240 = 7,
/// Linear transfer characteristics<br />
Linear = 8,
/// Logarithmic transfer characteristic (100:1 range)<br />
Log100 = 9,
/// Logarithmic transfer characteristic (100 * Sqrt( 10 ) : 1 range)<br />
Log100sqrt10 = 10,
/// IEC 61966-2-4<br />
Iec61966 = 11,
/// Rec. ITU-R BT.1361-0 extended colour gamut system (historical)<br />
Bt1361 = 12,
/// IEC 61966-2-1 sRGB or sYCC<br />
Srgb = 13,
/// Rec. ITU-R BT.2020-2 (10-bit system)<br />
/// (functionally the same as the values 1, 6 and 15)<br />
Bt202010bit = 14,
/// Rec. ITU-R BT.2020-2 (12-bit system)<br />
/// (functionally the same as the values 1, 6 and 14)<br />
Bt202012bit = 15,
/// SMPTE ST 2084 for 10-, 12-, 14- and 16-bit systems<br />
/// Rec. ITU-R BT.2100-0 perceptual quantization (PQ) system<br />
Smpte2084 = 16,
/// SMPTE ST 428-1<br />
Smpte428 = 17,
/// ARIB STD-B67<br />
/// Rec. ITU-R BT.2100-0 hybrid log-gamma (HLG) system<br />
Hlg = 18,
}
impl TryFrom<u8> for TransferCharacteristics {
type Error = CmsError;
#[allow(unreachable_patterns)]
fn try_from(value: u8) -> Result<Self, Self::Error> {
match value {
0 | 3 | 19..=255 => Ok(Self::Reserved),
1 => Ok(Self::Bt709),
2 => Ok(Self::Unspecified),
4 => Ok(Self::Bt470M),
5 => Ok(Self::Bt470Bg),
6 => Ok(Self::Bt601),
7 => Ok(Self::Smpte240), // unimplemented
8 => Ok(Self::Linear),
9 => Ok(Self::Log100),
10 => Ok(Self::Log100sqrt10),
11 => Ok(Self::Iec61966), // unimplemented
12 => Ok(Self::Bt1361), // unimplemented
13 => Ok(Self::Srgb),
14 => Ok(Self::Bt202010bit),
15 => Ok(Self::Bt202012bit),
16 => Ok(Self::Smpte2084),
17 => Ok(Self::Smpte428), // unimplemented
18 => Ok(Self::Hlg),
_ => Err(CmsError::InvalidCicp),
}
}
}
impl CicpColorPrimaries {
pub(crate) const fn has_chromaticity(self) -> bool {
self as u8 != Self::Reserved as u8 && self as u8 != Self::Unspecified as u8
}
pub(crate) const fn white_point(self) -> Result<Chromaticity, CmsError> {
Ok(match self {
Self::Reserved => return Err(CmsError::UnsupportedColorPrimaries(self as u8)),
Self::Bt709
| Self::Bt470Bg
| Self::Bt601
| Self::Smpte240
| Self::Bt2020
| Self::Smpte432
| Self::Ebu3213 => Chromaticity::D65,
Self::Unspecified => return Err(CmsError::UnsupportedColorPrimaries(self as u8)),
Self::Bt470M => Chromaticity { x: 0.310, y: 0.316 },
Self::GenericFilm => Chromaticity { x: 0.310, y: 0.316 },
Self::Xyz => Chromaticity {
x: 1. / 3.,
y: 1. / 3.,
},
Self::Smpte431 => Chromaticity { x: 0.314, y: 0.351 },
})
}
}
impl TryFrom<CicpColorPrimaries> for ColorPrimaries {
type Error = CmsError;
fn try_from(value: CicpColorPrimaries) -> Result<Self, Self::Error> {
match value {
CicpColorPrimaries::Reserved => Err(CmsError::UnsupportedColorPrimaries(value as u8)),
CicpColorPrimaries::Bt709 => Ok(ColorPrimaries::BT_709),
CicpColorPrimaries::Unspecified => {
Err(CmsError::UnsupportedColorPrimaries(value as u8))
}
CicpColorPrimaries::Bt470M => Ok(ColorPrimaries::BT_470M),
CicpColorPrimaries::Bt470Bg => Ok(ColorPrimaries::BT_470BG),
CicpColorPrimaries::Bt601 | CicpColorPrimaries::Smpte240 => Ok(ColorPrimaries::BT_601),
CicpColorPrimaries::GenericFilm => Ok(ColorPrimaries::GENERIC_FILM),
CicpColorPrimaries::Bt2020 => Ok(ColorPrimaries::BT_2020),
CicpColorPrimaries::Xyz => Ok(ColorPrimaries::XYZ),
// These two share primaries, but have distinct white points
CicpColorPrimaries::Smpte431 | CicpColorPrimaries::Smpte432 => {
Ok(ColorPrimaries::SMPTE_431)
}
CicpColorPrimaries::Ebu3213 => Ok(ColorPrimaries::EBU_3213),
}
}
}
impl TransferCharacteristics {
pub(crate) fn has_transfer_curve(self) -> bool {
self != Self::Reserved && self != Self::Unspecified
}
}
pub(crate) fn create_rec709_parametric() -> [f32; 5] {
const POW_EXP: f32 = 0.45;
const G: f32 = 1. / POW_EXP;
const B: f32 = (0.09929682680944f64 / 1.09929682680944f64) as f32;
const C: f32 = 1f32 / 4.5f32;
const D: f32 = (4.5f64 * 0.018053968510807f64) as f32;
const A: f32 = (1. / 1.09929682680944f64) as f32;
[G, A, B, C, D]
}
impl TryFrom<TransferCharacteristics> for ToneReprCurve {
type Error = CmsError;
/// See [ICC.1:2010](https://www.color.org/specification/ICC1v43_2010-12.pdf)
/// See [Rec. ITU-R BT.2100-2](https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.2100-2-201807-I!!PDF-E.pdf)
fn try_from(value: TransferCharacteristics) -> Result<Self, Self::Error> {
const NUM_TRC_TABLE_ENTRIES: i32 = 1024;
Ok(match value {
TransferCharacteristics::Reserved => {
return Err(CmsError::UnsupportedTrc(value as u8));
}
TransferCharacteristics::Bt709
| TransferCharacteristics::Bt601
| TransferCharacteristics::Bt202010bit
| TransferCharacteristics::Bt202012bit => {
// The opto-electronic transfer characteristic function (OETF)
// as defined in ITU-T H.273 table 3, row 1:
//
// V = (α * Lc^0.45) − (α − 1)  for 1 >= Lc >= β
// V = 4.500 * Lc for β > Lc >= 0
//
// Inverting gives the electro-optical transfer characteristic
// function (EOTF) which can be represented as ICC
// parametricCurveType with 4 parameters (ICC.1:2010 Table 5).
// Converting between the two (Lc ↔︎ Y, V ↔︎ X):
//
// Y = (a * X + b)^g for (X >= d)
// Y = c * X for (X < d)
//
// g, a, b, c, d can then be defined in terms of α and β:
//
// g = 1 / 0.45
// a = 1 / α
// b = (α - 1) / α
// c = 1 / 4.500
// d = 4.500 * β
//
// α and β are determined by solving the piecewise equations to
// ensure continuity of both value and slope at the value β.
// We use the values specified for 10-bit systems in
// https://www.itu.int/rec/R-REC-BT.2020-2-201510-I Table 4
// since this results in values similar to those in available ICC
// profiles after converting to s15Fixed16Number, providing us with
// good test coverage.
ToneReprCurve::Parametric(create_rec709_parametric().to_vec())
}
TransferCharacteristics::Unspecified => {
return Err(CmsError::UnsupportedTrc(value as u8));
}
TransferCharacteristics::Bt470M => curve_from_gamma(2.2),
TransferCharacteristics::Bt470Bg => curve_from_gamma(2.8),
TransferCharacteristics::Smpte240 => {
let table = build_trc_table(NUM_TRC_TABLE_ENTRIES, smpte240_to_linear);
ToneReprCurve::Lut(table)
}
TransferCharacteristics::Linear => curve_from_gamma(1.),
TransferCharacteristics::Log100 => {
let table = build_trc_table(NUM_TRC_TABLE_ENTRIES, log100_to_linear);
ToneReprCurve::Lut(table)
}
TransferCharacteristics::Log100sqrt10 => {
let table = build_trc_table(NUM_TRC_TABLE_ENTRIES, log100_sqrt10_to_linear);
ToneReprCurve::Lut(table)
}
TransferCharacteristics::Iec61966 => {
let table = build_trc_table(NUM_TRC_TABLE_ENTRIES, iec61966_to_linear);
ToneReprCurve::Lut(table)
}
TransferCharacteristics::Bt1361 => {
let table = build_trc_table(NUM_TRC_TABLE_ENTRIES, bt1361_to_linear);
ToneReprCurve::Lut(table)
}
TransferCharacteristics::Srgb => {
ToneReprCurve::Parametric(vec![2.4, 1. / 1.055, 0.055 / 1.055, 1. / 12.92, 0.04045])
}
TransferCharacteristics::Smpte2084 => {
let table = build_trc_table(NUM_TRC_TABLE_ENTRIES, pq_to_linear);
ToneReprCurve::Lut(table)
}
TransferCharacteristics::Smpte428 => {
let table = build_trc_table(NUM_TRC_TABLE_ENTRIES, smpte428_to_linear);
ToneReprCurve::Lut(table)
}
TransferCharacteristics::Hlg => {
let table = build_trc_table(NUM_TRC_TABLE_ENTRIES, hlg_to_linear);
ToneReprCurve::Lut(table)
}
})
}
}
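The continuity claim in the long comment above can be checked numerically. A small self-contained sketch using the high-precision α and β values that `create_rec709_parametric` hard-codes (Rec. ITU-R BT.2020-2 Table 4):

```rust
fn main() {
    // α and β solve the OETF continuity conditions at Lc = β.
    let alpha = 1.09929682680944f64;
    let beta = 0.018053968510807f64;
    // ICC parametricCurveType parameters, derived as in the comment above.
    let g = 1.0 / 0.45;
    let a = 1.0 / alpha;
    let b = (alpha - 1.0) / alpha;
    let c = 1.0 / 4.5;
    let d = 4.5 * beta;
    // Value continuity at X = d: both EOTF branches must yield β.
    let linear = c * d; // linear branch: (1/4.5) * 4.5β = β
    let power = (a * d + b).powf(g); // power branch
    assert!((linear - power).abs() < 1e-9);
}
```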
/// Matrix Coefficients Enum (from ISO/IEC 23091-4 / MPEG CICP)
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub enum MatrixCoefficients {
Identity = 0, // RGB (Identity matrix)
Bt709 = 1, // Rec. 709
Unspecified = 2, // Unspecified
Reserved = 3, // Reserved
Fcc = 4, // FCC
Bt470Bg = 5, // BT.470BG / BT.601-625
Smpte170m = 6, // SMPTE 170M / BT.601-525
Smpte240m = 7, // SMPTE 240M
YCgCo = 8, // YCgCo
Bt2020Ncl = 9, // BT.2020 (non-constant luminance)
Bt2020Cl = 10, // BT.2020 (constant luminance)
Smpte2085 = 11, // SMPTE ST 2085
ChromaticityDerivedNCL = 12, // Chromaticity-derived non-constant luminance
ChromaticityDerivedCL = 13, // Chromaticity-derived constant luminance
ICtCp = 14, // ICtCp
}
impl TryFrom<u8> for MatrixCoefficients {
type Error = CmsError;
fn try_from(value: u8) -> Result<Self, CmsError> {
match value {
0 => Ok(MatrixCoefficients::Identity),
1 => Ok(MatrixCoefficients::Bt709),
2 => Ok(MatrixCoefficients::Unspecified),
3 => Ok(MatrixCoefficients::Reserved),
4 => Ok(MatrixCoefficients::Fcc),
5 => Ok(MatrixCoefficients::Bt470Bg),
6 => Ok(MatrixCoefficients::Smpte170m),
7 => Ok(MatrixCoefficients::Smpte240m),
8 => Ok(MatrixCoefficients::YCgCo),
9 => Ok(MatrixCoefficients::Bt2020Ncl),
10 => Ok(MatrixCoefficients::Bt2020Cl),
11 => Ok(MatrixCoefficients::Smpte2085),
12 => Ok(MatrixCoefficients::ChromaticityDerivedNCL),
13 => Ok(MatrixCoefficients::ChromaticityDerivedCL),
14 => Ok(MatrixCoefficients::ICtCp),
_ => Err(CmsError::InvalidCicp),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::WHITE_POINT_D65;
#[test]
fn test_to_xyz_using_absolute_coordinates() {
let conversion_matrix = ColorPrimaries::BT_709.transform_to_xyz_d(WHITE_POINT_D65);
assert!((conversion_matrix.v[0][0] - 0.4121524015214193).abs() < 1e-14);
assert!((conversion_matrix.v[1][1] - 0.7153537403945436).abs() < 1e-14);
assert!((conversion_matrix.v[2][2] - 0.9497138466283235).abs() < 1e-14);
}
#[test]
fn test_to_xyz_using_absolute_coordinates_xyz() {
let conversion_matrix = ColorPrimaries::XYZ.transform_to_xyz_d(WHITE_POINT_D65);
assert!((conversion_matrix.v[0][0] - 0.95015469385536477).abs() < 1e-14);
assert!((conversion_matrix.v[1][1] - 1.0).abs() < 1e-14);
assert!((conversion_matrix.v[2][2] - 1.0882590676722474).abs() < 1e-14);
}
#[test]
fn test_to_xyz_using_absolute_coordinates_f() {
let conversion_matrix = ColorPrimaries::BT_709.transform_to_xyz(WHITE_POINT_D65);
assert!((conversion_matrix.v[0][0] - 0.4121524015214193).abs() < 1e-5);
assert!((conversion_matrix.v[1][1] - 0.7153537403945436).abs() < 1e-5);
assert!((conversion_matrix.v[2][2] - 0.9497138466283235).abs() < 1e-5);
}
}


@@ -0,0 +1,237 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::avx::cube::CubeAvxFma;
use crate::conversions::avx::interpolator::AvxVectorSse;
use crate::{CmsError, DataColorSpace, InPlaceStage, InterpolationMethod};
use std::arch::x86_64::*;
pub(crate) struct ACurves3AvxFma<'a, const DEPTH: usize> {
pub(crate) curve0: Box<[f32; 65536]>,
pub(crate) curve1: Box<[f32; 65536]>,
pub(crate) curve2: Box<[f32; 65536]>,
pub(crate) clut: &'a [f32],
pub(crate) grid_size: [u8; 3],
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) pcs: DataColorSpace,
}
pub(crate) struct ACurves3OptimizedAvxFma<'a> {
pub(crate) clut: &'a [f32],
pub(crate) grid_size: [u8; 3],
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) pcs: DataColorSpace,
}
pub(crate) struct ACurves3InverseAvxFma<'a, const DEPTH: usize> {
pub(crate) curve0: Box<[f32; 65536]>,
pub(crate) curve1: Box<[f32; 65536]>,
pub(crate) curve2: Box<[f32; 65536]>,
pub(crate) clut: &'a [f32],
pub(crate) grid_size: [u8; 3],
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) pcs: DataColorSpace,
}
impl<const DEPTH: usize> ACurves3AvxFma<'_, DEPTH> {
#[allow(unused_unsafe)]
#[target_feature(enable = "avx2", enable = "fma")]
unsafe fn transform_impl<Fetch: Fn(f32, f32, f32) -> AvxVectorSse>(
&self,
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
unsafe {
let scale_value = (DEPTH - 1) as f32;
for dst in dst.chunks_exact_mut(3) {
let a0 = (dst[0] * scale_value).round().min(scale_value) as u16;
let a1 = (dst[1] * scale_value).round().min(scale_value) as u16;
let a2 = (dst[2] * scale_value).round().min(scale_value) as u16;
let b0 = self.curve0[a0 as usize];
let b1 = self.curve1[a1 as usize];
let b2 = self.curve2[a2 as usize];
let v = fetch(b0, b1, b2).v;
dst[0] = f32::from_bits(_mm_extract_ps::<0>(v) as u32);
dst[1] = f32::from_bits(_mm_extract_ps::<1>(v) as u32);
dst[2] = f32::from_bits(_mm_extract_ps::<2>(v) as u32);
}
}
Ok(())
}
}
impl ACurves3OptimizedAvxFma<'_> {
#[allow(unused_unsafe)]
#[target_feature(enable = "avx2", enable = "fma")]
unsafe fn transform_impl<Fetch: Fn(f32, f32, f32) -> AvxVectorSse>(
&self,
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
unsafe {
for dst in dst.chunks_exact_mut(3) {
let a0 = dst[0];
let a1 = dst[1];
let a2 = dst[2];
let v = fetch(a0, a1, a2).v;
dst[0] = f32::from_bits(_mm_extract_ps::<0>(v) as u32);
dst[1] = f32::from_bits(_mm_extract_ps::<1>(v) as u32);
dst[2] = f32::from_bits(_mm_extract_ps::<2>(v) as u32);
}
}
Ok(())
}
}
impl<const DEPTH: usize> InPlaceStage for ACurves3AvxFma<'_, DEPTH> {
fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
let lut = CubeAvxFma::new(self.clut, self.grid_size, 3);
unsafe {
// If PCS is LAB or XYZ then linear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(dst, |x, y, z| lut.tetra_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(dst, |x, y, z| lut.pyramid_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(dst, |x, y, z| lut.prism_vec3(x, y, z))?;
}
InterpolationMethod::Linear => {
self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z))?;
}
}
}
Ok(())
}
}
impl InPlaceStage for ACurves3OptimizedAvxFma<'_> {
fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
let lut = CubeAvxFma::new(self.clut, self.grid_size, 3);
unsafe {
// If the PCS is Lab or XYZ, linear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(dst, |x, y, z| lut.tetra_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(dst, |x, y, z| lut.pyramid_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(dst, |x, y, z| lut.prism_vec3(x, y, z))?;
}
InterpolationMethod::Linear => {
self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z))?;
}
}
}
Ok(())
}
}
impl<const DEPTH: usize> ACurves3InverseAvxFma<'_, DEPTH> {
#[allow(unused_unsafe)]
#[target_feature(enable = "avx2", enable = "fma")]
unsafe fn transform_impl<Fetch: Fn(f32, f32, f32) -> AvxVectorSse>(
&self,
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
unsafe {
let v_scale_value = _mm_set1_ps((DEPTH as u32 - 1u32) as f32);
for dst in dst.chunks_exact_mut(3) {
let mut v = fetch(dst[0], dst[1], dst[2]).v;
v = _mm_mul_ps(v, v_scale_value);
v = _mm_min_ps(v, v_scale_value);
v = _mm_max_ps(v, _mm_setzero_ps());
let c = _mm_cvtps_epi32(v);
let a0 = _mm_extract_epi32::<0>(c) as u16;
let a1 = _mm_extract_epi32::<1>(c) as u16;
let a2 = _mm_extract_epi32::<2>(c) as u16;
let b0 = self.curve0[a0 as usize];
let b1 = self.curve1[a1 as usize];
let b2 = self.curve2[a2 as usize];
dst[0] = b0;
dst[1] = b1;
dst[2] = b2;
}
}
Ok(())
}
}
impl<const DEPTH: usize> InPlaceStage for ACurves3InverseAvxFma<'_, DEPTH> {
fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
let lut = CubeAvxFma::new(self.clut, self.grid_size, 3);
unsafe {
// If the PCS is Lab or XYZ, linear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(dst, |x, y, z| lut.tetra_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(dst, |x, y, z| lut.pyramid_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(dst, |x, y, z| lut.prism_vec3(x, y, z))?;
}
InterpolationMethod::Linear => {
self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z))?;
}
}
}
Ok(())
}
}
@@ -0,0 +1,182 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::avx::hypercube::HypercubeAvx;
use crate::conversions::avx::interpolator::AvxVectorSse;
use crate::{CmsError, DataColorSpace, InterpolationMethod, Stage};
use std::arch::x86_64::*;
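// Same "A curves" idea for four input channels (e.g. CMYK): each channel goes
// through its own 65536-entry 1-D curve, then a 4-D CLUT reduces the result to
// three output channels.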
pub(crate) struct ACurves4x3AvxFma<'a, const DEPTH: usize> {
pub(crate) curve0: Box<[f32; 65536]>,
pub(crate) curve1: Box<[f32; 65536]>,
pub(crate) curve2: Box<[f32; 65536]>,
pub(crate) curve3: Box<[f32; 65536]>,
pub(crate) clut: &'a [f32],
pub(crate) grid_size: [u8; 4],
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) pcs: DataColorSpace,
}
pub(crate) struct ACurves4x3AvxFmaOptimized<'a> {
pub(crate) clut: &'a [f32],
pub(crate) grid_size: [u8; 4],
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) pcs: DataColorSpace,
}
impl<const DEPTH: usize> ACurves4x3AvxFma<'_, DEPTH> {
#[allow(unused_unsafe)]
#[target_feature(enable = "avx2", enable = "fma")]
unsafe fn transform_impl<Fetch: Fn(f32, f32, f32, f32) -> AvxVectorSse>(
&self,
src: &[f32],
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
let scale_value = (DEPTH - 1) as f32;
assert_eq!(src.len() / 4, dst.len() / 3);
unsafe {
for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(3)) {
let a0 = (src[0] * scale_value).round().min(scale_value) as u16;
let a1 = (src[1] * scale_value).round().min(scale_value) as u16;
let a2 = (src[2] * scale_value).round().min(scale_value) as u16;
let a3 = (src[3] * scale_value).round().min(scale_value) as u16;
let c = self.curve0[a0 as usize];
let m = self.curve1[a1 as usize];
let y = self.curve2[a2 as usize];
let k = self.curve3[a3 as usize];
let v = fetch(c, m, y, k).v;
dst[0] = f32::from_bits(_mm_extract_ps::<0>(v) as u32);
dst[1] = f32::from_bits(_mm_extract_ps::<1>(v) as u32);
dst[2] = f32::from_bits(_mm_extract_ps::<2>(v) as u32);
}
}
Ok(())
}
}
impl ACurves4x3AvxFmaOptimized<'_> {
#[allow(unused_unsafe)]
#[target_feature(enable = "avx2", enable = "fma")]
unsafe fn transform_impl<Fetch: Fn(f32, f32, f32, f32) -> AvxVectorSse>(
&self,
src: &[f32],
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
assert_eq!(src.len() / 4, dst.len() / 3);
unsafe {
for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(3)) {
let c = src[0];
let m = src[1];
let y = src[2];
let k = src[3];
let v = fetch(c, m, y, k).v;
dst[0] = f32::from_bits(_mm_extract_ps::<0>(v) as u32);
dst[1] = f32::from_bits(_mm_extract_ps::<1>(v) as u32);
dst[2] = f32::from_bits(_mm_extract_ps::<2>(v) as u32);
}
}
Ok(())
}
}
impl<const DEPTH: usize> Stage for ACurves4x3AvxFma<'_, DEPTH> {
fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> {
let lut = HypercubeAvx::new(self.clut, self.grid_size, 3);
assert!(std::arch::is_x86_feature_detected!("avx2"));
assert!(std::arch::is_x86_feature_detected!("fma"));
unsafe {
// If the PCS is Lab or XYZ, linear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(src, dst, |x, y, z, w| lut.quadlinear_vec3(x, y, z, w));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(src, dst, |x, y, z, w| lut.tetra_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(src, dst, |x, y, z, w| lut.pyramid_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(src, dst, |x, y, z, w| lut.prism_vec3(x, y, z, w))?;
}
InterpolationMethod::Linear => {
self.transform_impl(src, dst, |x, y, z, w| lut.quadlinear_vec3(x, y, z, w))?;
}
}
}
Ok(())
}
}
impl Stage for ACurves4x3AvxFmaOptimized<'_> {
fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> {
let lut = HypercubeAvx::new(self.clut, self.grid_size, 3);
assert!(std::arch::is_x86_feature_detected!("avx2"));
assert!(std::arch::is_x86_feature_detected!("fma"));
unsafe {
// If the PCS is Lab or XYZ, linear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(src, dst, |x, y, z, w| lut.quadlinear_vec3(x, y, z, w));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(src, dst, |x, y, z, w| lut.tetra_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(src, dst, |x, y, z, w| lut.pyramid_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(src, dst, |x, y, z, w| lut.prism_vec3(x, y, z, w))?;
}
InterpolationMethod::Linear => {
self.transform_impl(src, dst, |x, y, z, w| lut.quadlinear_vec3(x, y, z, w))?;
}
}
}
Ok(())
}
}
@@ -0,0 +1,445 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::avx::interpolator::AvxVectorSse;
use crate::math::{FusedMultiplyAdd, FusedMultiplyNegAdd};
use std::arch::x86_64::*;
use std::ops::{Add, Mul, Sub};
/// 3D CLUT AVX helper.
///
/// Represents a hexahedron.
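///
/// A lattice sample at `(x, y, z)` starts at `(x * x_stride + y * y_stride + z) * 3`
/// in `array` (three `f32` components per node; see `HexahedronFetch3::fetch`).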
pub(crate) struct CubeAvxFma<'a> {
array: &'a [f32],
x_stride: u32,
y_stride: u32,
grid_size: [u8; 3],
}
struct HexahedronFetch3<'a> {
array: &'a [f32],
x_stride: u32,
y_stride: u32,
}
trait CubeFetch<T> {
fn fetch(&self, x: i32, y: i32, z: i32) -> T;
}
impl CubeFetch<AvxVectorSse> for HexahedronFetch3<'_> {
#[inline(always)]
fn fetch(&self, x: i32, y: i32, z: i32) -> AvxVectorSse {
let start = (x as u32 * self.x_stride + y as u32 * self.y_stride + z as u32) as usize * 3;
unsafe {
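// Load the first two f32 lanes with one 64-bit load (upper lanes zeroed),
// then splice the third float's bits into lane 2; lane 3 remains zero.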
let k = self.array.get_unchecked(start..);
let lo = _mm_loadu_si64(k.as_ptr() as *const _);
let hi = _mm_insert_epi32::<2>(
lo,
k.get_unchecked(2..).as_ptr().read_unaligned().to_bits() as i32,
);
AvxVectorSse {
v: _mm_castsi128_ps(hi),
}
}
}
}
impl<'a> CubeAvxFma<'a> {
pub(crate) fn new(arr: &'a [f32], grid: [u8; 3], components: usize) -> Self {
// Safety precondition: the array length must equal the full grid volume times
// the number of components. Callers must uphold this wherever the LUT is built.
assert_eq!(
grid[0] as usize * grid[1] as usize * grid[2] as usize * components,
arr.len()
);
let y_stride = grid[1] as u32;
let x_stride = y_stride * grid[0] as u32;
CubeAvxFma {
array: arr,
x_stride,
y_stride,
grid_size: grid,
}
}
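// Trilinear interpolation over the eight lattice corners. Assuming
// `a.neg_mla(a, t)` computes `a - a * t`, each `a.neg_mla(a, t).mla(b, t)`
// chain below evaluates the fused blend `a * (1 - t) + b * t`, applied
// successively along x, then y, then z.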
#[inline(always)]
fn trilinear<
T: Copy
+ From<f32>
+ Sub<T, Output = T>
+ Mul<T, Output = T>
+ Add<T, Output = T>
+ FusedMultiplyNegAdd<T>
+ FusedMultiplyAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
fetch: impl CubeFetch<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let x_d = T::from(lin_x * scale_x - x as f32);
let y_d = T::from(lin_y * scale_y - y as f32);
let z_d = T::from(lin_z * scale_z - z as f32);
let c000 = fetch.fetch(x, y, z);
let c100 = fetch.fetch(x_n, y, z);
let c010 = fetch.fetch(x, y_n, z);
let c110 = fetch.fetch(x_n, y_n, z);
let c001 = fetch.fetch(x, y, z_n);
let c101 = fetch.fetch(x_n, y, z_n);
let c011 = fetch.fetch(x, y_n, z_n);
let c111 = fetch.fetch(x_n, y_n, z_n);
let c00 = c000.neg_mla(c000, x_d).mla(c100, x_d);
let c10 = c010.neg_mla(c010, x_d).mla(c110, x_d);
let c01 = c001.neg_mla(c001, x_d).mla(c101, x_d);
let c11 = c011.neg_mla(c011, x_d).mla(c111, x_d);
let c0 = c00.neg_mla(c00, y_d).mla(c10, y_d);
let c1 = c01.neg_mla(c01, y_d).mla(c11, y_d);
c0.neg_mla(c0, z_d).mla(c1, z_d)
}
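// Pyramidal interpolation: the cell is split into three pyramids chosen by
// comparing the fractional offsets (dr, dg, db); the result is c0 plus three
// per-axis difference terms and one mixed (bilinear) term.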
#[cfg(feature = "options")]
#[inline]
fn pyramid<
T: Copy
+ From<f32>
+ Sub<T, Output = T>
+ Mul<T, Output = T>
+ Add<T, Output = T>
+ FusedMultiplyAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
fetch: impl CubeFetch<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let dr = lin_x * scale_x - x as f32;
let dg = lin_y * scale_y - y as f32;
let db = lin_z * scale_z - z as f32;
let c0 = fetch.fetch(x, y, z);
if dr > db && dg > db {
let x0 = fetch.fetch(x_n, y_n, z_n);
let x1 = fetch.fetch(x_n, y_n, z);
let x2 = fetch.fetch(x_n, y, z);
let x3 = fetch.fetch(x, y_n, z);
let c1 = x0 - x1;
let c2 = x2 - c0;
let c3 = x3 - c0;
let c4 = c0 - x3 - x2 + x1;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dr * dg))
} else if db > dr && dg > dr {
let x0 = fetch.fetch(x, y, z_n);
let x1 = fetch.fetch(x_n, y_n, z_n);
let x2 = fetch.fetch(x, y_n, z_n);
let x3 = fetch.fetch(x, y_n, z);
let c1 = x0 - c0;
let c2 = x1 - x2;
let c3 = x3 - c0;
let c4 = c0 - x3 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dg * db))
} else {
let x0 = fetch.fetch(x, y, z_n);
let x1 = fetch.fetch(x_n, y, z);
let x2 = fetch.fetch(x_n, y, z_n);
let x3 = fetch.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - c0;
let c3 = x3 - x2;
let c4 = c0 - x1 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(db * dr))
}
}
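// Tetrahedral interpolation: the cell is split into six tetrahedra selected by
// the ordering of (rx, ry, rz); c1, c2, c3 are forward differences along the
// chosen path, so the result is c0 + c1 * rx + c2 * ry + c3 * rz.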
#[cfg(feature = "options")]
#[inline]
fn tetra<
T: Copy
+ From<f32>
+ Sub<T, Output = T>
+ Mul<T, Output = T>
+ Add<T, Output = T>
+ FusedMultiplyAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
fetch: impl CubeFetch<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let rx = lin_x * scale_x - x as f32;
let ry = lin_y * scale_y - y as f32;
let rz = lin_z * scale_z - z as f32;
let c0 = fetch.fetch(x, y, z);
let c2;
let c1;
let c3;
if rx >= ry {
if ry >= rz {
//rx >= ry && ry >= rz
c1 = fetch.fetch(x_n, y, z) - c0;
c2 = fetch.fetch(x_n, y_n, z) - fetch.fetch(x_n, y, z);
c3 = fetch.fetch(x_n, y_n, z_n) - fetch.fetch(x_n, y_n, z);
} else if rx >= rz {
//rx >= rz && rz >= ry
c1 = fetch.fetch(x_n, y, z) - c0;
c2 = fetch.fetch(x_n, y_n, z_n) - fetch.fetch(x_n, y, z_n);
c3 = fetch.fetch(x_n, y, z_n) - fetch.fetch(x_n, y, z);
} else {
//rz > rx && rx >= ry
c1 = fetch.fetch(x_n, y, z_n) - fetch.fetch(x, y, z_n);
c2 = fetch.fetch(x_n, y_n, z_n) - fetch.fetch(x_n, y, z_n);
c3 = fetch.fetch(x, y, z_n) - c0;
}
} else if rx >= rz {
//ry > rx && rx >= rz
c1 = fetch.fetch(x_n, y_n, z) - fetch.fetch(x, y_n, z);
c2 = fetch.fetch(x, y_n, z) - c0;
c3 = fetch.fetch(x_n, y_n, z_n) - fetch.fetch(x_n, y_n, z);
} else if ry >= rz {
//ry >= rz && rz > rx
c1 = fetch.fetch(x_n, y_n, z_n) - fetch.fetch(x, y_n, z_n);
c2 = fetch.fetch(x, y_n, z) - c0;
c3 = fetch.fetch(x, y_n, z_n) - fetch.fetch(x, y_n, z);
} else {
//rz > ry && ry > rx
c1 = fetch.fetch(x_n, y_n, z_n) - fetch.fetch(x, y_n, z_n);
c2 = fetch.fetch(x, y_n, z_n) - fetch.fetch(x, y, z_n);
c3 = fetch.fetch(x, y, z_n) - c0;
}
let s0 = c0.mla(c1, T::from(rx));
let s1 = s0.mla(c2, T::from(ry));
s1.mla(c3, T::from(rz))
}
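// Prismatic interpolation: the cell is split into two triangular prisms by the
// `db >= dr` test; five difference vectors and two bilinear product terms
// reconstruct the value inside the chosen prism.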
#[cfg(feature = "options")]
#[inline]
fn prism<
T: Copy
+ From<f32>
+ Sub<T, Output = T>
+ Mul<T, Output = T>
+ Add<T, Output = T>
+ FusedMultiplyAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
fetch: impl CubeFetch<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let dr = lin_x * scale_x - x as f32;
let dg = lin_y * scale_y - y as f32;
let db = lin_z * scale_z - z as f32;
let c0 = fetch.fetch(x, y, z);
if db >= dr {
let x0 = fetch.fetch(x, y, z_n);
let x1 = fetch.fetch(x_n, y, z_n);
let x2 = fetch.fetch(x, y_n, z);
let x3 = fetch.fetch(x, y_n, z_n);
let x4 = fetch.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - x0;
let c3 = x2 - c0;
let c4 = c0 - x2 - x0 + x3;
let c5 = x0 - x3 - x1 + x4;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
} else {
let x0 = fetch.fetch(x_n, y, z);
let x1 = fetch.fetch(x_n, y, z_n);
let x2 = fetch.fetch(x, y_n, z);
let x3 = fetch.fetch(x_n, y_n, z);
let x4 = fetch.fetch(x_n, y_n, z_n);
let c1 = x1 - x0;
let c2 = x0 - c0;
let c3 = x2 - c0;
let c4 = x0 - x3 - x1 + x4;
let c5 = c0 - x2 - x0 + x3;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
}
}
#[inline]
pub(crate) fn trilinear_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> AvxVectorSse {
self.trilinear(
lin_x,
lin_y,
lin_z,
HexahedronFetch3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
},
)
}
#[cfg(feature = "options")]
#[inline]
pub(crate) fn prism_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> AvxVectorSse {
self.prism(
lin_x,
lin_y,
lin_z,
HexahedronFetch3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
},
)
}
#[cfg(feature = "options")]
#[inline]
pub(crate) fn pyramid_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> AvxVectorSse {
self.pyramid(
lin_x,
lin_y,
lin_z,
HexahedronFetch3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
},
)
}
#[cfg(feature = "options")]
#[inline]
pub(crate) fn tetra_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> AvxVectorSse {
self.tetra(
lin_x,
lin_y,
lin_z,
HexahedronFetch3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
},
)
}
}
@@ -0,0 +1,644 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::avx::interpolator::AvxVectorSse;
use crate::math::{FusedMultiplyAdd, FusedMultiplyNegAdd};
use crate::nd_array::lerp;
use std::arch::x86_64::*;
use std::ops::{Add, Mul, Sub};
/// 4D CLUT helper.
///
/// Represents a hypercube.
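///
/// A lattice sample at `(x, y, z, w)` starts at
/// `(x * x_stride + y * y_stride + z * z_stride + w) * 3` in `array`
/// (three `f32` components per node; see `Fetch4Vec3::fetch`).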
pub(crate) struct HypercubeAvx<'a> {
array: &'a [f32],
x_stride: u32,
y_stride: u32,
z_stride: u32,
grid_size: [u8; 4],
}
trait Fetcher4<T> {
fn fetch(&self, x: i32, y: i32, z: i32, w: i32) -> T;
}
struct Fetch4Vec3<'a> {
array: &'a [f32],
x_stride: u32,
y_stride: u32,
z_stride: u32,
}
impl Fetcher4<AvxVectorSse> for Fetch4Vec3<'_> {
#[inline(always)]
fn fetch(&self, x: i32, y: i32, z: i32, w: i32) -> AvxVectorSse {
let start = (x as u32 * self.x_stride
+ y as u32 * self.y_stride
+ z as u32 * self.z_stride
+ w as u32) as usize
* 3;
unsafe {
let k = self.array.get_unchecked(start..);
let lo = _mm_loadu_si64(k.as_ptr() as *const _);
let hi = _mm_insert_epi32::<2>(
lo,
k.get_unchecked(2..).as_ptr().read_unaligned().to_bits() as i32,
);
AvxVectorSse {
v: _mm_castsi128_ps(hi),
}
}
}
}
impl<'a> HypercubeAvx<'a> {
pub(crate) fn new(arr: &'a [f32], grid: [u8; 4], components: usize) -> Self {
// Safety precondition: the array length must equal the full grid volume times
// the number of components. Callers must uphold this wherever the LUT is built.
assert_eq!(
grid[0] as usize * grid[1] as usize * grid[2] as usize * grid[3] as usize * components,
arr.len()
);
let z_stride = grid[2] as u32;
let y_stride = z_stride * grid[1] as u32;
let x_stride = y_stride * grid[0] as u32;
HypercubeAvx {
array: arr,
x_stride,
y_stride,
z_stride,
grid_size: grid,
}
}
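// Quadrilinear interpolation: trilinear interpolation is performed on the two
// 3-D slices at w and w_n, and the two results are lerped along the w axis.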
#[inline(always)]
fn quadlinear<
T: From<f32>
+ Add<T, Output = T>
+ Mul<T, Output = T>
+ FusedMultiplyAdd<T>
+ Sub<T, Output = T>
+ Copy
+ FusedMultiplyNegAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
r: impl Fetcher4<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let lin_w = lin_w.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let scale_w = (self.grid_size[3] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let w = (lin_w * scale_w).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let w_n = (lin_w * scale_w).ceil() as i32;
let x_d = T::from(lin_x * scale_x - x as f32);
let y_d = T::from(lin_y * scale_y - y as f32);
let z_d = T::from(lin_z * scale_z - z as f32);
let w_d = T::from(lin_w * scale_w - w as f32);
let r_x1 = lerp(r.fetch(x, y, z, w), r.fetch(x_n, y, z, w), x_d);
let r_x2 = lerp(r.fetch(x, y_n, z, w), r.fetch(x_n, y_n, z, w), x_d);
let r_y1 = lerp(r_x1, r_x2, y_d);
let r_x3 = lerp(r.fetch(x, y, z_n, w), r.fetch(x_n, y, z_n, w), x_d);
let r_x4 = lerp(r.fetch(x, y_n, z_n, w), r.fetch(x_n, y_n, z_n, w), x_d);
let r_y2 = lerp(r_x3, r_x4, y_d);
let r_z1 = lerp(r_y1, r_y2, z_d);
let r_x1 = lerp(r.fetch(x, y, z, w_n), r.fetch(x_n, y, z, w_n), x_d);
let r_x2 = lerp(r.fetch(x, y_n, z, w_n), r.fetch(x_n, y_n, z, w_n), x_d);
let r_y1 = lerp(r_x1, r_x2, y_d);
let r_x3 = lerp(r.fetch(x, y, z_n, w_n), r.fetch(x_n, y, z_n, w_n), x_d);
let r_x4 = lerp(r.fetch(x, y_n, z_n, w_n), r.fetch(x_n, y_n, z_n, w_n), x_d);
let r_y2 = lerp(r_x3, r_x4, y_d);
let r_z2 = lerp(r_y1, r_y2, z_d);
lerp(r_z1, r_z2, w_d)
}
#[inline(always)]
pub(crate) fn quadlinear_vec3(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
) -> AvxVectorSse {
self.quadlinear(
lin_x,
lin_y,
lin_z,
lin_w,
Fetch4Vec3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
z_stride: self.z_stride,
},
)
}
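// The pyramid, prism and tetra variants below follow the same pattern: apply
// the corresponding 3-D rule on the w and w_n slices, then blend the two
// results along dw with a fused `neg_mla`/`mla` lerp.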
#[cfg(feature = "options")]
#[inline(always)]
fn pyramid<
T: From<f32>
+ Add<T, Output = T>
+ Mul<T, Output = T>
+ FusedMultiplyAdd<T>
+ Sub<T, Output = T>
+ Copy
+ FusedMultiplyNegAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
r: impl Fetcher4<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let lin_w = lin_w.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let scale_w = (self.grid_size[3] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let w = (lin_w * scale_w).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let w_n = (lin_w * scale_w).ceil() as i32;
let dr = lin_x * scale_x - x as f32;
let dg = lin_y * scale_y - y as f32;
let db = lin_z * scale_z - z as f32;
let dw = lin_w * scale_w - w as f32;
let c0 = r.fetch(x, y, z, w);
let w0 = if dr > db && dg > db {
let x0 = r.fetch(x_n, y_n, z_n, w);
let x1 = r.fetch(x_n, y_n, z, w);
let x2 = r.fetch(x_n, y, z, w);
let x3 = r.fetch(x, y_n, z, w);
let c1 = x0 - x1;
let c2 = x2 - c0;
let c3 = x3 - c0;
let c4 = c0 - x3 - x2 + x1;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dr * dg))
} else if db > dr && dg > dr {
let x0 = r.fetch(x, y, z_n, w);
let x1 = r.fetch(x_n, y_n, z_n, w);
let x2 = r.fetch(x, y_n, z_n, w);
let x3 = r.fetch(x, y_n, z, w);
let c1 = x0 - c0;
let c2 = x1 - x2;
let c3 = x3 - c0;
let c4 = c0 - x3 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dg * db))
} else {
let x0 = r.fetch(x, y, z_n, w);
let x1 = r.fetch(x_n, y, z, w);
let x2 = r.fetch(x_n, y, z_n, w);
let x3 = r.fetch(x_n, y_n, z_n, w);
let c1 = x0 - c0;
let c2 = x1 - c0;
let c3 = x3 - x2;
let c4 = c0 - x1 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(db * dr))
};
let c0 = r.fetch(x, y, z, w_n);
let w1 = if dr > db && dg > db {
let x0 = r.fetch(x_n, y_n, z_n, w_n);
let x1 = r.fetch(x_n, y_n, z, w_n);
let x2 = r.fetch(x_n, y, z, w_n);
let x3 = r.fetch(x, y_n, z, w_n);
let c1 = x0 - x1;
let c2 = x2 - c0;
let c3 = x3 - c0;
let c4 = c0 - x3 - x2 + x1;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dr * dg))
} else if db > dr && dg > dr {
let x0 = r.fetch(x, y, z_n, w_n);
let x1 = r.fetch(x_n, y_n, z_n, w_n);
let x2 = r.fetch(x, y_n, z_n, w_n);
let x3 = r.fetch(x, y_n, z, w_n);
let c1 = x0 - c0;
let c2 = x1 - x2;
let c3 = x3 - c0;
let c4 = c0 - x3 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dg * db))
} else {
let x0 = r.fetch(x, y, z_n, w_n);
let x1 = r.fetch(x_n, y, z, w_n);
let x2 = r.fetch(x_n, y, z_n, w_n);
let x3 = r.fetch(x_n, y_n, z_n, w_n);
let c1 = x0 - c0;
let c2 = x1 - c0;
let c3 = x3 - x2;
let c4 = c0 - x1 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(db * dr))
};
w0.neg_mla(w0, T::from(dw)).mla(w1, T::from(dw))
}
#[cfg(feature = "options")]
#[inline(always)]
pub(crate) fn pyramid_vec3(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
) -> AvxVectorSse {
self.pyramid(
lin_x,
lin_y,
lin_z,
lin_w,
Fetch4Vec3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
z_stride: self.z_stride,
},
)
}
#[cfg(feature = "options")]
#[inline(always)]
fn prism<
T: From<f32>
+ Add<T, Output = T>
+ Mul<T, Output = T>
+ FusedMultiplyAdd<T>
+ Sub<T, Output = T>
+ Copy
+ FusedMultiplyNegAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
r: impl Fetcher4<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let lin_w = lin_w.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let scale_w = (self.grid_size[3] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let w = (lin_w * scale_w).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let w_n = (lin_w * scale_w).ceil() as i32;
let dr = lin_x * scale_x - x as f32;
let dg = lin_y * scale_y - y as f32;
let db = lin_z * scale_z - z as f32;
let dw = lin_w * scale_w - w as f32;
let c0 = r.fetch(x, y, z, w);
let w0 = if db >= dr {
let x0 = r.fetch(x, y, z_n, w);
let x1 = r.fetch(x_n, y, z_n, w);
let x2 = r.fetch(x, y_n, z, w);
let x3 = r.fetch(x, y_n, z_n, w);
let x4 = r.fetch(x_n, y_n, z_n, w);
let c1 = x0 - c0;
let c2 = x1 - x0;
let c3 = x2 - c0;
let c4 = c0 - x2 - x0 + x3;
let c5 = x0 - x3 - x1 + x4;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
} else {
let x0 = r.fetch(x_n, y, z, w);
let x1 = r.fetch(x_n, y, z_n, w);
let x2 = r.fetch(x, y_n, z, w);
let x3 = r.fetch(x_n, y_n, z, w);
let x4 = r.fetch(x_n, y_n, z_n, w);
let c1 = x1 - x0;
let c2 = x0 - c0;
let c3 = x2 - c0;
let c4 = x0 - x3 - x1 + x4;
let c5 = c0 - x2 - x0 + x3;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
};
let c0 = r.fetch(x, y, z, w_n);
let w1 = if db >= dr {
let x0 = r.fetch(x, y, z_n, w_n);
let x1 = r.fetch(x_n, y, z_n, w_n);
let x2 = r.fetch(x, y_n, z, w_n);
let x3 = r.fetch(x, y_n, z_n, w_n);
let x4 = r.fetch(x_n, y_n, z_n, w_n);
let c1 = x0 - c0;
let c2 = x1 - x0;
let c3 = x2 - c0;
let c4 = c0 - x2 - x0 + x3;
let c5 = x0 - x3 - x1 + x4;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
} else {
let x0 = r.fetch(x_n, y, z, w_n);
let x1 = r.fetch(x_n, y, z_n, w_n);
let x2 = r.fetch(x, y_n, z, w_n);
let x3 = r.fetch(x_n, y_n, z, w_n);
let x4 = r.fetch(x_n, y_n, z_n, w_n);
let c1 = x1 - x0;
let c2 = x0 - c0;
let c3 = x2 - c0;
let c4 = x0 - x3 - x1 + x4;
let c5 = c0 - x2 - x0 + x3;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
};
w0.neg_mla(w0, T::from(dw)).mla(w1, T::from(dw))
}
#[cfg(feature = "options")]
#[inline(always)]
pub(crate) fn prism_vec3(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
) -> AvxVectorSse {
self.prism(
lin_x,
lin_y,
lin_z,
lin_w,
Fetch4Vec3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
z_stride: self.z_stride,
},
)
}
#[cfg(feature = "options")]
#[inline(always)]
fn tetra<
T: From<f32>
+ Add<T, Output = T>
+ Mul<T, Output = T>
+ FusedMultiplyAdd<T>
+ Sub<T, Output = T>
+ Copy
+ FusedMultiplyNegAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
r: impl Fetcher4<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let lin_w = lin_w.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let scale_w = (self.grid_size[3] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let w = (lin_w * scale_w).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let w_n = (lin_w * scale_w).ceil() as i32;
let rx = lin_x * scale_x - x as f32;
let ry = lin_y * scale_y - y as f32;
let rz = lin_z * scale_z - z as f32;
let rw = lin_w * scale_w - w as f32;
let c0 = r.fetch(x, y, z, w);
let c2;
let c1;
let c3;
if rx >= ry {
if ry >= rz {
//rx >= ry && ry >= rz
c1 = r.fetch(x_n, y, z, w) - c0;
c2 = r.fetch(x_n, y_n, z, w) - r.fetch(x_n, y, z, w);
c3 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x_n, y_n, z, w);
} else if rx >= rz {
//rx >= rz && rz >= ry
c1 = r.fetch(x_n, y, z, w) - c0;
c2 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x_n, y, z_n, w);
c3 = r.fetch(x_n, y, z_n, w) - r.fetch(x_n, y, z, w);
} else {
//rz > rx && rx >= ry
c1 = r.fetch(x_n, y, z_n, w) - r.fetch(x, y, z_n, w);
c2 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x_n, y, z_n, w);
c3 = r.fetch(x, y, z_n, w) - c0;
}
} else if rx >= rz {
//ry > rx && rx >= rz
c1 = r.fetch(x_n, y_n, z, w) - r.fetch(x, y_n, z, w);
c2 = r.fetch(x, y_n, z, w) - c0;
c3 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x_n, y_n, z, w);
} else if ry >= rz {
//ry >= rz && rz > rx
c1 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x, y_n, z_n, w);
c2 = r.fetch(x, y_n, z, w) - c0;
c3 = r.fetch(x, y_n, z_n, w) - r.fetch(x, y_n, z, w);
} else {
//rz > ry && ry > rx
c1 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x, y_n, z_n, w);
c2 = r.fetch(x, y_n, z_n, w) - r.fetch(x, y, z_n, w);
c3 = r.fetch(x, y, z_n, w) - c0;
}
let s0 = c0.mla(c1, T::from(rx));
let s1 = s0.mla(c2, T::from(ry));
let w0 = s1.mla(c3, T::from(rz));
let c0 = r.fetch(x, y, z, w_n);
let c2;
let c1;
let c3;
if rx >= ry {
if ry >= rz {
//rx >= ry && ry >= rz
c1 = r.fetch(x_n, y, z, w_n) - c0;
c2 = r.fetch(x_n, y_n, z, w_n) - r.fetch(x_n, y, z, w_n);
c3 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x_n, y_n, z, w_n);
} else if rx >= rz {
//rx >= rz && rz >= ry
c1 = r.fetch(x_n, y, z, w_n) - c0;
c2 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x_n, y, z_n, w_n);
c3 = r.fetch(x_n, y, z_n, w_n) - r.fetch(x_n, y, z, w_n);
} else {
//rz > rx && rx >= ry
c1 = r.fetch(x_n, y, z_n, w_n) - r.fetch(x, y, z_n, w_n);
c2 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x_n, y, z_n, w_n);
c3 = r.fetch(x, y, z_n, w_n) - c0;
}
} else if rx >= rz {
//ry > rx && rx >= rz
c1 = r.fetch(x_n, y_n, z, w_n) - r.fetch(x, y_n, z, w_n);
c2 = r.fetch(x, y_n, z, w_n) - c0;
c3 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x_n, y_n, z, w_n);
} else if ry >= rz {
//ry >= rz && rz > rx
c1 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x, y_n, z_n, w_n);
c2 = r.fetch(x, y_n, z, w_n) - c0;
c3 = r.fetch(x, y_n, z_n, w_n) - r.fetch(x, y_n, z, w_n);
} else {
//rz > ry && ry > rx
c1 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x, y_n, z_n, w_n);
c2 = r.fetch(x, y_n, z_n, w_n) - r.fetch(x, y, z_n, w_n);
c3 = r.fetch(x, y, z_n, w_n) - c0;
}
let s0 = c0.mla(c1, T::from(rx));
let s1 = s0.mla(c2, T::from(ry));
let w1 = s1.mla(c3, T::from(rz));
w0.neg_mla(w0, T::from(rw)).mla(w1, T::from(rw))
}
#[cfg(feature = "options")]
#[inline(always)]
pub(crate) fn tetra_vec3(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
) -> AvxVectorSse {
self.tetra(
lin_x,
lin_y,
lin_z,
lin_w,
Fetch4Vec3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
z_stride: self.z_stride,
},
)
}
}
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -0,0 +1,327 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::avx::interpolator::*;
use crate::conversions::avx::interpolator_q0_15::AvxAlignedI16;
use crate::conversions::avx::lut4_to_3_q0_15::TransformLut4To3AvxQ0_15;
use crate::conversions::interpolator::BarycentricWeight;
use crate::conversions::lut_transforms::Lut4x3Factory;
use crate::transform::PointeeSizeExpressible;
use crate::{
BarycentricWeightScale, CmsError, DataColorSpace, InterpolationMethod, Layout,
TransformExecutor, TransformOptions,
};
use num_traits::AsPrimitive;
use std::arch::x86_64::*;
use std::marker::PhantomData;
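// 4-to-3 channel LUT transform (e.g. CMYK -> RGB). Each input is reduced to a
// bin index; the fourth (K) axis selects two adjacent 3-D slices of the 4-D
// table via precomputed barycentric weights, both slices are interpolated in
// 3-D, and the results are blended by the K weight `t`.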
struct TransformLut4To3Avx<
T,
U,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> {
lut: Vec<SseAlignedF32>,
_phantom: PhantomData<T>,
_phantom1: PhantomData<U>,
interpolation_method: InterpolationMethod,
weights: Box<[BarycentricWeight<f32>; BINS]>,
color_space: DataColorSpace,
is_linear: bool,
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformLut4To3Avx<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
#[allow(unused_unsafe)]
#[target_feature(enable = "avx2", enable = "fma")]
unsafe fn transform_chunk<'b, Interpolator: AvxMdInterpolationDouble<'b, GRID_SIZE>>(
&'b self,
src: &[T],
dst: &mut [T],
) {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
let grid_size = GRID_SIZE as i32;
let grid_size3 = grid_size * grid_size * grid_size;
let value_scale = unsafe { _mm_set1_ps(((1 << BIT_DEPTH) - 1) as f32) };
let max_value = ((1 << BIT_DEPTH) - 1u32).as_();
for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(channels)) {
let c = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[0],
);
let m = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[1],
);
let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[2],
);
let k = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[3],
);
let k_weights = self.weights[k.as_()];
let w: i32 = k_weights.x;
let w_n: i32 = k_weights.x_n;
let t: f32 = k_weights.w;
let table1 = &self.lut[(w * grid_size3) as usize..];
let table2 = &self.lut[(w_n * grid_size3) as usize..];
let interpolator = Interpolator::new(table1, table2);
let v = interpolator.inter3_sse(c, m, y, &self.weights);
let (a0, b0) = (v.0.v, v.1.v);
if T::FINITE {
unsafe {
let t0 = _mm_set1_ps(t);
let hp = _mm_fnmadd_ps(a0, t0, a0);
let mut v = _mm_fmadd_ps(b0, t0, hp);
v = _mm_max_ps(v, _mm_setzero_ps());
v = _mm_mul_ps(v, value_scale);
v = _mm_min_ps(v, value_scale);
let jvz = _mm_cvtps_epi32(v);
let x = _mm_extract_epi32::<0>(jvz);
let y = _mm_extract_epi32::<1>(jvz);
let z = _mm_extract_epi32::<2>(jvz);
dst[cn.r_i()] = (x as u32).as_();
dst[cn.g_i()] = (y as u32).as_();
dst[cn.b_i()] = (z as u32).as_();
}
} else {
unsafe {
let t0 = _mm_set1_ps(t);
let hp = _mm_fnmadd_ps(a0, t0, a0);
let v = _mm_fmadd_ps(b0, t0, hp);
dst[cn.r_i()] = f32::from_bits(_mm_extract_ps::<0>(v) as u32).as_();
dst[cn.g_i()] = f32::from_bits(_mm_extract_ps::<1>(v) as u32).as_();
dst[cn.b_i()] = f32::from_bits(_mm_extract_ps::<2>(v) as u32).as_();
}
}
if channels == 4 {
dst[cn.a_i()] = max_value;
}
}
}
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformExecutor<T>
for TransformLut4To3Avx<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
if src.len() % 4 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / 4;
let dst_chunks = dst.len() / channels;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
unsafe {
if self.color_space == DataColorSpace::Lab
|| (self.is_linear && self.color_space == DataColorSpace::Rgb)
|| self.color_space == DataColorSpace::Xyz
{
self.transform_chunk::<TrilinearAvxFmaDouble<GRID_SIZE>>(src, dst);
} else {
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_chunk::<TetrahedralAvxFmaDouble<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_chunk::<PyramidAvxFmaDouble<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_chunk::<PrismaticAvxFmaDouble<GRID_SIZE>>(src, dst);
}
InterpolationMethod::Linear => {
self.transform_chunk::<TrilinearAvxFmaDouble<GRID_SIZE>>(src, dst);
}
}
}
}
Ok(())
}
}
pub(crate) struct AvxLut4x3Factory {}
impl Lut4x3Factory for AvxLut4x3Factory {
fn make_transform_4x3<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible + 'static + Send + Sync,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
>(
lut: Vec<f32>,
options: TransformOptions,
color_space: DataColorSpace,
is_linear: bool,
) -> Box<dyn TransformExecutor<T> + Send + Sync>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, u8>,
(): LutBarycentricReduction<T, u16>,
{
if options.prefer_fixed_point && BIT_DEPTH < 16 {
let q: f32 = if T::FINITE {
((1i32 << BIT_DEPTH as i32) - 1) as f32
} else {
((1i32 << 14i32) - 1) as f32
};
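// Quantize the f32 CLUT to fixed-point i16 samples at scale `q`, padding each
// RGB triple with a zero so every node occupies four aligned 16-bit lanes.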
let lut = lut
.chunks_exact(3)
.map(|x| {
AvxAlignedI16([
(x[0] * q).round() as i16,
(x[1] * q).round() as i16,
(x[2] * q).round() as i16,
0,
])
})
.collect::<Vec<_>>();
return match options.barycentric_weight_scale {
BarycentricWeightScale::Low => Box::new(TransformLut4To3AvxQ0_15::<
T,
u8,
LAYOUT,
GRID_SIZE,
BIT_DEPTH,
256,
256,
> {
lut,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<i16>::create_ranged_256::<GRID_SIZE>(),
_phantom: PhantomData,
_phantom1: PhantomData,
color_space,
is_linear,
}),
#[cfg(feature = "options")]
BarycentricWeightScale::High => Box::new(TransformLut4To3AvxQ0_15::<
T,
u16,
LAYOUT,
GRID_SIZE,
BIT_DEPTH,
65536,
65536,
> {
lut,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<i16>::create_binned::<GRID_SIZE, 65536>(),
_phantom: PhantomData,
_phantom1: PhantomData,
color_space,
is_linear,
}),
};
}
assert!(
std::arch::is_x86_feature_detected!("fma"),
"Internal configuration error: this path must not be reached without `fma` support"
);
let lut = lut
.chunks_exact(3)
.map(|x| SseAlignedF32([x[0], x[1], x[2], 0f32]))
.collect::<Vec<_>>();
match options.barycentric_weight_scale {
BarycentricWeightScale::Low => {
Box::new(
TransformLut4To3Avx::<T, u8, LAYOUT, GRID_SIZE, BIT_DEPTH, 256, 256> {
lut,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_ranged_256::<GRID_SIZE>(),
_phantom: PhantomData,
_phantom1: PhantomData,
color_space,
is_linear,
},
)
}
#[cfg(feature = "options")]
BarycentricWeightScale::High => {
Box::new(
TransformLut4To3Avx::<T, u16, LAYOUT, GRID_SIZE, BIT_DEPTH, 65536, 65536> {
lut,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_binned::<GRID_SIZE, 65536>(),
_phantom: PhantomData,
_phantom1: PhantomData,
color_space,
is_linear,
},
)
}
}
}
}
@@ -0,0 +1,207 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::avx::interpolator_q0_15::*;
use crate::conversions::interpolator::BarycentricWeight;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, DataColorSpace, InterpolationMethod, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::x86_64::*;
use std::marker::PhantomData;
pub(crate) struct TransformLut4To3AvxQ0_15<
T,
U,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> {
pub(crate) lut: Vec<AvxAlignedI16>,
pub(crate) _phantom: PhantomData<T>,
pub(crate) _phantom1: PhantomData<U>,
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) weights: Box<[BarycentricWeight<i16>; BINS]>,
pub(crate) color_space: DataColorSpace,
pub(crate) is_linear: bool,
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformLut4To3AvxQ0_15<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
#[allow(unused_unsafe)]
#[target_feature(enable = "avx2")]
unsafe fn transform_chunk<'b, Interpolator: AvxMdInterpolationQ0_15Double<'b, GRID_SIZE>>(
&'b self,
src: &[T],
dst: &mut [T],
) {
unsafe {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
let grid_size = GRID_SIZE as i32;
let grid_size3 = grid_size * grid_size * grid_size;
let f_value_scale = _mm_set1_ps(1. / ((1 << 14i32) - 1) as f32);
let max_value = ((1u32 << BIT_DEPTH) - 1).as_();
let v_max_scale = if T::FINITE {
_mm_set1_epi16(((1i32 << BIT_DEPTH) - 1) as i16)
} else {
_mm_set1_epi16(((1i32 << 14i32) - 1) as i16)
};
for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(channels)) {
let c = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[0],
);
let m = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[1],
);
let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[2],
);
let k = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[3],
);
let k_weights = self.weights[k.as_()];
let w: i32 = k_weights.x;
let w_n: i32 = k_weights.x_n;
const Q: i16 = ((1i32 << 15) - 1) as i16;
let t: i16 = k_weights.w;
let t_n: i16 = Q - t;
let table1 = &self.lut[(w * grid_size3) as usize..];
let table2 = &self.lut[(w_n * grid_size3) as usize..];
let interpolator = Interpolator::new(table1, table2);
let v = interpolator.inter3_sse(c, m, y, &self.weights);
let (a0, b0) = (v.0.v, v.1.v);
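// Blend the two K slices in fixed point: `_mm_mulhrs_epi16(a, b)` computes
// `(a * b + (1 << 14)) >> 15`, so `v` is approximately
// `(a0 * (Q - t) + b0 * t) >> 15`, a rounded Q0.15 lerp.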
let hp = _mm_mulhrs_epi16(_mm_set1_epi16(t_n), a0);
let v = _mm_add_epi16(hp, _mm_mulhrs_epi16(b0, _mm_set1_epi16(t)));
if T::FINITE {
let mut o = _mm_max_epi16(v, _mm_setzero_si128());
o = _mm_min_epi16(o, v_max_scale);
let x = _mm_extract_epi16::<0>(o);
let y = _mm_extract_epi16::<1>(o);
let z = _mm_extract_epi16::<2>(o);
dst[cn.r_i()] = (x as u32).as_();
dst[cn.g_i()] = (y as u32).as_();
dst[cn.b_i()] = (z as u32).as_();
} else {
let mut r = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(v));
r = _mm_mul_ps(r, f_value_scale);
dst[cn.r_i()] = f32::from_bits(_mm_extract_ps::<0>(r) as u32).as_();
dst[cn.g_i()] = f32::from_bits(_mm_extract_ps::<1>(r) as u32).as_();
dst[cn.b_i()] = f32::from_bits(_mm_extract_ps::<2>(r) as u32).as_();
}
if channels == 4 {
dst[cn.a_i()] = max_value;
}
}
}
}
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformExecutor<T>
for TransformLut4To3AvxQ0_15<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
if src.len() % 4 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / 4;
let dst_chunks = dst.len() / channels;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
unsafe {
if self.color_space == DataColorSpace::Lab
|| (self.is_linear && self.color_space == DataColorSpace::Rgb)
|| self.color_space == DataColorSpace::Xyz
{
self.transform_chunk::<TrilinearAvxQ0_15Double<GRID_SIZE>>(src, dst);
} else {
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_chunk::<TetrahedralAvxQ0_15Double<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_chunk::<PyramidAvxFmaQ0_15Double<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_chunk::<PrismaticAvxQ0_15Double<GRID_SIZE>>(src, dst);
}
InterpolationMethod::Linear => {
self.transform_chunk::<TrilinearAvxQ0_15Double<GRID_SIZE>>(src, dst);
}
}
}
}
Ok(())
}
}
@@ -0,0 +1,53 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
mod a_curves3;
mod a_curves4x3;
mod cube;
mod hypercube;
mod interpolator;
mod interpolator_q0_15;
mod lut4_to_3;
mod lut4_to_3_q0_15;
mod preheat_lut4x3;
mod rgb_xyz;
mod rgb_xyz_opt;
mod rgb_xyz_q2_13;
mod rgb_xyz_q2_13_opt;
mod t_lut3_to_3;
mod t_lut3_to_3_q0_15;
pub(crate) use a_curves3::{ACurves3AvxFma, ACurves3InverseAvxFma, ACurves3OptimizedAvxFma};
pub(crate) use a_curves4x3::{ACurves4x3AvxFma, ACurves4x3AvxFmaOptimized};
pub(crate) use lut4_to_3::AvxLut4x3Factory;
pub(crate) use preheat_lut4x3::Lut4x3AvxFma;
pub(crate) use rgb_xyz::TransformShaperRgbAvx;
pub(crate) use rgb_xyz_opt::TransformShaperRgbOptAvx;
pub(crate) use rgb_xyz_q2_13::TransformShaperRgbQ2_13Avx;
pub(crate) use rgb_xyz_q2_13_opt::TransformShaperRgbQ2_13OptAvx;
pub(crate) use t_lut3_to_3::AvxLut3x3Factory;
@@ -0,0 +1,135 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::avx::hypercube::HypercubeAvx;
use crate::conversions::avx::interpolator::AvxVectorSse;
use crate::trc::{lut_interp_linear_float, lut_interp_linear_float_clamped};
use crate::{CmsError, DataColorSpace, InterpolationMethod, Stage};
use std::arch::x86_64::*;
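/// 4-channel to 3-channel LUT pipeline evaluated with AVX2 + FMA: four 1D
/// linearization curves, a 4D CLUT sampled through the `fetch` closure, and
/// three 1D output curves mapping the result into the PCS.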
#[derive(Default)]
pub(crate) struct Lut4x3AvxFma {
pub(crate) linearization: [Vec<f32>; 4],
pub(crate) clut: Vec<f32>,
pub(crate) grid_size: u8,
pub(crate) output: [Vec<f32>; 3],
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) pcs: DataColorSpace,
}
impl Lut4x3AvxFma {
#[allow(unused_unsafe)]
#[target_feature(enable = "avx2", enable = "fma")]
unsafe fn transform_impl<Fetch: Fn(f32, f32, f32, f32) -> AvxVectorSse>(
&self,
src: &[f32],
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
let linearization_0 = &self.linearization[0];
let linearization_1 = &self.linearization[1];
let linearization_2 = &self.linearization[2];
let linearization_3 = &self.linearization[3];
unsafe {
let ones = _mm_set1_ps(1.);
for (dest, src) in dst.chunks_exact_mut(3).zip(src.chunks_exact(4)) {
debug_assert!(self.grid_size as i32 >= 1);
let linear_x = lut_interp_linear_float(src[0], linearization_0);
let linear_y = lut_interp_linear_float(src[1], linearization_1);
let linear_z = lut_interp_linear_float(src[2], linearization_2);
let linear_w = lut_interp_linear_float(src[3], linearization_3);
let mut v = fetch(linear_x, linear_y, linear_z, linear_w).v;
v = _mm_max_ps(v, _mm_setzero_ps());
v = _mm_min_ps(v, ones);
let pcs_x = lut_interp_linear_float_clamped(
f32::from_bits(_mm_extract_ps::<0>(v) as u32),
&self.output[0],
);
let pcs_y = lut_interp_linear_float_clamped(
f32::from_bits(_mm_extract_ps::<1>(v) as u32),
&self.output[1],
);
let pcs_z = lut_interp_linear_float_clamped(
f32::from_bits(_mm_extract_ps::<2>(v) as u32),
&self.output[2],
);
dest[0] = pcs_x;
dest[1] = pcs_y;
dest[2] = pcs_z;
}
}
Ok(())
}
}
impl Stage for Lut4x3AvxFma {
fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> {
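// Rebuilt on every call; `HypercubeAvx::new` is presumably a cheap view
// over the CLUT rather than a copy.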
let l_tbl = HypercubeAvx::new(
&self.clut,
[
self.grid_size,
self.grid_size,
self.grid_size,
self.grid_size,
],
3,
);
assert!(std::arch::is_x86_feature_detected!("avx2"));
assert!(std::arch::is_x86_feature_detected!("fma"));
unsafe {
// If the source PCS is Lab, trilinear interpolation should be used
if self.pcs == DataColorSpace::Lab {
return self
.transform_impl(src, dst, |x, y, z, w| l_tbl.quadlinear_vec3(x, y, z, w));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(src, dst, |x, y, z, w| l_tbl.tetra_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(src, dst, |x, y, z, w| l_tbl.pyramid_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(src, dst, |x, y, z, w| l_tbl.prism_vec3(x, y, z, w))?
}
InterpolationMethod::Linear => {
self.transform_impl(src, dst, |x, y, z, w| l_tbl.quadlinear_vec3(x, y, z, w))?
}
}
}
Ok(())
}
}

View File

@@ -0,0 +1,325 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::TransformMatrixShaper;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::x86_64::*;
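/// 32-byte aligned scratch buffer so `_mm256_store_si256` can use an aligned
/// store; individual 16-bit lanes are then read back as gamma-LUT indices.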
#[repr(align(32), C)]
#[derive(Debug)]
pub(crate) struct AvxAlignedU16(pub(crate) [u16; 16]);
pub(crate) struct TransformShaperRgbAvx<
T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> {
pub(crate) profile: TransformMatrixShaper<T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
impl<
T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> TransformShaperRgbAvx<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT>
where
u32: AsPrimitive<T>,
{
#[inline(always)]
unsafe fn transform_impl<const FMA: bool>(
&self,
src: &[T],
dst: &mut [T],
) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
let mut temporary0 = AvxAlignedU16([0; 16]);
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let t = self.profile.adaptation_matrix.transpose();
let scale = (GAMMA_LUT - 1) as f32;
let max_colors: T = ((1 << self.bit_depth) - 1).as_();
unsafe {
let m0 = _mm256_setr_ps(
t.v[0][0], t.v[0][1], t.v[0][2], 0., t.v[0][0], t.v[0][1], t.v[0][2], 0.,
);
let m1 = _mm256_setr_ps(
t.v[1][0], t.v[1][1], t.v[1][2], 0., t.v[1][0], t.v[1][1], t.v[1][2], 0.,
);
let m2 = _mm256_setr_ps(
t.v[2][0], t.v[2][1], t.v[2][2], 0., t.v[2][0], t.v[2][1], t.v[2][2], 0.,
);
let zeros = _mm_setzero_ps();
let v_scale = _mm256_set1_ps(scale);
let mut src = src;
let mut dst = dst;
let mut src_iter = src.chunks_exact(src_channels * 2);
let dst_iter = dst.chunks_exact_mut(dst_channels * 2);
let (mut r0, mut g0, mut b0, mut a0);
let (mut r1, mut g1, mut b1, mut a1);
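// The main loop is software-pipelined: the first pixel pair is loaded here,
// and each iteration gathers the next pair's linearized values while the
// current pair's matrix product is stored.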
if let Some(src) = src_iter.next() {
r0 = _mm_broadcast_ss(&self.profile.r_linear[src[src_cn.r_i()]._as_usize()]);
g0 = _mm_broadcast_ss(&self.profile.g_linear[src[src_cn.g_i()]._as_usize()]);
b0 = _mm_broadcast_ss(&self.profile.b_linear[src[src_cn.b_i()]._as_usize()]);
r1 = _mm_broadcast_ss(
&self.profile.r_linear[src[src_cn.r_i() + src_channels]._as_usize()],
);
g1 = _mm_broadcast_ss(
&self.profile.g_linear[src[src_cn.g_i() + src_channels]._as_usize()],
);
b1 = _mm_broadcast_ss(
&self.profile.b_linear[src[src_cn.b_i() + src_channels]._as_usize()],
);
a0 = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src[src_cn.a_i() + src_channels]
} else {
max_colors
};
} else {
r0 = _mm_setzero_ps();
g0 = _mm_setzero_ps();
b0 = _mm_setzero_ps();
a0 = max_colors;
r1 = _mm_setzero_ps();
g1 = _mm_setzero_ps();
b1 = _mm_setzero_ps();
a1 = max_colors;
}
for (src, dst) in src_iter.zip(dst_iter) {
let r = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(r0), r1);
let g = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(g0), g1);
let b = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(b0), b1);
let mut v = if FMA {
let v0 = _mm256_mul_ps(r, m0);
let v1 = _mm256_fmadd_ps(g, m1, v0);
_mm256_fmadd_ps(b, m2, v1)
} else {
let v0 = _mm256_mul_ps(r, m0);
let v1 = _mm256_mul_ps(g, m1);
let v2 = _mm256_mul_ps(b, m2);
_mm256_add_ps(_mm256_add_ps(v0, v1), v2)
};
v = _mm256_max_ps(v, _mm256_setzero_ps());
v = _mm256_mul_ps(v, v_scale);
v = _mm256_min_ps(v, v_scale);
let zx = _mm256_cvtps_epi32(v);
_mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, zx);
r0 = _mm_broadcast_ss(&self.profile.r_linear[src[src_cn.r_i()]._as_usize()]);
g0 = _mm_broadcast_ss(&self.profile.g_linear[src[src_cn.g_i()]._as_usize()]);
b0 = _mm_broadcast_ss(&self.profile.b_linear[src[src_cn.b_i()]._as_usize()]);
r1 = _mm_broadcast_ss(
&self.profile.r_linear[src[src_cn.r_i() + src_channels]._as_usize()],
);
g1 = _mm_broadcast_ss(
&self.profile.g_linear[src[src_cn.g_i() + src_channels]._as_usize()],
);
b1 = _mm_broadcast_ss(
&self.profile.b_linear[src[src_cn.b_i() + src_channels]._as_usize()],
);
dst[dst_cn.r_i()] = self.profile.r_gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.g_gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.b_gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a0;
}
dst[dst_cn.r_i() + dst_channels] = self.profile.r_gamma[temporary0.0[8] as usize];
dst[dst_cn.g_i() + dst_channels] = self.profile.g_gamma[temporary0.0[10] as usize];
dst[dst_cn.b_i() + dst_channels] = self.profile.b_gamma[temporary0.0[12] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i() + dst_channels] = a1;
}
a0 = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src[src_cn.a_i() + src_channels]
} else {
max_colors
};
}
if let Some(dst) = dst.chunks_exact_mut(dst_channels * 2).last() {
let r = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(r0), r1);
let g = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(g0), g1);
let b = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(b0), b1);
let mut v = if FMA {
let v0 = _mm256_mul_ps(r, m0);
let v1 = _mm256_fmadd_ps(g, m1, v0);
_mm256_fmadd_ps(b, m2, v1)
} else {
let v0 = _mm256_mul_ps(r, m0);
let v1 = _mm256_mul_ps(g, m1);
let v2 = _mm256_mul_ps(b, m2);
_mm256_add_ps(_mm256_add_ps(v0, v1), v2)
};
v = _mm256_max_ps(v, _mm256_setzero_ps());
v = _mm256_mul_ps(v, v_scale);
v = _mm256_min_ps(v, v_scale);
let zx = _mm256_cvtps_epi32(v);
_mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, zx);
dst[dst_cn.r_i()] = self.profile.r_gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.g_gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.b_gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a0;
}
dst[dst_cn.r_i() + dst_channels] = self.profile.r_gamma[temporary0.0[8] as usize];
dst[dst_cn.g_i() + dst_channels] = self.profile.g_gamma[temporary0.0[10] as usize];
dst[dst_cn.b_i() + dst_channels] = self.profile.b_gamma[temporary0.0[12] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i() + dst_channels] = a1;
}
}
src = src.chunks_exact(src_channels * 2).remainder();
dst = dst.chunks_exact_mut(dst_channels * 2).into_remainder();
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let r = _mm_broadcast_ss(&self.profile.r_linear[src[src_cn.r_i()]._as_usize()]);
let g = _mm_broadcast_ss(&self.profile.g_linear[src[src_cn.g_i()]._as_usize()]);
let b = _mm_broadcast_ss(&self.profile.b_linear[src[src_cn.b_i()]._as_usize()]);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let mut v = if FMA {
let v0 = _mm_mul_ps(r, _mm256_castps256_ps128(m0));
let v1 = _mm_fmadd_ps(g, _mm256_castps256_ps128(m1), v0);
_mm_fmadd_ps(b, _mm256_castps256_ps128(m2), v1)
} else {
let v0 = _mm_mul_ps(r, _mm256_castps256_ps128(m0));
let v1 = _mm_mul_ps(g, _mm256_castps256_ps128(m1));
let v2 = _mm_mul_ps(b, _mm256_castps256_ps128(m2));
_mm_add_ps(_mm_add_ps(v0, v1), v2)
};
v = _mm_max_ps(v, zeros);
v = _mm_mul_ps(v, _mm256_castps256_ps128(v_scale));
v = _mm_min_ps(v, _mm256_castps256_ps128(v_scale));
let zx = _mm_cvtps_epi32(v);
_mm_store_si128(temporary0.0.as_mut_ptr() as *mut _, zx);
dst[dst_cn.r_i()] = self.profile.r_gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.g_gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.b_gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
Ok(())
}
#[target_feature(enable = "avx2", enable = "fma")]
unsafe fn transform_fma(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
unsafe { self.transform_impl::<true>(src, dst) }
}
#[target_feature(enable = "avx2")]
unsafe fn transform_avx(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
unsafe { self.transform_impl::<false>(src, dst) }
}
}
impl<
T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> TransformExecutor<T> for TransformShaperRgbAvx<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
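// Only `fma` is re-checked here to pick the fused path; AVX2 availability
// is presumably guaranteed by whoever constructed this executor.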
unsafe {
if std::arch::is_x86_feature_detected!("fma") {
self.transform_fma(src, dst)
} else {
self.transform_avx(src, dst)
}
}
}
}

View File

@@ -0,0 +1,323 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::TransformMatrixShaperOptimized;
use crate::conversions::avx::rgb_xyz::AvxAlignedU16;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::x86_64::*;
pub(crate) struct TransformShaperRgbOptAvx<
T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> {
pub(crate) profile: TransformMatrixShaperOptimized<T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
impl<
T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> TransformShaperRgbOptAvx<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT>
where
u32: AsPrimitive<T>,
{
#[inline(always)]
unsafe fn transform_impl<const FMA: bool>(
&self,
src: &[T],
dst: &mut [T],
) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
let mut temporary0 = AvxAlignedU16([0; 16]);
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let t = self.profile.adaptation_matrix.transpose();
let scale = (GAMMA_LUT - 1) as f32;
let max_colors: T = ((1 << self.bit_depth) - 1).as_();
unsafe {
let m0 = _mm256_setr_ps(
t.v[0][0], t.v[0][1], t.v[0][2], 0., t.v[0][0], t.v[0][1], t.v[0][2], 0.,
);
let m1 = _mm256_setr_ps(
t.v[1][0], t.v[1][1], t.v[1][2], 0., t.v[1][0], t.v[1][1], t.v[1][2], 0.,
);
let m2 = _mm256_setr_ps(
t.v[2][0], t.v[2][1], t.v[2][2], 0., t.v[2][0], t.v[2][1], t.v[2][2], 0.,
);
let zeros = _mm_setzero_ps();
let v_scale = _mm256_set1_ps(scale);
let mut src = src;
let mut dst = dst;
let mut src_iter = src.chunks_exact(src_channels * 2);
let dst_iter = dst.chunks_exact_mut(dst_channels * 2);
let (mut r0, mut g0, mut b0, mut a0);
let (mut r1, mut g1, mut b1, mut a1);
if let Some(src) = src_iter.next() {
r0 = _mm_broadcast_ss(&self.profile.linear[src[src_cn.r_i()]._as_usize()]);
g0 = _mm_broadcast_ss(&self.profile.linear[src[src_cn.g_i()]._as_usize()]);
b0 = _mm_broadcast_ss(&self.profile.linear[src[src_cn.b_i()]._as_usize()]);
r1 = _mm_broadcast_ss(
&self.profile.linear[src[src_cn.r_i() + src_channels]._as_usize()],
);
g1 = _mm_broadcast_ss(
&self.profile.linear[src[src_cn.g_i() + src_channels]._as_usize()],
);
b1 = _mm_broadcast_ss(
&self.profile.linear[src[src_cn.b_i() + src_channels]._as_usize()],
);
a0 = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src[src_cn.a_i() + src_channels]
} else {
max_colors
};
} else {
r0 = _mm_setzero_ps();
g0 = _mm_setzero_ps();
b0 = _mm_setzero_ps();
a0 = max_colors;
r1 = _mm_setzero_ps();
g1 = _mm_setzero_ps();
b1 = _mm_setzero_ps();
a1 = max_colors;
}
for (src, dst) in src_iter.zip(dst_iter) {
let r = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(r0), r1);
let g = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(g0), g1);
let b = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(b0), b1);
let mut v = if FMA {
let v0 = _mm256_mul_ps(r, m0);
let v1 = _mm256_fmadd_ps(g, m1, v0);
_mm256_fmadd_ps(b, m2, v1)
} else {
let v0 = _mm256_mul_ps(r, m0);
let v1 = _mm256_mul_ps(g, m1);
let v2 = _mm256_mul_ps(b, m2);
_mm256_add_ps(_mm256_add_ps(v0, v1), v2)
};
v = _mm256_max_ps(v, _mm256_setzero_ps());
v = _mm256_mul_ps(v, v_scale);
v = _mm256_min_ps(v, v_scale);
let zx = _mm256_cvtps_epi32(v);
_mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, zx);
r0 = _mm_broadcast_ss(&self.profile.linear[src[src_cn.r_i()]._as_usize()]);
g0 = _mm_broadcast_ss(&self.profile.linear[src[src_cn.g_i()]._as_usize()]);
b0 = _mm_broadcast_ss(&self.profile.linear[src[src_cn.b_i()]._as_usize()]);
r1 = _mm_broadcast_ss(
&self.profile.linear[src[src_cn.r_i() + src_channels]._as_usize()],
);
g1 = _mm_broadcast_ss(
&self.profile.linear[src[src_cn.g_i() + src_channels]._as_usize()],
);
b1 = _mm_broadcast_ss(
&self.profile.linear[src[src_cn.b_i() + src_channels]._as_usize()],
);
dst[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a0;
}
dst[dst_cn.r_i() + dst_channels] = self.profile.gamma[temporary0.0[8] as usize];
dst[dst_cn.g_i() + dst_channels] = self.profile.gamma[temporary0.0[10] as usize];
dst[dst_cn.b_i() + dst_channels] = self.profile.gamma[temporary0.0[12] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i() + dst_channels] = a1;
}
a0 = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src[src_cn.a_i() + src_channels]
} else {
max_colors
};
}
if let Some(dst) = dst.chunks_exact_mut(dst_channels * 2).last() {
let r = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(r0), r1);
let g = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(g0), g1);
let b = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(b0), b1);
let mut v = if FMA {
let v0 = _mm256_mul_ps(r, m0);
let v1 = _mm256_fmadd_ps(g, m1, v0);
_mm256_fmadd_ps(b, m2, v1)
} else {
let v0 = _mm256_mul_ps(r, m0);
let v1 = _mm256_mul_ps(g, m1);
let v2 = _mm256_mul_ps(b, m2);
_mm256_add_ps(_mm256_add_ps(v0, v1), v2)
};
v = _mm256_max_ps(v, _mm256_setzero_ps());
v = _mm256_mul_ps(v, v_scale);
v = _mm256_min_ps(v, v_scale);
let zx = _mm256_cvtps_epi32(v);
_mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, zx);
dst[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a0;
}
dst[dst_cn.r_i() + dst_channels] = self.profile.gamma[temporary0.0[8] as usize];
dst[dst_cn.g_i() + dst_channels] = self.profile.gamma[temporary0.0[10] as usize];
dst[dst_cn.b_i() + dst_channels] = self.profile.gamma[temporary0.0[12] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i() + dst_channels] = a1;
}
}
src = src.chunks_exact(src_channels * 2).remainder();
dst = dst.chunks_exact_mut(dst_channels * 2).into_remainder();
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let r = _mm_broadcast_ss(&self.profile.linear[src[src_cn.r_i()]._as_usize()]);
let g = _mm_broadcast_ss(&self.profile.linear[src[src_cn.g_i()]._as_usize()]);
let b = _mm_broadcast_ss(&self.profile.linear[src[src_cn.b_i()]._as_usize()]);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let mut v = if FMA {
let v0 = _mm_mul_ps(r, _mm256_castps256_ps128(m0));
let v1 = _mm_fmadd_ps(g, _mm256_castps256_ps128(m1), v0);
_mm_fmadd_ps(b, _mm256_castps256_ps128(m2), v1)
} else {
let v0 = _mm_mul_ps(r, _mm256_castps256_ps128(m0));
let v1 = _mm_mul_ps(g, _mm256_castps256_ps128(m1));
let v2 = _mm_mul_ps(b, _mm256_castps256_ps128(m2));
_mm_add_ps(_mm_add_ps(v0, v1), v2)
};
v = _mm_max_ps(v, zeros);
v = _mm_mul_ps(v, _mm256_castps256_ps128(v_scale));
v = _mm_min_ps(v, _mm256_castps256_ps128(v_scale));
let zx = _mm_cvtps_epi32(v);
_mm_store_si128(temporary0.0.as_mut_ptr() as *mut _, zx);
dst[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
Ok(())
}
#[target_feature(enable = "avx2", enable = "fma")]
unsafe fn transform_fma(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
unsafe { self.transform_impl::<true>(src, dst) }
}
#[target_feature(enable = "avx2")]
unsafe fn transform_avx(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
unsafe { self.transform_impl::<false>(src, dst) }
}
}
impl<
T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> TransformExecutor<T>
for TransformShaperRgbOptAvx<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
unsafe {
if std::arch::is_x86_feature_detected!("fma") {
self.transform_fma(src, dst)
} else {
self.transform_avx(src, dst)
}
}
}
}

View File

@@ -0,0 +1,304 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::avx::rgb_xyz::AvxAlignedU16;
use crate::conversions::rgbxyz_fixed::TransformMatrixShaperFixedPoint;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::x86_64::*;
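/// Matrix-shaper transform in Q2.13 fixed point: the linear tables hold i32
/// entries that appear to carry a 16-bit sample in the low half, and the 3x3
/// matrix is applied with 16-bit multiply-add instructions.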
pub(crate) struct TransformShaperRgbQ2_13Avx<
T: Copy,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> {
pub(crate) profile: TransformMatrixShaperFixedPoint<i32, T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
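/// Broadcasts one `i32` table entry to all four lanes by viewing it as `f32`
/// and issuing `vbroadcastss`; only the load is reinterpreted and the bit
/// pattern is preserved, so this is sound for integer data.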
#[inline(always)]
pub(crate) unsafe fn _xmm_broadcast_epi32(f: &i32) -> __m128i {
let float_ref: &f32 = unsafe { &*(f as *const i32 as *const f32) };
unsafe { _mm_castps_si128(_mm_broadcast_ss(float_ref)) }
}
impl<
T: Copy + PointeeSizeExpressible + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> TransformShaperRgbQ2_13Avx<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT, PRECISION>
where
u32: AsPrimitive<T>,
{
#[target_feature(enable = "avx2")]
unsafe fn transform_avx2(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
let mut temporary0 = AvxAlignedU16([0; 16]);
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let t = self.profile.adaptation_matrix.transpose();
let max_colors = ((1 << self.bit_depth) - 1).as_();
unsafe {
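// Fixed-point matrix product via `madd_epi16`: r and g are packed into the
// low/high halves of each 32-bit lane and multiplied against interleaved
// matrix rows in `m0`, while `m2` pairs b with a constant 1 so that the
// rounding term held in the high half of `rnd` is added by the same madd.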
let m0 = _mm256_setr_epi16(
t.v[0][0], t.v[1][0], t.v[0][1], t.v[1][1], t.v[0][2], t.v[1][2], 0, 0, t.v[0][0],
t.v[1][0], t.v[0][1], t.v[1][1], t.v[0][2], t.v[1][2], 0, 0,
);
let m2 = _mm256_setr_epi16(
t.v[2][0], 1, t.v[2][1], 1, t.v[2][2], 1, 0, 0, t.v[2][0], 1, t.v[2][1], 1,
t.v[2][2], 1, 0, 0,
);
let rnd_val = ((1i32 << (PRECISION - 1)) as i16).to_ne_bytes();
let rnd = _mm256_set1_epi32(i32::from_ne_bytes([0, 0, rnd_val[0], rnd_val[1]]));
let zeros = _mm256_setzero_si256();
let v_max_value = _mm256_set1_epi32(GAMMA_LUT as i32 - 1);
let mut src = src;
let mut dst = dst;
let mut src_iter = src.chunks_exact(src_channels * 2);
let dst_iter = dst.chunks_exact_mut(dst_channels * 2);
let (mut r0, mut g0, mut b0, mut a0);
let (mut r1, mut g1, mut b1, mut a1);
if let Some(src) = src_iter.next() {
r0 = _xmm_broadcast_epi32(&self.profile.r_linear[src[src_cn.r_i()]._as_usize()]);
g0 = _xmm_broadcast_epi32(&self.profile.g_linear[src[src_cn.g_i()]._as_usize()]);
b0 = _xmm_broadcast_epi32(&self.profile.b_linear[src[src_cn.b_i()]._as_usize()]);
r1 = _xmm_broadcast_epi32(
&self.profile.r_linear[src[src_cn.r_i() + src_channels]._as_usize()],
);
g1 = _xmm_broadcast_epi32(
&self.profile.g_linear[src[src_cn.g_i() + src_channels]._as_usize()],
);
b1 = _xmm_broadcast_epi32(
&self.profile.b_linear[src[src_cn.b_i() + src_channels]._as_usize()],
);
a0 = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src[src_cn.a_i() + src_channels]
} else {
max_colors
};
} else {
r0 = _mm_setzero_si128();
g0 = _mm_setzero_si128();
b0 = _mm_setzero_si128();
a0 = max_colors;
r1 = _mm_setzero_si128();
g1 = _mm_setzero_si128();
b1 = _mm_setzero_si128();
a1 = max_colors;
}
for (src, dst) in src_iter.zip(dst_iter) {
let zr0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(r0), r1);
let mut zg0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(g0), g1);
let zb0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(b0), b1);
zg0 = _mm256_slli_epi32::<16>(zg0);
let zrg0 = _mm256_or_si256(zr0, zg0);
let zbz0 = _mm256_or_si256(zb0, rnd);
let va0 = _mm256_madd_epi16(zrg0, m0);
let va1 = _mm256_madd_epi16(zbz0, m2);
let mut v0 = _mm256_add_epi32(va0, va1);
v0 = _mm256_srai_epi32::<PRECISION>(v0);
v0 = _mm256_max_epi32(v0, zeros);
v0 = _mm256_min_epi32(v0, v_max_value);
_mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, v0);
r0 = _xmm_broadcast_epi32(&self.profile.r_linear[src[src_cn.r_i()]._as_usize()]);
g0 = _xmm_broadcast_epi32(&self.profile.g_linear[src[src_cn.g_i()]._as_usize()]);
b0 = _xmm_broadcast_epi32(&self.profile.b_linear[src[src_cn.b_i()]._as_usize()]);
r1 = _xmm_broadcast_epi32(
&self.profile.r_linear[src[src_cn.r_i() + src_channels]._as_usize()],
);
g1 = _xmm_broadcast_epi32(
&self.profile.g_linear[src[src_cn.g_i() + src_channels]._as_usize()],
);
b1 = _xmm_broadcast_epi32(
&self.profile.b_linear[src[src_cn.b_i() + src_channels]._as_usize()],
);
dst[dst_cn.r_i()] = self.profile.r_gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.g_gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.b_gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a0;
}
dst[dst_cn.r_i() + dst_channels] = self.profile.r_gamma[temporary0.0[8] as usize];
dst[dst_cn.g_i() + dst_channels] = self.profile.g_gamma[temporary0.0[10] as usize];
dst[dst_cn.b_i() + dst_channels] = self.profile.b_gamma[temporary0.0[12] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i() + dst_channels] = a1;
}
a0 = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src[src_cn.a_i() + src_channels]
} else {
max_colors
};
}
if let Some(dst) = dst.chunks_exact_mut(dst_channels * 2).last() {
let zr0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(r0), r1);
let mut zg0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(g0), g1);
let zb0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(b0), b1);
zg0 = _mm256_slli_epi32::<16>(zg0);
let zrg0 = _mm256_or_si256(zr0, zg0);
let zbz0 = _mm256_or_si256(zb0, rnd);
let va0 = _mm256_madd_epi16(zrg0, m0);
let va1 = _mm256_madd_epi16(zbz0, m2);
let mut v0 = _mm256_add_epi32(va0, va1);
v0 = _mm256_srai_epi32::<PRECISION>(v0);
v0 = _mm256_max_epi32(v0, zeros);
v0 = _mm256_min_epi32(v0, v_max_value);
_mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, v0);
dst[dst_cn.r_i()] = self.profile.r_gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.g_gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.b_gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a0;
}
dst[dst_cn.r_i() + dst_channels] = self.profile.r_gamma[temporary0.0[8] as usize];
dst[dst_cn.g_i() + dst_channels] = self.profile.g_gamma[temporary0.0[10] as usize];
dst[dst_cn.b_i() + dst_channels] = self.profile.b_gamma[temporary0.0[12] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i() + dst_channels] = a1;
}
}
src = src.chunks_exact(src_channels * 2).remainder();
dst = dst.chunks_exact_mut(dst_channels * 2).into_remainder();
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let r = _xmm_broadcast_epi32(&self.profile.r_linear[src[src_cn.r_i()]._as_usize()]);
let mut g =
_xmm_broadcast_epi32(&self.profile.g_linear[src[src_cn.g_i()]._as_usize()]);
let b = _xmm_broadcast_epi32(&self.profile.b_linear[src[src_cn.b_i()]._as_usize()]);
g = _mm_slli_epi32::<16>(g);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let zrg0 = _mm_or_si128(r, g);
let zbz0 = _mm_or_si128(b, _mm256_castsi256_si128(rnd));
let v0 = _mm_madd_epi16(zrg0, _mm256_castsi256_si128(m0));
let v1 = _mm_madd_epi16(zbz0, _mm256_castsi256_si128(m2));
let mut v = _mm_add_epi32(v0, v1);
v = _mm_srai_epi32::<PRECISION>(v);
v = _mm_max_epi32(v, _mm_setzero_si128());
v = _mm_min_epi32(v, _mm256_castsi256_si128(v_max_value));
_mm_store_si128(temporary0.0.as_mut_ptr() as *mut _, v);
dst[dst_cn.r_i()] = self.profile.r_gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.g_gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.b_gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
Ok(())
}
}
impl<
T: Copy + PointeeSizeExpressible + 'static + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> TransformExecutor<T>
for TransformShaperRgbQ2_13Avx<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT, PRECISION>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
unsafe { self.transform_avx2(src, dst) }
}
}

View File

@@ -0,0 +1,298 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::avx::rgb_xyz::AvxAlignedU16;
use crate::conversions::avx::rgb_xyz_q2_13::_xmm_broadcast_epi32;
use crate::conversions::rgbxyz_fixed::TransformMatrixShaperFixedPointOpt;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::x86_64::*;
pub(crate) struct TransformShaperRgbQ2_13OptAvx<
T: Copy,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> {
pub(crate) profile: TransformMatrixShaperFixedPointOpt<i32, i16, T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
impl<
T: Copy + PointeeSizeExpressible + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> TransformShaperRgbQ2_13OptAvx<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT, PRECISION>
where
u32: AsPrimitive<T>,
{
#[target_feature(enable = "avx2")]
unsafe fn transform_avx2(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
let mut temporary0 = AvxAlignedU16([0; 16]);
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let t = self.profile.adaptation_matrix.transpose();
let max_colors = ((1 << self.bit_depth) - 1).as_();
unsafe {
let m0 = _mm256_setr_epi16(
t.v[0][0], t.v[1][0], t.v[0][1], t.v[1][1], t.v[0][2], t.v[1][2], 0, 0, t.v[0][0],
t.v[1][0], t.v[0][1], t.v[1][1], t.v[0][2], t.v[1][2], 0, 0,
);
let m2 = _mm256_setr_epi16(
t.v[2][0], 1, t.v[2][1], 1, t.v[2][2], 1, 0, 0, t.v[2][0], 1, t.v[2][1], 1,
t.v[2][2], 1, 0, 0,
);
let rnd_val = ((1i32 << (PRECISION - 1)) as i16).to_ne_bytes();
let rnd = _mm256_set1_epi32(i32::from_ne_bytes([0, 0, rnd_val[0], rnd_val[1]]));
let zeros = _mm256_setzero_si256();
let v_max_value = _mm256_set1_epi32(GAMMA_LUT as i32 - 1);
let (mut r0, mut g0, mut b0, mut a0);
let (mut r1, mut g1, mut b1, mut a1);
let mut src_iter = src.chunks_exact(src_channels * 2);
if let Some(src0) = src_iter.next() {
r0 = _xmm_broadcast_epi32(&self.profile.linear[src0[src_cn.r_i()]._as_usize()]);
g0 = _xmm_broadcast_epi32(&self.profile.linear[src0[src_cn.g_i()]._as_usize()]);
b0 = _xmm_broadcast_epi32(&self.profile.linear[src0[src_cn.b_i()]._as_usize()]);
r1 = _xmm_broadcast_epi32(
&self.profile.linear[src0[src_cn.r_i() + src_channels]._as_usize()],
);
g1 = _xmm_broadcast_epi32(
&self.profile.linear[src0[src_cn.g_i() + src_channels]._as_usize()],
);
b1 = _xmm_broadcast_epi32(
&self.profile.linear[src0[src_cn.b_i() + src_channels]._as_usize()],
);
a0 = if src_channels == 4 {
src0[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src0[src_cn.a_i() + src_channels]
} else {
max_colors
};
} else {
r0 = _mm_setzero_si128();
g0 = _mm_setzero_si128();
b0 = _mm_setzero_si128();
a0 = max_colors;
r1 = _mm_setzero_si128();
g1 = _mm_setzero_si128();
b1 = _mm_setzero_si128();
a1 = max_colors;
}
for (src, dst) in src_iter.zip(dst.chunks_exact_mut(dst_channels * 2)) {
let zr0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(r0), r1);
let mut zg0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(g0), g1);
let zb0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(b0), b1);
zg0 = _mm256_slli_epi32::<16>(zg0);
let zrg0 = _mm256_or_si256(zr0, zg0);
let zbz0 = _mm256_or_si256(zb0, rnd);
let va0 = _mm256_madd_epi16(zrg0, m0);
let va1 = _mm256_madd_epi16(zbz0, m2);
let mut v0 = _mm256_add_epi32(va0, va1);
v0 = _mm256_srai_epi32::<PRECISION>(v0);
v0 = _mm256_max_epi32(v0, zeros);
v0 = _mm256_min_epi32(v0, v_max_value);
_mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, v0);
r0 = _xmm_broadcast_epi32(&self.profile.linear[src[src_cn.r_i()]._as_usize()]);
g0 = _xmm_broadcast_epi32(&self.profile.linear[src[src_cn.g_i()]._as_usize()]);
b0 = _xmm_broadcast_epi32(&self.profile.linear[src[src_cn.b_i()]._as_usize()]);
r1 = _xmm_broadcast_epi32(
&self.profile.linear[src[src_cn.r_i() + src_channels]._as_usize()],
);
g1 = _xmm_broadcast_epi32(
&self.profile.linear[src[src_cn.g_i() + src_channels]._as_usize()],
);
b1 = _xmm_broadcast_epi32(
&self.profile.linear[src[src_cn.b_i() + src_channels]._as_usize()],
);
dst[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a0;
}
dst[dst_cn.r_i() + dst_channels] = self.profile.gamma[temporary0.0[8] as usize];
dst[dst_cn.g_i() + dst_channels] = self.profile.gamma[temporary0.0[10] as usize];
dst[dst_cn.b_i() + dst_channels] = self.profile.gamma[temporary0.0[12] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i() + dst_channels] = a1;
}
a0 = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src[src_cn.a_i() + src_channels]
} else {
max_colors
};
}
if let Some(dst) = dst.chunks_exact_mut(dst_channels * 2).last() {
let zr0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(r0), r1);
let mut zg0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(g0), g1);
let zb0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(b0), b1);
zg0 = _mm256_slli_epi32::<16>(zg0);
let zrg0 = _mm256_or_si256(zr0, zg0);
let zbz0 = _mm256_or_si256(zb0, rnd);
let va0 = _mm256_madd_epi16(zrg0, m0);
let va1 = _mm256_madd_epi16(zbz0, m2);
let mut v0 = _mm256_add_epi32(va0, va1);
v0 = _mm256_srai_epi32::<PRECISION>(v0);
v0 = _mm256_max_epi32(v0, zeros);
v0 = _mm256_min_epi32(v0, v_max_value);
_mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, v0);
dst[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a0;
}
dst[dst_cn.r_i() + dst_channels] = self.profile.gamma[temporary0.0[8] as usize];
dst[dst_cn.g_i() + dst_channels] = self.profile.gamma[temporary0.0[10] as usize];
dst[dst_cn.b_i() + dst_channels] = self.profile.gamma[temporary0.0[12] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i() + dst_channels] = a1;
}
}
let src = src.chunks_exact(src_channels * 2).remainder();
let dst = dst.chunks_exact_mut(dst_channels * 2).into_remainder();
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let r = _xmm_broadcast_epi32(&self.profile.linear[src[src_cn.r_i()]._as_usize()]);
let mut g =
_xmm_broadcast_epi32(&self.profile.linear[src[src_cn.g_i()]._as_usize()]);
let b = _xmm_broadcast_epi32(&self.profile.linear[src[src_cn.b_i()]._as_usize()]);
g = _mm_slli_epi32::<16>(g);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let zrg0 = _mm_or_si128(r, g);
let zbz0 = _mm_or_si128(b, _mm256_castsi256_si128(rnd));
let v0 = _mm_madd_epi16(zrg0, _mm256_castsi256_si128(m0));
let v1 = _mm_madd_epi16(zbz0, _mm256_castsi256_si128(m2));
let mut v = _mm_add_epi32(v0, v1);
v = _mm_srai_epi32::<PRECISION>(v);
v = _mm_max_epi32(v, _mm_setzero_si128());
v = _mm_min_epi32(v, _mm256_castsi256_si128(v_max_value));
_mm_store_si128(temporary0.0.as_mut_ptr() as *mut _, v);
dst[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
Ok(())
}
}
impl<
T: Copy + PointeeSizeExpressible + 'static + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> TransformExecutor<T>
for TransformShaperRgbQ2_13OptAvx<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT, PRECISION>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
unsafe { self.transform_avx2(src, dst) }
}
}

View File

@@ -0,0 +1,344 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::avx::interpolator::*;
use crate::conversions::avx::interpolator_q0_15::AvxAlignedI16;
use crate::conversions::avx::t_lut3_to_3_q0_15::TransformLut3x3AvxQ0_15;
use crate::conversions::interpolator::BarycentricWeight;
use crate::conversions::lut_transforms::Lut3x3Factory;
use crate::transform::PointeeSizeExpressible;
use crate::{
BarycentricWeightScale, CmsError, DataColorSpace, InterpolationMethod, Layout,
TransformExecutor, TransformOptions,
};
use num_traits::AsPrimitive;
use std::arch::x86_64::*;
use std::marker::PhantomData;
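/// 3-channel to 3-channel CLUT transform evaluated with AVX2 + FMA. Inputs
/// are first reduced to `BARYCENTRIC_BINS` levels and then looked up in a
/// table of `BINS` precomputed barycentric weights before interpolation.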
struct TransformLut3x3AvxFma<
T,
U,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> {
lut: Vec<SseAlignedF32>,
_phantom: PhantomData<T>,
_phantom2: PhantomData<U>,
interpolation_method: InterpolationMethod,
weights: Box<[BarycentricWeight<f32>; BINS]>,
color_space: DataColorSpace,
is_linear: bool,
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformLut3x3AvxFma<T, U, SRC_LAYOUT, DST_LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
#[allow(unused_unsafe)]
#[target_feature(enable = "avx2", enable = "fma")]
unsafe fn transform_chunk<'b, Interpolator: AvxMdInterpolation<'b, GRID_SIZE>>(
&'b self,
src: &[T],
dst: &mut [T],
) {
let src_cn = Layout::from(SRC_LAYOUT);
let src_channels = src_cn.channels();
let dst_cn = Layout::from(DST_LAYOUT);
let dst_channels = dst_cn.channels();
let value_scale = unsafe { _mm_set1_ps(((1 << BIT_DEPTH) - 1) as f32) };
let max_value = ((1u32 << BIT_DEPTH) - 1).as_();
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let x = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.r_i()],
);
let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.g_i()],
);
let z = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.b_i()],
);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_value
};
let tetrahedral = Interpolator::new(&self.lut);
let v = tetrahedral.inter3_sse(x, y, z, &self.weights);
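// Finite (integer) sample types are rescaled to the target bit depth and
// rounded via `_mm_cvtps_epi32`; float samples take the interpolated lanes
// through bit-for-bit with `_mm_extract_ps`.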
if T::FINITE {
unsafe {
let mut r = _mm_mul_ps(v.v, value_scale);
r = _mm_max_ps(r, _mm_setzero_ps());
r = _mm_min_ps(r, value_scale);
let jvz = _mm_cvtps_epi32(r);
let x = _mm_extract_epi32::<0>(jvz);
let y = _mm_extract_epi32::<1>(jvz);
let z = _mm_extract_epi32::<2>(jvz);
dst[dst_cn.r_i()] = (x as u32).as_();
dst[dst_cn.g_i()] = (y as u32).as_();
dst[dst_cn.b_i()] = (z as u32).as_();
}
} else {
unsafe {
dst[dst_cn.r_i()] = f32::from_bits(_mm_extract_ps::<0>(v.v) as u32).as_();
dst[dst_cn.g_i()] = f32::from_bits(_mm_extract_ps::<1>(v.v) as u32).as_();
dst[dst_cn.b_i()] = f32::from_bits(_mm_extract_ps::<2>(v.v) as u32).as_();
}
}
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformExecutor<T>
for TransformLut3x3AvxFma<
T,
U,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
BINS,
BARYCENTRIC_BINS,
>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let src_channels = src_cn.channels();
let dst_cn = Layout::from(DST_LAYOUT);
let dst_channels = dst_cn.channels();
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / src_channels;
let dst_chunks = dst.len() / dst_channels;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
unsafe {
if self.color_space == DataColorSpace::Lab
|| (self.is_linear && self.color_space == DataColorSpace::Rgb)
|| self.color_space == DataColorSpace::Xyz
{
self.transform_chunk::<TrilinearAvxFma<GRID_SIZE>>(src, dst);
} else {
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_chunk::<TetrahedralAvxFma<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_chunk::<PyramidalAvxFma<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_chunk::<PrismaticAvxFma<GRID_SIZE>>(src, dst);
}
InterpolationMethod::Linear => {
self.transform_chunk::<TrilinearAvxFma<GRID_SIZE>>(src, dst);
}
}
}
}
Ok(())
}
}
pub(crate) struct AvxLut3x3Factory {}
impl Lut3x3Factory for AvxLut3x3Factory {
fn make_transform_3x3<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible + 'static + Send + Sync,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
>(
lut: Vec<f32>,
options: TransformOptions,
color_space: DataColorSpace,
is_linear: bool,
) -> Box<dyn TransformExecutor<T> + Send + Sync>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, u8>,
(): LutBarycentricReduction<T, u16>,
{
if options.prefer_fixed_point && BIT_DEPTH < 16 {
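// Quantize the f32 table to i16: finite integer sample types use their full
// bit-depth range, while float samples are scaled to 2^14 - 1, which appears
// to leave headroom for the fixed-point interpolation arithmetic.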
let q: f32 = if T::FINITE {
((1i32 << BIT_DEPTH as i32) - 1) as f32
} else {
((1i32 << 14i32) - 1) as f32
};
let lut = lut
.chunks_exact(3)
.map(|x| {
AvxAlignedI16([
(x[0] * q).round() as i16,
(x[1] * q).round() as i16,
(x[2] * q).round() as i16,
0,
])
})
.collect::<Vec<_>>();
return match options.barycentric_weight_scale {
BarycentricWeightScale::Low => Box::new(TransformLut3x3AvxQ0_15::<
T,
u8,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
256,
256,
> {
lut,
_phantom: PhantomData,
_phantom2: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<i16>::create_ranged_256::<GRID_SIZE>(),
color_space,
is_linear,
}),
#[cfg(feature = "options")]
BarycentricWeightScale::High => Box::new(TransformLut3x3AvxQ0_15::<
T,
u16,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
65536,
65536,
> {
lut,
_phantom: PhantomData,
_phantom2: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<i16>::create_binned::<GRID_SIZE, 65536>(),
color_space,
is_linear,
}),
};
}
assert!(
std::arch::is_x86_feature_detected!("fma"),
"Internal configuration error, this might not be called without `fma` feature"
);
let lut = lut
.chunks_exact(3)
.map(|x| SseAlignedF32([x[0], x[1], x[2], 0f32]))
.collect::<Vec<_>>();
match options.barycentric_weight_scale {
BarycentricWeightScale::Low => Box::new(TransformLut3x3AvxFma::<
T,
u8,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
256,
256,
> {
lut,
_phantom: PhantomData,
_phantom2: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_ranged_256::<GRID_SIZE>(),
color_space,
is_linear,
}),
#[cfg(feature = "options")]
BarycentricWeightScale::High => Box::new(TransformLut3x3AvxFma::<
T,
u16,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
65536,
65536,
> {
lut,
_phantom: PhantomData,
_phantom2: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_binned::<GRID_SIZE, 65536>(),
color_space,
is_linear,
}),
}
}
}

View File

@@ -0,0 +1,222 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::avx::interpolator_q0_15::*;
use crate::conversions::interpolator::BarycentricWeight;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, DataColorSpace, InterpolationMethod, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::x86_64::*;
use std::marker::PhantomData;
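/// Q0.15 fixed-point variant of the 3x3 CLUT transform: the table holds i16
/// samples and interpolation stays in 16-bit integer SIMD throughout.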
pub(crate) struct TransformLut3x3AvxQ0_15<
T,
U,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> {
pub(crate) lut: Vec<AvxAlignedI16>,
pub(crate) _phantom: PhantomData<T>,
pub(crate) _phantom2: PhantomData<U>,
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) weights: Box<[BarycentricWeight<i16>; BINS]>,
pub(crate) color_space: DataColorSpace,
pub(crate) is_linear: bool,
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
>
TransformLut3x3AvxQ0_15<
T,
U,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
BINS,
BARYCENTRIC_BINS,
>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
#[allow(unused_unsafe)]
#[target_feature(enable = "avx2")]
unsafe fn transform_chunk<'b, Interpolator: AvxMdInterpolationQ0_15<'b, GRID_SIZE>>(
&'b self,
src: &[T],
dst: &mut [T],
) {
unsafe {
let src_cn = Layout::from(SRC_LAYOUT);
let src_channels = src_cn.channels();
let dst_cn = Layout::from(DST_LAYOUT);
let dst_channels = dst_cn.channels();
let f_value_scale = _mm_set1_ps(1. / ((1 << 14i32) - 1) as f32);
let max_value = ((1u32 << BIT_DEPTH) - 1).as_();
let v_max_scale = if T::FINITE {
_mm_set1_epi16(((1i32 << BIT_DEPTH) - 1) as i16)
} else {
_mm_set1_epi16(((1i32 << 14i32) - 1) as i16)
};
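// Finite integer outputs are clamped to [0, 2^BIT_DEPTH - 1]; float-backed
// outputs are instead normalized back to [0, 1] by 1 / (2^14 - 1), the
// fixed-point scale the interpolated values are produced on.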
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let x = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.r_i()],
);
let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.g_i()],
);
let z = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.b_i()],
);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_value
};
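// The 3x3 LUT only transforms RGB: alpha is carried through unchanged, and a
// missing alpha channel is synthesized as the maximum representable value.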
let interpolator = Interpolator::new(&self.lut);
let v = interpolator.inter3_sse(x, y, z, &self.weights);
if T::FINITE {
let mut o = _mm_max_epi16(v.v, _mm_setzero_si128());
o = _mm_min_epi16(o, v_max_scale);
let x = _mm_extract_epi16::<0>(o);
let y = _mm_extract_epi16::<1>(o);
let z = _mm_extract_epi16::<2>(o);
dst[dst_cn.r_i()] = (x as u32).as_();
dst[dst_cn.g_i()] = (y as u32).as_();
dst[dst_cn.b_i()] = (z as u32).as_();
} else {
let mut r = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(v.v));
r = _mm_mul_ps(r, f_value_scale);
dst[dst_cn.r_i()] = f32::from_bits(_mm_extract_ps::<0>(r) as u32).as_();
dst[dst_cn.g_i()] = f32::from_bits(_mm_extract_ps::<1>(r) as u32).as_();
dst[dst_cn.b_i()] = f32::from_bits(_mm_extract_ps::<2>(r) as u32).as_();
}
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
}
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformExecutor<T>
for TransformLut3x3AvxQ0_15<
T,
U,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
BINS,
BARYCENTRIC_BINS,
>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let src_channels = src_cn.channels();
let dst_cn = Layout::from(DST_LAYOUT);
let dst_channels = dst_cn.channels();
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / src_channels;
let dst_chunks = dst.len() / dst_channels;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
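// Lab, XYZ and linearized RGB tables are always sampled with trilinear
// interpolation; the user-selected interpolation method only applies to the
// remaining color spaces.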
unsafe {
if self.color_space == DataColorSpace::Lab
|| (self.is_linear && self.color_space == DataColorSpace::Rgb)
|| self.color_space == DataColorSpace::Xyz
{
self.transform_chunk::<TrilinearAvxQ0_15<GRID_SIZE>>(src, dst);
} else {
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_chunk::<TetrahedralAvxQ0_15<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_chunk::<PyramidalAvxQ0_15<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_chunk::<PrismaticAvxQ0_15<GRID_SIZE>>(src, dst);
}
InterpolationMethod::Linear => {
self.transform_chunk::<TrilinearAvxQ0_15<GRID_SIZE>>(src, dst);
}
}
}
}
Ok(())
}
}

@@ -0,0 +1,33 @@
/*
* // Copyright (c) Radzivon Bartoshyk 5/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
mod rgb_xyz_opt;
mod rgb_xyz_q2_13_opt;
pub(crate) use rgb_xyz_opt::TransformShaperRgbOptAvx512;
pub(crate) use rgb_xyz_q2_13_opt::TransformShaperRgbQ2_13OptAvx512;

@@ -0,0 +1,420 @@
/*
* // Copyright (c) Radzivon Bartoshyk 5/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::TransformMatrixShaperOptimized;
use crate::conversions::avx512::rgb_xyz_q2_13_opt::{
AvxAlignedU16, split_by_twos, split_by_twos_mut,
};
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::x86_64::*;
pub(crate) struct TransformShaperRgbOptAvx512<
T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> {
pub(crate) profile: TransformMatrixShaperOptimized<T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
impl<
T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> TransformShaperRgbOptAvx512<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT>
where
u32: AsPrimitive<T>,
{
#[target_feature(enable = "avx512bw", enable = "avx512vl", enable = "fma")]
unsafe fn transform_impl(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let t = self.profile.adaptation_matrix.transpose();
let scale = (GAMMA_LUT - 1) as f32;
let max_colors: T = ((1 << self.bit_depth) - 1).as_();
let (src_chunks, src_remainder) = split_by_twos(src, src_channels);
let (dst_chunks, dst_remainder) = split_by_twos_mut(dst, dst_channels);
let mut temporary0 = AvxAlignedU16([0; 16]);
let mut temporary1 = AvxAlignedU16([0; 16]);
unsafe {
let m0 = _mm256_setr_ps(
t.v[0][0], t.v[0][1], t.v[0][2], 0f32, t.v[0][0], t.v[0][1], t.v[0][2], 0f32,
);
let m1 = _mm256_setr_ps(
t.v[1][0], t.v[1][1], t.v[1][2], 0f32, t.v[1][0], t.v[1][1], t.v[1][2], 0f32,
);
let m2 = _mm256_setr_ps(
t.v[2][0], t.v[2][1], t.v[2][2], 0f32, t.v[2][0], t.v[2][1], t.v[2][2], 0f32,
);
let zeros = _mm_setzero_ps();
let v_scale = _mm256_set1_ps(scale);
if !src_chunks.is_empty() {
let (src0, src1) = src_chunks.split_at(src_chunks.len() / 2);
let (dst0, dst1) = dst_chunks.split_at_mut(dst_chunks.len() / 2);
let mut src_iter0 = src0.chunks_exact(src_channels * 2);
let mut src_iter1 = src1.chunks_exact(src_channels * 2);
let (mut r0, mut g0, mut b0, mut a0);
let (mut r1, mut g1, mut b1, mut a1);
let (mut r2, mut g2, mut b2, mut a2);
let (mut r3, mut g3, mut b3, mut a3);
if let (Some(src0), Some(src1)) = (src_iter0.next(), src_iter1.next()) {
r0 = _mm_broadcast_ss(&self.profile.linear[src0[src_cn.r_i()]._as_usize()]);
g0 = _mm_broadcast_ss(&self.profile.linear[src0[src_cn.g_i()]._as_usize()]);
b0 = _mm_broadcast_ss(&self.profile.linear[src0[src_cn.b_i()]._as_usize()]);
r1 = _mm_broadcast_ss(
&self.profile.linear[src0[src_cn.r_i() + src_channels]._as_usize()],
);
g1 = _mm_broadcast_ss(
&self.profile.linear[src0[src_cn.g_i() + src_channels]._as_usize()],
);
b1 = _mm_broadcast_ss(
&self.profile.linear[src0[src_cn.b_i() + src_channels]._as_usize()],
);
r2 = _mm_broadcast_ss(&self.profile.linear[src1[src_cn.r_i()]._as_usize()]);
g2 = _mm_broadcast_ss(&self.profile.linear[src1[src_cn.g_i()]._as_usize()]);
b2 = _mm_broadcast_ss(&self.profile.linear[src1[src_cn.b_i()]._as_usize()]);
r3 = _mm_broadcast_ss(
&self.profile.linear[src1[src_cn.r_i() + src_channels]._as_usize()],
);
g3 = _mm_broadcast_ss(
&self.profile.linear[src1[src_cn.g_i() + src_channels]._as_usize()],
);
b3 = _mm_broadcast_ss(
&self.profile.linear[src1[src_cn.b_i() + src_channels]._as_usize()],
);
a0 = if src_channels == 4 {
src0[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src0[src_cn.a_i() + src_channels]
} else {
max_colors
};
a2 = if src_channels == 4 {
src1[src_cn.a_i()]
} else {
max_colors
};
a3 = if src_channels == 4 {
src1[src_cn.a_i() + src_channels]
} else {
max_colors
};
} else {
r0 = _mm_setzero_ps();
g0 = _mm_setzero_ps();
b0 = _mm_setzero_ps();
a0 = max_colors;
r1 = _mm_setzero_ps();
g1 = _mm_setzero_ps();
b1 = _mm_setzero_ps();
a1 = max_colors;
r2 = _mm_setzero_ps();
g2 = _mm_setzero_ps();
b2 = _mm_setzero_ps();
a2 = max_colors;
r3 = _mm_setzero_ps();
g3 = _mm_setzero_ps();
b3 = _mm_setzero_ps();
a3 = max_colors;
}
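// The loop below is software-pipelined: each iteration finishes the pixels
// whose LUT fetches were issued on the previous iteration while issuing the
// fetches for the current chunk, hiding gather latency behind the FMA work;
// the `if let ... last()` block after the loop flushes the final in-flight
// pixels.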
for (((src0, src1), dst0), dst1) in src_iter0
.zip(src_iter1)
.zip(dst0.chunks_exact_mut(dst_channels * 2))
.zip(dst1.chunks_exact_mut(dst_channels * 2))
{
let rz0 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(r0), r1);
let gz0 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(g0), g1);
let bz0 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(b0), b1);
let rz1 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(r2), r3);
let gz1 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(g2), g3);
let bz1 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(b2), b3);
let v0 = _mm256_mul_ps(rz0, m0);
let v1 = _mm256_fmadd_ps(gz0, m1, v0);
let mut vz0 = _mm256_fmadd_ps(bz0, m2, v1);
let v2 = _mm256_mul_ps(rz1, m0);
let v3 = _mm256_fmadd_ps(gz1, m1, v2);
let mut vz1 = _mm256_fmadd_ps(bz1, m2, v3);
vz0 = _mm256_max_ps(vz0, _mm256_setzero_ps());
vz0 = _mm256_mul_ps(vz0, v_scale);
vz0 = _mm256_min_ps(vz0, v_scale);
vz1 = _mm256_max_ps(vz1, _mm256_setzero_ps());
vz1 = _mm256_mul_ps(vz1, v_scale);
vz1 = _mm256_min_ps(vz1, v_scale);
let zx0 = _mm256_cvtps_epi32(vz0);
let zx1 = _mm256_cvtps_epi32(vz1);
_mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, zx0);
_mm256_store_si256(temporary1.0.as_mut_ptr() as *mut _, zx1);
r0 = _mm_broadcast_ss(&self.profile.linear[src0[src_cn.r_i()]._as_usize()]);
g0 = _mm_broadcast_ss(&self.profile.linear[src0[src_cn.g_i()]._as_usize()]);
b0 = _mm_broadcast_ss(&self.profile.linear[src0[src_cn.b_i()]._as_usize()]);
r1 = _mm_broadcast_ss(
&self.profile.linear[src0[src_cn.r_i() + src_channels]._as_usize()],
);
g1 = _mm_broadcast_ss(
&self.profile.linear[src0[src_cn.g_i() + src_channels]._as_usize()],
);
b1 = _mm_broadcast_ss(
&self.profile.linear[src0[src_cn.b_i() + src_channels]._as_usize()],
);
r2 = _mm_broadcast_ss(&self.profile.linear[src1[src_cn.r_i()]._as_usize()]);
g2 = _mm_broadcast_ss(&self.profile.linear[src1[src_cn.g_i()]._as_usize()]);
b2 = _mm_broadcast_ss(&self.profile.linear[src1[src_cn.b_i()]._as_usize()]);
r3 = _mm_broadcast_ss(
&self.profile.linear[src1[src_cn.r_i() + src_channels]._as_usize()],
);
g3 = _mm_broadcast_ss(
&self.profile.linear[src1[src_cn.g_i() + src_channels]._as_usize()],
);
b3 = _mm_broadcast_ss(
&self.profile.linear[src1[src_cn.b_i() + src_channels]._as_usize()],
);
dst0[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
dst0[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
dst0[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i()] = a0;
}
dst0[dst_cn.r_i() + dst_channels] =
self.profile.gamma[temporary0.0[8] as usize];
dst0[dst_cn.g_i() + dst_channels] =
self.profile.gamma[temporary0.0[10] as usize];
dst0[dst_cn.b_i() + dst_channels] =
self.profile.gamma[temporary0.0[12] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i() + dst_channels] = a1;
}
dst1[dst_cn.r_i()] = self.profile.gamma[temporary1.0[0] as usize];
dst1[dst_cn.g_i()] = self.profile.gamma[temporary1.0[2] as usize];
dst1[dst_cn.b_i()] = self.profile.gamma[temporary1.0[4] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i()] = a2;
}
dst1[dst_cn.r_i() + dst_channels] =
self.profile.gamma[temporary1.0[8] as usize];
dst1[dst_cn.g_i() + dst_channels] =
self.profile.gamma[temporary1.0[10] as usize];
dst1[dst_cn.b_i() + dst_channels] =
self.profile.gamma[temporary1.0[12] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i() + dst_channels] = a3;
}
a0 = if src_channels == 4 {
src0[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src0[src_cn.a_i() + src_channels]
} else {
max_colors
};
a2 = if src_channels == 4 {
src1[src_cn.a_i()]
} else {
max_colors
};
a3 = if src_channels == 4 {
src1[src_cn.a_i() + src_channels]
} else {
max_colors
};
}
if let (Some(dst0), Some(dst1)) = (
dst0.chunks_exact_mut(dst_channels * 2).last(),
dst1.chunks_exact_mut(dst_channels * 2).last(),
) {
let rz0 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(r0), r1);
let gz0 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(g0), g1);
let bz0 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(b0), b1);
let rz1 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(r2), r3);
let gz1 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(g2), g3);
let bz1 = _mm256_insertf128_ps::<1>(_mm256_castps128_ps256(b2), b3);
let v0 = _mm256_mul_ps(rz0, m0);
let v1 = _mm256_fmadd_ps(gz0, m1, v0);
let mut vz0 = _mm256_fmadd_ps(bz0, m2, v1);
let v2 = _mm256_mul_ps(rz1, m0);
let v3 = _mm256_fmadd_ps(gz1, m1, v2);
let mut vz1 = _mm256_fmadd_ps(bz1, m2, v3);
vz0 = _mm256_max_ps(vz0, _mm256_setzero_ps());
vz0 = _mm256_mul_ps(vz0, v_scale);
vz0 = _mm256_min_ps(vz0, v_scale);
vz1 = _mm256_max_ps(vz1, _mm256_setzero_ps());
vz1 = _mm256_mul_ps(vz1, v_scale);
vz1 = _mm256_min_ps(vz1, v_scale);
let zx0 = _mm256_cvtps_epi32(vz0);
let zx1 = _mm256_cvtps_epi32(vz1);
_mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, zx0);
_mm256_store_si256(temporary1.0.as_mut_ptr() as *mut _, zx1);
dst0[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
dst0[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
dst0[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i()] = a0;
}
dst0[dst_cn.r_i() + dst_channels] =
self.profile.gamma[temporary0.0[8] as usize];
dst0[dst_cn.g_i() + dst_channels] =
self.profile.gamma[temporary0.0[10] as usize];
dst0[dst_cn.b_i() + dst_channels] =
self.profile.gamma[temporary0.0[12] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i() + dst_channels] = a1;
}
dst1[dst_cn.r_i()] = self.profile.gamma[temporary1.0[0] as usize];
dst1[dst_cn.g_i()] = self.profile.gamma[temporary1.0[2] as usize];
dst1[dst_cn.b_i()] = self.profile.gamma[temporary1.0[4] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i()] = a2;
}
dst1[dst_cn.r_i() + dst_channels] =
self.profile.gamma[temporary1.0[8] as usize];
dst1[dst_cn.g_i() + dst_channels] =
self.profile.gamma[temporary1.0[10] as usize];
dst1[dst_cn.b_i() + dst_channels] =
self.profile.gamma[temporary1.0[12] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i() + dst_channels] = a3;
}
}
}
for (src, dst) in src_remainder
.chunks_exact(src_channels)
.zip(dst_remainder.chunks_exact_mut(dst_channels))
{
let r = _mm_broadcast_ss(&self.profile.linear[src[src_cn.r_i()]._as_usize()]);
let g = _mm_broadcast_ss(&self.profile.linear[src[src_cn.g_i()]._as_usize()]);
let b = _mm_broadcast_ss(&self.profile.linear[src[src_cn.b_i()]._as_usize()]);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let v0 = _mm_mul_ps(r, _mm256_castps256_ps128(m0));
let v1 = _mm_fmadd_ps(g, _mm256_castps256_ps128(m1), v0);
let mut v = _mm_fmadd_ps(b, _mm256_castps256_ps128(m2), v1);
v = _mm_max_ps(v, zeros);
v = _mm_mul_ps(v, _mm256_castps256_ps128(v_scale));
v = _mm_min_ps(v, _mm256_castps256_ps128(v_scale));
let zx = _mm_cvtps_epi32(v);
_mm_store_si128(temporary0.0.as_mut_ptr() as *mut _, zx);
dst[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
Ok(())
}
}
impl<
T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> TransformExecutor<T>
for TransformShaperRgbOptAvx512<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
unsafe { self.transform_impl(src, dst) }
}
}

@@ -0,0 +1,476 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::rgbxyz_fixed::TransformMatrixShaperFixedPointOpt;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::x86_64::*;
pub(crate) struct TransformShaperRgbQ2_13OptAvx512<
T: Copy,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> {
pub(crate) profile: TransformMatrixShaperFixedPointOpt<i32, i16, T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
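// Broadcasts a single `i32` to every lane by reusing the float broadcast
// (`vbroadcastss`); the reference is reinterpreted bit-for-bit, so the integer
// payload is preserved exactly and the value can be broadcast straight from
// memory.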
#[inline(always)]
pub(crate) unsafe fn _xmm_broadcast_epi32(f: &i32) -> __m128i {
let float_ref: &f32 = unsafe { &*(f as *const i32 as *const f32) };
unsafe { _mm_castps_si128(_mm_broadcast_ss(float_ref)) }
}
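// 32-byte-aligned scratch buffer: results are spilled with
// `_mm256_store_si256`, which requires an aligned address.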
#[repr(align(32), C)]
#[derive(Debug)]
pub(crate) struct AvxAlignedU16(pub(crate) [u16; 16]);
#[inline]
pub(crate) fn split_by_twos<T: Copy>(data: &[T], channels: usize) -> (&[T], &[T]) {
let len = data.len() / (channels * 4);
let split_point = len * 4;
data.split_at(split_point * channels)
}
#[inline]
pub(crate) fn split_by_twos_mut<T: Copy>(data: &mut [T], channels: usize) -> (&mut [T], &mut [T]) {
let len = data.len() / (channels * 4);
let split_point = len * 4;
data.split_at_mut(split_point * channels)
}
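// A minimal sketch of the splitting invariant (hypothetical values, not part
// of the crate's tests): with 3-channel RGB data and 9 pixels, the head keeps
// the first 8 pixels (a multiple of 4, consumed by the vector loop) and the
// tail keeps the last one (handled scalar):
//
//     let data = [0u8; 9 * 3];
//     let (head, tail) = split_by_twos(&data, 3);
//     assert_eq!(head.len(), 8 * 3);
//     assert_eq!(tail.len(), 3);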
impl<
T: Copy + PointeeSizeExpressible + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> TransformShaperRgbQ2_13OptAvx512<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT, PRECISION>
where
u32: AsPrimitive<T>,
{
#[target_feature(enable = "avx512bw", enable = "avx512vl")]
unsafe fn transform_avx512(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let t = self.profile.adaptation_matrix.transpose();
let max_colors = ((1 << self.bit_depth) - 1).as_();
// If the precision is changed anywhere else, it must be changed here as well.
assert_eq!(PRECISION, 13);
let (src_chunks, src_remainder) = split_by_twos(src, src_channels);
let (dst_chunks, dst_remainder) = split_by_twos_mut(dst, dst_channels);
let mut temporary0 = AvxAlignedU16([0; 16]);
let mut temporary1 = AvxAlignedU16([0; 16]);
unsafe {
let m0 = _mm256_set_epi16(
0, 0, t.v[1][2], t.v[0][2], t.v[1][1], t.v[0][1], t.v[1][0], t.v[0][0], 0, 0,
t.v[1][2], t.v[0][2], t.v[1][1], t.v[0][1], t.v[1][0], t.v[0][0],
);
let m2 = _mm256_set_epi16(
0, 0, 1, t.v[2][2], 1, t.v[2][1], 1, t.v[2][0], 0, 0, 1, t.v[2][2], 1, t.v[2][1],
1, t.v[2][0],
);
let rnd_val = ((1i32 << (PRECISION - 1)) as i16).to_ne_bytes();
let rnd = _mm256_set1_epi32(i32::from_ne_bytes([0, 0, rnd_val[0], rnd_val[1]]));
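// Fixed-point trick: each 32-bit lane holds a packed (r, g) or (b, round) i16
// pair, and `m0`/`m2` hold matching coefficient pairs, so `_mm256_madd_epi16`
// accumulates the red+green and blue+rounding contributions of the 3x3 matrix
// in just two instructions per vector. The odd i16 lanes of `m2` are 1 and
// `rnd` carries 2^(PRECISION - 1) in the high half of each lane, which folds
// the rounding add into the same madd before the arithmetic shift by PRECISION.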
let zeros = _mm256_setzero_si256();
let v_max_value = _mm256_set1_epi32(GAMMA_LUT as i32 - 1);
let (mut r0, mut g0, mut b0, mut a0);
let (mut r1, mut g1, mut b1, mut a1);
let (mut r2, mut g2, mut b2, mut a2);
let (mut r3, mut g3, mut b3, mut a3);
if !src_chunks.is_empty() {
let (src0, src1) = src_chunks.split_at(src_chunks.len() / 2);
let (dst0, dst1) = dst_chunks.split_at_mut(dst_chunks.len() / 2);
let mut src_iter0 = src0.chunks_exact(src_channels * 2);
let mut src_iter1 = src1.chunks_exact(src_channels * 2);
if let (Some(src0), Some(src1)) = (src_iter0.next(), src_iter1.next()) {
r0 = _xmm_broadcast_epi32(&self.profile.linear[src0[src_cn.r_i()]._as_usize()]);
g0 = _xmm_broadcast_epi32(&self.profile.linear[src0[src_cn.g_i()]._as_usize()]);
b0 = _xmm_broadcast_epi32(&self.profile.linear[src0[src_cn.b_i()]._as_usize()]);
r1 = _xmm_broadcast_epi32(
&self.profile.linear[src0[src_cn.r_i() + src_channels]._as_usize()],
);
g1 = _xmm_broadcast_epi32(
&self.profile.linear[src0[src_cn.g_i() + src_channels]._as_usize()],
);
b1 = _xmm_broadcast_epi32(
&self.profile.linear[src0[src_cn.b_i() + src_channels]._as_usize()],
);
r2 = _xmm_broadcast_epi32(&self.profile.linear[src1[src_cn.r_i()]._as_usize()]);
g2 = _xmm_broadcast_epi32(&self.profile.linear[src1[src_cn.g_i()]._as_usize()]);
b2 = _xmm_broadcast_epi32(&self.profile.linear[src1[src_cn.b_i()]._as_usize()]);
r3 = _xmm_broadcast_epi32(
&self.profile.linear[src1[src_cn.r_i() + src_channels]._as_usize()],
);
g3 = _xmm_broadcast_epi32(
&self.profile.linear[src1[src_cn.g_i() + src_channels]._as_usize()],
);
b3 = _xmm_broadcast_epi32(
&self.profile.linear[src1[src_cn.b_i() + src_channels]._as_usize()],
);
a0 = if src_channels == 4 {
src0[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src0[src_cn.a_i() + src_channels]
} else {
max_colors
};
a2 = if src_channels == 4 {
src1[src_cn.a_i()]
} else {
max_colors
};
a3 = if src_channels == 4 {
src1[src_cn.a_i() + src_channels]
} else {
max_colors
};
} else {
r0 = _mm_setzero_si128();
g0 = _mm_setzero_si128();
b0 = _mm_setzero_si128();
a0 = max_colors;
r1 = _mm_setzero_si128();
g1 = _mm_setzero_si128();
b1 = _mm_setzero_si128();
a1 = max_colors;
r2 = _mm_setzero_si128();
g2 = _mm_setzero_si128();
b2 = _mm_setzero_si128();
a2 = max_colors;
r3 = _mm_setzero_si128();
g3 = _mm_setzero_si128();
b3 = _mm_setzero_si128();
a3 = max_colors;
}
for (((src0, src1), dst0), dst1) in src_iter0
.zip(src_iter1)
.zip(dst0.chunks_exact_mut(dst_channels * 2))
.zip(dst1.chunks_exact_mut(dst_channels * 2))
{
let zr0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(r0), r1);
let mut zg0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(g0), g1);
let zb0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(b0), b1);
zg0 = _mm256_slli_epi32::<16>(zg0);
let zr1 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(r2), r3);
let mut zg1 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(g2), g3);
let zb1 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(b2), b3);
zg1 = _mm256_slli_epi32::<16>(zg1);
let zrg0 = _mm256_or_si256(zr0, zg0);
let zbz0 = _mm256_or_si256(zb0, rnd);
let zrg1 = _mm256_or_si256(zr1, zg1);
let zbz1 = _mm256_or_si256(zb1, rnd);
let va0 = _mm256_madd_epi16(zrg0, m0);
let va1 = _mm256_madd_epi16(zbz0, m2);
let va2 = _mm256_madd_epi16(zrg1, m0);
let va3 = _mm256_madd_epi16(zbz1, m2);
let mut v0 = _mm256_add_epi32(va0, va1);
let mut v1 = _mm256_add_epi32(va2, va3);
v0 = _mm256_srai_epi32::<PRECISION>(v0);
v0 = _mm256_max_epi32(v0, zeros);
v0 = _mm256_min_epi32(v0, v_max_value);
v1 = _mm256_srai_epi32::<PRECISION>(v1);
v1 = _mm256_max_epi32(v1, zeros);
v1 = _mm256_min_epi32(v1, v_max_value);
_mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, v0);
_mm256_store_si256(temporary1.0.as_mut_ptr() as *mut _, v1);
r0 = _xmm_broadcast_epi32(&self.profile.linear[src0[src_cn.r_i()]._as_usize()]);
g0 = _xmm_broadcast_epi32(&self.profile.linear[src0[src_cn.g_i()]._as_usize()]);
b0 = _xmm_broadcast_epi32(&self.profile.linear[src0[src_cn.b_i()]._as_usize()]);
r1 = _xmm_broadcast_epi32(
&self.profile.linear[src0[src_cn.r_i() + src_channels]._as_usize()],
);
g1 = _xmm_broadcast_epi32(
&self.profile.linear[src0[src_cn.g_i() + src_channels]._as_usize()],
);
b1 = _xmm_broadcast_epi32(
&self.profile.linear[src0[src_cn.b_i() + src_channels]._as_usize()],
);
r2 = _xmm_broadcast_epi32(&self.profile.linear[src1[src_cn.r_i()]._as_usize()]);
g2 = _xmm_broadcast_epi32(&self.profile.linear[src1[src_cn.g_i()]._as_usize()]);
b2 = _xmm_broadcast_epi32(&self.profile.linear[src1[src_cn.b_i()]._as_usize()]);
r3 = _xmm_broadcast_epi32(
&self.profile.linear[src1[src_cn.r_i() + src_channels]._as_usize()],
);
g3 = _xmm_broadcast_epi32(
&self.profile.linear[src1[src_cn.g_i() + src_channels]._as_usize()],
);
b3 = _xmm_broadcast_epi32(
&self.profile.linear[src1[src_cn.b_i() + src_channels]._as_usize()],
);
dst0[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
dst0[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
dst0[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i()] = a0;
}
dst0[dst_cn.r_i() + dst_channels] =
self.profile.gamma[temporary0.0[8] as usize];
dst0[dst_cn.g_i() + dst_channels] =
self.profile.gamma[temporary0.0[10] as usize];
dst0[dst_cn.b_i() + dst_channels] =
self.profile.gamma[temporary0.0[12] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i() + dst_channels] = a1;
}
dst1[dst_cn.r_i()] = self.profile.gamma[temporary1.0[0] as usize];
dst1[dst_cn.g_i()] = self.profile.gamma[temporary1.0[2] as usize];
dst1[dst_cn.b_i()] = self.profile.gamma[temporary1.0[4] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i()] = a2;
}
dst1[dst_cn.r_i() + dst_channels] =
self.profile.gamma[temporary1.0[8] as usize];
dst1[dst_cn.g_i() + dst_channels] =
self.profile.gamma[temporary1.0[10] as usize];
dst1[dst_cn.b_i() + dst_channels] =
self.profile.gamma[temporary1.0[12] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i() + dst_channels] = a3;
}
a0 = if src_channels == 4 {
src0[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src0[src_cn.a_i() + src_channels]
} else {
max_colors
};
a2 = if src_channels == 4 {
src1[src_cn.a_i()]
} else {
max_colors
};
a3 = if src_channels == 4 {
src1[src_cn.a_i() + src_channels]
} else {
max_colors
};
}
if let (Some(dst0), Some(dst1)) = (
dst0.chunks_exact_mut(dst_channels * 2).last(),
dst1.chunks_exact_mut(dst_channels * 2).last(),
) {
let zr0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(r0), r1);
let mut zg0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(g0), g1);
let zb0 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(b0), b1);
zg0 = _mm256_slli_epi32::<16>(zg0);
let zr1 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(r2), r3);
let mut zg1 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(g2), g3);
let zb1 = _mm256_inserti128_si256::<1>(_mm256_castsi128_si256(b2), b3);
zg1 = _mm256_slli_epi32::<16>(zg1);
let zrg0 = _mm256_or_si256(zr0, zg0);
let zbz0 = _mm256_or_si256(zb0, rnd);
let zrg1 = _mm256_or_si256(zr1, zg1);
let zbz1 = _mm256_or_si256(zb1, rnd);
let va0 = _mm256_madd_epi16(zrg0, m0);
let va1 = _mm256_madd_epi16(zbz0, m2);
let va2 = _mm256_madd_epi16(zrg1, m0);
let va3 = _mm256_madd_epi16(zbz1, m2);
let mut v0 = _mm256_add_epi32(va0, va1);
let mut v1 = _mm256_add_epi32(va2, va3);
v0 = _mm256_srai_epi32::<PRECISION>(v0);
v0 = _mm256_max_epi32(v0, zeros);
v0 = _mm256_min_epi32(v0, v_max_value);
v1 = _mm256_srai_epi32::<PRECISION>(v1);
v1 = _mm256_max_epi32(v1, zeros);
v1 = _mm256_min_epi32(v1, v_max_value);
_mm256_store_si256(temporary0.0.as_mut_ptr() as *mut _, v0);
_mm256_store_si256(temporary1.0.as_mut_ptr() as *mut _, v1);
dst0[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
dst0[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
dst0[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i()] = a0;
}
dst0[dst_cn.r_i() + dst_channels] =
self.profile.gamma[temporary0.0[8] as usize];
dst0[dst_cn.g_i() + dst_channels] =
self.profile.gamma[temporary0.0[10] as usize];
dst0[dst_cn.b_i() + dst_channels] =
self.profile.gamma[temporary0.0[12] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i() + dst_channels] = a1;
}
dst1[dst_cn.r_i()] = self.profile.gamma[temporary1.0[0] as usize];
dst1[dst_cn.g_i()] = self.profile.gamma[temporary1.0[2] as usize];
dst1[dst_cn.b_i()] = self.profile.gamma[temporary1.0[4] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i()] = a2;
}
dst1[dst_cn.r_i() + dst_channels] =
self.profile.gamma[temporary1.0[8] as usize];
dst1[dst_cn.g_i() + dst_channels] =
self.profile.gamma[temporary1.0[10] as usize];
dst1[dst_cn.b_i() + dst_channels] =
self.profile.gamma[temporary1.0[12] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i() + dst_channels] = a3;
}
}
}
for (src, dst) in src_remainder
.chunks_exact(src_channels)
.zip(dst_remainder.chunks_exact_mut(dst_channels))
{
let r = _xmm_broadcast_epi32(&self.profile.linear[src[src_cn.r_i()]._as_usize()]);
let mut g =
_xmm_broadcast_epi32(&self.profile.linear[src[src_cn.g_i()]._as_usize()]);
let b = _xmm_broadcast_epi32(&self.profile.linear[src[src_cn.b_i()]._as_usize()]);
g = _mm_slli_epi32::<16>(g);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let zrg0 = _mm_or_si128(r, g);
let zbz0 = _mm_or_si128(b, _mm256_castsi256_si128(rnd));
let v0 = _mm_madd_epi16(zrg0, _mm256_castsi256_si128(m0));
let v1 = _mm_madd_epi16(zbz0, _mm256_castsi256_si128(m2));
let mut v = _mm_add_epi32(v0, v1);
v = _mm_srai_epi32::<PRECISION>(v);
v = _mm_max_epi32(v, _mm_setzero_si128());
v = _mm_min_epi32(v, _mm256_castsi256_si128(v_max_value));
_mm_store_si128(temporary0.0.as_mut_ptr() as *mut _, v);
dst[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
Ok(())
}
}
impl<
T: Copy + PointeeSizeExpressible + 'static + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> TransformExecutor<T>
for TransformShaperRgbQ2_13OptAvx512<
T,
SRC_LAYOUT,
DST_LAYOUT,
LINEAR_CAP,
GAMMA_LUT,
PRECISION,
>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
unsafe { self.transform_avx512(src, dst) }
}
}

121
vendor/moxcms/src/conversions/bpc.rs vendored Normal file
@@ -0,0 +1,121 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
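// Black point compensation: the implementation below is currently disabled
// (commented out).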
//
// use crate::conversions::interpolator::{MultidimensionalInterpolation, Tetrahedral};
// use crate::conversions::transform_lut4_to_4::{NonFiniteVector3fLerp, Vector3fCmykLerp};
// use crate::mlaf::mlaf;
// use crate::{Chromaticity, ColorProfile, DataColorSpace, Lab, Xyz};
//
// impl ColorProfile {
// #[inline]
// pub(crate) fn detect_black_point<const GRID_SIZE: usize>(&self, lut: &[f32]) -> Option<Xyz> {
// if self.color_space == DataColorSpace::Cmyk {
// // if let Some(mut bp) = self.black_point {
// // if let Some(wp) = self.media_white_point.map(|x| x.normalize()) {
// // if wp != Chromaticity::D50.to_xyz() {
// // let ad = adaption_matrix(wp, Chromaticity::D50.to_xyz());
// // let v = ad.mul_vector(bp.to_vector());
// // bp = Xyz {
// // x: v.v[0],
// // y: v.v[1],
// // z: v.v[2],
// // };
// // }
// // }
// // let mut lab = Lab::from_xyz(bp);
// // lab.a = 0.;
// // lab.b = 0.;
// // if lab.l > 50. {
// // lab.l = 50.;
// // }
// // bp = lab.to_xyz();
// // return Some(bp);
// // }
// let c = 65535;
// let m = 65535;
// let y = 65535;
// let k = 65535;
//
// let linear_k: f32 = k as f32 * (1. / 65535.);
// let w: i32 = k * (GRID_SIZE as i32 - 1) / 65535;
// let w_n: i32 = (w + 1).min(GRID_SIZE as i32 - 1);
// let t: f32 = linear_k * (GRID_SIZE as i32 - 1) as f32 - w as f32;
//
// let grid_size = GRID_SIZE as i32;
// let grid_size3 = grid_size * grid_size * grid_size;
//
// let table1 = &lut[(w * grid_size3 * 3) as usize..];
// let table2 = &lut[(w_n * grid_size3 * 3) as usize..];
//
// let tetrahedral1 = Tetrahedral::<GRID_SIZE>::new(table1);
// let tetrahedral2 = Tetrahedral::<GRID_SIZE>::new(table2);
// let r1 = tetrahedral1.inter3(c, m, y);
// let r2 = tetrahedral2.inter3(c, m, y);
// let r = NonFiniteVector3fLerp::interpolate(r1, r2, t, 1.0);
//
// let mut lab = Lab::from_xyz(Xyz {
// x: r.v[0],
// y: r.v[1],
// z: r.v[2],
// });
// lab.a = 0.;
// lab.b = 0.;
// if lab.l > 50. {
// lab.l = 50.;
// }
// let bp = lab.to_xyz();
//
// return Some(bp);
// }
// if self.color_space == DataColorSpace::Rgb {
// return Some(Xyz::new(0.0, 0.0, 0.0));
// }
// None
// }
// }
//
// pub(crate) fn compensate_bpc_in_lut(lut_xyz: &mut [f32], src_bp: Xyz, dst_bp: Xyz) {
// const WP_50: Xyz = Chromaticity::D50.to_xyz();
// let tx = src_bp.x - WP_50.x;
// let ty = src_bp.y - WP_50.y;
// let tz = src_bp.z - WP_50.z;
// let ax = (dst_bp.x - WP_50.x) / tx;
// let ay = (dst_bp.y - WP_50.y) / ty;
// let az = (dst_bp.z - WP_50.z) / tz;
//
// let bx = -WP_50.x * (dst_bp.x - src_bp.x) / tx;
// let by = -WP_50.y * (dst_bp.y - src_bp.y) / ty;
// let bz = -WP_50.z * (dst_bp.z - src_bp.z) / tz;
//
// for dst in lut_xyz.chunks_exact_mut(3) {
// dst[0] = mlaf(bx, dst[0], ax);
// dst[1] = mlaf(by, dst[1], ay);
// dst[2] = mlaf(bz, dst[2], az);
// }
// }

@@ -0,0 +1,416 @@
/*
* // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
#[derive(Clone)]
struct TransformGray2RgbFusedExecutor<
T,
const SRC_LAYOUT: u8,
const DEST_LAYOUT: u8,
const BUCKET: usize,
> {
fused_gamma: Box<[T; BUCKET]>,
bit_depth: usize,
}
pub(crate) fn make_gray_to_x<
T: Copy + Default + PointeeSizeExpressible + 'static + Send + Sync,
const BUCKET: usize,
>(
src_layout: Layout,
dst_layout: Layout,
gray_linear: &[f32; BUCKET],
gray_gamma: &[T; 65536],
bit_depth: usize,
gamma_lut: usize,
) -> Result<Box<dyn TransformExecutor<T> + Sync + Send>, CmsError>
where
u32: AsPrimitive<T>,
{
if src_layout != Layout::Gray && src_layout != Layout::GrayAlpha {
return Err(CmsError::UnsupportedProfileConnection);
}
let mut fused_gamma = Box::new([T::default(); BUCKET]);
let max_lut_size = (gamma_lut - 1) as f32;
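// Compose the linear (decoding) curve with the destination gamma table once,
// so the per-pixel work in the executor is a single fused lookup.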
for (&src, dst) in gray_linear.iter().zip(fused_gamma.iter_mut()) {
let possible_value = ((src * max_lut_size).round() as u32).min(max_lut_size as u32) as u16;
*dst = gray_gamma[possible_value as usize];
}
match src_layout {
Layout::Gray => match dst_layout {
Layout::Rgb => Ok(Box::new(TransformGray2RgbFusedExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::Rgb as u8 },
BUCKET,
> {
fused_gamma,
bit_depth,
})),
Layout::Rgba => Ok(Box::new(TransformGray2RgbFusedExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::Rgba as u8 },
BUCKET,
> {
fused_gamma,
bit_depth,
})),
Layout::Gray => Ok(Box::new(TransformGray2RgbFusedExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::Gray as u8 },
BUCKET,
> {
fused_gamma,
bit_depth,
})),
Layout::GrayAlpha => Ok(Box::new(TransformGray2RgbFusedExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::GrayAlpha as u8 },
BUCKET,
> {
fused_gamma,
bit_depth,
})),
_ => unreachable!(),
},
Layout::GrayAlpha => match dst_layout {
Layout::Rgb => Ok(Box::new(TransformGray2RgbFusedExecutor::<
T,
{ Layout::GrayAlpha as u8 },
{ Layout::Rgb as u8 },
BUCKET,
> {
fused_gamma,
bit_depth,
})),
Layout::Rgba => Ok(Box::new(TransformGray2RgbFusedExecutor::<
T,
{ Layout::GrayAlpha as u8 },
{ Layout::Rgba as u8 },
BUCKET,
> {
fused_gamma,
bit_depth,
})),
Layout::Gray => Ok(Box::new(TransformGray2RgbFusedExecutor::<
T,
{ Layout::GrayAlpha as u8 },
{ Layout::Gray as u8 },
BUCKET,
> {
fused_gamma,
bit_depth,
})),
Layout::GrayAlpha => Ok(Box::new(TransformGray2RgbFusedExecutor::<
T,
{ Layout::GrayAlpha as u8 },
{ Layout::GrayAlpha as u8 },
BUCKET,
> {
fused_gamma,
bit_depth,
})),
_ => unreachable!(),
},
_ => Err(CmsError::UnsupportedProfileConnection),
}
}
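// A minimal usage sketch (hypothetical 8-bit tables; names and values are
// illustrative, not from the crate):
//
//     let gray_linear: [f32; 256] = core::array::from_fn(|i| i as f32 / 255.0);
//     let gray_gamma: Box<[u8; 65536]> = Box::new([0u8; 65536]);
//     let exec = make_gray_to_x::<u8, 256>(
//         Layout::Gray, Layout::Rgb, &gray_linear, &gray_gamma, 8, 65536,
//     )?;
//     // exec.transform(&gray_pixels, &mut rgb_pixels)?;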
impl<
T: Copy + Default + PointeeSizeExpressible + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const BUCKET: usize,
> TransformExecutor<T> for TransformGray2RgbFusedExecutor<T, SRC_LAYOUT, DST_LAYOUT, BUCKET>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let is_gray_alpha = src_cn == Layout::GrayAlpha;
let max_value: T = ((1u32 << self.bit_depth as u32) - 1u32).as_();
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let g = self.fused_gamma[src[0]._as_usize()];
let a = if is_gray_alpha { src[1] } else { max_value };
dst[0] = g;
if dst_cn == Layout::GrayAlpha {
dst[1] = a;
} else if dst_cn == Layout::Rgb {
dst[1] = g;
dst[2] = g;
} else if dst_cn == Layout::Rgba {
dst[1] = g;
dst[2] = g;
dst[3] = a;
}
}
Ok(())
}
}
#[derive(Clone)]
struct TransformGrayToRgbExecutor<
T,
const SRC_LAYOUT: u8,
const DEST_LAYOUT: u8,
const BUCKET: usize,
> {
gray_linear: Box<[f32; BUCKET]>,
red_gamma: Box<[T; 65536]>,
green_gamma: Box<[T; 65536]>,
blue_gamma: Box<[T; 65536]>,
bit_depth: usize,
gamma_lut: usize,
}
#[allow(clippy::too_many_arguments)]
pub(crate) fn make_gray_to_unfused<
T: Copy + Default + PointeeSizeExpressible + 'static + Send + Sync,
const BUCKET: usize,
>(
src_layout: Layout,
dst_layout: Layout,
gray_linear: Box<[f32; BUCKET]>,
red_gamma: Box<[T; 65536]>,
green_gamma: Box<[T; 65536]>,
blue_gamma: Box<[T; 65536]>,
bit_depth: usize,
gamma_lut: usize,
) -> Result<Box<dyn TransformExecutor<T> + Sync + Send>, CmsError>
where
u32: AsPrimitive<T>,
{
if src_layout != Layout::Gray && src_layout != Layout::GrayAlpha {
return Err(CmsError::UnsupportedProfileConnection);
}
if dst_layout != Layout::Rgb && dst_layout != Layout::Rgba {
return Err(CmsError::UnsupportedProfileConnection);
}
match src_layout {
Layout::Gray => match dst_layout {
Layout::Rgb => Ok(Box::new(TransformGrayToRgbExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::Rgb as u8 },
BUCKET,
> {
gray_linear,
red_gamma,
green_gamma,
blue_gamma,
bit_depth,
gamma_lut,
})),
Layout::Rgba => Ok(Box::new(TransformGrayToRgbExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::Rgba as u8 },
BUCKET,
> {
gray_linear,
red_gamma,
green_gamma,
blue_gamma,
bit_depth,
gamma_lut,
})),
Layout::Gray => Ok(Box::new(TransformGrayToRgbExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::Gray as u8 },
BUCKET,
> {
gray_linear,
red_gamma,
green_gamma,
blue_gamma,
bit_depth,
gamma_lut,
})),
Layout::GrayAlpha => Ok(Box::new(TransformGrayToRgbExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::GrayAlpha as u8 },
BUCKET,
> {
gray_linear,
red_gamma,
green_gamma,
blue_gamma,
bit_depth,
gamma_lut,
})),
_ => Err(CmsError::UnsupportedProfileConnection),
},
Layout::GrayAlpha => match dst_layout {
Layout::Rgb => Ok(Box::new(TransformGrayToRgbExecutor::<
T,
{ Layout::GrayAlpha as u8 },
{ Layout::Rgb as u8 },
BUCKET,
> {
gray_linear,
red_gamma,
green_gamma,
blue_gamma,
bit_depth,
gamma_lut,
})),
Layout::Rgba => Ok(Box::new(TransformGrayToRgbExecutor::<
T,
{ Layout::GrayAlpha as u8 },
{ Layout::Rgba as u8 },
BUCKET,
> {
gray_linear,
red_gamma,
green_gamma,
blue_gamma,
bit_depth,
gamma_lut,
})),
Layout::Gray => Ok(Box::new(TransformGrayToRgbExecutor::<
T,
{ Layout::GrayAlpha as u8 },
{ Layout::Gray as u8 },
BUCKET,
> {
gray_linear,
red_gamma,
green_gamma,
blue_gamma,
bit_depth,
gamma_lut,
})),
Layout::GrayAlpha => Ok(Box::new(TransformGrayToRgbExecutor::<
T,
{ Layout::GrayAlpha as u8 },
{ Layout::GrayAlpha as u8 },
BUCKET,
> {
gray_linear,
red_gamma,
green_gamma,
blue_gamma,
bit_depth,
gamma_lut,
})),
_ => Err(CmsError::UnsupportedProfileConnection),
},
_ => Err(CmsError::UnsupportedProfileConnection),
}
}
impl<
T: Copy + Default + PointeeSizeExpressible + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const BUCKET: usize,
> TransformExecutor<T> for TransformGrayToRgbExecutor<T, SRC_LAYOUT, DST_LAYOUT, BUCKET>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let is_gray_alpha = src_cn == Layout::GrayAlpha;
let max_value: T = ((1u32 << self.bit_depth as u32) - 1u32).as_();
let max_lut_size = (self.gamma_lut - 1) as f32;
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let g = self.gray_linear[src[0]._as_usize()];
let a = if is_gray_alpha { src[1] } else { max_value };
let possible_value = (((g * max_lut_size).round() as u32).min(max_lut_size as u32)) as usize;
let red_value = self.red_gamma[possible_value];
let green_value = self.green_gamma[possible_value];
let blue_value = self.blue_gamma[possible_value];
if dst_cn == Layout::Rgb {
dst[0] = red_value;
dst[1] = green_value;
dst[2] = blue_value;
} else if dst_cn == Layout::Rgba {
dst[0] = red_value;
dst[1] = green_value;
dst[2] = blue_value;
dst[3] = a;
} else {
return Err(CmsError::UnsupportedProfileConnection);
}
}
Ok(())
}
}

@@ -0,0 +1,383 @@
/*
* // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::transform::PointeeSizeExpressible;
use crate::trc::ToneCurveEvaluator;
use crate::{CmsError, Layout, Rgb, TransformExecutor};
use num_traits::AsPrimitive;
use std::marker::PhantomData;
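// Unlike the LUT-based gray executors, the executors below evaluate the tone
// curves per pixel through `ToneCurveEvaluator` trait objects: more work per
// sample, but no fixed-size table, which is presumably what the "extended"
// naming refers to (extended-range, e.g. float, inputs).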
struct TransformGrayOneToOneExecutor<T, const SRC_LAYOUT: u8, const DEST_LAYOUT: u8> {
linear_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
gamma_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
_phantom: PhantomData<T>,
bit_depth: usize,
}
pub(crate) fn make_gray_to_one_trc_extended<
T: Copy + Default + PointeeSizeExpressible + 'static + Send + Sync + AsPrimitive<f32>,
>(
src_layout: Layout,
dst_layout: Layout,
linear_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
gamma_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
bit_depth: usize,
) -> Result<Box<dyn TransformExecutor<T> + Sync + Send>, CmsError>
where
u32: AsPrimitive<T>,
f32: AsPrimitive<T>,
{
if src_layout != Layout::Gray && src_layout != Layout::GrayAlpha {
return Err(CmsError::UnsupportedProfileConnection);
}
match src_layout {
Layout::Gray => match dst_layout {
Layout::Rgb => Ok(Box::new(TransformGrayOneToOneExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::Rgb as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
Layout::Rgba => Ok(Box::new(TransformGrayOneToOneExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::Rgba as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
Layout::Gray => Ok(Box::new(TransformGrayOneToOneExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::Gray as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
Layout::GrayAlpha => Ok(Box::new(TransformGrayOneToOneExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::GrayAlpha as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
_ => unreachable!(),
},
Layout::GrayAlpha => match dst_layout {
Layout::Rgb => Ok(Box::new(TransformGrayOneToOneExecutor::<
T,
{ Layout::GrayAlpha as u8 },
{ Layout::Rgb as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
Layout::Rgba => Ok(Box::new(TransformGrayOneToOneExecutor::<
T,
{ Layout::GrayAlpha as u8 },
{ Layout::Rgba as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
Layout::Gray => Ok(Box::new(TransformGrayOneToOneExecutor::<
T,
{ Layout::GrayAlpha as u8 },
{ Layout::Gray as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
Layout::GrayAlpha => Ok(Box::new(TransformGrayOneToOneExecutor::<
T,
{ Layout::GrayAlpha as u8 },
{ Layout::GrayAlpha as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
_ => unreachable!(),
},
_ => Err(CmsError::UnsupportedProfileConnection),
}
}
impl<
T: Copy + Default + PointeeSizeExpressible + 'static + AsPrimitive<f32>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
> TransformExecutor<T> for TransformGrayOneToOneExecutor<T, SRC_LAYOUT, DST_LAYOUT>
where
u32: AsPrimitive<T>,
f32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let is_gray_alpha = src_cn == Layout::GrayAlpha;
let max_value: T = ((1u32 << self.bit_depth as u32) - 1u32).as_();
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let linear_value = self.linear_eval.evaluate_value(src[0].as_());
let g = self.gamma_eval.evaluate_value(linear_value).as_();
let a = if is_gray_alpha { src[1] } else { max_value };
dst[0] = g;
if dst_cn == Layout::GrayAlpha {
dst[1] = a;
} else if dst_cn == Layout::Rgb {
dst[1] = g;
dst[2] = g;
} else if dst_cn == Layout::Rgba {
dst[1] = g;
dst[2] = g;
dst[3] = a;
}
}
Ok(())
}
}
struct TransformGrayToRgbExtendedExecutor<T, const SRC_LAYOUT: u8, const DEST_LAYOUT: u8> {
linear_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
gamma_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
_phantom: PhantomData<T>,
bit_depth: usize,
}
pub(crate) fn make_gray_to_rgb_extended<
T: Copy + Default + PointeeSizeExpressible + 'static + Send + Sync + AsPrimitive<f32>,
>(
src_layout: Layout,
dst_layout: Layout,
linear_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
gamma_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
bit_depth: usize,
) -> Result<Box<dyn TransformExecutor<T> + Sync + Send>, CmsError>
where
u32: AsPrimitive<T>,
f32: AsPrimitive<T>,
{
if src_layout != Layout::Gray && src_layout != Layout::GrayAlpha {
return Err(CmsError::UnsupportedProfileConnection);
}
if dst_layout != Layout::Rgb && dst_layout != Layout::Rgba {
return Err(CmsError::UnsupportedProfileConnection);
}
match src_layout {
Layout::Gray => match dst_layout {
Layout::Rgb => Ok(Box::new(TransformGrayToRgbExtendedExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::Rgb as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
Layout::Rgba => Ok(Box::new(TransformGrayToRgbExtendedExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::Rgba as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
Layout::Gray => Ok(Box::new(TransformGrayToRgbExtendedExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::Gray as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
Layout::GrayAlpha => Ok(Box::new(TransformGrayToRgbExtendedExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::GrayAlpha as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
_ => Err(CmsError::UnsupportedProfileConnection),
},
Layout::GrayAlpha => match dst_layout {
Layout::Rgb => Ok(Box::new(TransformGrayToRgbExtendedExecutor::<
T,
{ Layout::GrayAlpha as u8 },
{ Layout::Rgb as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
Layout::Rgba => Ok(Box::new(TransformGrayToRgbExtendedExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::Rgba as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
Layout::Gray => Ok(Box::new(TransformGrayToRgbExtendedExecutor::<
T,
{ Layout::Gray as u8 },
{ Layout::GrayAlpha as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
Layout::GrayAlpha => Ok(Box::new(TransformGrayToRgbExtendedExecutor::<
T,
{ Layout::GrayAlpha as u8 },
{ Layout::GrayAlpha as u8 },
> {
linear_eval,
gamma_eval,
_phantom: PhantomData,
bit_depth,
})),
_ => Err(CmsError::UnsupportedProfileConnection),
},
_ => Err(CmsError::UnsupportedProfileConnection),
}
}
impl<
T: Copy + Default + PointeeSizeExpressible + 'static + AsPrimitive<f32>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
> TransformExecutor<T> for TransformGrayToRgbExtendedExecutor<T, SRC_LAYOUT, DST_LAYOUT>
where
u32: AsPrimitive<T>,
f32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let is_gray_alpha = src_cn == Layout::GrayAlpha;
let max_value: T = ((1u32 << self.bit_depth as u32) - 1u32).as_();
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let linear_value = self.linear_eval.evaluate_value(src[0].as_());
let a = if is_gray_alpha { src[1] } else { max_value };
let tristimulus = self.gamma_eval.evaluate_tristimulus(Rgb::new(
linear_value,
linear_value,
linear_value,
));
let red_value = tristimulus.r.as_();
let green_value = tristimulus.g.as_();
let blue_value = tristimulus.b.as_();
if dst_cn == Layout::Rgb {
dst[0] = red_value;
dst[1] = green_value;
dst[2] = blue_value;
} else if dst_cn == Layout::Rgba {
dst[0] = red_value;
dst[1] = green_value;
dst[2] = blue_value;
dst[3] = a;
} else {
return Err(CmsError::UnsupportedProfileConnection);
}
}
Ok(())
}
}

View File

@@ -0,0 +1,645 @@
/*
* // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#![allow(dead_code)]
use crate::conversions::lut_transforms::LUT_SAMPLING;
use crate::math::{FusedMultiplyAdd, FusedMultiplyNegAdd};
use crate::{Vector3f, Vector4f};
use num_traits::AsPrimitive;
use std::ops::{Add, Mul, Sub};
#[cfg(feature = "options")]
pub(crate) struct Tetrahedral<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [f32],
}
#[cfg(feature = "options")]
pub(crate) struct Pyramidal<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [f32],
}
#[cfg(feature = "options")]
pub(crate) struct Prismatic<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [f32],
}
pub(crate) struct Trilinear<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [f32],
}
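/// Precomputed lattice lookup for one input channel: `x` is the lower grid
/// index, `x_n` the clamped upper neighbour, and `w` the fractional weight
/// between them (`f32`, or Q0.15 fixed point for the `i16` variant).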
#[derive(Debug, Copy, Clone, Default)]
pub(crate) struct BarycentricWeight<V> {
pub x: i32,
pub x_n: i32,
pub w: V,
}
impl BarycentricWeight<f32> {
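/// Builds the 256-entry table for 8-bit inputs: entry `i` maps code value
/// `i` onto a `GRID_SIZE`-point lattice (assuming `LUT_SAMPLING` is the
/// maximum input code value).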
pub(crate) fn create_ranged_256<const GRID_SIZE: usize>() -> Box<[BarycentricWeight<f32>; 256]>
{
let mut weights = Box::new([BarycentricWeight::default(); 256]);
for (index, weight) in weights.iter_mut().enumerate() {
const SCALE: f32 = 1.0 / LUT_SAMPLING as f32;
let x: i32 = index as i32 * (GRID_SIZE as i32 - 1) / LUT_SAMPLING as i32;
let x_n: i32 = (x + 1).min(GRID_SIZE as i32 - 1);
let scale = (GRID_SIZE as i32 - 1) as f32 * SCALE;
let dr = index as f32 * scale - x as f32;
*weight = BarycentricWeight { x, x_n, w: dr };
}
weights
}
#[cfg(feature = "options")]
pub(crate) fn create_binned<const GRID_SIZE: usize, const BINS: usize>()
-> Box<[BarycentricWeight<f32>; 65536]> {
let mut weights = Box::new([BarycentricWeight::<f32>::default(); 65536]);
let b_scale: f32 = 1.0 / (BINS - 1) as f32;
for (index, weight) in weights.iter_mut().enumerate().take(BINS) {
let x: i32 = (index as f32 * (GRID_SIZE as i32 - 1) as f32 * b_scale).floor() as i32;
let x_n: i32 = (x + 1).min(GRID_SIZE as i32 - 1);
let scale = (GRID_SIZE as i32 - 1) as f32 * b_scale;
let dr = index as f32 * scale - x as f32;
*weight = BarycentricWeight { x, x_n, w: dr };
}
weights
}
}
#[allow(dead_code)]
impl BarycentricWeight<i16> {
pub(crate) fn create_ranged_256<const GRID_SIZE: usize>() -> Box<[BarycentricWeight<i16>; 256]>
{
let mut weights = Box::new([BarycentricWeight::default(); 256]);
for (index, weight) in weights.iter_mut().enumerate() {
const SCALE: f32 = 1.0 / LUT_SAMPLING as f32;
let x: i32 = index as i32 * (GRID_SIZE as i32 - 1) / LUT_SAMPLING as i32;
let x_n: i32 = (x + 1).min(GRID_SIZE as i32 - 1);
let scale = (GRID_SIZE as i32 - 1) as f32 * SCALE;
const Q: f32 = ((1i32 << 15) - 1) as f32;
let dr = ((index as f32 * scale - x as f32) * Q)
.round()
.min(i16::MAX as f32)
.max(-i16::MAX as f32) as i16;
*weight = BarycentricWeight { x, x_n, w: dr };
}
weights
}
#[cfg(feature = "options")]
pub(crate) fn create_binned<const GRID_SIZE: usize, const BINS: usize>()
-> Box<[BarycentricWeight<i16>; 65536]> {
let mut weights = Box::new([BarycentricWeight::<i16>::default(); 65536]);
let b_scale: f32 = 1.0 / (BINS - 1) as f32;
for (index, weight) in weights.iter_mut().enumerate().take(BINS) {
let x: i32 = (index as f32 * (GRID_SIZE as i32 - 1) as f32 * b_scale).floor() as i32;
let x_n: i32 = (x + 1).min(GRID_SIZE as i32 - 1);
let scale = (GRID_SIZE as i32 - 1) as f32 * b_scale;
const Q: f32 = ((1i32 << 15) - 1) as f32;
let dr = ((index as f32 * scale - x as f32) * Q)
.round()
.min(i16::MAX as f32)
.max(-i16::MAX as f32) as i16;
*weight = BarycentricWeight { x, x_n, w: dr };
}
weights
}
}
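/// Reads a single CLUT lattice node at integer grid coordinates (x, y, z).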
trait Fetcher<T> {
fn fetch(&self, x: i32, y: i32, z: i32) -> T;
}
struct TetrahedralFetchVector3f<'a, const GRID_SIZE: usize> {
cube: &'a [f32],
}
pub(crate) trait MultidimensionalInterpolation<'a, const GRID_SIZE: usize> {
fn new(table: &'a [f32]) -> Self;
fn inter3<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
) -> Vector3f;
fn inter4<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
) -> Vector4f;
}
impl<const GRID_SIZE: usize> Fetcher<Vector3f> for TetrahedralFetchVector3f<'_, GRID_SIZE> {
#[inline(always)]
fn fetch(&self, x: i32, y: i32, z: i32) -> Vector3f {
let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
+ y as u32 * GRID_SIZE as u32
+ z as u32) as usize
* 3;
let jx = &self.cube[offset..offset + 3];
Vector3f {
v: [jx[0], jx[1], jx[2]],
}
}
}
struct TetrahedralFetchVector4f<'a, const GRID_SIZE: usize> {
cube: &'a [f32],
}
impl<const GRID_SIZE: usize> Fetcher<Vector4f> for TetrahedralFetchVector4f<'_, GRID_SIZE> {
#[inline(always)]
fn fetch(&self, x: i32, y: i32, z: i32) -> Vector4f {
let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
+ y as u32 * GRID_SIZE as u32
+ z as u32) as usize
* 4;
let jx = &self.cube[offset..offset + 4];
Vector4f {
v: [jx[0], jx[1], jx[2], jx[3]],
}
}
}
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> Tetrahedral<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<
T: Copy
+ Sub<T, Output = T>
+ Mul<T, Output = T>
+ Mul<f32, Output = T>
+ Add<T, Output = T>
+ From<f32>
+ FusedMultiplyAdd<T>,
U: AsPrimitive<usize>,
const BINS: usize,
>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
r: impl Fetcher<T>,
) -> T {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let rx = lut_r.w;
let ry = lut_g.w;
let rz = lut_b.w;
let c0 = r.fetch(x, y, z);
let c2;
let c1;
let c3;
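// The cube is cut into six tetrahedra by the ordering of the fractional
// parts (rx, ry, rz); each branch selects the simplex containing the
// sample and forms the edge differences c1..c3 for barycentric blending.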
if rx >= ry {
if ry >= rz {
//rx >= ry && ry >= rz
c1 = r.fetch(x_n, y, z) - c0;
c2 = r.fetch(x_n, y_n, z) - r.fetch(x_n, y, z);
c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
} else if rx >= rz {
//rx >= rz && rz >= ry
c1 = r.fetch(x_n, y, z) - c0;
c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
c3 = r.fetch(x_n, y, z_n) - r.fetch(x_n, y, z);
} else {
//rz > rx && rx >= ry
c1 = r.fetch(x_n, y, z_n) - r.fetch(x, y, z_n);
c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
c3 = r.fetch(x, y, z_n) - c0;
}
} else if rx >= rz {
//ry > rx && rx >= rz
c1 = r.fetch(x_n, y_n, z) - r.fetch(x, y_n, z);
c2 = r.fetch(x, y_n, z) - c0;
c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
} else if ry >= rz {
//ry >= rz && rz > rx
c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
c2 = r.fetch(x, y_n, z) - c0;
c3 = r.fetch(x, y_n, z_n) - r.fetch(x, y_n, z);
} else {
//rz > ry && ry > rx
c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
c2 = r.fetch(x, y_n, z_n) - r.fetch(x, y, z_n);
c3 = r.fetch(x, y, z_n) - c0;
}
let s0 = c0.mla(c1, T::from(rx));
let s1 = s0.mla(c2, T::from(ry));
s1.mla(c3, T::from(rz))
}
}
macro_rules! define_md_inter {
($interpolator: ident) => {
impl<'a, const GRID_SIZE: usize> MultidimensionalInterpolation<'a, GRID_SIZE>
for $interpolator<'a, GRID_SIZE>
{
fn new(table: &'a [f32]) -> Self {
Self { cube: table }
}
fn inter3<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
) -> Vector3f {
self.interpolate::<Vector3f, U, BINS>(
in_r,
in_g,
in_b,
lut,
TetrahedralFetchVector3f::<GRID_SIZE> { cube: self.cube },
)
}
fn inter4<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
) -> Vector4f {
self.interpolate::<Vector4f, U, BINS>(
in_r,
in_g,
in_b,
lut,
TetrahedralFetchVector4f::<GRID_SIZE> { cube: self.cube },
)
}
}
};
}
#[cfg(feature = "options")]
define_md_inter!(Tetrahedral);
#[cfg(feature = "options")]
define_md_inter!(Pyramidal);
#[cfg(feature = "options")]
define_md_inter!(Prismatic);
define_md_inter!(Trilinear);
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> Pyramidal<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<
T: Copy
+ Sub<T, Output = T>
+ Mul<T, Output = T>
+ Mul<f32, Output = T>
+ Add<T, Output = T>
+ From<f32>
+ FusedMultiplyAdd<T>,
U: AsPrimitive<usize>,
const BINS: usize,
>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
r: impl Fetcher<T>,
) -> T {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let c0 = r.fetch(x, y, z);
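// Pyramid interpolation: the cube is cut into three square pyramids that
// share the main diagonal; the branch is chosen by the smallest of the
// fractional offsets (dr, dg, db).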
if dr > db && dg > db {
let x0 = r.fetch(x_n, y_n, z_n);
let x1 = r.fetch(x_n, y_n, z);
let x2 = r.fetch(x_n, y, z);
let x3 = r.fetch(x, y_n, z);
let c1 = x0 - x1;
let c2 = x2 - c0;
let c3 = x3 - c0;
let c4 = c0 - x3 - x2 + x1;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dr * dg))
} else if db > dr && dg > dr {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y_n, z_n);
let x2 = r.fetch(x, y_n, z_n);
let x3 = r.fetch(x, y_n, z);
let c1 = x0 - c0;
let c2 = x1 - x2;
let c3 = x3 - c0;
let c4 = c0 - x3 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dg * db))
} else {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y, z);
let x2 = r.fetch(x_n, y, z_n);
let x3 = r.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - c0;
let c3 = x3 - x2;
let c4 = c0 - x1 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(db * dr))
}
}
}
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> Prismatic<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<
T: Copy
+ Sub<T, Output = T>
+ Mul<T, Output = T>
+ Mul<f32, Output = T>
+ Add<T, Output = T>
+ From<f32>
+ FusedMultiplyAdd<T>,
U: AsPrimitive<usize>,
const BINS: usize,
>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
r: impl Fetcher<T>,
) -> T {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let c0 = r.fetch(x, y, z);
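// Prism interpolation: the cube is cut into two triangular prisms by the
// diagonal plane db == dr; each branch blends c0 with five neighbouring
// lattice nodes.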
if db >= dr {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y, z_n);
let x2 = r.fetch(x, y_n, z);
let x3 = r.fetch(x, y_n, z_n);
let x4 = r.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - x0;
let c3 = x2 - c0;
let c4 = c0 - x2 - x0 + x3;
let c5 = x0 - x3 - x1 + x4;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
} else {
let x0 = r.fetch(x_n, y, z);
let x1 = r.fetch(x_n, y, z_n);
let x2 = r.fetch(x, y_n, z);
let x3 = r.fetch(x_n, y_n, z);
let x4 = r.fetch(x_n, y_n, z_n);
let c1 = x1 - x0;
let c2 = x0 - c0;
let c3 = x2 - c0;
let c4 = x0 - x3 - x1 + x4;
let c5 = c0 - x2 - x0 + x3;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
}
}
}
impl<const GRID_SIZE: usize> Trilinear<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<
T: Copy
+ Sub<T, Output = T>
+ Mul<T, Output = T>
+ Mul<f32, Output = T>
+ Add<T, Output = T>
+ From<f32>
+ FusedMultiplyAdd<T>
+ FusedMultiplyNegAdd<T>,
U: AsPrimitive<usize>,
const BINS: usize,
>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
r: impl Fetcher<T>,
) -> T {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let w0 = T::from(dr);
let w1 = T::from(dg);
let w2 = T::from(db);
let c000 = r.fetch(x, y, z);
let c100 = r.fetch(x_n, y, z);
let c010 = r.fetch(x, y_n, z);
let c110 = r.fetch(x_n, y_n, z);
let c001 = r.fetch(x, y, z_n);
let c101 = r.fetch(x_n, y, z_n);
let c011 = r.fetch(x, y_n, z_n);
let c111 = r.fetch(x_n, y_n, z_n);
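// `neg_mla(a, b)` is assumed to compute `self - a * b`, so
// `c.neg_mla(c, t).mla(d, t)` evaluates the lerp c * (1 - t) + d * t.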
let c00 = c000.neg_mla(c000, w0).mla(c100, w0);
let c10 = c010.neg_mla(c010, w0).mla(c110, w0);
let c01 = c001.neg_mla(c001, w0).mla(c101, w0);
let c11 = c011.neg_mla(c011, w0).mla(c111, w0);
let c0 = c00.neg_mla(c00, w1).mla(c10, w1);
let c1 = c01.neg_mla(c01, w1).mla(c11, w1);
c0.neg_mla(c0, w2).mla(c1, w2)
}
}
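/// Maps a source sample of type `T` (an integer with `SRC_BP` significant
/// bits, or a normalized float) onto the `BINS`-wide index domain consumed
/// by the barycentric weight tables above.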
pub(crate) trait LutBarycentricReduction<T, U> {
fn reduce<const SRC_BP: usize, const BINS: usize>(v: T) -> U;
}
impl LutBarycentricReduction<u8, u8> for () {
#[inline(always)]
fn reduce<const SRC_BP: usize, const BINS: usize>(v: u8) -> u8 {
v
}
}
impl LutBarycentricReduction<u8, u16> for () {
#[inline(always)]
fn reduce<const SRC_BP: usize, const BINS: usize>(v: u8) -> u16 {
if BINS == 65536 {
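// Both bytes are equal, so this is endian-independent and equals v * 257,
// widening full-range 8-bit to full-range 16-bit.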
return u16::from_ne_bytes([v, v]);
}
if BINS == 16384 {
return u16::from_ne_bytes([v, v]) >> 2;
}
unimplemented!()
}
}
impl LutBarycentricReduction<f32, u8> for () {
#[inline(always)]
fn reduce<const SRC_BP: usize, const BINS: usize>(v: f32) -> u8 {
(v * 255.).round().min(255.).max(0.) as u8
}
}
impl LutBarycentricReduction<f32, u16> for () {
#[inline(always)]
fn reduce<const SRC_BP: usize, const BINS: usize>(v: f32) -> u16 {
let scale = (BINS - 1) as f32;
(v * scale).round().min(scale).max(0.) as u16
}
}
impl LutBarycentricReduction<f64, u8> for () {
#[inline(always)]
fn reduce<const SRC_BP: usize, const BINS: usize>(v: f64) -> u8 {
(v * 255.).round().min(255.).max(0.) as u8
}
}
impl LutBarycentricReduction<f64, u16> for () {
#[inline(always)]
fn reduce<const SRC_BP: usize, const BINS: usize>(v: f64) -> u16 {
let scale = (BINS - 1) as f64;
(v * scale).round().min(scale).max(0.) as u16
}
}
impl LutBarycentricReduction<u16, u16> for () {
#[inline(always)]
fn reduce<const SRC_BP: usize, const BINS: usize>(v: u16) -> u16 {
let src_scale = 1. / ((1 << SRC_BP) - 1) as f32;
let scale = src_scale * (BINS - 1) as f32;
// Clamp to the top bin index; clamping to `scale` would collapse the range.
(v as f32 * scale).round().min((BINS - 1) as f32).max(0.) as u16
}
}
impl LutBarycentricReduction<u16, u8> for () {
#[inline(always)]
fn reduce<const SRC_BP: usize, const BINS: usize>(v: u16) -> u8 {
let shift = SRC_BP as u16 - 8;
if SRC_BP == 16 {
(v >> 8) as u8
} else {
(v >> shift).min(255) as u8
}
}
}

View File

@@ -0,0 +1,118 @@
/*
* // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::katana::KatanaPostFinalizationStage;
use crate::{CmsError, DataColorSpace, Layout, PointeeSizeExpressible};
use num_traits::AsPrimitive;
use std::marker::PhantomData;
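/// Post-finalization stage that writes an opaque alpha value into the
/// destination when the output layout carries an alpha channel the pipeline
/// itself did not produce.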
pub(crate) struct InjectAlphaStage<I> {
pub(crate) dst_layout: Layout,
pub(crate) target_color_space: DataColorSpace,
pub(crate) _phantom: PhantomData<I>,
pub(crate) bit_depth: usize,
}
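/// Post-finalization stage that forwards the source alpha channel to the
/// destination when both layouts carry alpha.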
pub(crate) struct CopyAlphaStage<I> {
pub(crate) src_layout: Layout,
pub(crate) dst_layout: Layout,
pub(crate) target_color_space: DataColorSpace,
pub(crate) _phantom: PhantomData<I>,
}
impl<T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync>
KatanaPostFinalizationStage<T> for InjectAlphaStage<T>
where
f32: AsPrimitive<T>,
{
fn finalize(&self, _: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let norm_value: T = (if T::FINITE {
((1u32 << self.bit_depth) - 1) as f32
} else {
1.0
})
.as_();
if self.dst_layout == Layout::Rgba && self.target_color_space == DataColorSpace::Rgb {
for dst in dst.chunks_exact_mut(self.dst_layout.channels()) {
dst[3] = norm_value;
}
} else if self.dst_layout == Layout::GrayAlpha
&& self.target_color_space == DataColorSpace::Gray
{
for dst in dst.chunks_exact_mut(self.dst_layout.channels()) {
dst[1] = norm_value;
}
}
Ok(())
}
}
impl<T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync>
KatanaPostFinalizationStage<T> for CopyAlphaStage<T>
where
f32: AsPrimitive<T>,
{
fn finalize(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
if self.dst_layout == Layout::Rgba && self.target_color_space == DataColorSpace::Rgb {
if self.src_layout == Layout::Rgba {
for (src, dst) in src
.chunks_exact(self.src_layout.channels())
.zip(dst.chunks_exact_mut(self.dst_layout.channels()))
{
dst[3] = src[3];
}
} else if self.src_layout == Layout::GrayAlpha {
for (src, dst) in src
.chunks_exact(self.src_layout.channels())
.zip(dst.chunks_exact_mut(self.dst_layout.channels()))
{
dst[3] = src[1];
}
}
} else if self.dst_layout == Layout::GrayAlpha
&& self.target_color_space == DataColorSpace::Gray
{
if self.src_layout == Layout::Rgba {
for (src, dst) in src
.chunks_exact(self.src_layout.channels())
.zip(dst.chunks_exact_mut(self.dst_layout.channels()))
{
dst[1] = src[3];
}
} else if self.src_layout == Layout::GrayAlpha {
for (src, dst) in src
.chunks_exact(self.src_layout.channels())
.zip(dst.chunks_exact_mut(self.dst_layout.channels()))
{
dst[1] = src[1];
}
}
}
Ok(())
}
}

View File

@@ -0,0 +1,483 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::katana::{KatanaFinalStage, KatanaInitialStage};
use crate::mlaf::mlaf;
use crate::safe_math::SafeMul;
use crate::trc::lut_interp_linear_float;
use crate::{
CmsError, Cube, DataColorSpace, InterpolationMethod, LutMultidimensionalType, MalformedSize,
Matrix3d, Matrix3f, PointeeSizeExpressible, TransformOptions, Vector3d, Vector3f,
};
use num_traits::AsPrimitive;
use std::marker::PhantomData;
#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Debug)]
pub(crate) enum MultidimensionalDirection {
DeviceToPcs,
PcsToDevice,
}
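/// Katana stage for a 3-in/3-out multidimensional (A/M/B curves + CLUT)
/// element. Device-to-PCS order: A-curves -> CLUT -> M-curves -> matrix ->
/// B-curves; the PCS-to-device direction runs the stages in reverse.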
struct Multidimensional3x3<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
> {
a_curves: Option<Box<[Vec<f32>; 3]>>,
m_curves: Option<Box<[Vec<f32>; 3]>>,
b_curves: Option<Box<[Vec<f32>; 3]>>,
clut: Option<Vec<f32>>,
matrix: Matrix3f,
bias: Vector3f,
direction: MultidimensionalDirection,
options: TransformOptions,
pcs: DataColorSpace,
grid_size: [u8; 3],
_phantom: PhantomData<T>,
bit_depth: usize,
}
impl<T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync>
Multidimensional3x3<T>
{
fn execute_matrix_stage(&self, dst: &mut [f32]) {
let m = self.matrix;
let b = self.bias;
if !m.test_equality(Matrix3f::IDENTITY) || !b.eq(&Vector3f::default()) {
for dst in dst.chunks_exact_mut(3) {
let x = dst[0];
let y = dst[1];
let z = dst[2];
dst[0] = mlaf(mlaf(mlaf(b.v[0], x, m.v[0][0]), y, m.v[0][1]), z, m.v[0][2]);
dst[1] = mlaf(mlaf(mlaf(b.v[1], x, m.v[1][0]), y, m.v[1][1]), z, m.v[1][2]);
dst[2] = mlaf(mlaf(mlaf(b.v[2], x, m.v[2][0]), y, m.v[2][1]), z, m.v[2][2]);
}
}
}
fn execute_simple_curves(&self, dst: &mut [f32], curves: &[Vec<f32>; 3]) {
let curve0 = &curves[0];
let curve1 = &curves[1];
let curve2 = &curves[2];
for dst in dst.chunks_exact_mut(3) {
let a0 = dst[0];
let a1 = dst[1];
let a2 = dst[2];
let b0 = lut_interp_linear_float(a0, curve0);
let b1 = lut_interp_linear_float(a1, curve1);
let b2 = lut_interp_linear_float(a2, curve2);
dst[0] = b0;
dst[1] = b1;
dst[2] = b2;
}
}
fn to_pcs_impl<Fetch: Fn(f32, f32, f32) -> Vector3f>(
&self,
input: &[T],
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
let norm_value = if T::FINITE {
1.0 / ((1u32 << self.bit_depth) - 1) as f32
} else {
1.0
};
assert_eq!(
self.direction,
MultidimensionalDirection::DeviceToPcs,
"PCS to device cannot be used on `to pcs` stage"
);
// A-curves stage of the A -> B (or B -> A) pipeline, followed by the CLUT lookup.
if let (Some(a_curves), Some(clut)) = (self.a_curves.as_ref(), self.clut.as_ref()) {
if !clut.is_empty() {
let curve0 = &a_curves[0];
let curve1 = &a_curves[1];
let curve2 = &a_curves[2];
for (src, dst) in input.chunks_exact(3).zip(dst.chunks_exact_mut(3)) {
let b0 = lut_interp_linear_float(src[0].as_() * norm_value, curve0);
let b1 = lut_interp_linear_float(src[1].as_() * norm_value, curve1);
let b2 = lut_interp_linear_float(src[2].as_() * norm_value, curve2);
let interpolated = fetch(b0, b1, b2);
dst[0] = interpolated.v[0];
dst[1] = interpolated.v[1];
dst[2] = interpolated.v[2];
}
} else {
for (src, dst) in input.chunks_exact(3).zip(dst.chunks_exact_mut(3)) {
dst[0] = src[0].as_() * norm_value;
dst[1] = src[1].as_() * norm_value;
dst[2] = src[2].as_() * norm_value;
}
}
} else {
for (src, dst) in input.chunks_exact(3).zip(dst.chunks_exact_mut(3)) {
dst[0] = src[0].as_() * norm_value;
dst[1] = src[1].as_() * norm_value;
dst[2] = src[2].as_() * norm_value;
}
}
// Matrix stage
if let Some(m_curves) = self.m_curves.as_ref() {
self.execute_simple_curves(dst, m_curves);
self.execute_matrix_stage(dst);
}
// B-curves are mandatory
if let Some(b_curves) = &self.b_curves.as_ref() {
self.execute_simple_curves(dst, b_curves);
}
Ok(())
}
}
impl<T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync>
KatanaInitialStage<f32, T> for Multidimensional3x3<T>
{
fn to_pcs(&self, input: &[T]) -> Result<Vec<f32>, CmsError> {
if input.len() % 3 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let fixed_new_clut = Vec::new();
let new_clut = self.clut.as_ref().unwrap_or(&fixed_new_clut);
let lut = Cube::new_cube(new_clut, self.grid_size);
let mut new_dst = vec![0f32; input.len()];
// For Lab or XYZ PCS, trilinear interpolation is always used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
self.to_pcs_impl(input, &mut new_dst, |x, y, z| lut.trilinear_vec3(x, y, z))?;
return Ok(new_dst);
}
match self.options.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.to_pcs_impl(input, &mut new_dst, |x, y, z| lut.tetra_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.to_pcs_impl(input, &mut new_dst, |x, y, z| lut.pyramid_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.to_pcs_impl(input, &mut new_dst, |x, y, z| lut.prism_vec3(x, y, z))?;
}
InterpolationMethod::Linear => {
self.to_pcs_impl(input, &mut new_dst, |x, y, z| lut.trilinear_vec3(x, y, z))?;
}
}
Ok(new_dst)
}
}
impl<T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync>
Multidimensional3x3<T>
where
f32: AsPrimitive<T>,
{
fn to_output_impl<Fetch: Fn(f32, f32, f32) -> Vector3f>(
&self,
src: &mut [f32],
dst: &mut [T],
fetch: Fetch,
) -> Result<(), CmsError> {
let norm_value = if T::FINITE {
((1u32 << self.bit_depth) - 1) as f32
} else {
1.0
};
assert_eq!(
self.direction,
MultidimensionalDirection::PcsToDevice,
"Device to PCS cannot be used on `to output` stage"
);
if let Some(b_curves) = &self.b_curves.as_ref() {
self.execute_simple_curves(src, b_curves);
}
// Matrix stage
if let Some(m_curves) = self.m_curves.as_ref() {
self.execute_matrix_stage(src);
self.execute_simple_curves(src, m_curves);
}
if let (Some(a_curves), Some(clut)) = (self.a_curves.as_ref(), self.clut.as_ref()) {
if !clut.is_empty() {
let curve0 = &a_curves[0];
let curve1 = &a_curves[1];
let curve2 = &a_curves[2];
for (src, dst) in src.chunks_exact(3).zip(dst.chunks_exact_mut(3)) {
let b0 = lut_interp_linear_float(src[0], curve0);
let b1 = lut_interp_linear_float(src[1], curve1);
let b2 = lut_interp_linear_float(src[2], curve2);
let interpolated = fetch(b0, b1, b2);
if T::FINITE {
dst[0] = (interpolated.v[0] * norm_value)
.round()
.max(0.0)
.min(norm_value)
.as_();
dst[1] = (interpolated.v[1] * norm_value)
.round()
.max(0.0)
.min(norm_value)
.as_();
dst[2] = (interpolated.v[2] * norm_value)
.round()
.max(0.0)
.min(norm_value)
.as_();
} else {
dst[0] = interpolated.v[0].as_();
dst[1] = interpolated.v[1].as_();
dst[2] = interpolated.v[2].as_();
}
}
} else {
for (src, dst) in src.chunks_exact(3).zip(dst.chunks_exact_mut(3)) {
if T::FINITE {
dst[0] = (src[0] * norm_value).round().max(0.0).min(norm_value).as_();
dst[1] = (src[1] * norm_value).round().max(0.0).min(norm_value).as_();
dst[2] = (src[2] * norm_value).round().max(0.0).min(norm_value).as_();
} else {
dst[0] = src[0].as_();
dst[1] = src[1].as_();
dst[2] = src[2].as_();
}
}
}
} else {
for (src, dst) in src.chunks_exact(3).zip(dst.chunks_exact_mut(3)) {
if T::FINITE {
dst[0] = (src[0] * norm_value).round().max(0.0).min(norm_value).as_();
dst[1] = (src[1] * norm_value).round().max(0.0).min(norm_value).as_();
dst[2] = (src[2] * norm_value).round().max(0.0).min(norm_value).as_();
} else {
dst[0] = src[0].as_();
dst[1] = src[1].as_();
dst[2] = src[2].as_();
}
}
}
Ok(())
}
}
impl<T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync>
KatanaFinalStage<f32, T> for Multidimensional3x3<T>
where
f32: AsPrimitive<T>,
{
fn to_output(&self, src: &mut [f32], dst: &mut [T]) -> Result<(), CmsError> {
if src.len() % 3 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % 3 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if src.len() != dst.len() {
return Err(CmsError::LaneSizeMismatch);
}
let fixed_new_clut = Vec::new();
let new_clut = self.clut.as_ref().unwrap_or(&fixed_new_clut);
let lut = Cube::new_cube(new_clut, self.grid_size);
// For Lab or XYZ PCS, trilinear interpolation is always used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.to_output_impl(src, dst, |x, y, z| lut.trilinear_vec3(x, y, z));
}
match self.options.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.to_output_impl(src, dst, |x, y, z| lut.tetra_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.to_output_impl(src, dst, |x, y, z| lut.pyramid_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.to_output_impl(src, dst, |x, y, z| lut.prism_vec3(x, y, z))?;
}
InterpolationMethod::Linear => {
self.to_output_impl(src, dst, |x, y, z| lut.trilinear_vec3(x, y, z))?;
}
}
Ok(())
}
}
fn make_multidimensional_3x3<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
>(
mab: &LutMultidimensionalType,
options: TransformOptions,
pcs: DataColorSpace,
direction: MultidimensionalDirection,
bit_depth: usize,
) -> Result<Multidimensional3x3<T>, CmsError> {
if mab.num_input_channels != 3 || mab.num_output_channels != 3 {
return Err(CmsError::UnsupportedProfileConnection);
}
if mab.b_curves.is_empty() || mab.b_curves.len() != 3 {
return Err(CmsError::InvalidAtoBLut);
}
let grid_size = [mab.grid_points[0], mab.grid_points[1], mab.grid_points[2]];
let clut: Option<Vec<f32>> = if mab.a_curves.len() == 3 && mab.clut.is_some() {
let clut = mab.clut.as_ref().map(|x| x.to_clut_f32()).unwrap();
let lut_grid = (mab.grid_points[0] as usize)
.safe_mul(mab.grid_points[1] as usize)?
.safe_mul(mab.grid_points[2] as usize)?
.safe_mul(mab.num_output_channels as usize)?;
if clut.len() != lut_grid {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: clut.len(),
expected: lut_grid,
}));
}
Some(clut)
} else {
None
};
let a_curves: Option<Box<[Vec<f32>; 3]>> = if mab.a_curves.len() == 3 && mab.clut.is_some() {
let mut arr = Box::<[Vec<f32>; 3]>::default();
for (a_curve, dst) in mab.a_curves.iter().zip(arr.iter_mut()) {
*dst = a_curve.to_clut()?;
}
Some(arr)
} else {
None
};
let b_curves: Option<Box<[Vec<f32>; 3]>> = if mab.b_curves.len() == 3 {
let mut arr = Box::<[Vec<f32>; 3]>::default();
let all_curves_linear = mab.b_curves.iter().all(|curve| curve.is_linear());
if all_curves_linear {
None
} else {
for (c_curve, dst) in mab.b_curves.iter().zip(arr.iter_mut()) {
*dst = c_curve.to_clut()?;
}
Some(arr)
}
} else {
return Err(CmsError::InvalidAtoBLut);
};
let matrix = mab.matrix.to_f32();
let m_curves: Option<Box<[Vec<f32>; 3]>> = if mab.m_curves.len() == 3 {
let all_curves_linear = mab.m_curves.iter().all(|curve| curve.is_linear());
if !all_curves_linear
|| !mab.matrix.test_equality(Matrix3d::IDENTITY)
|| mab.bias.ne(&Vector3d::default())
{
let mut arr = Box::<[Vec<f32>; 3]>::default();
for (curve, dst) in mab.m_curves.iter().zip(arr.iter_mut()) {
*dst = curve.to_clut()?;
}
Some(arr)
} else {
None
}
} else {
None
};
let bias = mab.bias.cast();
let transform = Multidimensional3x3::<T> {
a_curves,
b_curves,
m_curves,
matrix,
direction,
options,
clut,
pcs,
grid_size,
bias,
_phantom: PhantomData,
bit_depth,
};
Ok(transform)
}
pub(crate) fn multi_dimensional_3x3_to_pcs<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
>(
mab: &LutMultidimensionalType,
options: TransformOptions,
pcs: DataColorSpace,
bit_depth: usize,
) -> Result<Box<dyn KatanaInitialStage<f32, T> + Send + Sync>, CmsError> {
let transform = make_multidimensional_3x3::<T>(
mab,
options,
pcs,
MultidimensionalDirection::DeviceToPcs,
bit_depth,
)?;
Ok(Box::new(transform))
}
pub(crate) fn multi_dimensional_3x3_to_device<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
>(
mab: &LutMultidimensionalType,
options: TransformOptions,
pcs: DataColorSpace,
bit_depth: usize,
) -> Result<Box<dyn KatanaFinalStage<f32, T> + Send + Sync>, CmsError>
where
f32: AsPrimitive<T>,
{
let transform = make_multidimensional_3x3::<T>(
mab,
options,
pcs,
MultidimensionalDirection::PcsToDevice,
bit_depth,
)?;
Ok(Box::new(transform))
}

View File

@@ -0,0 +1,321 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::katana::KatanaInitialStage;
use crate::conversions::katana::md3x3::MultidimensionalDirection;
use crate::mlaf::mlaf;
use crate::safe_math::SafeMul;
use crate::trc::lut_interp_linear_float;
use crate::{
CmsError, DataColorSpace, Hypercube, InterpolationMethod, LutMultidimensionalType,
MalformedSize, Matrix3d, Matrix3f, PointeeSizeExpressible, TransformOptions, Vector3d,
Vector3f,
};
use num_traits::AsPrimitive;
use std::marker::PhantomData;
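/// Applies three per-channel tone curves in place to interleaved 3-channel
/// samples.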
pub(crate) fn execute_simple_curves3(dst: &mut [f32], curves: &[Vec<f32>; 3]) {
let curve0 = &curves[0];
let curve1 = &curves[1];
let curve2 = &curves[2];
for dst in dst.chunks_exact_mut(3) {
let a0 = dst[0];
let a1 = dst[1];
let a2 = dst[2];
let b0 = lut_interp_linear_float(a0, curve0);
let b1 = lut_interp_linear_float(a1, curve1);
let b2 = lut_interp_linear_float(a2, curve2);
dst[0] = b0;
dst[1] = b1;
dst[2] = b2;
}
}
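/// Applies the 3x3 matrix and bias vector in place to interleaved 3-channel
/// samples, skipping the pass entirely for an identity transform.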
pub(crate) fn execute_matrix_stage3(matrix: Matrix3f, bias: Vector3f, dst: &mut [f32]) {
let m = matrix;
let b = bias;
if !m.test_equality(Matrix3f::IDENTITY) || !b.eq(&Vector3f::default()) {
for dst in dst.chunks_exact_mut(3) {
let x = dst[0];
let y = dst[1];
let z = dst[2];
dst[0] = mlaf(mlaf(mlaf(b.v[0], x, m.v[0][0]), y, m.v[0][1]), z, m.v[0][2]);
dst[1] = mlaf(mlaf(mlaf(b.v[1], x, m.v[1][0]), y, m.v[1][1]), z, m.v[1][2]);
dst[2] = mlaf(mlaf(mlaf(b.v[2], x, m.v[2][0]), y, m.v[2][1]), z, m.v[2][2]);
}
}
}
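/// Katana initial stage for a 4-in/3-out (e.g. CMYK -> PCS) multidimensional
/// element; the CLUT is sampled over a 4D hypercube rather than a cube.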
struct Multidimensional4x3<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
> {
a_curves: Option<Box<[Vec<f32>; 4]>>,
m_curves: Option<Box<[Vec<f32>; 3]>>,
b_curves: Option<Box<[Vec<f32>; 3]>>,
clut: Option<Vec<f32>>,
matrix: Matrix3f,
bias: Vector3f,
direction: MultidimensionalDirection,
options: TransformOptions,
pcs: DataColorSpace,
grid_size: [u8; 4],
_phantom: PhantomData<T>,
bit_depth: usize,
}
impl<T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync>
Multidimensional4x3<T>
{
fn to_pcs_impl<Fetch: Fn(f32, f32, f32, f32) -> Vector3f>(
&self,
input: &[T],
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
let norm_value = if T::FINITE {
1.0 / ((1u32 << self.bit_depth) - 1) as f32
} else {
1.0
};
assert_eq!(
self.direction,
MultidimensionalDirection::DeviceToPcs,
"PCS to device cannot be used on `to pcs` stage"
);
// A-curves stage of the A -> B (or B -> A) pipeline, followed by the CLUT lookup.
if let (Some(a_curves), Some(clut)) = (self.a_curves.as_ref(), self.clut.as_ref()) {
if !clut.is_empty() {
let curve0 = &a_curves[0];
let curve1 = &a_curves[1];
let curve2 = &a_curves[2];
let curve3 = &a_curves[3];
for (src, dst) in input.chunks_exact(4).zip(dst.chunks_exact_mut(3)) {
let b0 = lut_interp_linear_float(src[0].as_() * norm_value, curve0);
let b1 = lut_interp_linear_float(src[1].as_() * norm_value, curve1);
let b2 = lut_interp_linear_float(src[2].as_() * norm_value, curve2);
let b3 = lut_interp_linear_float(src[3].as_() * norm_value, curve3);
let interpolated = fetch(b0, b1, b2, b3);
dst[0] = interpolated.v[0];
dst[1] = interpolated.v[1];
dst[2] = interpolated.v[2];
}
} else {
// An empty CLUT cannot map 4 inputs to 3 outputs; fail instead of
// silently leaving `dst` zeroed.
return Err(CmsError::InvalidAtoBLut);
}
} else {
return Err(CmsError::InvalidAtoBLut);
}
// Matrix stage
if let Some(m_curves) = self.m_curves.as_ref() {
execute_simple_curves3(dst, m_curves);
execute_matrix_stage3(self.matrix, self.bias, dst);
}
// B-curves are mandatory
if let Some(b_curves) = &self.b_curves.as_ref() {
execute_simple_curves3(dst, b_curves);
}
Ok(())
}
}
impl<T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync>
KatanaInitialStage<f32, T> for Multidimensional4x3<T>
{
fn to_pcs(&self, input: &[T]) -> Result<Vec<f32>, CmsError> {
if input.len() % 4 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let fixed_new_clut = Vec::new();
let new_clut = self.clut.as_ref().unwrap_or(&fixed_new_clut);
let lut = Hypercube::new_hypercube(new_clut, self.grid_size);
let mut new_dst = vec![0f32; (input.len() / 4) * 3];
// For Lab or XYZ PCS, trilinear interpolation is always used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
self.to_pcs_impl(input, &mut new_dst, |x, y, z, w| {
lut.quadlinear_vec3(x, y, z, w)
})?;
return Ok(new_dst);
}
match self.options.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.to_pcs_impl(input, &mut new_dst, |x, y, z, w| lut.tetra_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.to_pcs_impl(input, &mut new_dst, |x, y, z, w| {
lut.pyramid_vec3(x, y, z, w)
})?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.to_pcs_impl(input, &mut new_dst, |x, y, z, w| lut.prism_vec3(x, y, z, w))?;
}
InterpolationMethod::Linear => {
self.to_pcs_impl(input, &mut new_dst, |x, y, z, w| {
lut.quadlinear_vec3(x, y, z, w)
})?;
}
}
Ok(new_dst)
}
}
fn make_multidimensional_4x3<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
>(
mab: &LutMultidimensionalType,
options: TransformOptions,
pcs: DataColorSpace,
direction: MultidimensionalDirection,
bit_depth: usize,
) -> Result<Multidimensional4x3<T>, CmsError> {
if mab.num_input_channels != 4 || mab.num_output_channels != 3 {
return Err(CmsError::UnsupportedProfileConnection);
}
if mab.b_curves.is_empty() || mab.b_curves.len() != 3 {
return Err(CmsError::InvalidAtoBLut);
}
let grid_size = [
mab.grid_points[0],
mab.grid_points[1],
mab.grid_points[2],
mab.grid_points[3],
];
let clut: Option<Vec<f32>> = if mab.a_curves.len() == 4 && mab.clut.is_some() {
let clut = mab.clut.as_ref().map(|x| x.to_clut_f32()).unwrap();
let lut_grid = (mab.grid_points[0] as usize)
.safe_mul(mab.grid_points[1] as usize)?
.safe_mul(mab.grid_points[2] as usize)?
.safe_mul(mab.grid_points[3] as usize)?
.safe_mul(mab.num_output_channels as usize)?;
if clut.len() != lut_grid {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: clut.len(),
expected: lut_grid,
}));
}
Some(clut)
} else {
return Err(CmsError::InvalidAtoBLut);
};
let a_curves: Option<Box<[Vec<f32>; 4]>> = if mab.a_curves.len() == 4 && mab.clut.is_some() {
let mut arr = Box::<[Vec<f32>; 4]>::default();
for (a_curve, dst) in mab.a_curves.iter().zip(arr.iter_mut()) {
*dst = a_curve.to_clut()?;
}
Some(arr)
} else {
None
};
let b_curves: Option<Box<[Vec<f32>; 3]>> = if mab.b_curves.len() == 3 {
let mut arr = Box::<[Vec<f32>; 3]>::default();
let all_curves_linear = mab.b_curves.iter().all(|curve| curve.is_linear());
if all_curves_linear {
None
} else {
for (c_curve, dst) in mab.b_curves.iter().zip(arr.iter_mut()) {
*dst = c_curve.to_clut()?;
}
Some(arr)
}
} else {
return Err(CmsError::InvalidAtoBLut);
};
let matrix = mab.matrix.to_f32();
let m_curves: Option<Box<[Vec<f32>; 3]>> = if mab.m_curves.len() == 3 {
let all_curves_linear = mab.m_curves.iter().all(|curve| curve.is_linear());
if !all_curves_linear
|| !mab.matrix.test_equality(Matrix3d::IDENTITY)
|| mab.bias.ne(&Vector3d::default())
{
let mut arr = Box::<[Vec<f32>; 3]>::default();
for (curve, dst) in mab.m_curves.iter().zip(arr.iter_mut()) {
*dst = curve.to_clut()?;
}
Some(arr)
} else {
None
}
} else {
None
};
let bias = mab.bias.cast();
let transform = Multidimensional4x3::<T> {
a_curves,
b_curves,
m_curves,
matrix,
direction,
options,
clut,
pcs,
grid_size,
bias,
_phantom: PhantomData,
bit_depth,
};
Ok(transform)
}
pub(crate) fn multi_dimensional_4x3_to_pcs<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
>(
mab: &LutMultidimensionalType,
options: TransformOptions,
pcs: DataColorSpace,
bit_depth: usize,
) -> Result<Box<dyn KatanaInitialStage<f32, T> + Send + Sync>, CmsError> {
let transform = make_multidimensional_4x3::<T>(
mab,
options,
pcs,
MultidimensionalDirection::DeviceToPcs,
bit_depth,
)?;
Ok(Box::new(transform))
}

View File

@@ -0,0 +1,284 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::katana::KatanaFinalStage;
use crate::conversions::katana::md3x3::MultidimensionalDirection;
use crate::conversions::katana::md4x3::{execute_matrix_stage3, execute_simple_curves3};
use crate::conversions::md_lut::{MultidimensionalLut, tetra_3i_to_any_vec};
use crate::safe_math::SafeMul;
use crate::trc::lut_interp_linear_float;
use crate::{
CmsError, DataColorSpace, Layout, LutMultidimensionalType, MalformedSize, Matrix3d, Matrix3f,
PointeeSizeExpressible, TransformOptions, Vector3d, Vector3f,
};
use num_traits::AsPrimitive;
use std::marker::PhantomData;
struct Multidimensional3xN<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
> {
a_curves: Option<Vec<Vec<f32>>>,
m_curves: Option<Box<[Vec<f32>; 3]>>,
b_curves: Option<Box<[Vec<f32>; 3]>>,
clut: Option<Vec<f32>>,
matrix: Matrix3f,
bias: Vector3f,
direction: MultidimensionalDirection,
grid_size: [u8; 16],
output_inks: usize,
_phantom: PhantomData<T>,
dst_layout: Layout,
bit_depth: usize,
}
impl<T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync>
Multidimensional3xN<T>
where
f32: AsPrimitive<T>,
{
fn to_output_impl(&self, src: &mut [f32], dst: &mut [T]) -> Result<(), CmsError> {
let norm_value = if T::FINITE {
((1u32 << self.bit_depth) - 1) as f32
} else {
1.0
};
assert_eq!(
self.direction,
MultidimensionalDirection::PcsToDevice,
"Device to PCS cannot be used on `to output` stage"
);
// B-curves are mandatory
if let Some(b_curves) = &self.b_curves.as_ref() {
execute_simple_curves3(src, b_curves);
}
// Matrix stage
if let Some(m_curves) = self.m_curves.as_ref() {
execute_matrix_stage3(self.matrix, self.bias, src);
execute_simple_curves3(src, m_curves);
}
if let (Some(a_curves), Some(clut)) = (self.a_curves.as_ref(), self.clut.as_ref()) {
let mut inks = vec![0.; self.output_inks];
if clut.is_empty() {
return Err(CmsError::InvalidAtoBLut);
}
let md_lut = MultidimensionalLut::new(self.grid_size, 3, self.output_inks);
for (src, dst) in src
.chunks_exact(3)
.zip(dst.chunks_exact_mut(self.dst_layout.channels()))
{
tetra_3i_to_any_vec(
&md_lut,
clut,
src[0],
src[1],
src[2],
&mut inks,
self.output_inks,
);
for (ink, curve) in inks.iter_mut().zip(a_curves.iter()) {
*ink = lut_interp_linear_float(*ink, curve);
}
if T::FINITE {
for (dst, ink) in dst.iter_mut().zip(inks.iter()) {
*dst = (*ink * norm_value).round().max(0.).min(norm_value).as_();
}
} else {
for (dst, ink) in dst.iter_mut().zip(inks.iter()) {
*dst = (*ink * norm_value).as_();
}
}
}
} else {
return Err(CmsError::InvalidAtoBLut);
}
Ok(())
}
}
impl<T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync>
KatanaFinalStage<f32, T> for Multidimensional3xN<T>
where
f32: AsPrimitive<T>,
{
fn to_output(&self, src: &mut [f32], dst: &mut [T]) -> Result<(), CmsError> {
if src.len() % 3 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % self.dst_layout.channels() != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
self.to_output_impl(src, dst)?;
Ok(())
}
}
fn make_multidimensional_3xn<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
>(
dst_layout: Layout,
mab: &LutMultidimensionalType,
_: TransformOptions,
pcs: DataColorSpace,
direction: MultidimensionalDirection,
bit_depth: usize,
) -> Result<Multidimensional3xN<T>, CmsError> {
let real_inks = if pcs == DataColorSpace::Rgb {
3
} else {
dst_layout.channels()
};
if mab.num_output_channels != real_inks as u8 {
return Err(CmsError::UnsupportedProfileConnection);
}
if mab.b_curves.is_empty() || mab.b_curves.len() != 3 {
return Err(CmsError::InvalidAtoBLut);
}
let clut: Option<Vec<f32>> =
if mab.a_curves.len() == mab.num_output_channels as usize && mab.clut.is_some() {
let clut = mab.clut.as_ref().map(|x| x.to_clut_f32()).unwrap();
let mut lut_grid = 1usize;
for grid in mab.grid_points.iter().take(mab.num_input_channels as usize) {
lut_grid = lut_grid.safe_mul(*grid as usize)?;
}
let lut_grid = lut_grid.safe_mul(mab.num_output_channels as usize)?;
if clut.len() != lut_grid {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: clut.len(),
expected: lut_grid,
}));
}
Some(clut)
} else {
return Err(CmsError::InvalidAtoBLut);
};
let a_curves: Option<Vec<Vec<f32>>> =
if mab.a_curves.len() == mab.num_output_channels as usize && mab.clut.is_some() {
let mut arr = Vec::new();
for a_curve in mab.a_curves.iter() {
arr.push(a_curve.to_clut()?);
}
Some(arr)
} else {
None
};
let b_curves: Option<Box<[Vec<f32>; 3]>> = if mab.b_curves.len() == 3 {
let mut arr = Box::<[Vec<f32>; 3]>::default();
let all_curves_linear = mab.b_curves.iter().all(|curve| curve.is_linear());
if all_curves_linear {
None
} else {
for (c_curve, dst) in mab.b_curves.iter().zip(arr.iter_mut()) {
*dst = c_curve.to_clut()?;
}
Some(arr)
}
} else {
return Err(CmsError::InvalidAtoBLut);
};
let matrix = mab.matrix.to_f32();
let m_curves: Option<Box<[Vec<f32>; 3]>> = if mab.m_curves.len() == 3 {
let all_curves_linear = mab.m_curves.iter().all(|curve| curve.is_linear());
if !all_curves_linear
|| !mab.matrix.test_equality(Matrix3d::IDENTITY)
|| mab.bias.ne(&Vector3d::default())
{
let mut arr = Box::<[Vec<f32>; 3]>::default();
for (curve, dst) in mab.m_curves.iter().zip(arr.iter_mut()) {
*dst = curve.to_clut()?;
}
Some(arr)
} else {
None
}
} else {
None
};
let bias = mab.bias.cast();
let transform = Multidimensional3xN::<T> {
a_curves,
b_curves,
m_curves,
matrix,
direction,
clut,
grid_size: mab.grid_points,
bias,
dst_layout,
output_inks: real_inks,
_phantom: PhantomData,
bit_depth,
};
Ok(transform)
}
pub(crate) fn katana_multi_dimensional_3xn_to_device<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
>(
dst_layout: Layout,
mab: &LutMultidimensionalType,
options: TransformOptions,
pcs: DataColorSpace,
bit_depth: usize,
) -> Result<Box<dyn KatanaFinalStage<f32, T> + Send + Sync>, CmsError>
where
f32: AsPrimitive<T>,
{
if mab.num_input_channels == 0 {
return Err(CmsError::UnsupportedProfileConnection);
}
let transform = make_multidimensional_3xn::<T>(
dst_layout,
mab,
options,
pcs,
MultidimensionalDirection::PcsToDevice,
bit_depth,
)?;
Ok(Box::new(transform))
}

View File

@@ -0,0 +1,296 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::katana::KatanaInitialStage;
use crate::conversions::katana::md3x3::MultidimensionalDirection;
use crate::conversions::katana::md4x3::{execute_matrix_stage3, execute_simple_curves3};
use crate::conversions::md_lut::{
MultidimensionalLut, NVector, linear_1i_vec3f, linear_2i_vec3f_direct, linear_3i_vec3f_direct,
linear_4i_vec3f, linear_5i_vec3f, linear_6i_vec3f, linear_7i_vec3f, linear_8i_vec3f,
linear_9i_vec3f, linear_10i_vec3f, linear_11i_vec3f, linear_12i_vec3f, linear_13i_vec3f,
linear_14i_vec3f, linear_15i_vec3f,
};
use crate::safe_math::SafeMul;
use crate::trc::lut_interp_linear_float;
use crate::{
CmsError, DataColorSpace, Layout, LutMultidimensionalType, MalformedSize, Matrix3d, Matrix3f,
PointeeSizeExpressible, TransformOptions, Vector3d, Vector3f,
};
use num_traits::AsPrimitive;
use std::marker::PhantomData;
struct MultidimensionalNx3<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
const BIT_DEPTH: usize,
> {
a_curves: Option<Vec<Vec<f32>>>,
m_curves: Option<Box<[Vec<f32>; 3]>>,
b_curves: Option<Box<[Vec<f32>; 3]>>,
clut: Option<Vec<f32>>,
matrix: Matrix3f,
bias: Vector3f,
direction: MultidimensionalDirection,
grid_size: [u8; 16],
input_inks: usize,
_phantom: PhantomData<T>,
}
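/// Selects the N-input multilinear interpolation kernel matching the ink
/// count of `layout`; every kernel yields a 3-component PCS result.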
#[inline(never)]
pub(crate) fn interpolate_out_function(
layout: Layout,
) -> fn(lut: &MultidimensionalLut, arr: &[f32], inputs: &[f32]) -> NVector<f32, 3> {
const OUT: usize = 3;
match layout {
Layout::Rgb => linear_3i_vec3f_direct::<OUT>,
Layout::Rgba => linear_4i_vec3f::<OUT>,
Layout::Gray => linear_1i_vec3f::<OUT>,
Layout::GrayAlpha => linear_2i_vec3f_direct::<OUT>,
Layout::Inks5 => linear_5i_vec3f::<OUT>,
Layout::Inks6 => linear_6i_vec3f::<OUT>,
Layout::Inks7 => linear_7i_vec3f::<OUT>,
Layout::Inks8 => linear_8i_vec3f::<OUT>,
Layout::Inks9 => linear_9i_vec3f::<OUT>,
Layout::Inks10 => linear_10i_vec3f::<OUT>,
Layout::Inks11 => linear_11i_vec3f::<OUT>,
Layout::Inks12 => linear_12i_vec3f::<OUT>,
Layout::Inks13 => linear_13i_vec3f::<OUT>,
Layout::Inks14 => linear_14i_vec3f::<OUT>,
Layout::Inks15 => linear_15i_vec3f::<OUT>,
}
}
impl<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
const BIT_DEPTH: usize,
> MultidimensionalNx3<T, BIT_DEPTH>
{
fn to_pcs_impl(&self, input: &[T], dst: &mut [f32]) -> Result<(), CmsError> {
let norm_value = if T::FINITE {
1.0 / ((1u32 << BIT_DEPTH) - 1) as f32
} else {
1.0
};
assert_eq!(
self.direction,
MultidimensionalDirection::DeviceToPcs,
"PCS to device cannot be used on `to pcs` stage"
);
// A-curves stage of the A -> B (or B -> A) pipeline, followed by the CLUT lookup.
if let (Some(a_curves), Some(clut)) = (self.a_curves.as_ref(), self.clut.as_ref()) {
let layout = Layout::from_inks(self.input_inks);
let mut inks = vec![0.; self.input_inks];
if clut.is_empty() {
return Err(CmsError::InvalidAtoBLut);
}
let fetcher = interpolate_out_function(layout);
let md_lut = MultidimensionalLut::new(self.grid_size, self.input_inks, 3);
for (src, dst) in input
.chunks_exact(layout.channels())
.zip(dst.chunks_exact_mut(3))
{
for ((ink, src_ink), curve) in inks.iter_mut().zip(src).zip(a_curves.iter()) {
*ink = lut_interp_linear_float(src_ink.as_() * norm_value, curve);
}
let interpolated = fetcher(&md_lut, clut, &inks);
dst[0] = interpolated.v[0];
dst[1] = interpolated.v[1];
dst[2] = interpolated.v[2];
}
} else {
return Err(CmsError::InvalidAtoBLut);
}
// Matrix stage
if let Some(m_curves) = self.m_curves.as_ref() {
execute_simple_curves3(dst, m_curves);
execute_matrix_stage3(self.matrix, self.bias, dst);
}
        // B-curves are mandatory, but were elided at build time when linear
        if let Some(b_curves) = self.b_curves.as_ref() {
execute_simple_curves3(dst, b_curves);
}
Ok(())
}
}
impl<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
const BIT_DEPTH: usize,
> KatanaInitialStage<f32, T> for MultidimensionalNx3<T, BIT_DEPTH>
{
fn to_pcs(&self, input: &[T]) -> Result<Vec<f32>, CmsError> {
if input.len() % self.input_inks != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let mut new_dst = vec![0f32; (input.len() / self.input_inks) * 3];
self.to_pcs_impl(input, &mut new_dst)?;
Ok(new_dst)
}
}
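/// Validates a multidimensional (A-to-B style) LUT tag and builds the
/// N-input, 3-output transform; all-linear M- and B-curve sets are dropped so
/// the hot path can skip them.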
fn make_multidimensional_nx3<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
const BIT_DEPTH: usize,
>(
mab: &LutMultidimensionalType,
_: TransformOptions,
_: DataColorSpace,
direction: MultidimensionalDirection,
) -> Result<MultidimensionalNx3<T, BIT_DEPTH>, CmsError> {
if mab.num_output_channels != 3 {
return Err(CmsError::UnsupportedProfileConnection);
}
    if mab.b_curves.len() != 3 {
return Err(CmsError::InvalidAtoBLut);
}
let clut: Option<Vec<f32>> =
if mab.a_curves.len() == mab.num_input_channels as usize && mab.clut.is_some() {
let clut = mab.clut.as_ref().map(|x| x.to_clut_f32()).unwrap();
let mut lut_grid = 1usize;
for grid in mab.grid_points.iter().take(mab.num_input_channels as usize) {
lut_grid = lut_grid.safe_mul(*grid as usize)?;
}
let lut_grid = lut_grid.safe_mul(mab.num_output_channels as usize)?;
if clut.len() != lut_grid {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: clut.len(),
expected: lut_grid,
}));
}
Some(clut)
} else {
return Err(CmsError::InvalidAtoBLut);
};
let a_curves: Option<Vec<Vec<f32>>> =
if mab.a_curves.len() == mab.num_input_channels as usize && mab.clut.is_some() {
let mut arr = Vec::new();
for a_curve in mab.a_curves.iter() {
arr.push(a_curve.to_clut()?);
}
Some(arr)
} else {
None
};
let b_curves: Option<Box<[Vec<f32>; 3]>> = if mab.b_curves.len() == 3 {
let mut arr = Box::<[Vec<f32>; 3]>::default();
let all_curves_linear = mab.b_curves.iter().all(|curve| curve.is_linear());
if all_curves_linear {
None
} else {
for (c_curve, dst) in mab.b_curves.iter().zip(arr.iter_mut()) {
*dst = c_curve.to_clut()?;
}
Some(arr)
}
} else {
return Err(CmsError::InvalidAtoBLut);
};
let matrix = mab.matrix.to_f32();
let m_curves: Option<Box<[Vec<f32>; 3]>> = if mab.m_curves.len() == 3 {
let all_curves_linear = mab.m_curves.iter().all(|curve| curve.is_linear());
if !all_curves_linear
|| !mab.matrix.test_equality(Matrix3d::IDENTITY)
|| mab.bias.ne(&Vector3d::default())
{
let mut arr = Box::<[Vec<f32>; 3]>::default();
for (curve, dst) in mab.m_curves.iter().zip(arr.iter_mut()) {
*dst = curve.to_clut()?;
}
Some(arr)
} else {
None
}
} else {
None
};
let bias = mab.bias.cast();
let transform = MultidimensionalNx3::<T, BIT_DEPTH> {
a_curves,
b_curves,
m_curves,
matrix,
direction,
clut,
grid_size: mab.grid_points,
bias,
input_inks: mab.num_input_channels as usize,
_phantom: PhantomData,
};
Ok(transform)
}
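/// Device -> PCS entry point: checks the source layout against the tag's input
/// channel count before boxing the prepared transform.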
pub(crate) fn katana_multi_dimensional_nx3_to_pcs<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
const BIT_DEPTH: usize,
>(
src_layout: Layout,
mab: &LutMultidimensionalType,
options: TransformOptions,
pcs: DataColorSpace,
) -> Result<Box<dyn KatanaInitialStage<f32, T> + Send + Sync>, CmsError> {
if pcs == DataColorSpace::Rgb {
if mab.num_input_channels != 3 {
return Err(CmsError::InvalidAtoBLut);
}
if src_layout != Layout::Rgba && src_layout != Layout::Rgb {
return Err(CmsError::InvalidInksCountForProfile);
}
} else if mab.num_input_channels != src_layout.channels() as u8 {
return Err(CmsError::InvalidInksCountForProfile);
}
let transform = make_multidimensional_nx3::<T, BIT_DEPTH>(
mab,
options,
pcs,
MultidimensionalDirection::DeviceToPcs,
)?;
Ok(Box::new(transform))
}

View File

@@ -0,0 +1,393 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::katana::md_nx3::interpolate_out_function;
use crate::conversions::katana::{KatanaFinalStage, KatanaInitialStage};
use crate::conversions::md_lut::{MultidimensionalLut, tetra_3i_to_any_vec};
use crate::profile::LutDataType;
use crate::safe_math::{SafeMul, SafePowi};
use crate::trc::lut_interp_linear_float;
use crate::{
CmsError, DataColorSpace, Layout, MalformedSize, PointeeSizeExpressible, TransformOptions,
};
use num_traits::AsPrimitive;
use std::array::from_fn;
use std::marker::PhantomData;
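/// Legacy N-input, 3-output LUT stage: one linearization curve per ink, a CLUT
/// on a uniform grid, and three output (gamma) curves.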
#[derive(Default)]
struct KatanaLutNx3<T> {
linearization: Vec<Vec<f32>>,
clut: Vec<f32>,
grid_size: u8,
input_inks: usize,
output: [Vec<f32>; 3],
_phantom: PhantomData<T>,
bit_depth: usize,
}
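/// Legacy 3-input, N-output LUT stage for the PCS -> device direction; for RGB
/// targets with an RGBA destination layout the alpha channel is filled with the
/// maximum value.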
struct KatanaLut3xN<T> {
linearization: [Vec<f32>; 3],
clut: Vec<f32>,
grid_size: u8,
output_inks: usize,
output: Vec<Vec<f32>>,
dst_layout: Layout,
target_color_space: DataColorSpace,
_phantom: PhantomData<T>,
bit_depth: usize,
}
impl<T: Copy + PointeeSizeExpressible + AsPrimitive<f32>> KatanaLutNx3<T> {
fn to_pcs_impl(&self, input: &[T]) -> Result<Vec<f32>, CmsError> {
if input.len() % self.input_inks != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let norm_value = if T::FINITE {
1.0 / ((1u32 << self.bit_depth) - 1) as f32
} else {
1.0
};
let grid_sizes: [u8; 16] = from_fn(|i| {
if i < self.input_inks {
self.grid_size
} else {
0
}
});
let md_lut = MultidimensionalLut::new(grid_sizes, self.input_inks, 3);
let layout = Layout::from_inks(self.input_inks);
let mut inks = vec![0.; self.input_inks];
let mut dst = vec![0.; (input.len() / layout.channels()) * 3];
let fetcher = interpolate_out_function(layout);
for (dest, src) in dst
.chunks_exact_mut(3)
.zip(input.chunks_exact(layout.channels()))
{
for ((ink, src_ink), curve) in inks.iter_mut().zip(src).zip(self.linearization.iter()) {
*ink = lut_interp_linear_float(src_ink.as_() * norm_value, curve);
}
let clut = fetcher(&md_lut, &self.clut, &inks);
let pcs_x = lut_interp_linear_float(clut.v[0], &self.output[0]);
let pcs_y = lut_interp_linear_float(clut.v[1], &self.output[1]);
let pcs_z = lut_interp_linear_float(clut.v[2], &self.output[2]);
dest[0] = pcs_x;
dest[1] = pcs_y;
dest[2] = pcs_z;
}
Ok(dst)
}
}
impl<T: Copy + PointeeSizeExpressible + AsPrimitive<f32>> KatanaInitialStage<f32, T>
for KatanaLutNx3<T>
{
fn to_pcs(&self, input: &[T]) -> Result<Vec<f32>, CmsError> {
if input.len() % self.input_inks != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
self.to_pcs_impl(input)
}
}
impl<T: Copy + PointeeSizeExpressible + AsPrimitive<f32>> KatanaFinalStage<f32, T>
for KatanaLut3xN<T>
where
f32: AsPrimitive<T>,
{
fn to_output(&self, src: &mut [f32], dst: &mut [T]) -> Result<(), CmsError> {
if src.len() % 3 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let grid_sizes: [u8; 16] = from_fn(|i| {
if i < self.output_inks {
self.grid_size
} else {
0
}
});
let md_lut = MultidimensionalLut::new(grid_sizes, 3, self.output_inks);
let scale_value = if T::FINITE {
((1u32 << self.bit_depth) - 1) as f32
} else {
1.0
};
let mut working = vec![0.; self.output_inks];
for (dest, src) in dst
.chunks_exact_mut(self.dst_layout.channels())
.zip(src.chunks_exact(3))
{
let x = lut_interp_linear_float(src[0], &self.linearization[0]);
let y = lut_interp_linear_float(src[1], &self.linearization[1]);
let z = lut_interp_linear_float(src[2], &self.linearization[2]);
tetra_3i_to_any_vec(&md_lut, &self.clut, x, y, z, &mut working, self.output_inks);
for (ink, curve) in working.iter_mut().zip(self.output.iter()) {
*ink = lut_interp_linear_float(*ink, curve);
}
if T::FINITE {
for (dst, ink) in dest.iter_mut().zip(working.iter()) {
*dst = (*ink * scale_value).round().max(0.).min(scale_value).as_();
}
} else {
for (dst, ink) in dest.iter_mut().zip(working.iter()) {
*dst = (*ink * scale_value).as_();
}
}
}
if self.dst_layout == Layout::Rgba && self.target_color_space == DataColorSpace::Rgb {
for dst in dst.chunks_exact_mut(self.dst_layout.channels()) {
dst[3] = scale_value.as_();
}
}
Ok(())
}
}
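/// Builds the N -> 3 stage from a legacy LUT tag, validating the CLUT length
/// (grid_points^inputs * outputs) and slicing the packed input/output tables
/// into per-channel curves.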
fn katana_make_lut_nx3<T: Copy + PointeeSizeExpressible + AsPrimitive<f32>>(
inks: usize,
lut: &LutDataType,
_: TransformOptions,
_: DataColorSpace,
bit_depth: usize,
) -> Result<KatanaLutNx3<T>, CmsError> {
if inks != lut.num_input_channels as usize {
return Err(CmsError::UnsupportedProfileConnection);
}
if lut.num_output_channels != 3 {
return Err(CmsError::UnsupportedProfileConnection);
}
let clut_length: usize = (lut.num_clut_grid_points as usize)
.safe_powi(lut.num_input_channels as u32)?
.safe_mul(lut.num_output_channels as usize)?;
let clut_table = lut.clut_table.to_clut_f32();
if clut_table.len() != clut_length {
return Err(CmsError::MalformedClut(MalformedSize {
size: clut_table.len(),
expected: clut_length,
}));
}
let linearization_table = lut.input_table.to_clut_f32();
if linearization_table.len() < lut.num_input_table_entries as usize * inks {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: linearization_table.len(),
expected: lut.num_input_table_entries as usize * inks,
}));
}
let linearization = (0..inks)
.map(|x| {
linearization_table[x * lut.num_input_table_entries as usize
..(x + 1) * lut.num_input_table_entries as usize]
.to_vec()
})
        .collect();
let gamma_table = lut.output_table.to_clut_f32();
if gamma_table.len() < lut.num_output_table_entries as usize * 3 {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: gamma_table.len(),
expected: lut.num_output_table_entries as usize * 3,
}));
}
let gamma_curve0 = gamma_table[..lut.num_output_table_entries as usize].to_vec();
let gamma_curve1 = gamma_table
[lut.num_output_table_entries as usize..lut.num_output_table_entries as usize * 2]
.to_vec();
let gamma_curve2 = gamma_table
[lut.num_output_table_entries as usize * 2..lut.num_output_table_entries as usize * 3]
.to_vec();
let transform = KatanaLutNx3::<T> {
linearization,
clut: clut_table,
grid_size: lut.num_clut_grid_points,
output: [gamma_curve0, gamma_curve1, gamma_curve2],
input_inks: inks,
_phantom: PhantomData,
bit_depth,
};
Ok(transform)
}
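/// Builds the 3 -> N output stage: three linearization curves, a tetrahedrally
/// interpolated CLUT, and one gamma curve per output ink.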
fn katana_make_lut_3xn<T: Copy + PointeeSizeExpressible + AsPrimitive<f32>>(
inks: usize,
dst_layout: Layout,
lut: &LutDataType,
_: TransformOptions,
target_color_space: DataColorSpace,
bit_depth: usize,
) -> Result<KatanaLut3xN<T>, CmsError> {
if lut.num_input_channels as usize != 3 {
return Err(CmsError::UnsupportedProfileConnection);
}
if target_color_space == DataColorSpace::Rgb {
        if lut.num_output_channels != 3 && lut.num_output_channels != 4 {
return Err(CmsError::InvalidInksCountForProfile);
}
        if dst_layout != Layout::Rgb && dst_layout != Layout::Rgba {
return Err(CmsError::InvalidInksCountForProfile);
}
} else if lut.num_output_channels as usize != dst_layout.channels() {
return Err(CmsError::InvalidInksCountForProfile);
}
let clut_length: usize = (lut.num_clut_grid_points as usize)
.safe_powi(lut.num_input_channels as u32)?
.safe_mul(lut.num_output_channels as usize)?;
let clut_table = lut.clut_table.to_clut_f32();
if clut_table.len() != clut_length {
return Err(CmsError::MalformedClut(MalformedSize {
size: clut_table.len(),
expected: clut_length,
}));
}
let linearization_table = lut.input_table.to_clut_f32();
if linearization_table.len() < lut.num_input_table_entries as usize * 3 {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: linearization_table.len(),
expected: lut.num_input_table_entries as usize * 3,
}));
}
let linear_curve0 = linearization_table[..lut.num_input_table_entries as usize].to_vec();
let linear_curve1 = linearization_table
[lut.num_input_table_entries as usize..lut.num_input_table_entries as usize * 2]
.to_vec();
let linear_curve2 = linearization_table
[lut.num_input_table_entries as usize * 2..lut.num_input_table_entries as usize * 3]
.to_vec();
let gamma_table = lut.output_table.to_clut_f32();
if gamma_table.len() < lut.num_output_table_entries as usize * inks {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: gamma_table.len(),
expected: lut.num_output_table_entries as usize * inks,
}));
}
let gamma = (0..inks)
.map(|x| {
gamma_table[x * lut.num_output_table_entries as usize
..(x + 1) * lut.num_output_table_entries as usize]
.to_vec()
})
        .collect();
let transform = KatanaLut3xN::<T> {
linearization: [linear_curve0, linear_curve1, linear_curve2],
clut: clut_table,
grid_size: lut.num_clut_grid_points,
output: gamma,
output_inks: inks,
_phantom: PhantomData,
target_color_space,
dst_layout,
bit_depth,
};
Ok(transform)
}
pub(crate) fn katana_input_make_lut_nx3<
T: Copy + PointeeSizeExpressible + AsPrimitive<f32> + Send + Sync,
>(
src_layout: Layout,
inks: usize,
lut: &LutDataType,
options: TransformOptions,
pcs: DataColorSpace,
bit_depth: usize,
) -> Result<Box<dyn KatanaInitialStage<f32, T> + Send + Sync>, CmsError> {
if pcs == DataColorSpace::Rgb {
if lut.num_input_channels != 3 {
return Err(CmsError::InvalidAtoBLut);
}
if src_layout != Layout::Rgba && src_layout != Layout::Rgb {
return Err(CmsError::InvalidInksCountForProfile);
}
} else if lut.num_input_channels != src_layout.channels() as u8 {
return Err(CmsError::InvalidInksCountForProfile);
}
let z0 = katana_make_lut_nx3::<T>(inks, lut, options, pcs, bit_depth)?;
Ok(Box::new(z0))
}
pub(crate) fn katana_output_make_lut_3xn<
T: Copy + PointeeSizeExpressible + AsPrimitive<f32> + Send + Sync,
>(
dst_layout: Layout,
lut: &LutDataType,
options: TransformOptions,
target_color_space: DataColorSpace,
bit_depth: usize,
) -> Result<Box<dyn KatanaFinalStage<f32, T> + Send + Sync>, CmsError>
where
f32: AsPrimitive<T>,
{
let real_inks = if target_color_space == DataColorSpace::Rgb {
3
} else {
dst_layout.channels()
};
let z0 = katana_make_lut_3xn::<T>(
real_inks,
dst_layout,
lut,
options,
target_color_space,
bit_depth,
)?;
Ok(Box::new(z0))
}

View File

@@ -0,0 +1,56 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
mod finalizers;
mod md3x3;
mod md4x3;
mod md_3xn;
mod md_nx3;
mod md_pipeline;
mod pcs_stages;
mod rgb_xyz;
mod stages;
mod xyz_lab;
mod xyz_rgb;
pub(crate) use finalizers::{CopyAlphaStage, InjectAlphaStage};
pub(crate) use md_3xn::katana_multi_dimensional_3xn_to_device;
pub(crate) use md_nx3::katana_multi_dimensional_nx3_to_pcs;
pub(crate) use md_pipeline::{katana_input_make_lut_nx3, katana_output_make_lut_3xn};
pub(crate) use md3x3::{multi_dimensional_3x3_to_device, multi_dimensional_3x3_to_pcs};
pub(crate) use md4x3::multi_dimensional_4x3_to_pcs;
pub(crate) use pcs_stages::{
KatanaDefaultIntermediate, katana_pcs_lab_v2_to_v4, katana_pcs_lab_v4_to_v2,
};
pub(crate) use rgb_xyz::katana_create_rgb_lin_lut;
pub(crate) use stages::{
Katana, KatanaFinalStage, KatanaInitialStage, KatanaIntermediateStage,
KatanaPostFinalizationStage,
};
pub(crate) use xyz_lab::{KatanaStageLabToXyz, KatanaStageXyzToLab};
pub(crate) use xyz_rgb::katana_prepare_inverse_lut_rgb_xyz;

View File

@@ -0,0 +1,100 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::katana::KatanaIntermediateStage;
use crate::conversions::katana::stages::BlackholeIntermediateStage;
use crate::mlaf::mlaf;
use crate::{CmsError, ColorProfile, DataColorSpace, Matrix3f, ProfileVersion};
use std::marker::PhantomData;
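/// Applies one or more 3x3 matrices in sequence to every PCS triple using fused
/// multiply-adds.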
pub(crate) struct KatanaMatrixStage {
pub(crate) matrices: Vec<Matrix3f>,
}
impl KatanaMatrixStage {
pub(crate) fn new(matrix: Matrix3f) -> Self {
Self {
matrices: vec![matrix],
}
}
}
pub(crate) type KatanaDefaultIntermediate = dyn KatanaIntermediateStage<f32> + Send + Sync;
impl KatanaIntermediateStage<f32> for KatanaMatrixStage {
fn stage(&self, input: &mut Vec<f32>) -> Result<Vec<f32>, CmsError> {
if input.len() % 3 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
for m in self.matrices.iter() {
for dst in input.chunks_exact_mut(3) {
let x = dst[0];
let y = dst[1];
let z = dst[2];
dst[0] = mlaf(mlaf(x * m.v[0][0], y, m.v[0][1]), z, m.v[0][2]);
dst[1] = mlaf(mlaf(x * m.v[1][0], y, m.v[1][1]), z, m.v[1][2]);
dst[2] = mlaf(mlaf(x * m.v[2][0], y, m.v[2][1]), z, m.v[2][2]);
}
}
Ok(std::mem::take(input))
}
}
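/// ICC v2 encodes 16-bit Lab with full scale at 0xFF00 while v4 uses 0xFFFF, so
/// converting between the two PCS encodings is a uniform 65280/65535 rescale.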
pub(crate) fn katana_pcs_lab_v4_to_v2(profile: &ColorProfile) -> Box<KatanaDefaultIntermediate> {
if profile.pcs == DataColorSpace::Lab && profile.version_internal <= ProfileVersion::V4_0 {
let v_mat = vec![Matrix3f {
v: [
[65280.0 / 65535.0, 0., 0.],
[0., 65280.0 / 65535.0, 0.],
[0., 0., 65280.0 / 65535.0],
],
}];
return Box::new(KatanaMatrixStage { matrices: v_mat });
}
Box::new(BlackholeIntermediateStage {
_phantom: PhantomData,
})
}
pub(crate) fn katana_pcs_lab_v2_to_v4(profile: &ColorProfile) -> Box<KatanaDefaultIntermediate> {
if profile.pcs == DataColorSpace::Lab && profile.version_internal <= ProfileVersion::V4_0 {
let v_mat = vec![Matrix3f {
v: [
[65535.0 / 65280.0, 0., 0.],
[0., 65535.0 / 65280.0, 0.],
[0., 0., 65535.0 / 65280.0],
],
}];
return Box::new(KatanaMatrixStage { matrices: v_mat });
}
Box::new(BlackholeIntermediateStage {
_phantom: PhantomData,
})
}

View File

@@ -0,0 +1,161 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::katana::pcs_stages::KatanaMatrixStage;
use crate::conversions::katana::{KatanaInitialStage, KatanaIntermediateStage};
use crate::{CmsError, ColorProfile, Layout, Matrix3f, PointeeSizeExpressible, TransformOptions};
use num_traits::AsPrimitive;
use std::marker::PhantomData;
struct KatanaRgbLinearizationStage<T: Clone, const LAYOUT: u8, const LINEAR_CAP: usize> {
r_lin: Box<[f32; LINEAR_CAP]>,
g_lin: Box<[f32; LINEAR_CAP]>,
b_lin: Box<[f32; LINEAR_CAP]>,
linear_cap: usize,
bit_depth: usize,
_phantom: PhantomData<T>,
}
impl<
T: Clone + AsPrimitive<f32> + PointeeSizeExpressible,
const LAYOUT: u8,
const LINEAR_CAP: usize,
> KatanaInitialStage<f32, T> for KatanaRgbLinearizationStage<T, LAYOUT, LINEAR_CAP>
{
fn to_pcs(&self, input: &[T]) -> Result<Vec<f32>, CmsError> {
let src_layout = Layout::from(LAYOUT);
if input.len() % src_layout.channels() != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let mut dst = vec![0.; input.len() / src_layout.channels() * 3];
let scale = if T::FINITE {
(self.linear_cap as f32 - 1.) / ((1 << self.bit_depth) - 1) as f32
} else {
(T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32
};
let cap_value = if T::FINITE {
((1 << self.bit_depth) - 1) as f32
} else {
(T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32
};
for (src, dst) in input
.chunks_exact(src_layout.channels())
.zip(dst.chunks_exact_mut(3))
{
let j_r = src[0].as_() * scale;
let j_g = src[1].as_() * scale;
let j_b = src[2].as_() * scale;
dst[0] = self.r_lin[(j_r.round().min(cap_value).max(0.) as u16) as usize];
dst[1] = self.g_lin[(j_g.round().min(cap_value).max(0.) as u16) as usize];
dst[2] = self.b_lin[(j_b.round().min(cap_value).max(0.) as u16) as usize];
}
Ok(dst)
}
}
pub(crate) struct KatanaRgbLinearizationState<T> {
pub(crate) stages: Vec<Box<dyn KatanaIntermediateStage<f32> + Send + Sync>>,
pub(crate) initial_stage: Box<dyn KatanaInitialStage<f32, T> + Send + Sync>,
}
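/// Builds the initial stage for matrix-shaper RGB sources: per-channel
/// linearization LUTs followed by the RGB -> XYZ matrix combined with a
/// 32768/65535 rescale into the LUT PCS encoding (1.0 maps to 0x8000).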
pub(crate) fn katana_create_rgb_lin_lut<
T: Copy + Default + AsPrimitive<f32> + Send + Sync + AsPrimitive<usize> + PointeeSizeExpressible,
const BIT_DEPTH: usize,
const LINEAR_CAP: usize,
>(
layout: Layout,
source: &ColorProfile,
opts: TransformOptions,
) -> Result<KatanaRgbLinearizationState<T>, CmsError>
where
u32: AsPrimitive<T>,
f32: AsPrimitive<T>,
{
let lin_r =
source.build_r_linearize_table::<T, LINEAR_CAP, BIT_DEPTH>(opts.allow_use_cicp_transfer)?;
let lin_g =
source.build_g_linearize_table::<T, LINEAR_CAP, BIT_DEPTH>(opts.allow_use_cicp_transfer)?;
let lin_b =
source.build_b_linearize_table::<T, LINEAR_CAP, BIT_DEPTH>(opts.allow_use_cicp_transfer)?;
let lin_stage: Box<dyn KatanaInitialStage<f32, T> + Send + Sync> = match layout {
Layout::Rgb => {
Box::new(
KatanaRgbLinearizationStage::<T, { Layout::Rgb as u8 }, LINEAR_CAP> {
r_lin: lin_r,
g_lin: lin_g,
b_lin: lin_b,
bit_depth: BIT_DEPTH,
linear_cap: LINEAR_CAP,
_phantom: PhantomData,
},
)
}
Layout::Rgba => {
Box::new(
KatanaRgbLinearizationStage::<T, { Layout::Rgba as u8 }, LINEAR_CAP> {
r_lin: lin_r,
g_lin: lin_g,
b_lin: lin_b,
bit_depth: BIT_DEPTH,
linear_cap: LINEAR_CAP,
_phantom: PhantomData,
},
)
}
Layout::Gray => unimplemented!("Gray should not be called on Rgb/Rgba execution path"),
Layout::GrayAlpha => {
unimplemented!("GrayAlpha should not be called on Rgb/Rgba execution path")
}
_ => unreachable!(),
};
let xyz_to_rgb = source.rgb_to_xyz_matrix();
let matrices: Vec<Box<dyn KatanaIntermediateStage<f32> + Send + Sync>> =
vec![Box::new(KatanaMatrixStage {
matrices: vec![
xyz_to_rgb.to_f32(),
Matrix3f {
v: [
[32768.0 / 65535.0, 0.0, 0.0],
[0.0, 32768.0 / 65535.0, 0.0],
[0.0, 0.0, 32768.0 / 65535.0],
],
},
],
})];
Ok(KatanaRgbLinearizationState {
stages: matrices,
initial_stage: lin_stage,
})
}

View File

@@ -0,0 +1,85 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::{CmsError, TransformExecutor};
use std::marker::PhantomData;
/// `W`: working storage data type.
/// `I`: input/output data type.
pub(crate) trait KatanaInitialStage<W, I> {
fn to_pcs(&self, input: &[I]) -> Result<Vec<W>, CmsError>;
}
/// `W`: working storage data type.
/// `I`: input/output data type.
pub(crate) trait KatanaFinalStage<W, I> {
fn to_output(&self, src: &mut [W], dst: &mut [I]) -> Result<(), CmsError>;
}
/// `W`: working storage data type.
pub(crate) trait KatanaIntermediateStage<W> {
fn stage(&self, input: &mut Vec<W>) -> Result<Vec<W>, CmsError>;
}
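/// No-op stage that hands the working buffer through unchanged.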
pub(crate) struct BlackholeIntermediateStage<W> {
pub(crate) _phantom: PhantomData<W>,
}
impl<W> KatanaIntermediateStage<W> for BlackholeIntermediateStage<W> {
fn stage(&self, input: &mut Vec<W>) -> Result<Vec<W>, CmsError> {
Ok(std::mem::take(input))
}
}
/// `I`: input/output data type.
pub(crate) trait KatanaPostFinalizationStage<I> {
fn finalize(&self, src: &[I], dst: &mut [I]) -> Result<(), CmsError>;
}
/// `W`: working storage data type.
/// `I`: input/output data type.
pub(crate) struct Katana<W, I> {
pub(crate) initial_stage: Box<dyn KatanaInitialStage<W, I> + Send + Sync>,
pub(crate) final_stage: Box<dyn KatanaFinalStage<W, I> + Sync + Send>,
pub(crate) stages: Vec<Box<dyn KatanaIntermediateStage<W> + Send + Sync>>,
pub(crate) post_finalization: Vec<Box<dyn KatanaPostFinalizationStage<I> + Send + Sync>>,
}
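/// Runs the full pipeline: initial stage, intermediate stages, final stage,
/// then post-finalization stages (e.g. alpha copy or injection).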
impl<W, I: Copy + Default> TransformExecutor<I> for Katana<W, I> {
fn transform(&self, src: &[I], dst: &mut [I]) -> Result<(), CmsError> {
let mut working_vec = self.initial_stage.to_pcs(src)?;
for stage in self.stages.iter() {
working_vec = stage.stage(&mut working_vec)?;
}
self.final_stage.to_output(&mut working_vec, dst)?;
for finalization in self.post_finalization.iter() {
finalization.finalize(src, dst)?;
}
Ok(())
}
}

View File

@@ -0,0 +1,62 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::katana::KatanaIntermediateStage;
use crate::{CmsError, Lab, Xyz};
#[derive(Default)]
pub(crate) struct KatanaStageLabToXyz {}
impl KatanaIntermediateStage<f32> for KatanaStageLabToXyz {
fn stage(&self, input: &mut Vec<f32>) -> Result<Vec<f32>, CmsError> {
for dst in input.chunks_exact_mut(3) {
let lab = Lab::new(dst[0], dst[1], dst[2]);
let xyz = lab.to_pcs_xyz();
dst[0] = xyz.x;
dst[1] = xyz.y;
dst[2] = xyz.z;
}
Ok(std::mem::take(input))
}
}
#[derive(Default)]
pub(crate) struct KatanaStageXyzToLab {}
impl KatanaIntermediateStage<f32> for KatanaStageXyzToLab {
fn stage(&self, input: &mut Vec<f32>) -> Result<Vec<f32>, CmsError> {
for dst in input.chunks_exact_mut(3) {
let xyz = Xyz::new(dst[0], dst[1], dst[2]);
let lab = Lab::from_pcs_xyz(xyz);
dst[0] = lab.l;
dst[1] = lab.a;
dst[2] = lab.b;
}
Ok(std::mem::take(input))
}
}

View File

@@ -0,0 +1,223 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::katana::pcs_stages::KatanaMatrixStage;
use crate::conversions::katana::{
KatanaDefaultIntermediate, KatanaFinalStage, KatanaIntermediateStage,
};
use crate::mlaf::mlaf;
use crate::{
CmsError, ColorProfile, GammaLutInterpolate, Layout, Matrix3f, PointeeSizeExpressible,
RenderingIntent, Rgb, TransformOptions, filmlike_clip,
};
use num_traits::AsPrimitive;
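/// Final stage mapping PCS-derived linear RGB to the destination encoding via
/// 65536-entry per-channel gamma LUTs, with film-like gamut clipping for all
/// intents except absolute colorimetric.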
pub(crate) struct KatanaXyzToRgbStage<T: Clone, const LAYOUT: u8> {
pub(crate) r_gamma: Box<[T; 65536]>,
pub(crate) g_gamma: Box<[T; 65536]>,
pub(crate) b_gamma: Box<[T; 65536]>,
pub(crate) intent: RenderingIntent,
pub(crate) bit_depth: usize,
pub(crate) gamma_lut: usize,
}
impl<T: Clone + AsPrimitive<f32> + PointeeSizeExpressible, const LAYOUT: u8>
KatanaFinalStage<f32, T> for KatanaXyzToRgbStage<T, LAYOUT>
where
u32: AsPrimitive<T>,
f32: AsPrimitive<T>,
{
fn to_output(&self, src: &mut [f32], dst: &mut [T]) -> Result<(), CmsError> {
let dst_cn = Layout::from(LAYOUT);
let dst_channels = dst_cn.channels();
if src.len() % 3 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / 3;
let dst_chunks = dst.len() / dst_channels;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
let max_colors: T = (if T::FINITE {
((1u32 << self.bit_depth) - 1) as f32
} else {
1.0
})
.as_();
let lut_cap = (self.gamma_lut - 1) as f32;
if self.intent != RenderingIntent::AbsoluteColorimetric {
for (src, dst) in src.chunks_exact(3).zip(dst.chunks_exact_mut(dst_channels)) {
let mut rgb = Rgb::new(src[0], src[1], src[2]);
if rgb.is_out_of_gamut() {
rgb = filmlike_clip(rgb);
}
let r = mlaf(0.5, rgb.r, lut_cap).min(lut_cap).max(0.) as u16;
let g = mlaf(0.5, rgb.g, lut_cap).min(lut_cap).max(0.) as u16;
let b = mlaf(0.5, rgb.b, lut_cap).min(lut_cap).max(0.) as u16;
dst[0] = self.r_gamma[r as usize];
dst[1] = self.g_gamma[g as usize];
dst[2] = self.b_gamma[b as usize];
if dst_cn == Layout::Rgba {
dst[3] = max_colors;
}
}
} else {
for (src, dst) in src.chunks_exact(3).zip(dst.chunks_exact_mut(dst_channels)) {
let rgb = Rgb::new(src[0], src[1], src[2]);
let r = mlaf(0.5, rgb.r, lut_cap).min(lut_cap).max(0.) as u16;
let g = mlaf(0.5, rgb.g, lut_cap).min(lut_cap).max(0.) as u16;
let b = mlaf(0.5, rgb.b, lut_cap).min(lut_cap).max(0.) as u16;
dst[0] = self.r_gamma[r as usize];
dst[1] = self.g_gamma[g as usize];
dst[2] = self.b_gamma[b as usize];
if dst_cn == Layout::Rgba {
dst[3] = max_colors;
}
}
}
Ok(())
}
}
pub(crate) struct KatanaXyzRgbState<T> {
pub(crate) stages: Vec<Box<dyn KatanaIntermediateStage<f32> + Send + Sync>>,
pub(crate) final_stage: Box<dyn KatanaFinalStage<f32, T> + Send + Sync>,
}
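/// Builds the final stage for matrix-shaper RGB destinations: undoes the
/// 32768/65535 PCS scaling, applies the XYZ -> RGB matrix, then encodes through
/// the per-channel gamma LUTs.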
pub(crate) fn katana_prepare_inverse_lut_rgb_xyz<
T: Copy
+ Default
+ AsPrimitive<f32>
+ Send
+ Sync
+ AsPrimitive<usize>
+ PointeeSizeExpressible
+ GammaLutInterpolate,
const BIT_DEPTH: usize,
const GAMMA_LUT: usize,
>(
dest: &ColorProfile,
dest_layout: Layout,
options: TransformOptions,
) -> Result<KatanaXyzRgbState<T>, CmsError>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
{
// if !T::FINITE {
// if let Some(extended_gamma) = dest.try_extended_gamma_evaluator() {
// let xyz_to_rgb = dest.rgb_to_xyz_matrix().inverse();
//
// let mut matrices = vec![Matrix3f {
// v: [
// [65535.0 / 32768.0, 0.0, 0.0],
// [0.0, 65535.0 / 32768.0, 0.0],
// [0.0, 0.0, 65535.0 / 32768.0],
// ],
// }];
//
// matrices.push(xyz_to_rgb.to_f32());
// let xyz_to_rgb_stage = XyzToRgbStageExtended::<T> {
// gamma_evaluator: extended_gamma,
// matrices,
// phantom_data: PhantomData,
// };
// xyz_to_rgb_stage.transform(lut)?;
// return Ok(());
// }
// }
let gamma_map_r = dest.build_gamma_table::<T, 65536, GAMMA_LUT, BIT_DEPTH>(
&dest.red_trc,
options.allow_use_cicp_transfer,
)?;
let gamma_map_g = dest.build_gamma_table::<T, 65536, GAMMA_LUT, BIT_DEPTH>(
&dest.green_trc,
options.allow_use_cicp_transfer,
)?;
let gamma_map_b = dest.build_gamma_table::<T, 65536, GAMMA_LUT, BIT_DEPTH>(
&dest.blue_trc,
options.allow_use_cicp_transfer,
)?;
let xyz_to_rgb = dest.rgb_to_xyz_matrix().inverse();
let mut matrices: Vec<Box<KatanaDefaultIntermediate>> =
vec![Box::new(KatanaMatrixStage::new(Matrix3f {
v: [
[65535.0 / 32768.0, 0.0, 0.0],
[0.0, 65535.0 / 32768.0, 0.0],
[0.0, 0.0, 65535.0 / 32768.0],
],
}))];
matrices.push(Box::new(KatanaMatrixStage::new(xyz_to_rgb.to_f32())));
match dest_layout {
Layout::Rgb => {
let xyz_to_rgb_stage = KatanaXyzToRgbStage::<T, { Layout::Rgb as u8 }> {
r_gamma: gamma_map_r,
g_gamma: gamma_map_g,
b_gamma: gamma_map_b,
intent: options.rendering_intent,
bit_depth: BIT_DEPTH,
gamma_lut: GAMMA_LUT,
};
Ok(KatanaXyzRgbState {
stages: matrices,
final_stage: Box::new(xyz_to_rgb_stage),
})
}
Layout::Rgba => {
let xyz_to_rgb_stage = KatanaXyzToRgbStage::<T, { Layout::Rgba as u8 }> {
r_gamma: gamma_map_r,
g_gamma: gamma_map_g,
b_gamma: gamma_map_b,
intent: options.rendering_intent,
bit_depth: BIT_DEPTH,
gamma_lut: GAMMA_LUT,
};
Ok(KatanaXyzRgbState {
stages: matrices,
final_stage: Box::new(xyz_to_rgb_stage),
})
}
Layout::Gray => unreachable!("Gray layout must not be called on Rgb/Rgba path"),
        Layout::GrayAlpha => unreachable!("GrayAlpha layout must not be called on Rgb/Rgba path"),
_ => unreachable!(
"layout {:?} should not be called on xyz->rgb path",
dest_layout
),
}
}

428
vendor/moxcms/src/conversions/lut3x3.rs vendored Normal file
View File

@@ -0,0 +1,428 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::katana::{KatanaFinalStage, KatanaInitialStage};
use crate::err::MalformedSize;
use crate::profile::LutDataType;
use crate::safe_math::{SafeMul, SafePowi};
use crate::trc::lut_interp_linear_float;
use crate::{
CmsError, Cube, DataColorSpace, InterpolationMethod, PointeeSizeExpressible, Stage,
TransformOptions, Vector3f,
};
use num_traits::AsPrimitive;
#[derive(Default)]
struct Lut3x3 {
input: [Vec<f32>; 3],
clut: Vec<f32>,
grid_size: u8,
gamma: [Vec<f32>; 3],
interpolation_method: InterpolationMethod,
pcs: DataColorSpace,
}
#[derive(Default)]
struct KatanaLut3x3<T: Copy + Default> {
input: [Vec<f32>; 3],
clut: Vec<f32>,
grid_size: u8,
gamma: [Vec<f32>; 3],
interpolation_method: InterpolationMethod,
pcs: DataColorSpace,
_phantom: std::marker::PhantomData<T>,
bit_depth: usize,
}
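/// Parses a legacy 3 -> 3 LUT tag into linearization curves, the CLUT, and
/// gamma curves, validating every table length first.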
fn make_lut_3x3(
lut: &LutDataType,
options: TransformOptions,
pcs: DataColorSpace,
) -> Result<Lut3x3, CmsError> {
let clut_length: usize = (lut.num_clut_grid_points as usize)
.safe_powi(lut.num_input_channels as u32)?
.safe_mul(lut.num_output_channels as usize)?;
let lin_table = lut.input_table.to_clut_f32();
if lin_table.len() < lut.num_input_table_entries as usize * 3 {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: lin_table.len(),
expected: lut.num_input_table_entries as usize * 3,
}));
}
let lin_curve0 = lin_table[..lut.num_input_table_entries as usize].to_vec();
let lin_curve1 = lin_table
[lut.num_input_table_entries as usize..lut.num_input_table_entries as usize * 2]
.to_vec();
let lin_curve2 = lin_table
[lut.num_input_table_entries as usize * 2..lut.num_input_table_entries as usize * 3]
.to_vec();
let clut_table = lut.clut_table.to_clut_f32();
if clut_table.len() != clut_length {
return Err(CmsError::MalformedClut(MalformedSize {
size: clut_table.len(),
expected: clut_length,
}));
}
let gamma_curves = lut.output_table.to_clut_f32();
if gamma_curves.len() < lut.num_output_table_entries as usize * 3 {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: gamma_curves.len(),
expected: lut.num_output_table_entries as usize * 3,
}));
}
let gamma_curve0 = gamma_curves[..lut.num_output_table_entries as usize].to_vec();
let gamma_curve1 = gamma_curves
[lut.num_output_table_entries as usize..lut.num_output_table_entries as usize * 2]
.to_vec();
let gamma_curve2 = gamma_curves
[lut.num_output_table_entries as usize * 2..lut.num_output_table_entries as usize * 3]
.to_vec();
let transform = Lut3x3 {
input: [lin_curve0, lin_curve1, lin_curve2],
gamma: [gamma_curve0, gamma_curve1, gamma_curve2],
interpolation_method: options.interpolation_method,
clut: clut_table,
grid_size: lut.num_clut_grid_points,
pcs,
};
Ok(transform)
}
fn stage_lut_3x3(
lut: &LutDataType,
options: TransformOptions,
pcs: DataColorSpace,
) -> Result<Box<dyn Stage>, CmsError> {
let lut = make_lut_3x3(lut, options, pcs)?;
let transform = Lut3x3 {
input: lut.input,
gamma: lut.gamma,
interpolation_method: lut.interpolation_method,
clut: lut.clut,
grid_size: lut.grid_size,
pcs: lut.pcs,
};
Ok(Box::new(transform))
}
pub(crate) fn katana_input_stage_lut_3x3<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
>(
lut: &LutDataType,
options: TransformOptions,
pcs: DataColorSpace,
bit_depth: usize,
) -> Result<Box<dyn KatanaInitialStage<f32, T> + Send + Sync>, CmsError>
where
f32: AsPrimitive<T>,
{
let lut = make_lut_3x3(lut, options, pcs)?;
let transform = KatanaLut3x3::<T> {
input: lut.input,
gamma: lut.gamma,
interpolation_method: lut.interpolation_method,
clut: lut.clut,
grid_size: lut.grid_size,
pcs: lut.pcs,
_phantom: std::marker::PhantomData,
bit_depth,
};
Ok(Box::new(transform))
}
pub(crate) fn katana_output_stage_lut_3x3<
T: Copy + Default + AsPrimitive<f32> + PointeeSizeExpressible + Send + Sync,
>(
lut: &LutDataType,
options: TransformOptions,
pcs: DataColorSpace,
bit_depth: usize,
) -> Result<Box<dyn KatanaFinalStage<f32, T> + Send + Sync>, CmsError>
where
f32: AsPrimitive<T>,
{
let lut = make_lut_3x3(lut, options, pcs)?;
let transform = KatanaLut3x3::<T> {
input: lut.input,
gamma: lut.gamma,
interpolation_method: lut.interpolation_method,
clut: lut.clut,
grid_size: lut.grid_size,
pcs: lut.pcs,
_phantom: std::marker::PhantomData,
bit_depth,
};
Ok(Box::new(transform))
}
impl Lut3x3 {
fn transform_impl<Fetch: Fn(f32, f32, f32) -> Vector3f>(
&self,
src: &[f32],
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
let linearization_0 = &self.input[0];
let linearization_1 = &self.input[1];
let linearization_2 = &self.input[2];
for (dest, src) in dst.chunks_exact_mut(3).zip(src.chunks_exact(3)) {
debug_assert!(self.grid_size as i32 >= 1);
let linear_x = lut_interp_linear_float(src[0], linearization_0);
let linear_y = lut_interp_linear_float(src[1], linearization_1);
let linear_z = lut_interp_linear_float(src[2], linearization_2);
let clut = fetch(linear_x, linear_y, linear_z);
let pcs_x = lut_interp_linear_float(clut.v[0], &self.gamma[0]);
let pcs_y = lut_interp_linear_float(clut.v[1], &self.gamma[1]);
let pcs_z = lut_interp_linear_float(clut.v[2], &self.gamma[2]);
dest[0] = pcs_x;
dest[1] = pcs_y;
dest[2] = pcs_z;
}
Ok(())
}
}
impl Stage for Lut3x3 {
fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> {
let l_tbl = Cube::new(&self.clut, self.grid_size as usize);
        // If the PCS is Lab or XYZ, trilinear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(src, dst, |x, y, z| l_tbl.trilinear_vec3(x, y, z));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(src, dst, |x, y, z| l_tbl.tetra_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(src, dst, |x, y, z| l_tbl.pyramid_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(src, dst, |x, y, z| l_tbl.prism_vec3(x, y, z))?;
}
InterpolationMethod::Linear => {
self.transform_impl(src, dst, |x, y, z| l_tbl.trilinear_vec3(x, y, z))?;
}
}
Ok(())
}
}
impl<T: Copy + Default + PointeeSizeExpressible + AsPrimitive<f32>> KatanaLut3x3<T>
where
f32: AsPrimitive<T>,
{
fn to_pcs_impl<Fetch: Fn(f32, f32, f32) -> Vector3f>(
&self,
input: &[T],
fetch: Fetch,
) -> Result<Vec<f32>, CmsError> {
if input.len() % 3 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let normalizing_value = if T::FINITE {
1.0 / ((1u32 << self.bit_depth) - 1) as f32
} else {
1.0
};
let mut dst = vec![0.; input.len()];
let linearization_0 = &self.input[0];
let linearization_1 = &self.input[1];
let linearization_2 = &self.input[2];
for (dest, src) in dst.chunks_exact_mut(3).zip(input.chunks_exact(3)) {
let linear_x =
lut_interp_linear_float(src[0].as_() * normalizing_value, linearization_0);
let linear_y =
lut_interp_linear_float(src[1].as_() * normalizing_value, linearization_1);
let linear_z =
lut_interp_linear_float(src[2].as_() * normalizing_value, linearization_2);
let clut = fetch(linear_x, linear_y, linear_z);
let pcs_x = lut_interp_linear_float(clut.v[0], &self.gamma[0]);
let pcs_y = lut_interp_linear_float(clut.v[1], &self.gamma[1]);
let pcs_z = lut_interp_linear_float(clut.v[2], &self.gamma[2]);
dest[0] = pcs_x;
dest[1] = pcs_y;
dest[2] = pcs_z;
}
Ok(dst)
}
fn to_output<Fetch: Fn(f32, f32, f32) -> Vector3f>(
&self,
src: &[f32],
dst: &mut [T],
fetch: Fetch,
) -> Result<(), CmsError> {
if src.len() % 3 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % 3 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() != src.len() {
return Err(CmsError::LaneSizeMismatch);
}
let norm_value = if T::FINITE {
((1u32 << self.bit_depth) - 1) as f32
} else {
1.0
};
let linearization_0 = &self.input[0];
let linearization_1 = &self.input[1];
let linearization_2 = &self.input[2];
for (dest, src) in dst.chunks_exact_mut(3).zip(src.chunks_exact(3)) {
let linear_x = lut_interp_linear_float(src[0], linearization_0);
let linear_y = lut_interp_linear_float(src[1], linearization_1);
let linear_z = lut_interp_linear_float(src[2], linearization_2);
let clut = fetch(linear_x, linear_y, linear_z);
let pcs_x = lut_interp_linear_float(clut.v[0], &self.gamma[0]);
let pcs_y = lut_interp_linear_float(clut.v[1], &self.gamma[1]);
let pcs_z = lut_interp_linear_float(clut.v[2], &self.gamma[2]);
if T::FINITE {
dest[0] = (pcs_x * norm_value).round().max(0.0).min(norm_value).as_();
dest[1] = (pcs_y * norm_value).round().max(0.0).min(norm_value).as_();
dest[2] = (pcs_z * norm_value).round().max(0.0).min(norm_value).as_();
} else {
dest[0] = pcs_x.as_();
dest[1] = pcs_y.as_();
dest[2] = pcs_z.as_();
}
}
Ok(())
}
}
impl<T: Copy + Default + PointeeSizeExpressible + AsPrimitive<f32>> KatanaInitialStage<f32, T>
for KatanaLut3x3<T>
where
f32: AsPrimitive<T>,
{
fn to_pcs(&self, input: &[T]) -> Result<Vec<f32>, CmsError> {
let l_tbl = Cube::new(&self.clut, self.grid_size as usize);
        // If the PCS is Lab or XYZ, trilinear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.to_pcs_impl(input, |x, y, z| l_tbl.trilinear_vec3(x, y, z));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.to_pcs_impl(input, |x, y, z| l_tbl.tetra_vec3(x, y, z))
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.to_pcs_impl(input, |x, y, z| l_tbl.pyramid_vec3(x, y, z))
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.to_pcs_impl(input, |x, y, z| l_tbl.prism_vec3(x, y, z))
}
InterpolationMethod::Linear => {
self.to_pcs_impl(input, |x, y, z| l_tbl.trilinear_vec3(x, y, z))
}
}
}
}
impl<T: Copy + Default + PointeeSizeExpressible + AsPrimitive<f32>> KatanaFinalStage<f32, T>
for KatanaLut3x3<T>
where
f32: AsPrimitive<T>,
{
fn to_output(&self, src: &mut [f32], dst: &mut [T]) -> Result<(), CmsError> {
let l_tbl = Cube::new(&self.clut, self.grid_size as usize);
        // If the PCS is Lab or XYZ, trilinear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.to_output(src, dst, |x, y, z| l_tbl.trilinear_vec3(x, y, z));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.to_output(src, dst, |x, y, z| l_tbl.tetra_vec3(x, y, z))
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.to_output(src, dst, |x, y, z| l_tbl.pyramid_vec3(x, y, z))
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.to_output(src, dst, |x, y, z| l_tbl.prism_vec3(x, y, z))
}
InterpolationMethod::Linear => {
self.to_output(src, dst, |x, y, z| l_tbl.trilinear_vec3(x, y, z))
}
}
}
}
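/// Runs a 3 -> 3 LUT tag over a flat `f32` lane and returns the transformed
/// buffer.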
pub(crate) fn create_lut3x3(
lut: &LutDataType,
src: &[f32],
options: TransformOptions,
pcs: DataColorSpace,
) -> Result<Vec<f32>, CmsError> {
if lut.num_input_channels != 3 || lut.num_output_channels != 3 {
return Err(CmsError::UnsupportedProfileConnection);
}
let mut dest = vec![0.; src.len()];
let lut_stage = stage_lut_3x3(lut, options, pcs)?;
lut_stage.transform(src, &mut dest)?;
Ok(dest)
}

248
vendor/moxcms/src/conversions/lut3x4.rs vendored Normal file
View File

@@ -0,0 +1,248 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::profile::LutDataType;
use crate::safe_math::{SafeMul, SafePowi};
use crate::trc::lut_interp_linear_float;
use crate::{
CmsError, Cube, DataColorSpace, InterpolationMethod, MalformedSize, Stage, TransformOptions,
Vector4f,
};
use num_traits::AsPrimitive;
#[derive(Default)]
struct Lut3x4 {
input: [Vec<f32>; 3],
clut: Vec<f32>,
grid_size: u8,
gamma: [Vec<f32>; 4],
interpolation_method: InterpolationMethod,
pcs: DataColorSpace,
}
fn make_lut_3x4(
lut: &LutDataType,
options: TransformOptions,
pcs: DataColorSpace,
) -> Result<Lut3x4, CmsError> {
let clut_length: usize = (lut.num_clut_grid_points as usize)
.safe_powi(lut.num_input_channels as u32)?
.safe_mul(lut.num_output_channels as usize)?;
let clut_table = lut.clut_table.to_clut_f32();
if clut_table.len() != clut_length {
return Err(CmsError::MalformedClut(MalformedSize {
size: clut_table.len(),
expected: clut_length,
}));
}
let linearization_table = lut.input_table.to_clut_f32();
if linearization_table.len() < lut.num_input_table_entries as usize * 3 {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: linearization_table.len(),
expected: lut.num_input_table_entries as usize * 3,
}));
}
let linear_curve0 = linearization_table[..lut.num_input_table_entries as usize].to_vec();
let linear_curve1 = linearization_table
[lut.num_input_table_entries as usize..lut.num_input_table_entries as usize * 2]
.to_vec();
let linear_curve2 = linearization_table
[lut.num_input_table_entries as usize * 2..lut.num_input_table_entries as usize * 3]
.to_vec();
let gamma_table = lut.output_table.to_clut_f32();
if gamma_table.len() < lut.num_output_table_entries as usize * 4 {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: gamma_table.len(),
expected: lut.num_output_table_entries as usize * 4,
}));
}
let gamma_curve0 = gamma_table[..lut.num_output_table_entries as usize].to_vec();
let gamma_curve1 = gamma_table
[lut.num_output_table_entries as usize..lut.num_output_table_entries as usize * 2]
.to_vec();
let gamma_curve2 = gamma_table
[lut.num_output_table_entries as usize * 2..lut.num_output_table_entries as usize * 3]
.to_vec();
let gamma_curve3 = gamma_table
[lut.num_output_table_entries as usize * 3..lut.num_output_table_entries as usize * 4]
.to_vec();
let transform = Lut3x4 {
input: [linear_curve0, linear_curve1, linear_curve2],
interpolation_method: options.interpolation_method,
clut: clut_table,
grid_size: lut.num_clut_grid_points,
pcs,
gamma: [gamma_curve0, gamma_curve1, gamma_curve2, gamma_curve3],
};
Ok(transform)
}
fn stage_lut_3x4(
lut: &LutDataType,
options: TransformOptions,
pcs: DataColorSpace,
) -> Result<Box<dyn Stage>, CmsError> {
// `make_lut_3x4` already yields a fully initialized stage; box it directly
// instead of copying every field into an identical struct.
let lut = make_lut_3x4(lut, options, pcs)?;
Ok(Box::new(lut))
}
impl Lut3x4 {
fn transform_impl<Fetch: Fn(f32, f32, f32) -> Vector4f>(
&self,
src: &[f32],
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
let linearization_0 = &self.input[0];
let linearization_1 = &self.input[1];
let linearization_2 = &self.input[2];
for (dest, src) in dst.chunks_exact_mut(4).zip(src.chunks_exact(3)) {
debug_assert!(self.grid_size as i32 >= 1);
let linear_x = lut_interp_linear_float(src[0], linearization_0);
let linear_y = lut_interp_linear_float(src[1], linearization_1);
let linear_z = lut_interp_linear_float(src[2], linearization_2);
let clut = fetch(linear_x, linear_y, linear_z);
let pcs_x = lut_interp_linear_float(clut.v[0], &self.gamma[0]);
let pcs_y = lut_interp_linear_float(clut.v[1], &self.gamma[1]);
let pcs_z = lut_interp_linear_float(clut.v[2], &self.gamma[2]);
let pcs_w = lut_interp_linear_float(clut.v[3], &self.gamma[3]);
dest[0] = pcs_x;
dest[1] = pcs_y;
dest[2] = pcs_z;
dest[3] = pcs_w;
}
Ok(())
}
}
impl Stage for Lut3x4 {
fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> {
let l_tbl = Cube::new(&self.clut, self.grid_size as usize);
// If the PCS is Lab or XYZ, trilinear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(src, dst, |x, y, z| l_tbl.trilinear_vec4(x, y, z));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(src, dst, |x, y, z| l_tbl.tetra_vec4(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(src, dst, |x, y, z| l_tbl.pyramid_vec4(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(src, dst, |x, y, z| l_tbl.prism_vec4(x, y, z))?;
}
InterpolationMethod::Linear => {
self.transform_impl(src, dst, |x, y, z| l_tbl.trilinear_vec4(x, y, z))?;
}
}
Ok(())
}
}
pub(crate) fn create_lut3_samples<T: Copy + 'static, const SAMPLES: usize>() -> Vec<T>
where
u32: AsPrimitive<T>,
{
let lut_size: u32 = (3 * SAMPLES * SAMPLES * SAMPLES) as u32;
assert!(SAMPLES >= 1);
let mut src = Vec::with_capacity(lut_size as usize);
for x in 0..SAMPLES as u32 {
for y in 0..SAMPLES as u32 {
for z in 0..SAMPLES as u32 {
src.push(x.as_());
src.push(y.as_());
src.push(z.as_());
}
}
}
src
}
pub(crate) fn create_lut3_samples_norm<const SAMPLES: usize>() -> Vec<f32> {
let lut_size: u32 = (3 * SAMPLES * SAMPLES * SAMPLES) as u32;
// At least two samples are required so the normalization scale below is finite.
assert!(SAMPLES >= 2);
let scale = 1. / (SAMPLES as f32 - 1.0);
let mut src = Vec::with_capacity(lut_size as usize);
for x in 0..SAMPLES as u32 {
for y in 0..SAMPLES as u32 {
for z in 0..SAMPLES as u32 {
src.push(x as f32 * scale);
src.push(y as f32 * scale);
src.push(z as f32 * scale);
}
}
}
src
}
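// A minimal test sketch (editorial addition, not part of the vendored crate):
// for SAMPLES = 2 the normalized grid should enumerate the unit cube from
// (0, 0, 0) to (1, 1, 1) in x-major order.
#[cfg(test)]
mod lut3_sample_tests {
use super::*;
#[test]
fn norm_samples_span_unit_cube() {
let s = create_lut3_samples_norm::<2>();
assert_eq!(s.len(), 3 * 2 * 2 * 2);
assert_eq!(&s[..3], &[0.0, 0.0, 0.0]);
assert_eq!(&s[s.len() - 3..], &[1.0, 1.0, 1.0]);
}
}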
pub(crate) fn create_lut3x4(
lut: &LutDataType,
src: &[f32],
options: TransformOptions,
pcs: DataColorSpace,
) -> Result<Vec<f32>, CmsError> {
if lut.num_input_channels != 3 || lut.num_output_channels != 4 {
return Err(CmsError::UnsupportedProfileConnection);
}
let mut dest = vec![0.; (src.len() / 3) * 4];
let lut_stage = stage_lut_3x4(lut, options, pcs)?;
lut_stage.transform(src, &mut dest)?;
Ok(dest)
}

392
vendor/moxcms/src/conversions/lut4.rs vendored Normal file
View File

@@ -0,0 +1,392 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::katana::KatanaInitialStage;
use crate::profile::LutDataType;
use crate::safe_math::{SafeMul, SafePowi};
use crate::trc::lut_interp_linear_float;
use crate::{
CmsError, DataColorSpace, Hypercube, InterpolationMethod, MalformedSize,
PointeeSizeExpressible, Stage, TransformOptions, Vector3f,
};
use num_traits::AsPrimitive;
use std::marker::PhantomData;
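/// A 4-input/3-output LUT pipeline (e.g. CMYK -> Lab/XYZ): four input
/// linearization curves, a 4D CLUT, then three output curves.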
#[allow(unused)]
#[derive(Default)]
struct Lut4x3 {
linearization: [Vec<f32>; 4],
clut: Vec<f32>,
grid_size: u8,
output: [Vec<f32>; 3],
interpolation_method: InterpolationMethod,
pcs: DataColorSpace,
}
#[allow(unused)]
#[derive(Default)]
struct KatanaLut4x3<T: Copy + PointeeSizeExpressible + AsPrimitive<f32>> {
linearization: [Vec<f32>; 4],
clut: Vec<f32>,
grid_size: u8,
output: [Vec<f32>; 3],
interpolation_method: InterpolationMethod,
pcs: DataColorSpace,
_phantom: PhantomData<T>,
bit_depth: usize,
}
#[allow(unused)]
impl Lut4x3 {
fn transform_impl<Fetch: Fn(f32, f32, f32, f32) -> Vector3f>(
&self,
src: &[f32],
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
let linearization_0 = &self.linearization[0];
let linearization_1 = &self.linearization[1];
let linearization_2 = &self.linearization[2];
let linearization_3 = &self.linearization[3];
for (dest, src) in dst.chunks_exact_mut(3).zip(src.chunks_exact(4)) {
debug_assert!(self.grid_size as i32 >= 1);
let linear_x = lut_interp_linear_float(src[0], linearization_0);
let linear_y = lut_interp_linear_float(src[1], linearization_1);
let linear_z = lut_interp_linear_float(src[2], linearization_2);
let linear_w = lut_interp_linear_float(src[3], linearization_3);
let clut = fetch(linear_x, linear_y, linear_z, linear_w);
let pcs_x = lut_interp_linear_float(clut.v[0], &self.output[0]);
let pcs_y = lut_interp_linear_float(clut.v[1], &self.output[1]);
let pcs_z = lut_interp_linear_float(clut.v[2], &self.output[2]);
dest[0] = pcs_x;
dest[1] = pcs_y;
dest[2] = pcs_z;
}
Ok(())
}
}
macro_rules! define_lut4_dispatch {
($dispatcher: ident) => {
impl Stage for $dispatcher {
fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> {
let l_tbl = Hypercube::new(&self.clut, self.grid_size as usize);
// If the source PCS is Lab or XYZ, quadlinear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self
.transform_impl(src, dst, |x, y, z, w| l_tbl.quadlinear_vec3(x, y, z, w));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(src, dst, |x, y, z, w| l_tbl.tetra_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(src, dst, |x, y, z, w| l_tbl.pyramid_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(src, dst, |x, y, z, w| l_tbl.prism_vec3(x, y, z, w))?
}
InterpolationMethod::Linear => {
self.transform_impl(src, dst, |x, y, z, w| {
l_tbl.quadlinear_vec3(x, y, z, w)
})?
}
}
Ok(())
}
}
};
}
impl<T: Copy + PointeeSizeExpressible + AsPrimitive<f32>> KatanaLut4x3<T> {
fn to_pcs_impl<Fetch: Fn(f32, f32, f32, f32) -> Vector3f>(
&self,
input: &[T],
fetch: Fetch,
) -> Result<Vec<f32>, CmsError> {
if input.len() % 4 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
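// Integer-backed sample types (T::FINITE) are rescaled from
// [0, 2^bit_depth - 1] into [0, 1] (e.g. 1/255 for 8-bit input); float
// samples are assumed to already be normalized.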
let norm_value = if T::FINITE {
1.0 / ((1u32 << self.bit_depth) - 1) as f32
} else {
1.0
};
let mut dst = vec![0.; (input.len() / 4) * 3];
let linearization_0 = &self.linearization[0];
let linearization_1 = &self.linearization[1];
let linearization_2 = &self.linearization[2];
let linearization_3 = &self.linearization[3];
for (dest, src) in dst.chunks_exact_mut(3).zip(input.chunks_exact(4)) {
let linear_x = lut_interp_linear_float(src[0].as_() * norm_value, linearization_0);
let linear_y = lut_interp_linear_float(src[1].as_() * norm_value, linearization_1);
let linear_z = lut_interp_linear_float(src[2].as_() * norm_value, linearization_2);
let linear_w = lut_interp_linear_float(src[3].as_() * norm_value, linearization_3);
let clut = fetch(linear_x, linear_y, linear_z, linear_w);
let pcs_x = lut_interp_linear_float(clut.v[0], &self.output[0]);
let pcs_y = lut_interp_linear_float(clut.v[1], &self.output[1]);
let pcs_z = lut_interp_linear_float(clut.v[2], &self.output[2]);
dest[0] = pcs_x;
dest[1] = pcs_y;
dest[2] = pcs_z;
}
Ok(dst)
}
}
impl<T: Copy + PointeeSizeExpressible + AsPrimitive<f32>> KatanaInitialStage<f32, T>
for KatanaLut4x3<T>
{
fn to_pcs(&self, input: &[T]) -> Result<Vec<f32>, CmsError> {
if input.len() % 4 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let l_tbl = Hypercube::new(&self.clut, self.grid_size as usize);
// If the source PCS is Lab or XYZ, quadlinear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.to_pcs_impl(input, |x, y, z, w| l_tbl.quadlinear_vec3(x, y, z, w));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.to_pcs_impl(input, |x, y, z, w| l_tbl.tetra_vec3(x, y, z, w))
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.to_pcs_impl(input, |x, y, z, w| l_tbl.pyramid_vec3(x, y, z, w))
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.to_pcs_impl(input, |x, y, z, w| l_tbl.prism_vec3(x, y, z, w))
}
InterpolationMethod::Linear => {
self.to_pcs_impl(input, |x, y, z, w| l_tbl.quadlinear_vec3(x, y, z, w))
}
}
}
}
define_lut4_dispatch!(Lut4x3);
fn make_lut_4x3(
lut: &LutDataType,
options: TransformOptions,
pcs: DataColorSpace,
) -> Result<Lut4x3, CmsError> {
// There are four possible cases:
// - All curves are non-linear
// - Linearization curves are non-linear, but gamma is linear
// - Gamma curves are non-linear, but linearization is linear
// - All curves linear
let clut_length: usize = (lut.num_clut_grid_points as usize)
.safe_powi(lut.num_input_channels as u32)?
.safe_mul(lut.num_output_channels as usize)?;
let clut_table = lut.clut_table.to_clut_f32();
if clut_table.len() != clut_length {
return Err(CmsError::MalformedClut(MalformedSize {
size: clut_table.len(),
expected: clut_length,
}));
}
let linearization_table = lut.input_table.to_clut_f32();
if linearization_table.len() < lut.num_input_table_entries as usize * 4 {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: linearization_table.len(),
expected: lut.num_input_table_entries as usize * 4,
}));
}
let lin_curve0 = linearization_table[0..lut.num_input_table_entries as usize].to_vec();
let lin_curve1 = linearization_table
[lut.num_input_table_entries as usize..lut.num_input_table_entries as usize * 2]
.to_vec();
let lin_curve2 = linearization_table
[lut.num_input_table_entries as usize * 2..lut.num_input_table_entries as usize * 3]
.to_vec();
let lin_curve3 = linearization_table
[lut.num_input_table_entries as usize * 3..lut.num_input_table_entries as usize * 4]
.to_vec();
let gamma_table = lut.output_table.to_clut_f32();
if gamma_table.len() < lut.num_output_table_entries as usize * 3 {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: gamma_table.len(),
expected: lut.num_output_table_entries as usize * 3,
}));
}
let gamma_curve0 = gamma_table[..lut.num_output_table_entries as usize].to_vec();
let gamma_curve1 = gamma_table
[lut.num_output_table_entries as usize..lut.num_output_table_entries as usize * 2]
.to_vec();
let gamma_curve2 = gamma_table
[lut.num_output_table_entries as usize * 2..lut.num_output_table_entries as usize * 3]
.to_vec();
let transform = Lut4x3 {
linearization: [lin_curve0, lin_curve1, lin_curve2, lin_curve3],
interpolation_method: options.interpolation_method,
pcs,
clut: clut_table,
grid_size: lut.num_clut_grid_points,
output: [gamma_curve0, gamma_curve1, gamma_curve2],
};
Ok(transform)
}
fn stage_lut_4x3(
lut: &LutDataType,
options: TransformOptions,
pcs: DataColorSpace,
) -> Result<Box<dyn Stage>, CmsError> {
let lut = make_lut_4x3(lut, options, pcs)?;
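// Dispatch order: a compile-time NEON build wins outright; otherwise AVX2+FMA
// is tried via runtime detection on x86_64, with the scalar Lut4x3 as the
// final fallback.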
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
{
use crate::conversions::neon::Lut4x3Neon;
let transform = Lut4x3Neon {
linearization: lut.linearization,
interpolation_method: lut.interpolation_method,
pcs: lut.pcs,
clut: lut.clut,
grid_size: lut.grid_size,
output: lut.output,
};
Ok(Box::new(transform))
}
#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
{
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
{
use crate::conversions::avx::Lut4x3AvxFma;
if std::arch::is_x86_feature_detected!("avx2")
&& std::arch::is_x86_feature_detected!("fma")
{
let transform = Lut4x3AvxFma {
linearization: lut.linearization,
interpolation_method: lut.interpolation_method,
pcs: lut.pcs,
clut: lut.clut,
grid_size: lut.grid_size,
output: lut.output,
};
return Ok(Box::new(transform));
}
}
// On the scalar fallback path the prepared `Lut4x3` can be boxed directly.
Ok(Box::new(lut))
}
}
pub(crate) fn katana_input_stage_lut_4x3<
T: Copy + PointeeSizeExpressible + AsPrimitive<f32> + Send + Sync,
>(
lut: &LutDataType,
options: TransformOptions,
pcs: DataColorSpace,
bit_depth: usize,
) -> Result<Box<dyn KatanaInitialStage<f32, T> + Send + Sync>, CmsError> {
// There is 4 possible cases:
// - All curves are non-linear
// - Linearization curves are non-linear, but gamma is linear
// - Gamma curves are non-linear, but linearization is linear
// - All curves linear
let lut = make_lut_4x3(lut, options, pcs)?;
let transform = KatanaLut4x3::<T> {
linearization: lut.linearization,
interpolation_method: lut.interpolation_method,
pcs: lut.pcs,
clut: lut.clut,
grid_size: lut.grid_size,
output: lut.output,
_phantom: PhantomData,
bit_depth,
};
Ok(Box::new(transform))
}
pub(crate) fn create_lut4_norm_samples<const SAMPLES: usize>() -> Vec<f32> {
let lut_size: u32 = (4 * SAMPLES * SAMPLES * SAMPLES * SAMPLES) as u32;
let mut src = Vec::with_capacity(lut_size as usize);
let recpeq = 1f32 / (SAMPLES - 1) as f32;
for k in 0..SAMPLES {
for c in 0..SAMPLES {
for m in 0..SAMPLES {
for y in 0..SAMPLES {
src.push(c as f32 * recpeq);
src.push(m as f32 * recpeq);
src.push(y as f32 * recpeq);
src.push(k as f32 * recpeq);
}
}
}
}
src
}
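// A minimal test sketch (editorial addition, not part of the vendored crate):
// although `k` is the outermost loop, each emitted quad is stored in
// C, M, Y, K channel order, spanning the unit hypercube.
#[cfg(test)]
mod lut4_sample_tests {
use super::*;
#[test]
fn norm_samples_span_unit_hypercube() {
let s = create_lut4_norm_samples::<2>();
assert_eq!(s.len(), 4 * 2 * 2 * 2 * 2);
assert_eq!(&s[..4], &[0.0, 0.0, 0.0, 0.0]);
assert_eq!(&s[s.len() - 4..], &[1.0, 1.0, 1.0, 1.0]);
}
}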
pub(crate) fn create_lut4<const SAMPLES: usize>(
lut: &LutDataType,
options: TransformOptions,
pcs: DataColorSpace,
) -> Result<Vec<f32>, CmsError> {
if lut.num_input_channels != 4 {
return Err(CmsError::UnsupportedProfileConnection);
}
let lut_size: u32 = (4 * SAMPLES * SAMPLES * SAMPLES * SAMPLES) as u32;
let src = create_lut4_norm_samples::<SAMPLES>();
let mut dest = vec![0.; (lut_size as usize) / 4 * 3];
let lut_stage = stage_lut_4x3(lut, options, pcs)?;
lut_stage.transform(&src, &mut dest)?;
Ok(dest)
}

View File

@@ -0,0 +1,802 @@
/*
* // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::lut3x3::{
create_lut3x3, katana_input_stage_lut_3x3, katana_output_stage_lut_3x3,
};
use crate::conversions::lut3x4::{create_lut3_samples_norm, create_lut3x4};
use crate::conversions::lut4::{create_lut4, create_lut4_norm_samples, katana_input_stage_lut_4x3};
use crate::conversions::mab::{prepare_mab_3x3, prepare_mba_3x3};
use crate::conversions::transform_lut3_to_4::make_transform_3x4;
use crate::mlaf::mlaf;
use crate::{
CmsError, ColorProfile, DataColorSpace, InPlaceStage, Layout, LutWarehouse, Matrix3f,
ProfileVersion, TransformExecutor, TransformOptions,
};
use num_traits::AsPrimitive;
pub(crate) struct MatrixStage {
pub(crate) matrices: Vec<Matrix3f>,
}
impl InPlaceStage for MatrixStage {
fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
if !self.matrices.is_empty() {
let m = self.matrices[0];
for dst in dst.chunks_exact_mut(3) {
let x = dst[0];
let y = dst[1];
let z = dst[2];
dst[0] = mlaf(mlaf(x * m.v[0][0], y, m.v[0][1]), z, m.v[0][2]);
dst[1] = mlaf(mlaf(x * m.v[1][0], y, m.v[1][1]), z, m.v[1][2]);
dst[2] = mlaf(mlaf(x * m.v[2][0], y, m.v[2][1]), z, m.v[2][2]);
}
}
for m in self.matrices.iter().skip(1) {
for dst in dst.chunks_exact_mut(3) {
let x = dst[0];
let y = dst[1];
let z = dst[2];
dst[0] = mlaf(mlaf(x * m.v[0][0], y, m.v[0][1]), z, m.v[0][2]);
dst[1] = mlaf(mlaf(x * m.v[1][0], y, m.v[1][1]), z, m.v[1][2]);
dst[2] = mlaf(mlaf(x * m.v[2][0], y, m.v[2][1]), z, m.v[2][2]);
}
}
Ok(())
}
}
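// A minimal test sketch (editorial addition, not part of the vendored crate):
// the identity matrix must be a no-op, since every term is multiplied by
// exactly 1.0 or 0.0 and the fused multiply-add then preserves values exactly.
#[cfg(test)]
mod matrix_stage_tests {
use super::*;
#[test]
fn identity_matrix_is_a_no_op() {
let stage = MatrixStage {
matrices: vec![Matrix3f::IDENTITY],
};
let mut buf = vec![0.25f32, 0.5, 0.75];
stage.transform(&mut buf).unwrap();
assert_eq!(buf, vec![0.25f32, 0.5, 0.75]);
}
}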
pub(crate) const LUT_SAMPLING: u16 = 255;
pub(crate) trait Lut3x3Factory {
fn make_transform_3x3<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible + 'static + Send + Sync,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
>(
lut: Vec<f32>,
options: TransformOptions,
color_space: DataColorSpace,
is_linear: bool,
) -> Box<dyn TransformExecutor<T> + Send + Sync>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, u8>,
(): LutBarycentricReduction<T, u16>;
}
pub(crate) trait Lut4x3Factory {
fn make_transform_4x3<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible + 'static + Send + Sync,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
>(
lut: Vec<f32>,
options: TransformOptions,
color_space: DataColorSpace,
is_linear: bool,
) -> Box<dyn TransformExecutor<T> + Sync + Send>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, u8>,
(): LutBarycentricReduction<T, u16>;
}
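// ICC v2 encodes 16-bit Lab with a full-scale value of 0xFF00 (65280), while
// v4 uses the full 0xFFFF (65535). The two helpers below rescale PCS values
// between those encodings: 65280/65535 toward v2, 65535/65280 back to v4.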
fn pcs_lab_v4_to_v2(profile: &ColorProfile, lut: &mut [f32]) {
if profile.pcs == DataColorSpace::Lab
&& profile.version_internal <= ProfileVersion::V4_0
&& lut.len() % 3 == 0
{
assert_eq!(
lut.len() % 3,
0,
"Lut {:?} is not a multiple of 3, this should not happen for lab",
lut.len()
);
let v_mat = vec![Matrix3f {
v: [
[65280.0 / 65535.0, 0f32, 0f32],
[0f32, 65280.0 / 65535.0, 0f32],
[0f32, 0f32, 65280.0 / 65535.0f32],
],
}];
let stage = MatrixStage { matrices: v_mat };
stage.transform(lut).unwrap();
}
}
fn pcs_lab_v2_to_v4(profile: &ColorProfile, lut: &mut [f32]) {
if profile.pcs == DataColorSpace::Lab
&& profile.version_internal <= ProfileVersion::V4_0
&& lut.len() % 3 == 0
{
assert_eq!(
lut.len() % 3,
0,
"Lut {:?} is not a multiple of 3, this should not happen for lab",
lut.len()
);
let v_mat = vec![Matrix3f {
v: [
[65535.0 / 65280.0f32, 0f32, 0f32],
[0f32, 65535.0f32 / 65280.0f32, 0f32],
[0f32, 0f32, 65535.0f32 / 65280.0f32],
],
}];
let stage = MatrixStage { matrices: v_mat };
stage.transform(lut).unwrap();
}
}
macro_rules! make_transform_3x3_fn {
($method_name: ident, $exec_impl: ident) => {
fn $method_name<
T: Copy
+ Default
+ AsPrimitive<f32>
+ Send
+ Sync
+ AsPrimitive<usize>
+ PointeeSizeExpressible,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
>(
src_layout: Layout,
dst_layout: Layout,
lut: Vec<f32>,
options: TransformOptions,
color_space: DataColorSpace,
is_linear: bool,
) -> Box<dyn TransformExecutor<T> + Send + Sync>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, u8>,
(): LutBarycentricReduction<T, u16>,
{
match src_layout {
Layout::Rgb => match dst_layout {
Layout::Rgb => $exec_impl::make_transform_3x3::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgb as u8 },
GRID_SIZE,
BIT_DEPTH,
>(lut, options, color_space, is_linear),
Layout::Rgba => $exec_impl::make_transform_3x3::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgba as u8 },
GRID_SIZE,
BIT_DEPTH,
>(lut, options, color_space, is_linear),
_ => unimplemented!(),
},
Layout::Rgba => match dst_layout {
Layout::Rgb => $exec_impl::make_transform_3x3::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgb as u8 },
GRID_SIZE,
BIT_DEPTH,
>(lut, options, color_space, is_linear),
Layout::Rgba => $exec_impl::make_transform_3x3::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgba as u8 },
GRID_SIZE,
BIT_DEPTH,
>(lut, options, color_space, is_linear),
_ => unimplemented!(),
},
_ => unimplemented!(),
}
}
};
}
macro_rules! make_transform_4x3_fn {
($method_name: ident, $exec_name: ident) => {
fn $method_name<
T: Copy
+ Default
+ AsPrimitive<f32>
+ Send
+ Sync
+ AsPrimitive<usize>
+ PointeeSizeExpressible,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
>(
dst_layout: Layout,
lut: Vec<f32>,
options: TransformOptions,
data_color_space: DataColorSpace,
is_linear: bool,
) -> Box<dyn TransformExecutor<T> + Send + Sync>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, u8>,
(): LutBarycentricReduction<T, u16>,
{
match dst_layout {
Layout::Rgb => $exec_name::make_transform_4x3::<
T,
{ Layout::Rgb as u8 },
GRID_SIZE,
BIT_DEPTH,
>(lut, options, data_color_space, is_linear),
Layout::Rgba => $exec_name::make_transform_4x3::<
T,
{ Layout::Rgba as u8 },
GRID_SIZE,
BIT_DEPTH,
>(lut, options, data_color_space, is_linear),
_ => unimplemented!(),
}
}
};
}
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
use crate::conversions::neon::NeonLut3x3Factory;
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
make_transform_3x3_fn!(make_transformer_3x3, NeonLut3x3Factory);
#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
use crate::conversions::transform_lut3_to_3::DefaultLut3x3Factory;
#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
make_transform_3x3_fn!(make_transformer_3x3, DefaultLut3x3Factory);
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
use crate::conversions::avx::AvxLut3x3Factory;
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
make_transform_3x3_fn!(make_transformer_3x3_avx_fma, AvxLut3x3Factory);
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
use crate::conversions::sse::SseLut3x3Factory;
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
make_transform_3x3_fn!(make_transformer_3x3_sse41, SseLut3x3Factory);
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
use crate::conversions::avx::AvxLut4x3Factory;
use crate::conversions::interpolator::LutBarycentricReduction;
use crate::conversions::katana::{
Katana, KatanaDefaultIntermediate, KatanaInitialStage, KatanaPostFinalizationStage,
KatanaStageLabToXyz, KatanaStageXyzToLab, katana_create_rgb_lin_lut, katana_pcs_lab_v2_to_v4,
katana_pcs_lab_v4_to_v2, katana_prepare_inverse_lut_rgb_xyz, multi_dimensional_3x3_to_device,
multi_dimensional_3x3_to_pcs, multi_dimensional_4x3_to_pcs,
};
use crate::conversions::mab4x3::prepare_mab_4x3;
use crate::conversions::mba3x4::prepare_mba_3x4;
use crate::conversions::md_luts_factory::{do_any_to_any, prepare_alpha_finalizer};
// use crate::conversions::bpc::compensate_bpc_in_lut;
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
make_transform_4x3_fn!(make_transformer_4x3_avx_fma, AvxLut4x3Factory);
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
use crate::conversions::sse::SseLut4x3Factory;
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
make_transform_4x3_fn!(make_transformer_4x3_sse41, SseLut4x3Factory);
#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
use crate::conversions::transform_lut4_to_3::DefaultLut4x3Factory;
#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
make_transform_4x3_fn!(make_transformer_4x3, DefaultLut4x3Factory);
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
use crate::conversions::neon::NeonLut4x3Factory;
use crate::conversions::prelude_lut_xyz_rgb::{create_rgb_lin_lut, prepare_inverse_lut_rgb_xyz};
use crate::conversions::xyz_lab::{StageLabToXyz, StageXyzToLab};
use crate::transform::PointeeSizeExpressible;
use crate::trc::GammaLutInterpolate;
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
make_transform_4x3_fn!(make_transformer_4x3, NeonLut4x3Factory);
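// `make_lut_transform` below picks one of four paths: 4-channel -> 3-channel
// (CMYK/Color4 to RGB/Lab), 3-channel -> 4-channel, 3-channel -> 3-channel,
// or the generic any-to-any fallback. The "katana" variants are the slow,
// high-accuracy routes taken when curve analysis finds stages the sampled
// LUT fast paths cannot represent.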
#[inline(never)]
#[cold]
pub(crate) fn make_lut_transform<
T: Copy
+ Default
+ AsPrimitive<f32>
+ Send
+ Sync
+ AsPrimitive<usize>
+ PointeeSizeExpressible
+ GammaLutInterpolate,
const BIT_DEPTH: usize,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
>(
src_layout: Layout,
source: &ColorProfile,
dst_layout: Layout,
dest: &ColorProfile,
options: TransformOptions,
) -> Result<Box<dyn TransformExecutor<T> + Send + Sync>, CmsError>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, u8>,
(): LutBarycentricReduction<T, u16>,
{
if (source.color_space == DataColorSpace::Cmyk || source.color_space == DataColorSpace::Color4)
&& (dest.color_space == DataColorSpace::Rgb || dest.color_space == DataColorSpace::Lab)
{
source.color_space.check_layout(src_layout)?;
dest.color_space.check_layout(dst_layout)?;
if source.pcs != DataColorSpace::Xyz && source.pcs != DataColorSpace::Lab {
return Err(CmsError::UnsupportedProfileConnection);
}
if dest.pcs != DataColorSpace::Lab && dest.pcs != DataColorSpace::Xyz {
return Err(CmsError::UnsupportedProfileConnection);
}
const GRID_SIZE: usize = 17;
let is_katana_required_for_source = source
.get_device_to_pcs(options.rendering_intent)
.ok_or(CmsError::UnsupportedLutRenderingIntent(
source.rendering_intent,
))
.map(|x| x.is_katana_required())?;
let is_katana_required_for_destination =
if dest.is_matrix_shaper() || dest.pcs == DataColorSpace::Xyz {
false
} else if dest.pcs == DataColorSpace::Lab {
dest.get_pcs_to_device(options.rendering_intent)
.ok_or(CmsError::UnsupportedProfileConnection)
.map(|x| x.is_katana_required())?
} else {
return Err(CmsError::UnsupportedProfileConnection);
};
if is_katana_required_for_source || is_katana_required_for_destination {
let initial_stage: Box<dyn KatanaInitialStage<f32, T> + Send + Sync> =
match source.get_device_to_pcs(options.rendering_intent).ok_or(
CmsError::UnsupportedLutRenderingIntent(source.rendering_intent),
)? {
LutWarehouse::Lut(lut) => {
katana_input_stage_lut_4x3::<T>(lut, options, source.pcs, BIT_DEPTH)?
}
LutWarehouse::Multidimensional(mab) => {
multi_dimensional_4x3_to_pcs::<T>(mab, options, source.pcs, BIT_DEPTH)?
}
};
let mut stages = Vec::new();
stages.push(katana_pcs_lab_v2_to_v4(source));
if source.pcs == DataColorSpace::Lab {
stages.push(Box::new(KatanaStageLabToXyz::default()));
}
if dest.pcs == DataColorSpace::Lab {
stages.push(Box::new(KatanaStageXyzToLab::default()));
}
stages.push(katana_pcs_lab_v4_to_v2(dest));
let final_stage = if dest.has_pcs_to_device_lut() {
let pcs_to_device = dest
.get_pcs_to_device(options.rendering_intent)
.ok_or(CmsError::UnsupportedProfileConnection)?;
match pcs_to_device {
LutWarehouse::Lut(lut) => {
katana_output_stage_lut_3x3::<T>(lut, options, dest.pcs, BIT_DEPTH)?
}
LutWarehouse::Multidimensional(mab) => {
multi_dimensional_3x3_to_device::<T>(mab, options, dest.pcs, BIT_DEPTH)?
}
}
} else if dest.is_matrix_shaper() {
let state = katana_prepare_inverse_lut_rgb_xyz::<T, BIT_DEPTH, GAMMA_LUT>(
dest, dst_layout, options,
)?;
stages.extend(state.stages);
state.final_stage
} else {
return Err(CmsError::UnsupportedProfileConnection);
};
let mut post_finalization: Vec<Box<dyn KatanaPostFinalizationStage<T> + Send + Sync>> =
Vec::new();
if let Some(stage) =
prepare_alpha_finalizer::<T>(src_layout, source, dst_layout, dest, BIT_DEPTH)
{
post_finalization.push(stage);
}
return Ok(Box::new(Katana::<f32, T> {
initial_stage,
final_stage,
stages,
post_finalization,
}));
}
let mut lut = match source.get_device_to_pcs(options.rendering_intent).ok_or(
CmsError::UnsupportedLutRenderingIntent(source.rendering_intent),
)? {
LutWarehouse::Lut(lut) => create_lut4::<GRID_SIZE>(lut, options, source.pcs)?,
LutWarehouse::Multidimensional(m_curves) => {
let mut samples = create_lut4_norm_samples::<GRID_SIZE>();
prepare_mab_4x3(m_curves, &mut samples, options, source.pcs)?
}
};
pcs_lab_v2_to_v4(source, &mut lut);
if source.pcs == DataColorSpace::Lab {
let lab_to_xyz_stage = StageLabToXyz::default();
lab_to_xyz_stage.transform(&mut lut)?;
}
// if source.color_space == DataColorSpace::Cmyk
// && (options.rendering_intent == RenderingIntent::Perceptual
// || options.rendering_intent == RenderingIntent::RelativeColorimetric)
// && options.black_point_compensation
// {
// if let (Some(src_bp), Some(dst_bp)) = (
// source.detect_black_point::<GRID_SIZE>(&lut),
// dest.detect_black_point::<GRID_SIZE>(&lut),
// ) {
// compensate_bpc_in_lut(&mut lut, src_bp, dst_bp);
// }
// }
if dest.pcs == DataColorSpace::Lab {
let lab_to_xyz_stage = StageXyzToLab::default();
lab_to_xyz_stage.transform(&mut lut)?;
}
pcs_lab_v4_to_v2(dest, &mut lut);
if dest.pcs == DataColorSpace::Xyz {
if dest.is_matrix_shaper() {
prepare_inverse_lut_rgb_xyz::<T, BIT_DEPTH, GAMMA_LUT>(dest, &mut lut, options)?;
} else {
return Err(CmsError::UnsupportedProfileConnection);
}
} else if dest.pcs == DataColorSpace::Lab {
let pcs_to_device = dest
.get_pcs_to_device(options.rendering_intent)
.ok_or(CmsError::UnsupportedProfileConnection)?;
match pcs_to_device {
LutWarehouse::Lut(lut_data_type) => {
lut = create_lut3x3(lut_data_type, &lut, options, dest.pcs)?
}
LutWarehouse::Multidimensional(mab) => {
prepare_mba_3x3(mab, &mut lut, options, dest.pcs)?
}
}
}
let is_dest_linear_profile = dest.color_space == DataColorSpace::Rgb
&& dest.is_matrix_shaper()
&& dest.is_linear_matrix_shaper();
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
if std::arch::is_x86_feature_detected!("avx2") && std::arch::is_x86_feature_detected!("fma")
{
return Ok(make_transformer_4x3_avx_fma::<T, GRID_SIZE, BIT_DEPTH>(
dst_layout,
lut,
options,
dest.color_space,
is_dest_linear_profile,
));
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
if std::arch::is_x86_feature_detected!("sse4.1") {
return Ok(make_transformer_4x3_sse41::<T, GRID_SIZE, BIT_DEPTH>(
dst_layout,
lut,
options,
dest.color_space,
is_dest_linear_profile,
));
}
Ok(make_transformer_4x3::<T, GRID_SIZE, BIT_DEPTH>(
dst_layout,
lut,
options,
dest.color_space,
is_dest_linear_profile,
))
} else if (source.color_space == DataColorSpace::Rgb
|| source.color_space == DataColorSpace::Lab)
&& (dest.color_space == DataColorSpace::Cmyk || dest.color_space == DataColorSpace::Color4)
{
source.color_space.check_layout(src_layout)?;
dest.color_space.check_layout(dst_layout)?;
if source.pcs != DataColorSpace::Xyz && source.pcs != DataColorSpace::Lab {
return Err(CmsError::UnsupportedProfileConnection);
}
const GRID_SIZE: usize = 33;
let mut lut: Vec<f32>;
if source.has_device_to_pcs_lut() {
let device_to_pcs = source
.get_device_to_pcs(options.rendering_intent)
.ok_or(CmsError::UnsupportedProfileConnection)?;
lut = create_lut3_samples_norm::<GRID_SIZE>();
match device_to_pcs {
LutWarehouse::Lut(lut_data_type) => {
lut = create_lut3x3(lut_data_type, &lut, options, source.pcs)?;
}
LutWarehouse::Multidimensional(mab) => {
prepare_mab_3x3(mab, &mut lut, options, source.pcs)?
}
}
} else if source.is_matrix_shaper() {
lut = create_rgb_lin_lut::<T, BIT_DEPTH, LINEAR_CAP, GRID_SIZE>(source, options)?;
} else {
return Err(CmsError::UnsupportedProfileConnection);
}
pcs_lab_v2_to_v4(source, &mut lut);
if source.pcs == DataColorSpace::Xyz && dest.pcs == DataColorSpace::Lab {
let xyz_to_lab = StageXyzToLab::default();
xyz_to_lab.transform(&mut lut)?;
} else if source.pcs == DataColorSpace::Lab && dest.pcs == DataColorSpace::Xyz {
let lab_to_xyz_stage = StageLabToXyz::default();
lab_to_xyz_stage.transform(&mut lut)?;
}
pcs_lab_v4_to_v2(dest, &mut lut);
let lut = match dest
.get_pcs_to_device(options.rendering_intent)
.ok_or(CmsError::UnsupportedProfileConnection)?
{
LutWarehouse::Lut(lut_type) => create_lut3x4(lut_type, &lut, options, dest.pcs)?,
LutWarehouse::Multidimensional(m_curves) => {
prepare_mba_3x4(m_curves, &mut lut, options, dest.pcs)?
}
};
let is_dest_linear_profile = dest.color_space == DataColorSpace::Rgb
&& dest.is_matrix_shaper()
&& dest.is_linear_matrix_shaper();
Ok(make_transform_3x4::<T, GRID_SIZE, BIT_DEPTH>(
src_layout,
lut,
options,
dest.color_space,
is_dest_linear_profile,
))
} else if (source.color_space.is_three_channels()) && (dest.color_space.is_three_channels()) {
source.color_space.check_layout(src_layout)?;
dest.color_space.check_layout(dst_layout)?;
const GRID_SIZE: usize = 33;
let is_katana_required_for_source = if source.is_matrix_shaper() {
false
} else {
source
.get_device_to_pcs(options.rendering_intent)
.ok_or(CmsError::UnsupportedLutRenderingIntent(
source.rendering_intent,
))
.map(|x| x.is_katana_required())?
};
let is_katana_required_for_destination =
if dest.is_matrix_shaper() || dest.pcs == DataColorSpace::Xyz {
false
} else if dest.pcs == DataColorSpace::Lab {
dest.get_pcs_to_device(options.rendering_intent)
.ok_or(CmsError::UnsupportedProfileConnection)
.map(|x| x.is_katana_required())?
} else {
return Err(CmsError::UnsupportedProfileConnection);
};
let mut stages: Vec<Box<KatanaDefaultIntermediate>> = Vec::new();
// Slow but accurate fallback, used when curve analysis detects anything the fast paths cannot represent
if is_katana_required_for_source || is_katana_required_for_destination {
let source_stage: Box<dyn KatanaInitialStage<f32, T> + Send + Sync> =
if source.is_matrix_shaper() {
let state = katana_create_rgb_lin_lut::<T, BIT_DEPTH, LINEAR_CAP>(
src_layout, source, options,
)?;
stages.extend(state.stages);
state.initial_stage
} else {
match source.get_device_to_pcs(options.rendering_intent).ok_or(
CmsError::UnsupportedLutRenderingIntent(source.rendering_intent),
)? {
LutWarehouse::Lut(lut) => {
katana_input_stage_lut_3x3::<T>(lut, options, source.pcs, BIT_DEPTH)?
}
LutWarehouse::Multidimensional(mab) => {
multi_dimensional_3x3_to_pcs::<T>(mab, options, source.pcs, BIT_DEPTH)?
}
}
};
stages.push(katana_pcs_lab_v2_to_v4(source));
if source.pcs == DataColorSpace::Lab {
stages.push(Box::new(KatanaStageLabToXyz::default()));
}
if dest.pcs == DataColorSpace::Lab {
stages.push(Box::new(KatanaStageXyzToLab::default()));
}
stages.push(katana_pcs_lab_v4_to_v2(dest));
let final_stage = if dest.has_pcs_to_device_lut() {
let pcs_to_device = dest
.get_pcs_to_device(options.rendering_intent)
.ok_or(CmsError::UnsupportedProfileConnection)?;
match pcs_to_device {
LutWarehouse::Lut(lut) => {
katana_output_stage_lut_3x3::<T>(lut, options, dest.pcs, BIT_DEPTH)?
}
LutWarehouse::Multidimensional(mab) => {
multi_dimensional_3x3_to_device::<T>(mab, options, dest.pcs, BIT_DEPTH)?
}
}
} else if dest.is_matrix_shaper() {
let state = katana_prepare_inverse_lut_rgb_xyz::<T, BIT_DEPTH, GAMMA_LUT>(
dest, dst_layout, options,
)?;
stages.extend(state.stages);
state.final_stage
} else {
return Err(CmsError::UnsupportedProfileConnection);
};
let mut post_finalization: Vec<Box<dyn KatanaPostFinalizationStage<T> + Send + Sync>> =
Vec::new();
if let Some(stage) =
prepare_alpha_finalizer::<T>(src_layout, source, dst_layout, dest, BIT_DEPTH)
{
post_finalization.push(stage);
}
return Ok(Box::new(Katana::<f32, T> {
initial_stage: source_stage,
final_stage,
stages,
post_finalization,
}));
}
let mut lut: Vec<f32>;
if source.has_device_to_pcs_lut() {
let device_to_pcs = source
.get_device_to_pcs(options.rendering_intent)
.ok_or(CmsError::UnsupportedProfileConnection)?;
lut = create_lut3_samples_norm::<GRID_SIZE>();
match device_to_pcs {
LutWarehouse::Lut(lut_data_type) => {
lut = create_lut3x3(lut_data_type, &lut, options, source.pcs)?;
}
LutWarehouse::Multidimensional(mab) => {
prepare_mab_3x3(mab, &mut lut, options, source.pcs)?
}
}
} else if source.is_matrix_shaper() {
lut = create_rgb_lin_lut::<T, BIT_DEPTH, LINEAR_CAP, GRID_SIZE>(source, options)?;
} else {
return Err(CmsError::UnsupportedProfileConnection);
}
pcs_lab_v2_to_v4(source, &mut lut);
if source.pcs == DataColorSpace::Xyz && dest.pcs == DataColorSpace::Lab {
let xyz_to_lab = StageXyzToLab::default();
xyz_to_lab.transform(&mut lut)?;
} else if source.pcs == DataColorSpace::Lab && dest.pcs == DataColorSpace::Xyz {
let lab_to_xyz_stage = StageLabToXyz::default();
lab_to_xyz_stage.transform(&mut lut)?;
}
pcs_lab_v4_to_v2(dest, &mut lut);
if dest.has_pcs_to_device_lut() {
let pcs_to_device = dest
.get_pcs_to_device(options.rendering_intent)
.ok_or(CmsError::UnsupportedProfileConnection)?;
match pcs_to_device {
LutWarehouse::Lut(lut_data_type) => {
lut = create_lut3x3(lut_data_type, &lut, options, dest.pcs)?;
}
LutWarehouse::Multidimensional(mab) => {
prepare_mba_3x3(mab, &mut lut, options, dest.pcs)?
}
}
} else if dest.is_matrix_shaper() {
prepare_inverse_lut_rgb_xyz::<T, BIT_DEPTH, GAMMA_LUT>(dest, &mut lut, options)?;
} else {
return Err(CmsError::UnsupportedProfileConnection);
}
let is_dest_linear_profile = dest.color_space == DataColorSpace::Rgb
&& dest.is_matrix_shaper()
&& dest.is_linear_matrix_shaper();
#[cfg(all(feature = "avx", target_arch = "x86_64"))]
if std::arch::is_x86_feature_detected!("avx2") && std::is_x86_feature_detected!("fma") {
return Ok(make_transformer_3x3_avx_fma::<T, GRID_SIZE, BIT_DEPTH>(
src_layout,
dst_layout,
lut,
options,
dest.color_space,
is_dest_linear_profile,
));
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
if std::arch::is_x86_feature_detected!("sse4.1") {
return Ok(make_transformer_3x3_sse41::<T, GRID_SIZE, BIT_DEPTH>(
src_layout,
dst_layout,
lut,
options,
dest.color_space,
is_dest_linear_profile,
));
}
Ok(make_transformer_3x3::<T, GRID_SIZE, BIT_DEPTH>(
src_layout,
dst_layout,
lut,
options,
dest.color_space,
is_dest_linear_profile,
))
} else {
do_any_to_any::<T, BIT_DEPTH, LINEAR_CAP, GAMMA_LUT>(
src_layout, source, dst_layout, dest, options,
)
}
}

730
vendor/moxcms/src/conversions/mab.rs vendored Normal file
View File

@@ -0,0 +1,730 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::mlaf::mlaf;
use crate::safe_math::SafeMul;
use crate::{
CmsError, Cube, DataColorSpace, InPlaceStage, InterpolationMethod, LutMultidimensionalType,
MalformedSize, Matrix3d, Matrix3f, TransformOptions, Vector3d, Vector3f,
};
#[allow(unused)]
struct ACurves3<'a, const DEPTH: usize> {
curve0: Box<[f32; 65536]>,
curve1: Box<[f32; 65536]>,
curve2: Box<[f32; 65536]>,
clut: &'a [f32],
grid_size: [u8; 3],
interpolation_method: InterpolationMethod,
pcs: DataColorSpace,
}
#[allow(unused)]
struct ACurves3Optimized<'a> {
clut: &'a [f32],
grid_size: [u8; 3],
interpolation_method: InterpolationMethod,
pcs: DataColorSpace,
}
#[allow(unused)]
impl<const DEPTH: usize> ACurves3<'_, DEPTH> {
fn transform_impl<Fetch: Fn(f32, f32, f32) -> Vector3f>(
&self,
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
let scale_value = (DEPTH - 1) as f32;
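// `scale_value` maps normalized inputs in [0, 1] onto curve indices
// [0, DEPTH - 1]; round-to-nearest plus the `min` clamp below guards against
// values slightly above 1.0.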
for dst in dst.chunks_exact_mut(3) {
let a0 = (dst[0] * scale_value).round().min(scale_value) as u16;
let a1 = (dst[1] * scale_value).round().min(scale_value) as u16;
let a2 = (dst[2] * scale_value).round().min(scale_value) as u16;
let b0 = self.curve0[a0 as usize];
let b1 = self.curve1[a1 as usize];
let b2 = self.curve2[a2 as usize];
let interpolated = fetch(b0, b1, b2);
dst[0] = interpolated.v[0];
dst[1] = interpolated.v[1];
dst[2] = interpolated.v[2];
}
Ok(())
}
}
#[allow(unused)]
impl ACurves3Optimized<'_> {
fn transform_impl<Fetch: Fn(f32, f32, f32) -> Vector3f>(
&self,
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
for dst in dst.chunks_exact_mut(3) {
let a0 = dst[0];
let a1 = dst[1];
let a2 = dst[2];
let interpolated = fetch(a0, a1, a2);
dst[0] = interpolated.v[0];
dst[1] = interpolated.v[1];
dst[2] = interpolated.v[2];
}
Ok(())
}
}
impl<const DEPTH: usize> InPlaceStage for ACurves3<'_, DEPTH> {
fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
let lut = Cube::new_cube(self.clut, self.grid_size);
// If the PCS is Lab or XYZ, trilinear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(dst, |x, y, z| lut.tetra_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(dst, |x, y, z| lut.pyramid_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(dst, |x, y, z| lut.prism_vec3(x, y, z))?;
}
InterpolationMethod::Linear => {
self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z))?;
}
}
Ok(())
}
}
impl InPlaceStage for ACurves3Optimized<'_> {
fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
let lut = Cube::new_cube(self.clut, self.grid_size);
// If the PCS is Lab, trilinear interpolation should be used (unlike ACurves3, XYZ is not special-cased here)
if self.pcs == DataColorSpace::Lab {
return self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(dst, |x, y, z| lut.tetra_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(dst, |x, y, z| lut.pyramid_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(dst, |x, y, z| lut.prism_vec3(x, y, z))?;
}
InterpolationMethod::Linear => {
self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z))?;
}
}
Ok(())
}
}
#[allow(unused)]
struct ACurves3Inverse<'a, const DEPTH: usize> {
curve0: Box<[f32; 65536]>,
curve1: Box<[f32; 65536]>,
curve2: Box<[f32; 65536]>,
clut: &'a [f32],
grid_size: [u8; 3],
interpolation_method: InterpolationMethod,
pcs: DataColorSpace,
}
#[allow(unused)]
impl<const DEPTH: usize> ACurves3Inverse<'_, DEPTH> {
fn transform_impl<Fetch: Fn(f32, f32, f32) -> Vector3f>(
&self,
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
let scale_value = (DEPTH as u32 - 1u32) as f32;
for dst in dst.chunks_exact_mut(3) {
let interpolated = fetch(dst[0], dst[1], dst[2]);
let a0 = (interpolated.v[0] * scale_value).round().min(scale_value) as u16;
let a1 = (interpolated.v[1] * scale_value).round().min(scale_value) as u16;
let a2 = (interpolated.v[2] * scale_value).round().min(scale_value) as u16;
let b0 = self.curve0[a0 as usize];
let b1 = self.curve1[a1 as usize];
let b2 = self.curve2[a2 as usize];
dst[0] = b0;
dst[1] = b1;
dst[2] = b2;
}
Ok(())
}
}
impl<const DEPTH: usize> InPlaceStage for ACurves3Inverse<'_, DEPTH> {
fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
let lut = Cube::new_cube(self.clut, self.grid_size);
// If the PCS is Lab or XYZ, trilinear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(dst, |x, y, z| lut.tetra_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(dst, |x, y, z| lut.pyramid_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(dst, |x, y, z| lut.prism_vec3(x, y, z))?;
}
InterpolationMethod::Linear => {
self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z))?;
}
}
Ok(())
}
}
pub(crate) struct MCurves3<const DEPTH: usize> {
pub(crate) curve0: Box<[f32; 65536]>,
pub(crate) curve1: Box<[f32; 65536]>,
pub(crate) curve2: Box<[f32; 65536]>,
pub(crate) matrix: Matrix3f,
pub(crate) bias: Vector3f,
pub(crate) inverse: bool,
}
impl<const DEPTH: usize> MCurves3<DEPTH> {
fn execute_matrix_stage(&self, dst: &mut [f32]) {
let m = self.matrix;
let b = self.bias;
if !m.test_equality(Matrix3f::IDENTITY) || !b.eq(&Vector3f::default()) {
for dst in dst.chunks_exact_mut(3) {
let x = dst[0];
let y = dst[1];
let z = dst[2];
dst[0] = mlaf(mlaf(mlaf(b.v[0], x, m.v[0][0]), y, m.v[0][1]), z, m.v[0][2]);
dst[1] = mlaf(mlaf(mlaf(b.v[1], x, m.v[1][0]), y, m.v[1][1]), z, m.v[1][2]);
dst[2] = mlaf(mlaf(mlaf(b.v[2], x, m.v[2][0]), y, m.v[2][1]), z, m.v[2][2]);
}
}
}
}
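// Ordering note: in the forward (device-to-PCS, lutAtoB) direction the
// M-curves run before the matrix/bias stage; with `inverse` set (lutBtoA)
// the matrix/bias stage runs first. The flag below selects between the two.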
impl<const DEPTH: usize> InPlaceStage for MCurves3<DEPTH> {
fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
let scale_value = (DEPTH - 1) as f32;
if self.inverse {
self.execute_matrix_stage(dst);
}
for dst in dst.chunks_exact_mut(3) {
let a0 = (dst[0] * scale_value).round().min(scale_value) as u16;
let a1 = (dst[1] * scale_value).round().min(scale_value) as u16;
let a2 = (dst[2] * scale_value).round().min(scale_value) as u16;
let b0 = self.curve0[a0 as usize];
let b1 = self.curve1[a1 as usize];
let b2 = self.curve2[a2 as usize];
dst[0] = b0;
dst[1] = b1;
dst[2] = b2;
}
if !self.inverse {
self.execute_matrix_stage(dst);
}
Ok(())
}
}
pub(crate) struct BCurves3<const DEPTH: usize> {
pub(crate) curve0: Box<[f32; 65536]>,
pub(crate) curve1: Box<[f32; 65536]>,
pub(crate) curve2: Box<[f32; 65536]>,
}
impl<const DEPTH: usize> InPlaceStage for BCurves3<DEPTH> {
fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
let scale_value = (DEPTH - 1) as f32;
for dst in dst.chunks_exact_mut(3) {
let a0 = (dst[0] * scale_value).round().min(scale_value) as u16;
let a1 = (dst[1] * scale_value).round().min(scale_value) as u16;
let a2 = (dst[2] * scale_value).round().min(scale_value) as u16;
let b0 = self.curve0[a0 as usize];
let b1 = self.curve1[a1 as usize];
let b2 = self.curve2[a2 as usize];
dst[0] = b0;
dst[1] = b1;
dst[2] = b2;
}
Ok(())
}
}
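// lutAtoB stage order per the ICC spec: A-curves -> CLUT -> M-curves ->
// matrix -> B-curves. `prepare_mab_3x3` applies those stages to `lut` in that
// sequence, skipping any stage that is provably linear or identity.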
pub(crate) fn prepare_mab_3x3(
mab: &LutMultidimensionalType,
lut: &mut [f32],
options: TransformOptions,
pcs: DataColorSpace,
) -> Result<(), CmsError> {
const LERP_DEPTH: usize = 65536;
const BP: usize = 13;
const DEPTH: usize = 8192;
if mab.num_input_channels != 3 || mab.num_output_channels != 3 {
return Err(CmsError::UnsupportedProfileConnection);
}
if mab.a_curves.len() == 3 && mab.clut.is_some() {
let clut = &mab.clut.as_ref().map(|x| x.to_clut_f32()).unwrap();
let lut_grid = (mab.grid_points[0] as usize)
.safe_mul(mab.grid_points[1] as usize)?
.safe_mul(mab.grid_points[2] as usize)?
.safe_mul(mab.num_output_channels as usize)?;
if clut.len() != lut_grid {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: clut.len(),
expected: lut_grid,
}));
}
let all_curves_linear = mab.a_curves.iter().all(|curve| curve.is_linear());
let grid_size = [mab.grid_points[0], mab.grid_points[1], mab.grid_points[2]];
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
if all_curves_linear {
use crate::conversions::neon::ACurves3OptimizedNeon;
let a_curves = ACurves3OptimizedNeon {
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
};
a_curves.transform(lut)?;
} else {
use crate::conversions::neon::ACurves3Neon;
let curves: Result<Vec<_>, _> = mab
.a_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
let a_curves = ACurves3Neon::<DEPTH> {
curve0,
curve1,
curve2,
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
};
a_curves.transform(lut)?;
}
#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
{
let mut execution_box: Option<Box<dyn InPlaceStage>> = None;
if all_curves_linear {
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
{
use crate::conversions::avx::ACurves3OptimizedAvxFma;
if std::arch::is_x86_feature_detected!("avx2")
&& std::arch::is_x86_feature_detected!("fma")
{
execution_box = Some(Box::new(ACurves3OptimizedAvxFma {
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
}));
}
}
if execution_box.is_none() {
execution_box = Some(Box::new(ACurves3Optimized {
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
}));
}
} else {
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
{
use crate::conversions::avx::ACurves3AvxFma;
if std::arch::is_x86_feature_detected!("avx2")
&& std::arch::is_x86_feature_detected!("fma")
{
let curves: Result<Vec<_>, _> = mab
.a_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
execution_box = Some(Box::new(ACurves3AvxFma::<DEPTH> {
curve0,
curve1,
curve2,
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
}));
}
}
if execution_box.is_none() {
let curves: Result<Vec<_>, _> = mab
.a_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
execution_box = Some(Box::new(ACurves3::<DEPTH> {
curve0,
curve1,
curve2,
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
}));
}
}
execution_box
.expect("LUT Sampler on Multidimensional 3x3 must be set")
.transform(lut)?;
}
}
if mab.m_curves.len() == 3 {
let all_curves_linear = mab.m_curves.iter().all(|curve| curve.is_linear());
if !all_curves_linear
|| !mab.matrix.test_equality(Matrix3d::IDENTITY)
|| mab.bias.ne(&Vector3d::default())
{
let curves: Result<Vec<_>, _> = mab
.m_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
let matrix = mab.matrix.to_f32();
let bias: Vector3f = mab.bias.cast();
let m_curves = MCurves3::<DEPTH> {
curve0,
curve1,
curve2,
matrix,
bias,
inverse: false,
};
m_curves.transform(lut)?;
}
}
if mab.b_curves.len() == 3 {
let all_curves_linear = mab.b_curves.iter().all(|curve| curve.is_linear());
if !all_curves_linear {
let curves: Result<Vec<_>, _> = mab
.b_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
let b_curves = BCurves3::<DEPTH> {
curve0,
curve1,
curve2,
};
b_curves.transform(lut)?;
}
} else {
return Err(CmsError::InvalidAtoBLut);
}
Ok(())
}
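// lutBtoA mirrors the order: B-curves -> matrix -> M-curves -> CLUT ->
// A-curves, which is what `prepare_mba_3x3` below implements.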
pub(crate) fn prepare_mba_3x3(
mab: &LutMultidimensionalType,
lut: &mut [f32],
options: TransformOptions,
pcs: DataColorSpace,
) -> Result<(), CmsError> {
if mab.num_input_channels != 3 || mab.num_output_channels != 3 {
return Err(CmsError::UnsupportedProfileConnection);
}
const LERP_DEPTH: usize = 65536;
const BP: usize = 13;
const DEPTH: usize = 8192;
if mab.b_curves.len() == 3 {
let all_curves_linear = mab.b_curves.iter().all(|curve| curve.is_linear());
if !all_curves_linear {
let curves: Result<Vec<_>, _> = mab
.b_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
let b_curves = BCurves3::<DEPTH> {
curve0,
curve1,
curve2,
};
b_curves.transform(lut)?;
}
} else {
return Err(CmsError::InvalidAtoBLut);
}
if mab.m_curves.len() == 3 {
let all_curves_linear = mab.m_curves.iter().all(|curve| curve.is_linear());
if !all_curves_linear
|| !mab.matrix.test_equality(Matrix3d::IDENTITY)
|| mab.bias.ne(&Vector3d::default())
{
let curves: Result<Vec<_>, _> = mab
.m_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
let matrix = mab.matrix.to_f32();
let bias: Vector3f = mab.bias.cast();
let m_curves = MCurves3::<DEPTH> {
curve0,
curve1,
curve2,
matrix,
bias,
inverse: true,
};
m_curves.transform(lut)?;
}
}
if mab.a_curves.len() == 3 && mab.clut.is_some() {
let clut = &mab.clut.as_ref().map(|x| x.to_clut_f32()).unwrap();
let lut_grid = (mab.grid_points[0] as usize)
.safe_mul(mab.grid_points[1] as usize)?
.safe_mul(mab.grid_points[2] as usize)?
.safe_mul(mab.num_output_channels as usize)?;
if clut.len() != lut_grid {
return Err(CmsError::MalformedCurveLutTable(MalformedSize {
size: clut.len(),
expected: lut_grid,
}));
}
let all_curves_linear = mab.a_curves.iter().all(|curve| curve.is_linear());
let grid_size = [mab.grid_points[0], mab.grid_points[1], mab.grid_points[2]];
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
if all_curves_linear {
use crate::conversions::neon::ACurves3OptimizedNeon;
let a_curves = ACurves3OptimizedNeon {
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
};
a_curves.transform(lut)?;
} else {
use crate::conversions::neon::ACurves3InverseNeon;
let curves: Result<Vec<_>, _> = mab
.a_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
let a_curves = ACurves3InverseNeon::<DEPTH> {
curve0,
curve1,
curve2,
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
};
a_curves.transform(lut)?;
}
#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
{
let mut execution_box: Option<Box<dyn InPlaceStage>> = None;
if all_curves_linear {
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
{
use crate::conversions::avx::ACurves3OptimizedAvxFma;
if std::arch::is_x86_feature_detected!("avx2")
&& std::arch::is_x86_feature_detected!("fma")
{
execution_box = Some(Box::new(ACurves3OptimizedAvxFma {
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
}));
}
}
if execution_box.is_none() {
execution_box = Some(Box::new(ACurves3Optimized {
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
}));
}
} else {
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
{
use crate::conversions::avx::ACurves3InverseAvxFma;
if std::arch::is_x86_feature_detected!("avx2")
&& std::arch::is_x86_feature_detected!("fma")
{
let curves: Result<Vec<_>, _> = mab
.a_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
execution_box = Some(Box::new(ACurves3InverseAvxFma::<DEPTH> {
curve0,
curve1,
curve2,
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
}));
}
}
if execution_box.is_none() {
let curves: Result<Vec<_>, _> = mab
.a_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
execution_box = Some(Box::new(ACurves3Inverse::<DEPTH> {
curve0,
curve1,
curve2,
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
}));
}
}
execution_box
.expect("LUT Sampler on Multidimensional Inverse 3x3 must be set")
.transform(lut)?;
}
}
Ok(())
}

394
vendor/moxcms/src/conversions/mab4x3.rs vendored Normal file

@@ -0,0 +1,394 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::mab::{BCurves3, MCurves3};
use crate::safe_math::SafeMul;
use crate::{
CmsError, DataColorSpace, Hypercube, InPlaceStage, InterpolationMethod,
LutMultidimensionalType, MalformedSize, Matrix3d, Stage, TransformOptions, Vector3d, Vector3f,
};
#[allow(dead_code)]
struct ACurves4x3<'a, const DEPTH: usize> {
curve0: Box<[f32; 65536]>,
curve1: Box<[f32; 65536]>,
curve2: Box<[f32; 65536]>,
curve3: Box<[f32; 65536]>,
clut: &'a [f32],
grid_size: [u8; 4],
interpolation_method: InterpolationMethod,
pcs: DataColorSpace,
}
#[allow(dead_code)]
struct ACurves4x3Optimized<'a> {
clut: &'a [f32],
grid_size: [u8; 4],
interpolation_method: InterpolationMethod,
pcs: DataColorSpace,
}
#[allow(dead_code)]
impl<const DEPTH: usize> ACurves4x3<'_, DEPTH> {
fn transform_impl<Fetch: Fn(f32, f32, f32, f32) -> Vector3f>(
&self,
src: &[f32],
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
let scale_value = (DEPTH - 1) as f32;
assert_eq!(src.len() / 4, dst.len() / 3);
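// Quantize each input channel to a table index in [0, DEPTH - 1]
// (round-to-nearest, clamped above), apply the per-channel A curves, then
// fetch the 3-channel result from the 4-D CLUT.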
for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(3)) {
let a0 = (src[0] * scale_value).round().min(scale_value) as u16;
let a1 = (src[1] * scale_value).round().min(scale_value) as u16;
let a2 = (src[2] * scale_value).round().min(scale_value) as u16;
let a3 = (src[3] * scale_value).round().min(scale_value) as u16;
let c = self.curve0[a0 as usize];
let m = self.curve1[a1 as usize];
let y = self.curve2[a2 as usize];
let k = self.curve3[a3 as usize];
let r = fetch(c, m, y, k);
dst[0] = r.v[0];
dst[1] = r.v[1];
dst[2] = r.v[2];
}
Ok(())
}
}
#[allow(dead_code)]
impl ACurves4x3Optimized<'_> {
fn transform_impl<Fetch: Fn(f32, f32, f32, f32) -> Vector3f>(
&self,
src: &[f32],
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
assert_eq!(src.len() / 4, dst.len() / 3);
for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(3)) {
let c = src[0];
let m = src[1];
let y = src[2];
let k = src[3];
let r = fetch(c, m, y, k);
dst[0] = r.v[0];
dst[1] = r.v[1];
dst[2] = r.v[2];
}
Ok(())
}
}
impl<const DEPTH: usize> Stage for ACurves4x3<'_, DEPTH> {
fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> {
let lut = Hypercube::new_hypercube(self.clut, self.grid_size);
// If PCS is LAB or XYZ then linear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(src, dst, |x, y, z, w| lut.quadlinear_vec3(x, y, z, w));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(src, dst, |x, y, z, w| lut.tetra_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(src, dst, |x, y, z, w| lut.pyramid_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(src, dst, |x, y, z, w| lut.prism_vec3(x, y, z, w))?;
}
InterpolationMethod::Linear => {
self.transform_impl(src, dst, |x, y, z, w| lut.quadlinear_vec3(x, y, z, w))?;
}
}
Ok(())
}
}
impl Stage for ACurves4x3Optimized<'_> {
fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> {
let lut = Hypercube::new_hypercube(self.clut, self.grid_size);
// If PCS is LAB or XYZ then linear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(src, dst, |x, y, z, w| lut.quadlinear_vec3(x, y, z, w));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(src, dst, |x, y, z, w| lut.tetra_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(src, dst, |x, y, z, w| lut.pyramid_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(src, dst, |x, y, z, w| lut.prism_vec3(x, y, z, w))?;
}
InterpolationMethod::Linear => {
self.transform_impl(src, dst, |x, y, z, w| lut.quadlinear_vec3(x, y, z, w))?;
}
}
Ok(())
}
}
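// Evaluates a device-to-PCS (mAB) pipeline for a 4-in/3-out profile, in
// mAB order: A curves with the 4-D CLUT (producing a new 3-channel table),
// then matrix/bias with M curves, then B curves.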
pub(crate) fn prepare_mab_4x3(
mab: &LutMultidimensionalType,
lut: &mut [f32],
options: TransformOptions,
pcs: DataColorSpace,
) -> Result<Vec<f32>, CmsError> {
const LERP_DEPTH: usize = 65536;
const BP: usize = 13;
const DEPTH: usize = 8192;
// A 4x3 mAB pipeline requires four input and three output channels.
if mab.num_input_channels != 4 || mab.num_output_channels != 3 {
return Err(CmsError::UnsupportedProfileConnection);
}
let mut new_lut = vec![0f32; (lut.len() / 4) * 3];
if mab.a_curves.len() == 4 && mab.clut.is_some() {
let clut = &mab.clut.as_ref().map(|x| x.to_clut_f32()).unwrap();
let lut_grid = (mab.grid_points[0] as usize)
.safe_mul(mab.grid_points[1] as usize)?
.safe_mul(mab.grid_points[2] as usize)?
.safe_mul(mab.grid_points[3] as usize)?
.safe_mul(mab.num_output_channels as usize)?;
if clut.len() != lut_grid {
return Err(CmsError::MalformedClut(MalformedSize {
size: clut.len(),
expected: lut_grid,
}));
}
let all_curves_linear = mab.a_curves.iter().all(|curve| curve.is_linear());
let grid_size = [
mab.grid_points[0],
mab.grid_points[1],
mab.grid_points[2],
mab.grid_points[3],
];
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
if all_curves_linear {
use crate::conversions::neon::ACurves4x3NeonOptimizedNeon;
let a_curves = ACurves4x3NeonOptimizedNeon {
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
};
a_curves.transform(lut, &mut new_lut)?;
} else {
use crate::conversions::neon::ACurves4x3Neon;
let curves: Result<Vec<_>, _> = mab
.a_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2, curve3] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
let a_curves = ACurves4x3Neon::<DEPTH> {
curve0,
curve1,
curve2,
curve3,
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
};
a_curves.transform(lut, &mut new_lut)?;
}
#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
{
let mut execution_box: Option<Box<dyn Stage>> = None;
if all_curves_linear {
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
{
use crate::conversions::avx::ACurves4x3AvxFmaOptimized;
if std::arch::is_x86_feature_detected!("avx2")
&& std::arch::is_x86_feature_detected!("fma")
{
execution_box = Some(Box::new(ACurves4x3AvxFmaOptimized {
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
}));
}
}
if execution_box.is_none() {
execution_box = Some(Box::new(ACurves4x3Optimized {
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
}));
}
} else {
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
{
use crate::conversions::avx::ACurves4x3AvxFma;
if std::arch::is_x86_feature_detected!("avx2")
&& std::arch::is_x86_feature_detected!("fma")
{
let curves: Result<Vec<_>, _> = mab
.a_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2, curve3] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
execution_box = Some(Box::new(ACurves4x3AvxFma::<DEPTH> {
curve0,
curve1,
curve2,
curve3,
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
}));
}
}
if execution_box.is_none() {
let curves: Result<Vec<_>, _> = mab
.a_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2, curve3] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
execution_box = Some(Box::new(ACurves4x3::<DEPTH> {
curve0,
curve1,
curve2,
curve3,
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
}));
}
}
execution_box
.expect("Sampler for Multidimensional 4x3 must be set")
.transform(lut, &mut new_lut)?;
}
} else {
// Not supported
return Err(CmsError::UnsupportedProfileConnection);
}
if mab.m_curves.len() == 3 {
let all_curves_linear = mab.m_curves.iter().all(|curve| curve.is_linear());
if !all_curves_linear
|| !mab.matrix.test_equality(Matrix3d::IDENTITY)
|| mab.bias.ne(&Vector3d::default())
{
let curves: Result<Vec<_>, _> = mab
.m_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
let matrix = mab.matrix.to_f32();
let bias: Vector3f = mab.bias.cast();
let m_curves = MCurves3::<DEPTH> {
curve0,
curve1,
curve2,
matrix,
bias,
inverse: false,
};
m_curves.transform(&mut new_lut)?;
}
}
if mab.b_curves.len() == 3 {
let all_curves_linear = mab.b_curves.iter().all(|curve| curve.is_linear());
if !all_curves_linear {
let curves: Result<Vec<_>, _> = mab
.b_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
let b_curves = BCurves3::<DEPTH> {
curve0,
curve1,
curve2,
};
b_curves.transform(&mut new_lut)?;
}
} else {
return Err(CmsError::InvalidAtoBLut);
}
Ok(new_lut)
}

298
vendor/moxcms/src/conversions/mba3x4.rs vendored Normal file

@@ -0,0 +1,298 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::mab::{BCurves3, MCurves3};
use crate::safe_math::SafeMul;
use crate::{
CmsError, Cube, DataColorSpace, InPlaceStage, InterpolationMethod, LutMultidimensionalType,
MalformedSize, Matrix3d, Stage, TransformOptions, Vector3d, Vector4f,
};
struct ACurves3x4Inverse<'a, const DEPTH: usize> {
curve0: Box<[f32; 65536]>,
curve1: Box<[f32; 65536]>,
curve2: Box<[f32; 65536]>,
curve3: Box<[f32; 65536]>,
clut: &'a [f32],
grid_size: [u8; 3],
interpolation_method: InterpolationMethod,
pcs: DataColorSpace,
}
struct ACurves3x4InverseOptimized<'a> {
clut: &'a [f32],
grid_size: [u8; 3],
interpolation_method: InterpolationMethod,
pcs: DataColorSpace,
}
impl<const DEPTH: usize> ACurves3x4Inverse<'_, DEPTH> {
fn transform_impl<Fetch: Fn(f32, f32, f32) -> Vector4f>(
&self,
src: &[f32],
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
let scale_value = (DEPTH as u32 - 1u32) as f32;
assert_eq!(src.len() / 3, dst.len() / 4);
for (src, dst) in src.chunks_exact(3).zip(dst.chunks_exact_mut(4)) {
let interpolated = fetch(src[0], src[1], src[2]);
let a0 = (interpolated.v[0] * scale_value).round().min(scale_value) as u16;
let a1 = (interpolated.v[1] * scale_value).round().min(scale_value) as u16;
let a2 = (interpolated.v[2] * scale_value).round().min(scale_value) as u16;
let a3 = (interpolated.v[3] * scale_value).round().min(scale_value) as u16;
let b0 = self.curve0[a0 as usize];
let b1 = self.curve1[a1 as usize];
let b2 = self.curve2[a2 as usize];
let b3 = self.curve3[a3 as usize];
dst[0] = b0;
dst[1] = b1;
dst[2] = b2;
dst[3] = b3;
}
Ok(())
}
}
impl ACurves3x4InverseOptimized<'_> {
fn transform_impl<Fetch: Fn(f32, f32, f32) -> Vector4f>(
&self,
src: &[f32],
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
assert_eq!(src.len() / 3, dst.len() / 4);
for (src, dst) in src.chunks_exact(3).zip(dst.chunks_exact_mut(4)) {
let interpolated = fetch(src[0], src[1], src[2]);
let b0 = interpolated.v[0];
let b1 = interpolated.v[1];
let b2 = interpolated.v[2];
let b3 = interpolated.v[3];
dst[0] = b0;
dst[1] = b1;
dst[2] = b2;
dst[3] = b3;
}
Ok(())
}
}
impl<const DEPTH: usize> Stage for ACurves3x4Inverse<'_, DEPTH> {
fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> {
let lut = Cube::new_cube(self.clut, self.grid_size);
// If PCS is LAB or XYZ then linear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(src, dst, |x, y, z| lut.trilinear_vec4(x, y, z));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(src, dst, |x, y, z| lut.tetra_vec4(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(src, dst, |x, y, z| lut.pyramid_vec4(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(src, dst, |x, y, z| lut.prism_vec4(x, y, z))?;
}
InterpolationMethod::Linear => {
self.transform_impl(src, dst, |x, y, z| lut.trilinear_vec4(x, y, z))?;
}
}
Ok(())
}
}
impl Stage for ACurves3x4InverseOptimized<'_> {
fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> {
let lut = Cube::new_cube(self.clut, self.grid_size);
// If PCS is LAB or XYZ then linear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(src, dst, |x, y, z| lut.trilinear_vec4(x, y, z));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(src, dst, |x, y, z| lut.tetra_vec4(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(src, dst, |x, y, z| lut.pyramid_vec4(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(src, dst, |x, y, z| lut.prism_vec4(x, y, z))?;
}
InterpolationMethod::Linear => {
self.transform_impl(src, dst, |x, y, z| lut.trilinear_vec4(x, y, z))?;
}
}
Ok(())
}
}
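// Evaluates a PCS-to-device (mBA) pipeline for a 3-in/4-out profile:
// B curves and M curves run in place on the 3-channel `lut`, then the
// 3-D CLUT with inverse A curves expands it into a new 4-channel table.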
pub(crate) fn prepare_mba_3x4(
mab: &LutMultidimensionalType,
lut: &mut [f32],
options: TransformOptions,
pcs: DataColorSpace,
) -> Result<Vec<f32>, CmsError> {
// A 3x4 mBA pipeline requires three input and four output channels.
if mab.num_input_channels != 3 || mab.num_output_channels != 4 {
return Err(CmsError::UnsupportedProfileConnection);
}
const LERP_DEPTH: usize = 65536;
const BP: usize = 13;
const DEPTH: usize = 8192;
if mab.b_curves.len() == 3 {
let all_curves_linear = mab.b_curves.iter().all(|curve| curve.is_linear());
if !all_curves_linear {
let curves: Result<Vec<_>, _> = mab
.b_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
let b_curves = BCurves3::<DEPTH> {
curve0,
curve1,
curve2,
};
b_curves.transform(lut)?;
}
} else {
return Err(CmsError::InvalidAtoBLut);
}
if mab.m_curves.len() == 3 {
let all_curves_linear = mab.m_curves.iter().all(|curve| curve.is_linear());
if !all_curves_linear
|| !mab.matrix.test_equality(Matrix3d::IDENTITY)
|| mab.bias.ne(&Vector3d::default())
{
let curves: Result<Vec<_>, _> = mab
.m_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
let matrix = mab.matrix.to_f32();
let bias = mab.bias.cast();
let m_curves = MCurves3::<DEPTH> {
curve0,
curve1,
curve2,
matrix,
bias,
inverse: true,
};
m_curves.transform(lut)?;
}
}
let mut new_lut = vec![0f32; (lut.len() / 3) * 4];
if mab.a_curves.len() == 4 && mab.clut.is_some() {
let clut = &mab.clut.as_ref().map(|x| x.to_clut_f32()).unwrap();
let lut_grid = (mab.grid_points[0] as usize)
.safe_mul(mab.grid_points[1] as usize)?
.safe_mul(mab.grid_points[2] as usize)?
.safe_mul(mab.num_output_channels as usize)?;
if clut.len() != lut_grid {
return Err(CmsError::MalformedClut(MalformedSize {
size: clut.len(),
expected: lut_grid,
}));
}
let grid_size = [mab.grid_points[0], mab.grid_points[1], mab.grid_points[2]];
let all_curves_linear = mab.a_curves.iter().all(|curve| curve.is_linear());
if all_curves_linear {
let a_curves = ACurves3x4InverseOptimized {
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
};
a_curves.transform(lut, &mut new_lut)?;
} else {
let curves: Result<Vec<_>, _> = mab
.a_curves
.iter()
.map(|c| {
c.build_linearize_table::<u16, LERP_DEPTH, BP>()
.ok_or(CmsError::InvalidTrcCurve)
})
.collect();
let [curve0, curve1, curve2, curve3] =
curves?.try_into().map_err(|_| CmsError::InvalidTrcCurve)?;
let a_curves = ACurves3x4Inverse::<DEPTH> {
curve0,
curve1,
curve2,
curve3,
clut,
grid_size,
interpolation_method: options.interpolation_method,
pcs,
};
a_curves.transform(lut, &mut new_lut)?;
}
} else {
return Err(CmsError::UnsupportedProfileConnection);
}
Ok(new_lut)
}

728
vendor/moxcms/src/conversions/md_lut.rs vendored Normal file

@@ -0,0 +1,728 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::math::{FusedMultiplyAdd, FusedMultiplyNegAdd};
use crate::mlaf::{mlaf, neg_mlaf};
use crate::nd_array::{ArrayFetch, lerp};
use crate::{Vector3f, Vector3i};
use num_traits::MulAdd;
use std::array::from_fn;
use std::marker::PhantomData;
use std::ops::{Add, Mul, Neg, Sub};
pub(crate) struct MultidimensionalLut {
pub(crate) grid_strides: [u32; 16],
pub(crate) grid_filling_size: [u32; 16],
pub(crate) grid_scale: [f32; 16],
pub(crate) output_inks: usize,
}
struct FastCube<T, F: ArrayFetch<T>> {
fetch: F,
_phantom: PhantomData<T>,
}
struct ArrayFetchVectorN<'a> {
array: &'a [f32],
x_stride: u32,
y_stride: u32,
z_stride: u32,
output_inks: usize,
}
#[repr(transparent)]
#[derive(Copy, Clone, Debug)]
pub(crate) struct NVector<T, const N: usize> {
pub(crate) v: [T; N],
}
impl<T: Copy, const N: usize> NVector<T, N> {
pub(crate) fn from_slice(v: &[T; N]) -> Self {
Self { v: *v }
}
}
impl<T: Copy, const N: usize> From<T> for NVector<T, N> {
#[inline]
fn from(value: T) -> Self {
Self { v: [value; N] }
}
}
impl<T: Copy + Add<T, Output = T> + Mul<T, Output = T> + MulAdd<T, Output = T>, const N: usize>
FusedMultiplyAdd<NVector<T, N>> for NVector<T, N>
{
#[inline]
fn mla(&self, b: NVector<T, N>, c: NVector<T, N>) -> NVector<T, N> {
Self {
v: from_fn(|i| mlaf(self.v[i], b.v[i], c.v[i])),
}
}
}
impl<
T: Copy + Add<T, Output = T> + Mul<T, Output = T> + MulAdd<T, Output = T> + Neg<Output = T>,
const N: usize,
> FusedMultiplyNegAdd<NVector<T, N>> for NVector<T, N>
{
#[inline]
fn neg_mla(&self, b: NVector<T, N>, c: NVector<T, N>) -> NVector<T, N> {
Self {
v: from_fn(|i| neg_mlaf(self.v[i], b.v[i], c.v[i])),
}
}
}
impl<T: Sub<Output = T> + Default + Copy, const N: usize> Sub<NVector<T, N>> for NVector<T, N> {
type Output = Self;
#[inline]
fn sub(self, rhs: NVector<T, N>) -> Self::Output {
Self {
v: from_fn(|i| self.v[i] - rhs.v[i]),
}
}
}
impl<T: Add<Output = T> + Default + Copy, const N: usize> Add<NVector<T, N>> for NVector<T, N> {
type Output = Self;
#[inline]
fn add(self, rhs: NVector<T, N>) -> Self::Output {
Self {
v: from_fn(|i| self.v[i] + rhs.v[i]),
}
}
}
impl<T: Mul<Output = T> + Default + Copy, const N: usize> Mul<NVector<T, N>> for NVector<T, N> {
type Output = Self;
#[inline]
fn mul(self, rhs: NVector<T, N>) -> Self::Output {
Self {
v: from_fn(|i| self.v[i] * rhs.v[i]),
}
}
}
impl<const N: usize> ArrayFetch<NVector<f32, N>> for ArrayFetchVectorN<'_> {
#[inline(always)]
fn fetch(&self, x: i32, y: i32, z: i32) -> NVector<f32, N> {
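// Flatten (x, y, z) into the CLUT and read the next N floats as one sample.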
let start = (x as u32 * self.x_stride + y as u32 * self.y_stride + z as u32 * self.z_stride)
as usize
* self.output_inks;
let k = &self.array[start..start + N];
NVector::<f32, N>::from_slice(k.try_into().unwrap())
}
}
impl<T, F: ArrayFetch<T>> FastCube<T, F>
where
T: Copy
+ From<f32>
+ Sub<T, Output = T>
+ Mul<T, Output = T>
+ Add<T, Output = T>
+ FusedMultiplyNegAdd<T>
+ FusedMultiplyAdd<T>,
{
#[inline(always)]
fn tetra(&self, src: Vector3i, src_next: Vector3i, w: Vector3f) -> T {
let x = src.v[0];
let y = src.v[1];
let z = src.v[2];
let x_n = src_next.v[0];
let y_n = src_next.v[1];
let z_n = src_next.v[2];
let rx = w.v[0];
let ry = w.v[1];
let rz = w.v[2];
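// The ordering of the fractional weights (rx, ry, rz) selects one of the
// six tetrahedra that tile the unit cube; the deltas c1, c2, c3 are then
// accumulated as c0 + c1*rx + c2*ry + c3*rz via fused multiply-adds.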
let c0 = self.fetch.fetch(x, y, z);
let c2;
let c1;
let c3;
if rx >= ry {
if ry >= rz {
//rx >= ry && ry >= rz
c1 = self.fetch.fetch(x_n, y, z) - c0;
c2 = self.fetch.fetch(x_n, y_n, z) - self.fetch.fetch(x_n, y, z);
c3 = self.fetch.fetch(x_n, y_n, z_n) - self.fetch.fetch(x_n, y_n, z);
} else if rx >= rz {
//rx >= rz && rz >= ry
c1 = self.fetch.fetch(x_n, y, z) - c0;
c2 = self.fetch.fetch(x_n, y_n, z_n) - self.fetch.fetch(x_n, y, z_n);
c3 = self.fetch.fetch(x_n, y, z_n) - self.fetch.fetch(x_n, y, z);
} else {
//rz > rx && rx >= ry
c1 = self.fetch.fetch(x_n, y, z_n) - self.fetch.fetch(x, y, z_n);
c2 = self.fetch.fetch(x_n, y_n, z_n) - self.fetch.fetch(x_n, y, z_n);
c3 = self.fetch.fetch(x, y, z_n) - c0;
}
} else if rx >= rz {
//ry > rx && rx >= rz
c1 = self.fetch.fetch(x_n, y_n, z) - self.fetch.fetch(x, y_n, z);
c2 = self.fetch.fetch(x, y_n, z) - c0;
c3 = self.fetch.fetch(x_n, y_n, z_n) - self.fetch.fetch(x_n, y_n, z);
} else if ry >= rz {
//ry >= rz && rz > rx
c1 = self.fetch.fetch(x_n, y_n, z_n) - self.fetch.fetch(x, y_n, z_n);
c2 = self.fetch.fetch(x, y_n, z) - c0;
c3 = self.fetch.fetch(x, y_n, z_n) - self.fetch.fetch(x, y_n, z);
} else {
//rz > ry && ry > rx
c1 = self.fetch.fetch(x_n, y_n, z_n) - self.fetch.fetch(x, y_n, z_n);
c2 = self.fetch.fetch(x, y_n, z_n) - self.fetch.fetch(x, y, z_n);
c3 = self.fetch.fetch(x, y, z_n) - c0;
}
let s0 = c0.mla(c1, T::from(rx));
let s1 = s0.mla(c2, T::from(ry));
s1.mla(c3, T::from(rz))
}
}
impl MultidimensionalLut {
pub(crate) fn new(grid_size: [u8; 16], input_inks: usize, output_inks: usize) -> Self {
// At least one input ink is required (the stride loop below subtracts 1).
assert!(input_inks >= 1 && input_inks <= 16);
let mut grid_strides = [1u32; 16];
let mut grid_filling_size = [1u32; 16];
for (ink, dst_stride) in grid_strides.iter_mut().take(input_inks - 1).enumerate() {
let mut stride = 1u32;
let how_many = input_inks.saturating_sub(ink).saturating_sub(1);
for &grid_stride in grid_size.iter().take(how_many) {
stride *= grid_stride as u32;
}
*dst_stride = stride;
}
for (ink, dst_stride) in grid_filling_size.iter_mut().take(input_inks).enumerate() {
let mut stride = output_inks as u32;
let how_many = input_inks.saturating_sub(ink).saturating_sub(1);
for &grid_stride in grid_size.iter().take(how_many) {
stride *= grid_stride as u32;
}
*dst_stride = stride;
}
let mut grid_strides_f = [0f32; 16];
for (dst, src) in grid_strides_f
.iter_mut()
.zip(grid_size.iter())
.take(input_inks)
{
*dst = (*src - 1) as f32;
}
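// e.g. with a uniform grid (the prefix products above index consistently
// when every axis has the same point count): 4 input inks, 9 points per
// axis and 3 output inks give grid_strides = [729, 81, 9, 1, ...],
// grid_filling_size = [2187, 243, 27, 3, ...] and grid_scale = 8.0 per axis.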
Self {
grid_strides,
grid_scale: grid_strides_f,
grid_filling_size,
output_inks,
}
}
}
pub(crate) fn linear_4i_vec3f_direct<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
lx: f32,
ly: f32,
lz: f32,
lw: f32,
) -> NVector<f32, N> {
let lin_x = lx.max(0.0).min(1.0);
let lin_y = ly.max(0.0).min(1.0);
let lin_z = lz.max(0.0).min(1.0);
let lin_w = lw.max(0.0).min(1.0);
let scale_x = lut.grid_scale[0];
let scale_y = lut.grid_scale[1];
let scale_z = lut.grid_scale[2];
let scale_w = lut.grid_scale[3];
let lx = lin_x * scale_x;
let ly = lin_y * scale_y;
let lz = lin_z * scale_z;
let lw = lin_w * scale_w;
let x = lx.floor() as i32;
let y = ly.floor() as i32;
let z = lz.floor() as i32;
let w = lw.floor() as i32;
let src_x = Vector3i { v: [x, y, z] };
let x_n = lx.ceil() as i32;
let y_n = ly.ceil() as i32;
let z_n = lz.ceil() as i32;
let w_n = lw.ceil() as i32;
let src_next = Vector3i { v: [x_n, y_n, z_n] };
let x_w = lx - x as f32;
let y_w = ly - y as f32;
let z_w = lz - z as f32;
let w_w = lw - w as f32;
let weights = Vector3f { v: [x_w, y_w, z_w] };
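// Split the 4-D lookup into two tetrahedral 3-D lookups on the adjacent
// w-slices (floor and ceil); the results are blended by the fractional w.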
let cube0 = &arr[(w as usize * lut.grid_filling_size[3] as usize)..];
let cube1 = &arr[(w_n as usize * lut.grid_filling_size[3] as usize)..];
let fast_cube0 = FastCube {
fetch: ArrayFetchVectorN {
array: cube0,
x_stride: lut.grid_strides[0],
y_stride: lut.grid_strides[1],
z_stride: lut.grid_strides[2],
output_inks: lut.output_inks,
},
_phantom: PhantomData,
};
let fast_cube1 = FastCube {
fetch: ArrayFetchVectorN {
array: cube1,
x_stride: lut.grid_strides[0],
y_stride: lut.grid_strides[1],
z_stride: lut.grid_strides[2],
output_inks: lut.output_inks,
},
_phantom: PhantomData,
};
let w0 = fast_cube0.tetra(src_x, src_next, weights);
let w1 = fast_cube1.tetra(src_x, src_next, weights);
lerp(w0, w1, NVector::<f32, N>::from(w_w))
}
pub(crate) fn linear_3i_vec3f_direct<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
) -> NVector<f32, N> {
linear_3i_vec3f(lut, arr, inputs[0], inputs[1], inputs[2])
}
fn linear_3i_vec3f<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
x: f32,
y: f32,
z: f32,
) -> NVector<f32, N> {
let lin_x = x.max(0.0).min(1.0);
let lin_y = y.max(0.0).min(1.0);
let lin_z = z.max(0.0).min(1.0);
let scale_x = lut.grid_scale[0];
let scale_y = lut.grid_scale[1];
let scale_z = lut.grid_scale[2];
let lx = lin_x * scale_x;
let ly = lin_y * scale_y;
let lz = lin_z * scale_z;
let x = lx.floor() as i32;
let y = ly.floor() as i32;
let z = lz.floor() as i32;
let src_x = Vector3i { v: [x, y, z] };
let x_n = lx.ceil() as i32;
let y_n = ly.ceil() as i32;
let z_n = lz.ceil() as i32;
let src_next = Vector3i { v: [x_n, y_n, z_n] };
let x_w = lx - x as f32;
let y_w = ly - y as f32;
let z_w = lz - z as f32;
let weights = Vector3f { v: [x_w, y_w, z_w] };
let fast_cube = FastCube {
fetch: ArrayFetchVectorN {
array: arr,
x_stride: lut.grid_strides[0],
y_stride: lut.grid_strides[1],
z_stride: lut.grid_strides[2],
output_inks: lut.output_inks,
},
_phantom: PhantomData,
};
fast_cube.tetra(src_x, src_next, weights)
}
pub(crate) fn linear_1i_vec3f<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
) -> NVector<f32, N> {
let lin_x = inputs[0].max(0.0).min(1.0);
let scale_x = lut.grid_scale[0];
let lx = lin_x * scale_x;
let x = lx.floor() as i32;
let x_n = lx.ceil() as i32;
let x_w = lx - x as f32;
let x_stride = lut.grid_strides[0];
let offset = |xi: i32| -> usize { (xi as u32 * x_stride) as usize * lut.output_inks };
// Sample 2 corners
let a = NVector::<f32, N>::from_slice(&arr[offset(x)..][..N].try_into().unwrap());
let b = NVector::<f32, N>::from_slice(&arr[offset(x_n)..][..N].try_into().unwrap());
a * NVector::<f32, N>::from(1.0 - x_w) + b * NVector::<f32, N>::from(x_w)
}
pub(crate) fn linear_2i_vec3f_direct<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
) -> NVector<f32, N> {
linear_2i_vec3f(lut, arr, inputs[0], inputs[1])
}
fn linear_2i_vec3f<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
x: f32,
y: f32,
) -> NVector<f32, N> {
let lin_x = x.max(0.0).min(1.0);
let lin_y = y.max(0.0).min(1.0);
let scale_x = lut.grid_scale[0];
let scale_y = lut.grid_scale[1];
let lx = lin_x * scale_x;
let ly = lin_y * scale_y;
let x = lx.floor() as i32;
let y = ly.floor() as i32;
let x_n = lx.ceil() as i32;
let y_n = ly.ceil() as i32;
let x_w = lx - x as f32;
let y_w = ly - y as f32;
let x_stride = lut.grid_strides[0];
let y_stride = lut.grid_strides[1];
let offset = |xi: i32, yi: i32| -> usize {
(xi as u32 * x_stride + yi as u32 * y_stride) as usize * lut.output_inks
};
// Sample 4 corners
let a = NVector::<f32, N>::from_slice(&arr[offset(x, y)..][..N].try_into().unwrap());
let b = NVector::<f32, N>::from_slice(&arr[offset(x_n, y)..][..N].try_into().unwrap());
let c = NVector::<f32, N>::from_slice(&arr[offset(x, y_n)..][..N].try_into().unwrap());
let d = NVector::<f32, N>::from_slice(&arr[offset(x_n, y_n)..][..N].try_into().unwrap());
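// Lerp along x within each row (ab, cd), then along y between the rows.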
let ab = a * NVector::<f32, N>::from(1.0 - x_w) + b * NVector::<f32, N>::from(x_w);
let cd = c * NVector::<f32, N>::from(1.0 - x_w) + d * NVector::<f32, N>::from(x_w);
ab * NVector::<f32, N>::from(1.0 - y_w) + cd * NVector::<f32, N>::from(y_w)
}
pub(crate) fn linear_4i_vec3f<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
) -> NVector<f32, N> {
linear_4i_vec3f_direct(lut, arr, inputs[0], inputs[1], inputs[2], inputs[3])
}
type FHandle<const N: usize> = fn(&MultidimensionalLut, &[f32], &[f32]) -> NVector<f32, N>;
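// Recursive dimension reduction: axis I is split at its floor/ceil grid
// planes, `handle` resolves each lower-dimensional sub-lookup, and the two
// results are lerped, so an N-input lookup costs 2^(N - 4) 4-D lookups.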
#[inline(never)]
pub(crate) fn linear_n_i_vec3f<
const N: usize,
const I: usize,
Handle: Fn(&MultidimensionalLut, &[f32], &[f32]) -> NVector<f32, N>,
>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
handle: Handle,
) -> NVector<f32, N> {
let lin_w = inputs[I];
let w_c = lin_w.max(0.).min(1.);
let scale_p = lut.grid_scale[I];
let wf = w_c * scale_p;
let w0 = wf.min(scale_p) as usize;
let w1 = (wf + 1.).min(scale_p) as usize;
let w = wf - w0 as f32;
let cube0 = &arr[(w0 * lut.grid_filling_size[I] as usize)..];
let cube1 = &arr[(w1 * lut.grid_filling_size[I] as usize)..];
let inputs_sliced = &inputs[0..I];
let w0 = handle(lut, cube0, inputs_sliced);
let w1 = handle(lut, cube1, inputs_sliced);
lerp(w0, w1, NVector::<f32, N>::from(w))
}
#[inline(never)]
pub(crate) fn linear_5i_vec3f<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
) -> NVector<f32, N> {
let lin_w = inputs[4];
let w_c = lin_w.max(0.).min(1.);
let scale_p = lut.grid_scale[4];
let wf = w_c * scale_p;
let w0 = wf.min(scale_p) as usize;
let w1 = (wf + 1.).min(scale_p) as usize;
let w = wf - w0 as f32;
let cube0 = &arr[(w0 * lut.grid_filling_size[4] as usize)..];
let cube1 = &arr[(w1 * lut.grid_filling_size[4] as usize)..];
let w0 = linear_4i_vec3f_direct(lut, cube0, inputs[0], inputs[1], inputs[2], inputs[3]);
let w1 = linear_4i_vec3f_direct(lut, cube1, inputs[0], inputs[1], inputs[2], inputs[3]);
lerp(w0, w1, NVector::<f32, N>::from(w))
}
#[inline(never)]
pub(crate) fn linear_6i_vec3f<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
) -> NVector<f32, N> {
let f = linear_5i_vec3f::<N>;
linear_n_i_vec3f::<N, 5, FHandle<N>>(lut, arr, inputs, f)
}
#[inline(never)]
pub(crate) fn linear_7i_vec3f<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
) -> NVector<f32, N> {
let f = linear_6i_vec3f::<N>;
linear_n_i_vec3f::<N, 6, FHandle<N>>(lut, arr, inputs, f)
}
#[inline(never)]
pub(crate) fn linear_8i_vec3f<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
) -> NVector<f32, N> {
let f = linear_7i_vec3f::<N>;
linear_n_i_vec3f::<N, 7, FHandle<N>>(lut, arr, inputs, f)
}
#[inline(never)]
pub(crate) fn linear_9i_vec3f<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
) -> NVector<f32, N> {
let f = linear_8i_vec3f::<N>;
linear_n_i_vec3f::<N, 8, FHandle<N>>(lut, arr, inputs, f)
}
#[inline(never)]
pub(crate) fn linear_10i_vec3f<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
) -> NVector<f32, N> {
let f = linear_9i_vec3f::<N>;
linear_n_i_vec3f::<N, 9, FHandle<N>>(lut, arr, inputs, f)
}
#[inline(never)]
pub(crate) fn linear_11i_vec3f<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
) -> NVector<f32, N> {
let f = linear_10i_vec3f::<N>;
linear_n_i_vec3f::<N, 10, FHandle<N>>(lut, arr, inputs, f)
}
#[inline(never)]
pub(crate) fn linear_12i_vec3f<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
) -> NVector<f32, N> {
let f = linear_11i_vec3f::<N>;
linear_n_i_vec3f::<N, 11, FHandle<N>>(lut, arr, inputs, f)
}
#[inline(never)]
pub(crate) fn linear_13i_vec3f<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
) -> NVector<f32, N> {
let f = linear_12i_vec3f::<N>;
linear_n_i_vec3f::<N, 12, FHandle<N>>(lut, arr, inputs, f)
}
#[inline(never)]
pub(crate) fn linear_14i_vec3f<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
) -> NVector<f32, N> {
let f = linear_13i_vec3f::<N>;
linear_n_i_vec3f::<N, 13, FHandle<N>>(lut, arr, inputs, f)
}
#[inline(never)]
pub(crate) fn linear_15i_vec3f<const N: usize>(
lut: &MultidimensionalLut,
arr: &[f32],
inputs: &[f32],
) -> NVector<f32, N> {
let f = linear_14i_vec3f::<N>;
linear_n_i_vec3f::<N, 14, FHandle<N>>(lut, arr, inputs, f)
}
#[inline(never)]
pub(crate) fn tetra_3i_to_any_vec(
lut: &MultidimensionalLut,
arr: &[f32],
x: f32,
y: f32,
z: f32,
dst: &mut [f32],
inks: usize,
) {
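// `NVector<f32, N>` needs N at compile time, so the runtime ink count is
// dispatched to a monomorphized instance per output channel count.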
macro_rules! spill_inks {
($($n:literal),+) => {
match inks {
$($n => {
let vec = linear_3i_vec3f::<$n>(lut, arr, x, y, z);
for (dst, src) in dst.iter_mut().zip(vec.v.iter()) {
*dst = *src;
}
})+
_ => unreachable!(),
}
};
}
spill_inks!(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}


@@ -0,0 +1,190 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::katana::{
CopyAlphaStage, InjectAlphaStage, Katana, KatanaInitialStage, KatanaIntermediateStage,
KatanaPostFinalizationStage, KatanaStageLabToXyz, KatanaStageXyzToLab,
katana_create_rgb_lin_lut, katana_input_make_lut_nx3, katana_multi_dimensional_3xn_to_device,
katana_multi_dimensional_nx3_to_pcs, katana_output_make_lut_3xn, katana_pcs_lab_v2_to_v4,
katana_pcs_lab_v4_to_v2, katana_prepare_inverse_lut_rgb_xyz,
};
use crate::{
CmsError, ColorProfile, DataColorSpace, GammaLutInterpolate, Layout, LutWarehouse,
PointeeSizeExpressible, TransformExecutor, TransformOptions,
};
use num_traits::AsPrimitive;
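// Assembles the full pipeline: an initial device-to-PCS stage
// (matrix-shaper fast path, LUT, or multidimensional tag), optional
// Lab v2<->v4 and Lab<->XYZ bridge stages, a PCS-to-device final stage,
// and alpha post-finalization.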
pub(crate) fn do_any_to_any<
T: Copy
+ Default
+ AsPrimitive<f32>
+ Send
+ Sync
+ AsPrimitive<usize>
+ PointeeSizeExpressible
+ GammaLutInterpolate,
const BIT_DEPTH: usize,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
>(
src_layout: Layout,
source: &ColorProfile,
dst_layout: Layout,
dest: &ColorProfile,
options: TransformOptions,
) -> Result<Box<dyn TransformExecutor<T> + Send + Sync>, CmsError>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, u8>,
(): LutBarycentricReduction<T, u16>,
{
let mut stages: Vec<Box<dyn KatanaIntermediateStage<f32> + Send + Sync>> = Vec::new();
let initial_stage: Box<dyn KatanaInitialStage<f32, T> + Send + Sync> = match source
.is_matrix_shaper()
{
true => {
let state =
katana_create_rgb_lin_lut::<T, BIT_DEPTH, LINEAR_CAP>(src_layout, source, options)?;
stages.extend(state.stages);
state.initial_stage
}
false => match source.get_device_to_pcs(options.rendering_intent).ok_or(
CmsError::UnsupportedLutRenderingIntent(source.rendering_intent),
)? {
LutWarehouse::Lut(lut) => katana_input_make_lut_nx3::<T>(
src_layout,
src_layout.channels(),
lut,
options,
source.pcs,
BIT_DEPTH,
)?,
LutWarehouse::Multidimensional(mab) => {
katana_multi_dimensional_nx3_to_pcs::<T, BIT_DEPTH>(
src_layout, mab, options, source.pcs,
)?
}
},
};
stages.push(katana_pcs_lab_v2_to_v4(source));
if source.pcs == DataColorSpace::Lab {
stages.push(Box::new(KatanaStageLabToXyz::default()));
}
if dest.pcs == DataColorSpace::Lab {
stages.push(Box::new(KatanaStageXyzToLab::default()));
}
stages.push(katana_pcs_lab_v4_to_v2(dest));
let final_stage = if dest.has_pcs_to_device_lut() {
let pcs_to_device = dest
.get_pcs_to_device(options.rendering_intent)
.ok_or(CmsError::UnsupportedProfileConnection)?;
match pcs_to_device {
LutWarehouse::Lut(lut) => katana_output_make_lut_3xn::<T>(
dst_layout,
lut,
options,
dest.color_space,
BIT_DEPTH,
)?,
LutWarehouse::Multidimensional(mab) => katana_multi_dimensional_3xn_to_device::<T>(
dst_layout, mab, options, dest.pcs, BIT_DEPTH,
)?,
}
} else if dest.is_matrix_shaper() {
let state = katana_prepare_inverse_lut_rgb_xyz::<T, BIT_DEPTH, GAMMA_LUT>(
dest, dst_layout, options,
)?;
stages.extend(state.stages);
state.final_stage
} else {
return Err(CmsError::UnsupportedProfileConnection);
};
let mut post_finalization: Vec<Box<dyn KatanaPostFinalizationStage<T> + Send + Sync>> =
Vec::new();
if let Some(stage) =
prepare_alpha_finalizer::<T>(src_layout, source, dst_layout, dest, BIT_DEPTH)
{
post_finalization.push(stage);
}
Ok(Box::new(Katana::<f32, T> {
initial_stage,
final_stage,
stages,
post_finalization,
}))
}
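// Chooses the alpha post-stage: when both source and destination carry an
// alpha channel, CopyAlphaStage copies it through; otherwise
// InjectAlphaStage synthesizes one for the destination layout.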
pub(crate) fn prepare_alpha_finalizer<
T: Copy
+ Default
+ AsPrimitive<f32>
+ Send
+ Sync
+ AsPrimitive<usize>
+ PointeeSizeExpressible
+ GammaLutInterpolate,
>(
src_layout: Layout,
source: &ColorProfile,
dst_layout: Layout,
dest: &ColorProfile,
bit_depth: usize,
) -> Option<Box<dyn KatanaPostFinalizationStage<T> + Send + Sync>>
where
f32: AsPrimitive<T>,
{
if (dst_layout == Layout::GrayAlpha && dest.color_space == DataColorSpace::Gray)
|| (dst_layout == Layout::Rgba && dest.color_space == DataColorSpace::Rgb)
{
return if (src_layout == Layout::GrayAlpha && source.color_space == DataColorSpace::Gray)
|| (src_layout == Layout::Rgba && source.color_space == DataColorSpace::Rgb)
{
Some(Box::new(CopyAlphaStage {
src_layout,
dst_layout,
target_color_space: dest.color_space,
_phantom: Default::default(),
}))
} else {
Some(Box::new(InjectAlphaStage {
dst_layout,
target_color_space: dest.color_space,
_phantom: Default::default(),
bit_depth,
}))
};
}
None
}

74
vendor/moxcms/src/conversions/mod.rs vendored Normal file

@@ -0,0 +1,74 @@
/*
* // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
mod avx;
#[cfg(all(target_arch = "x86_64", feature = "avx512"))]
mod avx512;
mod bpc;
mod gray2rgb;
mod gray2rgb_extended;
mod interpolator;
mod katana;
mod lut3x3;
mod lut3x4;
mod lut4;
mod lut_transforms;
mod mab;
mod mab4x3;
mod mba3x4;
mod md_lut;
mod md_luts_factory;
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
mod neon;
mod prelude_lut_xyz_rgb;
mod rgb2gray;
mod rgb2gray_extended;
mod rgb_xyz_factory;
mod rgbxyz;
mod rgbxyz_fixed;
mod rgbxyz_float;
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
mod sse;
mod transform_lut3_to_3;
mod transform_lut3_to_4;
mod transform_lut4_to_3;
mod xyz_lab;
pub(crate) use gray2rgb::{make_gray_to_unfused, make_gray_to_x};
pub(crate) use gray2rgb_extended::{make_gray_to_one_trc_extended, make_gray_to_rgb_extended};
pub(crate) use interpolator::LutBarycentricReduction;
pub(crate) use lut_transforms::make_lut_transform;
pub(crate) use rgb_xyz_factory::{RgbXyzFactory, RgbXyzFactoryOpt};
pub(crate) use rgb2gray::{ToneReproductionRgbToGray, make_rgb_to_gray};
pub(crate) use rgb2gray_extended::make_rgb_to_gray_extended;
pub(crate) use rgbxyz::{TransformMatrixShaper, TransformMatrixShaperOptimized};
pub(crate) use rgbxyz_float::{
TransformShaperFloatInOut, TransformShaperRgbFloat, make_rgb_xyz_rgb_transform_float,
make_rgb_xyz_rgb_transform_float_in_out,
};


@@ -0,0 +1,225 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::neon::cube::CubeNeon;
use crate::conversions::neon::interpolator::NeonVector;
use crate::{CmsError, DataColorSpace, InPlaceStage, InterpolationMethod};
use std::arch::aarch64::*;
pub(crate) struct ACurves3Neon<'a, const DEPTH: usize> {
pub(crate) curve0: Box<[f32; 65536]>,
pub(crate) curve1: Box<[f32; 65536]>,
pub(crate) curve2: Box<[f32; 65536]>,
pub(crate) clut: &'a [f32],
pub(crate) grid_size: [u8; 3],
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) pcs: DataColorSpace,
}
pub(crate) struct ACurves3OptimizedNeon<'a> {
pub(crate) clut: &'a [f32],
pub(crate) grid_size: [u8; 3],
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) pcs: DataColorSpace,
}
pub(crate) struct ACurves3InverseNeon<'a, const DEPTH: usize> {
pub(crate) curve0: Box<[f32; 65536]>,
pub(crate) curve1: Box<[f32; 65536]>,
pub(crate) curve2: Box<[f32; 65536]>,
pub(crate) clut: &'a [f32],
pub(crate) grid_size: [u8; 3],
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) pcs: DataColorSpace,
}
impl<const DEPTH: usize> ACurves3Neon<'_, DEPTH> {
fn transform_impl<Fetch: Fn(f32, f32, f32) -> NeonVector>(
&self,
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
let scale_value = (DEPTH - 1) as f32;
for dst in dst.chunks_exact_mut(3) {
let a0 = (dst[0] * scale_value).round().min(scale_value) as u16;
let a1 = (dst[1] * scale_value).round().min(scale_value) as u16;
let a2 = (dst[2] * scale_value).round().min(scale_value) as u16;
let b0 = self.curve0[a0 as usize];
let b1 = self.curve1[a1 as usize];
let b2 = self.curve2[a2 as usize];
let v = fetch(b0, b1, b2).v;
unsafe {
dst[0] = vgetq_lane_f32::<0>(v);
dst[1] = vgetq_lane_f32::<1>(v);
dst[2] = vgetq_lane_f32::<2>(v);
}
}
Ok(())
}
}
impl ACurves3OptimizedNeon<'_> {
fn transform_impl<Fetch: Fn(f32, f32, f32) -> NeonVector>(
&self,
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
for dst in dst.chunks_exact_mut(3) {
let a0 = dst[0];
let a1 = dst[1];
let a2 = dst[2];
let v = fetch(a0, a1, a2).v;
unsafe {
dst[0] = vgetq_lane_f32::<0>(v);
dst[1] = vgetq_lane_f32::<1>(v);
dst[2] = vgetq_lane_f32::<2>(v);
}
}
Ok(())
}
}
impl<const DEPTH: usize> InPlaceStage for ACurves3Neon<'_, DEPTH> {
fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
let lut = CubeNeon::new(self.clut, self.grid_size, 3);
// If PCS is LAB or XYZ then linear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(dst, |x, y, z| lut.tetra_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(dst, |x, y, z| lut.pyramid_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(dst, |x, y, z| lut.prism_vec3(x, y, z))?;
}
InterpolationMethod::Linear => {
self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z))?;
}
}
Ok(())
}
}
impl InPlaceStage for ACurves3OptimizedNeon<'_> {
fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
let lut = CubeNeon::new(self.clut, self.grid_size, 3);
// If PCS is LAB or XYZ then linear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(dst, |x, y, z| lut.tetra_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(dst, |x, y, z| lut.pyramid_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(dst, |x, y, z| lut.prism_vec3(x, y, z))?;
}
InterpolationMethod::Linear => {
self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z))?;
}
}
Ok(())
}
}
impl<const DEPTH: usize> ACurves3InverseNeon<'_, DEPTH> {
fn transform_impl<Fetch: Fn(f32, f32, f32) -> NeonVector>(
&self,
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
let v_scale_value = unsafe { vdupq_n_f32((DEPTH as u32 - 1u32) as f32) };
unsafe {
for dst in dst.chunks_exact_mut(3) {
let mut v = fetch(dst[0], dst[1], dst[2]).v;
v = vmulq_f32(v, v_scale_value);
v = vminq_f32(v, v_scale_value);
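// vcvtaq rounds to nearest with ties away from zero, matching the
// scalar paths' `round()` before the table lookup.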
let c = vcvtaq_u32_f32(v);
let a0 = vgetq_lane_u32::<0>(c) as u16;
let a1 = vgetq_lane_u32::<1>(c) as u16;
let a2 = vgetq_lane_u32::<2>(c) as u16;
let b0 = self.curve0[a0 as usize];
let b1 = self.curve1[a1 as usize];
let b2 = self.curve2[a2 as usize];
dst[0] = b0;
dst[1] = b1;
dst[2] = b2;
}
}
Ok(())
}
}
impl<const DEPTH: usize> InPlaceStage for ACurves3InverseNeon<'_, DEPTH> {
fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
let lut = CubeNeon::new(self.clut, self.grid_size, 3);
// If PCS is LAB or XYZ then linear interpolation should be used
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(dst, |x, y, z| lut.tetra_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(dst, |x, y, z| lut.pyramid_vec3(x, y, z))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(dst, |x, y, z| lut.prism_vec3(x, y, z))?;
}
InterpolationMethod::Linear => {
self.transform_impl(dst, |x, y, z| lut.trilinear_vec3(x, y, z))?;
}
}
Ok(())
}
}


@@ -0,0 +1,168 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::neon::hypercube::HypercubeNeon;
use crate::conversions::neon::interpolator::NeonVector;
use crate::{CmsError, DataColorSpace, InterpolationMethod, Stage};
use std::arch::aarch64::vgetq_lane_f32;
pub(crate) struct ACurves4x3Neon<'a, const DEPTH: usize> {
pub(crate) curve0: Box<[f32; 65536]>,
pub(crate) curve1: Box<[f32; 65536]>,
pub(crate) curve2: Box<[f32; 65536]>,
pub(crate) curve3: Box<[f32; 65536]>,
pub(crate) clut: &'a [f32],
pub(crate) grid_size: [u8; 4],
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) pcs: DataColorSpace,
}
pub(crate) struct ACurves4x3NeonOptimizedNeon<'a> {
pub(crate) clut: &'a [f32],
pub(crate) grid_size: [u8; 4],
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) pcs: DataColorSpace,
}
impl<const DEPTH: usize> ACurves4x3Neon<'_, DEPTH> {
fn transform_impl<Fetch: Fn(f32, f32, f32, f32) -> NeonVector>(
&self,
src: &[f32],
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
let scale_value = (DEPTH - 1) as f32;
assert_eq!(src.len() / 4, dst.len() / 3);
for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(3)) {
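// Quantize each CMYK channel to a curve-table index (round-to-nearest, clamped to DEPTH - 1),
// apply the four A-curves, then sample the 4D CLUT.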
let a0 = (src[0] * scale_value).round().min(scale_value) as u16;
let a1 = (src[1] * scale_value).round().min(scale_value) as u16;
let a2 = (src[2] * scale_value).round().min(scale_value) as u16;
let a3 = (src[3] * scale_value).round().min(scale_value) as u16;
let c = self.curve0[a0 as usize];
let m = self.curve1[a1 as usize];
let y = self.curve2[a2 as usize];
let k = self.curve3[a3 as usize];
let v = fetch(c, m, y, k).v;
unsafe {
dst[0] = vgetq_lane_f32::<0>(v);
dst[1] = vgetq_lane_f32::<1>(v);
dst[2] = vgetq_lane_f32::<2>(v);
}
}
Ok(())
}
}
impl ACurves4x3NeonOptimizedNeon<'_> {
fn transform_impl<Fetch: Fn(f32, f32, f32, f32) -> NeonVector>(
&self,
src: &[f32],
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
assert_eq!(src.len() / 4, dst.len() / 3);
for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(3)) {
let c = src[0];
let m = src[1];
let y = src[2];
let k = src[3];
let v = fetch(c, m, y, k).v;
unsafe {
dst[0] = vgetq_lane_f32::<0>(v);
dst[1] = vgetq_lane_f32::<1>(v);
dst[2] = vgetq_lane_f32::<2>(v);
}
}
Ok(())
}
}
impl<const DEPTH: usize> Stage for ACurves4x3Neon<'_, DEPTH> {
fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> {
let lut = HypercubeNeon::new(self.clut, self.grid_size, 3);
// If the PCS is Lab or XYZ, quadlinear (linear) interpolation is used.
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(src, dst, |x, y, z, w| lut.quadlinear_vec3(x, y, z, w));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(src, dst, |x, y, z, w| lut.tetra_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(src, dst, |x, y, z, w| lut.pyramid_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(src, dst, |x, y, z, w| lut.prism_vec3(x, y, z, w))?;
}
InterpolationMethod::Linear => {
self.transform_impl(src, dst, |x, y, z, w| lut.quadlinear_vec3(x, y, z, w))?;
}
}
Ok(())
}
}
impl Stage for ACurves4x3NeonOptimizedNeon<'_> {
fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> {
let lut = HypercubeNeon::new(self.clut, self.grid_size, 3);
// If the PCS is Lab or XYZ, quadlinear (linear) interpolation is used.
if self.pcs == DataColorSpace::Lab || self.pcs == DataColorSpace::Xyz {
return self.transform_impl(src, dst, |x, y, z, w| lut.quadlinear_vec3(x, y, z, w));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(src, dst, |x, y, z, w| lut.tetra_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(src, dst, |x, y, z, w| lut.pyramid_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(src, dst, |x, y, z, w| lut.prism_vec3(x, y, z, w))?;
}
InterpolationMethod::Linear => {
self.transform_impl(src, dst, |x, y, z, w| lut.quadlinear_vec3(x, y, z, w))?;
}
}
Ok(())
}
}

View File

@@ -0,0 +1,442 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::neon::interpolator::NeonVector;
use crate::math::{FusedMultiplyAdd, FusedMultiplyNegAdd};
use std::arch::aarch64::*;
use std::ops::{Add, Mul, Sub};
/// 3D CLUT NEON helper
///
/// Represents a hexahedron.
pub(crate) struct CubeNeon<'a> {
array: &'a [f32],
x_stride: u32,
y_stride: u32,
grid_size: [u8; 3],
}
struct HexahedronFetch3<'a> {
array: &'a [f32],
x_stride: u32,
y_stride: u32,
}
trait CubeFetch<T> {
fn fetch(&self, x: i32, y: i32, z: i32) -> T;
}
impl CubeFetch<NeonVector> for HexahedronFetch3<'_> {
#[inline(always)]
fn fetch(&self, x: i32, y: i32, z: i32) -> NeonVector {
let start = (x as u32 * self.x_stride + y as u32 * self.y_stride + z as u32) as usize * 3;
unsafe {
let k = self.array.get_unchecked(start..);
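// Load three consecutive f32 values: lanes 0..1 via a 64-bit load, the third into lane 0
// of the high half (lane 3 stays zero).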
let lo = vld1_f32(k.as_ptr());
let hi = vld1_lane_f32::<0>(k.get_unchecked(2..).as_ptr(), vdup_n_f32(0.));
NeonVector {
v: vcombine_f32(lo, hi),
}
}
}
}
impl<'a> CubeNeon<'a> {
pub(crate) fn new(arr: &'a [f32], grid: [u8; 3], components: usize) -> Self {
// Safety precondition: the array length must equal the full grid volume times the component count.
// Callers must uphold this invariant wherever the table is constructed.
assert_eq!(
grid[0] as usize * grid[1] as usize * grid[2] as usize * components,
arr.len()
);
let y_stride = grid[1] as u32;
let x_stride = y_stride * grid[0] as u32;
CubeNeon {
array: arr,
x_stride,
y_stride,
grid_size: grid,
}
}
#[inline(always)]
fn trilinear<
T: Copy
+ From<f32>
+ Sub<T, Output = T>
+ Mul<T, Output = T>
+ Add<T, Output = T>
+ FusedMultiplyNegAdd<T>
+ FusedMultiplyAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
fetch: impl CubeFetch<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let x_d = T::from(lin_x * scale_x - x as f32);
let y_d = T::from(lin_y * scale_y - y as f32);
let z_d = T::from(lin_z * scale_z - z as f32);
let c000 = fetch.fetch(x, y, z);
let c100 = fetch.fetch(x_n, y, z);
let c010 = fetch.fetch(x, y_n, z);
let c110 = fetch.fetch(x_n, y_n, z);
let c001 = fetch.fetch(x, y, z_n);
let c101 = fetch.fetch(x_n, y, z_n);
let c011 = fetch.fetch(x, y_n, z_n);
let c111 = fetch.fetch(x_n, y_n, z_n);
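// Each neg_mla/mla pair is a lerp: a.neg_mla(a, t).mla(b, t) == a * (1 - t) + b * t.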
let c00 = c000.neg_mla(c000, x_d).mla(c100, x_d);
let c10 = c010.neg_mla(c010, x_d).mla(c110, x_d);
let c01 = c001.neg_mla(c001, x_d).mla(c101, x_d);
let c11 = c011.neg_mla(c011, x_d).mla(c111, x_d);
let c0 = c00.neg_mla(c00, y_d).mla(c10, y_d);
let c1 = c01.neg_mla(c01, y_d).mla(c11, y_d);
c0.neg_mla(c0, z_d).mla(c1, z_d)
}
#[cfg(feature = "options")]
#[inline]
fn pyramid<
T: Copy
+ From<f32>
+ Sub<T, Output = T>
+ Mul<T, Output = T>
+ Add<T, Output = T>
+ FusedMultiplyAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
fetch: impl CubeFetch<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let dr = lin_x * scale_x - x as f32;
let dg = lin_y * scale_y - y as f32;
let db = lin_z * scale_z - z as f32;
let c0 = fetch.fetch(x, y, z);
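// Pyramidal interpolation: the cell is split into three pyramidal sub-volumes; the
// comparisons below pick the one containing the offset (dr, dg, db).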
if dr > db && dg > db {
let x0 = fetch.fetch(x_n, y_n, z_n);
let x1 = fetch.fetch(x_n, y_n, z);
let x2 = fetch.fetch(x_n, y, z);
let x3 = fetch.fetch(x, y_n, z);
let c1 = x0 - x1;
let c2 = x2 - c0;
let c3 = x3 - c0;
let c4 = c0 - x3 - x2 + x1;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dr * dg))
} else if db > dr && dg > dr {
let x0 = fetch.fetch(x, y, z_n);
let x1 = fetch.fetch(x_n, y_n, z_n);
let x2 = fetch.fetch(x, y_n, z_n);
let x3 = fetch.fetch(x, y_n, z);
let c1 = x0 - c0;
let c2 = x1 - x2;
let c3 = x3 - c0;
let c4 = c0 - x3 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dg * db))
} else {
let x0 = fetch.fetch(x, y, z_n);
let x1 = fetch.fetch(x_n, y, z);
let x2 = fetch.fetch(x_n, y, z_n);
let x3 = fetch.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - c0;
let c3 = x3 - x2;
let c4 = c0 - x1 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(db * dr))
}
}
#[cfg(feature = "options")]
#[inline]
fn tetra<
T: Copy
+ From<f32>
+ Sub<T, Output = T>
+ Mul<T, Output = T>
+ Add<T, Output = T>
+ FusedMultiplyAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
fetch: impl CubeFetch<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let rx = lin_x * scale_x - x as f32;
let ry = lin_y * scale_y - y as f32;
let rz = lin_z * scale_z - z as f32;
let c0 = fetch.fetch(x, y, z);
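// Tetrahedral interpolation: the cell decomposes into six tetrahedra; the ordering of
// (rx, ry, rz) selects the one containing the sample point.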
let c1;
let c2;
let c3;
if rx >= ry {
if ry >= rz {
//rx >= ry && ry >= rz
c1 = fetch.fetch(x_n, y, z) - c0;
c2 = fetch.fetch(x_n, y_n, z) - fetch.fetch(x_n, y, z);
c3 = fetch.fetch(x_n, y_n, z_n) - fetch.fetch(x_n, y_n, z);
} else if rx >= rz {
//rx >= rz && rz >= ry
c1 = fetch.fetch(x_n, y, z) - c0;
c2 = fetch.fetch(x_n, y_n, z_n) - fetch.fetch(x_n, y, z_n);
c3 = fetch.fetch(x_n, y, z_n) - fetch.fetch(x_n, y, z);
} else {
//rz > rx && rx >= ry
c1 = fetch.fetch(x_n, y, z_n) - fetch.fetch(x, y, z_n);
c2 = fetch.fetch(x_n, y_n, z_n) - fetch.fetch(x_n, y, z_n);
c3 = fetch.fetch(x, y, z_n) - c0;
}
} else if rx >= rz {
//ry > rx && rx >= rz
c1 = fetch.fetch(x_n, y_n, z) - fetch.fetch(x, y_n, z);
c2 = fetch.fetch(x, y_n, z) - c0;
c3 = fetch.fetch(x_n, y_n, z_n) - fetch.fetch(x_n, y_n, z);
} else if ry >= rz {
//ry >= rz && rz > rx
c1 = fetch.fetch(x_n, y_n, z_n) - fetch.fetch(x, y_n, z_n);
c2 = fetch.fetch(x, y_n, z) - c0;
c3 = fetch.fetch(x, y_n, z_n) - fetch.fetch(x, y_n, z);
} else {
//rz > ry && ry > rx
c1 = fetch.fetch(x_n, y_n, z_n) - fetch.fetch(x, y_n, z_n);
c2 = fetch.fetch(x, y_n, z_n) - fetch.fetch(x, y, z_n);
c3 = fetch.fetch(x, y, z_n) - c0;
}
let s0 = c0.mla(c1, T::from(rx));
let s1 = s0.mla(c2, T::from(ry));
s1.mla(c3, T::from(rz))
}
#[cfg(feature = "options")]
#[inline]
fn prism<
T: Copy
+ From<f32>
+ Sub<T, Output = T>
+ Mul<T, Output = T>
+ Add<T, Output = T>
+ FusedMultiplyAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
fetch: impl CubeFetch<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let dr = lin_x * scale_x - x as f32;
let dg = lin_y * scale_y - y as f32;
let db = lin_z * scale_z - z as f32;
let c0 = fetch.fetch(x, y, z);
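// Prism interpolation: the cell is split into two triangular prisms across the dr/db diagonal.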
if db >= dr {
let x0 = fetch.fetch(x, y, z_n);
let x1 = fetch.fetch(x_n, y, z_n);
let x2 = fetch.fetch(x, y_n, z);
let x3 = fetch.fetch(x, y_n, z_n);
let x4 = fetch.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - x0;
let c3 = x2 - c0;
let c4 = c0 - x2 - x0 + x3;
let c5 = x0 - x3 - x1 + x4;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
} else {
let x0 = fetch.fetch(x_n, y, z);
let x1 = fetch.fetch(x_n, y, z_n);
let x2 = fetch.fetch(x, y_n, z);
let x3 = fetch.fetch(x_n, y_n, z);
let x4 = fetch.fetch(x_n, y_n, z_n);
let c1 = x1 - x0;
let c2 = x0 - c0;
let c3 = x2 - c0;
let c4 = x0 - x3 - x1 + x4;
let c5 = c0 - x2 - x0 + x3;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
}
}
#[inline]
pub(crate) fn trilinear_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> NeonVector {
self.trilinear(
lin_x,
lin_y,
lin_z,
HexahedronFetch3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
},
)
}
#[cfg(feature = "options")]
#[inline]
pub(crate) fn prism_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> NeonVector {
self.prism(
lin_x,
lin_y,
lin_z,
HexahedronFetch3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
},
)
}
#[cfg(feature = "options")]
#[inline]
pub(crate) fn pyramid_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> NeonVector {
self.pyramid(
lin_x,
lin_y,
lin_z,
HexahedronFetch3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
},
)
}
#[cfg(feature = "options")]
#[inline]
pub(crate) fn tetra_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32) -> NeonVector {
self.tetra(
lin_x,
lin_y,
lin_z,
HexahedronFetch3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
},
)
}
}

View File

@@ -0,0 +1,629 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::neon::interpolator::NeonVector;
use crate::math::{FusedMultiplyAdd, FusedMultiplyNegAdd};
use crate::nd_array::lerp;
use std::arch::aarch64::{vcombine_f32, vdup_n_f32, vld1_f32, vld1_lane_f32};
use std::ops::{Add, Mul, Sub};
/// 4D CLUT helper.
///
/// Represents a hypercube.
pub(crate) struct HypercubeNeon<'a> {
array: &'a [f32],
x_stride: u32,
y_stride: u32,
z_stride: u32,
grid_size: [u8; 4],
}
trait Fetcher4<T> {
fn fetch(&self, x: i32, y: i32, z: i32, w: i32) -> T;
}
struct Fetch4Vec3<'a> {
array: &'a [f32],
x_stride: u32,
y_stride: u32,
z_stride: u32,
}
impl Fetcher4<NeonVector> for Fetch4Vec3<'_> {
#[inline(always)]
fn fetch(&self, x: i32, y: i32, z: i32, w: i32) -> NeonVector {
let start = (x as u32 * self.x_stride
+ y as u32 * self.y_stride
+ z as u32 * self.z_stride
+ w as u32) as usize
* 3;
unsafe {
let k = self.array.get_unchecked(start..);
let lo = vld1_f32(k.as_ptr());
let hi = vld1_lane_f32::<0>(k.get_unchecked(2..).as_ptr(), vdup_n_f32(0.));
NeonVector {
v: vcombine_f32(lo, hi),
}
}
}
}
impl<'a> HypercubeNeon<'a> {
pub(crate) fn new(arr: &'a [f32], grid: [u8; 4], components: usize) -> Self {
// Safety precondition: the array length must equal the full grid volume times the component count.
// Callers must uphold this invariant wherever the table is constructed.
assert_eq!(
grid[0] as usize * grid[1] as usize * grid[2] as usize * grid[3] as usize * components,
arr.len()
);
let z_stride = grid[2] as u32;
let y_stride = z_stride * grid[1] as u32;
let x_stride = y_stride * grid[0] as u32;
HypercubeNeon {
array: arr,
x_stride,
y_stride,
z_stride,
grid_size: grid,
}
}
#[inline(always)]
fn quadlinear<
T: From<f32>
+ Add<T, Output = T>
+ Mul<T, Output = T>
+ FusedMultiplyAdd<T>
+ Sub<T, Output = T>
+ Copy
+ FusedMultiplyNegAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
r: impl Fetcher4<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let lin_w = lin_w.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let scale_w = (self.grid_size[3] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let w = (lin_w * scale_w).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let w_n = (lin_w * scale_w).ceil() as i32;
let x_d = T::from(lin_x * scale_x - x as f32);
let y_d = T::from(lin_y * scale_y - y as f32);
let z_d = T::from(lin_z * scale_z - z as f32);
let w_d = T::from(lin_w * scale_w - w as f32);
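// Quadlinear interpolation = two trilinear passes, one on the w lattice plane and one on w_n,
// followed by a final lerp along the fourth axis.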
let r_x1 = lerp(r.fetch(x, y, z, w), r.fetch(x_n, y, z, w), x_d);
let r_x2 = lerp(r.fetch(x, y_n, z, w), r.fetch(x_n, y_n, z, w), x_d);
let r_y1 = lerp(r_x1, r_x2, y_d);
let r_x3 = lerp(r.fetch(x, y, z_n, w), r.fetch(x_n, y, z_n, w), x_d);
let r_x4 = lerp(r.fetch(x, y_n, z_n, w), r.fetch(x_n, y_n, z_n, w), x_d);
let r_y2 = lerp(r_x3, r_x4, y_d);
let r_z1 = lerp(r_y1, r_y2, z_d);
let r_x1 = lerp(r.fetch(x, y, z, w_n), r.fetch(x_n, y, z, w_n), x_d);
let r_x2 = lerp(r.fetch(x, y_n, z, w_n), r.fetch(x_n, y_n, z, w_n), x_d);
let r_y1 = lerp(r_x1, r_x2, y_d);
let r_x3 = lerp(r.fetch(x, y, z_n, w_n), r.fetch(x_n, y, z_n, w_n), x_d);
let r_x4 = lerp(r.fetch(x, y_n, z_n, w_n), r.fetch(x_n, y_n, z_n, w_n), x_d);
let r_y2 = lerp(r_x3, r_x4, y_d);
let r_z2 = lerp(r_y1, r_y2, z_d);
lerp(r_z1, r_z2, w_d)
}
#[inline]
pub(crate) fn quadlinear_vec3(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
) -> NeonVector {
self.quadlinear(
lin_x,
lin_y,
lin_z,
lin_w,
Fetch4Vec3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
z_stride: self.z_stride,
},
)
}
#[cfg(feature = "options")]
#[inline(always)]
fn pyramid<
T: From<f32>
+ Add<T, Output = T>
+ Mul<T, Output = T>
+ FusedMultiplyAdd<T>
+ Sub<T, Output = T>
+ Copy
+ FusedMultiplyNegAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
r: impl Fetcher4<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let lin_w = lin_w.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let scale_w = (self.grid_size[3] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let w = (lin_w * scale_w).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let w_n = (lin_w * scale_w).ceil() as i32;
let dr = lin_x * scale_x - x as f32;
let dg = lin_y * scale_y - y as f32;
let db = lin_z * scale_z - z as f32;
let dw = lin_w * scale_w - w as f32;
let c0 = r.fetch(x, y, z, w);
let w0 = if dr > db && dg > db {
let x0 = r.fetch(x_n, y_n, z_n, w);
let x1 = r.fetch(x_n, y_n, z, w);
let x2 = r.fetch(x_n, y, z, w);
let x3 = r.fetch(x, y_n, z, w);
let c1 = x0 - x1;
let c2 = x2 - c0;
let c3 = x3 - c0;
let c4 = c0 - x3 - x2 + x1;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dr * dg))
} else if db > dr && dg > dr {
let x0 = r.fetch(x, y, z_n, w);
let x1 = r.fetch(x_n, y_n, z_n, w);
let x2 = r.fetch(x, y_n, z_n, w);
let x3 = r.fetch(x, y_n, z, w);
let c1 = x0 - c0;
let c2 = x1 - x2;
let c3 = x3 - c0;
let c4 = c0 - x3 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dg * db))
} else {
let x0 = r.fetch(x, y, z_n, w);
let x1 = r.fetch(x_n, y, z, w);
let x2 = r.fetch(x_n, y, z_n, w);
let x3 = r.fetch(x_n, y_n, z_n, w);
let c1 = x0 - c0;
let c2 = x1 - c0;
let c3 = x3 - x2;
let c4 = c0 - x1 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(db * dr))
};
let c0 = r.fetch(x, y, z, w_n);
let w1 = if dr > db && dg > db {
let x0 = r.fetch(x_n, y_n, z_n, w_n);
let x1 = r.fetch(x_n, y_n, z, w_n);
let x2 = r.fetch(x_n, y, z, w_n);
let x3 = r.fetch(x, y_n, z, w_n);
let c1 = x0 - x1;
let c2 = x2 - c0;
let c3 = x3 - c0;
let c4 = c0 - x3 - x2 + x1;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dr * dg))
} else if db > dr && dg > dr {
let x0 = r.fetch(x, y, z_n, w_n);
let x1 = r.fetch(x_n, y_n, z_n, w_n);
let x2 = r.fetch(x, y_n, z_n, w_n);
let x3 = r.fetch(x, y_n, z, w_n);
let c1 = x0 - c0;
let c2 = x1 - x2;
let c3 = x3 - c0;
let c4 = c0 - x3 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(dg * db))
} else {
let x0 = r.fetch(x, y, z_n, w_n);
let x1 = r.fetch(x_n, y, z, w_n);
let x2 = r.fetch(x_n, y, z_n, w_n);
let x3 = r.fetch(x_n, y_n, z_n, w_n);
let c1 = x0 - c0;
let c2 = x1 - c0;
let c3 = x3 - x2;
let c4 = c0 - x1 - x0 + x2;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
s2.mla(c4, T::from(db * dr))
};
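// Blend the two 3D pyramid results along the fourth axis: w0 * (1 - dw) + w1 * dw.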
w0.neg_mla(w0, T::from(dw)).mla(w1, T::from(dw))
}
#[cfg(feature = "options")]
#[inline]
pub(crate) fn pyramid_vec3(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
) -> NeonVector {
self.pyramid(
lin_x,
lin_y,
lin_z,
lin_w,
Fetch4Vec3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
z_stride: self.z_stride,
},
)
}
#[cfg(feature = "options")]
#[inline(always)]
fn prism<
T: From<f32>
+ Add<T, Output = T>
+ Mul<T, Output = T>
+ FusedMultiplyAdd<T>
+ Sub<T, Output = T>
+ Copy
+ FusedMultiplyNegAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
r: impl Fetcher4<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let lin_w = lin_w.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let scale_w = (self.grid_size[3] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let w = (lin_w * scale_w).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let w_n = (lin_w * scale_w).ceil() as i32;
let dr = lin_x * scale_x - x as f32;
let dg = lin_y * scale_y - y as f32;
let db = lin_z * scale_z - z as f32;
let dw = lin_w * scale_w - w as f32;
let c0 = r.fetch(x, y, z, w);
let w0 = if db >= dr {
let x0 = r.fetch(x, y, z_n, w);
let x1 = r.fetch(x_n, y, z_n, w);
let x2 = r.fetch(x, y_n, z, w);
let x3 = r.fetch(x, y_n, z_n, w);
let x4 = r.fetch(x_n, y_n, z_n, w);
let c1 = x0 - c0;
let c2 = x1 - x0;
let c3 = x2 - c0;
let c4 = c0 - x2 - x0 + x3;
let c5 = x0 - x3 - x1 + x4;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
} else {
let x0 = r.fetch(x_n, y, z, w);
let x1 = r.fetch(x_n, y, z_n, w);
let x2 = r.fetch(x, y_n, z, w);
let x3 = r.fetch(x_n, y_n, z, w);
let x4 = r.fetch(x_n, y_n, z_n, w);
let c1 = x1 - x0;
let c2 = x0 - c0;
let c3 = x2 - c0;
let c4 = x0 - x3 - x1 + x4;
let c5 = c0 - x2 - x0 + x3;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
};
let c0 = r.fetch(x, y, z, w_n);
let w1 = if db >= dr {
let x0 = r.fetch(x, y, z_n, w_n);
let x1 = r.fetch(x_n, y, z_n, w_n);
let x2 = r.fetch(x, y_n, z, w_n);
let x3 = r.fetch(x, y_n, z_n, w_n);
let x4 = r.fetch(x_n, y_n, z_n, w_n);
let c1 = x0 - c0;
let c2 = x1 - x0;
let c3 = x2 - c0;
let c4 = c0 - x2 - x0 + x3;
let c5 = x0 - x3 - x1 + x4;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
} else {
let x0 = r.fetch(x_n, y, z, w_n);
let x1 = r.fetch(x_n, y, z_n, w_n);
let x2 = r.fetch(x, y_n, z, w_n);
let x3 = r.fetch(x_n, y_n, z, w_n);
let x4 = r.fetch(x_n, y_n, z_n, w_n);
let c1 = x1 - x0;
let c2 = x0 - c0;
let c3 = x2 - c0;
let c4 = x0 - x3 - x1 + x4;
let c5 = c0 - x2 - x0 + x3;
let s0 = c0.mla(c1, T::from(db));
let s1 = s0.mla(c2, T::from(dr));
let s2 = s1.mla(c3, T::from(dg));
let s3 = s2.mla(c4, T::from(dg * db));
s3.mla(c5, T::from(dr * dg))
};
w0.neg_mla(w0, T::from(dw)).mla(w1, T::from(dw))
}
#[cfg(feature = "options")]
#[inline]
pub(crate) fn prism_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32, lin_w: f32) -> NeonVector {
self.prism(
lin_x,
lin_y,
lin_z,
lin_w,
Fetch4Vec3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
z_stride: self.z_stride,
},
)
}
#[cfg(feature = "options")]
#[inline(always)]
fn tetra<
T: From<f32>
+ Add<T, Output = T>
+ Mul<T, Output = T>
+ FusedMultiplyAdd<T>
+ Sub<T, Output = T>
+ Copy
+ FusedMultiplyNegAdd<T>,
>(
&self,
lin_x: f32,
lin_y: f32,
lin_z: f32,
lin_w: f32,
r: impl Fetcher4<T>,
) -> T {
let lin_x = lin_x.max(0.0).min(1.0);
let lin_y = lin_y.max(0.0).min(1.0);
let lin_z = lin_z.max(0.0).min(1.0);
let lin_w = lin_w.max(0.0).min(1.0);
let scale_x = (self.grid_size[0] as i32 - 1) as f32;
let scale_y = (self.grid_size[1] as i32 - 1) as f32;
let scale_z = (self.grid_size[2] as i32 - 1) as f32;
let scale_w = (self.grid_size[3] as i32 - 1) as f32;
let x = (lin_x * scale_x).floor() as i32;
let y = (lin_y * scale_y).floor() as i32;
let z = (lin_z * scale_z).floor() as i32;
let w = (lin_w * scale_w).floor() as i32;
let x_n = (lin_x * scale_x).ceil() as i32;
let y_n = (lin_y * scale_y).ceil() as i32;
let z_n = (lin_z * scale_z).ceil() as i32;
let w_n = (lin_w * scale_w).ceil() as i32;
let rx = lin_x * scale_x - x as f32;
let ry = lin_y * scale_y - y as f32;
let rz = lin_z * scale_z - z as f32;
let rw = lin_w * scale_w - w as f32;
let c0 = r.fetch(x, y, z, w);
let c1;
let c2;
let c3;
if rx >= ry {
if ry >= rz {
//rx >= ry && ry >= rz
c1 = r.fetch(x_n, y, z, w) - c0;
c2 = r.fetch(x_n, y_n, z, w) - r.fetch(x_n, y, z, w);
c3 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x_n, y_n, z, w);
} else if rx >= rz {
//rx >= rz && rz >= ry
c1 = r.fetch(x_n, y, z, w) - c0;
c2 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x_n, y, z_n, w);
c3 = r.fetch(x_n, y, z_n, w) - r.fetch(x_n, y, z, w);
} else {
//rz > rx && rx >= ry
c1 = r.fetch(x_n, y, z_n, w) - r.fetch(x, y, z_n, w);
c2 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x_n, y, z_n, w);
c3 = r.fetch(x, y, z_n, w) - c0;
}
} else if rx >= rz {
//ry > rx && rx >= rz
c1 = r.fetch(x_n, y_n, z, w) - r.fetch(x, y_n, z, w);
c2 = r.fetch(x, y_n, z, w) - c0;
c3 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x_n, y_n, z, w);
} else if ry >= rz {
//ry >= rz && rz > rx
c1 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x, y_n, z_n, w);
c2 = r.fetch(x, y_n, z, w) - c0;
c3 = r.fetch(x, y_n, z_n, w) - r.fetch(x, y_n, z, w);
} else {
//rz > ry && ry > rx
c1 = r.fetch(x_n, y_n, z_n, w) - r.fetch(x, y_n, z_n, w);
c2 = r.fetch(x, y_n, z_n, w) - r.fetch(x, y, z_n, w);
c3 = r.fetch(x, y, z_n, w) - c0;
}
let s0 = c0.mla(c1, T::from(rx));
let s1 = s0.mla(c2, T::from(ry));
let w0 = s1.mla(c3, T::from(rz));
let c0 = r.fetch(x, y, z, w_n);
let c1;
let c2;
let c3;
if rx >= ry {
if ry >= rz {
//rx >= ry && ry >= rz
c1 = r.fetch(x_n, y, z, w_n) - c0;
c2 = r.fetch(x_n, y_n, z, w_n) - r.fetch(x_n, y, z, w_n);
c3 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x_n, y_n, z, w_n);
} else if rx >= rz {
//rx >= rz && rz >= ry
c1 = r.fetch(x_n, y, z, w_n) - c0;
c2 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x_n, y, z_n, w_n);
c3 = r.fetch(x_n, y, z_n, w_n) - r.fetch(x_n, y, z, w_n);
} else {
//rz > rx && rx >= ry
c1 = r.fetch(x_n, y, z_n, w_n) - r.fetch(x, y, z_n, w_n);
c2 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x_n, y, z_n, w_n);
c3 = r.fetch(x, y, z_n, w_n) - c0;
}
} else if rx >= rz {
//ry > rx && rx >= rz
c1 = r.fetch(x_n, y_n, z, w_n) - r.fetch(x, y_n, z, w_n);
c2 = r.fetch(x, y_n, z, w_n) - c0;
c3 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x_n, y_n, z, w_n);
} else if ry >= rz {
//ry >= rz && rz > rx
c1 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x, y_n, z_n, w_n);
c2 = r.fetch(x, y_n, z, w_n) - c0;
c3 = r.fetch(x, y_n, z_n, w_n) - r.fetch(x, y_n, z, w_n);
} else {
//rz > ry && ry > rx
c1 = r.fetch(x_n, y_n, z_n, w_n) - r.fetch(x, y_n, z_n, w_n);
c2 = r.fetch(x, y_n, z_n, w_n) - r.fetch(x, y, z_n, w_n);
c3 = r.fetch(x, y, z_n, w_n) - c0;
}
let s0 = c0.mla(c1, T::from(rx));
let s1 = s0.mla(c2, T::from(ry));
let w1 = s1.mla(c3, T::from(rz));
w0.neg_mla(w0, T::from(rw)).mla(w1, T::from(rw))
}
#[cfg(feature = "options")]
#[inline]
pub(crate) fn tetra_vec3(&self, lin_x: f32, lin_y: f32, lin_z: f32, lin_w: f32) -> NeonVector {
self.tetra(
lin_x,
lin_y,
lin_z,
lin_w,
Fetch4Vec3 {
array: self.array,
x_stride: self.x_stride,
y_stride: self.y_stride,
z_stride: self.z_stride,
},
)
}
}

View File

@@ -0,0 +1,905 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#![allow(dead_code)]
use crate::conversions::interpolator::BarycentricWeight;
use crate::conversions::neon::rgb_xyz::NeonAlignedF32;
use crate::math::{FusedMultiplyAdd, FusedMultiplyNegAdd};
use num_traits::AsPrimitive;
use std::arch::aarch64::*;
use std::ops::{Add, Mul, Sub};
pub(crate) struct TetrahedralNeon<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [NeonAlignedF32],
}
pub(crate) struct PyramidalNeon<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [NeonAlignedF32],
}
pub(crate) struct TrilinearNeon<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [NeonAlignedF32],
}
pub(crate) struct PyramidalNeonDouble<'a, const GRID_SIZE: usize> {
pub(crate) cube0: &'a [NeonAlignedF32],
pub(crate) cube1: &'a [NeonAlignedF32],
}
pub(crate) struct PrismaticNeonDouble<'a, const GRID_SIZE: usize> {
pub(crate) cube0: &'a [NeonAlignedF32],
pub(crate) cube1: &'a [NeonAlignedF32],
}
pub(crate) struct TrilinearNeonDouble<'a, const GRID_SIZE: usize> {
pub(crate) cube0: &'a [NeonAlignedF32],
pub(crate) cube1: &'a [NeonAlignedF32],
}
pub(crate) struct TetrahedralNeonDouble<'a, const GRID_SIZE: usize> {
pub(crate) cube0: &'a [NeonAlignedF32],
pub(crate) cube1: &'a [NeonAlignedF32],
}
pub(crate) struct PrismaticNeon<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [NeonAlignedF32],
}
trait Fetcher<T> {
fn fetch(&self, x: i32, y: i32, z: i32) -> T;
}
struct TetrahedralNeonFetchVector<'a, const GRID_SIZE: usize> {
cube: &'a [NeonAlignedF32],
}
struct TetrahedralNeonFetchVectorDouble<'a, const GRID_SIZE: usize> {
cube0: &'a [NeonAlignedF32],
cube1: &'a [NeonAlignedF32],
}
#[derive(Copy, Clone)]
pub(crate) struct NeonVector {
pub(crate) v: float32x4_t,
}
#[derive(Copy, Clone)]
pub(crate) struct NeonVectorDouble {
pub(crate) v0: float32x4_t,
pub(crate) v1: float32x4_t,
}
impl From<f32> for NeonVector {
#[inline(always)]
fn from(v: f32) -> Self {
NeonVector {
v: unsafe { vdupq_n_f32(v) },
}
}
}
impl From<f32> for NeonVectorDouble {
#[inline(always)]
fn from(v: f32) -> Self {
NeonVectorDouble {
v0: unsafe { vdupq_n_f32(v) },
v1: unsafe { vdupq_n_f32(v) },
}
}
}
impl Sub<NeonVector> for NeonVector {
type Output = Self;
#[inline(always)]
fn sub(self, rhs: NeonVector) -> Self::Output {
NeonVector {
v: unsafe { vsubq_f32(self.v, rhs.v) },
}
}
}
impl Mul<NeonVector> for NeonVector {
type Output = Self;
#[inline(always)]
fn mul(self, rhs: NeonVector) -> Self::Output {
NeonVector {
v: unsafe { vmulq_f32(self.v, rhs.v) },
}
}
}
impl Sub<NeonVectorDouble> for NeonVectorDouble {
type Output = Self;
#[inline(always)]
fn sub(self, rhs: NeonVectorDouble) -> Self::Output {
NeonVectorDouble {
v0: unsafe { vsubq_f32(self.v0, rhs.v0) },
v1: unsafe { vsubq_f32(self.v1, rhs.v1) },
}
}
}
impl Mul<NeonVectorDouble> for NeonVectorDouble {
type Output = Self;
#[inline(always)]
fn mul(self, rhs: NeonVectorDouble) -> Self::Output {
NeonVectorDouble {
v0: unsafe { vmulq_f32(self.v0, rhs.v0) },
v1: unsafe { vmulq_f32(self.v1, rhs.v1) },
}
}
}
impl Add<NeonVector> for NeonVector {
type Output = Self;
#[inline(always)]
fn add(self, rhs: NeonVector) -> Self::Output {
NeonVector {
v: unsafe { vaddq_f32(self.v, rhs.v) },
}
}
}
impl Add<NeonVectorDouble> for NeonVectorDouble {
type Output = Self;
#[inline(always)]
fn add(self, rhs: NeonVectorDouble) -> Self::Output {
NeonVectorDouble {
v0: unsafe { vaddq_f32(self.v0, rhs.v0) },
v1: unsafe { vaddq_f32(self.v1, rhs.v1) },
}
}
}
impl FusedMultiplyAdd<NeonVector> for NeonVector {
#[inline(always)]
fn mla(&self, b: NeonVector, c: NeonVector) -> NeonVector {
NeonVector {
v: unsafe { vfmaq_f32(self.v, b.v, c.v) },
}
}
}
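// vfmsq_f32(a, b, c) computes a - b * c (fused multiply-subtract), which is what
// neg_mla relies on to form a * (1 - t) style terms.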
impl FusedMultiplyNegAdd<NeonVector> for NeonVector {
#[inline(always)]
fn neg_mla(&self, b: NeonVector, c: NeonVector) -> NeonVector {
NeonVector {
v: unsafe { vfmsq_f32(self.v, b.v, c.v) },
}
}
}
impl NeonVectorDouble {
#[inline(always)]
fn neg_mla(&self, b: NeonVectorDouble, c: NeonVectorDouble) -> NeonVectorDouble {
NeonVectorDouble {
v0: unsafe { vfmsq_f32(self.v0, b.v0, c.v0) },
v1: unsafe { vfmsq_f32(self.v1, b.v1, c.v1) },
}
}
#[inline(always)]
fn mla(&self, b: NeonVectorDouble, c: NeonVector) -> NeonVectorDouble {
NeonVectorDouble {
v0: unsafe { vfmaq_f32(self.v0, b.v0, c.v) },
v1: unsafe { vfmaq_f32(self.v1, b.v1, c.v) },
}
}
#[inline(always)]
pub(crate) fn split(self) -> (NeonVector, NeonVector) {
(NeonVector { v: self.v0 }, NeonVector { v: self.v1 })
}
}
impl<const GRID_SIZE: usize> Fetcher<NeonVector> for TetrahedralNeonFetchVector<'_, GRID_SIZE> {
fn fetch(&self, x: i32, y: i32, z: i32) -> NeonVector {
let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
+ y as u32 * GRID_SIZE as u32
+ z as u32) as usize;
let jx = unsafe { self.cube.get_unchecked(offset..) };
NeonVector {
v: unsafe { vld1q_f32(jx.as_ptr() as *const f32) },
}
}
}
impl<const GRID_SIZE: usize> Fetcher<NeonVectorDouble>
for TetrahedralNeonFetchVectorDouble<'_, GRID_SIZE>
{
fn fetch(&self, x: i32, y: i32, z: i32) -> NeonVectorDouble {
let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
+ y as u32 * GRID_SIZE as u32
+ z as u32) as usize;
let jx0 = unsafe { self.cube0.get_unchecked(offset..) };
let jx1 = unsafe { self.cube1.get_unchecked(offset..) };
NeonVectorDouble {
v0: unsafe { vld1q_f32(jx0.as_ptr() as *const f32) },
v1: unsafe { vld1q_f32(jx1.as_ptr() as *const f32) },
}
}
}
pub(crate) trait NeonMdInterpolation<'a, const GRID_SIZE: usize> {
fn new(table: &'a [NeonAlignedF32]) -> Self;
fn inter3_neon<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
) -> NeonVector;
}
pub(crate) trait NeonMdInterpolationDouble<'a, const GRID_SIZE: usize> {
fn new(table0: &'a [NeonAlignedF32], table1: &'a [NeonAlignedF32]) -> Self;
fn inter3_neon<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
) -> (NeonVector, NeonVector);
}
impl<const GRID_SIZE: usize> TetrahedralNeon<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
r: impl Fetcher<NeonVector>,
) -> NeonVector {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
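// Each BarycentricWeight entry precomputes the cell bounds (x, x_n) and the fractional
// weight w for one input bin, so no floor/ceil math is needed per pixel.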
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let rx = lut_r.w;
let ry = lut_g.w;
let rz = lut_b.w;
let c0 = r.fetch(x, y, z);
let c1;
let c2;
let c3;
if rx >= ry {
if ry >= rz {
//rx >= ry && ry >= rz
c1 = r.fetch(x_n, y, z) - c0;
c2 = r.fetch(x_n, y_n, z) - r.fetch(x_n, y, z);
c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
} else if rx >= rz {
//rx >= rz && rz >= ry
c1 = r.fetch(x_n, y, z) - c0;
c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
c3 = r.fetch(x_n, y, z_n) - r.fetch(x_n, y, z);
} else {
//rz > rx && rx >= ry
c1 = r.fetch(x_n, y, z_n) - r.fetch(x, y, z_n);
c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
c3 = r.fetch(x, y, z_n) - c0;
}
} else if rx >= rz {
//ry > rx && rx >= rz
c1 = r.fetch(x_n, y_n, z) - r.fetch(x, y_n, z);
c2 = r.fetch(x, y_n, z) - c0;
c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
} else if ry >= rz {
//ry >= rz && rz > rx
c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
c2 = r.fetch(x, y_n, z) - c0;
c3 = r.fetch(x, y_n, z_n) - r.fetch(x, y_n, z);
} else {
//rz > ry && ry > rx
c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
c2 = r.fetch(x, y_n, z_n) - r.fetch(x, y, z_n);
c3 = r.fetch(x, y, z_n) - c0;
}
let s0 = c0.mla(c1, NeonVector::from(rx));
let s1 = s0.mla(c2, NeonVector::from(ry));
s1.mla(c3, NeonVector::from(rz))
}
}
impl<const GRID_SIZE: usize> TetrahedralNeonDouble<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
r: impl Fetcher<NeonVectorDouble>,
) -> (NeonVector, NeonVector) {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let rx = lut_r.w;
let ry = lut_g.w;
let rz = lut_b.w;
let c0 = r.fetch(x, y, z);
let c1;
let c2;
let c3;
if rx >= ry {
if ry >= rz {
//rx >= ry && ry >= rz
c1 = r.fetch(x_n, y, z) - c0;
c2 = r.fetch(x_n, y_n, z) - r.fetch(x_n, y, z);
c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
} else if rx >= rz {
//rx >= rz && rz >= ry
c1 = r.fetch(x_n, y, z) - c0;
c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
c3 = r.fetch(x_n, y, z_n) - r.fetch(x_n, y, z);
} else {
//rz > rx && rx >= ry
c1 = r.fetch(x_n, y, z_n) - r.fetch(x, y, z_n);
c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
c3 = r.fetch(x, y, z_n) - c0;
}
} else if rx >= rz {
//ry > rx && rx >= rz
c1 = r.fetch(x_n, y_n, z) - r.fetch(x, y_n, z);
c2 = r.fetch(x, y_n, z) - c0;
c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
} else if ry >= rz {
//ry >= rz && rz > rx
c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
c2 = r.fetch(x, y_n, z) - c0;
c3 = r.fetch(x, y_n, z_n) - r.fetch(x, y_n, z);
} else {
//rz > ry && ry > rx
c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
c2 = r.fetch(x, y_n, z_n) - r.fetch(x, y, z_n);
c3 = r.fetch(x, y, z_n) - c0;
}
let s0 = c0.mla(c1, NeonVector::from(rx));
let s1 = s0.mla(c2, NeonVector::from(ry));
s1.mla(c3, NeonVector::from(rz)).split()
}
}
macro_rules! define_md_inter_neon {
($interpolator: ident) => {
impl<'a, const GRID_SIZE: usize> NeonMdInterpolation<'a, GRID_SIZE>
for $interpolator<'a, GRID_SIZE>
{
#[inline(always)]
fn new(table: &'a [NeonAlignedF32]) -> Self {
Self { cube: table }
}
#[inline(always)]
fn inter3_neon<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
) -> NeonVector {
self.interpolate(
in_r,
in_g,
in_b,
lut,
TetrahedralNeonFetchVector::<GRID_SIZE> { cube: self.cube },
)
}
}
};
}
macro_rules! define_md_inter_neon_d {
($interpolator: ident) => {
impl<'a, const GRID_SIZE: usize> NeonMdInterpolationDouble<'a, GRID_SIZE>
for $interpolator<'a, GRID_SIZE>
{
#[inline(always)]
fn new(table0: &'a [NeonAlignedF32], table1: &'a [NeonAlignedF32]) -> Self {
Self {
cube0: table0,
cube1: table1,
}
}
#[inline(always)]
fn inter3_neon<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
) -> (NeonVector, NeonVector) {
self.interpolate(
in_r,
in_g,
in_b,
lut,
TetrahedralNeonFetchVectorDouble::<GRID_SIZE> {
cube0: self.cube0,
cube1: self.cube1,
},
)
}
}
};
}
define_md_inter_neon!(TetrahedralNeon);
define_md_inter_neon!(PyramidalNeon);
define_md_inter_neon!(PrismaticNeon);
define_md_inter_neon!(TrilinearNeon);
define_md_inter_neon_d!(PrismaticNeonDouble);
define_md_inter_neon_d!(PyramidalNeonDouble);
define_md_inter_neon_d!(TetrahedralNeonDouble);
define_md_inter_neon_d!(TrilinearNeonDouble);
impl<const GRID_SIZE: usize> PyramidalNeon<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
r: impl Fetcher<NeonVector>,
) -> NeonVector {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let c0 = r.fetch(x, y, z);
if dr > db && dg > db {
let x0 = r.fetch(x_n, y_n, z_n);
let x1 = r.fetch(x_n, y_n, z);
let x2 = r.fetch(x_n, y, z);
let x3 = r.fetch(x, y_n, z);
let c1 = x0 - x1;
let c2 = x2 - c0;
let c3 = x3 - c0;
let c4 = c0 - x3 - x2 + x1;
let s0 = c0.mla(c1, NeonVector::from(db));
let s1 = s0.mla(c2, NeonVector::from(dr));
let s2 = s1.mla(c3, NeonVector::from(dg));
s2.mla(c4, NeonVector::from(dr * dg))
} else if db > dr && dg > dr {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y_n, z_n);
let x2 = r.fetch(x, y_n, z_n);
let x3 = r.fetch(x, y_n, z);
let c1 = x0 - c0;
let c2 = x1 - x2;
let c3 = x3 - c0;
let c4 = c0 - x3 - x0 + x2;
let s0 = c0.mla(c1, NeonVector::from(db));
let s1 = s0.mla(c2, NeonVector::from(dr));
let s2 = s1.mla(c3, NeonVector::from(dg));
s2.mla(c4, NeonVector::from(dg * db))
} else {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y, z);
let x2 = r.fetch(x_n, y, z_n);
let x3 = r.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - c0;
let c3 = x3 - x2;
let c4 = c0 - x1 - x0 + x2;
let s0 = c0.mla(c1, NeonVector::from(db));
let s1 = s0.mla(c2, NeonVector::from(dr));
let s2 = s1.mla(c3, NeonVector::from(dg));
s2.mla(c4, NeonVector::from(db * dr))
}
}
}
impl<const GRID_SIZE: usize> PyramidalNeonDouble<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
r: impl Fetcher<NeonVectorDouble>,
) -> (NeonVector, NeonVector) {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let c0 = r.fetch(x, y, z);
let w0 = NeonVector::from(db);
let w1 = NeonVector::from(dr);
let w2 = NeonVector::from(dg);
if dr > db && dg > db {
let x0 = r.fetch(x_n, y_n, z_n);
let x1 = r.fetch(x_n, y_n, z);
let x2 = r.fetch(x_n, y, z);
let x3 = r.fetch(x, y_n, z);
let c1 = x0 - x1;
let c2 = x2 - c0;
let c3 = x3 - c0;
let c4 = c0 - x3 - x2 + x1;
let w3 = NeonVector::from(dr * dg);
let s0 = c0.mla(c1, w0);
let s1 = s0.mla(c2, w1);
let s2 = s1.mla(c3, w2);
s2.mla(c4, w3).split()
} else if db > dr && dg > dr {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y_n, z_n);
let x2 = r.fetch(x, y_n, z_n);
let x3 = r.fetch(x, y_n, z);
let c1 = x0 - c0;
let c2 = x1 - x2;
let c3 = x3 - c0;
let c4 = c0 - x3 - x0 + x2;
let w3 = NeonVector::from(dg * db);
let s0 = c0.mla(c1, w0);
let s1 = s0.mla(c2, w1);
let s2 = s1.mla(c3, w2);
s2.mla(c4, w3).split()
} else {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y, z);
let x2 = r.fetch(x_n, y, z_n);
let x3 = r.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - c0;
let c3 = x3 - x2;
let c4 = c0 - x1 - x0 + x2;
let w3 = NeonVector::from(db * dr);
let s0 = c0.mla(c1, w0);
let s1 = s0.mla(c2, w1);
let s2 = s1.mla(c3, w2);
s2.mla(c4, w3).split()
}
}
}
impl<const GRID_SIZE: usize> PrismaticNeon<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
r: impl Fetcher<NeonVector>,
) -> NeonVector {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let c0 = r.fetch(x, y, z);
if db > dr {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y, z_n);
let x2 = r.fetch(x, y_n, z);
let x3 = r.fetch(x, y_n, z_n);
let x4 = r.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - x0;
let c3 = x2 - c0;
let c4 = c0 - x2 - x0 + x3;
let c5 = x0 - x3 - x1 + x4;
let s0 = c0.mla(c1, NeonVector::from(db));
let s1 = s0.mla(c2, NeonVector::from(dr));
let s2 = s1.mla(c3, NeonVector::from(dg));
let s3 = s2.mla(c4, NeonVector::from(dg * db));
s3.mla(c5, NeonVector::from(dr * dg))
} else {
let x0 = r.fetch(x_n, y, z);
let x1 = r.fetch(x_n, y, z_n);
let x2 = r.fetch(x, y_n, z);
let x3 = r.fetch(x_n, y_n, z);
let x4 = r.fetch(x_n, y_n, z_n);
let c1 = x1 - x0;
let c2 = x0 - c0;
let c3 = x2 - c0;
let c4 = x0 - x3 - x1 + x4;
let c5 = c0 - x2 - x0 + x3;
let s0 = c0.mla(c1, NeonVector::from(db));
let s1 = s0.mla(c2, NeonVector::from(dr));
let s2 = s1.mla(c3, NeonVector::from(dg));
let s3 = s2.mla(c4, NeonVector::from(dg * db));
s3.mla(c5, NeonVector::from(dr * dg))
}
}
}
impl<const GRID_SIZE: usize> PrismaticNeonDouble<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
rv: impl Fetcher<NeonVectorDouble>,
) -> (NeonVector, NeonVector) {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let c0 = rv.fetch(x, y, z);
let w0 = NeonVector::from(db);
let w1 = NeonVector::from(dr);
let w2 = NeonVector::from(dg);
let w3 = NeonVector::from(dg * db);
let w4 = NeonVector::from(dr * dg);
if db > dr {
let x0 = rv.fetch(x, y, z_n);
let x1 = rv.fetch(x_n, y, z_n);
let x2 = rv.fetch(x, y_n, z);
let x3 = rv.fetch(x, y_n, z_n);
let x4 = rv.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - x0;
let c3 = x2 - c0;
let c4 = c0 - x2 - x0 + x3;
let c5 = x0 - x3 - x1 + x4;
let s0 = c0.mla(c1, w0);
let s1 = s0.mla(c2, w1);
let s2 = s1.mla(c3, w2);
let s3 = s2.mla(c4, w3);
s3.mla(c5, w4).split()
} else {
let x0 = rv.fetch(x_n, y, z);
let x1 = rv.fetch(x_n, y, z_n);
let x2 = rv.fetch(x, y_n, z);
let x3 = rv.fetch(x_n, y_n, z);
let x4 = rv.fetch(x_n, y_n, z_n);
let c1 = x1 - x0;
let c2 = x0 - c0;
let c3 = x2 - c0;
let c4 = x0 - x3 - x1 + x4;
let c5 = c0 - x2 - x0 + x3;
let s0 = c0.mla(c1, w0);
let s1 = s0.mla(c2, w1);
let s2 = s1.mla(c3, w2);
let s3 = s2.mla(c4, w3);
s3.mla(c5, w4).split()
}
}
}
impl<const GRID_SIZE: usize> TrilinearNeonDouble<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
r: impl Fetcher<NeonVectorDouble>,
) -> (NeonVector, NeonVector) {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let w0 = NeonVector::from(dr);
let w1 = NeonVector::from(dg);
let w2 = NeonVector::from(db);
let c000 = r.fetch(x, y, z);
let c100 = r.fetch(x_n, y, z);
let c010 = r.fetch(x, y_n, z);
let c110 = r.fetch(x_n, y_n, z);
let c001 = r.fetch(x, y, z_n);
let c101 = r.fetch(x_n, y, z_n);
let c011 = r.fetch(x, y_n, z_n);
let c111 = r.fetch(x_n, y_n, z_n);
let dx = NeonVectorDouble::from(dr);
let c00 = c000.neg_mla(c000, dx).mla(c100, w0);
let c10 = c010.neg_mla(c010, dx).mla(c110, w0);
let c01 = c001.neg_mla(c001, dx).mla(c101, w0);
let c11 = c011.neg_mla(c011, dx).mla(c111, w0);
let dy = NeonVectorDouble::from(dg);
let c0 = c00.neg_mla(c00, dy).mla(c10, w1);
let c1 = c01.neg_mla(c01, dy).mla(c11, w1);
let dz = NeonVectorDouble::from(db);
c0.neg_mla(c0, dz).mla(c1, w2).split()
}
}
impl<const GRID_SIZE: usize> TrilinearNeon<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
r: impl Fetcher<NeonVector>,
) -> NeonVector {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let w0 = NeonVector::from(dr);
let w1 = NeonVector::from(dg);
let w2 = NeonVector::from(db);
let c000 = r.fetch(x, y, z);
let c100 = r.fetch(x_n, y, z);
let c010 = r.fetch(x, y_n, z);
let c110 = r.fetch(x_n, y_n, z);
let c001 = r.fetch(x, y, z_n);
let c101 = r.fetch(x_n, y, z_n);
let c011 = r.fetch(x, y_n, z_n);
let c111 = r.fetch(x_n, y_n, z_n);
let dx = NeonVector::from(dr);
let c00 = c000.neg_mla(c000, dx).mla(c100, w0);
let c10 = c010.neg_mla(c010, dx).mla(c110, w0);
let c01 = c001.neg_mla(c001, dx).mla(c101, w0);
let c11 = c011.neg_mla(c011, dx).mla(c111, w0);
let dy = NeonVector::from(dg);
let c0 = c00.neg_mla(c00, dy).mla(c10, w1);
let c1 = c01.neg_mla(c01, dy).mla(c11, w1);
let dz = NeonVector::from(db);
c0.neg_mla(c0, dz).mla(c1, w2)
}
}

View File

@@ -0,0 +1,947 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::interpolator::BarycentricWeight;
use crate::math::FusedMultiplyAdd;
use num_traits::AsPrimitive;
use std::arch::aarch64::*;
use std::ops::{Add, Mul, Sub};
#[repr(align(8), C)]
pub(crate) struct NeonAlignedI16x4(pub(crate) [i16; 4]);
#[cfg(feature = "options")]
pub(crate) struct TetrahedralNeonQ0_15<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [NeonAlignedI16x4],
}
#[cfg(feature = "options")]
pub(crate) struct PyramidalNeonQ0_15<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [NeonAlignedI16x4],
}
pub(crate) struct TrilinearNeonQ0_15<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [NeonAlignedI16x4],
}
#[cfg(feature = "options")]
pub(crate) struct PyramidalNeonQ0_15Double<'a, const GRID_SIZE: usize> {
pub(crate) cube0: &'a [NeonAlignedI16x4],
pub(crate) cube1: &'a [NeonAlignedI16x4],
}
#[cfg(feature = "options")]
pub(crate) struct PrismaticNeonQ0_15Double<'a, const GRID_SIZE: usize> {
pub(crate) cube0: &'a [NeonAlignedI16x4],
pub(crate) cube1: &'a [NeonAlignedI16x4],
}
pub(crate) struct TrilinearNeonQ0_15Double<'a, const GRID_SIZE: usize> {
pub(crate) cube0: &'a [NeonAlignedI16x4],
pub(crate) cube1: &'a [NeonAlignedI16x4],
}
#[cfg(feature = "options")]
pub(crate) struct TetrahedralNeonQ0_15Double<'a, const GRID_SIZE: usize> {
pub(crate) cube0: &'a [NeonAlignedI16x4],
pub(crate) cube1: &'a [NeonAlignedI16x4],
}
#[cfg(feature = "options")]
pub(crate) struct PrismaticNeonQ0_15<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [NeonAlignedI16x4],
}
trait Fetcher<T> {
fn fetch(&self, x: i32, y: i32, z: i32) -> T;
}
struct TetrahedralNeonQ0_15FetchVector<'a, const GRID_SIZE: usize> {
cube: &'a [NeonAlignedI16x4],
}
struct TetrahedralNeonQ0_15FetchVectorDouble<'a, const GRID_SIZE: usize> {
cube0: &'a [NeonAlignedI16x4],
cube1: &'a [NeonAlignedI16x4],
}
#[derive(Copy, Clone)]
pub(crate) struct NeonVectorQ0_15 {
pub(crate) v: int16x4_t,
}
#[derive(Copy, Clone)]
pub(crate) struct NeonVectorQ0_15Double {
pub(crate) v: int16x8_t,
}
impl From<i16> for NeonVectorQ0_15 {
#[inline(always)]
fn from(v: i16) -> Self {
NeonVectorQ0_15 {
v: unsafe { vdup_n_s16(v) },
}
}
}
impl From<i16> for NeonVectorQ0_15Double {
#[inline(always)]
fn from(v: i16) -> Self {
NeonVectorQ0_15Double {
v: unsafe { vdupq_n_s16(v) },
}
}
}
impl Sub<NeonVectorQ0_15> for NeonVectorQ0_15 {
type Output = Self;
#[inline(always)]
fn sub(self, rhs: NeonVectorQ0_15) -> Self::Output {
NeonVectorQ0_15 {
v: unsafe { vsub_s16(self.v, rhs.v) },
}
}
}
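// Q0.15 product: vqrdmulh_s16 yields (2*a*b + (1 << 15)) >> 16 with
// saturation, i.e. a rounded fixed-point multiply. For example, 0.5 * 0.5:
// (2*16384*16384 + 32768) >> 16 = 8192, which is 0.25 in Q0.15.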
impl Mul<NeonVectorQ0_15> for NeonVectorQ0_15 {
type Output = Self;
#[inline(always)]
fn mul(self, rhs: NeonVectorQ0_15) -> Self::Output {
NeonVectorQ0_15 {
v: unsafe { vqrdmulh_s16(self.v, rhs.v) },
}
}
}
impl Sub<NeonVectorQ0_15Double> for NeonVectorQ0_15Double {
type Output = Self;
#[inline(always)]
fn sub(self, rhs: NeonVectorQ0_15Double) -> Self::Output {
NeonVectorQ0_15Double {
v: unsafe { vsubq_s16(self.v, rhs.v) },
}
}
}
impl Mul<NeonVectorQ0_15Double> for NeonVectorQ0_15Double {
type Output = Self;
#[inline(always)]
fn mul(self, rhs: NeonVectorQ0_15Double) -> Self::Output {
NeonVectorQ0_15Double {
v: unsafe { vqrdmulhq_s16(self.v, rhs.v) },
}
}
}
impl Add<NeonVectorQ0_15> for NeonVectorQ0_15 {
type Output = Self;
#[inline(always)]
fn add(self, rhs: NeonVectorQ0_15) -> Self::Output {
NeonVectorQ0_15 {
v: unsafe { vadd_s16(self.v, rhs.v) },
}
}
}
impl Add<NeonVectorQ0_15Double> for NeonVectorQ0_15Double {
type Output = Self;
#[inline(always)]
fn add(self, rhs: NeonVectorQ0_15Double) -> Self::Output {
NeonVectorQ0_15Double {
v: unsafe { vaddq_s16(self.v, rhs.v) },
}
}
}
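// Fused Q0.15 accumulate: mla computes self + b*c (SQRDMLAH) and neg_mla
// computes self - b*c (SQRDMLSH); both need the Armv8.1 "rdm" extension,
// which the factory verifies at runtime before selecting this path.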
impl FusedMultiplyAdd<NeonVectorQ0_15> for NeonVectorQ0_15 {
#[inline(always)]
fn mla(&self, b: NeonVectorQ0_15, c: NeonVectorQ0_15) -> NeonVectorQ0_15 {
NeonVectorQ0_15 {
v: unsafe { vqrdmlah_s16(self.v, b.v, c.v) },
}
}
}
impl NeonVectorQ0_15 {
#[inline(always)]
fn neg_mla(&self, b: NeonVectorQ0_15, c: NeonVectorQ0_15) -> NeonVectorQ0_15 {
NeonVectorQ0_15 {
v: unsafe { vqrdmlsh_s16(self.v, b.v, c.v) },
}
}
}
impl NeonVectorQ0_15Double {
#[inline(always)]
fn neg_mla(&self, b: NeonVectorQ0_15Double, c: NeonVectorQ0_15Double) -> NeonVectorQ0_15Double {
NeonVectorQ0_15Double {
v: unsafe { vqrdmlshq_s16(self.v, b.v, c.v) },
}
}
}
impl NeonVectorQ0_15Double {
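    // The 4-lane Q0.15 weight is broadcast to both halves of the paired vector.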
#[inline(always)]
fn mla(&self, b: NeonVectorQ0_15Double, c: NeonVectorQ0_15) -> NeonVectorQ0_15Double {
NeonVectorQ0_15Double {
v: unsafe { vqrdmlahq_s16(self.v, b.v, vcombine_s16(c.v, c.v)) },
}
}
#[inline(always)]
pub(crate) fn split(self) -> (NeonVectorQ0_15, NeonVectorQ0_15) {
unsafe {
(
NeonVectorQ0_15 {
v: vget_low_s16(self.v),
},
NeonVectorQ0_15 {
v: vget_high_s16(self.v),
},
)
}
}
}
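// Fetches one lattice point from a GRID_SIZE^3 CLUT laid out in row-major
// (x-major) order: offset = x*N^2 + y*N + z.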
impl<const GRID_SIZE: usize> Fetcher<NeonVectorQ0_15>
for TetrahedralNeonQ0_15FetchVector<'_, GRID_SIZE>
{
fn fetch(&self, x: i32, y: i32, z: i32) -> NeonVectorQ0_15 {
let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
+ y as u32 * GRID_SIZE as u32
+ z as u32) as usize;
let jx = unsafe { self.cube.get_unchecked(offset..) };
NeonVectorQ0_15 {
v: unsafe { vld1_s16(jx.as_ptr() as *const i16) },
}
}
}
impl<const GRID_SIZE: usize> Fetcher<NeonVectorQ0_15Double>
for TetrahedralNeonQ0_15FetchVectorDouble<'_, GRID_SIZE>
{
fn fetch(&self, x: i32, y: i32, z: i32) -> NeonVectorQ0_15Double {
let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
+ y as u32 * GRID_SIZE as u32
+ z as u32) as usize;
let jx0 = unsafe { self.cube0.get_unchecked(offset..) };
let jx1 = unsafe { self.cube1.get_unchecked(offset..) };
NeonVectorQ0_15Double {
v: unsafe {
vcombine_s16(
vld1_s16(jx0.as_ptr() as *const i16),
vld1_s16(jx1.as_ptr() as *const i16),
)
},
}
}
}
pub(crate) trait NeonMdInterpolationQ0_15<'a, const GRID_SIZE: usize> {
fn new(table: &'a [NeonAlignedI16x4]) -> Self;
fn inter3_neon<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
) -> NeonVectorQ0_15;
}
pub(crate) trait NeonMdInterpolationQ0_15Double<'a, const GRID_SIZE: usize> {
fn new(table0: &'a [NeonAlignedI16x4], table1: &'a [NeonAlignedI16x4]) -> Self;
fn inter3_neon<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
) -> (NeonVectorQ0_15, NeonVectorQ0_15);
}
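// Tetrahedral interpolation: ordering the fractional parts (rx, ry, rz)
// selects one of the six tetrahedra that partition the lattice cell; the
// result is then accumulated as c0 + c1*rx + c2*ry + c3*rz.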
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> TetrahedralNeonQ0_15<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
r: impl Fetcher<NeonVectorQ0_15>,
) -> NeonVectorQ0_15 {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let rx = lut_r.w;
let ry = lut_g.w;
let rz = lut_b.w;
let c0 = r.fetch(x, y, z);
let c2;
let c1;
let c3;
if rx >= ry {
if ry >= rz {
//rx >= ry && ry >= rz
c1 = r.fetch(x_n, y, z) - c0;
c2 = r.fetch(x_n, y_n, z) - r.fetch(x_n, y, z);
c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
} else if rx >= rz {
                //rx >= rz && rz > ry
c1 = r.fetch(x_n, y, z) - c0;
c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
c3 = r.fetch(x_n, y, z_n) - r.fetch(x_n, y, z);
} else {
//rz > rx && rx >= ry
c1 = r.fetch(x_n, y, z_n) - r.fetch(x, y, z_n);
c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
c3 = r.fetch(x, y, z_n) - c0;
}
} else if rx >= rz {
//ry > rx && rx >= rz
c1 = r.fetch(x_n, y_n, z) - r.fetch(x, y_n, z);
c2 = r.fetch(x, y_n, z) - c0;
c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
} else if ry >= rz {
//ry >= rz && rz > rx
c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
c2 = r.fetch(x, y_n, z) - c0;
c3 = r.fetch(x, y_n, z_n) - r.fetch(x, y_n, z);
} else {
//rz > ry && ry > rx
c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
c2 = r.fetch(x, y_n, z_n) - r.fetch(x, y, z_n);
c3 = r.fetch(x, y, z_n) - c0;
}
let s0 = c0.mla(c1, NeonVectorQ0_15::from(rx));
let s1 = s0.mla(c2, NeonVectorQ0_15::from(ry));
s1.mla(c3, NeonVectorQ0_15::from(rz))
}
}
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> TetrahedralNeonQ0_15Double<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
r: impl Fetcher<NeonVectorQ0_15Double>,
) -> (NeonVectorQ0_15, NeonVectorQ0_15) {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let rx = lut_r.w;
let ry = lut_g.w;
let rz = lut_b.w;
let c0 = r.fetch(x, y, z);
let c2;
let c1;
let c3;
if rx >= ry {
if ry >= rz {
//rx >= ry && ry >= rz
c1 = r.fetch(x_n, y, z) - c0;
c2 = r.fetch(x_n, y_n, z) - r.fetch(x_n, y, z);
c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
} else if rx >= rz {
                //rx >= rz && rz > ry
c1 = r.fetch(x_n, y, z) - c0;
c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
c3 = r.fetch(x_n, y, z_n) - r.fetch(x_n, y, z);
} else {
//rz > rx && rx >= ry
c1 = r.fetch(x_n, y, z_n) - r.fetch(x, y, z_n);
c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
c3 = r.fetch(x, y, z_n) - c0;
}
} else if rx >= rz {
//ry > rx && rx >= rz
c1 = r.fetch(x_n, y_n, z) - r.fetch(x, y_n, z);
c2 = r.fetch(x, y_n, z) - c0;
c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
} else if ry >= rz {
//ry >= rz && rz > rx
c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
c2 = r.fetch(x, y_n, z) - c0;
c3 = r.fetch(x, y_n, z_n) - r.fetch(x, y_n, z);
} else {
//rz > ry && ry > rx
c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
c2 = r.fetch(x, y_n, z_n) - r.fetch(x, y, z_n);
c3 = r.fetch(x, y, z_n) - c0;
}
let s0 = c0.mla(c1, NeonVectorQ0_15::from(rx));
let s1 = s0.mla(c2, NeonVectorQ0_15::from(ry));
s1.mla(c3, NeonVectorQ0_15::from(rz)).split()
}
}
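// Every interpolator kind shares the same fetch-based entry points, so the
// trait impls are stamped out by macro for the single- and double-cube cases.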
macro_rules! define_md_inter_neon {
($interpolator: ident) => {
impl<'a, const GRID_SIZE: usize> NeonMdInterpolationQ0_15<'a, GRID_SIZE>
for $interpolator<'a, GRID_SIZE>
{
#[inline(always)]
fn new(table: &'a [NeonAlignedI16x4]) -> Self {
Self { cube: table }
}
#[inline(always)]
fn inter3_neon<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
) -> NeonVectorQ0_15 {
self.interpolate(
in_r,
in_g,
in_b,
lut,
TetrahedralNeonQ0_15FetchVector::<GRID_SIZE> { cube: self.cube },
)
}
}
};
}
macro_rules! define_md_inter_neon_d {
($interpolator: ident) => {
impl<'a, const GRID_SIZE: usize> NeonMdInterpolationQ0_15Double<'a, GRID_SIZE>
for $interpolator<'a, GRID_SIZE>
{
#[inline(always)]
fn new(table0: &'a [NeonAlignedI16x4], table1: &'a [NeonAlignedI16x4]) -> Self {
Self {
cube0: table0,
cube1: table1,
}
}
#[inline(always)]
fn inter3_neon<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
) -> (NeonVectorQ0_15, NeonVectorQ0_15) {
self.interpolate(
in_r,
in_g,
in_b,
lut,
TetrahedralNeonQ0_15FetchVectorDouble::<GRID_SIZE> {
cube0: self.cube0,
cube1: self.cube1,
},
)
}
}
};
}
#[cfg(feature = "options")]
define_md_inter_neon!(TetrahedralNeonQ0_15);
#[cfg(feature = "options")]
define_md_inter_neon!(PyramidalNeonQ0_15);
#[cfg(feature = "options")]
define_md_inter_neon!(PrismaticNeonQ0_15);
define_md_inter_neon!(TrilinearNeonQ0_15);
#[cfg(feature = "options")]
define_md_inter_neon_d!(PrismaticNeonQ0_15Double);
#[cfg(feature = "options")]
define_md_inter_neon_d!(PyramidalNeonQ0_15Double);
#[cfg(feature = "options")]
define_md_inter_neon_d!(TetrahedralNeonQ0_15Double);
define_md_inter_neon_d!(TrilinearNeonQ0_15Double);
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> PyramidalNeonQ0_15<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
r: impl Fetcher<NeonVectorQ0_15>,
) -> NeonVectorQ0_15 {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let c0 = r.fetch(x, y, z);
let w0 = NeonVectorQ0_15::from(db);
let w1 = NeonVectorQ0_15::from(dr);
let w2 = NeonVectorQ0_15::from(dg);
if dr > db && dg > db {
let x0 = r.fetch(x_n, y_n, z_n);
let x1 = r.fetch(x_n, y_n, z);
let x2 = r.fetch(x_n, y, z);
let x3 = r.fetch(x, y_n, z);
let w3 = w1 * w2;
let c1 = x0 - x1;
let c2 = x2 - c0;
let c3 = x3 - c0;
let c4 = c0 - x3 - x2 + x1;
let s0 = c0.mla(c1, w0);
let s1 = s0.mla(c2, w1);
let s2 = s1.mla(c3, w2);
s2.mla(c4, w3)
} else if db > dr && dg > dr {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y_n, z_n);
let x2 = r.fetch(x, y_n, z_n);
let x3 = r.fetch(x, y_n, z);
let w3 = w2 * w0;
let c1 = x0 - c0;
let c2 = x1 - x2;
let c3 = x3 - c0;
let c4 = c0 - x3 - x0 + x2;
let s0 = c0.mla(c1, w0);
let s1 = s0.mla(c2, w1);
let s2 = s1.mla(c3, w2);
s2.mla(c4, w3)
} else {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y, z);
let x2 = r.fetch(x_n, y, z_n);
let x3 = r.fetch(x_n, y_n, z_n);
let w3 = w0 * w1;
let c1 = x0 - c0;
let c2 = x1 - c0;
let c3 = x3 - x2;
let c4 = c0 - x1 - x0 + x2;
let s0 = c0.mla(c1, w0);
let s1 = s0.mla(c2, w1);
let s2 = s1.mla(c3, w2);
s2.mla(c4, w3)
}
}
}
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> PyramidalNeonQ0_15Double<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
r: impl Fetcher<NeonVectorQ0_15Double>,
) -> (NeonVectorQ0_15, NeonVectorQ0_15) {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let c0 = r.fetch(x, y, z);
let w0 = NeonVectorQ0_15::from(db);
let w1 = NeonVectorQ0_15::from(dr);
let w2 = NeonVectorQ0_15::from(dg);
if dr > db && dg > db {
let w3 = NeonVectorQ0_15::from(dr) * NeonVectorQ0_15::from(dg);
let x0 = r.fetch(x_n, y_n, z_n);
let x1 = r.fetch(x_n, y_n, z);
let x2 = r.fetch(x_n, y, z);
let x3 = r.fetch(x, y_n, z);
let c1 = x0 - x1;
let c2 = x2 - c0;
let c3 = x3 - c0;
let c4 = c0 - x3 - x2 + x1;
let s0 = c0.mla(c1, w0);
let s1 = s0.mla(c2, w1);
let s2 = s1.mla(c3, w2);
s2.mla(c4, w3).split()
} else if db > dr && dg > dr {
let w3 = NeonVectorQ0_15::from(dg) * NeonVectorQ0_15::from(db);
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y_n, z_n);
let x2 = r.fetch(x, y_n, z_n);
let x3 = r.fetch(x, y_n, z);
let c1 = x0 - c0;
let c2 = x1 - x2;
let c3 = x3 - c0;
let c4 = c0 - x3 - x0 + x2;
let s0 = c0.mla(c1, w0);
let s1 = s0.mla(c2, w1);
let s2 = s1.mla(c3, w2);
s2.mla(c4, w3).split()
} else {
let w3 = NeonVectorQ0_15::from(db) * NeonVectorQ0_15::from(dr);
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y, z);
let x2 = r.fetch(x_n, y, z_n);
let x3 = r.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - c0;
let c3 = x3 - x2;
let c4 = c0 - x1 - x0 + x2;
let s0 = c0.mla(c1, w0);
let s1 = s0.mla(c2, w1);
let s2 = s1.mla(c3, w2);
s2.mla(c4, w3).split()
}
}
}
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> PrismaticNeonQ0_15<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
r: impl Fetcher<NeonVectorQ0_15>,
) -> NeonVectorQ0_15 {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let c0 = r.fetch(x, y, z);
let w0 = NeonVectorQ0_15::from(db);
let w1 = NeonVectorQ0_15::from(dr);
let w2 = NeonVectorQ0_15::from(dg);
if db > dr {
let w3 = w2 * w0;
let w4 = w1 * w2;
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y, z_n);
let x2 = r.fetch(x, y_n, z);
let x3 = r.fetch(x, y_n, z_n);
let x4 = r.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - x0;
let c3 = x2 - c0;
let c4 = c0 - x2 - x0 + x3;
let c5 = x0 - x3 - x1 + x4;
let s0 = c0.mla(c1, w0);
let s1 = s0.mla(c2, w1);
let s2 = s1.mla(c3, w2);
let s3 = s2.mla(c4, w3);
s3.mla(c5, w4)
} else {
let w3 = w2 * w0;
let w4 = w1 * w2;
let x0 = r.fetch(x_n, y, z);
let x1 = r.fetch(x_n, y, z_n);
let x2 = r.fetch(x, y_n, z);
let x3 = r.fetch(x_n, y_n, z);
let x4 = r.fetch(x_n, y_n, z_n);
let c1 = x1 - x0;
let c2 = x0 - c0;
let c3 = x2 - c0;
let c4 = x0 - x3 - x1 + x4;
let c5 = c0 - x2 - x0 + x3;
let s0 = c0.mla(c1, w0);
let s1 = s0.mla(c2, w1);
let s2 = s1.mla(c3, w2);
let s3 = s2.mla(c4, w3);
s3.mla(c5, w4)
}
}
}
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> PrismaticNeonQ0_15Double<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
rv: impl Fetcher<NeonVectorQ0_15Double>,
) -> (NeonVectorQ0_15, NeonVectorQ0_15) {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let c0 = rv.fetch(x, y, z);
let w0 = NeonVectorQ0_15::from(db);
let w1 = NeonVectorQ0_15::from(dr);
let w2 = NeonVectorQ0_15::from(dg);
let w3 = NeonVectorQ0_15::from(dg) * NeonVectorQ0_15::from(db);
let w4 = NeonVectorQ0_15::from(dr) * NeonVectorQ0_15::from(dg);
if db > dr {
let x0 = rv.fetch(x, y, z_n);
let x1 = rv.fetch(x_n, y, z_n);
let x2 = rv.fetch(x, y_n, z);
let x3 = rv.fetch(x, y_n, z_n);
let x4 = rv.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - x0;
let c3 = x2 - c0;
let c4 = c0 - x2 - x0 + x3;
let c5 = x0 - x3 - x1 + x4;
let s0 = c0.mla(c1, w0);
let s1 = s0.mla(c2, w1);
let s2 = s1.mla(c3, w2);
let s3 = s2.mla(c4, w3);
s3.mla(c5, w4).split()
} else {
let x0 = rv.fetch(x_n, y, z);
let x1 = rv.fetch(x_n, y, z_n);
let x2 = rv.fetch(x, y_n, z);
let x3 = rv.fetch(x_n, y_n, z);
let x4 = rv.fetch(x_n, y_n, z_n);
let c1 = x1 - x0;
let c2 = x0 - c0;
let c3 = x2 - c0;
let c4 = x0 - x3 - x1 + x4;
let c5 = c0 - x2 - x0 + x3;
let s0 = c0.mla(c1, w0);
let s1 = s0.mla(c2, w1);
let s2 = s1.mla(c3, w2);
let s3 = s2.mla(c4, w3);
s3.mla(c5, w4).split()
}
}
}
impl<const GRID_SIZE: usize> TrilinearNeonQ0_15Double<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
r: impl Fetcher<NeonVectorQ0_15Double>,
) -> (NeonVectorQ0_15, NeonVectorQ0_15) {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let w0 = NeonVectorQ0_15::from(dr);
let w1 = NeonVectorQ0_15::from(dg);
let w2 = NeonVectorQ0_15::from(db);
let c000 = r.fetch(x, y, z);
let c100 = r.fetch(x_n, y, z);
let c010 = r.fetch(x, y_n, z);
let c110 = r.fetch(x_n, y_n, z);
let c001 = r.fetch(x, y, z_n);
let c101 = r.fetch(x_n, y, z_n);
let c011 = r.fetch(x, y_n, z_n);
let c111 = r.fetch(x_n, y_n, z_n);
let dx = NeonVectorQ0_15Double::from(dr);
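        // Each stage is a Q0.15 lerp: t.neg_mla(t, d).mla(u, d) == t*(1 - d) + u*d.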
let c00 = c000.neg_mla(c000, dx).mla(c100, w0);
let c10 = c010.neg_mla(c010, dx).mla(c110, w0);
let c01 = c001.neg_mla(c001, dx).mla(c101, w0);
let c11 = c011.neg_mla(c011, dx).mla(c111, w0);
let dy = NeonVectorQ0_15Double::from(dg);
let c0 = c00.neg_mla(c00, dy).mla(c10, w1);
let c1 = c01.neg_mla(c01, dy).mla(c11, w1);
let dz = NeonVectorQ0_15Double::from(db);
c0.neg_mla(c0, dz).mla(c1, w2).split()
}
}
impl<const GRID_SIZE: usize> TrilinearNeonQ0_15<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
r: impl Fetcher<NeonVectorQ0_15>,
) -> NeonVectorQ0_15 {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let w0 = NeonVectorQ0_15::from(dr);
let w1 = NeonVectorQ0_15::from(dg);
let w2 = NeonVectorQ0_15::from(db);
let c000 = r.fetch(x, y, z);
let c100 = r.fetch(x_n, y, z);
let c010 = r.fetch(x, y_n, z);
let c110 = r.fetch(x_n, y_n, z);
let c001 = r.fetch(x, y, z_n);
let c101 = r.fetch(x_n, y, z_n);
let c011 = r.fetch(x, y_n, z_n);
let c111 = r.fetch(x_n, y_n, z_n);
let dx = NeonVectorQ0_15::from(dr);
let c00 = c000.neg_mla(c000, dx).mla(c100, w0);
let c10 = c010.neg_mla(c010, dx).mla(c110, w0);
let c01 = c001.neg_mla(c001, dx).mla(c101, w0);
let c11 = c011.neg_mla(c011, dx).mla(c111, w0);
let dy = NeonVectorQ0_15::from(dg);
let c0 = c00.neg_mla(c00, dy).mla(c10, w1);
let c1 = c01.neg_mla(c01, dy).mla(c11, w1);
let dz = NeonVectorQ0_15::from(db);
c0.neg_mla(c0, dz).mla(c1, w2)
}
}

321
vendor/moxcms/src/conversions/neon/lut4_to_3.rs vendored Normal File
View File

@@ -0,0 +1,321 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::interpolator::BarycentricWeight;
use crate::conversions::lut_transforms::Lut4x3Factory;
use crate::conversions::neon::interpolator::*;
use crate::conversions::neon::interpolator_q0_15::NeonAlignedI16x4;
use crate::conversions::neon::lut4_to_3_q0_15::TransformLut4To3NeonQ0_15;
use crate::conversions::neon::rgb_xyz::NeonAlignedF32;
use crate::transform::PointeeSizeExpressible;
use crate::{
BarycentricWeightScale, CmsError, DataColorSpace, InterpolationMethod, Layout,
TransformExecutor, TransformOptions,
};
use num_traits::AsPrimitive;
use std::arch::aarch64::*;
use std::marker::PhantomData;
struct TransformLut4To3Neon<
T,
U,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> {
lut: Vec<NeonAlignedF32>,
_phantom: PhantomData<T>,
_phantom1: PhantomData<U>,
interpolation_method: InterpolationMethod,
weights: Box<[BarycentricWeight<f32>; BINS]>,
color_space: DataColorSpace,
is_linear: bool,
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformLut4To3Neon<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
#[allow(unused_unsafe)]
fn transform_chunk<'b, Interpolator: NeonMdInterpolationDouble<'b, GRID_SIZE>>(
&'b self,
src: &[T],
dst: &mut [T],
) {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
let grid_size = GRID_SIZE as i32;
let grid_size3 = grid_size * grid_size * grid_size;
let value_scale = unsafe { vdupq_n_f32(((1 << BIT_DEPTH) - 1) as f32) };
let max_value = ((1 << BIT_DEPTH) - 1u32).as_();
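        // 4D strategy: the K channel picks two adjacent 3D CLUT slices (w, w_n),
        // both are sampled in one pass by the *Double interpolator, and the pair
        // is blended by the K fraction t below.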
for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(channels)) {
let c = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[0],
);
let m = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[1],
);
let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[2],
);
let k = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[3],
);
let k_weights = self.weights[k.as_()];
let w: i32 = k_weights.x;
let w_n: i32 = k_weights.x_n;
let t: f32 = k_weights.w;
let table1 = &self.lut[(w * grid_size3) as usize..];
let table2 = &self.lut[(w_n * grid_size3) as usize..];
let tetrahedral1 = Interpolator::new(table1, table2);
let (a0, b0) = tetrahedral1.inter3_neon(c, m, y, &self.weights);
let (a0, b0) = (a0.v, b0.v);
if T::FINITE {
unsafe {
let t0 = vdupq_n_f32(t);
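                    // Blend the two K slices: hp = a0*(1 - t), then v = hp + b0*t,
                    // via fused multiply-subtract/add.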
let hp = vfmsq_f32(a0, a0, t0);
let mut v = vfmaq_f32(hp, b0, t0);
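                    // Scale to the output range, clamp at the top, and convert
                    // with round-to-nearest (vcvtaq; negatives saturate to zero).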
v = vmulq_f32(v, value_scale);
v = vminq_f32(v, value_scale);
let jvx = vcvtaq_u32_f32(v);
dst[cn.r_i()] = vgetq_lane_u32::<0>(jvx).as_();
dst[cn.g_i()] = vgetq_lane_u32::<1>(jvx).as_();
dst[cn.b_i()] = vgetq_lane_u32::<2>(jvx).as_();
}
} else {
unsafe {
let t0 = vdupq_n_f32(t);
let hp = vfmsq_f32(a0, a0, t0);
let v = vfmaq_f32(hp, b0, t0);
dst[cn.r_i()] = vgetq_lane_f32::<0>(v).as_();
dst[cn.g_i()] = vgetq_lane_f32::<1>(v).as_();
dst[cn.b_i()] = vgetq_lane_f32::<2>(v).as_();
}
}
if channels == 4 {
dst[cn.a_i()] = max_value;
}
}
}
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformExecutor<T>
for TransformLut4To3Neon<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
if src.len() % 4 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / 4;
let dst_chunks = dst.len() / channels;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
if self.color_space == DataColorSpace::Lab
|| (self.is_linear && self.color_space == DataColorSpace::Rgb)
|| self.color_space == DataColorSpace::Xyz
{
self.transform_chunk::<TrilinearNeonDouble<GRID_SIZE>>(src, dst);
} else {
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_chunk::<TetrahedralNeonDouble<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_chunk::<PyramidalNeonDouble<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_chunk::<PrismaticNeonDouble<GRID_SIZE>>(src, dst);
}
InterpolationMethod::Linear => {
self.transform_chunk::<TrilinearNeonDouble<GRID_SIZE>>(src, dst);
}
}
}
Ok(())
}
}
pub(crate) struct NeonLut4x3Factory {}
impl Lut4x3Factory for NeonLut4x3Factory {
fn make_transform_4x3<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible + 'static + Send + Sync,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
>(
lut: Vec<f32>,
options: TransformOptions,
color_space: DataColorSpace,
is_linear: bool,
) -> Box<dyn TransformExecutor<T> + Sync + Send>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, u8>,
(): LutBarycentricReduction<T, u16>,
{
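        // Take the Q0.15 fixed-point path only when it is safe: depths below
        // 16 bits fit a signed 16-bit fraction, and SQRDML{A,S}H requires the
        // Armv8.1 "rdm" extension, detected here at runtime.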
if options.prefer_fixed_point
&& BIT_DEPTH < 16
&& std::arch::is_aarch64_feature_detected!("rdm")
{
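            // Finite (integer) samples scale to the full bit depth; float
            // samples are quantized to 14 fractional bits, leaving headroom
            // below the signed Q0.15 limit.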
let q: f32 = if T::FINITE {
((1i32 << BIT_DEPTH as i32) - 1) as f32
} else {
((1i32 << 14i32) - 1) as f32
};
let lut = lut
.chunks_exact(3)
.map(|x| {
NeonAlignedI16x4([
(x[0] * q).round() as i16,
(x[1] * q).round() as i16,
(x[2] * q).round() as i16,
0,
])
})
.collect::<Vec<_>>();
return match options.barycentric_weight_scale {
BarycentricWeightScale::Low => Box::new(TransformLut4To3NeonQ0_15::<
T,
u8,
LAYOUT,
GRID_SIZE,
BIT_DEPTH,
256,
256,
> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<i16>::create_ranged_256::<GRID_SIZE>(),
color_space,
is_linear,
}),
#[cfg(feature = "options")]
BarycentricWeightScale::High => Box::new(TransformLut4To3NeonQ0_15::<
T,
u16,
LAYOUT,
GRID_SIZE,
BIT_DEPTH,
65536,
65536,
> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<i16>::create_binned::<GRID_SIZE, 65536>(),
color_space,
is_linear,
}),
};
}
let lut = lut
.chunks_exact(3)
.map(|x| NeonAlignedF32([x[0], x[1], x[2], 0f32]))
.collect::<Vec<_>>();
match options.barycentric_weight_scale {
BarycentricWeightScale::Low => {
Box::new(
TransformLut4To3Neon::<T, u8, LAYOUT, GRID_SIZE, BIT_DEPTH, 256, 256> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_ranged_256::<GRID_SIZE>(),
color_space,
is_linear,
},
)
}
#[cfg(feature = "options")]
BarycentricWeightScale::High => {
Box::new(
TransformLut4To3Neon::<T, u16, LAYOUT, GRID_SIZE, BIT_DEPTH, 65536, 65536> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_binned::<GRID_SIZE, 65536>(),
color_space,
is_linear,
},
)
}
}
}
}

202
vendor/moxcms/src/conversions/neon/lut4_to_3_q0_15.rs vendored Normal File
View File

@@ -0,0 +1,202 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::interpolator::BarycentricWeight;
use crate::conversions::neon::interpolator_q0_15::*;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, DataColorSpace, InterpolationMethod, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::aarch64::*;
use std::marker::PhantomData;
pub(crate) struct TransformLut4To3NeonQ0_15<
T,
U,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> {
pub(crate) lut: Vec<NeonAlignedI16x4>,
pub(crate) _phantom: PhantomData<T>,
pub(crate) _phantom1: PhantomData<U>,
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) weights: Box<[BarycentricWeight<i16>; BINS]>,
pub(crate) color_space: DataColorSpace,
pub(crate) is_linear: bool,
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformLut4To3NeonQ0_15<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
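    // Safety: requires the "rdm" target feature; the factory only constructs
    // this transform after is_aarch64_feature_detected!("rdm") succeeds.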
#[allow(unused_unsafe)]
#[target_feature(enable = "rdm")]
unsafe fn transform_chunk<'b, Interpolator: NeonMdInterpolationQ0_15Double<'b, GRID_SIZE>>(
&'b self,
src: &[T],
dst: &mut [T],
) {
unsafe {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
let grid_size = GRID_SIZE as i32;
let grid_size3 = grid_size * grid_size * grid_size;
let f_value_scale = vdupq_n_f32(1. / ((1 << 14i32) - 1) as f32);
let max_value = ((1u32 << BIT_DEPTH) - 1).as_();
let v_max_scale = if T::FINITE {
vdup_n_s16(((1i32 << BIT_DEPTH) - 1) as i16)
} else {
vdup_n_s16(((1i32 << 14i32) - 1) as i16)
};
for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(channels)) {
let c = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[0],
);
let m = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[1],
);
let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[2],
);
let k = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[3],
);
let k_weights = self.weights[k.as_()];
let w: i32 = k_weights.x;
let w_n: i32 = k_weights.x_n;
let t: i16 = k_weights.w;
let table1 = &self.lut[(w * grid_size3) as usize..];
let table2 = &self.lut[(w_n * grid_size3) as usize..];
let tetrahedral1 = Interpolator::new(table1, table2);
let (a0, b0) = tetrahedral1.inter3_neon(c, m, y, &self.weights);
let (a0, b0) = (a0.v, b0.v);
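                // Q0.15 blend of the two K slices: hp = a0 - a0*t, v = hp + b0*t.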
let t0 = vdup_n_s16(t);
let hp = vqrdmlsh_s16(a0, a0, t0);
let mut v = vqrdmlah_s16(hp, b0, t0);
if T::FINITE {
v = vmax_s16(v, vdup_n_s16(0));
v = vmin_s16(v, v_max_scale);
dst[cn.r_i()] = (vget_lane_s16::<0>(v) as u32).as_();
dst[cn.g_i()] = (vget_lane_s16::<1>(v) as u32).as_();
dst[cn.b_i()] = (vget_lane_s16::<2>(v) as u32).as_();
} else {
let o = vcvtq_f32_s32(vmovl_s16(v));
let r = vmulq_f32(o, f_value_scale);
dst[cn.r_i()] = vgetq_lane_f32::<0>(r).as_();
dst[cn.g_i()] = vgetq_lane_f32::<1>(r).as_();
dst[cn.b_i()] = vgetq_lane_f32::<2>(r).as_();
}
if channels == 4 {
dst[cn.a_i()] = max_value;
}
}
}
}
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformExecutor<T>
for TransformLut4To3NeonQ0_15<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
if src.len() % 4 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / 4;
let dst_chunks = dst.len() / channels;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
unsafe {
if self.color_space == DataColorSpace::Lab
|| (self.is_linear && self.color_space == DataColorSpace::Rgb)
|| self.color_space == DataColorSpace::Xyz
{
self.transform_chunk::<TrilinearNeonQ0_15Double<GRID_SIZE>>(src, dst);
} else {
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_chunk::<TetrahedralNeonQ0_15Double<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_chunk::<PyramidalNeonQ0_15Double<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_chunk::<PrismaticNeonQ0_15Double<GRID_SIZE>>(src, dst);
}
InterpolationMethod::Linear => {
self.transform_chunk::<TrilinearNeonQ0_15Double<GRID_SIZE>>(src, dst);
}
}
}
}
Ok(())
}
}

55
vendor/moxcms/src/conversions/neon/mod.rs vendored Normal File
View File

@@ -0,0 +1,55 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
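// NEON (aarch64) acceleration. Modules suffixed _q0_15, _q1_30 and _q2_13 are
// fixed-point counterparts of the float paths.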
mod a_curves3;
mod a_curves4x3;
mod cube;
mod hypercube;
mod interpolator;
mod interpolator_q0_15;
mod lut4_to_3;
mod lut4_to_3_q0_15;
mod preheat_lut4x3;
mod rgb_xyz;
mod rgb_xyz_opt;
mod rgb_xyz_q1_30_opt;
mod rgb_xyz_q2_13;
mod rgb_xyz_q2_13_opt;
mod t_lut3_to_3;
mod t_lut3_to_3_q0_15;
pub(crate) use a_curves3::{ACurves3InverseNeon, ACurves3Neon, ACurves3OptimizedNeon};
pub(crate) use a_curves4x3::{ACurves4x3Neon, ACurves4x3NeonOptimizedNeon};
pub(crate) use lut4_to_3::NeonLut4x3Factory;
pub(crate) use preheat_lut4x3::Lut4x3Neon;
pub(crate) use rgb_xyz::TransformShaperRgbNeon;
pub(crate) use rgb_xyz_opt::TransformShaperRgbOptNeon;
pub(crate) use rgb_xyz_q1_30_opt::TransformShaperQ1_30NeonOpt;
pub(crate) use rgb_xyz_q2_13::TransformShaperQ2_13Neon;
pub(crate) use rgb_xyz_q2_13_opt::TransformShaperQ2_13NeonOpt;
pub(crate) use t_lut3_to_3::NeonLut3x3Factory;

129
vendor/moxcms/src/conversions/neon/preheat_lut4x3.rs vendored Normal File
View File

@@ -0,0 +1,129 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::neon::hypercube::HypercubeNeon;
use crate::conversions::neon::interpolator::NeonVector;
use crate::trc::{lut_interp_linear_float, lut_interp_linear_float_clamped};
use crate::{CmsError, DataColorSpace, InterpolationMethod, Stage};
use std::arch::aarch64::{vdupq_n_f32, vgetq_lane_f32, vmaxq_f32, vminq_f32};
#[derive(Default)]
pub(crate) struct Lut4x3Neon {
pub(crate) linearization: [Vec<f32>; 4],
pub(crate) clut: Vec<f32>,
pub(crate) grid_size: u8,
pub(crate) output: [Vec<f32>; 3],
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) pcs: DataColorSpace,
}
impl Lut4x3Neon {
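    // Pipeline: per-channel input curves -> 4D CLUT fetch -> clamp to [0, 1]
    // -> per-channel output curves.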
fn transform_impl<Fetch: Fn(f32, f32, f32, f32) -> NeonVector>(
&self,
src: &[f32],
dst: &mut [f32],
fetch: Fetch,
) -> Result<(), CmsError> {
let linearization_0 = &self.linearization[0];
let linearization_1 = &self.linearization[1];
let linearization_2 = &self.linearization[2];
let linearization_3 = &self.linearization[3];
for (dest, src) in dst.chunks_exact_mut(3).zip(src.chunks_exact(4)) {
debug_assert!(self.grid_size as i32 >= 1);
let linear_x = lut_interp_linear_float(src[0], linearization_0);
let linear_y = lut_interp_linear_float(src[1], linearization_1);
let linear_z = lut_interp_linear_float(src[2], linearization_2);
let linear_w = lut_interp_linear_float(src[3], linearization_3);
unsafe {
let mut v = fetch(linear_x, linear_y, linear_z, linear_w).v;
v = vmaxq_f32(v, vdupq_n_f32(0.));
v = vminq_f32(v, vdupq_n_f32(1.));
let pcs_x =
lut_interp_linear_float_clamped(vgetq_lane_f32::<0>(v), &self.output[0]);
let pcs_y =
lut_interp_linear_float_clamped(vgetq_lane_f32::<1>(v), &self.output[1]);
let pcs_z =
lut_interp_linear_float_clamped(vgetq_lane_f32::<2>(v), &self.output[2]);
dest[0] = pcs_x;
dest[1] = pcs_y;
dest[2] = pcs_z;
}
}
Ok(())
}
}
macro_rules! dispatch_preheat {
($heater: ident) => {
impl Stage for $heater {
fn transform(&self, src: &[f32], dst: &mut [f32]) -> Result<(), CmsError> {
let l_tbl = HypercubeNeon::new(
&self.clut,
[
self.grid_size,
self.grid_size,
self.grid_size,
self.grid_size,
],
3,
);
                // If the source PCS is Lab, (quad)linear interpolation should be used
if self.pcs == DataColorSpace::Lab {
return self
.transform_impl(src, dst, |x, y, z, w| l_tbl.quadlinear_vec3(x, y, z, w));
}
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_impl(src, dst, |x, y, z, w| l_tbl.tetra_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_impl(src, dst, |x, y, z, w| l_tbl.pyramid_vec3(x, y, z, w))?;
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_impl(src, dst, |x, y, z, w| l_tbl.prism_vec3(x, y, z, w))?
}
InterpolationMethod::Linear => {
self.transform_impl(src, dst, |x, y, z, w| {
l_tbl.quadlinear_vec3(x, y, z, w)
})?
}
}
Ok(())
}
}
};
}
dispatch_preheat!(Lut4x3Neon);

427
vendor/moxcms/src/conversions/neon/rgb_xyz.rs vendored Normal File
View File

@@ -0,0 +1,427 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::TransformMatrixShaper;
use crate::conversions::neon::rgb_xyz_q2_13::{split_by_twos, split_by_twos_mut};
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::aarch64::*;
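// Aligned scratch for vst1q_u32 stores; the gamma lookups below then read u16
// elements 0, 2 and 4, i.e. the low halves of u32 lanes 0..2 on little-endian
// aarch64.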
#[repr(align(16), C)]
pub(crate) struct NeonAlignedU16(pub(crate) [u16; 8]);
#[repr(align(16), C)]
pub(crate) struct NeonAlignedF32(pub(crate) [f32; 4]);
pub(crate) struct TransformShaperRgbNeon<
T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> {
pub(crate) profile: TransformMatrixShaper<T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
impl<
T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> TransformExecutor<T> for TransformShaperRgbNeon<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
let mut temporary0 = NeonAlignedU16([0; 8]);
let mut temporary1 = NeonAlignedU16([0; 8]);
let mut temporary2 = NeonAlignedU16([0; 8]);
let mut temporary3 = NeonAlignedU16([0; 8]);
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let t = self.profile.adaptation_matrix.transpose();
let scale = (GAMMA_LUT - 1) as f32;
let max_colors: T = ((1 << self.bit_depth) - 1).as_();
let (src_chunks, src_remainder) = split_by_twos(src, src_channels);
let (dst_chunks, dst_remainder) = split_by_twos_mut(dst, dst_channels);
unsafe {
let m0 = vld1q_f32([t.v[0][0], t.v[0][1], t.v[0][2], 0.].as_ptr());
let m1 = vld1q_f32([t.v[1][0], t.v[1][1], t.v[1][2], 0.].as_ptr());
let m2 = vld1q_f32([t.v[2][0], t.v[2][1], t.v[2][2], 0.].as_ptr());
let v_scale = vdupq_n_f32(scale);
let rnd = vdupq_n_f32(0.5);
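            // The hot loop is software-pipelined: the buffer is split into two
            // halves walked in lockstep (four pixels per iteration), and the
            // next iteration's linearization loads are issued before the
            // current gamma-table writes to hide memory latency.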
if !src_chunks.is_empty() {
let (src0, src1) = src_chunks.split_at(src_chunks.len() / 2);
let (dst0, dst1) = dst_chunks.split_at_mut(dst_chunks.len() / 2);
let mut src_iter0 = src0.chunks_exact(src_channels * 2);
let mut src_iter1 = src1.chunks_exact(src_channels * 2);
let (mut r0, mut g0, mut b0, mut a0);
let (mut r1, mut g1, mut b1, mut a1);
let (mut r2, mut g2, mut b2, mut a2);
let (mut r3, mut g3, mut b3, mut a3);
if let (Some(src0), Some(src1)) = (src_iter0.next(), src_iter1.next()) {
let r0p = &self.profile.r_linear[src0[src_cn.r_i()]._as_usize()];
let g0p = &self.profile.g_linear[src0[src_cn.g_i()]._as_usize()];
let b0p = &self.profile.b_linear[src0[src_cn.b_i()]._as_usize()];
let r1p = &self.profile.r_linear[src0[src_cn.r_i() + src_channels]._as_usize()];
let g1p = &self.profile.g_linear[src0[src_cn.g_i() + src_channels]._as_usize()];
let b1p = &self.profile.b_linear[src0[src_cn.b_i() + src_channels]._as_usize()];
let r2p = &self.profile.r_linear[src1[src_cn.r_i()]._as_usize()];
let g2p = &self.profile.g_linear[src1[src_cn.g_i()]._as_usize()];
let b2p = &self.profile.b_linear[src1[src_cn.b_i()]._as_usize()];
let r3p = &self.profile.r_linear[src1[src_cn.r_i() + src_channels]._as_usize()];
let g3p = &self.profile.g_linear[src1[src_cn.g_i() + src_channels]._as_usize()];
let b3p = &self.profile.b_linear[src1[src_cn.b_i() + src_channels]._as_usize()];
r0 = vld1q_dup_f32(r0p);
g0 = vld1q_dup_f32(g0p);
b0 = vld1q_dup_f32(b0p);
r1 = vld1q_dup_f32(r1p);
g1 = vld1q_dup_f32(g1p);
b1 = vld1q_dup_f32(b1p);
r2 = vld1q_dup_f32(r2p);
g2 = vld1q_dup_f32(g2p);
b2 = vld1q_dup_f32(b2p);
r3 = vld1q_dup_f32(r3p);
g3 = vld1q_dup_f32(g3p);
b3 = vld1q_dup_f32(b3p);
a0 = if src_channels == 4 {
src0[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src0[src_cn.a_i() + src_channels]
} else {
max_colors
};
a2 = if src_channels == 4 {
src1[src_cn.a_i()]
} else {
max_colors
};
a3 = if src_channels == 4 {
src1[src_cn.a_i() + src_channels]
} else {
max_colors
};
} else {
r0 = vdupq_n_f32(0.);
g0 = vdupq_n_f32(0.);
b0 = vdupq_n_f32(0.);
r1 = vdupq_n_f32(0.);
g1 = vdupq_n_f32(0.);
b1 = vdupq_n_f32(0.);
r2 = vdupq_n_f32(0.);
g2 = vdupq_n_f32(0.);
b2 = vdupq_n_f32(0.);
r3 = vdupq_n_f32(0.);
g3 = vdupq_n_f32(0.);
b3 = vdupq_n_f32(0.);
a0 = max_colors;
a1 = max_colors;
a2 = max_colors;
a3 = max_colors;
}
for (((src0, src1), dst0), dst1) in src_iter0
.zip(src_iter1)
.zip(dst0.chunks_exact_mut(dst_channels * 2))
.zip(dst1.chunks_exact_mut(dst_channels * 2))
{
let v0_0 = vmulq_f32(r0, m0);
let v0_1 = vmulq_f32(r1, m0);
let v0_2 = vmulq_f32(r2, m0);
let v0_3 = vmulq_f32(r3, m0);
let v1_0 = vfmaq_f32(v0_0, g0, m1);
let v1_1 = vfmaq_f32(v0_1, g1, m1);
let v1_2 = vfmaq_f32(v0_2, g2, m1);
let v1_3 = vfmaq_f32(v0_3, g3, m1);
let mut vr0 = vfmaq_f32(v1_0, b0, m2);
let mut vr1 = vfmaq_f32(v1_1, b1, m2);
let mut vr2 = vfmaq_f32(v1_2, b2, m2);
let mut vr3 = vfmaq_f32(v1_3, b3, m2);
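                    // Scale into the gamma LUT range and add 0.5 so the truncating
                    // float->u32 conversion below rounds to nearest.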
vr0 = vfmaq_f32(rnd, vr0, v_scale);
vr1 = vfmaq_f32(rnd, vr1, v_scale);
vr2 = vfmaq_f32(rnd, vr2, v_scale);
vr3 = vfmaq_f32(rnd, vr3, v_scale);
vr0 = vminq_f32(vr0, v_scale);
vr1 = vminq_f32(vr1, v_scale);
vr2 = vminq_f32(vr2, v_scale);
vr3 = vminq_f32(vr3, v_scale);
let zx0 = vcvtq_u32_f32(vr0);
let zx1 = vcvtq_u32_f32(vr1);
let zx2 = vcvtq_u32_f32(vr2);
let zx3 = vcvtq_u32_f32(vr3);
vst1q_u32(temporary0.0.as_mut_ptr() as *mut _, zx0);
vst1q_u32(temporary1.0.as_mut_ptr() as *mut _, zx1);
vst1q_u32(temporary2.0.as_mut_ptr() as *mut _, zx2);
vst1q_u32(temporary3.0.as_mut_ptr() as *mut _, zx3);
let r0p = &self.profile.r_linear[src0[src_cn.r_i()]._as_usize()];
let g0p = &self.profile.g_linear[src0[src_cn.g_i()]._as_usize()];
let b0p = &self.profile.b_linear[src0[src_cn.b_i()]._as_usize()];
let r1p = &self.profile.r_linear[src0[src_cn.r_i() + src_channels]._as_usize()];
let g1p = &self.profile.g_linear[src0[src_cn.g_i() + src_channels]._as_usize()];
let b1p = &self.profile.b_linear[src0[src_cn.b_i() + src_channels]._as_usize()];
let r2p = &self.profile.r_linear[src1[src_cn.r_i()]._as_usize()];
let g2p = &self.profile.g_linear[src1[src_cn.g_i()]._as_usize()];
let b2p = &self.profile.b_linear[src1[src_cn.b_i()]._as_usize()];
let r3p = &self.profile.r_linear[src1[src_cn.r_i() + src_channels]._as_usize()];
let g3p = &self.profile.g_linear[src1[src_cn.g_i() + src_channels]._as_usize()];
let b3p = &self.profile.b_linear[src1[src_cn.b_i() + src_channels]._as_usize()];
r0 = vld1q_dup_f32(r0p);
g0 = vld1q_dup_f32(g0p);
b0 = vld1q_dup_f32(b0p);
r1 = vld1q_dup_f32(r1p);
g1 = vld1q_dup_f32(g1p);
b1 = vld1q_dup_f32(b1p);
r2 = vld1q_dup_f32(r2p);
g2 = vld1q_dup_f32(g2p);
b2 = vld1q_dup_f32(b2p);
r3 = vld1q_dup_f32(r3p);
g3 = vld1q_dup_f32(g3p);
b3 = vld1q_dup_f32(b3p);
dst0[dst_cn.r_i()] = self.profile.r_gamma[temporary0.0[0] as usize];
dst0[dst_cn.g_i()] = self.profile.g_gamma[temporary0.0[2] as usize];
dst0[dst_cn.b_i()] = self.profile.b_gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i()] = a0;
}
dst0[dst_cn.r_i() + dst_channels] =
self.profile.r_gamma[temporary1.0[0] as usize];
dst0[dst_cn.g_i() + dst_channels] =
self.profile.g_gamma[temporary1.0[2] as usize];
dst0[dst_cn.b_i() + dst_channels] =
self.profile.b_gamma[temporary1.0[4] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i() + dst_channels] = a1;
}
dst1[dst_cn.r_i()] = self.profile.r_gamma[temporary2.0[0] as usize];
dst1[dst_cn.g_i()] = self.profile.g_gamma[temporary2.0[2] as usize];
dst1[dst_cn.b_i()] = self.profile.b_gamma[temporary2.0[4] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i()] = a2;
}
dst1[dst_cn.r_i() + dst_channels] =
self.profile.r_gamma[temporary3.0[0] as usize];
dst1[dst_cn.g_i() + dst_channels] =
self.profile.g_gamma[temporary3.0[2] as usize];
dst1[dst_cn.b_i() + dst_channels] =
self.profile.b_gamma[temporary3.0[4] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i() + dst_channels] = a3;
}
a0 = if src_channels == 4 {
src0[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src0[src_cn.a_i() + src_channels]
} else {
max_colors
};
a2 = if src_channels == 4 {
src1[src_cn.a_i()]
} else {
max_colors
};
a3 = if src_channels == 4 {
src1[src_cn.a_i() + src_channels]
} else {
max_colors
};
}
if let (Some(dst0), Some(dst1)) = (
dst0.chunks_exact_mut(dst_channels * 2).last(),
dst1.chunks_exact_mut(dst_channels * 2).last(),
) {
let v0_0 = vmulq_f32(r0, m0);
let v0_1 = vmulq_f32(r1, m0);
let v0_2 = vmulq_f32(r2, m0);
let v0_3 = vmulq_f32(r3, m0);
let v1_0 = vfmaq_f32(v0_0, g0, m1);
let v1_1 = vfmaq_f32(v0_1, g1, m1);
let v1_2 = vfmaq_f32(v0_2, g2, m1);
let v1_3 = vfmaq_f32(v0_3, g3, m1);
let mut vr0 = vfmaq_f32(v1_0, b0, m2);
let mut vr1 = vfmaq_f32(v1_1, b1, m2);
let mut vr2 = vfmaq_f32(v1_2, b2, m2);
let mut vr3 = vfmaq_f32(v1_3, b3, m2);
vr0 = vfmaq_f32(rnd, vr0, v_scale);
vr1 = vfmaq_f32(rnd, vr1, v_scale);
vr2 = vfmaq_f32(rnd, vr2, v_scale);
vr3 = vfmaq_f32(rnd, vr3, v_scale);
vr0 = vminq_f32(vr0, v_scale);
vr1 = vminq_f32(vr1, v_scale);
vr2 = vminq_f32(vr2, v_scale);
vr3 = vminq_f32(vr3, v_scale);
let zx0 = vcvtq_u32_f32(vr0);
let zx1 = vcvtq_u32_f32(vr1);
let zx2 = vcvtq_u32_f32(vr2);
let zx3 = vcvtq_u32_f32(vr3);
vst1q_u32(temporary0.0.as_mut_ptr() as *mut _, zx0);
vst1q_u32(temporary1.0.as_mut_ptr() as *mut _, zx1);
vst1q_u32(temporary2.0.as_mut_ptr() as *mut _, zx2);
vst1q_u32(temporary3.0.as_mut_ptr() as *mut _, zx3);
dst0[dst_cn.r_i()] = self.profile.r_gamma[temporary0.0[0] as usize];
dst0[dst_cn.g_i()] = self.profile.g_gamma[temporary0.0[2] as usize];
dst0[dst_cn.b_i()] = self.profile.b_gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i()] = a0;
}
dst0[dst_cn.r_i() + dst_channels] =
self.profile.r_gamma[temporary1.0[0] as usize];
dst0[dst_cn.g_i() + dst_channels] =
self.profile.g_gamma[temporary1.0[2] as usize];
dst0[dst_cn.b_i() + dst_channels] =
self.profile.b_gamma[temporary1.0[4] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i() + dst_channels] = a1;
}
dst1[dst_cn.r_i()] = self.profile.r_gamma[temporary2.0[0] as usize];
dst1[dst_cn.g_i()] = self.profile.g_gamma[temporary2.0[2] as usize];
dst1[dst_cn.b_i()] = self.profile.b_gamma[temporary2.0[4] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i()] = a2;
}
dst1[dst_cn.r_i() + dst_channels] =
self.profile.r_gamma[temporary3.0[0] as usize];
dst1[dst_cn.g_i() + dst_channels] =
self.profile.g_gamma[temporary3.0[2] as usize];
dst1[dst_cn.b_i() + dst_channels] =
self.profile.b_gamma[temporary3.0[4] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i() + dst_channels] = a3;
}
}
}
for (src, dst) in src_remainder
.chunks_exact(src_channels)
.zip(dst_remainder.chunks_exact_mut(dst_channels))
{
let rp = &self.profile.r_linear[src[src_cn.r_i()]._as_usize()];
let gp = &self.profile.g_linear[src[src_cn.g_i()]._as_usize()];
let bp = &self.profile.b_linear[src[src_cn.b_i()]._as_usize()];
let r = vld1q_dup_f32(rp);
let g = vld1q_dup_f32(gp);
let b = vld1q_dup_f32(bp);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let v0 = vmulq_f32(r, m0);
let v1 = vfmaq_f32(v0, g, m1);
let mut v = vfmaq_f32(v1, b, m2);
v = vfmaq_f32(rnd, v, v_scale);
v = vminq_f32(v, v_scale);
let zx = vcvtq_u32_f32(v);
vst1q_u32(temporary0.0.as_mut_ptr() as *mut _, zx);
dst[dst_cn.r_i()] = self.profile.r_gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.g_gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.b_gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
Ok(())
}
}

View File

@@ -0,0 +1,423 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::neon::rgb_xyz::NeonAlignedU16;
use crate::conversions::neon::rgb_xyz_q2_13::{split_by_twos, split_by_twos_mut};
use crate::conversions::rgbxyz::TransformMatrixShaperOptimized;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::aarch64::*;
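/// NEON matrix-shaper transform for the "optimized" shaper variant, which
/// shares one linear LUT and one gamma LUT across the R, G and B channels
/// instead of keeping per-channel tables.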
pub(crate) struct TransformShaperRgbOptNeon<
T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> {
pub(crate) profile: TransformMatrixShaperOptimized<T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
impl<
T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> TransformExecutor<T>
for TransformShaperRgbOptNeon<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
let mut temporary0 = NeonAlignedU16([0; 8]);
let mut temporary1 = NeonAlignedU16([0; 8]);
let mut temporary2 = NeonAlignedU16([0; 8]);
let mut temporary3 = NeonAlignedU16([0; 8]);
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let t = self.profile.adaptation_matrix.transpose();
let scale = (GAMMA_LUT - 1) as f32;
let max_colors: T = ((1 << self.bit_depth) - 1).as_();
let (src_chunks, src_remainder) = split_by_twos(src, src_channels);
let (dst_chunks, dst_remainder) = split_by_twos_mut(dst, dst_channels);
unsafe {
let m0 = vld1q_f32([t.v[0][0], t.v[0][1], t.v[0][2], 0.].as_ptr());
let m1 = vld1q_f32([t.v[1][0], t.v[1][1], t.v[1][2], 0.].as_ptr());
let m2 = vld1q_f32([t.v[2][0], t.v[2][1], t.v[2][2], 0.].as_ptr());
let v_scale = vdupq_n_f32(scale);
let rnd = vdupq_n_f32(0.5);
if !src_chunks.is_empty() {
let (src0, src1) = src_chunks.split_at(src_chunks.len() / 2);
let (dst0, dst1) = dst_chunks.split_at_mut(dst_chunks.len() / 2);
let mut src_iter0 = src0.chunks_exact(src_channels * 2);
let mut src_iter1 = src1.chunks_exact(src_channels * 2);
let (mut r0, mut g0, mut b0, mut a0);
let (mut r1, mut g1, mut b1, mut a1);
let (mut r2, mut g2, mut b2, mut a2);
let (mut r3, mut g3, mut b3, mut a3);
if let (Some(src0), Some(src1)) = (src_iter0.next(), src_iter1.next()) {
let r0p = &self.profile.linear[src0[src_cn.r_i()]._as_usize()];
let g0p = &self.profile.linear[src0[src_cn.g_i()]._as_usize()];
let b0p = &self.profile.linear[src0[src_cn.b_i()]._as_usize()];
let r1p = &self.profile.linear[src0[src_cn.r_i() + src_channels]._as_usize()];
let g1p = &self.profile.linear[src0[src_cn.g_i() + src_channels]._as_usize()];
let b1p = &self.profile.linear[src0[src_cn.b_i() + src_channels]._as_usize()];
let r2p = &self.profile.linear[src1[src_cn.r_i()]._as_usize()];
let g2p = &self.profile.linear[src1[src_cn.g_i()]._as_usize()];
let b2p = &self.profile.linear[src1[src_cn.b_i()]._as_usize()];
let r3p = &self.profile.linear[src1[src_cn.r_i() + src_channels]._as_usize()];
let g3p = &self.profile.linear[src1[src_cn.g_i() + src_channels]._as_usize()];
let b3p = &self.profile.linear[src1[src_cn.b_i() + src_channels]._as_usize()];
r0 = vld1q_dup_f32(r0p);
g0 = vld1q_dup_f32(g0p);
b0 = vld1q_dup_f32(b0p);
r1 = vld1q_dup_f32(r1p);
g1 = vld1q_dup_f32(g1p);
b1 = vld1q_dup_f32(b1p);
r2 = vld1q_dup_f32(r2p);
g2 = vld1q_dup_f32(g2p);
b2 = vld1q_dup_f32(b2p);
r3 = vld1q_dup_f32(r3p);
g3 = vld1q_dup_f32(g3p);
b3 = vld1q_dup_f32(b3p);
a0 = if src_channels == 4 {
src0[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src0[src_cn.a_i() + src_channels]
} else {
max_colors
};
a2 = if src_channels == 4 {
src1[src_cn.a_i()]
} else {
max_colors
};
a3 = if src_channels == 4 {
src1[src_cn.a_i() + src_channels]
} else {
max_colors
};
} else {
r0 = vdupq_n_f32(0.);
g0 = vdupq_n_f32(0.);
b0 = vdupq_n_f32(0.);
r1 = vdupq_n_f32(0.);
g1 = vdupq_n_f32(0.);
b1 = vdupq_n_f32(0.);
r2 = vdupq_n_f32(0.);
g2 = vdupq_n_f32(0.);
b2 = vdupq_n_f32(0.);
r3 = vdupq_n_f32(0.);
g3 = vdupq_n_f32(0.);
b3 = vdupq_n_f32(0.);
a0 = max_colors;
a1 = max_colors;
a2 = max_colors;
a3 = max_colors;
}
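// Software-pipelined main loop: each iteration runs the matrix math on LUT
// values loaded during the previous iteration, then issues the gather loads
// for the next two pixel pairs so the table lookups overlap the FMA work.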
for (((src0, src1), dst0), dst1) in src_iter0
.zip(src_iter1)
.zip(dst0.chunks_exact_mut(dst_channels * 2))
.zip(dst1.chunks_exact_mut(dst_channels * 2))
{
let v0_0 = vmulq_f32(r0, m0);
let v0_1 = vmulq_f32(r1, m0);
let v0_2 = vmulq_f32(r2, m0);
let v0_3 = vmulq_f32(r3, m0);
let v1_0 = vfmaq_f32(v0_0, g0, m1);
let v1_1 = vfmaq_f32(v0_1, g1, m1);
let v1_2 = vfmaq_f32(v0_2, g2, m1);
let v1_3 = vfmaq_f32(v0_3, g3, m1);
let mut vr0 = vfmaq_f32(v1_0, b0, m2);
let mut vr1 = vfmaq_f32(v1_1, b1, m2);
let mut vr2 = vfmaq_f32(v1_2, b2, m2);
let mut vr3 = vfmaq_f32(v1_3, b3, m2);
vr0 = vfmaq_f32(rnd, vr0, v_scale);
vr1 = vfmaq_f32(rnd, vr1, v_scale);
vr2 = vfmaq_f32(rnd, vr2, v_scale);
vr3 = vfmaq_f32(rnd, vr3, v_scale);
vr0 = vminq_f32(vr0, v_scale);
vr1 = vminq_f32(vr1, v_scale);
vr2 = vminq_f32(vr2, v_scale);
vr3 = vminq_f32(vr3, v_scale);
let zx0 = vcvtq_u32_f32(vr0);
let zx1 = vcvtq_u32_f32(vr1);
let zx2 = vcvtq_u32_f32(vr2);
let zx3 = vcvtq_u32_f32(vr3);
vst1q_u32(temporary0.0.as_mut_ptr() as *mut _, zx0);
vst1q_u32(temporary1.0.as_mut_ptr() as *mut _, zx1);
vst1q_u32(temporary2.0.as_mut_ptr() as *mut _, zx2);
vst1q_u32(temporary3.0.as_mut_ptr() as *mut _, zx3);
let r0p = &self.profile.linear[src0[src_cn.r_i()]._as_usize()];
let g0p = &self.profile.linear[src0[src_cn.g_i()]._as_usize()];
let b0p = &self.profile.linear[src0[src_cn.b_i()]._as_usize()];
let r1p = &self.profile.linear[src0[src_cn.r_i() + src_channels]._as_usize()];
let g1p = &self.profile.linear[src0[src_cn.g_i() + src_channels]._as_usize()];
let b1p = &self.profile.linear[src0[src_cn.b_i() + src_channels]._as_usize()];
let r2p = &self.profile.linear[src1[src_cn.r_i()]._as_usize()];
let g2p = &self.profile.linear[src1[src_cn.g_i()]._as_usize()];
let b2p = &self.profile.linear[src1[src_cn.b_i()]._as_usize()];
let r3p = &self.profile.linear[src1[src_cn.r_i() + src_channels]._as_usize()];
let g3p = &self.profile.linear[src1[src_cn.g_i() + src_channels]._as_usize()];
let b3p = &self.profile.linear[src1[src_cn.b_i() + src_channels]._as_usize()];
r0 = vld1q_dup_f32(r0p);
g0 = vld1q_dup_f32(g0p);
b0 = vld1q_dup_f32(b0p);
r1 = vld1q_dup_f32(r1p);
g1 = vld1q_dup_f32(g1p);
b1 = vld1q_dup_f32(b1p);
r2 = vld1q_dup_f32(r2p);
g2 = vld1q_dup_f32(g2p);
b2 = vld1q_dup_f32(b2p);
r3 = vld1q_dup_f32(r3p);
g3 = vld1q_dup_f32(g3p);
b3 = vld1q_dup_f32(b3p);
dst0[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
dst0[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
dst0[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i()] = a0;
}
dst0[dst_cn.r_i() + dst_channels] =
self.profile.gamma[temporary1.0[0] as usize];
dst0[dst_cn.g_i() + dst_channels] =
self.profile.gamma[temporary1.0[2] as usize];
dst0[dst_cn.b_i() + dst_channels] =
self.profile.gamma[temporary1.0[4] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i() + dst_channels] = a1;
}
dst1[dst_cn.r_i()] = self.profile.gamma[temporary2.0[0] as usize];
dst1[dst_cn.g_i()] = self.profile.gamma[temporary2.0[2] as usize];
dst1[dst_cn.b_i()] = self.profile.gamma[temporary2.0[4] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i()] = a2;
}
dst1[dst_cn.r_i() + dst_channels] =
self.profile.gamma[temporary3.0[0] as usize];
dst1[dst_cn.g_i() + dst_channels] =
self.profile.gamma[temporary3.0[2] as usize];
dst1[dst_cn.b_i() + dst_channels] =
self.profile.gamma[temporary3.0[4] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i() + dst_channels] = a3;
}
a0 = if src_channels == 4 {
src0[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src0[src_cn.a_i() + src_channels]
} else {
max_colors
};
a2 = if src_channels == 4 {
src1[src_cn.a_i()]
} else {
max_colors
};
a3 = if src_channels == 4 {
src1[src_cn.a_i() + src_channels]
} else {
max_colors
};
}
if let (Some(dst0), Some(dst1)) = (
dst0.chunks_exact_mut(dst_channels * 2).last(),
dst1.chunks_exact_mut(dst_channels * 2).last(),
) {
let v0_0 = vmulq_f32(r0, m0);
let v0_1 = vmulq_f32(r1, m0);
let v0_2 = vmulq_f32(r2, m0);
let v0_3 = vmulq_f32(r3, m0);
let v1_0 = vfmaq_f32(v0_0, g0, m1);
let v1_1 = vfmaq_f32(v0_1, g1, m1);
let v1_2 = vfmaq_f32(v0_2, g2, m1);
let v1_3 = vfmaq_f32(v0_3, g3, m1);
let mut vr0 = vfmaq_f32(v1_0, b0, m2);
let mut vr1 = vfmaq_f32(v1_1, b1, m2);
let mut vr2 = vfmaq_f32(v1_2, b2, m2);
let mut vr3 = vfmaq_f32(v1_3, b3, m2);
vr0 = vfmaq_f32(rnd, vr0, v_scale);
vr1 = vfmaq_f32(rnd, vr1, v_scale);
vr2 = vfmaq_f32(rnd, vr2, v_scale);
vr3 = vfmaq_f32(rnd, vr3, v_scale);
vr0 = vminq_f32(vr0, v_scale);
vr1 = vminq_f32(vr1, v_scale);
vr2 = vminq_f32(vr2, v_scale);
vr3 = vminq_f32(vr3, v_scale);
let zx0 = vcvtq_u32_f32(vr0);
let zx1 = vcvtq_u32_f32(vr1);
let zx2 = vcvtq_u32_f32(vr2);
let zx3 = vcvtq_u32_f32(vr3);
vst1q_u32(temporary0.0.as_mut_ptr() as *mut _, zx0);
vst1q_u32(temporary1.0.as_mut_ptr() as *mut _, zx1);
vst1q_u32(temporary2.0.as_mut_ptr() as *mut _, zx2);
vst1q_u32(temporary3.0.as_mut_ptr() as *mut _, zx3);
dst0[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
dst0[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
dst0[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i()] = a0;
}
dst0[dst_cn.r_i() + dst_channels] =
self.profile.gamma[temporary1.0[0] as usize];
dst0[dst_cn.g_i() + dst_channels] =
self.profile.gamma[temporary1.0[2] as usize];
dst0[dst_cn.b_i() + dst_channels] =
self.profile.gamma[temporary1.0[4] as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i() + dst_channels] = a1;
}
dst1[dst_cn.r_i()] = self.profile.gamma[temporary2.0[0] as usize];
dst1[dst_cn.g_i()] = self.profile.gamma[temporary2.0[2] as usize];
dst1[dst_cn.b_i()] = self.profile.gamma[temporary2.0[4] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i()] = a2;
}
dst1[dst_cn.r_i() + dst_channels] =
self.profile.gamma[temporary3.0[0] as usize];
dst1[dst_cn.g_i() + dst_channels] =
self.profile.gamma[temporary3.0[2] as usize];
dst1[dst_cn.b_i() + dst_channels] =
self.profile.gamma[temporary3.0[4] as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i() + dst_channels] = a3;
}
}
}
for (src, dst) in src_remainder
.chunks_exact(src_channels)
.zip(dst_remainder.chunks_exact_mut(dst_channels))
{
let rp = &self.profile.linear[src[src_cn.r_i()]._as_usize()];
let gp = &self.profile.linear[src[src_cn.g_i()]._as_usize()];
let bp = &self.profile.linear[src[src_cn.b_i()]._as_usize()];
let r = vld1q_dup_f32(rp);
let g = vld1q_dup_f32(gp);
let b = vld1q_dup_f32(bp);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let v0 = vmulq_f32(r, m0);
let v1 = vfmaq_f32(v0, g, m1);
let mut v = vfmaq_f32(v1, b, m2);
v = vfmaq_f32(rnd, v, v_scale);
v = vminq_f32(v, v_scale);
let zx = vcvtq_u32_f32(v);
vst1q_u32(temporary0.0.as_mut_ptr() as *mut _, zx);
dst[dst_cn.r_i()] = self.profile.gamma[temporary0.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.gamma[temporary0.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.gamma[temporary0.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
Ok(())
}
}

View File

@@ -0,0 +1,437 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::neon::rgb_xyz_q2_13::{split_by_twos, split_by_twos_mut};
use crate::conversions::rgbxyz_fixed::TransformMatrixShaperFixedPointOpt;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::aarch64::*;
pub(crate) struct TransformShaperQ1_30NeonOpt<
T: Copy,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
const PRECISION: i32,
> {
pub(crate) profile: TransformMatrixShaperFixedPointOpt<i32, i32, T, LINEAR_CAP>,
}
impl<
T: Copy + PointeeSizeExpressible + 'static + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
const PRECISION: i32,
>
TransformShaperQ1_30NeonOpt<
T,
SRC_LAYOUT,
DST_LAYOUT,
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
PRECISION,
>
where
u32: AsPrimitive<T>,
{
#[target_feature(enable = "rdm")]
unsafe fn transform_impl(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let t = self.profile.adaptation_matrix.transpose();
let max_colors: T = ((1 << BIT_DEPTH) - 1).as_();
let (src_chunks, src_remainder) = split_by_twos(src, src_channels);
let (dst_chunks, dst_remainder) = split_by_twos_mut(dst, dst_channels);
unsafe {
let m0 = vld1q_s32([t.v[0][0], t.v[0][1], t.v[0][2], 0].as_ptr());
let m1 = vld1q_s32([t.v[1][0], t.v[1][1], t.v[1][2], 0].as_ptr());
let m2 = vld1q_s32([t.v[2][0], t.v[2][1], t.v[2][2], 0].as_ptr());
let v_max_value = vdup_n_u16((GAMMA_LUT - 1) as u16);
if !src_chunks.is_empty() {
let (src0, src1) = src_chunks.split_at(src_chunks.len() / 2);
let (dst0, dst1) = dst_chunks.split_at_mut(dst_chunks.len() / 2);
let mut src_iter0 = src0.chunks_exact(src_channels * 2);
let mut src_iter1 = src1.chunks_exact(src_channels * 2);
let (mut r0, mut g0, mut b0, mut a0);
let (mut r1, mut g1, mut b1, mut a1);
let (mut r2, mut g2, mut b2, mut a2);
let (mut r3, mut g3, mut b3, mut a3);
if let (Some(src0), Some(src1)) = (src_iter0.next(), src_iter1.next()) {
let r0p = &self.profile.linear[src0[src_cn.r_i()]._as_usize()];
let g0p = &self.profile.linear[src0[src_cn.g_i()]._as_usize()];
let b0p = &self.profile.linear[src0[src_cn.b_i()]._as_usize()];
let r1p = &self.profile.linear[src0[src_cn.r_i() + src_channels]._as_usize()];
let g1p = &self.profile.linear[src0[src_cn.g_i() + src_channels]._as_usize()];
let b1p = &self.profile.linear[src0[src_cn.b_i() + src_channels]._as_usize()];
let r2p = &self.profile.linear[src1[src_cn.r_i()]._as_usize()];
let g2p = &self.profile.linear[src1[src_cn.g_i()]._as_usize()];
let b2p = &self.profile.linear[src1[src_cn.b_i()]._as_usize()];
let r3p = &self.profile.linear[src1[src_cn.r_i() + src_channels]._as_usize()];
let g3p = &self.profile.linear[src1[src_cn.g_i() + src_channels]._as_usize()];
let b3p = &self.profile.linear[src1[src_cn.b_i() + src_channels]._as_usize()];
r0 = vld1q_dup_s32(r0p);
g0 = vld1q_dup_s32(g0p);
b0 = vld1q_dup_s32(b0p);
r1 = vld1q_dup_s32(r1p);
g1 = vld1q_dup_s32(g1p);
b1 = vld1q_dup_s32(b1p);
r2 = vld1q_dup_s32(r2p);
g2 = vld1q_dup_s32(g2p);
b2 = vld1q_dup_s32(b2p);
r3 = vld1q_dup_s32(r3p);
g3 = vld1q_dup_s32(g3p);
b3 = vld1q_dup_s32(b3p);
a0 = if src_channels == 4 {
src0[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src0[src_cn.a_i() + src_channels]
} else {
max_colors
};
a2 = if src_channels == 4 {
src1[src_cn.a_i()]
} else {
max_colors
};
a3 = if src_channels == 4 {
src1[src_cn.a_i() + src_channels]
} else {
max_colors
};
} else {
r0 = vdupq_n_s32(0);
g0 = vdupq_n_s32(0);
b0 = vdupq_n_s32(0);
r1 = vdupq_n_s32(0);
g1 = vdupq_n_s32(0);
b1 = vdupq_n_s32(0);
r2 = vdupq_n_s32(0);
g2 = vdupq_n_s32(0);
b2 = vdupq_n_s32(0);
r3 = vdupq_n_s32(0);
g3 = vdupq_n_s32(0);
b3 = vdupq_n_s32(0);
a0 = max_colors;
a1 = max_colors;
a2 = max_colors;
a3 = max_colors;
}
for (((src0, src1), dst0), dst1) in src_iter0
.zip(src_iter1)
.zip(dst0.chunks_exact_mut(dst_channels * 2))
.zip(dst1.chunks_exact_mut(dst_channels * 2))
{
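// `vqrdmulhq_s32`/`vqrdmlahq_s32` (RDM extension) perform a saturating
// rounding doubling multiply-high, roughly (2 * a * b + (1 << 31)) >> 32,
// applying the Q1.30 matrix coefficients without a separate shift step.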
let v0_0 = vqrdmulhq_s32(r0, m0);
let v0_1 = vqrdmulhq_s32(r1, m0);
let v0_2 = vqrdmulhq_s32(r2, m0);
let v0_3 = vqrdmulhq_s32(r3, m0);
let v1_0 = vqrdmlahq_s32(v0_0, g0, m1);
let v1_1 = vqrdmlahq_s32(v0_1, g1, m1);
let v1_2 = vqrdmlahq_s32(v0_2, g2, m1);
let v1_3 = vqrdmlahq_s32(v0_3, g3, m1);
let vr0 = vqrdmlahq_s32(v1_0, b0, m2);
let vr1 = vqrdmlahq_s32(v1_1, b1, m2);
let vr2 = vqrdmlahq_s32(v1_2, b2, m2);
let vr3 = vqrdmlahq_s32(v1_3, b3, m2);
let mut vr0 = vqmovun_s32(vr0);
let mut vr1 = vqmovun_s32(vr1);
let mut vr2 = vqmovun_s32(vr2);
let mut vr3 = vqmovun_s32(vr3);
if BIT_DEPTH != 16 {
vr0 = vmin_u16(vr0, v_max_value);
vr1 = vmin_u16(vr1, v_max_value);
vr2 = vmin_u16(vr2, v_max_value);
vr3 = vmin_u16(vr3, v_max_value);
}
let r0p = &self.profile.linear[src0[src_cn.r_i()]._as_usize()];
let g0p = &self.profile.linear[src0[src_cn.g_i()]._as_usize()];
let b0p = &self.profile.linear[src0[src_cn.b_i()]._as_usize()];
let r1p = &self.profile.linear[src0[src_cn.r_i() + src_channels]._as_usize()];
let g1p = &self.profile.linear[src0[src_cn.g_i() + src_channels]._as_usize()];
let b1p = &self.profile.linear[src0[src_cn.b_i() + src_channels]._as_usize()];
let r2p = &self.profile.linear[src1[src_cn.r_i()]._as_usize()];
let g2p = &self.profile.linear[src1[src_cn.g_i()]._as_usize()];
let b2p = &self.profile.linear[src1[src_cn.b_i()]._as_usize()];
let r3p = &self.profile.linear[src1[src_cn.r_i() + src_channels]._as_usize()];
let g3p = &self.profile.linear[src1[src_cn.g_i() + src_channels]._as_usize()];
let b3p = &self.profile.linear[src1[src_cn.b_i() + src_channels]._as_usize()];
r0 = vld1q_dup_s32(r0p);
g0 = vld1q_dup_s32(g0p);
b0 = vld1q_dup_s32(b0p);
r1 = vld1q_dup_s32(r1p);
g1 = vld1q_dup_s32(g1p);
b1 = vld1q_dup_s32(b1p);
r2 = vld1q_dup_s32(r2p);
g2 = vld1q_dup_s32(g2p);
b2 = vld1q_dup_s32(b2p);
r3 = vld1q_dup_s32(r3p);
g3 = vld1q_dup_s32(g3p);
b3 = vld1q_dup_s32(b3p);
dst0[dst_cn.r_i()] = self.profile.gamma[vget_lane_u16::<0>(vr0) as usize];
dst0[dst_cn.g_i()] = self.profile.gamma[vget_lane_u16::<1>(vr0) as usize];
dst0[dst_cn.b_i()] = self.profile.gamma[vget_lane_u16::<2>(vr0) as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i()] = a0;
}
dst0[dst_cn.r_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<0>(vr1) as usize];
dst0[dst_cn.g_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<1>(vr1) as usize];
dst0[dst_cn.b_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<2>(vr1) as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i() + dst_channels] = a1;
}
dst1[dst_cn.r_i()] = self.profile.gamma[vget_lane_u16::<0>(vr2) as usize];
dst1[dst_cn.g_i()] = self.profile.gamma[vget_lane_u16::<1>(vr2) as usize];
dst1[dst_cn.b_i()] = self.profile.gamma[vget_lane_u16::<2>(vr2) as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i()] = a2;
}
dst1[dst_cn.r_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<0>(vr3) as usize];
dst1[dst_cn.g_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<1>(vr3) as usize];
dst1[dst_cn.b_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<2>(vr3) as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i() + dst_channels] = a3;
}
a0 = if src_channels == 4 {
src0[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src0[src_cn.a_i() + src_channels]
} else {
max_colors
};
a2 = if src_channels == 4 {
src1[src_cn.a_i()]
} else {
max_colors
};
a3 = if src_channels == 4 {
src1[src_cn.a_i() + src_channels]
} else {
max_colors
};
}
if let (Some(dst0), Some(dst1)) = (
dst0.chunks_exact_mut(dst_channels * 2).last(),
dst1.chunks_exact_mut(dst_channels * 2).last(),
) {
let v0_0 = vqrdmulhq_s32(r0, m0);
let v0_1 = vqrdmulhq_s32(r1, m0);
let v0_2 = vqrdmulhq_s32(r2, m0);
let v0_3 = vqrdmulhq_s32(r3, m0);
let v1_0 = vqrdmlahq_s32(v0_0, g0, m1);
let v1_1 = vqrdmlahq_s32(v0_1, g1, m1);
let v1_2 = vqrdmlahq_s32(v0_2, g2, m1);
let v1_3 = vqrdmlahq_s32(v0_3, g3, m1);
let vr0 = vqrdmlahq_s32(v1_0, b0, m2);
let vr1 = vqrdmlahq_s32(v1_1, b1, m2);
let vr2 = vqrdmlahq_s32(v1_2, b2, m2);
let vr3 = vqrdmlahq_s32(v1_3, b3, m2);
let mut vr0 = vqmovun_s32(vr0);
let mut vr1 = vqmovun_s32(vr1);
let mut vr2 = vqmovun_s32(vr2);
let mut vr3 = vqmovun_s32(vr3);
if BIT_DEPTH != 16 {
vr0 = vmin_u16(vr0, v_max_value);
vr1 = vmin_u16(vr1, v_max_value);
vr2 = vmin_u16(vr2, v_max_value);
vr3 = vmin_u16(vr3, v_max_value);
}
dst0[dst_cn.r_i()] = self.profile.gamma[vget_lane_u16::<0>(vr0) as usize];
dst0[dst_cn.g_i()] = self.profile.gamma[vget_lane_u16::<1>(vr0) as usize];
dst0[dst_cn.b_i()] = self.profile.gamma[vget_lane_u16::<2>(vr0) as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i()] = a0;
}
dst0[dst_cn.r_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<0>(vr1) as usize];
dst0[dst_cn.g_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<1>(vr1) as usize];
dst0[dst_cn.b_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<2>(vr1) as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i() + dst_channels] = a1;
}
dst1[dst_cn.r_i()] = self.profile.gamma[vget_lane_u16::<0>(vr2) as usize];
dst1[dst_cn.g_i()] = self.profile.gamma[vget_lane_u16::<1>(vr2) as usize];
dst1[dst_cn.b_i()] = self.profile.gamma[vget_lane_u16::<2>(vr2) as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i()] = a2;
}
dst1[dst_cn.r_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<0>(vr3) as usize];
dst1[dst_cn.g_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<1>(vr3) as usize];
dst1[dst_cn.b_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<2>(vr3) as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i() + dst_channels] = a3;
}
}
}
for (src, dst) in src_remainder
.chunks_exact(src_channels)
.zip(dst_remainder.chunks_exact_mut(dst_channels))
{
let rp = &self.profile.linear[src[src_cn.r_i()]._as_usize()];
let gp = &self.profile.linear[src[src_cn.g_i()]._as_usize()];
let bp = &self.profile.linear[src[src_cn.b_i()]._as_usize()];
let r = vld1q_dup_s32(rp);
let g = vld1q_dup_s32(gp);
let b = vld1q_dup_s32(bp);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let v0 = vqrdmulhq_s32(r, m0);
let v1 = vqrdmlahq_s32(v0, g, m1);
let v = vqrdmlahq_s32(v1, b, m2);
let mut vr0 = vqmovun_s32(v);
if BIT_DEPTH != 16 {
vr0 = vmin_u16(vr0, v_max_value);
}
dst[dst_cn.r_i()] = self.profile.gamma[vget_lane_u16::<0>(vr0) as usize];
dst[dst_cn.g_i()] = self.profile.gamma[vget_lane_u16::<1>(vr0) as usize];
dst[dst_cn.b_i()] = self.profile.gamma[vget_lane_u16::<2>(vr0) as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
Ok(())
}
}
impl<
T: Copy + PointeeSizeExpressible + 'static + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
const PRECISION: i32,
> TransformExecutor<T>
for TransformShaperQ1_30NeonOpt<
T,
SRC_LAYOUT,
DST_LAYOUT,
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
PRECISION,
>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
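// Safety: `transform_impl` is compiled with `#[target_feature(enable = "rdm")]`;
// the construction site is expected to have verified RDM support, e.g. via
// `std::arch::is_aarch64_feature_detected!("rdm")`.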
unsafe { self.transform_impl(src, dst) }
}
}

View File

@@ -0,0 +1,412 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::rgbxyz_fixed::TransformMatrixShaperFixedPoint;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::aarch64::*;
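/// Splits `data` so the head covers a whole multiple of four pixels (the
/// kernels drain two pixels from each of two half-slices per iteration,
/// despite the name); the tail is left for the scalar remainder loop.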
#[allow(dead_code)]
#[inline]
pub(crate) fn split_by_twos<T: Copy>(data: &[T], channels: usize) -> (&[T], &[T]) {
let len = data.len() / (channels * 4);
let split_point = len * 4;
data.split_at(split_point * channels)
}
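/// Mutable counterpart of `split_by_twos`, using the same split point.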
#[allow(dead_code)]
#[inline]
pub(crate) fn split_by_twos_mut<T: Copy>(data: &mut [T], channels: usize) -> (&mut [T], &mut [T]) {
let len = data.len() / (channels * 4);
let split_point = len * 4;
data.split_at_mut(split_point * channels)
}
pub(crate) struct TransformShaperQ2_13Neon<
T: Copy,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> {
pub(crate) profile: TransformMatrixShaperFixedPoint<i16, T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
impl<
T: Copy + PointeeSizeExpressible + 'static + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> TransformExecutor<T>
for TransformShaperQ2_13Neon<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT, PRECISION>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let t = self.profile.adaptation_matrix.transpose();
let max_colors: T = ((1 << self.bit_depth) - 1).as_();
let (src_chunks, src_remainder) = split_by_twos(src, src_channels);
let (dst_chunks, dst_remainder) = split_by_twos_mut(dst, dst_channels);
unsafe {
let m0 = vld1_s16([t.v[0][0], t.v[0][1], t.v[0][2], 0].as_ptr());
let m1 = vld1_s16([t.v[1][0], t.v[1][1], t.v[1][2], 0].as_ptr());
let m2 = vld1_s16([t.v[2][0], t.v[2][1], t.v[2][2], 0].as_ptr());
let v_max_value = vdup_n_u16((GAMMA_LUT - 1) as u16);
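// Seeding the widening accumulators with 1 << (PRECISION - 1) makes the later
// `vqshrun_n_s32::<PRECISION>` narrowing shift round to nearest instead of
// truncating.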
let rnd = vdupq_n_s32(1 << (PRECISION - 1));
if !src_chunks.is_empty() {
let (src0, src1) = src_chunks.split_at(src_chunks.len() / 2);
let (dst0, dst1) = dst_chunks.split_at_mut(dst_chunks.len() / 2);
let mut src_iter0 = src0.chunks_exact(src_channels * 2);
let mut src_iter1 = src1.chunks_exact(src_channels * 2);
let (mut r0, mut g0, mut b0, mut a0);
let (mut r1, mut g1, mut b1, mut a1);
let (mut r2, mut g2, mut b2, mut a2);
let (mut r3, mut g3, mut b3, mut a3);
if let (Some(src0), Some(src1)) = (src_iter0.next(), src_iter1.next()) {
let r0p = &self.profile.r_linear[src0[src_cn.r_i()]._as_usize()];
let g0p = &self.profile.g_linear[src0[src_cn.g_i()]._as_usize()];
let b0p = &self.profile.b_linear[src0[src_cn.b_i()]._as_usize()];
let r1p = &self.profile.r_linear[src0[src_cn.r_i() + src_channels]._as_usize()];
let g1p = &self.profile.g_linear[src0[src_cn.g_i() + src_channels]._as_usize()];
let b1p = &self.profile.b_linear[src0[src_cn.b_i() + src_channels]._as_usize()];
let r2p = &self.profile.r_linear[src1[src_cn.r_i()]._as_usize()];
let g2p = &self.profile.g_linear[src1[src_cn.g_i()]._as_usize()];
let b2p = &self.profile.b_linear[src1[src_cn.b_i()]._as_usize()];
let r3p = &self.profile.r_linear[src1[src_cn.r_i() + src_channels]._as_usize()];
let g3p = &self.profile.g_linear[src1[src_cn.g_i() + src_channels]._as_usize()];
let b3p = &self.profile.b_linear[src1[src_cn.b_i() + src_channels]._as_usize()];
r0 = vld1_dup_s16(r0p);
g0 = vld1_dup_s16(g0p);
b0 = vld1_dup_s16(b0p);
r1 = vld1_dup_s16(r1p);
g1 = vld1_dup_s16(g1p);
b1 = vld1_dup_s16(b1p);
r2 = vld1_dup_s16(r2p);
g2 = vld1_dup_s16(g2p);
b2 = vld1_dup_s16(b2p);
r3 = vld1_dup_s16(r3p);
g3 = vld1_dup_s16(g3p);
b3 = vld1_dup_s16(b3p);
a0 = if src_channels == 4 {
src0[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src0[src_cn.a_i() + src_channels]
} else {
max_colors
};
a2 = if src_channels == 4 {
src1[src_cn.a_i()]
} else {
max_colors
};
a3 = if src_channels == 4 {
src1[src_cn.a_i() + src_channels]
} else {
max_colors
};
} else {
r0 = vdup_n_s16(0);
g0 = vdup_n_s16(0);
b0 = vdup_n_s16(0);
r1 = vdup_n_s16(0);
g1 = vdup_n_s16(0);
b1 = vdup_n_s16(0);
r2 = vdup_n_s16(0);
g2 = vdup_n_s16(0);
b2 = vdup_n_s16(0);
r3 = vdup_n_s16(0);
g3 = vdup_n_s16(0);
b3 = vdup_n_s16(0);
a0 = max_colors;
a1 = max_colors;
a2 = max_colors;
a3 = max_colors;
}
for (((src0, src1), dst0), dst1) in src_iter0
.zip(src_iter1)
.zip(dst0.chunks_exact_mut(dst_channels * 2))
.zip(dst1.chunks_exact_mut(dst_channels * 2))
{
let v0_0 = vmlal_s16(rnd, r0, m0);
let v0_1 = vmlal_s16(rnd, r1, m0);
let v0_2 = vmlal_s16(rnd, r2, m0);
let v0_3 = vmlal_s16(rnd, r3, m0);
let v1_0 = vmlal_s16(v0_0, g0, m1);
let v1_1 = vmlal_s16(v0_1, g1, m1);
let v1_2 = vmlal_s16(v0_2, g2, m1);
let v1_3 = vmlal_s16(v0_3, g3, m1);
let vr0 = vmlal_s16(v1_0, b0, m2);
let vr1 = vmlal_s16(v1_1, b1, m2);
let vr2 = vmlal_s16(v1_2, b2, m2);
let vr3 = vmlal_s16(v1_3, b3, m2);
let mut vr0 = vqshrun_n_s32::<PRECISION>(vr0);
let mut vr1 = vqshrun_n_s32::<PRECISION>(vr1);
let mut vr2 = vqshrun_n_s32::<PRECISION>(vr2);
let mut vr3 = vqshrun_n_s32::<PRECISION>(vr3);
vr0 = vmin_u16(vr0, v_max_value);
vr1 = vmin_u16(vr1, v_max_value);
vr2 = vmin_u16(vr2, v_max_value);
vr3 = vmin_u16(vr3, v_max_value);
let r0p = &self.profile.r_linear[src0[src_cn.r_i()]._as_usize()];
let g0p = &self.profile.g_linear[src0[src_cn.g_i()]._as_usize()];
let b0p = &self.profile.b_linear[src0[src_cn.b_i()]._as_usize()];
let r1p = &self.profile.r_linear[src0[src_cn.r_i() + src_channels]._as_usize()];
let g1p = &self.profile.g_linear[src0[src_cn.g_i() + src_channels]._as_usize()];
let b1p = &self.profile.b_linear[src0[src_cn.b_i() + src_channels]._as_usize()];
let r2p = &self.profile.r_linear[src1[src_cn.r_i()]._as_usize()];
let g2p = &self.profile.g_linear[src1[src_cn.g_i()]._as_usize()];
let b2p = &self.profile.b_linear[src1[src_cn.b_i()]._as_usize()];
let r3p = &self.profile.r_linear[src1[src_cn.r_i() + src_channels]._as_usize()];
let g3p = &self.profile.g_linear[src1[src_cn.g_i() + src_channels]._as_usize()];
let b3p = &self.profile.b_linear[src1[src_cn.b_i() + src_channels]._as_usize()];
r0 = vld1_dup_s16(r0p);
g0 = vld1_dup_s16(g0p);
b0 = vld1_dup_s16(b0p);
r1 = vld1_dup_s16(r1p);
g1 = vld1_dup_s16(g1p);
b1 = vld1_dup_s16(b1p);
r2 = vld1_dup_s16(r2p);
g2 = vld1_dup_s16(g2p);
b2 = vld1_dup_s16(b2p);
r3 = vld1_dup_s16(r3p);
g3 = vld1_dup_s16(g3p);
b3 = vld1_dup_s16(b3p);
dst0[dst_cn.r_i()] = self.profile.r_gamma[vget_lane_u16::<0>(vr0) as usize];
dst0[dst_cn.g_i()] = self.profile.g_gamma[vget_lane_u16::<1>(vr0) as usize];
dst0[dst_cn.b_i()] = self.profile.b_gamma[vget_lane_u16::<2>(vr0) as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i()] = a0;
}
dst0[dst_cn.r_i() + dst_channels] =
self.profile.r_gamma[vget_lane_u16::<0>(vr1) as usize];
dst0[dst_cn.g_i() + dst_channels] =
self.profile.g_gamma[vget_lane_u16::<1>(vr1) as usize];
dst0[dst_cn.b_i() + dst_channels] =
self.profile.b_gamma[vget_lane_u16::<2>(vr1) as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i() + dst_channels] = a1;
}
dst1[dst_cn.r_i()] = self.profile.r_gamma[vget_lane_u16::<0>(vr2) as usize];
dst1[dst_cn.g_i()] = self.profile.g_gamma[vget_lane_u16::<1>(vr2) as usize];
dst1[dst_cn.b_i()] = self.profile.b_gamma[vget_lane_u16::<2>(vr2) as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i()] = a2;
}
dst1[dst_cn.r_i() + dst_channels] =
self.profile.r_gamma[vget_lane_u16::<0>(vr3) as usize];
dst1[dst_cn.g_i() + dst_channels] =
self.profile.g_gamma[vget_lane_u16::<1>(vr3) as usize];
dst1[dst_cn.b_i() + dst_channels] =
self.profile.b_gamma[vget_lane_u16::<2>(vr3) as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i() + dst_channels] = a3;
}
a0 = if src_channels == 4 {
src0[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src0[src_cn.a_i() + src_channels]
} else {
max_colors
};
a2 = if src_channels == 4 {
src1[src_cn.a_i()]
} else {
max_colors
};
a3 = if src_channels == 4 {
src1[src_cn.a_i() + src_channels]
} else {
max_colors
};
}
if let (Some(dst0), Some(dst1)) = (
dst0.chunks_exact_mut(dst_channels * 2).last(),
dst1.chunks_exact_mut(dst_channels * 2).last(),
) {
let v0_0 = vmlal_s16(rnd, r0, m0);
let v0_1 = vmlal_s16(rnd, r1, m0);
let v0_2 = vmlal_s16(rnd, r2, m0);
let v0_3 = vmlal_s16(rnd, r3, m0);
let v1_0 = vmlal_s16(v0_0, g0, m1);
let v1_1 = vmlal_s16(v0_1, g1, m1);
let v1_2 = vmlal_s16(v0_2, g2, m1);
let v1_3 = vmlal_s16(v0_3, g3, m1);
let vr0 = vmlal_s16(v1_0, b0, m2);
let vr1 = vmlal_s16(v1_1, b1, m2);
let vr2 = vmlal_s16(v1_2, b2, m2);
let vr3 = vmlal_s16(v1_3, b3, m2);
let mut vr0 = vqshrun_n_s32::<PRECISION>(vr0);
let mut vr1 = vqshrun_n_s32::<PRECISION>(vr1);
let mut vr2 = vqshrun_n_s32::<PRECISION>(vr2);
let mut vr3 = vqshrun_n_s32::<PRECISION>(vr3);
vr0 = vmin_u16(vr0, v_max_value);
vr1 = vmin_u16(vr1, v_max_value);
vr2 = vmin_u16(vr2, v_max_value);
vr3 = vmin_u16(vr3, v_max_value);
dst0[dst_cn.r_i()] = self.profile.r_gamma[vget_lane_u16::<0>(vr0) as usize];
dst0[dst_cn.g_i()] = self.profile.g_gamma[vget_lane_u16::<1>(vr0) as usize];
dst0[dst_cn.b_i()] = self.profile.b_gamma[vget_lane_u16::<2>(vr0) as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i()] = a0;
}
dst0[dst_cn.r_i() + dst_channels] =
self.profile.r_gamma[vget_lane_u16::<0>(vr1) as usize];
dst0[dst_cn.g_i() + dst_channels] =
self.profile.g_gamma[vget_lane_u16::<1>(vr1) as usize];
dst0[dst_cn.b_i() + dst_channels] =
self.profile.b_gamma[vget_lane_u16::<2>(vr1) as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i() + dst_channels] = a1;
}
dst1[dst_cn.r_i()] = self.profile.r_gamma[vget_lane_u16::<0>(vr2) as usize];
dst1[dst_cn.g_i()] = self.profile.g_gamma[vget_lane_u16::<1>(vr2) as usize];
dst1[dst_cn.b_i()] = self.profile.b_gamma[vget_lane_u16::<2>(vr2) as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i()] = a2;
}
dst1[dst_cn.r_i() + dst_channels] =
self.profile.r_gamma[vget_lane_u16::<0>(vr3) as usize];
dst1[dst_cn.g_i() + dst_channels] =
self.profile.g_gamma[vget_lane_u16::<1>(vr3) as usize];
dst1[dst_cn.b_i() + dst_channels] =
self.profile.b_gamma[vget_lane_u16::<2>(vr3) as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i() + dst_channels] = a3;
}
}
}
for (src, dst) in src_remainder
.chunks_exact(src_channels)
.zip(dst_remainder.chunks_exact_mut(dst_channels))
{
let rp = &self.profile.r_linear[src[src_cn.r_i()]._as_usize()];
let gp = &self.profile.g_linear[src[src_cn.g_i()]._as_usize()];
let bp = &self.profile.b_linear[src[src_cn.b_i()]._as_usize()];
let r = vld1_dup_s16(rp);
let g = vld1_dup_s16(gp);
let b = vld1_dup_s16(bp);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let v0 = vmlal_s16(rnd, r, m0);
let v1 = vmlal_s16(v0, g, m1);
let v = vmlal_s16(v1, b, m2);
let mut vr0 = vqshrun_n_s32::<PRECISION>(v);
vr0 = vmin_u16(vr0, v_max_value);
dst[dst_cn.r_i()] = self.profile.r_gamma[vget_lane_u16::<0>(vr0) as usize];
dst[dst_cn.g_i()] = self.profile.g_gamma[vget_lane_u16::<1>(vr0) as usize];
dst[dst_cn.b_i()] = self.profile.b_gamma[vget_lane_u16::<2>(vr0) as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
Ok(())
}
}

View File

@@ -0,0 +1,397 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::neon::rgb_xyz_q2_13::{split_by_twos, split_by_twos_mut};
use crate::conversions::rgbxyz_fixed::TransformMatrixShaperFixedPointOpt;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::aarch64::*;
pub(crate) struct TransformShaperQ2_13NeonOpt<
T: Copy,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> {
pub(crate) profile: TransformMatrixShaperFixedPointOpt<i16, i16, T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
impl<
T: Copy + PointeeSizeExpressible + 'static + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> TransformExecutor<T>
for TransformShaperQ2_13NeonOpt<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT, PRECISION>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let t = self.profile.adaptation_matrix.transpose();
let max_colors: T = ((1 << self.bit_depth) - 1).as_();
let (src_chunks, src_remainder) = split_by_twos(src, src_channels);
let (dst_chunks, dst_remainder) = split_by_twos_mut(dst, dst_channels);
unsafe {
let m0 = vld1_s16([t.v[0][0], t.v[0][1], t.v[0][2], 0].as_ptr());
let m1 = vld1_s16([t.v[1][0], t.v[1][1], t.v[1][2], 0].as_ptr());
let m2 = vld1_s16([t.v[2][0], t.v[2][1], t.v[2][2], 0].as_ptr());
let v_max_value = vdup_n_u16((GAMMA_LUT - 1) as u16);
let rnd = vdupq_n_s32(1 << (PRECISION - 1));
if !src_chunks.is_empty() {
let (src0, src1) = src_chunks.split_at(src_chunks.len() / 2);
let (dst0, dst1) = dst_chunks.split_at_mut(dst_chunks.len() / 2);
let mut src_iter0 = src0.chunks_exact(src_channels * 2);
let mut src_iter1 = src1.chunks_exact(src_channels * 2);
let (mut r0, mut g0, mut b0, mut a0);
let (mut r1, mut g1, mut b1, mut a1);
let (mut r2, mut g2, mut b2, mut a2);
let (mut r3, mut g3, mut b3, mut a3);
if let (Some(src0), Some(src1)) = (src_iter0.next(), src_iter1.next()) {
let r0p = &self.profile.linear[src0[src_cn.r_i()]._as_usize()];
let g0p = &self.profile.linear[src0[src_cn.g_i()]._as_usize()];
let b0p = &self.profile.linear[src0[src_cn.b_i()]._as_usize()];
let r1p = &self.profile.linear[src0[src_cn.r_i() + src_channels]._as_usize()];
let g1p = &self.profile.linear[src0[src_cn.g_i() + src_channels]._as_usize()];
let b1p = &self.profile.linear[src0[src_cn.b_i() + src_channels]._as_usize()];
let r2p = &self.profile.linear[src1[src_cn.r_i()]._as_usize()];
let g2p = &self.profile.linear[src1[src_cn.g_i()]._as_usize()];
let b2p = &self.profile.linear[src1[src_cn.b_i()]._as_usize()];
let r3p = &self.profile.linear[src1[src_cn.r_i() + src_channels]._as_usize()];
let g3p = &self.profile.linear[src1[src_cn.g_i() + src_channels]._as_usize()];
let b3p = &self.profile.linear[src1[src_cn.b_i() + src_channels]._as_usize()];
r0 = vld1_dup_s16(r0p);
g0 = vld1_dup_s16(g0p);
b0 = vld1_dup_s16(b0p);
r1 = vld1_dup_s16(r1p);
g1 = vld1_dup_s16(g1p);
b1 = vld1_dup_s16(b1p);
r2 = vld1_dup_s16(r2p);
g2 = vld1_dup_s16(g2p);
b2 = vld1_dup_s16(b2p);
r3 = vld1_dup_s16(r3p);
g3 = vld1_dup_s16(g3p);
b3 = vld1_dup_s16(b3p);
a0 = if src_channels == 4 {
src0[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src0[src_cn.a_i() + src_channels]
} else {
max_colors
};
a2 = if src_channels == 4 {
src1[src_cn.a_i()]
} else {
max_colors
};
a3 = if src_channels == 4 {
src1[src_cn.a_i() + src_channels]
} else {
max_colors
};
} else {
r0 = vdup_n_s16(0);
g0 = vdup_n_s16(0);
b0 = vdup_n_s16(0);
r1 = vdup_n_s16(0);
g1 = vdup_n_s16(0);
b1 = vdup_n_s16(0);
r2 = vdup_n_s16(0);
g2 = vdup_n_s16(0);
b2 = vdup_n_s16(0);
r3 = vdup_n_s16(0);
g3 = vdup_n_s16(0);
b3 = vdup_n_s16(0);
a0 = max_colors;
a1 = max_colors;
a2 = max_colors;
a3 = max_colors;
}
for (((src0, src1), dst0), dst1) in src_iter0
.zip(src_iter1)
.zip(dst0.chunks_exact_mut(dst_channels * 2))
.zip(dst1.chunks_exact_mut(dst_channels * 2))
{
let v0_0 = vmlal_s16(rnd, r0, m0);
let v0_1 = vmlal_s16(rnd, r1, m0);
let v0_2 = vmlal_s16(rnd, r2, m0);
let v0_3 = vmlal_s16(rnd, r3, m0);
let v1_0 = vmlal_s16(v0_0, g0, m1);
let v1_1 = vmlal_s16(v0_1, g1, m1);
let v1_2 = vmlal_s16(v0_2, g2, m1);
let v1_3 = vmlal_s16(v0_3, g3, m1);
let vr0 = vmlal_s16(v1_0, b0, m2);
let vr1 = vmlal_s16(v1_1, b1, m2);
let vr2 = vmlal_s16(v1_2, b2, m2);
let vr3 = vmlal_s16(v1_3, b3, m2);
let mut vr0 = vqshrun_n_s32::<PRECISION>(vr0);
let mut vr1 = vqshrun_n_s32::<PRECISION>(vr1);
let mut vr2 = vqshrun_n_s32::<PRECISION>(vr2);
let mut vr3 = vqshrun_n_s32::<PRECISION>(vr3);
vr0 = vmin_u16(vr0, v_max_value);
vr1 = vmin_u16(vr1, v_max_value);
vr2 = vmin_u16(vr2, v_max_value);
vr3 = vmin_u16(vr3, v_max_value);
let r0p = &self.profile.linear[src0[src_cn.r_i()]._as_usize()];
let g0p = &self.profile.linear[src0[src_cn.g_i()]._as_usize()];
let b0p = &self.profile.linear[src0[src_cn.b_i()]._as_usize()];
let r1p = &self.profile.linear[src0[src_cn.r_i() + src_channels]._as_usize()];
let g1p = &self.profile.linear[src0[src_cn.g_i() + src_channels]._as_usize()];
let b1p = &self.profile.linear[src0[src_cn.b_i() + src_channels]._as_usize()];
let r2p = &self.profile.linear[src1[src_cn.r_i()]._as_usize()];
let g2p = &self.profile.linear[src1[src_cn.g_i()]._as_usize()];
let b2p = &self.profile.linear[src1[src_cn.b_i()]._as_usize()];
let r3p = &self.profile.linear[src1[src_cn.r_i() + src_channels]._as_usize()];
let g3p = &self.profile.linear[src1[src_cn.g_i() + src_channels]._as_usize()];
let b3p = &self.profile.linear[src1[src_cn.b_i() + src_channels]._as_usize()];
r0 = vld1_dup_s16(r0p);
g0 = vld1_dup_s16(g0p);
b0 = vld1_dup_s16(b0p);
r1 = vld1_dup_s16(r1p);
g1 = vld1_dup_s16(g1p);
b1 = vld1_dup_s16(b1p);
r2 = vld1_dup_s16(r2p);
g2 = vld1_dup_s16(g2p);
b2 = vld1_dup_s16(b2p);
r3 = vld1_dup_s16(r3p);
g3 = vld1_dup_s16(g3p);
b3 = vld1_dup_s16(b3p);
dst0[dst_cn.r_i()] = self.profile.gamma[vget_lane_u16::<0>(vr0) as usize];
dst0[dst_cn.g_i()] = self.profile.gamma[vget_lane_u16::<1>(vr0) as usize];
dst0[dst_cn.b_i()] = self.profile.gamma[vget_lane_u16::<2>(vr0) as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i()] = a0;
}
dst0[dst_cn.r_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<0>(vr1) as usize];
dst0[dst_cn.g_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<1>(vr1) as usize];
dst0[dst_cn.b_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<2>(vr1) as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i() + dst_channels] = a1;
}
dst1[dst_cn.r_i()] = self.profile.gamma[vget_lane_u16::<0>(vr2) as usize];
dst1[dst_cn.g_i()] = self.profile.gamma[vget_lane_u16::<1>(vr2) as usize];
dst1[dst_cn.b_i()] = self.profile.gamma[vget_lane_u16::<2>(vr2) as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i()] = a2;
}
dst1[dst_cn.r_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<0>(vr3) as usize];
dst1[dst_cn.g_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<1>(vr3) as usize];
dst1[dst_cn.b_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<2>(vr3) as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i() + dst_channels] = a3;
}
a0 = if src_channels == 4 {
src0[src_cn.a_i()]
} else {
max_colors
};
a1 = if src_channels == 4 {
src0[src_cn.a_i() + src_channels]
} else {
max_colors
};
a2 = if src_channels == 4 {
src1[src_cn.a_i()]
} else {
max_colors
};
a3 = if src_channels == 4 {
src1[src_cn.a_i() + src_channels]
} else {
max_colors
};
}
if let (Some(dst0), Some(dst1)) = (
dst0.chunks_exact_mut(dst_channels * 2).last(),
dst1.chunks_exact_mut(dst_channels * 2).last(),
) {
let v0_0 = vmlal_s16(rnd, r0, m0);
let v0_1 = vmlal_s16(rnd, r1, m0);
let v0_2 = vmlal_s16(rnd, r2, m0);
let v0_3 = vmlal_s16(rnd, r3, m0);
let v1_0 = vmlal_s16(v0_0, g0, m1);
let v1_1 = vmlal_s16(v0_1, g1, m1);
let v1_2 = vmlal_s16(v0_2, g2, m1);
let v1_3 = vmlal_s16(v0_3, g3, m1);
let vr0 = vmlal_s16(v1_0, b0, m2);
let vr1 = vmlal_s16(v1_1, b1, m2);
let vr2 = vmlal_s16(v1_2, b2, m2);
let vr3 = vmlal_s16(v1_3, b3, m2);
let mut vr0 = vqshrun_n_s32::<PRECISION>(vr0);
let mut vr1 = vqshrun_n_s32::<PRECISION>(vr1);
let mut vr2 = vqshrun_n_s32::<PRECISION>(vr2);
let mut vr3 = vqshrun_n_s32::<PRECISION>(vr3);
vr0 = vmin_u16(vr0, v_max_value);
vr1 = vmin_u16(vr1, v_max_value);
vr2 = vmin_u16(vr2, v_max_value);
vr3 = vmin_u16(vr3, v_max_value);
dst0[dst_cn.r_i()] = self.profile.gamma[vget_lane_u16::<0>(vr0) as usize];
dst0[dst_cn.g_i()] = self.profile.gamma[vget_lane_u16::<1>(vr0) as usize];
dst0[dst_cn.b_i()] = self.profile.gamma[vget_lane_u16::<2>(vr0) as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i()] = a0;
}
dst0[dst_cn.r_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<0>(vr1) as usize];
dst0[dst_cn.g_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<1>(vr1) as usize];
dst0[dst_cn.b_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<2>(vr1) as usize];
if dst_channels == 4 {
dst0[dst_cn.a_i() + dst_channels] = a1;
}
dst1[dst_cn.r_i()] = self.profile.gamma[vget_lane_u16::<0>(vr2) as usize];
dst1[dst_cn.g_i()] = self.profile.gamma[vget_lane_u16::<1>(vr2) as usize];
dst1[dst_cn.b_i()] = self.profile.gamma[vget_lane_u16::<2>(vr2) as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i()] = a2;
}
dst1[dst_cn.r_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<0>(vr3) as usize];
dst1[dst_cn.g_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<1>(vr3) as usize];
dst1[dst_cn.b_i() + dst_channels] =
self.profile.gamma[vget_lane_u16::<2>(vr3) as usize];
if dst_channels == 4 {
dst1[dst_cn.a_i() + dst_channels] = a3;
}
}
}
for (src, dst) in src_remainder
.chunks_exact(src_channels)
.zip(dst_remainder.chunks_exact_mut(dst_channels))
{
let rp = &self.profile.linear[src[src_cn.r_i()]._as_usize()];
let gp = &self.profile.linear[src[src_cn.g_i()]._as_usize()];
let bp = &self.profile.linear[src[src_cn.b_i()]._as_usize()];
let r = vld1_dup_s16(rp);
let g = vld1_dup_s16(gp);
let b = vld1_dup_s16(bp);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let v0 = vmlal_s16(rnd, r, m0);
let v1 = vmlal_s16(v0, g, m1);
let v = vmlal_s16(v1, b, m2);
let mut vr0 = vqshrun_n_s32::<PRECISION>(v);
vr0 = vmin_u16(vr0, v_max_value);
dst[dst_cn.r_i()] = self.profile.gamma[vget_lane_u16::<0>(vr0) as usize];
dst[dst_cn.g_i()] = self.profile.gamma[vget_lane_u16::<1>(vr0) as usize];
dst[dst_cn.b_i()] = self.profile.gamma[vget_lane_u16::<2>(vr0) as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
Ok(())
}
}

View File

@@ -0,0 +1,335 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::interpolator::BarycentricWeight;
use crate::conversions::lut_transforms::Lut3x3Factory;
use crate::conversions::neon::interpolator::*;
use crate::conversions::neon::interpolator_q0_15::NeonAlignedI16x4;
use crate::conversions::neon::rgb_xyz::NeonAlignedF32;
use crate::conversions::neon::t_lut3_to_3_q0_15::TransformLut3x3NeonQ0_15;
use crate::transform::PointeeSizeExpressible;
use crate::{
BarycentricWeightScale, CmsError, DataColorSpace, InterpolationMethod, Layout,
TransformExecutor, TransformOptions,
};
use num_traits::AsPrimitive;
use std::arch::aarch64::*;
use std::marker::PhantomData;
struct TransformLut3x3Neon<
T,
U,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> {
lut: Vec<NeonAlignedF32>,
_phantom: PhantomData<T>,
_phantom1: PhantomData<U>,
interpolation_method: InterpolationMethod,
weights: Box<[BarycentricWeight<f32>; BINS]>,
color_space: DataColorSpace,
is_linear: bool,
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformLut3x3Neon<T, U, SRC_LAYOUT, DST_LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
#[inline(always)]
fn transform_chunk<'b, Interpolator: NeonMdInterpolation<'b, GRID_SIZE>>(
&'b self,
src: &[T],
dst: &mut [T],
) {
unsafe {
let src_cn = Layout::from(SRC_LAYOUT);
let src_channels = src_cn.channels();
let dst_cn = Layout::from(DST_LAYOUT);
let dst_channels = dst_cn.channels();
let value_scale = vdupq_n_f32(((1 << BIT_DEPTH) - 1) as f32);
let max_value = ((1u32 << BIT_DEPTH) - 1).as_();
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let x = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.r_i()],
);
let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.g_i()],
);
let z = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.b_i()],
);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_value
};
let tetrahedral = Interpolator::new(&self.lut);
let v = tetrahedral.inter3_neon(x, y, z, &self.weights);
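// For integer sample types the interpolated value is rescaled to the target
// bit depth and rounded half-up (add 0.5, clamp, truncate); float sample
// types pass the result through unscaled.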
if T::FINITE {
let mut r = vfmaq_f32(vdupq_n_f32(0.5f32), v.v, value_scale);
r = vminq_f32(r, value_scale);
let jvx = vcvtq_u32_f32(r);
dst[dst_cn.r_i()] = vgetq_lane_u32::<0>(jvx).as_();
dst[dst_cn.g_i()] = vgetq_lane_u32::<1>(jvx).as_();
dst[dst_cn.b_i()] = vgetq_lane_u32::<2>(jvx).as_();
} else {
dst[dst_cn.r_i()] = vgetq_lane_f32::<0>(v.v).as_();
dst[dst_cn.g_i()] = vgetq_lane_f32::<1>(v.v).as_();
dst[dst_cn.b_i()] = vgetq_lane_f32::<2>(v.v).as_();
}
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
}
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformExecutor<T>
for TransformLut3x3Neon<
T,
U,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
BINS,
BARYCENTRIC_BINS,
>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let src_channels = src_cn.channels();
let dst_cn = Layout::from(DST_LAYOUT);
let dst_channels = dst_cn.channels();
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / src_channels;
let dst_chunks = dst.len() / dst_channels;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
if self.color_space == DataColorSpace::Lab
|| (self.is_linear && self.color_space == DataColorSpace::Rgb)
|| self.color_space == DataColorSpace::Xyz
{
self.transform_chunk::<TrilinearNeon<GRID_SIZE>>(src, dst);
} else {
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_chunk::<TetrahedralNeon<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_chunk::<PyramidalNeon<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_chunk::<PrismaticNeon<GRID_SIZE>>(src, dst);
}
InterpolationMethod::Linear => {
self.transform_chunk::<TrilinearNeon<GRID_SIZE>>(src, dst);
}
}
}
Ok(())
}
}
pub(crate) struct NeonLut3x3Factory {}
impl Lut3x3Factory for NeonLut3x3Factory {
fn make_transform_3x3<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible + 'static + Send + Sync,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
>(
lut: Vec<f32>,
options: TransformOptions,
color_space: DataColorSpace,
is_linear: bool,
) -> Box<dyn TransformExecutor<T> + Send + Sync>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, u8>,
(): LutBarycentricReduction<T, u16>,
{
if options.prefer_fixed_point
&& BIT_DEPTH < 16
&& std::arch::is_aarch64_feature_detected!("rdm")
{
let q: f32 = if T::FINITE {
((1i32 << BIT_DEPTH as i32) - 1) as f32
} else {
((1i32 << 14i32) - 1) as f32
};
let lut = lut
.chunks_exact(3)
.map(|x| {
NeonAlignedI16x4([
(x[0] * q).round() as i16,
(x[1] * q).round() as i16,
(x[2] * q).round() as i16,
0,
])
})
.collect::<Vec<_>>();
return match options.barycentric_weight_scale {
BarycentricWeightScale::Low => Box::new(TransformLut3x3NeonQ0_15::<
T,
u8,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
256,
256,
> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<i16>::create_ranged_256::<GRID_SIZE>(),
color_space,
is_linear,
}),
#[cfg(feature = "options")]
BarycentricWeightScale::High => Box::new(TransformLut3x3NeonQ0_15::<
T,
u16,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
65536,
65536,
> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<i16>::create_binned::<GRID_SIZE, 65536>(),
color_space,
is_linear,
}),
};
}
let lut = lut
.chunks_exact(3)
.map(|x| NeonAlignedF32([x[0], x[1], x[2], 0f32]))
.collect::<Vec<_>>();
match options.barycentric_weight_scale {
BarycentricWeightScale::Low => Box::new(TransformLut3x3Neon::<
T,
u8,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
256,
256,
> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_ranged_256::<GRID_SIZE>(),
color_space,
is_linear,
}),
#[cfg(feature = "options")]
BarycentricWeightScale::High => Box::new(TransformLut3x3Neon::<
T,
u16,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
65536,
65536,
> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_binned::<GRID_SIZE, 65536>(),
color_space,
is_linear,
}),
}
}
}
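// Illustrative sketch (not crate code) of the Q0.15-style LUT quantization in
// `make_transform_3x3` above: each normalized LUT entry is scaled into i16.
// For finite sample types the scale is the bit-depth maximum; for float lanes
// it is 2^14 - 1, which leaves headroom for the saturating-doubling "rdm"
// NEON multiplies.
#[allow(dead_code)]
fn quantize_lut_entry(x: f32, q: f32) -> i16 {
    (x * q).round() as i16
}
// e.g. quantize_lut_entry(0.5, ((1i32 << 14) - 1) as f32) == 8192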


@@ -0,0 +1,219 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::interpolator::BarycentricWeight;
use crate::conversions::neon::interpolator_q0_15::*;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, DataColorSpace, InterpolationMethod, Layout, TransformExecutor};
use num_traits::AsPrimitive;
use std::arch::aarch64::*;
use std::marker::PhantomData;
pub(crate) struct TransformLut3x3NeonQ0_15<
T,
U,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> {
pub(crate) lut: Vec<NeonAlignedI16x4>,
pub(crate) _phantom: PhantomData<T>,
pub(crate) _phantom1: PhantomData<U>,
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) weights: Box<[BarycentricWeight<i16>; BINS]>,
pub(crate) color_space: DataColorSpace,
pub(crate) is_linear: bool,
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
>
TransformLut3x3NeonQ0_15<
T,
U,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
BINS,
BARYCENTRIC_BINS,
>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
#[allow(unused_unsafe)]
#[target_feature(enable = "rdm")]
unsafe fn transform_chunk<'b, Interpolator: NeonMdInterpolationQ0_15<'b, GRID_SIZE>>(
&'b self,
src: &[T],
dst: &mut [T],
) {
unsafe {
let src_cn = Layout::from(SRC_LAYOUT);
let src_channels = src_cn.channels();
let dst_cn = Layout::from(DST_LAYOUT);
let dst_channels = dst_cn.channels();
let f_value_scale = vdupq_n_f32(1. / ((1 << 14i32) - 1) as f32);
let max_value = ((1u32 << BIT_DEPTH) - 1).as_();
let v_max_scale = if T::FINITE {
vdup_n_s16(((1i32 << BIT_DEPTH) - 1) as i16)
} else {
vdup_n_s16(((1i32 << 14i32) - 1) as i16)
};
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let x = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.r_i()],
);
let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.g_i()],
);
let z = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.b_i()],
);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_value
};
let tetrahedral = Interpolator::new(&self.lut);
let v = tetrahedral.inter3_neon(x, y, z, &self.weights);
if T::FINITE {
let mut o = vmax_s16(v.v, vdup_n_s16(0));
o = vmin_s16(o, v_max_scale);
dst[dst_cn.r_i()] = (vget_lane_s16::<0>(o) as u32).as_();
dst[dst_cn.g_i()] = (vget_lane_s16::<1>(o) as u32).as_();
dst[dst_cn.b_i()] = (vget_lane_s16::<2>(o) as u32).as_();
} else {
let o = vcvtq_f32_s32(vmovl_s16(v.v));
let r = vmulq_f32(o, f_value_scale);
dst[dst_cn.r_i()] = vgetq_lane_f32::<0>(r).as_();
dst[dst_cn.g_i()] = vgetq_lane_f32::<1>(r).as_();
dst[dst_cn.b_i()] = vgetq_lane_f32::<2>(r).as_();
}
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
}
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformExecutor<T>
for TransformLut3x3NeonQ0_15<
T,
U,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
BINS,
BARYCENTRIC_BINS,
>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let src_channels = src_cn.channels();
let dst_cn = Layout::from(DST_LAYOUT);
let dst_channels = dst_cn.channels();
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / src_channels;
let dst_chunks = dst.len() / dst_channels;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
unsafe {
if self.color_space == DataColorSpace::Lab
|| (self.is_linear && self.color_space == DataColorSpace::Rgb)
|| self.color_space == DataColorSpace::Xyz
{
self.transform_chunk::<TrilinearNeonQ0_15<GRID_SIZE>>(src, dst);
} else {
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_chunk::<TetrahedralNeonQ0_15<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_chunk::<PyramidalNeonQ0_15<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_chunk::<PrismaticNeonQ0_15<GRID_SIZE>>(src, dst);
}
InterpolationMethod::Linear => {
self.transform_chunk::<TrilinearNeonQ0_15<GRID_SIZE>>(src, dst);
}
}
}
}
Ok(())
}
}
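// Note on the `#[target_feature(enable = "rdm")]` functions above: callers
// must verify SQRDMLAH support at runtime before dispatching into this
// executor, as the NEON factory does. A minimal sketch of that check:
#[allow(dead_code)]
#[cfg(target_arch = "aarch64")]
fn have_rdm() -> bool {
    std::arch::is_aarch64_feature_detected!("rdm")
}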


@@ -0,0 +1,327 @@
/*
* // Copyright (c) Radzivon Bartoshyk 4/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::lut3x4::create_lut3_samples;
use crate::mlaf::mlaf;
use crate::trc::ToneCurveEvaluator;
use crate::{
CmsError, ColorProfile, GammaLutInterpolate, InPlaceStage, Matrix3f, PointeeSizeExpressible,
RenderingIntent, Rgb, TransformOptions, filmlike_clip,
};
use num_traits::AsPrimitive;
use std::marker::PhantomData;
pub(crate) struct XyzToRgbStage<T: Clone> {
pub(crate) r_gamma: Box<[T; 65536]>,
pub(crate) g_gamma: Box<[T; 65536]>,
pub(crate) b_gamma: Box<[T; 65536]>,
pub(crate) matrices: Vec<Matrix3f>,
pub(crate) intent: RenderingIntent,
pub(crate) bit_depth: usize,
pub(crate) gamma_lut: usize,
}
impl<T: Clone + AsPrimitive<f32>> InPlaceStage for XyzToRgbStage<T> {
fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
assert!(self.bit_depth > 0);
if !self.matrices.is_empty() {
let m = self.matrices[0];
for dst in dst.chunks_exact_mut(3) {
let x = dst[0];
let y = dst[1];
let z = dst[2];
dst[0] = mlaf(mlaf(x * m.v[0][0], y, m.v[0][1]), z, m.v[0][2]);
dst[1] = mlaf(mlaf(x * m.v[1][0], y, m.v[1][1]), z, m.v[1][2]);
dst[2] = mlaf(mlaf(x * m.v[2][0], y, m.v[2][1]), z, m.v[2][2]);
}
}
for m in self.matrices.iter().skip(1) {
for dst in dst.chunks_exact_mut(3) {
let x = dst[0];
let y = dst[1];
let z = dst[2];
dst[0] = mlaf(mlaf(x * m.v[0][0], y, m.v[0][1]), z, m.v[0][2]);
dst[1] = mlaf(mlaf(x * m.v[1][0], y, m.v[1][1]), z, m.v[1][2]);
dst[2] = mlaf(mlaf(x * m.v[2][0], y, m.v[2][1]), z, m.v[2][2]);
}
}
let max_colors = (1 << self.bit_depth) - 1;
let color_scale = 1f32 / max_colors as f32;
let lut_cap = (self.gamma_lut - 1) as f32;
if self.intent != RenderingIntent::AbsoluteColorimetric {
for dst in dst.chunks_exact_mut(3) {
let mut rgb = Rgb::new(dst[0], dst[1], dst[2]);
if rgb.is_out_of_gamut() {
rgb = filmlike_clip(rgb);
}
let r = mlaf(0.5f32, rgb.r, lut_cap).min(lut_cap).max(0f32) as u16;
let g = mlaf(0.5f32, rgb.g, lut_cap).min(lut_cap).max(0f32) as u16;
let b = mlaf(0.5f32, rgb.b, lut_cap).min(lut_cap).max(0f32) as u16;
dst[0] = self.r_gamma[r as usize].as_() * color_scale;
dst[1] = self.g_gamma[g as usize].as_() * color_scale;
dst[2] = self.b_gamma[b as usize].as_() * color_scale;
}
} else {
for dst in dst.chunks_exact_mut(3) {
let rgb = Rgb::new(dst[0], dst[1], dst[2]);
let r = mlaf(0.5f32, rgb.r, lut_cap).min(lut_cap).max(0f32) as u16;
let g = mlaf(0.5f32, rgb.g, lut_cap).min(lut_cap).max(0f32) as u16;
let b = mlaf(0.5f32, rgb.b, lut_cap).min(lut_cap).max(0f32) as u16;
dst[0] = self.r_gamma[r as usize].as_() * color_scale;
dst[1] = self.g_gamma[g as usize].as_() * color_scale;
dst[2] = self.b_gamma[b as usize].as_() * color_scale;
}
}
Ok(())
}
}
pub(crate) struct XyzToRgbStageExtended<T: Clone> {
pub(crate) gamma_evaluator: Box<dyn ToneCurveEvaluator>,
pub(crate) matrices: Vec<Matrix3f>,
pub(crate) phantom_data: PhantomData<T>,
}
impl<T: Clone + AsPrimitive<f32>> InPlaceStage for XyzToRgbStageExtended<T> {
fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
if !self.matrices.is_empty() {
let m = self.matrices[0];
for dst in dst.chunks_exact_mut(3) {
let x = dst[0];
let y = dst[1];
let z = dst[2];
dst[0] = mlaf(mlaf(x * m.v[0][0], y, m.v[0][1]), z, m.v[0][2]);
dst[1] = mlaf(mlaf(x * m.v[1][0], y, m.v[1][1]), z, m.v[1][2]);
dst[2] = mlaf(mlaf(x * m.v[2][0], y, m.v[2][1]), z, m.v[2][2]);
}
}
for m in self.matrices.iter().skip(1) {
for dst in dst.chunks_exact_mut(3) {
let x = dst[0];
let y = dst[1];
let z = dst[2];
dst[0] = mlaf(mlaf(x * m.v[0][0], y, m.v[0][1]), z, m.v[0][2]);
dst[1] = mlaf(mlaf(x * m.v[1][0], y, m.v[1][1]), z, m.v[1][2]);
dst[2] = mlaf(mlaf(x * m.v[2][0], y, m.v[2][1]), z, m.v[2][2]);
}
}
for dst in dst.chunks_exact_mut(3) {
let mut rgb = Rgb::new(dst[0], dst[1], dst[2]);
rgb = self.gamma_evaluator.evaluate_tristimulus(rgb);
dst[0] = rgb.r.as_();
dst[1] = rgb.g.as_();
dst[2] = rgb.b.as_();
}
Ok(())
}
}
struct RgbLinearizationStage<T: Clone, const LINEAR_CAP: usize, const SAMPLES: usize> {
r_lin: Box<[f32; LINEAR_CAP]>,
g_lin: Box<[f32; LINEAR_CAP]>,
b_lin: Box<[f32; LINEAR_CAP]>,
_phantom: PhantomData<T>,
bit_depth: usize,
}
impl<
T: Clone + AsPrimitive<usize> + PointeeSizeExpressible,
const LINEAR_CAP: usize,
const SAMPLES: usize,
> RgbLinearizationStage<T, LINEAR_CAP, SAMPLES>
{
fn transform(&self, src: &[T], dst: &mut [f32]) -> Result<(), CmsError> {
if src.len() % 3 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % 3 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let scale = if T::FINITE {
((1 << self.bit_depth) - 1) as f32 / (SAMPLES as f32 - 1f32)
} else {
(T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32 / (SAMPLES as f32 - 1f32)
};
let capped_value = if T::FINITE {
(1 << self.bit_depth) - 1
} else {
T::NOT_FINITE_LINEAR_TABLE_SIZE - 1
};
for (src, dst) in src.chunks_exact(3).zip(dst.chunks_exact_mut(3)) {
let j_r = src[0].as_() as f32 * scale;
let j_g = src[1].as_() as f32 * scale;
let j_b = src[2].as_() as f32 * scale;
dst[0] = self.r_lin[(j_r.round().max(0.0).min(capped_value as f32) as u16) as usize];
dst[1] = self.g_lin[(j_g.round().max(0.0).min(capped_value as f32) as u16) as usize];
dst[2] = self.b_lin[(j_b.round().max(0.0).min(capped_value as f32) as u16) as usize];
}
Ok(())
}
}
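// Scalar sketch of the lookup in `RgbLinearizationStage::transform` above: a
// grid sample is rescaled from SAMPLES space into the linear table's index
// space, rounded, and clamped before indexing.
#[allow(dead_code)]
fn lin_index(sample: f32, scale: f32, cap: usize) -> usize {
    (sample * scale).round().clamp(0.0, cap as f32) as usize
}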
pub(crate) fn create_rgb_lin_lut<
T: Copy + Default + AsPrimitive<f32> + Send + Sync + AsPrimitive<usize> + PointeeSizeExpressible,
const BIT_DEPTH: usize,
const LINEAR_CAP: usize,
const GRID_SIZE: usize,
>(
source: &ColorProfile,
opts: TransformOptions,
) -> Result<Vec<f32>, CmsError>
where
u32: AsPrimitive<T>,
f32: AsPrimitive<T>,
{
let lut_origins = create_lut3_samples::<T, GRID_SIZE>();
let lin_r =
source.build_r_linearize_table::<T, LINEAR_CAP, BIT_DEPTH>(opts.allow_use_cicp_transfer)?;
let lin_g =
source.build_g_linearize_table::<T, LINEAR_CAP, BIT_DEPTH>(opts.allow_use_cicp_transfer)?;
let lin_b =
source.build_b_linearize_table::<T, LINEAR_CAP, BIT_DEPTH>(opts.allow_use_cicp_transfer)?;
let lin_stage = RgbLinearizationStage::<T, LINEAR_CAP, GRID_SIZE> {
r_lin: lin_r,
g_lin: lin_g,
b_lin: lin_b,
_phantom: PhantomData,
bit_depth: BIT_DEPTH,
};
let mut lut = vec![0f32; lut_origins.len()];
lin_stage.transform(&lut_origins, &mut lut)?;
let xyz_to_rgb = source.rgb_to_xyz_matrix();
let matrices = vec![
xyz_to_rgb.to_f32(),
Matrix3f {
v: [
[32768.0 / 65535.0, 0.0, 0.0],
[0.0, 32768.0 / 65535.0, 0.0],
[0.0, 0.0, 32768.0 / 65535.0],
],
},
];
let matrix_stage = crate::conversions::lut_transforms::MatrixStage { matrices };
matrix_stage.transform(&mut lut)?;
Ok(lut)
}
pub(crate) fn prepare_inverse_lut_rgb_xyz<
T: Copy
+ Default
+ AsPrimitive<f32>
+ Send
+ Sync
+ AsPrimitive<usize>
+ PointeeSizeExpressible
+ GammaLutInterpolate,
const BIT_DEPTH: usize,
const GAMMA_LUT: usize,
>(
dest: &ColorProfile,
lut: &mut [f32],
options: TransformOptions,
) -> Result<(), CmsError>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
{
if !T::FINITE {
if let Some(extended_gamma) = dest.try_extended_gamma_evaluator() {
let xyz_to_rgb = dest.rgb_to_xyz_matrix().inverse();
let mut matrices = vec![Matrix3f {
v: [
[65535.0 / 32768.0, 0.0, 0.0],
[0.0, 65535.0 / 32768.0, 0.0],
[0.0, 0.0, 65535.0 / 32768.0],
],
}];
matrices.push(xyz_to_rgb.to_f32());
let xyz_to_rgb_stage = XyzToRgbStageExtended::<T> {
gamma_evaluator: extended_gamma,
matrices,
phantom_data: PhantomData,
};
xyz_to_rgb_stage.transform(lut)?;
return Ok(());
}
}
let gamma_map_r = dest.build_gamma_table::<T, 65536, GAMMA_LUT, BIT_DEPTH>(
&dest.red_trc,
options.allow_use_cicp_transfer,
)?;
let gamma_map_g = dest.build_gamma_table::<T, 65536, GAMMA_LUT, BIT_DEPTH>(
&dest.green_trc,
options.allow_use_cicp_transfer,
)?;
let gamma_map_b = dest.build_gamma_table::<T, 65536, GAMMA_LUT, BIT_DEPTH>(
&dest.blue_trc,
options.allow_use_cicp_transfer,
)?;
let xyz_to_rgb = dest.rgb_to_xyz_matrix().inverse();
let mut matrices = vec![Matrix3f {
v: [
[65535.0 / 32768.0, 0.0, 0.0],
[0.0, 65535.0 / 32768.0, 0.0],
[0.0, 0.0, 65535.0 / 32768.0],
],
}];
matrices.push(xyz_to_rgb.to_f32());
let xyz_to_rgb_stage = XyzToRgbStage::<T> {
r_gamma: gamma_map_r,
g_gamma: gamma_map_g,
b_gamma: gamma_map_b,
matrices,
intent: options.rendering_intent,
gamma_lut: GAMMA_LUT,
bit_depth: BIT_DEPTH,
};
xyz_to_rgb_stage.transform(lut)?;
Ok(())
}
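// Dependency-free sketch of the per-pixel 3x3 matrix step used by the stages
// above; the crate's `mlaf` is a fused multiply-add, so `f32::mul_add` stands
// in for it here. The 32768/65535 diagonal matrices appear to fold the PCS
// XYZ integer encoding scale into the same pass.
#[allow(dead_code)]
fn apply_matrix3(m: [[f32; 3]; 3], v: [f32; 3]) -> [f32; 3] {
    let [x, y, z] = v;
    // Each row computes x*r0 + y*r1 + z*r2 with fused multiply-adds.
    let row = |r: [f32; 3]| z.mul_add(r[2], y.mul_add(r[1], x * r[0]));
    [row(m[0]), row(m[1]), row(m[2])]
}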


@@ -0,0 +1,190 @@
/*
* // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::mlaf::mlaf;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor, Vector3f};
use num_traits::AsPrimitive;
#[derive(Clone)]
pub(crate) struct ToneReproductionRgbToGray<T, const BUCKET: usize> {
pub(crate) r_linear: Box<[f32; BUCKET]>,
pub(crate) g_linear: Box<[f32; BUCKET]>,
pub(crate) b_linear: Box<[f32; BUCKET]>,
pub(crate) gray_gamma: Box<[T; 65536]>,
}
#[derive(Clone)]
struct TransformRgbToGrayExecutor<
T,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const BUCKET: usize,
const GAMMA_LUT: usize,
> {
trc_box: ToneReproductionRgbToGray<T, BUCKET>,
weights: Vector3f,
bit_depth: usize,
}
pub(crate) fn make_rgb_to_gray<
T: Copy + Default + PointeeSizeExpressible + Send + Sync + 'static,
const BUCKET: usize,
const BIT_DEPTH: usize,
const GAMMA_LUT: usize,
>(
src_layout: Layout,
dst_layout: Layout,
trc: ToneReproductionRgbToGray<T, BUCKET>,
weights: Vector3f,
) -> Box<dyn TransformExecutor<T> + Send + Sync>
where
u32: AsPrimitive<T>,
{
match src_layout {
Layout::Rgb => match dst_layout {
Layout::Rgb => unreachable!(),
Layout::Rgba => unreachable!(),
Layout::Gray => Box::new(TransformRgbToGrayExecutor::<
T,
{ Layout::Rgb as u8 },
{ Layout::Gray as u8 },
BUCKET,
GAMMA_LUT,
> {
trc_box: trc,
weights,
bit_depth: BIT_DEPTH,
}),
Layout::GrayAlpha => Box::new(TransformRgbToGrayExecutor::<
T,
{ Layout::Rgb as u8 },
{ Layout::GrayAlpha as u8 },
BUCKET,
GAMMA_LUT,
> {
trc_box: trc,
weights,
bit_depth: BIT_DEPTH,
}),
_ => unreachable!(),
},
Layout::Rgba => match dst_layout {
Layout::Rgb => unreachable!(),
Layout::Rgba => unreachable!(),
Layout::Gray => Box::new(TransformRgbToGrayExecutor::<
T,
{ Layout::Rgba as u8 },
{ Layout::Gray as u8 },
BUCKET,
GAMMA_LUT,
> {
trc_box: trc,
weights,
bit_depth: BIT_DEPTH,
}),
Layout::GrayAlpha => Box::new(TransformRgbToGrayExecutor::<
T,
{ Layout::Rgba as u8 },
{ Layout::GrayAlpha as u8 },
BUCKET,
GAMMA_LUT,
> {
trc_box: trc,
weights,
bit_depth: BIT_DEPTH,
}),
_ => unreachable!(),
},
Layout::Gray => unreachable!(),
Layout::GrayAlpha => unreachable!(),
_ => unreachable!(),
}
}
impl<
T: Copy + Default + PointeeSizeExpressible + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const BUCKET: usize,
const GAMMA_LUT: usize,
> TransformExecutor<T> for TransformRgbToGrayExecutor<T, SRC_LAYOUT, DST_LAYOUT, BUCKET, GAMMA_LUT>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let scale_value = (GAMMA_LUT - 1) as f32;
let max_value = ((1u32 << self.bit_depth) - 1).as_();
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let r = self.trc_box.r_linear[src[src_cn.r_i()]._as_usize()];
let g = self.trc_box.g_linear[src[src_cn.g_i()]._as_usize()];
let b = self.trc_box.b_linear[src[src_cn.b_i()]._as_usize()];
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_value
};
let grey = mlaf(
0.5,
mlaf(
mlaf(self.weights.v[0] * r, self.weights.v[1], g),
self.weights.v[2],
b,
)
.min(1.)
.max(0.),
scale_value,
);
dst[0] = self.trc_box.gray_gamma[(grey as u16) as usize];
if dst_channels == 2 {
dst[1] = a;
}
}
Ok(())
}
}
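// Scalar sketch of the gray reduction above: linearized R, G, B are combined
// with the profile's luminance weights, clamped to [0, 1], and mapped into
// the gamma LUT domain with a +0.5 rounding bias.
#[allow(dead_code)]
fn gray_lut_index(rgb: [f32; 3], w: [f32; 3], gamma_lut: usize) -> usize {
    let y = (w[0] * rgb[0] + w[1] * rgb[1] + w[2] * rgb[2]).clamp(0.0, 1.0);
    (y * (gamma_lut - 1) as f32 + 0.5) as u16 as usize
}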


@@ -0,0 +1,181 @@
/*
* // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::mlaf::mlaf;
use crate::transform::PointeeSizeExpressible;
use crate::trc::ToneCurveEvaluator;
use crate::{CmsError, Layout, Rgb, TransformExecutor, Vector3f};
use num_traits::AsPrimitive;
use std::marker::PhantomData;
struct TransformRgbToGrayExtendedExecutor<T, const SRC_LAYOUT: u8, const DST_LAYOUT: u8> {
linear_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
gamma_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
weights: Vector3f,
_phantom: PhantomData<T>,
bit_depth: usize,
}
pub(crate) fn make_rgb_to_gray_extended<
T: Copy + Default + PointeeSizeExpressible + Send + Sync + 'static + AsPrimitive<f32>,
>(
src_layout: Layout,
dst_layout: Layout,
linear_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
gamma_eval: Box<dyn ToneCurveEvaluator + Send + Sync>,
weights: Vector3f,
bit_depth: usize,
) -> Box<dyn TransformExecutor<T> + Send + Sync>
where
u32: AsPrimitive<T>,
f32: AsPrimitive<T>,
{
match src_layout {
Layout::Rgb => match dst_layout {
Layout::Rgb => unreachable!(),
Layout::Rgba => unreachable!(),
Layout::Gray => Box::new(TransformRgbToGrayExtendedExecutor::<
T,
{ Layout::Rgb as u8 },
{ Layout::Gray as u8 },
> {
linear_eval,
gamma_eval,
weights,
_phantom: PhantomData,
bit_depth,
}),
Layout::GrayAlpha => Box::new(TransformRgbToGrayExtendedExecutor::<
T,
{ Layout::Rgb as u8 },
{ Layout::GrayAlpha as u8 },
> {
linear_eval,
gamma_eval,
weights,
_phantom: PhantomData,
bit_depth,
}),
_ => unreachable!(),
},
Layout::Rgba => match dst_layout {
Layout::Rgb => unreachable!(),
Layout::Rgba => unreachable!(),
Layout::Gray => Box::new(TransformRgbToGrayExtendedExecutor::<
T,
{ Layout::Rgba as u8 },
{ Layout::Gray as u8 },
> {
linear_eval,
gamma_eval,
weights,
_phantom: PhantomData,
bit_depth,
}),
Layout::GrayAlpha => Box::new(TransformRgbToGrayExtendedExecutor::<
T,
{ Layout::Rgba as u8 },
{ Layout::GrayAlpha as u8 },
> {
linear_eval,
gamma_eval,
weights,
_phantom: PhantomData,
bit_depth,
}),
_ => unreachable!(),
},
Layout::Gray => unreachable!(),
Layout::GrayAlpha => unreachable!(),
_ => unreachable!(),
}
}
impl<
T: Copy + Default + PointeeSizeExpressible + 'static + AsPrimitive<f32>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
> TransformExecutor<T> for TransformRgbToGrayExtendedExecutor<T, SRC_LAYOUT, DST_LAYOUT>
where
u32: AsPrimitive<T>,
f32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let max_value = ((1u32 << self.bit_depth) - 1).as_();
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let in_tristimulus = Rgb::<f32>::new(
src[src_cn.r_i()].as_(),
src[src_cn.g_i()].as_(),
src[src_cn.b_i()].as_(),
);
let lin_tristimulus = self.linear_eval.evaluate_tristimulus(in_tristimulus);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_value
};
let grey = mlaf(
mlaf(
self.weights.v[0] * lin_tristimulus.r,
self.weights.v[1],
lin_tristimulus.g,
),
self.weights.v[2],
lin_tristimulus.b,
)
.min(1.)
.max(0.);
let gamma_value = self.gamma_eval.evaluate_value(grey);
dst[0] = gamma_value.as_();
if dst_channels == 2 {
dst[1] = a;
}
}
Ok(())
}
}
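// Contrast with the LUT-based executor above: here gray stays in normalized
// f32 and the tone-curve evaluator produces the encoded value directly, so
// no +0.5 LUT bias is applied. A stand-in sketch with a closure in place of
// the crate's `ToneCurveEvaluator` trait object:
#[allow(dead_code)]
fn gray_extended(rgb: [f32; 3], w: [f32; 3], gamma: impl Fn(f32) -> f32) -> f32 {
    let y = (w[0] * rgb[0] + w[1] * rgb[1] + w[2] * rgb[2]).clamp(0.0, 1.0);
    gamma(y)
}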


@@ -0,0 +1,437 @@
/*
* // Copyright (c) Radzivon Bartoshyk 4/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::TransformMatrixShaper;
use crate::conversions::rgbxyz::{
TransformMatrixShaperOptimized, make_rgb_xyz_rgb_transform, make_rgb_xyz_rgb_transform_opt,
};
use crate::conversions::rgbxyz_fixed::{make_rgb_xyz_q2_13, make_rgb_xyz_q2_13_opt};
use crate::{CmsError, Layout, TransformExecutor, TransformOptions};
use num_traits::AsPrimitive;
const FIXED_POINT_SCALE: i32 = 13; // Q2.13
pub(crate) trait RgbXyzFactory<T: Clone + AsPrimitive<usize> + Default> {
fn make_transform<const LINEAR_CAP: usize, const GAMMA_LUT: usize, const BIT_DEPTH: usize>(
src_layout: Layout,
dst_layout: Layout,
profile: TransformMatrixShaper<T, LINEAR_CAP>,
transform_options: TransformOptions,
) -> Result<Box<dyn TransformExecutor<T> + Send + Sync>, CmsError>;
}
pub(crate) trait RgbXyzFactoryOpt<T: Clone + AsPrimitive<usize> + Default> {
fn make_optimized_transform<
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
>(
src_layout: Layout,
dst_layout: Layout,
profile: TransformMatrixShaperOptimized<T, LINEAR_CAP>,
transform_options: TransformOptions,
) -> Result<Box<dyn TransformExecutor<T> + Send + Sync>, CmsError>;
}
impl RgbXyzFactory<u16> for u16 {
fn make_transform<const LINEAR_CAP: usize, const GAMMA_LUT: usize, const BIT_DEPTH: usize>(
src_layout: Layout,
dst_layout: Layout,
profile: TransformMatrixShaper<u16, LINEAR_CAP>,
transform_options: TransformOptions,
) -> Result<Box<dyn TransformExecutor<u16> + Send + Sync>, CmsError> {
if BIT_DEPTH < 16 && transform_options.prefer_fixed_point {
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
{
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_avx2;
if std::arch::is_x86_feature_detected!("avx2") {
return make_rgb_xyz_q2_13_transform_avx2::<
u16,
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
{
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_sse_41;
if std::arch::is_x86_feature_detected!("sse4.1") {
return make_rgb_xyz_q2_13_transform_sse_41::<
u16,
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
{
return make_rgb_xyz_q2_13::<
u16,
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
make_rgb_xyz_rgb_transform::<u16, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH>(
src_layout, dst_layout, profile,
)
}
}
impl RgbXyzFactory<f32> for f32 {
fn make_transform<const LINEAR_CAP: usize, const GAMMA_LUT: usize, const BIT_DEPTH: usize>(
src_layout: Layout,
dst_layout: Layout,
profile: TransformMatrixShaper<f32, LINEAR_CAP>,
transform_options: TransformOptions,
) -> Result<Box<dyn TransformExecutor<f32> + Send + Sync>, CmsError> {
if transform_options.prefer_fixed_point {
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
{
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_avx2;
if std::arch::is_x86_feature_detected!("avx2") {
return make_rgb_xyz_q2_13_transform_avx2::<
f32,
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
{
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_sse_41;
if std::arch::is_x86_feature_detected!("sse4.1") {
return make_rgb_xyz_q2_13_transform_sse_41::<
f32,
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
{
return make_rgb_xyz_q2_13::<
f32,
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
make_rgb_xyz_rgb_transform::<f32, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH>(
src_layout, dst_layout, profile,
)
}
}
impl RgbXyzFactory<f64> for f64 {
fn make_transform<const LINEAR_CAP: usize, const GAMMA_LUT: usize, const BIT_DEPTH: usize>(
src_layout: Layout,
dst_layout: Layout,
profile: TransformMatrixShaper<f64, LINEAR_CAP>,
_: TransformOptions,
) -> Result<Box<dyn TransformExecutor<f64> + Send + Sync>, CmsError> {
make_rgb_xyz_rgb_transform::<f64, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH>(
src_layout, dst_layout, profile,
)
}
}
impl RgbXyzFactory<u8> for u8 {
fn make_transform<const LINEAR_CAP: usize, const GAMMA_LUT: usize, const BIT_DEPTH: usize>(
src_layout: Layout,
dst_layout: Layout,
profile: TransformMatrixShaper<u8, LINEAR_CAP>,
transform_options: TransformOptions,
) -> Result<Box<dyn TransformExecutor<u8> + Send + Sync>, CmsError> {
if transform_options.prefer_fixed_point {
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
{
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_avx2;
if std::arch::is_x86_feature_detected!("avx2") {
return make_rgb_xyz_q2_13_transform_avx2::<
u8,
LINEAR_CAP,
GAMMA_LUT,
8,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
{
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_sse_41;
if std::arch::is_x86_feature_detected!("sse4.1") {
return make_rgb_xyz_q2_13_transform_sse_41::<
u8,
LINEAR_CAP,
GAMMA_LUT,
8,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
make_rgb_xyz_q2_13::<u8, LINEAR_CAP, GAMMA_LUT, 8, FIXED_POINT_SCALE>(
src_layout, dst_layout, profile,
)
} else {
make_rgb_xyz_rgb_transform::<u8, LINEAR_CAP, GAMMA_LUT, 8>(
src_layout, dst_layout, profile,
)
}
}
}
// Optimized factories
impl RgbXyzFactoryOpt<u16> for u16 {
fn make_optimized_transform<
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
>(
src_layout: Layout,
dst_layout: Layout,
profile: TransformMatrixShaperOptimized<u16, LINEAR_CAP>,
transform_options: TransformOptions,
) -> Result<Box<dyn TransformExecutor<u16> + Send + Sync>, CmsError> {
if BIT_DEPTH >= 12 && transform_options.prefer_fixed_point {
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
{
if std::arch::is_aarch64_feature_detected!("rdm") {
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q1_30_opt;
return make_rgb_xyz_q1_30_opt::<u16, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH, 30>(
src_layout, dst_layout, profile,
);
}
}
}
if BIT_DEPTH < 16 && transform_options.prefer_fixed_point {
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
{
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_avx2_opt;
if std::arch::is_x86_feature_detected!("avx2") {
return make_rgb_xyz_q2_13_transform_avx2_opt::<
u16,
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
{
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_sse_41_opt;
if std::arch::is_x86_feature_detected!("sse4.1") {
return make_rgb_xyz_q2_13_transform_sse_41_opt::<
u16,
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
{
return make_rgb_xyz_q2_13_opt::<
u16,
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
make_rgb_xyz_rgb_transform_opt::<u16, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH>(
src_layout, dst_layout, profile,
)
}
}
impl RgbXyzFactoryOpt<f32> for f32 {
fn make_optimized_transform<
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
>(
src_layout: Layout,
dst_layout: Layout,
profile: TransformMatrixShaperOptimized<f32, LINEAR_CAP>,
transform_options: TransformOptions,
) -> Result<Box<dyn TransformExecutor<f32> + Send + Sync>, CmsError> {
if transform_options.prefer_fixed_point {
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
{
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_avx2_opt;
if std::arch::is_x86_feature_detected!("avx2") {
return make_rgb_xyz_q2_13_transform_avx2_opt::<
f32,
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
{
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_sse_41_opt;
if std::arch::is_x86_feature_detected!("sse4.1") {
return make_rgb_xyz_q2_13_transform_sse_41_opt::<
f32,
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
{
return if std::arch::is_aarch64_feature_detected!("rdm") {
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q1_30_opt;
make_rgb_xyz_q1_30_opt::<f32, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH, 30>(
src_layout, dst_layout, profile,
)
} else {
make_rgb_xyz_q2_13_opt::<f32, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH, FIXED_POINT_SCALE>(
src_layout, dst_layout, profile,
)
};
}
}
make_rgb_xyz_rgb_transform_opt::<f32, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH>(
src_layout, dst_layout, profile,
)
}
}
impl RgbXyzFactoryOpt<f64> for f64 {
fn make_optimized_transform<
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
>(
src_layout: Layout,
dst_layout: Layout,
profile: TransformMatrixShaperOptimized<f64, LINEAR_CAP>,
transform_options: TransformOptions,
) -> Result<Box<dyn TransformExecutor<f64> + Send + Sync>, CmsError> {
if transform_options.prefer_fixed_point {
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
{
if std::arch::is_aarch64_feature_detected!("rdm") {
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q1_30_opt;
return make_rgb_xyz_q1_30_opt::<f64, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH, 30>(
src_layout, dst_layout, profile,
);
}
}
}
make_rgb_xyz_rgb_transform_opt::<f64, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH>(
src_layout, dst_layout, profile,
)
}
}
impl RgbXyzFactoryOpt<u8> for u8 {
fn make_optimized_transform<
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
>(
src_layout: Layout,
dst_layout: Layout,
profile: TransformMatrixShaperOptimized<u8, LINEAR_CAP>,
transform_options: TransformOptions,
) -> Result<Box<dyn TransformExecutor<u8> + Send + Sync>, CmsError> {
if transform_options.prefer_fixed_point {
#[cfg(all(target_arch = "x86_64", feature = "avx512"))]
{
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_avx512_opt;
if std::arch::is_x86_feature_detected!("avx512bw")
&& std::arch::is_x86_feature_detected!("avx512vl")
{
return make_rgb_xyz_q2_13_transform_avx512_opt::<
u8,
LINEAR_CAP,
GAMMA_LUT,
8,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
{
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_avx2_opt;
if std::arch::is_x86_feature_detected!("avx2") {
return make_rgb_xyz_q2_13_transform_avx2_opt::<
u8,
LINEAR_CAP,
GAMMA_LUT,
8,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
{
use crate::conversions::rgbxyz_fixed::make_rgb_xyz_q2_13_transform_sse_41_opt;
if std::arch::is_x86_feature_detected!("sse4.1") {
return make_rgb_xyz_q2_13_transform_sse_41_opt::<
u8,
LINEAR_CAP,
GAMMA_LUT,
8,
FIXED_POINT_SCALE,
>(src_layout, dst_layout, profile);
}
}
make_rgb_xyz_q2_13_opt::<u8, LINEAR_CAP, GAMMA_LUT, 8, FIXED_POINT_SCALE>(
src_layout, dst_layout, profile,
)
} else {
make_rgb_xyz_rgb_transform_opt::<u8, LINEAR_CAP, GAMMA_LUT, 8>(
src_layout, dst_layout, profile,
)
}
}
}
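// The factories above all follow one dispatch pattern: prefer the widest
// fixed-point SIMD path detected at runtime, then fall back to the float
// path. A minimal standalone sketch of that pattern (labels hypothetical):
#[allow(dead_code)]
fn pick_path() -> &'static str {
    #[cfg(target_arch = "x86_64")]
    {
        if std::arch::is_x86_feature_detected!("avx2") {
            return "avx2";
        }
        if std::arch::is_x86_feature_detected!("sse4.1") {
            return "sse4.1";
        }
    }
    "scalar"
}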

701
vendor/moxcms/src/conversions/rgbxyz.rs vendored Normal file

@@ -0,0 +1,701 @@
/*
* // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::{CmsError, Layout, Matrix3, Matrix3f, TransformExecutor};
use num_traits::AsPrimitive;
pub(crate) struct TransformMatrixShaper<T: Clone, const BUCKET: usize> {
pub(crate) r_linear: Box<[f32; BUCKET]>,
pub(crate) g_linear: Box<[f32; BUCKET]>,
pub(crate) b_linear: Box<[f32; BUCKET]>,
pub(crate) r_gamma: Box<[T; 65536]>,
pub(crate) g_gamma: Box<[T; 65536]>,
pub(crate) b_gamma: Box<[T; 65536]>,
pub(crate) adaptation_matrix: Matrix3f,
}
/// Low-memory-footprint optimized routine for matrix shaper profiles whose
/// gamma and linear curves are identical across all channels.
pub(crate) struct TransformMatrixShaperOptimized<T: Clone, const BUCKET: usize> {
pub(crate) linear: Box<[f32; BUCKET]>,
pub(crate) gamma: Box<[T; 65536]>,
pub(crate) adaptation_matrix: Matrix3f,
}
impl<T: Clone + PointeeSizeExpressible, const BUCKET: usize> TransformMatrixShaper<T, BUCKET> {
pub(crate) fn to_q2_13_n<
R: Copy + 'static + Default,
const PRECISION: i32,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
>(
&self,
) -> TransformMatrixShaperFixedPoint<R, T, BUCKET>
where
f32: AsPrimitive<R>,
{
let linear_scale = if T::FINITE {
let lut_scale = (GAMMA_LUT - 1) as f32 / ((1 << BIT_DEPTH) - 1) as f32;
((1 << BIT_DEPTH) - 1) as f32 * lut_scale
} else {
let lut_scale = (GAMMA_LUT - 1) as f32 / (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32;
(T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32 * lut_scale
};
let mut new_box_r = Box::new([R::default(); BUCKET]);
let mut new_box_g = Box::new([R::default(); BUCKET]);
let mut new_box_b = Box::new([R::default(); BUCKET]);
for (dst, &src) in new_box_r.iter_mut().zip(self.r_linear.iter()) {
*dst = (src * linear_scale).round().as_();
}
for (dst, &src) in new_box_g.iter_mut().zip(self.g_linear.iter()) {
*dst = (src * linear_scale).round().as_();
}
for (dst, &src) in new_box_b.iter_mut().zip(self.b_linear.iter()) {
*dst = (src * linear_scale).round().as_();
}
let scale: f32 = (1i32 << PRECISION) as f32;
let source_matrix = self.adaptation_matrix;
let mut dst_matrix = Matrix3::<i16> { v: [[0i16; 3]; 3] };
for i in 0..3 {
for j in 0..3 {
dst_matrix.v[i][j] = (source_matrix.v[i][j] * scale) as i16;
}
}
TransformMatrixShaperFixedPoint {
r_linear: new_box_r,
g_linear: new_box_g,
b_linear: new_box_b,
r_gamma: self.r_gamma.clone(),
g_gamma: self.g_gamma.clone(),
b_gamma: self.b_gamma.clone(),
adaptation_matrix: dst_matrix,
}
}
}
impl<T: Clone + PointeeSizeExpressible, const BUCKET: usize>
TransformMatrixShaperOptimized<T, BUCKET>
{
pub(crate) fn to_q2_13_n<
R: Copy + 'static + Default,
const PRECISION: i32,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
>(
&self,
) -> TransformMatrixShaperFixedPointOpt<R, i16, T, BUCKET>
where
f32: AsPrimitive<R>,
{
let linear_scale = if T::FINITE {
let lut_scale = (GAMMA_LUT - 1) as f32 / ((1 << BIT_DEPTH) - 1) as f32;
((1 << BIT_DEPTH) - 1) as f32 * lut_scale
} else {
let lut_scale = (GAMMA_LUT - 1) as f32 / (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32;
(T::NOT_FINITE_LINEAR_TABLE_SIZE - 1) as f32 * lut_scale
};
let mut new_box_linear = Box::new([R::default(); BUCKET]);
for (dst, src) in new_box_linear.iter_mut().zip(self.linear.iter()) {
*dst = (*src * linear_scale).round().as_();
}
let scale: f32 = (1i32 << PRECISION) as f32;
let source_matrix = self.adaptation_matrix;
let mut dst_matrix = Matrix3::<i16> {
v: [[i16::default(); 3]; 3],
};
for i in 0..3 {
for j in 0..3 {
dst_matrix.v[i][j] = (source_matrix.v[i][j] * scale) as i16;
}
}
TransformMatrixShaperFixedPointOpt {
linear: new_box_linear,
gamma: self.gamma.clone(),
adaptation_matrix: dst_matrix,
}
}
#[allow(dead_code)]
pub(crate) fn to_q1_30_n<
R: Copy + 'static + Default,
const PRECISION: i32,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
>(
&self,
) -> TransformMatrixShaperFixedPointOpt<R, i32, T, BUCKET>
where
f32: AsPrimitive<R>,
f64: AsPrimitive<R>,
{
// It is important to scale by one extra bit to compensate for vqrdmlah's Q0.31 doubling, since we are using Q1.30 here.
let table_size = if T::FINITE {
(1 << BIT_DEPTH) - 1
} else {
T::NOT_FINITE_LINEAR_TABLE_SIZE - 1
};
let ext_bp = if T::FINITE {
BIT_DEPTH as u32 + 1
} else {
let bp = (T::NOT_FINITE_LINEAR_TABLE_SIZE - 1).count_ones();
bp + 1
};
let linear_scale = {
let lut_scale = (GAMMA_LUT - 1) as f64 / table_size as f64;
((1u32 << ext_bp) - 1) as f64 * lut_scale
};
let mut new_box_linear = Box::new([R::default(); BUCKET]);
for (dst, &src) in new_box_linear.iter_mut().zip(self.linear.iter()) {
*dst = (src as f64 * linear_scale).round().as_();
}
let scale: f64 = (1i64 << PRECISION) as f64;
let source_matrix = self.adaptation_matrix;
let mut dst_matrix = Matrix3::<i32> {
v: [[i32::default(); 3]; 3],
};
for i in 0..3 {
for j in 0..3 {
dst_matrix.v[i][j] = (source_matrix.v[i][j] as f64 * scale) as i32;
}
}
TransformMatrixShaperFixedPointOpt {
linear: new_box_linear,
gamma: self.gamma.clone(),
adaptation_matrix: dst_matrix,
}
}
}
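// Worked example for the Q1.30 comment in `to_q1_30_n` above (illustrative):
// a matrix coefficient is quantized as c * 2^30, so c = 0.25 becomes
// 268_435_456; the extra bit in the linear-table scale offsets the halving
// that the saturating-doubling multiplies imply for Q0.31 operands.
#[allow(dead_code)]
fn to_q1_30(c: f64) -> i32 {
    (c * (1i64 << 30) as f64) as i32
}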
#[allow(unused)]
struct TransformMatrixShaperScalar<
T: Clone,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
> {
pub(crate) profile: TransformMatrixShaper<T, LINEAR_CAP>,
}
#[allow(unused)]
struct TransformMatrixShaperOptScalar<
T: Clone,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
> {
pub(crate) profile: TransformMatrixShaperOptimized<T, LINEAR_CAP>,
}
#[cfg(any(
any(target_arch = "x86", target_arch = "x86_64"),
all(target_arch = "aarch64", target_feature = "neon")
))]
#[allow(unused)]
macro_rules! create_rgb_xyz_dependant_executor {
($dep_name: ident, $dependant: ident, $shaper: ident) => {
pub(crate) fn $dep_name<
T: Clone + Send + Sync + Default + PointeeSizeExpressible + Copy + 'static,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
>(
src_layout: Layout,
dst_layout: Layout,
profile: $shaper<T, LINEAR_CAP>,
) -> Result<Box<dyn TransformExecutor<T> + Send + Sync>, CmsError>
where
u32: AsPrimitive<T>,
{
if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgba) {
return Ok(Box::new($dependant::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgba as u8 },
LINEAR_CAP,
GAMMA_LUT,
> {
profile,
bit_depth: BIT_DEPTH,
}));
} else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgba) {
return Ok(Box::new($dependant::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgba as u8 },
LINEAR_CAP,
GAMMA_LUT,
> {
profile,
bit_depth: BIT_DEPTH,
}));
} else if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgb) {
return Ok(Box::new($dependant::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgb as u8 },
LINEAR_CAP,
GAMMA_LUT,
> {
profile,
bit_depth: BIT_DEPTH,
}));
} else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgb) {
return Ok(Box::new($dependant::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgb as u8 },
LINEAR_CAP,
GAMMA_LUT,
> {
profile,
bit_depth: BIT_DEPTH,
}));
}
Err(CmsError::UnsupportedProfileConnection)
}
};
}
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
use crate::conversions::sse::{TransformShaperRgbOptSse, TransformShaperRgbSse};
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
use crate::conversions::avx::{TransformShaperRgbAvx, TransformShaperRgbOptAvx};
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
create_rgb_xyz_dependant_executor!(
make_rgb_xyz_rgb_transform_sse_41,
TransformShaperRgbSse,
TransformMatrixShaper
);
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
create_rgb_xyz_dependant_executor!(
make_rgb_xyz_rgb_transform_sse_41_opt,
TransformShaperRgbOptSse,
TransformMatrixShaperOptimized
);
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
create_rgb_xyz_dependant_executor!(
make_rgb_xyz_rgb_transform_avx2,
TransformShaperRgbAvx,
TransformMatrixShaper
);
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
create_rgb_xyz_dependant_executor!(
make_rgb_xyz_rgb_transform_avx2_opt,
TransformShaperRgbOptAvx,
TransformMatrixShaperOptimized
);
#[cfg(all(target_arch = "x86_64", feature = "avx512"))]
use crate::conversions::avx512::TransformShaperRgbOptAvx512;
#[cfg(all(target_arch = "x86_64", feature = "avx512"))]
create_rgb_xyz_dependant_executor!(
make_rgb_xyz_rgb_transform_avx512_opt,
TransformShaperRgbOptAvx512,
TransformMatrixShaperOptimized
);
#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
pub(crate) fn make_rgb_xyz_rgb_transform<
T: Clone + Send + Sync + PointeeSizeExpressible + 'static + Copy + Default,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
>(
src_layout: Layout,
dst_layout: Layout,
profile: TransformMatrixShaper<T, LINEAR_CAP>,
) -> Result<Box<dyn TransformExecutor<T> + Send + Sync>, CmsError>
where
u32: AsPrimitive<T>,
{
#[cfg(all(feature = "avx", target_arch = "x86_64"))]
if std::arch::is_x86_feature_detected!("avx2") {
return make_rgb_xyz_rgb_transform_avx2::<T, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH>(
src_layout, dst_layout, profile,
);
}
#[cfg(all(feature = "sse", any(target_arch = "x86", target_arch = "x86_64")))]
if std::arch::is_x86_feature_detected!("sse4.1") {
return make_rgb_xyz_rgb_transform_sse_41::<T, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH>(
src_layout, dst_layout, profile,
);
}
if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgba) {
return Ok(Box::new(TransformMatrixShaperScalar::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgba as u8 },
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
> {
profile,
}));
} else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgba) {
return Ok(Box::new(TransformMatrixShaperScalar::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgba as u8 },
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
> {
profile,
}));
} else if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgb) {
return Ok(Box::new(TransformMatrixShaperScalar::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgb as u8 },
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
> {
profile,
}));
} else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgb) {
return Ok(Box::new(TransformMatrixShaperScalar::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgb as u8 },
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
> {
profile,
}));
}
Err(CmsError::UnsupportedProfileConnection)
}
#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
pub(crate) fn make_rgb_xyz_rgb_transform_opt<
T: Clone + Send + Sync + PointeeSizeExpressible + 'static + Copy + Default,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
>(
src_layout: Layout,
dst_layout: Layout,
profile: TransformMatrixShaperOptimized<T, LINEAR_CAP>,
) -> Result<Box<dyn TransformExecutor<T> + Send + Sync>, CmsError>
where
u32: AsPrimitive<T>,
{
#[cfg(all(feature = "avx512", target_arch = "x86_64"))]
if std::arch::is_x86_feature_detected!("avx512bw")
&& std::arch::is_x86_feature_detected!("avx512vl")
&& std::arch::is_x86_feature_detected!("fma")
{
return make_rgb_xyz_rgb_transform_avx512_opt::<T, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH>(
src_layout, dst_layout, profile,
);
}
#[cfg(all(feature = "avx", target_arch = "x86_64"))]
if std::arch::is_x86_feature_detected!("avx2") {
return make_rgb_xyz_rgb_transform_avx2_opt::<T, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH>(
src_layout, dst_layout, profile,
);
}
#[cfg(all(feature = "sse", any(target_arch = "x86", target_arch = "x86_64")))]
if std::arch::is_x86_feature_detected!("sse4.1") {
return make_rgb_xyz_rgb_transform_sse_41_opt::<T, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH>(
src_layout, dst_layout, profile,
);
}
if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgba) {
return Ok(Box::new(TransformMatrixShaperOptScalar::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgba as u8 },
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
> {
profile,
}));
} else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgba) {
return Ok(Box::new(TransformMatrixShaperOptScalar::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgba as u8 },
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
> {
profile,
}));
} else if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgb) {
return Ok(Box::new(TransformMatrixShaperOptScalar::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgb as u8 },
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
> {
profile,
}));
} else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgb) {
return Ok(Box::new(TransformMatrixShaperOptScalar::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgb as u8 },
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
> {
profile,
}));
}
Err(CmsError::UnsupportedProfileConnection)
}
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
use crate::conversions::neon::{TransformShaperRgbNeon, TransformShaperRgbOptNeon};
use crate::conversions::rgbxyz_fixed::{
TransformMatrixShaperFixedPoint, TransformMatrixShaperFixedPointOpt,
};
use crate::transform::PointeeSizeExpressible;
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
create_rgb_xyz_dependant_executor!(
make_rgb_xyz_rgb_transform,
TransformShaperRgbNeon,
TransformMatrixShaper
);
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
create_rgb_xyz_dependant_executor!(
make_rgb_xyz_rgb_transform_opt,
TransformShaperRgbOptNeon,
TransformMatrixShaperOptimized
);
#[allow(unused)]
impl<
T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
> TransformExecutor<T>
for TransformMatrixShaperScalar<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
use crate::mlaf::mlaf;
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let transform = self.profile.adaptation_matrix;
let scale = (GAMMA_LUT - 1) as f32;
let max_colors: T = ((1 << BIT_DEPTH) - 1).as_();
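        // Per pixel: linearize each channel through its LUT, apply the 3x3
        // adaptation matrix with fused multiply-adds, clamp to [0, 1], and
        // scale to a gamma-LUT index; the outer mlaf adds 0.5 so the later
        // `as u16` truncation rounds to the nearest index.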
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let r = self.profile.r_linear[src[src_cn.r_i()]._as_usize()];
let g = self.profile.g_linear[src[src_cn.g_i()]._as_usize()];
let b = self.profile.b_linear[src[src_cn.b_i()]._as_usize()];
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let new_r = mlaf(
0.5f32,
mlaf(
mlaf(r * transform.v[0][0], g, transform.v[0][1]),
b,
transform.v[0][2],
)
.max(0f32)
.min(1f32),
scale,
);
let new_g = mlaf(
0.5f32,
mlaf(
mlaf(r * transform.v[1][0], g, transform.v[1][1]),
b,
transform.v[1][2],
)
.max(0f32)
.min(1f32),
scale,
);
let new_b = mlaf(
0.5f32,
mlaf(
mlaf(r * transform.v[2][0], g, transform.v[2][1]),
b,
transform.v[2][2],
)
.max(0f32)
.min(1f32),
scale,
);
dst[dst_cn.r_i()] = self.profile.r_gamma[(new_r as u16) as usize];
dst[dst_cn.g_i()] = self.profile.g_gamma[(new_g as u16) as usize];
dst[dst_cn.b_i()] = self.profile.b_gamma[(new_b as u16) as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
Ok(())
}
}
#[allow(unused)]
impl<
T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
> TransformExecutor<T>
for TransformMatrixShaperOptScalar<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
use crate::mlaf::mlaf;
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let transform = self.profile.adaptation_matrix;
let scale = (GAMMA_LUT - 1) as f32;
let max_colors: T = ((1 << BIT_DEPTH) - 1).as_();
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let r = self.profile.linear[src[src_cn.r_i()]._as_usize()];
let g = self.profile.linear[src[src_cn.g_i()]._as_usize()];
let b = self.profile.linear[src[src_cn.b_i()]._as_usize()];
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let new_r = mlaf(
0.5f32,
mlaf(
mlaf(r * transform.v[0][0], g, transform.v[0][1]),
b,
transform.v[0][2],
)
.max(0f32)
.min(1f32),
scale,
);
let new_g = mlaf(
0.5f32,
mlaf(
mlaf(r * transform.v[1][0], g, transform.v[1][1]),
b,
transform.v[1][2],
)
.max(0f32)
.min(1f32),
scale,
);
let new_b = mlaf(
0.5f32,
mlaf(
mlaf(r * transform.v[2][0], g, transform.v[2][1]),
b,
transform.v[2][2],
)
.max(0f32)
.min(1f32),
scale,
);
dst[dst_cn.r_i()] = self.profile.gamma[(new_r as u16) as usize];
dst[dst_cn.g_i()] = self.profile.gamma[(new_g as u16) as usize];
dst[dst_cn.b_i()] = self.profile.gamma[(new_b as u16) as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
Ok(())
}
}


@@ -0,0 +1,487 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::Layout;
use crate::conversions::TransformMatrixShaper;
use crate::matrix::Matrix3;
use crate::{CmsError, TransformExecutor};
use num_traits::AsPrimitive;
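// A Q2.13 value packs a real x as round(x * 2^13) into an i16: sign bit, two
// integer bits and thirteen fractional bits, covering [-4.0, 4.0) in steps
// of 2^-13.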
/// Fixed point conversion Q2.13
pub(crate) struct TransformMatrixShaperFixedPoint<R, T, const LINEAR_CAP: usize> {
pub(crate) r_linear: Box<[R; LINEAR_CAP]>,
pub(crate) g_linear: Box<[R; LINEAR_CAP]>,
pub(crate) b_linear: Box<[R; LINEAR_CAP]>,
pub(crate) r_gamma: Box<[T; 65536]>,
pub(crate) g_gamma: Box<[T; 65536]>,
pub(crate) b_gamma: Box<[T; 65536]>,
pub(crate) adaptation_matrix: Matrix3<i16>,
}
/// Fixed point conversion Q2.13
///
/// Optimized routine for matrix shapers whose channels all share the same curves.
pub(crate) struct TransformMatrixShaperFixedPointOpt<R, W, T, const LINEAR_CAP: usize> {
pub(crate) linear: Box<[R; LINEAR_CAP]>,
pub(crate) gamma: Box<[T; 65536]>,
pub(crate) adaptation_matrix: Matrix3<W>,
}
#[allow(unused)]
struct TransformMatrixShaperQ2_13<
T: Copy,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> {
pub(crate) profile: TransformMatrixShaperFixedPoint<i16, T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
#[allow(unused)]
struct TransformMatrixShaperQ2_13Optimized<
T: Copy,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> {
pub(crate) profile: TransformMatrixShaperFixedPointOpt<i16, i16, T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
#[allow(unused)]
impl<
T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> TransformExecutor<T>
for TransformMatrixShaperQ2_13<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT, PRECISION>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let transform = self.profile.adaptation_matrix;
let max_colors: T = ((1 << self.bit_depth as u32) - 1u32).as_();
        let rnd: i32 = 1i32 << (PRECISION - 1);
let v_gamma_max = GAMMA_LUT as i32 - 1;
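        // `rnd` is half of one fixed-point step, so `(acc + rnd) >> PRECISION`
        // rounds to nearest rather than truncating; results are clamped into
        // the gamma LUT index range.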
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let r = self.profile.r_linear[src[src_cn.r_i()]._as_usize()];
let g = self.profile.g_linear[src[src_cn.g_i()]._as_usize()];
let b = self.profile.b_linear[src[src_cn.b_i()]._as_usize()];
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let new_r = r as i32 * transform.v[0][0] as i32
+ g as i32 * transform.v[0][1] as i32
+ b as i32 * transform.v[0][2] as i32
+ rnd;
let r_q2_13 = (new_r >> PRECISION).min(v_gamma_max).max(0) as u16;
let new_g = r as i32 * transform.v[1][0] as i32
+ g as i32 * transform.v[1][1] as i32
+ b as i32 * transform.v[1][2] as i32
+ rnd;
let g_q2_13 = (new_g >> PRECISION).min(v_gamma_max).max(0) as u16;
let new_b = r as i32 * transform.v[2][0] as i32
+ g as i32 * transform.v[2][1] as i32
+ b as i32 * transform.v[2][2] as i32
+ rnd;
let b_q2_13 = (new_b >> PRECISION).min(v_gamma_max).max(0) as u16;
dst[dst_cn.r_i()] = self.profile.r_gamma[r_q2_13 as usize];
dst[dst_cn.g_i()] = self.profile.g_gamma[g_q2_13 as usize];
dst[dst_cn.b_i()] = self.profile.b_gamma[b_q2_13 as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
Ok(())
}
}
#[allow(unused)]
impl<
T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> TransformExecutor<T>
for TransformMatrixShaperQ2_13Optimized<
T,
SRC_LAYOUT,
DST_LAYOUT,
LINEAR_CAP,
GAMMA_LUT,
PRECISION,
>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let transform = self.profile.adaptation_matrix;
let max_colors: T = ((1 << self.bit_depth as u32) - 1u32).as_();
        let rnd: i32 = 1i32 << (PRECISION - 1);
let v_gamma_max = GAMMA_LUT as i32 - 1;
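        // Same rounding-and-clamping scheme as the per-channel executor above,
        // but with one shared linear LUT and one shared gamma LUT.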
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let r = self.profile.linear[src[src_cn.r_i()]._as_usize()];
let g = self.profile.linear[src[src_cn.g_i()]._as_usize()];
let b = self.profile.linear[src[src_cn.b_i()]._as_usize()];
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let new_r = r as i32 * transform.v[0][0] as i32
+ g as i32 * transform.v[0][1] as i32
+ b as i32 * transform.v[0][2] as i32
+ rnd;
let r_q2_13 = (new_r >> PRECISION).min(v_gamma_max).max(0) as u16;
let new_g = r as i32 * transform.v[1][0] as i32
+ g as i32 * transform.v[1][1] as i32
+ b as i32 * transform.v[1][2] as i32
+ rnd;
let g_q2_13 = (new_g >> PRECISION).min(v_gamma_max).max(0) as u16;
let new_b = r as i32 * transform.v[2][0] as i32
+ g as i32 * transform.v[2][1] as i32
+ b as i32 * transform.v[2][2] as i32
+ rnd;
let b_q2_13 = (new_b >> PRECISION).min(v_gamma_max).max(0) as u16;
dst[dst_cn.r_i()] = self.profile.gamma[r_q2_13 as usize];
dst[dst_cn.g_i()] = self.profile.gamma[g_q2_13 as usize];
dst[dst_cn.b_i()] = self.profile.gamma[b_q2_13 as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
Ok(())
}
}
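// Generates a constructor that converts the floating-point profile into its
// fixed-point form and monomorphizes the executor over the four supported
// RGB/RGBA layout pairs.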
macro_rules! create_rgb_xyz_dependant_q2_13_executor {
($dep_name: ident, $dependant: ident, $resolution: ident, $shaper: ident) => {
pub(crate) fn $dep_name<
T: Clone + Send + Sync + AsPrimitive<usize> + Default + PointeeSizeExpressible,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
const PRECISION: i32,
>(
src_layout: Layout,
dst_layout: Layout,
profile: $shaper<T, LINEAR_CAP>,
) -> Result<Box<dyn TransformExecutor<T> + Send + Sync>, CmsError>
where
u32: AsPrimitive<T>,
{
let q2_13_profile =
profile.to_q2_13_n::<$resolution, PRECISION, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH>();
if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgba) {
return Ok(Box::new($dependant::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgba as u8 },
LINEAR_CAP,
GAMMA_LUT,
PRECISION,
> {
profile: q2_13_profile,
bit_depth: BIT_DEPTH,
}));
} else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgba) {
return Ok(Box::new($dependant::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgba as u8 },
LINEAR_CAP,
GAMMA_LUT,
PRECISION,
> {
profile: q2_13_profile,
bit_depth: BIT_DEPTH,
}));
} else if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgb) {
return Ok(Box::new($dependant::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgb as u8 },
LINEAR_CAP,
GAMMA_LUT,
PRECISION,
> {
profile: q2_13_profile,
bit_depth: BIT_DEPTH,
}));
} else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgb) {
return Ok(Box::new($dependant::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgb as u8 },
LINEAR_CAP,
GAMMA_LUT,
PRECISION,
> {
profile: q2_13_profile,
bit_depth: BIT_DEPTH,
}));
}
Err(CmsError::UnsupportedProfileConnection)
}
};
}
#[cfg(all(target_arch = "aarch64", feature = "neon"))]
macro_rules! create_rgb_xyz_dependant_q1_30_executor {
($dep_name: ident, $dependant: ident, $resolution: ident, $shaper: ident) => {
pub(crate) fn $dep_name<
T: Clone + Send + Sync + AsPrimitive<usize> + Default + PointeeSizeExpressible,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const BIT_DEPTH: usize,
const PRECISION: i32,
>(
src_layout: Layout,
dst_layout: Layout,
profile: $shaper<T, LINEAR_CAP>,
) -> Result<Box<dyn TransformExecutor<T> + Send + Sync>, CmsError>
where
u32: AsPrimitive<T>,
{
let q1_30_profile =
profile.to_q1_30_n::<$resolution, PRECISION, LINEAR_CAP, GAMMA_LUT, BIT_DEPTH>();
if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgba) {
return Ok(Box::new($dependant::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgba as u8 },
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
PRECISION,
> {
profile: q1_30_profile,
}));
} else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgba) {
return Ok(Box::new($dependant::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgba as u8 },
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
PRECISION,
> {
profile: q1_30_profile,
}));
} else if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgb) {
return Ok(Box::new($dependant::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgb as u8 },
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
PRECISION,
> {
profile: q1_30_profile,
}));
} else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgb) {
return Ok(Box::new($dependant::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgb as u8 },
LINEAR_CAP,
GAMMA_LUT,
BIT_DEPTH,
PRECISION,
> {
profile: q1_30_profile,
}));
}
Err(CmsError::UnsupportedProfileConnection)
}
};
}
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
use crate::conversions::neon::{
TransformShaperQ1_30NeonOpt, TransformShaperQ2_13Neon, TransformShaperQ2_13NeonOpt,
};
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
create_rgb_xyz_dependant_q2_13_executor!(
make_rgb_xyz_q2_13,
TransformShaperQ2_13Neon,
i16,
TransformMatrixShaper
);
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
create_rgb_xyz_dependant_q2_13_executor!(
make_rgb_xyz_q2_13_opt,
TransformShaperQ2_13NeonOpt,
i16,
TransformMatrixShaperOptimized
);
#[cfg(all(target_arch = "aarch64", target_feature = "neon", feature = "neon"))]
create_rgb_xyz_dependant_q1_30_executor!(
make_rgb_xyz_q1_30_opt,
TransformShaperQ1_30NeonOpt,
i32,
TransformMatrixShaperOptimized
);
#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
create_rgb_xyz_dependant_q2_13_executor!(
make_rgb_xyz_q2_13,
TransformMatrixShaperQ2_13,
i16,
TransformMatrixShaper
);
#[cfg(not(all(target_arch = "aarch64", target_feature = "neon", feature = "neon")))]
create_rgb_xyz_dependant_q2_13_executor!(
make_rgb_xyz_q2_13_opt,
TransformMatrixShaperQ2_13Optimized,
i16,
TransformMatrixShaperOptimized
);
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
use crate::conversions::sse::{TransformShaperQ2_13OptSse, TransformShaperQ2_13Sse};
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
create_rgb_xyz_dependant_q2_13_executor!(
make_rgb_xyz_q2_13_transform_sse_41,
TransformShaperQ2_13Sse,
i32,
TransformMatrixShaper
);
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "sse"))]
create_rgb_xyz_dependant_q2_13_executor!(
make_rgb_xyz_q2_13_transform_sse_41_opt,
TransformShaperQ2_13OptSse,
i32,
TransformMatrixShaperOptimized
);
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
use crate::conversions::avx::{TransformShaperRgbQ2_13Avx, TransformShaperRgbQ2_13OptAvx};
use crate::conversions::rgbxyz::TransformMatrixShaperOptimized;
use crate::transform::PointeeSizeExpressible;
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
create_rgb_xyz_dependant_q2_13_executor!(
make_rgb_xyz_q2_13_transform_avx2,
TransformShaperRgbQ2_13Avx,
i32,
TransformMatrixShaper
);
#[cfg(all(target_arch = "x86_64", feature = "avx"))]
create_rgb_xyz_dependant_q2_13_executor!(
make_rgb_xyz_q2_13_transform_avx2_opt,
TransformShaperRgbQ2_13OptAvx,
i32,
TransformMatrixShaperOptimized
);
#[cfg(all(target_arch = "x86_64", feature = "avx512"))]
use crate::conversions::avx512::TransformShaperRgbQ2_13OptAvx512;
#[cfg(all(target_arch = "x86_64", feature = "avx512"))]
create_rgb_xyz_dependant_q2_13_executor!(
make_rgb_xyz_q2_13_transform_avx512_opt,
TransformShaperRgbQ2_13OptAvx512,
i32,
TransformMatrixShaperOptimized
);


@@ -0,0 +1,332 @@
/*
* // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::trc::ToneCurveEvaluator;
use crate::{CmsError, Layout, Matrix3f, PointeeSizeExpressible, Rgb, TransformExecutor};
use num_traits::AsPrimitive;
use std::marker::PhantomData;
pub(crate) struct TransformShaperRgbFloat<T: Clone, const BUCKET: usize> {
pub(crate) r_linear: Box<[f32; BUCKET]>,
pub(crate) g_linear: Box<[f32; BUCKET]>,
pub(crate) b_linear: Box<[f32; BUCKET]>,
pub(crate) gamma_evaluator: Box<dyn ToneCurveEvaluator + Send + Sync>,
pub(crate) adaptation_matrix: Matrix3f,
pub(crate) phantom_data: PhantomData<T>,
}
pub(crate) struct TransformShaperFloatInOut<T: Clone> {
pub(crate) linear_evaluator: Box<dyn ToneCurveEvaluator + Send + Sync>,
pub(crate) gamma_evaluator: Box<dyn ToneCurveEvaluator + Send + Sync>,
pub(crate) adaptation_matrix: Matrix3f,
pub(crate) phantom_data: PhantomData<T>,
}
struct TransformShaperFloatScalar<
T: Clone,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const BIT_DEPTH: usize,
> {
pub(crate) profile: TransformShaperRgbFloat<T, LINEAR_CAP>,
}
struct TransformShaperRgbFloatInOut<T: Clone, const SRC_LAYOUT: u8, const DST_LAYOUT: u8> {
pub(crate) profile: TransformShaperFloatInOut<T>,
pub(crate) bit_depth: usize,
}
pub(crate) fn make_rgb_xyz_rgb_transform_float<
T: Clone + Send + Sync + PointeeSizeExpressible + 'static + Copy + Default,
const LINEAR_CAP: usize,
const BIT_DEPTH: usize,
>(
src_layout: Layout,
dst_layout: Layout,
profile: TransformShaperRgbFloat<T, LINEAR_CAP>,
) -> Result<Box<dyn TransformExecutor<T> + Send + Sync>, CmsError>
where
u32: AsPrimitive<T>,
f32: AsPrimitive<T>,
{
if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgba) {
return Ok(Box::new(TransformShaperFloatScalar::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgba as u8 },
LINEAR_CAP,
BIT_DEPTH,
> {
profile,
}));
} else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgba) {
return Ok(Box::new(TransformShaperFloatScalar::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgba as u8 },
LINEAR_CAP,
BIT_DEPTH,
> {
profile,
}));
} else if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgb) {
return Ok(Box::new(TransformShaperFloatScalar::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgb as u8 },
LINEAR_CAP,
BIT_DEPTH,
> {
profile,
}));
} else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgb) {
return Ok(Box::new(TransformShaperFloatScalar::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgb as u8 },
LINEAR_CAP,
BIT_DEPTH,
> {
profile,
}));
}
Err(CmsError::UnsupportedProfileConnection)
}
pub(crate) fn make_rgb_xyz_rgb_transform_float_in_out<
T: Clone + Send + Sync + PointeeSizeExpressible + 'static + Copy + Default + AsPrimitive<f32>,
const BIT_DEPTH: usize,
>(
src_layout: Layout,
dst_layout: Layout,
profile: TransformShaperFloatInOut<T>,
) -> Result<Box<dyn TransformExecutor<T> + Send + Sync>, CmsError>
where
u32: AsPrimitive<T>,
f32: AsPrimitive<T>,
{
if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgba) {
return Ok(Box::new(TransformShaperRgbFloatInOut::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgba as u8 },
> {
profile,
bit_depth: BIT_DEPTH,
}));
} else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgba) {
return Ok(Box::new(TransformShaperRgbFloatInOut::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgba as u8 },
> {
profile,
bit_depth: BIT_DEPTH,
}));
} else if (src_layout == Layout::Rgba) && (dst_layout == Layout::Rgb) {
return Ok(Box::new(TransformShaperRgbFloatInOut::<
T,
{ Layout::Rgba as u8 },
{ Layout::Rgb as u8 },
> {
profile,
bit_depth: BIT_DEPTH,
}));
} else if (src_layout == Layout::Rgb) && (dst_layout == Layout::Rgb) {
return Ok(Box::new(TransformShaperRgbFloatInOut::<
T,
{ Layout::Rgb as u8 },
{ Layout::Rgb as u8 },
> {
profile,
bit_depth: BIT_DEPTH,
}));
}
Err(CmsError::UnsupportedProfileConnection)
}
impl<
T: Clone + PointeeSizeExpressible + Copy + Default + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const BIT_DEPTH: usize,
> TransformExecutor<T>
for TransformShaperFloatScalar<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, BIT_DEPTH>
where
u32: AsPrimitive<T>,
f32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
use crate::mlaf::mlaf;
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let transform = self.profile.adaptation_matrix;
let max_colors: T = ((1 << BIT_DEPTH) - 1).as_();
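        // Like the LUT-based scalar path, but gamma is applied through a
        // boxed tone-curve evaluator and no clamping happens at this stage.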
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let r = self.profile.r_linear[src[src_cn.r_i()]._as_usize()];
let g = self.profile.g_linear[src[src_cn.g_i()]._as_usize()];
let b = self.profile.b_linear[src[src_cn.b_i()]._as_usize()];
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let new_r = mlaf(
mlaf(r * transform.v[0][0], g, transform.v[0][1]),
b,
transform.v[0][2],
);
let new_g = mlaf(
mlaf(r * transform.v[1][0], g, transform.v[1][1]),
b,
transform.v[1][2],
);
let new_b = mlaf(
mlaf(r * transform.v[2][0], g, transform.v[2][1]),
b,
transform.v[2][2],
);
let mut rgb = Rgb::new(new_r, new_g, new_b);
rgb = self.profile.gamma_evaluator.evaluate_tristimulus(rgb);
dst[dst_cn.r_i()] = rgb.r.as_();
dst[dst_cn.g_i()] = rgb.g.as_();
dst[dst_cn.b_i()] = rgb.b.as_();
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
Ok(())
}
}
impl<
T: Clone + PointeeSizeExpressible + Copy + Default + 'static + AsPrimitive<f32>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
> TransformExecutor<T> for TransformShaperRgbFloatInOut<T, SRC_LAYOUT, DST_LAYOUT>
where
u32: AsPrimitive<T>,
f32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
use crate::mlaf::mlaf;
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let transform = self.profile.adaptation_matrix;
let max_colors: T = ((1 << self.bit_depth) - 1).as_();
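        // Here both linearization and gamma go through boxed tone-curve
        // evaluators instead of lookup tables.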
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let mut src_rgb = Rgb::new(
src[src_cn.r_i()].as_(),
src[src_cn.g_i()].as_(),
src[src_cn.b_i()].as_(),
);
src_rgb = self.profile.linear_evaluator.evaluate_tristimulus(src_rgb);
let r = src_rgb.r;
let g = src_rgb.g;
let b = src_rgb.b;
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
let new_r = mlaf(
mlaf(r * transform.v[0][0], g, transform.v[0][1]),
b,
transform.v[0][2],
);
let new_g = mlaf(
mlaf(r * transform.v[1][0], g, transform.v[1][1]),
b,
transform.v[1][2],
);
let new_b = mlaf(
mlaf(r * transform.v[2][0], g, transform.v[2][1]),
b,
transform.v[2][2],
);
let mut rgb = Rgb::new(new_r, new_g, new_b);
rgb = self.profile.gamma_evaluator.evaluate_tristimulus(rgb);
dst[dst_cn.r_i()] = rgb.r.as_();
dst[dst_cn.g_i()] = rgb.g.as_();
dst[dst_cn.b_i()] = rgb.b.as_();
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
Ok(())
}
}


@@ -0,0 +1,457 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::interpolator::BarycentricWeight;
use crate::math::FusedMultiplyAdd;
use num_traits::AsPrimitive;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::ops::{Add, Mul, Sub};
#[repr(align(16), C)]
pub(crate) struct SseAlignedF32(pub(crate) [f32; 4]);
#[cfg(feature = "options")]
pub(crate) struct TetrahedralSse<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [SseAlignedF32],
}
#[cfg(feature = "options")]
pub(crate) struct PyramidalSse<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [SseAlignedF32],
}
#[cfg(feature = "options")]
pub(crate) struct PrismaticSse<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [SseAlignedF32],
}
pub(crate) struct TrilinearSse<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [SseAlignedF32],
}
trait Fetcher<T> {
fn fetch(&self, x: i32, y: i32, z: i32) -> T;
}
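// Thin newtype over `__m128` so the interpolators below can use ordinary
// `+`, `-`, `*` operators plus a fused multiply-add helper on SIMD lanes.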
#[derive(Copy, Clone)]
#[repr(transparent)]
pub(crate) struct SseVector {
pub(crate) v: __m128,
}
impl From<f32> for SseVector {
#[inline(always)]
fn from(v: f32) -> Self {
SseVector {
v: unsafe { _mm_set1_ps(v) },
}
}
}
impl Sub<SseVector> for SseVector {
type Output = Self;
#[inline(always)]
fn sub(self, rhs: SseVector) -> Self::Output {
SseVector {
v: unsafe { _mm_sub_ps(self.v, rhs.v) },
}
}
}
impl Add<SseVector> for SseVector {
type Output = Self;
#[inline(always)]
fn add(self, rhs: SseVector) -> Self::Output {
SseVector {
v: unsafe { _mm_add_ps(self.v, rhs.v) },
}
}
}
impl Mul<SseVector> for SseVector {
type Output = Self;
#[inline(always)]
fn mul(self, rhs: SseVector) -> Self::Output {
SseVector {
v: unsafe { _mm_mul_ps(self.v, rhs.v) },
}
}
}
impl FusedMultiplyAdd<SseVector> for SseVector {
#[inline(always)]
fn mla(&self, b: SseVector, c: SseVector) -> SseVector {
SseVector {
v: unsafe { _mm_add_ps(self.v, _mm_mul_ps(b.v, c.v)) },
}
}
}
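// The fetcher flattens (x, y, z) into a row-major index x*N^2 + y*N + z; the
// aligned `_mm_load_ps` is sound because `SseAlignedF32` is 16-byte aligned.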
struct TetrahedralSseFetchVector<'a, const GRID_SIZE: usize> {
cube: &'a [SseAlignedF32],
}
impl<const GRID_SIZE: usize> Fetcher<SseVector> for TetrahedralSseFetchVector<'_, GRID_SIZE> {
#[inline(always)]
fn fetch(&self, x: i32, y: i32, z: i32) -> SseVector {
let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
+ y as u32 * GRID_SIZE as u32
+ z as u32) as usize;
let jx = unsafe { self.cube.get_unchecked(offset..) };
SseVector {
v: unsafe { _mm_load_ps(jx.as_ptr() as *const _) },
}
}
}
pub(crate) trait SseMdInterpolation<'a, const GRID_SIZE: usize> {
fn new(table: &'a [SseAlignedF32]) -> Self;
fn inter3_sse<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
) -> SseVector;
}
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> TetrahedralSse<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
r: impl Fetcher<SseVector>,
) -> SseVector {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let rx = lut_r.w;
let ry = lut_g.w;
let rz = lut_b.w;
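        // The ordering of the fractional weights (rx, ry, rz) selects one of
        // the six tetrahedra that tile the grid cell; the result is c0 plus
        // three weighted edge differences.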
let c0 = r.fetch(x, y, z);
let c2;
let c1;
let c3;
if rx >= ry {
if ry >= rz {
//rx >= ry && ry >= rz
c1 = r.fetch(x_n, y, z) - c0;
c2 = r.fetch(x_n, y_n, z) - r.fetch(x_n, y, z);
c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
} else if rx >= rz {
//rx >= rz && rz >= ry
c1 = r.fetch(x_n, y, z) - c0;
c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
c3 = r.fetch(x_n, y, z_n) - r.fetch(x_n, y, z);
} else {
//rz > rx && rx >= ry
c1 = r.fetch(x_n, y, z_n) - r.fetch(x, y, z_n);
c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
c3 = r.fetch(x, y, z_n) - c0;
}
} else if rx >= rz {
//ry > rx && rx >= rz
c1 = r.fetch(x_n, y_n, z) - r.fetch(x, y_n, z);
c2 = r.fetch(x, y_n, z) - c0;
c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
} else if ry >= rz {
//ry >= rz && rz > rx
c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
c2 = r.fetch(x, y_n, z) - c0;
c3 = r.fetch(x, y_n, z_n) - r.fetch(x, y_n, z);
} else {
//rz > ry && ry > rx
c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
c2 = r.fetch(x, y_n, z_n) - r.fetch(x, y, z_n);
c3 = r.fetch(x, y, z_n) - c0;
}
let s0 = c0.mla(c1, SseVector::from(rx));
let s1 = s0.mla(c2, SseVector::from(ry));
s1.mla(c3, SseVector::from(rz))
}
}
macro_rules! define_inter_sse {
($interpolator: ident) => {
impl<'a, const GRID_SIZE: usize> SseMdInterpolation<'a, GRID_SIZE>
for $interpolator<'a, GRID_SIZE>
{
#[inline]
fn new(table: &'a [SseAlignedF32]) -> Self {
Self { cube: table }
}
#[inline(always)]
fn inter3_sse<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
) -> SseVector {
self.interpolate(
in_r,
in_g,
in_b,
lut,
TetrahedralSseFetchVector::<GRID_SIZE> { cube: self.cube },
)
}
}
};
}
#[cfg(feature = "options")]
define_inter_sse!(TetrahedralSse);
#[cfg(feature = "options")]
define_inter_sse!(PyramidalSse);
#[cfg(feature = "options")]
define_inter_sse!(PrismaticSse);
define_inter_sse!(TrilinearSse);
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> PyramidalSse<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
r: impl Fetcher<SseVector>,
) -> SseVector {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let c0 = r.fetch(x, y, z);
if dr > db && dg > db {
let x0 = r.fetch(x_n, y_n, z_n);
let x1 = r.fetch(x_n, y_n, z);
let x2 = r.fetch(x_n, y, z);
let x3 = r.fetch(x, y_n, z);
let c1 = x0 - x1;
let c2 = x2 - c0;
let c3 = x3 - c0;
let c4 = c0 - x3 - x2 + x1;
let s0 = c0.mla(c1, SseVector::from(db));
let s1 = s0.mla(c2, SseVector::from(dr));
let s2 = s1.mla(c3, SseVector::from(dg));
s2.mla(c4, SseVector::from(dr * dg))
} else if db > dr && dg > dr {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y_n, z_n);
let x2 = r.fetch(x, y_n, z_n);
let x3 = r.fetch(x, y_n, z);
let c1 = x0 - c0;
let c2 = x1 - x2;
let c3 = x3 - c0;
let c4 = c0 - x3 - x0 + x2;
let s0 = c0.mla(c1, SseVector::from(db));
let s1 = s0.mla(c2, SseVector::from(dr));
let s2 = s1.mla(c3, SseVector::from(dg));
s2.mla(c4, SseVector::from(dg * db))
} else {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y, z);
let x2 = r.fetch(x_n, y, z_n);
let x3 = r.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - c0;
let c3 = x3 - x2;
let c4 = c0 - x1 - x0 + x2;
let s0 = c0.mla(c1, SseVector::from(db));
let s1 = s0.mla(c2, SseVector::from(dr));
let s2 = s1.mla(c3, SseVector::from(dg));
s2.mla(c4, SseVector::from(db * dr))
}
}
}
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> PrismaticSse<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
r: impl Fetcher<SseVector>,
) -> SseVector {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let c0 = r.fetch(x, y, z);
if db > dr {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y, z_n);
let x2 = r.fetch(x, y_n, z);
let x3 = r.fetch(x, y_n, z_n);
let x4 = r.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - x0;
let c3 = x2 - c0;
let c4 = c0 - x2 - x0 + x3;
let c5 = x0 - x3 - x1 + x4;
let s0 = c0.mla(c1, SseVector::from(db));
let s1 = s0.mla(c2, SseVector::from(dr));
let s2 = s1.mla(c3, SseVector::from(dg));
let s3 = s2.mla(c4, SseVector::from(dg * db));
s3.mla(c5, SseVector::from(dr * dg))
} else {
let x0 = r.fetch(x_n, y, z);
let x1 = r.fetch(x_n, y, z_n);
let x2 = r.fetch(x, y_n, z);
let x3 = r.fetch(x_n, y_n, z);
let x4 = r.fetch(x_n, y_n, z_n);
let c1 = x1 - x0;
let c2 = x0 - c0;
let c3 = x2 - c0;
let c4 = x0 - x3 - x1 + x4;
let c5 = c0 - x2 - x0 + x3;
let s0 = c0.mla(c1, SseVector::from(db));
let s1 = s0.mla(c2, SseVector::from(dr));
let s2 = s1.mla(c3, SseVector::from(dg));
let s3 = s2.mla(c4, SseVector::from(dg * db));
s3.mla(c5, SseVector::from(dr * dg))
}
}
}
impl<const GRID_SIZE: usize> TrilinearSse<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<f32>; BINS],
r: impl Fetcher<SseVector>,
) -> SseVector {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
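        // Classic trilinear interpolation: lerp the eight cube corners along
        // x, then y, then z, using (1 - w) for the lower corner each time.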
let w0 = SseVector::from(dr);
let w1 = SseVector::from(dg);
let w2 = SseVector::from(db);
let c000 = r.fetch(x, y, z);
let c100 = r.fetch(x_n, y, z);
let c010 = r.fetch(x, y_n, z);
let c110 = r.fetch(x_n, y_n, z);
let c001 = r.fetch(x, y, z_n);
let c101 = r.fetch(x_n, y, z_n);
let c011 = r.fetch(x, y_n, z_n);
let c111 = r.fetch(x_n, y_n, z_n);
let dx = SseVector::from(1.0 - dr);
let c00 = (c000 * dx).mla(c100, w0);
let c10 = (c010 * dx).mla(c110, w0);
let c01 = (c001 * dx).mla(c101, w0);
let c11 = (c011 * dx).mla(c111, w0);
let dy = SseVector::from(1.0 - dg);
let c0 = (c00 * dy).mla(c10, w1);
let c1 = (c01 * dy).mla(c11, w1);
let dz = SseVector::from(1.0 - db);
(c0 * dz).mla(c1, w2)
}
}


@@ -0,0 +1,456 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::interpolator::BarycentricWeight;
use crate::math::FusedMultiplyAdd;
use num_traits::AsPrimitive;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::ops::{Add, Mul, Sub};
#[repr(align(8), C)]
pub(crate) struct SseAlignedI16x4(pub(crate) [i16; 4]);
#[cfg(feature = "options")]
pub(crate) struct TetrahedralSseQ0_15<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [SseAlignedI16x4],
}
#[cfg(feature = "options")]
pub(crate) struct PyramidalSseQ0_15<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [SseAlignedI16x4],
}
#[cfg(feature = "options")]
pub(crate) struct PrismaticSseQ0_15<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [SseAlignedI16x4],
}
pub(crate) struct TrilinearSseQ0_15<'a, const GRID_SIZE: usize> {
pub(crate) cube: &'a [SseAlignedI16x4],
}
trait Fetcher<T> {
fn fetch(&self, x: i32, y: i32, z: i32) -> T;
}
#[derive(Copy, Clone)]
#[repr(transparent)]
pub(crate) struct SseVector {
pub(crate) v: __m128i,
}
impl From<i16> for SseVector {
#[inline(always)]
fn from(v: i16) -> Self {
SseVector {
v: unsafe { _mm_set1_epi16(v) },
}
}
}
impl Sub<SseVector> for SseVector {
type Output = Self;
#[inline(always)]
fn sub(self, rhs: SseVector) -> Self::Output {
SseVector {
v: unsafe { _mm_sub_epi16(self.v, rhs.v) },
}
}
}
impl Add<SseVector> for SseVector {
type Output = Self;
#[inline(always)]
fn add(self, rhs: SseVector) -> Self::Output {
SseVector {
v: unsafe { _mm_add_epi16(self.v, rhs.v) },
}
}
}
impl Mul<SseVector> for SseVector {
type Output = Self;
#[inline(always)]
fn mul(self, rhs: SseVector) -> Self::Output {
SseVector {
v: unsafe { _mm_mulhrs_epi16(self.v, rhs.v) },
}
}
}
impl FusedMultiplyAdd<SseVector> for SseVector {
#[inline(always)]
fn mla(&self, b: SseVector, c: SseVector) -> SseVector {
SseVector {
v: unsafe { _mm_add_epi16(self.v, _mm_mulhrs_epi16(b.v, c.v)) },
}
}
}
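// `Mul` lowers to `_mm_mulhrs_epi16`, i.e. (a * b + 2^14) >> 15 per lane,
// which is a rounded Q0.15 fixed-point multiply; `mla` fuses it with an add.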
struct TetrahedralSseQ0_15FetchVector<'a, const GRID_SIZE: usize> {
cube: &'a [SseAlignedI16x4],
}
impl<const GRID_SIZE: usize> Fetcher<SseVector> for TetrahedralSseQ0_15FetchVector<'_, GRID_SIZE> {
#[inline(always)]
fn fetch(&self, x: i32, y: i32, z: i32) -> SseVector {
let offset = (x as u32 * (GRID_SIZE as u32 * GRID_SIZE as u32)
+ y as u32 * GRID_SIZE as u32
+ z as u32) as usize;
let jx = unsafe { self.cube.get_unchecked(offset..) };
SseVector {
v: unsafe { _mm_loadu_si64(jx.as_ptr() as *const _) },
}
}
}
pub(crate) trait SseMdInterpolationQ0_15<'a, const GRID_SIZE: usize> {
fn new(table: &'a [SseAlignedI16x4]) -> Self;
fn inter3_sse<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
) -> SseVector;
}
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> TetrahedralSseQ0_15<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
r: impl Fetcher<SseVector>,
) -> SseVector {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let rx = lut_r.w;
let ry = lut_g.w;
let rz = lut_b.w;
let c0 = r.fetch(x, y, z);
let c2;
let c1;
let c3;
if rx >= ry {
if ry >= rz {
//rx >= ry && ry >= rz
c1 = r.fetch(x_n, y, z) - c0;
c2 = r.fetch(x_n, y_n, z) - r.fetch(x_n, y, z);
c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
} else if rx >= rz {
//rx >= rz && rz >= ry
c1 = r.fetch(x_n, y, z) - c0;
c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
c3 = r.fetch(x_n, y, z_n) - r.fetch(x_n, y, z);
} else {
//rz > rx && rx >= ry
c1 = r.fetch(x_n, y, z_n) - r.fetch(x, y, z_n);
c2 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y, z_n);
c3 = r.fetch(x, y, z_n) - c0;
}
} else if rx >= rz {
//ry > rx && rx >= rz
c1 = r.fetch(x_n, y_n, z) - r.fetch(x, y_n, z);
c2 = r.fetch(x, y_n, z) - c0;
c3 = r.fetch(x_n, y_n, z_n) - r.fetch(x_n, y_n, z);
} else if ry >= rz {
//ry >= rz && rz > rx
c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
c2 = r.fetch(x, y_n, z) - c0;
c3 = r.fetch(x, y_n, z_n) - r.fetch(x, y_n, z);
} else {
//rz > ry && ry > rx
c1 = r.fetch(x_n, y_n, z_n) - r.fetch(x, y_n, z_n);
c2 = r.fetch(x, y_n, z_n) - r.fetch(x, y, z_n);
c3 = r.fetch(x, y, z_n) - c0;
}
let s0 = c0.mla(c1, SseVector::from(rx));
let s1 = s0.mla(c2, SseVector::from(ry));
s1.mla(c3, SseVector::from(rz))
}
}
macro_rules! define_inter_sse {
($interpolator: ident) => {
impl<'a, const GRID_SIZE: usize> SseMdInterpolationQ0_15<'a, GRID_SIZE>
for $interpolator<'a, GRID_SIZE>
{
#[inline]
fn new(table: &'a [SseAlignedI16x4]) -> Self {
Self { cube: table }
}
#[inline(always)]
fn inter3_sse<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
) -> SseVector {
self.interpolate(
in_r,
in_g,
in_b,
lut,
TetrahedralSseQ0_15FetchVector::<GRID_SIZE> { cube: self.cube },
)
}
}
};
}
#[cfg(feature = "options")]
define_inter_sse!(TetrahedralSseQ0_15);
#[cfg(feature = "options")]
define_inter_sse!(PyramidalSseQ0_15);
#[cfg(feature = "options")]
define_inter_sse!(PrismaticSseQ0_15);
define_inter_sse!(TrilinearSseQ0_15);
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> PyramidalSseQ0_15<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
r: impl Fetcher<SseVector>,
) -> SseVector {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let c0 = r.fetch(x, y, z);
if dr > db && dg > db {
let x0 = r.fetch(x_n, y_n, z_n);
let x1 = r.fetch(x_n, y_n, z);
let x2 = r.fetch(x_n, y, z);
let x3 = r.fetch(x, y_n, z);
let c1 = x0 - x1;
let c2 = x2 - c0;
let c3 = x3 - c0;
let c4 = c0 - x3 - x2 + x1;
let s0 = c0.mla(c1, SseVector::from(db));
let s1 = s0.mla(c2, SseVector::from(dr));
let s2 = s1.mla(c3, SseVector::from(dg));
s2.mla(c4, SseVector::from(dr) * SseVector::from(dg))
} else if db > dr && dg > dr {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y_n, z_n);
let x2 = r.fetch(x, y_n, z_n);
let x3 = r.fetch(x, y_n, z);
let c1 = x0 - c0;
let c2 = x1 - x2;
let c3 = x3 - c0;
let c4 = c0 - x3 - x0 + x2;
let s0 = c0.mla(c1, SseVector::from(db));
let s1 = s0.mla(c2, SseVector::from(dr));
let s2 = s1.mla(c3, SseVector::from(dg));
s2.mla(c4, SseVector::from(dg) * SseVector::from(db))
} else {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y, z);
let x2 = r.fetch(x_n, y, z_n);
let x3 = r.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - c0;
let c3 = x3 - x2;
let c4 = c0 - x1 - x0 + x2;
let s0 = c0.mla(c1, SseVector::from(db));
let s1 = s0.mla(c2, SseVector::from(dr));
let s2 = s1.mla(c3, SseVector::from(dg));
s2.mla(c4, SseVector::from(db) * SseVector::from(dr))
}
}
}
#[cfg(feature = "options")]
impl<const GRID_SIZE: usize> PrismaticSseQ0_15<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
r: impl Fetcher<SseVector>,
) -> SseVector {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
let c0 = r.fetch(x, y, z);
if db > dr {
let x0 = r.fetch(x, y, z_n);
let x1 = r.fetch(x_n, y, z_n);
let x2 = r.fetch(x, y_n, z);
let x3 = r.fetch(x, y_n, z_n);
let x4 = r.fetch(x_n, y_n, z_n);
let c1 = x0 - c0;
let c2 = x1 - x0;
let c3 = x2 - c0;
let c4 = c0 - x2 - x0 + x3;
let c5 = x0 - x3 - x1 + x4;
let s0 = c0.mla(c1, SseVector::from(db));
let s1 = s0.mla(c2, SseVector::from(dr));
let s2 = s1.mla(c3, SseVector::from(dg));
let s3 = s2.mla(c4, SseVector::from(dg) * SseVector::from(db));
s3.mla(c5, SseVector::from(dr) * SseVector::from(dg))
} else {
let x0 = r.fetch(x_n, y, z);
let x1 = r.fetch(x_n, y, z_n);
let x2 = r.fetch(x, y_n, z);
let x3 = r.fetch(x_n, y_n, z);
let x4 = r.fetch(x_n, y_n, z_n);
let c1 = x1 - x0;
let c2 = x0 - c0;
let c3 = x2 - c0;
let c4 = x0 - x3 - x1 + x4;
let c5 = c0 - x2 - x0 + x3;
let s0 = c0.mla(c1, SseVector::from(db));
let s1 = s0.mla(c2, SseVector::from(dr));
let s2 = s1.mla(c3, SseVector::from(dg));
let s3 = s2.mla(c4, SseVector::from(dg) * SseVector::from(db));
s3.mla(c5, SseVector::from(dr) * SseVector::from(dg))
}
}
}
impl<const GRID_SIZE: usize> TrilinearSseQ0_15<'_, GRID_SIZE> {
#[inline(always)]
fn interpolate<U: AsPrimitive<usize>, const BINS: usize>(
&self,
in_r: U,
in_g: U,
in_b: U,
lut: &[BarycentricWeight<i16>; BINS],
r: impl Fetcher<SseVector>,
) -> SseVector {
let lut_r = lut[in_r.as_()];
let lut_g = lut[in_g.as_()];
let lut_b = lut[in_b.as_()];
let x: i32 = lut_r.x;
let y: i32 = lut_g.x;
let z: i32 = lut_b.x;
let x_n: i32 = lut_r.x_n;
let y_n: i32 = lut_g.x_n;
let z_n: i32 = lut_b.x_n;
let dr = lut_r.w;
let dg = lut_g.w;
let db = lut_b.w;
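        // Weights are Q0.15, so the complementary weight (1 - w) becomes
        // Q_MAX - w in fixed point.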
const Q_MAX: i16 = ((1i32 << 15i32) - 1) as i16;
let q_max = SseVector::from(Q_MAX);
let w0 = SseVector::from(dr);
let w1 = SseVector::from(dg);
let w2 = SseVector::from(db);
let dx = q_max - SseVector::from(dr);
let dy = q_max - SseVector::from(dg);
let dz = q_max - SseVector::from(db);
let c000 = r.fetch(x, y, z);
let c100 = r.fetch(x_n, y, z);
let c010 = r.fetch(x, y_n, z);
let c110 = r.fetch(x_n, y_n, z);
let c001 = r.fetch(x, y, z_n);
let c101 = r.fetch(x_n, y, z_n);
let c011 = r.fetch(x, y_n, z_n);
let c111 = r.fetch(x_n, y_n, z_n);
let c00 = (c000 * dx).mla(c100, w0);
let c10 = (c010 * dx).mla(c110, w0);
let c01 = (c001 * dx).mla(c101, w0);
let c11 = (c011 * dx).mla(c111, w0);
let c0 = (c00 * dy).mla(c10, w1);
let c1 = (c01 * dy).mla(c11, w1);
(c0 * dz).mla(c1, w2)
}
}


@@ -0,0 +1,330 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::interpolator::BarycentricWeight;
use crate::conversions::lut_transforms::Lut4x3Factory;
use crate::conversions::sse::interpolator::*;
use crate::conversions::sse::interpolator_q0_15::SseAlignedI16x4;
use crate::conversions::sse::lut4_to_3_q0_15::TransformLut4To3SseQ0_15;
use crate::transform::PointeeSizeExpressible;
use crate::{
BarycentricWeightScale, CmsError, DataColorSpace, InterpolationMethod, Layout,
TransformExecutor, TransformOptions,
};
use num_traits::AsPrimitive;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::marker::PhantomData;
struct TransformLut4To3Sse<
T,
U,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> {
lut: Vec<SseAlignedF32>,
_phantom: PhantomData<T>,
_phantom1: PhantomData<U>,
interpolation_method: InterpolationMethod,
weights: Box<[BarycentricWeight<f32>; BINS]>,
color_space: DataColorSpace,
is_linear: bool,
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformLut4To3Sse<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
#[allow(unused_unsafe)]
#[target_feature(enable = "sse4.1")]
unsafe fn transform_chunk<'b, Interpolator: SseMdInterpolation<'b, GRID_SIZE>>(
&'b self,
src: &[T],
dst: &mut [T],
) {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
let grid_size = GRID_SIZE as i32;
let grid_size3 = grid_size * grid_size * grid_size;
let value_scale = unsafe { _mm_set1_ps(((1 << BIT_DEPTH) - 1) as f32) };
let max_value = ((1 << BIT_DEPTH) - 1u32).as_();
for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(channels)) {
let c = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[0],
);
let m = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[1],
);
let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[2],
);
let k = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[3],
);
let k_weights = self.weights[k.as_()];
let w: i32 = k_weights.x;
let w_n: i32 = k_weights.x_n;
let t: f32 = k_weights.w;
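            // The 4D (CMYK) LUT is sampled as two 3D slices along the K axis:
            // each slice is interpolated on its own, then the two results are
            // blended linearly by the K fraction `t`.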
let table1 = &self.lut[(w * grid_size3) as usize..];
let table2 = &self.lut[(w_n * grid_size3) as usize..];
let tetrahedral1 = Interpolator::new(table1);
let tetrahedral2 = Interpolator::new(table2);
let a0 = tetrahedral1.inter3_sse(c, m, y, &self.weights).v;
let b0 = tetrahedral2.inter3_sse(c, m, y, &self.weights).v;
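            // Finite (integer) outputs are clamped and scaled to
            // [0, 2^BIT_DEPTH - 1]; float outputs keep the raw lane values.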
if T::FINITE {
unsafe {
let t0 = _mm_set1_ps(t);
let ones = _mm_set1_ps(1f32);
let hp = _mm_mul_ps(a0, _mm_sub_ps(ones, t0));
let mut v = _mm_add_ps(_mm_mul_ps(b0, t0), hp);
v = _mm_max_ps(v, _mm_setzero_ps());
v = _mm_mul_ps(v, value_scale);
v = _mm_min_ps(v, value_scale);
let jvz = _mm_cvtps_epi32(v);
let x = _mm_extract_epi32::<0>(jvz);
let y = _mm_extract_epi32::<1>(jvz);
let z = _mm_extract_epi32::<2>(jvz);
dst[cn.r_i()] = (x as u32).as_();
dst[cn.g_i()] = (y as u32).as_();
dst[cn.b_i()] = (z as u32).as_();
}
} else {
unsafe {
let t0 = _mm_set1_ps(t);
let ones = _mm_set1_ps(1f32);
let hp = _mm_mul_ps(a0, _mm_sub_ps(ones, t0));
let v = _mm_add_ps(_mm_mul_ps(b0, t0), hp);
dst[cn.r_i()] = f32::from_bits(_mm_extract_ps::<0>(v) as u32).as_();
dst[cn.g_i()] = f32::from_bits(_mm_extract_ps::<1>(v) as u32).as_();
dst[cn.b_i()] = f32::from_bits(_mm_extract_ps::<2>(v) as u32).as_();
}
}
if channels == 4 {
dst[cn.a_i()] = max_value;
}
}
}
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformExecutor<T>
for TransformLut4To3Sse<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
if src.len() % 4 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / 4;
let dst_chunks = dst.len() / channels;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
unsafe {
if self.color_space == DataColorSpace::Lab
|| (self.is_linear && self.color_space == DataColorSpace::Rgb)
|| self.color_space == DataColorSpace::Xyz
{
self.transform_chunk::<TrilinearSse<GRID_SIZE>>(src, dst);
} else {
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_chunk::<TetrahedralSse<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_chunk::<PyramidalSse<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_chunk::<PrismaticSse<GRID_SIZE>>(src, dst);
}
InterpolationMethod::Linear => {
self.transform_chunk::<TrilinearSse<GRID_SIZE>>(src, dst);
}
}
}
}
Ok(())
}
}
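/// Factory producing the SSE4.1 CMYK -> RGB (4x3) LUT executors, choosing a
/// fixed-point or f32 path from `TransformOptions`.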
pub(crate) struct SseLut4x3Factory {}
impl Lut4x3Factory for SseLut4x3Factory {
fn make_transform_4x3<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible + 'static + Send + Sync,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
>(
lut: Vec<f32>,
options: TransformOptions,
color_space: DataColorSpace,
is_linear: bool,
) -> Box<dyn TransformExecutor<T> + Sync + Send>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, u8>,
(): LutBarycentricReduction<T, u16>,
{
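        // Fixed-point path: quantize the f32 LUT to i16 entries, scaled by
        // 2^BIT_DEPTH - 1 for integer samples or 2^14 - 1 for float samples,
        // and hand off to the Q0.15 transform.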
if options.prefer_fixed_point && BIT_DEPTH < 16 {
let q: f32 = if T::FINITE {
((1i32 << BIT_DEPTH as i32) - 1) as f32
} else {
((1i32 << 14i32) - 1) as f32
};
let lut = lut
.chunks_exact(3)
.map(|x| {
SseAlignedI16x4([
(x[0] * q).round() as i16,
(x[1] * q).round() as i16,
(x[2] * q).round() as i16,
0,
])
})
.collect::<Vec<_>>();
return match options.barycentric_weight_scale {
BarycentricWeightScale::Low => Box::new(TransformLut4To3SseQ0_15::<
T,
u8,
LAYOUT,
GRID_SIZE,
BIT_DEPTH,
256,
256,
> {
lut,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<i16>::create_ranged_256::<GRID_SIZE>(),
_phantom: PhantomData,
_phantom1: PhantomData,
color_space,
is_linear,
}),
#[cfg(feature = "options")]
BarycentricWeightScale::High => Box::new(TransformLut4To3SseQ0_15::<
T,
u16,
LAYOUT,
GRID_SIZE,
BIT_DEPTH,
65536,
65536,
> {
lut,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<i16>::create_binned::<GRID_SIZE, 65536>(),
_phantom: PhantomData,
_phantom1: PhantomData,
color_space,
is_linear,
}),
};
}
let lut = lut
.chunks_exact(3)
.map(|x| SseAlignedF32([x[0], x[1], x[2], 0f32]))
.collect::<Vec<_>>();
match options.barycentric_weight_scale {
BarycentricWeightScale::Low => {
Box::new(
TransformLut4To3Sse::<T, u8, LAYOUT, GRID_SIZE, BIT_DEPTH, 256, 256> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_ranged_256::<GRID_SIZE>(),
color_space,
is_linear,
},
)
}
#[cfg(feature = "options")]
BarycentricWeightScale::High => {
Box::new(
TransformLut4To3Sse::<T, u16, LAYOUT, GRID_SIZE, BIT_DEPTH, 65536, 65536> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_binned::<GRID_SIZE, 65536>(),
color_space,
is_linear,
},
)
}
}
}
}

@@ -0,0 +1,212 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::interpolator::BarycentricWeight;
use crate::conversions::sse::interpolator_q0_15::*;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, DataColorSpace, InterpolationMethod, Layout, TransformExecutor};
use num_traits::AsPrimitive;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::marker::PhantomData;
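/// CMYK -> RGB 4D LUT transform executing in Q0.15 fixed point with SSE4.1.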
pub(crate) struct TransformLut4To3SseQ0_15<
T,
U,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> {
pub(crate) lut: Vec<SseAlignedI16x4>,
pub(crate) _phantom: PhantomData<T>,
pub(crate) _phantom1: PhantomData<U>,
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) weights: Box<[BarycentricWeight<i16>; BINS]>,
pub(crate) color_space: DataColorSpace,
pub(crate) is_linear: bool,
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformLut4To3SseQ0_15<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
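    /// Fixed-point variant of the 4D LUT walk: slice blending and clamping
    /// happen on i16 lanes, keeping intermediates in Q15.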
#[allow(unused_unsafe)]
#[target_feature(enable = "sse4.1")]
unsafe fn transform_chunk<'b, Interpolator: SseMdInterpolationQ0_15<'b, GRID_SIZE>>(
&'b self,
src: &[T],
dst: &mut [T],
) {
unsafe {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
let grid_size = GRID_SIZE as i32;
let grid_size3 = grid_size * grid_size * grid_size;
let f_value_scale = _mm_set1_ps(1. / ((1 << 14i32) - 1) as f32);
let max_value = ((1u32 << BIT_DEPTH) - 1).as_();
let v_max_scale = if T::FINITE {
_mm_set1_epi16(((1i32 << BIT_DEPTH) - 1) as i16)
} else {
_mm_set1_epi16(((1i32 << 14i32) - 1) as i16)
};
for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(channels)) {
let c = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[0],
);
let m = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[1],
);
let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[2],
);
let k = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[3],
);
let k_weights = self.weights[k.as_()];
let w: i32 = k_weights.x;
let w_n: i32 = k_weights.x_n;
const Q: i16 = ((1i32 << 15) - 1) as i16;
let t: i16 = k_weights.w;
let t_n: i16 = Q - t;
let table1 = &self.lut[(w * grid_size3) as usize..];
let table2 = &self.lut[(w_n * grid_size3) as usize..];
let tetrahedral1 = Interpolator::new(table1);
let tetrahedral2 = Interpolator::new(table2);
let a0 = tetrahedral1.inter3_sse(c, m, y, &self.weights).v;
let b0 = tetrahedral2.inter3_sse(c, m, y, &self.weights).v;
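                // Q15 blend of the two K slices: `_mm_mulhrs_epi16` is a
                // rounded (a * b) >> 15, so v ~= (a0 * t_n + b0 * t) / Q.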
let hp = _mm_mulhrs_epi16(_mm_set1_epi16(t_n), a0);
let v = _mm_add_epi16(hp, _mm_mulhrs_epi16(b0, _mm_set1_epi16(t)));
if T::FINITE {
let mut o = _mm_max_epi16(v, _mm_setzero_si128());
o = _mm_min_epi16(o, v_max_scale);
let x = _mm_extract_epi16::<0>(o);
let y = _mm_extract_epi16::<1>(o);
let z = _mm_extract_epi16::<2>(o);
dst[cn.r_i()] = (x as u32).as_();
dst[cn.g_i()] = (y as u32).as_();
dst[cn.b_i()] = (z as u32).as_();
} else {
let mut r = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(v));
r = _mm_mul_ps(r, f_value_scale);
dst[cn.r_i()] = f32::from_bits(_mm_extract_ps::<0>(r) as u32).as_();
dst[cn.g_i()] = f32::from_bits(_mm_extract_ps::<1>(r) as u32).as_();
dst[cn.b_i()] = f32::from_bits(_mm_extract_ps::<2>(r) as u32).as_();
}
if channels == 4 {
dst[cn.a_i()] = max_value;
}
}
}
}
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformExecutor<T>
for TransformLut4To3SseQ0_15<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
if src.len() % 4 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / 4;
let dst_chunks = dst.len() / channels;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
unsafe {
if self.color_space == DataColorSpace::Lab
|| (self.is_linear && self.color_space == DataColorSpace::Rgb)
|| self.color_space == DataColorSpace::Xyz
{
self.transform_chunk::<TrilinearSseQ0_15<GRID_SIZE>>(src, dst);
} else {
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_chunk::<TetrahedralSseQ0_15<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_chunk::<PyramidalSseQ0_15<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_chunk::<PrismaticSseQ0_15<GRID_SIZE>>(src, dst);
}
InterpolationMethod::Linear => {
self.transform_chunk::<TrilinearSseQ0_15<GRID_SIZE>>(src, dst);
}
}
}
}
Ok(())
}
}

@@ -0,0 +1,45 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
mod interpolator;
mod interpolator_q0_15;
mod lut4_to_3;
mod lut4_to_3_q0_15;
mod rgb_xyz;
mod rgb_xyz_opt;
mod rgb_xyz_q2_13;
mod rgb_xyz_q2_13_opt;
mod t_lut3_to_3;
mod t_lut3_to_3_q0_15;
pub(crate) use lut4_to_3::SseLut4x3Factory;
pub(crate) use rgb_xyz::TransformShaperRgbSse;
pub(crate) use rgb_xyz_opt::TransformShaperRgbOptSse;
pub(crate) use rgb_xyz_q2_13::TransformShaperQ2_13Sse;
pub(crate) use rgb_xyz_q2_13_opt::TransformShaperQ2_13OptSse;
pub(crate) use t_lut3_to_3::SseLut3x3Factory;

@@ -0,0 +1,154 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::TransformMatrixShaper;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
#[repr(align(16), C)]
pub(crate) struct SseAlignedU16(pub(crate) [u16; 8]);
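/// Matrix-shaper transform: linearizes each channel through its own LUT,
/// applies a 3x3 adaptation matrix in f32 SSE lanes, then encodes through
/// per-channel gamma LUTs.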
pub(crate) struct TransformShaperRgbSse<
T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> {
pub(crate) profile: TransformMatrixShaper<T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
impl<
T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> TransformShaperRgbSse<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT>
where
u32: AsPrimitive<T>,
{
#[target_feature(enable = "sse4.1")]
unsafe fn transform_impl(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
let mut temporary = SseAlignedU16([0; 8]);
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let t = self.profile.adaptation_matrix.transpose();
let scale = (GAMMA_LUT - 1) as f32;
let max_colors: T = ((1 << self.bit_depth) - 1).as_();
unsafe {
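            // Each linearized channel is broadcast and multiplied with one
            // column of the (transposed) matrix; the lane-wise sum is the
            // matrix product.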
let m0 = _mm_setr_ps(t.v[0][0], t.v[0][1], t.v[0][2], 0f32);
let m1 = _mm_setr_ps(t.v[1][0], t.v[1][1], t.v[1][2], 0f32);
let m2 = _mm_setr_ps(t.v[2][0], t.v[2][1], t.v[2][2], 0f32);
let zeros = _mm_setzero_ps();
let v_scale = _mm_set1_ps(scale);
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let rp = &self.profile.r_linear[src[src_cn.r_i()]._as_usize()];
let gp = &self.profile.g_linear[src[src_cn.g_i()]._as_usize()];
let bp = &self.profile.b_linear[src[src_cn.b_i()]._as_usize()];
let mut r = _mm_load_ss(rp);
let mut g = _mm_load_ss(gp);
let mut b = _mm_load_ss(bp);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
r = _mm_shuffle_ps::<0>(r, r);
g = _mm_shuffle_ps::<0>(g, g);
b = _mm_shuffle_ps::<0>(b, b);
let v0 = _mm_mul_ps(r, m0);
let v1 = _mm_mul_ps(g, m1);
let v2 = _mm_mul_ps(b, m2);
let mut v = _mm_add_ps(_mm_add_ps(v0, v1), v2);
v = _mm_max_ps(v, zeros);
v = _mm_mul_ps(v, v_scale);
v = _mm_min_ps(v, v_scale);
let zx = _mm_cvtps_epi32(v);
_mm_store_si128(temporary.0.as_mut_ptr() as *mut _, zx);
dst[dst_cn.r_i()] = self.profile.r_gamma[temporary.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.g_gamma[temporary.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.b_gamma[temporary.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
Ok(())
}
}
impl<
T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> TransformExecutor<T> for TransformShaperRgbSse<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
unsafe { self.transform_impl(src, dst) }
}
}

@@ -0,0 +1,153 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::TransformMatrixShaperOptimized;
use crate::conversions::sse::rgb_xyz::SseAlignedU16;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
pub(crate) struct TransformShaperRgbOptSse<
T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> {
pub(crate) profile: TransformMatrixShaperOptimized<T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
impl<
T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> TransformShaperRgbOptSse<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT>
where
u32: AsPrimitive<T>,
{
#[target_feature(enable = "sse4.1")]
unsafe fn transform_impl(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
let mut temporary = SseAlignedU16([0; 8]);
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let t = self.profile.adaptation_matrix.transpose();
let scale = (GAMMA_LUT - 1) as f32;
let max_colors: T = ((1 << self.bit_depth) - 1).as_();
unsafe {
let m0 = _mm_setr_ps(t.v[0][0], t.v[0][1], t.v[0][2], 0f32);
let m1 = _mm_setr_ps(t.v[1][0], t.v[1][1], t.v[1][2], 0f32);
let m2 = _mm_setr_ps(t.v[2][0], t.v[2][1], t.v[2][2], 0f32);
let zeros = _mm_setzero_ps();
let v_scale = _mm_set1_ps(scale);
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let rp = &self.profile.linear[src[src_cn.r_i()]._as_usize()];
let gp = &self.profile.linear[src[src_cn.g_i()]._as_usize()];
let bp = &self.profile.linear[src[src_cn.b_i()]._as_usize()];
let mut r = _mm_load_ss(rp);
let mut g = _mm_load_ss(gp);
let mut b = _mm_load_ss(bp);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
r = _mm_shuffle_ps::<0>(r, r);
g = _mm_shuffle_ps::<0>(g, g);
b = _mm_shuffle_ps::<0>(b, b);
let v0 = _mm_mul_ps(r, m0);
let v1 = _mm_mul_ps(g, m1);
let v2 = _mm_mul_ps(b, m2);
let mut v = _mm_add_ps(_mm_add_ps(v0, v1), v2);
v = _mm_max_ps(v, zeros);
v = _mm_mul_ps(v, v_scale);
v = _mm_min_ps(v, v_scale);
let zx = _mm_cvtps_epi32(v);
_mm_store_si128(temporary.0.as_mut_ptr() as *mut _, zx);
dst[dst_cn.r_i()] = self.profile.gamma[temporary.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.gamma[temporary.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.gamma[temporary.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
Ok(())
}
}
impl<
T: Clone + Copy + 'static + PointeeSizeExpressible + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
> TransformExecutor<T>
for TransformShaperRgbOptSse<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
unsafe { self.transform_impl(src, dst) }
}
}

@@ -0,0 +1,167 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::rgbxyz_fixed::TransformMatrixShaperFixedPoint;
use crate::conversions::sse::rgb_xyz::SseAlignedU16;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
pub(crate) struct TransformShaperQ2_13Sse<
T: Copy,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> {
pub(crate) profile: TransformMatrixShaperFixedPoint<i32, T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
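/// Loads a single `i32` into the low lane of an `__m128i` (upper lanes are
/// zeroed) by reusing the scalar `_mm_load_ss` float load.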
#[inline(always)]
pub(crate) unsafe fn _xmm_load_epi32(f: &i32) -> __m128i {
let float_ref: &f32 = unsafe { &*(f as *const i32 as *const f32) };
unsafe { _mm_castps_si128(_mm_load_ss(float_ref)) }
}
impl<
T: Copy + PointeeSizeExpressible + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> TransformShaperQ2_13Sse<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT, PRECISION>
where
u32: AsPrimitive<T>,
{
#[target_feature(enable = "sse4.1")]
unsafe fn transform_impl(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
let mut temporary = SseAlignedU16([0; 8]);
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let t = self.profile.adaptation_matrix.transpose();
let max_colors = ((1 << self.bit_depth) - 1).as_();
unsafe {
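            // Pack (r, g) and (b, rounding bias) into adjacent 16-bit lanes
            // so each `_mm_madd_epi16` yields r*m + g*m' (and b*m'' + rnd*1)
            // per 32-bit lane; the arithmetic shift below removes the Q2.13
            // scale.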
let m0 = _mm_setr_epi16(
t.v[0][0], t.v[1][0], t.v[0][1], t.v[1][1], t.v[0][2], t.v[1][2], 0, 0,
);
let m2 = _mm_setr_epi16(t.v[2][0], 1, t.v[2][1], 1, t.v[2][2], 1, 0, 0);
let rnd_val = ((1i32 << (PRECISION - 1)) as i16).to_ne_bytes();
let rnd = _mm_set1_epi32(i32::from_ne_bytes([0, 0, rnd_val[0], rnd_val[1]]));
let v_max_value = _mm_set1_epi32(GAMMA_LUT as i32 - 1);
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let rp = &self.profile.r_linear[src[src_cn.r_i()]._as_usize()];
let gp = &self.profile.g_linear[src[src_cn.g_i()]._as_usize()];
let bp = &self.profile.b_linear[src[src_cn.b_i()]._as_usize()];
let mut r = _xmm_load_epi32(rp);
let mut g = _xmm_load_epi32(gp);
let mut b = _xmm_load_epi32(bp);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
r = _mm_shuffle_epi32::<0>(r);
g = _mm_shuffle_epi32::<0>(g);
b = _mm_shuffle_epi32::<0>(b);
g = _mm_slli_epi32::<16>(g);
let zrg0 = _mm_or_si128(r, g);
let zbz0 = _mm_or_si128(b, rnd);
let v0 = _mm_madd_epi16(zrg0, m0);
let v1 = _mm_madd_epi16(zbz0, m2);
let mut v = _mm_add_epi32(v0, v1);
v = _mm_srai_epi32::<PRECISION>(v);
v = _mm_max_epi32(v, _mm_setzero_si128());
v = _mm_min_epi32(v, v_max_value);
_mm_store_si128(temporary.0.as_mut_ptr() as *mut _, v);
dst[dst_cn.r_i()] = self.profile.r_gamma[temporary.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.g_gamma[temporary.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.b_gamma[temporary.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
Ok(())
}
}
impl<
T: Copy + PointeeSizeExpressible + 'static + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> TransformExecutor<T>
for TransformShaperQ2_13Sse<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT, PRECISION>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
unsafe { self.transform_impl(src, dst) }
}
}

@@ -0,0 +1,162 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::rgbxyz_fixed::TransformMatrixShaperFixedPointOpt;
use crate::conversions::sse::rgb_xyz::SseAlignedU16;
use crate::conversions::sse::rgb_xyz_q2_13::_xmm_load_epi32;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, Layout, TransformExecutor};
use num_traits::AsPrimitive;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
pub(crate) struct TransformShaperQ2_13OptSse<
T: Copy,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> {
pub(crate) profile: TransformMatrixShaperFixedPointOpt<i32, i16, T, LINEAR_CAP>,
pub(crate) bit_depth: usize,
}
impl<
T: Copy + PointeeSizeExpressible + 'static,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> TransformShaperQ2_13OptSse<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT, PRECISION>
where
u32: AsPrimitive<T>,
{
#[target_feature(enable = "sse4.1")]
unsafe fn transform_impl(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let dst_cn = Layout::from(DST_LAYOUT);
let src_channels = src_cn.channels();
let dst_channels = dst_cn.channels();
let mut temporary = SseAlignedU16([0; 8]);
if src.len() / src_channels != dst.len() / dst_channels {
return Err(CmsError::LaneSizeMismatch);
}
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let t = self.profile.adaptation_matrix.transpose();
let max_colors = ((1 << self.bit_depth) - 1).as_();
unsafe {
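            // Same packed Q2.13 madd scheme as `TransformShaperQ2_13Sse`, but
            // with a single linear table and gamma table shared by all three
            // channels.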
let m0 = _mm_setr_epi16(
t.v[0][0], t.v[1][0], t.v[0][1], t.v[1][1], t.v[0][2], t.v[1][2], 0, 0,
);
let m2 = _mm_setr_epi16(t.v[2][0], 1, t.v[2][1], 1, t.v[2][2], 1, 0, 0);
let rnd_val = ((1i32 << (PRECISION - 1)) as i16).to_ne_bytes();
let rnd = _mm_set1_epi32(i32::from_ne_bytes([0, 0, rnd_val[0], rnd_val[1]]));
let v_max_value = _mm_set1_epi32(GAMMA_LUT as i32 - 1);
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let rp = &self.profile.linear[src[src_cn.r_i()]._as_usize()];
let gp = &self.profile.linear[src[src_cn.g_i()]._as_usize()];
let bp = &self.profile.linear[src[src_cn.b_i()]._as_usize()];
let mut r = _xmm_load_epi32(rp);
let mut g = _xmm_load_epi32(gp);
let mut b = _xmm_load_epi32(bp);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_colors
};
r = _mm_shuffle_epi32::<0>(r);
g = _mm_shuffle_epi32::<0>(g);
b = _mm_shuffle_epi32::<0>(b);
g = _mm_slli_epi32::<16>(g);
let zrg0 = _mm_or_si128(r, g);
let zbz0 = _mm_or_si128(b, rnd);
let v0 = _mm_madd_epi16(zrg0, m0);
let v1 = _mm_madd_epi16(zbz0, m2);
let mut v = _mm_add_epi32(v0, v1);
v = _mm_srai_epi32::<PRECISION>(v);
v = _mm_max_epi32(v, _mm_setzero_si128());
v = _mm_min_epi32(v, v_max_value);
_mm_store_si128(temporary.0.as_mut_ptr() as *mut _, v);
dst[dst_cn.r_i()] = self.profile.gamma[temporary.0[0] as usize];
dst[dst_cn.g_i()] = self.profile.gamma[temporary.0[2] as usize];
dst[dst_cn.b_i()] = self.profile.gamma[temporary.0[4] as usize];
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
Ok(())
}
}
impl<
T: Copy + PointeeSizeExpressible + 'static + Default,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const LINEAR_CAP: usize,
const GAMMA_LUT: usize,
const PRECISION: i32,
> TransformExecutor<T>
for TransformShaperQ2_13OptSse<T, SRC_LAYOUT, DST_LAYOUT, LINEAR_CAP, GAMMA_LUT, PRECISION>
where
u32: AsPrimitive<T>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
unsafe { self.transform_impl(src, dst) }
}
}

@@ -0,0 +1,343 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::interpolator::BarycentricWeight;
use crate::conversions::lut_transforms::Lut3x3Factory;
use crate::conversions::sse::interpolator::*;
use crate::conversions::sse::interpolator_q0_15::SseAlignedI16x4;
use crate::conversions::sse::t_lut3_to_3_q0_15::TransformLut3x3SseQ0_15;
use crate::transform::PointeeSizeExpressible;
use crate::{
BarycentricWeightScale, CmsError, DataColorSpace, InterpolationMethod, Layout,
TransformExecutor, TransformOptions,
};
use num_traits::AsPrimitive;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::marker::PhantomData;
struct TransformLut3x3Sse<
T,
U,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> {
lut: Vec<SseAlignedF32>,
_phantom: PhantomData<T>,
_phantom2: PhantomData<U>,
interpolation_method: InterpolationMethod,
weights: Box<[BarycentricWeight<f32>; BINS]>,
color_space: DataColorSpace,
is_linear: bool,
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformLut3x3Sse<T, U, SRC_LAYOUT, DST_LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
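    /// Transforms one RGB(A) pixel per iteration through the 3D LUT; alpha is
    /// forwarded from the source or set to the bit-depth maximum when absent.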
#[allow(unused_unsafe)]
#[target_feature(enable = "sse4.1")]
unsafe fn transform_chunk<'b, Interpolator: SseMdInterpolation<'b, GRID_SIZE>>(
&'b self,
src: &[T],
dst: &mut [T],
) {
let src_cn = Layout::from(SRC_LAYOUT);
let src_channels = src_cn.channels();
let dst_cn = Layout::from(DST_LAYOUT);
let dst_channels = dst_cn.channels();
let value_scale = unsafe { _mm_set1_ps(((1 << BIT_DEPTH) - 1) as f32) };
let max_value = ((1u32 << BIT_DEPTH) - 1).as_();
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let x = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.r_i()],
);
let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.g_i()],
);
let z = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.b_i()],
);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_value
};
let tetrahedral = Interpolator::new(&self.lut);
let v = tetrahedral.inter3_sse(x, y, z, &self.weights);
if T::FINITE {
unsafe {
let mut r = _mm_mul_ps(v.v, value_scale);
r = _mm_max_ps(r, _mm_setzero_ps());
r = _mm_min_ps(r, value_scale);
let jvz = _mm_cvtps_epi32(r);
let x = _mm_extract_epi32::<0>(jvz);
let y = _mm_extract_epi32::<1>(jvz);
let z = _mm_extract_epi32::<2>(jvz);
dst[dst_cn.r_i()] = (x as u32).as_();
dst[dst_cn.g_i()] = (y as u32).as_();
dst[dst_cn.b_i()] = (z as u32).as_();
}
} else {
unsafe {
dst[dst_cn.r_i()] = f32::from_bits(_mm_extract_ps::<0>(v.v) as u32).as_();
dst[dst_cn.g_i()] = f32::from_bits(_mm_extract_ps::<1>(v.v) as u32).as_();
dst[dst_cn.b_i()] = f32::from_bits(_mm_extract_ps::<2>(v.v) as u32).as_();
}
}
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformExecutor<T>
for TransformLut3x3Sse<
T,
U,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
BINS,
BARYCENTRIC_BINS,
>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let src_channels = src_cn.channels();
let dst_cn = Layout::from(DST_LAYOUT);
let dst_channels = dst_cn.channels();
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / src_channels;
let dst_chunks = dst.len() / dst_channels;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
unsafe {
if self.color_space == DataColorSpace::Lab
|| (self.is_linear && self.color_space == DataColorSpace::Rgb)
|| self.color_space == DataColorSpace::Xyz
{
self.transform_chunk::<TrilinearSse<GRID_SIZE>>(src, dst);
} else {
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_chunk::<TetrahedralSse<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_chunk::<PyramidalSse<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_chunk::<PrismaticSse<GRID_SIZE>>(src, dst);
}
InterpolationMethod::Linear => {
self.transform_chunk::<TrilinearSse<GRID_SIZE>>(src, dst);
}
}
}
}
Ok(())
}
}
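/// Factory producing the SSE4.1 3x3 LUT executors, choosing a fixed-point or
/// f32 path from `TransformOptions`.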
pub(crate) struct SseLut3x3Factory {}
impl Lut3x3Factory for SseLut3x3Factory {
fn make_transform_3x3<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible + 'static + Send + Sync,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
>(
lut: Vec<f32>,
options: TransformOptions,
color_space: DataColorSpace,
is_linear: bool,
) -> Box<dyn TransformExecutor<T> + Sync + Send>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, u8>,
(): LutBarycentricReduction<T, u16>,
{
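        // Fixed-point path: same i16 quantization as in `SseLut4x3Factory`.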
if options.prefer_fixed_point && BIT_DEPTH < 16 {
let q: f32 = if T::FINITE {
((1i32 << BIT_DEPTH as i32) - 1) as f32
} else {
((1i32 << 14i32) - 1) as f32
};
let lut = lut
.chunks_exact(3)
.map(|x| {
SseAlignedI16x4([
(x[0] * q).round() as i16,
(x[1] * q).round() as i16,
(x[2] * q).round() as i16,
0,
])
})
.collect::<Vec<_>>();
return match options.barycentric_weight_scale {
BarycentricWeightScale::Low => Box::new(TransformLut3x3SseQ0_15::<
T,
u8,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
256,
256,
> {
lut,
_phantom: PhantomData,
_phantom2: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<i16>::create_ranged_256::<GRID_SIZE>(),
color_space,
is_linear,
}),
#[cfg(feature = "options")]
BarycentricWeightScale::High => Box::new(TransformLut3x3SseQ0_15::<
T,
u16,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
65536,
65536,
> {
lut,
_phantom: PhantomData,
_phantom2: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<i16>::create_binned::<GRID_SIZE, 65536>(),
color_space,
is_linear,
}),
};
}
let lut = lut
.chunks_exact(3)
.map(|x| SseAlignedF32([x[0], x[1], x[2], 0f32]))
.collect::<Vec<_>>();
match options.barycentric_weight_scale {
BarycentricWeightScale::Low => Box::new(TransformLut3x3Sse::<
T,
u8,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
256,
256,
> {
lut,
_phantom: PhantomData,
_phantom2: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_ranged_256::<GRID_SIZE>(),
color_space,
is_linear,
}),
#[cfg(feature = "options")]
BarycentricWeightScale::High => Box::new(TransformLut3x3Sse::<
T,
u16,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
65536,
65536,
> {
lut,
_phantom: PhantomData,
_phantom2: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_binned::<GRID_SIZE, 65536>(),
color_space,
is_linear,
}),
}
}
}

@@ -0,0 +1,225 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::interpolator::BarycentricWeight;
use crate::conversions::sse::interpolator_q0_15::*;
use crate::transform::PointeeSizeExpressible;
use crate::{CmsError, DataColorSpace, InterpolationMethod, Layout, TransformExecutor};
use num_traits::AsPrimitive;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::marker::PhantomData;
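/// RGB(A) -> RGB(A) 3D LUT transform executing in Q0.15 fixed point with
/// SSE4.1.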
pub(crate) struct TransformLut3x3SseQ0_15<
T,
U,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> {
pub(crate) lut: Vec<SseAlignedI16x4>,
pub(crate) _phantom: PhantomData<T>,
pub(crate) _phantom2: PhantomData<U>,
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) weights: Box<[BarycentricWeight<i16>; BINS]>,
pub(crate) color_space: DataColorSpace,
pub(crate) is_linear: bool,
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
>
TransformLut3x3SseQ0_15<
T,
U,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
BINS,
BARYCENTRIC_BINS,
>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
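    /// Fixed-point variant of the 3D LUT walk: integer outputs are clamped on
    /// i16 lanes, float outputs are rescaled from Q14 by 1 / (2^14 - 1).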
#[allow(unused_unsafe)]
#[target_feature(enable = "sse4.1")]
unsafe fn transform_chunk<'b, Interpolator: SseMdInterpolationQ0_15<'b, GRID_SIZE>>(
&'b self,
src: &[T],
dst: &mut [T],
) {
unsafe {
let src_cn = Layout::from(SRC_LAYOUT);
let src_channels = src_cn.channels();
let dst_cn = Layout::from(DST_LAYOUT);
let dst_channels = dst_cn.channels();
let f_value_scale = _mm_set1_ps(1. / ((1 << 14i32) - 1) as f32);
let max_value = ((1u32 << BIT_DEPTH) - 1).as_();
let v_max_scale = if T::FINITE {
_mm_set1_epi16(((1i32 << BIT_DEPTH) - 1) as i16)
} else {
_mm_set1_epi16(((1i32 << 14i32) - 1) as i16)
};
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let x = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.r_i()],
);
let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.g_i()],
);
let z = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.b_i()],
);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_value
};
let tetrahedral = Interpolator::new(&self.lut);
let v = tetrahedral.inter3_sse(x, y, z, &self.weights);
if T::FINITE {
let mut o = _mm_max_epi16(v.v, _mm_setzero_si128());
o = _mm_min_epi16(o, v_max_scale);
let x = _mm_extract_epi16::<0>(o);
let y = _mm_extract_epi16::<1>(o);
let z = _mm_extract_epi16::<2>(o);
dst[dst_cn.r_i()] = (x as u32).as_();
dst[dst_cn.g_i()] = (y as u32).as_();
dst[dst_cn.b_i()] = (z as u32).as_();
} else {
let mut r = _mm_cvtepi32_ps(_mm_cvtepi16_epi32(v.v));
r = _mm_mul_ps(r, f_value_scale);
dst[dst_cn.r_i()] = f32::from_bits(_mm_extract_ps::<0>(r) as u32).as_();
dst[dst_cn.g_i()] = f32::from_bits(_mm_extract_ps::<1>(r) as u32).as_();
dst[dst_cn.b_i()] = f32::from_bits(_mm_extract_ps::<2>(r) as u32).as_();
}
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
}
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformExecutor<T>
for TransformLut3x3SseQ0_15<
T,
U,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
BINS,
BARYCENTRIC_BINS,
>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let src_channels = src_cn.channels();
let dst_cn = Layout::from(DST_LAYOUT);
let dst_channels = dst_cn.channels();
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / src_channels;
let dst_chunks = dst.len() / dst_channels;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
unsafe {
if self.color_space == DataColorSpace::Lab
|| (self.is_linear && self.color_space == DataColorSpace::Rgb)
|| self.color_space == DataColorSpace::Xyz
{
self.transform_chunk::<TrilinearSseQ0_15<GRID_SIZE>>(src, dst);
} else {
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
self.transform_chunk::<TetrahedralSseQ0_15<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
self.transform_chunk::<PyramidalSseQ0_15<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
self.transform_chunk::<PrismaticSseQ0_15<GRID_SIZE>>(src, dst);
}
InterpolationMethod::Linear => {
self.transform_chunk::<TrilinearSseQ0_15<GRID_SIZE>>(src, dst);
}
}
}
}
Ok(())
}
}

@@ -0,0 +1,261 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#![allow(dead_code)]
use crate::conversions::LutBarycentricReduction;
use crate::conversions::interpolator::{BarycentricWeight, MultidimensionalInterpolation};
use crate::conversions::lut_transforms::Lut3x3Factory;
use crate::transform::PointeeSizeExpressible;
use crate::{
BarycentricWeightScale, CmsError, DataColorSpace, InterpolationMethod, Layout,
TransformExecutor, TransformOptions,
};
use num_traits::AsPrimitive;
use std::marker::PhantomData;
pub(crate) struct TransformLut3x3<
T,
U,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> {
pub(crate) lut: Vec<f32>,
pub(crate) _phantom: PhantomData<T>,
pub(crate) _phantom1: PhantomData<U>,
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) weights: Box<[BarycentricWeight<f32>; BINS]>,
pub(crate) color_space: DataColorSpace,
pub(crate) is_linear: bool,
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformLut3x3<T, U, SRC_LAYOUT, DST_LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
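    /// Portable scalar 3D LUT walk backing `DefaultLut3x3Factory`.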
#[inline(always)]
fn transform_chunk<'b, Tetrahedral: MultidimensionalInterpolation<'b, GRID_SIZE>>(
&'b self,
src: &[T],
dst: &mut [T],
) {
let src_cn = Layout::from(SRC_LAYOUT);
let src_channels = src_cn.channels();
let dst_cn = Layout::from(DST_LAYOUT);
let dst_channels = dst_cn.channels();
let value_scale = ((1 << BIT_DEPTH) - 1) as f32;
let max_value = ((1u32 << BIT_DEPTH) - 1).as_();
for (src, dst) in src
.chunks_exact(src_channels)
.zip(dst.chunks_exact_mut(dst_channels))
{
let x = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.r_i()],
);
let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.g_i()],
);
let z = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[src_cn.b_i()],
);
let a = if src_channels == 4 {
src[src_cn.a_i()]
} else {
max_value
};
let tetrahedral = Tetrahedral::new(&self.lut);
let v = tetrahedral.inter3(x, y, z, &self.weights);
if T::FINITE {
let r = v * value_scale + 0.5;
dst[dst_cn.r_i()] = r.v[0].min(value_scale).max(0.).as_();
dst[dst_cn.g_i()] = r.v[1].min(value_scale).max(0.).as_();
dst[dst_cn.b_i()] = r.v[2].min(value_scale).max(0.).as_();
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
} else {
dst[dst_cn.r_i()] = v.v[0].as_();
dst[dst_cn.g_i()] = v.v[1].as_();
dst[dst_cn.b_i()] = v.v[2].as_();
if dst_channels == 4 {
dst[dst_cn.a_i()] = a;
}
}
}
}
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformExecutor<T>
for TransformLut3x3<T, U, SRC_LAYOUT, DST_LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let src_cn = Layout::from(SRC_LAYOUT);
let src_channels = src_cn.channels();
let dst_cn = Layout::from(DST_LAYOUT);
let dst_channels = dst_cn.channels();
if src.len() % src_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % dst_channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / src_channels;
let dst_chunks = dst.len() / dst_channels;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
if self.color_space == DataColorSpace::Lab
|| (self.is_linear && self.color_space == DataColorSpace::Rgb)
|| self.color_space == DataColorSpace::Xyz
{
use crate::conversions::interpolator::Trilinear;
self.transform_chunk::<Trilinear<GRID_SIZE>>(src, dst);
} else {
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
use crate::conversions::interpolator::Tetrahedral;
self.transform_chunk::<Tetrahedral<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
use crate::conversions::interpolator::Pyramidal;
self.transform_chunk::<Pyramidal<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
use crate::conversions::interpolator::Prismatic;
self.transform_chunk::<Prismatic<GRID_SIZE>>(src, dst);
}
InterpolationMethod::Linear => {
use crate::conversions::interpolator::Trilinear;
self.transform_chunk::<Trilinear<GRID_SIZE>>(src, dst);
}
}
}
Ok(())
}
}
pub(crate) struct DefaultLut3x3Factory {}
impl Lut3x3Factory for DefaultLut3x3Factory {
fn make_transform_3x3<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible + 'static + Send + Sync,
const SRC_LAYOUT: u8,
const DST_LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
>(
lut: Vec<f32>,
options: TransformOptions,
color_space: DataColorSpace,
is_linear: bool,
) -> Box<dyn TransformExecutor<T> + Send + Sync>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, u8>,
(): LutBarycentricReduction<T, u16>,
{
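        // The portable path always interpolates in f32; only the barycentric
        // weight scale is selected here.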
match options.barycentric_weight_scale {
BarycentricWeightScale::Low => Box::new(TransformLut3x3::<
T,
u8,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
256,
256,
> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_ranged_256::<GRID_SIZE>(),
color_space,
is_linear,
}),
#[cfg(feature = "options")]
BarycentricWeightScale::High => Box::new(TransformLut3x3::<
T,
u16,
SRC_LAYOUT,
DST_LAYOUT,
GRID_SIZE,
BIT_DEPTH,
65536,
65536,
> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_binned::<GRID_SIZE, 65536>(),
color_space,
is_linear,
}),
}
}
}

@@ -0,0 +1,269 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::LutBarycentricReduction;
use crate::conversions::interpolator::{BarycentricWeight, MultidimensionalInterpolation};
use crate::transform::PointeeSizeExpressible;
use crate::{
BarycentricWeightScale, CmsError, DataColorSpace, InterpolationMethod, Layout,
TransformExecutor, TransformOptions,
};
use num_traits::AsPrimitive;
use std::marker::PhantomData;
pub(crate) struct TransformLut3x4<
T,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> {
pub(crate) lut: Vec<f32>,
pub(crate) _phantom: PhantomData<T>,
pub(crate) _phantom1: PhantomData<U>,
pub(crate) interpolation_method: InterpolationMethod,
pub(crate) weights: Box<[BarycentricWeight<f32>; BINS]>,
pub(crate) color_space: DataColorSpace,
pub(crate) is_linear: bool,
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformLut3x4<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
#[inline(always)]
fn transform_chunk<'b, Tetrahedral: MultidimensionalInterpolation<'b, GRID_SIZE>>(
&'b self,
src: &[T],
dst: &mut [T],
) {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
let value_scale = ((1 << BIT_DEPTH) - 1) as f32;
for (src, dst) in src.chunks_exact(channels).zip(dst.chunks_exact_mut(4)) {
let x = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[cn.r_i()],
);
let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[cn.g_i()],
);
let z = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[cn.b_i()],
);
let tetrahedral = Tetrahedral::new(&self.lut);
let v = tetrahedral.inter4(x, y, z, &self.weights);
if T::FINITE {
let r = v * value_scale + 0.5;
dst[0] = r.v[0].min(value_scale).max(0.).as_();
dst[1] = r.v[1].min(value_scale).max(0.).as_();
dst[2] = r.v[2].min(value_scale).max(0.).as_();
dst[3] = r.v[3].min(value_scale).max(0.).as_();
} else {
dst[0] = v.v[0].as_();
dst[1] = v.v[1].as_();
dst[2] = v.v[2].as_();
dst[3] = v.v[3].as_();
}
}
}
}
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformExecutor<T>
for TransformLut3x4<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
if src.len() % channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % 4 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / channels;
let dst_chunks = dst.len() / 4;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
if self.color_space == DataColorSpace::Lab
|| (self.is_linear && self.color_space == DataColorSpace::Rgb)
|| self.color_space == DataColorSpace::Xyz
{
use crate::conversions::interpolator::Trilinear;
self.transform_chunk::<Trilinear<GRID_SIZE>>(src, dst);
} else {
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
use crate::conversions::interpolator::Tetrahedral;
self.transform_chunk::<Tetrahedral<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
use crate::conversions::interpolator::Pyramidal;
self.transform_chunk::<Pyramidal<GRID_SIZE>>(src, dst);
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
use crate::conversions::interpolator::Prismatic;
self.transform_chunk::<Prismatic<GRID_SIZE>>(src, dst);
}
InterpolationMethod::Linear => {
use crate::conversions::interpolator::Trilinear;
self.transform_chunk::<Trilinear<GRID_SIZE>>(src, dst);
}
}
}
Ok(())
}
}
pub(crate) fn make_transform_3x4<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible + 'static + Send + Sync,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
>(
layout: Layout,
lut: Vec<f32>,
options: TransformOptions,
color_space: DataColorSpace,
is_linear: bool,
) -> Box<dyn TransformExecutor<T> + Sync + Send>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, u8>,
(): LutBarycentricReduction<T, u16>,
{
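// `layout` and the barycentric weight scale are runtime values; each
// combination below is monomorphized into a concrete `TransformLut3x4`.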
match layout {
Layout::Rgb => match options.barycentric_weight_scale {
BarycentricWeightScale::Low => Box::new(TransformLut3x4::<
T,
u8,
{ Layout::Rgb as u8 },
GRID_SIZE,
BIT_DEPTH,
256,
256,
> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_ranged_256::<GRID_SIZE>(),
color_space,
is_linear,
}),
#[cfg(feature = "options")]
BarycentricWeightScale::High => Box::new(TransformLut3x4::<
T,
u16,
{ Layout::Rgb as u8 },
GRID_SIZE,
BIT_DEPTH,
65536,
65536,
> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_binned::<GRID_SIZE, 65536>(),
color_space,
is_linear,
}),
},
Layout::Rgba => match options.barycentric_weight_scale {
BarycentricWeightScale::Low => Box::new(TransformLut3x4::<
T,
u8,
{ Layout::Rgba as u8 },
GRID_SIZE,
BIT_DEPTH,
256,
256,
> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_ranged_256::<GRID_SIZE>(),
color_space,
is_linear,
}),
#[cfg(feature = "options")]
BarycentricWeightScale::High => Box::new(TransformLut3x4::<
T,
u16,
{ Layout::Rgba as u8 },
GRID_SIZE,
BIT_DEPTH,
65536,
65536,
> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_binned::<GRID_SIZE, 65536>(),
color_space,
is_linear,
}),
},
_ => unimplemented!(),
}
}


@@ -0,0 +1,316 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::conversions::interpolator::*;
use crate::conversions::lut_transforms::Lut4x3Factory;
use crate::math::{FusedMultiplyAdd, FusedMultiplyNegAdd, m_clamp};
use crate::{
BarycentricWeightScale, CmsError, DataColorSpace, InterpolationMethod, Layout,
PointeeSizeExpressible, TransformExecutor, TransformOptions, Vector3f,
};
use num_traits::AsPrimitive;
use std::marker::PhantomData;
pub(crate) trait Vector3fCmykLerp {
fn interpolate(a: Vector3f, b: Vector3f, t: f32, scale: f32) -> Vector3f;
}
#[allow(unused)]
#[derive(Copy, Clone, Default)]
struct DefaultVector3fLerp;
impl Vector3fCmykLerp for DefaultVector3fLerp {
#[inline(always)]
fn interpolate(a: Vector3f, b: Vector3f, t: f32, scale: f32) -> Vector3f {
let t = Vector3f::from(t);
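// Linear interpolation a * (1 - t) + b * t via fused multiply-adds, then
// scaled to the bit-depth range with round-to-nearest (+0.5) and clamped.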
let inter = a.neg_mla(a, t).mla(b, t);
let mut new_vec = Vector3f::from(0.5).mla(inter, Vector3f::from(scale));
new_vec.v[0] = m_clamp(new_vec.v[0], 0.0, scale);
new_vec.v[1] = m_clamp(new_vec.v[1], 0.0, scale);
new_vec.v[2] = m_clamp(new_vec.v[2], 0.0, scale);
new_vec
}
}
#[allow(unused)]
#[derive(Copy, Clone, Default)]
pub(crate) struct NonFiniteVector3fLerp;
impl Vector3fCmykLerp for NonFiniteVector3fLerp {
#[inline(always)]
fn interpolate(a: Vector3f, b: Vector3f, t: f32, _: f32) -> Vector3f {
let t = Vector3f::from(t);
a.neg_mla(a, t).mla(b, t)
}
}
#[allow(unused)]
#[derive(Copy, Clone, Default)]
pub(crate) struct NonFiniteVector3fLerpUnbound;
impl Vector3fCmykLerp for NonFiniteVector3fLerpUnbound {
#[inline(always)]
fn interpolate(a: Vector3f, b: Vector3f, t: f32, _: f32) -> Vector3f {
let t = Vector3f::from(t);
a.neg_mla(a, t).mla(b, t)
}
}
#[allow(unused)]
struct TransformLut4To3<
T,
U,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> {
lut: Vec<f32>,
_phantom: PhantomData<T>,
_phantom1: PhantomData<U>,
interpolation_method: InterpolationMethod,
weights: Box<[BarycentricWeight<f32>; BINS]>,
color_space: DataColorSpace,
is_linear: bool,
}
#[allow(unused)]
impl<
T: Copy + AsPrimitive<f32> + Default,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformLut4To3<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
#[inline(always)]
fn transform_chunk<
'k,
Tetrahedral: MultidimensionalInterpolation<'k, GRID_SIZE>,
Interpolation: Vector3fCmykLerp,
>(
&'k self,
src: &[T],
dst: &mut [T],
) {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
let grid_size = GRID_SIZE as i32;
let grid_size3 = grid_size * grid_size * grid_size;
let value_scale = ((1 << BIT_DEPTH) - 1) as f32;
let max_value = ((1 << BIT_DEPTH) - 1u32).as_();
for (src, dst) in src.chunks_exact(4).zip(dst.chunks_exact_mut(channels)) {
let c = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[0],
);
let m = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[1],
);
let y = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[2],
);
let k = <() as LutBarycentricReduction<T, U>>::reduce::<BIT_DEPTH, BARYCENTRIC_BINS>(
src[3],
);
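// The K channel selects two adjacent CMY sub-grids (indices w and w_n) along
// the fourth LUT axis; each is interpolated in 3D and the two results are
// lerped by the fractional weight t.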
let k_weights = self.weights[k.as_()];
let w: i32 = k_weights.x;
let w_n: i32 = k_weights.x_n;
let t: f32 = k_weights.w;
let table1 = &self.lut[(w * grid_size3 * 3) as usize..];
let table2 = &self.lut[(w_n * grid_size3 * 3) as usize..];
let tetrahedral1 = Tetrahedral::new(table1);
let tetrahedral2 = Tetrahedral::new(table2);
let r1 = tetrahedral1.inter3(c, m, y, &self.weights);
let r2 = tetrahedral2.inter3(c, m, y, &self.weights);
let r = Interpolation::interpolate(r1, r2, t, value_scale);
dst[cn.r_i()] = r.v[0].as_();
dst[cn.g_i()] = r.v[1].as_();
dst[cn.b_i()] = r.v[2].as_();
if channels == 4 {
dst[cn.a_i()] = max_value;
}
}
}
}
#[allow(unused)]
impl<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible,
U: AsPrimitive<usize>,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
const BINS: usize,
const BARYCENTRIC_BINS: usize,
> TransformExecutor<T>
for TransformLut4To3<T, U, LAYOUT, GRID_SIZE, BIT_DEPTH, BINS, BARYCENTRIC_BINS>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, U>,
{
fn transform(&self, src: &[T], dst: &mut [T]) -> Result<(), CmsError> {
let cn = Layout::from(LAYOUT);
let channels = cn.channels();
if src.len() % 4 != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
if dst.len() % channels != 0 {
return Err(CmsError::LaneMultipleOfChannels);
}
let src_chunks = src.len() / 4;
let dst_chunks = dst.len() / channels;
if src_chunks != dst_chunks {
return Err(CmsError::LaneSizeMismatch);
}
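// Integer sample types (T::FINITE) use the clamping, rounding lerp; floating
// point samples take the unbounded variant so out-of-range values survive.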
if self.color_space == DataColorSpace::Lab
|| (self.is_linear && self.color_space == DataColorSpace::Rgb)
|| self.color_space == DataColorSpace::Xyz
{
if T::FINITE {
self.transform_chunk::<Trilinear<GRID_SIZE>, DefaultVector3fLerp>(src, dst);
} else {
self.transform_chunk::<Trilinear<GRID_SIZE>, NonFiniteVector3fLerp>(src, dst);
}
} else {
match self.interpolation_method {
#[cfg(feature = "options")]
InterpolationMethod::Tetrahedral => {
if T::FINITE {
self.transform_chunk::<Tetrahedral<GRID_SIZE>, DefaultVector3fLerp>(
src, dst,
);
} else {
self.transform_chunk::<Tetrahedral<GRID_SIZE>, NonFiniteVector3fLerp>(
src, dst,
);
}
}
#[cfg(feature = "options")]
InterpolationMethod::Pyramid => {
if T::FINITE {
self.transform_chunk::<Pyramidal<GRID_SIZE>, DefaultVector3fLerp>(src, dst);
} else {
self.transform_chunk::<Pyramidal<GRID_SIZE>, NonFiniteVector3fLerp>(
src, dst,
);
}
}
#[cfg(feature = "options")]
InterpolationMethod::Prism => {
if T::FINITE {
self.transform_chunk::<Prismatic<GRID_SIZE>, DefaultVector3fLerp>(src, dst);
} else {
self.transform_chunk::<Prismatic<GRID_SIZE>, NonFiniteVector3fLerp>(
src, dst,
);
}
}
InterpolationMethod::Linear => {
if T::FINITE {
self.transform_chunk::<Trilinear<GRID_SIZE>, DefaultVector3fLerp>(src, dst);
} else {
self.transform_chunk::<Trilinear<GRID_SIZE>, NonFiniteVector3fLerp>(
src, dst,
);
}
}
}
}
Ok(())
}
}
#[allow(dead_code)]
pub(crate) struct DefaultLut4x3Factory {}
#[allow(dead_code)]
impl Lut4x3Factory for DefaultLut4x3Factory {
fn make_transform_4x3<
T: Copy + AsPrimitive<f32> + Default + PointeeSizeExpressible + 'static + Send + Sync,
const LAYOUT: u8,
const GRID_SIZE: usize,
const BIT_DEPTH: usize,
>(
lut: Vec<f32>,
options: TransformOptions,
color_space: DataColorSpace,
is_linear: bool,
) -> Box<dyn TransformExecutor<T> + Sync + Send>
where
f32: AsPrimitive<T>,
u32: AsPrimitive<T>,
(): LutBarycentricReduction<T, u8>,
(): LutBarycentricReduction<T, u16>,
{
match options.barycentric_weight_scale {
BarycentricWeightScale::Low => {
Box::new(
TransformLut4To3::<T, u8, LAYOUT, GRID_SIZE, BIT_DEPTH, 256, 256> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_ranged_256::<GRID_SIZE>(),
color_space,
is_linear,
},
)
}
#[cfg(feature = "options")]
BarycentricWeightScale::High => {
Box::new(
TransformLut4To3::<T, u16, LAYOUT, GRID_SIZE, BIT_DEPTH, 65536, 65536> {
lut,
_phantom: PhantomData,
_phantom1: PhantomData,
interpolation_method: options.interpolation_method,
weights: BarycentricWeight::<f32>::create_binned::<GRID_SIZE, 65536>(),
color_space,
is_linear,
},
)
}
}
}
}


@@ -0,0 +1,61 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::{CmsError, InPlaceStage, Lab, Xyz};
#[derive(Default)]
pub(crate) struct StageLabToXyz {}
impl InPlaceStage for StageLabToXyz {
fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
for dst in dst.chunks_exact_mut(3) {
let lab = Lab::new(dst[0], dst[1], dst[2]);
let xyz = lab.to_pcs_xyz();
dst[0] = xyz.x;
dst[1] = xyz.y;
dst[2] = xyz.z;
}
Ok(())
}
}
#[derive(Default)]
pub(crate) struct StageXyzToLab {}
impl InPlaceStage for StageXyzToLab {
fn transform(&self, dst: &mut [f32]) -> Result<(), CmsError> {
for dst in dst.chunks_exact_mut(3) {
let xyz = Xyz::new(dst[0], dst[1], dst[2]);
let lab = Lab::from_pcs_xyz(xyz);
dst[0] = lab.l;
dst[1] = lab.a;
dst[2] = lab.b;
}
Ok(())
}
}

154
vendor/moxcms/src/dat.rs vendored Normal file

@@ -0,0 +1,154 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::CmsError;
use crate::writer::write_u16_be;
use std::time::{SystemTime, UNIX_EPOCH};
#[repr(C)]
#[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Default)]
pub struct ColorDateTime {
pub year: u16,
pub month: u16,
pub day_of_the_month: u16,
pub hours: u16,
pub minutes: u16,
pub seconds: u16,
}
fn is_leap(year: i32) -> bool {
(year % 4 == 0 && year % 100 != 0) || (year % 400 == 0)
}
fn days_in_month(year: i32, month: i32) -> i32 {
match month {
1 => 31,
2 => {
if is_leap(year) {
29
} else {
28
}
}
3 => 31,
4 => 30,
5 => 31,
6 => 30,
7 => 31,
8 => 31,
9 => 30,
10 => 31,
11 => 30,
12 => 31,
_ => unreachable!("Unknown month"),
}
}
impl ColorDateTime {
/// Parses a 12-byte big-endian slice into a `ColorDateTime`
pub fn new_from_slice(slice: &[u8]) -> Result<ColorDateTime, CmsError> {
if slice.len() != 12 {
return Err(CmsError::InvalidProfile);
}
let year = u16::from_be_bytes([slice[0], slice[1]]);
let month = u16::from_be_bytes([slice[2], slice[3]]);
let day_of_the_month = u16::from_be_bytes([slice[4], slice[5]]);
let hours = u16::from_be_bytes([slice[6], slice[7]]);
let minutes = u16::from_be_bytes([slice[8], slice[9]]);
let seconds = u16::from_be_bytes([slice[10], slice[11]]);
Ok(ColorDateTime {
year,
month,
day_of_the_month,
hours,
minutes,
seconds,
})
}
/// Creates a new `ColorDateTime` from the current system time (UTC)
pub fn now() -> Self {
let now = match SystemTime::now().duration_since(UNIX_EPOCH) {
Ok(v) => v,
Err(_) => return Self::default(),
};
let mut days = (now.as_secs() / 86_400) as i64;
let secs_of_day = (now.as_secs() % 86_400) as i64;
let mut year = 1970;
loop {
let year_days = if is_leap(year) { 366 } else { 365 };
if days >= year_days {
days -= year_days;
year += 1;
} else {
break;
}
}
let mut month = 1;
loop {
let mdays = days_in_month(year, month);
if days >= mdays as i64 {
days -= mdays as i64;
month += 1;
} else {
break;
}
}
let day = days + 1; // convert the zero-based day offset to a 1-based day of month
let hour = secs_of_day / 3600;
let min = (secs_of_day % 3600) / 60;
let sec = secs_of_day % 60;
Self {
year: year as u16,
month: month as u16,
day_of_the_month: day as u16,
hours: hour as u16,
minutes: min as u16,
seconds: sec as u16,
}
}
#[inline]
pub(crate) fn encode(&self, into: &mut Vec<u8>) {
let year = self.year;
let month = self.month;
let day_of_the_month = self.day_of_the_month;
let hours = self.hours;
let minutes = self.minutes;
let seconds = self.seconds;
write_u16_be(into, year);
write_u16_be(into, month);
write_u16_be(into, day_of_the_month);
write_u16_be(into, hours);
write_u16_be(into, minutes);
write_u16_be(into, seconds);
}
}
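#[cfg(test)]
mod date_time_tests {
use super::*;
// Hedged sanity check: `encode` emits six big-endian u16 fields (12 bytes),
// which `new_from_slice` should parse back verbatim.
#[test]
fn test_encode_round_trip() {
let dt = ColorDateTime {
year: 2025,
month: 3,
day_of_the_month: 14,
hours: 9,
minutes: 26,
seconds: 53,
};
let mut buf = Vec::new();
dt.encode(&mut buf);
assert_eq!(buf.len(), 12);
assert_eq!(ColorDateTime::new_from_slice(&buf), Ok(dt));
}
}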

541
vendor/moxcms/src/defaults.rs vendored Normal file

@@ -0,0 +1,541 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::chad::BRADFORD_D;
use crate::cicp::create_rec709_parametric;
use crate::trc::{ToneReprCurve, curve_from_gamma};
use crate::{
CicpColorPrimaries, CicpProfile, ColorPrimaries, ColorProfile, DataColorSpace,
LocalizableString, Matrix3d, MatrixCoefficients, ProfileClass, ProfileText, RenderingIntent,
TransferCharacteristics, XyY,
};
use pxfm::{copysignk, exp, floor, pow};
/// From lcms: `cmsWhitePointFromTemp`
/// tempK must be >= 4000. and <= 25000.
/// Invalid values of tempK will return
/// (x,y,Y) = (-1.0, -1.0, -1.0)
/// similar to argyll: `icx_DTEMP2XYZ()`
const fn white_point_from_temperature(temp_k: i32) -> XyY {
let mut white_point = XyY {
x: 0.,
y: 0.,
yb: 0.,
};
// No optimization provided.
let temp_k = temp_k as f64;
let temp_k2 = temp_k * temp_k; // square
let temp_k3 = temp_k2 * temp_k; // cube
// For correlated color temperature (T) between 4000K and 7000K:
let x = if temp_k > 4000.0 && temp_k <= 7000.0 {
-4.6070 * (1E9 / temp_k3) + 2.9678 * (1E6 / temp_k2) + 0.09911 * (1E3 / temp_k) + 0.244063
} else if temp_k > 7000.0 && temp_k <= 25000.0 {
// or for correlated color temperature (T) between 7000K and 25000K:
-2.0064 * (1E9 / temp_k3) + 1.9018 * (1E6 / temp_k2) + 0.24748 * (1E3 / temp_k) + 0.237040
} else {
// Invalid tempK
white_point.x = -1.0;
white_point.y = -1.0;
white_point.yb = -1.0;
debug_assert!(false, "invalid temp");
return white_point;
};
// Obtain y(x)
let y = -3.000 * (x * x) + 2.870 * x - 0.275;
// wave factors (not used, but here for futures extensions)
// let M1 = (-1.3515 - 1.7703*x + 5.9114 *y)/(0.0241 + 0.2562*x - 0.7341*y);
// let M2 = (0.0300 - 31.4424*x + 30.0717*y)/(0.0241 + 0.2562*x - 0.7341*y);
// Fill white_point struct
white_point.x = x;
white_point.y = y;
white_point.yb = 1.0;
white_point
}
pub const WHITE_POINT_D50: XyY = white_point_from_temperature(5003);
pub const WHITE_POINT_D60: XyY = white_point_from_temperature(6000);
pub const WHITE_POINT_D65: XyY = white_point_from_temperature(6504);
pub const WHITE_POINT_DCI_P3: XyY = white_point_from_temperature(6300);
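// Hedged sanity check: at 6504 K the daylight-locus fit above should land
// near the canonical D65 chromaticity of roughly (0.3127, 0.3290).
#[cfg(test)]
mod white_point_tests {
use super::*;
#[test]
fn test_d65_matches_canonical_chromaticity() {
let wp = white_point_from_temperature(6504);
assert!((wp.x - 0.3127).abs() < 2e-3, "x = {}", wp.x);
assert!((wp.y - 0.3290).abs() < 2e-3, "y = {}", wp.y);
}
}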
// https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.2100-2-201807-I!!PDF-F.pdf
// Perceptual Quantization / SMPTE standard ST.2084
#[inline]
const fn pq_curve(x: f64) -> f64 {
const M1: f64 = 2610.0 / 16384.0;
const M2: f64 = (2523.0 / 4096.0) * 128.0;
const C1: f64 = 3424.0 / 4096.0;
const C2: f64 = (2413.0 / 4096.0) * 32.0;
const C3: f64 = (2392.0 / 4096.0) * 32.0;
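// ST 2084 EOTF: y = (max(x^(1/m2) - c1, 0) / (c2 - c3 * x^(1/m2)))^(1/m1).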
if x == 0.0 {
return 0.0;
}
let sign = x;
let x = x.abs();
let xpo = pow(x, 1.0 / M2);
let num = (xpo - C1).max(0.0);
let den = C2 - C3 * xpo;
let res = pow(num / den, 1.0 / M1);
copysignk(res, sign)
}
pub(crate) const fn build_trc_table_pq() -> [u16; 4096] {
let mut table = [0u16; 4096];
const NUM_ENTRIES: usize = 4096;
let mut i = 0usize;
while i < NUM_ENTRIES {
let x: f64 = i as f64 / (NUM_ENTRIES - 1) as f64;
let y: f64 = pq_curve(x);
let mut output: f64 = y * 65535.0 + 0.5;
if output > 65535.0 {
output = 65535.0
}
if output < 0.0 {
output = 0.0
}
table[i] = floor(output) as u16;
i += 1;
}
table
}
pub(crate) const fn build_trc_table_hlg() -> [u16; 4096] {
let mut table = [0u16; 4096];
const NUM_ENTRIES: usize = 4096;
let mut i = 0usize;
while i < NUM_ENTRIES {
let x: f64 = i as f64 / (NUM_ENTRIES - 1) as f64;
let y: f64 = hlg_curve(x);
let mut output: f64 = y * 65535.0 + 0.5;
if output > 65535.0 {
output = 65535.0
}
if output < 0.0 {
output = 0.0
}
table[i] = floor(output) as u16;
i += 1;
}
table
}
// https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.2100-2-201807-I!!PDF-F.pdf
// Hybrid Log-Gamma
const fn hlg_curve(x: f64) -> f64 {
const BETA: f64 = 0.04;
const RA: f64 = 5.591816309728916; // 1.0 / A where A = 0.17883277
const B: f64 = 0.28466892; // 1.0 - 4.0 * A
const C: f64 = 0.5599107295; // 0.5 - A * ln(4 * A), with A = 0.17883277
let e = (x * (1.0 - BETA) + BETA).max(0.0);
if e == 0.0 {
return 0.0;
}
let sign = e.abs();
let res = if e <= 0.5 {
e * e / 3.0
} else {
(exp((e - C) * RA) + B) / 12.0
};
copysignk(res, sign)
}
/// Perceptual Quantizer Lookup table
pub const PQ_LUT_TABLE: [u16; 4096] = build_trc_table_pq();
/// Hybrid Log Gamma Lookup table
pub const HLG_LUT_TABLE: [u16; 4096] = build_trc_table_hlg();
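// Usage sketch (hedged; `pq_to_linear_u16` is a hypothetical helper): both
// tables map a normalized [0, 1] encoded value to a linear output scaled to
// the u16 range via a nearest-entry lookup:
//
// fn pq_to_linear_u16(encoded: f32) -> u16 {
//     let idx = (encoded.clamp(0.0, 1.0) * 4095.0) as usize;
//     PQ_LUT_TABLE[idx.min(4095)]
// }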
impl ColorProfile {
const SRGB_COLORANTS: Matrix3d =
ColorProfile::colorants_matrix(WHITE_POINT_D65, ColorPrimaries::BT_709);
const DISPLAY_P3_COLORANTS: Matrix3d =
ColorProfile::colorants_matrix(WHITE_POINT_D65, ColorPrimaries::SMPTE_432);
const ADOBE_RGB_COLORANTS: Matrix3d =
ColorProfile::colorants_matrix(WHITE_POINT_D65, ColorPrimaries::ADOBE_RGB);
const DCI_P3_COLORANTS: Matrix3d =
ColorProfile::colorants_matrix(WHITE_POINT_DCI_P3, ColorPrimaries::DCI_P3);
const PRO_PHOTO_RGB_COLORANTS: Matrix3d =
ColorProfile::colorants_matrix(WHITE_POINT_D50, ColorPrimaries::PRO_PHOTO_RGB);
const BT2020_COLORANTS: Matrix3d =
ColorProfile::colorants_matrix(WHITE_POINT_D65, ColorPrimaries::BT_2020);
const ACES_2065_1_COLORANTS: Matrix3d =
ColorProfile::colorants_matrix(WHITE_POINT_D60, ColorPrimaries::ACES_2065_1);
const ACES_CG_COLORANTS: Matrix3d =
ColorProfile::colorants_matrix(WHITE_POINT_D60, ColorPrimaries::ACES_CG);
#[inline]
fn basic_rgb_profile() -> ColorProfile {
ColorProfile {
profile_class: ProfileClass::DisplayDevice,
rendering_intent: RenderingIntent::Perceptual,
color_space: DataColorSpace::Rgb,
pcs: DataColorSpace::Xyz,
chromatic_adaptation: Some(BRADFORD_D),
white_point: WHITE_POINT_D50.to_xyzd(),
..Default::default()
}
}
/// Creates new profile from CICP
pub fn new_from_cicp(cicp_color_primaries: CicpProfile) -> ColorProfile {
let mut basic = ColorProfile::basic_rgb_profile();
basic.update_rgb_colorimetry_from_cicp(cicp_color_primaries);
basic
}
/// Creates new sRGB profile
pub fn new_srgb() -> ColorProfile {
let mut profile = ColorProfile::basic_rgb_profile();
profile.update_colorants(ColorProfile::SRGB_COLORANTS);
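// sRGB transfer function (IEC 61966-2.1) in ICC parametric (g, a, b, c, d)
// order: gamma 2.4, linear-segment slope 1/12.92, threshold 0.04045.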
let curve =
ToneReprCurve::Parametric(vec![2.4, 1. / 1.055, 0.055 / 1.055, 1. / 12.92, 0.04045]);
profile.red_trc = Some(curve.clone());
profile.blue_trc = Some(curve.clone());
profile.green_trc = Some(curve);
profile.media_white_point = Some(WHITE_POINT_D65.to_xyzd());
profile.cicp = Some(CicpProfile {
color_primaries: CicpColorPrimaries::Bt709,
transfer_characteristics: TransferCharacteristics::Srgb,
matrix_coefficients: MatrixCoefficients::Bt709,
full_range: false,
});
profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"sRGB IEC61966-2.1".to_string(),
)]));
profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Public Domain".to_string(),
)]));
profile
}
/// Creates new Adobe RGB profile
pub fn new_adobe_rgb() -> ColorProfile {
let mut profile = ColorProfile::basic_rgb_profile();
profile.update_colorants(ColorProfile::ADOBE_RGB_COLORANTS);
let curve = curve_from_gamma(2.19921875f32);
profile.red_trc = Some(curve.clone());
profile.blue_trc = Some(curve.clone());
profile.green_trc = Some(curve);
profile.media_white_point = Some(WHITE_POINT_D65.to_xyzd());
profile.white_point = WHITE_POINT_D50.to_xyzd();
profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Adobe RGB 1998".to_string(),
)]));
profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Public Domain".to_string(),
)]));
profile
}
/// Creates new Display P3 profile
pub fn new_display_p3() -> ColorProfile {
let mut profile = ColorProfile::basic_rgb_profile();
profile.update_colorants(ColorProfile::DISPLAY_P3_COLORANTS);
let curve =
ToneReprCurve::Parametric(vec![2.4, 1. / 1.055, 0.055 / 1.055, 1. / 12.92, 0.04045]);
profile.red_trc = Some(curve.clone());
profile.blue_trc = Some(curve.clone());
profile.green_trc = Some(curve);
profile.media_white_point = Some(WHITE_POINT_D65.to_xyzd());
profile.cicp = Some(CicpProfile {
color_primaries: CicpColorPrimaries::Smpte431,
transfer_characteristics: TransferCharacteristics::Srgb,
matrix_coefficients: MatrixCoefficients::Bt709,
full_range: false,
});
profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Display P3".to_string(),
)]));
profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Public Domain".to_string(),
)]));
profile
}
/// Creates new Display P3 PQ profile
pub fn new_display_p3_pq() -> ColorProfile {
let mut profile = ColorProfile::basic_rgb_profile();
profile.update_colorants(ColorProfile::DISPLAY_P3_COLORANTS);
let curve = ToneReprCurve::Lut(PQ_LUT_TABLE.to_vec());
profile.red_trc = Some(curve.clone());
profile.blue_trc = Some(curve.clone());
profile.green_trc = Some(curve);
profile.media_white_point = Some(WHITE_POINT_D65.to_xyzd());
profile.cicp = Some(CicpProfile {
color_primaries: CicpColorPrimaries::Smpte431,
transfer_characteristics: TransferCharacteristics::Smpte2084,
matrix_coefficients: MatrixCoefficients::Bt709,
full_range: false,
});
profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Display P3 PQ".to_string(),
)]));
profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Public Domain".to_string(),
)]));
profile
}
/// Creates new DCI P3 profile
pub fn new_dci_p3() -> ColorProfile {
let mut profile = ColorProfile::basic_rgb_profile();
profile.update_colorants(ColorProfile::DCI_P3_COLORANTS);
let curve = curve_from_gamma(2.6f32);
profile.red_trc = Some(curve.clone());
profile.blue_trc = Some(curve.clone());
profile.green_trc = Some(curve);
profile.media_white_point = Some(WHITE_POINT_DCI_P3.to_xyzd());
profile.cicp = Some(CicpProfile {
color_primaries: CicpColorPrimaries::Smpte432,
transfer_characteristics: TransferCharacteristics::Srgb,
matrix_coefficients: MatrixCoefficients::Bt709,
full_range: false,
});
profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"DCI P3".to_string(),
)]));
profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Public Domain".to_string(),
)]));
profile
}
/// Creates new ProPhoto RGB profile
pub fn new_pro_photo_rgb() -> ColorProfile {
let mut profile = ColorProfile::basic_rgb_profile();
profile.update_colorants(ColorProfile::PRO_PHOTO_RGB_COLORANTS);
let curve = curve_from_gamma(1.8f32);
profile.red_trc = Some(curve.clone());
profile.blue_trc = Some(curve.clone());
profile.green_trc = Some(curve);
profile.media_white_point = Some(WHITE_POINT_D50.to_xyzd());
profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"ProPhoto RGB".to_string(),
)]));
profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Public Domain".to_string(),
)]));
profile
}
/// Creates new Bt.2020 profile
pub fn new_bt2020() -> ColorProfile {
let mut profile = ColorProfile::basic_rgb_profile();
profile.update_colorants(ColorProfile::BT2020_COLORANTS);
let curve = ToneReprCurve::Parametric(create_rec709_parametric().to_vec());
profile.red_trc = Some(curve.clone());
profile.blue_trc = Some(curve.clone());
profile.green_trc = Some(curve);
profile.media_white_point = Some(WHITE_POINT_D65.to_xyzd());
profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Rec.2020".to_string(),
)]));
profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Public Domain".to_string(),
)]));
profile
}
/// Creates new Bt.2020 PQ profile
pub fn new_bt2020_pq() -> ColorProfile {
let mut profile = ColorProfile::basic_rgb_profile();
profile.update_colorants(ColorProfile::BT2020_COLORANTS);
let curve = ToneReprCurve::Lut(PQ_LUT_TABLE.to_vec());
profile.red_trc = Some(curve.clone());
profile.blue_trc = Some(curve.clone());
profile.green_trc = Some(curve);
profile.media_white_point = Some(WHITE_POINT_D65.to_xyzd());
profile.cicp = Some(CicpProfile {
color_primaries: CicpColorPrimaries::Bt2020,
transfer_characteristics: TransferCharacteristics::Smpte2084,
matrix_coefficients: MatrixCoefficients::Bt709,
full_range: false,
});
profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Rec.2020 PQ".to_string(),
)]));
profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Public Domain".to_string(),
)]));
profile
}
/// Creates new Bt.2020 HLG profile
pub fn new_bt2020_hlg() -> ColorProfile {
let mut profile = ColorProfile::basic_rgb_profile();
profile.update_colorants(ColorProfile::BT2020_COLORANTS);
let curve = ToneReprCurve::Lut(HLG_LUT_TABLE.to_vec());
profile.red_trc = Some(curve.clone());
profile.blue_trc = Some(curve.clone());
profile.green_trc = Some(curve);
profile.media_white_point = Some(WHITE_POINT_D65.to_xyzd());
profile.cicp = Some(CicpProfile {
color_primaries: CicpColorPrimaries::Bt2020,
transfer_characteristics: TransferCharacteristics::Hlg,
matrix_coefficients: MatrixCoefficients::Bt709,
full_range: false,
});
profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Rec.2020 HLG".to_string(),
)]));
profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Public Domain".to_string(),
)]));
profile
}
/// Creates new Monochrome profile
pub fn new_gray_with_gamma(gamma: f32) -> ColorProfile {
ColorProfile {
gray_trc: Some(curve_from_gamma(gamma)),
profile_class: ProfileClass::DisplayDevice,
rendering_intent: RenderingIntent::Perceptual,
color_space: DataColorSpace::Gray,
media_white_point: Some(WHITE_POINT_D65.to_xyzd()),
white_point: WHITE_POINT_D50.to_xyzd(),
chromatic_adaptation: Some(BRADFORD_D),
copyright: Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Public Domain".to_string(),
)])),
..Default::default()
}
}
/// Creates new ACES 2065-1/AP0 profile
pub fn new_aces_aces_2065_1_linear() -> ColorProfile {
let mut profile = ColorProfile::basic_rgb_profile();
profile.update_colorants(ColorProfile::ACES_2065_1_COLORANTS);
let curve = ToneReprCurve::Lut(vec![]);
profile.red_trc = Some(curve.clone());
profile.blue_trc = Some(curve.clone());
profile.green_trc = Some(curve);
profile.media_white_point = Some(WHITE_POINT_D60.to_xyzd());
profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"ACES 2065-1".to_string(),
)]));
profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Public Domain".to_string(),
)]));
profile
}
/// Creates new ACEScg profile
pub fn new_aces_cg_linear() -> ColorProfile {
let mut profile = ColorProfile::basic_rgb_profile();
profile.update_colorants(ColorProfile::ACES_CG_COLORANTS);
let curve = ToneReprCurve::Lut(vec![]);
profile.red_trc = Some(curve.clone());
profile.blue_trc = Some(curve.clone());
profile.green_trc = Some(curve);
profile.media_white_point = Some(WHITE_POINT_D60.to_xyzd());
profile.description = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"ACEScg/AP1".to_string(),
)]));
profile.copyright = Some(ProfileText::Localizable(vec![LocalizableString::new(
"en".to_string(),
"US".to_string(),
"Public Domain".to_string(),
)]));
profile
}
}

359
vendor/moxcms/src/dt_ucs.rs vendored Normal file

@@ -0,0 +1,359 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::Xyz;
use crate::mlaf::mlaf;
use pxfm::{f_atan2f, f_powf, f_sincosf};
/// Darktable UCS JCH (Darktable Uniform Color Space)
#[derive(Copy, Clone, PartialOrd, PartialEq, Debug)]
pub struct DtUchJch {
pub j: f32,
pub c: f32,
pub h: f32,
}
/// Darktable UCS HSB (Darktable Uniform Color Space)
#[derive(Copy, Clone, PartialOrd, PartialEq, Debug)]
pub struct DtUchHsb {
pub h: f32,
pub s: f32,
pub b: f32,
}
/// Darktable UCS HCB (Darktable Uniform Color Space)
#[derive(Copy, Clone, PartialOrd, PartialEq, Debug)]
pub struct DtUchHcb {
pub h: f32,
pub c: f32,
pub b: f32,
}
const DT_UCS_L_STAR_RANGE: f32 = 2.098883786377;
#[inline]
fn y_to_dt_ucs_l_star(y: f32) -> f32 {
let y_hat = f_powf(y, 0.631651345306265);
DT_UCS_L_STAR_RANGE * y_hat / (y_hat + 1.12426773749357)
}
#[inline]
fn dt_ucs_l_star_to_y(x: f32) -> f32 {
f_powf(
1.12426773749357 * x / (DT_UCS_L_STAR_RANGE - x),
1.5831518565279648,
)
}
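// Hedged sanity check: the exponents 0.631651345306265 and 1.5831518565279648
// are reciprocals, so the L* mapping should round-trip within f32 noise.
#[cfg(test)]
mod l_star_tests {
use super::*;
#[test]
fn test_l_star_round_trip() {
let y = 0.18f32;
let y_rev = dt_ucs_l_star_to_y(y_to_dt_ucs_l_star(y));
assert!((y - y_rev).abs() < 1e-4, "Expected {y}, got {y_rev}");
}
}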
const L_WHITE: f32 = 0.98805060;
#[inline]
fn dt_ucs_luv_to_ucs_jch(
l_star: f32,
l_white: f32,
u_star_prime: f32,
v_star_prime: f32,
) -> DtUchJch {
let m2: f32 = mlaf(u_star_prime * u_star_prime, v_star_prime, v_star_prime); // square of colorfulness M
// should be jch.j = powf(L_star / L_white, cz), but we only treat the case cz = 1
let j = l_star / l_white;
let c =
15.932993652962535 * f_powf(l_star, 0.6523997524738018) * f_powf(m2, 0.6007557017508491)
/ l_white;
let h = f_atan2f(v_star_prime, u_star_prime);
DtUchJch::new(j, c, h)
}
#[inline]
fn dt_ucs_xy_to_uv(x: f32, y: f32) -> (f32, f32) {
const X_C: [f32; 3] = [-0.783941002840055, 0.745273540913283, 0.318707282433486];
const Y_C: [f32; 3] = [0.277512987809202, -0.205375866083878, 2.16743692732158];
const BIAS: [f32; 3] = [0.153836578598858, -0.165478376301988, 0.291320554395942];
let mut u_c = mlaf(mlaf(BIAS[0], Y_C[0], y), X_C[0], x);
let mut v_c = mlaf(mlaf(BIAS[1], Y_C[1], y), X_C[1], x);
let d_c = mlaf(mlaf(BIAS[2], Y_C[2], y), X_C[2], x);
// Keep |d_c| away from zero before dividing; the guard needs the smallest
// positive float (C's FLT_MIN, i.e. `f32::MIN_POSITIVE`), not Rust's
// `f32::MIN`, which is the most negative finite value and makes both
// clamps no-ops.
let div = if d_c >= 0.0 {
d_c.max(f32::MIN_POSITIVE)
} else {
d_c.min(-f32::MIN_POSITIVE)
};
u_c /= div;
v_c /= div;
const STAR_C: [f32; 2] = [1.39656225667, 1.4513954287];
const STAR_HF_C: [f32; 2] = [1.49217352929, 1.52488637914];
let u_star = STAR_C[0] * u_c / (u_c.abs() + STAR_HF_C[0]);
let v_star = STAR_C[1] * v_c / (v_c.abs() + STAR_HF_C[1]);
// The following is equivalent to a 2D matrix product
let u_star_prime = mlaf(-1.124983854323892 * u_star, -0.980483721769325, v_star);
let v_star_prime = mlaf(1.86323315098672 * u_star, 1.971853092390862, v_star);
(u_star_prime, v_star_prime)
}
impl DtUchJch {
#[inline]
pub fn new(j: f32, c: f32, h: f32) -> DtUchJch {
DtUchJch { j, c, h }
}
#[inline]
pub fn from_xyz(xyz: Xyz) -> DtUchJch {
DtUchJch::from_xyy(xyz.to_xyy())
}
#[inline]
pub fn to_xyz(&self) -> Xyz {
let xyy = self.to_xyy();
Xyz::from_xyy(xyy)
}
#[inline]
pub fn from_xyy(xyy: [f32; 3]) -> DtUchJch {
let l_star = y_to_dt_ucs_l_star(xyy[2]);
// let l_white = y_to_dt_ucs_l_star(1.);
let (u_star_prime, v_star_prime) = dt_ucs_xy_to_uv(xyy[0], xyy[1]);
dt_ucs_luv_to_ucs_jch(l_star, L_WHITE, u_star_prime, v_star_prime)
}
#[inline]
pub fn to_xyy(&self) -> [f32; 3] {
// let l_white: f32 = y_to_dt_ucs_l_star(1.0);
let l_star = (self.j * L_WHITE).max(0.0).min(2.09885);
let m = if l_star != 0. {
f_powf(
self.c * L_WHITE / (15.932993652962535 * f_powf(l_star, 0.6523997524738018)),
0.8322850678616855,
)
} else {
0.
};
let sin_cos_h = f_sincosf(self.h);
let u_star_prime = m * sin_cos_h.1;
let v_star_prime = m * sin_cos_h.0;
// The following is equivalent to a 2D matrix product
let u_star = mlaf(
-5.037522385190711 * u_star_prime,
-2.504856328185843,
v_star_prime,
);
let v_star = mlaf(
4.760029407436461 * u_star_prime,
2.874012963239247,
v_star_prime,
);
const F: [f32; 2] = [1.39656225667, 1.4513954287];
const HF: [f32; 2] = [1.49217352929, 1.52488637914];
let u_c = -HF[0] * u_star / (u_star.abs() - F[0]);
let v_c = -HF[1] * v_star / (v_star.abs() - F[1]);
const U_C: [f32; 3] = [0.167171472114775, -0.150959086409163, 0.940254742367256];
const V_C: [f32; 3] = [0.141299802443708, -0.155185060382272, 1.000000000000000];
const BIAS: [f32; 3] = [
-0.00801531300850582,
-0.00843312433578007,
-0.0256325967652889,
];
let mut x = mlaf(mlaf(BIAS[0], V_C[0], v_c), U_C[0], u_c);
let mut y = mlaf(mlaf(BIAS[1], V_C[1], v_c), U_C[1], u_c);
let d = mlaf(mlaf(BIAS[2], V_C[2], v_c), U_C[2], u_c);
// Same zero-denominator guard as in `dt_ucs_xy_to_uv`: clamp with the
// smallest positive float rather than Rust's `f32::MIN`.
let div = if d >= 0.0 {
d.max(f32::MIN_POSITIVE)
} else {
d.min(-f32::MIN_POSITIVE)
};
x /= div;
y /= div;
let yb = dt_ucs_l_star_to_y(l_star);
[x, y, yb]
}
}
impl DtUchHsb {
#[inline]
pub fn new(h: f32, s: f32, b: f32) -> DtUchHsb {
DtUchHsb { h, s, b }
}
#[inline]
pub fn from_jch(jch: DtUchJch) -> DtUchHsb {
let b = jch.j * (f_powf(jch.c, 1.33654221029386) + 1.);
let s = if b > 0. { jch.c / b } else { 0. };
let h = jch.h;
DtUchHsb::new(h, s, b)
}
#[inline]
pub fn to_jch(&self) -> DtUchJch {
let h = self.h;
let c = self.s * self.b;
let j = self.b / (f_powf(c, 1.33654221029386) + 1.);
DtUchJch::new(j, c, h)
}
}
impl DtUchHcb {
#[inline]
pub fn new(h: f32, c: f32, b: f32) -> DtUchHcb {
DtUchHcb { h, c, b }
}
#[inline]
pub fn from_jch(jch: DtUchJch) -> DtUchHcb {
let b = jch.j * (f_powf(jch.c, 1.33654221029386) + 1.);
let c = jch.c;
let h = jch.h;
DtUchHcb::new(h, c, b)
}
#[inline]
pub fn to_jch(&self) -> DtUchJch {
let h = self.h;
let c = self.c;
let j = self.b / (f_powf(self.c, 1.33654221029386) + 1.);
DtUchJch::new(j, c, h)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_darktable_ucs_jch() {
let xyy = [0.4, 0.2, 0.5];
let ucs = DtUchJch::from_xyy(xyy);
let xyy_rev = ucs.to_xyy();
assert!(
(xyy[0] - xyy_rev[0]).abs() < 1e-5,
"Expected {}, got {}",
xyy[0],
xyy_rev[0]
);
assert!(
(xyy[1] - xyy_rev[1]).abs() < 1e-5,
"Expected {}, got {}",
xyy[1],
xyy_rev[1]
);
assert!(
(xyy[2] - xyy_rev[2]).abs() < 1e-5,
"Expected {}, got {}",
xyy[2],
xyy_rev[2]
);
}
#[test]
fn test_darktable_hsb() {
let jch = DtUchJch::new(0.3, 0.6, 0.4);
let hsb = DtUchHsb::from_jch(jch);
let r_jch = hsb.to_jch();
assert!(
(r_jch.j - jch.j).abs() < 1e-5,
"Expected {}, got {}",
jch.j,
r_jch.j
);
assert!(
(r_jch.c - jch.c).abs() < 1e-5,
"Expected {}, got {}",
jch.c,
r_jch.c
);
assert!(
(r_jch.h - jch.h).abs() < 1e-5,
"Expected {}, got {}",
jch.h,
r_jch.h
);
}
#[test]
fn test_darktable_hcb() {
let jch = DtUchJch::new(0.3, 0.6, 0.4);
let hcb = DtUchHcb::from_jch(jch);
let r_jch = hcb.to_jch();
assert!(
(r_jch.j - jch.j).abs() < 1e-5,
"Expected {}, got {}",
jch.j,
r_jch.j
);
assert!(
(r_jch.c - jch.c).abs() < 1e-5,
"Expected {}, got {}",
jch.c,
r_jch.c
);
assert!(
(r_jch.h - jch.h).abs() < 1e-5,
"Expected {}, got {}",
jch.h,
r_jch.h
);
}
#[test]
fn test_darktable_ucs_jch_from_xyz() {
let xyz = Xyz::new(0.4, 0.2, 0.5);
let ucs = DtUchJch::from_xyz(xyz);
let xyz_rev = ucs.to_xyz();
assert!(
(xyz.x - xyz_rev.x).abs() < 1e-5,
"Expected {}, got {}",
xyz.x,
xyz_rev.x
);
assert!(
(xyz.y - xyz_rev.y).abs() < 1e-5,
"Expected {}, got {}",
xyz.y,
xyz_rev.y
);
assert!(
(xyz.z - xyz_rev.z).abs() < 1e-5,
"Expected {}, got {}",
xyz.z,
xyz_rev.z
);
}
}

122
vendor/moxcms/src/err.rs vendored Normal file

@@ -0,0 +1,122 @@
/*
* // Copyright (c) Radzivon Bartoshyk 2/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::RenderingIntent;
use std::error::Error;
use std::fmt::Display;
#[derive(Debug, Copy, Clone, PartialOrd, PartialEq)]
pub struct MalformedSize {
pub size: usize,
pub expected: usize,
}
#[derive(Debug, Clone, PartialOrd, PartialEq)]
pub enum CmsError {
LaneSizeMismatch,
LaneMultipleOfChannels,
InvalidProfile,
InvalidTrcCurve,
InvalidCicp,
CurveLutIsTooLarge,
ParametricCurveZeroDivision,
InvalidRenderingIntent,
DivisionByZero,
UnsupportedColorPrimaries(u8),
UnsupportedTrc(u8),
InvalidLayout,
UnsupportedProfileConnection,
BuildTransferFunction,
UnsupportedChannelConfiguration,
UnknownTag(u32),
UnknownTagTypeDefinition(u32),
UnsupportedLutRenderingIntent(RenderingIntent),
InvalidAtoBLut,
OverflowingError,
LUTTablesInvalidKind,
MalformedClut(MalformedSize),
MalformedCurveLutTable(MalformedSize),
InvalidInksCountForProfile,
MalformedTrcCurve(String),
}
impl Display for CmsError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
CmsError::LaneSizeMismatch => f.write_str("Lanes length must match"),
CmsError::LaneMultipleOfChannels => {
f.write_str("Lane length must not be multiple of channel count")
}
CmsError::InvalidProfile => f.write_str("Invalid ICC profile"),
CmsError::InvalidCicp => {
f.write_str("Invalid Code Independent point (CICP) in ICC profile")
}
CmsError::InvalidTrcCurve => f.write_str("Invalid TRC curve"),
CmsError::CurveLutIsTooLarge => f.write_str("Curve Lut is too large"),
CmsError::ParametricCurveZeroDivision => {
f.write_str("Parametric Curve definition causes division by zero")
}
CmsError::InvalidRenderingIntent => f.write_str("Invalid rendering intent"),
CmsError::DivisionByZero => f.write_str("Division by zero"),
CmsError::UnsupportedColorPrimaries(value) => {
f.write_fmt(format_args!("Unsupported color primaries, {value}"))
}
CmsError::UnsupportedTrc(value) => f.write_fmt(format_args!("Unsupported TRC {value}")),
CmsError::InvalidLayout => f.write_str("Invalid layout"),
CmsError::UnsupportedProfileConnection => f.write_str("Unsupported profile connection"),
CmsError::BuildTransferFunction => f.write_str("Can't reconstruct transfer function"),
CmsError::UnsupportedChannelConfiguration => {
f.write_str("Can't reconstruct channel configuration")
}
CmsError::UnknownTag(t) => f.write_fmt(format_args!("Unknown tag: {t}")),
CmsError::UnknownTagTypeDefinition(t) => {
f.write_fmt(format_args!("Unknown tag type definition: {t}"))
}
CmsError::UnsupportedLutRenderingIntent(intent) => f.write_fmt(format_args!(
"Can't find LUT for rendering intent: {intent:?}"
)),
CmsError::InvalidAtoBLut => f.write_str("Invalid A to B Lut"),
CmsError::OverflowingError => {
f.write_str("Overflowing was happen, that is not allowed")
}
CmsError::LUTTablesInvalidKind => f.write_str("All LUT curves must have the same kind"),
CmsError::MalformedClut(size) => {
f.write_fmt(format_args!("Invalid CLUT size: {size:?}"))
}
CmsError::MalformedCurveLutTable(size) => {
f.write_fmt(format_args!("Malformed curve LUT size: {size:?}"))
}
CmsError::InvalidInksCountForProfile => {
f.write_str("Invalid inks count for profile was provided")
}
CmsError::MalformedTrcCurve(str) => f.write_str(str),
}
}
}
impl Error for CmsError {}

1078
vendor/moxcms/src/gamma.rs vendored Normal file

File diff suppressed because it is too large

66
vendor/moxcms/src/gamut.rs vendored Normal file

@@ -0,0 +1,66 @@
/*
* // Copyright (c) Radzivon Bartoshyk 3/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::Rgb;
#[inline]
fn filmlike_clip_rgb_tone(r: &mut f32, g: &mut f32, b: &mut f32, l: f32) {
let new_r = r.min(l);
let new_b = b.min(l);
let new_g = new_b + ((new_r - new_b) * (*g - *b) / (*r - *b));
*r = new_r;
*g = new_g;
*b = new_b;
}
/// Softly clips out-of-range values with an S-curve-like channel re-interpolation
///
/// Works only on highlights; negative values pass through unchanged
#[inline]
pub fn filmlike_clip(rgb: Rgb<f32>) -> Rgb<f32> {
const L: f32 = 1.;
let mut rgb = rgb;
if rgb.r >= rgb.g {
if rgb.g > rgb.b {
filmlike_clip_rgb_tone(&mut rgb.r, &mut rgb.g, &mut rgb.b, L);
} else if rgb.b > rgb.r {
filmlike_clip_rgb_tone(&mut rgb.b, &mut rgb.r, &mut rgb.g, L);
} else if rgb.b > rgb.g {
filmlike_clip_rgb_tone(&mut rgb.r, &mut rgb.b, &mut rgb.g, L);
} else {
// g == b in this branch; assign the result, clamping every channel to L.
rgb = Rgb::new(rgb.r.min(L), rgb.g.min(L), rgb.b.min(L));
}
} else if rgb.r >= rgb.b {
filmlike_clip_rgb_tone(&mut rgb.g, &mut rgb.r, &mut rgb.b, L);
} else if rgb.b > rgb.g {
filmlike_clip_rgb_tone(&mut rgb.b, &mut rgb.g, &mut rgb.r, L);
} else {
filmlike_clip_rgb_tone(&mut rgb.g, &mut rgb.b, &mut rgb.r, L);
}
rgb
}
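// A minimal usage sketch, not part of the vendored source: an out-of-gamut
// highlight is compressed toward white while the channel ordering is kept.
// It assumes only the `Rgb<f32>` type and the `filmlike_clip` defined above.
#[cfg(test)]
mod filmlike_clip_sketch {
    use super::filmlike_clip;
    use crate::Rgb;

    #[test]
    fn clips_highlight_and_preserves_order() {
        // One channel blown past the 1.0 highlight limit.
        let clipped = filmlike_clip(Rgb::new(1.4f32, 0.9, 0.3));
        // The maximum channel is clamped to 1.0 and the middle channel is
        // re-interpolated, so r >= g >= b still holds afterwards.
        assert!(clipped.r <= 1.0);
        assert!(clipped.r >= clipped.g && clipped.g >= clipped.b);
    }
}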

223
vendor/moxcms/src/helpers.rs vendored Normal file
View File

@@ -0,0 +1,223 @@
/*
* // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
* //
* // Redistribution and use in source and binary forms, with or without modification,
* // are permitted provided that the following conditions are met:
* //
* // 1. Redistributions of source code must retain the above copyright notice, this
* // list of conditions and the following disclaimer.
* //
* // 2. Redistributions in binary form must reproduce the above copyright notice,
* // this list of conditions and the following disclaimer in the documentation
* // and/or other materials provided with the distribution.
* //
* // 3. Neither the name of the copyright holder nor the names of its
* // contributors may be used to endorse or promote products derived from
* // this software without specific prior written permission.
* //
* // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::matan::{
does_curve_have_discontinuity, is_curve_ascending, is_curve_degenerated, is_curve_descending,
is_curve_linear8, is_curve_linear16, is_curve_monotonic,
};
use crate::reader::{
s15_fixed16_number_to_double, uint8_number_to_float_fast, uint16_number_to_float_fast,
};
use crate::{CmsError, LutStore, Matrix3d, ToneReprCurve, Vector3d};
impl LutStore {
pub fn to_clut_f32(&self) -> Vec<f32> {
match self {
LutStore::Store8(store) => store
.iter()
.map(|x| uint8_number_to_float_fast(*x))
.collect(),
LutStore::Store16(store) => store
.iter()
.map(|x| uint16_number_to_float_fast(*x as u32))
.collect(),
}
}
pub(crate) fn is_degenerated(&self, entries: usize, channel: usize) -> bool {
let start = entries * channel;
let end = start + entries;
match &self {
LutStore::Store8(v) => is_curve_degenerated(&v[start..end]),
LutStore::Store16(v) => is_curve_degenerated(&v[start..end]),
}
}
pub(crate) fn is_monotonic(&self, entries: usize, channel: usize) -> bool {
let start = entries * channel;
let end = start + entries;
match &self {
LutStore::Store8(v) => is_curve_monotonic(&v[start..end]),
LutStore::Store16(v) => is_curve_monotonic(&v[start..end]),
}
}
pub(crate) fn have_discontinuities(&self, entries: usize, channel: usize) -> bool {
let start = entries * channel;
let end = start + entries;
match &self {
LutStore::Store8(v) => does_curve_have_discontinuity(&v[start..end]),
LutStore::Store16(v) => does_curve_have_discontinuity(&v[start..end]),
}
}
#[allow(dead_code)]
pub(crate) fn is_linear(&self, entries: usize, channel: usize) -> bool {
let start = entries * channel;
let end = start + entries;
match &self {
LutStore::Store8(v) => is_curve_linear8(&v[start..end]),
LutStore::Store16(v) => is_curve_linear16(&v[start..end]),
}
}
#[allow(dead_code)]
pub(crate) fn is_descending(&self, entries: usize, channel: usize) -> bool {
let start = entries * channel;
let end = start + entries;
match &self {
LutStore::Store8(v) => is_curve_descending(&v[start..end]),
LutStore::Store16(v) => is_curve_descending(&v[start..end]),
}
}
#[allow(dead_code)]
pub(crate) fn is_ascending(&self, entries: usize, channel: usize) -> bool {
let start = entries * channel;
let end = start + entries;
match &self {
LutStore::Store8(v) => is_curve_ascending(&v[start..end]),
LutStore::Store16(v) => is_curve_ascending(&v[start..end]),
}
}
}
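// A layout sketch, not part of the vendored source (hypothetical values, not
// from any real profile): the predicates above all share one slicing
// convention, where per-channel curves are stored back to back, so channel
// `c` occupies `entries * c .. entries * (c + 1)` of the flat table.
#[cfg(test)]
mod lut_layout_sketch {
    #[test]
    fn channel_slicing() {
        // A 3-channel, 4-entry 8-bit LUT stored flat.
        let flat: [u8; 12] = [
            0, 85, 170, 255, // channel 0: ascending
            128, 128, 128, 128, // channel 1: constant (degenerate)
            255, 170, 85, 0, // channel 2: descending
        ];
        let entries = 4;
        let channel = 1;
        let start = entries * channel;
        let curve = &flat[start..start + entries];
        assert!(curve.iter().all(|&v| v == curve[0])); // no variation: degenerate
    }
}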
impl ToneReprCurve {
pub(crate) fn is_linear(&self) -> bool {
match &self {
ToneReprCurve::Lut(lut) => {
if lut.is_empty() {
return true;
}
if lut.len() == 1 {
let gamma = 1. / crate::trc::u8_fixed_8number_to_float(lut[0]);
if (gamma - 1.).abs() < 1e-4 {
return true;
}
}
is_curve_linear16(lut)
}
ToneReprCurve::Parametric(parametric) => {
if parametric.is_empty() {
return true;
}
if parametric.len() == 1 && parametric[0] == 1. {
return true;
}
false
}
}
}
pub(crate) fn is_monotonic(&self) -> bool {
match &self {
ToneReprCurve::Lut(lut) => is_curve_monotonic(lut),
ToneReprCurve::Parametric(_) => true,
}
}
pub(crate) fn is_degenerated(&self) -> bool {
match &self {
ToneReprCurve::Lut(lut) => is_curve_degenerated(lut),
ToneReprCurve::Parametric(_) => false,
}
}
pub(crate) fn have_discontinuities(&self) -> bool {
match &self {
ToneReprCurve::Lut(lut) => does_curve_have_discontinuity(lut),
ToneReprCurve::Parametric(_) => false,
}
}
}
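// A decoding sketch, not part of the vendored source: the single-entry LUT
// case in `is_linear` relies on the ICC u8Fixed8 encoding, where one 16-bit
// value holds a gamma exponent (high byte integer part, low byte fraction).
// `decode_u8_fixed_8` is a hypothetical stand-in for
// `crate::trc::u8_fixed_8number_to_float`, assuming it divides by 256, which
// is what the comparison against gamma 1.0 above implies.
#[cfg(test)]
mod u8_fixed_8_sketch {
    fn decode_u8_fixed_8(x: u16) -> f32 {
        x as f32 / 256.0
    }

    #[test]
    fn gamma_decoding() {
        assert_eq!(decode_u8_fixed_8(0x0100), 1.0); // gamma 1.0 -> linear curve
        assert_eq!(decode_u8_fixed_8(0x0266), 2.3984375); // roughly gamma 2.4
    }
}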
pub(crate) fn read_matrix_3d(arr: &[u8]) -> Result<Matrix3d, CmsError> {
    // A 3x3 matrix tag holds nine big-endian s15Fixed16 entries: 9 * 4 = 36 bytes.
    if arr.len() < 36 {
        return Err(CmsError::InvalidProfile);
    }
    let m_tag = &arr[..36];
let e00 = i32::from_be_bytes([m_tag[0], m_tag[1], m_tag[2], m_tag[3]]);
let e01 = i32::from_be_bytes([m_tag[4], m_tag[5], m_tag[6], m_tag[7]]);
let e02 = i32::from_be_bytes([m_tag[8], m_tag[9], m_tag[10], m_tag[11]]);
let e10 = i32::from_be_bytes([m_tag[12], m_tag[13], m_tag[14], m_tag[15]]);
let e11 = i32::from_be_bytes([m_tag[16], m_tag[17], m_tag[18], m_tag[19]]);
let e12 = i32::from_be_bytes([m_tag[20], m_tag[21], m_tag[22], m_tag[23]]);
let e20 = i32::from_be_bytes([m_tag[24], m_tag[25], m_tag[26], m_tag[27]]);
let e21 = i32::from_be_bytes([m_tag[28], m_tag[29], m_tag[30], m_tag[31]]);
let e22 = i32::from_be_bytes([m_tag[32], m_tag[33], m_tag[34], m_tag[35]]);
Ok(Matrix3d {
v: [
[
s15_fixed16_number_to_double(e00),
s15_fixed16_number_to_double(e01),
s15_fixed16_number_to_double(e02),
],
[
s15_fixed16_number_to_double(e10),
s15_fixed16_number_to_double(e11),
s15_fixed16_number_to_double(e12),
],
[
s15_fixed16_number_to_double(e20),
s15_fixed16_number_to_double(e21),
s15_fixed16_number_to_double(e22),
],
],
})
}
pub(crate) fn read_vector_3d(arr: &[u8]) -> Result<Vector3d, CmsError> {
    // A vector tag holds three big-endian s15Fixed16 entries: 3 * 4 = 12 bytes.
    if arr.len() < 12 {
        return Err(CmsError::InvalidProfile);
    }
    let m_tag = &arr[..12];
let b0 = i32::from_be_bytes([m_tag[0], m_tag[1], m_tag[2], m_tag[3]]);
let b1 = i32::from_be_bytes([m_tag[4], m_tag[5], m_tag[6], m_tag[7]]);
let b2 = i32::from_be_bytes([m_tag[8], m_tag[9], m_tag[10], m_tag[11]]);
Ok(Vector3d {
v: [
s15_fixed16_number_to_double(b0),
s15_fixed16_number_to_double(b1),
s15_fixed16_number_to_double(b2),
],
})
}
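// A decoding sketch, not part of the vendored source: both readers above
// consume ICC s15Fixed16 values, a big-endian i32 whose low 16 bits are the
// fraction, i.e. value = raw / 65536. `decode_s15_fixed16` is a hypothetical
// stand-in for `crate::reader::s15_fixed16_number_to_double`.
#[cfg(test)]
mod s15_fixed16_sketch {
    fn decode_s15_fixed16(x: i32) -> f64 {
        x as f64 / 65536.0
    }

    #[test]
    fn fixed_point_decoding() {
        // 1.0 is 0x0001_0000, stored big-endian in the tag data.
        let raw = i32::from_be_bytes([0x00, 0x01, 0x00, 0x00]);
        assert_eq!(decode_s15_fixed16(raw), 1.0);
        // -0.5 is -32768 in raw fixed-point form.
        assert_eq!(decode_s15_fixed16(-32768), -0.5);
    }
}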

Some files were not shown because too many files have changed in this diff