Reworked and reorganized lookup table generation and added documentation

Ogeon · Jan 7, 2025 · c8c63c3 · c8c63c3
1 parent a2e8ae7
commit c8c63c3
Show file tree

Hide file tree

Showing 9 changed files with 1,689 additions and 619 deletions.
diff --git a/codegen/src/lut.rs b/codegen/src/lut.rs
diff --git a/codegen/src/lut/model.rs b/codegen/src/lut/model.rs
@@ -0,0 +1,129 @@
+use super::TransferFn;
+
+/// The scale (slope) and bias (intercept) of a straight-line
+/// approximation of a transfer function on a single interval.
+///
+/// The line is fitted with simple linear regression, where the usual
+/// sums over samples are replaced by integrals over the interval.
+pub(super) struct LinearModel {
+    scale: f64, // Slope of the fitted line, per unit of `t`.
+    bias: f64, // Value of the fitted line at `t = 0`.
+}
+
+impl LinearModel {
+    pub(super) fn new( // Fits the line `y = bias + scale * t` to `transfer_fn` between `start` and `end`.
+        transfer_fn: &TransferFn,
+        start: u32, // Bit pattern of the `f32` at the start of the interval.
+        end: u32, // Bit pattern of the `f32` at the end of the interval.
+        man_index_width: u32, // NOTE(review): presumably the number of mantissa bits used as table index - confirm with caller.
+        t_width: u32, // Width of `t` in bits; `t` is integrated from 0 to `2^t_width`.
+    ) -> Self {
+        let TransferFn {
+            linear_scale,
+            alpha,
+            beta,
+            gamma,
+            ..
+        } = *transfer_fn;
+
+        let beta_bits = (beta as f32).to_bits(); // `beta` as `f32` bits, so it can be compared with `start` and `end`.
+        // Built from only the exponent field of `start`, lowered by both
+        // widths. It relates the differentials: `dx = exp_scale * dt`
+        let exp_scale = f32::from_bits(((start >> 23) - man_index_width - t_width) << 23) as f64;
+        let start_x = f32::from_bits(start) as f64;
+        let end_x = f32::from_bits(end) as f64;
+
+        // When the whole interval lies in the linear segment (at or below
+        // `beta`), the line is exact and no integration is needed.
+        if let Some(linear_scale) = linear_scale {
+            if end <= beta_bits {
+                return Self {
+                    scale: linear_scale * exp_scale, // dy/dt = linear_scale * dx/dt.
+                    bias: linear_scale * start_x, // `y` at `t = 0`, i.e. at `start_x`.
+                };
+            }
+        }
+
+        let max_t = 2.0f64.powi(t_width as i32); // Upper end of the `t` range: 2^t_width.
+
+        let (integral_y, integral_ty) = match linear_scale { // Integrals of `y` and `t * y` over `t` in `[0, max_t]`.
+            Some(linear_scale) if start < beta_bits => { // The interval straddles `beta`: split the integral there.
+                let beta_t = // The `t` value paired with `x = beta` in the two integrals below.
+                    (beta_bits << (9 + man_index_width)) as f64 * 2.0f64.powi(t_width as i32 - 32);
+                let int_linear =
+                    integrate_linear((start_x, beta), (0.0, beta_t), linear_scale, exp_scale);
+                let int_exponential =
+                    integrate_exponential((beta, end_x), (beta_t, max_t), alpha, gamma, exp_scale);
+                (
+                    int_linear.0 + int_exponential.0,
+                    int_linear.1 + int_exponential.1,
+                )
+            }
+            _ => integrate_exponential((start_x, end_x), (0.0, max_t), alpha, gamma, exp_scale), // No linear part inside the interval.
+        };
+        let max_t2 = max_t * max_t;
+        let integral_t = max_t2 * 0.5; // Integral of `t` over `[0, max_t]`.
+        let integral_t2 = max_t2 * max_t / 3.0; // Integral of `t^2` over `[0, max_t]`.
+
+        let scale = (max_t * integral_ty - integral_t * integral_y) // Least-squares slope, with `max_t` in the role of the sample count.
+            / (max_t * integral_t2 - integral_t * integral_t);
+        Self {
+            scale,
+            bias: (integral_y - scale * integral_t) / max_t, // Least-squares intercept.
+        }
+    }
+
+    pub(super) fn into_u8_lookup(self) -> u32 { // Packs the model, scaled by 255, into a single `u32`.
+        let scale_uint = (255.0 * self.scale * 65536.0 + 0.5) as u32; // Slope times 255 * 2^16; adding 0.5 rounds before the truncating cast.
+        let bias_uint = (((255.0 * self.bias + 0.5) * 128.0 + 0.5) as u32) << 9; // Intercept times 255, offset by 0.5, times 2^7, rounded, then shifted up.
+        (bias_uint << 7) | scale_uint // Bias is shifted 16 bits in total (9 + 7); assumes `scale_uint` fits below it - TODO confirm.
+    }
+
+    pub(super) fn into_u16_lookup(self) -> u64 { // Packs the model, scaled by 65535, into a single `u64`.
+        let scale_uint = (65535.0 * self.scale * 4294967296.0 + 0.5) as u64; // Slope times 65535 * 2^32; adding 0.5 rounds before the truncating cast.
+        let bias_uint = (((65535.0 * self.bias + 0.5) * 32768.0 + 0.5) as u64) << 17; // Intercept times 65535, offset by 0.5, times 2^15, rounded, then shifted up.
+        (bias_uint << 15) | scale_uint // Bias is shifted 32 bits in total (17 + 15); assumes `scale_uint` fits below it - TODO confirm.
+    }
+}
+
+fn integrate_linear( // Integrates `y = linear_scale * x` and `t * y` with respect to `t`.
+    (start_x, end_x): (f64, f64), // Interval end points in `x`.
+    (start_t, end_t): (f64, f64), // The `t` values paired with those end points.
+    linear_scale: f64,
+    exp_scale: f64, // Step in `x` per step in `t`; used to change variables from `t` to `x`.
+) -> (f64, f64) { // Returns `(integral of y, integral of t * y)`.
+    let antiderive_y = |x: f64| 0.5 * linear_scale * x * x / exp_scale; // Antiderivative of `y` in `t`, written in terms of `x`.
+    let antiderive_ty = // Antiderivative of `t * y` in `t`, obtained through integration by parts.
+        |x: f64, t: f64| 0.5 * linear_scale * x * x * (t - x / (3.0 * exp_scale)) / exp_scale;
+
+    (
+        antiderive_y(end_x) - antiderive_y(start_x),
+        antiderive_ty(end_x, end_t) - antiderive_ty(start_x, start_t),
+    )
+}
+
+fn integrate_exponential( // Integrates `y = alpha * x^(1 / gamma) + (1 - alpha)` and `t * y` with respect to `t`.
+    (start_x, end_x): (f64, f64), // Interval end points in `x`.
+    (start_t, end_t): (f64, f64), // The `t` values paired with those end points.
+    alpha: f64,
+    gamma: f64,
+    exp_scale: f64, // Step in `x` per step in `t`; used to change variables from `t` to `x`.
+) -> (f64, f64) { // Returns `(integral of y, integral of t * y)`.
+    let one_plus_gamma_inv = 1.0 + gamma.recip(); // Exponent of `x` after integrating `x^(1 / gamma)` once.
+    let antiderive_y = |x: f64, t: f64| { // Antiderivative of `y` in `t`; the constant term `(1 - alpha)` integrates to `(1 - alpha) * t`.
+        alpha * gamma * x.powf(one_plus_gamma_inv) / (exp_scale * (1.0 + gamma)) + (1.0 - alpha) * t
+    };
+    let antiderive_ty = |x: f64, t: f64| { // Antiderivative of `t * y` in `t`, obtained through integration by parts.
+        alpha
+            * gamma
+            * x.powf(one_plus_gamma_inv)
+            * (t - gamma * x / (exp_scale * (1.0 + 2.0 * gamma)))
+            / (exp_scale * (1.0 + gamma))
+            + 0.5 * (1.0 - alpha) * t * t
+    };
+
+    (
+        antiderive_y(end_x, end_t) - antiderive_y(start_x, start_t),
+        antiderive_ty(end_x, end_t) - antiderive_ty(start_x, start_t),
+    )
+}
diff --git a/palette/src/encoding/adobe.rs b/palette/src/encoding/adobe.rs
@@ -17,6 +17,21 @@ use super::{FromLinear, IntoLinear};
 /// in cyan-green hues.
 ///
 /// The Adobe RGB standard uses a gamma 2.2 transfer function.
+///
+/// # As transfer function
+///
+/// `AdobeRgb` will not use any kind of approximation when converting from `T` to
+/// `T`. This involves calls to `powf`, which may make it too slow for certain
+/// applications.
+///
+/// There are some specialized cases where it has been optimized:
+///
+/// * When converting from `u8` to `f32` or `f64`, while converting to linear
+///   space. This uses lookup tables with precomputed values.
+/// * When converting from `f32` or `f64` to `u8`, while converting from linear
+///   space. This uses a fast algorithm that guarantees a maximum error in the
+///   result of less than 0.6 in line with
+///   [this DirectX spec](https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#FLOATtoSRGB).
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 pub struct AdobeRgb;
 

diff --git a/palette/src/encoding/lut.rs b/palette/src/encoding/lut.rs
@@ -3,7 +3,7 @@ mod codegen;
 const MAX_FLOAT_BITS: u32 = 0x3f7fffff; // 1.0 - f32::EPSILON
 
 // SAFETY: Only use this macro if `input` is clamped between `min_float` and `max_float`.
-macro_rules! linear_float_to_encoded_uint {
+macro_rules! unsafe_linear_float_to_encoded_uint {
     ($enc:ty, $lut:ty, $input:ident, $min_float_bits:ident, $table:ident, $bit_width:expr, $man_index_width:expr) => {{
         let input_bits = $input.to_bits();
         #[cfg(test)]
@@ -16,7 +16,7 @@ macro_rules! linear_float_to_encoded_uint {
             {
                 debug_assert!($table.get(i).is_some());
             }
-            unsafe { *$table.get_unchecked(i) }
+            *$table.get_unchecked(i)
         };
 
         let bias = (entry >> (2 * $bit_width)) << ($bit_width + 1);
@@ -44,7 +44,7 @@ fn linear_f32_to_encoded_u8(linear: f32, min_float_bits: u32, table: &[u32]) ->
         input = max_float;
     }
 
-    linear_float_to_encoded_uint!(u8, u32, input, min_float_bits, table, 8, 3)
+    unsafe { unsafe_linear_float_to_encoded_uint!(u8, u32, input, min_float_bits, table, 8, 3) }
 }
 
 #[cfg(feature = "gamma_lut_u16")]
@@ -69,5 +69,5 @@ fn linear_f32_to_encoded_u16_with_linear_scale(
         return ((linear_scale * input + 8388608.0).to_bits() & 65535) as u16;
     }
 
-    linear_float_to_encoded_uint!(u16, u64, input, min_float_bits, table, 16, 7)
+    unsafe { unsafe_linear_float_to_encoded_uint!(u16, u64, input, min_float_bits, table, 16, 7) }
 }