-
Notifications
You must be signed in to change notification settings - Fork 61
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add
FromLinear
and IntoLinear
lookup table creation to build script
- Loading branch information
BuildTools
committed
Jan 7, 2025
1 parent
f509ebd
commit 544e40f
Showing
14 changed files
with
70,251 additions
and
901 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,266 @@ | ||
use anyhow::Result; | ||
use proc_macro2::{Ident, TokenStream}; | ||
use quote::{format_ident, quote}; | ||
|
||
use crate::{codegen_file::CodegenFile, lut::model::LinearModel}; | ||
|
||
mod model; | ||
|
||
pub fn generate() -> Result<()> { | ||
let mut file = CodegenFile::create("palette/src/encoding/lut/codegen.rs")?; | ||
|
||
let transfer_fn_u8 = vec![ | ||
LutEntryU8::new( | ||
"srgb", | ||
"SRGB", | ||
TransferFn::new_with_linear(12.92, 0.0031308, 2.4), | ||
), | ||
LutEntryU8::new( | ||
"rec_standards", | ||
"REC_OETF", | ||
TransferFn::new_with_linear(4.5, 0.018053968510807, 1.0 / 0.45), | ||
), | ||
LutEntryU8::new( | ||
"adobe", | ||
"ADOBE_RGB", | ||
TransferFn::new_pure_gamma(563.0 / 256.0), | ||
), | ||
LutEntryU8::new("p3", "P3_GAMMA", TransferFn::new_pure_gamma(2.6)), | ||
]; | ||
|
||
let transfer_fn_u16 = vec![LutEntryU16::new( | ||
"prophoto", | ||
"PROPHOTO_RGB", | ||
TransferFn::new_with_linear(16.0, 0.001953125, 1.8), | ||
)]; | ||
|
||
for LutEntryU8 { | ||
module, | ||
fn_type_uppercase, | ||
transfer_fn, | ||
} in transfer_fn_u8 | ||
{ | ||
let u8_to_float = build_u8_to_float_lut(&fn_type_uppercase, &transfer_fn); | ||
let float_to_u8 = build_float_to_u8_lut(&fn_type_uppercase, &transfer_fn); | ||
|
||
file.append(quote! { | ||
pub mod #module { | ||
#u8_to_float | ||
|
||
#float_to_u8 | ||
} | ||
})?; | ||
} | ||
|
||
for LutEntryU16 { | ||
module, | ||
fn_type_uppercase, | ||
transfer_fn, | ||
} in transfer_fn_u16 | ||
{ | ||
let u16_to_float = build_u16_to_float_lut(&fn_type_uppercase, &transfer_fn); | ||
let float_to_u8 = build_float_to_u16_lut(&fn_type_uppercase, &transfer_fn); | ||
|
||
file.append(quote! { | ||
#[cfg(feature = "gamma_lut_u16")] | ||
pub mod #module { | ||
#u16_to_float | ||
|
||
#float_to_u8 | ||
} | ||
})?; | ||
} | ||
|
||
Ok(()) | ||
} | ||
|
||
/// This struct is able to model a given transfer function. | ||
/// | ||
/// Any transfer function will have a linear part (optional) for input values | ||
/// less than some value `beta` and an exponential part determined by the function's | ||
/// `gamma` value. For transfer functions with a linear part, `alpha` is chosen to | ||
/// preserve function continuity. | ||
struct TransferFn { | ||
into_linear: Box<dyn Fn(f64) -> f64>, | ||
linear_scale: Option<f64>, | ||
alpha: f64, | ||
beta: f64, | ||
gamma: f64, | ||
} | ||
|
||
impl TransferFn { | ||
fn new_with_linear(linear_scale: f64, linear_end: f64, gamma: f64) -> Self { | ||
let alpha = (linear_scale * linear_end - 1.0) / (linear_end.powf(gamma.recip()) - 1.0); | ||
let beta = linear_end; | ||
Self { | ||
into_linear: Box::new(move |encoded| { | ||
if encoded <= linear_scale * beta { | ||
encoded / linear_scale | ||
} else { | ||
((encoded + alpha - 1.0) / alpha).powf(gamma) | ||
} | ||
}), | ||
linear_scale: Some(linear_scale), | ||
alpha, | ||
beta, | ||
gamma, | ||
} | ||
} | ||
|
||
fn new_pure_gamma(gamma: f64) -> Self { | ||
Self { | ||
into_linear: Box::new(move |encoded| encoded.powf(gamma)), | ||
linear_scale: None, | ||
alpha: 1.0, | ||
beta: 0.0, | ||
gamma, | ||
} | ||
} | ||
} | ||
|
||
struct LutEntryU8 { | ||
module: Ident, | ||
fn_type_uppercase: String, | ||
transfer_fn: TransferFn, | ||
} | ||
|
||
struct LutEntryU16 { | ||
module: Ident, | ||
fn_type_uppercase: String, | ||
transfer_fn: TransferFn, | ||
} | ||
|
||
impl LutEntryU8 { | ||
fn new(module: &str, fn_type_uppercase: &str, transfer_fn: TransferFn) -> Self { | ||
Self { | ||
module: format_ident!("{module}"), | ||
fn_type_uppercase: fn_type_uppercase.to_owned(), | ||
transfer_fn, | ||
} | ||
} | ||
} | ||
|
||
impl LutEntryU16 { | ||
fn new(module: &str, fn_type_uppercase: &str, transfer_fn: TransferFn) -> Self { | ||
Self { | ||
module: format_ident!("{module}"), | ||
fn_type_uppercase: fn_type_uppercase.to_owned(), | ||
transfer_fn, | ||
} | ||
} | ||
} | ||
|
||
fn build_u8_to_float_lut(fn_type_uppercase: &str, transfer_fn: &TransferFn) -> TokenStream { | ||
let table = (0..=u8::MAX).map(|i| (transfer_fn.into_linear)((i as f64) / 255.0)); | ||
let table_ident = format_ident!("{fn_type_uppercase}_U8_TO_F64"); | ||
let table_f32 = table.clone().map(|f| f as f32); | ||
let table_f32_ident = format_ident!("{fn_type_uppercase}_U8_TO_F32"); | ||
quote! { | ||
pub const #table_ident: [f64; 256] = [ | ||
#(#table),* | ||
]; | ||
|
||
pub const #table_f32_ident: [f32; 256] = [ | ||
#(#table_f32),* | ||
]; | ||
} | ||
} | ||
|
||
fn build_u16_to_float_lut(fn_type_uppercase: &str, transfer_fn: &TransferFn) -> TokenStream { | ||
let table = (0..=u16::MAX).map(|i| (transfer_fn.into_linear)((i as f64) / 65535.0)); | ||
let table_ident = format_ident!("{fn_type_uppercase}_U16_TO_F64"); | ||
quote! { | ||
pub static #table_ident: [f64; 65536] = [ | ||
#(#table),* | ||
]; | ||
} | ||
} | ||
|
||
/// This algorithm is an adaptation of [this C++ code](<https://gist.github.com/rygorous/2203834>) | ||
/// by Fabian "ryg" Giesen, which utilizes simple linear regression on | ||
/// sub-intervals of the transfer function's domain and stores the resulting | ||
/// models' scales and biases into a lookup table. | ||
/// | ||
/// The algorithm linked above calculates the transfer function for every | ||
/// potential `f32` input and feeds that into the regression model. In | ||
/// contrast, this algorithm replaces the discrete sums in the model with | ||
/// continuous integrals in order to reduce the time it takes to generate | ||
/// the tables. We are able to do this since transfer functions follow a | ||
/// predictable pattern for which the anti-derivative is known. | ||
fn build_float_to_u8_lut(fn_type_uppercase: &str, transfer_fn: &TransferFn) -> TokenStream { | ||
// 1.0 - f32::EPSILON | ||
const MAX_FLOAT_BITS: u32 = 0x3f7fffff; | ||
// The number of mantissa bits used to index into the lookup table | ||
const MAN_INDEX_WIDTH: u32 = 3; | ||
// The number of bits in the remainder of the mantissa | ||
const BUCKET_INDEX_WIDTH: u32 = 20; | ||
const BUCKET_SIZE: u32 = 1 << BUCKET_INDEX_WIDTH; | ||
// Any input less than or equal to this maps to 0 | ||
let min_float_bits = | ||
(((transfer_fn.into_linear)(0.5 / 255.0) as f32).to_bits() - 1) & 0xff800000; | ||
|
||
let exp_table_size = ((MAX_FLOAT_BITS - min_float_bits) >> 23) + 1; | ||
let table_size = exp_table_size << MAN_INDEX_WIDTH; | ||
|
||
let table = (0..table_size).map(|i| { | ||
let start = min_float_bits + (i << BUCKET_INDEX_WIDTH); | ||
let end = start + BUCKET_SIZE; | ||
|
||
LinearModel::new(transfer_fn, start, end, MAN_INDEX_WIDTH, 8).into_u8_lookup() | ||
}); | ||
|
||
let table_ident = format_ident!("TO_{fn_type_uppercase}_U8"); | ||
let table_size_usize = table_size as usize; | ||
|
||
let float_const_ident = format_ident!("{fn_type_uppercase}_MIN_FLOAT"); | ||
quote! { | ||
pub const #float_const_ident: u32 = #min_float_bits; | ||
|
||
pub const #table_ident: [u32; #table_size_usize] = [ | ||
#(#table),* | ||
]; | ||
} | ||
} | ||
|
||
fn build_float_to_u16_lut(fn_type_uppercase: &str, transfer_fn: &TransferFn) -> TokenStream { | ||
// 1.0 - f32::EPSILON | ||
const MAX_FLOAT_BITS: u32 = 0x3f7fffff; | ||
// The number of mantissa bits used to index into the lookup table | ||
const MAN_INDEX_WIDTH: u32 = 7; | ||
// The number of bits in the remainder of the mantissa | ||
const BUCKET_INDEX_WIDTH: i32 = 16; | ||
const BUCKET_SIZE: u32 = 1 << BUCKET_INDEX_WIDTH; | ||
let TransferFn { | ||
into_linear, | ||
linear_scale, | ||
beta, | ||
.. | ||
} = transfer_fn; | ||
let min_float_bits = (*beta as f32) | ||
.to_bits() | ||
.max((into_linear(0.5 / 65535.0) as f32).to_bits() - 1) | ||
& 0xff800000; | ||
let exp_table_size = ((MAX_FLOAT_BITS - min_float_bits) >> 23) + 1; | ||
let table_size = exp_table_size << MAN_INDEX_WIDTH; | ||
let table = (0..table_size).map(|i| { | ||
let start = min_float_bits + (i << BUCKET_INDEX_WIDTH); | ||
let end = start + BUCKET_SIZE; | ||
|
||
LinearModel::new(transfer_fn, start, end, MAN_INDEX_WIDTH, 16).into_u16_lookup() | ||
}); | ||
|
||
let table_ident = format_ident!("TO_{fn_type_uppercase}_U16"); | ||
let table_size_usize = table_size as usize; | ||
let linear_scale = 65535.0 * (linear_scale.unwrap_or_default() as f32); | ||
|
||
let float_const_ident = format_ident!("{fn_type_uppercase}_MIN_FLOAT"); | ||
let linear_scale_ident = format_ident!("{fn_type_uppercase}_LINEAR_SCALE"); | ||
quote! { | ||
pub const #float_const_ident: u32 = #min_float_bits; | ||
pub const #linear_scale_ident: f32 = #linear_scale; | ||
|
||
pub const #table_ident: [u64; #table_size_usize] = [ | ||
#(#table),* | ||
]; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
use super::TransferFn; | ||
|
||
/// This struct contains the scale and bias for a linear | ||
/// regression model of a transfer function on a given interval. | ||
/// | ||
/// This model is calculated by using simple linear regression with | ||
/// integration instead of summation. | ||
pub(super) struct LinearModel { | ||
scale: f64, | ||
bias: f64, | ||
} | ||
|
||
impl LinearModel { | ||
pub(super) fn new( | ||
transfer_fn: &TransferFn, | ||
start: u32, | ||
end: u32, | ||
man_index_width: u32, | ||
t_width: u32, | ||
) -> Self { | ||
let TransferFn { | ||
linear_scale, | ||
alpha, | ||
beta, | ||
gamma, | ||
.. | ||
} = *transfer_fn; | ||
|
||
let beta_bits = (beta as f32).to_bits(); | ||
// Corresponds to the scale between differentials. Specifically, | ||
// `dx = exp_scale * dt` | ||
let exp_scale = f32::from_bits(((start >> 23) - man_index_width - t_width) << 23) as f64; | ||
let start_x = f32::from_bits(start) as f64; | ||
let end_x = f32::from_bits(end) as f64; | ||
|
||
// If the transfer function is purely linear on a given interval, | ||
// integration is unnecessary. | ||
if let Some(linear_scale) = linear_scale { | ||
if end <= beta_bits { | ||
return Self { | ||
scale: linear_scale * exp_scale, | ||
bias: linear_scale * start_x, | ||
}; | ||
} | ||
} | ||
|
||
let max_t = 2.0f64.powi(t_width as i32); | ||
|
||
let (integral_y, integral_ty) = match linear_scale { | ||
Some(linear_scale) if start < beta_bits => { | ||
let beta_t = | ||
(beta_bits << (9 + man_index_width)) as f64 * 2.0f64.powi(t_width as i32 - 32); | ||
let int_linear = | ||
integrate_linear((start_x, beta), (0.0, beta_t), linear_scale, exp_scale); | ||
let int_exponential = | ||
integrate_exponential((beta, end_x), (beta_t, max_t), alpha, gamma, exp_scale); | ||
( | ||
int_linear.0 + int_exponential.0, | ||
int_linear.1 + int_exponential.1, | ||
) | ||
} | ||
_ => integrate_exponential((start_x, end_x), (0.0, max_t), alpha, gamma, exp_scale), | ||
}; | ||
let max_t2 = max_t * max_t; | ||
let integral_t = max_t2 * 0.5; | ||
let integral_t2 = max_t2 * max_t / 3.0; | ||
|
||
let scale = (max_t * integral_ty - integral_t * integral_y) | ||
/ (max_t * integral_t2 - integral_t * integral_t); | ||
Self { | ||
scale, | ||
bias: (integral_y - scale * integral_t) / max_t, | ||
} | ||
} | ||
|
||
pub(super) fn into_u8_lookup(self) -> u32 { | ||
let scale_uint = (255.0 * self.scale * 65536.0 + 0.5) as u32; | ||
let bias_uint = (((255.0 * self.bias + 0.5) * 128.0 + 0.5) as u32) << 9; | ||
(bias_uint << 7) | scale_uint | ||
} | ||
|
||
pub(super) fn into_u16_lookup(self) -> u64 { | ||
let scale_uint = (65535.0 * self.scale * 4294967296.0 + 0.5) as u64; | ||
let bias_uint = (((65535.0 * self.bias + 0.5) * 32768.0 + 0.5) as u64) << 17; | ||
(bias_uint << 15) | scale_uint | ||
} | ||
} | ||
|
||
fn integrate_linear( | ||
(start_x, end_x): (f64, f64), | ||
(start_t, end_t): (f64, f64), | ||
linear_scale: f64, | ||
exp_scale: f64, | ||
) -> (f64, f64) { | ||
let antiderive_y = |x: f64| 0.5 * linear_scale * x * x / exp_scale; | ||
let antiderive_ty = | ||
|x: f64, t: f64| 0.5 * linear_scale * x * x * (t - x / (3.0 * exp_scale)) / exp_scale; | ||
|
||
( | ||
antiderive_y(end_x) - antiderive_y(start_x), | ||
antiderive_ty(end_x, end_t) - antiderive_ty(start_x, start_t), | ||
) | ||
} | ||
|
||
fn integrate_exponential( | ||
(start_x, end_x): (f64, f64), | ||
(start_t, end_t): (f64, f64), | ||
alpha: f64, | ||
gamma: f64, | ||
exp_scale: f64, | ||
) -> (f64, f64) { | ||
let one_plus_gamma_inv = 1.0 + gamma.recip(); | ||
let antiderive_y = |x: f64, t: f64| { | ||
alpha * gamma * x.powf(one_plus_gamma_inv) / (exp_scale * (1.0 + gamma)) + (1.0 - alpha) * t | ||
}; | ||
let antiderive_ty = |x: f64, t: f64| { | ||
alpha | ||
* gamma | ||
* x.powf(one_plus_gamma_inv) | ||
* (t - gamma * x / (exp_scale * (1.0 + 2.0 * gamma))) | ||
/ (exp_scale * (1.0 + gamma)) | ||
+ 0.5 * (1.0 - alpha) * t * t | ||
}; | ||
|
||
( | ||
antiderive_y(end_x, end_t) - antiderive_y(start_x, start_t), | ||
antiderive_ty(end_x, end_t) - antiderive_ty(start_x, start_t), | ||
) | ||
} |
Oops, something went wrong.