Add primitive type conversions for fbig

cmpute · Jan 17, 2024 · b06745d · b06745d
1 parent b0ac0a2
commit b06745d
Show file tree

Hide file tree

Showing 15 changed files with 395 additions and 97 deletions.
diff --git a/base/CHANGELOG.md b/base/CHANGELOG.md
@@ -1,5 +1,9 @@
 # Changelog
 
+## Unreleased
+
+- Re-implement functions `next_up` and `next_down`, and expose them through the `utils` module.
+
 ## 0.4.0
 
 ### Add

diff --git a/base/src/lib.rs b/base/src/lib.rs
@@ -9,6 +9,11 @@ pub mod math;
 pub mod ring;
 pub mod sign;
 
+/// Some useful utility functions that are also used internally in this crate.
+pub mod utils {
+    pub use super::math::log::{next_down, next_up};
+}
+
 pub use approx::*;
 pub use bit::*;
 pub use error::*;

diff --git a/base/src/math/log.rs b/base/src/math/log.rs
@@ -92,31 +92,47 @@ const fn ceil_log2_fp8(n: u16) -> u16 {
 }
 
 /// Implementation of the nightly f32::next_up()
-#[cfg(feature = "std")]
+///
+/// This function will panic if the input is NaN or infinite.
 #[inline]
-fn next_up(f: f32) -> f32 {
-    debug_assert!(!f.is_nan() && !f.is_infinite());
-    use std::cmp::Ordering::*;
-
-    match f.partial_cmp(&0.).unwrap() {
-        Equal => f32::from_bits(1),
-        Less => f32::from_bits(f.to_bits() - 1),
-        Greater => f32::from_bits(f.to_bits() + 1),
-    }
+pub fn next_up(f: f32) -> f32 {
+    assert!(!f.is_nan() && !f.is_infinite());
+
+    const TINY_BITS: u32 = 0x1; // Smallest positive f32.
+    const CLEAR_SIGN_MASK: u32 = 0x7fff_ffff;
+
+    let bits = f.to_bits();
+    let abs = bits & CLEAR_SIGN_MASK;
+    let next_bits = if abs == 0 {
+        TINY_BITS
+    } else if bits == abs {
+        bits + 1
+    } else {
+        bits - 1
+    };
+    f32::from_bits(next_bits)
 }
 
 /// Implementation of the nightly f32::next_down()
-#[cfg(feature = "std")]
+///
+/// This function will panic if the input is NaN or infinite.
 #[inline]
-fn next_down(f: f32) -> f32 {
-    debug_assert!(!f.is_nan() && !f.is_infinite());
-    use std::cmp::Ordering::*;
-
-    match f.partial_cmp(&0.).unwrap() {
-        Equal => f32::from_bits(1 | (1 << 31)),
-        Less => f32::from_bits(f.to_bits() + 1),
-        Greater => f32::from_bits(f.to_bits() - 1),
-    }
+pub fn next_down(f: f32) -> f32 {
+    assert!(!f.is_nan() && !f.is_infinite());
+
+    const NEG_TINY_BITS: u32 = 0x8000_0001; // Smallest negative f32.
+    const CLEAR_SIGN_MASK: u32 = 0x7fff_ffff;
+
+    let bits = f.to_bits();
+    let abs = bits & CLEAR_SIGN_MASK;
+    let next_bits = if abs == 0 {
+        NEG_TINY_BITS
+    } else if bits == abs {
+        bits - 1
+    } else {
+        bits + 1
+    };
+    f32::from_bits(next_bits)
 }
 
 #[cfg(not(feature = "std"))]
@@ -195,7 +211,7 @@ macro_rules! impl_log2_bounds_for_uint {
                         ceil_log2_fp8(hi)
                     };
                     let ub = ub as f32 / 256.0;
-                    (lb + shift as f32, ub + shift as f32)
+                    (next_down(lb + shift as f32), next_up(ub + shift as f32))
                 }
             }
         }

diff --git a/base/src/math/mod.rs b/base/src/math/mod.rs
@@ -93,5 +93,5 @@ pub trait CubicRoot {
 }
 
 mod inv;
-mod log;
+pub(crate) mod log;
 mod root;
diff --git a/float/CHANGELOG.md b/float/CHANGELOG.md
@@ -1,12 +1,18 @@
 # Changelog
 
+## Unreleased
+
+- Implement `TryFrom<Repr>` and `TryFrom<FBig>` for primitive integers.
+- Implement `TryFrom<Repr<2>>` and `TryFrom<FBig<_, 2>>` for primitive floats.
+- Implement `From<UBig>` and `From<IBig>` for `Repr`.
+
 ## 0.4.2
 
 - Add `Repr::from_static_words` to support the `static_fbig!` and `static_dbig!` macros.
 - Add `FBig::from_repr_const` to support create an `FBig` instance from repr in const context.
 - Add conversion from `f32`/`f64` to `Repr<2>`.
 - Implement `NumOrd` between `FBig` and primitive integers / floats. 
-- Implement `AbsOrd` between `FBig` and `UBig`/`IBig`
+- Implement `AbsOrd` between `FBig` and `UBig`/`IBig`.
 - Now the `Debug` output of `FBig` values will not contains the rounding mode information (when alternative flag is not set).
 
 ## 0.4.1

diff --git a/float/src/convert.rs b/float/src/convert.rs
@@ -401,6 +401,8 @@ impl<R: Round, const B: Word> FBig<R, B> {
 
     /// Convert the float number to [f32] with the rounding mode associated with the type.
     ///
+    /// Note that the conversion is inexact even if the number is infinite.
+    ///
     /// # Examples
     ///
     /// ```
@@ -429,6 +431,8 @@ impl<R: Round, const B: Word> FBig<R, B> {
 
     /// Convert the float number to [f64] with [HalfEven] rounding mode regardless of the mode associated with this number.
     ///
+    /// Note that the conversion is inexact even if the number is infinite.
+    ///
     /// # Examples
     ///
     /// ```
@@ -694,17 +698,29 @@ impl<const B: Word> Repr<B> {
     }
 }
 
-impl<R: Round, const B: Word> From<IBig> for FBig<R, B> {
+impl<const B: Word> From<UBig> for Repr<B> {
     #[inline]
-    fn from(n: IBig) -> Self {
-        Self::from_parts(n, 0)
+    fn from(n: UBig) -> Self {
+        Self::new(n.into(), 0)
     }
 }
-
 impl<R: Round, const B: Word> From<UBig> for FBig<R, B> {
     #[inline]
     fn from(n: UBig) -> Self {
-        IBig::from(n).into()
+        Self::from_parts(n.into(), 0)
+    }
+}
+
+impl<const B: Word> From<IBig> for Repr<B> {
+    #[inline]
+    fn from(n: IBig) -> Self {
+        Self::new(n, 0)
+    }
+}
+impl<R: Round, const B: Word> From<IBig> for FBig<R, B> {
+    #[inline]
+    fn from(n: IBig) -> Self {
+        Self::from_parts(n, 0)
     }
 }
 
@@ -737,12 +753,45 @@ impl<R: Round, const B: Word> TryFrom<FBig<R, B>> for UBig {
 
 macro_rules! fbig_unsigned_conversions {
     ($($t:ty)*) => {$(
+        impl<const B: Word> From<$t> for Repr<B> {
+            #[inline]
+            fn from(value: $t) -> Repr<B> {
+                UBig::from(value).into()
+            }
+        }
         impl<R: Round, const B: Word> From<$t> for FBig<R, B> {
             #[inline]
             fn from(value: $t) -> FBig<R, B> {
                 UBig::from(value).into()
             }
         }
+
+        impl<const B: Word> TryFrom<Repr<B>> for $t {
+            type Error = ConversionError;
+
+            fn try_from(value: Repr<B>) -> Result<Self, Self::Error> {
+                if value.sign() == Sign::Negative || value.is_infinite() {
+                    Err(ConversionError::OutOfBounds)
+                } else {
+                    let (log2_lb, _) = value.log2_bounds();
+                    if log2_lb >= <$t>::BITS as f32 {
+                        Err(ConversionError::OutOfBounds)
+                    } else if value.exponent < 0 {
+                        Err(ConversionError::LossOfPrecision)
+                    } else {
+                        shl_digits::<B>(&value.significand, value.exponent as usize).try_into()
+                    }
+                }
+            }
+        }
+        impl<R: Round, const B: Word> TryFrom<FBig<R, B>> for $t {
+            type Error = ConversionError;
+
+            #[inline]
+            fn try_from(value: FBig<R, B>) -> Result<Self, Self::Error> {
+                value.repr.try_into()
+            }
+        }
     )*};
 }
 fbig_unsigned_conversions!(u8 u16 u32 u64 u128 usize);
@@ -755,6 +804,77 @@ macro_rules! fbig_signed_conversions {
                 IBig::from(value).into()
             }
         }
+
+        impl<R: Round, const B: Word> TryFrom<FBig<R, B>> for $t {
+            type Error = ConversionError;
+
+            fn try_from(value: FBig<R, B>) -> Result<Self, Self::Error> {
+                if value.repr.is_infinite() {
+                    Err(ConversionError::OutOfBounds)
+                } else {
+                    let (log2_lb, _) = value.repr.log2_bounds();
+                    if log2_lb >= <$t>::BITS as f32 {
+                        Err(ConversionError::OutOfBounds)
+                    } else if value.repr.exponent < 0 {
+                        Err(ConversionError::LossOfPrecision)
+                    } else {
+                        shl_digits::<B>(&value.repr.significand, value.repr.exponent as usize).try_into()
+                    }
+                }
+            }
+        }
     )*};
 }
 fbig_signed_conversions!(i8 i16 i32 i64 i128 isize);
+
+macro_rules! impl_from_fbig_for_float {
+    ($t:ty, $method:ident) => {
+        impl TryFrom<Repr<2>> for $t {
+            type Error = ConversionError;
+
+            #[inline]
+            fn try_from(value: Repr<2>) -> Result<Self, Self::Error> {
+                if value.is_infinite() {
+                    Err(ConversionError::LossOfPrecision)
+                } else {
+                    match value.$method() {
+                        Exact(v) => Ok(v),
+                        Inexact(v, _) => {
+                            if v.is_infinite() {
+                                Err(ConversionError::OutOfBounds)
+                            } else {
+                                Err(ConversionError::LossOfPrecision)
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        impl<R: Round> TryFrom<FBig<R, 2>> for $t {
+            type Error = ConversionError;
+
+            #[inline]
+            fn try_from(value: FBig<R, 2>) -> Result<Self, Self::Error> {
+                // This method is the same as the one for Repr, but it has to be re-implemented
+                // because the rounding behavior of to_f32/to_f64 is different.
+                if value.repr.is_infinite() {
+                    Err(ConversionError::LossOfPrecision)
+                } else {
+                    match value.$method() {
+                        Exact(v) => Ok(v),
+                        Inexact(v, _) => {
+                            if v.is_infinite() {
+                                Err(ConversionError::OutOfBounds)
+                            } else {
+                                Err(ConversionError::LossOfPrecision)
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    };
+}
+impl_from_fbig_for_float!(f32, to_f32);
+impl_from_fbig_for_float!(f64, to_f64);
diff --git a/float/src/log.rs b/float/src/log.rs
@@ -1,4 +1,9 @@
-use dashu_base::{AbsOrd, Approximation::*, EstimatedLog2, Sign};
+use dashu_base::{
+    utils::{next_down, next_up},
+    AbsOrd,
+    Approximation::*,
+    EstimatedLog2, Sign,
+};
 use dashu_int::IBig;
 
 use crate::{
@@ -11,6 +16,10 @@ use crate::{
 impl<const B: Word> EstimatedLog2 for Repr<B> {
     // currently a Word has at most 64 bits, so log2() < f32::MAX
     fn log2_bounds(&self) -> (f32, f32) {
+        if self.significand.is_zero() {
+            return (f32::NEG_INFINITY, f32::NEG_INFINITY);
+        }
+
         // log(s*B^e) = log(s) + e*log(B)
         let (logs_lb, logs_ub) = self.significand.log2_bounds();
         let (logb_lb, logb_ub) = if B.is_power_of_two() {
@@ -20,11 +29,12 @@ impl<const B: Word> EstimatedLog2 for Repr<B> {
             B.log2_bounds()
         };
         let e = self.exponent as f32;
-        if self.exponent >= 0 {
+        let (lb, ub) = if self.exponent >= 0 {
             (logs_lb + e * logb_lb, logs_ub + e * logb_ub)
         } else {
             (logs_lb + e * logb_ub, logs_ub + e * logb_lb)
-        }
+        };
+        (next_down(lb), next_up(ub))
     }
 
     fn log2_est(&self) -> f32 {