lexical_write_integer/
decimal.rs

1//! Radix-generic, lexical integer-to-string conversion routines.
2//!
3//! An optimization for decimal is pre-computing the number of digits written
4//! prior to actually writing digits, avoiding the use of temporary buffers.
5//! This scales well with integer size, short of `u128`, due to the slower
6//! division algorithms required.
7//!
8//! See [`Algorithm.md`] for a more detailed description of the algorithm
9//! choice here.
10//!
11//! [`Algorithm.md`]: https://github.com/Alexhuszagh/rust-lexical/blob/main/lexical-write-integer/docs/Algorithm.md
12
13#![cfg(not(feature = "compact"))]
14#![doc(hidden)]
15
16use crate::algorithm::{algorithm, algorithm_u128};
17use crate::table::DIGIT_TO_BASE10_SQUARED;
18use lexical_util::format::{RADIX, RADIX_SHIFT, STANDARD};
19use lexical_util::num::UnsignedInteger;
20
21/// Fast integral log2.
22///
23/// This is fairly trivial to explain, since the log2 is related to the
24/// number of bits in the value. Therefore, it has to be related to
25/// `T::BITS - ctlz(x)`. For example, `log2(2) == 1`, and `log2(1) == 0`,
26/// and `log2(3) == 1`. Therefore, we must take the log of an odd number,
27/// and subtract one.
28///
29/// This algorithm is described in detail in "Computing the number of digits
30/// of an integer quickly", available
31/// [here](https://lemire.me/blog/2021/05/28/computing-the-number-of-digits-of-an-integer-quickly/).
32#[inline]
33pub fn fast_log2<T: UnsignedInteger>(x: T) -> usize {
34    T::BITS - 1 - (x | T::ONE).leading_zeros() as usize
35}
36
37/// Calculate the fast, integral log10 of a value.
38///
39/// This is relatively easy to explain as well: we calculate the log2
40/// of the value, then multiply by an integral constant for the log10(2).
41///
42/// Note that this value is frequently off by 1, so we need to round-up
43/// accordingly. This magic number is valid at least up until `1<<18`,
44/// which works for all values, since our max log2 is 127.
45#[inline]
46pub fn fast_log10<T: UnsignedInteger>(x: T) -> usize {
47    let log2 = fast_log2(x);
48    (log2 * 1233) >> 12
49}
50
51/// Fast algorithm to calculate the number of digits in an integer.
52///
53/// We only use this for 32-bit or smaller values: for larger numbers,
54/// we first write digits until we get to 32-bits, then we call this.
55///
56/// The values are as follows:
57///
58/// - `2^32 for j = 1`
59/// - `⌈log10(2^j)⌉ * 2^128 + 2^128 – 10^(⌈log10(2j)⌉) for j from 2 to 30`
60/// - `⌈log10(2^j)⌉ for j = 31 and j = 32`
61///
62/// This algorithm is described in detail in "Computing the number of digits
63/// of an integer even faster", available
64/// [here](https://lemire.me/blog/2021/06/03/computing-the-number-of-digits-of-an-integer-even-faster/).
65#[inline]
66pub fn fast_digit_count(x: u32) -> usize {
67    const TABLE: [u64; 32] = [
68        4294967296,
69        8589934582,
70        8589934582,
71        8589934582,
72        12884901788,
73        12884901788,
74        12884901788,
75        17179868184,
76        17179868184,
77        17179868184,
78        21474826480,
79        21474826480,
80        21474826480,
81        21474826480,
82        25769703776,
83        25769703776,
84        25769703776,
85        30063771072,
86        30063771072,
87        30063771072,
88        34349738368,
89        34349738368,
90        34349738368,
91        34349738368,
92        38554705664,
93        38554705664,
94        38554705664,
95        41949672960,
96        41949672960,
97        41949672960,
98        42949672960,
99        42949672960,
100    ];
101    // SAFETY: always safe, since fast_log2 will always return a value
102    // <= 32. This is because the range of values from `ctlz(x | 1)` is
103    // `[0, 31]`, so `32 - 1 - ctlz(x | 1)` must be in the range `[0, 31]`.
104    let shift = unsafe { index_unchecked!(TABLE[fast_log2(x)]) };
105    let count = (x as u64 + shift) >> 32;
106    count as usize
107}
108
109/// Slightly slower algorithm to calculate the number of digits in an integer.
110///
111/// This uses no static storage, and uses a fast log10(2) estimation
112/// to calculate the number of digits, from the log2 value.
113///
114/// This algorithm is described in detail in "Computing the number of digits
115/// of an integer even faster", available
116/// [here](https://lemire.me/blog/2021/06/03/computing-the-number-of-digits-of-an-integer-even-faster/).
117#[inline]
118pub fn fallback_digit_count<T: UnsignedInteger>(x: T, table: &[T]) -> usize {
119    // This value is always within 1: calculate if we need to round-up
120    // based on a pre-computed table.
121    let log10 = fast_log10(x);
122    let shift_up = table.get(log10).map_or(false, |&y| x >= y);
123
124    log10 + shift_up as usize + 1
125}
126
127/// Quickly calculate the number of digits in a type.
128pub trait DigitCount: UnsignedInteger {
129    /// Get the number of digits in a value.
130    fn digit_count(self) -> usize;
131}
132
133macro_rules! digit_count_unimpl {
134    ($($t:ty)*) => ($(
135        impl DigitCount for $t {
136            #[inline]
137            fn digit_count(self) -> usize {
138                unimplemented!()
139            }
140        }
141    )*)
142}
143
144digit_count_unimpl! { u8 u16 usize }
145
146impl DigitCount for u32 {
147    #[inline]
148    fn digit_count(self) -> usize {
149        fast_digit_count(self)
150    }
151}
152
153impl DigitCount for u64 {
154    #[inline]
155    fn digit_count(self) -> usize {
156        const TABLE: [u64; 19] = [
157            10,
158            100,
159            1000,
160            10000,
161            100000,
162            1000000,
163            10000000,
164            100000000,
165            1000000000,
166            10000000000,
167            100000000000,
168            1000000000000,
169            10000000000000,
170            100000000000000,
171            1000000000000000,
172            10000000000000000,
173            100000000000000000,
174            1000000000000000000,
175            10000000000000000000,
176        ];
177        fallback_digit_count(self, &TABLE)
178    }
179}
180
181impl DigitCount for u128 {
182    #[inline]
183    fn digit_count(self) -> usize {
184        const TABLE: [u128; 38] = [
185            10,
186            100,
187            1000,
188            10000,
189            100000,
190            1000000,
191            10000000,
192            100000000,
193            1000000000,
194            10000000000,
195            100000000000,
196            1000000000000,
197            10000000000000,
198            100000000000000,
199            1000000000000000,
200            10000000000000000,
201            100000000000000000,
202            1000000000000000000,
203            10000000000000000000,
204            100000000000000000000,
205            1000000000000000000000,
206            10000000000000000000000,
207            100000000000000000000000,
208            1000000000000000000000000,
209            10000000000000000000000000,
210            100000000000000000000000000,
211            1000000000000000000000000000,
212            10000000000000000000000000000,
213            100000000000000000000000000000,
214            1000000000000000000000000000000,
215            10000000000000000000000000000000,
216            100000000000000000000000000000000,
217            1000000000000000000000000000000000,
218            10000000000000000000000000000000000,
219            100000000000000000000000000000000000,
220            1000000000000000000000000000000000000,
221            10000000000000000000000000000000000000,
222            100000000000000000000000000000000000000,
223        ];
224        fallback_digit_count(self, &TABLE)
225    }
226}
227
228/// Write integer to decimal string.
229pub trait Decimal: DigitCount {
230    /// # Safety
231    ///
232    /// Safe as long as buffer is at least [`FORMATTED_SIZE`] elements long,
233    /// (or [`FORMATTED_SIZE_DECIMAL`] for decimal), and the radix is valid.
234    ///
235    /// [`FORMATTED_SIZE`]: lexical_util::constants::FormattedSize::FORMATTED_SIZE
236    /// [`FORMATTED_SIZE_DECIMAL`]: lexical_util::constants::FormattedSize::FORMATTED_SIZE_DECIMAL
237    unsafe fn decimal(self, buffer: &mut [u8]) -> usize;
238}
239
240// Don't implement decimal for small types, where we could have an overflow.
241macro_rules! decimal_unimpl {
242    ($($t:ty)*) => ($(
243        impl Decimal for $t {
244            #[inline(always)]
245            unsafe fn decimal(self, _: &mut [u8]) -> usize {
246                // Forces a hard error if we have a logic error in our code.
247                unimplemented!()
248            }
249        }
250    )*);
251}
252
253decimal_unimpl! { u8 u16 usize }
254
255// Implement decimal for type.
256macro_rules! decimal_impl {
257    ($($t:ty)*) => ($(
258        impl Decimal for $t {
259            #[inline(always)]
260            unsafe fn decimal(self, buffer: &mut [u8]) -> usize {
261                // SAFETY: safe as long as buffer is large enough to hold the max value.
262                let count = self.digit_count();
263                debug_assert!(count <= buffer.len());
264                unsafe {
265                    algorithm(self, 10, &DIGIT_TO_BASE10_SQUARED, &mut buffer[..count]);
266                    count
267                }
268            }
269        }
270    )*);
271}
272
273decimal_impl! { u32 u64 }
274
275impl Decimal for u128 {
276    #[inline(always)]
277    unsafe fn decimal(self, buffer: &mut [u8]) -> usize {
278        // SAFETY: safe as long as buffer is large enough to hold the max value.
279        let count = self.digit_count();
280        debug_assert!(count <= buffer.len());
281        unsafe {
282            algorithm_u128::<{ STANDARD }, { RADIX }, { RADIX_SHIFT }>(
283                self,
284                &DIGIT_TO_BASE10_SQUARED,
285                &mut buffer[..count],
286            );
287            count
288        }
289    }
290}