lexical_write_integer/decimal.rs
1//! Radix-generic, lexical integer-to-string conversion routines.
2//!
3//! An optimization for decimal is pre-computing the number of digits written
4//! prior to actually writing digits, avoiding the use of temporary buffers.
5//! This scales well with integer size, short of `u128`, due to the slower
6//! division algorithms required.
7//!
8//! See [`Algorithm.md`] for a more detailed description of the algorithm
9//! choice here.
10//!
11//! [`Algorithm.md`]: https://github.com/Alexhuszagh/rust-lexical/blob/main/lexical-write-integer/docs/Algorithm.md
12
13#![cfg(not(feature = "compact"))]
14#![doc(hidden)]
15
16use crate::algorithm::{algorithm, algorithm_u128};
17use crate::table::DIGIT_TO_BASE10_SQUARED;
18use lexical_util::format::{RADIX, RADIX_SHIFT, STANDARD};
19use lexical_util::num::UnsignedInteger;
20
21/// Fast integral log2.
22///
23/// This is fairly trivial to explain, since the log2 is related to the
24/// number of bits in the value. Therefore, it has to be related to
25/// `T::BITS - ctlz(x)`. For example, `log2(2) == 1`, and `log2(1) == 0`,
26/// and `log2(3) == 1`. Therefore, we must take the log of an odd number,
27/// and subtract one.
28///
29/// This algorithm is described in detail in "Computing the number of digits
30/// of an integer quickly", available
31/// [here](https://lemire.me/blog/2021/05/28/computing-the-number-of-digits-of-an-integer-quickly/).
32#[inline]
33pub fn fast_log2<T: UnsignedInteger>(x: T) -> usize {
34 T::BITS - 1 - (x | T::ONE).leading_zeros() as usize
35}
36
37/// Calculate the fast, integral log10 of a value.
38///
39/// This is relatively easy to explain as well: we calculate the log2
40/// of the value, then multiply by an integral constant for the log10(2).
41///
42/// Note that this value is frequently off by 1, so we need to round-up
43/// accordingly. This magic number is valid at least up until `1<<18`,
44/// which works for all values, since our max log2 is 127.
45#[inline]
46pub fn fast_log10<T: UnsignedInteger>(x: T) -> usize {
47 let log2 = fast_log2(x);
48 (log2 * 1233) >> 12
49}
50
51/// Fast algorithm to calculate the number of digits in an integer.
52///
53/// We only use this for 32-bit or smaller values: for larger numbers,
54/// we first write digits until we get to 32-bits, then we call this.
55///
56/// The values are as follows:
57///
58/// - `2^32 for j = 1`
59/// - `⌈log10(2^j)⌉ * 2^128 + 2^128 – 10^(⌈log10(2j)⌉) for j from 2 to 30`
60/// - `⌈log10(2^j)⌉ for j = 31 and j = 32`
61///
62/// This algorithm is described in detail in "Computing the number of digits
63/// of an integer even faster", available
64/// [here](https://lemire.me/blog/2021/06/03/computing-the-number-of-digits-of-an-integer-even-faster/).
65#[inline]
66pub fn fast_digit_count(x: u32) -> usize {
67 const TABLE: [u64; 32] = [
68 4294967296,
69 8589934582,
70 8589934582,
71 8589934582,
72 12884901788,
73 12884901788,
74 12884901788,
75 17179868184,
76 17179868184,
77 17179868184,
78 21474826480,
79 21474826480,
80 21474826480,
81 21474826480,
82 25769703776,
83 25769703776,
84 25769703776,
85 30063771072,
86 30063771072,
87 30063771072,
88 34349738368,
89 34349738368,
90 34349738368,
91 34349738368,
92 38554705664,
93 38554705664,
94 38554705664,
95 41949672960,
96 41949672960,
97 41949672960,
98 42949672960,
99 42949672960,
100 ];
101 // SAFETY: always safe, since fast_log2 will always return a value
102 // <= 32. This is because the range of values from `ctlz(x | 1)` is
103 // `[0, 31]`, so `32 - 1 - ctlz(x | 1)` must be in the range `[0, 31]`.
104 let shift = unsafe { index_unchecked!(TABLE[fast_log2(x)]) };
105 let count = (x as u64 + shift) >> 32;
106 count as usize
107}
108
109/// Slightly slower algorithm to calculate the number of digits in an integer.
110///
111/// This uses no static storage, and uses a fast log10(2) estimation
112/// to calculate the number of digits, from the log2 value.
113///
114/// This algorithm is described in detail in "Computing the number of digits
115/// of an integer even faster", available
116/// [here](https://lemire.me/blog/2021/06/03/computing-the-number-of-digits-of-an-integer-even-faster/).
117#[inline]
118pub fn fallback_digit_count<T: UnsignedInteger>(x: T, table: &[T]) -> usize {
119 // This value is always within 1: calculate if we need to round-up
120 // based on a pre-computed table.
121 let log10 = fast_log10(x);
122 let shift_up = table.get(log10).map_or(false, |&y| x >= y);
123
124 log10 + shift_up as usize + 1
125}
126
127/// Quickly calculate the number of digits in a type.
128pub trait DigitCount: UnsignedInteger {
129 /// Get the number of digits in a value.
130 fn digit_count(self) -> usize;
131}
132
133macro_rules! digit_count_unimpl {
134 ($($t:ty)*) => ($(
135 impl DigitCount for $t {
136 #[inline]
137 fn digit_count(self) -> usize {
138 unimplemented!()
139 }
140 }
141 )*)
142}
143
144digit_count_unimpl! { u8 u16 usize }
145
146impl DigitCount for u32 {
147 #[inline]
148 fn digit_count(self) -> usize {
149 fast_digit_count(self)
150 }
151}
152
153impl DigitCount for u64 {
154 #[inline]
155 fn digit_count(self) -> usize {
156 const TABLE: [u64; 19] = [
157 10,
158 100,
159 1000,
160 10000,
161 100000,
162 1000000,
163 10000000,
164 100000000,
165 1000000000,
166 10000000000,
167 100000000000,
168 1000000000000,
169 10000000000000,
170 100000000000000,
171 1000000000000000,
172 10000000000000000,
173 100000000000000000,
174 1000000000000000000,
175 10000000000000000000,
176 ];
177 fallback_digit_count(self, &TABLE)
178 }
179}
180
181impl DigitCount for u128 {
182 #[inline]
183 fn digit_count(self) -> usize {
184 const TABLE: [u128; 38] = [
185 10,
186 100,
187 1000,
188 10000,
189 100000,
190 1000000,
191 10000000,
192 100000000,
193 1000000000,
194 10000000000,
195 100000000000,
196 1000000000000,
197 10000000000000,
198 100000000000000,
199 1000000000000000,
200 10000000000000000,
201 100000000000000000,
202 1000000000000000000,
203 10000000000000000000,
204 100000000000000000000,
205 1000000000000000000000,
206 10000000000000000000000,
207 100000000000000000000000,
208 1000000000000000000000000,
209 10000000000000000000000000,
210 100000000000000000000000000,
211 1000000000000000000000000000,
212 10000000000000000000000000000,
213 100000000000000000000000000000,
214 1000000000000000000000000000000,
215 10000000000000000000000000000000,
216 100000000000000000000000000000000,
217 1000000000000000000000000000000000,
218 10000000000000000000000000000000000,
219 100000000000000000000000000000000000,
220 1000000000000000000000000000000000000,
221 10000000000000000000000000000000000000,
222 100000000000000000000000000000000000000,
223 ];
224 fallback_digit_count(self, &TABLE)
225 }
226}
227
228/// Write integer to decimal string.
229pub trait Decimal: DigitCount {
230 /// # Safety
231 ///
232 /// Safe as long as buffer is at least [`FORMATTED_SIZE`] elements long,
233 /// (or [`FORMATTED_SIZE_DECIMAL`] for decimal), and the radix is valid.
234 ///
235 /// [`FORMATTED_SIZE`]: lexical_util::constants::FormattedSize::FORMATTED_SIZE
236 /// [`FORMATTED_SIZE_DECIMAL`]: lexical_util::constants::FormattedSize::FORMATTED_SIZE_DECIMAL
237 unsafe fn decimal(self, buffer: &mut [u8]) -> usize;
238}
239
240// Don't implement decimal for small types, where we could have an overflow.
241macro_rules! decimal_unimpl {
242 ($($t:ty)*) => ($(
243 impl Decimal for $t {
244 #[inline(always)]
245 unsafe fn decimal(self, _: &mut [u8]) -> usize {
246 // Forces a hard error if we have a logic error in our code.
247 unimplemented!()
248 }
249 }
250 )*);
251}
252
253decimal_unimpl! { u8 u16 usize }
254
255// Implement decimal for type.
256macro_rules! decimal_impl {
257 ($($t:ty)*) => ($(
258 impl Decimal for $t {
259 #[inline(always)]
260 unsafe fn decimal(self, buffer: &mut [u8]) -> usize {
261 // SAFETY: safe as long as buffer is large enough to hold the max value.
262 let count = self.digit_count();
263 debug_assert!(count <= buffer.len());
264 unsafe {
265 algorithm(self, 10, &DIGIT_TO_BASE10_SQUARED, &mut buffer[..count]);
266 count
267 }
268 }
269 }
270 )*);
271}
272
273decimal_impl! { u32 u64 }
274
275impl Decimal for u128 {
276 #[inline(always)]
277 unsafe fn decimal(self, buffer: &mut [u8]) -> usize {
278 // SAFETY: safe as long as buffer is large enough to hold the max value.
279 let count = self.digit_count();
280 debug_assert!(count <= buffer.len());
281 unsafe {
282 algorithm_u128::<{ STANDARD }, { RADIX }, { RADIX_SHIFT }>(
283 self,
284 &DIGIT_TO_BASE10_SQUARED,
285 &mut buffer[..count],
286 );
287 count
288 }
289 }
290}