lexical_parse_integer/
algorithm.rs

1//! Radix-generic, optimized, string-to-integer conversion routines.
2//!
3//! These routines are highly optimized: they use various optimizations
4//! to read multiple digits at-a-time with less multiplication instructions,
5//! as well as other optimizations to avoid unnecessary compile-time branching.
6//!
7//! See [Algorithm.md](/docs/Algorithm.md) for a more detailed description of
8//! the algorithm choice here. See [Benchmarks.md](/docs/Benchmarks.md) for
9//! recent benchmark data.
10
11#![cfg(not(feature = "compact"))]
12#![doc(hidden)]
13
14use crate::shared::is_overflow;
15use lexical_util::digit::char_to_digit_const;
16use lexical_util::format::NumberFormat;
17use lexical_util::iterator::{AsBytes, BytesIter};
18use lexical_util::num::{as_cast, Integer, UnsignedInteger};
19use lexical_util::result::Result;
20use lexical_util::step::min_step;
21
22// ALGORITHM
23
24/// Check if we can try to parse 8 digits.
25macro_rules! can_try_parse_multidigits {
26    ($iter:expr, $radix:expr) => {
27        $iter.is_contiguous() && (cfg!(not(feature = "power-of-two")) || $radix <= 10)
28    };
29}
30
31/// Parse 8-digits at a time.
32///
33/// See the algorithm description in `parse_8digits`.
34/// This reduces the number of required multiplications
35/// from 8 to 4.
36#[rustfmt::skip]
37macro_rules! parse_8digits {
38    (
39        $value:ident,
40        $iter:ident,
41        $format:ident,
42        $t:ident
43    ) => {{
44        let radix: $t = as_cast(NumberFormat::<{ $format }>::MANTISSA_RADIX);
45        let radix2: $t = radix.wrapping_mul(radix);
46        let radix4: $t = radix2.wrapping_mul(radix2);
47        let radix8: $t = radix4.wrapping_mul(radix4);
48
49        // Try our fast, 8-digit at a time optimizations.
50        while let Some(val8) = try_parse_8digits::<$t, _, $format>(&mut $iter) {
51            $value = $value.wrapping_mul(radix8);
52            $value = $value.wrapping_add(val8);
53        }
54    }};
55}
56
57/// Parse 4-digits at a time.
58///
59/// See the algorithm description in `parse_4digits`.
60/// This reduces the number of required multiplications
61/// from 4 to 3.
62#[rustfmt::skip]
63macro_rules! parse_4digits {
64    (
65        $value:ident,
66        $iter:ident,
67        $format:ident,
68        $t:ident
69    ) => {{
70        let radix: $t = as_cast(NumberFormat::<{ $format }>::MANTISSA_RADIX);
71        let radix2: $t = radix.wrapping_mul(radix);
72        let radix4: $t = radix2.wrapping_mul(radix2);
73
74        // Try our fast, 4-digit at a time optimizations.
75        while let Some(val4) = try_parse_4digits::<$t, _, $format>(&mut $iter) {
76            $value = $value.wrapping_mul(radix4);
77            $value = $value.wrapping_add(val4);
78        }
79    }};
80}
81
82/// Parse digits for a positive or negative value.
83/// Optimized for operations with machine integers.
84#[rustfmt::skip]
85macro_rules! parse_digits {
86    (
87        $value:ident,
88        $iter:ident,
89        $format:ident,
90        $is_negative:ident,
91        $start_index:ident,
92        $t:ident,
93        $u:ident,
94        $invalid_digit:ident
95    ) => {{
96        //  WARNING:
97        //      Performance is heavily dependent on the amount of branching.
98        //      We therefore optimize for worst cases only to a certain extent:
99        //      that is, since most integers aren't randomly distributed, but
100        //      are more likely to be smaller values, we need to avoid overbranching
101        //      to ensure small digit parsing isn't impacted too much. We therefore
102        //      only enable 4-digit **or** 8-digit optimizations, but not both.
103        //      If not, the two branch passes kill performance for small 64-bit
104        //      and 128-bit values.
105        //
106        //      However, for signed integers, the increased amount of branching
107        //      makes these multi-digit optimizations not worthwhile. For large
108        //      64-bit, signed integers, the performance benefit is ~23% faster.
109        //      However, the performance penalty for smaller, more common integers
110        //      is ~50%. Therefore, these optimizations are not worth the penalty.
111        //
112        //      For unsigned and 128-bit signed integers, the performance penalties
113        //      are minimal and the performance gains are substantial, so re-enable
114        //      the optimizations.
115        //
116        //  DO NOT MAKE CHANGES without monitoring the resulting benchmarks,
117        //  or performance could greatly be impacted.
118        let radix = NumberFormat::<{ $format }>::MANTISSA_RADIX;
119
120        // Optimizations for reading 8-digits at a time.
121        // Makes no sense to do 8 digits at a time for 32-bit values,
122        // since it can only hold 8 digits for base 10.
123        if <$t>::BITS == 128 && can_try_parse_multidigits!($iter, radix) {
124            parse_8digits!($value, $iter, $format, $u);
125        }
126        if <$t>::BITS == 64 && can_try_parse_multidigits!($iter, radix) && !<$t>::IS_SIGNED {
127            parse_8digits!($value, $iter, $format, $u);
128        }
129
130        // Optimizations for reading 4-digits at a time.
131        // 36^4 is larger than a 16-bit integer. Likewise, 10^4 is almost
132        // the limit of u16, so it's not worth it.
133        if <$t>::BITS == 32 && can_try_parse_multidigits!($iter, radix) && !<$t>::IS_SIGNED {
134            parse_4digits!($value, $iter, $format, $u);
135        }
136
137        parse_1digit!($value, $iter, $format, $is_negative, $start_index, $t, $u, $invalid_digit)
138    }};
139}
140
141/// Algorithm for the complete parser.
142#[inline]
143pub fn algorithm_complete<T, Unsigned, const FORMAT: u128>(bytes: &[u8]) -> Result<T>
144where
145    T: Integer,
146    Unsigned: UnsignedInteger,
147{
148    algorithm!(bytes, FORMAT, T, Unsigned, parse_digits, invalid_digit_complete, into_ok_complete)
149}
150
151/// Algorithm for the partial parser.
152#[inline]
153pub fn algorithm_partial<T, Unsigned, const FORMAT: u128>(bytes: &[u8]) -> Result<(T, usize)>
154where
155    T: Integer,
156    Unsigned: UnsignedInteger,
157{
158    algorithm!(bytes, FORMAT, T, Unsigned, parse_digits, invalid_digit_partial, into_ok_partial)
159}
160
161// DIGIT OPTIMIZATIONS
162
163/// Determine if 4 bytes, read raw from bytes, are 4 digits for the radix.
164#[inline]
165pub fn is_4digits<const FORMAT: u128>(v: u32) -> bool {
166    let radix = NumberFormat::<{ FORMAT }>::MANTISSA_RADIX;
167    debug_assert!(radix <= 10);
168
169    // We want to have a wrapping add and sub such that only values from the
170    // range `[0x30, 0x39]` (or narrower for custom radixes) will not
171    // overflow into the high bit. This means that the value needs to overflow
172    // into into `0x80` if the digit is 1 above, or `0x46` for the value `0x39`.
173    // Likewise, we only valid for `[0x30, 0x38]` for radix 8, so we need
174    // `0x47`.
175    let add = 0x46 + 10 - radix;
176    let add = add + (add << 8) + (add << 16) + (add << 24);
177    // This aims to underflow if anything is below the min digit: if we have any
178    // values under `0x30`, then this underflows and wraps into the high bit.
179    let sub = 0x3030_3030;
180    let a = v.wrapping_add(add);
181    let b = v.wrapping_sub(sub);
182
183    (a | b) & 0x8080_8080 == 0
184}
185
186/// Parse 4 bytes read from bytes into 4 digits.
187#[inline]
188pub fn parse_4digits<const FORMAT: u128>(mut v: u32) -> u32 {
189    let radix = NumberFormat::<{ FORMAT }>::MANTISSA_RADIX;
190    debug_assert!(radix <= 10);
191
192    // Normalize our digits to the range `[0, 9]`.
193    v -= 0x3030_3030;
194    // Scale digits in 0 <= Nn <= 99.
195    v = (v * radix) + (v >> 8);
196    // Scale digits in 0 <= Nnnn <= 9999.
197    v = ((v & 0x0000007f) * radix * radix) + ((v >> 16) & 0x0000007f);
198
199    v
200}
201
202/// Use a fast-path optimization, where we attempt to parse 4 digits at a time.
203/// This reduces the number of multiplications necessary to 2, instead of 4.
204///
205/// This approach is described in full here:
206/// <https://johnnylee-sde.github.io/Fast-numeric-string-to-int/>
207#[inline]
208pub fn try_parse_4digits<'a, T, Iter, const FORMAT: u128>(iter: &mut Iter) -> Option<T>
209where
210    T: Integer,
211    Iter: BytesIter<'a>,
212{
213    // Can't do fast optimizations with radixes larger than 10, since
214    // we no longer have a contiguous ASCII block. Likewise, cannot
215    // use non-contiguous iterators.
216    debug_assert!(NumberFormat::<{ FORMAT }>::MANTISSA_RADIX <= 10);
217    debug_assert!(Iter::IS_CONTIGUOUS);
218
219    // Read our digits, validate the input, and check from there.
220    let bytes = u32::from_le(iter.read::<u32>()?);
221    if is_4digits::<FORMAT>(bytes) {
222        // SAFETY: safe since we have at least 4 bytes in the buffer.
223        unsafe { iter.step_by_unchecked(4) };
224        Some(T::as_cast(parse_4digits::<FORMAT>(bytes)))
225    } else {
226        None
227    }
228}
229
230/// Determine if 8 bytes, read raw from bytes, are 8 digits for the radix.
231/// See `is_4digits` for the algorithm description.
232#[inline]
233pub fn is_8digits<const FORMAT: u128>(v: u64) -> bool {
234    let radix = NumberFormat::<{ FORMAT }>::MANTISSA_RADIX;
235    debug_assert!(radix <= 10);
236
237    let add = 0x46 + 10 - radix;
238    let add = add + (add << 8) + (add << 16) + (add << 24);
239    let add = (add as u64) | ((add as u64) << 32);
240    // This aims to underflow if anything is below the min digit: if we have any
241    // values under `0x30`, then this underflows and wraps into the high bit.
242    let sub = 0x3030_3030_3030_3030;
243    let a = v.wrapping_add(add);
244    let b = v.wrapping_sub(sub);
245
246    (a | b) & 0x8080_8080_8080_8080 == 0
247}
248
249/// Parse 8 bytes read from bytes into 8 digits.
250/// Credit for this goes to @aqrit, which further optimizes the
251/// optimization described by Johnny Lee above.
252#[inline]
253pub fn parse_8digits<const FORMAT: u128>(mut v: u64) -> u64 {
254    let radix = NumberFormat::<{ FORMAT }>::MANTISSA_RADIX as u64;
255    debug_assert!(radix <= 10);
256
257    // Create our masks. Assume the optimizer will do this at compile time.
258    // It seems like an optimizing compiler **will** do this, so we
259    // should be safe.
260    let radix2 = radix * radix;
261    let radix4 = radix2 * radix2;
262    let radix6 = radix2 * radix4;
263    let mask = 0x0000_00FF_0000_00FFu64;
264    let mul1 = radix2 + (radix6 << 32);
265    let mul2 = 1 + (radix4 << 32);
266
267    // Normalize our digits to the base.
268    v -= 0x3030_3030_3030_3030;
269    // Scale digits in 0 <= Nn <= 99.
270    v = (v * radix) + (v >> 8);
271    let v1 = (v & mask).wrapping_mul(mul1);
272    let v2 = ((v >> 16) & mask).wrapping_mul(mul2);
273
274    ((v1.wrapping_add(v2) >> 32) as u32) as u64
275}
276
277/// Use a fast-path optimization, where we attempt to parse 8 digits at a time.
278/// This reduces the number of multiplications necessary to 3, instead of 8.
279#[inline]
280pub fn try_parse_8digits<'a, T, Iter, const FORMAT: u128>(iter: &mut Iter) -> Option<T>
281where
282    T: Integer,
283    Iter: BytesIter<'a>,
284{
285    // Can't do fast optimizations with radixes larger than 10, since
286    // we no longer have a contiguous ASCII block. Likewise, cannot
287    // use non-contiguous iterators.
288    debug_assert!(NumberFormat::<{ FORMAT }>::MANTISSA_RADIX <= 10);
289    debug_assert!(Iter::IS_CONTIGUOUS);
290
291    // Read our digits, validate the input, and check from there.
292    let bytes = u64::from_le(iter.read::<u64>()?);
293    if is_8digits::<FORMAT>(bytes) {
294        // SAFETY: safe since we have at least 8 bytes in the buffer.
295        unsafe { iter.step_by_unchecked(8) };
296        Some(T::as_cast(parse_8digits::<FORMAT>(bytes)))
297    } else {
298        None
299    }
300}