lexical_parse_integer/algorithm.rs
1//! Radix-generic, optimized, string-to-integer conversion routines.
2//!
3//! These routines are highly optimized: they use various optimizations
4//! to read multiple digits at-a-time with less multiplication instructions,
5//! as well as other optimizations to avoid unnecessary compile-time branching.
6//!
7//! See [Algorithm.md](/docs/Algorithm.md) for a more detailed description of
8//! the algorithm choice here. See [Benchmarks.md](/docs/Benchmarks.md) for
9//! recent benchmark data.
10
11#![cfg(not(feature = "compact"))]
12#![doc(hidden)]
13
14use crate::shared::is_overflow;
15use lexical_util::digit::char_to_digit_const;
16use lexical_util::format::NumberFormat;
17use lexical_util::iterator::{AsBytes, BytesIter};
18use lexical_util::num::{as_cast, Integer, UnsignedInteger};
19use lexical_util::result::Result;
20use lexical_util::step::min_step;
21
22// ALGORITHM
23
24/// Check if we can try to parse 8 digits.
25macro_rules! can_try_parse_multidigits {
26 ($iter:expr, $radix:expr) => {
27 $iter.is_contiguous() && (cfg!(not(feature = "power-of-two")) || $radix <= 10)
28 };
29}
30
31/// Parse 8-digits at a time.
32///
33/// See the algorithm description in `parse_8digits`.
34/// This reduces the number of required multiplications
35/// from 8 to 4.
36#[rustfmt::skip]
37macro_rules! parse_8digits {
38 (
39 $value:ident,
40 $iter:ident,
41 $format:ident,
42 $t:ident
43 ) => {{
44 let radix: $t = as_cast(NumberFormat::<{ $format }>::MANTISSA_RADIX);
45 let radix2: $t = radix.wrapping_mul(radix);
46 let radix4: $t = radix2.wrapping_mul(radix2);
47 let radix8: $t = radix4.wrapping_mul(radix4);
48
49 // Try our fast, 8-digit at a time optimizations.
50 while let Some(val8) = try_parse_8digits::<$t, _, $format>(&mut $iter) {
51 $value = $value.wrapping_mul(radix8);
52 $value = $value.wrapping_add(val8);
53 }
54 }};
55}
56
57/// Parse 4-digits at a time.
58///
59/// See the algorithm description in `parse_4digits`.
60/// This reduces the number of required multiplications
61/// from 4 to 3.
62#[rustfmt::skip]
63macro_rules! parse_4digits {
64 (
65 $value:ident,
66 $iter:ident,
67 $format:ident,
68 $t:ident
69 ) => {{
70 let radix: $t = as_cast(NumberFormat::<{ $format }>::MANTISSA_RADIX);
71 let radix2: $t = radix.wrapping_mul(radix);
72 let radix4: $t = radix2.wrapping_mul(radix2);
73
74 // Try our fast, 4-digit at a time optimizations.
75 while let Some(val4) = try_parse_4digits::<$t, _, $format>(&mut $iter) {
76 $value = $value.wrapping_mul(radix4);
77 $value = $value.wrapping_add(val4);
78 }
79 }};
80}
81
82/// Parse digits for a positive or negative value.
83/// Optimized for operations with machine integers.
84#[rustfmt::skip]
85macro_rules! parse_digits {
86 (
87 $value:ident,
88 $iter:ident,
89 $format:ident,
90 $is_negative:ident,
91 $start_index:ident,
92 $t:ident,
93 $u:ident,
94 $invalid_digit:ident
95 ) => {{
96 // WARNING:
97 // Performance is heavily dependent on the amount of branching.
98 // We therefore optimize for worst cases only to a certain extent:
99 // that is, since most integers aren't randomly distributed, but
100 // are more likely to be smaller values, we need to avoid overbranching
101 // to ensure small digit parsing isn't impacted too much. We therefore
102 // only enable 4-digit **or** 8-digit optimizations, but not both.
103 // If not, the two branch passes kill performance for small 64-bit
104 // and 128-bit values.
105 //
106 // However, for signed integers, the increased amount of branching
107 // makes these multi-digit optimizations not worthwhile. For large
108 // 64-bit, signed integers, the performance benefit is ~23% faster.
109 // However, the performance penalty for smaller, more common integers
110 // is ~50%. Therefore, these optimizations are not worth the penalty.
111 //
112 // For unsigned and 128-bit signed integers, the performance penalties
113 // are minimal and the performance gains are substantial, so re-enable
114 // the optimizations.
115 //
116 // DO NOT MAKE CHANGES without monitoring the resulting benchmarks,
117 // or performance could greatly be impacted.
118 let radix = NumberFormat::<{ $format }>::MANTISSA_RADIX;
119
120 // Optimizations for reading 8-digits at a time.
121 // Makes no sense to do 8 digits at a time for 32-bit values,
122 // since it can only hold 8 digits for base 10.
123 if <$t>::BITS == 128 && can_try_parse_multidigits!($iter, radix) {
124 parse_8digits!($value, $iter, $format, $u);
125 }
126 if <$t>::BITS == 64 && can_try_parse_multidigits!($iter, radix) && !<$t>::IS_SIGNED {
127 parse_8digits!($value, $iter, $format, $u);
128 }
129
130 // Optimizations for reading 4-digits at a time.
131 // 36^4 is larger than a 16-bit integer. Likewise, 10^4 is almost
132 // the limit of u16, so it's not worth it.
133 if <$t>::BITS == 32 && can_try_parse_multidigits!($iter, radix) && !<$t>::IS_SIGNED {
134 parse_4digits!($value, $iter, $format, $u);
135 }
136
137 parse_1digit!($value, $iter, $format, $is_negative, $start_index, $t, $u, $invalid_digit)
138 }};
139}
140
141/// Algorithm for the complete parser.
142#[inline]
143pub fn algorithm_complete<T, Unsigned, const FORMAT: u128>(bytes: &[u8]) -> Result<T>
144where
145 T: Integer,
146 Unsigned: UnsignedInteger,
147{
148 algorithm!(bytes, FORMAT, T, Unsigned, parse_digits, invalid_digit_complete, into_ok_complete)
149}
150
151/// Algorithm for the partial parser.
152#[inline]
153pub fn algorithm_partial<T, Unsigned, const FORMAT: u128>(bytes: &[u8]) -> Result<(T, usize)>
154where
155 T: Integer,
156 Unsigned: UnsignedInteger,
157{
158 algorithm!(bytes, FORMAT, T, Unsigned, parse_digits, invalid_digit_partial, into_ok_partial)
159}
160
161// DIGIT OPTIMIZATIONS
162
163/// Determine if 4 bytes, read raw from bytes, are 4 digits for the radix.
164#[inline]
165pub fn is_4digits<const FORMAT: u128>(v: u32) -> bool {
166 let radix = NumberFormat::<{ FORMAT }>::MANTISSA_RADIX;
167 debug_assert!(radix <= 10);
168
169 // We want to have a wrapping add and sub such that only values from the
170 // range `[0x30, 0x39]` (or narrower for custom radixes) will not
171 // overflow into the high bit. This means that the value needs to overflow
172 // into into `0x80` if the digit is 1 above, or `0x46` for the value `0x39`.
173 // Likewise, we only valid for `[0x30, 0x38]` for radix 8, so we need
174 // `0x47`.
175 let add = 0x46 + 10 - radix;
176 let add = add + (add << 8) + (add << 16) + (add << 24);
177 // This aims to underflow if anything is below the min digit: if we have any
178 // values under `0x30`, then this underflows and wraps into the high bit.
179 let sub = 0x3030_3030;
180 let a = v.wrapping_add(add);
181 let b = v.wrapping_sub(sub);
182
183 (a | b) & 0x8080_8080 == 0
184}
185
186/// Parse 4 bytes read from bytes into 4 digits.
187#[inline]
188pub fn parse_4digits<const FORMAT: u128>(mut v: u32) -> u32 {
189 let radix = NumberFormat::<{ FORMAT }>::MANTISSA_RADIX;
190 debug_assert!(radix <= 10);
191
192 // Normalize our digits to the range `[0, 9]`.
193 v -= 0x3030_3030;
194 // Scale digits in 0 <= Nn <= 99.
195 v = (v * radix) + (v >> 8);
196 // Scale digits in 0 <= Nnnn <= 9999.
197 v = ((v & 0x0000007f) * radix * radix) + ((v >> 16) & 0x0000007f);
198
199 v
200}
201
202/// Use a fast-path optimization, where we attempt to parse 4 digits at a time.
203/// This reduces the number of multiplications necessary to 2, instead of 4.
204///
205/// This approach is described in full here:
206/// <https://johnnylee-sde.github.io/Fast-numeric-string-to-int/>
207#[inline]
208pub fn try_parse_4digits<'a, T, Iter, const FORMAT: u128>(iter: &mut Iter) -> Option<T>
209where
210 T: Integer,
211 Iter: BytesIter<'a>,
212{
213 // Can't do fast optimizations with radixes larger than 10, since
214 // we no longer have a contiguous ASCII block. Likewise, cannot
215 // use non-contiguous iterators.
216 debug_assert!(NumberFormat::<{ FORMAT }>::MANTISSA_RADIX <= 10);
217 debug_assert!(Iter::IS_CONTIGUOUS);
218
219 // Read our digits, validate the input, and check from there.
220 let bytes = u32::from_le(iter.read::<u32>()?);
221 if is_4digits::<FORMAT>(bytes) {
222 // SAFETY: safe since we have at least 4 bytes in the buffer.
223 unsafe { iter.step_by_unchecked(4) };
224 Some(T::as_cast(parse_4digits::<FORMAT>(bytes)))
225 } else {
226 None
227 }
228}
229
230/// Determine if 8 bytes, read raw from bytes, are 8 digits for the radix.
231/// See `is_4digits` for the algorithm description.
232#[inline]
233pub fn is_8digits<const FORMAT: u128>(v: u64) -> bool {
234 let radix = NumberFormat::<{ FORMAT }>::MANTISSA_RADIX;
235 debug_assert!(radix <= 10);
236
237 let add = 0x46 + 10 - radix;
238 let add = add + (add << 8) + (add << 16) + (add << 24);
239 let add = (add as u64) | ((add as u64) << 32);
240 // This aims to underflow if anything is below the min digit: if we have any
241 // values under `0x30`, then this underflows and wraps into the high bit.
242 let sub = 0x3030_3030_3030_3030;
243 let a = v.wrapping_add(add);
244 let b = v.wrapping_sub(sub);
245
246 (a | b) & 0x8080_8080_8080_8080 == 0
247}
248
249/// Parse 8 bytes read from bytes into 8 digits.
250/// Credit for this goes to @aqrit, which further optimizes the
251/// optimization described by Johnny Lee above.
252#[inline]
253pub fn parse_8digits<const FORMAT: u128>(mut v: u64) -> u64 {
254 let radix = NumberFormat::<{ FORMAT }>::MANTISSA_RADIX as u64;
255 debug_assert!(radix <= 10);
256
257 // Create our masks. Assume the optimizer will do this at compile time.
258 // It seems like an optimizing compiler **will** do this, so we
259 // should be safe.
260 let radix2 = radix * radix;
261 let radix4 = radix2 * radix2;
262 let radix6 = radix2 * radix4;
263 let mask = 0x0000_00FF_0000_00FFu64;
264 let mul1 = radix2 + (radix6 << 32);
265 let mul2 = 1 + (radix4 << 32);
266
267 // Normalize our digits to the base.
268 v -= 0x3030_3030_3030_3030;
269 // Scale digits in 0 <= Nn <= 99.
270 v = (v * radix) + (v >> 8);
271 let v1 = (v & mask).wrapping_mul(mul1);
272 let v2 = ((v >> 16) & mask).wrapping_mul(mul2);
273
274 ((v1.wrapping_add(v2) >> 32) as u32) as u64
275}
276
277/// Use a fast-path optimization, where we attempt to parse 8 digits at a time.
278/// This reduces the number of multiplications necessary to 3, instead of 8.
279#[inline]
280pub fn try_parse_8digits<'a, T, Iter, const FORMAT: u128>(iter: &mut Iter) -> Option<T>
281where
282 T: Integer,
283 Iter: BytesIter<'a>,
284{
285 // Can't do fast optimizations with radixes larger than 10, since
286 // we no longer have a contiguous ASCII block. Likewise, cannot
287 // use non-contiguous iterators.
288 debug_assert!(NumberFormat::<{ FORMAT }>::MANTISSA_RADIX <= 10);
289 debug_assert!(Iter::IS_CONTIGUOUS);
290
291 // Read our digits, validate the input, and check from there.
292 let bytes = u64::from_le(iter.read::<u64>()?);
293 if is_8digits::<FORMAT>(bytes) {
294 // SAFETY: safe since we have at least 8 bytes in the buffer.
295 unsafe { iter.step_by_unchecked(8) };
296 Some(T::as_cast(parse_8digits::<FORMAT>(bytes)))
297 } else {
298 None
299 }
300}