lexical_parse_float/
parse.rs

1//! Shared trait and methods for parsing floats.
2//!
3//! This is adapted from [fast-float-rust](https://github.com/aldanor/fast-float-rust),
4//! a port of [fast_float](https://github.com/fastfloat/fast_float) to Rust.
5
6#![doc(hidden)]
7
8#[cfg(any(feature = "compact", feature = "radix"))]
9use crate::bellerophon::bellerophon;
10#[cfg(feature = "power-of-two")]
11use crate::binary::{binary, slow_binary};
12use crate::float::{extended_to_float, ExtendedFloat80, LemireFloat};
13#[cfg(not(feature = "compact"))]
14use crate::lemire::lemire;
15use crate::number::Number;
16use crate::options::Options;
17use crate::shared;
18use crate::slow::slow_radix;
19#[cfg(not(feature = "compact"))]
20use lexical_parse_integer::algorithm;
21#[cfg(feature = "f16")]
22use lexical_util::bf16::bf16;
23use lexical_util::digit::{char_to_digit_const, char_to_valid_digit_const};
24use lexical_util::error::Error;
25#[cfg(feature = "f16")]
26use lexical_util::f16::f16;
27use lexical_util::format::NumberFormat;
28use lexical_util::iterator::{AsBytes, Bytes, BytesIter};
29use lexical_util::result::Result;
30use lexical_util::step::u64_step;
31
32// API
33// ---
34
/// Check if the radix is a power-of-2.
///
/// Evaluates to `true` only for the radixes 2, 4, 8, 16 and 32.
/// The argument expression is evaluated exactly once.
#[cfg(feature = "power-of-two")]
macro_rules! is_power_two {
    ($radix:expr) => {{
        let radix = $radix;
        radix == 2 || radix == 4 || radix == 8 || radix == 16 || radix == 32
    }};
}
42
/// Validate the radix of a packed format, returning early on failure.
///
/// Expands to code that returns `Err(Error::InvalidRadix)` from the
/// enclosing function when the mantissa radix and the exponent base
/// form an unsupported pair:
///
/// - With the `power-of-two` feature, the two may differ only for the
///   (radix, exponent base) pairs (4, 2), (8, 2), (16, 2), (32, 2)
///   and (16, 4).
/// - Without it, the radix and the exponent base must be equal.
macro_rules! check_radix {
    ($format:ident) => {{
        let format = NumberFormat::<{ $format }> {};
        let radix = format.radix();
        let base = format.exponent_base();

        #[cfg(feature = "power-of-two")]
        let is_valid = radix == base
            || matches!((radix, base), (4, 2) | (8, 2) | (16, 2) | (32, 2) | (16, 4));

        #[cfg(not(feature = "power-of-two"))]
        let is_valid = radix == base;

        if !is_valid {
            return Err(Error::InvalidRadix);
        }
    }};
}
68
69/// Parse integer trait, implemented in terms of the optimized back-end.
70pub trait ParseFloat: LemireFloat {
71    /// Forward complete parser parameters to the backend.
72    #[cfg_attr(not(feature = "compact"), inline(always))]
73    fn parse_complete<const FORMAT: u128>(bytes: &[u8], options: &Options) -> Result<Self> {
74        check_radix!(FORMAT);
75        parse_complete::<Self, FORMAT>(bytes, options)
76    }
77
78    /// Forward partial parser parameters to the backend.
79    #[cfg_attr(not(feature = "compact"), inline(always))]
80    fn parse_partial<const FORMAT: u128>(bytes: &[u8], options: &Options) -> Result<(Self, usize)> {
81        check_radix!(FORMAT);
82        parse_partial::<Self, FORMAT>(bytes, options)
83    }
84
85    /// Forward complete parser parameters to the backend, using only the fast path.
86    #[cfg_attr(not(feature = "compact"), inline(always))]
87    fn fast_path_complete<const FORMAT: u128>(bytes: &[u8], options: &Options) -> Result<Self> {
88        check_radix!(FORMAT);
89        fast_path_complete::<Self, FORMAT>(bytes, options)
90    }
91
92    /// Forward partial parser parameters to the backend, using only the fast path.
93    #[cfg_attr(not(feature = "compact"), inline(always))]
94    fn fast_path_partial<const FORMAT: u128>(
95        bytes: &[u8],
96        options: &Options,
97    ) -> Result<(Self, usize)> {
98        check_radix!(FORMAT);
99        fast_path_partial::<Self, FORMAT>(bytes, options)
100    }
101}
102
103macro_rules! parse_float_impl {
104    ($($t:ty)*) => ($(
105        impl ParseFloat for $t {}
106    )*)
107}
108
109parse_float_impl! { f32 f64 }
110
/// Implement [`ParseFloat`] for each listed type by parsing the input as
/// an `f32` and converting the result with `Self::from_f32`.
///
/// Each method validates the radix with `check_radix!` before parsing,
/// mirroring the default [`ParseFloat`] methods, so an unsupported
/// radix/exponent-base pair is rejected with `Error::InvalidRadix`
/// instead of reaching the parser.
#[cfg(feature = "f16")]
macro_rules! parse_float_as_f32 {
    ($($t:ty)*) => ($(
        impl ParseFloat for $t {
            #[cfg_attr(not(feature = "compact"), inline(always))]
            fn parse_complete<const FORMAT: u128>(bytes: &[u8], options: &Options)
                -> Result<Self>
            {
                check_radix!(FORMAT);
                Ok(Self::from_f32(parse_complete::<f32, FORMAT>(bytes, options)?))
            }

            #[cfg_attr(not(feature = "compact"), inline(always))]
            fn parse_partial<const FORMAT: u128>(bytes: &[u8], options: &Options)
                -> Result<(Self, usize)>
            {
                check_radix!(FORMAT);
                let (float, count) = parse_partial::<f32, FORMAT>(bytes, options)?;
                Ok((Self::from_f32(float), count))
            }

            #[cfg_attr(not(feature = "compact"), inline(always))]
            fn fast_path_complete<const FORMAT: u128>(bytes: &[u8], options: &Options)
                -> Result<Self>
            {
                check_radix!(FORMAT);
                Ok(Self::from_f32(fast_path_complete::<f32, FORMAT>(bytes, options)?))
            }

            #[cfg_attr(not(feature = "compact"), inline(always))]
            fn fast_path_partial<const FORMAT: u128>(bytes: &[u8], options: &Options)
                -> Result<(Self, usize)>
            {
                check_radix!(FORMAT);
                let (float, count) = fast_path_partial::<f32, FORMAT>(bytes, options)?;
                Ok((Self::from_f32(float), count))
            }
        }
    )*)
}

#[cfg(feature = "f16")]
parse_float_as_f32! { bf16 f16 }
150
151// PARSE
152// -----
153
// NOTE:
//  The partial and complete parsers are implemented separately because it
//  provides minor optimizations when parsing invalid input, and the logic is
//  slightly different internally. Most of the code is shared, so the
//  duplicated code is only about 30 lines.
159
/// Peek at the leading byte and interpret it as the mantissa sign.
///
/// Evaluates to `(is_negative, shift)`, where `shift` is 1 if a sign byte
/// was found and 0 otherwise. The byte is only peeked, never consumed.
///
/// Returns early from the enclosing function with
/// `Error::InvalidPositiveSign` if a `+` is present but the format
/// forbids it, or `Error::MissingSign` if some other byte is present but
/// the format requires a sign.
macro_rules! parse_mantissa_sign {
    ($byte:ident, $format:ident) => {{
        match $byte.integer_iter().peek() {
            Some(&b'-') => (true, 1),
            Some(&b'+') => {
                if $format.no_positive_mantissa_sign() {
                    return Err(Error::InvalidPositiveSign($byte.cursor()));
                }
                (false, 1)
            },
            Some(_) if $format.required_mantissa_sign() => {
                return Err(Error::MissingSign($byte.cursor()));
            },
            _ => (false, 0),
        }
    }};
}
175
/// Peek at the leading byte and interpret it as the exponent sign.
///
/// Evaluates to `(is_negative, shift)`, where `shift` is 1 if a sign byte
/// was found and 0 otherwise. The byte is only peeked, never consumed.
///
/// Returns early from the enclosing function with
/// `Error::InvalidPositiveExponentSign` if a `+` is present but the
/// format forbids it, or `Error::MissingExponentSign` if some other byte
/// is present but the format requires an exponent sign.
macro_rules! parse_exponent_sign {
    ($byte:ident, $format:ident) => {{
        match $byte.integer_iter().peek() {
            Some(&b'+') if !$format.no_positive_exponent_sign() => (false, 1),
            Some(&b'+') if $format.no_positive_exponent_sign() => {
                return Err(Error::InvalidPositiveExponentSign($byte.cursor()));
            },
            Some(&b'-') => (true, 1),
            // The exponent sign requirement is independent of the mantissa
            // one, so this must consult the exponent flag.
            Some(_) if $format.required_exponent_sign() => {
                return Err(Error::MissingExponentSign($byte.cursor()));
            },
            _ => (false, 0),
        }
    }};
}
191
/// Run the normal number parser, falling back to the special-value parser.
///
/// Evaluates to the value produced by `$parse_normal` on success. If it
/// fails, `$parse_special` is tried on a fresh clone of `$byte`: a match
/// makes the enclosing function return `Ok(value)` immediately, while no
/// match makes it return the original error from `$parse_normal`.
macro_rules! parse_number {
    (
        $format:ident,
        $byte:ident,
        $is_negative:ident,
        $options:ident,
        $parse_normal:ident,
        $parse_special:ident
    ) => {{
        match $parse_normal::<$format>($byte.clone(), $is_negative, $options) {
            Ok(number) => number,
            Err(error) => {
                return $parse_special::<_, $format>($byte.clone(), $is_negative, $options)
                    .ok_or(error);
            },
        }
    }};
}
216
/// Convert an extended float to the native float `$type`, negating the
/// result when `$is_negative` is set.
macro_rules! to_native {
    ($type:ident, $fp:ident, $is_negative:ident) => {{
        let float = extended_to_float::<$type>($fp);
        if $is_negative {
            -float
        } else {
            float
        }
    }};
}
227
228/// Parse a float from bytes using a complete parser.
229pub fn parse_complete<F: LemireFloat, const FORMAT: u128>(
230    bytes: &[u8],
231    options: &Options,
232) -> Result<F> {
233    let format = NumberFormat::<{ FORMAT }> {};
234    let mut byte = bytes.bytes::<{ FORMAT }>();
235    let (is_negative, shift) = parse_mantissa_sign!(byte, format);
236    // SAFETY: safe since we shift at most one for a parsed sign byte.
237    unsafe { byte.step_by_unchecked(shift) };
238    if byte.integer_iter().is_consumed() {
239        return Err(Error::Empty(byte.cursor()));
240    }
241
242    // Parse our a small representation of our number.
243    let num = parse_number!(FORMAT, byte, is_negative, options, parse_number, parse_special);
244    // Try the fast-path algorithm.
245    if let Some(value) = num.try_fast_path::<_, FORMAT>() {
246        return Ok(value);
247    }
248    // Now try the moderate path algorithm.
249    let mut fp = moderate_path::<F, FORMAT>(&num, options.lossy());
250
251    // Unable to correctly round the float using the fast or moderate algorithms.
252    // Fallback to a slower, but always correct algorithm. If we have
253    // lossy, we can't be here.
254    if fp.exp < 0 {
255        debug_assert!(!options.lossy());
256        // Undo the invalid extended float biasing.
257        fp.exp -= shared::INVALID_FP;
258        fp = slow_path::<F, FORMAT>(num, fp);
259    }
260
261    // Convert to native float and return result.
262    Ok(to_native!(F, fp, is_negative))
263}
264
265/// Parse a float using only the fast path as a complete parser.
266pub fn fast_path_complete<F: LemireFloat, const FORMAT: u128>(
267    bytes: &[u8],
268    options: &Options,
269) -> Result<F> {
270    let format = NumberFormat::<{ FORMAT }> {};
271    let mut byte = bytes.bytes::<{ FORMAT }>();
272    let (is_negative, shift) = parse_mantissa_sign!(byte, format);
273    // SAFETY: safe since we shift at most one for a parsed sign byte.
274    unsafe { byte.step_by_unchecked(shift) };
275    if byte.integer_iter().is_consumed() {
276        return Err(Error::Empty(byte.cursor()));
277    }
278
279    // Parse our a small representation of our number.
280    let num = parse_number!(FORMAT, byte, is_negative, options, parse_number, parse_special);
281    Ok(num.force_fast_path::<_, FORMAT>())
282}
283
284/// Parse a float from bytes using a partial parser.
285pub fn parse_partial<F: LemireFloat, const FORMAT: u128>(
286    bytes: &[u8],
287    options: &Options,
288) -> Result<(F, usize)> {
289    let format = NumberFormat::<{ FORMAT }> {};
290    let mut byte = bytes.bytes::<{ FORMAT }>();
291    let (is_negative, shift) = parse_mantissa_sign!(byte, format);
292    // SAFETY: safe since we shift at most one for a parsed sign byte.
293    unsafe { byte.step_by_unchecked(shift) };
294    if byte.integer_iter().is_consumed() {
295        return Err(Error::Empty(byte.cursor()));
296    }
297
298    // Parse our a small representation of our number.
299    let (num, count) = parse_number!(
300        FORMAT,
301        byte,
302        is_negative,
303        options,
304        parse_partial_number,
305        parse_partial_special
306    );
307    // Try the fast-path algorithm.
308    if let Some(value) = num.try_fast_path::<_, FORMAT>() {
309        return Ok((value, count));
310    }
311    // Now try the moderate path algorithm.
312    let mut fp = moderate_path::<F, FORMAT>(&num, options.lossy());
313
314    // Unable to correctly round the float using the fast or moderate algorithms.
315    // Fallback to a slower, but always correct algorithm. If we have
316    // lossy, we can't be here.
317    if fp.exp < 0 {
318        debug_assert!(!options.lossy());
319        // Undo the invalid extended float biasing.
320        fp.exp -= shared::INVALID_FP;
321        fp = slow_path::<F, FORMAT>(num, fp);
322    }
323
324    // Convert to native float and return result.
325    Ok((to_native!(F, fp, is_negative), count))
326}
327
328/// Parse a float using only the fast path as a partial parser.
329pub fn fast_path_partial<F: LemireFloat, const FORMAT: u128>(
330    bytes: &[u8],
331    options: &Options,
332) -> Result<(F, usize)> {
333    let format = NumberFormat::<{ FORMAT }> {};
334    let mut byte = bytes.bytes::<{ FORMAT }>();
335    let (is_negative, shift) = parse_mantissa_sign!(byte, format);
336    // SAFETY: safe since we shift at most one for a parsed sign byte.
337    unsafe { byte.step_by_unchecked(shift) };
338    if byte.integer_iter().is_consumed() {
339        return Err(Error::Empty(byte.cursor()));
340    }
341
342    // Parse our a small representation of our number.
343    let (num, count) = parse_number!(
344        FORMAT,
345        byte,
346        is_negative,
347        options,
348        parse_partial_number,
349        parse_partial_special
350    );
351    Ok((num.force_fast_path::<_, FORMAT>(), count))
352}
353
354// PATHS
355// -----
356
357/// Wrapper for different moderate-path algorithms.
358/// A return exponent of `-1` indicates an invalid value.
359#[inline]
360pub fn moderate_path<F: LemireFloat, const FORMAT: u128>(
361    num: &Number,
362    lossy: bool,
363) -> ExtendedFloat80 {
364    #[cfg(feature = "compact")]
365    {
366        #[cfg(feature = "power-of-two")]
367        {
368            let format = NumberFormat::<{ FORMAT }> {};
369            if is_power_two!(format.mantissa_radix()) {
370                // Implement the power-of-two backends.
371                binary::<F, FORMAT>(num, lossy)
372            } else {
373                bellerophon::<F, FORMAT>(num, lossy)
374            }
375        }
376
377        #[cfg(not(feature = "power-of-two"))]
378        {
379            bellerophon::<F, FORMAT>(num, lossy)
380        }
381    }
382
383    #[cfg(not(feature = "compact"))]
384    {
385        #[cfg(feature = "radix")]
386        {
387            let format = NumberFormat::<{ FORMAT }> {};
388            let radix = format.mantissa_radix();
389            if radix == 10 {
390                lemire::<F>(num, lossy)
391            } else if is_power_two!(radix) {
392                // Implement the power-of-two backends.
393                binary::<F, FORMAT>(num, lossy)
394            } else {
395                bellerophon::<F, FORMAT>(num, lossy)
396            }
397        }
398
399        #[cfg(all(feature = "power-of-two", not(feature = "radix")))]
400        {
401            let format = NumberFormat::<{ FORMAT }> {};
402            let radix = format.mantissa_radix();
403            debug_assert!(matches!(radix, 2 | 4 | 8 | 10 | 16 | 32));
404            if radix == 10 {
405                lemire::<F>(num, lossy)
406            } else {
407                // Implement the power-of-two backends.
408                binary::<F, FORMAT>(num, lossy)
409            }
410        }
411
412        #[cfg(not(feature = "power-of-two"))]
413        {
414            lemire::<F>(num, lossy)
415        }
416    }
417}
418
419/// Invoke the slow path.
420/// At this point, the float string has already been validated.
421#[inline]
422pub fn slow_path<F: LemireFloat, const FORMAT: u128>(
423    num: Number,
424    fp: ExtendedFloat80,
425) -> ExtendedFloat80 {
426    #[cfg(not(feature = "power-of-two"))]
427    {
428        slow_radix::<F, FORMAT>(num, fp)
429    }
430
431    #[cfg(feature = "power-of-two")]
432    {
433        let format = NumberFormat::<{ FORMAT }> {};
434        if is_power_two!(format.mantissa_radix()) {
435            slow_binary::<F, FORMAT>(num)
436        } else {
437            slow_radix::<F, FORMAT>(num, fp)
438        }
439    }
440}
441
442// NUMBER
443// ------
444
445/// Parse a partial, non-special floating point number.
446///
447/// This creates a representation of the float as the
448/// significant digits and the decimal exponent.
449#[inline]
450#[allow(clippy::collapsible_if)]
451pub fn parse_partial_number<'a, const FORMAT: u128>(
452    mut byte: Bytes<'a, FORMAT>,
453    is_negative: bool,
454    options: &Options,
455) -> Result<(Number<'a>, usize)> {
456    //  NOTE:
457    //      There are no satisfactory optimizations to reduce the number
458    //      of multiplications for very long input strings, but this will
459    //      be a small fraction of the performance penalty anyway.
460    //
461    //      We've tried:
462    //          - checking for explicit overflow, via `overflowing_mul`.
463    //          - counting the max number of steps.
464    //          - subslicing the string, and only processing the first `step` digits.
465    //          - pre-computing the maximum power, and only adding until then.
466    //
467    //      All of these lead to substantial performance penalty.
468    //      If we pre-parse the string, then only process it then, we
469    //      get a performance penalty of ~2.5x (20ns to 50ns) for common
470    //      floats, an unacceptable cost, while only improving performance
471    //      for rare floats 5-25% (9.3µs to 7.5µs for denormal with 6400
472    //      digits, and 7.8µs to 7.4µs for large floats with 6400 digits).
473    //
474    //      The performance cost is **almost** entirely in this function,
475    //      but additional branching **does** not improve performance,
476    //      and pre-tokenization is a recipe for failure. For halfway
477    //      cases with smaller numbers of digits, the majority of the
478    //      performance cost is in the big integer arithmetic (`pow` and
479    //      `parse_mantissa`), which suggests few optimizations can or should
480    //      be made.
481
482    // Config options
483    let format = NumberFormat::<{ FORMAT }> {};
484    let decimal_point = options.decimal_point();
485    let exponent_character = options.exponent();
486    debug_assert!(format.is_valid());
487    debug_assert!(!byte.is_done());
488    let bits_per_digit = shared::log2(format.mantissa_radix()) as i64;
489    let bits_per_base = shared::log2(format.exponent_base()) as i64;
490
491    // INTEGER
492
493    // Check to see if we have a valid base prefix.
494    let base_prefix = format.base_prefix();
495    let mut is_prefix = false;
496    let mut iter = byte.integer_iter();
497    if cfg!(feature = "format") && base_prefix != 0 && iter.peek() == Some(&b'0') {
498        // SAFETY: safe since `byte.len() >= 1`.
499        unsafe { iter.step_unchecked() };
500        // Check to see if the next character is the base prefix.
501        // We must have a format like `0x`, `0d`, `0o`. Note:
502        if let Some(&c) = iter.peek() {
503            is_prefix = if format.case_sensitive_base_prefix() {
504                c == base_prefix
505            } else {
506                c.to_ascii_lowercase() == base_prefix.to_ascii_lowercase()
507            };
508            if is_prefix {
509                // SAFETY: safe since `byte.len() >= 1`.
510                unsafe { iter.step_unchecked() };
511                if iter.is_done() {
512                    return Err(Error::Empty(iter.cursor()));
513                }
514            }
515        }
516    }
517
518    // Parse our integral digits.
519    let mut mantissa = 0_u64;
520    let start = byte.clone();
521    #[cfg(not(feature = "compact"))]
522    parse_8digits::<_, FORMAT>(byte.integer_iter(), &mut mantissa);
523    parse_digits::<_, _, FORMAT>(byte.integer_iter(), |digit| {
524        mantissa = mantissa.wrapping_mul(format.radix() as _).wrapping_add(digit as _);
525    });
526    let mut n_digits = byte.current_count() - start.current_count();
527    if cfg!(feature = "format") && format.required_integer_digits() && n_digits == 0 {
528        return Err(Error::EmptyInteger(byte.cursor()));
529    }
530
531    // Store the integer digits for slow-path algorithms.
532    // SAFETY: safe, since `n_digits <= start.as_slice().len()`.
533    debug_assert!(n_digits <= start.as_slice().len());
534    let integer_digits = unsafe { start.as_slice().get_unchecked(..n_digits) };
535
536    // Check if integer leading zeros are disabled.
537    if cfg!(feature = "format") && !is_prefix && format.no_float_leading_zeros() {
538        if integer_digits.len() > 1 && integer_digits.get(0) == Some(&b'0') {
539            return Err(Error::InvalidLeadingZeros(start.cursor()));
540        }
541    }
542
543    // FRACTION
544
545    // Handle decimal point and digits afterwards.
546    let mut n_after_dot = 0;
547    let mut exponent = 0_i64;
548    let mut implicit_exponent: i64;
549    let int_end = n_digits as i64;
550    let mut fraction_digits = None;
551    if byte.first_is(decimal_point) {
552        // SAFETY: s cannot be empty due to first_is
553        unsafe { byte.step_unchecked() };
554        let before = byte.clone();
555        #[cfg(not(feature = "compact"))]
556        parse_8digits::<_, FORMAT>(byte.fraction_iter(), &mut mantissa);
557        parse_digits::<_, _, FORMAT>(byte.fraction_iter(), |digit| {
558            mantissa = mantissa.wrapping_mul(format.radix() as _).wrapping_add(digit as _);
559        });
560        n_after_dot = byte.current_count() - before.current_count();
561
562        // Store the fraction digits for slow-path algorithms.
563        // SAFETY: safe, since `n_after_dot <= before.as_slice().len()`.
564        debug_assert!(n_after_dot <= before.as_slice().len());
565        fraction_digits = Some(unsafe { before.as_slice().get_unchecked(..n_after_dot) });
566
567        // Calculate the implicit exponent: the number of digits after the dot.
568        implicit_exponent = -(n_after_dot as i64);
569        if format.mantissa_radix() == format.exponent_base() {
570            exponent = implicit_exponent;
571        } else {
572            debug_assert!(bits_per_digit % bits_per_base == 0);
573            exponent = implicit_exponent * bits_per_digit / bits_per_base;
574        };
575        if cfg!(feature = "format") && format.required_fraction_digits() && n_after_dot == 0 {
576            return Err(Error::EmptyFraction(byte.cursor()));
577        }
578    }
579
580    n_digits += n_after_dot;
581    if format.required_mantissa_digits() && n_digits == 0 {
582        return Err(Error::EmptyMantissa(byte.cursor()));
583    }
584
585    // EXPONENT
586
587    // Handle scientific notation.
588    let mut explicit_exponent = 0_i64;
589    let is_exponent = if cfg!(feature = "format") && format.case_sensitive_exponent() {
590        byte.first_is(exponent_character)
591    } else {
592        byte.case_insensitive_first_is(exponent_character)
593    };
594    if is_exponent {
595        // Check float format syntax checks.
596        if cfg!(feature = "format") {
597            if format.no_exponent_notation() {
598                return Err(Error::InvalidExponent(byte.cursor()));
599            }
600            // Check if we have no fraction but we required exponent notation.
601            if format.no_exponent_without_fraction() && fraction_digits.is_none() {
602                return Err(Error::ExponentWithoutFraction(byte.cursor()));
603            }
604        }
605
606        // SAFETY: byte cannot be empty due to first_is
607        unsafe { byte.step_unchecked() };
608        let (is_negative, shift) = parse_exponent_sign!(byte, format);
609        // SAFETY: safe since we shift at most one for a parsed sign byte.
610        unsafe { byte.step_by_unchecked(shift) };
611        if cfg!(feature = "format") && format.required_exponent_sign() && shift == 0 {
612            return Err(Error::MissingExponentSign(byte.cursor()));
613        }
614
615        let before = byte.current_count();
616        parse_digits::<_, _, FORMAT>(byte.exponent_iter(), |digit| {
617            if explicit_exponent < 0x10000000 {
618                explicit_exponent *= format.radix() as i64;
619                explicit_exponent += digit as i64;
620            }
621        });
622        if format.required_exponent_digits() && byte.current_count() - before == 0 {
623            return Err(Error::EmptyExponent(byte.cursor()));
624        }
625        // Handle our sign, and get the explicit part of the exponent.
626        explicit_exponent = if is_negative {
627            -explicit_exponent
628        } else {
629            explicit_exponent
630        };
631        exponent += explicit_exponent;
632    } else if cfg!(feature = "format") && format.required_exponent_notation() {
633        return Err(Error::MissingExponent(byte.cursor()));
634    }
635
636    // Check to see if we have a valid base suffix.
637    // We've already trimmed any leading digit separators here, so we can be safe
638    // that the first character **is not** a digit separator.
639    let base_suffix = format.base_suffix();
640    if cfg!(feature = "format") && base_suffix != 0 {
641        let is_suffix = if cfg!(feature = "format") && format.case_sensitive_base_suffix() {
642            byte.first_is(base_suffix)
643        } else {
644            byte.case_insensitive_first_is(base_suffix)
645        };
646        if is_suffix {
647            // SAFETY: safe since `byte.len() >= 1`.
648            unsafe { byte.step_unchecked() };
649        }
650    }
651
652    // CHECK OVERFLOW
653
654    // Get the number of parsed digits (total), and redo if we had overflow.
655    let end = byte.cursor();
656    let mut step = u64_step(format.radix());
657    let mut many_digits = false;
658    if cfg!(feature = "format") && !format.required_mantissa_digits() && n_digits == 0 {
659        exponent = 0;
660    }
661    if n_digits <= step {
662        return Ok((
663            Number {
664                exponent,
665                mantissa,
666                is_negative,
667                many_digits: false,
668                integer: integer_digits,
669                fraction: fraction_digits,
670            },
671            end,
672        ));
673    }
674
675    // Check for leading zeros, and to see if we had a false overflow.
676    n_digits -= step;
677    let mut zeros = start.clone();
678    let mut zeros_integer = zeros.integer_iter();
679    while zeros_integer.peek_is(b'0') {
680        n_digits = n_digits.saturating_sub(1);
681        // SAFETY: safe since zeros cannot be empty due to peek_is
682        unsafe { zeros_integer.step_unchecked() };
683    }
684    if zeros.first_is(decimal_point) {
685        // SAFETY: safe since zeros cannot be empty due to first_is
686        unsafe { zeros.step_unchecked() };
687    }
688    let mut zeros_fraction = zeros.fraction_iter();
689    while zeros_fraction.peek_is(b'0') {
690        n_digits = n_digits.saturating_sub(1);
691        // SAFETY: safe since zeros cannot be empty due to peek_is
692        unsafe { zeros_fraction.step_unchecked() };
693    }
694
695    // OVERFLOW
696
697    // Now, check if we explicitly overflowed.
698    if n_digits > 0 {
699        // Have more than 19 significant digits, so we overflowed.
700        many_digits = true;
701        mantissa = 0;
702        let mut integer = integer_digits.bytes::<{ FORMAT }>();
703        // Skip leading zeros, so we can use the step properly.
704        let mut integer_iter = integer.integer_iter();
705        integer_iter.skip_zeros();
706        parse_u64_digits::<_, FORMAT>(integer_iter, &mut mantissa, &mut step);
707        implicit_exponent = if step == 0 {
708            // Filled our mantissa with just the integer.
709            int_end - integer.current_count() as i64
710        } else {
711            // We know this can't be a None since we had more than 19
712            // digits previously, so we overflowed a 64-bit integer,
713            // but parsing only the integral digits produced less
714            // than 19 digits. That means we must have a decimal
715            // point, and at least 1 fractional digit.
716            let mut fraction = fraction_digits.unwrap().bytes::<{ FORMAT }>();
717            let mut fraction_iter = fraction.fraction_iter();
718            // Skip leading zeros, so we can use the step properly.
719            if mantissa == 0 {
720                fraction_iter.skip_zeros();
721            }
722            parse_u64_digits::<_, FORMAT>(fraction_iter, &mut mantissa, &mut step);
723            -(fraction.current_count() as i64)
724        };
725        if format.mantissa_radix() == format.exponent_base() {
726            exponent = implicit_exponent;
727        } else {
728            debug_assert!(bits_per_digit % bits_per_base == 0);
729            exponent = implicit_exponent * bits_per_digit / bits_per_base;
730        };
731        // Add back the explicit exponent.
732        exponent += explicit_exponent;
733    }
734
735    Ok((
736        Number {
737            exponent,
738            mantissa,
739            is_negative,
740            many_digits,
741            integer: integer_digits,
742            fraction: fraction_digits,
743        },
744        end,
745    ))
746}
747
748/// Try to parse a non-special floating point number.
749#[inline]
750pub fn parse_number<'a, const FORMAT: u128>(
751    byte: Bytes<'a, FORMAT>,
752    is_negative: bool,
753    options: &Options,
754) -> Result<Number<'a>> {
755    let length = byte.length();
756    let (float, count) = parse_partial_number::<FORMAT>(byte, is_negative, options)?;
757    if count == length {
758        Ok(float)
759    } else {
760        Err(Error::InvalidDigit(count))
761    }
762}
763
764// DIGITS
765// ------
766
767/// Iteratively parse and consume digits from bytes.
768#[inline]
769pub fn parse_digits<'a, Iter, Cb, const FORMAT: u128>(mut iter: Iter, mut cb: Cb)
770where
771    Iter: BytesIter<'a>,
772    Cb: FnMut(u32),
773{
774    let format = NumberFormat::<{ FORMAT }> {};
775    let radix = format.radix();
776    while let Some(&c) = iter.peek() {
777        match char_to_digit_const(c, radix) {
778            Some(v) => cb(v),
779            None => break,
780        }
781        // SAFETY: iter cannot be empty due to `iter.peek()`.
782        unsafe { iter.step_unchecked() };
783    }
784}
785
786/// Iteratively parse and consume digits in intervals of 8.
787#[inline]
788#[cfg(not(feature = "compact"))]
789pub fn parse_8digits<'a, Iter, const FORMAT: u128>(mut iter: Iter, mantissa: &mut u64)
790where
791    Iter: BytesIter<'a>,
792{
793    let format = NumberFormat::<{ FORMAT }> {};
794    let radix: u64 = format.radix() as u64;
795    if can_try_parse_8digits!(iter, radix) {
796        let radix2 = radix.wrapping_mul(radix);
797        let radix4 = radix2.wrapping_mul(radix2);
798        let radix8 = radix4.wrapping_mul(radix4);
799        // Can do up to 2 iterations without overflowing, however, for large
800        // inputs, this is much faster than any other alternative.
801        while let Some(v) = algorithm::try_parse_8digits::<u64, _, FORMAT>(&mut iter) {
802            *mantissa = mantissa.wrapping_mul(radix8).wrapping_add(v);
803        }
804    }
805}
806
807/// Iteratively parse and consume digits without overflowing.
808///
809/// # Preconditions
810///
811/// There must be at least `step` digits left in iterator.
812#[inline]
813pub fn parse_u64_digits<'a, Iter, const FORMAT: u128>(
814    mut iter: Iter,
815    mantissa: &mut u64,
816    step: &mut usize,
817) where
818    Iter: BytesIter<'a>,
819{
820    let format = NumberFormat::<{ FORMAT }> {};
821    let radix = format.radix() as u64;
822
823    // Try to parse 8 digits at a time, if we can.
824    #[cfg(not(feature = "compact"))]
825    if can_try_parse_8digits!(iter, radix) {
826        let radix2 = radix.wrapping_mul(radix);
827        let radix4 = radix2.wrapping_mul(radix2);
828        let radix8 = radix4.wrapping_mul(radix4);
829        while *step > 8 {
830            if let Some(v) = algorithm::try_parse_8digits::<u64, _, FORMAT>(&mut iter) {
831                *mantissa = mantissa.wrapping_mul(radix8).wrapping_add(v);
832                *step -= 8;
833            } else {
834                break;
835            }
836        }
837    }
838
839    // Parse single digits at a time.
840    while let Some(&c) = iter.peek() {
841        if *step > 0 {
842            let digit = char_to_valid_digit_const(c, radix as u32);
843            *mantissa = *mantissa * radix + digit as u64;
844            *step -= 1;
845            // SAFETY: safe, since `iter` cannot be empty due to `iter.peek()`.
846            unsafe { iter.step_unchecked() };
847        } else {
848            break;
849        }
850    }
851}
852
853// SPECIAL
854// -------
855
856/// Determine if the input data matches the special string.
857/// If there's no match, returns 0. Otherwise, returns the byte's cursor.
858#[inline]
859pub fn is_special_eq<const FORMAT: u128>(mut byte: Bytes<FORMAT>, string: &'static [u8]) -> usize {
860    let format = NumberFormat::<{ FORMAT }> {};
861    if cfg!(feature = "format") && format.case_sensitive_special() {
862        if shared::starts_with(byte.special_iter(), string.iter()) {
863            // Trim the iterator afterwards.
864            byte.special_iter().peek();
865            return byte.cursor();
866        }
867    } else if shared::case_insensitive_starts_with(byte.special_iter(), string.iter()) {
868        // Trim the iterator afterwards.
869        byte.special_iter().peek();
870        return byte.cursor();
871    }
872    0
873}
874
875/// Parse a positive representation of a special, non-finite float.
876#[inline]
877pub fn parse_positive_special<F, const FORMAT: u128>(
878    byte: Bytes<FORMAT>,
879    options: &Options,
880) -> Option<(F, usize)>
881where
882    F: LemireFloat,
883{
884    let format = NumberFormat::<{ FORMAT }> {};
885    if cfg!(feature = "format") && format.no_special() {
886        return None;
887    }
888
889    let cursor = byte.cursor();
890    let length = byte.length() - cursor;
891    if let Some(nan_string) = options.nan_string() {
892        if length >= nan_string.len() {
893            let count = is_special_eq::<FORMAT>(byte.clone(), nan_string);
894            if count != 0 {
895                return Some((F::NAN, count));
896            }
897        }
898    }
899    if let Some(infinity_string) = options.infinity_string() {
900        if length >= infinity_string.len() {
901            let count = is_special_eq::<FORMAT>(byte.clone(), infinity_string);
902            if count != 0 {
903                return Some((F::INFINITY, count));
904            }
905        }
906    }
907    if let Some(inf_string) = options.inf_string() {
908        if length >= inf_string.len() {
909            let count = is_special_eq::<FORMAT>(byte.clone(), inf_string);
910            if count != 0 {
911                return Some((F::INFINITY, count));
912            }
913        }
914    }
915
916    None
917}
918
919/// Parse a partial representation of a special, non-finite float.
920#[inline]
921pub fn parse_partial_special<F, const FORMAT: u128>(
922    byte: Bytes<FORMAT>,
923    is_negative: bool,
924    options: &Options,
925) -> Option<(F, usize)>
926where
927    F: LemireFloat,
928{
929    let (mut float, count) = parse_positive_special::<F, FORMAT>(byte, options)?;
930    if is_negative {
931        float = -float;
932    }
933    Some((float, count))
934}
935
936/// Try to parse a special, non-finite float.
937#[inline]
938pub fn parse_special<F, const FORMAT: u128>(
939    byte: Bytes<FORMAT>,
940    is_negative: bool,
941    options: &Options,
942) -> Option<F>
943where
944    F: LemireFloat,
945{
946    let length = byte.length();
947    if let Some((float, count)) = parse_partial_special::<F, FORMAT>(byte, is_negative, options) {
948        if count == length {
949            return Some(float);
950        }
951    }
952    None
953}
lexical_parse_float/parse.rs

lexical_parse_float/
parse.rs