lexical_parse_float/shared.rs
//! Shared utilities and algorithms.

#![doc(hidden)]

use crate::float::{ExtendedFloat80, RawFloat};
use crate::mask::{lower_n_halfway, lower_n_mask};
#[cfg(feature = "power-of-two")]
use lexical_util::format::NumberFormat;
use lexical_util::num::AsPrimitive;

// 8 DIGIT
// -------

/// Check if we can try to parse 8 digits.
#[cfg(not(feature = "compact"))]
macro_rules! can_try_parse_8digits {
    ($iter:expr, $radix:expr) => {
        $iter.is_contiguous() && (cfg!(not(feature = "power-of-two")) || $radix <= 10)
    };
}

// POWER2
// ------

/// Calculate the shift to move the significant digits into place.
#[inline(always)]
pub fn calculate_shift<F: RawFloat>(power2: i32) -> i32 {
    let mantissa_shift = 64 - F::MANTISSA_SIZE - 1;
    if -power2 >= mantissa_shift {
        -power2 + 1
    } else {
        mantissa_shift
    }
}
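
// A worked sketch (illustrative only, assuming `f64` implements `RawFloat`
// with `MANTISSA_SIZE == 52`): the normal-path shift is `64 - 52 - 1 == 11`
// bits, while a sufficiently negative `power2` (a denormal result) instead
// yields `-power2 + 1`:
//
// ```rust,ignore
// assert_eq!(calculate_shift::<f64>(-5), 11);  // normal: shift by 11 bits
// assert_eq!(calculate_shift::<f64>(-20), 21); // denormal: -(-20) + 1
// ```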

/// Calculate the biased, binary exponent from the mantissa shift and exponent.
#[inline(always)]
#[cfg(feature = "power-of-two")]
pub fn calculate_power2<F: RawFloat, const FORMAT: u128>(exponent: i64, ctlz: u32) -> i32 {
    let format = NumberFormat::<{ FORMAT }> {};
    exponent as i32 * log2(format.exponent_base()) + F::EXPONENT_BIAS - ctlz as i32
}
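
// Worked sketch (hypothetical numbers): for a plain binary format where
// `exponent_base() == 2`, `log2(...) == 1`, so an input exponent of `-4` with
// `ctlz == 11` leading zeros gives `-4 + F::EXPONENT_BIAS - 11`. The `ctlz`
// term compensates for left-normalizing the mantissa before the shift.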

/// Bias for marking an invalid extended float.
pub const INVALID_FP: i32 = i16::MIN as i32;

// LOG2
// ----

/// Quick log2 that evaluates at compile time for the radix.
/// Note that this may produce inaccurate results for other radixes:
/// we don't care since it's only called for powers-of-two.
#[inline]
pub const fn log2(radix: u32) -> i32 {
    match radix {
        2 => 1,
        4 => 2,
        8 => 3,
        16 => 4,
        32 => 5,
        // Fallthrough to 1 for non-power-of-two radixes.
        _ => 1,
    }
}
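
// Quick sanity sketch: for the power-of-two radixes this is used with, the
// table above agrees with `u32::trailing_zeros` (non-powers-of-two fall back
// to 1 by design and are not checked here).
#[test]
fn log2_matches_trailing_zeros() {
    for &radix in &[2u32, 4, 8, 16, 32] {
        assert_eq!(log2(radix), radix.trailing_zeros() as i32);
    }
}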

// STARTS WITH
// -----------

/// Check if left iter starts with right iter.
///
/// This optimizes decently well, to the following ASM for pure slices:
///
/// ```text
/// starts_with_slc:
///         xor     eax, eax
/// .LBB0_1:
///         cmp     rcx, rax
///         je      .LBB0_2
///         cmp     rsi, rax
///         je      .LBB0_5
///         movzx   r8d, byte ptr [rdi + rax]
///         lea     r9, [rax + 1]
///         cmp     r8b, byte ptr [rdx + rax]
///         mov     rax, r9
///         je      .LBB0_1
/// .LBB0_5:
///         xor     eax, eax
///         ret
/// .LBB0_2:
///         mov     al, 1
///         ret
/// ```
#[cfg_attr(not(feature = "compact"), inline)]
pub fn starts_with<'a, 'b, Iter1, Iter2>(mut x: Iter1, mut y: Iter2) -> bool
where
    Iter1: Iterator<Item = &'a u8>,
    Iter2: Iterator<Item = &'b u8>,
{
    loop {
        // Only call `next()` on x if y is not None, otherwise,
        // we may incorrectly consume an x character.
        let yi = y.next();
        if yi.is_none() {
            return true;
        } else if x.next() != yi {
            return false;
        }
    }
}
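
// Minimal usage sketch: both arguments are byte iterators, and only as many
// items as `y` yields are consumed from `x`, so a longer left input still
// matches a shorter prefix.
#[test]
fn starts_with_sketch() {
    assert!(starts_with(b"nan(1234)".iter(), b"nan".iter()));
    assert!(!starts_with(b"inf".iter(), b"infinity".iter()));
}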

/// Check if left iter starts with right iter, ignoring ASCII case.
///
/// This optimizes decently well, to the following ASM for pure slices:
///
/// ```text
/// case_insensitive_starts_with_slc:
///         xor     eax, eax
/// .LBB1_1:
///         cmp     rcx, rax
///         je      .LBB1_2
///         cmp     rsi, rax
///         je      .LBB1_5
///         movzx   r8d, byte ptr [rdi + rax]
///         xor     r8b, byte ptr [rdx + rax]
///         add     rax, 1
///         test    r8b, -33
///         je      .LBB1_1
/// .LBB1_5:
///         xor     eax, eax
///         ret
/// .LBB1_2:
///         mov     al, 1
///         ret
/// ```
#[cfg_attr(not(feature = "compact"), inline)]
pub fn case_insensitive_starts_with<'a, 'b, Iter1, Iter2>(mut x: Iter1, mut y: Iter2) -> bool
where
    Iter1: Iterator<Item = &'a u8>,
    Iter2: Iterator<Item = &'b u8>,
{
    // We use a faster optimization here for ASCII letters, which NaN
    // and infinity strings **must** be. [A-Z] is 0x41-0x5A, while
    // [a-z] is 0x61-0x7A. Therefore, the xor must be 0 or 32 if they
    // are case-insensitively equal, but only if at least 1 of the inputs
    // is an ASCII letter.
    loop {
        let yi = y.next();
        if yi.is_none() {
            return true;
        }
        let yi = *yi.unwrap();
        let is_not_equal = x.next().map_or(true, |&xi| {
            let xor = xi ^ yi;
            xor != 0 && xor != 0x20
        });
        if is_not_equal {
            return false;
        }
    }
}
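
// Sketch of the ASCII trick above: for two ASCII letters, equality ignoring
// case means their xor is exactly 0 or 0x20 (the case bit); the needle is
// expected to be an ASCII-letter literal such as "nan" or "infinity".
#[test]
fn case_insensitive_starts_with_sketch() {
    assert_eq!(b'n' ^ b'N', 0x20);
    assert!(case_insensitive_starts_with(b"NaN".iter(), b"nan".iter()));
    assert!(!case_insensitive_starts_with(b"nam".iter(), b"nan".iter()));
}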

// ROUNDING
// --------

/// Round an extended-precision float to the nearest machine float.
///
/// Shifts the significant digits into place and adjusts the exponent,
/// so the result can be easily converted to a native float.
#[cfg_attr(not(feature = "compact"), inline)]
pub fn round<F, Cb>(fp: &mut ExtendedFloat80, cb: Cb)
where
    F: RawFloat,
    Cb: Fn(&mut ExtendedFloat80, i32),
{
    let fp_inf = ExtendedFloat80 {
        mant: 0,
        exp: F::INFINITE_POWER,
    };

    // Calculate our shift in significant digits.
    let mantissa_shift = 64 - F::MANTISSA_SIZE - 1;

    // Check for a denormal float, if after the shift the exponent is negative.
    if -fp.exp >= mantissa_shift {
        // Have a denormal float that isn't a literal 0.
        // The extra 1 is to adjust for the denormal float, which is
        // `1 - F::EXPONENT_BIAS`. This works as before, because our
        // old logic rounded to `F::DENORMAL_EXPONENT` (now 1), and then
        // checked if `exp == F::DENORMAL_EXPONENT` and no hidden mask
        // bit was set. We handle that case here, rather than later.
        //
        // This might round down to 0, but the shift is at **max** 65,
        // for halfway cases rounding towards 0.
        let shift = -fp.exp + 1;
        debug_assert!(shift <= 65);
        cb(fp, shift.min(64));
        // Check for round-up: if rounding-nearest carried us to the hidden bit.
        fp.exp = (fp.mant >= F::HIDDEN_BIT_MASK.as_u64()) as i32;
        return;
    }

    // The float is normal, round to the hidden bit.
    cb(fp, mantissa_shift);

    // Check if we carried, and if so, shift the bit to the hidden bit.
    let carry_mask = F::CARRY_MASK.as_u64();
    if fp.mant & carry_mask == carry_mask {
        fp.mant >>= 1;
        fp.exp += 1;
    }

    // Handle if we carried, and check for overflow again.
    if fp.exp >= F::INFINITE_POWER {
        // Exponent is above largest normal value, must be infinite.
        *fp = fp_inf;
        return;
    }

    // Remove the hidden bit.
    fp.mant &= F::MANTISSA_MASK.as_u64();
}
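
// Typical wiring (a sketch, assuming `f64` implements `RawFloat`): the callback
// selects the rounding rule, so round-nearest, ties-to-even composes `round`
// with `round_nearest_tie_even` and the usual "round up if above halfway, or
// at halfway with an odd bit" predicate, while truncation uses `round_down`.
//
// ```rust,ignore
// round::<f64, _>(&mut fp, |f, s| {
//     round_nearest_tie_even(f, s, |is_odd, is_halfway, is_above| {
//         is_above || (is_odd && is_halfway)
//     });
// });
// ```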

/// Shift right N bits and round towards a direction.
///
/// Callback should take the following parameters:
/// 1. is_odd
/// 1. is_halfway
/// 1. is_above
#[cfg_attr(not(feature = "compact"), inline)]
pub fn round_nearest_tie_even<Cb>(fp: &mut ExtendedFloat80, shift: i32, cb: Cb)
where
    // is_odd, is_halfway, is_above
    Cb: Fn(bool, bool, bool) -> bool,
{
    // Ensure we've already handled denormal values that underflow.
    debug_assert!(shift <= 64);

    // Extract the truncated bits using the mask.
    // Calculate if the value of the truncated bits is either above
    // the midway point, or equal to it.
    //
    // For example, for 4 truncated bits, the mask would be 0b1111
    // and the midway point would be 0b1000.
    let mask = lower_n_mask(shift as u64);
    let halfway = lower_n_halfway(shift as u64);
    let truncated_bits = fp.mant & mask;
    let is_above = truncated_bits > halfway;
    let is_halfway = truncated_bits == halfway;

    // Bit-shift so the leading bit is in the hidden bit.
    // This optimizes pretty well:
    // ```text
    //  mov     ecx, esi
    //  shr     rdi, cl
    //  xor     eax, eax
    //  cmp     esi, 64
    //  cmovne  rax, rdi
    //  ret
    // ```
    fp.mant = match shift == 64 {
        true => 0,
        false => fp.mant >> shift,
    };
    fp.exp += shift;

    // Extract the last bit after shifting (and determine if it is odd).
    let is_odd = fp.mant & 1 == 1;

    // Calculate if we need to round up.
    // We need to round up if we are above halfway, or if we are odd
    // and exactly at halfway (to tie-to-even). Avoid the branch here.
    fp.mant += cb(is_odd, is_halfway, is_above) as u64;
}
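
// A small worked sketch of the shift logic above (the tie-even callback shown
// here is an assumption of the typical rule: round up when above halfway, or
// when exactly halfway with an odd retained bit).
#[test]
fn round_nearest_tie_even_sketch() {
    let tie_even =
        |is_odd: bool, is_halfway: bool, is_above: bool| is_above || (is_odd && is_halfway);

    // 0b1_0111: the truncated low 4 bits (0b0111) are below halfway, so truncate.
    let mut below = ExtendedFloat80 { mant: 0b1_0111, exp: 0 };
    round_nearest_tie_even(&mut below, 4, tie_even);
    assert_eq!((below.mant, below.exp), (1, 4));

    // 0b1_1000: exactly halfway with an odd retained bit, so round up to even.
    let mut halfway = ExtendedFloat80 { mant: 0b1_1000, exp: 0 };
    round_nearest_tie_even(&mut halfway, 4, tie_even);
    assert_eq!((halfway.mant, halfway.exp), (2, 4));
}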

/// Round our significant digits into place, truncating them.
#[cfg_attr(not(feature = "compact"), inline)]
pub fn round_down(fp: &mut ExtendedFloat80, shift: i32) {
    // Might have a shift greater than 64 if we have an error.
    fp.mant = match shift == 64 {
        true => 0,
        false => fp.mant >> shift,
    };
    fp.exp += shift;
}
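
// Truncation sketch: `round_down` simply discards the shifted-out bits
// (rounding toward zero); a shift of exactly 64 clears the mantissa rather
// than relying on an out-of-range 64-bit shift.
#[test]
fn round_down_sketch() {
    let mut fp = ExtendedFloat80 { mant: 0b1_1111, exp: 0 };
    round_down(&mut fp, 4);
    assert_eq!((fp.mant, fp.exp), (1, 4));
}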