prost_wkt_types/pbtime/
datetime.rs

1//! A date/time type which exists primarily to convert [`Timestamp`]s into an RFC 3339 formatted
2//! string.
3
4////////////////////////////////////////////////////////////////////////////////
5/// FROM prost-types/src/datetime.rs
6////////////////////////////////////////////////////////////////////////////////
7
8use core::fmt;
9
10use crate::Duration;
11use crate::Timestamp;
12
13use super::TimestampError;
14
/// A UTC calendar date and time of day with nanosecond resolution.
///
/// The derived comparison traits compare fields in declaration order (year first, nanoseconds
/// last), so the field order below must not be changed.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct DateTime {
    /// Calendar year.
    pub(crate) year: i64,
    /// Month of the year, `1..=12`.
    pub(crate) month: u8,
    /// Day of the month, `1..=31`.
    pub(crate) day: u8,
    /// Hour of the day, `0..=23`.
    pub(crate) hour: u8,
    /// Minute of the hour, `0..=59`.
    pub(crate) minute: u8,
    /// Second of the minute, `0..=59`.
    pub(crate) second: u8,
    /// Nanoseconds within the second, `0..=999_999_999`.
    pub(crate) nanos: u32,
}
33
34impl DateTime {
35    /// The minimum representable [`Timestamp`] as a `DateTime`.
36    pub(crate) const MIN: DateTime = DateTime {
37        year: -292_277_022_657,
38        month: 1,
39        day: 27,
40        hour: 8,
41        minute: 29,
42        second: 52,
43        nanos: 0,
44    };
45
46    /// The maximum representable [`Timestamp`] as a `DateTime`.
47    pub(crate) const MAX: DateTime = DateTime {
48        year: 292_277_026_596,
49        month: 12,
50        day: 4,
51        hour: 15,
52        minute: 30,
53        second: 7,
54        nanos: 999_999_999,
55    };
56
57    /// Returns `true` if the `DateTime` is a valid calendar date.
58    pub(crate) fn is_valid(&self) -> bool {
59        self >= &DateTime::MIN
60            && self <= &DateTime::MAX
61            && self.month > 0
62            && self.month <= 12
63            && self.day > 0
64            && self.day <= days_in_month(self.year, self.month)
65            && self.hour < 24
66            && self.minute < 60
67            && self.second < 60
68            && self.nanos < 1_000_000_000
69    }
70}
71
72impl fmt::Display for DateTime {
73    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
74        // Pad years to at least 4 digits.
75        if self.year > 9999 {
76            write!(f, "+{}", self.year)?;
77        } else if self.year < 0 {
78            write!(f, "{:05}", self.year)?;
79        } else {
80            write!(f, "{:04}", self.year)?;
81        };
82
83        write!(
84            f,
85            "-{:02}-{:02}T{:02}:{:02}:{:02}",
86            self.month, self.day, self.hour, self.minute, self.second,
87        )?;
88
89        // Format subseconds to either nothing, millis, micros, or nanos.
90        let nanos = self.nanos;
91        if nanos == 0 {
92            write!(f, "Z")
93        } else if nanos % 1_000_000 == 0 {
94            write!(f, ".{:03}Z", nanos / 1_000_000)
95        } else if nanos % 1_000 == 0 {
96            write!(f, ".{:06}Z", nanos / 1_000)
97        } else {
98            write!(f, ".{:09}Z", nanos)
99        }
100    }
101}
102
103impl From<Timestamp> for DateTime {
104    /// musl's [`__secs_to_tm`][1] converted to Rust via [c2rust][2] and then cleaned up by hand.
105    ///
106    /// All existing `strftime`-like APIs in Rust are unable to handle the full range of timestamps
107    /// representable by `Timestamp`, including `strftime` itself, since tm.tm_year is an int.
108    ///
109    /// [1]: http://git.musl-libc.org/cgit/musl/tree/src/time/__secs_to_tm.c
110    /// [2]: https://c2rust.com/
111    fn from(mut timestamp: Timestamp) -> DateTime {
112        timestamp.normalize();
113
114        let t = timestamp.seconds;
115        let nanos = timestamp.nanos;
116
117        // 2000-03-01 (mod 400 year, immediately after feb29
118        const LEAPOCH: i64 = 946_684_800 + 86400 * (31 + 29);
119        const DAYS_PER_400Y: i32 = 365 * 400 + 97;
120        const DAYS_PER_100Y: i32 = 365 * 100 + 24;
121        const DAYS_PER_4Y: i32 = 365 * 4 + 1;
122        const DAYS_IN_MONTH: [u8; 12] = [31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 29];
123
124        // Note(dcb): this bit is rearranged slightly to avoid integer overflow.
125        let mut days: i64 = (t / 86_400) - (LEAPOCH / 86_400);
126        let mut remsecs: i32 = (t % 86_400) as i32;
127        if remsecs < 0i32 {
128            remsecs += 86_400;
129            days -= 1
130        }
131
132        let mut qc_cycles: i32 = (days / i64::from(DAYS_PER_400Y)) as i32;
133        let mut remdays: i32 = (days % i64::from(DAYS_PER_400Y)) as i32;
134        if remdays < 0 {
135            remdays += DAYS_PER_400Y;
136            qc_cycles -= 1;
137        }
138
139        let mut c_cycles: i32 = remdays / DAYS_PER_100Y;
140        if c_cycles == 4 {
141            c_cycles -= 1;
142        }
143        remdays -= c_cycles * DAYS_PER_100Y;
144
145        let mut q_cycles: i32 = remdays / DAYS_PER_4Y;
146        if q_cycles == 25 {
147            q_cycles -= 1;
148        }
149        remdays -= q_cycles * DAYS_PER_4Y;
150
151        let mut remyears: i32 = remdays / 365;
152        if remyears == 4 {
153            remyears -= 1;
154        }
155        remdays -= remyears * 365;
156
157        let mut years: i64 = i64::from(remyears)
158            + 4 * i64::from(q_cycles)
159            + 100 * i64::from(c_cycles)
160            + 400 * i64::from(qc_cycles);
161
162        let mut months: i32 = 0;
163        while i32::from(DAYS_IN_MONTH[months as usize]) <= remdays {
164            remdays -= i32::from(DAYS_IN_MONTH[months as usize]);
165            months += 1
166        }
167
168        if months >= 10 {
169            months -= 12;
170            years += 1;
171        }
172
173        let date_time = DateTime {
174            year: years + 2000,
175            month: (months + 3) as u8,
176            day: (remdays + 1) as u8,
177            hour: (remsecs / 3600) as u8,
178            minute: (remsecs / 60 % 60) as u8,
179            second: (remsecs % 60) as u8,
180            nanos: nanos as u32,
181        };
182        debug_assert!(date_time.is_valid());
183        date_time
184    }
185}
186
187/// Returns the number of days in the month.
188fn days_in_month(year: i64, month: u8) -> u8 {
189    const DAYS_IN_MONTH: [u8; 12] = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
190    let (_, is_leap) = year_to_seconds(year);
191    DAYS_IN_MONTH[usize::from(month - 1)] + u8::from(is_leap && month == 2)
192}
193
/// Returns `None` from the enclosing function unless the given condition is true.
macro_rules! ensure {
    ($condition:expr) => {{
        if !$condition {
            return None;
        }
    }};
}
201
202/// Parses a date in RFC 3339 format from ASCII string `b`, returning the year, month, day, and
203/// remaining input.
204///
205/// The date is not validated according to a calendar.
206fn parse_date(s: &str) -> Option<(i64, u8, u8, &str)> {
207    debug_assert!(s.is_ascii());
208
209    // Smallest valid date is YYYY-MM-DD.
210    ensure!(s.len() >= 10);
211
212    // Parse the year in one of three formats:
213    //  * +YYYY[Y]+
214    //  * -[Y]+
215    //  * YYYY
216    let (year, s) = match s.as_bytes()[0] {
217        b'+' => {
218            let (digits, s) = parse_digits(&s[1..]);
219            ensure!(digits.len() >= 5);
220            let date: i64 = digits.parse().ok()?;
221            (date, s)
222        }
223        b'-' => {
224            let (digits, s) = parse_digits(&s[1..]);
225            ensure!(digits.len() >= 4);
226            let date: i64 = digits.parse().ok()?;
227            (-date, s)
228        }
229        _ => {
230            // Parse a 4 digit numeric.
231            let (n1, s) = parse_two_digit_numeric(s)?;
232            let (n2, s) = parse_two_digit_numeric(s)?;
233            (i64::from(n1) * 100 + i64::from(n2), s)
234        }
235    };
236
237    let s = parse_char(s, b'-')?;
238    let (month, s) = parse_two_digit_numeric(s)?;
239    let s = parse_char(s, b'-')?;
240    let (day, s) = parse_two_digit_numeric(s)?;
241    Some((year, month, day, s))
242}
243
244/// Parses a time in RFC 3339 format from ASCII string `s`, returning the hour, minute, second, and
245/// nanos.
246///
247/// The date is not validated according to a calendar.
248fn parse_time(s: &str) -> Option<(u8, u8, u8, u32, &str)> {
249    debug_assert!(s.is_ascii());
250
251    let (hour, s) = parse_two_digit_numeric(s)?;
252    let s = parse_char(s, b':')?;
253    let (minute, s) = parse_two_digit_numeric(s)?;
254    let s = parse_char(s, b':')?;
255    let (second, s) = parse_two_digit_numeric(s)?;
256
257    let (nanos, s) = parse_nanos(s)?;
258
259    Some((hour, minute, second, nanos, s))
260}
261
262/// Parses an optional nanosecond time from ASCII string `s`, returning the nanos and remaining
263/// string.
264fn parse_nanos(s: &str) -> Option<(u32, &str)> {
265    debug_assert!(s.is_ascii());
266
267    // Parse the nanoseconds, if present.
268    let (nanos, s) = if let Some(s) = parse_char(s, b'.') {
269        let (mut digits, s) = parse_digits(s);
270        if digits.len() > 9 {
271            digits = digits.split_at(9).0;
272        }
273        let nanos = 10u32.pow(9 - digits.len() as u32) * digits.parse::<u32>().ok()?;
274        (nanos, s)
275    } else {
276        (0, s)
277    };
278
279    Some((nanos, s))
280}
281
282/// Parses a timezone offset in RFC 3339 format from ASCII string `s`, returning the offset hour,
283/// offset minute, and remaining input.
284fn parse_offset(s: &str) -> Option<(i8, i8, &str)> {
285    debug_assert!(s.is_ascii());
286
287    if s.is_empty() {
288        // If no timezone specified, assume UTC.
289        return Some((0, 0, s));
290    }
291
292    // Snowflake's timestamp format contains a space separator before the offset.
293    let s = parse_char(s, b' ').unwrap_or(s);
294
295    if let Some(s) = parse_char_ignore_case(s, b'Z') {
296        Some((0, 0, s))
297    } else {
298        let (is_positive, s) = if let Some(s) = parse_char(s, b'+') {
299            (true, s)
300        } else if let Some(s) = parse_char(s, b'-') {
301            (false, s)
302        } else {
303            return None;
304        };
305
306        let (hour, s) = parse_two_digit_numeric(s)?;
307
308        let (minute, s) = if s.is_empty() {
309            // No offset minutes are specified, e.g. +00 or +07.
310            (0, s)
311        } else {
312            // Optional colon separator between the hour and minute digits.
313            let s = parse_char(s, b':').unwrap_or(s);
314            let (minute, s) = parse_two_digit_numeric(s)?;
315            (minute, s)
316        };
317
318        // '-00:00' indicates an unknown local offset.
319        ensure!(is_positive || hour > 0 || minute > 0);
320
321        ensure!(hour < 24 && minute < 60);
322
323        let hour = hour as i8;
324        let minute = minute as i8;
325
326        if is_positive {
327            Some((hour, minute, s))
328        } else {
329            Some((-hour, -minute, s))
330        }
331    }
332}
333
/// Parses exactly two ASCII decimal digits from the start of ASCII string `s`, returning their
/// numeric value and the unparsed remainder.
///
/// Returns `None` when `s` is shorter than two bytes or when either of the first two bytes is
/// not a digit. Signs and whitespace are never accepted.
fn parse_two_digit_numeric(s: &str) -> Option<(u8, &str)> {
    debug_assert!(s.is_ascii());

    // Validate the bytes directly instead of going through `str::parse`, which would accept a
    // leading `+`. Slicing at 2 is safe because both leading bytes are known to be ASCII.
    match s.as_bytes() {
        [tens, ones, ..] if tens.is_ascii_digit() && ones.is_ascii_digit() => {
            Some(((tens - b'0') * 10 + (ones - b'0'), &s[2..]))
        }
        _ => None,
    }
}
347
/// Splits ASCII string `s` into its leading run of decimal digits and everything after it.
///
/// Either part may be empty.
fn parse_digits(s: &str) -> (&str, &str) {
    debug_assert!(s.is_ascii());

    let digits_end = s
        .find(|ch: char| !ch.is_ascii_digit())
        .unwrap_or(s.len());
    s.split_at(digits_end)
}
359
/// Consumes the single ASCII byte `c` from the start of ASCII string `s` and returns what
/// follows it, or `None` if `s` is empty or starts with a different byte.
fn parse_char(s: &str, c: u8) -> Option<&str> {
    debug_assert!(s.is_ascii());

    match s.as_bytes().first() {
        Some(&head) if head == c => Some(&s[1..]),
        _ => None,
    }
}
368
/// Consumes the single ASCII byte `c`, compared without regard to ASCII case, from the start of
/// ASCII string `s` and returns what follows it, or `None` if `s` is empty or starts with a
/// different byte.
fn parse_char_ignore_case(s: &str, c: u8) -> Option<&str> {
    debug_assert!(s.is_ascii());

    match s.as_bytes().first() {
        Some(head) if head.eq_ignore_ascii_case(&c) => Some(&s[1..]),
        _ => None,
    }
}
377
378/// Returns the offset in seconds from the Unix epoch of the date time.
379///
380/// This is musl's [`__tm_to_secs`][1] converted to Rust via [c2rust[2] and then cleaned up by
381/// hand.
382///
383/// [1]: https://git.musl-libc.org/cgit/musl/tree/src/time/__tm_to_secs.c
384/// [2]: https://c2rust.com/
385fn date_time_to_seconds(tm: &DateTime) -> i64 {
386    let (start_of_year, is_leap) = year_to_seconds(tm.year);
387
388    let seconds_within_year = month_to_seconds(tm.month, is_leap)
389        + 86400 * u32::from(tm.day - 1)
390        + 3600 * u32::from(tm.hour)
391        + 60 * u32::from(tm.minute)
392        + u32::from(tm.second);
393
394    (start_of_year + i128::from(seconds_within_year)) as i64
395}
396
/// Returns how many seconds of the year have elapsed before the first day of `month`
/// (1 = January), adding one day for March onwards when `is_leap` is set.
///
/// `month` must be in `1..=12`; other values panic.
///
/// This is musl's [`__month_to_secs`][1], translated to Rust with c2rust and then tidied by hand.
///
/// [1]: https://git.musl-libc.org/cgit/musl/tree/src/time/__month_to_secs.c
fn month_to_seconds(month: u8, is_leap: bool) -> u32 {
    // Days elapsed before each month starts in a non-leap year.
    const DAYS_BEFORE_MONTH: [u32; 12] = [0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334];

    let days = DAYS_BEFORE_MONTH[usize::from(month - 1)] + u32::from(is_leap && month > 2);
    days * 86400
}
424
/// Returns the offset in seconds from the Unix epoch of the start of `year` (proleptic Gregorian
/// calendar), together with a flag telling whether `year` is a leap year.
///
/// Based on musl's [`__year_to_secs`][1], translated to Rust with c2rust and then tidied by hand.
///
/// The result is an `i128` because the start of the earliest supported year underflows `i64`.
/// All intermediate arithmetic is done in `i128` as well, so no `i64` year can overflow.
///
/// [1]: https://git.musl-libc.org/cgit/musl/tree/src/time/__year_to_secs.c
pub(crate) fn year_to_seconds(year: i64) -> (i128, bool) {
    const SECS_PER_DAY: i128 = 86_400;
    const SECS_PER_COMMON_YEAR: i128 = 365 * SECS_PER_DAY;
    // 2000-01-01T00:00:00Z in seconds since the Unix epoch.
    const Y2000: i128 = 946_684_800;

    // Work relative to 2000, the first year of a 400-year Gregorian cycle.
    let since_2000 = i128::from(year) - 2000;
    let cycles = since_2000.div_euclid(400);
    let year_in_cycle = since_2000.rem_euclid(400);
    let centuries = year_in_cycle / 100;
    let year_in_century = year_in_cycle % 100;

    // Century years are leap years only when they start a 400-year cycle.
    let is_leap = if year_in_century == 0 {
        centuries == 0
    } else {
        year_in_century % 4 == 0
    };

    // 97 leap days per 400-year cycle, 24 per century, one per four years in the current
    // century. A leap year's own leap day has not happened yet on 1 January, hence the
    // subtraction.
    let leap_days = 97 * cycles + 24 * centuries + year_in_century / 4 - i128::from(is_leap);

    // The extra day accounts for 29 February of the year that starts the 400-year cycle.
    let seconds =
        since_2000 * SECS_PER_COMMON_YEAR + leap_days * SECS_PER_DAY + Y2000 + SECS_PER_DAY;

    (seconds, is_leap)
}
496
497/// Parses a timestamp in RFC 3339 format from `s`.
498pub(crate) fn parse_timestamp(s: &str) -> Option<Timestamp> {
499    // Check that the string is ASCII, since subsequent parsing steps use byte-level indexing.
500    ensure!(s.is_ascii());
501
502    let (year, month, day, s) = parse_date(s)?;
503
504    if s.is_empty() {
505        // The string only contained a date.
506        let date_time = DateTime {
507            year,
508            month,
509            day,
510            ..DateTime::default()
511        };
512
513        return Timestamp::try_from(date_time).ok();
514    }
515
516    // Accept either 'T' or ' ' as delimiter between date and time.
517    let s = parse_char_ignore_case(s, b'T').or_else(|| parse_char(s, b' '))?;
518    let (hour, minute, mut second, nanos, s) = parse_time(s)?;
519    let (offset_hour, offset_minute, s) = parse_offset(s)?;
520
521    ensure!(s.is_empty());
522
523    // Detect whether the timestamp falls in a leap second. If this is the case, roll it back
524    // to the previous second. To be maximally conservative, this should be checking that the
525    // timestamp is the last second in the UTC day (23:59:60), and even potentially checking
526    // that it's the final day of the UTC month, however these checks are non-trivial because
527    // at this point we have, in effect, a local date time, since the offset has not been
528    // applied.
529    if second == 60 {
530        second = 59;
531    }
532
533    let date_time = DateTime {
534        year,
535        month,
536        day,
537        hour,
538        minute,
539        second,
540        nanos,
541    };
542
543    let Timestamp { seconds, nanos } = Timestamp::try_from(date_time).ok()?;
544
545    let seconds =
546        seconds.checked_sub(i64::from(offset_hour) * 3600 + i64::from(offset_minute) * 60)?;
547
548    Some(Timestamp { seconds, nanos })
549}
550
551/// Parse a duration in the [Protobuf JSON encoding spec format][1].
552///
553/// [1]: https://developers.google.com/protocol-buffers/docs/proto3#json
554pub(crate) fn parse_duration(s: &str) -> Option<Duration> {
555    // Check that the string is ASCII, since subsequent parsing steps use byte-level indexing.
556    ensure!(s.is_ascii());
557
558    let (is_negative, s) = match parse_char(s, b'-') {
559        Some(s) => (true, s),
560        None => (false, s),
561    };
562
563    let (digits, s) = parse_digits(s);
564    let seconds = digits.parse::<i64>().ok()?;
565
566    let (nanos, s) = parse_nanos(s)?;
567
568    let s = parse_char(s, b's')?;
569    ensure!(s.is_empty());
570    ensure!(nanos < super::NANOS_PER_SECOND as u32);
571
572    // If the duration is negative, also flip the nanos sign.
573    let (seconds, nanos) = if is_negative {
574        (-seconds, -(nanos as i32))
575    } else {
576        (seconds, nanos as i32)
577    };
578
579    Some(Duration { seconds, nanos })
580}
581
582impl TryFrom<DateTime> for Timestamp {
583    type Error = TimestampError;
584
585    fn try_from(date_time: DateTime) -> Result<Timestamp, TimestampError> {
586        if !date_time.is_valid() {
587            return Err(TimestampError::InvalidDateTime);
588        }
589        let seconds = date_time_to_seconds(&date_time);
590        let nanos = date_time.nanos;
591        Ok(Timestamp {
592            seconds,
593            nanos: nanos as i32,
594        })
595    }
596}