flate2/gz/
mod.rs

1use std::ffi::CString;
2use std::io::{BufRead, Error, ErrorKind, Read, Result, Write};
3use std::time;
4
5use crate::bufreader::BufReader;
6use crate::{Compression, Crc};
7
/// Flag bit: a 16-bit CRC of the header follows the optional header fields.
pub static FHCRC: u8 = 0b0000_0010;
/// Flag bit: a length-prefixed `extra` field is present in the header.
pub static FEXTRA: u8 = 0b0000_0100;
/// Flag bit: a nul-terminated file name is present in the header.
pub static FNAME: u8 = 0b0000_1000;
/// Flag bit: a nul-terminated comment is present in the header.
pub static FCOMMENT: u8 = 0b0001_0000;
/// Mask of the three highest flag bits; the header parser rejects any header
/// in which one of them is set.
pub static FRESERVED: u8 = 0b1110_0000;
13
14pub mod bufread;
15pub mod read;
16pub mod write;
17
// Upper bound, in bytes, on the header's filename and comment fields. This is
// far more than any reasonable header needs, while still stopping a hostile
// stream from making the parser buffer an unbounded amount of data.
const MAX_HEADER_BUF: usize = 0xFFFF;
21
/// The header of a gzip stream.
///
/// A header may carry metadata describing the file that was compressed;
/// fields that were not present in the stream are reported as `None`.
#[derive(PartialEq, Clone, Debug, Default)]
pub struct GzHeader {
    extra: Option<Vec<u8>>,
    filename: Option<Vec<u8>>,
    comment: Option<Vec<u8>>,
    operating_system: u8,
    mtime: u32,
}

impl GzHeader {
    /// Returns the raw bytes of the header's `filename` field, or `None`
    /// when the header has no such field.
    pub fn filename(&self) -> Option<&[u8]> {
        self.filename.as_deref()
    }

    /// Returns the raw bytes of the header's `extra` field, or `None` when
    /// the header has no such field.
    pub fn extra(&self) -> Option<&[u8]> {
        self.extra.as_deref()
    }

    /// Returns the raw bytes of the header's `comment` field, or `None` when
    /// the header has no such field.
    pub fn comment(&self) -> Option<&[u8]> {
        self.comment.as_deref()
    }

    /// Returns the header's `operating_system` byte.
    ///
    /// The gzip format predefines values for various operating systems;
    /// 255 means that the operating system is unknown.
    pub fn operating_system(&self) -> u8 {
        self.operating_system
    }

    /// Returns the most recent modification time of the original file.
    ///
    /// The value is in Unix format: seconds since 00:00:00 GMT, Jan. 1, 1970.
    /// (This may cause problems for MS-DOS and other systems that use local
    /// rather than Universal time.) If the compressed data did not come from
    /// a file, `mtime` is set to the time at which compression started.
    /// A value of 0 means that no time stamp is available.
    ///
    /// Using `mtime` is discouraged because of the Year 2038 problem.
    pub fn mtime(&self) -> u32 {
        self.mtime
    }

    /// Returns the most recent modification time as a date-time value.
    ///
    /// Yields `None` when the underlying counter is 0, which indicates that
    /// no time stamp is available.
    ///
    /// The time is measured as seconds since 00:00:00 GMT, Jan. 1 1970.
    /// See [`mtime`](#method.mtime) for more detail.
    pub fn mtime_as_datetime(&self) -> Option<time::SystemTime> {
        match self.mtime {
            0 => None,
            secs => Some(time::UNIX_EPOCH + time::Duration::from_secs(u64::from(secs))),
        }
    }
}
89
90#[derive(Debug)]
91pub enum GzHeaderState {
92    Start(u8, [u8; 10]),
93    Xlen(Option<Box<Crc>>, u8, [u8; 2]),
94    Extra(Option<Box<Crc>>, u16),
95    Filename(Option<Box<Crc>>),
96    Comment(Option<Box<Crc>>),
97    Crc(Option<Box<Crc>>, u8, [u8; 2]),
98    Complete,
99}
100
101impl Default for GzHeaderState {
102    fn default() -> Self {
103        Self::Complete
104    }
105}
106
107#[derive(Debug, Default)]
108pub struct GzHeaderParser {
109    state: GzHeaderState,
110    flags: u8,
111    header: GzHeader,
112}
113
114impl GzHeaderParser {
115    fn new() -> Self {
116        GzHeaderParser {
117            state: GzHeaderState::Start(0, [0; 10]),
118            flags: 0,
119            header: GzHeader::default(),
120        }
121    }
122
123    fn parse<'a, R: Read>(&mut self, r: &'a mut R) -> Result<()> {
124        loop {
125            match &mut self.state {
126                GzHeaderState::Start(count, buffer) => {
127                    while (*count as usize) < buffer.len() {
128                        *count += read_into(r, &mut buffer[*count as usize..])? as u8;
129                    }
130                    // Gzip identification bytes
131                    if buffer[0] != 0x1f || buffer[1] != 0x8b {
132                        return Err(bad_header());
133                    }
134                    // Gzip compression method (8 = deflate)
135                    if buffer[2] != 8 {
136                        return Err(bad_header());
137                    }
138                    self.flags = buffer[3];
139                    // RFC1952: "must give an error indication if any reserved bit is non-zero"
140                    if self.flags & FRESERVED != 0 {
141                        return Err(bad_header());
142                    }
143                    self.header.mtime = ((buffer[4] as u32) << 0)
144                        | ((buffer[5] as u32) << 8)
145                        | ((buffer[6] as u32) << 16)
146                        | ((buffer[7] as u32) << 24);
147                    let _xfl = buffer[8];
148                    self.header.operating_system = buffer[9];
149                    let crc = if self.flags & FHCRC != 0 {
150                        let mut crc = Box::new(Crc::new());
151                        crc.update(buffer);
152                        Some(crc)
153                    } else {
154                        None
155                    };
156                    self.state = GzHeaderState::Xlen(crc, 0, [0; 2]);
157                }
158                GzHeaderState::Xlen(crc, count, buffer) => {
159                    if self.flags & FEXTRA != 0 {
160                        while (*count as usize) < buffer.len() {
161                            *count += read_into(r, &mut buffer[*count as usize..])? as u8;
162                        }
163                        if let Some(crc) = crc {
164                            crc.update(buffer);
165                        }
166                        let xlen = parse_le_u16(&buffer);
167                        self.header.extra = Some(vec![0; xlen as usize]);
168                        self.state = GzHeaderState::Extra(crc.take(), 0);
169                    } else {
170                        self.state = GzHeaderState::Filename(crc.take());
171                    }
172                }
173                GzHeaderState::Extra(crc, count) => {
174                    debug_assert!(self.header.extra.is_some());
175                    let extra = self.header.extra.as_mut().unwrap();
176                    while (*count as usize) < extra.len() {
177                        *count += read_into(r, &mut extra[*count as usize..])? as u16;
178                    }
179                    if let Some(crc) = crc {
180                        crc.update(extra);
181                    }
182                    self.state = GzHeaderState::Filename(crc.take());
183                }
184                GzHeaderState::Filename(crc) => {
185                    if self.flags & FNAME != 0 {
186                        let filename = self.header.filename.get_or_insert_with(Vec::new);
187                        read_to_nul(r, filename)?;
188                        if let Some(crc) = crc {
189                            crc.update(filename);
190                            crc.update(b"\0");
191                        }
192                    }
193                    self.state = GzHeaderState::Comment(crc.take());
194                }
195                GzHeaderState::Comment(crc) => {
196                    if self.flags & FCOMMENT != 0 {
197                        let comment = self.header.comment.get_or_insert_with(Vec::new);
198                        read_to_nul(r, comment)?;
199                        if let Some(crc) = crc {
200                            crc.update(comment);
201                            crc.update(b"\0");
202                        }
203                    }
204                    self.state = GzHeaderState::Crc(crc.take(), 0, [0; 2]);
205                }
206                GzHeaderState::Crc(crc, count, buffer) => {
207                    if let Some(crc) = crc {
208                        debug_assert!(self.flags & FHCRC != 0);
209                        while (*count as usize) < buffer.len() {
210                            *count += read_into(r, &mut buffer[*count as usize..])? as u8;
211                        }
212                        let stored_crc = parse_le_u16(&buffer);
213                        let calced_crc = crc.sum() as u16;
214                        if stored_crc != calced_crc {
215                            return Err(corrupt());
216                        }
217                    }
218                    self.state = GzHeaderState::Complete;
219                }
220                GzHeaderState::Complete => {
221                    return Ok(());
222                }
223            }
224        }
225    }
226
227    fn header(&self) -> Option<&GzHeader> {
228        match self.state {
229            GzHeaderState::Complete => Some(&self.header),
230            _ => None,
231        }
232    }
233}
234
235impl From<GzHeaderParser> for GzHeader {
236    fn from(parser: GzHeaderParser) -> Self {
237        debug_assert!(matches!(parser.state, GzHeaderState::Complete));
238        parser.header
239    }
240}
241
// Perform a single read from `r` into `buffer` and report how many bytes
// arrived. Unlike `Read::read`, end-of-input is an `UnexpectedEof` error, and
// `Ok(0)` only means the read was interrupted, so the caller should retry.
// `buffer` must not be empty.
fn read_into<R: Read>(r: &mut R, buffer: &mut [u8]) -> Result<usize> {
    debug_assert!(!buffer.is_empty());
    let outcome = r.read(buffer);
    match outcome {
        Err(err) => {
            if err.kind() == ErrorKind::Interrupted {
                Ok(0)
            } else {
                Err(err)
            }
        }
        Ok(0) => Err(Error::from(ErrorKind::UnexpectedEof)),
        Ok(received) => Ok(received),
    }
}
254
255// Read `r` up to the first nul byte, pushing non-nul bytes to `buffer`.
256fn read_to_nul<R: Read>(r: &mut R, buffer: &mut Vec<u8>) -> Result<()> {
257    let mut bytes = r.bytes();
258    loop {
259        match bytes.next().transpose()? {
260            Some(byte) if byte == 0 => {
261                return Ok(());
262            }
263            Some(_) if buffer.len() == MAX_HEADER_BUF => {
264                return Err(Error::new(
265                    ErrorKind::InvalidInput,
266                    "gzip header field too long",
267                ));
268            }
269            Some(byte) => {
270                buffer.push(byte);
271            }
272            None => {
273                return Err(ErrorKind::UnexpectedEof.into());
274            }
275        }
276    }
277}
278
// Decode two bytes as a little-endian `u16` (first byte is least significant).
fn parse_le_u16(buffer: &[u8; 2]) -> u16 {
    u16::from_le_bytes(*buffer)
}
282
// Build the error returned when a gzip header fails validation.
fn bad_header() -> Error {
    let kind = ErrorKind::InvalidInput;
    Error::new(kind, "invalid gzip header")
}
286
// Build the error returned when a stored checksum does not match the
// checksum computed from the data.
fn corrupt() -> Error {
    let message = "corrupt gzip stream does not have a matching checksum";
    Error::new(ErrorKind::InvalidInput, message)
}
293
294/// A builder structure to create a new gzip Encoder.
295///
296/// This structure controls header configuration options such as the filename.
297///
298/// # Examples
299///
300/// ```
301/// use std::io::prelude::*;
302/// # use std::io;
303/// use std::fs::File;
304/// use flate2::GzBuilder;
305/// use flate2::Compression;
306///
307/// // GzBuilder opens a file and writes a sample string using GzBuilder pattern
308///
309/// # fn sample_builder() -> Result<(), io::Error> {
310/// let f = File::create("examples/hello_world.gz")?;
311/// let mut gz = GzBuilder::new()
312///                 .filename("hello_world.txt")
313///                 .comment("test file, please delete")
314///                 .write(f, Compression::default());
315/// gz.write_all(b"hello world")?;
316/// gz.finish()?;
317/// # Ok(())
318/// # }
319/// ```
320#[derive(Debug)]
321pub struct GzBuilder {
322    extra: Option<Vec<u8>>,
323    filename: Option<CString>,
324    comment: Option<CString>,
325    operating_system: Option<u8>,
326    mtime: u32,
327}
328
329impl Default for GzBuilder {
330    fn default() -> Self {
331        Self::new()
332    }
333}
334
335impl GzBuilder {
336    /// Create a new blank builder with no header by default.
337    pub fn new() -> GzBuilder {
338        GzBuilder {
339            extra: None,
340            filename: None,
341            comment: None,
342            operating_system: None,
343            mtime: 0,
344        }
345    }
346
347    /// Configure the `mtime` field in the gzip header.
348    pub fn mtime(mut self, mtime: u32) -> GzBuilder {
349        self.mtime = mtime;
350        self
351    }
352
353    /// Configure the `operating_system` field in the gzip header.
354    pub fn operating_system(mut self, os: u8) -> GzBuilder {
355        self.operating_system = Some(os);
356        self
357    }
358
359    /// Configure the `extra` field in the gzip header.
360    pub fn extra<T: Into<Vec<u8>>>(mut self, extra: T) -> GzBuilder {
361        self.extra = Some(extra.into());
362        self
363    }
364
365    /// Configure the `filename` field in the gzip header.
366    ///
367    /// # Panics
368    ///
369    /// Panics if the `filename` slice contains a zero.
370    pub fn filename<T: Into<Vec<u8>>>(mut self, filename: T) -> GzBuilder {
371        self.filename = Some(CString::new(filename.into()).unwrap());
372        self
373    }
374
375    /// Configure the `comment` field in the gzip header.
376    ///
377    /// # Panics
378    ///
379    /// Panics if the `comment` slice contains a zero.
380    pub fn comment<T: Into<Vec<u8>>>(mut self, comment: T) -> GzBuilder {
381        self.comment = Some(CString::new(comment.into()).unwrap());
382        self
383    }
384
385    /// Consume this builder, creating a writer encoder in the process.
386    ///
387    /// The data written to the returned encoder will be compressed and then
388    /// written out to the supplied parameter `w`.
389    pub fn write<W: Write>(self, w: W, lvl: Compression) -> write::GzEncoder<W> {
390        write::gz_encoder(self.into_header(lvl), w, lvl)
391    }
392
393    /// Consume this builder, creating a reader encoder in the process.
394    ///
395    /// Data read from the returned encoder will be the compressed version of
396    /// the data read from the given reader.
397    pub fn read<R: Read>(self, r: R, lvl: Compression) -> read::GzEncoder<R> {
398        read::gz_encoder(self.buf_read(BufReader::new(r), lvl))
399    }
400
401    /// Consume this builder, creating a reader encoder in the process.
402    ///
403    /// Data read from the returned encoder will be the compressed version of
404    /// the data read from the given reader.
405    pub fn buf_read<R>(self, r: R, lvl: Compression) -> bufread::GzEncoder<R>
406    where
407        R: BufRead,
408    {
409        bufread::gz_encoder(self.into_header(lvl), r, lvl)
410    }
411
412    fn into_header(self, lvl: Compression) -> Vec<u8> {
413        let GzBuilder {
414            extra,
415            filename,
416            comment,
417            operating_system,
418            mtime,
419        } = self;
420        let mut flg = 0;
421        let mut header = vec![0u8; 10];
422        if let Some(v) = extra {
423            flg |= FEXTRA;
424            header.push((v.len() >> 0) as u8);
425            header.push((v.len() >> 8) as u8);
426            header.extend(v);
427        }
428        if let Some(filename) = filename {
429            flg |= FNAME;
430            header.extend(filename.as_bytes_with_nul().iter().copied());
431        }
432        if let Some(comment) = comment {
433            flg |= FCOMMENT;
434            header.extend(comment.as_bytes_with_nul().iter().copied());
435        }
436        header[0] = 0x1f;
437        header[1] = 0x8b;
438        header[2] = 8;
439        header[3] = flg;
440        header[4] = (mtime >> 0) as u8;
441        header[5] = (mtime >> 8) as u8;
442        header[6] = (mtime >> 16) as u8;
443        header[7] = (mtime >> 24) as u8;
444        header[8] = if lvl.0 >= Compression::best().0 {
445            2
446        } else if lvl.0 <= Compression::fast().0 {
447            4
448        } else {
449            0
450        };
451
452        // Typically this byte indicates what OS the gz stream was created on,
453        // but in an effort to have cross-platform reproducible streams just
454        // default this value to 255. I'm not sure that if we "correctly" set
455        // this it'd do anything anyway...
456        header[9] = operating_system.unwrap_or(255);
457        header
458    }
459}
460
// Unit tests for the gzip encoders/decoders and for header building/parsing.
#[cfg(test)]
mod tests {
    use std::io::prelude::*;

    use super::{read, write, GzBuilder, GzHeaderParser};
    use crate::{Compression, GzHeader};
    use rand::{thread_rng, Rng};

    // Data compressed with the write-side encoder must decompress back to
    // the same text through the read-side decoder.
    #[test]
    fn roundtrip() {
        let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
        e.write_all(b"foo bar baz").unwrap();
        let inner = e.finish().unwrap();
        let mut d = read::GzDecoder::new(&inner[..]);
        let mut s = String::new();
        d.read_to_string(&mut s).unwrap();
        assert_eq!(s, "foo bar baz");
    }

    // A stream finished without writing any data must decode to an empty
    // string.
    #[test]
    fn roundtrip_zero() {
        let e = write::GzEncoder::new(Vec::new(), Compression::default());
        let inner = e.finish().unwrap();
        let mut d = read::GzDecoder::new(&inner[..]);
        let mut s = String::new();
        d.read_to_string(&mut s).unwrap();
        assert_eq!(s, "");
    }

    // Round-trip 200 writes of randomly sized prefixes of a 1 KiB random
    // buffer and compare against the concatenation of what was written.
    #[test]
    fn roundtrip_big() {
        let mut real = Vec::new();
        let mut w = write::GzEncoder::new(Vec::new(), Compression::default());
        let v = crate::random_bytes().take(1024).collect::<Vec<_>>();
        for _ in 0..200 {
            let to_write = &v[..thread_rng().gen_range(0..v.len())];
            real.extend(to_write.iter().copied());
            w.write_all(to_write).unwrap();
        }
        let result = w.finish().unwrap();
        let mut r = read::GzDecoder::new(&result[..]);
        let mut v = Vec::new();
        r.read_to_end(&mut v).unwrap();
        assert_eq!(v, real);
    }

    // Chain the read-side encoder directly into the read-side decoder over
    // 1 MiB of random data.
    #[test]
    fn roundtrip_big2() {
        let v = crate::random_bytes().take(1024 * 1024).collect::<Vec<_>>();
        let mut r = read::GzDecoder::new(read::GzEncoder::new(&v[..], Compression::default()));
        let mut res = Vec::new();
        r.read_to_end(&mut res).unwrap();
        assert_eq!(res, v);
    }

    // A Rust implementation of CRC that closely matches the C code in RFC1952.
    // Only use this to create CRCs for tests.
    struct Rfc1952Crc {
        /* Table of CRCs of all 8-bit messages. */
        crc_table: [u32; 256],
    }

    impl Rfc1952Crc {
        // Builds the 256-entry lookup table used by `update_crc`.
        fn new() -> Self {
            let mut crc = Rfc1952Crc {
                crc_table: [0; 256],
            };
            /* Make the table for a fast CRC. */
            for n in 0usize..256 {
                let mut c = n as u32;
                for _k in 0..8 {
                    if c & 1 != 0 {
                        c = 0xedb88320 ^ (c >> 1);
                    } else {
                        c = c >> 1;
                    }
                }
                crc.crc_table[n] = c;
            }
            crc
        }

        /*
         Update a running crc with the bytes buf and return
         the updated crc. The crc should be initialized to zero. Pre- and
         post-conditioning (one's complement) is performed within this
         function so it shouldn't be done by the caller.
        */
        fn update_crc(&self, crc: u32, buf: &[u8]) -> u32 {
            let mut c = crc ^ 0xffffffff;

            for b in buf {
                c = self.crc_table[(c as u8 ^ *b) as usize] ^ (c >> 8);
            }
            c ^ 0xffffffff
        }

        /* Return the CRC of the bytes buf. */
        fn crc(&self, buf: &[u8]) -> u32 {
            self.update_crc(0, buf)
        }
    }

    // Build a header with the builder, turn on the header-CRC flag by hand,
    // append the low 16 bits of an independently computed CRC, and check
    // that the parser accepts it and recovers every field.
    #[test]
    fn roundtrip_header() {
        let mut header = GzBuilder::new()
            .mtime(1234)
            .operating_system(57)
            .filename("filename")
            .comment("comment")
            .into_header(Compression::fast());

        // Add a CRC to the header
        header[3] = header[3] ^ super::FHCRC;
        let rfc1952_crc = Rfc1952Crc::new();
        let crc32 = rfc1952_crc.crc(&header);
        let crc16 = crc32 as u16;
        header.extend(&crc16.to_le_bytes());

        let mut parser = GzHeaderParser::new();
        parser.parse(&mut header.as_slice()).unwrap();
        let actual = parser.header().unwrap();
        assert_eq!(
            actual,
            &GzHeader {
                extra: None,
                filename: Some("filename".as_bytes().to_vec()),
                comment: Some("comment".as_bytes().to_vec()),
                operating_system: 57,
                mtime: 1234
            }
        )
    }

    // Fields configured on the builder must be visible through the decoder's
    // header, and the payload must still decode correctly.
    #[test]
    fn fields() {
        let r = vec![0, 2, 4, 6];
        let e = GzBuilder::new()
            .filename("foo.rs")
            .comment("bar")
            .extra(vec![0, 1, 2, 3])
            .read(&r[..], Compression::default());
        let mut d = read::GzDecoder::new(e);
        assert_eq!(d.header().unwrap().filename(), Some(&b"foo.rs"[..]));
        assert_eq!(d.header().unwrap().comment(), Some(&b"bar"[..]));
        assert_eq!(d.header().unwrap().extra(), Some(&b"\x00\x01\x02\x03"[..]));
        let mut res = Vec::new();
        d.read_to_end(&mut res).unwrap();
        assert_eq!(res, vec![0, 2, 4, 6]);
    }

    // Reading again after the stream has been fully decoded must succeed and
    // must not append anything further.
    #[test]
    fn keep_reading_after_end() {
        let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
        e.write_all(b"foo bar baz").unwrap();
        let inner = e.finish().unwrap();
        let mut d = read::GzDecoder::new(&inner[..]);
        let mut s = String::new();
        d.read_to_string(&mut s).unwrap();
        assert_eq!(s, "foo bar baz");
        d.read_to_string(&mut s).unwrap();
        assert_eq!(s, "foo bar baz");
    }

    // Property test: for arbitrary byte vectors, encoding and then decoding
    // through the reader types yields the original bytes.
    #[test]
    fn qc_reader() {
        ::quickcheck::quickcheck(test as fn(_) -> _);

        fn test(v: Vec<u8>) -> bool {
            let r = read::GzEncoder::new(&v[..], Compression::default());
            let mut r = read::GzDecoder::new(r);
            let mut v2 = Vec::new();
            r.read_to_end(&mut v2).unwrap();
            v == v2
        }
    }

    // Writing to the encoder and then flushing it must both succeed.
    #[test]
    fn flush_after_write() {
        let mut f = write::GzEncoder::new(Vec::new(), Compression::default());
        write!(f, "Hello world").unwrap();
        f.flush().unwrap();
    }
}