flate2/gz/
bufread.rs

1use std::cmp;
2use std::io;
3use std::io::prelude::*;
4use std::mem;
5
6use super::{corrupt, read_into, GzBuilder, GzHeader, GzHeaderParser};
7use crate::crc::CrcReader;
8use crate::deflate;
9use crate::Compression;
10
/// Copies as many bytes as fit from `from[*pos..]` into the front of `into`.
///
/// `pos` is the caller's cursor into `from`. It is advanced by the number of
/// bytes copied, and that number is returned. A cursor at or beyond the end of
/// `from` copies nothing and returns 0 rather than panicking.
fn copy(into: &mut [u8], from: &[u8], pos: &mut usize) -> usize {
    // `get` returns `None` for an out-of-range cursor; treat that as
    // "nothing left to copy" instead of underflowing a subtraction.
    let remaining = from.get(*pos..).unwrap_or(&[]);
    let min = cmp::min(into.len(), remaining.len());
    into[..min].copy_from_slice(&remaining[..min]);
    *pos += min;
    min
}
19
20/// A gzip streaming encoder
21///
22/// This structure implements a [`Read`] interface. When read from, it reads
23/// uncompressed data from the underlying [`BufRead`] and provides the compressed data.
24///
25/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
26/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
27///
28/// # Examples
29///
30/// ```
31/// use std::io::prelude::*;
32/// use std::io;
33/// use flate2::Compression;
34/// use flate2::bufread::GzEncoder;
35/// use std::fs::File;
36/// use std::io::BufReader;
37///
38/// // Opens sample file, compresses the contents and returns a Vector or error
39/// // File wrapped in a BufReader implements BufRead
40///
41/// fn open_hello_world() -> io::Result<Vec<u8>> {
42///     let f = File::open("examples/hello_world.txt")?;
43///     let b = BufReader::new(f);
44///     let mut gz = GzEncoder::new(b, Compression::fast());
45///     let mut buffer = Vec::new();
46///     gz.read_to_end(&mut buffer)?;
47///     Ok(buffer)
48/// }
49/// ```
50#[derive(Debug)]
51pub struct GzEncoder<R> {
52    inner: deflate::bufread::DeflateEncoder<CrcReader<R>>,
53    header: Vec<u8>,
54    pos: usize,
55    eof: bool,
56}
57
58pub fn gz_encoder<R: BufRead>(header: Vec<u8>, r: R, lvl: Compression) -> GzEncoder<R> {
59    let crc = CrcReader::new(r);
60    GzEncoder {
61        inner: deflate::bufread::DeflateEncoder::new(crc, lvl),
62        header,
63        pos: 0,
64        eof: false,
65    }
66}
67
68impl<R: BufRead> GzEncoder<R> {
69    /// Creates a new encoder which will use the given compression level.
70    ///
71    /// The encoder is not configured specially for the emitted header. For
72    /// header configuration, see the `GzBuilder` type.
73    ///
74    /// The data read from the stream `r` will be compressed and available
75    /// through the returned reader.
76    pub fn new(r: R, level: Compression) -> GzEncoder<R> {
77        GzBuilder::new().buf_read(r, level)
78    }
79
80    fn read_footer(&mut self, into: &mut [u8]) -> io::Result<usize> {
81        if self.pos == 8 {
82            return Ok(0);
83        }
84        let crc = self.inner.get_ref().crc();
85        let ref arr = [
86            (crc.sum() >> 0) as u8,
87            (crc.sum() >> 8) as u8,
88            (crc.sum() >> 16) as u8,
89            (crc.sum() >> 24) as u8,
90            (crc.amount() >> 0) as u8,
91            (crc.amount() >> 8) as u8,
92            (crc.amount() >> 16) as u8,
93            (crc.amount() >> 24) as u8,
94        ];
95        Ok(copy(into, arr, &mut self.pos))
96    }
97}
98
99impl<R> GzEncoder<R> {
100    /// Acquires a reference to the underlying reader.
101    pub fn get_ref(&self) -> &R {
102        self.inner.get_ref().get_ref()
103    }
104
105    /// Acquires a mutable reference to the underlying reader.
106    ///
107    /// Note that mutation of the reader may result in surprising results if
108    /// this encoder is continued to be used.
109    pub fn get_mut(&mut self) -> &mut R {
110        self.inner.get_mut().get_mut()
111    }
112
113    /// Returns the underlying stream, consuming this encoder
114    pub fn into_inner(self) -> R {
115        self.inner.into_inner().into_inner()
116    }
117}
118
/// Splits an 8-byte trailer into its two little-endian `u32` fields.
///
/// Returns `(crc, amt)`: bytes 0..4 decode to the first value and bytes 4..8
/// to the second.
#[inline]
fn finish(buf: &[u8; 8]) -> (u32, u32) {
    let crc = u32::from_le_bytes([buf[0], buf[1], buf[2], buf[3]]);
    let amt = u32::from_le_bytes([buf[4], buf[5], buf[6], buf[7]]);
    (crc, amt)
}
131
132impl<R: BufRead> Read for GzEncoder<R> {
133    fn read(&mut self, mut into: &mut [u8]) -> io::Result<usize> {
134        let mut amt = 0;
135        if self.eof {
136            return self.read_footer(into);
137        } else if self.pos < self.header.len() {
138            amt += copy(into, &self.header, &mut self.pos);
139            if amt == into.len() {
140                return Ok(amt);
141            }
142            let tmp = into;
143            into = &mut tmp[amt..];
144        }
145        match self.inner.read(into)? {
146            0 => {
147                self.eof = true;
148                self.pos = 0;
149                self.read_footer(into)
150            }
151            n => Ok(amt + n),
152        }
153    }
154}
155
156impl<R: BufRead + Write> Write for GzEncoder<R> {
157    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
158        self.get_mut().write(buf)
159    }
160
161    fn flush(&mut self) -> io::Result<()> {
162        self.get_mut().flush()
163    }
164}
165
166/// A decoder for a single member of a [gzip file].
167///
168/// This structure implements a [`Read`] interface. When read from, it reads
169/// compressed data from the underlying [`BufRead`] and provides the uncompressed data.
170///
171/// After reading a single member of the gzip data this reader will return
172/// Ok(0) even if there are more bytes available in the underlying reader.
173/// If you need the following bytes, call `into_inner()` after Ok(0) to
174/// recover the underlying reader.
175///
176/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`]
177/// or read more
178/// [in the introduction](../index.html#about-multi-member-gzip-files).
179///
180/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
181/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
182/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
183///
184/// # Examples
185///
186/// ```
187/// use std::io::prelude::*;
188/// use std::io;
189/// # use flate2::Compression;
190/// # use flate2::write::GzEncoder;
191/// use flate2::bufread::GzDecoder;
192///
193/// # fn main() {
194/// #   let mut e = GzEncoder::new(Vec::new(), Compression::default());
195/// #   e.write_all(b"Hello World").unwrap();
196/// #   let bytes = e.finish().unwrap();
197/// #   println!("{}", decode_reader(bytes).unwrap());
198/// # }
199/// #
200/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
201/// // Here &[u8] implements BufRead
202///
203/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
204///    let mut gz = GzDecoder::new(&bytes[..]);
205///    let mut s = String::new();
206///    gz.read_to_string(&mut s)?;
207///    Ok(s)
208/// }
209/// ```
210#[derive(Debug)]
211pub struct GzDecoder<R> {
212    state: GzState,
213    reader: CrcReader<deflate::bufread::DeflateDecoder<R>>,
214    multi: bool,
215}
216
217#[derive(Debug)]
218enum GzState {
219    Header(GzHeaderParser),
220    Body(GzHeader),
221    Finished(GzHeader, usize, [u8; 8]),
222    Err(io::Error),
223    End(Option<GzHeader>),
224}
225
226impl<R: BufRead> GzDecoder<R> {
227    /// Creates a new decoder from the given reader, immediately parsing the
228    /// gzip header.
229    pub fn new(mut r: R) -> GzDecoder<R> {
230        let mut header_parser = GzHeaderParser::new();
231
232        let state = match header_parser.parse(&mut r) {
233            Ok(_) => GzState::Body(GzHeader::from(header_parser)),
234            Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => {
235                GzState::Header(header_parser)
236            }
237            Err(err) => GzState::Err(err),
238        };
239
240        GzDecoder {
241            state,
242            reader: CrcReader::new(deflate::bufread::DeflateDecoder::new(r)),
243            multi: false,
244        }
245    }
246
247    fn multi(mut self, flag: bool) -> GzDecoder<R> {
248        self.multi = flag;
249        self
250    }
251}
252
253impl<R> GzDecoder<R> {
254    /// Returns the header associated with this stream, if it was valid
255    pub fn header(&self) -> Option<&GzHeader> {
256        match &self.state {
257            GzState::Body(header) | GzState::Finished(header, _, _) => Some(header),
258            GzState::End(header) => header.as_ref(),
259            _ => None,
260        }
261    }
262
263    /// Acquires a reference to the underlying reader.
264    pub fn get_ref(&self) -> &R {
265        self.reader.get_ref().get_ref()
266    }
267
268    /// Acquires a mutable reference to the underlying stream.
269    ///
270    /// Note that mutation of the stream may result in surprising results if
271    /// this decoder is continued to be used.
272    pub fn get_mut(&mut self) -> &mut R {
273        self.reader.get_mut().get_mut()
274    }
275
276    /// Consumes this decoder, returning the underlying reader.
277    pub fn into_inner(self) -> R {
278        self.reader.into_inner().into_inner()
279    }
280}
281
282impl<R: BufRead> Read for GzDecoder<R> {
283    fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
284        loop {
285            match &mut self.state {
286                GzState::Header(parser) => {
287                    parser.parse(self.reader.get_mut().get_mut())?;
288                    self.state = GzState::Body(GzHeader::from(mem::take(parser)));
289                }
290                GzState::Body(header) => {
291                    if into.is_empty() {
292                        return Ok(0);
293                    }
294                    match self.reader.read(into)? {
295                        0 => {
296                            self.state = GzState::Finished(mem::take(header), 0, [0; 8]);
297                        }
298                        n => {
299                            return Ok(n);
300                        }
301                    }
302                }
303                GzState::Finished(header, pos, buf) => {
304                    if *pos < buf.len() {
305                        *pos += read_into(self.reader.get_mut().get_mut(), &mut buf[*pos..])?;
306                    } else {
307                        let (crc, amt) = finish(&buf);
308
309                        if crc != self.reader.crc().sum() || amt != self.reader.crc().amount() {
310                            self.state = GzState::End(Some(mem::take(header)));
311                            return Err(corrupt());
312                        } else if self.multi {
313                            let is_eof = self
314                                .reader
315                                .get_mut()
316                                .get_mut()
317                                .fill_buf()
318                                .map(|buf| buf.is_empty())?;
319
320                            if is_eof {
321                                self.state = GzState::End(Some(mem::take(header)));
322                            } else {
323                                self.reader.reset();
324                                self.reader.get_mut().reset_data();
325                                self.state = GzState::Header(GzHeaderParser::new())
326                            }
327                        } else {
328                            self.state = GzState::End(Some(mem::take(header)));
329                        }
330                    }
331                }
332                GzState::Err(err) => {
333                    let result = Err(mem::replace(err, io::ErrorKind::Other.into()));
334                    self.state = GzState::End(None);
335                    return result;
336                }
337                GzState::End(_) => return Ok(0),
338            }
339        }
340    }
341}
342
343impl<R: BufRead + Write> Write for GzDecoder<R> {
344    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
345        self.get_mut().write(buf)
346    }
347
348    fn flush(&mut self) -> io::Result<()> {
349        self.get_mut().flush()
350    }
351}
352
353/// A gzip streaming decoder that decodes a [gzip file] that may have multiple members.
354///
355/// This structure implements a [`Read`] interface. When read from, it reads
356/// compressed data from the underlying [`BufRead`] and provides the uncompressed data.
357///
358/// A gzip file consists of a series of *members* concatenated one after another.
359/// MultiGzDecoder decodes all members from the data and only returns Ok(0) when the
360/// underlying reader does. For a file, this reads to the end of the file.
361///
362/// To handle members separately, see [GzDecoder] or read more
363/// [in the introduction](../index.html#about-multi-member-gzip-files).
364///
365/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
366/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
367/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
368///
369/// # Examples
370///
371/// ```
372/// use std::io::prelude::*;
373/// use std::io;
374/// # use flate2::Compression;
375/// # use flate2::write::GzEncoder;
376/// use flate2::bufread::MultiGzDecoder;
377///
378/// # fn main() {
379/// #   let mut e = GzEncoder::new(Vec::new(), Compression::default());
380/// #   e.write_all(b"Hello World").unwrap();
381/// #   let bytes = e.finish().unwrap();
382/// #   println!("{}", decode_reader(bytes).unwrap());
383/// # }
384/// #
385/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
386/// // Here &[u8] implements BufRead
387///
388/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
389///    let mut gz = MultiGzDecoder::new(&bytes[..]);
390///    let mut s = String::new();
391///    gz.read_to_string(&mut s)?;
392///    Ok(s)
393/// }
394/// ```
395#[derive(Debug)]
396pub struct MultiGzDecoder<R>(GzDecoder<R>);
397
398impl<R: BufRead> MultiGzDecoder<R> {
399    /// Creates a new decoder from the given reader, immediately parsing the
400    /// (first) gzip header. If the gzip stream contains multiple members all will
401    /// be decoded.
402    pub fn new(r: R) -> MultiGzDecoder<R> {
403        MultiGzDecoder(GzDecoder::new(r).multi(true))
404    }
405}
406
407impl<R> MultiGzDecoder<R> {
408    /// Returns the current header associated with this stream, if it's valid
409    pub fn header(&self) -> Option<&GzHeader> {
410        self.0.header()
411    }
412
413    /// Acquires a reference to the underlying reader.
414    pub fn get_ref(&self) -> &R {
415        self.0.get_ref()
416    }
417
418    /// Acquires a mutable reference to the underlying stream.
419    ///
420    /// Note that mutation of the stream may result in surprising results if
421    /// this decoder is continued to be used.
422    pub fn get_mut(&mut self) -> &mut R {
423        self.0.get_mut()
424    }
425
426    /// Consumes this decoder, returning the underlying reader.
427    pub fn into_inner(self) -> R {
428        self.0.into_inner()
429    }
430}
431
432impl<R: BufRead> Read for MultiGzDecoder<R> {
433    fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
434        self.0.read(into)
435    }
436}
437
#[cfg(test)]
mod test {
    use crate::bufread::GzDecoder;
    use crate::gz::write;
    use crate::Compression;
    use std::io::{Read, Write};

    // GzDecoder consumes one gzip member and then returns 0 for subsequent reads, allowing any
    // additional data to be consumed by the caller.
    #[test]
    fn decode_extra_data() {
        let expected = "Hello World";

        let compressed = {
            let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
            // `write_all` rather than `write`: a bare `write` may accept only
            // part of the input, and its byte count was being ignored.
            e.write_all(expected.as_ref()).unwrap();
            let mut b = e.finish().unwrap();
            b.push(b'x');
            b
        };

        let mut output = Vec::new();
        let mut decoder = GzDecoder::new(compressed.as_slice());
        let decoded_bytes = decoder.read_to_end(&mut output).unwrap();
        assert_eq!(decoded_bytes, output.len());
        let actual = std::str::from_utf8(&output).expect("String parsing error");
        assert_eq!(
            actual, expected,
            "after decompression we obtain the original input"
        );

        // Read into a non-empty buffer: a zero-length destination returns 0
        // regardless of decoder state and would not exercise the end-of-member
        // behavior.
        let mut scratch = [0u8; 8];
        assert_eq!(
            decoder.read(&mut scratch).unwrap(),
            0,
            "subsequent read of decoder returns 0, but inner reader can return additional data"
        );

        output.clear();
        let mut reader = decoder.into_inner();
        assert_eq!(
            reader.read_to_end(&mut output).unwrap(),
            1,
            "extra data is accessible in underlying buf-read"
        );
        assert_eq!(output, b"x");
    }
}
483}