miniz_oxide/inflate/core.rs
//! Streaming decompression functionality.

use super::*;
use crate::shared::{update_adler32, HUFFMAN_LENGTH_ORDER};
use ::core::cell::Cell;

use ::core::cmp;
use ::core::convert::TryInto;

use self::output_buffer::{InputWrapper, OutputBuffer};

pub const TINFL_LZ_DICT_SIZE: usize = 32_768;

/// A struct containing huffman code lengths and the huffman code tree used by the decompressor.
#[derive(Clone)]
struct HuffmanTable {
    /// Fast lookup table for shorter huffman codes.
    ///
    /// See `HuffmanTable::fast_lookup`.
    pub look_up: [i16; FAST_LOOKUP_SIZE as usize],
    /// Full huffman tree.
    ///
    /// Positive values are edge nodes/symbols, negative values are
    /// parent nodes/references to other nodes.
    pub tree: [i16; MAX_HUFF_TREE_SIZE],
}

impl HuffmanTable {
    const fn new() -> HuffmanTable {
        HuffmanTable {
            look_up: [0; FAST_LOOKUP_SIZE as usize],
            tree: [0; MAX_HUFF_TREE_SIZE],
        }
    }

    /// Look for a symbol in the fast lookup table.
    /// The symbol is stored in the lower 9 bits, the length in the next 6.
    /// If the returned value is negative, the code wasn't found in the
    /// fast lookup table and the full tree has to be traversed to find the code.
    #[inline]
    fn fast_lookup(&self, bit_buf: BitBuffer) -> i16 {
        self.look_up[(bit_buf & BitBuffer::from(FAST_LOOKUP_SIZE - 1)) as usize]
    }

    /// Get the symbol and the code length from the huffman tree.
    #[inline]
    fn tree_lookup(&self, fast_symbol: i32, bit_buf: BitBuffer, mut code_len: u8) -> (i32, u32) {
        let mut symbol = fast_symbol;
        // We step through the tree until we encounter a positive value, which indicates a
        // symbol.
        loop {
            // symbol here indicates the position of the left (0) node, if the next bit is 1
            // we add 1 to the lookup position to get the right node.
            let tree_index = (!symbol + ((bit_buf >> code_len) & 1) as i32) as usize;

            // Use get here to avoid generating panic code.
            // The init_tree code should prevent this from actually going out of bounds,
            // but if there were somehow a bug with that
            // we would at worst end up with corrupted output in release mode.
            debug_assert!(tree_index < self.tree.len());
            symbol = i32::from(self.tree.get(tree_index).copied().unwrap_or(i16::MAX));
            code_len += 1;
            if symbol >= 0 {
                break;
            }
        }
        // Note: Using a u8 for code_len inside this function seems to improve performance, but changing it
        // in localvars seems to worsen things so we convert it to a u32 here.
        (symbol, u32::from(code_len))
    }

    #[inline]
    /// Look up a symbol and code length from the bits in the provided bit buffer.
    ///
    /// Returns the decoded symbol together with its code length.
    ///
    /// It's possible we could avoid checking for 0 if we can guarantee a sane table.
    /// TODO: Check if a smaller type for code_len helps performance.
    fn lookup(&self, bit_buf: BitBuffer) -> (i32, u32) {
        let symbol = self.fast_lookup(bit_buf).into();
        if symbol >= 0 {
            let length = (symbol >> 9) as u32;
            (symbol, length)
        } else {
            // We didn't get a symbol from the fast lookup table, so check the tree instead.
            self.tree_lookup(symbol, bit_buf, FAST_LOOKUP_BITS)
        }
    }
}
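
// Note on the fast-table encoding used above: `init_tree` stores entries as
// `(code_len << 9) | symbol`, so `entry & 511` recovers the symbol and
// `entry >> 9` its code length. For example, in a static table the
// end-of-block symbol 256 has a 7-bit code and is stored as `(7 << 9) | 256`.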

/// The number of huffman tables used.
const MAX_HUFF_TABLES: usize = 3;
/// The length of the first (literal/length) huffman table.
const MAX_HUFF_SYMBOLS_0: usize = 288;
/// The length of the second (distance) huffman table.
const MAX_HUFF_SYMBOLS_1: usize = 32;
/// The length of the last (huffman code length) huffman table.
const MAX_HUFF_SYMBOLS_2: usize = 19;
/// The maximum length of a code that can be looked up in the fast lookup table.
const FAST_LOOKUP_BITS: u8 = 10;
/// The size of the fast lookup table.
const FAST_LOOKUP_SIZE: u32 = 1 << FAST_LOOKUP_BITS;
const MAX_HUFF_TREE_SIZE: usize = MAX_HUFF_SYMBOLS_0 * 2;
const LITLEN_TABLE: usize = 0;
const DIST_TABLE: usize = 1;
const HUFFLEN_TABLE: usize = 2;

/// Flags to [`decompress()`] to control how inflation works.
///
/// These define bits for a bitmask argument.
pub mod inflate_flags {
    /// Should we try to parse a zlib header?
    ///
    /// If unset, the function will expect an RFC1951 deflate stream. If set, it will expect an
    /// RFC1950 zlib wrapper around the deflate stream.
    pub const TINFL_FLAG_PARSE_ZLIB_HEADER: u32 = 1;

    /// There will be more input that hasn't been given to the decompressor yet.
    ///
    /// This is useful when you want to decompress what you have so far,
    /// even if you know there is probably more input that hasn't gotten here yet (_e.g._, over a
    /// network connection). When [`decompress()`][super::decompress] reaches the end of the input
    /// without finding the end of the compressed stream, it will return
    /// [`TINFLStatus::NeedsMoreInput`][super::TINFLStatus::NeedsMoreInput] if this is set,
    /// indicating that you should get more data before calling again. If not set, it will return
    /// [`TINFLStatus::FailedCannotMakeProgress`][super::TINFLStatus::FailedCannotMakeProgress]
    /// suggesting the stream is corrupt, since you claimed it was all there.
    pub const TINFL_FLAG_HAS_MORE_INPUT: u32 = 2;

    /// The output buffer should not wrap around.
    pub const TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: u32 = 4;

    /// Calculate the adler32 checksum of the output data even if we're not inflating a zlib stream.
    ///
    /// If [`TINFL_FLAG_IGNORE_ADLER32`] is specified, it will override this.
    ///
    /// NOTE: Enabling/disabling this between calls to decompress will result in an incorrect
    /// checksum.
    pub const TINFL_FLAG_COMPUTE_ADLER32: u32 = 8;

    /// Ignore adler32 checksum even if we are inflating a zlib stream.
    ///
    /// Overrides [`TINFL_FLAG_COMPUTE_ADLER32`] if both are enabled.
    ///
    /// NOTE: This flag does not exist in miniz as it does not support this, and is a
    /// custom addition for miniz_oxide.
    ///
    /// NOTE: Should not be changed from enabled to disabled after decompression has started,
    /// as this will result in checksum failure (outside the unlikely event where the checksum
    /// happens to match anyway).
    pub const TINFL_FLAG_IGNORE_ADLER32: u32 = 64;
}

use self::inflate_flags::*;

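/// The minimum table sizes defined by RFC 1951: the counts stored in a
/// dynamic block header are added to 257 (HLIT), 1 (HDIST) and 4 (HCLEN)
/// to get the actual number of symbols in each table.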
const MIN_TABLE_SIZES: [u16; 3] = [257, 1, 4];

#[cfg(target_pointer_width = "64")]
type BitBuffer = u64;

#[cfg(not(target_pointer_width = "64"))]
type BitBuffer = u32;

/*
enum HuffmanTableType {
    LiteralLength = 0,
    Dist = 1,
    Huffman = 2,
}*/

/// Main decompression struct.
#[derive(Clone)]
pub struct DecompressorOxide {
    /// Current state of the decompressor.
    state: core::State,
    /// Number of bits in the bit buffer.
    num_bits: u32,
    /// Zlib CMF
    z_header0: u32,
    /// Zlib FLG
    z_header1: u32,
    /// Adler32 checksum from the zlib header.
    z_adler32: u32,
    /// 1 if the current block is the last block, 0 otherwise.
    finish: u8,
    /// The type of the current block,
    /// or, if in a dynamic block, which huffman table we are currently
    /// initializing.
    block_type: u8,
    /// Running adler32 checksum of the decompressed data.
    check_adler32: u32,
    /// Last match distance.
    dist: u32,
    /// Variable used for match length, symbols, and a number of other things.
    counter: u32,
    /// Number of extra bits for the last length or distance code.
    num_extra: u8,
    /// Number of entries in each huffman table.
    table_sizes: [u16; MAX_HUFF_TABLES],
    /// Buffer of input bits.
    bit_buf: BitBuffer,
    /// Huffman tables.
    tables: [HuffmanTable; MAX_HUFF_TABLES],
    code_size_literal: [u8; MAX_HUFF_SYMBOLS_0],
    code_size_dist: [u8; MAX_HUFF_SYMBOLS_1],
    code_size_huffman: [u8; MAX_HUFF_SYMBOLS_2],
    /// Raw block header.
    raw_header: [u8; 4],
    /// Huffman length codes.
    len_codes: [u8; MAX_HUFF_SYMBOLS_0 + MAX_HUFF_SYMBOLS_1 + 137],
}

impl DecompressorOxide {
    /// Create a new tinfl_decompressor with all fields set to 0.
    pub fn new() -> DecompressorOxide {
        DecompressorOxide::default()
    }

    /// Set the current state to `Start`.
    #[inline]
    pub fn init(&mut self) {
        // The rest of the data is reset or overwritten when used.
        self.state = core::State::Start;
    }

    /// Returns the adler32 checksum of the currently decompressed data.
    /// Note: Will return Some(1) if decompressing zlib but ignoring adler32.
    #[inline]
    pub fn adler32(&self) -> Option<u32> {
        if self.state != State::Start && !self.state.is_failure() && self.z_header0 != 0 {
            Some(self.check_adler32)
        } else {
            None
        }
    }

    /// Returns the adler32 that was read from the zlib header if it exists.
    #[inline]
    pub fn adler32_header(&self) -> Option<u32> {
        if self.state != State::Start && self.state != State::BadZlibHeader && self.z_header0 != 0 {
            Some(self.z_adler32)
        } else {
            None
        }
    }

    // Get the zlib header for tests.
    // Only for tests for now; we may provide a proper public function for this later.
    #[cfg(all(test, feature = "with-alloc"))]
    pub(crate) const fn zlib_header(&self) -> (u32, u32) {
        (self.z_header0, self.z_header1)
    }

    /*fn code_size_table(&mut self, table_num: u8) -> &mut [u8] {
        match table_num {
            0 => &mut self.code_size_literal,
            1 => &mut self.code_size_dist,
            _ => &mut self.code_size_huffman,
        }
    }*/
}
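
// A usage sketch: a single decompressor can be reused across streams by
// re-initializing it in between, since `init()` resets the state machine and
// the remaining fields are reset or overwritten as decompression proceeds.
//
// let mut r = DecompressorOxide::new();
// // ... decompress the first stream to completion ...
// r.init(); // ready for the next stream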

impl Default for DecompressorOxide {
    /// Create a new tinfl_decompressor with all fields set to 0.
    #[inline(always)]
    fn default() -> Self {
        DecompressorOxide {
            state: core::State::Start,
            num_bits: 0,
            z_header0: 0,
            z_header1: 0,
            z_adler32: 0,
            finish: 0,
            block_type: 0,
            check_adler32: 0,
            dist: 0,
            counter: 0,
            num_extra: 0,
            table_sizes: [0; MAX_HUFF_TABLES],
            bit_buf: 0,
            // TODO:(oyvindln) Check that copies here are optimized out in release mode.
            tables: [
                HuffmanTable::new(),
                HuffmanTable::new(),
                HuffmanTable::new(),
            ],
            code_size_literal: [0; MAX_HUFF_SYMBOLS_0],
            code_size_dist: [0; MAX_HUFF_SYMBOLS_1],
            code_size_huffman: [0; MAX_HUFF_SYMBOLS_2],
            raw_header: [0; 4],
            len_codes: [0; MAX_HUFF_SYMBOLS_0 + MAX_HUFF_SYMBOLS_1 + 137],
        }
    }
}

#[derive(Copy, Clone, PartialEq, Eq, Debug)]
#[non_exhaustive]
enum State {
    Start = 0,
    ReadZlibCmf,
    ReadZlibFlg,
    ReadBlockHeader,
    BlockTypeNoCompression,
    RawHeader,
    RawMemcpy1,
    RawMemcpy2,
    ReadTableSizes,
    ReadHufflenTableCodeSize,
    ReadLitlenDistTablesCodeSize,
    ReadExtraBitsCodeSize,
    DecodeLitlen,
    WriteSymbol,
    ReadExtraBitsLitlen,
    DecodeDistance,
    ReadExtraBitsDistance,
    RawReadFirstByte,
    RawStoreFirstByte,
    WriteLenBytesToEnd,
    BlockDone,
    HuffDecodeOuterLoop1,
    HuffDecodeOuterLoop2,
    ReadAdler32,

    DoneForever,

    // Failure states.
    BlockTypeUnexpected,
    BadCodeSizeSum,
    BadDistOrLiteralTableLength,
    BadTotalSymbols,
    BadZlibHeader,
    DistanceOutOfBounds,
    BadRawLength,
    BadCodeSizeDistPrevLookup,
    InvalidLitlen,
    InvalidDist,
}

impl State {
    const fn is_failure(self) -> bool {
        matches!(
            self,
            BlockTypeUnexpected
                | BadCodeSizeSum
                | BadDistOrLiteralTableLength
                | BadTotalSymbols
                | BadZlibHeader
                | DistanceOutOfBounds
                | BadRawLength
                | BadCodeSizeDistPrevLookup
                | InvalidLitlen
                | InvalidDist
        )
    }

    #[inline]
    fn begin(&mut self, new_state: State) {
        *self = new_state;
    }
}

use self::State::*;

// # Optimization
// We add extra values at the end and make the tables 32 elements long
// so we can use a mask to avoid bounds checks.
// The invalid values are set to something high enough to avoid underflowing
// the match length.
/// Base length for each length code.
///
/// The base is used together with the value of the extra bits to decode the actual
/// length/distance values in a match.
#[rustfmt::skip]
const LENGTH_BASE: [u16; 32] = [
    3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
    35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 512, 512, 512
];

/// Number of extra bits for each length code.
#[rustfmt::skip]
const LENGTH_EXTRA: [u8; 32] = [
    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
    3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 0, 0, 0
];

/// Base length for each distance code.
#[rustfmt::skip]
const DIST_BASE: [u16; 30] = [
    1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33,
    49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537,
    2049, 3073, 4097, 6145, 8193, 12_289, 16_385, 24_577
];

/// Get the number of extra bits used for a distance code.
/// (Code numbers above `NUM_DISTANCE_CODES` will give some garbage
/// value.)
#[inline(always)]
const fn num_extra_bits_for_distance_code(code: u8) -> u8 {
    // TODO: Need to verify that this is faster on all platforms.
    // This can be easily calculated without a lookup.
    let c = code >> 1;
    c.saturating_sub(1)
}
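
// Sanity check of the formula above against RFC 1951: distance codes 0..=3
// use 0 extra bits, each following pair of codes adds one bit, and codes
// 28..=29 use 13. E.g. code 3 -> (3 >> 1) - 1 saturates to 0, code 4 -> 1,
// and code 28 -> (28 >> 1) - 1 = 13.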

/// The mask used when indexing the base/extra arrays.
const BASE_EXTRA_MASK: usize = 32 - 1;

/// Read a little-endian u16 value from the slice iterator.
///
/// # Panics
/// Panics if there are fewer than two bytes left.
#[inline]
fn read_u16_le(iter: &mut InputWrapper) -> u16 {
    let ret = {
        let two_bytes = iter.as_slice()[..2].try_into().unwrap_or_default();
        u16::from_le_bytes(two_bytes)
    };
    iter.advance(2);
    ret
}

/// Ensure that there is data in the bit buffer.
///
/// On 64-bit platforms, we use a 64-bit value so this will
/// result in there being at least 32 bits in the bit buffer.
/// This function assumes that there are at least 4 bytes left in the input buffer.
#[inline(always)]
#[cfg(target_pointer_width = "64")]
fn fill_bit_buffer(l: &mut LocalVars, in_iter: &mut InputWrapper) {
    // Read four bytes into the buffer at once.
    if l.num_bits < 30 {
        l.bit_buf |= BitBuffer::from(in_iter.read_u32_le()) << l.num_bits;
        l.num_bits += 32;
    }
}

/// Same as previous, but for non-64-bit platforms.
/// Ensures at least 16 bits are present; requires at least 2 bytes in the input buffer.
#[inline(always)]
#[cfg(not(target_pointer_width = "64"))]
fn fill_bit_buffer(l: &mut LocalVars, in_iter: &mut InputWrapper) {
    // If the buffer is 32-bit wide, read 2 bytes instead.
    if l.num_bits < 15 {
        l.bit_buf |= BitBuffer::from(read_u16_le(in_iter)) << l.num_bits;
        l.num_bits += 16;
    }
}

/// Check that the zlib header is correct and that there is enough space in the buffer
/// for the window size specified in the header.
///
/// See <https://tools.ietf.org/html/rfc1950>
#[inline]
const fn validate_zlib_header(cmf: u32, flg: u32, flags: u32, mask: usize) -> Action {
    let mut failed =
        // cmf + flg should be divisible by 31.
        (((cmf * 256) + flg) % 31 != 0) ||
        // If this flag is set, a dictionary was used for this zlib compressed data.
        // This is currently not supported by miniz or miniz-oxide.
        ((flg & 0b0010_0000) != 0) ||
        // Compression method. Only 8 (DEFLATE) is defined by the standard.
        ((cmf & 15) != 8);

    let window_size = 1 << ((cmf >> 4) + 8);
    if (flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF) == 0 {
        // Bail if the buffer is wrapping and the window size is larger than the buffer.
        failed |= (mask + 1) < window_size;
    }

    // Zlib doesn't allow window sizes above 32 * 1024.
    failed |= window_size > 32_768;

    if failed {
        Action::Jump(BadZlibHeader)
    } else {
        Action::Jump(ReadBlockHeader)
    }
}
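
// Worked example: the common zlib header bytes CMF = 0x78, FLG = 0x9C pass
// all of the checks above: the method nibble is 8 (DEFLATE), the window size
// is 1 << (7 + 8) = 32_768, the preset-dictionary bit (0x20) is clear, and
// 0x78 * 256 + 0x9C = 30_876 = 31 * 996, so the divisibility test holds.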

enum Action {
    None,
    Jump(State),
    End(TINFLStatus),
}

/// Try to decode the next huffman code, calling `f` with the decoded symbol
/// if successful.
///
/// # Returns
/// The specified action returned from `f` on success,
/// `Action::End` if there is not enough data left to decode a symbol.
fn decode_huffman_code<F>(
    r: &mut DecompressorOxide,
    l: &mut LocalVars,
    table: usize,
    flags: u32,
    in_iter: &mut InputWrapper,
    f: F,
) -> Action
where
    F: FnOnce(&mut DecompressorOxide, &mut LocalVars, i32) -> Action,
{
    // As the huffman codes can be up to 15 bits long we need at least 15 bits
    // ready in the bit buffer to start decoding the next huffman code.
    if l.num_bits < 15 {
        // First, make sure there is enough data in the bit buffer to decode a huffman code.
        if in_iter.bytes_left() < 2 {
            // If there are fewer than 2 bytes left in the input buffer, we try to look up
            // the huffman code with what's available, and return if that doesn't succeed.
            // Original explanation in miniz:
            // /* TINFL_HUFF_BITBUF_FILL() is only used rarely, when the number of bytes
            //  * remaining in the input buffer falls below 2. It reads just enough bytes
            //  * from the input stream that are needed to decode the next Huffman code
            //  * (and absolutely no more). It works by trying to fully decode a Huffman
            //  * code by using whatever bits are currently present in the bit buffer.
            //  * If this fails, it reads another byte, and tries again until it succeeds
            //  * or until the bit buffer contains >=15 bits (deflate's max. Huffman code
            //  * size). */
            loop {
                let mut temp = i32::from(r.tables[table].fast_lookup(l.bit_buf));
                if temp >= 0 {
                    let code_len = (temp >> 9) as u32;
                    // TODO: Is there any point to check for code_len != 0 here still?
                    if (code_len != 0) && (l.num_bits >= code_len) {
                        break;
                    }
                } else if l.num_bits > FAST_LOOKUP_BITS.into() {
                    let mut code_len = u32::from(FAST_LOOKUP_BITS);
                    loop {
                        temp = i32::from(
                            r.tables[table].tree
                                [(!temp + ((l.bit_buf >> code_len) & 1) as i32) as usize],
                        );
                        code_len += 1;
                        if temp >= 0 || l.num_bits < code_len + 1 {
                            break;
                        }
                    }
                    if temp >= 0 {
                        break;
                    }
                }

                // TODO: miniz jumps straight to here after getting here again after failing
                // to read a byte.
                // Doing that lets miniz avoid re-doing the lookup that was done in the
                // previous call.
                let mut byte = 0;
                if let a @ Action::End(_) = read_byte(in_iter, flags, |b| {
                    byte = b;
                    Action::None
                }) {
                    return a;
                };

                // Do this outside closure for now to avoid borrowing r.
                l.bit_buf |= BitBuffer::from(byte) << l.num_bits;
                l.num_bits += 8;

                if l.num_bits >= 15 {
                    break;
                }
            }
        } else {
            // There is enough data in the input buffer, so read the next two bytes
            // and add them to the bit buffer.
            // Unwrapping here is fine since we just checked that there are at least two
            // bytes left.
            l.bit_buf |= BitBuffer::from(read_u16_le(in_iter)) << l.num_bits;
            l.num_bits += 16;
        }
    }

    // We now have at least 15 bits in the bit buffer.
    let mut symbol = i32::from(r.tables[table].fast_lookup(l.bit_buf));
    let code_len;
    // If the symbol was found in the fast lookup table.
    if symbol >= 0 {
        // Get the length value from the top bits.
        // As we shift down the sign bit, converting to an unsigned value
        // shouldn't overflow.
        code_len = (symbol >> 9) as u32;
        // Mask out the length value.
        symbol &= 511;
    } else {
        let res = r.tables[table].tree_lookup(symbol, l.bit_buf, FAST_LOOKUP_BITS);
        symbol = res.0;
        code_len = res.1;
    };

    l.bit_buf >>= code_len;
    l.num_bits -= code_len;
    f(r, l, symbol)
}

/// Try to read one byte from `in_iter` and call `f` with the read byte as an argument,
/// returning the result.
/// If reading fails, `Action::End` is returned.
#[inline]
fn read_byte<F>(in_iter: &mut InputWrapper, flags: u32, f: F) -> Action
where
    F: FnOnce(u8) -> Action,
{
    match in_iter.read_byte() {
        None => end_of_input(flags),
        Some(byte) => f(byte),
    }
}

// TODO: `l: &mut LocalVars` may be slow similar to decompress_fast (even with inline(always))
/// Try to read `amount` number of bits from `in_iter` and call the function `f` with the bits
/// as an argument after reading, returning the result of that function, or `Action::End` if
/// there are not enough bytes left.
#[inline]
#[allow(clippy::while_immutable_condition)]
fn read_bits<F>(
    l: &mut LocalVars,
    amount: u32,
    in_iter: &mut InputWrapper,
    flags: u32,
    f: F,
) -> Action
where
    F: FnOnce(&mut LocalVars, BitBuffer) -> Action,
{
    // Clippy gives a false positive warning here due to the closure.
    // Read enough bytes from the input iterator to cover the number of bits we want.
    while l.num_bits < amount {
        let action = read_byte(in_iter, flags, |byte| {
            l.bit_buf |= BitBuffer::from(byte) << l.num_bits;
            l.num_bits += 8;
            Action::None
        });

        // If there are not enough bytes in the input iterator, return and signal that we
        // need more.
        if !matches!(action, Action::None) {
            return action;
        }
    }

    let bits = l.bit_buf & ((1 << amount) - 1);
    l.bit_buf >>= amount;
    l.num_bits -= amount;
    f(l, bits)
}

#[inline]
fn pad_to_bytes<F>(l: &mut LocalVars, in_iter: &mut InputWrapper, flags: u32, f: F) -> Action
where
    F: FnOnce(&mut LocalVars) -> Action,
{
    let num_bits = l.num_bits & 7;
    read_bits(l, num_bits, in_iter, flags, |l, _| f(l))
}
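
// Worked example: with 13 bits currently buffered, `13 & 7 == 5`, so five
// bits are read and discarded, leaving 8 bits, i.e. the stream is back on a
// whole-byte boundary.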

#[inline]
const fn end_of_input(flags: u32) -> Action {
    Action::End(if flags & TINFL_FLAG_HAS_MORE_INPUT != 0 {
        TINFLStatus::NeedsMoreInput
    } else {
        TINFLStatus::FailedCannotMakeProgress
    })
}

#[inline]
fn undo_bytes(l: &mut LocalVars, max: u32) -> u32 {
    let res = cmp::min(l.num_bits >> 3, max);
    l.num_bits -= res << 3;
    res
}
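
// Worked example: with 17 bits buffered and `max >= 2`, `min(17 >> 3, max)`
// returns 2 whole bytes to the input, leaving `num_bits` at 1.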

fn start_static_table(r: &mut DecompressorOxide) {
    r.table_sizes[LITLEN_TABLE] = 288;
    r.table_sizes[DIST_TABLE] = 32;
    r.code_size_literal[0..144].fill(8);
    r.code_size_literal[144..256].fill(9);
    r.code_size_literal[256..280].fill(7);
    r.code_size_literal[280..288].fill(8);
    r.code_size_dist[0..32].fill(5);
}
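
// These are the fixed-block code lengths from RFC 1951 section 3.2.6:
// literal/length symbols 0-143 use 8 bits, 144-255 use 9 bits, 256-279 use
// 7 bits, 280-287 use 8 bits, and all 32 distance codes use 5 bits.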

#[cfg(any(
    feature = "rustc-dep-of-std",
    target_arch = "aarch64",
    target_arch = "arm64ec",
    target_arch = "loongarch64"
))]
fn reverse_bits(n: u32) -> u32 {
    // The lookup table is not used when building as part of std, to avoid wasting space
    // for it in every rust binary,
    // as it's only used for backtraces in the cold path
    // - see #152

    // armv7 and newer, and loongarch have a cpu instruction for bit reversal so
    // it's preferable to just use that on those architectures.
    n.reverse_bits()
}

#[cfg(not(any(
    feature = "rustc-dep-of-std",
    target_arch = "aarch64",
    target_arch = "arm64ec",
    target_arch = "loongarch64"
)))]
fn reverse_bits(n: u32) -> u32 {
    static REVERSED_BITS_LOOKUP: [u32; 512] = {
        let mut table = [0; 512];

        let mut i = 0;
        while i < 512 {
            table[i] = (i as u32).reverse_bits();
            i += 1;
        }

        table
    };
    REVERSED_BITS_LOOKUP[n as usize]
}

fn init_tree(r: &mut DecompressorOxide, l: &mut LocalVars) -> Option<Action> {
    loop {
        let bt = r.block_type as usize;

        let code_sizes = match bt {
            LITLEN_TABLE => &mut r.code_size_literal[..],
            DIST_TABLE => &mut r.code_size_dist,
            HUFFLEN_TABLE => &mut r.code_size_huffman,
            _ => return None,
        };
        let table = &mut r.tables[bt];

        let mut total_symbols = [0u16; 16];
        let mut next_code = [0u32; 17];
        const INVALID_CODE: i16 = 1 << 9 | 286;
        // Set the values in the fast table to return a
        // non-zero length and an invalid symbol instead of zero
        // so that we do not have to have a check for a zero
        // code length in the hot code path later
        // and can instead error out on the invalid symbol check
        // on bogus input.
        table.look_up.fill(INVALID_CODE);
        // If we are initializing the huffman code length table we can skip
        // this, since those code lengths are stored in 3 bits each and the codes
        // thus can't be longer than 7 bits: only the fast table is used, so the
        // tree won't be accessed and there is no point clearing it.
        // TODO: Avoid creating this table at all.
        if bt != HUFFLEN_TABLE {
            table.tree.fill(0);
        }

        let table_size = r.table_sizes[bt] as usize;
        if table_size > code_sizes.len() {
            return None;
        }
        for &code_size in &code_sizes[..table_size] {
            let cs = code_size as usize;
            if cs >= total_symbols.len() {
                return None;
            }
            total_symbols[cs] += 1;
        }

        let mut used_symbols = 0;
        let mut total = 0u32;
        // Count up the total number of used lengths and check that the table is not under
        // or over-subscribed.
        for (&ts, next) in total_symbols.iter().zip(next_code[1..].iter_mut()).skip(1) {
            used_symbols += ts;
            total += u32::from(ts);
            total <<= 1;
            *next = total;
        }

        // While it's not explicitly stated in the spec, a hufflen table
        // with a single length (or none) would be invalid, as there needs to be
        // at minimum a length for both a non-zero length huffman code for the end of block
        // symbol and one of the codes to represent 0 to make sense - so just reject that
        // here as well.
        //
        // The distance table is allowed to have a single distance code, though according to
        // the spec it is supposed to be accompanied by a second dummy code. It can also be
        // empty, indicating no used codes.
        //
        // The literal/length table cannot be empty as there has to be an end of block symbol.
        // The standard doesn't specify that there should be a dummy code in case of a single
        // symbol (i.e. an empty block). Normally that's not an issue, though the code will
        // have to take that into account later on in case of malformed input.
        if total != 65_536 && (used_symbols > 1 || bt == HUFFLEN_TABLE) {
            return Some(Action::Jump(BadTotalSymbols));
        }

        let mut tree_next = -1;
        for symbol_index in 0..table_size {
            let code_size = code_sizes[symbol_index];
            if code_size == 0 || usize::from(code_size) >= next_code.len() {
                continue;
            }

            let cur_code = next_code[code_size as usize];
            next_code[code_size as usize] += 1;

            let n = cur_code & (u32::MAX >> (32 - code_size));

            let mut rev_code = if n < 512 {
                // Using a lookup table for a small speedup here.
                // Seems to only really make a difference on very short
                // inputs however.
                // 512 seems to be around a sweet spot.
                reverse_bits(n)
            } else {
                n.reverse_bits()
            } >> (32 - code_size);

            if code_size <= FAST_LOOKUP_BITS {
                let k = (i16::from(code_size) << 9) | symbol_index as i16;
                while rev_code < FAST_LOOKUP_SIZE {
                    table.look_up[rev_code as usize] = k;
                    rev_code += 1 << code_size;
                }
                continue;
            }

            let mut tree_cur = table.look_up[(rev_code & (FAST_LOOKUP_SIZE - 1)) as usize];
            if tree_cur == INVALID_CODE {
                table.look_up[(rev_code & (FAST_LOOKUP_SIZE - 1)) as usize] = tree_next;
                tree_cur = tree_next;
                tree_next -= 2;
            }

            rev_code >>= FAST_LOOKUP_BITS - 1;
            for _ in FAST_LOOKUP_BITS + 1..code_size {
                rev_code >>= 1;
                tree_cur -= (rev_code & 1) as i16;
                let tree_index = (-tree_cur - 1) as usize;
                if tree_index >= table.tree.len() {
                    return None;
                }
                if table.tree[tree_index] == 0 {
                    table.tree[tree_index] = tree_next;
                    tree_cur = tree_next;
                    tree_next -= 2;
                } else {
                    tree_cur = table.tree[tree_index];
                }
            }

            rev_code >>= 1;
            tree_cur -= (rev_code & 1) as i16;
            let tree_index = (-tree_cur - 1) as usize;
            if tree_index >= table.tree.len() {
                return None;
            }
            table.tree[tree_index] = symbol_index as i16;
        }

        if r.block_type == HUFFLEN_TABLE as u8 {
            l.counter = 0;
            return Some(Action::Jump(ReadLitlenDistTablesCodeSize));
        }

        if r.block_type == LITLEN_TABLE as u8 {
            break;
        }
        r.block_type -= 1;
    }

    l.counter = 0;

    Some(Action::Jump(DecodeLitlen))
}
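
// Worked example of the canonical code assignment in `init_tree`: the code
// lengths [1, 2, 2] for symbols A, B, C give `next_code` starting values of
// 0 (length 1) and 2 (length 2), producing the codes A = 0, B = 10, C = 11
// (before bit-reversal). This exactly fills the code space, so `total`
// reaches 65_536 and the over/under-subscription check passes.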

// A helper macro for generating the state machine.
//
// As Rust doesn't have fallthrough on matches, we have to return to the match statement
// and jump for each state change. (Which would ideally be optimized away, but often isn't.)
macro_rules! generate_state {
    ($state: ident, $state_machine: tt, $f: expr) => {
        loop {
            match $f {
                Action::None => continue,
                Action::Jump(new_state) => {
                    $state = new_state;
                    continue $state_machine;
                },
                Action::End(result) => break $state_machine result,
            }
        }
    };
}

#[derive(Copy, Clone)]
struct LocalVars {
    pub bit_buf: BitBuffer,
    pub num_bits: u32,
    pub dist: u32,
    pub counter: u32,
    pub num_extra: u8,
}

#[inline]
fn transfer(
    out_slice: &mut [u8],
    mut source_pos: usize,
    mut out_pos: usize,
    match_len: usize,
    out_buf_size_mask: usize,
) {
    // Special case that comes up surprisingly often: in the case that `source_pos`
    // is 1 less than `out_pos`, we can say that the entire range will be the same
    // value and optimize this to be a simple `memset`.
    let source_diff = if source_pos > out_pos {
        source_pos - out_pos
    } else {
        out_pos - source_pos
    };
    if out_buf_size_mask == usize::MAX && source_diff == 1 && out_pos > source_pos {
        let init = out_slice[out_pos - 1];
        let end = (match_len >> 2) * 4 + out_pos;

        out_slice[out_pos..end].fill(init);
        out_pos = end;
        source_pos = end - 1;
    // If the difference between `source_pos` and `out_pos` is greater than 3, we
    // can do slightly better than the naive case by copying everything at once.
    } else if out_buf_size_mask == usize::MAX && source_diff >= 4 && out_pos > source_pos {
        for _ in 0..match_len >> 2 {
            out_slice.copy_within(source_pos..=source_pos + 3, out_pos);
            source_pos += 4;
            out_pos += 4;
        }
    } else {
        for _ in 0..match_len >> 2 {
            out_slice[out_pos] = out_slice[source_pos & out_buf_size_mask];
            out_slice[out_pos + 1] = out_slice[(source_pos + 1) & out_buf_size_mask];
            out_slice[out_pos + 2] = out_slice[(source_pos + 2) & out_buf_size_mask];
            out_slice[out_pos + 3] = out_slice[(source_pos + 3) & out_buf_size_mask];
            source_pos += 4;
            out_pos += 4;
        }
    }

    match match_len & 3 {
        0 => (),
        1 => out_slice[out_pos] = out_slice[source_pos & out_buf_size_mask],
        2 => {
            out_slice[out_pos] = out_slice[source_pos & out_buf_size_mask];
            out_slice[out_pos + 1] = out_slice[(source_pos + 1) & out_buf_size_mask];
        }
        3 => {
            out_slice[out_pos] = out_slice[source_pos & out_buf_size_mask];
            out_slice[out_pos + 1] = out_slice[(source_pos + 1) & out_buf_size_mask];
            out_slice[out_pos + 2] = out_slice[(source_pos + 2) & out_buf_size_mask];
        }
        _ => unreachable!(),
    }
}
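
// Worked example: a match with distance 1 (`source_pos == out_pos - 1`) is
// run-length encoding of a single byte, so the memset special case above
// fills the whole match with `out_slice[out_pos - 1]` instead of copying
// byte by byte.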

/// Presumes that there are at least `match_len` bytes left in the output.
#[inline]
fn apply_match(
    out_slice: &mut [u8],
    out_pos: usize,
    dist: usize,
    match_len: usize,
    out_buf_size_mask: usize,
) {
    debug_assert!(out_pos.checked_add(match_len).unwrap() <= out_slice.len());

    let source_pos = out_pos.wrapping_sub(dist) & out_buf_size_mask;

    if match_len == 3 {
        let out_slice = Cell::from_mut(out_slice).as_slice_of_cells();
        if let Some(dst) = out_slice.get(out_pos..out_pos + 3) {
            // Moving bounds checks before any memory mutation allows the optimizer
            // to combine them together.
            let src = out_slice
                .get(source_pos)
                .zip(out_slice.get((source_pos + 1) & out_buf_size_mask))
                .zip(out_slice.get((source_pos + 2) & out_buf_size_mask));
            if let Some(((a, b), c)) = src {
                // For correctness, the memory reads and writes have to be interleaved.
                // Cells make it possible for read and write references to overlap.
                dst[0].set(a.get());
                dst[1].set(b.get());
                dst[2].set(c.get());
            }
        }
        return;
    }

    if cfg!(not(any(target_arch = "x86", target_arch = "x86_64"))) {
        // We are not on x86 so copy manually.
        transfer(out_slice, source_pos, out_pos, match_len, out_buf_size_mask);
        return;
    }

    if source_pos >= out_pos && (source_pos - out_pos) < match_len {
        transfer(out_slice, source_pos, out_pos, match_len, out_buf_size_mask);
    } else if match_len <= dist && source_pos + match_len < out_slice.len() {
        // The destination and source segments do not intersect and the source does not wrap.
        if source_pos < out_pos {
            let (from_slice, to_slice) = out_slice.split_at_mut(out_pos);
            to_slice[..match_len].copy_from_slice(&from_slice[source_pos..source_pos + match_len]);
        } else {
            let (to_slice, from_slice) = out_slice.split_at_mut(source_pos);
            to_slice[out_pos..out_pos + match_len].copy_from_slice(&from_slice[..match_len]);
        }
    } else {
        transfer(out_slice, source_pos, out_pos, match_len, out_buf_size_mask);
    }
}
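
// Worked example: with a non-wrapping buffer holding b"abc" in positions
// 0..3, calling apply_match with out_pos = 3, dist = 3 and match_len = 3
// gives source_pos = 0, and the match_len == 3 fast path copies `a`, `b`,
// `c` forward, leaving b"abcabc" in the buffer.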

/// Fast inner decompression loop which is run while there are at least
/// 259 bytes left in the output buffer, and at least 14 bytes left in the input buffer
/// (enough for the maximum number of bits one literal plus one full match can consume).
///
/// This was inspired by a similar optimization in zlib, which uses this info to do
/// faster unchecked copies of multiple bytes at a time.
/// Currently we don't do this here, but this function does avoid having to jump through the
/// big match loop on each state change (as Rust does not have fallthrough or gotos at the
/// moment), and already improves decompression speed a fair bit.
fn decompress_fast(
    r: &mut DecompressorOxide,
    in_iter: &mut InputWrapper,
    out_buf: &mut OutputBuffer,
    flags: u32,
    local_vars: &mut LocalVars,
    out_buf_size_mask: usize,
) -> (TINFLStatus, State) {
    // Make a local copy of the most used variables, to avoid having to update and read from
    // values in a random memory location and to encourage more register use.
    let mut l = *local_vars;
    let mut state;

    let status: TINFLStatus = 'o: loop {
        state = State::DecodeLitlen;
        loop {
            // This function assumes that there are at least 259 bytes left in the output
            // buffer, and that there are at least 14 bytes left in the input buffer. 14 input
            // bytes:
            // 15 (prev lit) + 15 (length) + 5 (length extra) + 15 (dist)
            // + 29 + 32 (left in bit buf, including last 13 dist extra) = 111 bits < 14 bytes
            // We need the one extra byte as we may write one length and one full match
            // before checking again.
            if out_buf.bytes_left() < 259 || in_iter.bytes_left() < 14 {
                state = State::DecodeLitlen;
                break 'o TINFLStatus::Done;
            }

            fill_bit_buffer(&mut l, in_iter);

            let (symbol, code_len) = r.tables[LITLEN_TABLE].lookup(l.bit_buf);
            l.counter = symbol as u32;
            l.bit_buf >>= code_len;
            l.num_bits -= code_len;

            if (l.counter & 256) != 0 {
                // The symbol is not a literal.
                break;
            } else {
                // If we have a 32-bit buffer we need to read another two bytes now
                // to have enough bits to keep going.
                if cfg!(not(target_pointer_width = "64")) {
                    fill_bit_buffer(&mut l, in_iter);
                }

                let (symbol, code_len) = r.tables[LITLEN_TABLE].lookup(l.bit_buf);
                l.bit_buf >>= code_len;
                l.num_bits -= code_len;
                // The previous symbol was a literal, so write it directly and check
                // the next one.
                out_buf.write_byte(l.counter as u8);
                if (symbol & 256) != 0 {
                    l.counter = symbol as u32;
                    // The symbol is a length value.
                    break;
                } else {
                    // The symbol is a literal, so write it directly and continue.
                    out_buf.write_byte(symbol as u8);
                }
            }
        }

        // Mask the top bits since they may contain length info.
        l.counter &= 511;
        if l.counter == 256 {
            // We hit the end of block symbol.
            state.begin(BlockDone);
            break 'o TINFLStatus::Done;
        } else if l.counter > 285 {
            // Invalid code.
            // We already verified earlier that the code is > 256.
            state.begin(InvalidLitlen);
            break 'o TINFLStatus::Failed;
        } else {
            // The symbol was a length code.
            // # Optimization
            // Mask the value to avoid bounds checks.
            // While the maximum is checked, the compiler isn't able to know that the
            // value won't wrap around here.
            l.num_extra = LENGTH_EXTRA[(l.counter - 257) as usize & BASE_EXTRA_MASK];
            l.counter = u32::from(LENGTH_BASE[(l.counter - 257) as usize & BASE_EXTRA_MASK]);
            // Length and distance codes have a number of extra bits depending on
            // the base, which together with the base gives us the exact value.

            // We need to make sure we have at least 33 (so min 5 bytes) bits in the buffer
            // at this spot.
            fill_bit_buffer(&mut l, in_iter);
            if l.num_extra != 0 {
                let extra_bits = l.bit_buf & ((1 << l.num_extra) - 1);
                l.bit_buf >>= l.num_extra;
                l.num_bits -= u32::from(l.num_extra);
                l.counter += extra_bits as u32;
            }

            // We found a length code, so a distance code should follow.

            if cfg!(not(target_pointer_width = "64")) {
                fill_bit_buffer(&mut l, in_iter);
            }

            let (mut symbol, code_len) = r.tables[DIST_TABLE].lookup(l.bit_buf);
            symbol &= 511;
            l.bit_buf >>= code_len;
            l.num_bits -= code_len;
            if symbol > 29 {
                state.begin(InvalidDist);
                break 'o TINFLStatus::Failed;
            }

            l.num_extra = num_extra_bits_for_distance_code(symbol as u8);
            l.dist = u32::from(DIST_BASE[symbol as usize]);

            if l.num_extra != 0 {
                fill_bit_buffer(&mut l, in_iter);
                let extra_bits = l.bit_buf & ((1 << l.num_extra) - 1);
                l.bit_buf >>= l.num_extra;
                l.num_bits -= u32::from(l.num_extra);
                l.dist += extra_bits as u32;
            }

            let position = out_buf.position();
            if l.dist as usize > out_buf.position()
                && (flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF != 0)
            {
                // We encountered a distance that refers to a position before
                // the start of the decoded data, so we can't continue.
                state.begin(DistanceOutOfBounds);
                break TINFLStatus::Failed;
            }

            apply_match(
                out_buf.get_mut(),
                position,
                l.dist as usize,
                l.counter as usize,
                out_buf_size_mask,
            );

            out_buf.set_position(position + l.counter as usize);
        }
    };

    *local_vars = l;
    (status, state)
}

/// Main decompression function. Keeps decompressing data from `in_buf` until the `in_buf` is
/// empty, `out` is full, the end of the deflate stream is hit, or there is an error in the
/// deflate stream.
///
/// # Arguments
///
/// `r` is a [`DecompressorOxide`] struct with the state of this stream.
///
/// `in_buf` is a reference to the compressed data that is to be decompressed. The decompressor
/// will start at the first byte of this buffer.
///
/// `out` is a reference to the buffer that will store the decompressed data, and that
/// stores previously decompressed data if any.
///
/// * The offset given by `out_pos` indicates where in the output buffer slice writing should
///   start.
/// * If [`TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF`] is not set, the output buffer is used in a
///   wrapping manner, and its size is required to be a power of 2.
/// * The decompression function normally needs access to 32KiB of the previously decompressed
///   data (or to the beginning of the decompressed data if less than 32KiB has been
///   decompressed.)
///     - If this data is not available, decompression may fail.
///     - Some deflate compressors allow specifying a window size which limits match distances
///       to less than this, or alternatively an RLE mode where matches will only refer to the
///       previous byte and thus allows a smaller output buffer. The window size can be
///       specified in the zlib header structure, however, the header data should not be relied
///       on to be correct.
///
/// `flags` indicates settings and status to the decompression function.
/// * The [`TINFL_FLAG_HAS_MORE_INPUT`] flag has to be specified if more compressed data is to
///   be provided in a subsequent call to this function.
/// * See the [`inflate_flags`] module for details on other flags.
///
/// # Returns
///
/// Returns a tuple containing the status of the decompressor, the number of input bytes read,
/// and the number of bytes output to `out`.
///
/// This function shouldn't panic barring any bugs.
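///
/// # Example
///
/// A minimal sketch of one-shot use, assuming `data` holds a complete
/// deflate stream and the output buffer is large enough for the result:
///
/// ```ignore
/// use miniz_oxide::inflate::core::{decompress, DecompressorOxide};
/// use miniz_oxide::inflate::core::inflate_flags::TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF;
///
/// let mut r = DecompressorOxide::new();
/// let mut out = [0u8; 1024];
/// let (status, _bytes_read, bytes_written) =
///     decompress(&mut r, data, &mut out, 0, TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF);
/// // On success `status` is `TINFLStatus::Done` and the decompressed bytes
/// // are `&out[..bytes_written]`.
/// ```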
pub fn decompress(
    r: &mut DecompressorOxide,
    in_buf: &[u8],
    out: &mut [u8],
    out_pos: usize,
    flags: u32,
) -> (TINFLStatus, usize, usize) {
    let out_buf_size_mask = if flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF != 0 {
        usize::MAX
    } else {
        // In the case of zero len, any attempt to write would produce HasMoreOutput,
        // so to gracefully process the case of there really being no output,
        // set the mask to all zeros.
        out.len().saturating_sub(1)
    };

    // Ensure the output buffer's size is a power of 2, unless the output buffer
    // is large enough to hold the entire output file (in which case it doesn't
    // matter).
    // Also make sure that the output buffer position is not past the end of the output buffer.
    if (out_buf_size_mask.wrapping_add(1) & out_buf_size_mask) != 0 || out_pos > out.len() {
        return (TINFLStatus::BadParam, 0, 0);
    }

    let mut in_iter = InputWrapper::from_slice(in_buf);

    let mut state = r.state;

    let mut out_buf = OutputBuffer::from_slice_and_pos(out, out_pos);

    // Make a local copy of the important variables here so we can work with them on the stack.
    let mut l = LocalVars {
        bit_buf: r.bit_buf,
        num_bits: r.num_bits,
        dist: r.dist,
        counter: r.counter,
        num_extra: r.num_extra,
    };

    let mut status = 'state_machine: loop {
        match state {
            Start => generate_state!(state, 'state_machine, {
                l.bit_buf = 0;
                l.num_bits = 0;
                l.dist = 0;
                l.counter = 0;
                l.num_extra = 0;
                r.z_header0 = 0;
                r.z_header1 = 0;
                r.z_adler32 = 1;
                r.check_adler32 = 1;
                if flags & TINFL_FLAG_PARSE_ZLIB_HEADER != 0 {
                    Action::Jump(State::ReadZlibCmf)
                } else {
                    Action::Jump(State::ReadBlockHeader)
                }
            }),

            ReadZlibCmf => generate_state!(state, 'state_machine, {
                read_byte(&mut in_iter, flags, |cmf| {
                    r.z_header0 = u32::from(cmf);
                    Action::Jump(State::ReadZlibFlg)
                })
            }),

            ReadZlibFlg => generate_state!(state, 'state_machine, {
                read_byte(&mut in_iter, flags, |flg| {
                    r.z_header1 = u32::from(flg);
                    validate_zlib_header(r.z_header0, r.z_header1, flags, out_buf_size_mask)
                })
            }),

            // Read the block header and jump to the relevant section depending on the block type.
            ReadBlockHeader => generate_state!(state, 'state_machine, {
                read_bits(&mut l, 3, &mut in_iter, flags, |l, bits| {
                    r.finish = (bits & 1) as u8;
                    r.block_type = ((bits >> 1) & 3) as u8;
                    match r.block_type {
                        0 => Action::Jump(BlockTypeNoCompression),
                        1 => {
                            start_static_table(r);
                            init_tree(r, l).unwrap_or(Action::End(TINFLStatus::Failed))
                        },
                        2 => {
                            l.counter = 0;
                            Action::Jump(ReadTableSizes)
                        },
                        3 => Action::Jump(BlockTypeUnexpected),
                        _ => unreachable!()
                    }
                })
            }),

            // Raw/Stored/uncompressed block.
            BlockTypeNoCompression => generate_state!(state, 'state_machine, {
                pad_to_bytes(&mut l, &mut in_iter, flags, |l| {
                    l.counter = 0;
                    Action::Jump(RawHeader)
                })
            }),

            // Check that the raw block header is correct.
            RawHeader => generate_state!(state, 'state_machine, {
                if l.counter < 4 {
                    // Read block length and block length check.
                    if l.num_bits != 0 {
                        read_bits(&mut l, 8, &mut in_iter, flags, |l, bits| {
                            r.raw_header[l.counter as usize] = bits as u8;
                            l.counter += 1;
                            Action::None
                        })
                    } else {
                        read_byte(&mut in_iter, flags, |byte| {
                            r.raw_header[l.counter as usize] = byte;
                            l.counter += 1;
                            Action::None
                        })
                    }
                } else {
                    // Check if the length value of a raw block is correct.
                    // The first two (2-byte) words in a raw header are the length and the
                    // one's complement of the length.
                    let length = u16::from(r.raw_header[0]) | (u16::from(r.raw_header[1]) << 8);
                    let check = u16::from(r.raw_header[2]) | (u16::from(r.raw_header[3]) << 8);
                    let valid = length == !check;
                    l.counter = length.into();

                    if !valid {
                        Action::Jump(BadRawLength)
                    } else if l.counter == 0 {
                        // Empty raw block. Sometimes used for synchronization.
                        Action::Jump(BlockDone)
                    } else if l.num_bits != 0 {
                        // There is some data in the bit buffer, so we need to write that first.
                        Action::Jump(RawReadFirstByte)
                    } else {
                        // The bit buffer is empty, so memcpy the rest of the uncompressed data
                        // from the block.
                        Action::Jump(RawMemcpy1)
                    }
                }
            }),

            // Read the byte from the bit buffer.
            RawReadFirstByte => generate_state!(state, 'state_machine, {
                read_bits(&mut l, 8, &mut in_iter, flags, |l, bits| {
                    l.dist = bits as u32;
                    Action::Jump(RawStoreFirstByte)
                })
            }),

            // Write the byte we just read to the output buffer.
            RawStoreFirstByte => generate_state!(state, 'state_machine, {
                if out_buf.bytes_left() == 0 {
                    Action::End(TINFLStatus::HasMoreOutput)
                } else {
                    out_buf.write_byte(l.dist as u8);
                    l.counter -= 1;
                    if l.counter == 0 || l.num_bits == 0 {
                        Action::Jump(RawMemcpy1)
                    } else {
                        // There is still some data left in the bit buffer that needs to be
                        // output.
                        // TODO: Changed this to jump to `RawReadFirstByte` rather than
                        // `RawStoreFirstByte` as that seemed to be the correct path, but this
                        // needs testing.
                        Action::Jump(RawReadFirstByte)
                    }
                }
            }),

            RawMemcpy1 => generate_state!(state, 'state_machine, {
                if l.counter == 0 {
                    Action::Jump(BlockDone)
                } else if out_buf.bytes_left() == 0 {
                    Action::End(TINFLStatus::HasMoreOutput)
                } else {
                    Action::Jump(RawMemcpy2)
                }
            }),

            RawMemcpy2 => generate_state!(state, 'state_machine, {
                if in_iter.bytes_left() > 0 {
                    // Copy as many raw bytes as possible from the input to the output using
                    // memcpy.
                    // Raw block lengths are limited to 64 * 1024, so casting through usize and
                    // u32 is not an issue.
                    let space_left = out_buf.bytes_left();
                    let bytes_to_copy = cmp::min(
                        cmp::min(space_left, in_iter.bytes_left()),
                        l.counter as usize,
                    );

                    out_buf.write_slice(&in_iter.as_slice()[..bytes_to_copy]);

                    in_iter.advance(bytes_to_copy);
                    l.counter -= bytes_to_copy as u32;
                    Action::Jump(RawMemcpy1)
                } else {
                    end_of_input(flags)
                }
            }),

            // Read how many huffman codes/symbols are used for each table.
            ReadTableSizes => generate_state!(state, 'state_machine, {
                if l.counter < 3 {
                    let num_bits = [5, 5, 4][l.counter as usize];
                    read_bits(&mut l, num_bits, &mut in_iter, flags, |l, bits| {
                        r.table_sizes[l.counter as usize] =
                            bits as u16 + MIN_TABLE_SIZES[l.counter as usize];
                        l.counter += 1;
                        Action::None
                    })
                } else {
                    r.code_size_huffman.fill(0);
                    l.counter = 0;
                    // Check that the litlen and distance table sizes are within spec.
                    // The litlen table size should be <= 286 according to the RFC, and
                    // additionally zlib rejects dist table sizes larger than 30.
                    // NOTE: These are the final sizes after adding back the predefined
                    // minimums, not the raw values in the data.
                    // See miniz_oxide issue #130 and https://github.com/madler/zlib/issues/82.
                    if r.table_sizes[LITLEN_TABLE] <= 286 && r.table_sizes[DIST_TABLE] <= 30 {
                        Action::Jump(ReadHufflenTableCodeSize)
                    } else {
                        Action::Jump(BadDistOrLiteralTableLength)
                    }
                }
            }),

            // Read the 3-bit lengths of the huffman codes describing the huffman code lengths
            // used to decode the lengths of the main tables.
            ReadHufflenTableCodeSize => generate_state!(state, 'state_machine, {
                if l.counter < r.table_sizes[HUFFLEN_TABLE].into() {
                    read_bits(&mut l, 3, &mut in_iter, flags, |l, bits| {
                        // These lengths are not stored in a normal ascending order, but rather
                        // one specified by the deflate specification intended to put the most
                        // used values at the front as trailing zero lengths do not have to be
                        // stored.
                        r.code_size_huffman[HUFFMAN_LENGTH_ORDER[l.counter as usize] as usize] =
                            bits as u8;
                        l.counter += 1;
                        Action::None
                    })
                } else {
                    r.table_sizes[HUFFLEN_TABLE] = MAX_HUFF_SYMBOLS_2 as u16;
                    init_tree(r, &mut l).unwrap_or(Action::End(TINFLStatus::Failed))
                }
            }),

            ReadLitlenDistTablesCodeSize => generate_state!(state, 'state_machine, {
                if l.counter < u32::from(r.table_sizes[LITLEN_TABLE]) + u32::from(r.table_sizes[DIST_TABLE]) {
                    decode_huffman_code(
                        r, &mut l, HUFFLEN_TABLE,
                        flags, &mut in_iter, |r, l, symbol| {
                            l.dist = symbol as u32;
                            if l.dist < 16 {
                                r.len_codes[l.counter as usize] = l.dist as u8;
                                l.counter += 1;
                                Action::None
                            } else if l.dist == 16 && l.counter == 0 {
                                Action::Jump(BadCodeSizeDistPrevLookup)
                            } else {
                                l.num_extra = [2, 3, 7][l.dist as usize - 16];
                                Action::Jump(ReadExtraBitsCodeSize)
                            }
                        }
                    )
                } else if l.counter != u32::from(r.table_sizes[LITLEN_TABLE]) + u32::from(r.table_sizes[DIST_TABLE]) {
                    Action::Jump(BadCodeSizeSum)
                } else {
                    r.code_size_literal[..r.table_sizes[LITLEN_TABLE] as usize]
                        .copy_from_slice(&r.len_codes[..r.table_sizes[LITLEN_TABLE] as usize]);

                    let dist_table_start = r.table_sizes[LITLEN_TABLE] as usize;
                    let dist_table_end =
                        (r.table_sizes[LITLEN_TABLE] + r.table_sizes[DIST_TABLE]) as usize;
                    r.code_size_dist[..r.table_sizes[DIST_TABLE] as usize]
                        .copy_from_slice(&r.len_codes[dist_table_start..dist_table_end]);

                    r.block_type -= 1;
                    init_tree(r, &mut l).unwrap_or(Action::End(TINFLStatus::Failed))
                }
            }),

            ReadExtraBitsCodeSize => generate_state!(state, 'state_machine, {
                let num_extra = l.num_extra.into();
                read_bits(&mut l, num_extra, &mut in_iter, flags, |l, mut extra_bits| {
                    // Mask to avoid a bounds check.
                    extra_bits += [3, 3, 11][(l.dist as usize - 16) & 3];
                    let val = if l.dist == 16 {
                        r.len_codes[l.counter as usize - 1]
                    } else {
                        0
                    };

                    r.len_codes[l.counter as usize..l.counter as usize + extra_bits as usize]
                        .fill(val);
                    l.counter += extra_bits as u32;
                    Action::Jump(ReadLitlenDistTablesCodeSize)
                })
            }),

            DecodeLitlen => generate_state!(state, 'state_machine, {
                if in_iter.bytes_left() < 4 || out_buf.bytes_left() < 2 {
                    // See if we can decode a literal with the data we have left.
                    // Jumps to next state (WriteSymbol) if successful.
                    decode_huffman_code(
                        r,
                        &mut l,
                        LITLEN_TABLE,
                        flags,
                        &mut in_iter,
                        |_r, l, symbol| {
                            l.counter = symbol as u32;
                            Action::Jump(WriteSymbol)
                        },
                    )
                } else if
                    // If there is enough space, use the fast inner decompression
                    // function.
                    out_buf.bytes_left() >= 259 &&
                    in_iter.bytes_left() >= 14
                {
                    let (status, new_state) = decompress_fast(
                        r,
                        &mut in_iter,
                        &mut out_buf,
                        flags,
                        &mut l,
                        out_buf_size_mask,
                    );

                    state = new_state;
                    if status == TINFLStatus::Done {
                        Action::Jump(new_state)
                    } else {
                        Action::End(status)
                    }
                } else {
                    fill_bit_buffer(&mut l, &mut in_iter);

                    let (symbol, code_len) = r.tables[LITLEN_TABLE].lookup(l.bit_buf);

                    l.counter = symbol as u32;
                    l.bit_buf >>= code_len;
                    l.num_bits -= code_len;

                    if (l.counter & 256) != 0 {
                        // The symbol is not a literal.
                        Action::Jump(HuffDecodeOuterLoop1)
                    } else {
                        // If we have a 32-bit buffer we need to read another two bytes now
                        // to have enough bits to keep going.
                        if cfg!(not(target_pointer_width = "64")) {
                            fill_bit_buffer(&mut l, &mut in_iter);
                        }

                        let (symbol, code_len) = r.tables[LITLEN_TABLE].lookup(l.bit_buf);

                        l.bit_buf >>= code_len;
                        l.num_bits -= code_len;
                        // The previous symbol was a literal, so write it directly and check
                        // the next one.
                        out_buf.write_byte(l.counter as u8);
                        if (symbol & 256) != 0 {
                            l.counter = symbol as u32;
                            // The symbol is a length value.
                            Action::Jump(HuffDecodeOuterLoop1)
                        } else {
                            // The symbol is a literal, so write it directly and continue.
                            out_buf.write_byte(symbol as u8);
                            Action::None
                        }
                    }
                }
            }),
1583
1584 WriteSymbol => generate_state!(state, 'state_machine, {
1585 if l.counter >= 256 {
1586 Action::Jump(HuffDecodeOuterLoop1)
1587 } else if out_buf.bytes_left() > 0 {
1588 out_buf.write_byte(l.counter as u8);
1589 Action::Jump(DecodeLitlen)
1590 } else {
1591 Action::End(TINFLStatus::HasMoreOutput)
1592 }
1593 }),
1594
1595 HuffDecodeOuterLoop1 => generate_state!(state, 'state_machine, {
1596 // Mask the top bits since they may contain length info.
1597 l.counter &= 511;
1598
1599 if l.counter
1600 == 256 {
1601 // We hit the end of block symbol.
1602 Action::Jump(BlockDone)
1603 } else if l.counter > 285 {
1604 // Invalid code.
1605 // We already verified earlier that the code is > 256.
1606 Action::Jump(InvalidLitlen)
1607 } else {
1608 // # Optimization
1609 // Mask the value to avoid bounds checks
1610 // We could use get_unchecked later if can statically verify that
1611 // this will never go out of bounds.
1612 l.num_extra =
1613 LENGTH_EXTRA[(l.counter - 257) as usize & BASE_EXTRA_MASK];
1614 l.counter = u32::from(LENGTH_BASE[(l.counter - 257) as usize & BASE_EXTRA_MASK]);
1615 // Length and distance codes have a number of extra bits depending on
1616 // the base, which together with the base gives us the exact value.
1617 if l.num_extra != 0 {
1618 Action::Jump(ReadExtraBitsLitlen)
1619 } else {
1620 Action::Jump(DecodeDistance)
1621 }
1622 }
1623 }),
1624
1625 ReadExtraBitsLitlen => generate_state!(state, 'state_machine, {
1626 let num_extra = l.num_extra.into();
1627 read_bits(&mut l, num_extra, &mut in_iter, flags, |l, extra_bits| {
1628 l.counter += extra_bits as u32;
1629 Action::Jump(DecodeDistance)
1630 })
1631 }),
1632
1633 DecodeDistance => generate_state!(state, 'state_machine, {
1634 // Try to read a huffman code from the input buffer and look up what
1635 // length code the decoded symbol refers to.
1636 decode_huffman_code(r, &mut l, DIST_TABLE, flags, &mut in_iter, |_r, l, symbol| {
1637 // # Optimizaton - transform the value into usize here before the check so
1638 // the compiler can optimize the bounds check later - ideally it should
1639 // know that the value can't be negative from earlier in the
1640 // decode_huffman_code function but it seems it may not be able
1641 // to make the assumption that it can't be negative and thus
1642 // overflow if it's converted after the check.
1643 let symbol = symbol as usize;
1644 if symbol > 29 {
1645 // Invalid distance code.
1646 return Action::Jump(InvalidDist)
1647 }
1648 l.num_extra = num_extra_bits_for_distance_code(symbol as u8);
1649 l.dist = u32::from(DIST_BASE[symbol]);
1650 if l.num_extra != 0 {
1651 // ReadEXTRA_BITS_DISTACNE
1652 Action::Jump(ReadExtraBitsDistance)
1653 } else {
1654 Action::Jump(HuffDecodeOuterLoop2)
1655 }
1656 })
1657 }),
1658
1659 ReadExtraBitsDistance => generate_state!(state, 'state_machine, {
1660 let num_extra = l.num_extra.into();
1661 read_bits(&mut l, num_extra, &mut in_iter, flags, |l, extra_bits| {
1662 l.dist += extra_bits as u32;
1663 Action::Jump(HuffDecodeOuterLoop2)
1664 })
1665 }),
1666
1667 HuffDecodeOuterLoop2 => generate_state!(state, 'state_machine, {
1668 if l.dist as usize > out_buf.position() &&
1669 (flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF != 0)
1670 {
                        // We encountered a distance that refers to a position before
                        // the start of the decoded data, so we can't continue.
1673 Action::Jump(DistanceOutOfBounds)
1674 } else {
1675 let out_pos = out_buf.position();
1676 let source_pos = out_buf.position()
1677 .wrapping_sub(l.dist as usize) & out_buf_size_mask;
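                    // out_buf_size_mask (set earlier in this function) is all ones for a
                    // non-wrapping output buffer and the power-of-two buffer size minus one
                    // for a wrapping one, so this subtraction wraps around the dictionary
                    // in the wrapping case.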
1678
1679 let out_len = out_buf.get_ref().len();
1680 let match_end_pos = out_buf.position() + l.counter as usize;
1681
1682 if match_end_pos > out_len ||
1683 // miniz doesn't do this check here. Not sure how it makes sure
1684 // that this case doesn't happen.
1685 (source_pos >= out_pos && (source_pos - out_pos) < l.counter as usize)
1686 {
1687 // Not enough space for all of the data in the output buffer,
1688 // so copy what we have space for.
1689 if l.counter == 0 {
1690 Action::Jump(DecodeLitlen)
1691 } else {
1692 Action::Jump(WriteLenBytesToEnd)
1693 }
1694 } else {
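                        // There is enough space for the whole match, so copy it in one go.
                        // The source region may overlap the destination (dist < length),
                        // which apply_match handles to produce run-length-style repeats.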
1695 apply_match(
1696 out_buf.get_mut(),
1697 out_pos,
1698 l.dist as usize,
1699 l.counter as usize,
1700 out_buf_size_mask
1701 );
1702 out_buf.set_position(out_pos + l.counter as usize);
1703 Action::Jump(DecodeLitlen)
1704 }
1705 }
1706 }),
1707
1708 WriteLenBytesToEnd => generate_state!(state, 'state_machine, {
1709 if out_buf.bytes_left() > 0 {
1710 let out_pos = out_buf.position();
1711 let source_pos = out_buf.position()
                        .wrapping_sub(l.dist as usize) & out_buf_size_mask;

1715 let len = cmp::min(out_buf.bytes_left(), l.counter as usize);
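                        // Copy only as much as fits; the state machine stays in this state
                        // so the rest of the match can be written once the caller has
                        // drained the output buffer.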
1716
1717 transfer(out_buf.get_mut(), source_pos, out_pos, len, out_buf_size_mask);
1718
1719 out_buf.set_position(out_pos + len);
1720 l.counter -= len as u32;
1721 if l.counter == 0 {
1722 Action::Jump(DecodeLitlen)
1723 } else {
1724 Action::None
1725 }
1726 } else {
1727 Action::End(TINFLStatus::HasMoreOutput)
1728 }
1729 }),
1730
1731 BlockDone => generate_state!(state, 'state_machine, {
1732 // End once we've read the last block.
1733 if r.finish != 0 {
1734 pad_to_bytes(&mut l, &mut in_iter, flags, |_| Action::None);
1735
1736 let in_consumed = in_buf.len() - in_iter.bytes_left();
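                        // Hand back any whole bytes that were read into the bit buffer but
                        // never consumed, so they are not counted as used input.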
1737 let undo = undo_bytes(&mut l, in_consumed as u32) as usize;
1738 in_iter = InputWrapper::from_slice(in_buf[in_consumed - undo..].iter().as_slice());
1739
1740 l.bit_buf &= ((1 as BitBuffer) << l.num_bits) - 1;
1741 debug_assert_eq!(l.num_bits, 0);
1742
1743 if flags & TINFL_FLAG_PARSE_ZLIB_HEADER != 0 {
1744 l.counter = 0;
1745 Action::Jump(ReadAdler32)
1746 } else {
1747 Action::Jump(DoneForever)
1748 }
1749 } else {
1750 Action::Jump(ReadBlockHeader)
1751 }
1752 }),
1753
1754 ReadAdler32 => generate_state!(state, 'state_machine, {
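                    // The adler32 checksum is stored as the last four bytes of the zlib
                    // stream, most significant byte first, hence the shift-and-or below.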
1755 if l.counter < 4 {
1756 if l.num_bits != 0 {
1757 read_bits(&mut l, 8, &mut in_iter, flags, |l, bits| {
1758 r.z_adler32 <<= 8;
1759 r.z_adler32 |= bits as u32;
1760 l.counter += 1;
1761 Action::None
1762 })
1763 } else {
1764 read_byte(&mut in_iter, flags, |byte| {
1765 r.z_adler32 <<= 8;
1766 r.z_adler32 |= u32::from(byte);
1767 l.counter += 1;
1768 Action::None
1769 })
1770 }
1771 } else {
1772 Action::Jump(DoneForever)
1773 }
1774 }),
1775
1776 // We are done.
1777 DoneForever => break TINFLStatus::Done,
1778
1779 // Anything else indicates failure.
1780 // BadZlibHeader | BadRawLength | BadDistOrLiteralTableLength | BlockTypeUnexpected |
1781 // DistanceOutOfBounds |
1782 // BadTotalSymbols | BadCodeSizeDistPrevLookup | BadCodeSizeSum | InvalidLitlen |
1783 // InvalidDist | InvalidCodeLen
1784 _ => break TINFLStatus::Failed,
1785 };
1786 };
1787
1788 let in_undo = if status != TINFLStatus::NeedsMoreInput
1789 && status != TINFLStatus::FailedCannotMakeProgress
1790 {
1791 undo_bytes(&mut l, (in_buf.len() - in_iter.bytes_left()) as u32) as usize
1792 } else {
1793 0
1794 };
1795
1796 // Make sure HasMoreOutput overrides NeedsMoreInput if the output buffer is full.
1797 // (Unless the missing input is the adler32 value in which case we don't need to write anything.)
1798 // TODO: May want to see if we can do this in a better way.
1799 if status == TINFLStatus::NeedsMoreInput
1800 && out_buf.bytes_left() == 0
1801 && state != State::ReadAdler32
1802 {
1803 status = TINFLStatus::HasMoreOutput
1804 }
1805
1806 r.state = state;
1807 r.bit_buf = l.bit_buf;
1808 r.num_bits = l.num_bits;
1809 r.dist = l.dist;
1810 r.counter = l.counter;
1811 r.num_extra = l.num_extra;
1812
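    // Mask out any bits above num_bits so stale bit data is not carried over to
    // the next call.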
1813 r.bit_buf &= ((1 as BitBuffer) << r.num_bits) - 1;
1814
    // If this is a zlib stream, update the adler32 checksum with the decompressed bytes if
    // requested.
1817 let need_adler = if (flags & TINFL_FLAG_IGNORE_ADLER32) == 0 {
1818 flags & (TINFL_FLAG_PARSE_ZLIB_HEADER | TINFL_FLAG_COMPUTE_ADLER32) != 0
1819 } else {
1820 // If TINFL_FLAG_IGNORE_ADLER32 is enabled, ignore the checksum.
1821 false
1822 };
1823 if need_adler && status as i32 >= 0 {
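        // Only the bytes written during this call are checksummed; the running
        // adler32 value accumulates across calls.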
1824 let out_buf_pos = out_buf.position();
1825 r.check_adler32 = update_adler32(r.check_adler32, &out_buf.get_ref()[out_pos..out_buf_pos]);
1826
        // This check is disabled when fuzzing so that random input from the fuzzer
        // is not rejected early, before it has a chance to reach the interesting
        // parts of the code.
1829 if !cfg!(fuzzing) {
1830 // Once we are done, check if the checksum matches with the one provided in the zlib header.
1831 if status == TINFLStatus::Done
1832 && flags & TINFL_FLAG_PARSE_ZLIB_HEADER != 0
1833 && r.check_adler32 != r.z_adler32
1834 {
1835 status = TINFLStatus::Adler32Mismatch;
1836 }
1837 }
1838 }
1839
1840 (
1841 status,
1842 in_buf.len() - in_iter.bytes_left() - in_undo,
1843 out_buf.position() - out_pos,
1844 )
1845}
1846
1847#[cfg(test)]
1848mod test {
1849 use super::*;
1850
    // TODO: Fix these.
1852
1853 fn tinfl_decompress_oxide<'i>(
1854 r: &mut DecompressorOxide,
1855 input_buffer: &'i [u8],
1856 output_buffer: &mut [u8],
1857 flags: u32,
1858 ) -> (TINFLStatus, &'i [u8], usize) {
1859 let (status, in_pos, out_pos) = decompress(r, input_buffer, output_buffer, 0, flags);
1860 (status, &input_buffer[in_pos..], out_pos)
1861 }
1862
1863 #[test]
1864 fn decompress_zlib() {
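        // A zlib stream (2-byte header + deflate data + adler32 trailer) that
        // decodes to "Hello, zlib!".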
1865 let encoded = [
1866 120, 156, 243, 72, 205, 201, 201, 215, 81, 168, 202, 201, 76, 82, 4, 0, 27, 101, 4, 19,
1867 ];
1868 let flags = TINFL_FLAG_COMPUTE_ADLER32 | TINFL_FLAG_PARSE_ZLIB_HEADER;
1869
1870 let mut b = DecompressorOxide::new();
1871 const LEN: usize = 32;
1872 let mut b_buf = [0; LEN];
1873
        // This should fail with the output buffer being too small.
1875 let b_status = tinfl_decompress_oxide(&mut b, &encoded[..], &mut b_buf, flags);
1876
1877 assert_eq!(b_status.0, TINFLStatus::Failed);
1878
1879 let flags = flags | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF;
1880
1881 b = DecompressorOxide::new();
1882
1883 // With TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF set this should no longer fail.
1884 let b_status = tinfl_decompress_oxide(&mut b, &encoded[..], &mut b_buf, flags);
1885
1886 assert_eq!(b_buf[..b_status.2], b"Hello, zlib!"[..]);
1887 assert_eq!(b_status.0, TINFLStatus::Done);
1888 }
1889
1890 #[cfg(feature = "with-alloc")]
1891 #[test]
1892 fn raw_block() {
1893 const LEN: usize = 64;
1894
1895 let text = b"Hello, zlib!";
1896 let encoded = {
1897 let len = text.len();
1898 let notlen = !len;
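            // A stored (uncompressed) deflate block: the first byte has BFINAL
            // set and BTYPE 00, followed by LEN and NLEN (its one's complement)
            // as little-endian 16-bit values, then the raw payload.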
1899 let mut encoded = vec![
1900 1,
1901 len as u8,
1902 (len >> 8) as u8,
1903 notlen as u8,
1904 (notlen >> 8) as u8,
1905 ];
1906 encoded.extend_from_slice(&text[..]);
1907 encoded
1908 };
1909
1910 //let flags = TINFL_FLAG_COMPUTE_ADLER32 | TINFL_FLAG_PARSE_ZLIB_HEADER |
1911 let flags = TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF;
1912
1913 let mut b = DecompressorOxide::new();
1914
1915 let mut b_buf = [0; LEN];
1916
1917 let b_status = tinfl_decompress_oxide(&mut b, &encoded[..], &mut b_buf, flags);
1918 assert_eq!(b_buf[..b_status.2], text[..]);
1919 assert_eq!(b_status.0, TINFLStatus::Done);
1920 }
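
    // A small extra check (a sketch, not part of the original suite): with
    // TINFL_FLAG_HAS_MORE_INPUT set and an empty input buffer, the decompressor
    // should report NeedsMoreInput rather than failing.
    #[test]
    fn needs_more_input_on_empty_stream() {
        let mut r = DecompressorOxide::new();
        let mut output_buf = [0; 32];
        let (status, in_consumed, out_written) = decompress(
            &mut r,
            &[],
            &mut output_buf,
            0,
            TINFL_FLAG_HAS_MORE_INPUT | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF,
        );
        assert_eq!(status, TINFLStatus::NeedsMoreInput);
        assert_eq!(in_consumed, 0);
        assert_eq!(out_written, 0);
    }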
1921
1922 fn masked_lookup(table: &HuffmanTable, bit_buf: BitBuffer) -> (i32, u32) {
1923 let ret = table.lookup(bit_buf);
1924 (ret.0 & 511, ret.1)
1925 }
1926
1927 #[test]
1928 fn fixed_table_lookup() {
1929 let mut d = DecompressorOxide::new();
1930 d.block_type = 1;
1931 start_static_table(&mut d);
1932 let mut l = LocalVars {
1933 bit_buf: d.bit_buf,
1934 num_bits: d.num_bits,
1935 dist: d.dist,
1936 counter: d.counter,
1937 num_extra: d.num_extra,
1938 };
1939 init_tree(&mut d, &mut l).unwrap();
1940 let llt = &d.tables[LITLEN_TABLE];
1941 let dt = &d.tables[DIST_TABLE];
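        // These follow the fixed huffman code assignment from RFC 1951:
        // literal/length values 0-143 get 8-bit codes, 144-255 get 9 bits,
        // 256-279 get 7 bits and 280-287 get 8 bits, while all distance codes
        // are 5 bits.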
1942 assert_eq!(masked_lookup(llt, 0b00001100), (0, 8));
1943 assert_eq!(masked_lookup(llt, 0b00011110), (72, 8));
1944 assert_eq!(masked_lookup(llt, 0b01011110), (74, 8));
1945 assert_eq!(masked_lookup(llt, 0b11111101), (143, 8));
1946 assert_eq!(masked_lookup(llt, 0b000010011), (144, 9));
1947 assert_eq!(masked_lookup(llt, 0b111111111), (255, 9));
1948 assert_eq!(masked_lookup(llt, 0b00000000), (256, 7));
1949 assert_eq!(masked_lookup(llt, 0b1110100), (279, 7));
1950 assert_eq!(masked_lookup(llt, 0b00000011), (280, 8));
1951 assert_eq!(masked_lookup(llt, 0b11100011), (287, 8));
1952
1953 assert_eq!(masked_lookup(dt, 0), (0, 5));
1954 assert_eq!(masked_lookup(dt, 20), (5, 5));
1955 }
1956
    // Only compile this helper when alloc is enabled as it uses a large heap-allocated buffer.
1958 #[cfg(feature = "with-alloc")]
1959 fn check_result(input: &[u8], expected_status: TINFLStatus, expected_state: State, zlib: bool) {
1960 let mut r = DecompressorOxide::default();
1961 let mut output_buf = vec![0; 1024 * 32];
1962 let flags = if zlib {
1963 inflate_flags::TINFL_FLAG_PARSE_ZLIB_HEADER
1964 } else {
1965 0
1966 } | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF
1967 | TINFL_FLAG_HAS_MORE_INPUT;
1968 let (d_status, _in_bytes, _out_bytes) =
1969 decompress(&mut r, input, &mut output_buf, 0, flags);
1970 assert_eq!(expected_status, d_status);
1971 assert_eq!(expected_state, r.state);
1972 }
1973
1974 #[cfg(feature = "with-alloc")]
1975 #[test]
1976 fn bogus_input() {
1977 use self::check_result as cr;
1978 const F: TINFLStatus = TINFLStatus::Failed;
1979 const OK: TINFLStatus = TINFLStatus::Done;
1980 // Bad CM.
1981 cr(&[0x77, 0x85], F, State::BadZlibHeader, true);
1982 // Bad window size (but check is correct).
1983 cr(&[0x88, 0x98], F, State::BadZlibHeader, true);
1984 // Bad check bits.
1985 cr(&[0x78, 0x98], F, State::BadZlibHeader, true);
1986
1987 // Too many code lengths. (From inflate library issues)
1988 cr(
1989 b"M\xff\xffM*\xad\xad\xad\xad\xad\xad\xad\xcd\xcd\xcdM",
1990 F,
1991 State::BadDistOrLiteralTableLength,
1992 false,
1993 );
1994
1995 // Bad CLEN (also from inflate library issues)
1996 cr(
1997 b"\xdd\xff\xff*M\x94ffffffffff",
1998 F,
1999 State::BadDistOrLiteralTableLength,
2000 false,
2001 );
2002
2003 // Port of inflate coverage tests from zlib-ng
2004 // https://github.com/Dead2/zlib-ng/blob/develop/test/infcover.c
2005 let c = |a, b, c| cr(a, b, c, false);
2006
2007 // Invalid uncompressed/raw block length.
2008 c(&[0, 0, 0, 0, 0], F, State::BadRawLength);
2009 // Ok empty uncompressed block.
2010 c(&[3, 0], OK, State::DoneForever);
2011 // Invalid block type.
2012 c(&[6], F, State::BlockTypeUnexpected);
2013 // Ok uncompressed block.
2014 c(&[1, 1, 0, 0xfe, 0xff, 0], OK, State::DoneForever);
        // Too many litlens; we handle this later than zlib, so this test won't
        // give the same result.
2017 // c(&[0xfc, 0, 0], F, State::BadTotalSymbols);
2018 // Invalid set of code lengths - TODO Check if this is the correct error for this.
2019 c(&[4, 0, 0xfe, 0xff], F, State::BadTotalSymbols);
2020 // Invalid repeat in list of code lengths.
2021 // (Try to repeat a non-existent code.)
2022 c(&[4, 0, 0x24, 0x49, 0], F, State::BadCodeSizeDistPrevLookup);
2023 // Missing end of block code (should we have a separate error for this?) - fails on further input
2024 // c(&[4, 0, 0x24, 0xe9, 0xff, 0x6d], F, State::BadTotalSymbols);
2025 // Invalid set of literals/lengths
2026 c(
2027 &[
2028 4, 0x80, 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0x71, 0xff, 0xff, 0x93, 0x11, 0,
2029 ],
2030 F,
2031 State::BadTotalSymbols,
2032 );
        // Invalid set of distances - results in NeedsMoreInput, so disabled here.
2034 // c(&[4, 0x80, 0x49, 0x92, 0x24, 0x49, 0x92, 0x24, 0x0f, 0xb4, 0xff, 0xff, 0xc3, 0x84], F, State::BadTotalSymbols);
2035 // Invalid distance code
2036 c(&[2, 0x7e, 0xff, 0xff], F, State::InvalidDist);
2037
2038 // Distance refers to position before the start
2039 c(
2040 &[0x0c, 0xc0, 0x81, 0, 0, 0, 0, 0, 0x90, 0xff, 0x6b, 0x4, 0],
2041 F,
2042 State::DistanceOutOfBounds,
2043 );
2044
2045 // Trailer
        // Bad gzip trailer checksum - gzip headers are not handled by miniz_oxide.
2047 //cr(&[0x1f, 0x8b, 0x08 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0x03, 0, 0, 0, 0, 0x01], F, State::BadCRC, false)
2048 // Bad gzip trailer length
2049 //cr(&[0x1f, 0x8b, 0x08 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0x03, 0, 0, 0, 0, 0, 0, 0, 0, 0x01], F, State::BadCRC, false)
2050 }
2051
2052 #[test]
2053 fn empty_output_buffer_non_wrapping() {
2054 let encoded = [
2055 120, 156, 243, 72, 205, 201, 201, 215, 81, 168, 202, 201, 76, 82, 4, 0, 27, 101, 4, 19,
2056 ];
2057 let flags = TINFL_FLAG_COMPUTE_ADLER32
2058 | TINFL_FLAG_PARSE_ZLIB_HEADER
2059 | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF;
2060 let mut r = DecompressorOxide::new();
2061 let mut output_buf: [u8; 0] = [];
        // Check that we handle an empty output buffer properly without panicking.
2063 // https://github.com/Frommi/miniz_oxide/issues/23
2064 let res = decompress(&mut r, &encoded, &mut output_buf, 0, flags);
2065 assert_eq!(res, (TINFLStatus::HasMoreOutput, 4, 0));
2066 }
2067
2068 #[test]
2069 fn empty_output_buffer_wrapping() {
2070 let encoded = [
2071 0x73, 0x49, 0x4d, 0xcb, 0x49, 0x2c, 0x49, 0x55, 0x00, 0x11, 0x00,
2072 ];
2073 let flags = TINFL_FLAG_COMPUTE_ADLER32;
2074 let mut r = DecompressorOxide::new();
2075 let mut output_buf: [u8; 0] = [];
        // Check that we handle an empty output buffer properly without panicking.
2077 // https://github.com/Frommi/miniz_oxide/issues/23
2078 let res = decompress(&mut r, &encoded, &mut output_buf, 0, flags);
2079 assert_eq!(res, (TINFLStatus::HasMoreOutput, 2, 0));
2080 }
2081
2082 #[test]
2083 fn dist_extra_bits() {
2084 use self::num_extra_bits_for_distance_code;
2085 // Number of extra bits for each distance code.
2086 const DIST_EXTRA: [u8; 29] = [
2087 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12,
2088 12, 13,
2089 ];
2090
2091 for (i, &dist) in DIST_EXTRA.iter().enumerate() {
2092 assert_eq!(dist, num_extra_bits_for_distance_code(i as u8));
2093 }
2094 }
2095
2096 #[test]
2097 fn check_tree() {
2098 let mut r = DecompressorOxide::new();
2099 let mut l = LocalVars {
2100 bit_buf: 0,
2101 num_bits: 0,
2102 dist: 0,
2103 counter: 0,
2104 num_extra: 0,
2105 };
2106
2107 r.code_size_huffman[0] = 1;
2108 r.code_size_huffman[1] = 1;
2109 //r.code_size_huffman[2] = 3;
2110 //r.code_size_huffman[3] = 3;
2111 //r.code_size_huffman[1] = 4;
2112 r.block_type = HUFFLEN_TABLE as u8;
2113 r.table_sizes[HUFFLEN_TABLE] = 4;
2114 let res = init_tree(&mut r, &mut l).unwrap();
2115
2116 let status = match res {
2117 Action::Jump(s) => s,
2118 _ => {
2119 //println!("issue");
2120 return;
2121 }
2122 };
2123 //println!("status {:?}", status);
2124 assert!(status != BadTotalSymbols);
2125 }
2126}