1use std::fmt;
31use std::io::Read;
32
33pub const MINIMUM_MIN: u32 = 64;
35pub const MINIMUM_MAX: u32 = 1_048_576;
37pub const AVERAGE_MIN: u32 = 256;
39pub const AVERAGE_MAX: u32 = 4_194_304;
41pub const MAXIMUM_MIN: u32 = 1024;
43pub const MAXIMUM_MAX: u32 = 16_777_216;
45
46const MASKS: [u64; 26] = [
55 0, 0, 0, 0, 0, 0x0000000001804110, 0x0000000001803110, 0x0000000018035100, 0x0000001800035300, 0x0000019000353000, 0x0000590003530000, 0x0000d90003530000, 0x0000d90103530000, 0x0000d90303530000, 0x0000d90313530000, 0x0000d90f03530000, 0x0000d90303537000, 0x0000d90703537000, 0x0000d90707537000, 0x0000d91707537000, 0x0000d91747537000, 0x0000d91767537000, 0x0000d93767537000, 0x0000d93777537000, 0x0000d93777577000, 0x0000db3777577000, ];
82
83#[rustfmt::skip]
91const GEAR: [u64; 256] = [
92 0x3b5d3c7d207e37dc, 0x784d68ba91123086, 0xcd52880f882e7298, 0xeacf8e4e19fdcca7,
93 0xc31f385dfbd1632b, 0x1d5f27001e25abe6, 0x83130bde3c9ad991, 0xc4b225676e9b7649,
94 0xaa329b29e08eb499, 0xb67fcbd21e577d58, 0x0027baaada2acf6b, 0xe3ef2d5ac73c2226,
95 0x0890f24d6ed312b7, 0xa809e036851d7c7e, 0xf0a6fe5e0013d81b, 0x1d026304452cec14,
96 0x03864632648e248f, 0xcdaacf3dcd92b9b4, 0xf5e012e63c187856, 0x8862f9d3821c00b6,
97 0xa82f7338750f6f8a, 0x1e583dc6c1cb0b6f, 0x7a3145b69743a7f1, 0xabb20fee404807eb,
98 0xb14b3cfe07b83a5d, 0xb9dc27898adb9a0f, 0x3703f5e91baa62be, 0xcf0bb866815f7d98,
99 0x3d9867c41ea9dcd3, 0x1be1fa65442bf22c, 0x14300da4c55631d9, 0xe698e9cbc6545c99,
100 0x4763107ec64e92a5, 0xc65821fc65696a24, 0x76196c064822f0b7, 0x485be841f3525e01,
101 0xf652bc9c85974ff5, 0xcad8352face9e3e9, 0x2a6ed1dceb35e98e, 0xc6f483badc11680f,
102 0x3cfd8c17e9cf12f1, 0x89b83c5e2ea56471, 0xae665cfd24e392a9, 0xec33c4e504cb8915,
103 0x3fb9b15fc9fe7451, 0xd7fd1fd1945f2195, 0x31ade0853443efd8, 0x255efc9863e1e2d2,
104 0x10eab6008d5642cf, 0x46f04863257ac804, 0xa52dc42a789a27d3, 0xdaaadf9ce77af565,
105 0x6b479cd53d87febb, 0x6309e2d3f93db72f, 0xc5738ffbaa1ff9d6, 0x6bd57f3f25af7968,
106 0x67605486d90d0a4a, 0xe14d0b9663bfbdae, 0xb7bbd8d816eb0414, 0xdef8a4f16b35a116,
107 0xe7932d85aaaffed6, 0x08161cbae90cfd48, 0x855507beb294f08b, 0x91234ea6ffd399b2,
108 0xad70cf4b2435f302, 0xd289a97565bc2d27, 0x8e558437ffca99de, 0x96d2704b7115c040,
109 0x0889bbcdfc660e41, 0x5e0d4e67dc92128d, 0x72a9f8917063ed97, 0x438b69d409e016e3,
110 0xdf4fed8a5d8a4397, 0x00f41dcf41d403f7, 0x4814eb038e52603f, 0x9dafbacc58e2d651,
111 0xfe2f458e4be170af, 0x4457ec414df6a940, 0x06e62f1451123314, 0xbd1014d173ba92cc,
112 0xdef318e25ed57760, 0x9fea0de9dfca8525, 0x459de1e76c20624b, 0xaeec189617e2d666,
113 0x126a2c06ab5a83cb, 0xb1321532360f6132, 0x65421503dbb40123, 0x2d67c287ea089ab3,
114 0x6c93bff5a56bd6b6, 0x4ffb2036cab6d98d, 0xce7b785b1be7ad4f, 0xedb42ef6189fd163,
115 0xdc905288703988f6, 0x365f9c1d2c691884, 0xc640583680d99bfe, 0x3cd4624c07593ec6,
116 0x7f1ea8d85d7c5805, 0x014842d480b57149, 0x0b649bcb5a828688, 0xbcd5708ed79b18f0,
117 0xe987c862fbd2f2f0, 0x982731671f0cd82c, 0xbaf13e8b16d8c063, 0x8ea3109cbd951bba,
118 0xd141045bfb385cad, 0x2acbc1a0af1f7d30, 0xe6444d89df03bfdf, 0xa18cc771b8188ff9,
119 0x9834429db01c39bb, 0x214add07fe086a1f, 0x8f07c19b1f6b3ff9, 0x56a297b1bf4ffe55,
120 0x94d558e493c54fc7, 0x40bfc24c764552cb, 0x931a706f8a8520cb, 0x32229d322935bd52,
121 0x2560d0f5dc4fefaf, 0x9dbcc48355969bb6, 0x0fd81c3985c0b56a, 0xe03817e1560f2bda,
122 0xc1bb4f81d892b2d5, 0xb0c4864f4e28d2d7, 0x3ecc49f9d9d6c263, 0x51307e99b52ba65e,
123 0x8af2b688da84a752, 0xf5d72523b91b20b6, 0x6d95ff1ff4634806, 0x562f21555458339a,
124 0xc0ce47f889336346, 0x487823e5089b40d8, 0xe4727c7ebc6d9592, 0x5a8f7277e94970ba,
125 0xfca2f406b1c8bb50, 0x5b1f8a95f1791070, 0xd304af9fc9028605, 0x5440ab7fc930e748,
126 0x312d25fbca2ab5a1, 0x10f4a4b234a4d575, 0x90301d55047e7473, 0x3b6372886c61591e,
127 0x293402b77c444e06, 0x451f34a4d3e97dd7, 0x3158d814d81bc57b, 0x034942425b9bda69,
128 0xe2032ff9e532d9bb, 0x62ae066b8b2179e5, 0x9545e10c2f8d71d8, 0x7ff7483eb2d23fc0,
129 0x00945fcebdc98d86, 0x8764bbbe99b26ca2, 0x1b1ec62284c0bfc3, 0x58e0fcc4f0aa362b,
130 0x5f4abefa878d458d, 0xfd74ac2f9607c519, 0xa4e3fb37df8cbfa9, 0xbf697e43cac574e5,
131 0x86f14a3f68f4cd53, 0x24a23d076f1ce522, 0xe725cd8048868cc8, 0xbf3c729eb2464362,
132 0xd8f6cd57b3cc1ed8, 0x6329e52425541577, 0x62aa688ad5ae1ac0, 0x0a242566269bf845,
133 0x168b1a4753aca74b, 0xf789afefff2e7e3c, 0x6c3362093b6fccdb, 0x4ce8f50bd28c09b2,
134 0x006a2db95ae8aa93, 0x975b0d623c3d1a8c, 0x18605d3935338c5b, 0x5bb6f6136cad3c71,
135 0x0f53a20701f8d8a6, 0xab8c5ad2e7e93c67, 0x40b5ac5127acaa29, 0x8c7bf63c2075895f,
136 0x78bd9f7e014a805c, 0xb2c9e9f4f9c8c032, 0xefd6049827eb91f3, 0x2be459f482c16fbd,
137 0xd92ce0c5745aaa8c, 0x0aaa8fb298d965b9, 0x2b37f92c6c803b15, 0x8c54a5e94e0f0e78,
138 0x95f9b6e90c0a3032, 0xe7939faa436c7874, 0xd16bfe8f6a8a40c9, 0x44982b86263fd2fa,
139 0xe285fb39f984e583, 0x779a8df72d7619d3, 0xf2d79a8de8d5dd1e, 0xd1037354d66684e2,
140 0x004c82a4e668a8e5, 0x31d40a7668b044e6, 0xd70578538bd02c11, 0xdb45431078c5f482,
141 0x977121bb7f6a51ad, 0x73d5ccbd34eff8dd, 0xe437a07d356e17cd, 0x47b2782043c95627,
142 0x9fb251413e41d49a, 0xccd70b60652513d3, 0x1c95b31e8a1b49b2, 0xcae73dfd1bcb4c1b,
143 0x34d98331b1f5b70f, 0x784e39f22338d92f, 0x18613d4a064df420, 0xf1d8dae25f0bcebe,
144 0x33f77c15ae855efc, 0x3c88b3b912eb109c, 0x956a2ec96bafeea5, 0x1aa005b5e0ad0e87,
145 0x5500d70527c4bb8e, 0xe36c57196421cc44, 0x13c4d286cc36ee39, 0x5654a23d818b2a81,
146 0x77b1dc13d161abdc, 0x734f44de5f8d5eb5, 0x60717e174a6c89a2, 0xd47d9649266a211e,
147 0x5b13a4322bb69e90, 0xf7669609f8b5fc3c, 0x21e6ac55bedcdac9, 0x9b56b62b61166dea,
148 0xf48f66b939797e9c, 0x35f332f9c0e6ae9a, 0xcc733f6a9a878db0, 0x3da161e41cc108c2,
149 0xb7d74ae535914d51, 0x4d493b0b11d36469, 0xce264d1dfba9741a, 0xa9d1f2dc7436dc06,
150 0x70738016604c2a27, 0x231d36e96e93f3d5, 0x7666881197838d19, 0x4a2a83090aaad40c,
151 0xf1e761591668b35d, 0x7363236497f730a7, 0x301080e37379dd4d, 0x502dea2971827042,
152 0xc2c5eb858f32625f, 0x786afb9edfafbdff, 0xdaee0d868490b2a4, 0x617366b3268609f6,
153 0xae0e35a0fe46173e, 0xd1a07de93e824f11, 0x079b8b115ea4cca8, 0x93a99274558faebb,
154 0xfb1e6e22e08a03b3, 0xea635fdba3698dd0, 0xcf53659328503a5c, 0xcde3b31e6fd5d780,
155 0x8e3e4221d3614413, 0xef14d0d86bf1a22c, 0xe1d830d3f16c5ddb, 0xaabd2b2a451504e1
156];
157
158fn cut(
160 source: &[u8],
161 min_size: usize,
162 avg_size: usize,
163 max_size: usize,
164 mask_s: u64,
165 mask_l: u64,
166) -> (u64, usize) {
167 let mut remaining = source.len();
168 if remaining <= min_size {
169 return (0, remaining);
170 }
171 let mut center = avg_size;
172 if remaining > max_size {
173 remaining = max_size;
174 } else if remaining < center {
175 center = remaining;
176 }
177 let mut index = min_size;
178 let mut hash: u64 = 0;
182 while index < center {
183 hash = (hash << 1).wrapping_add(GEAR[source[index] as usize]);
184 if (hash & mask_s) == 0 {
185 return (hash, index);
186 }
187 index += 1;
188 }
189 let last_pos = remaining;
193 while index < last_pos {
194 hash = (hash << 1).wrapping_add(GEAR[source[index] as usize]);
195 if (hash & mask_l) == 0 {
196 return (hash, index);
197 }
198 index += 1;
199 }
200 (hash, index)
203}
204
205pub enum Normalization {
222 Level0,
224 Level1,
226 Level2,
228 Level3,
230}
231
232impl Normalization {
233 fn bits(&self) -> u32 {
234 match self {
235 Normalization::Level0 => 0,
236 Normalization::Level1 => 1,
237 Normalization::Level2 => 2,
238 Normalization::Level3 => 3,
239 }
240 }
241}
242
243#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
247pub struct Chunk {
248 pub hash: u64,
250 pub offset: usize,
252 pub length: usize,
254}
255
256#[derive(Debug, Clone, Eq, PartialEq)]
281pub struct FastCDC<'a> {
282 source: &'a [u8],
283 processed: usize,
284 remaining: usize,
285 min_size: usize,
286 avg_size: usize,
287 max_size: usize,
288 mask_s: u64,
289 mask_l: u64,
290}
291
292impl<'a> FastCDC<'a> {
293 pub fn new(source: &'a [u8], min_size: u32, avg_size: u32, max_size: u32) -> Self {
299 FastCDC::with_level(source, min_size, avg_size, max_size, Normalization::Level1)
300 }
301
302 pub fn with_level(
306 source: &'a [u8],
307 min_size: u32,
308 avg_size: u32,
309 max_size: u32,
310 level: Normalization,
311 ) -> Self {
312 assert!(min_size >= MINIMUM_MIN);
313 assert!(min_size <= MINIMUM_MAX);
314 assert!(avg_size >= AVERAGE_MIN);
315 assert!(avg_size <= AVERAGE_MAX);
316 assert!(max_size >= MAXIMUM_MIN);
317 assert!(max_size <= MAXIMUM_MAX);
318 let bits = logarithm2(avg_size);
319 let normalization = level.bits();
320 let mask_s = MASKS[(bits + normalization) as usize];
321 let mask_l = MASKS[(bits - normalization) as usize];
322 Self {
323 source,
324 processed: 0,
325 remaining: source.len(),
326 min_size: min_size as usize,
327 avg_size: avg_size as usize,
328 max_size: max_size as usize,
329 mask_s,
330 mask_l,
331 }
332 }
333
334 pub fn cut(&self, start: usize, remaining: usize) -> (u64, usize) {
347 let end = start + remaining;
348 let (hash, count) = cut(
349 &self.source[start..end],
350 self.min_size,
351 self.avg_size,
352 self.max_size,
353 self.mask_s,
354 self.mask_l,
355 );
356 (hash, start + count)
357 }
358}
359
360impl<'a> Iterator for FastCDC<'a> {
361 type Item = Chunk;
362
363 fn next(&mut self) -> Option<Chunk> {
364 if self.remaining == 0 {
365 None
366 } else {
367 let (hash, cutpoint) = self.cut(self.processed, self.remaining);
368 if cutpoint == 0 {
369 None
370 } else {
371 let offset = self.processed;
372 let length = cutpoint - offset;
373 self.processed += length;
374 self.remaining -= length;
375 Some(Chunk {
376 hash,
377 offset,
378 length,
379 })
380 }
381 }
382 }
383
384 fn size_hint(&self) -> (usize, Option<usize>) {
385 let upper_bound = self.remaining / self.min_size;
390 (upper_bound, Some(upper_bound))
391 }
392}
393
394#[derive(Debug)]
398pub enum Error {
399 Empty,
401 IoError(std::io::Error),
403 Other(String),
405}
406
407impl fmt::Display for Error {
408 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
409 write!(f, "chunker error: {self:?}")
410 }
411}
412
413impl std::error::Error for Error {}
414
415impl From<std::io::Error> for Error {
416 fn from(error: std::io::Error) -> Self {
417 Error::IoError(error)
418 }
419}
420
421impl From<Error> for std::io::Error {
422 fn from(error: Error) -> Self {
423 match error {
424 Error::IoError(ioerr) => ioerr,
425 Error::Empty => Self::from(std::io::ErrorKind::UnexpectedEof),
426 Error::Other(str) => Self::new(std::io::ErrorKind::Other, str),
427 }
428 }
429}
430
431#[derive(Debug, Clone, Eq, PartialEq, Hash)]
435pub struct ChunkData {
436 pub hash: u64,
438 pub offset: u64,
440 pub length: usize,
442 pub data: Vec<u8>,
444}
445
446pub struct StreamCDC<R: Read> {
467 buffer: Vec<u8>,
469 capacity: usize,
471 length: usize,
473 source: R,
475 processed: u64,
477 eof: bool,
479 min_size: usize,
480 avg_size: usize,
481 max_size: usize,
482 mask_s: u64,
483 mask_l: u64,
484}
485
486impl<R: Read> StreamCDC<R> {
487 pub fn new(source: R, min_size: u32, avg_size: u32, max_size: u32) -> Self {
493 StreamCDC::with_level(source, min_size, avg_size, max_size, Normalization::Level1)
494 }
495
496 pub fn with_level(
500 source: R,
501 min_size: u32,
502 avg_size: u32,
503 max_size: u32,
504 level: Normalization,
505 ) -> Self {
506 assert!(min_size >= MINIMUM_MIN);
507 assert!(min_size <= MINIMUM_MAX);
508 assert!(avg_size >= AVERAGE_MIN);
509 assert!(avg_size <= AVERAGE_MAX);
510 assert!(max_size >= MAXIMUM_MIN);
511 assert!(max_size <= MAXIMUM_MAX);
512 let bits = logarithm2(avg_size);
513 let normalization = level.bits();
514 let mask_s = MASKS[(bits + normalization) as usize];
515 let mask_l = MASKS[(bits - normalization) as usize];
516 Self {
517 buffer: vec![0_u8; max_size as usize],
518 capacity: max_size as usize,
519 length: 0,
520 source,
521 eof: false,
522 processed: 0,
523 min_size: min_size as usize,
524 avg_size: avg_size as usize,
525 max_size: max_size as usize,
526 mask_s,
527 mask_l,
528 }
529 }
530
531 fn fill_buffer(&mut self) -> Result<usize, Error> {
534 if self.eof {
536 Ok(0)
537 } else {
538 let mut all_bytes_read = 0;
539 while !self.eof && self.length < self.capacity {
540 let bytes_read = self.source.read(&mut self.buffer[self.length..])?;
541 if bytes_read == 0 {
542 self.eof = true;
543 } else {
544 self.length += bytes_read;
545 all_bytes_read += bytes_read;
546 }
547 }
548 Ok(all_bytes_read)
549 }
550 }
551
552 fn drain_bytes(&mut self, count: usize) -> Result<Vec<u8>, Error> {
555 if count > self.length {
557 Err(Error::Other(format!(
558 "drain_bytes() called with count larger than length: {} > {}",
559 count, self.length
560 )))
561 } else {
562 let data = self.buffer.drain(..count).collect::<Vec<u8>>();
563 self.length -= count;
564 self.buffer.resize(self.capacity, 0_u8);
565 Ok(data)
566 }
567 }
568
569 fn read_chunk(&mut self) -> Result<ChunkData, Error> {
572 self.fill_buffer()?;
573 if self.length == 0 {
574 Err(Error::Empty)
575 } else {
576 let (hash, count) = cut(
577 &self.buffer[..self.length],
578 self.min_size,
579 self.avg_size,
580 self.max_size,
581 self.mask_s,
582 self.mask_l,
583 );
584 if count == 0 {
585 Err(Error::Empty)
586 } else {
587 let offset = self.processed;
588 self.processed += count as u64;
589 let data = self.drain_bytes(count)?;
590 Ok(ChunkData {
591 hash,
592 offset,
593 length: count,
594 data,
595 })
596 }
597 }
598 }
599}
600
601impl<R: Read> Iterator for StreamCDC<R> {
602 type Item = Result<ChunkData, Error>;
603
604 fn next(&mut self) -> Option<Result<ChunkData, Error>> {
605 let slice = self.read_chunk();
606 if let Err(Error::Empty) = slice {
607 None
608 } else {
609 Some(slice)
610 }
611 }
612}
613
614fn logarithm2(value: u32) -> u32 {
618 f64::from(value).log2().round() as u32
619}
620
621#[cfg(test)]
622mod tests {
623 use super::*;
624 use md5::{Digest, Md5};
625 use std::fs::{self, File};
626
627 #[test]
628 fn test_logarithm2() {
629 assert_eq!(logarithm2(0), 0);
630 assert_eq!(logarithm2(1), 0);
631 assert_eq!(logarithm2(2), 1);
632 assert_eq!(logarithm2(3), 2);
633 assert_eq!(logarithm2(5), 2);
634 assert_eq!(logarithm2(6), 3);
635 assert_eq!(logarithm2(11), 3);
636 assert_eq!(logarithm2(12), 4);
637 assert_eq!(logarithm2(19), 4);
638 assert_eq!(logarithm2(64), 6);
639 assert_eq!(logarithm2(128), 7);
640 assert_eq!(logarithm2(256), 8);
641 assert_eq!(logarithm2(512), 9);
642 assert_eq!(logarithm2(1024), 10);
643 assert_eq!(logarithm2(16383), 14);
644 assert_eq!(logarithm2(16384), 14);
645 assert_eq!(logarithm2(16385), 14);
646 assert_eq!(logarithm2(32767), 15);
647 assert_eq!(logarithm2(32768), 15);
648 assert_eq!(logarithm2(32769), 15);
649 assert_eq!(logarithm2(65535), 16);
650 assert_eq!(logarithm2(65536), 16);
651 assert_eq!(logarithm2(65537), 16);
652 assert_eq!(logarithm2(1_048_575), 20);
653 assert_eq!(logarithm2(1_048_576), 20);
654 assert_eq!(logarithm2(1_048_577), 20);
655 assert_eq!(logarithm2(4_194_303), 22);
656 assert_eq!(logarithm2(4_194_304), 22);
657 assert_eq!(logarithm2(4_194_305), 22);
658 assert_eq!(logarithm2(16_777_215), 24);
659 assert_eq!(logarithm2(16_777_216), 24);
660 assert_eq!(logarithm2(16_777_217), 24);
661 }
662
663 #[test]
664 #[should_panic]
665 fn test_minimum_too_low() {
666 let array = [0u8; 1024];
667 FastCDC::new(&array, 63, 256, 1024);
668 }
669
670 #[test]
671 #[should_panic]
672 fn test_minimum_too_high() {
673 let array = [0u8; 1024];
674 FastCDC::new(&array, 67_108_867, 256, 1024);
675 }
676
677 #[test]
678 #[should_panic]
679 fn test_average_too_low() {
680 let array = [0u8; 1024];
681 FastCDC::new(&array, 64, 255, 1024);
682 }
683
684 #[test]
685 #[should_panic]
686 fn test_average_too_high() {
687 let array = [0u8; 1024];
688 FastCDC::new(&array, 64, 268_435_457, 1024);
689 }
690
691 #[test]
692 #[should_panic]
693 fn test_maximum_too_low() {
694 let array = [0u8; 1024];
695 FastCDC::new(&array, 64, 256, 1023);
696 }
697
698 #[test]
699 #[should_panic]
700 fn test_maximum_too_high() {
701 let array = [0u8; 1024];
702 FastCDC::new(&array, 64, 256, 1_073_741_825);
703 }
704
705 #[test]
706 fn test_masks() {
707 let source = [0u8; 1024];
708 let chunker = FastCDC::new(&source, 64, 256, 1024);
709 assert_eq!(chunker.mask_l, MASKS[7]);
710 assert_eq!(chunker.mask_s, MASKS[9]);
711 let chunker = FastCDC::new(&source, 8192, 16384, 32768);
712 assert_eq!(chunker.mask_l, MASKS[13]);
713 assert_eq!(chunker.mask_s, MASKS[15]);
714 let chunker = FastCDC::new(&source, 1_048_576, 4_194_304, 16_777_216);
715 assert_eq!(chunker.mask_l, MASKS[21]);
716 assert_eq!(chunker.mask_s, MASKS[23]);
717 }
718
719 #[test]
720 fn test_cut_all_zeros() {
721 let array = [0u8; 10240];
723 let chunker = FastCDC::new(&array, 64, 256, 1024);
724 let mut cursor: usize = 0;
725 for _ in 0..10 {
726 let (hash, pos) = chunker.cut(cursor, 10240 - cursor);
727 assert_eq!(hash, 14169102344523991076);
728 assert_eq!(pos, cursor + 1024);
729 cursor = pos;
730 }
731 let (_, pos) = chunker.cut(cursor, 10240 - cursor);
733 assert_eq!(pos, 10240);
734 }
735
736 #[test]
737 fn test_cut_sekien_16k_chunks() {
738 let read_result = fs::read("test/fixtures/SekienAkashita.jpg");
739 assert!(read_result.is_ok());
740 let contents = read_result.unwrap();
741 let chunker = FastCDC::new(&contents, 4096, 16384, 65535);
742 let mut cursor: usize = 0;
743 let mut remaining: usize = contents.len();
744 let expected: Vec<(u64, usize)> = vec![
745 (17968276318003433923, 21325),
746 (4098594969649699419, 17140),
747 (15733367461443853673, 28084),
748 (4509236223063678303, 18217),
749 (2504464741100432583, 24700),
750 ];
751 for (e_hash, e_length) in expected.iter() {
752 let (hash, pos) = chunker.cut(cursor, remaining);
753 assert_eq!(hash, *e_hash);
754 assert_eq!(pos, cursor + e_length);
755 cursor = pos;
756 remaining -= e_length;
757 }
758 assert_eq!(remaining, 0);
759 }
760
761 #[test]
762 fn test_cut_sekien_32k_chunks() {
763 let read_result = fs::read("test/fixtures/SekienAkashita.jpg");
764 assert!(read_result.is_ok());
765 let contents = read_result.unwrap();
766 let chunker = FastCDC::new(&contents, 8192, 32768, 131072);
767 let mut cursor: usize = 0;
768 let mut remaining: usize = contents.len();
769 let expected: Vec<(u64, usize)> =
770 vec![(15733367461443853673, 66549), (2504464741100432583, 42917)];
771 for (e_hash, e_length) in expected.iter() {
772 let (hash, pos) = chunker.cut(cursor, remaining);
773 assert_eq!(hash, *e_hash);
774 assert_eq!(pos, cursor + e_length);
775 cursor = pos;
776 remaining -= e_length;
777 }
778 assert_eq!(remaining, 0);
779 }
780
781 #[test]
782 fn test_cut_sekien_64k_chunks() {
783 let read_result = fs::read("test/fixtures/SekienAkashita.jpg");
784 assert!(read_result.is_ok());
785 let contents = read_result.unwrap();
786 let chunker = FastCDC::new(&contents, 16384, 65536, 262144);
787 let mut cursor: usize = 0;
788 let mut remaining: usize = contents.len();
789 let expected: Vec<(u64, usize)> = vec![(2504464741100432583, 109466)];
790 for (e_hash, e_length) in expected.iter() {
791 let (hash, pos) = chunker.cut(cursor, remaining);
792 assert_eq!(hash, *e_hash);
793 assert_eq!(pos, cursor + e_length);
794 cursor = pos;
795 remaining -= e_length;
796 }
797 assert_eq!(remaining, 0);
798 }
799
800 struct ExpectedChunk {
801 hash: u64,
802 offset: u64,
803 length: usize,
804 digest: String,
805 }
806
807 #[test]
808 fn test_iter_sekien_16k_chunks() {
809 let read_result = fs::read("test/fixtures/SekienAkashita.jpg");
810 assert!(read_result.is_ok());
811 let contents = read_result.unwrap();
812 let expected_chunks = vec![
816 ExpectedChunk {
817 hash: 17968276318003433923,
818 offset: 0,
819 length: 21325,
820 digest: "2bb52734718194617c957f5e07ee6054".into(),
821 },
822 ExpectedChunk {
823 hash: 4098594969649699419,
824 offset: 21325,
825 length: 17140,
826 digest: "badfb0757fe081c20336902e7131f768".into(),
827 },
828 ExpectedChunk {
829 hash: 15733367461443853673,
830 offset: 38465,
831 length: 28084,
832 digest: "18412d7414de6eb42f638351711f729d".into(),
833 },
834 ExpectedChunk {
835 hash: 4509236223063678303,
836 offset: 66549,
837 length: 18217,
838 digest: "04fe1405fc5f960363bfcd834c056407".into(),
839 },
840 ExpectedChunk {
841 hash: 2504464741100432583,
842 offset: 84766,
843 length: 24700,
844 digest: "1aa7ad95f274d6ba34a983946ebc5af3".into(),
845 },
846 ];
847 let chunker = FastCDC::new(&contents, 4096, 16384, 65535);
848 let mut index = 0;
849 for chunk in chunker {
850 assert_eq!(chunk.hash, expected_chunks[index].hash);
851 assert_eq!(chunk.offset, expected_chunks[index].offset as usize);
852 assert_eq!(chunk.length, expected_chunks[index].length);
853 let mut hasher = Md5::new();
854 hasher.update(&contents[chunk.offset..chunk.offset + chunk.length]);
855 let table = hasher.finalize();
856 let digest = format!("{:x}", table);
857 assert_eq!(digest, expected_chunks[index].digest);
858 index += 1;
859 }
860 assert_eq!(index, 5);
861 }
862
863 #[test]
864 fn test_cut_sekien_16k_nc_0() {
865 let read_result = fs::read("test/fixtures/SekienAkashita.jpg");
866 assert!(read_result.is_ok());
867 let contents = read_result.unwrap();
868 let chunker = FastCDC::with_level(&contents, 4096, 16384, 65535, Normalization::Level0);
869 let mut cursor: usize = 0;
870 let mut remaining: usize = contents.len();
871 let expected: Vec<(u64, usize)> = vec![
872 (221561130519947581, 6634),
873 (15733367461443853673, 59915),
874 (10460176299449652894, 25597),
875 (6197802202431009942, 5237),
876 (2504464741100432583, 12083),
877 ];
878 for (e_hash, e_length) in expected.iter() {
879 let (hash, pos) = chunker.cut(cursor, remaining);
880 assert_eq!(hash, *e_hash);
881 assert_eq!(pos, cursor + e_length);
882 cursor = pos;
883 remaining -= e_length;
884 }
885 assert_eq!(remaining, 0);
886 }
887
888 #[test]
889 fn test_cut_sekien_16k_nc_3() {
890 let read_result = fs::read("test/fixtures/SekienAkashita.jpg");
891 assert!(read_result.is_ok());
892 let contents = read_result.unwrap();
893 let chunker = FastCDC::with_level(&contents, 4096, 16384, 65535, Normalization::Level3);
894 let mut cursor: usize = 0;
895 let mut remaining: usize = contents.len();
896 let expected: Vec<(u64, usize)> = vec![
897 (14582375164208481996, 17350),
898 (13104072099671895560, 19911),
899 (6161241554519610597, 17426),
900 (16009206469796846404, 17519),
901 (10460176299449652894, 19940),
902 (2504464741100432583, 17320),
903 ];
904 for (e_hash, e_length) in expected.iter() {
905 let (hash, pos) = chunker.cut(cursor, remaining);
906 assert_eq!(hash, *e_hash);
907 assert_eq!(pos, cursor + e_length);
908 cursor = pos;
909 remaining -= e_length;
910 }
911 assert_eq!(remaining, 0);
912 }
913
914 #[test]
915 fn test_error_fmt() {
916 let err = Error::Empty;
917 assert_eq!(format!("{err}"), "chunker error: Empty");
918 }
919
920 #[test]
921 fn test_stream_sekien_16k_chunks() {
922 let file_result = File::open("test/fixtures/SekienAkashita.jpg");
923 assert!(file_result.is_ok());
924 let file = file_result.unwrap();
925 let expected_chunks = vec![
927 ExpectedChunk {
928 hash: 17968276318003433923,
929 offset: 0,
930 length: 21325,
931 digest: "2bb52734718194617c957f5e07ee6054".into(),
932 },
933 ExpectedChunk {
934 hash: 4098594969649699419,
935 offset: 21325,
936 length: 17140,
937 digest: "badfb0757fe081c20336902e7131f768".into(),
938 },
939 ExpectedChunk {
940 hash: 15733367461443853673,
941 offset: 38465,
942 length: 28084,
943 digest: "18412d7414de6eb42f638351711f729d".into(),
944 },
945 ExpectedChunk {
946 hash: 4509236223063678303,
947 offset: 66549,
948 length: 18217,
949 digest: "04fe1405fc5f960363bfcd834c056407".into(),
950 },
951 ExpectedChunk {
952 hash: 2504464741100432583,
953 offset: 84766,
954 length: 24700,
955 digest: "1aa7ad95f274d6ba34a983946ebc5af3".into(),
956 },
957 ];
958 let chunker = StreamCDC::new(file, 4096, 16384, 65535);
959 let mut index = 0;
960 for result in chunker {
961 assert!(result.is_ok());
962 let chunk = result.unwrap();
963 assert_eq!(chunk.hash, expected_chunks[index].hash);
964 assert_eq!(chunk.offset, expected_chunks[index].offset);
965 assert_eq!(chunk.length, expected_chunks[index].length);
966 let mut hasher = Md5::new();
967 hasher.update(&chunk.data);
968 let table = hasher.finalize();
969 let digest = format!("{:x}", table);
970 assert_eq!(digest, expected_chunks[index].digest);
971 index += 1;
972 }
973 assert_eq!(index, 5);
974 }
975}