1use std::fmt;
35use std::io::Read;
36
37#[cfg(any(feature = "tokio", feature = "futures"))]
38mod async_stream_cdc;
39#[cfg(any(feature = "tokio", feature = "futures"))]
40pub use async_stream_cdc::*;
41
42pub const MINIMUM_MIN: u32 = 64;
44pub const MINIMUM_MAX: u32 = 1_048_576;
46pub const AVERAGE_MIN: u32 = 256;
48pub const AVERAGE_MAX: u32 = 4_194_304;
50pub const MAXIMUM_MIN: u32 = 1024;
52pub const MAXIMUM_MAX: u32 = 16_777_216;
54
55pub(self) const MASKS: [u64; 26] = [
64 0, 0, 0, 0, 0, 0x0000000001804110, 0x0000000001803110, 0x0000000018035100, 0x0000001800035300, 0x0000019000353000, 0x0000590003530000, 0x0000d90003530000, 0x0000d90103530000, 0x0000d90303530000, 0x0000d90313530000, 0x0000d90f03530000, 0x0000d90303537000, 0x0000d90703537000, 0x0000d90707537000, 0x0000d91707537000, 0x0000d91747537000, 0x0000d91767537000, 0x0000d93767537000, 0x0000d93777537000, 0x0000d93777577000, 0x0000db3777577000, ];
91
92#[rustfmt::skip]
100const GEAR: [u64; 256] = [
101 0x3b5d3c7d207e37dc, 0x784d68ba91123086, 0xcd52880f882e7298, 0xeacf8e4e19fdcca7,
102 0xc31f385dfbd1632b, 0x1d5f27001e25abe6, 0x83130bde3c9ad991, 0xc4b225676e9b7649,
103 0xaa329b29e08eb499, 0xb67fcbd21e577d58, 0x0027baaada2acf6b, 0xe3ef2d5ac73c2226,
104 0x0890f24d6ed312b7, 0xa809e036851d7c7e, 0xf0a6fe5e0013d81b, 0x1d026304452cec14,
105 0x03864632648e248f, 0xcdaacf3dcd92b9b4, 0xf5e012e63c187856, 0x8862f9d3821c00b6,
106 0xa82f7338750f6f8a, 0x1e583dc6c1cb0b6f, 0x7a3145b69743a7f1, 0xabb20fee404807eb,
107 0xb14b3cfe07b83a5d, 0xb9dc27898adb9a0f, 0x3703f5e91baa62be, 0xcf0bb866815f7d98,
108 0x3d9867c41ea9dcd3, 0x1be1fa65442bf22c, 0x14300da4c55631d9, 0xe698e9cbc6545c99,
109 0x4763107ec64e92a5, 0xc65821fc65696a24, 0x76196c064822f0b7, 0x485be841f3525e01,
110 0xf652bc9c85974ff5, 0xcad8352face9e3e9, 0x2a6ed1dceb35e98e, 0xc6f483badc11680f,
111 0x3cfd8c17e9cf12f1, 0x89b83c5e2ea56471, 0xae665cfd24e392a9, 0xec33c4e504cb8915,
112 0x3fb9b15fc9fe7451, 0xd7fd1fd1945f2195, 0x31ade0853443efd8, 0x255efc9863e1e2d2,
113 0x10eab6008d5642cf, 0x46f04863257ac804, 0xa52dc42a789a27d3, 0xdaaadf9ce77af565,
114 0x6b479cd53d87febb, 0x6309e2d3f93db72f, 0xc5738ffbaa1ff9d6, 0x6bd57f3f25af7968,
115 0x67605486d90d0a4a, 0xe14d0b9663bfbdae, 0xb7bbd8d816eb0414, 0xdef8a4f16b35a116,
116 0xe7932d85aaaffed6, 0x08161cbae90cfd48, 0x855507beb294f08b, 0x91234ea6ffd399b2,
117 0xad70cf4b2435f302, 0xd289a97565bc2d27, 0x8e558437ffca99de, 0x96d2704b7115c040,
118 0x0889bbcdfc660e41, 0x5e0d4e67dc92128d, 0x72a9f8917063ed97, 0x438b69d409e016e3,
119 0xdf4fed8a5d8a4397, 0x00f41dcf41d403f7, 0x4814eb038e52603f, 0x9dafbacc58e2d651,
120 0xfe2f458e4be170af, 0x4457ec414df6a940, 0x06e62f1451123314, 0xbd1014d173ba92cc,
121 0xdef318e25ed57760, 0x9fea0de9dfca8525, 0x459de1e76c20624b, 0xaeec189617e2d666,
122 0x126a2c06ab5a83cb, 0xb1321532360f6132, 0x65421503dbb40123, 0x2d67c287ea089ab3,
123 0x6c93bff5a56bd6b6, 0x4ffb2036cab6d98d, 0xce7b785b1be7ad4f, 0xedb42ef6189fd163,
124 0xdc905288703988f6, 0x365f9c1d2c691884, 0xc640583680d99bfe, 0x3cd4624c07593ec6,
125 0x7f1ea8d85d7c5805, 0x014842d480b57149, 0x0b649bcb5a828688, 0xbcd5708ed79b18f0,
126 0xe987c862fbd2f2f0, 0x982731671f0cd82c, 0xbaf13e8b16d8c063, 0x8ea3109cbd951bba,
127 0xd141045bfb385cad, 0x2acbc1a0af1f7d30, 0xe6444d89df03bfdf, 0xa18cc771b8188ff9,
128 0x9834429db01c39bb, 0x214add07fe086a1f, 0x8f07c19b1f6b3ff9, 0x56a297b1bf4ffe55,
129 0x94d558e493c54fc7, 0x40bfc24c764552cb, 0x931a706f8a8520cb, 0x32229d322935bd52,
130 0x2560d0f5dc4fefaf, 0x9dbcc48355969bb6, 0x0fd81c3985c0b56a, 0xe03817e1560f2bda,
131 0xc1bb4f81d892b2d5, 0xb0c4864f4e28d2d7, 0x3ecc49f9d9d6c263, 0x51307e99b52ba65e,
132 0x8af2b688da84a752, 0xf5d72523b91b20b6, 0x6d95ff1ff4634806, 0x562f21555458339a,
133 0xc0ce47f889336346, 0x487823e5089b40d8, 0xe4727c7ebc6d9592, 0x5a8f7277e94970ba,
134 0xfca2f406b1c8bb50, 0x5b1f8a95f1791070, 0xd304af9fc9028605, 0x5440ab7fc930e748,
135 0x312d25fbca2ab5a1, 0x10f4a4b234a4d575, 0x90301d55047e7473, 0x3b6372886c61591e,
136 0x293402b77c444e06, 0x451f34a4d3e97dd7, 0x3158d814d81bc57b, 0x034942425b9bda69,
137 0xe2032ff9e532d9bb, 0x62ae066b8b2179e5, 0x9545e10c2f8d71d8, 0x7ff7483eb2d23fc0,
138 0x00945fcebdc98d86, 0x8764bbbe99b26ca2, 0x1b1ec62284c0bfc3, 0x58e0fcc4f0aa362b,
139 0x5f4abefa878d458d, 0xfd74ac2f9607c519, 0xa4e3fb37df8cbfa9, 0xbf697e43cac574e5,
140 0x86f14a3f68f4cd53, 0x24a23d076f1ce522, 0xe725cd8048868cc8, 0xbf3c729eb2464362,
141 0xd8f6cd57b3cc1ed8, 0x6329e52425541577, 0x62aa688ad5ae1ac0, 0x0a242566269bf845,
142 0x168b1a4753aca74b, 0xf789afefff2e7e3c, 0x6c3362093b6fccdb, 0x4ce8f50bd28c09b2,
143 0x006a2db95ae8aa93, 0x975b0d623c3d1a8c, 0x18605d3935338c5b, 0x5bb6f6136cad3c71,
144 0x0f53a20701f8d8a6, 0xab8c5ad2e7e93c67, 0x40b5ac5127acaa29, 0x8c7bf63c2075895f,
145 0x78bd9f7e014a805c, 0xb2c9e9f4f9c8c032, 0xefd6049827eb91f3, 0x2be459f482c16fbd,
146 0xd92ce0c5745aaa8c, 0x0aaa8fb298d965b9, 0x2b37f92c6c803b15, 0x8c54a5e94e0f0e78,
147 0x95f9b6e90c0a3032, 0xe7939faa436c7874, 0xd16bfe8f6a8a40c9, 0x44982b86263fd2fa,
148 0xe285fb39f984e583, 0x779a8df72d7619d3, 0xf2d79a8de8d5dd1e, 0xd1037354d66684e2,
149 0x004c82a4e668a8e5, 0x31d40a7668b044e6, 0xd70578538bd02c11, 0xdb45431078c5f482,
150 0x977121bb7f6a51ad, 0x73d5ccbd34eff8dd, 0xe437a07d356e17cd, 0x47b2782043c95627,
151 0x9fb251413e41d49a, 0xccd70b60652513d3, 0x1c95b31e8a1b49b2, 0xcae73dfd1bcb4c1b,
152 0x34d98331b1f5b70f, 0x784e39f22338d92f, 0x18613d4a064df420, 0xf1d8dae25f0bcebe,
153 0x33f77c15ae855efc, 0x3c88b3b912eb109c, 0x956a2ec96bafeea5, 0x1aa005b5e0ad0e87,
154 0x5500d70527c4bb8e, 0xe36c57196421cc44, 0x13c4d286cc36ee39, 0x5654a23d818b2a81,
155 0x77b1dc13d161abdc, 0x734f44de5f8d5eb5, 0x60717e174a6c89a2, 0xd47d9649266a211e,
156 0x5b13a4322bb69e90, 0xf7669609f8b5fc3c, 0x21e6ac55bedcdac9, 0x9b56b62b61166dea,
157 0xf48f66b939797e9c, 0x35f332f9c0e6ae9a, 0xcc733f6a9a878db0, 0x3da161e41cc108c2,
158 0xb7d74ae535914d51, 0x4d493b0b11d36469, 0xce264d1dfba9741a, 0xa9d1f2dc7436dc06,
159 0x70738016604c2a27, 0x231d36e96e93f3d5, 0x7666881197838d19, 0x4a2a83090aaad40c,
160 0xf1e761591668b35d, 0x7363236497f730a7, 0x301080e37379dd4d, 0x502dea2971827042,
161 0xc2c5eb858f32625f, 0x786afb9edfafbdff, 0xdaee0d868490b2a4, 0x617366b3268609f6,
162 0xae0e35a0fe46173e, 0xd1a07de93e824f11, 0x079b8b115ea4cca8, 0x93a99274558faebb,
163 0xfb1e6e22e08a03b3, 0xea635fdba3698dd0, 0xcf53659328503a5c, 0xcde3b31e6fd5d780,
164 0x8e3e4221d3614413, 0xef14d0d86bf1a22c, 0xe1d830d3f16c5ddb, 0xaabd2b2a451504e1
165];
166
167#[rustfmt::skip]
174const GEAR_LS: [u64; 256] = [
175 0x76ba78fa40fc6fb8, 0xf09ad1752224610c, 0x9aa5101f105ce530, 0xd59f1c9c33fb994e,
176 0x863e70bbf7a2c656, 0x3abe4e003c4b57cc, 0x062617bc7935b322, 0x89644acedd36ec92,
177 0x54653653c11d6932, 0x6cff97a43caefab0, 0x004f7555b4559ed6, 0xc7de5ab58e78444c,
178 0x1121e49adda6256e, 0x5013c06d0a3af8fc, 0xe14dfcbc0027b036, 0x3a04c6088a59d828,
179 0x070c8c64c91c491e, 0x9b559e7b9b257368, 0xebc025cc7830f0ac, 0x10c5f3a70438016c,
180 0x505ee670ea1edf14, 0x3cb07b8d839616de, 0xf4628b6d2e874fe2, 0x57641fdc80900fd6,
181 0x629679fc0f7074ba, 0x73b84f1315b7341e, 0x6e07ebd23754c57c, 0x9e1770cd02befb30,
182 0x7b30cf883d53b9a6, 0x37c3f4ca8857e458, 0x28601b498aac63b2, 0xcd31d3978ca8b932,
183 0x8ec620fd8c9d254a, 0x8cb043f8cad2d448, 0xec32d80c9045e16e, 0x90b7d083e6a4bc02,
184 0xeca579390b2e9fea, 0x95b06a5f59d3c7d2, 0x54dda3b9d66bd31c, 0x8de90775b822d01e,
185 0x79fb182fd39e25e2, 0x137078bc5d4ac8e2, 0x5cccb9fa49c72552, 0xd86789ca0997122a,
186 0x7f7362bf93fce8a2, 0xaffa3fa328be432a, 0x635bc10a6887dfb0, 0x4abdf930c7c3c5a4,
187 0x21d56c011aac859e, 0x8de090c64af59008, 0x4a5b8854f1344fa6, 0xb555bf39cef5eaca,
188 0xd68f39aa7b0ffd76, 0xc613c5a7f27b6e5e, 0x8ae71ff7543ff3ac, 0xd7aafe7e4b5ef2d0,
189 0xcec0a90db21a1494, 0xc29a172cc77f7b5c, 0x6f77b1b02dd60828, 0xbdf149e2d66b422c,
190 0xcf265b0b555ffdac, 0x102c3975d219fa90, 0x0aaa0f7d6529e116, 0x22469d4dffa73364,
191 0x5ae19e96486be604, 0xa51352eacb785a4e, 0x1cab086fff9533bc, 0x2da4e096e22b8080,
192 0x1113779bf8cc1c82, 0xbc1a9ccfb924251a, 0xe553f122e0c7db2e, 0x8716d3a813c02dc6,
193 0xbe9fdb14bb14872e, 0x01e83b9e83a807ee, 0x9029d6071ca4c07e, 0x3b5f7598b1c5aca2,
194 0xfc5e8b1c97c2e15e, 0x88afd8829bed5280, 0x0dcc5e28a2246628, 0x7a2029a2e7752598,
195 0xbde631c4bdaaeec0, 0x3fd41bd3bf950a4a, 0x8b3bc3ced840c496, 0x5dd8312c2fc5accc,
196 0x24d4580d56b50796, 0x62642a646c1ec264, 0xca842a07b7680246, 0x5acf850fd4113566,
197 0xd9277feb4ad7ad6c, 0x9ff6406d956db31a, 0x9cf6f0b637cf5a9e, 0xdb685dec313fa2c6,
198 0xb920a510e07311ec, 0x6cbf383a58d23108, 0x8c80b06d01b337fc, 0x79a8c4980eb27d8c,
199 0xfe3d51b0baf8b00a, 0x029085a9016ae292, 0x16c93796b5050d10, 0x79aae11daf3631e0,
200 0xd30f90c5f7a5e5e0, 0x304e62ce3e19b058, 0x75e27d162db180c6, 0x1d4621397b2a3774,
201 0xa28208b7f670b95a, 0x559783415e3efa60, 0xcc889b13be077fbe, 0x43198ee370311ff2,
202 0x3068853b60387376, 0x4295ba0ffc10d43e, 0x1e0f83363ed67ff2, 0xad452f637e9ffcaa,
203 0x29aab1c9278a9f8e, 0x817f8498ec8aa596, 0x2634e0df150a4196, 0x64453a64526b7aa4,
204 0x4ac1a1ebb89fdf5e, 0x3b798906ab2d376c, 0x1fb038730b816ad4, 0xc0702fc2ac1e57b4,
205 0x83769f03b12565aa, 0x61890c9e9c51a5ae, 0x7d9893f3b3ad84c6, 0xa260fd336a574cbc,
206 0x15e56d11b5094ea4, 0xebae4a477236416c, 0xdb2bfe3fe8c6900c, 0xac5e42aaa8b06734,
207 0x819c8ff11266c68c, 0x90f047ca113681b0, 0xc8e4f8fd78db2b24, 0xb51ee4efd292e174,
208 0xf945e80d639176a0, 0xb63f152be2f220e0, 0xa6095f3f92050c0a, 0xa88156ff9261ce90,
209 0x625a4bf794556b42, 0x21e949646949aaea, 0x20603aaa08fce8e6, 0x76c6e510d8c2b23c,
210 0x5268056ef8889c0c, 0x8a3e6949a7d2fbae, 0x62b1b029b0378af6, 0x06928484b737b4d2,
211 0xc4065ff3ca65b376, 0xc55c0cd71642f3ca, 0x2a8bc2185f1ae3b0, 0xffee907d65a47f80,
212 0x0128bf9d7b931b0c, 0x0ec9777d3364d944, 0x363d8c4509817f86, 0xb1c1f989e1546c56,
213 0xbe957df50f1a8b1a, 0xfae9585f2c0f8a32, 0x49c7f66fbf197f52, 0x7ed2fc87958ae9ca,
214 0x0de2947ed1e99aa6, 0x49447a0ede39ca44, 0xce4b9b00910d1990, 0x7e78e53d648c86c4,
215 0xb1ed9aaf67983db0, 0xc653ca484aa82aee, 0xc554d115ab5c3580, 0x14484acc4d37f08a,
216 0x2d16348ea7594e96, 0xef135fdffe5cfc78, 0xd866c41276df99b6, 0x99d1ea17a5181364,
217 0x00d45b72b5d15526, 0x2eb61ac4787a3518, 0x30c0ba726a6718b6, 0xb76dec26d95a78e2,
218 0x1ea7440e03f1b14c, 0x5718b5a5cfd278ce, 0x816b58a24f595452, 0x18f7ec7840eb12be,
219 0xf17b3efc029500b8, 0x6593d3e9f3918064, 0xdfac09304fd723e6, 0x57c8b3e90582df7a,
220 0xb259c18ae8b55518, 0x15551f6531b2cb72, 0x566ff258d900762a, 0x18a94bd29c1e1cf0,
221 0x2bf36dd218146064, 0xcf273f5486d8f0e8, 0xa2d7fd1ed5148192, 0x8930570c4c7fa5f4,
222 0xc50bf673f309cb06, 0xef351bee5aec33a6, 0xe5af351bd1abba3c, 0xa206e6a9accd09c4,
223 0x00990549ccd151ca, 0x63a814ecd16089cc, 0xae0af0a717a05822, 0xb68a8620f18be904,
224 0x2ee24376fed4a35a, 0xe7ab997a69dff1ba, 0xc86f40fa6adc2f9a, 0x8f64f0408792ac4e,
225 0x3f64a2827c83a934, 0x99ae16c0ca4a27a6, 0x392b663d14369364, 0x95ce7bfa37969836,
226 0x69b3066363eb6e1e, 0xf09c73e44671b25e, 0x30c27a940c9be840, 0xe3b1b5c4be179d7c,
227 0x67eef82b5d0abdf8, 0x7911677225d62138, 0x2ad45d92d75fdd4a, 0x35400b6bc15a1d0e,
228 0xaa01ae0a4f89771c, 0xc6d8ae32c8439888, 0x2789a50d986ddc72, 0xaca9447b03165502,
229 0xef63b827a2c357b8, 0xe69e89bcbf1abd6a, 0xc0e2fc2e94d91344, 0xa8fb2c924cd4423c,
230 0xb6274864576d3d20, 0xeecd2c13f16bf878, 0x43cd58ab7db9b592, 0x36ad6c56c22cdbd4,
231 0xe91ecd7272f2fd38, 0x6be665f381cd5d34, 0x98e67ed5350f1b60, 0x7b42c3c839821184,
232 0x6fae95ca6b229aa2, 0x9a92761623a6c8d2, 0x9c4c9a3bf752e834, 0x53a3e5b8e86db80c,
233 0xe0e7002cc098544e, 0x463a6dd2dd27e7aa, 0xeccd10232f071a32, 0x945506121555a818,
234 0xe3cec2b22cd166ba, 0xe6c646c92fee614e, 0x602101c6e6f3ba9a, 0xa05bd452e304e084,
235 0x858bd70b1e64c4be, 0xf0d5f73dbf5f7bfe, 0xb5dc1b0d09216548, 0xc2e6cd664d0c13ec,
236 0x5c1c6b41fc8c2e7c, 0xa340fbd27d049e22, 0x0f371622bd499950, 0x275324e8ab1f5d76,
237 0xf63cdc45c1140766, 0xd4c6bfb746d31ba0, 0x9ea6cb2650a074b8, 0x9bc7663cdfabaf00,
238 0x1c7c8443a6c28826, 0xde29a1b0d7e34458, 0xc3b061a7e2d8bbb6, 0x557a56548a2a09c2
239];
240
241#[allow(clippy::too_many_arguments)]
243pub(self) fn cut(
244 source: &[u8],
245 min_size: usize,
246 avg_size: usize,
247 max_size: usize,
248 mask_s: u64,
249 mask_l: u64,
250 mask_s_ls: u64,
251 mask_l_ls: u64,
252) -> (u64, usize) {
253 let mut remaining = source.len();
254 if remaining <= min_size {
255 return (0, remaining);
256 }
257 let mut center = avg_size;
258 if remaining > max_size {
259 remaining = max_size;
260 } else if remaining < center {
261 center = remaining;
262 }
263 let mut index = min_size / 2;
264 let mut hash: u64 = 0;
265 while index < center / 2 {
266 let a = index * 2;
267 hash = (hash << 2).wrapping_add(GEAR_LS[source[a] as usize]);
268 if (hash & mask_s_ls) == 0 {
269 return (hash, a);
270 }
271 hash = hash.wrapping_add(GEAR[source[a + 1] as usize]);
272 if (hash & mask_s) == 0 {
273 return (hash, a + 1);
274 }
275 index += 1;
276 }
277 while index < remaining / 2 {
278 let a = index * 2;
279 hash = (hash << 2).wrapping_add(GEAR_LS[source[a] as usize]);
280 if (hash & mask_l_ls) == 0 {
281 return (hash, a);
282 }
283 hash = hash.wrapping_add(GEAR[source[a + 1] as usize]);
284 if (hash & mask_l) == 0 {
285 return (hash, a + 1);
286 }
287 index += 1;
288 }
289 (hash, remaining)
292}
293
294pub enum Normalization {
310 Level0,
312 Level1,
314 Level2,
316 Level3,
318}
319
320impl Normalization {
321 pub(self) fn bits(&self) -> u32 {
322 match self {
323 Normalization::Level0 => 0,
324 Normalization::Level1 => 1,
325 Normalization::Level2 => 2,
326 Normalization::Level3 => 3,
327 }
328 }
329}
330
331#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
335pub struct Chunk {
336 pub hash: u64,
338 pub offset: usize,
340 pub length: usize,
342}
343
344#[derive(Debug, Clone, Eq, PartialEq)]
369pub struct FastCDC<'a> {
370 source: &'a [u8],
371 processed: usize,
372 remaining: usize,
373 min_size: usize,
374 avg_size: usize,
375 max_size: usize,
376 mask_s: u64,
377 mask_l: u64,
378 mask_s_ls: u64,
379 mask_l_ls: u64,
380}
381
382impl<'a> FastCDC<'a> {
383 pub fn new(source: &'a [u8], min_size: u32, avg_size: u32, max_size: u32) -> Self {
389 FastCDC::with_level(source, min_size, avg_size, max_size, Normalization::Level1)
390 }
391
392 pub fn with_level(
396 source: &'a [u8],
397 min_size: u32,
398 avg_size: u32,
399 max_size: u32,
400 level: Normalization,
401 ) -> Self {
402 assert!(min_size >= MINIMUM_MIN);
403 assert!(min_size <= MINIMUM_MAX);
404 assert!(avg_size >= AVERAGE_MIN);
405 assert!(avg_size <= AVERAGE_MAX);
406 assert!(max_size >= MAXIMUM_MIN);
407 assert!(max_size <= MAXIMUM_MAX);
408 let bits = logarithm2(avg_size);
409 let normalization = level.bits();
410 let mask_s = MASKS[(bits + normalization) as usize];
411 let mask_l = MASKS[(bits - normalization) as usize];
412 Self {
413 source,
414 processed: 0,
415 remaining: source.len(),
416 min_size: min_size as usize,
417 avg_size: avg_size as usize,
418 max_size: max_size as usize,
419 mask_s,
420 mask_l,
421 mask_s_ls: mask_s << 1,
422 mask_l_ls: mask_l << 1,
423 }
424 }
425
426 pub fn cut(&self, start: usize, remaining: usize) -> (u64, usize) {
440 let end = start + remaining;
441 let (hash, count) = cut(
442 &self.source[start..end],
443 self.min_size,
444 self.avg_size,
445 self.max_size,
446 self.mask_s,
447 self.mask_l,
448 self.mask_s_ls,
449 self.mask_l_ls,
450 );
451 (hash, start + count)
452 }
453}
454
455impl<'a> Iterator for FastCDC<'a> {
456 type Item = Chunk;
457
458 fn next(&mut self) -> Option<Chunk> {
459 if self.remaining == 0 {
460 None
461 } else {
462 let (hash, cutpoint) = self.cut(self.processed, self.remaining);
463 if cutpoint == 0 {
464 None
465 } else {
466 let offset = self.processed;
467 let length = cutpoint - offset;
468 self.processed += length;
469 self.remaining -= length;
470 Some(Chunk {
471 hash,
472 offset,
473 length,
474 })
475 }
476 }
477 }
478
479 fn size_hint(&self) -> (usize, Option<usize>) {
480 let upper_bound = self.remaining / self.min_size;
485 (upper_bound, Some(upper_bound))
486 }
487}
488
489#[derive(Debug)]
493pub enum Error {
494 Empty,
496 IoError(std::io::Error),
498 Other(String),
500}
501
502impl fmt::Display for Error {
503 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
504 write!(f, "chunker error: {self:?}")
505 }
506}
507
508impl std::error::Error for Error {}
509
510impl From<std::io::Error> for Error {
511 fn from(error: std::io::Error) -> Self {
512 Error::IoError(error)
513 }
514}
515
516impl From<Error> for std::io::Error {
517 fn from(error: Error) -> Self {
518 match error {
519 Error::IoError(ioerr) => ioerr,
520 Error::Empty => Self::from(std::io::ErrorKind::UnexpectedEof),
521 Error::Other(str) => Self::new(std::io::ErrorKind::Other, str),
522 }
523 }
524}
525
526#[derive(Debug, Clone, Eq, PartialEq, Hash)]
530pub struct ChunkData {
531 pub hash: u64,
533 pub offset: u64,
535 pub length: usize,
537 pub data: Vec<u8>,
539}
540
541pub struct StreamCDC<R: Read> {
562 buffer: Vec<u8>,
564 capacity: usize,
566 length: usize,
568 source: R,
570 processed: u64,
572 eof: bool,
574 min_size: usize,
575 avg_size: usize,
576 max_size: usize,
577 mask_s: u64,
578 mask_l: u64,
579 mask_s_ls: u64,
580 mask_l_ls: u64,
581}
582
583impl<R: Read> StreamCDC<R> {
584 pub fn new(source: R, min_size: u32, avg_size: u32, max_size: u32) -> Self {
590 StreamCDC::with_level(source, min_size, avg_size, max_size, Normalization::Level1)
591 }
592
593 pub fn with_level(
597 source: R,
598 min_size: u32,
599 avg_size: u32,
600 max_size: u32,
601 level: Normalization,
602 ) -> Self {
603 assert!(min_size >= MINIMUM_MIN);
604 assert!(min_size <= MINIMUM_MAX);
605 assert!(avg_size >= AVERAGE_MIN);
606 assert!(avg_size <= AVERAGE_MAX);
607 assert!(max_size >= MAXIMUM_MIN);
608 assert!(max_size <= MAXIMUM_MAX);
609 let bits = logarithm2(avg_size);
610 let normalization = level.bits();
611 let mask_s = MASKS[(bits + normalization) as usize];
612 let mask_l = MASKS[(bits - normalization) as usize];
613 Self {
614 buffer: vec![0_u8; max_size as usize],
615 capacity: max_size as usize,
616 length: 0,
617 source,
618 eof: false,
619 processed: 0,
620 min_size: min_size as usize,
621 avg_size: avg_size as usize,
622 max_size: max_size as usize,
623 mask_s,
624 mask_l,
625 mask_s_ls: mask_s << 1,
626 mask_l_ls: mask_l << 1,
627 }
628 }
629
630 fn fill_buffer(&mut self) -> Result<usize, Error> {
633 if self.eof {
635 Ok(0)
636 } else {
637 let mut all_bytes_read = 0;
638 while !self.eof && self.length < self.capacity {
639 let bytes_read = self.source.read(&mut self.buffer[self.length..])?;
640 if bytes_read == 0 {
641 self.eof = true;
642 } else {
643 self.length += bytes_read;
644 all_bytes_read += bytes_read;
645 }
646 }
647 Ok(all_bytes_read)
648 }
649 }
650
651 fn drain_bytes(&mut self, count: usize) -> Result<Vec<u8>, Error> {
654 if count > self.length {
656 Err(Error::Other(format!(
657 "drain_bytes() called with count larger than length: {} > {}",
658 count, self.length
659 )))
660 } else {
661 let data = self.buffer.drain(..count).collect::<Vec<u8>>();
662 self.length -= count;
663 self.buffer.resize(self.capacity, 0_u8);
664 Ok(data)
665 }
666 }
667
668 fn read_chunk(&mut self) -> Result<ChunkData, Error> {
671 self.fill_buffer()?;
672 if self.length == 0 {
673 Err(Error::Empty)
674 } else {
675 let (hash, count) = cut(
676 &self.buffer[..self.length],
677 self.min_size,
678 self.avg_size,
679 self.max_size,
680 self.mask_s,
681 self.mask_l,
682 self.mask_s_ls,
683 self.mask_l_ls,
684 );
685 if count == 0 {
686 Err(Error::Empty)
687 } else {
688 let offset = self.processed;
689 self.processed += count as u64;
690 let data = self.drain_bytes(count)?;
691 Ok(ChunkData {
692 hash,
693 offset,
694 length: count,
695 data,
696 })
697 }
698 }
699 }
700}
701
702impl<R: Read> Iterator for StreamCDC<R> {
703 type Item = Result<ChunkData, Error>;
704
705 fn next(&mut self) -> Option<Result<ChunkData, Error>> {
706 let slice = self.read_chunk();
707 if let Err(Error::Empty) = slice {
708 None
709 } else {
710 Some(slice)
711 }
712 }
713}
714
715pub(self) fn logarithm2(value: u32) -> u32 {
719 f64::from(value).log2().round() as u32
720}
721
722#[cfg(test)]
723mod tests {
724 use super::*;
725 use md5::{Digest, Md5};
726 use std::fs::{self, File};
727
728 #[test]
729 fn test_logarithm2() {
730 assert_eq!(logarithm2(0), 0);
731 assert_eq!(logarithm2(1), 0);
732 assert_eq!(logarithm2(2), 1);
733 assert_eq!(logarithm2(3), 2);
734 assert_eq!(logarithm2(5), 2);
735 assert_eq!(logarithm2(6), 3);
736 assert_eq!(logarithm2(11), 3);
737 assert_eq!(logarithm2(12), 4);
738 assert_eq!(logarithm2(19), 4);
739 assert_eq!(logarithm2(64), 6);
740 assert_eq!(logarithm2(128), 7);
741 assert_eq!(logarithm2(256), 8);
742 assert_eq!(logarithm2(512), 9);
743 assert_eq!(logarithm2(1024), 10);
744 assert_eq!(logarithm2(16383), 14);
745 assert_eq!(logarithm2(16384), 14);
746 assert_eq!(logarithm2(16385), 14);
747 assert_eq!(logarithm2(32767), 15);
748 assert_eq!(logarithm2(32768), 15);
749 assert_eq!(logarithm2(32769), 15);
750 assert_eq!(logarithm2(65535), 16);
751 assert_eq!(logarithm2(65536), 16);
752 assert_eq!(logarithm2(65537), 16);
753 assert_eq!(logarithm2(1_048_575), 20);
754 assert_eq!(logarithm2(1_048_576), 20);
755 assert_eq!(logarithm2(1_048_577), 20);
756 assert_eq!(logarithm2(4_194_303), 22);
757 assert_eq!(logarithm2(4_194_304), 22);
758 assert_eq!(logarithm2(4_194_305), 22);
759 assert_eq!(logarithm2(16_777_215), 24);
760 assert_eq!(logarithm2(16_777_216), 24);
761 assert_eq!(logarithm2(16_777_217), 24);
762 }
763
764 #[test]
765 #[should_panic]
766 fn test_minimum_too_low() {
767 let array = [0u8; 1024];
768 FastCDC::new(&array, 63, 256, 1024);
769 }
770
771 #[test]
772 #[should_panic]
773 fn test_minimum_too_high() {
774 let array = [0u8; 1024];
775 FastCDC::new(&array, 67_108_867, 256, 1024);
776 }
777
778 #[test]
779 #[should_panic]
780 fn test_average_too_low() {
781 let array = [0u8; 1024];
782 FastCDC::new(&array, 64, 255, 1024);
783 }
784
785 #[test]
786 #[should_panic]
787 fn test_average_too_high() {
788 let array = [0u8; 1024];
789 FastCDC::new(&array, 64, 268_435_457, 1024);
790 }
791
792 #[test]
793 #[should_panic]
794 fn test_maximum_too_low() {
795 let array = [0u8; 1024];
796 FastCDC::new(&array, 64, 256, 1023);
797 }
798
799 #[test]
800 #[should_panic]
801 fn test_maximum_too_high() {
802 let array = [0u8; 1024];
803 FastCDC::new(&array, 64, 256, 1_073_741_825);
804 }
805
806 #[test]
807 fn test_masks() {
808 let source = [0u8; 1024];
809 let chunker = FastCDC::new(&source, 64, 256, 1024);
810 assert_eq!(chunker.mask_l, MASKS[7]);
811 assert_eq!(chunker.mask_s, MASKS[9]);
812 let chunker = FastCDC::new(&source, 8192, 16384, 32768);
813 assert_eq!(chunker.mask_l, MASKS[13]);
814 assert_eq!(chunker.mask_s, MASKS[15]);
815 let chunker = FastCDC::new(&source, 1_048_576, 4_194_304, 16_777_216);
816 assert_eq!(chunker.mask_l, MASKS[21]);
817 assert_eq!(chunker.mask_s, MASKS[23]);
818 }
819
820 #[test]
821 fn test_cut_all_zeros() {
822 let array = [0u8; 10240];
824 let chunker = FastCDC::new(&array, 64, 256, 1024);
825 let mut cursor: usize = 0;
826 for _ in 0..10 {
827 let (hash, pos) = chunker.cut(cursor, 10240 - cursor);
828 assert_eq!(hash, 14169102344523991076);
829 assert_eq!(pos, cursor + 1024);
830 cursor = pos;
831 }
832 let (_, pos) = chunker.cut(cursor, 10240 - cursor);
834 assert_eq!(pos, 10240);
835 }
836
837 #[test]
838 fn test_cut_sekien_16k_chunks() {
839 let read_result = fs::read("test/fixtures/SekienAkashita.jpg");
840 assert!(read_result.is_ok());
841 let contents = read_result.unwrap();
842 let chunker = FastCDC::new(&contents, 4096, 16384, 65535);
843 let mut cursor: usize = 0;
844 let mut remaining: usize = contents.len();
845 let expected: Vec<(u64, usize)> = vec![
846 (17968276318003433923, 21325),
847 (8197189939299398838, 17140),
848 (13019990849178155730, 28084),
849 (4509236223063678303, 18217),
850 (2504464741100432583, 24700),
851 ];
852 for (e_hash, e_length) in expected.iter() {
853 let (hash, pos) = chunker.cut(cursor, remaining);
854 assert_eq!(hash, *e_hash);
855 assert_eq!(pos, cursor + e_length);
856 cursor = pos;
857 remaining -= e_length;
858 }
859 assert_eq!(remaining, 0);
860 }
861
862 #[test]
863 fn test_cut_sekien_32k_chunks() {
864 let read_result = fs::read("test/fixtures/SekienAkashita.jpg");
865 assert!(read_result.is_ok());
866 let contents = read_result.unwrap();
867 let chunker = FastCDC::new(&contents, 8192, 32768, 131072);
868 let mut cursor: usize = 0;
869 let mut remaining: usize = contents.len();
870 let expected: Vec<(u64, usize)> =
871 vec![(15733367461443853673, 66549), (6321136627705800457, 42917)];
872 for (e_hash, e_length) in expected.iter() {
873 let (hash, pos) = chunker.cut(cursor, remaining);
874 assert_eq!(hash, *e_hash);
875 assert_eq!(pos, cursor + e_length);
876 cursor = pos;
877 remaining -= e_length;
878 }
879 assert_eq!(remaining, 0);
880 }
881
882 #[test]
883 fn test_cut_sekien_64k_chunks() {
884 let read_result = fs::read("test/fixtures/SekienAkashita.jpg");
885 assert!(read_result.is_ok());
886 let contents = read_result.unwrap();
887 let chunker = FastCDC::new(&contents, 16384, 65536, 262144);
888 let mut cursor: usize = 0;
889 let mut remaining: usize = contents.len();
890 let expected: Vec<(u64, usize)> = vec![(2504464741100432583, 109466)];
891 for (e_hash, e_length) in expected.iter() {
892 let (hash, pos) = chunker.cut(cursor, remaining);
893 assert_eq!(hash, *e_hash);
894 assert_eq!(pos, cursor + e_length);
895 cursor = pos;
896 remaining -= e_length;
897 }
898 assert_eq!(remaining, 0);
899 }
900
901 struct ExpectedChunk {
902 hash: u64,
903 offset: u64,
904 length: usize,
905 digest: String,
906 }
907
908 #[test]
909 fn test_iter_sekien_16k_chunks() {
910 let read_result = fs::read("test/fixtures/SekienAkashita.jpg");
911 assert!(read_result.is_ok());
912 let contents = read_result.unwrap();
913 let expected_chunks = vec![
917 ExpectedChunk {
918 hash: 17968276318003433923,
919 offset: 0,
920 length: 21325,
921 digest: "2bb52734718194617c957f5e07ee6054".into(),
922 },
923 ExpectedChunk {
924 hash: 8197189939299398838,
925 offset: 21325,
926 length: 17140,
927 digest: "badfb0757fe081c20336902e7131f768".into(),
928 },
929 ExpectedChunk {
930 hash: 13019990849178155730,
931 offset: 38465,
932 length: 28084,
933 digest: "18412d7414de6eb42f638351711f729d".into(),
934 },
935 ExpectedChunk {
936 hash: 4509236223063678303,
937 offset: 66549,
938 length: 18217,
939 digest: "04fe1405fc5f960363bfcd834c056407".into(),
940 },
941 ExpectedChunk {
942 hash: 2504464741100432583,
943 offset: 84766,
944 length: 24700,
945 digest: "1aa7ad95f274d6ba34a983946ebc5af3".into(),
946 },
947 ];
948 let chunker = FastCDC::new(&contents, 4096, 16384, 65535);
949 let mut index = 0;
950 for chunk in chunker {
951 assert_eq!(chunk.hash, expected_chunks[index].hash);
952 assert_eq!(chunk.offset, expected_chunks[index].offset as usize);
953 assert_eq!(chunk.length, expected_chunks[index].length);
954 let mut hasher = Md5::new();
955 hasher.update(&contents[chunk.offset..chunk.offset + chunk.length]);
956 let table = hasher.finalize();
957 let digest = format!("{:x}", table);
958 assert_eq!(digest, expected_chunks[index].digest);
959 index += 1;
960 }
961 assert_eq!(index, 5);
962 }
963
964 #[test]
965 fn test_cut_sekien_16k_nc_0() {
966 let read_result = fs::read("test/fixtures/SekienAkashita.jpg");
967 assert!(read_result.is_ok());
968 let contents = read_result.unwrap();
969 let chunker = FastCDC::with_level(&contents, 4096, 16384, 65535, Normalization::Level0);
970 let mut cursor: usize = 0;
971 let mut remaining: usize = contents.len();
972 let expected: Vec<(u64, usize)> = vec![
973 (443122261039895162, 6634),
974 (15733367461443853673, 59915),
975 (10460176299449652894, 25597),
976 (6197802202431009942, 5237),
977 (6321136627705800457, 12083),
978 ];
979 for (e_hash, e_length) in expected.iter() {
980 let (hash, pos) = chunker.cut(cursor, remaining);
981 assert_eq!(hash, *e_hash);
982 assert_eq!(pos, cursor + e_length);
983 cursor = pos;
984 remaining -= e_length;
985 }
986 assert_eq!(remaining, 0);
987 }
988
989 #[test]
990 fn test_cut_sekien_16k_nc_3() {
991 let read_result = fs::read("test/fixtures/SekienAkashita.jpg");
992 assert!(read_result.is_ok());
993 let contents = read_result.unwrap();
994 let chunker = FastCDC::with_level(&contents, 8192, 16384, 32768, Normalization::Level3);
995 let mut cursor: usize = 0;
996 let mut remaining: usize = contents.len();
997 let expected: Vec<(u64, usize)> = vec![
998 (10718006254707412376, 17350),
999 (13104072099671895560, 19911),
1000 (12322483109039221194, 17426),
1001 (16009206469796846404, 17519),
1002 (2473608525189754172, 19940),
1003 (2504464741100432583, 17320),
1004 ];
1005 for (e_hash, e_length) in expected.iter() {
1006 let (hash, pos) = chunker.cut(cursor, remaining);
1007 assert_eq!(hash, *e_hash);
1008 assert_eq!(pos, cursor + e_length);
1009 cursor = pos;
1010 remaining -= e_length;
1011 }
1012 assert_eq!(remaining, 0);
1013 }
1014
1015 #[test]
1016 fn test_error_fmt() {
1017 let err = Error::Empty;
1018 assert_eq!(format!("{err}"), "chunker error: Empty");
1019 }
1020
1021 #[test]
1022 fn test_stream_sekien_16k_chunks() {
1023 let file_result = File::open("test/fixtures/SekienAkashita.jpg");
1024 assert!(file_result.is_ok());
1025 let file = file_result.unwrap();
1026 let expected_chunks = vec![
1028 ExpectedChunk {
1029 hash: 17968276318003433923,
1030 offset: 0,
1031 length: 21325,
1032 digest: "2bb52734718194617c957f5e07ee6054".into(),
1033 },
1034 ExpectedChunk {
1035 hash: 8197189939299398838,
1036 offset: 21325,
1037 length: 17140,
1038 digest: "badfb0757fe081c20336902e7131f768".into(),
1039 },
1040 ExpectedChunk {
1041 hash: 13019990849178155730,
1042 offset: 38465,
1043 length: 28084,
1044 digest: "18412d7414de6eb42f638351711f729d".into(),
1045 },
1046 ExpectedChunk {
1047 hash: 4509236223063678303,
1048 offset: 66549,
1049 length: 18217,
1050 digest: "04fe1405fc5f960363bfcd834c056407".into(),
1051 },
1052 ExpectedChunk {
1053 hash: 2504464741100432583,
1054 offset: 84766,
1055 length: 24700,
1056 digest: "1aa7ad95f274d6ba34a983946ebc5af3".into(),
1057 },
1058 ];
1059 let chunker = StreamCDC::new(file, 4096, 16384, 65535);
1060 let mut index = 0;
1061 for result in chunker {
1062 assert!(result.is_ok());
1063 let chunk = result.unwrap();
1064 assert_eq!(chunk.hash, expected_chunks[index].hash);
1065 assert_eq!(chunk.offset, expected_chunks[index].offset);
1066 assert_eq!(chunk.length, expected_chunks[index].length);
1067 let mut hasher = Md5::new();
1068 hasher.update(&chunk.data);
1069 let table = hasher.finalize();
1070 let digest = format!("{:x}", table);
1071 assert_eq!(digest, expected_chunks[index].digest);
1072 index += 1;
1073 }
1074 assert_eq!(index, 5);
1075 }
1076}