1use pin_project::pin_project;
10use std::collections::BTreeSet;
11use std::pin::Pin;
12use std::sync::Arc;
13use std::sync::atomic::{AtomicBool, Ordering};
14use std::task::{Poll, ready};
15use tokio::io::{AsyncBufRead, AsyncRead, ReadBuf};
16use wu_manber::TwoByteWM;
17
/// Shared state backing a [`ReferencePattern`].
///
/// Bundles the candidate needles with the data derived from them when the
/// pattern is constructed.
pub struct ReferencePatternInner<P> {
    /// The needles to search for. Matches are tracked by index into this list.
    candidates: Vec<P>,
    /// Byte length of the longest candidate, or `0` when there are none.
    longest_candidate: usize,
    /// Multi-pattern searcher built from `candidates`; `None` when the
    /// candidate list is empty.
    searcher: Option<TwoByteWM>,
}
28
29#[derive(Clone)]
30pub struct ReferencePattern<P> {
31 inner: Arc<ReferencePatternInner<P>>,
32}
33
34impl<P> ReferencePattern<P> {
35 pub fn candidates(&self) -> &[P] {
36 &self.inner.candidates
37 }
38
39 pub fn longest_candidate(&self) -> usize {
40 self.inner.longest_candidate
41 }
42}
43
44impl<P: AsRef<[u8]>> ReferencePattern<P> {
45 pub fn new(candidates: Vec<P>) -> Self {
48 let searcher = if candidates.is_empty() {
49 None
50 } else {
51 Some(TwoByteWM::new(&candidates))
52 };
53 let longest_candidate = candidates.iter().fold(0, |v, c| v.max(c.as_ref().len()));
54
55 ReferencePattern {
56 inner: Arc::new(ReferencePatternInner {
57 searcher,
58 candidates,
59 longest_candidate,
60 }),
61 }
62 }
63}
64
65impl<P> From<Vec<P>> for ReferencePattern<P>
66where
67 P: AsRef<[u8]>,
68{
69 fn from(candidates: Vec<P>) -> Self {
70 Self::new(candidates)
71 }
72}
73
/// Scans byte buffers for the candidates of a [`ReferencePattern`] and
/// remembers which of them have been seen so far.
pub struct ReferenceScanner<P> {
    /// The candidates being searched for.
    pattern: ReferencePattern<P>,
    /// One flag per candidate, in candidate order; set to `true` once that
    /// candidate has been found in a scanned buffer.
    matches: Vec<AtomicBool>,
}
80
81impl<P: AsRef<[u8]>> ReferenceScanner<P> {
82 pub fn new<IP: Into<ReferencePattern<P>>>(pattern: IP) -> Self {
85 let pattern = pattern.into();
86 let mut matches = Vec::new();
87 for _ in 0..pattern.candidates().len() {
88 matches.push(AtomicBool::new(false));
89 }
90 ReferenceScanner { pattern, matches }
91 }
92
93 pub fn scan<S: AsRef<[u8]>>(&self, haystack: S) {
96 if haystack.as_ref().len() < self.pattern.longest_candidate() {
97 return;
98 }
99
100 if let Some(searcher) = &self.pattern.inner.searcher {
101 for m in searcher.find(haystack) {
102 self.matches[m.pat_idx].store(true, Ordering::Release);
103 }
104 }
105 }
106
107 pub fn pattern(&self) -> &ReferencePattern<P> {
108 &self.pattern
109 }
110
111 pub fn matches(&self) -> Vec<bool> {
112 self.matches
113 .iter()
114 .map(|m| m.load(Ordering::Acquire))
115 .collect()
116 }
117
118 pub fn candidate_matches(&self) -> impl Iterator<Item = &P> {
119 let candidates = self.pattern.candidates();
120 self.matches.iter().enumerate().filter_map(|(idx, found)| {
121 if found.load(Ordering::Acquire) {
122 Some(&candidates[idx])
123 } else {
124 None
125 }
126 })
127 }
128}
129
130impl<P: Clone + Ord + AsRef<[u8]>> ReferenceScanner<P> {
131 pub fn finalise(self) -> BTreeSet<P> {
133 self.candidate_matches().cloned().collect()
134 }
135}
136
/// Buffer capacity in bytes (8 KiB) requested by `ReferenceReader::new`.
const DEFAULT_BUF_SIZE: usize = 8 * 1024;
138
/// Buffering reader that passes everything it reads from `reader` through a
/// [`ReferenceScanner`].
#[pin_project]
pub struct ReferenceReader<'a, P, R> {
    /// Scanner that is run over the buffered data.
    scanner: &'a ReferenceScanner<P>,
    /// Internal buffer holding data read from `reader`.
    buffer: Vec<u8>,
    /// Offset into `buffer` of the first byte not yet consumed by the caller.
    consumed: usize,
    /// The wrapped reader.
    #[pin]
    reader: R,
}
147
148impl<'a, P, R> ReferenceReader<'a, P, R>
149where
150 P: AsRef<[u8]>,
151{
152 pub fn new(scanner: &'a ReferenceScanner<P>, reader: R) -> Self {
153 Self::with_capacity(DEFAULT_BUF_SIZE, scanner, reader)
154 }
155
156 pub fn with_capacity(capacity: usize, scanner: &'a ReferenceScanner<P>, reader: R) -> Self {
157 let capacity = capacity.max(scanner.pattern().longest_candidate());
159 ReferenceReader {
160 scanner,
161 buffer: Vec::with_capacity(capacity),
162 consumed: 0,
163 reader,
164 }
165 }
166}
167
168impl<P, R> AsyncRead for ReferenceReader<'_, P, R>
169where
170 R: AsyncRead,
171 P: AsRef<[u8]>,
172{
173 fn poll_read(
174 mut self: Pin<&mut Self>,
175 cx: &mut std::task::Context<'_>,
176 buf: &mut tokio::io::ReadBuf<'_>,
177 ) -> Poll<std::io::Result<()>> {
178 let internal_buf = ready!(self.as_mut().poll_fill_buf(cx))?;
179 let amt = buf.remaining().min(internal_buf.len());
180 buf.put_slice(&internal_buf[..amt]);
181 self.consume(amt);
182 Poll::Ready(Ok(()))
183 }
184}
185
186impl<P, R> AsyncBufRead for ReferenceReader<'_, P, R>
187where
188 R: AsyncRead,
189 P: AsRef<[u8]>,
190{
191 fn poll_fill_buf(
192 self: Pin<&mut Self>,
193 cx: &mut std::task::Context<'_>,
194 ) -> Poll<std::io::Result<&[u8]>> {
195 #[allow(clippy::manual_saturating_arithmetic)] let overlap = self
197 .scanner
198 .pattern
199 .longest_candidate()
200 .checked_sub(1)
201 .unwrap_or(0);
204 let mut this = self.project();
205 if *this.consumed < this.buffer.len() {
207 return Poll::Ready(Ok(&this.buffer[*this.consumed..]));
208 }
209 if *this.consumed > overlap {
211 let start = this.buffer.len() - overlap;
212 this.buffer.copy_within(start.., 0);
213 this.buffer.truncate(overlap);
214 *this.consumed = overlap;
215 }
216 loop {
218 let filled = {
219 let mut buf = ReadBuf::uninit(this.buffer.spare_capacity_mut());
220 ready!(this.reader.as_mut().poll_read(cx, &mut buf))?;
221 buf.filled().len()
222 };
223 unsafe {
225 this.buffer.set_len(filled + this.buffer.len());
226 }
227 if filled == 0 || this.buffer.len() > overlap {
228 break;
229 }
230 }
231
232 #[allow(clippy::needless_borrows_for_generic_args)] this.scanner.scan(&this.buffer);
234
235 Poll::Ready(Ok(&this.buffer[*this.consumed..]))
236 }
237
238 fn consume(self: Pin<&mut Self>, amt: usize) {
239 debug_assert!(self.consumed + amt <= self.buffer.len());
240 let this = self.project();
241 *this.consumed += amt;
242 }
243}
244
#[cfg(test)]
mod tests {
    use rstest::rstest;
    use tokio::io::AsyncReadExt as _;
    use tokio_test::io::Builder;

    use super::*;

    /// Sample derivation text used as the haystack in every test.
    const HELLO_DRV: &str = r#"Derive([("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1","","")],[("/nix/store/6z1jfnqqgyqr221zgbpm30v91yfj3r45-bash-5.1-p16.drv",["out"]),("/nix/store/ap9g09fxbicj836zm88d56dn3ff4clxl-stdenv-linux.drv",["out"]),("/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv",["out"])],["/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],"x86_64-linux","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash",["-e","/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],[("buildInputs",""),("builder","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash"),("cmakeFlags",""),("configureFlags",""),("depsBuildBuild",""),("depsBuildBuildPropagated",""),("depsBuildTarget",""),("depsBuildTargetPropagated",""),("depsHostHost",""),("depsHostHostPropagated",""),("depsTargetTarget",""),("depsTargetTargetPropagated",""),("doCheck","1"),("doInstallCheck",""),("mesonFlags",""),("name","hello-2.12.1"),("nativeBuildInputs",""),("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1"),("outputs","out"),("patches",""),("pname","hello"),("propagatedBuildInputs",""),("propagatedNativeBuildInputs",""),("src","/nix/store/pa10z4ngm0g83kx9mssrqzz30s84vq7k-hello-2.12.1.tar.gz"),("stdenv","/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux"),("strictDeps",""),("system","x86_64-linux"),("version","2.12.1")])"#;

    /// A scanner without candidates never reports a match.
    #[test]
    fn test_no_patterns() {
        let scanner: ReferenceScanner<String> = ReferenceScanner::new(vec![]);

        scanner.scan(HELLO_DRV);

        let found = scanner.finalise();
        assert!(found.is_empty());
    }

    /// A single candidate that occurs in the haystack is reported.
    #[test]
    fn test_single_match() {
        let needle = "/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16";
        let scanner = ReferenceScanner::new(vec![needle.to_string()]);

        scanner.scan(HELLO_DRV);

        let found = scanner.finalise();
        assert_eq!(found.len(), 1);
        assert!(found.contains(needle));
    }

    /// Only the candidates present in the haystack (the first three) are
    /// reported.
    #[test]
    fn test_multiple_matches() {
        let candidates = vec![
            "/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1".to_string(),
            "/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv".to_string(),
            "/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux".to_string(),
            "/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-emacs-28.2.drv".to_string(),
        ];

        let scanner = ReferenceScanner::new(candidates.clone());
        scanner.scan(HELLO_DRV);

        let found = scanner.finalise();
        assert_eq!(found.len(), 3);

        for expected in candidates.iter().take(3) {
            assert!(found.contains(expected));
        }
    }

    /// Streaming the haystack through a `ReferenceReader` yields the data
    /// unchanged and finds the same candidates, for different combinations
    /// of read chunk size and buffer capacity.
    #[rstest]
    #[case::normal(8096, 8096)]
    #[case::small_capacity(8096, 1)]
    #[case::small_read(1, 8096)]
    #[case::all_small(1, 1)]
    #[tokio::test]
    async fn test_reference_reader(#[case] chunk_size: usize, #[case] capacity: usize) {
        let candidates = vec![
            "33l4p0pn0mybmqzaxfkpppyh7vx1c74p",
            "pf80kikyxr63wrw56k00i1kw6ba76qik",
            "cp65c8nk29qq5cl1wyy5qyw103cwmax7",
            "fn7zvafq26f0c8b17brs7s95s10ibfzs",
        ];
        let scanner = ReferenceScanner::new(ReferencePattern::new(candidates.clone()));

        // Serve the haystack in reads of at most `chunk_size` bytes.
        let mut builder = Builder::new();
        for chunk in HELLO_DRV.as_bytes().chunks(chunk_size) {
            builder.read(chunk);
        }
        let mock = builder.build();

        let mut reader = ReferenceReader::with_capacity(capacity, &scanner, mock);
        let mut contents = String::new();
        reader.read_to_string(&mut contents).await.unwrap();
        assert_eq!(contents, HELLO_DRV);

        let found = scanner.finalise();
        assert_eq!(found.len(), 3);

        for expected in candidates.iter().take(3) {
            assert!(found.contains(expected));
        }
    }

    /// A reader whose scanner has no candidates still passes the data
    /// through unchanged and reports nothing.
    #[tokio::test]
    async fn test_reference_reader_no_patterns() {
        let scanner = ReferenceScanner::new(ReferencePattern::new(Vec::<&str>::new()));

        let mut builder = Builder::new();
        builder.read(HELLO_DRV.as_bytes());
        let mock = builder.build();

        let mut reader = ReferenceReader::new(&scanner, mock);
        let mut contents = String::new();
        reader.read_to_string(&mut contents).await.unwrap();
        assert_eq!(contents, HELLO_DRV);

        let found = scanner.finalise();
        assert!(found.is_empty());
    }
}