snix_castore/directoryservice/
directory_graph.rs

1use petgraph::{
2    graph::{DiGraph, NodeIndex},
3    visit::{Bfs, DfsPostOrder, Walker},
4};
5use std::collections::{HashMap, HashSet, hash_map};
6use tracing::{instrument, warn};
7
8use crate::directoryservice::order_validator::OrderingError;
9use crate::{B3Digest, Directory, Node};
10
11/// This represents a full (and validated) graph of [Directory] nodes.
12/// It can be constructed using [DirectoryGraphBuilder], and is normally used to
13/// receive in one or the other insertion order, validate, and then drain in
14/// Leaves-To-Root order.
15/// If you just want to validate an order without keeping the results,
16/// `RootToLeavesValidator` or `LeavesToRootValidator` can be used.
17#[derive(Default)]
18pub struct DirectoryGraph {
19    // A directed graph, using Directory as node weight.
20    // Edges point from parents to children.
21    graph: DiGraph<Directory, ()>,
22
23    // Points to the root.
24    root_idx: NodeIndex,
25}
26
27#[derive(PartialEq, Eq, Debug)]
28enum DirectoryOrder {
29    /// Start with the root.
30    /// Validates that newly received directories are already referenced from
31    /// the root via existing directories.
32    RootToLeaves,
33    /// Each directory may only refer to directories already sent previously.
34    LeavesToRoot,
35}
36
37impl DirectoryGraph {
38    /// Drains the graph, returning node weights in the chosen [DirectoryOrder].
39    fn drain(self, order: DirectoryOrder) -> impl Iterator<Item = Directory> {
40        let order = match order {
41            DirectoryOrder::RootToLeaves => {
42                // do a BFS traversal of the graph, starting with the root node
43                Bfs::new(&self.graph, self.root_idx)
44                    .iter(&self.graph)
45                    .collect::<Vec<_>>()
46            }
47            DirectoryOrder::LeavesToRoot => {
48                // do a DFS Post-Order traversal of the graph, starting with the root node
49                DfsPostOrder::new(&self.graph, self.root_idx)
50                    .iter(&self.graph)
51                    .collect::<Vec<_>>()
52            }
53        };
54
55        let (mut nodes, _edges) = self.graph.into_nodes_edges();
56        order
57            .into_iter()
58            .map(move |i| std::mem::take(&mut nodes[i.index()].weight))
59    }
60
61    /// Drains the graph in Leaves-To-Root Order.
62    #[instrument(level = "trace", skip_all)]
63    pub fn drain_leaves_to_root(self) -> impl Iterator<Item = Directory> {
64        self.drain(DirectoryOrder::LeavesToRoot)
65    }
66
67    /// Drains the graph in Root-To-Leaves Order.
68    #[instrument(level = "trace", skip_all)]
69    pub fn drain_root_to_leaves(self) -> impl Iterator<Item = Directory> {
70        self.drain(DirectoryOrder::RootToLeaves)
71    }
72
73    pub fn root(&self) -> &Directory {
74        self.graph
75            .node_weight(self.root_idx)
76            .expect("Snix bug: root not found")
77    }
78}
79
80/// This allows constructing a [DirectoryGraph].
81/// After deciding on the insertion order ([Self::new_leaves_to_root] or
82/// [Self::new_root_to_leaves] with the expected root digest passed),
83/// different [Directory] can be passed to [Self::try_insert].
84/// A [Self::build] consumes the builder, returning a validated [DirectoryGraph],
85/// or an error.
86/// The resulting [DirectoryGraph] can be used to drain the graph in
87/// Leaves-To-Root or Root-To-Leaves order.
88///
89/// It does do the same checks as `RootToLeavesValidator` and `LeavesToRootValidator`
90/// (insertion order, completeness, connectivity, correct sizes referenced).
91// NOTE: a child is always smaller than its parent
92pub struct DirectoryGraphBuilder {
93    /// The order of [Directory] elements [Self::try_insert] is called with.
94    insertion_order: DirectoryOrder,
95
96    /// A directed graph, using Directory as node weight.
97    /// Edges point from parents to children.
98    graph: DiGraph<Directory, ()>,
99
100    /// A lookup table from directory digest to node index and size.
101    /// The size is stored to avoid having to calculate it multiple times.
102    digest_to_node_idx_size: HashMap<B3Digest, (NodeIndex, u64)>,
103
104    /// A map from digest to size and all node indexes that are pointing to it.
105    /// Used in the RTL case for all unfinished edges.
106    rtl_edges_todo: HashMap<B3Digest, (u64, Vec<NodeIndex>)>,
107
108    /// Holds the expected root digest.
109    /// Populated in the RTL case only.
110    exp_root_digest: Option<B3Digest>,
111}
112
113impl DirectoryGraphBuilder {
114    /// Constructs a new [DirectoryGraphBuilder] accepting directories in
115    /// Leaves-To-Root order.
116    pub fn new_leaves_to_root() -> Self {
117        Self {
118            insertion_order: DirectoryOrder::LeavesToRoot,
119            graph: Default::default(),
120            digest_to_node_idx_size: Default::default(),
121            rtl_edges_todo: Default::default(),
122            exp_root_digest: None,
123        }
124    }
125
126    /// Constructs a new [DirectoryGraphBuilder] accepting directories in
127    /// Root-To-Leaves order.
128    /// The expected root Directory needs to be passed as an argument,
129    /// and is validated to match the one inserted on the first call to
130    /// [Self::try_insert].
131    pub fn new_root_to_leaves(root_digest: B3Digest) -> Self {
132        Self {
133            insertion_order: DirectoryOrder::RootToLeaves,
134            graph: Default::default(),
135            digest_to_node_idx_size: Default::default(),
136            rtl_edges_todo: Default::default(),
137            exp_root_digest: Some(root_digest),
138        }
139    }
140
141    #[instrument(level = "trace", skip_all, fields(directory.digest=%directory.digest()))]
142    pub fn try_insert(&mut self, directory: Directory) -> Result<(), OrderingError> {
143        let directory_digest = directory.digest();
144        let directory_size = directory.size();
145
146        let hash_map::Entry::Vacant(entry) = self
147            .digest_to_node_idx_size
148            .entry(directory_digest.to_owned())
149        else {
150            warn!("directory received multiple times");
151            return Ok(());
152        };
153
154        let node_idx = self.graph.add_node(directory);
155        entry.insert((node_idx, directory_size));
156
157        if self.insertion_order == DirectoryOrder::RootToLeaves {
158            // If this was the first inserted node, set first_idx.
159            // We also obviously won't find ourselves in [self.rtl_edges_todo],
160            // as we're the first element.
161            if self.graph.node_count() == 1 {
162                let directory = self
163                    .graph
164                    .node_weight(node_idx)
165                    .expect("Snix bug: node not found")
166                    .to_owned();
167                if directory_digest
168                    != self
169                        .exp_root_digest
170                        .take()
171                        .expect("exp_root_digest to be some")
172                {
173                    Err(OrderingError::Unexpected { directory })?
174                }
175            } else if let Some((digest, (size, src_idxs))) =
176                // Check for our own digest in [self.rtl_edges_todo], pop and add edges to graph
177                self.rtl_edges_todo.remove_entry(&directory_digest)
178            {
179                if size != directory_size {
180                    Err(OrderingError::WrongSize { digest, size })?
181                }
182
183                for src_idx in src_idxs {
184                    self.graph.add_edge(src_idx, node_idx, ());
185                }
186            } else {
187                let directory = self
188                    .graph
189                    .node_weight(node_idx)
190                    .expect("Snix bug: node not found")
191                    .to_owned();
192
193                Err(OrderingError::Unexpected { directory })?
194            }
195        }
196
197        // Look at outgoing digests. For this we have to retrieve the previously-inserted Directory again.
198        // We copy out the digests (as all code paths add edges, which mutates the graph).
199        let directory = self
200            .graph
201            .node_weight(node_idx)
202            .expect("Snix bug: node not found");
203        let out_digests_sizes = directory
204            .nodes()
205            .filter_map(|(_, node)| {
206                if let Node::Directory { digest, size } = node {
207                    Some((digest.to_owned(), *size))
208                } else {
209                    None
210                }
211            })
212            .collect::<Vec<_>>();
213
214        for (out_digest, out_size) in out_digests_sizes {
215            match self.insertion_order {
216                DirectoryOrder::RootToLeaves => {
217                    // Add outgoing pointers to the graph, or to [self.rtl_edges_todo], if not yet known.
218                    if let Some(&(out_node_idx, seen_dir_size)) =
219                        self.digest_to_node_idx_size.get(&out_digest)
220                    {
221                        // check size
222                        if seen_dir_size != out_size {
223                            Err(OrderingError::WrongSize {
224                                digest: out_digest,
225                                size: out_size,
226                            })?
227                        }
228
229                        // draw edge
230                        self.graph.add_edge(node_idx, out_node_idx, ());
231                    } else {
232                        // pointer points to something not yet in the graph, add to todo
233                        match self.rtl_edges_todo.entry(out_digest) {
234                            hash_map::Entry::Occupied(mut occupied_entry) => {
235                                let size = occupied_entry.get().0;
236                                if size != out_size {
237                                    Err(OrderingError::WrongSize {
238                                        digest: occupied_entry.key().to_owned(),
239                                        size,
240                                    })?
241                                }
242                                occupied_entry.get_mut().1.push(node_idx);
243                            }
244                            hash_map::Entry::Vacant(vacant_entry) => {
245                                vacant_entry.insert((out_size, vec![node_idx]));
246                            }
247                        }
248                    }
249                }
250                DirectoryOrder::LeavesToRoot => {
251                    // Check all pointers in the currently added directory have already been added previously;
252                    // each sent directory may only refer to directories already sent.
253                    if let Some(&(out_node_idx, seen_dir_size)) =
254                        self.digest_to_node_idx_size.get(&out_digest)
255                    {
256                        // check the size from the pointer matches actual size
257                        if seen_dir_size != out_size {
258                            Err(OrderingError::WrongSize {
259                                digest: out_digest,
260                                size: out_size,
261                            })?
262                        }
263
264                        // draw the edge
265                        self.graph.add_edge(node_idx, out_node_idx, ());
266                    } else {
267                        let directory = self
268                            .graph
269                            .node_weight(node_idx)
270                            .expect("Snix bug: node not found");
271
272                        Err(OrderingError::UnknownLTR {
273                            digest: out_digest,
274                            parent_digest: directory_digest.to_owned(),
275                            path_component: directory
276                                .nodes()
277                                .find_map(|(path_component, node)| {
278                                    if let Node::Directory { digest, .. } = node
279                                        && digest == &out_digest
280                                    {
281                                        Some(path_component)
282                                    } else {
283                                        None
284                                    }
285                                })
286                                .expect("PathComponent not found")
287                                .to_owned(),
288                        })?
289                    }
290                }
291            }
292        }
293
294        Ok(())
295    }
296
297    pub fn build(self) -> Result<DirectoryGraph, OrderingError> {
298        match self.insertion_order {
299            // We must have received the root, and there may not be any rtl_edges_todo.
300            DirectoryOrder::RootToLeaves => {
301                if self.graph.node_count() == 0 {
302                    return Err(OrderingError::EmptySet);
303                }
304
305                if !self.rtl_edges_todo.is_empty() {
306                    return Err(OrderingError::DirectoriesMissing(HashSet::from_iter(
307                        self.rtl_edges_todo.into_keys(),
308                    )));
309                }
310
311                debug_assert_eq!(
312                    self.graph.externals(petgraph::Incoming).count(),
313                    1,
314                    "one incoming"
315                );
316                Ok(DirectoryGraph {
317                    graph: self.graph,
318                    // 1. petgraph invariant: adding nodes or edges does not alter indices
319                    // 2. DirectoryGraph RTL invariant: we only add nodes and edges
320                    // 3. petgraph invariant: nodes are compactly numbered [0, n)
321                    // 4. DirectoryGraph RTL invariant: the root is inserted first
322                    // ∴ the root node is always index 0
323                    root_idx: NodeIndex::new(0),
324                })
325            }
326            DirectoryOrder::LeavesToRoot => {
327                let incomings = self.graph.externals(petgraph::Incoming).collect::<Vec<_>>();
328
329                if incomings.is_empty() {
330                    return Err(OrderingError::EmptySet);
331                }
332
333                if incomings.len() != 1 {
334                    return Err(OrderingError::DirectoriesMissing(HashSet::from_iter(
335                        incomings.iter().map(|i| {
336                            self.graph
337                                .node_weight(*i)
338                                .expect("Snix bug: node not found")
339                                .digest()
340                        }),
341                    )));
342                }
343                Ok(DirectoryGraph {
344                    graph: self.graph,
345                    root_idx: incomings[0],
346                })
347            }
348        }
349    }
350}
351
352#[cfg(test)]
353mod tests {
354    use super::DirectoryOrder;
355    use crate::directoryservice::directory_graph::DirectoryGraphBuilder;
356    use crate::fixtures::{DIRECTORY_A, DIRECTORY_B, DIRECTORY_C};
357    use crate::{Directory, Node};
358    use rstest::rstest;
359    use std::sync::LazyLock;
360
361    pub static BROKEN_PARENT_DIRECTORY: LazyLock<Directory> = LazyLock::new(|| {
362        Directory::try_from_iter([(
363            "foo".try_into().unwrap(),
364            Node::Directory {
365                digest: DIRECTORY_A.digest(),
366                size: DIRECTORY_A.size() + 42, // wrong!
367            },
368        )])
369        .unwrap()
370    });
371
372    #[rstest]
373    /// Uploading no directories at all should fail, the empty graph is invalid.
374    #[case::ltr_empty_graph(DirectoryOrder::LeavesToRoot, &[], false, None)]
375    /// Uploading an empty directory should succeed.
376    #[case::ltr_empty_directory(DirectoryOrder::LeavesToRoot, &[&*DIRECTORY_A], false, Some(vec![&*DIRECTORY_A]))]
377    /// Uploading A, then B (referring to A) should succeed.
378    #[case::ltr_simple_closure(DirectoryOrder::LeavesToRoot, &[&*DIRECTORY_A, &*DIRECTORY_B], false, Some(vec![&*DIRECTORY_A, &*DIRECTORY_B]))]
379    /// Uploading A, then A, then C (referring to A twice) should succeed.
380    /// We pretend to be a dumb client not deduping directories.
381    #[case::ltr_same_child(DirectoryOrder::LeavesToRoot, &[&*DIRECTORY_A, &*DIRECTORY_A, &*DIRECTORY_C], false, Some(vec![&*DIRECTORY_A, &*DIRECTORY_C]))]
382    /// Uploading A, then C (referring to A twice) should succeed.
383    #[case::ltr_same_child_dedup(DirectoryOrder::LeavesToRoot, &[&*DIRECTORY_A, &*DIRECTORY_C], false, Some(vec![&*DIRECTORY_A, &*DIRECTORY_C]))]
384    /// Uploading A, then C (referring to A twice), then B (itself referring to A) should fail during close,
385    /// as B itself would be left unconnected.
386    #[case::ltr_unconnected_node(DirectoryOrder::LeavesToRoot, &[&*DIRECTORY_A, &*DIRECTORY_C, &*DIRECTORY_B], false, None)]
387    /// Uploading B (referring to A) should fail immediately, because A was never uploaded.
388    #[case::ltr_dangling_pointer(DirectoryOrder::LeavesToRoot, &[&*DIRECTORY_B], true, None)]
389    /// Uploading a directory which refers to another Directory with a wrong size should fail.
390    #[case::ltr_wrong_size_in_parent(DirectoryOrder::LeavesToRoot, &[&*DIRECTORY_A, &*BROKEN_PARENT_DIRECTORY], true, None)]
391
392    /// Downloading an empty directory should succeed.
393    #[case::rtl_empty_directory(DirectoryOrder::RootToLeaves, &[&*DIRECTORY_A], false, Some(vec![&*DIRECTORY_A]))]
394    /// Downlading B, then A (referenced by B) should succeed.
395    #[case::rtl_simple_closure(DirectoryOrder::RootToLeaves, &[&*DIRECTORY_B, &*DIRECTORY_A], false, Some(vec![&*DIRECTORY_A, &*DIRECTORY_B]))]
396    /// Downloading C (referring to A twice), then A should succeed.
397    #[case::rtl_same_child_dedup(DirectoryOrder::RootToLeaves, &[&*DIRECTORY_C, &*DIRECTORY_A], false, Some(vec![&*DIRECTORY_A, &*DIRECTORY_C]))]
398    /// Downloading C, then B (both referring to A but not referring to each other) should fail immediately as B has no connection to C (the root)
399    #[case::rtl_unconnected_node(DirectoryOrder::RootToLeaves, &[&*DIRECTORY_C, &*DIRECTORY_B], true, None)]
400    /// Downloading a directory which refers to another Directory with a wrong size should fail.
401    #[case::rtl_wrong_size_in_parent(DirectoryOrder::RootToLeaves, &[&*BROKEN_PARENT_DIRECTORY, &*DIRECTORY_A], true, None)]
402    fn directory_graph(
403        #[case] insertion_order: DirectoryOrder,
404        #[case] directories_to_upload: &[&Directory],
405        #[case] exp_fail_upload_last: bool,
406        #[case] exp_build: Option<Vec<&Directory>>, // Some(_) if finalize successful, None if not.
407    ) {
408        let mut it = directories_to_upload.iter().peekable();
409
410        let mut builder = match insertion_order {
411            // in the RTL case, pull the first element from directories_to_upload and initialize with it
412            DirectoryOrder::RootToLeaves => DirectoryGraphBuilder::new_root_to_leaves(
413                it.peek()
414                    .expect("directories_to_upload to not be empty")
415                    .digest(),
416            ),
417            DirectoryOrder::LeavesToRoot => DirectoryGraphBuilder::new_leaves_to_root(),
418        };
419
420        while let Some(d) = it.next() {
421            if it.peek().is_none() /* is last */ && exp_fail_upload_last {
422                builder
423                    .try_insert((*d).to_owned())
424                    .expect_err("last insert to fail");
425            } else {
426                builder
427                    .try_insert((*d).to_owned())
428                    .expect("insert to succeed");
429            }
430        }
431
432        if exp_fail_upload_last {
433            return;
434        }
435
436        if let Some(exp_drain_ltr) = exp_build {
437            let directory_graph = builder.build().expect("build to succeed");
438
439            // drain
440            let drained_ltr = directory_graph.drain_leaves_to_root().collect::<Vec<_>>();
441
442            assert_eq!(
443                exp_drain_ltr
444                    .iter()
445                    .map(|d| (*d).to_owned())
446                    .collect::<Vec<_>>(),
447                drained_ltr
448            );
449        } else {
450            assert!(builder.build().is_err(), "expected build to fail");
451        }
452    }
453
454    #[test]
455    /// Inserting a firt directory into [DirectoryGraphBuilder] that has a
456    /// different digest than what was specified in `new_root_to_leaves` should fail.
457    fn rtl_wrong_digest() {
458        let mut builder = DirectoryGraphBuilder::new_root_to_leaves(DIRECTORY_B.digest());
459        builder
460            .try_insert(DIRECTORY_A.clone())
461            .expect_err("expect insert of root with wrong digest to fail");
462    }
463}