snix_castore/directoryservice/
directory_graph.rs

1use petgraph::{
2    graph::{DiGraph, NodeIndex},
3    visit::{Bfs, DfsPostOrder, Walker},
4};
5use std::collections::{HashMap, HashSet, hash_map};
6use tracing::{instrument, warn};
7
8use crate::directoryservice::order_validator::OrderingError;
9use crate::{B3Digest, Directory, Node};
10
11#[derive(PartialEq, Eq, Debug)]
12pub enum DirectoryOrder {
13    RootToLeaves,
14    LeavesToRoot,
15}
16
17/// This represents a full (and validated) graph of [Directory] nodes.
18/// It can be constructed using [DirectoryGraphBuilder], and is normally used to
19/// convert from one order to the other.
20/// If you just want to validate an order without keeping the results,
21/// `RootToLeavesValidator` or `LeavesToRootValidator` can be used.
22#[derive(Default)]
23pub struct DirectoryGraph {
24    // A directed graph, using Directory as node weight.
25    // Edges point from parents to children.
26    graph: DiGraph<Directory, ()>,
27
28    // Points to the root.
29    root_idx: NodeIndex,
30}
31
32impl DirectoryGraph {
33    /// Drains the graph, returning node weights in the chosen [DirectoryOrder].
34    #[instrument(level = "trace", skip_all)]
35    pub fn drain(self, order: DirectoryOrder) -> impl Iterator<Item = Directory> {
36        let order = match order {
37            DirectoryOrder::RootToLeaves => {
38                // do a BFS traversal of the graph, starting with the root node
39                Bfs::new(&self.graph, self.root_idx)
40                    .iter(&self.graph)
41                    .collect::<Vec<_>>()
42            }
43            DirectoryOrder::LeavesToRoot => {
44                // do a DFS Post-Order traversal of the graph, starting with the root node
45                DfsPostOrder::new(&self.graph, self.root_idx)
46                    .iter(&self.graph)
47                    .collect::<Vec<_>>()
48            }
49        };
50
51        let (mut nodes, _edges) = self.graph.into_nodes_edges();
52        order
53            .into_iter()
54            .map(move |i| std::mem::take(&mut nodes[i.index()].weight))
55    }
56
57    pub fn root(&self) -> &Directory {
58        self.graph
59            .node_weight(self.root_idx)
60            .expect("Snix bug: root not found")
61    }
62}
63
64/// This allows constructing a [DirectoryGraph].
65/// After deciding on the insertion order (via [Self::new_with_insertion_order]),
66/// different [Directory] can be passed to [Self::insert].
67/// A [Self::build] consumes the builder, returning a validated [DirectoryGraph],
68/// or an error.
69/// The resulting [DirectoryGraph] can be used to drain the graph in either order.
70///
71/// It does do the same checks as `RootToLeavesValidator` and `LeavesToRootValidator`
72/// (insertion order, completeness, connectivity, correct sizes referenced).
73// NOTE: a child is always smaller than its parent
74pub struct DirectoryGraphBuilder {
75    /// The order of [Directory] elements [Self::insert] is called with.
76    insertion_order: DirectoryOrder,
77
78    /// A directed graph, using Directory as node weight.
79    /// Edges point from parents to children.
80    graph: DiGraph<Directory, ()>,
81
82    /// A lookup table from directory digest to node index and size.
83    /// The size is stored to avoid having to calculate it multiple times.
84    digest_to_node_idx_size: HashMap<B3Digest, (NodeIndex, u64)>,
85
86    /// A map from digest to size and all node indexes that are pointing to it.
87    /// Used in the RTL case for all unfinished edges.
88    rtl_edges_todo: HashMap<B3Digest, (u64, Vec<NodeIndex>)>,
89
90    /// Points to the first node index.
91    /// Populated in the RTL case only.
92    first_idx: Option<NodeIndex>,
93}
94
95impl DirectoryGraphBuilder {
96    pub fn new_with_insertion_order(insertion_order: DirectoryOrder) -> Self {
97        Self {
98            insertion_order,
99            graph: Default::default(),
100            digest_to_node_idx_size: Default::default(),
101            rtl_edges_todo: Default::default(),
102            first_idx: None,
103        }
104    }
105
106    #[instrument(level = "trace", skip_all, fields(directory.digest=%directory.digest()))]
107    pub fn insert(&mut self, directory: Directory) -> Result<(), OrderingError> {
108        let directory_digest = directory.digest();
109        let directory_size = directory.size();
110
111        let entry = match self
112            .digest_to_node_idx_size
113            .entry(directory_digest.to_owned())
114        {
115            hash_map::Entry::Occupied(_) => {
116                warn!("directory received multiple times");
117                return Ok(());
118            }
119            hash_map::Entry::Vacant(vacant_entry) => vacant_entry,
120        };
121
122        let node_idx = self.graph.add_node(directory);
123        entry.insert((node_idx, directory_size));
124
125        if self.insertion_order == DirectoryOrder::RootToLeaves {
126            // If this was the first inserted node, set first_idx.
127            // We also obviously won't find ourselves in [self.rtl_edges_todo],
128            // as we're the first element.
129            if self.graph.node_count() == 1 {
130                self.first_idx = Some(node_idx);
131            } else if let Some((digest, (size, src_idxs))) =
132                // Check for our own digest in [self.rtl_edges_todo], pop and add edges to graph
133                self.rtl_edges_todo.remove_entry(&directory_digest)
134            {
135                if size != directory_size {
136                    Err(OrderingError::WrongSize { digest, size })?
137                }
138
139                for src_idx in src_idxs {
140                    self.graph.add_edge(src_idx, node_idx, ());
141                }
142            } else {
143                let directory = self
144                    .graph
145                    .node_weight(node_idx)
146                    .expect("Snix bug: node not found")
147                    .to_owned();
148
149                Err(OrderingError::Unexpected { directory })?
150            }
151        }
152
153        // Look at outgoing digests. For this we have to retrieve the previously-inserted Directory again.
154        // We copy out the digests (as all code paths add edges, which mutates the graph).
155        let directory = self
156            .graph
157            .node_weight(node_idx)
158            .expect("Snix bug: node not found");
159        let out_digests_sizes = directory
160            .nodes()
161            .filter_map(|(_, node)| {
162                if let Node::Directory { digest, size } = node {
163                    Some((digest.to_owned(), *size))
164                } else {
165                    None
166                }
167            })
168            .collect::<Vec<_>>();
169
170        for (out_digest, out_size) in out_digests_sizes {
171            match self.insertion_order {
172                DirectoryOrder::RootToLeaves => {
173                    // Add outgoing pointers to the graph, or to [self.rtl_edges_todo], if not yet known.
174                    if let Some((out_node_idx, seen_dir_size)) =
175                        self.digest_to_node_idx_size.get(&out_digest)
176                    {
177                        // check size
178                        if *seen_dir_size != out_size {
179                            Err(OrderingError::WrongSize {
180                                digest: out_digest,
181                                size: out_size,
182                            })?
183                        }
184
185                        // draw edge
186                        self.graph.add_edge(node_idx, *out_node_idx, ());
187                    } else {
188                        // pointer points to something not yet in the graph, add to todo
189                        match self.rtl_edges_todo.entry(out_digest) {
190                            hash_map::Entry::Occupied(mut occupied_entry) => {
191                                let size = occupied_entry.get().0;
192                                if size != out_size {
193                                    Err(OrderingError::WrongSize {
194                                        digest: occupied_entry.key().to_owned(),
195                                        size,
196                                    })?
197                                }
198                                occupied_entry.get_mut().1.push(node_idx);
199                            }
200                            hash_map::Entry::Vacant(vacant_entry) => {
201                                vacant_entry.insert((out_size, vec![node_idx]));
202                            }
203                        }
204                    }
205                }
206                DirectoryOrder::LeavesToRoot => {
207                    // Check all pointers in the currently added directory have already been added previously;
208                    // each sent directory may only refer to directories already sent.
209                    if let Some((out_node_idx, seen_dir_size)) =
210                        self.digest_to_node_idx_size.get(&out_digest)
211                    {
212                        // check the size from the pointer matches actual size
213                        if *seen_dir_size != out_size {
214                            Err(OrderingError::WrongSize {
215                                digest: out_digest,
216                                size: out_size,
217                            })?
218                        }
219
220                        // draw the edge
221                        self.graph.add_edge(node_idx, *out_node_idx, ());
222                    } else {
223                        let directory = self
224                            .graph
225                            .node_weight(node_idx)
226                            .expect("Snix bug: node not found");
227
228                        Err(OrderingError::UnknownLTR {
229                            digest: out_digest.clone(),
230                            parent_digest: directory_digest.to_owned(),
231                            path_component: directory
232                                .nodes()
233                                .find_map(|(path_component, node)| {
234                                    if let Node::Directory { digest, .. } = node
235                                        && digest == &out_digest
236                                    {
237                                        Some(path_component)
238                                    } else {
239                                        None
240                                    }
241                                })
242                                .expect("PathComponent not found")
243                                .to_owned(),
244                        })?
245                    }
246                }
247            }
248        }
249
250        Ok(())
251    }
252
253    pub fn build(self) -> Result<DirectoryGraph, OrderingError> {
254        match self.insertion_order {
255            // We must have received the root, and there may not be any rtl_edges_todo.
256            DirectoryOrder::RootToLeaves => match self.first_idx {
257                None => Err(OrderingError::NoNodesReceived),
258                Some(root_idx) => {
259                    if !self.rtl_edges_todo.is_empty() {
260                        return Err(OrderingError::MoreThanOnePending(HashSet::from_iter(
261                            self.rtl_edges_todo.into_keys(),
262                        )));
263                    }
264
265                    debug_assert_eq!(
266                        self.graph.externals(petgraph::Incoming).count(),
267                        1,
268                        "one incoming"
269                    );
270                    Ok(DirectoryGraph {
271                        graph: self.graph,
272                        root_idx,
273                    })
274                }
275            },
276            DirectoryOrder::LeavesToRoot => {
277                let incomings = self.graph.externals(petgraph::Incoming).collect::<Vec<_>>();
278
279                if incomings.is_empty() {
280                    return Err(OrderingError::NoNodesReceived);
281                }
282
283                if incomings.len() != 1 {
284                    return Err(OrderingError::MoreThanOnePending(HashSet::from_iter(
285                        incomings.iter().map(|i| {
286                            self.graph
287                                .node_weight(*i)
288                                .expect("Snix bug: node not found")
289                                .digest()
290                        }),
291                    )));
292                }
293                Ok(DirectoryGraph {
294                    graph: self.graph,
295                    root_idx: incomings[0],
296                })
297            }
298        }
299    }
300}
301
302#[cfg(test)]
303mod tests {
304    use crate::directoryservice::DirectoryOrder;
305    use crate::directoryservice::directory_graph::DirectoryGraphBuilder;
306    use crate::fixtures::{DIRECTORY_A, DIRECTORY_B, DIRECTORY_C};
307    use crate::{Directory, Node};
308    use rstest::rstest;
309    use std::sync::LazyLock;
310
311    pub static BROKEN_PARENT_DIRECTORY: LazyLock<Directory> = LazyLock::new(|| {
312        Directory::try_from_iter([(
313            "foo".try_into().unwrap(),
314            Node::Directory {
315                digest: DIRECTORY_A.digest(),
316                size: DIRECTORY_A.size() + 42, // wrong!
317            },
318        )])
319        .unwrap()
320    });
321
322    #[rstest]
323    /// Uploading no directories at all should fail, the empty graph is invalid.
324    #[case::ltr_empty_graph(DirectoryOrder::LeavesToRoot, &[], false, None)]
325    /// Uploading an empty directory should succeed.
326    #[case::ltr_empty_directory(DirectoryOrder::LeavesToRoot, &[&*DIRECTORY_A], false, Some(vec![&*DIRECTORY_A]))]
327    /// Uploading A, then B (referring to A) should succeed.
328    #[case::ltr_simple_closure(DirectoryOrder::LeavesToRoot, &[&*DIRECTORY_A, &*DIRECTORY_B], false, Some(vec![&*DIRECTORY_A, &*DIRECTORY_B]))]
329    /// Uploading A, then A, then C (referring to A twice) should succeed.
330    /// We pretend to be a dumb client not deduping directories.
331    #[case::ltr_same_child(DirectoryOrder::LeavesToRoot, &[&*DIRECTORY_A, &*DIRECTORY_A, &*DIRECTORY_C], false, Some(vec![&*DIRECTORY_A, &*DIRECTORY_C]))]
332    /// Uploading A, then C (referring to A twice) should succeed.
333    #[case::ltr_same_child_dedup(DirectoryOrder::LeavesToRoot, &[&*DIRECTORY_A, &*DIRECTORY_C], false, Some(vec![&*DIRECTORY_A, &*DIRECTORY_C]))]
334    /// Uploading A, then C (referring to A twice), then B (itself referring to A) should fail during close,
335    /// as B itself would be left unconnected.
336    #[case::ltr_unconnected_node(DirectoryOrder::LeavesToRoot, &[&*DIRECTORY_A, &*DIRECTORY_C, &*DIRECTORY_B], false, None)]
337    /// Uploading B (referring to A) should fail immediately, because A was never uploaded.
338    #[case::ltr_dangling_pointer(DirectoryOrder::LeavesToRoot, &[&*DIRECTORY_B], true, None)]
339    /// Uploading a directory which refers to another Directory with a wrong size should fail.
340    #[case::ltr_wrong_size_in_parent(DirectoryOrder::LeavesToRoot, &[&*DIRECTORY_A, &*BROKEN_PARENT_DIRECTORY], true, None)]
341    /// Downloading an empty directory should succeed.
342    #[case::rtl_empty_directory(DirectoryOrder::RootToLeaves, &[&*DIRECTORY_A], false, Some(vec![&*DIRECTORY_A]))]
343    /// Downlading B, then A (referenced by B) should succeed.
344    #[case::rtl_simple_closure(DirectoryOrder::RootToLeaves, &[&*DIRECTORY_B, &*DIRECTORY_A], false, Some(vec![&*DIRECTORY_A, &*DIRECTORY_B]))]
345    /// Downloading C (referring to A twice), then A should succeed.
346    #[case::rtl_same_child_dedup(DirectoryOrder::RootToLeaves, &[&*DIRECTORY_C, &*DIRECTORY_A], false, Some(vec![&*DIRECTORY_A, &*DIRECTORY_C]))]
347    /// Downloading C, then B (both referring to A but not referring to each other) should fail immediately as B has no connection to C (the root)
348    #[case::rtl_unconnected_node(DirectoryOrder::RootToLeaves, &[&*DIRECTORY_C, &*DIRECTORY_B], true, None)]
349    /// Downloading a directory which refers to another Directory with a wrong size should fail.
350    #[case::rtl_wrong_size_in_parent(DirectoryOrder::RootToLeaves, &[&*BROKEN_PARENT_DIRECTORY, &*DIRECTORY_A], true, None)]
351    fn directory_graph(
352        #[case] insertion_order: DirectoryOrder,
353        #[case] directories_to_upload: &[&Directory],
354        #[case] exp_fail_upload_last: bool,
355        #[case] exp_build: Option<Vec<&Directory>>, // Some(_) if finalize successful, None if not.
356    ) {
357        let mut builder = DirectoryGraphBuilder::new_with_insertion_order(insertion_order);
358        let mut it = directories_to_upload.iter().peekable();
359
360        while let Some(d) = it.next() {
361            if it.peek().is_none() /* is last */ && exp_fail_upload_last {
362                builder
363                    .insert((*d).to_owned())
364                    .expect_err("last insert to fail");
365            } else {
366                builder.insert((*d).to_owned()).expect("insert to succeed");
367            }
368        }
369
370        if exp_fail_upload_last {
371            return;
372        }
373
374        if let Some(exp_drain_ltr) = exp_build {
375            let directory_graph = builder.build().expect("build to succeed");
376
377            // drain
378            let drained_ltr = directory_graph
379                .drain(super::DirectoryOrder::LeavesToRoot)
380                .collect::<Vec<_>>();
381
382            assert_eq!(
383                exp_drain_ltr
384                    .iter()
385                    .map(|d| (*d).to_owned())
386                    .collect::<Vec<_>>(),
387                drained_ltr
388            );
389        } else {
390            assert!(builder.build().is_err(), "expected build to fail");
391        }
392    }
393}