snix_castore/directoryservice/
object_store.rs

1use std::collections::HashMap;
2use std::collections::hash_map;
3use std::sync::Arc;
4
5use data_encoding::HEXLOWER;
6use futures::SinkExt;
7use futures::StreamExt;
8use futures::TryStreamExt;
9use futures::stream::BoxStream;
10use object_store::{ObjectStore, path::Path};
11use prost::Message;
12use tokio::io::AsyncWriteExt;
13use tokio_util::codec::LengthDelimitedCodec;
14use tonic::async_trait;
15use tracing::{Level, instrument, trace, warn};
16use url::Url;
17
18use super::{Directory, DirectoryPutter, DirectoryService, RootToLeavesValidator};
19use crate::composition::{CompositionContext, ServiceBuilder};
20use crate::directoryservice::directory_graph::DirectoryGraphBuilder;
21use crate::{B3Digest, Node, proto};
22
/// Stores directory closures in an object store.
/// Notably, this makes use of the option to disallow accessing child directories except when
/// fetching them recursively via the top-level directory, since all batched writes
/// (using `put_multiple_start`) are stored in a single object.
/// Directories are stored in a length-delimited format with a 1MiB limit. The length field is a
/// u32 and the directories are stored in root-to-leaves topological order, the same way they will
/// be returned to the client in get_recursive.
#[derive(Clone)]
pub struct ObjectStoreDirectoryService {
    // Instance label, only used to enrich tracing spans (see the `instrument`
    // attributes below).
    instance_name: String,
    // Backing store; behind an Arc so putters returned by `put_multiple_start`
    // can hold their own handle.
    object_store: Arc<dyn ObjectStore>,
    // Prefix under which all directory objects live; combined with the root
    // digest by `derive_dirs_path`.
    base_path: Path,
}
36
37#[instrument(level=Level::TRACE, skip_all,fields(base_path=%base_path,blob.digest=%digest),ret(Display))]
38fn derive_dirs_path(base_path: &Path, digest: &B3Digest) -> Path {
39    base_path
40        .child("dirs")
41        .child("b3")
42        .child(HEXLOWER.encode(&digest.as_slice()[..2]))
43        .child(HEXLOWER.encode(digest.as_slice()))
44}
45
46/// Helper function, parsing protobuf-encoded Directories into [crate::Directory],
47/// if the digest is allowed.
48fn parse_proto_directory<F>(
49    encoded_directory: &[u8],
50    digest_allowed: F,
51) -> Result<crate::Directory, Error>
52where
53    F: Fn(&B3Digest) -> bool,
54{
55    let actual_digest = B3Digest::from(blake3::hash(encoded_directory).as_bytes());
56    if !digest_allowed(&actual_digest) {
57        return Err(Error::UnexpectedDigest(actual_digest));
58    }
59
60    let directory_proto =
61        proto::Directory::decode(encoded_directory).map_err(Error::ProtobufDecode)?;
62
63    Directory::try_from(directory_proto).map_err(Error::DirectoryValidation)
64}
65
/// Maximum size of one length-delimited frame (a single encoded Directory).
/// This must agree with the "1MiB limit" documented on
/// [ObjectStoreDirectoryService]. The previous value carried an extra
/// `* 1000` factor (1000 MiB) contradicting both that doc and this comment.
// NOTE(review): if any existing store was written with frames larger than
// 1 MiB, reads of those objects will now fail — confirm before deploying.
const MAX_FRAME_LENGTH: usize = 1024 * 1024; // 1 MiB
69impl ObjectStoreDirectoryService {
70    /// Constructs a new [ObjectStoreDirectoryService] from a [Url] supported by
71    /// [object_store].
72    /// Any path suffix becomes the base path of the object store.
73    /// additional options, the same as in [object_store::parse_url_opts] can
74    /// be passed.
75    pub fn parse_url_opts<I, K, V>(url: &Url, options: I) -> Result<Self, object_store::Error>
76    where
77        I: IntoIterator<Item = (K, V)>,
78        K: AsRef<str>,
79        V: Into<String>,
80    {
81        let (object_store, path) = object_store::parse_url_opts(url, options)?;
82
83        Ok(Self {
84            instance_name: "root".into(),
85            object_store: Arc::new(object_store),
86            base_path: path,
87        })
88    }
89
90    /// Like [Self::parse_url_opts], except without the options.
91    pub fn parse_url(url: &Url) -> Result<Self, object_store::Error> {
92        Self::parse_url_opts(url, Vec::<(String, String)>::new())
93    }
94
95    pub fn new(instance_name: String, object_store: Arc<dyn ObjectStore>, base_path: Path) -> Self {
96        Self {
97            instance_name,
98            object_store,
99            base_path,
100        }
101    }
102}
103
#[async_trait]
impl DirectoryService for ObjectStoreDirectoryService {
    /// This is the same steps as for get_recursive anyways, so we just call get_recursive and
    /// return the first element of the stream and drop the request.
    /// A missing object surfaces as an empty stream, so this returns Ok(None).
    #[instrument(level = "trace", skip_all, fields(directory.digest = %digest, instance_name = %self.instance_name))]
    async fn get(&self, digest: &B3Digest) -> Result<Option<Directory>, super::Error> {
        self.get_recursive(digest).take(1).next().await.transpose()
    }

    /// Uploads a single directory as a one-element batch.
    #[instrument(level = "trace", skip_all, fields(directory.digest = %directory.digest(), instance_name = %self.instance_name))]
    async fn put(&self, directory: Directory) -> Result<B3Digest, super::Error> {
        // Ensure the directory doesn't contain other directory children
        // (closures with children must go through put_multiple_start, which
        // stores the whole tree in one object).
        if directory
            .nodes()
            .any(|(_, e)| matches!(e, Node::Directory { .. }))
        {
            Err(Error::PutForDirectoryWithChildren)?
        }

        // Reuse the batched path for the single leaf directory.
        let mut handle = self.put_multiple_start();
        handle.put(directory).await?;
        handle.close().await
    }

    /// Streams the closure rooted at `root_directory_digest`, root first,
    /// verifying digests and ordering along the way.
    #[instrument(level = "trace", skip_all, fields(directory.digest = %root_directory_digest, instance_name = %self.instance_name))]
    fn get_recursive(
        &self,
        root_directory_digest: &B3Digest,
    ) -> BoxStream<'_, Result<Directory, super::Error>> {
        // Check that we are not passing on bogus from the object store to the client, and that the
        // trust chain from the root digest to the leaves is intact.
        let dir_path = derive_dirs_path(&self.base_path, root_directory_digest);
        let object_store = &self.object_store;
        let root_directory_digest = *root_directory_digest;

        async_stream::try_stream! {
                // A NotFound object means the closure simply isn't stored:
                // terminate the (empty) stream rather than erroring.
                let bytes_stream = match object_store.get(&dir_path).await {
                    Ok(v) => v.into_stream(),
                    Err(object_store::Error::NotFound { .. }) => {
                        return;
                    }
                    Err(e) => Err(Error::ObjectStore(e))?,
                };

                // get a reader of the response body.
                let r = tokio_util::io::StreamReader::new(bytes_stream);
                let decompressed_stream = async_compression::tokio::bufread::ZstdDecoder::new(r);

                // the subdirectories are stored in a length delimited format
                // (u32 length field, frames capped at MAX_FRAME_LENGTH).
                let mut encoded_directories = LengthDelimitedCodec::builder()
                    .max_frame_length(MAX_FRAME_LENGTH)
                    .length_field_type::<u32>()
                    .new_read(decompressed_stream)
                    .err_into::<Error>();

                // Frames were written root-to-leaves; the validator only lets a
                // directory through if an already-seen parent references it.
                let mut order_validator = RootToLeavesValidator::new_with_root_digest(root_directory_digest);
                while let Some(encoded_directory) = encoded_directories.try_next().await? {
                    // Reject frames whose digest isn't currently expected,
                    // before spending effort on decoding them.
                    let directory = parse_proto_directory(&encoded_directory, |digest| {
                        order_validator.would_accept(digest)
                    })?;

                    order_validator.try_accept(&directory).map_err(Error::DirectoryOrdering)?;

                    yield directory;
                }

                // Ensure the stream actually covered the whole expected closure.
                order_validator.finalize().map_err(Error::DirectoryOrdering)?;
        }.boxed()
    }

    /// Returns a putter that buffers a closure in memory and writes it as a
    /// single object on close.
    #[instrument(skip_all)]
    fn put_multiple_start(&self) -> Box<dyn DirectoryPutter + '_>
    where
        Self: Clone,
    {
        Box::new(ObjectStoreDirectoryPutter::new(
            self.object_store.clone(),
            &self.base_path,
        ))
    }
}
185
/// Module-internal error type. Converted into [super::Error] at the service
/// boundary (a `From` impl elsewhere must exist, given the `?` usage above —
/// not visible in this chunk).
#[derive(thiserror::Error, Debug)]
enum Error {
    /// Malformed configuration, e.g. a URL missing the `objectstore+` scheme prefix.
    #[error("wrong arguments: {0}")]
    WrongConfig(&'static str),
    /// `put()` was called with a directory that has directory children.
    #[error("put() may only be used for directories without children")]
    PutForDirectoryWithChildren,

    /// Closure ordering violated (from the root-to-leaves / leaves-to-root validators).
    #[error("Directory Graph ordering error")]
    DirectoryOrdering(#[from] crate::directoryservice::OrderingError),
    /// A stored frame hashed to a digest we were not expecting.
    #[error("requested directory has unexpected digest {0}")]
    UnexpectedDigest(B3Digest),
    /// Protobuf wire-format decoding failed.
    #[error("failed to decode protobuf: {0}")]
    ProtobufDecode(#[from] prost::DecodeError),
    /// Decoded proto did not validate into a [crate::Directory].
    #[error("failed to validate directory: {0}")]
    DirectoryValidation(#[from] crate::DirectoryError),

    /// put()/close() called after the putter was already closed.
    #[error("DirectoryPutter already closed")]
    DirectoryPutterAlreadyClosed,

    /// Error from the underlying object store.
    #[error("ObjectStore error: {0}")]
    ObjectStore(#[from] object_store::Error),

    /// I/O error, e.g. from the length-delimited codec or compression streams.
    #[error("io error: {0}")]
    IO(#[from] std::io::Error),
}
211
/// Deserializable configuration for an [ObjectStoreDirectoryService],
/// consumed by the composition machinery via [ServiceBuilder].
#[derive(serde::Deserialize)]
#[serde(deny_unknown_fields)]
pub struct ObjectStoreDirectoryServiceConfig {
    /// URL handed to [object_store::parse_url_opts] when building the service.
    object_store_url: String,
    /// Extra options forwarded to the object store (e.g. client config keys);
    /// empty when omitted.
    #[serde(default)]
    object_store_options: HashMap<String, String>,
}
219
220impl TryFrom<url::Url> for ObjectStoreDirectoryServiceConfig {
221    type Error = Box<dyn std::error::Error + Send + Sync>;
222    fn try_from(url: url::Url) -> Result<Self, Self::Error> {
223        // We need to convert the URL to string, strip the prefix there, and then
224        // parse it back as url, as Url::set_scheme() rejects some of the transitions we want to do.
225        let trimmed_url = {
226            let s = url.to_string();
227            let mut url = Url::parse(s.strip_prefix("objectstore+").ok_or(Error::WrongConfig(
228                "Missing objectstore+ part in URI scheme",
229            ))?)?;
230            // trim the query pairs, they might contain credentials or local settings we don't want to send as-is.
231            url.set_query(None);
232            url
233        };
234        Ok(ObjectStoreDirectoryServiceConfig {
235            object_store_url: trimmed_url.into(),
236            object_store_options: url
237                .query_pairs()
238                .into_iter()
239                .map(|(k, v)| (k.to_string(), v.to_string()))
240                .collect(),
241        })
242    }
243}
244
245#[async_trait]
246impl ServiceBuilder for ObjectStoreDirectoryServiceConfig {
247    type Output = dyn DirectoryService;
248    async fn build<'a>(
249        &'a self,
250        instance_name: &str,
251        _context: &CompositionContext,
252    ) -> Result<Arc<dyn DirectoryService>, Box<dyn std::error::Error + Send + Sync + 'static>> {
253        let opts = {
254            let mut opts: HashMap<&str, _> = self
255                .object_store_options
256                .iter()
257                .map(|(k, v)| (k.as_str(), v.as_str()))
258                .collect();
259
260            if let hash_map::Entry::Vacant(e) =
261                opts.entry(object_store::ClientConfigKey::UserAgent.as_ref())
262            {
263                e.insert(crate::USER_AGENT);
264            }
265
266            opts
267        };
268
269        let (object_store, path) =
270            object_store::parse_url_opts(&self.object_store_url.parse()?, opts)?;
271        Ok(Arc::new(ObjectStoreDirectoryService::new(
272            instance_name.to_string(),
273            Arc::new(object_store),
274            path,
275        )))
276    }
277}
278
/// Accumulates a directory closure in memory and, on close, writes it to the
/// object store as one compressed, length-delimited object.
struct ObjectStoreDirectoryPutter<'a> {
    object_store: Arc<dyn ObjectStore>,
    base_path: &'a Path,

    /// `Some` while the putter is open; taken on close so that later calls
    /// fail with [Error::DirectoryPutterAlreadyClosed].
    builder: Option<DirectoryGraphBuilder>,
}
285
286impl<'a> ObjectStoreDirectoryPutter<'a> {
287    fn new(object_store: Arc<dyn ObjectStore>, base_path: &'a Path) -> Self {
288        Self {
289            object_store,
290            base_path,
291            builder: Some(DirectoryGraphBuilder::new_leaves_to_root()),
292        }
293    }
294}
295
#[async_trait]
impl DirectoryPutter for ObjectStoreDirectoryPutter<'_> {
    /// Buffers one directory into the in-memory graph builder; nothing is
    /// written to the store until [Self::close].
    #[instrument(level = "trace", skip_all, fields(directory.digest=%directory.digest()), err)]
    async fn put(&mut self, directory: Directory) -> Result<(), super::Error> {
        // Fails if close() already consumed the builder.
        let builder = self
            .builder
            .as_mut()
            .ok_or_else(|| Error::DirectoryPutterAlreadyClosed)?;

        builder.try_insert(directory)?;

        Ok(())
    }

    /// Finalizes the closure: validates the buffered graph, then uploads it as
    /// a single zstd-compressed, length-delimited object (unless an object for
    /// this root digest already exists). Returns the root digest.
    #[instrument(level = "trace", skip_all, ret, err)]
    async fn close(&mut self) -> Result<B3Digest, super::Error> {
        // Take the builder so any further put()/close() calls error out.
        let builder = self
            .builder
            .take()
            .ok_or_else(|| Error::DirectoryPutterAlreadyClosed)?;

        // Retrieve the validated directories.
        let directory_graph = builder.build()?;
        let root_digest = directory_graph.root().digest();

        let dir_path = derive_dirs_path(self.base_path, &root_digest);

        // head() first: content-addressed objects never change, so an existing
        // object makes the upload redundant.
        match self.object_store.head(&dir_path).await {
            // directory tree already exists, nothing to do
            Ok(_) => {
                trace!("directory tree already exists");
            }

            // directory tree does not yet exist, compress and upload.
            Err(object_store::Error::NotFound { .. }) => {
                trace!("uploading directory tree");

                // Writer pipeline: length-delimited frames -> zstd -> buffered
                // object-store upload.
                let object_store_writer =
                    object_store::buffered::BufWriter::new(self.object_store.clone(), dir_path);
                let compressed_writer =
                    async_compression::tokio::write::ZstdEncoder::new(object_store_writer);
                let mut directories_sink = LengthDelimitedCodec::builder()
                    .max_frame_length(MAX_FRAME_LENGTH)
                    .length_field_type::<u32>()
                    .new_write(compressed_writer);

                // Drain the graph in *root-to-leaves* order, as that's how we write it to storage.
                for directory in directory_graph.drain_root_to_leaves() {
                    directories_sink
                        .send(proto::Directory::from(directory).encode_to_vec().into())
                        .await?;
                }

                // Shut down the encoder explicitly so trailing compressed data
                // and the upload itself are flushed before we report success.
                let mut compressed_writer = directories_sink.into_inner();
                compressed_writer.shutdown().await?;
            }
            // other error
            Err(err) => Err(Error::ObjectStore(err))?,
        }

        Ok(root_digest)
    }
}