snix_castore/directoryservice/object_store.rs

use std::collections::HashMap;
use std::collections::hash_map;
use std::sync::Arc;

use async_stream::try_stream;
use data_encoding::HEXLOWER;
use futures::SinkExt;
use futures::StreamExt;
use futures::TryFutureExt;
use futures::TryStreamExt;
use futures::future::Either;
use futures::stream::BoxStream;
use object_store::{ObjectStore, path::Path};
use prost::Message;
use tokio::io::AsyncWriteExt;
use tokio_util::codec::LengthDelimitedCodec;
use tonic::async_trait;
use tracing::{Level, instrument, trace, warn};
use url::Url;

use super::{Directory, DirectoryPutter, DirectoryService, RootToLeavesValidator};
use crate::composition::{CompositionContext, ServiceBuilder};
use crate::directoryservice::directory_graph::DirectoryGraphBuilder;
use crate::directoryservice::directory_graph::DirectoryOrder;
use crate::{B3Digest, Error, Node, proto};

/// Stores directory closures in an object store.
/// Notably, child directories cannot be accessed individually; they can only be fetched
/// recursively via their top-level directory, since each batched write
/// (using `put_multiple_start`) is stored as a single object.
/// Directories are stored in a length-delimited format (frames capped at `MAX_FRAME_LENGTH`).
/// The length field is a u32 and the directories are stored in root-to-leaves topological
/// order, the same way they will be returned to the client in get_recursive.
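///
/// A minimal usage sketch (not a doctest): it assumes the in-memory `object_store` backend
/// and an already-constructed leaf `Directory` value named `some_directory`.
///
/// ```ignore
/// use std::sync::Arc;
/// use object_store::{memory::InMemory, path::Path};
///
/// let svc = ObjectStoreDirectoryService::new(
///     "root".into(),
///     Arc::new(InMemory::new()),
///     Path::from("castore"),
/// );
///
/// // Upload a closure (leaves to root) in one batch, then retrieve the root digest.
/// let mut putter = svc.put_multiple_start();
/// putter.put(some_directory).await?;
/// let root_digest = putter.close().await?;
/// ```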
#[derive(Clone)]
pub struct ObjectStoreDirectoryService {
    instance_name: String,
    object_store: Arc<dyn ObjectStore>,
    base_path: Path,
}

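/// Derives the object-store key under which a directory closure is stored, from its root
/// digest. Illustrative layout (hypothetical digest): a digest whose first two bytes are
/// `0xb3 0x2a` is keyed as `<base_path>/dirs/b3/b32a/<full lowercase hex digest>`.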
#[instrument(level = Level::TRACE, skip_all, fields(base_path = %base_path, directory.digest = %digest), ret(Display))]
fn derive_dirs_path(base_path: &Path, digest: &B3Digest) -> Path {
    base_path
        .child("dirs")
        .child("b3")
        .child(HEXLOWER.encode(&digest.as_slice()[..2]))
        .child(HEXLOWER.encode(digest.as_slice()))
}

/// Helper function parsing a protobuf-encoded Directory into a [crate::Directory],
/// but only if its digest passes the `digest_allowed` check.
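///
/// A minimal sketch of a call site (the `expected` root digest and the `frame` buffer are
/// assumed to come from the caller):
///
/// ```ignore
/// let directory = parse_proto_directory(&frame, |digest| digest == &expected)?;
/// ```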
fn parse_proto_directory<F>(
    encoded_directory: &[u8],
    digest_allowed: F,
) -> Result<crate::Directory, Error>
where
    F: Fn(&B3Digest) -> bool,
{
    let actual_digest = B3Digest::from(blake3::hash(encoded_directory).as_bytes());
    if !digest_allowed(&actual_digest) {
        return Err(crate::Error::StorageError(
            "unexpected directory digest".to_string(),
        ));
    }

    let directory_proto = proto::Directory::decode(encoded_directory).map_err(|e| {
        warn!("unable to parse directory {}: {}", actual_digest, e);
        Error::StorageError(e.to_string())
    })?;

    Directory::try_from(directory_proto).map_err(|e| {
        warn!("unable to convert directory {}: {}", actual_digest, e);
        Error::StorageError(e.to_string())
    })
}

#[allow(clippy::identity_op)]
const MAX_FRAME_LENGTH: usize = 1 * 1024 * 1024 * 1000; // 1000 MiB

impl ObjectStoreDirectoryService {
    /// Constructs a new [ObjectStoreDirectoryService] from a [Url] supported by
    /// [object_store].
    /// Any path suffix becomes the base path of the object store.
    /// Additional options, the same as in [object_store::parse_url_opts], can
    /// be passed.
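    ///
    /// A sketch, assuming an S3-style backend (bucket name, prefix and the `region` option
    /// are placeholders):
    ///
    /// ```ignore
    /// let url = url::Url::parse("s3://some-bucket/some-prefix")?;
    /// let svc = ObjectStoreDirectoryService::parse_url_opts(&url, [("region", "eu-west-1")])?;
    /// ```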
    pub fn parse_url_opts<I, K, V>(url: &Url, options: I) -> Result<Self, object_store::Error>
    where
        I: IntoIterator<Item = (K, V)>,
        K: AsRef<str>,
        V: Into<String>,
    {
        let (object_store, path) = object_store::parse_url_opts(url, options)?;

        Ok(Self {
            instance_name: "root".into(),
            object_store: Arc::new(object_store),
            base_path: path,
        })
    }

    /// Like [Self::parse_url_opts], except without the options.
    pub fn parse_url(url: &Url) -> Result<Self, object_store::Error> {
        Self::parse_url_opts(url, Vec::<(String, String)>::new())
    }

    pub fn new(instance_name: String, object_store: Arc<dyn ObjectStore>, base_path: Path) -> Self {
        Self {
            instance_name,
            object_store,
            base_path,
        }
    }
}

#[async_trait]
impl DirectoryService for ObjectStoreDirectoryService {
    /// This requires the same steps as get_recursive anyway, so we just call get_recursive,
    /// return the first element of the stream and drop the rest.
    #[instrument(level = "trace", skip_all, fields(directory.digest = %digest, instance_name = %self.instance_name))]
    async fn get(&self, digest: &B3Digest) -> Result<Option<Directory>, Error> {
        self.get_recursive(digest).take(1).next().await.transpose()
    }

    #[instrument(level = "trace", skip_all, fields(directory.digest = %directory.digest(), instance_name = %self.instance_name))]
    async fn put(&self, directory: Directory) -> Result<B3Digest, Error> {
        // Ensure the directory doesn't contain any child directories.
        if directory
            .nodes()
            .any(|(_, e)| matches!(e, Node::Directory { .. }))
        {
            return Err(Error::InvalidRequest(
                "only put_multiple_start is supported by the ObjectStoreDirectoryService for directories with children".into(),
            ));
        }

        let mut handle = self.put_multiple_start();
        handle.put(directory).await?;
        handle.close().await
    }

    #[instrument(level = "trace", skip_all, fields(directory.digest = %root_directory_digest, instance_name = %self.instance_name))]
    fn get_recursive(
        &self,
        root_directory_digest: &B3Digest,
    ) -> BoxStream<'static, Result<Directory, Error>> {
        // Check that we are not passing on bogus data from the object store to the client,
        // and that the trust chain from the root digest to the leaves is intact.
        let dir_path = derive_dirs_path(&self.base_path, root_directory_digest);
        let object_store = self.object_store.clone();
        let root_directory_digest = root_directory_digest.to_owned();

        Box::pin(
            (async move {
                let stream = match object_store.get(&dir_path).await {
                    Ok(v) => v.into_stream(),
                    Err(object_store::Error::NotFound { .. }) => {
                        return Ok(Either::Left(futures::stream::empty()));
                    }
                    Err(e) => return Err(std::io::Error::from(e).into()),
                };

                // Get a reader of the response body.
                let r = tokio_util::io::StreamReader::new(stream);
                let decompressed_stream = async_compression::tokio::bufread::ZstdDecoder::new(r);

                // The subdirectories are stored in a length-delimited format.
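                // (Each frame is a big-endian u32 length prefix followed by one
                // protobuf-encoded Directory; the object as a whole is zstd-compressed.)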
                let mut encoded_directories = LengthDelimitedCodec::builder()
                    .max_frame_length(MAX_FRAME_LENGTH)
                    .length_field_type::<u32>()
                    .new_read(decompressed_stream)
                    .err_into::<Error>();

                Ok(Either::Right(try_stream! {
                    let mut order_validator = if let Some(encoded_directory) = encoded_directories.try_next().await? {
                        let directory = parse_proto_directory(&encoded_directory, |digest| {
                            digest == &root_directory_digest
                        })?;

                        let order_validator = RootToLeavesValidator::new_with_root(&directory);
                        yield directory;
                        order_validator
                    } else {
                        // no elements in stream
                        Err(Error::StorageError("no directories stored".to_string()))?
                    };

                    while let Some(encoded_directory) = encoded_directories.try_next().await? {
                        let directory = parse_proto_directory(&encoded_directory, |digest| {
                            order_validator.would_accept(digest)
                        })?;

                        order_validator.try_accept(&directory).map_err(|e| Error::StorageError(e.to_string()))?;

                        yield directory;
                    }

                    order_validator.finalize().map_err(|e| Error::StorageError(e.to_string()))?;
                }))
            })
            .try_flatten_stream(),
        )
    }

    #[instrument(skip_all)]
    fn put_multiple_start(&self) -> Box<dyn DirectoryPutter + '_>
    where
        Self: Clone,
    {
        Box::new(ObjectStoreDirectoryPutter::new(
            self.object_store.clone(),
            &self.base_path,
        ))
    }
}

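/// Deserializable configuration for an [ObjectStoreDirectoryService], as produced by the
/// `TryFrom<url::Url>` impl below.
///
/// A sketch of the accepted URL shape (bucket and option are made up for illustration):
///
/// ```ignore
/// // The "objectstore+" prefix is stripped; query pairs become object_store_options.
/// let url = url::Url::parse("objectstore+s3://some-bucket/some-prefix?region=eu-west-1")?;
/// let config = ObjectStoreDirectoryServiceConfig::try_from(url)?;
/// ```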
#[derive(serde::Deserialize)]
#[serde(deny_unknown_fields)]
pub struct ObjectStoreDirectoryServiceConfig {
    object_store_url: String,
    #[serde(default)]
    object_store_options: HashMap<String, String>,
}

impl TryFrom<url::Url> for ObjectStoreDirectoryServiceConfig {
    type Error = Box<dyn std::error::Error + Send + Sync>;
    fn try_from(url: url::Url) -> Result<Self, Self::Error> {
        // We need to convert the URL to a string, strip the prefix there, and then parse it
        // back as a URL, as Url::set_scheme() rejects some of the transitions we want to do.
        let trimmed_url = {
            let s = url.to_string();
            let mut url = Url::parse(
                s.strip_prefix("objectstore+")
                    .ok_or(Error::StorageError("Missing objectstore uri".into()))?,
            )?;
            // Trim the query pairs; they might contain credentials or local settings we don't want to send as-is.
            url.set_query(None);
            url
        };
        Ok(ObjectStoreDirectoryServiceConfig {
            object_store_url: trimmed_url.into(),
            object_store_options: url
                .query_pairs()
                .into_iter()
                .map(|(k, v)| (k.to_string(), v.to_string()))
                .collect(),
        })
    }
}

#[async_trait]
impl ServiceBuilder for ObjectStoreDirectoryServiceConfig {
    type Output = dyn DirectoryService;
    async fn build<'a>(
        &'a self,
        instance_name: &str,
        _context: &CompositionContext,
    ) -> Result<Arc<dyn DirectoryService>, Box<dyn std::error::Error + Send + Sync + 'static>> {
        let opts = {
            let mut opts: HashMap<&str, _> = self
                .object_store_options
                .iter()
                .map(|(k, v)| (k.as_str(), v.as_str()))
                .collect();

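            // Default the User-Agent to this crate's, unless one was configured explicitly.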
            if let hash_map::Entry::Vacant(e) =
                opts.entry(object_store::ClientConfigKey::UserAgent.as_ref())
            {
                e.insert(crate::USER_AGENT);
            }

            opts
        };

        let (object_store, path) =
            object_store::parse_url_opts(&self.object_store_url.parse()?, opts)?;
        Ok(Arc::new(ObjectStoreDirectoryService::new(
            instance_name.to_string(),
            Arc::new(object_store),
            path,
        )))
    }
}

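/// [DirectoryPutter] buffering the whole closure in memory (via [DirectoryGraphBuilder]) and
/// uploading it as a single zstd-compressed, length-delimited object on close.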
struct ObjectStoreDirectoryPutter<'a> {
    object_store: Arc<dyn ObjectStore>,
    base_path: &'a Path,

    builder: Option<DirectoryGraphBuilder>,
}

impl<'a> ObjectStoreDirectoryPutter<'a> {
    fn new(object_store: Arc<dyn ObjectStore>, base_path: &'a Path) -> Self {
        Self {
            object_store,
            base_path,
            builder: Some(DirectoryGraphBuilder::new_with_insertion_order(
                DirectoryOrder::LeavesToRoot,
            )),
        }
    }
}

#[async_trait]
impl DirectoryPutter for ObjectStoreDirectoryPutter<'_> {
    #[instrument(level = "trace", skip_all, fields(directory.digest = %directory.digest()), err)]
    async fn put(&mut self, directory: Directory) -> Result<(), Error> {
        let builder = self
            .builder
            .as_mut()
            .ok_or_else(|| Error::StorageError("already closed".to_string()))?;

        builder.insert(directory)?;

        Ok(())
    }

    #[instrument(level = "trace", skip_all, ret, err)]
    async fn close(&mut self) -> Result<B3Digest, Error> {
        let builder = self
            .builder
            .take()
            .ok_or_else(|| Error::StorageError("already closed".to_string()))?;

        // Retrieve the validated directories.
        let directory_graph = builder.build()?;
        let root_digest = directory_graph.root().digest();

        let dir_path = derive_dirs_path(self.base_path, &root_digest);

        match self.object_store.head(&dir_path).await {
            // directory tree already exists, nothing to do
            Ok(_) => {
                trace!("directory tree already exists");
            }

            // directory tree does not yet exist, compress and upload.
            Err(object_store::Error::NotFound { .. }) => {
                trace!("uploading directory tree");

                let object_store_writer =
                    object_store::buffered::BufWriter::new(self.object_store.clone(), dir_path);
                let compressed_writer =
                    async_compression::tokio::write::ZstdEncoder::new(object_store_writer);
                let mut directories_sink = LengthDelimitedCodec::builder()
                    .max_frame_length(MAX_FRAME_LENGTH)
                    .length_field_type::<u32>()
                    .new_write(compressed_writer);

                for directory in directory_graph.drain(DirectoryOrder::RootToLeaves) {
                    directories_sink
                        .send(proto::Directory::from(directory).encode_to_vec().into())
                        .await?;
                }

                let mut compressed_writer = directories_sink.into_inner();
                compressed_writer.shutdown().await?;
            }
            // other error
            Err(err) => Err(std::io::Error::from(err))?,
        }

        Ok(root_digest)
    }
}