//! snix_castore/directoryservice/object_store.rs

1use std::collections::HashMap;
2use std::collections::hash_map;
3use std::sync::Arc;
4
5use data_encoding::HEXLOWER;
6use futures::SinkExt;
7use futures::StreamExt;
8use futures::TryFutureExt;
9use futures::TryStreamExt;
10use futures::future::Either;
11use futures::stream::BoxStream;
12use object_store::{ObjectStore, path::Path};
13use prost::Message;
14use tokio::io::AsyncWriteExt;
15use tokio_util::codec::LengthDelimitedCodec;
16use tonic::async_trait;
17use tracing::{Level, instrument, trace, warn};
18use url::Url;
19
20use super::{
21    Directory, DirectoryGraph, DirectoryPutter, DirectoryService, LeavesToRootValidator,
22    RootToLeavesValidator,
23};
24use crate::composition::{CompositionContext, ServiceBuilder};
25use crate::{B3Digest, Error, Node, proto};
26
/// Stores directory closures in an object store.
/// Notably, this makes use of the option to disallow accessing child directories except when
/// fetching them recursively via the top-level directory, since all batched writes
/// (using `put_multiple_start`) are stored in a single object.
/// Directories are stored in a length-delimited format (u32 length prefix, frames capped
/// at [MAX_FRAME_LENGTH]) in root-to-leaves topological order, the same way they will
/// be returned to the client in get_recursive.
#[derive(Clone)]
pub struct ObjectStoreDirectoryService {
    // Instance name, only used in tracing fields.
    instance_name: String,
    object_store: Arc<dyn ObjectStore>,
    // Prefix inside the store under which all objects are placed.
    base_path: Path,
}
40
41#[instrument(level=Level::TRACE, skip_all,fields(base_path=%base_path,blob.digest=%digest),ret(Display))]
42fn derive_dirs_path(base_path: &Path, digest: &B3Digest) -> Path {
43    base_path
44        .child("dirs")
45        .child("b3")
46        .child(HEXLOWER.encode(&digest.as_slice()[..2]))
47        .child(HEXLOWER.encode(digest.as_slice()))
48}
49
/// Maximum size of a single length-delimited frame (one serialized directory
/// proto) accepted by both the reader in `get_recursive` and the writer in
/// `DirectoryPutter::close`: 1000 MiB.
///
/// NOTE(review): the previous inline comment (and the struct-level docs)
/// claimed a 1 MiB limit, but the value has always been
/// `1 * 1024 * 1024 * 1000` = 1000 MiB — confirm which limit is intended
/// before changing either.
const MAX_FRAME_LENGTH: usize = 1000 * 1024 * 1024; // 1000 MiB
53impl ObjectStoreDirectoryService {
54    /// Constructs a new [ObjectStoreDirectoryService] from a [Url] supported by
55    /// [object_store].
56    /// Any path suffix becomes the base path of the object store.
57    /// additional options, the same as in [object_store::parse_url_opts] can
58    /// be passed.
59    pub fn parse_url_opts<I, K, V>(url: &Url, options: I) -> Result<Self, object_store::Error>
60    where
61        I: IntoIterator<Item = (K, V)>,
62        K: AsRef<str>,
63        V: Into<String>,
64    {
65        let (object_store, path) = object_store::parse_url_opts(url, options)?;
66
67        Ok(Self {
68            instance_name: "root".into(),
69            object_store: Arc::new(object_store),
70            base_path: path,
71        })
72    }
73
74    /// Like [Self::parse_url_opts], except without the options.
75    pub fn parse_url(url: &Url) -> Result<Self, object_store::Error> {
76        Self::parse_url_opts(url, Vec::<(String, String)>::new())
77    }
78
79    pub fn new(instance_name: String, object_store: Arc<dyn ObjectStore>, base_path: Path) -> Self {
80        Self {
81            instance_name,
82            object_store,
83            base_path,
84        }
85    }
86}
87
#[async_trait]
impl DirectoryService for ObjectStoreDirectoryService {
    /// This is the same steps as for get_recursive anyways, so we just call get_recursive and
    /// return the first element of the stream and drop the request.
    #[instrument(level = "trace", skip_all, fields(directory.digest = %digest, instance_name = %self.instance_name))]
    async fn get(&self, digest: &B3Digest) -> Result<Option<Directory>, Error> {
        // take(1) abandons the underlying stream after the root directory;
        // transpose turns the stream's Option<Result<_>> into Result<Option<_>>.
        self.get_recursive(digest).take(1).next().await.transpose()
    }

    /// Uploads a single directory without children.
    /// Directories that reference child directories must be uploaded as a
    /// whole closure via [Self::put_multiple_start] (one object per batch),
    /// so they are rejected here with [Error::InvalidRequest].
    #[instrument(level = "trace", skip_all, fields(directory.digest = %directory.digest(), instance_name = %self.instance_name))]
    async fn put(&self, directory: Directory) -> Result<B3Digest, Error> {
        // Ensure the directory doesn't contain other directory children
        if directory
            .nodes()
            .any(|(_, e)| matches!(e, Node::Directory { .. }))
        {
            return Err(Error::InvalidRequest(
                    "only put_multiple_start is supported by the ObjectStoreDirectoryService for directories with children".into(),
            ));
        }

        // Delegate to the batch path with a single-element batch.
        let mut handle = self.put_multiple_start();
        handle.put(directory).await?;
        handle.close().await
    }

    /// Streams the closure rooted at `root_directory_digest`, root first,
    /// in the root-to-leaves order it was written in. A missing object yields
    /// an empty stream rather than an error.
    #[instrument(level = "trace", skip_all, fields(directory.digest = %root_directory_digest, instance_name = %self.instance_name))]
    fn get_recursive(
        &self,
        root_directory_digest: &B3Digest,
    ) -> BoxStream<'static, Result<Directory, Error>> {
        // Check that we are not passing on bogus from the object store to the client, and that the
        // trust chain from the root digest to the leaves is intact
        let mut order_validator =
            RootToLeavesValidator::new_with_root_digest(root_directory_digest.clone());

        // Clone what the 'static stream needs before moving into the async block.
        let dir_path = derive_dirs_path(&self.base_path, root_directory_digest);
        let object_store = self.object_store.clone();

        Box::pin(
            (async move {
                let stream = match object_store.get(&dir_path).await {
                    Ok(v) => v.into_stream(),
                    // A missing object means "no such directory": empty
                    // stream, not an error.
                    Err(object_store::Error::NotFound { .. }) => {
                        return Ok(Either::Left(futures::stream::empty()));
                    }
                    Err(e) => return Err(std::io::Error::from(e).into()),
                };

                // get a reader of the response body.
                let r = tokio_util::io::StreamReader::new(stream);
                let decompressed_stream = async_compression::tokio::bufread::ZstdDecoder::new(r);

                // the subdirectories are stored in a length delimited format
                let delimited_stream = LengthDelimitedCodec::builder()
                    .max_frame_length(MAX_FRAME_LENGTH)
                    .length_field_type::<u32>()
                    .new_read(decompressed_stream);

                let dirs_stream = delimited_stream.map_err(Error::from).and_then(move |buf| {
                    // The immediately-invoked closure lets us use `?` while
                    // producing a ready future for and_then.
                    futures::future::ready((|| {
                        // The digest is the blake3 hash of the raw frame
                        // bytes (the serialized proto), computed before decode.
                        let mut hasher = blake3::Hasher::new();
                        let digest: B3Digest = hasher.update(&buf).finalize().as_bytes().into();

                        // Ensure to only decode the directory objects whose digests we trust
                        if !order_validator.digest_allowed(&digest) {
                            return Err(crate::Error::StorageError(format!(
                                "received unexpected directory {}",
                                digest
                            )));
                        }

                        let directory = proto::Directory::decode(&*buf).map_err(|e| {
                            warn!("unable to parse directory {}: {}", digest, e);
                            Error::StorageError(e.to_string())
                        })?;
                        let directory = Directory::try_from(directory).map_err(|e| {
                            warn!("unable to convert directory {}: {}", digest, e);
                            Error::StorageError(e.to_string())
                        })?;

                        // Allow the children to appear next
                        order_validator.add_directory_unchecked(&directory);

                        Ok(directory)
                    })())
                });

                Ok(Either::Right(dirs_stream))
            })
            // Resolve the async setup, then flatten into the inner stream.
            .try_flatten_stream(),
        )
    }

    /// Starts a batched upload. Directories are buffered and validated; the
    /// whole batch is written as one compressed object on close().
    #[instrument(skip_all)]
    fn put_multiple_start(&self) -> Box<(dyn DirectoryPutter + '_)>
    where
        Self: Clone,
    {
        Box::new(ObjectStoreDirectoryPutter::new(
            self.object_store.clone(),
            &self.base_path,
        ))
    }
}
193
/// Deserializable configuration for an [ObjectStoreDirectoryService],
/// typically obtained from an `objectstore+…` URL via its `TryFrom<url::Url>`
/// impl, or built by the composition machinery.
#[derive(serde::Deserialize)]
#[serde(deny_unknown_fields)]
pub struct ObjectStoreDirectoryServiceConfig {
    // URL handed to object_store::parse_url_opts (scheme prefix and query
    // pairs already stripped when constructed via TryFrom<url::Url>).
    object_store_url: String,
    // Extra options forwarded to object_store::parse_url_opts; empty by default.
    #[serde(default)]
    object_store_options: HashMap<String, String>,
}
201
impl TryFrom<url::Url> for ObjectStoreDirectoryServiceConfig {
    type Error = Box<dyn std::error::Error + Send + Sync>;

    /// Parses an `objectstore+<scheme>://…` URL into a config.
    /// The `objectstore+` prefix and the query string are removed from the
    /// stored URL; the query pairs of the *original* URL become
    /// `object_store_options` instead.
    fn try_from(url: url::Url) -> Result<Self, Self::Error> {
        // We need to convert the URL to string, strip the prefix there, and then
        // parse it back as url, as Url::set_scheme() rejects some of the transitions we want to do.
        let trimmed_url = {
            let s = url.to_string();
            let mut url = Url::parse(
                s.strip_prefix("objectstore+")
                    .ok_or(Error::StorageError("Missing objectstore uri".into()))?,
            )?;
            // trim the query pairs, they might contain credentials or local settings we don't want to send as-is.
            url.set_query(None);
            url
        };
        Ok(ObjectStoreDirectoryServiceConfig {
            object_store_url: trimmed_url.into(),
            // Deliberately reads the outer `url` binding (query pairs intact),
            // not the trimmed one.
            object_store_options: url
                .query_pairs()
                .into_iter()
                .map(|(k, v)| (k.to_string(), v.to_string()))
                .collect(),
        })
    }
}
227
228#[async_trait]
229impl ServiceBuilder for ObjectStoreDirectoryServiceConfig {
230    type Output = dyn DirectoryService;
231    async fn build<'a>(
232        &'a self,
233        instance_name: &str,
234        _context: &CompositionContext,
235    ) -> Result<Arc<dyn DirectoryService>, Box<dyn std::error::Error + Send + Sync + 'static>> {
236        let opts = {
237            let mut opts: HashMap<&str, _> = self
238                .object_store_options
239                .iter()
240                .map(|(k, v)| (k.as_str(), v.as_str()))
241                .collect();
242
243            if let hash_map::Entry::Vacant(e) =
244                opts.entry(object_store::ClientConfigKey::UserAgent.as_ref())
245            {
246                e.insert(crate::USER_AGENT);
247            }
248
249            opts
250        };
251
252        let (object_store, path) =
253            object_store::parse_url_opts(&self.object_store_url.parse()?, opts)?;
254        Ok(Arc::new(ObjectStoreDirectoryService::new(
255            instance_name.to_string(),
256            Arc::new(object_store),
257            path,
258        )))
259    }
260}
261
/// Accumulates a directory closure in memory and uploads it as a single
/// object when closed.
struct ObjectStoreDirectoryPutter<'a> {
    object_store: Arc<dyn ObjectStore>,
    base_path: &'a Path,

    // Some(..) while the putter is open; take()n on close so that any later
    // put()/close() call fails with "already closed".
    directory_validator: Option<DirectoryGraph<LeavesToRootValidator>>,
}
268
269impl<'a> ObjectStoreDirectoryPutter<'a> {
270    fn new(object_store: Arc<dyn ObjectStore>, base_path: &'a Path) -> Self {
271        Self {
272            object_store,
273            base_path,
274            directory_validator: Some(Default::default()),
275        }
276    }
277}
278
#[async_trait]
impl DirectoryPutter for ObjectStoreDirectoryPutter<'_> {
    /// Buffers a directory into the leaves-to-root validator.
    /// Nothing touches the object store until [Self::close].
    #[instrument(level = "trace", skip_all, fields(directory.digest=%directory.digest()), err)]
    async fn put(&mut self, directory: Directory) -> Result<(), Error> {
        match self.directory_validator {
            // None means close() already consumed the validator.
            None => return Err(Error::StorageError("already closed".to_string())),
            Some(ref mut validator) => {
                validator
                    .add(directory)
                    .map_err(|e| Error::StorageError(e.to_string()))?;
            }
        }

        Ok(())
    }

    /// Validates the buffered closure and uploads it as one zstd-compressed,
    /// length-delimited object keyed by the root digest, returning that
    /// digest. If an object for the root digest already exists, the upload is
    /// skipped.
    #[instrument(level = "trace", skip_all, ret, err)]
    async fn close(&mut self) -> Result<B3Digest, Error> {
        // take() the validator so subsequent calls observe "already closed".
        let validator = match self.directory_validator.take() {
            None => return Err(Error::InvalidRequest("already closed".to_string())),
            Some(validator) => validator,
        };

        // retrieve the validated directories.
        // It is important that they are in topological order (root first),
        // since that's how we want to retrieve them from the object store in the end.
        let directories = validator
            .validate()
            .map_err(|e| Error::StorageError(e.to_string()))?
            .drain_root_to_leaves()
            .collect::<Vec<_>>();

        // Get the root digest
        let root_digest = directories
            .first()
            .ok_or_else(|| Error::InvalidRequest("got no directories".to_string()))?
            .digest();

        let dir_path = derive_dirs_path(self.base_path, &root_digest);

        match self.object_store.head(&dir_path).await {
            // directory tree already exists, nothing to do
            Ok(_) => {
                trace!("directory tree already exists");
            }

            // directory tree does not yet exist, compress and upload.
            Err(object_store::Error::NotFound { .. }) => {
                trace!("uploading directory tree");

                let object_store_writer =
                    object_store::buffered::BufWriter::new(self.object_store.clone(), dir_path);
                let compressed_writer =
                    async_compression::tokio::write::ZstdEncoder::new(object_store_writer);
                // u32 length prefix — must match the reader in get_recursive.
                let mut directories_sink = LengthDelimitedCodec::builder()
                    .max_frame_length(MAX_FRAME_LENGTH)
                    .length_field_type::<u32>()
                    .new_write(compressed_writer);

                for directory in directories {
                    directories_sink
                        .send(proto::Directory::from(directory).encode_to_vec().into())
                        .await?;
                }

                // Unwrap the codec and shut the writer down: this finishes the
                // zstd frame and flushes the buffered upload to the store.
                let mut compressed_writer = directories_sink.into_inner();
                compressed_writer.shutdown().await?;
            }
            // other error
            Err(err) => Err(std::io::Error::from(err))?,
        }

        Ok(root_digest)
    }
}