snix_castore/directoryservice/
object_store.rs1use std::collections::HashMap;
2use std::collections::hash_map;
3use std::sync::Arc;
4
5use data_encoding::HEXLOWER;
6use futures::SinkExt;
7use futures::StreamExt;
8use futures::TryStreamExt;
9use futures::stream::BoxStream;
10use object_store::ObjectStoreExt;
11use object_store::{ObjectStore, path::Path};
12use prost::Message;
13use tokio::io::AsyncWriteExt;
14use tokio_util::codec::LengthDelimitedCodec;
15use tonic::async_trait;
16use tracing::{Level, instrument, trace, warn};
17use url::Url;
18
19use super::{Directory, DirectoryPutter, DirectoryService, RootToLeavesValidator};
20use crate::composition::{CompositionContext, ServiceBuilder};
21use crate::directoryservice::directory_graph::DirectoryGraphBuilder;
22use crate::{B3Digest, Node, proto};
23
24#[derive(Clone)]
32pub struct ObjectStoreDirectoryService {
33 instance_name: String,
34 object_store: Arc<dyn ObjectStore>,
35 base_path: Path,
36}
37
38#[instrument(level=Level::TRACE, skip_all,fields(base_path=%base_path,blob.digest=%digest),ret(Display))]
39fn derive_dirs_path(base_path: &Path, digest: &B3Digest) -> Path {
40 base_path
41 .clone()
42 .join("dirs")
43 .join("b3")
44 .join(HEXLOWER.encode(&digest.as_slice()[..2]))
45 .join(HEXLOWER.encode(digest.as_slice()))
46}
47
48fn parse_proto_directory<F>(
51 encoded_directory: &[u8],
52 digest_allowed: F,
53) -> Result<crate::Directory, Error>
54where
55 F: Fn(&B3Digest) -> bool,
56{
57 let actual_digest = B3Digest::from(blake3::hash(encoded_directory).as_bytes());
58 if !digest_allowed(&actual_digest) {
59 return Err(Error::UnexpectedDigest(actual_digest));
60 }
61
62 let directory_proto =
63 proto::Directory::decode(encoded_directory).map_err(Error::ProtobufDecode)?;
64
65 Directory::try_from(directory_proto).map_err(Error::DirectoryValidation)
66}
67
68#[allow(clippy::identity_op)]
69const MAX_FRAME_LENGTH: usize = 1 * 1024 * 1024 * 1000; impl ObjectStoreDirectoryService {
72 pub fn parse_url_opts<I, K, V>(url: &Url, options: I) -> Result<Self, object_store::Error>
78 where
79 I: IntoIterator<Item = (K, V)>,
80 K: AsRef<str>,
81 V: Into<String>,
82 {
83 let (object_store, path) = object_store::parse_url_opts(url, options)?;
84
85 Ok(Self {
86 instance_name: "root".into(),
87 object_store: Arc::new(object_store),
88 base_path: path,
89 })
90 }
91
92 pub fn parse_url(url: &Url) -> Result<Self, object_store::Error> {
94 Self::parse_url_opts(url, Vec::<(String, String)>::new())
95 }
96
97 pub fn new(instance_name: String, object_store: Arc<dyn ObjectStore>, base_path: Path) -> Self {
98 Self {
99 instance_name,
100 object_store,
101 base_path,
102 }
103 }
104}
105
106#[async_trait]
107impl DirectoryService for ObjectStoreDirectoryService {
108 #[instrument(level = "trace", skip_all, fields(directory.digest = %digest, instance_name = %self.instance_name))]
111 async fn get(&self, digest: &B3Digest) -> Result<Option<Directory>, super::Error> {
112 self.get_recursive(digest).take(1).next().await.transpose()
113 }
114
115 #[instrument(level = "trace", skip_all, fields(directory.digest = %directory.digest(), instance_name = %self.instance_name))]
116 async fn put(&self, directory: Directory) -> Result<B3Digest, super::Error> {
117 if directory
119 .nodes()
120 .any(|(_, e)| matches!(e, Node::Directory { .. }))
121 {
122 Err(Error::PutForDirectoryWithChildren)?
123 }
124
125 let mut handle = self.put_multiple_start();
126 handle.put(directory).await?;
127 handle.close().await
128 }
129
130 #[instrument(level = "trace", skip_all, fields(directory.digest = %root_directory_digest, instance_name = %self.instance_name))]
131 fn get_recursive(
132 &self,
133 root_directory_digest: &B3Digest,
134 ) -> BoxStream<'_, Result<Directory, super::Error>> {
135 let dir_path = derive_dirs_path(&self.base_path, root_directory_digest);
138 let object_store = &self.object_store;
139 let root_directory_digest = *root_directory_digest;
140
141 async_stream::try_stream! {
142 let bytes_stream = match object_store.get(&dir_path).await {
143 Ok(v) => v.into_stream(),
144 Err(object_store::Error::NotFound { .. }) => {
145 return;
146 }
147 Err(e) => Err(Error::ObjectStore(e))?,
148 };
149
150 let r = tokio_util::io::StreamReader::new(bytes_stream);
152 let decompressed_stream = async_compression::tokio::bufread::ZstdDecoder::new(r);
153
154 let mut encoded_directories = LengthDelimitedCodec::builder()
156 .max_frame_length(MAX_FRAME_LENGTH)
157 .length_field_type::<u32>()
158 .new_read(decompressed_stream)
159 .err_into::<Error>();
160
161 let mut order_validator = RootToLeavesValidator::new_with_root_digest(root_directory_digest);
162 while let Some(encoded_directory) = encoded_directories.try_next().await? {
163 let directory = parse_proto_directory(&encoded_directory, |digest| {
164 order_validator.would_accept(digest)
165 })?;
166
167 order_validator.try_accept(&directory).map_err(Error::DirectoryOrdering)?;
168
169 yield directory;
170 }
171
172 order_validator.finalize().map_err(Error::DirectoryOrdering)?;
173 }.boxed()
174 }
175
176 #[instrument(skip_all)]
177 fn put_multiple_start(&self) -> Box<dyn DirectoryPutter + '_>
178 where
179 Self: Clone,
180 {
181 Box::new(ObjectStoreDirectoryPutter::new(
182 self.object_store.clone(),
183 &self.base_path,
184 ))
185 }
186}
187
188#[derive(thiserror::Error, Debug)]
189enum Error {
190 #[error("wrong arguments: {0}")]
191 WrongConfig(&'static str),
192 #[error("put() may only be used for directories without children")]
193 PutForDirectoryWithChildren,
194
195 #[error("Directory Graph ordering error")]
196 DirectoryOrdering(#[from] crate::directoryservice::OrderingError),
197 #[error("requested directory has unexpected digest {0}")]
198 UnexpectedDigest(B3Digest),
199 #[error("failed to decode protobuf: {0}")]
200 ProtobufDecode(#[from] prost::DecodeError),
201 #[error("failed to validate directory: {0}")]
202 DirectoryValidation(#[from] crate::DirectoryError),
203
204 #[error("DirectoryPutter already closed")]
205 DirectoryPutterAlreadyClosed,
206
207 #[error("ObjectStore error: {0}")]
208 ObjectStore(#[from] object_store::Error),
209
210 #[error("io error: {0}")]
211 IO(#[from] std::io::Error),
212}
213impl From<Error> for super::Error {
214 fn from(value: Error) -> Self {
215 Self(Box::new(value))
216 }
217}
218
219#[derive(serde::Deserialize)]
220#[serde(deny_unknown_fields)]
221pub struct ObjectStoreDirectoryServiceConfig {
222 object_store_url: String,
223 #[serde(default)]
224 object_store_options: HashMap<String, String>,
225}
226
227impl TryFrom<url::Url> for ObjectStoreDirectoryServiceConfig {
228 type Error = Box<dyn std::error::Error + Send + Sync>;
229 fn try_from(url: url::Url) -> Result<Self, Self::Error> {
230 let trimmed_url = {
233 let s = url.to_string();
234 let mut url = Url::parse(s.strip_prefix("objectstore+").ok_or(Error::WrongConfig(
235 "Missing objectstore+ part in URI scheme",
236 ))?)?;
237 url.set_query(None);
239 url
240 };
241 Ok(ObjectStoreDirectoryServiceConfig {
242 object_store_url: trimmed_url.into(),
243 object_store_options: url
244 .query_pairs()
245 .into_iter()
246 .map(|(k, v)| (k.to_string(), v.to_string()))
247 .collect(),
248 })
249 }
250}
251
252#[async_trait]
253impl ServiceBuilder for ObjectStoreDirectoryServiceConfig {
254 type Output = dyn DirectoryService;
255 async fn build<'a>(
256 &'a self,
257 instance_name: &str,
258 _context: &CompositionContext,
259 ) -> Result<Arc<Self::Output>, Box<dyn std::error::Error + Send + Sync>> {
260 let opts = {
261 let mut opts: HashMap<&str, _> = self
262 .object_store_options
263 .iter()
264 .map(|(k, v)| (k.as_str(), v.as_str()))
265 .collect();
266
267 if let hash_map::Entry::Vacant(e) =
268 opts.entry(object_store::ClientConfigKey::UserAgent.as_ref())
269 {
270 e.insert(crate::USER_AGENT);
271 }
272
273 opts
274 };
275
276 let (object_store, path) =
277 object_store::parse_url_opts(&self.object_store_url.parse()?, opts)?;
278 Ok(Arc::new(ObjectStoreDirectoryService::new(
279 instance_name.to_string(),
280 Arc::new(object_store),
281 path,
282 )))
283 }
284}
285
286struct ObjectStoreDirectoryPutter<'a> {
287 object_store: Arc<dyn ObjectStore>,
288 base_path: &'a Path,
289
290 builder: Option<DirectoryGraphBuilder>,
291}
292
293impl<'a> ObjectStoreDirectoryPutter<'a> {
294 fn new(object_store: Arc<dyn ObjectStore>, base_path: &'a Path) -> Self {
295 Self {
296 object_store,
297 base_path,
298 builder: Some(DirectoryGraphBuilder::new_leaves_to_root()),
299 }
300 }
301}
302
303#[async_trait]
304impl DirectoryPutter for ObjectStoreDirectoryPutter<'_> {
305 #[instrument(level = "trace", skip_all, fields(directory.digest=%directory.digest()), err)]
306 async fn put(&mut self, directory: Directory) -> Result<(), super::Error> {
307 let builder = self
308 .builder
309 .as_mut()
310 .ok_or_else(|| Error::DirectoryPutterAlreadyClosed)?;
311
312 builder
313 .try_insert(directory)
314 .map_err(Error::DirectoryOrdering)?;
315
316 Ok(())
317 }
318
319 #[instrument(level = "trace", skip_all, ret, err)]
320 async fn close(&mut self) -> Result<B3Digest, super::Error> {
321 let builder = self
322 .builder
323 .take()
324 .ok_or_else(|| Error::DirectoryPutterAlreadyClosed)?;
325
326 let directory_graph = builder.build().map_err(Error::DirectoryOrdering)?;
328 let root_digest = directory_graph.root().digest();
329
330 let dir_path = derive_dirs_path(self.base_path, &root_digest);
331
332 match self.object_store.head(&dir_path).await {
333 Ok(_) => {
335 trace!("directory tree already exists");
336 }
337
338 Err(object_store::Error::NotFound { .. }) => {
340 trace!("uploading directory tree");
341
342 let object_store_writer =
343 object_store::buffered::BufWriter::new(self.object_store.clone(), dir_path);
344 let compressed_writer =
345 async_compression::tokio::write::ZstdEncoder::new(object_store_writer);
346 let mut directories_sink = LengthDelimitedCodec::builder()
347 .max_frame_length(MAX_FRAME_LENGTH)
348 .length_field_type::<u32>()
349 .new_write(compressed_writer);
350
351 for directory in directory_graph.drain_root_to_leaves() {
353 directories_sink
354 .send(proto::Directory::from(directory).encode_to_vec().into())
355 .await
356 .map_err(Error::IO)?;
357 }
358
359 let mut compressed_writer = directories_sink.into_inner();
360 compressed_writer.shutdown().await.map_err(Error::IO)?;
361 }
362 Err(err) => Err(Error::ObjectStore(err))?,
364 }
365
366 Ok(root_digest)
367 }
368}