// snix_castore/directoryservice/object_store.rs
use std::collections::HashMap;
2use std::collections::hash_map;
3use std::sync::Arc;
4
5use data_encoding::HEXLOWER;
6use futures::SinkExt;
7use futures::StreamExt;
8use futures::TryFutureExt;
9use futures::TryStreamExt;
10use futures::future::Either;
11use futures::stream::BoxStream;
12use object_store::{ObjectStore, path::Path};
13use prost::Message;
14use tokio::io::AsyncWriteExt;
15use tokio_util::codec::LengthDelimitedCodec;
16use tonic::async_trait;
17use tracing::{Level, instrument, trace, warn};
18use url::Url;
19
20use super::{
21 Directory, DirectoryGraph, DirectoryPutter, DirectoryService, LeavesToRootValidator,
22 RootToLeavesValidator,
23};
24use crate::composition::{CompositionContext, ServiceBuilder};
25use crate::{B3Digest, Error, Node, proto};
26
/// [DirectoryService] implementation backed by any `object_store` backend.
///
/// A directory closure is stored as a single object, addressed by the digest
/// of its root directory (see `derive_dirs_path`). The object holds the
/// zstd-compressed, length-delimited protobuf encodings of all directories
/// in the closure, serialized root-to-leaves (see the `DirectoryPutter`
/// below).
#[derive(Clone)]
pub struct ObjectStoreDirectoryService {
    // Name of this service instance, recorded in tracing spans.
    instance_name: String,
    // Backend used for all reads and writes.
    object_store: Arc<dyn ObjectStore>,
    // Prefix under which all objects of this service live.
    base_path: Path,
}
40
41#[instrument(level=Level::TRACE, skip_all,fields(base_path=%base_path,blob.digest=%digest),ret(Display))]
42fn derive_dirs_path(base_path: &Path, digest: &B3Digest) -> Path {
43 base_path
44 .child("dirs")
45 .child("b3")
46 .child(HEXLOWER.encode(&digest.as_slice()[..2]))
47 .child(HEXLOWER.encode(digest.as_slice()))
48}
49
50#[allow(clippy::identity_op)]
51const MAX_FRAME_LENGTH: usize = 1 * 1024 * 1024 * 1000; impl ObjectStoreDirectoryService {
54 pub fn parse_url_opts<I, K, V>(url: &Url, options: I) -> Result<Self, object_store::Error>
60 where
61 I: IntoIterator<Item = (K, V)>,
62 K: AsRef<str>,
63 V: Into<String>,
64 {
65 let (object_store, path) = object_store::parse_url_opts(url, options)?;
66
67 Ok(Self {
68 instance_name: "root".into(),
69 object_store: Arc::new(object_store),
70 base_path: path,
71 })
72 }
73
74 pub fn parse_url(url: &Url) -> Result<Self, object_store::Error> {
76 Self::parse_url_opts(url, Vec::<(String, String)>::new())
77 }
78
79 pub fn new(instance_name: String, object_store: Arc<dyn ObjectStore>, base_path: Path) -> Self {
80 Self {
81 instance_name,
82 object_store,
83 base_path,
84 }
85 }
86}
87
#[async_trait]
impl DirectoryService for ObjectStoreDirectoryService {
    /// Fetches a single directory by digest.
    ///
    /// Implemented on top of [Self::get_recursive]: the requested digest is
    /// the root of that stream, so the first element (if any) is the wanted
    /// directory.
    #[instrument(level = "trace", skip_all, fields(directory.digest = %digest, instance_name = %self.instance_name))]
    async fn get(&self, digest: &B3Digest) -> Result<Option<Directory>, Error> {
        self.get_recursive(digest).take(1).next().await.transpose()
    }

    /// Uploads a single directory that has no child directories.
    ///
    /// Closures are stored as one object per root, so directories with
    /// children must go through [Self::put_multiple_start] instead, where
    /// the whole closure is available.
    #[instrument(level = "trace", skip_all, fields(directory.digest = %directory.digest(), instance_name = %self.instance_name))]
    async fn put(&self, directory: Directory) -> Result<B3Digest, Error> {
        // Reject directories referencing child directories: uploaded alone,
        // their closure would be incomplete.
        if directory
            .nodes()
            .any(|(_, e)| matches!(e, Node::Directory { .. }))
        {
            return Err(Error::InvalidRequest(
                "only put_multiple_start is supported by the ObjectStoreDirectoryService for directories with children".into(),
            ));
        }

        // A leaf directory is a one-element closure; reuse the putter.
        let mut handle = self.put_multiple_start();
        handle.put(directory).await?;
        handle.close().await
    }

    /// Streams the closure below `root_directory_digest`, root first.
    ///
    /// Reads the single object addressed by the root digest, zstd-decompresses
    /// it and splits it into length-delimited protobuf frames, validating that
    /// each received directory was referenced by a previously seen one.
    /// A missing object yields an empty stream, not an error.
    #[instrument(level = "trace", skip_all, fields(directory.digest = %root_directory_digest, instance_name = %self.instance_name))]
    fn get_recursive(
        &self,
        root_directory_digest: &B3Digest,
    ) -> BoxStream<'static, Result<Directory, Error>> {
        // Enforces root-to-leaves order: seeded with the root digest,
        // extended with the children of every accepted directory.
        let mut order_validator =
            RootToLeavesValidator::new_with_root_digest(root_directory_digest.clone());

        let dir_path = derive_dirs_path(&self.base_path, root_directory_digest);
        let object_store = self.object_store.clone();

        Box::pin(
            (async move {
                let stream = match object_store.get(&dir_path).await {
                    Ok(v) => v.into_stream(),
                    // Object not present: the closure simply isn't stored.
                    // Either::Left/Right unify the two stream types.
                    Err(object_store::Error::NotFound { .. }) => {
                        return Ok(Either::Left(futures::stream::empty()));
                    }
                    Err(e) => return Err(std::io::Error::from(e).into()),
                };

                // bytes stream -> AsyncRead -> zstd decode -> u32-length-
                // delimited frame reader (mirrors the write side in close()).
                let r = tokio_util::io::StreamReader::new(stream);
                let decompressed_stream = async_compression::tokio::bufread::ZstdDecoder::new(r);

                let delimited_stream = LengthDelimitedCodec::builder()
                    .max_frame_length(MAX_FRAME_LENGTH)
                    .length_field_type::<u32>()
                    .new_read(decompressed_stream);

                let dirs_stream = delimited_stream.map_err(Error::from).and_then(move |buf| {
                    // Immediately-invoked closure so `?` can be used while
                    // producing a ready future for and_then.
                    futures::future::ready((|| {
                        // The digest is computed over the raw frame bytes,
                        // i.e. the protobuf encoding as stored.
                        let mut hasher = blake3::Hasher::new();
                        let digest: B3Digest = hasher.update(&buf).finalize().as_bytes().into();

                        // Refuse directories that no earlier directory (or
                        // the root request) referenced.
                        if !order_validator.digest_allowed(&digest) {
                            return Err(crate::Error::StorageError(format!(
                                "received unexpected directory {}",
                                digest
                            )));
                        }

                        let directory = proto::Directory::decode(&*buf).map_err(|e| {
                            warn!("unable to parse directory {}: {}", digest, e);
                            Error::StorageError(e.to_string())
                        })?;
                        let directory = Directory::try_from(directory).map_err(|e| {
                            warn!("unable to convert directory {}: {}", digest, e);
                            Error::StorageError(e.to_string())
                        })?;

                        // Digest was already checked above, so the unchecked
                        // insert is fine; this admits the directory's children.
                        order_validator.add_directory_unchecked(&directory);

                        Ok(directory)
                    })())
                });

                Ok(Either::Right(dirs_stream))
            })
            // Turn Future<Result<Stream>> into a stream of Results.
            .try_flatten_stream(),
        )
    }

    /// Returns a putter that buffers a whole closure and uploads it as one
    /// object on close.
    #[instrument(skip_all)]
    fn put_multiple_start(&self) -> Box<(dyn DirectoryPutter + '_)>
    where
        Self: Clone,
    {
        Box::new(ObjectStoreDirectoryPutter::new(
            self.object_store.clone(),
            &self.base_path,
        ))
    }
}
193
/// Deserializable configuration for an [ObjectStoreDirectoryService],
/// consumed by the composition machinery ([ServiceBuilder] impl below).
#[derive(serde::Deserialize)]
#[serde(deny_unknown_fields)]
pub struct ObjectStoreDirectoryServiceConfig {
    /// URL handed to `object_store::parse_url_opts` to select and locate
    /// the backend.
    object_store_url: String,
    /// Backend-specific options, passed through verbatim.
    #[serde(default)]
    object_store_options: HashMap<String, String>,
}
201
202impl TryFrom<url::Url> for ObjectStoreDirectoryServiceConfig {
203 type Error = Box<dyn std::error::Error + Send + Sync>;
204 fn try_from(url: url::Url) -> Result<Self, Self::Error> {
205 let trimmed_url = {
208 let s = url.to_string();
209 let mut url = Url::parse(
210 s.strip_prefix("objectstore+")
211 .ok_or(Error::StorageError("Missing objectstore uri".into()))?,
212 )?;
213 url.set_query(None);
215 url
216 };
217 Ok(ObjectStoreDirectoryServiceConfig {
218 object_store_url: trimmed_url.into(),
219 object_store_options: url
220 .query_pairs()
221 .into_iter()
222 .map(|(k, v)| (k.to_string(), v.to_string()))
223 .collect(),
224 })
225 }
226}
227
228#[async_trait]
229impl ServiceBuilder for ObjectStoreDirectoryServiceConfig {
230 type Output = dyn DirectoryService;
231 async fn build<'a>(
232 &'a self,
233 instance_name: &str,
234 _context: &CompositionContext,
235 ) -> Result<Arc<dyn DirectoryService>, Box<dyn std::error::Error + Send + Sync + 'static>> {
236 let opts = {
237 let mut opts: HashMap<&str, _> = self
238 .object_store_options
239 .iter()
240 .map(|(k, v)| (k.as_str(), v.as_str()))
241 .collect();
242
243 if let hash_map::Entry::Vacant(e) =
244 opts.entry(object_store::ClientConfigKey::UserAgent.as_ref())
245 {
246 e.insert(crate::USER_AGENT);
247 }
248
249 opts
250 };
251
252 let (object_store, path) =
253 object_store::parse_url_opts(&self.object_store_url.parse()?, opts)?;
254 Ok(Arc::new(ObjectStoreDirectoryService::new(
255 instance_name.to_string(),
256 Arc::new(object_store),
257 path,
258 )))
259 }
260}
261
/// [DirectoryPutter] that buffers incoming directories in a
/// [DirectoryGraph] validator and uploads the whole closure as a single
/// object when closed.
struct ObjectStoreDirectoryPutter<'a> {
    object_store: Arc<dyn ObjectStore>,
    base_path: &'a Path,

    // Some(..) while accepting puts; taken (set to None) by close(), so
    // subsequent put()/close() calls fail.
    directory_validator: Option<DirectoryGraph<LeavesToRootValidator>>,
}
268
269impl<'a> ObjectStoreDirectoryPutter<'a> {
270 fn new(object_store: Arc<dyn ObjectStore>, base_path: &'a Path) -> Self {
271 Self {
272 object_store,
273 base_path,
274 directory_validator: Some(Default::default()),
275 }
276 }
277}
278
#[async_trait]
impl DirectoryPutter for ObjectStoreDirectoryPutter<'_> {
    /// Feeds one directory into the validator; nothing is uploaded yet.
    ///
    /// The validator (a [LeavesToRootValidator] graph) presumably expects
    /// children before their parents — rejected entries surface here as
    /// StorageError. Actual upload happens in [Self::close].
    #[instrument(level = "trace", skip_all, fields(directory.digest=%directory.digest()), err)]
    async fn put(&mut self, directory: Directory) -> Result<(), Error> {
        match self.directory_validator {
            // close() already consumed the validator.
            None => return Err(Error::StorageError("already closed".to_string())),
            Some(ref mut validator) => {
                validator
                    .add(directory)
                    .map_err(|e| Error::StorageError(e.to_string()))?;
            }
        }

        Ok(())
    }

    /// Validates the buffered closure and uploads it as a single object,
    /// returning the root directory digest.
    ///
    /// If an object for that root digest already exists, the upload is
    /// skipped: storage is content-addressed, so an existing object is
    /// assumed identical.
    #[instrument(level = "trace", skip_all, ret, err)]
    async fn close(&mut self) -> Result<B3Digest, Error> {
        // Take the validator out so any further put()/close() calls fail.
        let validator = match self.directory_validator.take() {
            None => return Err(Error::InvalidRequest("already closed".to_string())),
            Some(validator) => validator,
        };

        // Check closure completeness and order directories root-to-leaves —
        // the order they're serialized in (and read back by get_recursive).
        let directories = validator
            .validate()
            .map_err(|e| Error::StorageError(e.to_string()))?
            .drain_root_to_leaves()
            .collect::<Vec<_>>();

        // Root is the first element in root-to-leaves order; its digest
        // addresses the whole closure.
        let root_digest = directories
            .first()
            .ok_or_else(|| Error::InvalidRequest("got no directories".to_string()))?
            .digest();

        let dir_path = derive_dirs_path(self.base_path, &root_digest);

        match self.object_store.head(&dir_path).await {
            Ok(_) => {
                // Already stored; nothing to do (content-addressed).
                trace!("directory tree already exists");
            }

            Err(object_store::Error::NotFound { .. }) => {
                trace!("uploading directory tree");

                // Writer stack: buffered object-store writer <- zstd
                // compressor <- u32-length-delimited frame sink (mirrors
                // the read side in get_recursive).
                let object_store_writer =
                    object_store::buffered::BufWriter::new(self.object_store.clone(), dir_path);
                let compressed_writer =
                    async_compression::tokio::write::ZstdEncoder::new(object_store_writer);
                let mut directories_sink = LengthDelimitedCodec::builder()
                    .max_frame_length(MAX_FRAME_LENGTH)
                    .length_field_type::<u32>()
                    .new_write(compressed_writer);

                for directory in directories {
                    directories_sink
                        .send(proto::Directory::from(directory).encode_to_vec().into())
                        .await?;
                }

                // Finalize every layer down to the object store; shutdown
                // flushes the zstd frame and completes the upload.
                let mut compressed_writer = directories_sink.into_inner();
                compressed_writer.shutdown().await?;
            }
            Err(err) => Err(std::io::Error::from(err))?,
        }

        Ok(root_digest)
    }
}