1#[cfg(not(target_arch = "wasm32"))]
19use crate::local::LocalFileSystem;
20use crate::memory::InMemory;
21use crate::path::Path;
22use crate::ObjectStore;
23use snafu::Snafu;
24use url::Url;
25
26#[derive(Debug, Snafu)]
27pub enum Error {
28 #[snafu(display("Unable to recognise URL \"{}\"", url))]
29 Unrecognised { url: Url },
30
31 #[snafu(context(false))]
32 Path { source: crate::path::Error },
33}
34
35impl From<Error> for super::Error {
36 fn from(e: Error) -> Self {
37 Self::Generic {
38 store: "URL",
39 source: Box::new(e),
40 }
41 }
42}
43
/// The kind of object store a [`Url`] refers to.
///
/// Values are produced by [`ObjectStoreScheme::parse`], which inspects the
/// URL scheme and, for `https`, the host name.
#[derive(Clone, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum ObjectStoreScheme {
    /// `file:` URL without a host
    Local,
    /// `memory:` URL without a host
    Memory,
    /// `s3:` / `s3a:` URL, or an `https` URL whose host ends with
    /// `amazonaws.com` or `r2.cloudflarestorage.com`
    AmazonS3,
    /// `gs:` URL
    GoogleCloudStorage,
    /// `az:` / `adl:` / `azure:` / `abfs:` / `abfss:` URL, or an `https` URL
    /// whose host ends with one of the Azure / Fabric `dfs` or `blob` domains
    MicrosoftAzure,
    /// Any other `http:` or `https:` URL with a host
    Http,
}
78
79impl ObjectStoreScheme {
80 pub fn parse(url: &Url) -> Result<(Self, Path), Error> {
104 let strip_bucket = || Some(url.path().strip_prefix('/')?.split_once('/')?.1);
105
106 let (scheme, path) = match (url.scheme(), url.host_str()) {
107 ("file", None) => (Self::Local, url.path()),
108 ("memory", None) => (Self::Memory, url.path()),
109 ("s3" | "s3a", Some(_)) => (Self::AmazonS3, url.path()),
110 ("gs", Some(_)) => (Self::GoogleCloudStorage, url.path()),
111 ("az" | "adl" | "azure" | "abfs" | "abfss", Some(_)) => {
112 (Self::MicrosoftAzure, url.path())
113 }
114 ("http", Some(_)) => (Self::Http, url.path()),
115 ("https", Some(host)) => {
116 if host.ends_with("dfs.core.windows.net")
117 || host.ends_with("blob.core.windows.net")
118 || host.ends_with("dfs.fabric.microsoft.com")
119 || host.ends_with("blob.fabric.microsoft.com")
120 {
121 (Self::MicrosoftAzure, url.path())
122 } else if host.ends_with("amazonaws.com") {
123 match host.starts_with("s3") {
124 true => (Self::AmazonS3, strip_bucket().unwrap_or_default()),
125 false => (Self::AmazonS3, url.path()),
126 }
127 } else if host.ends_with("r2.cloudflarestorage.com") {
128 (Self::AmazonS3, strip_bucket().unwrap_or_default())
129 } else {
130 (Self::Http, url.path())
131 }
132 }
133 _ => return Err(Error::Unrecognised { url: url.clone() }),
134 };
135
136 Ok((scheme, Path::from_url_path(path)?))
137 }
138}
139
/// Build a boxed store from `$builder`.
///
/// The builder is seeded with `$url` and then receives every `(key, value)`
/// pair from `$options` whose key parses into the builder's configuration key
/// type. Keys that fail to parse are skipped. A failing `build()` is
/// propagated with `?` from the enclosing function.
#[cfg(feature = "cloud")]
macro_rules! builder_opts {
    ($builder:ty, $url:expr, $options:expr) => {{
        let entries = $options.into_iter();
        let mut builder = <$builder>::new().with_url($url.to_string());
        for (key, value) in entries {
            // Keys this builder does not recognise are ignored.
            if let Ok(k) = key.as_ref().parse() {
                builder = builder.with_config(k, value);
            }
        }
        Box::new(builder.build()?) as _
    }};
}
153
154pub fn parse_url(url: &Url) -> Result<(Box<dyn ObjectStore>, Path), super::Error> {
160 parse_url_opts(url, std::iter::empty::<(&str, &str)>())
161}
162
163pub fn parse_url_opts<I, K, V>(
169 url: &Url,
170 options: I,
171) -> Result<(Box<dyn ObjectStore>, Path), super::Error>
172where
173 I: IntoIterator<Item = (K, V)>,
174 K: AsRef<str>,
175 V: Into<String>,
176{
177 let _options = options;
178 let (scheme, path) = ObjectStoreScheme::parse(url)?;
179 let path = Path::parse(path)?;
180
181 let store = match scheme {
182 #[cfg(not(target_arch = "wasm32"))]
183 ObjectStoreScheme::Local => Box::new(LocalFileSystem::new()) as _,
184 ObjectStoreScheme::Memory => Box::new(InMemory::new()) as _,
185 #[cfg(feature = "aws")]
186 ObjectStoreScheme::AmazonS3 => {
187 builder_opts!(crate::aws::AmazonS3Builder, url, _options)
188 }
189 #[cfg(feature = "gcp")]
190 ObjectStoreScheme::GoogleCloudStorage => {
191 builder_opts!(crate::gcp::GoogleCloudStorageBuilder, url, _options)
192 }
193 #[cfg(feature = "azure")]
194 ObjectStoreScheme::MicrosoftAzure => {
195 builder_opts!(crate::azure::MicrosoftAzureBuilder, url, _options)
196 }
197 #[cfg(feature = "http")]
198 ObjectStoreScheme::Http => {
199 let url = &url[..url::Position::BeforePath];
200 builder_opts!(crate::http::HttpBuilder, url, _options)
201 }
202 #[cfg(not(all(feature = "aws", feature = "azure", feature = "gcp", feature = "http")))]
203 s => {
204 return Err(super::Error::Generic {
205 store: "parse_url",
206 source: format!("feature for {s:?} not enabled").into(),
207 })
208 }
209 };
210
211 Ok((store, path))
212}
213
214#[cfg(test)]
215mod tests {
216 use super::*;
217
218 #[test]
219 fn test_parse() {
220 let cases = [
221 ("file:/path", (ObjectStoreScheme::Local, "path")),
222 ("file:///path", (ObjectStoreScheme::Local, "path")),
223 ("memory:/path", (ObjectStoreScheme::Memory, "path")),
224 ("memory:///", (ObjectStoreScheme::Memory, "")),
225 ("s3://bucket/path", (ObjectStoreScheme::AmazonS3, "path")),
226 ("s3a://bucket/path", (ObjectStoreScheme::AmazonS3, "path")),
227 (
228 "https://s3.region.amazonaws.com/bucket",
229 (ObjectStoreScheme::AmazonS3, ""),
230 ),
231 (
232 "https://s3.region.amazonaws.com/bucket/path",
233 (ObjectStoreScheme::AmazonS3, "path"),
234 ),
235 (
236 "https://bucket.s3.region.amazonaws.com",
237 (ObjectStoreScheme::AmazonS3, ""),
238 ),
239 (
240 "https://ACCOUNT_ID.r2.cloudflarestorage.com/bucket",
241 (ObjectStoreScheme::AmazonS3, ""),
242 ),
243 (
244 "https://ACCOUNT_ID.r2.cloudflarestorage.com/bucket/path",
245 (ObjectStoreScheme::AmazonS3, "path"),
246 ),
247 (
248 "abfs://container/path",
249 (ObjectStoreScheme::MicrosoftAzure, "path"),
250 ),
251 (
252 "abfs://file_system@account_name.dfs.core.windows.net/path",
253 (ObjectStoreScheme::MicrosoftAzure, "path"),
254 ),
255 (
256 "abfss://file_system@account_name.dfs.core.windows.net/path",
257 (ObjectStoreScheme::MicrosoftAzure, "path"),
258 ),
259 (
260 "https://account.dfs.core.windows.net",
261 (ObjectStoreScheme::MicrosoftAzure, ""),
262 ),
263 (
264 "https://account.blob.core.windows.net",
265 (ObjectStoreScheme::MicrosoftAzure, ""),
266 ),
267 (
268 "gs://bucket/path",
269 (ObjectStoreScheme::GoogleCloudStorage, "path"),
270 ),
271 (
272 "gs://test.example.com/path",
273 (ObjectStoreScheme::GoogleCloudStorage, "path"),
274 ),
275 ("http://mydomain/path", (ObjectStoreScheme::Http, "path")),
276 ("https://mydomain/path", (ObjectStoreScheme::Http, "path")),
277 (
278 "s3://bucket/foo%20bar",
279 (ObjectStoreScheme::AmazonS3, "foo bar"),
280 ),
281 (
282 "https://foo/bar%20baz",
283 (ObjectStoreScheme::Http, "bar baz"),
284 ),
285 (
286 "file:///bar%252Efoo",
287 (ObjectStoreScheme::Local, "bar%2Efoo"),
288 ),
289 (
290 "abfss://file_system@account.dfs.fabric.microsoft.com/",
291 (ObjectStoreScheme::MicrosoftAzure, ""),
292 ),
293 (
294 "abfss://file_system@account.dfs.fabric.microsoft.com/",
295 (ObjectStoreScheme::MicrosoftAzure, ""),
296 ),
297 (
298 "https://account.dfs.fabric.microsoft.com/",
299 (ObjectStoreScheme::MicrosoftAzure, ""),
300 ),
301 (
302 "https://account.dfs.fabric.microsoft.com/container",
303 (ObjectStoreScheme::MicrosoftAzure, "container"),
304 ),
305 (
306 "https://account.blob.fabric.microsoft.com/",
307 (ObjectStoreScheme::MicrosoftAzure, ""),
308 ),
309 (
310 "https://account.blob.fabric.microsoft.com/container",
311 (ObjectStoreScheme::MicrosoftAzure, "container"),
312 ),
313 ];
314
315 for (s, (expected_scheme, expected_path)) in cases {
316 let url = Url::parse(s).unwrap();
317 let (scheme, path) = ObjectStoreScheme::parse(&url).unwrap();
318
319 assert_eq!(scheme, expected_scheme, "{s}");
320 assert_eq!(path, Path::parse(expected_path).unwrap(), "{s}");
321 }
322
323 let neg_cases = [
324 "unix:/run/foo.socket",
325 "file://remote/path",
326 "memory://remote/",
327 ];
328 for s in neg_cases {
329 let url = Url::parse(s).unwrap();
330 assert!(ObjectStoreScheme::parse(&url).is_err());
331 }
332 }
333
334 #[test]
335 fn test_url_spaces() {
336 let url = Url::parse("file:///my file with spaces").unwrap();
337 assert_eq!(url.path(), "/my%20file%20with%20spaces");
338 let (_, path) = parse_url(&url).unwrap();
339 assert_eq!(path.as_ref(), "my file with spaces");
340 }
341
342 #[tokio::test]
343 #[cfg(feature = "http")]
344 async fn test_url_http() {
345 use crate::client::mock_server::MockServer;
346 use hyper::{header::USER_AGENT, Response};
347
348 let server = MockServer::new().await;
349
350 server.push_fn(|r| {
351 assert_eq!(r.uri().path(), "/foo/bar");
352 assert_eq!(r.headers().get(USER_AGENT).unwrap(), "test_url");
353 Response::new(String::new())
354 });
355
356 let test = format!("{}/foo/bar", server.url());
357 let opts = [("user_agent", "test_url"), ("allow_http", "true")];
358 let url = test.parse().unwrap();
359 let (store, path) = parse_url_opts(&url, opts).unwrap();
360 assert_eq!(path.as_ref(), "foo/bar");
361 store.get(&path).await.unwrap();
362
363 server.shutdown().await;
364 }
365}