object_store/azure/
builder.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::azure::client::{AzureClient, AzureConfig};
19use crate::azure::credential::{
20    AzureAccessKey, AzureCliCredential, ClientSecretOAuthProvider, ImdsManagedIdentityProvider,
21    WorkloadIdentityOAuthProvider,
22};
23use crate::azure::{AzureCredential, AzureCredentialProvider, MicrosoftAzure, STORE};
24use crate::client::TokenCredentialProvider;
25use crate::config::ConfigValue;
26use crate::{ClientConfigKey, ClientOptions, Result, RetryConfig, StaticCredentialProvider};
27use percent_encoding::percent_decode_str;
28use serde::{Deserialize, Serialize};
29use snafu::{OptionExt, ResultExt, Snafu};
30use std::str::FromStr;
31use std::sync::Arc;
32use url::Url;
33
34/// The well-known account used by Azurite and the legacy Azure Storage Emulator.
35///
36/// <https://docs.microsoft.com/azure/storage/common/storage-use-azurite#well-known-storage-account-and-key>
37const EMULATOR_ACCOUNT: &str = "devstoreaccount1";
38
39/// The well-known account key used by Azurite and the legacy Azure Storage Emulator.
40///
41/// <https://docs.microsoft.com/azure/storage/common/storage-use-azurite#well-known-storage-account-and-key>
42const EMULATOR_ACCOUNT_KEY: &str =
43    "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==";
44
/// Name of the environment variable that [`MicrosoftAzureBuilder::from_env`] reads
/// to populate the MSI endpoint.
45const MSI_ENDPOINT_ENV_KEY: &str = "IDENTITY_ENDPOINT";
46
47/// A specialized `Error` for Azure builder-related errors
48#[derive(Debug, Snafu)]
49#[allow(missing_docs)]
50enum Error {
51    #[snafu(display("Unable parse source url. Url: {}, Error: {}", url, source))]
52    UnableToParseUrl {
53        source: url::ParseError,
54        url: String,
55    },
56
57    #[snafu(display(
58        "Unable parse emulator url {}={}, Error: {}",
59        env_name,
60        env_value,
61        source
62    ))]
63    UnableToParseEmulatorUrl {
64        env_name: String,
65        env_value: String,
66        source: url::ParseError,
67    },
68
69    #[snafu(display("Account must be specified"))]
70    MissingAccount {},
71
72    #[snafu(display("Container name must be specified"))]
73    MissingContainerName {},
74
75    #[snafu(display(
76        "Unknown url scheme cannot be parsed into storage location: {}",
77        scheme
78    ))]
79    UnknownUrlScheme { scheme: String },
80
81    #[snafu(display("URL did not match any known pattern for scheme: {}", url))]
82    UrlNotRecognised { url: String },
83
84    #[snafu(display("Failed parsing an SAS key"))]
85    DecodeSasKey { source: std::str::Utf8Error },
86
87    #[snafu(display("Missing component in SAS query pair"))]
88    MissingSasComponent {},
89
90    #[snafu(display("Configuration key: '{}' is not known.", key))]
91    UnknownConfigurationKey { key: String },
92}
93
94impl From<Error> for crate::Error {
95    fn from(source: Error) -> Self {
96        match source {
97            Error::UnknownConfigurationKey { key } => {
98                Self::UnknownConfigurationKey { store: STORE, key }
99            }
100            _ => Self::Generic {
101                store: STORE,
102                source: Box::new(source),
103            },
104        }
105    }
106}
107
108/// Configure a connection to Microsoft Azure Blob Storage container using
109/// the specified credentials.
///
/// Settings are held as `Option` / `ConfigValue` fields and populated through the
/// `with_*` methods. The manual `Debug` implementation prints only the account
/// and container names.
110///
111/// # Example
112/// ```
113/// # let ACCOUNT = "foo";
114/// # let BUCKET_NAME = "foo";
115/// # let ACCESS_KEY = "foo";
116/// # use object_store::azure::MicrosoftAzureBuilder;
117/// let azure = MicrosoftAzureBuilder::new()
118///  .with_account(ACCOUNT)
119///  .with_access_key(ACCESS_KEY)
120///  .with_container_name(BUCKET_NAME)
121///  .build();
122/// ```
123#[derive(Default, Clone)]
124pub struct MicrosoftAzureBuilder {
125    /// Account name
126    account_name: Option<String>,
127    /// Access key
128    access_key: Option<String>,
129    /// Container name
130    container_name: Option<String>,
131    /// Bearer token
132    bearer_token: Option<String>,
133    /// Client id
134    client_id: Option<String>,
135    /// Client secret
136    client_secret: Option<String>,
137    /// Tenant id
138    tenant_id: Option<String>,
139    /// Query pairs for shared access signature authorization
140    sas_query_pairs: Option<Vec<(String, String)>>,
141    /// Shared access signature
142    sas_key: Option<String>,
143    /// Authority host
144    authority_host: Option<String>,
145    /// Url, stored unparsed by `with_url` and parsed when `build` is called
146    url: Option<String>,
147    /// When set to true, azurite storage emulator has to be used
148    use_emulator: ConfigValue<bool>,
149    /// Storage endpoint
150    endpoint: Option<String>,
151    /// Msi endpoint for acquiring managed identity token
152    msi_endpoint: Option<String>,
153    /// Object id for use with managed identity authentication
154    object_id: Option<String>,
155    /// Msi resource id for use with managed identity authentication
156    msi_resource_id: Option<String>,
157    /// File containing token for Azure AD workload identity federation
158    federated_token_file: Option<String>,
159    /// When set to true, azure cli has to be used for acquiring access token
160    use_azure_cli: ConfigValue<bool>,
161    /// Retry config
162    retry_config: RetryConfig,
163    /// Client options
164    client_options: ClientOptions,
165    /// Credentials
166    credentials: Option<AzureCredentialProvider>,
167    /// Skip signing requests
168    skip_signature: ConfigValue<bool>,
169    /// When set to true, fabric url scheme will be used
170    ///
171    /// i.e. `https://{account_name}.dfs.fabric.microsoft.com`
172    use_fabric_endpoint: ConfigValue<bool>,
173    /// When set to true, skips tagging objects
174    disable_tagging: ConfigValue<bool>,
175}
176
177/// Configuration keys for [`MicrosoftAzureBuilder`]
178///
179/// Configuration via keys can be done via [`MicrosoftAzureBuilder::with_config`]
///
/// Each variant documents its supported string keys; the first key listed is the
/// one returned by the `AsRef<str>` implementation.
180///
181/// # Example
182/// ```
183/// # use object_store::azure::{MicrosoftAzureBuilder, AzureConfigKey};
184/// let builder = MicrosoftAzureBuilder::new()
185///     .with_config("azure_client_id".parse().unwrap(), "my-client-id")
186///     .with_config(AzureConfigKey::AuthorityId, "my-tenant-id");
187/// ```
188#[derive(PartialEq, Eq, Hash, Clone, Debug, Copy, Deserialize, Serialize)]
189#[non_exhaustive]
190pub enum AzureConfigKey {
191    /// The name of the azure storage account
192    ///
193    /// Supported keys:
194    /// - `azure_storage_account_name`
195    /// - `account_name`
196    AccountName,
197
198    /// Master key for accessing storage account
199    ///
200    /// Supported keys:
201    /// - `azure_storage_account_key`
202    /// - `azure_storage_access_key`
203    /// - `azure_storage_master_key`
204    /// - `access_key`
205    /// - `account_key`
206    /// - `master_key`
207    AccessKey,
208
209    /// Service principal client id for authorizing requests
210    ///
211    /// Supported keys:
212    /// - `azure_storage_client_id`
213    /// - `azure_client_id`
214    /// - `client_id`
215    ClientId,
216
217    /// Service principal client secret for authorizing requests
218    ///
219    /// Supported keys:
220    /// - `azure_storage_client_secret`
221    /// - `azure_client_secret`
222    /// - `client_secret`
223    ClientSecret,
224
225    /// Tenant id used in oauth flows
226    ///
227    /// Supported keys:
228    /// - `azure_storage_tenant_id`
229    /// - `azure_storage_authority_id`
230    /// - `azure_tenant_id`
231    /// - `azure_authority_id`
232    /// - `tenant_id`
233    /// - `authority_id`
234    AuthorityId,
235
236    /// Shared access signature.
237    ///
238    /// The signature is expected to be percent-encoded, much like they are provided
239    /// in the azure storage explorer or azure portal.
240    ///
241    /// Supported keys:
242    /// - `azure_storage_sas_key`
243    /// - `azure_storage_sas_token`
244    /// - `sas_key`
245    /// - `sas_token`
246    SasKey,
247
248    /// Bearer token
249    ///
250    /// Supported keys:
251    /// - `azure_storage_token`
252    /// - `bearer_token`
253    /// - `token`
254    Token,
255
256    /// Use object store with azurite storage emulator
257    ///
258    /// Supported keys:
259    /// - `azure_storage_use_emulator`
260    /// - `object_store_use_emulator`
261    /// - `use_emulator`
262    UseEmulator,
263
264    /// Override the endpoint used to communicate with blob storage
265    ///
266    /// Supported keys:
267    /// - `azure_storage_endpoint`
268    /// - `azure_endpoint`
269    /// - `endpoint`
270    Endpoint,
271
272    /// Use object store with url scheme account.dfs.fabric.microsoft.com
273    ///
274    /// Supported keys:
275    /// - `azure_use_fabric_endpoint`
276    /// - `use_fabric_endpoint`
277    UseFabricEndpoint,
278
279    /// Endpoint from which to request an IMDS managed identity token
280    ///
281    /// Supported keys:
282    /// - `azure_msi_endpoint`
283    /// - `azure_identity_endpoint`
284    /// - `identity_endpoint`
285    /// - `msi_endpoint`
286    MsiEndpoint,
287
288    /// Object id for use with managed identity authentication
289    ///
290    /// Supported keys:
291    /// - `azure_object_id`
292    /// - `object_id`
293    ObjectId,
294
295    /// Msi resource id for use with managed identity authentication
296    ///
297    /// Supported keys:
298    /// - `azure_msi_resource_id`
299    /// - `msi_resource_id`
300    MsiResourceId,
301
302    /// File containing token for Azure AD workload identity federation
303    ///
304    /// Supported keys:
305    /// - `azure_federated_token_file`
306    /// - `federated_token_file`
307    FederatedTokenFile,
308
309    /// Use azure cli for acquiring access token
310    ///
311    /// Supported keys:
312    /// - `azure_use_azure_cli`
313    /// - `use_azure_cli`
314    UseAzureCli,
315
316    /// Skip signing requests
317    ///
318    /// Supported keys:
319    /// - `azure_skip_signature`
320    /// - `skip_signature`
321    SkipSignature,
322
323    /// Container name
324    ///
325    /// Supported keys:
326    /// - `azure_container_name`
327    /// - `container_name`
328    ContainerName,
329
330    /// Disables tagging objects
331    ///
332    /// This can be desirable if not supported by the backing store
333    ///
334    /// Supported keys:
335    /// - `azure_disable_tagging`
336    /// - `disable_tagging`
337    DisableTagging,
338
339    /// Client options
    ///
    /// Wraps a [`ClientConfigKey`]; values set through this variant are forwarded
    /// to the builder's client options. The legacy key `azure_allow_http` parses
    /// to `Client(ClientConfigKey::AllowHttp)`.
340    Client(ClientConfigKey),
341}
342
343impl AsRef<str> for AzureConfigKey {
344    fn as_ref(&self) -> &str {
345        match self {
346            Self::AccountName => "azure_storage_account_name",
347            Self::AccessKey => "azure_storage_account_key",
348            Self::ClientId => "azure_storage_client_id",
349            Self::ClientSecret => "azure_storage_client_secret",
350            Self::AuthorityId => "azure_storage_tenant_id",
351            Self::SasKey => "azure_storage_sas_key",
352            Self::Token => "azure_storage_token",
353            Self::UseEmulator => "azure_storage_use_emulator",
354            Self::UseFabricEndpoint => "azure_use_fabric_endpoint",
355            Self::Endpoint => "azure_storage_endpoint",
356            Self::MsiEndpoint => "azure_msi_endpoint",
357            Self::ObjectId => "azure_object_id",
358            Self::MsiResourceId => "azure_msi_resource_id",
359            Self::FederatedTokenFile => "azure_federated_token_file",
360            Self::UseAzureCli => "azure_use_azure_cli",
361            Self::SkipSignature => "azure_skip_signature",
362            Self::ContainerName => "azure_container_name",
363            Self::DisableTagging => "azure_disable_tagging",
364            Self::Client(key) => key.as_ref(),
365        }
366    }
367}
368
369impl FromStr for AzureConfigKey {
370    type Err = crate::Error;
371
372    fn from_str(s: &str) -> Result<Self, Self::Err> {
373        match s {
374            "azure_storage_account_key"
375            | "azure_storage_access_key"
376            | "azure_storage_master_key"
377            | "master_key"
378            | "account_key"
379            | "access_key" => Ok(Self::AccessKey),
380            "azure_storage_account_name" | "account_name" => Ok(Self::AccountName),
381            "azure_storage_client_id" | "azure_client_id" | "client_id" => Ok(Self::ClientId),
382            "azure_storage_client_secret" | "azure_client_secret" | "client_secret" => {
383                Ok(Self::ClientSecret)
384            }
385            "azure_storage_tenant_id"
386            | "azure_storage_authority_id"
387            | "azure_tenant_id"
388            | "azure_authority_id"
389            | "tenant_id"
390            | "authority_id" => Ok(Self::AuthorityId),
391            "azure_storage_sas_key" | "azure_storage_sas_token" | "sas_key" | "sas_token" => {
392                Ok(Self::SasKey)
393            }
394            "azure_storage_token" | "bearer_token" | "token" => Ok(Self::Token),
395            "azure_storage_use_emulator" | "use_emulator" => Ok(Self::UseEmulator),
396            "azure_storage_endpoint" | "azure_endpoint" | "endpoint" => Ok(Self::Endpoint),
397            "azure_msi_endpoint"
398            | "azure_identity_endpoint"
399            | "identity_endpoint"
400            | "msi_endpoint" => Ok(Self::MsiEndpoint),
401            "azure_object_id" | "object_id" => Ok(Self::ObjectId),
402            "azure_msi_resource_id" | "msi_resource_id" => Ok(Self::MsiResourceId),
403            "azure_federated_token_file" | "federated_token_file" => Ok(Self::FederatedTokenFile),
404            "azure_use_fabric_endpoint" | "use_fabric_endpoint" => Ok(Self::UseFabricEndpoint),
405            "azure_use_azure_cli" | "use_azure_cli" => Ok(Self::UseAzureCli),
406            "azure_skip_signature" | "skip_signature" => Ok(Self::SkipSignature),
407            "azure_container_name" | "container_name" => Ok(Self::ContainerName),
408            "azure_disable_tagging" | "disable_tagging" => Ok(Self::DisableTagging),
409            // Backwards compatibility
410            "azure_allow_http" => Ok(Self::Client(ClientConfigKey::AllowHttp)),
411            _ => match s.parse() {
412                Ok(key) => Ok(Self::Client(key)),
413                Err(_) => Err(Error::UnknownConfigurationKey { key: s.into() }.into()),
414            },
415        }
416    }
417}
418
419impl std::fmt::Debug for MicrosoftAzureBuilder {
420    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
421        write!(
422            f,
423            "MicrosoftAzureBuilder {{ account: {:?}, container_name: {:?} }}",
424            self.account_name, self.container_name
425        )
426    }
427}
428
429impl MicrosoftAzureBuilder {
430    /// Create a new [`MicrosoftAzureBuilder`] with default values.
431    pub fn new() -> Self {
432        Default::default()
433    }
434
435    /// Create an instance of [`MicrosoftAzureBuilder`] with values pre-populated from environment variables.
436    ///
437    /// Variables extracted from environment:
438    /// * AZURE_STORAGE_ACCOUNT_NAME: storage account name
439    /// * AZURE_STORAGE_ACCOUNT_KEY: storage account master key
440    /// * AZURE_STORAGE_ACCESS_KEY: alias for AZURE_STORAGE_ACCOUNT_KEY
441    /// * AZURE_STORAGE_CLIENT_ID -> client id for service principal authorization
442    /// * AZURE_STORAGE_CLIENT_SECRET -> client secret for service principal authorization
443    /// * AZURE_STORAGE_TENANT_ID -> tenant id used in oauth flows
444    /// # Example
445    /// ```
446    /// use object_store::azure::MicrosoftAzureBuilder;
447    ///
448    /// let azure = MicrosoftAzureBuilder::from_env()
449    ///     .with_container_name("foo")
450    ///     .build();
451    /// ```
452    pub fn from_env() -> Self {
453        let mut builder = Self::default();
454        for (os_key, os_value) in std::env::vars_os() {
455            if let (Some(key), Some(value)) = (os_key.to_str(), os_value.to_str()) {
456                if key.starts_with("AZURE_") {
457                    if let Ok(config_key) = key.to_ascii_lowercase().parse() {
458                        builder = builder.with_config(config_key, value);
459                    }
460                }
461            }
462        }
463
464        if let Ok(text) = std::env::var(MSI_ENDPOINT_ENV_KEY) {
465            builder = builder.with_msi_endpoint(text);
466        }
467
468        builder
469    }
470
471    /// Parse available connection info form a well-known storage URL.
472    ///
473    /// The supported url schemes are:
474    ///
475    /// - `abfs[s]://<container>/<path>` (according to [fsspec](https://github.com/fsspec/adlfs))
476    /// - `abfs[s]://<file_system>@<account_name>.dfs.core.windows.net/<path>`
477    /// - `abfs[s]://<file_system>@<account_name>.dfs.fabric.microsoft.com/<path>`
478    /// - `az://<container>/<path>` (according to [fsspec](https://github.com/fsspec/adlfs))
479    /// - `adl://<container>/<path>` (according to [fsspec](https://github.com/fsspec/adlfs))
480    /// - `azure://<container>/<path>` (custom)
481    /// - `https://<account>.dfs.core.windows.net`
482    /// - `https://<account>.blob.core.windows.net`
483    /// - `https://<account>.blob.core.windows.net/<container>`
484    /// - `https://<account>.dfs.fabric.microsoft.com`
485    /// - `https://<account>.dfs.fabric.microsoft.com/<container>`
486    /// - `https://<account>.blob.fabric.microsoft.com`
487    /// - `https://<account>.blob.fabric.microsoft.com/<container>`
488    ///
489    /// Note: Settings derived from the URL will override any others set on this builder
490    ///
491    /// # Example
492    /// ```
493    /// use object_store::azure::MicrosoftAzureBuilder;
494    ///
495    /// let azure = MicrosoftAzureBuilder::from_env()
496    ///     .with_url("abfss://file_system@account.dfs.core.windows.net/")
497    ///     .build();
498    /// ```
499    pub fn with_url(mut self, url: impl Into<String>) -> Self {
500        self.url = Some(url.into());
501        self
502    }
503
504    /// Set an option on the builder via a key - value pair.
505    pub fn with_config(mut self, key: AzureConfigKey, value: impl Into<String>) -> Self {
506        match key {
507            AzureConfigKey::AccessKey => self.access_key = Some(value.into()),
508            AzureConfigKey::AccountName => self.account_name = Some(value.into()),
509            AzureConfigKey::ClientId => self.client_id = Some(value.into()),
510            AzureConfigKey::ClientSecret => self.client_secret = Some(value.into()),
511            AzureConfigKey::AuthorityId => self.tenant_id = Some(value.into()),
512            AzureConfigKey::SasKey => self.sas_key = Some(value.into()),
513            AzureConfigKey::Token => self.bearer_token = Some(value.into()),
514            AzureConfigKey::MsiEndpoint => self.msi_endpoint = Some(value.into()),
515            AzureConfigKey::ObjectId => self.object_id = Some(value.into()),
516            AzureConfigKey::MsiResourceId => self.msi_resource_id = Some(value.into()),
517            AzureConfigKey::FederatedTokenFile => self.federated_token_file = Some(value.into()),
518            AzureConfigKey::UseAzureCli => self.use_azure_cli.parse(value),
519            AzureConfigKey::SkipSignature => self.skip_signature.parse(value),
520            AzureConfigKey::UseEmulator => self.use_emulator.parse(value),
521            AzureConfigKey::Endpoint => self.endpoint = Some(value.into()),
522            AzureConfigKey::UseFabricEndpoint => self.use_fabric_endpoint.parse(value),
523            AzureConfigKey::Client(key) => {
524                self.client_options = self.client_options.with_config(key, value)
525            }
526            AzureConfigKey::ContainerName => self.container_name = Some(value.into()),
527            AzureConfigKey::DisableTagging => self.disable_tagging.parse(value),
528        };
529        self
530    }
531
532    /// Get config value via a [`AzureConfigKey`].
533    ///
534    /// # Example
535    /// ```
536    /// use object_store::azure::{MicrosoftAzureBuilder, AzureConfigKey};
537    ///
538    /// let builder = MicrosoftAzureBuilder::from_env()
539    ///     .with_account("foo");
540    /// let account_name = builder.get_config_value(&AzureConfigKey::AccountName).unwrap_or_default();
541    /// assert_eq!("foo", &account_name);
542    /// ```
543    pub fn get_config_value(&self, key: &AzureConfigKey) -> Option<String> {
544        match key {
545            AzureConfigKey::AccountName => self.account_name.clone(),
546            AzureConfigKey::AccessKey => self.access_key.clone(),
547            AzureConfigKey::ClientId => self.client_id.clone(),
548            AzureConfigKey::ClientSecret => self.client_secret.clone(),
549            AzureConfigKey::AuthorityId => self.tenant_id.clone(),
550            AzureConfigKey::SasKey => self.sas_key.clone(),
551            AzureConfigKey::Token => self.bearer_token.clone(),
552            AzureConfigKey::UseEmulator => Some(self.use_emulator.to_string()),
553            AzureConfigKey::UseFabricEndpoint => Some(self.use_fabric_endpoint.to_string()),
554            AzureConfigKey::Endpoint => self.endpoint.clone(),
555            AzureConfigKey::MsiEndpoint => self.msi_endpoint.clone(),
556            AzureConfigKey::ObjectId => self.object_id.clone(),
557            AzureConfigKey::MsiResourceId => self.msi_resource_id.clone(),
558            AzureConfigKey::FederatedTokenFile => self.federated_token_file.clone(),
559            AzureConfigKey::UseAzureCli => Some(self.use_azure_cli.to_string()),
560            AzureConfigKey::SkipSignature => Some(self.skip_signature.to_string()),
561            AzureConfigKey::Client(key) => self.client_options.get_config_value(key),
562            AzureConfigKey::ContainerName => self.container_name.clone(),
563            AzureConfigKey::DisableTagging => Some(self.disable_tagging.to_string()),
564        }
565    }
566
567    /// Sets properties on this builder based on a URL
568    ///
569    /// This is a separate member function to allow fallible computation to
570    /// be deferred until [`Self::build`] which in turn allows deriving [`Clone`]
571    fn parse_url(&mut self, url: &str) -> Result<()> {
572        let parsed = Url::parse(url).context(UnableToParseUrlSnafu { url })?;
573        let host = parsed.host_str().context(UrlNotRecognisedSnafu { url })?;
574
575        let validate = |s: &str| match s.contains('.') {
576            true => Err(UrlNotRecognisedSnafu { url }.build()),
577            false => Ok(s.to_string()),
578        };
579
580        match parsed.scheme() {
581            "az" | "adl" | "azure" => self.container_name = Some(validate(host)?),
582            "abfs" | "abfss" => {
583                // abfs(s) might refer to the fsspec convention abfs://<container>/<path>
584                // or the convention for the hadoop driver abfs[s]://<file_system>@<account_name>.dfs.core.windows.net/<path>
585                if parsed.username().is_empty() {
586                    self.container_name = Some(validate(host)?);
587                } else if let Some(a) = host.strip_suffix(".dfs.core.windows.net") {
588                    self.container_name = Some(validate(parsed.username())?);
589                    self.account_name = Some(validate(a)?);
590                } else if let Some(a) = host.strip_suffix(".dfs.fabric.microsoft.com") {
591                    self.container_name = Some(validate(parsed.username())?);
592                    self.account_name = Some(validate(a)?);
593                    self.use_fabric_endpoint = true.into();
594                } else {
595                    return Err(UrlNotRecognisedSnafu { url }.build().into());
596                }
597            }
598            "https" => match host.split_once('.') {
599                Some((a, "dfs.core.windows.net")) | Some((a, "blob.core.windows.net")) => {
600                    self.account_name = Some(validate(a)?);
601                    if let Some(container) = parsed.path_segments().unwrap().next() {
602                        self.container_name = Some(validate(container)?);
603                    }
604                }
605                Some((a, "dfs.fabric.microsoft.com")) | Some((a, "blob.fabric.microsoft.com")) => {
606                    self.account_name = Some(validate(a)?);
607                    // Attempt to infer the container name from the URL
608                    // - https://onelake.dfs.fabric.microsoft.com/<workspaceGUID>/<itemGUID>/Files/test.csv
609                    // - https://onelake.dfs.fabric.microsoft.com/<workspace>/<item>.<itemtype>/<path>/<fileName>
610                    //
611                    // See <https://learn.microsoft.com/en-us/fabric/onelake/onelake-access-api>
612                    if let Some(workspace) = parsed.path_segments().unwrap().next() {
613                        if !workspace.is_empty() {
614                            self.container_name = Some(workspace.to_string())
615                        }
616                    }
617                    self.use_fabric_endpoint = true.into();
618                }
619                _ => return Err(UrlNotRecognisedSnafu { url }.build().into()),
620            },
621            scheme => return Err(UnknownUrlSchemeSnafu { scheme }.build().into()),
622        }
623        Ok(())
624    }
625
626    /// Set the Azure Account (required)
627    pub fn with_account(mut self, account: impl Into<String>) -> Self {
628        self.account_name = Some(account.into());
629        self
630    }
631
632    /// Set the Azure Container Name (required)
633    pub fn with_container_name(mut self, container_name: impl Into<String>) -> Self {
634        self.container_name = Some(container_name.into());
635        self
636    }
637
638    /// Set the Azure Access Key (required - one of access key, bearer token, or client credentials)
639    pub fn with_access_key(mut self, access_key: impl Into<String>) -> Self {
640        self.access_key = Some(access_key.into());
641        self
642    }
643
644    /// Set a static bearer token to be used for authorizing requests
645    pub fn with_bearer_token_authorization(mut self, bearer_token: impl Into<String>) -> Self {
646        self.bearer_token = Some(bearer_token.into());
647        self
648    }
649
650    /// Set a client secret used for client secret authorization
651    pub fn with_client_secret_authorization(
652        mut self,
653        client_id: impl Into<String>,
654        client_secret: impl Into<String>,
655        tenant_id: impl Into<String>,
656    ) -> Self {
657        self.client_id = Some(client_id.into());
658        self.client_secret = Some(client_secret.into());
659        self.tenant_id = Some(tenant_id.into());
660        self
661    }
662
663    /// Sets the client id for use in client secret or k8s federated credential flow
664    pub fn with_client_id(mut self, client_id: impl Into<String>) -> Self {
665        self.client_id = Some(client_id.into());
666        self
667    }
668
669    /// Sets the client secret for use in client secret flow
670    pub fn with_client_secret(mut self, client_secret: impl Into<String>) -> Self {
671        self.client_secret = Some(client_secret.into());
672        self
673    }
674
675    /// Sets the tenant id for use in client secret or k8s federated credential flow
676    pub fn with_tenant_id(mut self, tenant_id: impl Into<String>) -> Self {
677        self.tenant_id = Some(tenant_id.into());
678        self
679    }
680
681    /// Set query pairs appended to the url for shared access signature authorization
682    pub fn with_sas_authorization(mut self, query_pairs: impl Into<Vec<(String, String)>>) -> Self {
683        self.sas_query_pairs = Some(query_pairs.into());
684        self
685    }
686
687    /// Set the credential provider overriding any other options
688    pub fn with_credentials(mut self, credentials: AzureCredentialProvider) -> Self {
689        self.credentials = Some(credentials);
690        self
691    }
692
693    /// Set if the Azure emulator should be used (defaults to false)
694    pub fn with_use_emulator(mut self, use_emulator: bool) -> Self {
695        self.use_emulator = use_emulator.into();
696        self
697    }
698
699    /// Override the endpoint used to communicate with blob storage
700    ///
701    /// Defaults to `https://{account}.blob.core.windows.net`
702    ///
703    /// By default, only HTTPS schemes are enabled. To connect to an HTTP endpoint, enable
704    /// [`Self::with_allow_http`].
705    pub fn with_endpoint(mut self, endpoint: String) -> Self {
706        self.endpoint = Some(endpoint);
707        self
708    }
709
710    /// Set if Microsoft Fabric url scheme should be used (defaults to false)
711    ///
712    /// When disabled the url scheme used is `https://{account}.blob.core.windows.net`
713    /// When enabled the url scheme used is `https://{account}.dfs.fabric.microsoft.com`
714    ///
715    /// Note: [`Self::with_endpoint`] will take precedence over this option
716    pub fn with_use_fabric_endpoint(mut self, use_fabric_endpoint: bool) -> Self {
717        self.use_fabric_endpoint = use_fabric_endpoint.into();
718        self
719    }
720
721    /// Sets what protocol is allowed
722    ///
723    /// If `allow_http` is :
724    /// * false (default):  Only HTTPS are allowed
725    /// * true:  HTTP and HTTPS are allowed
726    pub fn with_allow_http(mut self, allow_http: bool) -> Self {
727        self.client_options = self.client_options.with_allow_http(allow_http);
728        self
729    }
730
731    /// Sets an alternative authority host for OAuth based authorization
732    ///
733    /// Common hosts for azure clouds are defined in [authority_hosts](crate::azure::authority_hosts).
734    ///
735    /// Defaults to <https://login.microsoftonline.com>
736    pub fn with_authority_host(mut self, authority_host: impl Into<String>) -> Self {
737        self.authority_host = Some(authority_host.into());
738        self
739    }
740
741    /// Set the retry configuration
742    pub fn with_retry(mut self, retry_config: RetryConfig) -> Self {
743        self.retry_config = retry_config;
744        self
745    }
746
747    /// Set the proxy_url to be used by the underlying client
748    pub fn with_proxy_url(mut self, proxy_url: impl Into<String>) -> Self {
749        self.client_options = self.client_options.with_proxy_url(proxy_url);
750        self
751    }
752
753    /// Set a trusted proxy CA certificate
754    pub fn with_proxy_ca_certificate(mut self, proxy_ca_certificate: impl Into<String>) -> Self {
755        self.client_options = self
756            .client_options
757            .with_proxy_ca_certificate(proxy_ca_certificate);
758        self
759    }
760
761    /// Set a list of hosts to exclude from proxy connections
762    pub fn with_proxy_excludes(mut self, proxy_excludes: impl Into<String>) -> Self {
763        self.client_options = self.client_options.with_proxy_excludes(proxy_excludes);
764        self
765    }
766
767    /// Sets the client options, overriding any already set
768    pub fn with_client_options(mut self, options: ClientOptions) -> Self {
769        self.client_options = options;
770        self
771    }
772
773    /// Sets the endpoint for acquiring managed identity token
774    pub fn with_msi_endpoint(mut self, msi_endpoint: impl Into<String>) -> Self {
775        self.msi_endpoint = Some(msi_endpoint.into());
776        self
777    }
778
779    /// Sets a file path for acquiring azure federated identity token in k8s
780    ///
781    /// requires `client_id` and `tenant_id` to be set
782    pub fn with_federated_token_file(mut self, federated_token_file: impl Into<String>) -> Self {
783        self.federated_token_file = Some(federated_token_file.into());
784        self
785    }
786
787    /// Set if the Azure Cli should be used for acquiring access token
788    ///
789    /// <https://learn.microsoft.com/en-us/cli/azure/account?view=azure-cli-latest#az-account-get-access-token>
790    pub fn with_use_azure_cli(mut self, use_azure_cli: bool) -> Self {
791        self.use_azure_cli = use_azure_cli.into();
792        self
793    }
794
795    /// If enabled, [`MicrosoftAzure`] will not fetch credentials and will not sign requests
796    ///
797    /// This can be useful when interacting with public containers
798    pub fn with_skip_signature(mut self, skip_signature: bool) -> Self {
799        self.skip_signature = skip_signature.into();
800        self
801    }
802
803    /// If set to `true` will ignore any tags provided to put_opts
804    pub fn with_disable_tagging(mut self, ignore: bool) -> Self {
805        self.disable_tagging = ignore.into();
806        self
807    }
808
809    /// Configure a connection to container with given name on Microsoft Azure Blob store.
810    pub fn build(mut self) -> Result<MicrosoftAzure> {
811        if let Some(url) = self.url.take() {
812            self.parse_url(&url)?;
813        }
814
815        let container = self.container_name.ok_or(Error::MissingContainerName {})?;
816
817        let static_creds = |credential: AzureCredential| -> AzureCredentialProvider {
818            Arc::new(StaticCredentialProvider::new(credential))
819        };
820
821        let (is_emulator, storage_url, auth, account) = if self.use_emulator.get()? {
822            let account_name = self
823                .account_name
824                .unwrap_or_else(|| EMULATOR_ACCOUNT.to_string());
825            // Allow overriding defaults. Values taken from
826            // from https://docs.rs/azure_storage/0.2.0/src/azure_storage/core/clients/storage_account_client.rs.html#129-141
827            let url = url_from_env("AZURITE_BLOB_STORAGE_URL", "http://127.0.0.1:10000")?;
828            let credential = if let Some(k) = self.access_key {
829                AzureCredential::AccessKey(AzureAccessKey::try_new(&k)?)
830            } else if let Some(bearer_token) = self.bearer_token {
831                AzureCredential::BearerToken(bearer_token)
832            } else if let Some(query_pairs) = self.sas_query_pairs {
833                AzureCredential::SASToken(query_pairs)
834            } else if let Some(sas) = self.sas_key {
835                AzureCredential::SASToken(split_sas(&sas)?)
836            } else {
837                AzureCredential::AccessKey(AzureAccessKey::try_new(EMULATOR_ACCOUNT_KEY)?)
838            };
839
840            self.client_options = self.client_options.with_allow_http(true);
841            (true, url, static_creds(credential), account_name)
842        } else {
843            let account_name = self.account_name.ok_or(Error::MissingAccount {})?;
844            let account_url = match self.endpoint {
845                Some(account_url) => account_url,
846                None => match self.use_fabric_endpoint.get()? {
847                    true => {
848                        format!("https://{}.blob.fabric.microsoft.com", &account_name)
849                    }
850                    false => format!("https://{}.blob.core.windows.net", &account_name),
851                },
852            };
853
854            let url =
855                Url::parse(&account_url).context(UnableToParseUrlSnafu { url: account_url })?;
856
857            let credential = if let Some(credential) = self.credentials {
858                credential
859            } else if let Some(bearer_token) = self.bearer_token {
860                static_creds(AzureCredential::BearerToken(bearer_token))
861            } else if let Some(access_key) = self.access_key {
862                let key = AzureAccessKey::try_new(&access_key)?;
863                static_creds(AzureCredential::AccessKey(key))
864            } else if let (Some(client_id), Some(tenant_id), Some(federated_token_file)) =
865                (&self.client_id, &self.tenant_id, self.federated_token_file)
866            {
867                let client_credential = WorkloadIdentityOAuthProvider::new(
868                    client_id,
869                    federated_token_file,
870                    tenant_id,
871                    self.authority_host,
872                );
873                Arc::new(TokenCredentialProvider::new(
874                    client_credential,
875                    self.client_options.client()?,
876                    self.retry_config.clone(),
877                )) as _
878            } else if let (Some(client_id), Some(client_secret), Some(tenant_id)) =
879                (&self.client_id, self.client_secret, &self.tenant_id)
880            {
881                let client_credential = ClientSecretOAuthProvider::new(
882                    client_id.clone(),
883                    client_secret,
884                    tenant_id,
885                    self.authority_host,
886                );
887                Arc::new(TokenCredentialProvider::new(
888                    client_credential,
889                    self.client_options.client()?,
890                    self.retry_config.clone(),
891                )) as _
892            } else if let Some(query_pairs) = self.sas_query_pairs {
893                static_creds(AzureCredential::SASToken(query_pairs))
894            } else if let Some(sas) = self.sas_key {
895                static_creds(AzureCredential::SASToken(split_sas(&sas)?))
896            } else if self.use_azure_cli.get()? {
897                Arc::new(AzureCliCredential::new()) as _
898            } else {
899                let msi_credential = ImdsManagedIdentityProvider::new(
900                    self.client_id,
901                    self.object_id,
902                    self.msi_resource_id,
903                    self.msi_endpoint,
904                );
905                Arc::new(TokenCredentialProvider::new(
906                    msi_credential,
907                    self.client_options.metadata_client()?,
908                    self.retry_config.clone(),
909                )) as _
910            };
911            (false, url, credential, account_name)
912        };
913
914        let config = AzureConfig {
915            account,
916            is_emulator,
917            skip_signature: self.skip_signature.get()?,
918            container,
919            disable_tagging: self.disable_tagging.get()?,
920            retry_config: self.retry_config,
921            client_options: self.client_options,
922            service: storage_url,
923            credentials: auth,
924        };
925
926        let client = Arc::new(AzureClient::new(config)?);
927
928        Ok(MicrosoftAzure { client })
929    }
930}
931
932/// Parses the contents of the environment variable `env_name` as a URL
933/// if present, otherwise falls back to default_url
934fn url_from_env(env_name: &str, default_url: &str) -> Result<Url> {
935    let url = match std::env::var(env_name) {
936        Ok(env_value) => Url::parse(&env_value).context(UnableToParseEmulatorUrlSnafu {
937            env_name,
938            env_value,
939        })?,
940        Err(_) => Url::parse(default_url).expect("Failed to parse default URL"),
941    };
942    Ok(url)
943}
944
945fn split_sas(sas: &str) -> Result<Vec<(String, String)>, Error> {
946    let sas = percent_decode_str(sas)
947        .decode_utf8()
948        .context(DecodeSasKeySnafu {})?;
949    let kv_str_pairs = sas
950        .trim_start_matches('?')
951        .split('&')
952        .filter(|s| !s.chars().all(char::is_whitespace));
953    let mut pairs = Vec::new();
954    for kv_pair_str in kv_str_pairs {
955        let (k, v) = kv_pair_str
956            .trim()
957            .split_once('=')
958            .ok_or(Error::MissingSasComponent {})?;
959        pairs.push((k.into(), v.into()))
960    }
961    Ok(pairs)
962}
963
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    /// Parses `url` into a fresh builder, panicking if the URL is rejected.
    fn parsed(url: &str) -> MicrosoftAzureBuilder {
        let mut builder = MicrosoftAzureBuilder::new();
        builder.parse_url(url).unwrap();
        builder
    }

    /// Checks which builder fields each supported URL form populates, and that
    /// malformed URLs are rejected.
    #[test]
    fn azure_blob_test_urls() {
        // abfss with an explicit account host
        let builder = parsed("abfss://file_system@account.dfs.core.windows.net/");
        assert_eq!(builder.account_name.as_deref(), Some("account"));
        assert_eq!(builder.container_name.as_deref(), Some("file_system"));
        assert!(!builder.use_fabric_endpoint.get().unwrap());

        let builder = parsed("abfss://file_system@account.dfs.fabric.microsoft.com/");
        assert_eq!(builder.account_name.as_deref(), Some("account"));
        assert_eq!(builder.container_name.as_deref(), Some("file_system"));
        assert!(builder.use_fabric_endpoint.get().unwrap());

        // Short forms that only carry the container
        let builder = parsed("abfs://container/path");
        assert_eq!(builder.container_name.as_deref(), Some("container"));

        let builder = parsed("az://container");
        assert_eq!(builder.container_name.as_deref(), Some("container"));

        let builder = parsed("az://container/path");
        assert_eq!(builder.container_name.as_deref(), Some("container"));

        // https against the standard endpoints
        let builder = parsed("https://account.dfs.core.windows.net/");
        assert_eq!(builder.account_name.as_deref(), Some("account"));
        assert!(!builder.use_fabric_endpoint.get().unwrap());

        let builder = parsed("https://account.blob.core.windows.net/");
        assert_eq!(builder.account_name.as_deref(), Some("account"));
        assert!(!builder.use_fabric_endpoint.get().unwrap());

        let builder = parsed("https://account.blob.core.windows.net/container");
        assert_eq!(builder.account_name.as_deref(), Some("account"));
        assert_eq!(builder.container_name.as_deref(), Some("container"));
        assert!(!builder.use_fabric_endpoint.get().unwrap());

        // https against the fabric endpoints
        let builder = parsed("https://account.dfs.fabric.microsoft.com/");
        assert_eq!(builder.account_name.as_deref(), Some("account"));
        assert_eq!(builder.container_name, None);
        assert!(builder.use_fabric_endpoint.get().unwrap());

        let builder = parsed("https://account.dfs.fabric.microsoft.com/container");
        assert_eq!(builder.account_name.as_deref(), Some("account"));
        assert_eq!(builder.container_name.as_deref(), Some("container"));
        assert!(builder.use_fabric_endpoint.get().unwrap());

        let builder = parsed("https://account.blob.fabric.microsoft.com/");
        assert_eq!(builder.account_name.as_deref(), Some("account"));
        assert_eq!(builder.container_name, None);
        assert!(builder.use_fabric_endpoint.get().unwrap());

        let builder = parsed("https://account.blob.fabric.microsoft.com/container");
        assert_eq!(builder.account_name.as_deref(), Some("account"));
        assert_eq!(builder.container_name.as_deref(), Some("container"));
        assert!(builder.use_fabric_endpoint.get().unwrap());

        // Every one of these must be rejected; a single builder is reused for all of them.
        let rejected = [
            "mailto://account.blob.core.windows.net/",
            "az://blob.mydomain/",
            "abfs://container.foo/path",
            "abfss://file_system@account.foo.dfs.core.windows.net/",
            "abfss://file_system.bar@account.dfs.core.windows.net/",
            "https://blob.mydomain/",
            "https://blob.foo.dfs.core.windows.net/",
        ];
        let mut builder = MicrosoftAzureBuilder::new();
        for url in rejected {
            builder.parse_url(url).unwrap_err();
        }
    }

    /// Checks that string config keys are routed to the matching builder fields.
    #[test]
    fn azure_test_config_from_map() {
        let azure_client_id = "object_store:fake_access_key_id";
        let azure_storage_account_name = "object_store:fake_secret_key";
        let azure_storage_token = "object_store:fake_default_region";
        let options = HashMap::from([
            ("azure_client_id", azure_client_id),
            ("azure_storage_account_name", azure_storage_account_name),
            ("azure_storage_token", azure_storage_token),
        ]);

        let mut builder = MicrosoftAzureBuilder::new();
        for (key, value) in options {
            builder = builder.with_config(key.parse().unwrap(), value);
        }

        assert_eq!(builder.client_id.as_deref(), Some(azure_client_id));
        assert_eq!(
            builder.account_name.as_deref(),
            Some(azure_storage_account_name)
        );
        assert_eq!(builder.bearer_token.as_deref(), Some(azure_storage_token));
    }

    /// Checks that a raw SAS token is split into percent-decoded pairs, in order.
    #[test]
    fn azure_test_split_sas() {
        let raw_sas = "?sv=2021-10-04&st=2023-01-04T17%3A48%3A57Z&se=2023-01-04T18%3A15%3A00Z&sr=c&sp=rcwl&sig=C7%2BZeEOWbrxPA3R0Cw%2Fw1EZz0%2B4KBvQexeKZKe%2BB6h0%3D";
        let expected: Vec<(String, String)> = [
            ("sv", "2021-10-04"),
            ("st", "2023-01-04T17:48:57Z"),
            ("se", "2023-01-04T18:15:00Z"),
            ("sr", "c"),
            ("sp", "rcwl"),
            ("sig", "C7+ZeEOWbrxPA3R0Cw/w1EZz0+4KBvQexeKZKe+B6h0="),
        ]
        .iter()
        .map(|&(key, value)| (key.to_string(), value.to_string()))
        .collect();

        let pairs = split_sas(raw_sas).unwrap();
        assert_eq!(expected, pairs);
    }
}