object_store/gcp/
builder.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::client::TokenCredentialProvider;
19use crate::gcp::client::{GoogleCloudStorageClient, GoogleCloudStorageConfig};
20use crate::gcp::credential::{
21    ApplicationDefaultCredentials, InstanceCredentialProvider, ServiceAccountCredentials,
22    DEFAULT_GCS_BASE_URL,
23};
24use crate::gcp::{
25    credential, GcpCredential, GcpCredentialProvider, GcpSigningCredential,
26    GcpSigningCredentialProvider, GoogleCloudStorage, STORE,
27};
28use crate::{ClientConfigKey, ClientOptions, Result, RetryConfig, StaticCredentialProvider};
29use serde::{Deserialize, Serialize};
30use snafu::{OptionExt, ResultExt, Snafu};
31use std::str::FromStr;
32use std::sync::Arc;
33use url::Url;
34
35use super::credential::{AuthorizedUserSigningCredentials, InstanceSigningCredentialProvider};
36
37#[derive(Debug, Snafu)]
38enum Error {
39    #[snafu(display("Missing bucket name"))]
40    MissingBucketName {},
41
42    #[snafu(display("One of service account path or service account key may be provided."))]
43    ServiceAccountPathAndKeyProvided,
44
45    #[snafu(display("Unable parse source url. Url: {}, Error: {}", url, source))]
46    UnableToParseUrl {
47        source: url::ParseError,
48        url: String,
49    },
50
51    #[snafu(display(
52        "Unknown url scheme cannot be parsed into storage location: {}",
53        scheme
54    ))]
55    UnknownUrlScheme { scheme: String },
56
57    #[snafu(display("URL did not match any known pattern for scheme: {}", url))]
58    UrlNotRecognised { url: String },
59
60    #[snafu(display("Configuration key: '{}' is not known.", key))]
61    UnknownConfigurationKey { key: String },
62
63    #[snafu(display("GCP credential error: {}", source))]
64    Credential { source: credential::Error },
65}
66
67impl From<Error> for crate::Error {
68    fn from(err: Error) -> Self {
69        match err {
70            Error::UnknownConfigurationKey { key } => {
71                Self::UnknownConfigurationKey { store: STORE, key }
72            }
73            _ => Self::Generic {
74                store: STORE,
75                source: Box::new(err),
76            },
77        }
78    }
79}
80
/// Configure a connection to Google Cloud Storage.
///
/// If no credentials are explicitly provided, they will be sourced
/// from the environment as documented [here](https://cloud.google.com/docs/authentication/application-default-credentials).
///
/// # Example
/// ```
/// # let BUCKET_NAME = "foo";
/// # use object_store::gcp::GoogleCloudStorageBuilder;
/// let gcs = GoogleCloudStorageBuilder::from_env().with_bucket_name(BUCKET_NAME).build();
/// ```
#[derive(Debug, Clone)]
pub struct GoogleCloudStorageBuilder {
    /// Bucket name; `build` fails with a "Missing bucket name" error if this
    /// is still unset after any configured URL has been applied.
    bucket_name: Option<String>,
    /// Storage URL recorded by `with_url`; it is only parsed during `build`,
    /// where its host replaces `bucket_name`.
    url: Option<String>,
    /// Path to the service account file.
    /// `build` rejects the configuration if `service_account_key` is also set.
    service_account_path: Option<String>,
    /// The serialized service account key.
    /// `build` rejects the configuration if `service_account_path` is also set.
    service_account_key: Option<String>,
    /// Path to the application credentials file, handed to
    /// `ApplicationDefaultCredentials::read` during `build`.
    application_credentials_path: Option<String>,
    /// Retry config, used for token requests and passed on to the client.
    retry_config: RetryConfig,
    /// Client options used to create the HTTP clients.
    client_options: ClientOptions,
    /// Explicit credential provider; when set, `build` uses it instead of
    /// deriving one from the other options.
    credentials: Option<GcpCredentialProvider>,
    /// Explicit provider of credentials for signing URLs; when set, `build`
    /// uses it instead of deriving one from the other options.
    signing_credentials: Option<GcpSigningCredentialProvider>,
}
113
/// Configuration keys for [`GoogleCloudStorageBuilder`]
///
/// Configuration via keys can be done via [`GoogleCloudStorageBuilder::with_config`]
///
/// Keys are parsed from their string names through the [`FromStr`]
/// implementation; each variant lists the names it accepts.
///
/// # Example
/// ```
/// # use object_store::gcp::{GoogleCloudStorageBuilder, GoogleConfigKey};
/// let builder = GoogleCloudStorageBuilder::new()
///     .with_config("google_service_account".parse().unwrap(), "my-service-account")
///     .with_config(GoogleConfigKey::Bucket, "my-bucket");
/// ```
#[derive(PartialEq, Eq, Hash, Clone, Debug, Copy, Serialize, Deserialize)]
#[non_exhaustive]
pub enum GoogleConfigKey {
    /// Path to the service account file
    ///
    /// Supported keys:
    /// - `google_service_account`
    /// - `service_account`
    /// - `google_service_account_path`
    /// - `service_account_path`
    ServiceAccount,

    /// The serialized service account key.
    ///
    /// Supported keys:
    /// - `google_service_account_key`
    /// - `service_account_key`
    ServiceAccountKey,

    /// Bucket name
    ///
    /// See [`GoogleCloudStorageBuilder::with_bucket_name`] for details.
    ///
    /// Supported keys:
    /// - `google_bucket`
    /// - `google_bucket_name`
    /// - `bucket`
    /// - `bucket_name`
    Bucket,

    /// Application credentials path
    ///
    /// See [`GoogleCloudStorageBuilder::with_application_credentials`].
    ///
    /// Supported keys:
    /// - `google_application_credentials`
    ApplicationCredentials,

    /// Client options
    ///
    /// Any string that matches none of the keys above is parsed as a
    /// [`ClientConfigKey`] and, on success, wrapped in this variant.
    Client(ClientConfigKey),
}
163
164impl AsRef<str> for GoogleConfigKey {
165    fn as_ref(&self) -> &str {
166        match self {
167            Self::ServiceAccount => "google_service_account",
168            Self::ServiceAccountKey => "google_service_account_key",
169            Self::Bucket => "google_bucket",
170            Self::ApplicationCredentials => "google_application_credentials",
171            Self::Client(key) => key.as_ref(),
172        }
173    }
174}
175
176impl FromStr for GoogleConfigKey {
177    type Err = crate::Error;
178
179    fn from_str(s: &str) -> Result<Self, Self::Err> {
180        match s {
181            "google_service_account"
182            | "service_account"
183            | "google_service_account_path"
184            | "service_account_path" => Ok(Self::ServiceAccount),
185            "google_service_account_key" | "service_account_key" => Ok(Self::ServiceAccountKey),
186            "google_bucket" | "google_bucket_name" | "bucket" | "bucket_name" => Ok(Self::Bucket),
187            "google_application_credentials" => Ok(Self::ApplicationCredentials),
188            _ => match s.parse() {
189                Ok(key) => Ok(Self::Client(key)),
190                Err(_) => Err(Error::UnknownConfigurationKey { key: s.into() }.into()),
191            },
192        }
193    }
194}
195
196impl Default for GoogleCloudStorageBuilder {
197    fn default() -> Self {
198        Self {
199            bucket_name: None,
200            service_account_path: None,
201            service_account_key: None,
202            application_credentials_path: None,
203            retry_config: Default::default(),
204            client_options: ClientOptions::new().with_allow_http(true),
205            url: None,
206            credentials: None,
207            signing_credentials: None,
208        }
209    }
210}
211
212impl GoogleCloudStorageBuilder {
213    /// Create a new [`GoogleCloudStorageBuilder`] with default values.
214    pub fn new() -> Self {
215        Default::default()
216    }
217
218    /// Create an instance of [`GoogleCloudStorageBuilder`] with values pre-populated from environment variables.
219    ///
220    /// Variables extracted from environment:
221    /// * GOOGLE_SERVICE_ACCOUNT: location of service account file
222    /// * GOOGLE_SERVICE_ACCOUNT_PATH: (alias) location of service account file
223    /// * SERVICE_ACCOUNT: (alias) location of service account file
224    /// * GOOGLE_SERVICE_ACCOUNT_KEY: JSON serialized service account key
225    /// * GOOGLE_BUCKET: bucket name
226    /// * GOOGLE_BUCKET_NAME: (alias) bucket name
227    ///
228    /// # Example
229    /// ```
230    /// use object_store::gcp::GoogleCloudStorageBuilder;
231    ///
232    /// let gcs = GoogleCloudStorageBuilder::from_env()
233    ///     .with_bucket_name("foo")
234    ///     .build();
235    /// ```
236    pub fn from_env() -> Self {
237        let mut builder = Self::default();
238
239        if let Ok(service_account_path) = std::env::var("SERVICE_ACCOUNT") {
240            builder.service_account_path = Some(service_account_path);
241        }
242
243        for (os_key, os_value) in std::env::vars_os() {
244            if let (Some(key), Some(value)) = (os_key.to_str(), os_value.to_str()) {
245                if key.starts_with("GOOGLE_") {
246                    if let Ok(config_key) = key.to_ascii_lowercase().parse() {
247                        builder = builder.with_config(config_key, value);
248                    }
249                }
250            }
251        }
252
253        builder
254    }
255
256    /// Parse available connection info form a well-known storage URL.
257    ///
258    /// The supported url schemes are:
259    ///
260    /// - `gs://<bucket>/<path>`
261    ///
262    /// Note: Settings derived from the URL will override any others set on this builder
263    ///
264    /// # Example
265    /// ```
266    /// use object_store::gcp::GoogleCloudStorageBuilder;
267    ///
268    /// let gcs = GoogleCloudStorageBuilder::from_env()
269    ///     .with_url("gs://bucket/path")
270    ///     .build();
271    /// ```
272    pub fn with_url(mut self, url: impl Into<String>) -> Self {
273        self.url = Some(url.into());
274        self
275    }
276
277    /// Set an option on the builder via a key - value pair.
278    pub fn with_config(mut self, key: GoogleConfigKey, value: impl Into<String>) -> Self {
279        match key {
280            GoogleConfigKey::ServiceAccount => self.service_account_path = Some(value.into()),
281            GoogleConfigKey::ServiceAccountKey => self.service_account_key = Some(value.into()),
282            GoogleConfigKey::Bucket => self.bucket_name = Some(value.into()),
283            GoogleConfigKey::ApplicationCredentials => {
284                self.application_credentials_path = Some(value.into())
285            }
286            GoogleConfigKey::Client(key) => {
287                self.client_options = self.client_options.with_config(key, value)
288            }
289        };
290        self
291    }
292
293    /// Get config value via a [`GoogleConfigKey`].
294    ///
295    /// # Example
296    /// ```
297    /// use object_store::gcp::{GoogleCloudStorageBuilder, GoogleConfigKey};
298    ///
299    /// let builder = GoogleCloudStorageBuilder::from_env()
300    ///     .with_service_account_key("foo");
301    /// let service_account_key = builder.get_config_value(&GoogleConfigKey::ServiceAccountKey).unwrap_or_default();
302    /// assert_eq!("foo", &service_account_key);
303    /// ```
304    pub fn get_config_value(&self, key: &GoogleConfigKey) -> Option<String> {
305        match key {
306            GoogleConfigKey::ServiceAccount => self.service_account_path.clone(),
307            GoogleConfigKey::ServiceAccountKey => self.service_account_key.clone(),
308            GoogleConfigKey::Bucket => self.bucket_name.clone(),
309            GoogleConfigKey::ApplicationCredentials => self.application_credentials_path.clone(),
310            GoogleConfigKey::Client(key) => self.client_options.get_config_value(key),
311        }
312    }
313
314    /// Sets properties on this builder based on a URL
315    ///
316    /// This is a separate member function to allow fallible computation to
317    /// be deferred until [`Self::build`] which in turn allows deriving [`Clone`]
318    fn parse_url(&mut self, url: &str) -> Result<()> {
319        let parsed = Url::parse(url).context(UnableToParseUrlSnafu { url })?;
320        let host = parsed.host_str().context(UrlNotRecognisedSnafu { url })?;
321
322        match parsed.scheme() {
323            "gs" => self.bucket_name = Some(host.to_string()),
324            scheme => return Err(UnknownUrlSchemeSnafu { scheme }.build().into()),
325        }
326        Ok(())
327    }
328
329    /// Set the bucket name (required)
330    pub fn with_bucket_name(mut self, bucket_name: impl Into<String>) -> Self {
331        self.bucket_name = Some(bucket_name.into());
332        self
333    }
334
335    /// Set the path to the service account file.
336    ///
337    /// This or [`GoogleCloudStorageBuilder::with_service_account_key`] must be
338    /// set.
339    ///
340    /// Example `"/tmp/gcs.json"`.
341    ///
342    /// Example contents of `gcs.json`:
343    ///
344    /// ```json
345    /// {
346    ///    "gcs_base_url": "https://localhost:4443",
347    ///    "disable_oauth": true,
348    ///    "client_email": "",
349    ///    "private_key": ""
350    /// }
351    /// ```
352    pub fn with_service_account_path(mut self, service_account_path: impl Into<String>) -> Self {
353        self.service_account_path = Some(service_account_path.into());
354        self
355    }
356
357    /// Set the service account key. The service account must be in the JSON
358    /// format.
359    ///
360    /// This or [`GoogleCloudStorageBuilder::with_service_account_path`] must be
361    /// set.
362    pub fn with_service_account_key(mut self, service_account: impl Into<String>) -> Self {
363        self.service_account_key = Some(service_account.into());
364        self
365    }
366
367    /// Set the path to the application credentials file.
368    ///
369    /// <https://cloud.google.com/docs/authentication/provide-credentials-adc>
370    pub fn with_application_credentials(
371        mut self,
372        application_credentials_path: impl Into<String>,
373    ) -> Self {
374        self.application_credentials_path = Some(application_credentials_path.into());
375        self
376    }
377
378    /// Set the credential provider overriding any other options
379    pub fn with_credentials(mut self, credentials: GcpCredentialProvider) -> Self {
380        self.credentials = Some(credentials);
381        self
382    }
383
384    /// Set the retry configuration
385    pub fn with_retry(mut self, retry_config: RetryConfig) -> Self {
386        self.retry_config = retry_config;
387        self
388    }
389
390    /// Set the proxy_url to be used by the underlying client
391    pub fn with_proxy_url(mut self, proxy_url: impl Into<String>) -> Self {
392        self.client_options = self.client_options.with_proxy_url(proxy_url);
393        self
394    }
395
396    /// Set a trusted proxy CA certificate
397    pub fn with_proxy_ca_certificate(mut self, proxy_ca_certificate: impl Into<String>) -> Self {
398        self.client_options = self
399            .client_options
400            .with_proxy_ca_certificate(proxy_ca_certificate);
401        self
402    }
403
404    /// Set a list of hosts to exclude from proxy connections
405    pub fn with_proxy_excludes(mut self, proxy_excludes: impl Into<String>) -> Self {
406        self.client_options = self.client_options.with_proxy_excludes(proxy_excludes);
407        self
408    }
409
410    /// Sets the client options, overriding any already set
411    pub fn with_client_options(mut self, options: ClientOptions) -> Self {
412        self.client_options = options;
413        self
414    }
415
416    /// Configure a connection to Google Cloud Storage, returning a
417    /// new [`GoogleCloudStorage`] and consuming `self`
418    pub fn build(mut self) -> Result<GoogleCloudStorage> {
419        if let Some(url) = self.url.take() {
420            self.parse_url(&url)?;
421        }
422
423        let bucket_name = self.bucket_name.ok_or(Error::MissingBucketName {})?;
424
425        // First try to initialize from the service account information.
426        let service_account_credentials =
427            match (self.service_account_path, self.service_account_key) {
428                (Some(path), None) => {
429                    Some(ServiceAccountCredentials::from_file(path).context(CredentialSnafu)?)
430                }
431                (None, Some(key)) => {
432                    Some(ServiceAccountCredentials::from_key(&key).context(CredentialSnafu)?)
433                }
434                (None, None) => None,
435                (Some(_), Some(_)) => return Err(Error::ServiceAccountPathAndKeyProvided.into()),
436            };
437
438        // Then try to initialize from the application credentials file, or the environment.
439        let application_default_credentials =
440            ApplicationDefaultCredentials::read(self.application_credentials_path.as_deref())?;
441
442        let disable_oauth = service_account_credentials
443            .as_ref()
444            .map(|c| c.disable_oauth)
445            .unwrap_or(false);
446
447        let gcs_base_url: String = service_account_credentials
448            .as_ref()
449            .and_then(|c| c.gcs_base_url.clone())
450            .unwrap_or_else(|| DEFAULT_GCS_BASE_URL.to_string());
451
452        let credentials = if let Some(credentials) = self.credentials {
453            credentials
454        } else if disable_oauth {
455            Arc::new(StaticCredentialProvider::new(GcpCredential {
456                bearer: "".to_string(),
457            })) as _
458        } else if let Some(credentials) = service_account_credentials.clone() {
459            Arc::new(TokenCredentialProvider::new(
460                credentials.token_provider()?,
461                self.client_options.client()?,
462                self.retry_config.clone(),
463            )) as _
464        } else if let Some(credentials) = application_default_credentials.clone() {
465            match credentials {
466                ApplicationDefaultCredentials::AuthorizedUser(token) => {
467                    Arc::new(TokenCredentialProvider::new(
468                        token,
469                        self.client_options.client()?,
470                        self.retry_config.clone(),
471                    )) as _
472                }
473                ApplicationDefaultCredentials::ServiceAccount(token) => {
474                    Arc::new(TokenCredentialProvider::new(
475                        token.token_provider()?,
476                        self.client_options.client()?,
477                        self.retry_config.clone(),
478                    )) as _
479                }
480            }
481        } else {
482            Arc::new(TokenCredentialProvider::new(
483                InstanceCredentialProvider::default(),
484                self.client_options.metadata_client()?,
485                self.retry_config.clone(),
486            )) as _
487        };
488
489        let signing_credentials = if let Some(signing_credentials) = self.signing_credentials {
490            signing_credentials
491        } else if disable_oauth {
492            Arc::new(StaticCredentialProvider::new(GcpSigningCredential {
493                email: "".to_string(),
494                private_key: None,
495            })) as _
496        } else if let Some(credentials) = service_account_credentials.clone() {
497            credentials.signing_credentials()?
498        } else if let Some(credentials) = application_default_credentials.clone() {
499            match credentials {
500                ApplicationDefaultCredentials::AuthorizedUser(token) => {
501                    Arc::new(TokenCredentialProvider::new(
502                        AuthorizedUserSigningCredentials::from(token)?,
503                        self.client_options.client()?,
504                        self.retry_config.clone(),
505                    )) as _
506                }
507                ApplicationDefaultCredentials::ServiceAccount(token) => {
508                    token.signing_credentials()?
509                }
510            }
511        } else {
512            Arc::new(TokenCredentialProvider::new(
513                InstanceSigningCredentialProvider::default(),
514                self.client_options.metadata_client()?,
515                self.retry_config.clone(),
516            )) as _
517        };
518
519        let config = GoogleCloudStorageConfig::new(
520            gcs_base_url,
521            credentials,
522            signing_credentials,
523            bucket_name,
524            self.retry_config,
525            self.client_options,
526        );
527
528        Ok(GoogleCloudStorage {
529            client: Arc::new(GoogleCloudStorageClient::new(config)?),
530        })
531    }
532}
533
// Unit tests for the builder: configuration keys and aliases, URL parsing,
// and building with service account credentials.
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;
    use std::io::Write;
    use tempfile::NamedTempFile;

    // Service account JSON with `disable_oauth` set, so `build` uses a static
    // empty credential instead of requesting a token.
    const FAKE_KEY: &str = r#"{"private_key": "private_key", "private_key_id": "private_key_id", "client_email":"client_email", "disable_oauth":true}"#;

    // Supplying both a service account key and a path must be rejected.
    #[test]
    fn gcs_test_service_account_key_and_path() {
        let mut tfile = NamedTempFile::new().unwrap();
        write!(tfile, "{FAKE_KEY}").unwrap();
        let _ = GoogleCloudStorageBuilder::new()
            .with_service_account_key(FAKE_KEY)
            .with_service_account_path(tfile.path().to_str().unwrap())
            .with_bucket_name("foo")
            .build()
            .unwrap_err();
    }

    // String keys from a map are parsed and applied to the matching fields.
    #[test]
    fn gcs_test_config_from_map() {
        let google_service_account = "object_store:fake_service_account".to_string();
        let google_bucket_name = "object_store:fake_bucket".to_string();
        let options = HashMap::from([
            ("google_service_account", google_service_account.clone()),
            ("google_bucket_name", google_bucket_name.clone()),
        ]);

        let builder = options
            .iter()
            .fold(GoogleCloudStorageBuilder::new(), |builder, (key, value)| {
                builder.with_config(key.parse().unwrap(), value)
            });

        assert_eq!(
            builder.service_account_path.unwrap(),
            google_service_account.as_str()
        );
        assert_eq!(builder.bucket_name.unwrap(), google_bucket_name.as_str());
    }

    // Every documented alias must resolve to the same builder field.
    #[test]
    fn gcs_test_config_aliases() {
        // Service account path
        for alias in [
            "google_service_account",
            "service_account",
            "google_service_account_path",
            "service_account_path",
        ] {
            let builder = GoogleCloudStorageBuilder::new()
                .with_config(alias.parse().unwrap(), "/fake/path.json");
            assert_eq!("/fake/path.json", builder.service_account_path.unwrap());
        }

        // Service account key
        for alias in ["google_service_account_key", "service_account_key"] {
            let builder =
                GoogleCloudStorageBuilder::new().with_config(alias.parse().unwrap(), FAKE_KEY);
            assert_eq!(FAKE_KEY, builder.service_account_key.unwrap());
        }

        // Bucket name
        for alias in [
            "google_bucket",
            "google_bucket_name",
            "bucket",
            "bucket_name",
        ] {
            let builder =
                GoogleCloudStorageBuilder::new().with_config(alias.parse().unwrap(), "fake_bucket");
            assert_eq!("fake_bucket", builder.bucket_name.unwrap());
        }
    }

    // A valid proxy URL builds; an unsupported proxy scheme surfaces the
    // HTTP client's builder error.
    #[tokio::test]
    async fn gcs_test_proxy_url() {
        let mut tfile = NamedTempFile::new().unwrap();
        write!(tfile, "{FAKE_KEY}").unwrap();
        let service_account_path = tfile.path();
        let gcs = GoogleCloudStorageBuilder::new()
            .with_service_account_path(service_account_path.to_str().unwrap())
            .with_bucket_name("foo")
            .with_proxy_url("https://example.com")
            .build();
        assert!(gcs.is_ok());

        let err = GoogleCloudStorageBuilder::new()
            .with_service_account_path(service_account_path.to_str().unwrap())
            .with_bucket_name("foo")
            .with_proxy_url("asdf://example.com")
            .build()
            .unwrap_err()
            .to_string();

        assert_eq!("Generic HTTP client error: builder error", err);
    }

    // `gs://` URLs set the bucket from the host; other schemes are rejected.
    #[test]
    fn gcs_test_urls() {
        let mut builder = GoogleCloudStorageBuilder::new();
        builder.parse_url("gs://bucket/path").unwrap();
        assert_eq!(builder.bucket_name.as_deref(), Some("bucket"));

        builder.parse_url("gs://bucket.mydomain/path").unwrap();
        assert_eq!(builder.bucket_name.as_deref(), Some("bucket.mydomain"));

        builder.parse_url("mailto://bucket/path").unwrap_err();
    }

    // A serialized key alone (no path) is sufficient to build.
    #[test]
    fn gcs_test_service_account_key_only() {
        let _ = GoogleCloudStorageBuilder::new()
            .with_service_account_key(FAKE_KEY)
            .with_bucket_name("foo")
            .build()
            .unwrap();
    }

    // Values set through `with_config` are returned by `get_config_value`.
    #[test]
    fn gcs_test_config_get_value() {
        let google_service_account = "object_store:fake_service_account".to_string();
        let google_bucket_name = "object_store:fake_bucket".to_string();
        let builder = GoogleCloudStorageBuilder::new()
            .with_config(GoogleConfigKey::ServiceAccount, &google_service_account)
            .with_config(GoogleConfigKey::Bucket, &google_bucket_name);

        assert_eq!(
            builder
                .get_config_value(&GoogleConfigKey::ServiceAccount)
                .unwrap(),
            google_service_account
        );
        assert_eq!(
            builder.get_config_value(&GoogleConfigKey::Bucket).unwrap(),
            google_bucket_name
        );
    }
}