From a612b674705b16eeee6c974a616c72a77429443e Mon Sep 17 00:00:00 2001
From: dennis zhuang
Date: Tue, 3 Sep 2024 15:02:55 +0800
Subject: [PATCH] feat: supports name in object storage config (#4630)

* feat: supports name in object storage config

* fix: integration test

* fix: integration test

* fix: update sample config

* fix: config api test
---
 config/datanode.example.toml             | 14 ++++++
 config/standalone.example.toml           | 14 ++++++
 src/datanode/src/config.rs               | 59 ++++++++++++++++++++++--
 src/datanode/src/datanode.rs             |  4 +-
 src/object-store/src/manager.rs          |  8 +++-
 tests-integration/src/tests/test_util.rs | 12 +++--
 tests-integration/tests/http.rs          |  1 +
 7 files changed, 101 insertions(+), 11 deletions(-)

diff --git a/config/datanode.example.toml b/config/datanode.example.toml
index d1c370819ec7..48f37b62ea3c 100644
--- a/config/datanode.example.toml
+++ b/config/datanode.example.toml
@@ -360,9 +360,23 @@ region = "us-west-2"
 
 # Custom storage options
 # [[storage.providers]]
+# name = "S3"
 # type = "S3"
+# bucket = "greptimedb"
+# root = "data"
+# access_key_id = "test"
+# secret_access_key = "123456"
+# endpoint = "https://s3.amazonaws.com"
+# region = "us-west-2"
 # [[storage.providers]]
+# name = "Gcs"
 # type = "Gcs"
+# bucket = "greptimedb"
+# root = "data"
+# scope = "test"
+# credential_path = "123456"
+# credential = "base64-credential"
+# endpoint = "https://storage.googleapis.com"
 
 ## The region engine options. You can configure multiple region engines.
 [[region_engine]]
diff --git a/config/standalone.example.toml b/config/standalone.example.toml
index f504a5ce42a1..6ca7d917d39a 100644
--- a/config/standalone.example.toml
+++ b/config/standalone.example.toml
@@ -398,9 +398,23 @@ region = "us-west-2"
 
 # Custom storage options
 # [[storage.providers]]
+# name = "S3"
 # type = "S3"
+# bucket = "greptimedb"
+# root = "data"
+# access_key_id = "test"
+# secret_access_key = "123456"
+# endpoint = "https://s3.amazonaws.com"
+# region = "us-west-2"
 # [[storage.providers]]
+# name = "Gcs"
 # type = "Gcs"
+# bucket = "greptimedb"
+# root = "data"
+# scope = "test"
+# credential_path = "123456"
+# credential = "base64-credential"
+# endpoint = "https://storage.googleapis.com"
 
 ## The region engine options. You can configure multiple region engines.
 [[region_engine]]
diff --git a/src/datanode/src/config.rs b/src/datanode/src/config.rs
index 6ce16b779922..7cb18131ed3e 100644
--- a/src/datanode/src/config.rs
+++ b/src/datanode/src/config.rs
@@ -47,7 +47,8 @@ pub enum ObjectStoreConfig {
 }
 
 impl ObjectStoreConfig {
-    pub fn name(&self) -> &'static str {
+    /// Returns the object storage type name, such as `S3`, `Oss`, etc.
+    pub fn provider_name(&self) -> &'static str {
         match self {
             Self::File(_) => "File",
             Self::S3(_) => "S3",
@@ -56,6 +57,24 @@ impl ObjectStoreConfig {
             Self::Gcs(_) => "Gcs",
         }
     }
+
+    /// Returns the object storage configuration name, or the provider name if it's empty.
+    pub fn config_name(&self) -> &str {
+        let name = match self {
+            // file storage doesn't support name
+            Self::File(_) => self.provider_name(),
+            Self::S3(s3) => &s3.name,
+            Self::Oss(oss) => &oss.name,
+            Self::Azblob(az) => &az.name,
+            Self::Gcs(gcs) => &gcs.name,
+        };
+
+        if name.trim().is_empty() {
+            return self.provider_name();
+        }
+
+        name
+    }
 }
 
 /// Storage engine config
@@ -66,6 +85,7 @@ pub struct StorageConfig {
     pub data_home: String,
     #[serde(flatten)]
     pub store: ObjectStoreConfig,
+    /// Object storage providers
     pub providers: Vec<ObjectStoreConfig>,
 }
 
@@ -95,6 +115,7 @@ pub struct ObjectStorageCacheConfig {
 #[derive(Debug, Clone, Serialize, Deserialize)]
 #[serde(default)]
 pub struct S3Config {
+    pub name: String,
     pub bucket: String,
     pub root: String,
     #[serde(skip_serializing)]
@@ -109,7 +130,8 @@ pub struct S3Config {
 
 impl PartialEq for S3Config {
     fn eq(&self, other: &Self) -> bool {
-        self.bucket == other.bucket
+        self.name == other.name
+            && self.bucket == other.bucket
             && self.root == other.root
             && self.access_key_id.expose_secret() == other.access_key_id.expose_secret()
             && self.secret_access_key.expose_secret() == other.secret_access_key.expose_secret()
@@ -122,6 +144,7 @@ impl PartialEq for S3Config {
 #[derive(Debug, Clone, Serialize, Deserialize)]
 #[serde(default)]
 pub struct OssConfig {
+    pub name: String,
     pub bucket: String,
     pub root: String,
     #[serde(skip_serializing)]
@@ -135,7 +158,8 @@ pub struct OssConfig {
 
 impl PartialEq for OssConfig {
     fn eq(&self, other: &Self) -> bool {
-        self.bucket == other.bucket
+        self.name == other.name
+            && self.bucket == other.bucket
             && self.root == other.root
             && self.access_key_id.expose_secret() == other.access_key_id.expose_secret()
             && self.access_key_secret.expose_secret() == other.access_key_secret.expose_secret()
@@ -147,6 +171,7 @@ impl PartialEq for OssConfig {
 #[derive(Debug, Clone, Serialize, Deserialize)]
 #[serde(default)]
 pub struct AzblobConfig {
+    pub name: String,
     pub container: String,
     pub root: String,
     #[serde(skip_serializing)]
@@ -161,7 +186,8 @@ pub struct AzblobConfig {
 
 impl PartialEq for AzblobConfig {
     fn eq(&self, other: &Self) -> bool {
-        self.container == other.container
+        self.name == other.name
+            && self.container == other.container
             && self.root == other.root
             && self.account_name.expose_secret() == other.account_name.expose_secret()
             && self.account_key.expose_secret() == other.account_key.expose_secret()
@@ -174,6 +200,7 @@ impl PartialEq for AzblobConfig {
 #[derive(Debug, Clone, Serialize, Deserialize)]
 #[serde(default)]
 pub struct GcsConfig {
+    pub name: String,
     pub root: String,
     pub bucket: String,
     pub scope: String,
@@ -188,7 +215,8 @@ pub struct GcsConfig {
 
 impl PartialEq for GcsConfig {
     fn eq(&self, other: &Self) -> bool {
-        self.root == other.root
+        self.name == other.name
+            && self.root == other.root
             && self.bucket == other.bucket
             && self.scope == other.scope
             && self.credential_path.expose_secret() == other.credential_path.expose_secret()
@@ -201,6 +229,7 @@ impl PartialEq for GcsConfig {
 impl Default for S3Config {
     fn default() -> Self {
         Self {
+            name: String::default(),
             bucket: String::default(),
             root: String::default(),
             access_key_id: SecretString::from(String::default()),
@@ -215,6 +244,7 @@ impl Default for S3Config {
 impl Default for OssConfig {
     fn default() -> Self {
         Self {
+            name: String::default(),
            bucket: String::default(),
             root: String::default(),
             access_key_id: SecretString::from(String::default()),
@@ -228,6 +258,7 @@ impl Default for AzblobConfig {
     fn default() -> Self {
         Self {
+            name: String::default(),
             container: String::default(),
             root: String::default(),
             account_name: SecretString::from(String::default()),
@@ -242,6 +273,7 @@ impl Default for AzblobConfig {
 impl Default for GcsConfig {
     fn default() -> Self {
         Self {
+            name: String::default(),
             root: String::default(),
             bucket: String::default(),
             scope: String::default(),
@@ -355,6 +387,23 @@ mod tests {
         let _parsed: DatanodeOptions = toml::from_str(&toml_string).unwrap();
     }
 
+    #[test]
+    fn test_config_name() {
+        let object_store_config = ObjectStoreConfig::default();
+        assert_eq!("File", object_store_config.config_name());
+
+        let s3_config = ObjectStoreConfig::S3(S3Config::default());
+        assert_eq!("S3", s3_config.config_name());
+        assert_eq!("S3", s3_config.provider_name());
+
+        let s3_config = ObjectStoreConfig::S3(S3Config {
+            name: "test".to_string(),
+            ..Default::default()
+        });
+        assert_eq!("test", s3_config.config_name());
+        assert_eq!("S3", s3_config.provider_name());
+    }
+
     #[test]
     fn test_secstr() {
         let toml_str = r#"
diff --git a/src/datanode/src/datanode.rs b/src/datanode/src/datanode.rs
index eca551a4a0d4..09bf901d37f0 100644
--- a/src/datanode/src/datanode.rs
+++ b/src/datanode/src/datanode.rs
@@ -273,11 +273,11 @@ impl DatanodeBuilder {
     /// Builds [ObjectStoreManager] from [StorageConfig].
     pub async fn build_object_store_manager(cfg: &StorageConfig) -> Result {
         let object_store = store::new_object_store(cfg.store.clone(), &cfg.data_home).await?;
-        let default_name = cfg.store.name();
+        let default_name = cfg.store.config_name();
         let mut object_store_manager = ObjectStoreManager::new(default_name, object_store);
         for store in &cfg.providers {
             object_store_manager.add(
-                store.name(),
+                store.config_name(),
                 store::new_object_store(store.clone(), &cfg.data_home).await?,
             );
         }
diff --git a/src/object-store/src/manager.rs b/src/object-store/src/manager.rs
index 6513923b52cc..b73147baf18e 100644
--- a/src/object-store/src/manager.rs
+++ b/src/object-store/src/manager.rs
@@ -37,8 +37,13 @@ impl ObjectStoreManager {
     }
 
     /// Adds an object store to the manager.
+    /// # Panics
+    /// Panics when the name already exists.
     pub fn add(&mut self, name: &str, object_store: ObjectStore) {
-        self.stores.insert(name.to_lowercase(), object_store);
+        let name = name.to_lowercase();
+        if self.stores.insert(name.clone(), object_store).is_some() {
+            panic!("Object storage provider name conflicts, the `{name}` already exists");
+        }
     }
 
     /// Finds an object store corresponding to the name.
@@ -46,6 +51,7 @@ impl ObjectStoreManager {
         self.stores.get(&name.to_lowercase())
     }
 
+    /// Returns the default object store.
     pub fn default_object_store(&self) -> &ObjectStore {
         &self.default_object_store
     }
diff --git a/tests-integration/src/tests/test_util.rs b/tests-integration/src/tests/test_util.rs
index 0f93766ec7e9..f8ae3ccd8fbd 100644
--- a/tests-integration/src/tests/test_util.rs
+++ b/tests-integration/src/tests/test_util.rs
@@ -191,7 +191,10 @@ pub(crate) async fn distributed() -> Arc {
 pub(crate) async fn standalone_with_multiple_object_stores() -> Arc {
     let _ = dotenv::dotenv();
     let test_name = uuid::Uuid::new_v4().to_string();
-    let storage_types = StorageType::build_storage_types_based_on_env();
+    let mut storage_types = StorageType::build_storage_types_based_on_env();
+    // File is the default storage, remove it to avoid panic
+    storage_types.retain(|x| *x != StorageType::File);
+
     let instance = GreptimeDbStandaloneBuilder::new(&test_name)
         .with_store_providers(storage_types)
         .build()
@@ -202,10 +205,13 @@ pub(crate) async fn standalone_with_multiple_object_stores() -> Arc {
 pub(crate) async fn distributed_with_multiple_object_stores() -> Arc {
     let _ = dotenv::dotenv();
     let test_name = uuid::Uuid::new_v4().to_string();
-    let providers = StorageType::build_storage_types_based_on_env();
+    let mut storage_types = StorageType::build_storage_types_based_on_env();
+    // File is the default storage, remove it to avoid panic
+    storage_types.retain(|x| *x != StorageType::File);
+
     let cluster = GreptimeDbClusterBuilder::new(&test_name)
         .await
-        .with_store_providers(providers)
+        .with_store_providers(storage_types)
         .build()
         .await;
     Arc::new(MockDistributedInstance(cluster))
diff --git a/tests-integration/tests/http.rs b/tests-integration/tests/http.rs
index 6293dbc23bee..4d9c6ceb871c 100644
--- a/tests-integration/tests/http.rs
+++ b/tests-integration/tests/http.rs
@@ -902,6 +902,7 @@ fn drop_lines_with_inconsistent_results(input: String) -> String {
         "selector_result_cache_size =",
         "metadata_cache_size =",
         "content_cache_size =",
+        "name =",
     ];
 
     input
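
For reference, a minimal, hypothetical sketch of a configuration that exercises the new `name` field, assuming the `[storage]` layout shown in the example config files above; the bucket names, credentials, and endpoints are placeholders. Each `[[storage.providers]]` entry may carry a `name`: `config_name()` returns it and falls back to the provider type from `provider_name()` when it is blank. Names are lower-cased by `ObjectStoreManager::add`, and a duplicate name panics at datanode startup.

[storage]
data_home = "/tmp/greptimedb/"
type = "File"

# Resolved as "hot" via its explicit name.
[[storage.providers]]
name = "hot"
type = "S3"
bucket = "greptimedb-hot"
root = "data"
access_key_id = "test"
secret_access_key = "123456"
endpoint = "https://s3.amazonaws.com"
region = "us-west-2"

# No name given, so config_name() falls back to the provider type "S3".
[[storage.providers]]
type = "S3"
bucket = "greptimedb-cold"
root = "data"
access_key_id = "test"
secret_access_key = "123456"
endpoint = "https://s3.amazonaws.com"
region = "us-west-2"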