From ed2453901949101556c0c89b9acf442873c06ce8 Mon Sep 17 00:00:00 2001 From: comphead Date: Fri, 26 Jan 2024 13:57:07 -0800 Subject: [PATCH] Rename `CatalogList` to `CatalogProviderList` (#9002) * Rename `CatalogList` to `CatalogProviderList` --- datafusion-cli/src/catalog.rs | 11 ++++++---- .../examples/external_dependency/catalog.rs | 14 ++++++------- .../core/src/catalog/information_schema.rs | 6 +++--- datafusion/core/src/catalog/mod.rs | 20 +++++++++++-------- datafusion/core/src/execution/context/mod.rs | 19 +++++++++--------- docs/source/library-user-guide/catalogs.md | 16 +++++++-------- 6 files changed, 47 insertions(+), 39 deletions(-) diff --git a/datafusion-cli/src/catalog.rs b/datafusion-cli/src/catalog.rs index d790e3118a11..cca2b44ad983 100644 --- a/datafusion-cli/src/catalog.rs +++ b/datafusion-cli/src/catalog.rs @@ -17,7 +17,7 @@ use async_trait::async_trait; use datafusion::catalog::schema::SchemaProvider; -use datafusion::catalog::{CatalogList, CatalogProvider}; +use datafusion::catalog::{CatalogProvider, CatalogProviderList}; use datafusion::datasource::listing::{ ListingTable, ListingTableConfig, ListingTableUrl, }; @@ -31,17 +31,20 @@ use std::sync::{Arc, Weak}; /// Wraps another catalog, automatically creating table providers /// for local files if needed pub struct DynamicFileCatalog { - inner: Arc, + inner: Arc, state: Weak>, } impl DynamicFileCatalog { - pub fn new(inner: Arc, state: Weak>) -> Self { + pub fn new( + inner: Arc, + state: Weak>, + ) -> Self { Self { inner, state } } } -impl CatalogList for DynamicFileCatalog { +impl CatalogProviderList for DynamicFileCatalog { fn as_any(&self) -> &dyn Any { self } diff --git a/datafusion-examples/examples/external_dependency/catalog.rs b/datafusion-examples/examples/external_dependency/catalog.rs index aa9fd103a50c..29e505fb1dcb 100644 --- a/datafusion-examples/examples/external_dependency/catalog.rs +++ b/datafusion-examples/examples/external_dependency/catalog.rs @@ -24,7 +24,7 @@ use 
datafusion::{ arrow::util::pretty, catalog::{ schema::SchemaProvider, - {CatalogList, CatalogProvider}, + {CatalogProviderList, CatalogProvider}, }, datasource::{ file_format::{csv::CsvFormat, parquet::ParquetFormat, FileFormat}, @@ -53,9 +53,9 @@ async fn main() -> Result<()> { .unwrap(); let mut ctx = SessionContext::new(); let state = ctx.state(); - let catlist = Arc::new(CustomCatalogList::new()); + let catlist = Arc::new(CustomCatalogProviderList::new()); // use our custom catalog list for context. each context has a single catalog list. - // context will by default have MemoryCatalogList + // context will by default have [`MemoryCatalogProviderList`] ctx.register_catalog_list(catlist.clone()); // initialize our catalog and schemas @@ -250,18 +250,18 @@ impl CatalogProvider for DirCatalog { } } } -/// Catalog lists holds multiple catalogs. Each context has a single catalog list. -struct CustomCatalogList { +/// A catalog list holds multiple catalog providers. Each context has a single catalog list. 
+struct CustomCatalogProviderList { catalogs: RwLock>>, } -impl CustomCatalogList { +impl CustomCatalogProviderList { fn new() -> Self { Self { catalogs: RwLock::new(HashMap::new()), } } } -impl CatalogList for CustomCatalogList { +impl CatalogProviderList for CustomCatalogProviderList { fn as_any(&self) -> &dyn Any { self } diff --git a/datafusion/core/src/catalog/information_schema.rs b/datafusion/core/src/catalog/information_schema.rs index 3a8fef2d25ab..0e8dbb123ed8 100644 --- a/datafusion/core/src/catalog/information_schema.rs +++ b/datafusion/core/src/catalog/information_schema.rs @@ -39,7 +39,7 @@ use crate::{ physical_plan::streaming::PartitionStream, }; -use super::{schema::SchemaProvider, CatalogList}; +use super::{schema::SchemaProvider, CatalogProviderList}; pub(crate) const INFORMATION_SCHEMA: &str = "information_schema"; pub(crate) const TABLES: &str = "tables"; @@ -62,7 +62,7 @@ pub struct InformationSchemaProvider { impl InformationSchemaProvider { /// Creates a new [`InformationSchemaProvider`] for the provided `catalog_list` - pub fn new(catalog_list: Arc) -> Self { + pub fn new(catalog_list: Arc) -> Self { Self { config: InformationSchemaConfig { catalog_list }, } @@ -71,7 +71,7 @@ impl InformationSchemaProvider { #[derive(Clone)] struct InformationSchemaConfig { - catalog_list: Arc, + catalog_list: Arc, } impl InformationSchemaConfig { diff --git a/datafusion/core/src/catalog/mod.rs b/datafusion/core/src/catalog/mod.rs index da7e1f5e2193..6eba43f7df79 100644 --- a/datafusion/core/src/catalog/mod.rs +++ b/datafusion/core/src/catalog/mod.rs @@ -33,7 +33,7 @@ use std::sync::Arc; /// /// Please see the documentation on `CatalogProvider` for details of /// implementing a custom catalog. -pub trait CatalogList: Sync + Send { +pub trait CatalogProviderList: Sync + Send { /// Returns the catalog list as [`Any`] /// so that it can be downcast to a specific implementation. 
fn as_any(&self) -> &dyn Any; @@ -53,14 +53,18 @@ pub trait CatalogList: Sync + Send { fn catalog(&self, name: &str) -> Option>; } +/// See [`CatalogProviderList`] +#[deprecated(since = "35.0.0", note = "use [`CatalogProviderList`] instead")] +pub trait CatalogList: CatalogProviderList {} + /// Simple in-memory list of catalogs -pub struct MemoryCatalogList { +pub struct MemoryCatalogProviderList { /// Collection of catalogs containing schemas and ultimately TableProviders pub catalogs: DashMap>, } -impl MemoryCatalogList { - /// Instantiates a new `MemoryCatalogList` with an empty collection of catalogs +impl MemoryCatalogProviderList { + /// Instantiates a new `MemoryCatalogProviderList` with an empty collection of catalogs pub fn new() -> Self { Self { catalogs: DashMap::new(), @@ -68,13 +72,13 @@ impl MemoryCatalogList { } } -impl Default for MemoryCatalogList { +impl Default for MemoryCatalogProviderList { fn default() -> Self { Self::new() } } -impl CatalogList for MemoryCatalogList { +impl CatalogProviderList for MemoryCatalogProviderList { fn as_any(&self) -> &dyn Any { self } @@ -105,14 +109,14 @@ impl CatalogList for MemoryCatalogList { /// types, and how to access the data. /// /// The Catalog API consists: -/// * [`CatalogList`]: a collection of `CatalogProvider`s +/// * [`CatalogProviderList`]: a collection of `CatalogProvider`s /// * [`CatalogProvider`]: a collection of `SchemaProvider`s (sometimes called a "database" in other systems) /// * [`SchemaProvider`]: a collection of `TableProvider`s (often called a "schema" in other systems) /// * [`TableProvider]`: individual tables /// /// # Implementing Catalogs /// -/// To implement a catalog, you implement at least one of the [`CatalogList`], +/// To implement a catalog, you implement at least one of the [`CatalogProviderList`], /// [`CatalogProvider`] and [`SchemaProvider`] traits and register them /// appropriately the [`SessionContext`]. 
/// diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 9b623d7a51ec..b5ad6174821b 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -24,7 +24,7 @@ mod json; mod parquet; use crate::{ - catalog::{CatalogList, MemoryCatalogList}, + catalog::{CatalogProviderList, MemoryCatalogProviderList}, datasource::{ cte_worktable::CteWorkTable, function::{TableFunction, TableFunctionImpl}, @@ -1173,8 +1173,8 @@ impl SessionContext { Arc::downgrade(&self.state) } - /// Register [`CatalogList`] in [`SessionState`] - pub fn register_catalog_list(&mut self, catalog_list: Arc) { + /// Register [`CatalogProviderList`] in [`SessionState`] + pub fn register_catalog_list(&mut self, catalog_list: Arc) { self.state.write().catalog_list = catalog_list; } } @@ -1245,7 +1245,7 @@ pub struct SessionState { /// Responsible for planning `LogicalPlan`s, and `ExecutionPlan` query_planner: Arc, /// Collection of catalogs containing schemas and ultimately TableProviders - catalog_list: Arc, + catalog_list: Arc, /// Table Functions table_functions: HashMap>, /// Scalar functions that are registered with the context @@ -1285,7 +1285,8 @@ impl SessionState { /// Returns new [`SessionState`] using the provided /// [`SessionConfig`] and [`RuntimeEnv`]. 
pub fn new_with_config_rt(config: SessionConfig, runtime: Arc) -> Self { - let catalog_list = Arc::new(MemoryCatalogList::new()) as Arc; + let catalog_list = + Arc::new(MemoryCatalogProviderList::new()) as Arc; Self::new_with_config_rt_and_catalog_list(config, runtime, catalog_list) } @@ -1297,11 +1298,11 @@ impl SessionState { } /// Returns new [`SessionState`] using the provided - /// [`SessionConfig`], [`RuntimeEnv`], and [`CatalogList`] + /// [`SessionConfig`], [`RuntimeEnv`], and [`CatalogProviderList`] pub fn new_with_config_rt_and_catalog_list( config: SessionConfig, runtime: Arc, - catalog_list: Arc, + catalog_list: Arc, ) -> Self { let session_id = Uuid::new_v4().to_string(); @@ -1366,7 +1367,7 @@ impl SessionState { pub fn with_config_rt_and_catalog_list( config: SessionConfig, runtime: Arc, - catalog_list: Arc, + catalog_list: Arc, ) -> Self { Self::new_with_config_rt_and_catalog_list(config, runtime, catalog_list) } @@ -1840,7 +1841,7 @@ impl SessionState { } /// Return catalog list - pub fn catalog_list(&self) -> Arc { + pub fn catalog_list(&self) -> Arc { self.catalog_list.clone() } diff --git a/docs/source/library-user-guide/catalogs.md b/docs/source/library-user-guide/catalogs.md index e53d16366350..d30e26f1964a 100644 --- a/docs/source/library-user-guide/catalogs.md +++ b/docs/source/library-user-guide/catalogs.md @@ -23,7 +23,7 @@ This section describes how to create and manage catalogs, schemas, and tables in ## General Concepts -CatalogList, Catalogs, schemas, and tables are organized in a hierarchy. A CatalogList contains catalogs, a catalog contains schemas and a schema contains tables. +CatalogProviderList, Catalogs, schemas, and tables are organized in a hierarchy. A CatalogProviderList contains catalog providers, a catalog provider contains schemas and a schema contains tables. DataFusion comes with a basic in memory catalog functionality in the [`catalog` module]. 
You can use these in memory implementations as is, or extend DataFusion with your own catalog implementations, for example based on local files or files on remote object storage. @@ -31,9 +31,9 @@ DataFusion comes with a basic in memory catalog functionality in the [`catalog` Similarly to other concepts in DataFusion, you'll implement various traits to create your own catalogs, schemas, and tables. The following sections describe the traits you'll need to implement. -The `CatalogList` trait has methods to register new catalogs, get a catalog by name and list all catalogs .The `CatalogProvider` trait has methods to set a schema to a name, get a schema by name, and list all schemas. The `SchemaProvider`, which can be registered with a `CatalogProvider`, has methods to set a table to a name, get a table by name, list all tables, deregister a table, and check for a table's existence. The `TableProvider` trait has methods to scan underlying data and use it in DataFusion. The `TableProvider` trait is covered in more detail [here](./custom-table-providers.md). +The `CatalogProviderList` trait has methods to register new catalogs, get a catalog by name and list all catalogs. The `CatalogProvider` trait has methods to set a schema to a name, get a schema by name, and list all schemas. The `SchemaProvider`, which can be registered with a `CatalogProvider`, has methods to set a table to a name, get a table by name, list all tables, deregister a table, and check for a table's existence. The `TableProvider` trait has methods to scan underlying data and use it in DataFusion. The `TableProvider` trait is covered in more detail [here](./custom-table-providers.md). -In the following example, we'll implement an in memory catalog, starting with the `SchemaProvider` trait as we need one to register with the `CatalogProvider`. Finally we will implement `CatalogList` to register the `CatalogProvider`. 
+In the following example, we'll implement an in memory catalog, starting with the `SchemaProvider` trait as we need one to register with the `CatalogProvider`. Finally we will implement `CatalogProviderList` to register the `CatalogProvider`. ## Implementing `MemorySchemaProvider` @@ -169,19 +169,19 @@ impl CatalogProvider for MemoryCatalogProvider { Again, this is fairly straightforward, as there's an underlying data structure to store the state, via key-value pairs. -## Implementing `MemoryCatalogList` +## Implementing `MemoryCatalogProviderList` ```rust -pub struct MemoryCatalogList { +pub struct MemoryCatalogProviderList { /// Collection of catalogs containing schemas and ultimately TableProviders pub catalogs: DashMap>, } ``` -With that the `CatalogList` trait can be implemented. +With that the `CatalogProviderList` trait can be implemented. ```rust -impl CatalogList for MemoryCatalogList { +impl CatalogProviderList for MemoryCatalogProviderList { fn as_any(&self) -> &dyn Any { self } @@ -213,4 +213,4 @@ To recap, you need to: 1. Implement the `TableProvider` trait to create a table provider, or use an existing one. 2. Implement the `SchemaProvider` trait to create a schema provider, or use an existing one. 3. Implement the `CatalogProvider` trait to create a catalog provider, or use an existing one. -4. Implement the `CatalogList` trait to create a CatalogList, or use an existing one. +4. Implement the `CatalogProviderList` trait to create a CatalogProviderList, or use an existing one.