Skip to content

Commit

Permalink
Rename CatalogList to CatalogProviderList (#9002)
Browse files Browse the repository at this point in the history
* Rename `CatalogList` to `CatalogProviderList`
  • Loading branch information
comphead authored Jan 26, 2024
1 parent 095e228 commit ed24539
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 39 deletions.
11 changes: 7 additions & 4 deletions datafusion-cli/src/catalog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

use async_trait::async_trait;
use datafusion::catalog::schema::SchemaProvider;
use datafusion::catalog::{CatalogList, CatalogProvider};
use datafusion::catalog::{CatalogProvider, CatalogProviderList};
use datafusion::datasource::listing::{
ListingTable, ListingTableConfig, ListingTableUrl,
};
Expand All @@ -31,17 +31,20 @@ use std::sync::{Arc, Weak};
/// Wraps another catalog, automatically creating table providers
/// for local files if needed
pub struct DynamicFileCatalog {
inner: Arc<dyn CatalogList>,
inner: Arc<dyn CatalogProviderList>,
state: Weak<RwLock<SessionState>>,
}

impl DynamicFileCatalog {
pub fn new(inner: Arc<dyn CatalogList>, state: Weak<RwLock<SessionState>>) -> Self {
pub fn new(
inner: Arc<dyn CatalogProviderList>,
state: Weak<RwLock<SessionState>>,
) -> Self {
Self { inner, state }
}
}

impl CatalogList for DynamicFileCatalog {
impl CatalogProviderList for DynamicFileCatalog {
fn as_any(&self) -> &dyn Any {
self
}
Expand Down
14 changes: 7 additions & 7 deletions datafusion-examples/examples/external_dependency/catalog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use datafusion::{
arrow::util::pretty,
catalog::{
schema::SchemaProvider,
{CatalogList, CatalogProvider},
{CatalogProviderList, CatalogProvider},
},
datasource::{
file_format::{csv::CsvFormat, parquet::ParquetFormat, FileFormat},
Expand Down Expand Up @@ -53,9 +53,9 @@ async fn main() -> Result<()> {
.unwrap();
let mut ctx = SessionContext::new();
let state = ctx.state();
let catlist = Arc::new(CustomCatalogList::new());
let catlist = Arc::new(CustomCatalogProviderList::new());
// use our custom catalog list for context. each context has a single catalog list.
// context will by default have MemoryCatalogList
// context will by default have [`MemoryCatalogProviderList`]
ctx.register_catalog_list(catlist.clone());

// initialize our catalog and schemas
Expand Down Expand Up @@ -250,18 +250,18 @@ impl CatalogProvider for DirCatalog {
}
}
}
/// Catalog lists holds multiple catalogs. Each context has a single catalog list.
struct CustomCatalogList {
/// A catalog list holds multiple catalog providers. Each context has a single catalog list.
struct CustomCatalogProviderList {
catalogs: RwLock<HashMap<String, Arc<dyn CatalogProvider>>>,
}
impl CustomCatalogList {
impl CustomCatalogProviderList {
fn new() -> Self {
Self {
catalogs: RwLock::new(HashMap::new()),
}
}
}
impl CatalogList for CustomCatalogList {
impl CatalogProviderList for CustomCatalogProviderList {
fn as_any(&self) -> &dyn Any {
self
}
Expand Down
6 changes: 3 additions & 3 deletions datafusion/core/src/catalog/information_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ use crate::{
physical_plan::streaming::PartitionStream,
};

use super::{schema::SchemaProvider, CatalogList};
use super::{schema::SchemaProvider, CatalogProviderList};

pub(crate) const INFORMATION_SCHEMA: &str = "information_schema";
pub(crate) const TABLES: &str = "tables";
Expand All @@ -62,7 +62,7 @@ pub struct InformationSchemaProvider {

impl InformationSchemaProvider {
/// Creates a new [`InformationSchemaProvider`] for the provided `catalog_list`
pub fn new(catalog_list: Arc<dyn CatalogList>) -> Self {
pub fn new(catalog_list: Arc<dyn CatalogProviderList>) -> Self {
Self {
config: InformationSchemaConfig { catalog_list },
}
Expand All @@ -71,7 +71,7 @@ impl InformationSchemaProvider {

#[derive(Clone)]
struct InformationSchemaConfig {
catalog_list: Arc<dyn CatalogList>,
catalog_list: Arc<dyn CatalogProviderList>,
}

impl InformationSchemaConfig {
Expand Down
20 changes: 12 additions & 8 deletions datafusion/core/src/catalog/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ use std::sync::Arc;
///
/// Please see the documentation on `CatalogProvider` for details of
/// implementing a custom catalog.
pub trait CatalogList: Sync + Send {
pub trait CatalogProviderList: Sync + Send {
/// Returns the catalog list as [`Any`]
/// so that it can be downcast to a specific implementation.
fn as_any(&self) -> &dyn Any;
Expand All @@ -53,28 +53,32 @@ pub trait CatalogList: Sync + Send {
fn catalog(&self, name: &str) -> Option<Arc<dyn CatalogProvider>>;
}

/// Deprecated former name of [`CatalogProviderList`].
///
/// This trait declares no methods of its own; it only requires
/// [`CatalogProviderList`] as a supertrait. See [`CatalogProviderList`]
/// for the actual catalog list API.
#[deprecated(since = "35.0.0", note = "use [`CatalogProviderList`] instead")]
pub trait CatalogList: CatalogProviderList {}

/// Simple in-memory list of catalogs
pub struct MemoryCatalogList {
pub struct MemoryCatalogProviderList {
/// Collection of catalogs containing schemas and ultimately TableProviders
pub catalogs: DashMap<String, Arc<dyn CatalogProvider>>,
}

impl MemoryCatalogList {
/// Instantiates a new `MemoryCatalogList` with an empty collection of catalogs
impl MemoryCatalogProviderList {
/// Instantiates a new `MemoryCatalogProviderList` with an empty collection of catalogs
pub fn new() -> Self {
Self {
catalogs: DashMap::new(),
}
}
}

impl Default for MemoryCatalogList {
impl Default for MemoryCatalogProviderList {
fn default() -> Self {
Self::new()
}
}

impl CatalogList for MemoryCatalogList {
impl CatalogProviderList for MemoryCatalogProviderList {
fn as_any(&self) -> &dyn Any {
self
}
Expand Down Expand Up @@ -105,14 +109,14 @@ impl CatalogList for MemoryCatalogList {
/// types, and how to access the data.
///
/// The Catalog API consists:
/// * [`CatalogList`]: a collection of `CatalogProvider`s
/// * [`CatalogProviderList`]: a collection of `CatalogProvider`s
/// * [`CatalogProvider`]: a collection of `SchemaProvider`s (sometimes called a "database" in other systems)
/// * [`SchemaProvider`]: a collection of `TableProvider`s (often called a "schema" in other systems)
/// * [`TableProvider`]: individual tables
///
/// # Implementing Catalogs
///
/// To implement a catalog, you implement at least one of the [`CatalogList`],
/// To implement a catalog, you implement at least one of the [`CatalogProviderList`],
/// [`CatalogProvider`] and [`SchemaProvider`] traits and register them
/// appropriately with the [`SessionContext`].
///
Expand Down
19 changes: 10 additions & 9 deletions datafusion/core/src/execution/context/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ mod json;
mod parquet;

use crate::{
catalog::{CatalogList, MemoryCatalogList},
catalog::{CatalogProviderList, MemoryCatalogProviderList},
datasource::{
cte_worktable::CteWorkTable,
function::{TableFunction, TableFunctionImpl},
Expand Down Expand Up @@ -1173,8 +1173,8 @@ impl SessionContext {
Arc::downgrade(&self.state)
}

/// Register [`CatalogList`] in [`SessionState`]
pub fn register_catalog_list(&mut self, catalog_list: Arc<dyn CatalogList>) {
/// Register [`CatalogProviderList`] in [`SessionState`]
pub fn register_catalog_list(&mut self, catalog_list: Arc<dyn CatalogProviderList>) {
self.state.write().catalog_list = catalog_list;
}
}
Expand Down Expand Up @@ -1245,7 +1245,7 @@ pub struct SessionState {
/// Responsible for planning `LogicalPlan`s, and `ExecutionPlan`
query_planner: Arc<dyn QueryPlanner + Send + Sync>,
/// Collection of catalogs containing schemas and ultimately TableProviders
catalog_list: Arc<dyn CatalogList>,
catalog_list: Arc<dyn CatalogProviderList>,
/// Table Functions
table_functions: HashMap<String, Arc<TableFunction>>,
/// Scalar functions that are registered with the context
Expand Down Expand Up @@ -1285,7 +1285,8 @@ impl SessionState {
/// Returns new [`SessionState`] using the provided
/// [`SessionConfig`] and [`RuntimeEnv`].
pub fn new_with_config_rt(config: SessionConfig, runtime: Arc<RuntimeEnv>) -> Self {
let catalog_list = Arc::new(MemoryCatalogList::new()) as Arc<dyn CatalogList>;
let catalog_list =
Arc::new(MemoryCatalogProviderList::new()) as Arc<dyn CatalogProviderList>;
Self::new_with_config_rt_and_catalog_list(config, runtime, catalog_list)
}

Expand All @@ -1297,11 +1298,11 @@ impl SessionState {
}

/// Returns new [`SessionState`] using the provided
/// [`SessionConfig`], [`RuntimeEnv`], and [`CatalogList`]
/// [`SessionConfig`], [`RuntimeEnv`], and [`CatalogProviderList`]
pub fn new_with_config_rt_and_catalog_list(
config: SessionConfig,
runtime: Arc<RuntimeEnv>,
catalog_list: Arc<dyn CatalogList>,
catalog_list: Arc<dyn CatalogProviderList>,
) -> Self {
let session_id = Uuid::new_v4().to_string();

Expand Down Expand Up @@ -1366,7 +1367,7 @@ impl SessionState {
pub fn with_config_rt_and_catalog_list(
config: SessionConfig,
runtime: Arc<RuntimeEnv>,
catalog_list: Arc<dyn CatalogList>,
catalog_list: Arc<dyn CatalogProviderList>,
) -> Self {
Self::new_with_config_rt_and_catalog_list(config, runtime, catalog_list)
}
Expand Down Expand Up @@ -1840,7 +1841,7 @@ impl SessionState {
}

/// Return catalog list
pub fn catalog_list(&self) -> Arc<dyn CatalogList> {
pub fn catalog_list(&self) -> Arc<dyn CatalogProviderList> {
self.catalog_list.clone()
}

Expand Down
16 changes: 8 additions & 8 deletions docs/source/library-user-guide/catalogs.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,17 @@ This section describes how to create and manage catalogs, schemas, and tables in

## General Concepts

CatalogList, Catalogs, schemas, and tables are organized in a hierarchy. A CatalogList contains catalogs, a catalog contains schemas and a schema contains tables.
CatalogProviderList, Catalogs, schemas, and tables are organized in a hierarchy. A CatalogProviderList contains catalog providers, a catalog provider contains schemas and a schema contains tables.

DataFusion comes with a basic in memory catalog functionality in the [`catalog` module]. You can use these in memory implementations as is, or extend DataFusion with your own catalog implementations, for example based on local files or files on remote object storage.

[`catalog` module]: https://docs.rs/datafusion/latest/datafusion/catalog/index.html

Similarly to other concepts in DataFusion, you'll implement various traits to create your own catalogs, schemas, and tables. The following sections describe the traits you'll need to implement.

The `CatalogList` trait has methods to register new catalogs, get a catalog by name and list all catalogs .The `CatalogProvider` trait has methods to set a schema to a name, get a schema by name, and list all schemas. The `SchemaProvider`, which can be registered with a `CatalogProvider`, has methods to set a table to a name, get a table by name, list all tables, deregister a table, and check for a table's existence. The `TableProvider` trait has methods to scan underlying data and use it in DataFusion. The `TableProvider` trait is covered in more detail [here](./custom-table-providers.md).
The `CatalogProviderList` trait has methods to register new catalogs, get a catalog by name and list all catalogs. The `CatalogProvider` trait has methods to set a schema to a name, get a schema by name, and list all schemas. The `SchemaProvider`, which can be registered with a `CatalogProvider`, has methods to set a table to a name, get a table by name, list all tables, deregister a table, and check for a table's existence. The `TableProvider` trait has methods to scan underlying data and use it in DataFusion. The `TableProvider` trait is covered in more detail [here](./custom-table-providers.md).

In the following example, we'll implement an in memory catalog, starting with the `SchemaProvider` trait as we need one to register with the `CatalogProvider`. Finally we will implement `CatalogList` to register the `CatalogProvider`.
In the following example, we'll implement an in memory catalog, starting with the `SchemaProvider` trait as we need one to register with the `CatalogProvider`. Finally we will implement `CatalogProviderList` to register the `CatalogProvider`.

## Implementing `MemorySchemaProvider`

Expand Down Expand Up @@ -169,19 +169,19 @@ impl CatalogProvider for MemoryCatalogProvider {

Again, this is fairly straightforward, as there's an underlying data structure to store the state, via key-value pairs.

## Implementing `MemoryCatalogList`
## Implementing `MemoryCatalogProviderList`

```rust
pub struct MemoryCatalogList {
pub struct MemoryCatalogProviderList {
/// Collection of catalogs containing schemas and ultimately TableProviders
pub catalogs: DashMap<String, Arc<dyn CatalogProvider>>,
}
```

With that the `CatalogList` trait can be implemented.
With that the `CatalogProviderList` trait can be implemented.

```rust
impl CatalogList for MemoryCatalogList {
impl CatalogProviderList for MemoryCatalogProviderList {
fn as_any(&self) -> &dyn Any {
self
}
Expand Down Expand Up @@ -213,4 +213,4 @@ To recap, you need to:
1. Implement the `TableProvider` trait to create a table provider, or use an existing one.
2. Implement the `SchemaProvider` trait to create a schema provider, or use an existing one.
3. Implement the `CatalogProvider` trait to create a catalog provider, or use an existing one.
4. Implement the `CatalogList` trait to create a CatalogList, or use an existing one.
4. Implement the `CatalogProviderList` trait to create a CatalogProviderList, or use an existing one.

0 comments on commit ed24539

Please sign in to comment.