From 7d593c42ec66526dbad10cb545a2468d1bc4940d Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Thu, 28 Mar 2024 08:28:32 -0400 Subject: [PATCH] feat(api): add `catalog` and `database` kwargs to `ibis.table` (#8801) --- ibis/expr/api.py | 31 ++++++++++++++++++- ibis/expr/format.py | 8 +++++ ibis/expr/operations/relations.py | 2 +- .../repr.txt | 3 ++ .../reprcatdb.txt | 3 ++ .../reprdb.txt | 3 ++ ibis/expr/tests/test_api.py | 15 +++++++++ ibis/expr/tests/test_format.py | 19 ++++++++++++ 8 files changed, 82 insertions(+), 2 deletions(-) create mode 100644 ibis/expr/tests/snapshots/test_format/test_format_unbound_table_namespace/repr.txt create mode 100644 ibis/expr/tests/snapshots/test_format/test_format_unbound_table_namespace/reprcatdb.txt create mode 100644 ibis/expr/tests/snapshots/test_format/test_format_unbound_table_namespace/reprdb.txt diff --git a/ibis/expr/api.py b/ibis/expr/api.py index 1bd12ea86aeb..d9ca23f984e5 100644 --- a/ibis/expr/api.py +++ b/ibis/expr/api.py @@ -313,15 +313,25 @@ def schema( def table( schema: SchemaLike | None = None, name: str | None = None, + catalog: str | None = None, + database: str | None = None, ) -> ir.Table: """Create a table literal or an abstract table without data. + Ibis uses the word database to refer to a collection of tables, and the word + catalog to refer to a collection of databases. You can use a combination of + `catalog` and `database` to specify a hierarchical location for a table. + Parameters ---------- schema A schema for the table name Name for the table. One is generated if this value is `None`. + catalog + A collection of databases. + database + A collection of tables. Required if catalog is not `None`. 
Returns ------- @@ -340,13 +350,32 @@ def table( a int64 b string + + Create a table with no data backing it in a specific location + + >>> import ibis + >>> ibis.options.interactive = False + >>> t = ibis.table(schema=dict(a="int"), name="t", catalog="cat", database="db") + >>> t + UnboundTable: cat.db.t + a int64 """ if name is None: if isinstance(schema, type): name = schema.__name__ else: name = next(_table_names) - return ops.UnboundTable(name=name, schema=schema).to_expr() + if catalog is not None and database is None: + raise ValueError( + "A catalog-only namespace is invalid in Ibis, " + "please specify a database as well." + ) + + return ops.UnboundTable( + name=name, + schema=schema, + namespace=ops.Namespace(catalog=catalog, database=database), + ).to_expr() def memtable( diff --git a/ibis/expr/format.py b/ibis/expr/format.py index 2975de37d10d..2c5186bcd7f0 100644 --- a/ibis/expr/format.py +++ b/ibis/expr/format.py @@ -244,6 +244,14 @@ def _physical_table(op, name, **kwargs): return f"{op.__class__.__name__}: {name}\n{schema}" +@fmt.register(ops.UnboundTable) +@fmt.register(ops.DatabaseTable) +def _unbound_table(op, name, **kwargs): + schema = render_schema(op.schema, indent_level=1) + name = ".".join(filter(None, op.namespace.args + (name,))) + return f"{op.__class__.__name__}: {name}\n{schema}" + + @fmt.register(ops.InMemoryTable) def _in_memory_table(op, data, **kwargs): import rich.pretty diff --git a/ibis/expr/operations/relations.py b/ibis/expr/operations/relations.py index 215bcd1421d7..fd6f73f76a32 100644 --- a/ibis/expr/operations/relations.py +++ b/ibis/expr/operations/relations.py @@ -313,8 +313,8 @@ class PhysicalTable(Relation): @public class Namespace(Concrete): - database: Optional[str] = None catalog: Optional[str] = None + database: Optional[str] = None @public diff --git a/ibis/expr/tests/snapshots/test_format/test_format_unbound_table_namespace/repr.txt 
b/ibis/expr/tests/snapshots/test_format/test_format_unbound_table_namespace/repr.txt new file mode 100644 index 000000000000..5dcb87c3bfe8 --- /dev/null +++ b/ibis/expr/tests/snapshots/test_format/test_format_unbound_table_namespace/repr.txt @@ -0,0 +1,3 @@ +UnboundTable: bork + a int64 + b int64 \ No newline at end of file diff --git a/ibis/expr/tests/snapshots/test_format/test_format_unbound_table_namespace/reprcatdb.txt b/ibis/expr/tests/snapshots/test_format/test_format_unbound_table_namespace/reprcatdb.txt new file mode 100644 index 000000000000..d25f4eb5149d --- /dev/null +++ b/ibis/expr/tests/snapshots/test_format/test_format_unbound_table_namespace/reprcatdb.txt @@ -0,0 +1,3 @@ +UnboundTable: ork.bork.bork + a int64 + b int64 \ No newline at end of file diff --git a/ibis/expr/tests/snapshots/test_format/test_format_unbound_table_namespace/reprdb.txt b/ibis/expr/tests/snapshots/test_format/test_format_unbound_table_namespace/reprdb.txt new file mode 100644 index 000000000000..b6fe4c9bed42 --- /dev/null +++ b/ibis/expr/tests/snapshots/test_format/test_format_unbound_table_namespace/reprdb.txt @@ -0,0 +1,3 @@ +UnboundTable: bork.bork + a int64 + b int64 \ No newline at end of file diff --git a/ibis/expr/tests/test_api.py b/ibis/expr/tests/test_api.py index edb5adbbeafa..975e65121312 100644 --- a/ibis/expr/tests/test_api.py +++ b/ibis/expr/tests/test_api.py @@ -156,3 +156,18 @@ def test_nested_name_property(): x = x + 1 assert x.op().name.count("Add") == n + + +def test_unbound_table_namespace(): + t = ibis.table(name="bork", schema=(("a", "int"), ("b", "int")), database="bork") + + assert t.op().namespace == ops.Namespace(database="bork") + + t = ibis.table( + name="bork", schema=(("a", "int"), ("b", "int")), database="bork", catalog="ork" + ) + + assert t.op().namespace == ops.Namespace(catalog="ork", database="bork") + + with pytest.raises(ValueError, match="A catalog-only namespace is invalid in Ibis"): + ibis.table(name="bork", schema=(("a", "int"), ("b", 
"int")), catalog="bork") diff --git a/ibis/expr/tests/test_format.py b/ibis/expr/tests/test_format.py index d8ab0ecc9e83..63ebda4f1597 100644 --- a/ibis/expr/tests/test_format.py +++ b/ibis/expr/tests/test_format.py @@ -379,6 +379,25 @@ def test_format_in_memory_table(snapshot): snapshot.assert_match(result, "repr.txt") +def test_format_unbound_table_namespace(snapshot): + t = ibis.table(name="bork", schema=(("a", "int"), ("b", "int"))) + + result = fmt(t) + snapshot.assert_match(result, "repr.txt") + + t = ibis.table(name="bork", schema=(("a", "int"), ("b", "int")), database="bork") + + result = fmt(t) + snapshot.assert_match(result, "reprdb.txt") + + t = ibis.table( + name="bork", schema=(("a", "int"), ("b", "int")), catalog="ork", database="bork" + ) + + result = fmt(t) + snapshot.assert_match(result, "reprcatdb.txt") + + def test_format_new_relational_operation(alltypes, snapshot): class MyRelation(ops.Relation): parent: ops.Relation