Skip to content

Commit

Permalink
feat(sql): enable cross-database joins (ibis-project#9849)
Browse files Browse the repository at this point in the history
  • Loading branch information
gforsyth authored Aug 16, 2024
1 parent 60103cb commit c3ff6ae
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 5 deletions.
2 changes: 1 addition & 1 deletion docker/mysql/startup.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
CREATE USER 'ibis'@'localhost' IDENTIFIED BY 'ibis';
CREATE SCHEMA IF NOT EXISTS test_schema;
GRANT CREATE,SELECT,DROP ON *.* TO 'ibis'@'%';
GRANT CREATE,SELECT,DROP,INSERT ON *.* TO 'ibis'@'%';
FLUSH PRIVILEGES;
36 changes: 36 additions & 0 deletions ibis/backends/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -1726,3 +1726,39 @@ def test_no_accidental_cross_database_table_load(con_create_database):
# Clean up
con.drop_table(table, database=dbname)
con.drop_database(dbname)


@pytest.mark.notyet(["druid"], reason="can't create tables")
@pytest.mark.notyet(
["flink"], reason="can't create non-temporary tables from in-memory data"
)
def test_cross_database_join(con_create_database, monkeypatch):
con = con_create_database

monkeypatch.setattr(ibis.options, "default_backend", con)

left = ibis.memtable({"a": [1], "b": [2]})
right = ibis.memtable({"a": [1], "c": [3]})

# Create an extra database
con.create_database(dbname := gen_name("dummy_db"))

# Insert left into current_database
left = con.create_table(left_table := gen_name("left"), obj=left)

# Insert right into new database
right = con.create_table(
right_table := gen_name("right"), obj=right, database=dbname
)

expr = left.join(right, "a")
assert expr.columns == ["a", "b", "c"]

result = expr.to_pyarrow()
expected = pa.Table.from_pydict({"a": [1], "b": [2], "c": [3]})

assert result.equals(expected)

con.drop_table(left_table)
con.drop_table(right_table, database=dbname)
con.drop_database(dbname)
16 changes: 12 additions & 4 deletions ibis/backends/trino/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,12 @@ def create_table(
The schema of the table to create; optional, but one of `obj` or
`schema` must be specified
database
Not yet implemented.
The database to insert the table into.
If not provided, the current database is used.
You can provide a single database name, like `"mydb"`. For
multi-level hierarchies, you can pass in a dotted string path like
`"catalog.database"` or a tuple of strings like `("catalog",
"database")`.
temp
This parameter is not yet supported in the Trino backend, because
Trino doesn't implement temporary tables
Expand All @@ -436,13 +441,16 @@ def create_table(
"Temporary tables are not supported in the Trino backend"
)

table_loc = self._to_sqlglot_table(database)
catalog, db = self._to_catalog_db_tuple(table_loc)

quoted = self.compiler.quoted
orig_table_ref = sg.to_identifier(name, quoted=quoted)
orig_table_ref = sg.table(name, catalog=catalog, db=db, quoted=quoted)

if overwrite:
name = util.gen_name(f"{self.name}_overwrite")

table_ref = sg.table(name, catalog=database, quoted=quoted)
table_ref = sg.table(name, catalog=catalog, db=db, quoted=quoted)

if schema is not None and obj is None:
column_defs = [
Expand Down Expand Up @@ -524,7 +532,7 @@ def create_table(
if temp_memtable_view is not None:
self.drop_table(temp_memtable_view)

return self.table(orig_table_ref.name)
return self.table(orig_table_ref.name, database=(catalog, db))

def _fetch_from_cursor(self, cursor, schema: sch.Schema) -> pd.DataFrame:
import pandas as pd
Expand Down

0 comments on commit c3ff6ae

Please sign in to comment.