diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index 4b60f87a1226..1f0b39fe16d1 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -900,20 +900,23 @@ def read_in_memory( return self.table(table_name) def read_delta( - self, source_table: str, table_name: str | None = None, **kwargs: Any + self, + source_table: str, + table_name: str | None = None, + **kwargs: Any, ) -> ir.Table: """Register a Delta Lake table as a table in the current database. Parameters ---------- source_table - The data source. Must be a directory containing a Delta Lake table. + The data source. Must be a directory + containing a Delta Lake table. table_name - An optional name to use for the created table. This defaults to a - generated name. - kwargs - Additional keyword arguments passed to the `delta` extension's - `delta_scan` function. + An optional name to use for the created table. This defaults to + a sequentially generated name. + **kwargs + Additional keyword arguments passed to deltalake.DeltaTable. Returns ------- @@ -925,25 +928,21 @@ def read_delta( table_name = table_name or util.gen_name("read_delta") - # always try to load the delta extension - extensions = ["delta"] - - # delta handles s3 itself, not with httpfs - if source_table.startswith(("http://", "https://")): - extensions.append("httpfs") + try: + from deltalake import DeltaTable + except ImportError: + raise ImportError( + "The deltalake extra is required to use the " + "read_delta method. You can install it using pip:\n\n" + "pip install 'ibis-framework[deltalake]'\n" + ) - self._load_extensions(extensions) + delta_table = DeltaTable(source_table, **kwargs) - options = [ - sg.to_identifier(key).eq(sge.convert(val)) for key, val in kwargs.items() - ] - self._create_temp_view( - table_name, - sg.select(STAR).from_(self.compiler.f.delta_scan(source_table, *options)), + return self.read_in_memory( + delta_table.to_pyarrow_dataset(), table_name=table_name ) - return self.table(table_name) - def list_tables( self, like: str | None = None, diff --git a/ibis/backends/tests/test_export.py b/ibis/backends/tests/test_export.py index cec260adae6a..8d067959dbd1 100644 --- a/ibis/backends/tests/test_export.py +++ b/ibis/backends/tests/test_export.py @@ -399,7 +399,7 @@ def test_to_pyarrow_decimal(backend, dtype, pyarrow_dtype): ) @pytest.mark.notyet(["clickhouse"], raises=Exception) @pytest.mark.notyet(["mssql", "pandas"], raises=PyDeltaTableError) -def test_roundtrip_delta(backend, con, alltypes, tmp_path): +def test_roundtrip_delta(backend, con, alltypes, tmp_path, monkeypatch): if con.name == "pyspark": pytest.importorskip("delta") else: @@ -410,10 +410,11 @@ def test_roundtrip_delta(backend, con, alltypes, tmp_path): path = tmp_path / "test.delta" t.to_delta(path) - dt = con.read_delta(path) - result = con.to_pandas(dt) + monkeypatch.setattr(ibis.options, "default_backend", con) + dt = ibis.read_delta(path) + result = dt.to_pandas() - backend.assert_frame_equal(result, expected, check_dtype=con.name != "duckdb") + backend.assert_frame_equal(result, expected) @pytest.mark.notimpl(