From ab214f474bd865ef80d7cc58ed8f1f5bf2d8dc5c Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 25 Jan 2024 15:30:10 +0100 Subject: [PATCH 01/14] ENH: allow using open_arrow with PyCapsule protocol (without pyarrow dependency) --- pyogrio/_io.pyx | 44 ++++++++++++++++++++++++++++++------- pyogrio/_ogr.pxd | 1 + pyogrio/raw.py | 19 ++++++++++++++-- pyogrio/tests/test_arrow.py | 14 ++++++++++++ 4 files changed, 68 insertions(+), 10 deletions(-) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index 12fb85a3..dbab9894 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -18,6 +18,8 @@ from libc.string cimport strlen from libc.math cimport isnan cimport cython +from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer + import numpy as np cimport numpy as np @@ -1239,6 +1241,25 @@ def ogr_read( field_data ) + +cdef void pycapsule_array_stream_deleter(object stream_capsule) noexcept: + cdef ArrowArrayStream* stream = PyCapsule_GetPointer( + stream_capsule, 'arrow_array_stream' + ) + # Do not invoke the deleter on a used/moved capsule + if stream.release != NULL: + stream.release(stream) + + free(stream) + + +cdef object alloc_c_stream(ArrowArrayStream** c_stream) noexcept: + c_stream[0] = malloc(sizeof(ArrowArrayStream)) + # Ensure the capsule destructor doesn't call a random release pointer + c_stream[0].release = NULL + return PyCapsule_New(c_stream[0], 'arrow_array_stream', &pycapsule_array_stream_deleter) + + @contextlib.contextmanager def ogr_open_arrow( str path, @@ -1257,7 +1278,9 @@ def ogr_open_arrow( str sql=None, str sql_dialect=None, int return_fids=False, - int batch_size=0): + int batch_size=0, + return_capsule=False, +): cdef int err = 0 cdef const char *path_c = NULL @@ -1268,7 +1291,7 @@ def ogr_open_arrow( cdef char **fields_c = NULL cdef const char *field_c = NULL cdef char **options = NULL - cdef ArrowArrayStream stream + cdef ArrowArrayStream* stream cdef ArrowSchema schema IF CTE_GDAL_VERSION < (3, 6, 0): @@ -1384,19 +1407,23 
@@ def ogr_open_arrow( # make sure layer is read from beginning OGR_L_ResetReading(ogr_layer) - if not OGR_L_GetArrowStream(ogr_layer, &stream, options): - raise RuntimeError("Failed to open ArrowArrayStream from Layer") + # allocate the stream struct and wrap in capsule to ensure clean-up on error + capsule = alloc_c_stream(&stream) - stream_ptr = &stream + if not OGR_L_GetArrowStream(ogr_layer, stream, options): + raise RuntimeError("Failed to open ArrowArrayStream from Layer") if skip_features: # only supported for GDAL >= 3.8.0; have to do this after getting # the Arrow stream OGR_L_SetNextByIndex(ogr_layer, skip_features) - # stream has to be consumed before the Dataset is closed - import pyarrow as pa - reader = pa.RecordBatchStreamReader._import_from_c(stream_ptr) + if return_capsule: + reader = capsule + else: + import pyarrow as pa + stream_ptr = &stream + reader = pa.RecordBatchStreamReader._import_from_c(stream_ptr) meta = { 'crs': crs, @@ -1407,6 +1434,7 @@ def ogr_open_arrow( 'fid_column': fid_column, } + # stream has to be consumed before the Dataset is closed yield meta, reader finally: diff --git a/pyogrio/_ogr.pxd b/pyogrio/_ogr.pxd index 46d5bc8d..4ddae278 100644 --- a/pyogrio/_ogr.pxd +++ b/pyogrio/_ogr.pxd @@ -196,6 +196,7 @@ cdef extern from "arrow_bridge.h": struct ArrowArrayStream: int (*get_schema)(ArrowArrayStream* stream, ArrowSchema* out) + void (*release)(ArrowArrayStream*) noexcept nogil cdef extern from "ogr_api.h": diff --git a/pyogrio/raw.py b/pyogrio/raw.py index 4384ee9a..cf9bac11 100644 --- a/pyogrio/raw.py +++ b/pyogrio/raw.py @@ -331,6 +331,16 @@ def read_arrow( return meta, table +class _ArrowStream: + def __init__(self, capsule): + self._capsule = capsule + + def __arrow_c_stream__(self, requested_schema=None): + if requested_schema is not None: + raise NotImplementedError("requested_schema is not supported") + return self._capsule + + def open_arrow( path_or_buffer, /, @@ -349,10 +359,11 @@ def open_arrow( sql_dialect=None, 
return_fids=False, batch_size=65_536, + return_pyarrow=True, **kwargs, ): """ - Open OGR data source as a stream of pyarrow record batches. + Open OGR data source as a stream of Arrow record batches. See docstring of `read` for parameters. @@ -395,7 +406,7 @@ def open_arrow( dataset_kwargs = _preprocess_options_key_value(kwargs) if kwargs else {} try: - return ogr_open_arrow( + reader = ogr_open_arrow( path, layer=layer, encoding=encoding, @@ -413,7 +424,11 @@ def open_arrow( return_fids=return_fids, dataset_kwargs=dataset_kwargs, batch_size=batch_size, + return_capsule=not return_pyarrow, ) + if not return_pyarrow: + reader = _ArrowStream(reader) + return reader finally: if buffer is not None: remove_virtual_file(path) diff --git a/pyogrio/tests/test_arrow.py b/pyogrio/tests/test_arrow.py index dc62bff1..722f3783 100644 --- a/pyogrio/tests/test_arrow.py +++ b/pyogrio/tests/test_arrow.py @@ -4,6 +4,7 @@ import pytest +import pyogrio from pyogrio import __gdal_version__, read_dataframe from pyogrio.raw import open_arrow, read_arrow from pyogrio.tests.conftest import requires_arrow_api @@ -185,6 +186,19 @@ def test_open_arrow_max_features_unsupported(naturalearth_lowres, max_features): pass +def test_open_arrow_capsule_protocol(naturalearth_lowres): + pytest.importorskip("pyarrow", minversion="14") + + with open_arrow(naturalearth_lowres, return_pyarrow=False) as (meta, reader): + assert isinstance(meta, dict) + assert isinstance(reader, pyogrio.raw._ArrowStream) + + result = pyarrow.table(reader) + + expected = read_arrow(naturalearth_lowres) + assert result.equals(expected) + + @contextlib.contextmanager def use_arrow_context(): original = os.environ.get("PYOGRIO_USE_ARROW", None) From c48fac2bbc0c8298b7284f2e1a0905bf9e611299 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 25 Jan 2024 15:42:19 +0100 Subject: [PATCH 02/14] don't use capsule in default case as that frees the stream before consuming --- pyogrio/_io.pyx | 11 +++++++++-- 1 file changed, 
9 insertions(+), 2 deletions(-) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index dbab9894..8754d418 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -1408,9 +1408,13 @@ def ogr_open_arrow( OGR_L_ResetReading(ogr_layer) # allocate the stream struct and wrap in capsule to ensure clean-up on error - capsule = alloc_c_stream(&stream) + if return_capsule: + capsule = alloc_c_stream(&stream) + else: + stream = malloc(sizeof(ArrowArrayStream)) if not OGR_L_GetArrowStream(ogr_layer, stream, options): + free(stream) raise RuntimeError("Failed to open ArrowArrayStream from Layer") if skip_features: @@ -1422,7 +1426,7 @@ def ogr_open_arrow( reader = capsule else: import pyarrow as pa - stream_ptr = &stream + stream_ptr = stream reader = pa.RecordBatchStreamReader._import_from_c(stream_ptr) meta = { @@ -1458,6 +1462,9 @@ def ogr_open_arrow( GDALClose(ogr_dataset) ogr_dataset = NULL + free(stream) + + def ogr_read_bounds( str path, object layer=None, From 102c11705df6d940bcf168491e015befd6c74be8 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 12 Mar 2024 18:21:00 +0100 Subject: [PATCH 03/14] remove noexcept for cython compat --- pyogrio/_io.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index f255f0ee..fb6efefc 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -1252,7 +1252,7 @@ cdef void pycapsule_array_stream_deleter(object stream_capsule) noexcept: free(stream) -cdef object alloc_c_stream(ArrowArrayStream** c_stream) noexcept: +cdef object alloc_c_stream(ArrowArrayStream** c_stream): c_stream[0] = malloc(sizeof(ArrowArrayStream)) # Ensure the capsule destructor doesn't call a random release pointer c_stream[0].release = NULL From 947b5d1e7d910b85d7db813a6bc669eee24a66eb Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 14 Mar 2024 13:43:36 +0100 Subject: [PATCH 04/14] fix context manager and memory issues with non-capsule code path --- pyogrio/_io.pyx | 28 
++++++++++++++++++++-------- pyogrio/raw.py | 17 ++--------------- pyogrio/tests/test_arrow.py | 4 ++-- 3 files changed, 24 insertions(+), 25 deletions(-) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index fb6efefc..743ab150 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -1259,6 +1259,16 @@ cdef object alloc_c_stream(ArrowArrayStream** c_stream): return PyCapsule_New(c_stream[0], 'arrow_array_stream', &pycapsule_array_stream_deleter) +class _ArrowStream: + def __init__(self, capsule): + self._capsule = capsule + + def __arrow_c_stream__(self, requested_schema=None): + if requested_schema is not None: + raise NotImplementedError("requested_schema is not supported") + return self._capsule + + @contextlib.contextmanager def ogr_open_arrow( str path, @@ -1278,7 +1288,7 @@ def ogr_open_arrow( str sql_dialect=None, int return_fids=False, int batch_size=0, - return_capsule=False, + return_pyarrow=True, ): cdef int err = 0 @@ -1414,13 +1424,14 @@ def ogr_open_arrow( OGR_L_ResetReading(ogr_layer) # allocate the stream struct and wrap in capsule to ensure clean-up on error - if return_capsule: + if not return_pyarrow: capsule = alloc_c_stream(&stream) else: stream = malloc(sizeof(ArrowArrayStream)) if not OGR_L_GetArrowStream(ogr_layer, stream, options): - free(stream) + if return_pyarrow: + free(stream) raise RuntimeError("Failed to open ArrowArrayStream from Layer") if skip_features: @@ -1428,12 +1439,12 @@ def ogr_open_arrow( # the Arrow stream OGR_L_SetNextByIndex(ogr_layer, skip_features) - if return_capsule: - reader = capsule - else: + if return_pyarrow: import pyarrow as pa stream_ptr = stream reader = pa.RecordBatchStreamReader._import_from_c(stream_ptr) + else: + reader = _ArrowStream(capsule) meta = { 'crs': crs, @@ -1448,7 +1459,7 @@ def ogr_open_arrow( yield meta, reader finally: - if reader is not None: + if return_pyarrow and reader is not None: # Mark reader as closed to prevent reading batches reader.close() @@ -1468,7 +1479,8 @@ def ogr_open_arrow( 
GDALClose(ogr_dataset) ogr_dataset = NULL - free(stream) + if return_pyarrow: + free(stream) def ogr_read_bounds( diff --git a/pyogrio/raw.py b/pyogrio/raw.py index e160ab82..4dbe10b3 100644 --- a/pyogrio/raw.py +++ b/pyogrio/raw.py @@ -343,16 +343,6 @@ def read_arrow( return meta, table -class _ArrowStream: - def __init__(self, capsule): - self._capsule = capsule - - def __arrow_c_stream__(self, requested_schema=None): - if requested_schema is not None: - raise NotImplementedError("requested_schema is not supported") - return self._capsule - - def open_arrow( path_or_buffer, /, @@ -418,7 +408,7 @@ def open_arrow( dataset_kwargs = _preprocess_options_key_value(kwargs) if kwargs else {} try: - reader = ogr_open_arrow( + return ogr_open_arrow( path, layer=layer, encoding=encoding, @@ -436,11 +426,8 @@ def open_arrow( return_fids=return_fids, dataset_kwargs=dataset_kwargs, batch_size=batch_size, - return_capsule=not return_pyarrow, + return_pyarrow=return_pyarrow, ) - if not return_pyarrow: - reader = _ArrowStream(reader) - return reader finally: if buffer is not None: remove_virtual_file(path) diff --git a/pyogrio/tests/test_arrow.py b/pyogrio/tests/test_arrow.py index d66f4b65..4666b31e 100644 --- a/pyogrio/tests/test_arrow.py +++ b/pyogrio/tests/test_arrow.py @@ -213,11 +213,11 @@ def test_open_arrow_capsule_protocol(naturalearth_lowres): with open_arrow(naturalearth_lowres, return_pyarrow=False) as (meta, reader): assert isinstance(meta, dict) - assert isinstance(reader, pyogrio.raw._ArrowStream) + assert isinstance(reader, pyogrio._io._ArrowStream) result = pyarrow.table(reader) - expected = read_arrow(naturalearth_lowres) + _, expected = read_arrow(naturalearth_lowres) assert result.equals(expected) From 83f5c40101e908844d82c4443bf2063f65e0b966 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 14 Mar 2024 14:07:35 +0100 Subject: [PATCH 05/14] add docstring and changelog note --- CHANGES.md | 5 +++++ pyogrio/raw.py | 34 
+++++++++++++++++++++++++++++++--- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 57f10678..72f2cb1c 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -7,6 +7,11 @@ - `read_arrow` and `open_arrow` now provide [GeoArrow-compliant extension metadata](https://geoarrow.org/extension-types.html), including the CRS, when using GDAL 3.8 or higher (#366). +- The `open_arrow` function can now be used without a `pyarrow` dependency. In + that case, specify `use_pyarrow=False` and the returned reader will be a + generic object implementing the [Arrow PyCapsule Protocol](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html) (i.e. having an `__arrow_c_stream__` + method). This object can then be consumed by your Arrow implementation of choice + that supports this protocol (#349). - Warn when reading from a multilayer file without specifying a layer (#362). diff --git a/pyogrio/raw.py b/pyogrio/raw.py index 4dbe10b3..ef1c7b15 100644 --- a/pyogrio/raw.py +++ b/pyogrio/raw.py @@ -369,10 +369,28 @@ def open_arrow( See docstring of `read` for parameters. - The RecordBatchStreamReader is reading from a stream provided by OGR and must not be + The RecordBatchReader is reading from a stream provided by OGR and must not be accessed after the OGR dataset has been closed, i.e. after the context manager has been closed. + By default this function returns a `pyarrow.RecordBatchReader`. Optionally, + you can use this function without a `pyarrow` dependency by specifying + ``return_pyarrow=False``. In that case, the returned reader will be a + generic object implementing the `Arrow PyCapsule Protocol`_ (i.e. having + an `__arrow_c_stream__` method). This object can then be consumed by + your Arrow implementation of choice that supports this protocol. + + .. 
_Arrow PyCapsule Protocol: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html + + Other Parameters + ---------------- + batch_size : int (default: 65_536) + Maximum number of features to retrieve in a batch. + return_pyarrow : bool (default: True) + If False, return a generic ArrowStream object instead of a pyarrow + RecordBatchReader. This object needs to be passed to another library + supporting the Arrow PyCapsule Protocol to consume the stream of data. + Examples -------- @@ -385,12 +403,22 @@ def open_arrow( >>> for table in reader: >>> geometries = shapely.from_wkb(table[meta["geometry_name"]]) + Or without directly returning a pyarrow object: + + >>> with open_arrow(path) as source: + >>> meta, stream = source + >>> reader = pa.RecordBatchReader.from_stream(stream) + >>> for table in reader: + >>> geometries = shapely.from_wkb(table[meta["geometry_name"]]) + Returns ------- - (dict, pyarrow.RecordBatchStreamReader) + (dict, pyarrow.RecordBatchReader or ArrowStream) Returns a tuple of meta information about the data source in a dict, - and a pyarrow RecordBatchStreamReader with data. + and a data stream object (a pyarrow RecordBatchReader if + `return_pyarrow` is set to True, otherwise a generic ArrowStrem + object). 
Meta is: { "crs": "", From 7c33fa04b54dcdf4adfbf74136210431a8fcbd50 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 14 Mar 2024 15:22:20 +0100 Subject: [PATCH 06/14] always allocate stream through capsule --- pyogrio/_io.pyx | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index 743ab150..b588b150 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -1424,14 +1424,9 @@ def ogr_open_arrow( OGR_L_ResetReading(ogr_layer) # allocate the stream struct and wrap in capsule to ensure clean-up on error - if not return_pyarrow: - capsule = alloc_c_stream(&stream) - else: - stream = malloc(sizeof(ArrowArrayStream)) + capsule = alloc_c_stream(&stream) if not OGR_L_GetArrowStream(ogr_layer, stream, options): - if return_pyarrow: - free(stream) raise RuntimeError("Failed to open ArrowArrayStream from Layer") if skip_features: @@ -1463,6 +1458,8 @@ def ogr_open_arrow( # Mark reader as closed to prevent reading batches reader.close() + # `stream` will be freed through `capsule` destructor + CSLDestroy(options) if fields_c != NULL: CSLDestroy(fields_c) @@ -1479,9 +1476,6 @@ def ogr_open_arrow( GDALClose(ogr_dataset) ogr_dataset = NULL - if return_pyarrow: - free(stream) - def ogr_read_bounds( str path, From 51787e7dea29f3b610b8e5c51bef4feea3f417ba Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 14 Mar 2024 18:50:55 +0100 Subject: [PATCH 07/14] ensure it actually runs without having pyarrow installed + test this --- pyogrio/_compat.py | 3 ++- pyogrio/raw.py | 10 +++++++-- pyogrio/tests/conftest.py | 7 +++--- pyogrio/tests/test_arrow.py | 22 ++++++++++++++++-- pyogrio/tests/test_geopandas_io.py | 8 +++---- pyogrio/tests/test_raw_io.py | 36 +++++++++++++++++++++++++++--- 6 files changed, 71 insertions(+), 15 deletions(-) diff --git a/pyogrio/_compat.py b/pyogrio/_compat.py index b5b724b5..afc586e8 100644 --- a/pyogrio/_compat.py +++ b/pyogrio/_compat.py @@ -24,7 +24,8 @@ pandas = None 
-HAS_ARROW_API = __gdal_version__ >= (3, 6, 0) and pyarrow is not None +HAS_ARROW_API = __gdal_version__ >= (3, 6, 0) +HAS_PYARROW = pyarrow is not None HAS_GEOPANDAS = geopandas is not None diff --git a/pyogrio/raw.py b/pyogrio/raw.py index ef1c7b15..c0ff787a 100644 --- a/pyogrio/raw.py +++ b/pyogrio/raw.py @@ -1,7 +1,7 @@ import warnings from pyogrio._env import GDALEnv -from pyogrio._compat import HAS_ARROW_API +from pyogrio._compat import HAS_ARROW_API, HAS_PYARROW from pyogrio.core import detect_write_driver from pyogrio.errors import DataSourceError from pyogrio.util import ( @@ -256,6 +256,12 @@ def read_arrow( "geometry_name": "", } """ + if not HAS_PYARROW: + raise RuntimeError( + "pyarrow required to read using 'read_arrow'. You can use 'open_arrow' " + "to read data with an alternative Arrow implementation" + ) + from pyarrow import Table gdal_version = get_gdal_version() @@ -429,7 +435,7 @@ def open_arrow( } """ if not HAS_ARROW_API: - raise RuntimeError("pyarrow and GDAL>= 3.6 required to read using arrow") + raise RuntimeError("GDAL>= 3.6 required to read using arrow") path, buffer = get_vsi_path(path_or_buffer) diff --git a/pyogrio/tests/conftest.py b/pyogrio/tests/conftest.py index 76327b4f..20b84415 100644 --- a/pyogrio/tests/conftest.py +++ b/pyogrio/tests/conftest.py @@ -8,7 +8,7 @@ __version__, list_drivers, ) -from pyogrio._compat import HAS_ARROW_API, HAS_GDAL_GEOS, HAS_SHAPELY +from pyogrio._compat import HAS_ARROW_API, HAS_GDAL_GEOS, HAS_PYARROW, HAS_SHAPELY from pyogrio.raw import read, write @@ -43,8 +43,9 @@ def pytest_report_header(config): # marks to skip tests if optional dependecies are not present -requires_arrow_api = pytest.mark.skipif( - not HAS_ARROW_API, reason="GDAL>=3.6 and pyarrow required" +requires_arrow_api = pytest.mark.skipif(not HAS_ARROW_API, reason="GDAL>=3.6 required") +requires_pyarrow_api = pytest.mark.skipif( + not HAS_ARROW_API or not HAS_PYARROW, reason="GDAL>=3.6 and pyarrow required" ) requires_gdal_geos = 
pytest.mark.skipif( diff --git a/pyogrio/tests/test_arrow.py b/pyogrio/tests/test_arrow.py index 4666b31e..6e454e6a 100644 --- a/pyogrio/tests/test_arrow.py +++ b/pyogrio/tests/test_arrow.py @@ -2,6 +2,7 @@ import json import math import os +import sys import pytest import numpy as np @@ -9,7 +10,7 @@ import pyogrio from pyogrio import __gdal_version__, read_dataframe from pyogrio.raw import open_arrow, read_arrow, write -from pyogrio.tests.conftest import requires_arrow_api +from pyogrio.tests.conftest import requires_pyarrow_api try: import pandas as pd @@ -21,7 +22,7 @@ pass # skip all tests in this file if Arrow API or GeoPandas are unavailable -pytestmark = requires_arrow_api +pytestmark = requires_pyarrow_api pytest.importorskip("geopandas") @@ -221,6 +222,23 @@ def test_open_arrow_capsule_protocol(naturalearth_lowres): assert result.equals(expected) +def test_open_arrow_capsule_protocol_without_pyarrow(naturalearth_lowres): + pyarrow = pytest.importorskip("pyarrow", minversion="14") + + # Make PyArrow temporarily unavailable (importing will fail) + sys.modules["pyarrow"] = None + try: + with open_arrow(naturalearth_lowres, return_pyarrow=False) as (meta, reader): + assert isinstance(meta, dict) + assert isinstance(reader, pyogrio._io._ArrowStream) + result = pyarrow.table(reader) + finally: + sys.modules["pyarrow"] = pyarrow + + _, expected = read_arrow(naturalearth_lowres) + assert result.equals(expected) + + @contextlib.contextmanager def use_arrow_context(): original = os.environ.get("PYOGRIO_USE_ARROW", None) diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index f12dbbdd..893d506d 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -14,7 +14,7 @@ from pyogrio.tests.conftest import ( ALL_EXTS, DRIVERS, - requires_arrow_api, + requires_pyarrow_api, requires_gdal_geos, ) from pyogrio._compat import PANDAS_GE_15 @@ -45,7 +45,7 @@ scope="session", params=[ False, - pytest.param(True, 
marks=requires_arrow_api), + pytest.param(True, marks=requires_pyarrow_api), ], ) def use_arrow(request): @@ -1521,7 +1521,7 @@ def test_read_dataframe_arrow_dtypes(tmp_path): assert_geodataframe_equal(result, df) -@requires_arrow_api +@requires_pyarrow_api @pytest.mark.skipif( __gdal_version__ < (3, 8, 3), reason="Arrow bool value bug fixed in GDAL >= 3.8.3" ) @@ -1538,7 +1538,7 @@ def test_arrow_bool_roundtrip(tmpdir): assert_geodataframe_equal(result, df) -@requires_arrow_api +@requires_pyarrow_api @pytest.mark.skipif( __gdal_version__ >= (3, 8, 3), reason="Arrow bool value bug fixed in GDAL >= 3.8.3" ) diff --git a/pyogrio/tests/test_raw_io.py b/pyogrio/tests/test_raw_io.py index 5d0bf947..21cd600e 100644 --- a/pyogrio/tests/test_raw_io.py +++ b/pyogrio/tests/test_raw_io.py @@ -1,4 +1,5 @@ import contextlib +import ctypes import json import os import sys @@ -7,6 +8,7 @@ from numpy import array_equal import pytest +import pyogrio from pyogrio import ( list_layers, list_drivers, @@ -14,13 +16,14 @@ set_gdal_config_options, __gdal_version__, ) -from pyogrio._compat import HAS_SHAPELY -from pyogrio.raw import read, write +from pyogrio._compat import HAS_SHAPELY, HAS_PYARROW +from pyogrio.raw import read, write, open_arrow from pyogrio.errors import DataSourceError, DataLayerError, FeatureError from pyogrio.tests.conftest import ( DRIVERS, DRIVER_EXT, prepare_testfile, + requires_pyarrow_api, requires_arrow_api, ) @@ -1017,7 +1020,7 @@ def test_write_float_nan_null(tmp_path, dtype): assert '{ "col": NaN }' in content -@requires_arrow_api +@requires_pyarrow_api @pytest.mark.skipif( "Arrow" not in list_drivers(), reason="Arrow driver is not available" ) @@ -1186,3 +1189,30 @@ def test_write_with_mask(tmp_path): field_mask = [np.array([False, True, False])] * 2 with pytest.raises(ValueError): write(filename, geometry, field_data, fields, field_mask, **meta) + + +@requires_arrow_api +def test_open_arrow_capsule_protocol_without_pyarrow(naturalearth_lowres): + # this 
test is included here instead of test_arrow.py to ensure we also run + # it when pyarrow is not installed + + with open_arrow(naturalearth_lowres, return_pyarrow=False) as (meta, reader): + assert isinstance(meta, dict) + assert isinstance(reader, pyogrio._io._ArrowStream) + capsule = reader.__arrow_c_stream__() + assert ( + ctypes.pythonapi.PyCapsule_IsValid( + ctypes.py_object(capsule), b"arrow_array_stream" + ) + == 1 + ) + + +@pytest.mark.skipif(HAS_PYARROW, reason="pyarrow is installed") +def test_open_arrow_error_no_pyarrow(naturalearth_lowres): + # this test is included here instead of test_arrow.py to ensure we run + # it when pyarrow is not installed + + with pytest.raises(ImportError): + with open_arrow(naturalearth_lowres, return_pyarrow=True) as _: + pass From 437485efb35562a17b73bd1a01f9d660f537cb9b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 14 Mar 2024 18:58:24 +0100 Subject: [PATCH 08/14] clean-up pxd file --- pyogrio/_ogr.pxd | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyogrio/_ogr.pxd b/pyogrio/_ogr.pxd index a7d7aaa8..fa75dd89 100644 --- a/pyogrio/_ogr.pxd +++ b/pyogrio/_ogr.pxd @@ -190,13 +190,13 @@ cdef extern from "ogr_srs_api.h": void OSRRelease(OGRSpatialReferenceH srs) -cdef extern from "arrow_bridge.h": +cdef extern from "arrow_bridge.h" nogil: struct ArrowSchema: int64_t n_children struct ArrowArrayStream: - int (*get_schema)(ArrowArrayStream* stream, ArrowSchema* out) - void (*release)(ArrowArrayStream*) noexcept nogil + int (*get_schema)(ArrowArrayStream* stream, ArrowSchema* out) noexcept + void (*release)(ArrowArrayStream*) noexcept cdef extern from "ogr_api.h": From 73b9b2bb4112aa03897c062c92cceaf03a95d690 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 14 Mar 2024 19:00:51 +0100 Subject: [PATCH 09/14] fix test in case of no pyarrow --- pyogrio/tests/test_raw_io.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyogrio/tests/test_raw_io.py 
b/pyogrio/tests/test_raw_io.py index 21cd600e..81110fdc 100644 --- a/pyogrio/tests/test_raw_io.py +++ b/pyogrio/tests/test_raw_io.py @@ -1209,6 +1209,7 @@ def test_open_arrow_capsule_protocol_without_pyarrow(naturalearth_lowres): @pytest.mark.skipif(HAS_PYARROW, reason="pyarrow is installed") +@requires_arrow_api def test_open_arrow_error_no_pyarrow(naturalearth_lowres): # this test is included here instead of test_arrow.py to ensure we run # it when pyarrow is not installed From d1282ca5f6d55613dcdf2a02bc0d56bff94e47ec Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 12 Apr 2024 09:59:50 +0200 Subject: [PATCH 10/14] small cleanup --- pyogrio/_io.pyx | 4 ++-- pyogrio/raw.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index ea74739c..43520f2a 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -1465,8 +1465,8 @@ def ogr_open_arrow( if return_pyarrow: import pyarrow as pa - stream_ptr = stream - reader = pa.RecordBatchStreamReader._import_from_c(stream_ptr) + + reader = pa.RecordBatchStreamReader._import_from_c( stream) else: reader = _ArrowStream(capsule) diff --git a/pyogrio/raw.py b/pyogrio/raw.py index 8e63a92e..2b5e0bbd 100644 --- a/pyogrio/raw.py +++ b/pyogrio/raw.py @@ -373,7 +373,7 @@ def open_arrow( you can use this function without a `pyarrow` dependency by specifying ``return_pyarrow=False``. In that case, the returned reader will be a generic object implementing the `Arrow PyCapsule Protocol`_ (i.e. having - an `__arrow_c_stream__` method). This object can then be consumed by + an ``__arrow_c_stream__`` method). This object can then be consumed by your Arrow implementation of choice that supports this protocol. .. 
_Arrow PyCapsule Protocol: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html From a554294e17dd111da36476258181f59034666ae1 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 12 Apr 2024 10:01:04 +0200 Subject: [PATCH 11/14] Apply suggestions from code review Co-authored-by: Brendan Ward --- pyogrio/raw.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyogrio/raw.py b/pyogrio/raw.py index 2b5e0bbd..200e8233 100644 --- a/pyogrio/raw.py +++ b/pyogrio/raw.py @@ -401,11 +401,11 @@ def open_arrow( Or without directly returning a pyarrow object: - >>> with open_arrow(path) as source: + >>> with open_arrow(path, return_pyarrow=False) as source: >>> meta, stream = source >>> reader = pa.RecordBatchReader.from_stream(stream) >>> for table in reader: - >>> geometries = shapely.from_wkb(table[meta["geometry_name"]]) + >>> geometries = shapely.from_wkb(table[meta["geometry_name"] or "wkb_geometry"]) Returns ------- From 69a09221de1b4ea76e5b42364decf92a506c7996 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 12 Apr 2024 10:05:57 +0200 Subject: [PATCH 12/14] return_pyarrow -> use_pyarrow --- pyogrio/_io.pyx | 6 +++--- pyogrio/raw.py | 12 ++++++------ pyogrio/tests/test_arrow.py | 4 ++-- pyogrio/tests/test_raw_io.py | 4 ++-- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index 43520f2a..2b57fbe9 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -1306,7 +1306,7 @@ def ogr_open_arrow( str sql_dialect=None, int return_fids=False, int batch_size=0, - return_pyarrow=True, + use_pyarrow=True, ): cdef int err = 0 @@ -1463,7 +1463,7 @@ def ogr_open_arrow( # the Arrow stream OGR_L_SetNextByIndex(ogr_layer, skip_features) - if return_pyarrow: + if use_pyarrow: import pyarrow as pa reader = pa.RecordBatchStreamReader._import_from_c( stream) @@ -1483,7 +1483,7 @@ def ogr_open_arrow( yield meta, reader finally: - if return_pyarrow and reader is not None: + if 
use_pyarrow and reader is not None: # Mark reader as closed to prevent reading batches reader.close() diff --git a/pyogrio/raw.py b/pyogrio/raw.py index 200e8233..b1b98e29 100644 --- a/pyogrio/raw.py +++ b/pyogrio/raw.py @@ -357,7 +357,7 @@ def open_arrow( sql_dialect=None, return_fids=False, batch_size=65_536, - return_pyarrow=True, + use_pyarrow=True, **kwargs, ): """ @@ -371,7 +371,7 @@ def open_arrow( By default this function returns a `pyarrow.RecordBatchReader`. Optionally, you can use this function without a `pyarrow` dependency by specifying - ``return_pyarrow=False``. In that case, the returned reader will be a + ``use_pyarrow=False``. In that case, the returned reader will be a generic object implementing the `Arrow PyCapsule Protocol`_ (i.e. having an ``__arrow_c_stream__`` method). This object can then be consumed by your Arrow implementation of choice that supports this protocol. @@ -382,7 +382,7 @@ def open_arrow( ---------------- batch_size : int (default: 65_536) Maximum number of features to retrieve in a batch. - return_pyarrow : bool (default: True) + use_pyarrow : bool (default: True) If False, return a generic ArrowStream object instead of a pyarrow RecordBatchReader. This object needs to be passed to another library supporting the Arrow PyCapsule Protocol to consume the stream of data. @@ -401,7 +401,7 @@ def open_arrow( Or without directly returning a pyarrow object: - >>> with open_arrow(path, return_pyarrow=False) as source: + >>> with open_arrow(path, use_pyarrow=False) as source: >>> meta, stream = source >>> reader = pa.RecordBatchReader.from_stream(stream) >>> for table in reader: @@ -413,7 +413,7 @@ def open_arrow( Returns a tuple of meta information about the data source in a dict, and a data stream object (a pyarrow RecordBatchReader if - `return_pyarrow` is set to True, otherwise a generic ArrowStrem + `use_pyarrow` is set to True, otherwise a generic ArrowStrem object). 
Meta is: { @@ -450,7 +450,7 @@ def open_arrow( return_fids=return_fids, dataset_kwargs=dataset_kwargs, batch_size=batch_size, - return_pyarrow=return_pyarrow, + use_pyarrow=use_pyarrow, ) finally: if buffer is not None: diff --git a/pyogrio/tests/test_arrow.py b/pyogrio/tests/test_arrow.py index 96565c7f..2ff50cf0 100644 --- a/pyogrio/tests/test_arrow.py +++ b/pyogrio/tests/test_arrow.py @@ -212,7 +212,7 @@ def test_read_arrow_geoarrow_metadata(naturalearth_lowres): def test_open_arrow_capsule_protocol(naturalearth_lowres): pytest.importorskip("pyarrow", minversion="14") - with open_arrow(naturalearth_lowres, return_pyarrow=False) as (meta, reader): + with open_arrow(naturalearth_lowres, use_pyarrow=False) as (meta, reader): assert isinstance(meta, dict) assert isinstance(reader, pyogrio._io._ArrowStream) @@ -228,7 +228,7 @@ def test_open_arrow_capsule_protocol_without_pyarrow(naturalearth_lowres): # Make PyArrow temporarily unavailable (importing will fail) sys.modules["pyarrow"] = None try: - with open_arrow(naturalearth_lowres, return_pyarrow=False) as (meta, reader): + with open_arrow(naturalearth_lowres, use_pyarrow=False) as (meta, reader): assert isinstance(meta, dict) assert isinstance(reader, pyogrio._io._ArrowStream) result = pyarrow.table(reader) diff --git a/pyogrio/tests/test_raw_io.py b/pyogrio/tests/test_raw_io.py index 15ae864f..7e73b25d 100644 --- a/pyogrio/tests/test_raw_io.py +++ b/pyogrio/tests/test_raw_io.py @@ -1204,7 +1204,7 @@ def test_open_arrow_capsule_protocol_without_pyarrow(naturalearth_lowres): # this test is included here instead of test_arrow.py to ensure we also run # it when pyarrow is not installed - with open_arrow(naturalearth_lowres, return_pyarrow=False) as (meta, reader): + with open_arrow(naturalearth_lowres, use_pyarrow=False) as (meta, reader): assert isinstance(meta, dict) assert isinstance(reader, pyogrio._io._ArrowStream) capsule = reader.__arrow_c_stream__() @@ -1223,5 +1223,5 @@ def 
test_open_arrow_error_no_pyarrow(naturalearth_lowres): # it when pyarrow is not installed with pytest.raises(ImportError): - with open_arrow(naturalearth_lowres, return_pyarrow=True) as _: + with open_arrow(naturalearth_lowres, use_pyarrow=True) as _: pass From 307027440cd140e26fc777d0fc9eec51db5514ac Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 12 Apr 2024 10:10:09 +0200 Subject: [PATCH 13/14] reflow text, expand example explanation --- CHANGES.md | 6 +++--- pyogrio/raw.py | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 2d59fd58..4abfe242 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -11,9 +11,9 @@ including the CRS, when using GDAL 3.8 or higher (#366). - The `open_arrow` function can now be used without a `pyarrow` dependency. In that case, specify `use_pyarrow=False` and the returned reader will be a - generic object implementing the [Arrow PyCapsule Protocol](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html) (i.e. having an `__arrow_c_stream__` - method). This object can then be consumed by your Arrow implementation of choice - that supports this protocol (#349). + generic object implementing the [Arrow PyCapsule Protocol](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html) + (i.e. having an `__arrow_c_stream__` method). This object can then be consumed + by your Arrow implementation of choice that supports this protocol (#349). - Warn when reading from a multilayer file without specifying a layer (#362). 
### Bug fixes diff --git a/pyogrio/raw.py b/pyogrio/raw.py index b1b98e29..8abf8232 100644 --- a/pyogrio/raw.py +++ b/pyogrio/raw.py @@ -399,7 +399,9 @@ def open_arrow( >>> for table in reader: >>> geometries = shapely.from_wkb(table[meta["geometry_name"]]) - Or without directly returning a pyarrow object: + Or without directly returning a pyarrow object, allowing you to consume + the `stream` with any library that supports the Arrow PyCapsule protocol + (in this example still using pyarrow for that): >>> with open_arrow(path, use_pyarrow=False) as source: >>> meta, stream = source From f648447ff7cf6942fca0e1b0b05ea50a1f94ac74 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 12 Apr 2024 20:53:54 +0200 Subject: [PATCH 14/14] switch default to use_pyarrow=False --- CHANGES.md | 10 ++++--- pyogrio/_io.pyx | 2 +- pyogrio/raw.py | 55 +++++++++++++++++++----------------- pyogrio/tests/test_arrow.py | 13 +++++---- pyogrio/tests/test_raw_io.py | 2 +- 5 files changed, 45 insertions(+), 37 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 4abfe242..992ab023 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -9,11 +9,13 @@ - `read_arrow` and `open_arrow` now provide [GeoArrow-compliant extension metadata](https://geoarrow.org/extension-types.html), including the CRS, when using GDAL 3.8 or higher (#366). -- The `open_arrow` function can now be used without a `pyarrow` dependency. In - that case, specify `use_pyarrow=False` and the returned reader will be a - generic object implementing the [Arrow PyCapsule Protocol](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html) +- The `open_arrow` function can now be used without a `pyarrow` dependency. By + default, it will now return a stream object implementing the + [Arrow PyCapsule Protocol](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html) (i.e. having an `__arrow_c_stream__` method). 
This object can then be consumed - by your Arrow implementation of choice that supports this protocol (#349). + by your Arrow implementation of choice that supports this protocol. To keep + the previous behaviour of returning a `pyarrow.RecordBatchReader`, specify + `use_pyarrow=True` (#349). - Warn when reading from a multilayer file without specifying a layer (#362). ### Bug fixes diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index 2b57fbe9..004abc84 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -1306,7 +1306,7 @@ def ogr_open_arrow( str sql_dialect=None, int return_fids=False, int batch_size=0, - use_pyarrow=True, + use_pyarrow=False, ): cdef int err = 0 diff --git a/pyogrio/raw.py b/pyogrio/raw.py index 8abf8232..6466f4d7 100644 --- a/pyogrio/raw.py +++ b/pyogrio/raw.py @@ -303,6 +303,7 @@ def read_arrow( return_fids=return_fids, skip_features=gdal_skip_features, batch_size=batch_size, + use_pyarrow=True, **kwargs, ) as source: meta, reader = source @@ -357,7 +358,7 @@ def open_arrow( sql_dialect=None, return_fids=False, batch_size=65_536, - use_pyarrow=True, + use_pyarrow=False, **kwargs, ): """ @@ -365,16 +366,16 @@ def open_arrow( See docstring of `read` for parameters. - The RecordBatchReader is reading from a stream provided by OGR and must not be + The returned object is reading from a stream provided by OGR and must not be accessed after the OGR dataset has been closed, i.e. after the context manager has been closed. - By default this function returns a `pyarrow.RecordBatchReader`. Optionally, - you can use this function without a `pyarrow` dependency by specifying - ``use_pyarrow=False``. In that case, the returned reader will be a - generic object implementing the `Arrow PyCapsule Protocol`_ (i.e. having - an ``__arrow_c_stream__`` method). This object can then be consumed by - your Arrow implementation of choice that supports this protocol. 
+ By default this function returns a generic stream object implementing + the `Arrow PyCapsule Protocol`_ (i.e. having an ``__arrow_c_stream__`` + method). This object can then be consumed by your Arrow implementation + of choice that supports this protocol. + Optionally, you can specify ``use_pyarrow=True`` to directly get the + stream as a `pyarrow.RecordBatchReader`. .. _Arrow PyCapsule Protocol: https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html @@ -382,10 +383,11 @@ def open_arrow( ---------------- batch_size : int (default: 65_536) Maximum number of features to retrieve in a batch. - use_pyarrow : bool (default: True) - If False, return a generic ArrowStream object instead of a pyarrow - RecordBatchReader. This object needs to be passed to another library - supporting the Arrow PyCapsule Protocol to consume the stream of data. + use_pyarrow : bool (default: False) + If True, return a pyarrow RecordBatchReader instead of a generic + ArrowStream object. In the default case, this stream object needs + to be passed to another library supporting the Arrow PyCapsule + Protocol to consume the stream of data. 
Examples -------- @@ -395,28 +397,29 @@ def open_arrow( >>> import shapely >>> >>> with open_arrow(path) as source: - >>> meta, reader = source - >>> for table in reader: - >>> geometries = shapely.from_wkb(table[meta["geometry_name"]]) - - Or without directly returning a pyarrow object, allowing you to consume - the `stream` with any library that supports the Arrow PyCapsule protocol - (in this example still using pyarrow for that): - - >>> with open_arrow(path, use_pyarrow=False) as source: >>> meta, stream = source + >>> # wrap the arrow stream object in a pyarrow RecordBatchReader >>> reader = pa.RecordBatchReader.from_stream(stream) - >>> for table in reader: - >>> geometries = shapely.from_wkb(table[meta["geometry_name"] or "wkb_geometry"]) + >>> for batch in reader: + >>> geometries = shapely.from_wkb(batch[meta["geometry_name"] or "wkb_geometry"]) + + The returned `stream` object needs to be consumed by a library implementing + the Arrow PyCapsule Protocol. In the above example, pyarrow is used through + its RecordBatchReader. For this case, you can also specify ``use_pyarrow=True`` + to directly get this result as a short-cut: + + >>> with open_arrow(path, use_pyarrow=True) as source: + >>> meta, reader = source + >>> for batch in reader: + >>> geometries = shapely.from_wkb(batch[meta["geometry_name"] or "wkb_geometry"]) Returns ------- (dict, pyarrow.RecordBatchReader or ArrowStream) Returns a tuple of meta information about the data source in a dict, - and a data stream object (a pyarrow RecordBatchReader if - `use_pyarrow` is set to True, otherwise a generic ArrowStrem - object). + and a data stream object (a generic ArrowStream object, or a pyarrow + RecordBatchReader if `use_pyarrow` is set to True). 
Meta is: { "crs": "", diff --git a/pyogrio/tests/test_arrow.py b/pyogrio/tests/test_arrow.py index 2ff50cf0..481e6b8c 100644 --- a/pyogrio/tests/test_arrow.py +++ b/pyogrio/tests/test_arrow.py @@ -139,8 +139,8 @@ def test_read_arrow_raw(naturalearth_lowres): assert isinstance(table, pyarrow.Table) -def test_open_arrow(naturalearth_lowres): - with open_arrow(naturalearth_lowres) as (meta, reader): +def test_open_arrow_pyarrow(naturalearth_lowres): + with open_arrow(naturalearth_lowres, use_pyarrow=True) as (meta, reader): assert isinstance(meta, dict) assert isinstance(reader, pyarrow.RecordBatchReader) assert isinstance(reader.read_all(), pyarrow.Table) @@ -150,7 +150,10 @@ def test_open_arrow_batch_size(naturalearth_lowres): meta, table = read_arrow(naturalearth_lowres) batch_size = math.ceil(len(table) / 2) - with open_arrow(naturalearth_lowres, batch_size=batch_size) as (meta, reader): + with open_arrow(naturalearth_lowres, batch_size=batch_size, use_pyarrow=True) as ( + meta, + reader, + ): assert isinstance(meta, dict) assert isinstance(reader, pyarrow.RecordBatchReader) count = 0 @@ -212,7 +215,7 @@ def test_read_arrow_geoarrow_metadata(naturalearth_lowres): def test_open_arrow_capsule_protocol(naturalearth_lowres): pytest.importorskip("pyarrow", minversion="14") - with open_arrow(naturalearth_lowres, use_pyarrow=False) as (meta, reader): + with open_arrow(naturalearth_lowres) as (meta, reader): assert isinstance(meta, dict) assert isinstance(reader, pyogrio._io._ArrowStream) @@ -228,7 +231,7 @@ def test_open_arrow_capsule_protocol_without_pyarrow(naturalearth_lowres): # Make PyArrow temporarily unavailable (importing will fail) sys.modules["pyarrow"] = None try: - with open_arrow(naturalearth_lowres, use_pyarrow=False) as (meta, reader): + with open_arrow(naturalearth_lowres) as (meta, reader): assert isinstance(meta, dict) assert isinstance(reader, pyogrio._io._ArrowStream) result = pyarrow.table(reader) diff --git a/pyogrio/tests/test_raw_io.py 
b/pyogrio/tests/test_raw_io.py index 7e73b25d..8330bbfe 100644 --- a/pyogrio/tests/test_raw_io.py +++ b/pyogrio/tests/test_raw_io.py @@ -1204,7 +1204,7 @@ def test_open_arrow_capsule_protocol_without_pyarrow(naturalearth_lowres): # this test is included here instead of test_arrow.py to ensure we also run # it when pyarrow is not installed - with open_arrow(naturalearth_lowres, use_pyarrow=False) as (meta, reader): + with open_arrow(naturalearth_lowres) as (meta, reader): assert isinstance(meta, dict) assert isinstance(reader, pyogrio._io._ArrowStream) capsule = reader.__arrow_c_stream__()