Skip to content

Commit

Permalink
Add stream parameters in pylibcudf IO APIs
Browse files: browse the repository at this point in the history
  • Loading branch information
Matt711 committed Dec 18, 2024
1 parent d742599 commit e27cad2
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 43 deletions.
3 changes: 2 additions & 1 deletion python/pylibcudf/pylibcudf/io/csv.pxd
Original file line number | Diff line number | Diff line change
Expand Up @@ -18,6 +18,7 @@ from pylibcudf.libcudf.io.types cimport (
table_with_metadata,
)
from pylibcudf.libcudf.types cimport size_type
from rmm._cuda.stream import Stream

cdef class CsvReaderOptions:
cdef csv_reader_options c_obj
Expand Down Expand Up @@ -61,7 +62,7 @@ cdef class CsvReaderOptionsBuilder:
cpdef CsvReaderOptionsBuilder dayfirst(self, bool dayfirst)
cpdef CsvReaderOptions build(self)

cpdef TableWithMetadata read_csv(CsvReaderOptions options)
cpdef TableWithMetadata read_csv(CsvReaderOptions options, Stream stream)

cdef class CsvWriterOptions:
cdef csv_writer_options c_obj
Expand Down
43 changes: 3 additions & 40 deletions python/pylibcudf/pylibcudf/io/csv.pyi
Original file line number | Diff line number | Diff line change
Expand Up @@ -13,6 +13,7 @@ from pylibcudf.io.types import (
)
from pylibcudf.table import Table
from pylibcudf.types import DataType
from rmm._cuda.stream import Stream

class CsvReaderOptions:
def __init__(self): ...
Expand Down Expand Up @@ -56,46 +57,8 @@ class CsvReaderOptionsBuilder:
def build(self) -> CsvReaderOptions: ...

def read_csv(
source_info: SourceInfo,
*,
compression: CompressionType = CompressionType.AUTO,
byte_range_offset: int = 0,
byte_range_size: int = 0,
col_names: list[str] | None = None,
prefix: str = "",
mangle_dupe_cols: bool = True,
usecols: list[int] | list[str] | None = None,
nrows: int = -1,
skiprows: int = 0,
skipfooter: int = 0,
header: int = 0,
lineterminator: str = "\n",
delimiter: str | None = None,
thousands: str | None = None,
decimal: str = ".",
comment: str | None = None,
delim_whitespace: bool = False,
skipinitialspace: bool = False,
skip_blank_lines: bool = True,
quoting: QuoteStyle = QuoteStyle.MINIMAL,
quotechar: str = '"',
doublequote: bool = True,
parse_dates: list[str] | list[int] | None = None,
parse_hex: list[str] | list[int] | None = None,
# Technically this should be dict/list
# but using a fused type prevents using None as default
dtypes: Mapping[str, DataType] | list[DataType] | None = None,
true_values: list[str] | None = None,
false_values: list[str] | None = None,
na_values: list[str] | None = None,
keep_default_na: bool = True,
na_filter: bool = True,
dayfirst: bool = False,
# Note: These options are supported by the libcudf reader
# but are not exposed here since there is no demand for them
# on the Python side yet.
# detect_whitespace_around_quotes: bool = False,
# timestamp_type: DataType = DataType(type_id.EMPTY),
options: CsvReaderOptions,
stream: Stream,
) -> TableWithMetadata: ...
def write_csv(options: CsvWriterOptionsBuilder): ...

Expand Down
6 changes: 4 additions & 2 deletions python/pylibcudf/pylibcudf/io/csv.pyx
Original file line number | Diff line number | Diff line change
Expand Up @@ -22,6 +22,7 @@ from pylibcudf.libcudf.io.types cimport (
from pylibcudf.libcudf.types cimport data_type, size_type
from pylibcudf.types cimport DataType
from pylibcudf.table cimport Table
from rmm._cuda.stream import Stream

__all__ = [
"read_csv",
Expand Down Expand Up @@ -629,7 +630,8 @@ cdef class CsvReaderOptionsBuilder:


cpdef TableWithMetadata read_csv(
CsvReaderOptions options
CsvReaderOptions options,
Stream stream,
):
"""
Read from CSV format.
Expand All @@ -646,7 +648,7 @@ cpdef TableWithMetadata read_csv(
"""
cdef table_with_metadata c_result
with nogil:
c_result = move(cpp_read_csv(options.c_obj))
c_result = move(cpp_read_csv(options.c_obj), stream.view())

cdef TableWithMetadata tbl_meta = TableWithMetadata.from_libcudf(c_result)
return tbl_meta
Expand Down

0 comments on commit e27cad2

Please sign in to comment.