Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add io/timezone APIs to pylibcudf #16771

Merged
merged 9 commits into from
Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ I/O Functions
csv
json
parquet
timezone
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
========
Timezone
========

.. automodule:: pylibcudf.io.timezone
    :members:
27 changes: 4 additions & 23 deletions python/cudf/cudf/_lib/timezone.pyx
Original file line number Diff line number Diff line change
@@ -1,29 +1,10 @@
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

from libcpp.memory cimport unique_ptr
from libcpp.optional cimport make_optional
from libcpp.string cimport string
from libcpp.utility cimport move
import pylibcudf as plc

from pylibcudf.libcudf.io.timezone cimport (
make_timezone_transition_table as cpp_make_timezone_transition_table,
)
from pylibcudf.libcudf.table.table cimport table

from cudf._lib.utils cimport columns_from_unique_ptr
from cudf._lib.column cimport Column


# Builds the timezone transition table for ``tzname`` using the TZif files
# under ``tzdir``; the final statement returns it as a list of cudf ``Column``
# objects.
# NOTE(review): this span looks like an unmarked diff hunk -- the statements
# from the ``cdef`` declarations through ``columns_from_unique_ptr`` appear to
# be the removed libcudf-based body, and the last two lines the pylibcudf-based
# replacement. Confirm against the merged file before relying on either half.
def make_timezone_transition_table(tzdir, tzname):
cdef unique_ptr[table] c_result
# Encode the Python strings into C++ ``string`` values before the GIL is
# released below.
cdef string c_tzdir = tzdir.encode()
cdef string c_tzname = tzname.encode()

# The libcudf call runs without the GIL.
with nogil:
c_result = move(
cpp_make_timezone_transition_table(
make_optional[string](c_tzdir),
c_tzname
)
)

return columns_from_unique_ptr(move(c_result))
# Delegate to pylibcudf and wrap each resulting column as a cudf Column.
plc_table = plc.io.timezone.make_timezone_transition_table(tzdir, tzname)
return [Column.from_pylibcudf(col) for col in plc_table.columns()]
4 changes: 3 additions & 1 deletion python/pylibcudf/pylibcudf/io/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
# the License.
# =============================================================================

set(cython_sources avro.pyx csv.pyx datasource.pyx json.pyx orc.pyx parquet.pyx types.pyx)
set(cython_sources avro.pyx csv.pyx datasource.pyx json.pyx orc.pyx parquet.pyx timezone.pyx
types.pyx
)

set(linked_libraries cudf::cudf)
rapids_cython_create_modules(
Expand Down
2 changes: 1 addition & 1 deletion python/pylibcudf/pylibcudf/io/__init__.pxd
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

# CSV is removed since it is def not cpdef (to force kw-only arguments)
from . cimport avro, datasource, json, orc, parquet, types
from . cimport avro, datasource, json, orc, parquet, timezone, types
from .types cimport SourceInfo, TableWithMetadata
2 changes: 1 addition & 1 deletion python/pylibcudf/pylibcudf/io/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from . import avro, csv, datasource, json, orc, parquet, types
from . import avro, csv, datasource, json, orc, parquet, timezone, types
from .types import SinkInfo, SourceInfo, TableWithMetadata
6 changes: 6 additions & 0 deletions python/pylibcudf/pylibcudf/io/timezone.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from ..table cimport Table


cpdef Table make_timezone_transition_table(str tzif_dir, str timezone_name)
43 changes: 43 additions & 0 deletions python/pylibcudf/pylibcudf/io/timezone.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from libcpp.memory cimport unique_ptr
from libcpp.optional cimport make_optional
from libcpp.string cimport string
from libcpp.utility cimport move
from pylibcudf.libcudf.io.timezone cimport (
make_timezone_transition_table as cpp_make_timezone_transition_table,
)
from pylibcudf.libcudf.table.table cimport table

from ..table cimport Table


cpdef Table make_timezone_transition_table(str tzif_dir, str timezone_name):
    """
    Build the transition table used to convert ORC timestamps to UTC.

    Parameters
    ----------
    tzif_dir : str
        Directory that contains the TZif files.
    timezone_name : str
        Standard name of the timezone, e.g. ``"America/Los_Angeles"``.

    Returns
    -------
    Table
        Transition table for the requested timezone.
    """
    # Encode the Python strings into C++ strings up front: the ``nogil``
    # section below works only with these C++ values.
    cdef string tzif_dir_bytes = tzif_dir.encode()
    cdef string tz_name_bytes = timezone_name.encode()
    cdef unique_ptr[table] transitions

    with nogil:
        transitions = move(
            cpp_make_timezone_transition_table(
                make_optional[string](tzif_dir_bytes), tz_name_bytes
            )
        )

    # Hand ownership of the libcudf table over to the pylibcudf wrapper.
    return Table.from_libcudf(move(transitions))
16 changes: 16 additions & 0 deletions python/pylibcudf/pylibcudf/tests/io/test_timezone.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
import os
import zoneinfo

import pylibcudf as plc
import pytest


def test_make_timezone_transition_table():
    """Check that a transition table can be built from the system TZif data.

    The test is skipped when no directory listed in ``zoneinfo.TZPATH``
    contains the requested zone file.
    """
    tz_name = "America/Los_Angeles"
    # zoneinfo.TZPATH may list directories that are missing on this machine
    # (the POSIX default is a fixed list of candidate locations), so pick the
    # first entry that really holds the zone file instead of assuming
    # TZPATH[0] is usable. An empty TZPATH naturally yields None as well.
    tz_path = next(
        (
            path
            for path in zoneinfo.TZPATH
            if os.path.isfile(os.path.join(path, tz_name))
        ),
        None,
    )
    if tz_path is None:
        pytest.skip("No TZPATH available.")

    result = plc.io.timezone.make_timezone_transition_table(tz_path, tz_name)
    assert isinstance(result, plc.Table)
    assert result.num_rows() > 0
Loading