Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Plumb pylibcudf datetime APIs through cudf python #17275

Merged
merged 3 commits on Nov 8, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
180 changes: 70 additions & 110 deletions python/cudf/cudf/_lib/datetime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -9,41 +9,29 @@ from libcpp.utility cimport move

cimport pylibcudf.libcudf.datetime as libcudf_datetime
from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.column.column_view cimport column_view
from pylibcudf.libcudf.filling cimport calendrical_month_sequence
from pylibcudf.libcudf.scalar.scalar cimport scalar
from pylibcudf.libcudf.types cimport size_type
from pylibcudf.datetime import DatetimeComponent
from pylibcudf.datetime import DatetimeComponent, RoundingFrequency

from cudf._lib.column cimport Column
from cudf._lib.scalar cimport DeviceScalar
import pylibcudf as plc


@acquire_spill_lock()
def add_months(Column col, Column months):
# months must be int16 dtype
cdef unique_ptr[column] c_result
cdef column_view col_view = col.view()
cdef column_view months_view = months.view()

with nogil:
c_result = move(
libcudf_datetime.add_calendrical_months(
col_view,
months_view
)
return Column.from_pylibcudf(
plc.datetime.add_calendrical_months(
col.to_pylibcudf(mode="read"),
months.to_pylibcudf(mode="read")
)

return Column.from_unique_ptr(move(c_result))
)


@acquire_spill_lock()
def extract_datetime_component(Column col, object field):

cdef unique_ptr[column] c_result
cdef column_view col_view = col.view()
cdef libcudf_datetime.datetime_component component

component_names = {
"year": DatetimeComponent.YEAR,
"month": DatetimeComponent.MONTH,
Expand All @@ -57,33 +45,29 @@ def extract_datetime_component(Column col, object field):
"nanosecond": DatetimeComponent.NANOSECOND,
}
if field == "day_of_year":
with nogil:
c_result = move(libcudf_datetime.day_of_year(col_view))
result = Column.from_pylibcudf(
plc.datetime.day_of_year(
col.to_pylibcudf(mode="read")
)
)
elif field in component_names:
component = component_names[field]
with nogil:
c_result = move(
libcudf_datetime.extract_datetime_component(
col_view,
component
)
result = Column.from_pylibcudf(
plc.datetime.extract_datetime_component(
col.to_pylibcudf(mode="read"),
component_names[field],
)
)
if field == "weekday":
# Pandas counts Monday-Sunday as 0-6
# while libcudf counts Monday-Sunday as 1-7
result = result - result.dtype.type(1)
else:
raise ValueError(f"Invalid field: '{field}'")

result = Column.from_unique_ptr(move(c_result))

if field == "weekday":
# Pandas counts Monday-Sunday as 0-6
# while libcudf counts Monday-Sunday as 1-7
result = result - result.dtype.type(1)

return result


cdef libcudf_datetime.rounding_frequency _get_rounding_frequency(object freq):
cdef libcudf_datetime.rounding_frequency freq_val

# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Timedelta.resolution_string.html
old_to_new_freq_map = {
"H": "h",
Expand All @@ -101,78 +85,60 @@ cdef libcudf_datetime.rounding_frequency _get_rounding_frequency(object freq):
FutureWarning
)
freq = old_to_new_freq_map.get(freq)
if freq == "D":
freq_val = libcudf_datetime.rounding_frequency.DAY
elif freq == "h":
freq_val = libcudf_datetime.rounding_frequency.HOUR
elif freq == "min":
freq_val = libcudf_datetime.rounding_frequency.MINUTE
elif freq == "s":
freq_val = libcudf_datetime.rounding_frequency.SECOND
elif freq == "ms":
freq_val = libcudf_datetime.rounding_frequency.MILLISECOND
elif freq == "us":
freq_val = libcudf_datetime.rounding_frequency.MICROSECOND
elif freq == "ns":
freq_val = libcudf_datetime.rounding_frequency.NANOSECOND
rounding_fequency_map = {
"D": RoundingFrequency.DAY,
"h": RoundingFrequency.HOUR,
"min": RoundingFrequency.MINUTE,
"s": RoundingFrequency.SECOND,
"ms": RoundingFrequency.MILLISECOND,
"us": RoundingFrequency.MICROSECOND,
"ns": RoundingFrequency.NANOSECOND,
}
if freq in rounding_fequency_map:
return rounding_fequency_map[freq]
else:
raise ValueError(f"Invalid resolution: '{freq}'")
return freq_val


@acquire_spill_lock()
def ceil_datetime(Column col, object freq):
cdef unique_ptr[column] c_result
cdef column_view col_view = col.view()
cdef libcudf_datetime.rounding_frequency freq_val = \
_get_rounding_frequency(freq)

with nogil:
c_result = move(libcudf_datetime.ceil_datetimes(col_view, freq_val))

result = Column.from_unique_ptr(move(c_result))
return result
return Column.from_pylibcudf(
plc.datetime.ceil_datetimes(
col.to_pylibcudf(mode="read"),
_get_rounding_frequency(freq),
)
)


@acquire_spill_lock()
def floor_datetime(Column col, object freq):
cdef unique_ptr[column] c_result
cdef column_view col_view = col.view()
cdef libcudf_datetime.rounding_frequency freq_val = \
_get_rounding_frequency(freq)

with nogil:
c_result = move(libcudf_datetime.floor_datetimes(col_view, freq_val))

result = Column.from_unique_ptr(move(c_result))
return result
return Column.from_pylibcudf(
plc.datetime.floor_datetimes(
col.to_pylibcudf(mode="read"),
_get_rounding_frequency(freq),
)
)


@acquire_spill_lock()
def round_datetime(Column col, object freq):
cdef unique_ptr[column] c_result
cdef column_view col_view = col.view()
cdef libcudf_datetime.rounding_frequency freq_val = \
_get_rounding_frequency(freq)

with nogil:
c_result = move(libcudf_datetime.round_datetimes(col_view, freq_val))

result = Column.from_unique_ptr(move(c_result))
return result
return Column.from_pylibcudf(
plc.datetime.round_datetimes(
col.to_pylibcudf(mode="read"),
_get_rounding_frequency(freq),
)
)


@acquire_spill_lock()
def is_leap_year(Column col):
"""Returns a boolean indicator whether the year of the date is a leap year
"""
cdef unique_ptr[column] c_result
cdef column_view col_view = col.view()

with nogil:
c_result = move(libcudf_datetime.is_leap_year(col_view))

return Column.from_unique_ptr(move(c_result))
return Column.from_pylibcudf(
plc.datetime.is_leap_year(
col.to_pylibcudf(mode="read")
)
)


@acquire_spill_lock()
Expand All @@ -199,34 +165,28 @@ def extract_quarter(Column col):
Returns a column which contains the corresponding quarter of the year
for every timestamp inside the input column.
"""
cdef unique_ptr[column] c_result
cdef column_view col_view = col.view()

with nogil:
c_result = move(libcudf_datetime.extract_quarter(col_view))

return Column.from_unique_ptr(move(c_result))
return Column.from_pylibcudf(
plc.datetime.extract_quarter(
col.to_pylibcudf(mode="read")
)
)


@acquire_spill_lock()
def days_in_month(Column col):
"""Extracts the number of days in the month of the date
"""
cdef unique_ptr[column] c_result
cdef column_view col_view = col.view()

with nogil:
c_result = move(libcudf_datetime.days_in_month(col_view))

return Column.from_unique_ptr(move(c_result))
return Column.from_pylibcudf(
plc.datetime.days_in_month(
col.to_pylibcudf(mode="read")
)
)


@acquire_spill_lock()
def last_day_of_month(Column col):
cdef unique_ptr[column] c_result
cdef column_view col_view = col.view()

with nogil:
c_result = move(libcudf_datetime.last_day_of_month(col_view))

return Column.from_unique_ptr(move(c_result))
return Column.from_pylibcudf(
plc.datetime.last_day_of_month(
col.to_pylibcudf(mode="read")
)
)
Loading