Skip to content

Commit

Permalink
Add io/timezone APIs to pylibcudf (#16771)
Browse files Browse the repository at this point in the history
Contributes to #15162

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)
  - Vyas Ramasubramani (https://github.com/vyasr)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #16771
  • Loading branch information
mroeschke authored Sep 26, 2024
1 parent 12ee360 commit 61af769
Show file tree
Hide file tree
Showing 9 changed files with 81 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ I/O Functions
csv
json
parquet
timezone
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
========
Timezone
========

.. automodule:: pylibcudf.io.timezone
:members:
27 changes: 4 additions & 23 deletions python/cudf/cudf/_lib/timezone.pyx
Original file line number Diff line number Diff line change
@@ -1,29 +1,10 @@
# Copyright (c) 2023-2024, NVIDIA CORPORATION.

from libcpp.memory cimport unique_ptr
from libcpp.optional cimport make_optional
from libcpp.string cimport string
from libcpp.utility cimport move
import pylibcudf as plc

from pylibcudf.libcudf.io.timezone cimport (
make_timezone_transition_table as cpp_make_timezone_transition_table,
)
from pylibcudf.libcudf.table.table cimport table

from cudf._lib.utils cimport columns_from_unique_ptr
from cudf._lib.column cimport Column


# cudf-internal wrapper that builds the timezone transition table for
# `tzname`, using `tzdir` as the directory argument.
# NOTE(review): this span is a rendered diff without +/- markers, so two
# bodies appear back to back. Presumably the body that calls the libcudf C++
# function directly is the removed version and the last two lines are its
# replacement -- confirm against the commit before relying on either.
def make_timezone_transition_table(tzdir, tzname):
# Direct libcudf path: encode both arguments into C++ strings up front so the
# call below can run without the GIL.
cdef unique_ptr[table] c_result
cdef string c_tzdir = tzdir.encode()
cdef string c_tzname = tzname.encode()

with nogil:
c_result = move(
cpp_make_timezone_transition_table(
make_optional[string](c_tzdir),
c_tzname
)
)

return columns_from_unique_ptr(move(c_result))
# pylibcudf path: delegate table construction to pylibcudf and wrap each
# column of the resulting table as a cudf Column.
plc_table = plc.io.timezone.make_timezone_transition_table(tzdir, tzname)
return [Column.from_pylibcudf(col) for col in plc_table.columns()]
4 changes: 3 additions & 1 deletion python/pylibcudf/pylibcudf/io/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
# the License.
# =============================================================================

set(cython_sources avro.pyx csv.pyx datasource.pyx json.pyx orc.pyx parquet.pyx types.pyx)
set(cython_sources avro.pyx csv.pyx datasource.pyx json.pyx orc.pyx parquet.pyx timezone.pyx
types.pyx
)

set(linked_libraries cudf::cudf)
rapids_cython_create_modules(
Expand Down
2 changes: 1 addition & 1 deletion python/pylibcudf/pylibcudf/io/__init__.pxd
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

# csv is not cimported here because its functions are `def`, not `cpdef` (to force kw-only arguments)
from . cimport avro, datasource, json, orc, parquet, types
from . cimport avro, datasource, json, orc, parquet, timezone, types
from .types cimport SourceInfo, TableWithMetadata
2 changes: 1 addition & 1 deletion python/pylibcudf/pylibcudf/io/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from . import avro, csv, datasource, json, orc, parquet, types
from . import avro, csv, datasource, json, orc, parquet, timezone, types
from .types import SinkInfo, SourceInfo, TableWithMetadata
6 changes: 6 additions & 0 deletions python/pylibcudf/pylibcudf/io/timezone.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from ..table cimport Table


cpdef Table make_timezone_transition_table(str tzif_dir, str timezone_name)
43 changes: 43 additions & 0 deletions python/pylibcudf/pylibcudf/io/timezone.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from libcpp.memory cimport unique_ptr
from libcpp.optional cimport make_optional
from libcpp.string cimport string
from libcpp.utility cimport move
from pylibcudf.libcudf.io.timezone cimport (
make_timezone_transition_table as cpp_make_timezone_transition_table,
)
from pylibcudf.libcudf.table.table cimport table

from ..table cimport Table


cpdef Table make_timezone_transition_table(str tzif_dir, str timezone_name):
    """
    Build the transition table used to convert ORC timestamps to UTC.

    Parameters
    ----------
    tzif_dir : str
        Directory in which the TZif files are located.
    timezone_name : str
        Standard timezone name.

    Returns
    -------
    Table
        The transition table for the given timezone.
    """
    # Encode to C++ strings while the GIL is still held; the libcudf call
    # below runs without it and must not touch Python objects.
    cdef string tzif_dir_bytes = tzif_dir.encode()
    cdef string timezone_name_bytes = timezone_name.encode()
    cdef unique_ptr[table] transition_table

    with nogil:
        transition_table = move(
            cpp_make_timezone_transition_table(
                make_optional[string](tzif_dir_bytes),
                timezone_name_bytes
            )
        )

    # Hand ownership of the libcudf table over to the pylibcudf Table.
    return Table.from_libcudf(move(transition_table))
16 changes: 16 additions & 0 deletions python/pylibcudf/pylibcudf/tests/io/test_timezone.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
import zoneinfo

import pylibcudf as plc
import pytest


def test_make_timezone_transition_table():
    """Check that a transition table can be built from the system tz data.

    The test is skipped when no entry of ``zoneinfo.TZPATH`` contains the
    TZif file for the timezone under test.
    """
    # Imported locally so the test only relies on names it brings in itself.
    from pathlib import Path

    timezone_name = "America/Los_Angeles"
    # zoneinfo.TZPATH is a list of candidate search directories; an entry may
    # be missing on disk or lack this zone, so use the first entry that
    # actually contains the zone file rather than assuming TZPATH[0] works.
    tz_path = next(
        (
            candidate
            for candidate in zoneinfo.TZPATH
            if (Path(candidate) / timezone_name).is_file()
        ),
        None,
    )
    if tz_path is None:
        pytest.skip(f"No TZPATH entry provides {timezone_name}.")
    result = plc.io.timezone.make_timezone_transition_table(
        tz_path, timezone_name
    )
    assert isinstance(result, plc.Table)
    assert result.num_rows() > 0

0 comments on commit 61af769

Please sign in to comment.