From a95fbc88f94df24c3418766fbbea5b6633ff2328 Mon Sep 17 00:00:00 2001
From: Matthew Murray <41342305+Matt711@users.noreply.github.com>
Date: Wed, 18 Dec 2024 16:30:46 -0500
Subject: [PATCH] Add JSON reader options structs to pylibcudf (#17614)

Apart of #17565

Authors:
  - Matthew Murray (https://github.com/Matt711)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)

URL: https://github.com/rapidsai/cudf/pull/17614
---
 python/cudf/cudf/io/json.py                   |  46 +-
 python/cudf_polars/cudf_polars/dsl/ir.py      |  10 +-
 python/pylibcudf/pylibcudf/io/json.pxd        |  60 ++-
 python/pylibcudf/pylibcudf/io/json.pyi        |  54 +-
 python/pylibcudf/pylibcudf/io/json.pyx        | 495 +++++++++++++-----
 .../pylibcudf/pylibcudf/tests/io/test_json.py |  57 +-
 6 files changed, 498 insertions(+), 224 deletions(-)

diff --git a/python/cudf/cudf/io/json.py b/python/cudf/cudf/io/json.py
index e0c9e535e6f..39a85465deb 100644
--- a/python/cudf/cudf/io/json.py
+++ b/python/cudf/cudf/io/json.py
@@ -161,13 +161,15 @@ def read_json(
         if cudf.get_option("io.json.low_memory") and lines:
             res_cols, res_col_names, res_child_names = (
                 plc.io.json.chunked_read_json(
-                    plc.io.SourceInfo(filepaths_or_buffers),
-                    processed_dtypes,
-                    c_compression,
-                    keep_quotes=keep_quotes,
-                    mixed_types_as_string=mixed_types_as_string,
-                    prune_columns=prune_columns,
-                    recovery_mode=c_on_bad_lines,
+                    plc.io.json._setup_json_reader_options(
+                        plc.io.SourceInfo(filepaths_or_buffers),
+                        processed_dtypes,
+                        c_compression,
+                        keep_quotes=keep_quotes,
+                        mixed_types_as_string=mixed_types_as_string,
+                        prune_columns=prune_columns,
+                        recovery_mode=c_on_bad_lines,
+                    )
                 )
             )
             df = cudf.DataFrame._from_data(
@@ -181,19 +183,23 @@ def read_json(
             return df
         else:
             table_w_meta = plc.io.json.read_json(
-                plc.io.SourceInfo(filepaths_or_buffers),
-                processed_dtypes,
-                c_compression,
-                lines,
-                byte_range_offset=byte_range[0]
-                if byte_range is not None
-                else 0,
-                byte_range_size=byte_range[1] if byte_range is not None else 0,
-                keep_quotes=keep_quotes,
-                mixed_types_as_string=mixed_types_as_string,
-                prune_columns=prune_columns,
-                recovery_mode=c_on_bad_lines,
-                extra_parameters=kwargs,
+                plc.io.json._setup_json_reader_options(
+                    plc.io.SourceInfo(filepaths_or_buffers),
+                    processed_dtypes,
+                    c_compression,
+                    lines,
+                    byte_range_offset=byte_range[0]
+                    if byte_range is not None
+                    else 0,
+                    byte_range_size=byte_range[1]
+                    if byte_range is not None
+                    else 0,
+                    keep_quotes=keep_quotes,
+                    mixed_types_as_string=mixed_types_as_string,
+                    prune_columns=prune_columns,
+                    recovery_mode=c_on_bad_lines,
+                    extra_parameters=kwargs,
+                )
             )
 
             df = cudf.DataFrame._from_data(
diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py
index b5af3bb80bf..1c1d4860eec 100644
--- a/python/cudf_polars/cudf_polars/dsl/ir.py
+++ b/python/cudf_polars/cudf_polars/dsl/ir.py
@@ -604,10 +604,12 @@ def slice_skip(tbl: plc.Table):
                 (name, typ, []) for name, typ in schema.items()
             ]
             plc_tbl_w_meta = plc.io.json.read_json(
-                plc.io.SourceInfo(paths),
-                lines=True,
-                dtypes=json_schema,
-                prune_columns=True,
+                plc.io.json._setup_json_reader_options(
+                    plc.io.SourceInfo(paths),
+                    lines=True,
+                    dtypes=json_schema,
+                    prune_columns=True,
+                )
             )
             # TODO: I don't think cudf-polars supports nested types in general right now
             # (but when it does, we should pass child column names from nested columns in)
diff --git a/python/pylibcudf/pylibcudf/io/json.pxd b/python/pylibcudf/pylibcudf/io/json.pxd
index 4894ca3bd6e..7e446298ba9 100644
--- a/python/pylibcudf/pylibcudf/io/json.pxd
+++ b/python/pylibcudf/pylibcudf/io/json.pxd
@@ -8,6 +8,8 @@ from pylibcudf.io.types cimport (
 )
 from pylibcudf.libcudf.io.json cimport (
     json_recovery_mode_t,
+    json_reader_options,
+    json_reader_options_builder,
     json_writer_options,
     json_writer_options_builder,
 )
@@ -15,19 +17,43 @@ from pylibcudf.libcudf.types cimport size_type
 from pylibcudf.table cimport Table
 
 
-cpdef TableWithMetadata read_json(
-    SourceInfo source_info,
-    list dtypes = *,
-    compression_type compression = *,
-    bool lines = *,
-    size_t byte_range_offset = *,
-    size_t byte_range_size = *,
-    bool keep_quotes = *,
-    bool mixed_types_as_string = *,
-    bool prune_columns = *,
-    json_recovery_mode_t recovery_mode = *,
-    dict extra_parameters = *,
-)
+cdef class JsonReaderOptions:
+    cdef json_reader_options c_obj
+    cdef SourceInfo source
+    cpdef void set_dtypes(self, list types)
+    cpdef void enable_keep_quotes(self, bool keep_quotes)
+    cpdef void enable_mixed_types_as_string(self, bool mixed_types_as_string)
+    cpdef void enable_prune_columns(self, bool prune_columns)
+    cpdef void set_byte_range_offset(self, size_t offset)
+    cpdef void set_byte_range_size(self, size_t size)
+    cpdef void enable_lines(self, bool val)
+    # These hidden options are subjected to change without deprecation cycle.
+    # These are used to test libcudf JSON reader features, not used in cuDF.
+    cpdef void set_delimiter(self, str val)
+    cpdef void enable_dayfirst(self, bool val)
+    cpdef void enable_experimental(self, bool val)
+    cpdef void enable_normalize_single_quotes(self, bool val)
+    cpdef void enable_normalize_whitespace(self, bool val)
+    cpdef void set_strict_validation(self, bool val)
+    cpdef void allow_unquoted_control_chars(self, bool val)
+    cpdef void allow_numeric_leading_zeros(self, bool val)
+    cpdef void allow_nonnumeric_numbers(self, bool val)
+    cpdef void set_na_values(self, list vals)
+
+cdef class JsonReaderOptionsBuilder:
+    cdef json_reader_options_builder c_obj
+    cdef SourceInfo source
+    cpdef JsonReaderOptionsBuilder compression(self, compression_type compression)
+    cpdef JsonReaderOptionsBuilder lines(self, bool val)
+    cpdef JsonReaderOptionsBuilder keep_quotes(self, bool val)
+    cpdef JsonReaderOptionsBuilder byte_range_offset(self, size_t byte_range_offset)
+    cpdef JsonReaderOptionsBuilder byte_range_size(self, size_t byte_range_size)
+    cpdef JsonReaderOptionsBuilder recovery_mode(
+        self, json_recovery_mode_t recovery_mode
+    )
+    cpdef build(self)
+
+cpdef TableWithMetadata read_json(JsonReaderOptions options)
 
 cdef class JsonWriterOptions:
     cdef json_writer_options c_obj
@@ -50,12 +76,6 @@ cdef class JsonWriterOptionsBuilder:
 cpdef void write_json(JsonWriterOptions options)
 
 cpdef tuple chunked_read_json(
-    SourceInfo source_info,
-    list dtypes = *,
-    compression_type compression = *,
-    bool keep_quotes = *,
-    bool mixed_types_as_string = *,
-    bool prune_columns = *,
-    json_recovery_mode_t recovery_mode = *,
+    JsonReaderOptions options,
     int chunk_size= *,
 )
diff --git a/python/pylibcudf/pylibcudf/io/json.pyi b/python/pylibcudf/pylibcudf/io/json.pyi
index e0489742cd0..b84b437a3a2 100644
--- a/python/pylibcudf/pylibcudf/io/json.pyi
+++ b/python/pylibcudf/pylibcudf/io/json.pyi
@@ -19,18 +19,40 @@ ChildNameToTypeMap: TypeAlias = Mapping[str, ChildNameToTypeMap]
 
 NameAndType: TypeAlias = tuple[str, DataType, list[NameAndType]]
 
-def read_json(
-    source_info: SourceInfo,
-    dtypes: list[NameAndType] | None = None,
-    compression: CompressionType = CompressionType.AUTO,
-    lines: bool = False,
-    byte_range_offset: int = 0,
-    byte_range_size: int = 0,
-    keep_quotes: bool = False,
-    mixed_types_as_string: bool = False,
-    prune_columns: bool = False,
-    recovery_mode: JSONRecoveryMode = JSONRecoveryMode.FAIL,
-) -> TableWithMetadata: ...
+class JsonReaderOptions:
+    def set_dtypes(
+        self, types: list[DataType] | list[NameAndType]
+    ) -> None: ...
+    def enable_keep_quotes(self, keep_quotes: bool) -> None: ...
+    def enable_mixed_types_as_string(
+        self, mixed_types_as_string: bool
+    ) -> None: ...
+    def enable_prune_columns(self, prune_columns: bool) -> None: ...
+    def set_byte_range_offset(self, offset: int) -> None: ...
+    def set_byte_range_size(self, size: int) -> None: ...
+    def enable_lines(self, val: bool) -> None: ...
+    def set_delimiter(self, val: str) -> None: ...
+    def enable_dayfirst(self, val: bool) -> None: ...
+    def enable_experimental(self, val: bool) -> None: ...
+    def enable_normalize_single_quotes(self, val: bool) -> None: ...
+    def enable_normalize_whitespace(self, val: bool) -> None: ...
+    def set_strict_validation(self, val: bool) -> None: ...
+    def allow_unquoted_control_chars(self, val: bool) -> None: ...
+    def allow_numeric_leading_zeros(self, val: bool) -> None: ...
+    def allow_nonnumeric_numbers(self, val: bool) -> None: ...
+    def set_na_values(self, vals: list[str]) -> None: ...
+    @staticmethod
+    def builder(source: SourceInfo) -> JsonReaderOptionsBuilder: ...
+
+class JsonReaderOptionsBuilder:
+    def compression(self, compression: CompressionType) -> Self: ...
+    def lines(self, lines: bool) -> Self: ...
+    def byte_range_offset(self, byte_range_offset: int) -> Self: ...
+    def byte_range_size(self, byte_range_size: int) -> Self: ...
+    def recovery_mode(self, recovery_mode: JSONRecoveryMode) -> Self: ...
+    def build(self) -> JsonReaderOptions: ...
+
+def read_json(options: JsonReaderOptions) -> TableWithMetadata: ...
 
 class JsonWriterOptions:
     @staticmethod
@@ -48,12 +70,6 @@ class JsonWriterOptionsBuilder:
 
 def write_json(options: JsonWriterOptions) -> None: ...
 def chunked_read_json(
-    source_info: SourceInfo,
-    dtypes: list[NameAndType] | None = None,
-    compression: CompressionType = CompressionType.AUTO,
-    keep_quotes: bool = False,
-    mixed_types_as_string: bool = False,
-    prune_columns: bool = False,
-    recovery_mode: JSONRecoveryMode = JSONRecoveryMode.FAIL,
+    options: JsonReaderOptions,
     chunk_size: int = 100_000_000,
 ) -> tuple[list[Column], list[str], ChildNameToTypeMap]: ...
diff --git a/python/pylibcudf/pylibcudf/io/json.pyx b/python/pylibcudf/pylibcudf/io/json.pyx
index 16078b31566..1d8a559afad 100644
--- a/python/pylibcudf/pylibcudf/io/json.pyx
+++ b/python/pylibcudf/pylibcudf/io/json.pyx
@@ -25,6 +25,8 @@ __all__ = [
     "chunked_read_json",
     "read_json",
     "write_json",
+    "JsonReaderOptions",
+    "JsonReaderOptionsBuilder",
     "JsonWriterOptions",
     "JsonWriterOptionsBuilder"
 ]
@@ -51,23 +53,21 @@ cdef map[string, schema_element] _generate_schema_map(list dtypes):
     return schema_map
 
 
-cdef json_reader_options _setup_json_reader_options(
+cpdef JsonReaderOptions _setup_json_reader_options(
         SourceInfo source_info,
         list dtypes,
-        compression_type compression,
-        bool lines,
-        size_t byte_range_offset,
-        size_t byte_range_size,
-        bool keep_quotes,
-        bool mixed_types_as_string,
-        bool prune_columns,
-        json_recovery_mode_t recovery_mode,
-        dict extra_parameters=None):
-
-    cdef vector[string] na_vec
-    cdef vector[data_type] types_vec
-    cdef json_reader_options opts = (
-        json_reader_options.builder(source_info.c_obj)
+        compression_type compression = compression_type.AUTO,
+        bool lines = False,
+        size_t byte_range_offset = 0,
+        size_t byte_range_size = 0,
+        bool keep_quotes = False,
+        bool mixed_types_as_string = False,
+        bool prune_columns = False,
+        json_recovery_mode_t recovery_mode = json_recovery_mode_t.FAIL,
+        dict extra_parameters=None,
+):
+    options = (
+        JsonReaderOptions.builder(source_info)
         .compression(compression)
         .lines(lines)
         .byte_range_offset(byte_range_offset)
@@ -77,88 +77,359 @@ cdef json_reader_options _setup_json_reader_options(
     )
 
     if dtypes is not None:
-        if isinstance(dtypes[0], tuple):
-            opts.set_dtypes(move(_generate_schema_map(dtypes)))
-        else:
-            for dtype in dtypes:
-                types_vec.push_back((<DataType>dtype).c_obj)
-            opts.set_dtypes(types_vec)
+        options.set_dtypes(dtypes)
 
-    opts.enable_keep_quotes(keep_quotes)
-    opts.enable_mixed_types_as_string(mixed_types_as_string)
-    opts.enable_prune_columns(prune_columns)
+    options.enable_keep_quotes(keep_quotes)
+    options.enable_mixed_types_as_string(mixed_types_as_string)
+    options.enable_prune_columns(prune_columns)
 
     # These hidden options are subjected to change without deprecation cycle.
     # These are used to test libcudf JSON reader features, not used in cuDF.
     if extra_parameters is not None:
         for key, value in extra_parameters.items():
             if key == 'delimiter':
-                opts.set_delimiter(ord(value))
+                options.set_delimiter(value)
             elif key == 'dayfirst':
-                opts.enable_dayfirst(value)
+                options.enable_dayfirst(value)
             elif key == 'experimental':
-                opts.enable_experimental(value)
+                options.enable_experimental(value)
             elif key == 'normalize_single_quotes':
-                opts.enable_normalize_single_quotes(value)
+                options.enable_normalize_single_quotes(value)
             elif key == 'normalize_whitespace':
-                opts.enable_normalize_whitespace(value)
+                options.enable_normalize_whitespace(value)
             elif key == 'strict_validation':
-                opts.set_strict_validation(value)
+                options.set_strict_validation(value)
             elif key == 'allow_unquoted_control_chars':
-                opts.allow_unquoted_control_chars(value)
+                options.allow_unquoted_control_chars(value)
             elif key == 'allow_numeric_leading_zeros':
-                opts.allow_numeric_leading_zeros(value)
+                options.allow_numeric_leading_zeros(value)
             elif key == 'allow_nonnumeric_numbers':
-                opts.allow_nonnumeric_numbers(value)
+                options.allow_nonnumeric_numbers(value)
             elif key == 'na_values':
-                for na_val in value:
-                    if isinstance(na_val, str):
-                        na_vec.push_back(na_val.encode())
-                opts.set_na_values(na_vec)
+                options.set_na_values(value)
             else:
                 raise ValueError(
                     "cudf engine doesn't support the "
                     f"'{key}' keyword argument for read_json"
                 )
-    return opts
+    return options
+
+
+cdef class JsonReaderOptions:
+    """
+    The settings to use for ``read_json``
+
+    For details, see `:cpp:class:`cudf::io::json_reader_options`
+    """
+    @staticmethod
+    def builder(SourceInfo source):
+        """
+        Create a JsonReaderOptionsBuilder object
+
+        For details, see :cpp:func:`cudf::io::json_reader_options::builder`
+
+        Parameters
+        ----------
+        sink : SourceInfo
+            The source to read the JSON file from.
+
+        Returns
+        -------
+        JsonReaderOptionsBuilder
+            Builder to build JsonReaderOptions
+        """
+        cdef JsonReaderOptionsBuilder json_builder = (
+            JsonReaderOptionsBuilder.__new__(JsonReaderOptionsBuilder)
+        )
+        json_builder.c_obj = json_reader_options.builder(source.c_obj)
+        json_builder.source = source
+        return json_builder
+
+    cpdef void set_dtypes(self, list types):
+        """
+        Set data types for columns to be read.
+
+        Parameters
+        ----------
+        types : list
+            List of dtypes or a list of tuples of
+            column names, dtypes, and list of tuples
+            (to support nested column hierarchy)
+
+        Returns
+        -------
+        None
+        """
+        cdef vector[data_type] types_vec
+        if isinstance(types[0], tuple):
+            self.c_obj.set_dtypes(_generate_schema_map(types))
+        else:
+            types_vec.reserve(len(types))
+            for dtype in types:
+                types_vec.push_back((<DataType>dtype).c_obj)
+            self.c_obj.set_dtypes(types_vec)
+
+    cpdef void enable_keep_quotes(self, bool keep_quotes):
+        """
+        Set whether the reader should keep quotes of string values.
+
+        Parameters
+        ----------
+        keep_quotes : bool
+           Boolean value to indicate whether the reader should
+           keep quotes of string values
+
+        Returns
+        -------
+        None
+        """
+        self.c_obj.enable_keep_quotes(keep_quotes)
+
+    cpdef void enable_mixed_types_as_string(self, bool mixed_types_as_string):
+        """
+        Set whether to parse mixed types as a string column.
+        Also enables forcing to read a struct as string column using schema.
+
+        Parameters
+        ----------
+        mixed_types_as_string : bool
+           Boolean value to enable/disable parsing mixed types
+           as a string column
+
+        Returns
+        -------
+        None
+        """
+        self.c_obj.enable_mixed_types_as_string(mixed_types_as_string)
+
+    cpdef void enable_prune_columns(self, bool prune_columns):
+        """
+        Set whether to prune columns on read, selected
+        based on the ``set_dtypes`` option.
+
+        Parameters
+        ----------
+        prune_columns : bool
+           When set as true, if the reader options include
+           ``set_dtypes``, then the reader will only return those
+           columns which are mentioned in ``set_dtypes``. If false,
+           then all columns are returned, independent of the
+           ``set_dtypes`` setting.
+
+        Returns
+        -------
+        None
+        """
+        self.c_obj.enable_prune_columns(prune_columns)
+
+    cpdef void set_byte_range_offset(self, size_t offset):
+        """
+        Set number of bytes to skip from source start.
+
+        Parameters
+        ----------
+        offset : size_t
+            Number of bytes of offset
+
+        Returns
+        -------
+        None
+        """
+        self.c_obj.set_byte_range_offset(offset)
+
+    cpdef void set_byte_range_size(self, size_t size):
+        """
+        Set number of bytes to read.
+
+        Parameters
+        ----------
+        size : size_t
+            Number of bytes to read
+
+        Returns
+        -------
+        None
+        """
+        self.c_obj.set_byte_range_size(size)
+
+    cpdef void enable_lines(self, bool val):
+        """
+        Set whether to read the file as a json object per line.
+
+        Parameters
+        ----------
+        val : bool
+            Boolean value to enable/disable the option
+            to read each line as a json object
+
+        Returns
+        -------
+        None
+        """
+        self.c_obj.enable_lines(val)
+
+    # These hidden options are subjected to change without deprecation cycle.
+    # These are used to test libcudf JSON reader features, not used in cuDF.
+
+    cpdef void set_delimiter(self, str val):
+        self.c_obj.set_delimiter(val.encode())
+
+    cpdef void enable_dayfirst(self, bool val):
+        self.c_obj.enable_dayfirst(val)
+
+    cpdef void enable_experimental(self, bool val):
+        self.c_obj.enable_experimental(val)
+
+    cpdef void enable_normalize_single_quotes(self, bool val):
+        self.c_obj.enable_normalize_single_quotes(val)
+
+    cpdef void enable_normalize_whitespace(self, bool val):
+        self.c_obj.enable_normalize_whitespace(val)
+
+    cpdef void set_strict_validation(self, bool val):
+        self.c_obj.set_strict_validation(val)
+
+    cpdef void allow_unquoted_control_chars(self, bool val):
+        self.c_obj.allow_unquoted_control_chars(val)
+
+    cpdef void allow_numeric_leading_zeros(self, bool val):
+        self.c_obj.allow_numeric_leading_zeros(val)
+
+    cpdef void allow_nonnumeric_numbers(self, bool val):
+        self.c_obj.allow_nonnumeric_numbers(val)
+
+    cpdef void set_na_values(self, list vals):
+        cdef vector[string] vec
+        for val in vals:
+            if isinstance(val, str):
+                vec.push_back(val.encode())
+        self.c_obj.set_na_values(vec)
+
+
+cdef class JsonReaderOptionsBuilder:
+    cpdef JsonReaderOptionsBuilder compression(self, compression_type compression):
+        """
+        Sets compression type.
+
+        Parameters
+        ----------
+        compression : CompressionType
+            The compression type to use
+
+        Returns
+        -------
+        Self
+        """
+        self.c_obj.compression(compression)
+        return self
+
+    cpdef JsonReaderOptionsBuilder lines(self, bool val):
+        """
+        Set whether to read the file as a json object per line.
+
+        Parameters
+        ----------
+        val : bool
+            Boolean value to enable/disable the option
+            to read each line as a json object
+
+        Returns
+        -------
+        Self
+        """
+        self.c_obj.lines(val)
+        return self
+
+    cpdef JsonReaderOptionsBuilder keep_quotes(self, bool val):
+        """
+        Set whether the reader should keep quotes of string values.
+
+        Parameters
+        ----------
+        val : bool
+            Boolean value to indicate whether the
+            reader should keep quotes of string values
+
+        Returns
+        -------
+        Self
+        """
+        self.c_obj.keep_quotes(val)
+        return self
+
+    cpdef JsonReaderOptionsBuilder byte_range_offset(self, size_t byte_range_offset):
+        """
+        Set number of bytes to skip from source start.
+
+        Parameters
+        ----------
+        byte_range_offset : size_t
+            Number of bytes of offset
+
+        Returns
+        -------
+        Self
+        """
+        self.c_obj.byte_range_offset(byte_range_offset)
+        return self
+
+    cpdef JsonReaderOptionsBuilder byte_range_size(self, size_t byte_range_size):
+        """
+        Set number of bytes to read.
+
+        Parameters
+        ----------
+        byte_range_size : size_t
+            Number of bytes to read
+
+        Returns
+        -------
+        Self
+        """
+        self.c_obj.byte_range_size(byte_range_size)
+        return self
+
+    cpdef JsonReaderOptionsBuilder recovery_mode(
+        self,
+        json_recovery_mode_t recovery_mode
+    ):
+        """
+        Specifies the JSON reader's behavior on invalid JSON lines.
+
+        Parameters
+        ----------
+        recovery_mode : json_recovery_mode_t
+            An enum value to indicate the JSON reader's
+            behavior on invalid JSON lines.
+
+        Returns
+        -------
+        Self
+        """
+        self.c_obj.recovery_mode(recovery_mode)
+        return self
+
+    cpdef build(self):
+        """Create a JsonReaderOptions object"""
+        cdef JsonReaderOptions json_options = JsonReaderOptions.__new__(
+            JsonReaderOptions
+        )
+        json_options.c_obj = move(self.c_obj.build())
+        json_options.source = self.source
+        return json_options
 
 
 cpdef tuple chunked_read_json(
-    SourceInfo source_info,
-    list dtypes = None,
-    compression_type compression = compression_type.AUTO,
-    bool keep_quotes = False,
-    bool mixed_types_as_string = False,
-    bool prune_columns = False,
-    json_recovery_mode_t recovery_mode = json_recovery_mode_t.FAIL,
+    JsonReaderOptions options,
     int chunk_size=100_000_000,
 ):
-    """Reads an JSON file into a :py:class:`~.types.TableWithMetadata`.
+    """
+    Reads chunks of a JSON file into a :py:class:`~.types.TableWithMetadata`.
 
     Parameters
     ----------
-    source_info : SourceInfo
-        The SourceInfo object to read the JSON file from.
-    dtypes : list, default None
-        Set data types for the columns in the JSON file.
-
-        Each element of the list has the format
-        (column_name, column_dtype, list of child dtypes), where
-        the list of child dtypes is an empty list if the child is not
-        a nested type (list or struct dtype), and is of format
-        (column_child_name, column_child_type, list of grandchild dtypes).
-    compression: CompressionType, default CompressionType.AUTO
-        The compression format of the JSON source.
-    keep_quotes : bool, default False
-        Whether the reader should keep quotes of string values.
-    mixed_types_as_string : bool, default False
-        If True, mixed type columns are returned as string columns.
-        If `False` parsing mixed type columns will thrown an error.
-    prune_columns : bool, default False
-        Whether to only read columns specified in dtypes.
-    recover_mode : JSONRecoveryMode, default JSONRecoveryMode.FAIL
-        Whether to raise an error or set corresponding values to null
-        when encountering an invalid JSON line.
+    options : JsonReaderOptions
+        Settings for controlling reading behavior
     chunk_size : int, default 100_000_000 bytes.
         The number of bytes to be read in chunks.
         The chunk_size should be set to at least row_size.
@@ -171,20 +442,6 @@ cpdef tuple chunked_read_json(
     cdef size_type c_range_size = (
         chunk_size if chunk_size is not None else 0
     )
-    cdef json_reader_options opts = _setup_json_reader_options(
-        source_info=source_info,
-        dtypes=dtypes,
-        compression=compression,
-        lines=True,
-        byte_range_offset=0,
-        byte_range_size=0,
-        keep_quotes=keep_quotes,
-        mixed_types_as_string=mixed_types_as_string,
-        prune_columns=prune_columns,
-        recovery_mode=recovery_mode,
-    )
-
-    # Read JSON
     cdef table_with_metadata c_result
 
     final_columns = []
@@ -192,12 +449,13 @@ cpdef tuple chunked_read_json(
     child_names = None
     i = 0
     while True:
-        opts.set_byte_range_offset(c_range_size * i)
-        opts.set_byte_range_size(c_range_size)
+        options.enable_lines(True)
+        options.set_byte_range_offset(c_range_size * i)
+        options.set_byte_range_size(c_range_size)
 
         try:
             with nogil:
-                c_result = move(cpp_read_json(opts))
+                c_result = move(cpp_read_json(options.c_obj))
         except (ValueError, OverflowError):
             break
         if meta_names is None:
@@ -225,75 +483,30 @@ cpdef tuple chunked_read_json(
 
 
 cpdef TableWithMetadata read_json(
-    SourceInfo source_info,
-    list dtypes = None,
-    compression_type compression = compression_type.AUTO,
-    bool lines = False,
-    size_t byte_range_offset = 0,
-    size_t byte_range_size = 0,
-    bool keep_quotes = False,
-    bool mixed_types_as_string = False,
-    bool prune_columns = False,
-    json_recovery_mode_t recovery_mode = json_recovery_mode_t.FAIL,
-    dict extra_parameters = None,
+    JsonReaderOptions options
 ):
-    """Reads an JSON file into a :py:class:`~.types.TableWithMetadata`.
+    """
+    Read from JSON format.
+
+    The source to read from and options are encapsulated
+    by the `options` object.
+
+    For details, see :cpp:func:`read_json`.
 
     Parameters
     ----------
-    source_info : SourceInfo
-        The SourceInfo object to read the JSON file from.
-    dtypes : list, default None
-        Set data types for the columns in the JSON file.
-
-        Each element of the list has the format
-        (column_name, column_dtype, list of child dtypes), where
-        the list of child dtypes is an empty list if the child is not
-        a nested type (list or struct dtype), and is of format
-        (column_child_name, column_child_type, list of grandchild dtypes).
-    compression: CompressionType, default CompressionType.AUTO
-        The compression format of the JSON source.
-    byte_range_offset : size_t, default 0
-        Number of bytes to skip from source start.
-    byte_range_size : size_t, default 0
-        Number of bytes to read. By default, will read all bytes.
-    keep_quotes : bool, default False
-        Whether the reader should keep quotes of string values.
-    mixed_types_as_string : bool, default False
-        If True, mixed type columns are returned as string columns.
-        If `False` parsing mixed type columns will thrown an error.
-    prune_columns : bool, default False
-        Whether to only read columns specified in dtypes.
-    recover_mode : JSONRecoveryMode, default JSONRecoveryMode.FAIL
-        Whether to raise an error or set corresponding values to null
-        when encountering an invalid JSON line.
-    extra_parameters : dict, default None
-        Additional hidden parameters to pass to the JSON reader.
+    options: JsonReaderOptions
+        Settings for controlling reading behavior
 
     Returns
     -------
     TableWithMetadata
         The Table and its corresponding metadata (column names) that were read in.
     """
-    cdef json_reader_options opts = _setup_json_reader_options(
-        source_info=source_info,
-        dtypes=dtypes,
-        compression=compression,
-        lines=lines,
-        byte_range_offset=byte_range_offset,
-        byte_range_size=byte_range_size,
-        keep_quotes=keep_quotes,
-        mixed_types_as_string=mixed_types_as_string,
-        prune_columns=prune_columns,
-        recovery_mode=recovery_mode,
-        extra_parameters=extra_parameters,
-    )
-
-    # Read JSON
     cdef table_with_metadata c_result
 
     with nogil:
-        c_result = move(cpp_read_json(opts))
+        c_result = move(cpp_read_json(options.c_obj))
 
     return TableWithMetadata.from_libcudf(c_result)
 
diff --git a/python/pylibcudf/pylibcudf/tests/io/test_json.py b/python/pylibcudf/pylibcudf/tests/io/test_json.py
index 9b0c5a29fe8..747bbfa1370 100644
--- a/python/pylibcudf/pylibcudf/tests/io/test_json.py
+++ b/python/pylibcudf/pylibcudf/tests/io/test_json.py
@@ -167,9 +167,12 @@ def test_read_json_basic(
         source.seek(0)
 
     res = plc.io.json.read_json(
-        plc.io.SourceInfo([source]),
-        compression=compression_type,
-        lines=lines,
+        (
+            plc.io.json.JsonReaderOptions.builder(plc.io.SourceInfo([source]))
+            .compression(compression_type)
+            .lines(lines)
+            .build()
+        )
     )
 
     # Adjustments to correct for the fact orient=records is lossy
@@ -243,9 +246,14 @@ def get_child_types(typ):
 
     new_schema = pa.schema(new_fields)
 
-    res = plc.io.json.read_json(
-        plc.io.SourceInfo([source]), dtypes=dtypes, lines=True
+    options = (
+        plc.io.json.JsonReaderOptions.builder(plc.io.SourceInfo([source]))
+        .lines(True)
+        .build()
     )
+    options.set_dtypes(dtypes)
+
+    res = plc.io.json.read_json(options)
     new_table = pa_table.cast(new_schema)
 
     # orient=records is lossy
@@ -269,10 +277,15 @@ def test_read_json_lines_byte_range(source_or_sink, chunk_size):
     for chunk_start in range(0, len(json_str.encode("utf-8")), chunk_size):
         tbls_w_meta.append(
             plc.io.json.read_json(
-                plc.io.SourceInfo([source]),
-                lines=True,
-                byte_range_offset=chunk_start,
-                byte_range_size=chunk_start + chunk_size,
+                (
+                    plc.io.json.JsonReaderOptions.builder(
+                        plc.io.SourceInfo([source])
+                    )
+                    .lines(True)
+                    .byte_range_offset(chunk_start)
+                    .byte_range_size(chunk_start + chunk_size)
+                    .build()
+                )
             )
         )
 
@@ -302,7 +315,12 @@ def test_read_json_lines_keep_quotes(keep_quotes, source_or_sink):
     write_source_str(source, json_bytes)
 
     tbl_w_meta = plc.io.json.read_json(
-        plc.io.SourceInfo([source]), lines=True, keep_quotes=keep_quotes
+        (
+            plc.io.json.JsonReaderOptions.builder(plc.io.SourceInfo([source]))
+            .lines(True)
+            .keep_quotes(keep_quotes)
+            .build()
+        )
     )
 
     template = "{0}"
@@ -330,20 +348,19 @@ def test_read_json_lines_recovery_mode(recovery_mode, source_or_sink):
     json_str = '{"a":1,"b":10}\n{"a":2,"b":11}\nabc\n{"a":3,"b":12}\n'
     write_source_str(source, json_str)
 
+    options = (
+        plc.io.json.JsonReaderOptions.builder(plc.io.SourceInfo([source]))
+        .lines(True)
+        .recovery_mode(recovery_mode)
+        .build()
+    )
+
     if recovery_mode == plc.io.types.JSONRecoveryMode.FAIL:
         with pytest.raises(RuntimeError):
-            plc.io.json.read_json(
-                plc.io.SourceInfo([source]),
-                lines=True,
-                recovery_mode=recovery_mode,
-            )
+            plc.io.json.read_json(options)
     else:
         # Recover case (bad values replaced with nulls)
-        tbl_w_meta = plc.io.json.read_json(
-            plc.io.SourceInfo([source]),
-            lines=True,
-            recovery_mode=recovery_mode,
-        )
+        tbl_w_meta = plc.io.json.read_json(options)
         exp = pa.Table.from_arrays(
             [[1, 2, None, 3], [10, 11, None, 12]], names=["a", "b"]
         )