Skip to content

Commit

Permalink
fix: review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
Ancy Augustin authored and Ancy Augustin committed Nov 25, 2024
1 parent 8ca6389 commit 303de9f
Show file tree
Hide file tree
Showing 8 changed files with 148 additions and 99 deletions.
12 changes: 6 additions & 6 deletions docs/getting_started.rst
Original file line number Diff line number Diff line change
Expand Up @@ -103,15 +103,15 @@ With a :class:`.DataFrameClient` object, you can:
Pandas Utility
~~~~~~~~~~~~~~

Utility functions for managing Pandas DataFrames and interacting with the DataFrame API include:
Utility functions to interact with :class:`.DataFrameClient` using `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`__ include:

* Create a table from a pandas dataframe.
* Create a table from a `pandas.DataFrame`.

* Append pandas dataframe to an existing table.
* Append `pandas.DataFrame` to an existing table.

* Query decimated data from a table as pandas dataframe.
* Query decimated data from a table as `pandas.DataFrame`.

* Query data from a table as pandas dataframe.
* Query data from a table as `pandas.DataFrame`.

Examples
~~~~~~~~
Expand All @@ -134,7 +134,7 @@ Export data from a table
:language: python
:linenos:

Table operations using pandas dataframe
Table operations using `pandas.DataFrame <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html>`__

.. literalinclude:: ../examples/dataframe/pandas_dataframe_operations.py
:language: python
Expand Down
6 changes: 4 additions & 2 deletions examples/dataframe/pandas_dataframe_operations.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import pandas as pd
from nisystemlink.clients.dataframe import DataFrameClient
from nisystemlink.clients.core import HttpConfiguration
from nisystemlink.clients.dataframe.models import (
DecimationMethod,
DecimationOptions,
Expand All @@ -21,7 +20,10 @@
data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"]
)
df.set_index("a", inplace=True)
print(df)

print(client.list_tables())
client.list_tables()
try:
table_id = create_table_from_pandas_df(
client, df, "Example Table", nullable_columns=False
Expand Down Expand Up @@ -55,4 +57,4 @@
print("Queried table data as pandas dataframe:")
print(queried_df)

client.delete_table(table_id)
client.delete_table(table_id)
2 changes: 1 addition & 1 deletion nisystemlink/clients/dataframe/models/_data_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class DataFrame(JsonModel):
columns: Optional[List[str]] = None
"""The names and order of the columns included in the data frame."""

data: List[List[Optional[str]]] = None
data: Optional[List[List[Optional[str]]]] = None
"""The data for each row with the order specified in the columns property.
Must contain a value for each column in the columns property."""

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Optional

import pandas as pd
from nisystemlink.clients.dataframe import DataFrameClient
from nisystemlink.clients.dataframe.models import (
Expand All @@ -24,7 +26,7 @@ def create_table_from_pandas_df(
client (DataFrameClient): Instance of DataFrameClient.
df (pd.DataFrame): Pandas dataframe.
table_name (str): Name of the table.
nullable_columns (bool): Make the columns nullable.
nullable_columns (bool): Make the columns nullable. Nullable columns can contain `null` values.
Returns:
str: ID of the table.
Expand All @@ -42,7 +44,10 @@ def create_table_from_pandas_df(


def append_pandas_df_to_table(
client: DataFrameClient, table_id: str, df: pd.DataFrame
client: DataFrameClient,
table_id: str,
df: pd.DataFrame,
end_of_data: Optional[bool] = None,
) -> None:
"""Append `df` to table.
Expand All @@ -54,12 +59,48 @@ def append_pandas_df_to_table(
Returns:
None
"""
frame = DataFrame()
frame: DataFrame = DataFrame()
frame.from_pandas(df)
client.append_table_data(
id=table_id, data=AppendTableDataRequest(frame=frame, end_of_data=False)
id=table_id, data=AppendTableDataRequest(frame=frame, end_of_data=end_of_data)
)


def create_table_with_data_from_pandas_df(
    client: DataFrameClient,
    df: pd.DataFrame,
    table_name: str,
    nullable_columns: bool,
    batch_size: int = 1000,
    end_of_data: Optional[bool] = None,
) -> str:
    """Create a table and upload data from a pandas DataFrame.

    The table is created with `create_table_from_pandas_df`, then the rows of `df`
    are appended with `append_pandas_df_to_table` in consecutive batches of at most
    `batch_size` rows.

    Args:
        client (DataFrameClient): Instance of DataFrameClient.
        df (pd.DataFrame): Pandas DataFrame with data to upload.
        table_name (str): Name of the table to create.
        nullable_columns (bool): Make the columns nullable. Nullable columns can contain `null` values.
        batch_size (int): Maximum number of rows per append request. Must be at least 1.
            Default is 1000.
        end_of_data (Optional[bool]): Value forwarded as `end_of_data` with the final
            batch only; every earlier batch is sent with `None`. If `df` has no rows,
            no batch is sent and this value is not forwarded at all.

    Returns:
        str: ID of the created table.

    Raises:
        ValueError: If `batch_size` is less than 1.
    """
    # Validate before creating anything: a zero step makes range() raise only after
    # the table already exists, and a negative step silently uploads no rows.
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}.")

    table_id = create_table_from_pandas_df(
        client=client, df=df, table_name=table_name, nullable_columns=nullable_columns
    )

    num_rows = len(df)
    for start_row in range(0, num_rows, batch_size):
        end_row = min(start_row + batch_size, num_rows)
        is_last_batch = end_row >= num_rows
        # Only the last request may carry the caller's end_of_data value; sending it
        # with an earlier batch would mark the data as complete while rows remain.
        # NOTE(review): assumes the service rejects appends after end_of_data=True -
        # confirm against the DataFrame service API.
        append_pandas_df_to_table(
            client,
            table_id,
            df.iloc[start_row:end_row],
            end_of_data if is_last_batch else None,
        )

    return table_id


def query_decimated_table_data_as_pandas_df(
client: DataFrameClient,
Expand All @@ -78,10 +119,10 @@ def query_decimated_table_data_as_pandas_df(
Returns:
pd.DataFrame: Table data in pandas dataframe format.
"""
index_name: str = None
index_name = None
if index:
index_name = _get_table_index_name(client=client, table_id=table_id)
if query.columns:
if query.columns and index_name:
if index_name not in query.columns:
query.columns.append(index_name)
response = client.query_decimated_data(table_id, query)
Expand All @@ -107,11 +148,10 @@ def query_table_data_as_pandas_df(
"""
continuation_token = None
all_rows = []
index_name: str = None

if index:
index_name = _get_table_index_name(client=client, table_id=table_id)
if query.columns:
if query.columns and index_name:
if index_name not in query.columns:
query.columns.append(index_name)

Expand Down
12 changes: 8 additions & 4 deletions nisystemlink/clients/dataframe/utilities/_pandas_exception.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
SUPPORTED_INDEX_DATA_TYPE = ["INT32", "INT64", "TIMESTAMP"]


class DataFrameError(Exception):
"""Base class for Dataframe errors."""

Expand All @@ -11,9 +14,8 @@ def __init__(self, index_name: str = None) -> None:
self.index_name = index_name
self.message = "Data frame must contain one index."
if index_name:
self.message = (
f"Column '{self.index_name}' must be of type INT32, INT64, or TIMESTAMP to be an index column."
)
self.message = f"Column '{self.index_name}' must be of type {SUPPORTED_INDEX_DATA_TYPE}"
" to be an index column."
super().__init__(self.message)


Expand All @@ -23,5 +25,7 @@ class InvalidColumnTypeError(DataFrameError):
def __init__(self, column_name: str, column_type: str) -> None:
self.column_name = column_name
self.column_type = column_type
self.message = f"Column '{column_name}' has an unsupported datatype: {column_type}"
self.message = (
f"Column '{column_name}' has an unsupported datatype: {column_type}"
)
super().__init__(self.message)
22 changes: 10 additions & 12 deletions nisystemlink/clients/dataframe/utilities/_pandas_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@

from ._pandas_exception import InvalidColumnTypeError, InvalidIndexError

UNSUPPORTED_PANDAS_INT_TYPES = ["int8", "int16"]
"""List of unsupported pandas integer types for conversion to `DataType`."""

UNSUPPORTED_PANDAS_FLOAT_TYPES = ["float16"]
"""List of unsupported pandas float types for conversion to `DataType`."""
UNSUPPORTED_PANDAS_DATA_TYPE_CONVERSION = {
"int8": "int32",
"int16": "int32",
"float16": "float32",
}
"""Mapping of unsupported pandas types to supported data types for `DataType`."""

SUPPORTED_INDEX_DATA_TYPE = [DataType.Int32, DataType.Int64, DataType.Timestamp]
"""List of supported index data types for table creation.
Expand Down Expand Up @@ -62,11 +63,8 @@ def _type_cast_column_datatype(
data = pd.to_numeric(data, downcast="integer")
pd_dtype = data.dtype

if pd_dtype in UNSUPPORTED_PANDAS_INT_TYPES:
data = data.astype("int32")

elif pd_dtype in UNSUPPORTED_PANDAS_FLOAT_TYPES:
data = data.astype("float32")
if pd_dtype in UNSUPPORTED_PANDAS_DATA_TYPE_CONVERSION:
data = data.astype(UNSUPPORTED_PANDAS_DATA_TYPE_CONVERSION[pd_dtype])

return data

Expand All @@ -81,7 +79,7 @@ def _infer_index_column(df: pd.DataFrame) -> Column:
InvalidIndexError: If multiple index present or index is of unsupported type.
Returns:
Column: Valid `Column` to the table.
Column: Valid Index `Column` for the table.
"""
index = df.index.name

Expand Down Expand Up @@ -140,7 +138,7 @@ def _infer_dataframe_columns(df: pd.DataFrame, nullable_columns: bool) -> List[C
return columns


def _get_table_index_name(client: DataFrameClient, table_id: str) -> str:
def _get_table_index_name(client: DataFrameClient, table_id: str) -> Optional[str]:
"""Get the index name from the table columns.
Args:
Expand Down
Loading

0 comments on commit 303de9f

Please sign in to comment.