Skip to content

Commit

Permalink
feat: PandasJSONEncoder
Browse files Browse the repository at this point in the history
  • Loading branch information
thorwhalen committed Feb 21, 2025
1 parent 6d91b9b commit baa98dd
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 5 deletions.
11 changes: 6 additions & 5 deletions tabled/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,12 @@
)
from tabled.multi import ColumnOrientedMapping
from tabled.util import (
collapse_rows,
expand_rows,
collapse_columns,
expand_columns,
auto_decode_bytes,
collapse_rows, # collapse rows in a dataframe
expand_rows, # expand rows in a dataframe
collapse_columns, # collapse columns in a dataframe
expand_columns, # expand columns in a dataframe
auto_decode_bytes, # Decode a byte sequence into a string, trying charset_normalizer guesses if that fails.
PandasJSONEncoder, # a json encoder that can handle pandas and numpy objects
ensure_columns, # ensure that the columns are in the dataframe
ensure_first_columns, # ensure that the columns are the first columns in the dataframe
ensure_last_columns, # ensure that the columns are the last columns in the dataframe
Expand Down
85 changes: 85 additions & 0 deletions tabled/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -634,3 +634,88 @@ def expand_columns(
result_df.drop(columns=[col], inplace=True)

return result_df


# -------------------------------------------------------------------------------------
# Serialization

import json
import datetime
import pandas as pd
import numpy as np


class PandasJSONEncoder(json.JSONEncoder):
    """
    A custom JSON encoder that can handle pandas and numpy types more robustly,
    even if they appear within nested data structures.

    Pass it as ``cls=PandasJSONEncoder`` to ``json.dumps`` / ``json.dump``.
    Conversions performed by :meth:`default`:

    * ``pd.DataFrame`` -> list of row dicts (``orient='records'``, ISO dates)
    * ``pd.Series`` -> dict keyed by the (stringified) index, ISO dates
    * ``np.ndarray`` -> (nested) list via ``tolist()``
    * ``np.bool_`` / ``np.floating`` / ``np.integer`` -> ``bool`` / ``float`` / ``int``
    * ``pd.Timestamp``, ``datetime.datetime``, ``datetime.date`` -> ISO 8601 string
    * ``pd.NaT`` and any other scalar for which ``pd.isna`` is ``True`` -> ``null``

    >>> import json, datetime, pandas as pd, numpy as np

    Test with a DataFrame containing timestamps and missing values.

    >>> df = pd.DataFrame({
    ...     'a': [1, 2, 3],
    ...     'b': [pd.Timestamp('2023-04-09 00:02:53+0000', tz='UTC'),
    ...           pd.NaT,
    ...           pd.Timestamp('2023-04-09 00:02:53+0000', tz='UTC')]
    ... })
    >>> json_str = json.dumps(df, cls=PandasJSONEncoder)
    >>> json_str  # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
    '[{"a": 1, "b": "2023-04-09T00:02:53..."}..., {"a": 2, "b": null}, {"a": 3, "b": "2023-04-09T00:02:53..."}...]'

    Test with a Series containing timestamps and missing values.

    >>> s = pd.Series([pd.Timestamp('2023-04-09 00:02:53+0000', tz='UTC'), pd.NaT])
    >>> json_str = json.dumps(s, cls=PandasJSONEncoder)
    >>> json_str  # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
    '{"0": "2023-04-09T00:02:53...", "1": null}'

    Test with numpy arrays and numpy scalar types.

    >>> data = {
    ...     "arr": np.array([1, 2, 3], dtype=np.int32),
    ...     "flt": np.float32(3.14),
    ...     "bool": np.bool_(False)
    ... }
    >>> json_str = json.dumps(data, cls=PandasJSONEncoder)
    >>> json_str  # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
    '{"arr": [1, 2, 3], "flt": 3.14..., "bool": false}'

    Test with a datetime.date.

    >>> date_val = datetime.date(2002, 1, 1)
    >>> json.dumps(date_val, cls=PandasJSONEncoder)  # doctest: +NORMALIZE_WHITESPACE
    '"2002-01-01"'

    A bare (or nested) ``pd.NaT`` is encoded as ``null``, consistently with
    how missing timestamps inside a DataFrame or Series are encoded.

    >>> json.dumps({"when": pd.NaT}, cls=PandasJSONEncoder)
    '{"when": null}'
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        ``json`` calls this only for objects it cannot serialize natively.

        :param obj: The object the standard encoder could not handle.
        :return: A value built from JSON-native Python types (dict, list, str,
            int, float, bool or None).
        :raises TypeError: Raised by ``json.JSONEncoder.default`` when ``obj``
            is none of the supported types and is not a missing value.
        """
        # Handle pandas DataFrame by delegating to its own JSON conversion.
        # 'records' orientation produces a list of row dictionaries; pandas
        # takes care of nested values such as Timestamps and NaT.
        if isinstance(obj, pd.DataFrame):
            return json.loads(obj.to_json(orient='records', date_format='iso'))
        # Series.to_json yields a JSON object keyed by the index, again letting
        # pandas deal with values the json module cannot serialize.
        if isinstance(obj, pd.Series):
            return json.loads(obj.to_json(date_format='iso'))
        # Convert numpy arrays to (nested) lists.
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        # pd.NaT must be tested BEFORE the date/datetime branch: it is an
        # instance of datetime.datetime (but not of pd.Timestamp) and its
        # isoformat() returns the string "NaT", which would otherwise be
        # emitted instead of null.
        if obj is pd.NaT:
            return None
        # pd.Timestamp and datetime.datetime are both subclasses of
        # datetime.date, so a single check covers all three.
        if isinstance(obj, datetime.date):
            return obj.isoformat()
        # Convert numpy boolean, floating, and integer scalars to native
        # Python types.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        # For any other object, map it to null only when pd.isna gives a plain
        # boolean True (e.g. pd.NA); array-like results are ignored here.
        is_na = pd.isna(obj)
        if isinstance(is_na, bool) and is_na:
            return None
        # Fallback to the default method, which raises TypeError.
        return super().default(obj)

0 comments on commit baa98dd

Please sign in to comment.