diff --git a/ecoscope/io/earthranger_utils.py b/ecoscope/io/earthranger_utils.py
index f2b2ffc0..2ee61566 100644
--- a/ecoscope/io/earthranger_utils.py
+++ b/ecoscope/io/earthranger_utils.py
@@ -41,7 +41,7 @@ def clean_time_cols(df):
     for col in time_cols:
         if col in df.columns:
             # convert x is not None to pd.isna(x) is False
-            df[col] = df[col].apply(lambda x: pd.to_datetime(parser.parse(x)) if not pd.isna(x) else None)
+            df[col] = df[col].apply(lambda x: pd.to_datetime(parser.parse(x), utc=True) if not pd.isna(x) else None)
     return df
 
 
diff --git a/tests/test_earthranger_utils.py b/tests/test_earthranger_utils.py
new file mode 100644
index 00000000..f717a7bf
--- /dev/null
+++ b/tests/test_earthranger_utils.py
@@ -0,0 +1,53 @@
+import pytest
+
+import numpy as np
+import pandas as pd
+
+from ecoscope.io.earthranger_utils import clean_time_cols
+
+
+@pytest.fixture
+def df_with_times():
+    return pd.DataFrame(
+        data={
+            "time": [
+                "2023-01-30 11:26:13.805829-08:00",
+                "2023-09-27T06:16:46.158966",
+                "2023-09-27T06:16:46.23-07:00",
+                "2023-09-27T06:16:46.1589-07:00",
+                "2023-09-27T22:00:01.23-11:00",
+                "2023-09-27T06:16:46.00-07:00",
+                "2023-09-27T22:00:00.00-02:00",
+                pd.NA,
+            ]
+        },
+        index=["A", "B", "C", "D", "E", "F", "G", "H"],
+    )
+
+
+def test_clean_time_cols(df_with_times):
+    with pytest.raises(AttributeError):
+        df_with_times["time"].dt
+
+    cleaned = clean_time_cols(df_with_times)
+    assert pd.api.types.is_datetime64_ns_dtype(cleaned["time"])
+    # Check we have our dt accessor
+    df_with_times["time"].dt
+
+    expected_times = pd.arrays.DatetimeArray._from_sequence(
+        [
+            "2023-01-30 19:26:13.805829+00:00",
+            "2023-09-27 06:16:46.158966+00:00",
+            "2023-09-27 13:16:46.230000+00:00",
+            "2023-09-27 13:16:46.158900+00:00",
+            "2023-09-28 09:00:01.230000+00:00",
+            "2023-09-27 13:16:46+00:00",
+            "2023-09-28 00:00:00+00:00",
+        ]
+    )
+
+    # Since the parser resolves nan's to pd.NaT,
+    # and pd.NaT != pd.NaT
+    # check the nan separately from the array equality
+    assert np.array_equal(expected_times, cleaned["time"].array[:-1])
+    assert pd.isnull(cleaned["time"]["H"])