Skip to content

Commit

Permalink
Merge pull request #89 from roocs/fix_reftime_calendar
Browse files Browse the repository at this point in the history
Fix reftime calendar and remove fill value fix
  • Loading branch information
agstephens authored Jan 12, 2022
2 parents 8d74e67 + 0ca26a1 commit 34a4360
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 128 deletions.
16 changes: 0 additions & 16 deletions daops/data_utils/attr_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,22 +54,6 @@ def add_global_attrs_if_needed(ds_id, ds, **operands):
return ds


def remove_fill_values(ds_id, ds, **operands):
    """
    Remove the ``_FillValue`` encoding that is added by xarray.

    For every dimension of the main variable that has a 1-D coordinate
    variable defined on that same dimension, the coordinate's
    ``encoding["_FillValue"]`` is set to ``None``.

    :param ds_id: Dataset identifier (not used in this function)
    :param ds: Xarray Dataset
    :param operands: sequence of arguments (accepted like the other fix
        functions in this module; none are used here)
    :return: Xarray Dataset
    """
    main_var = xu.get_main_variable(ds)

    for coord_id in ds[main_var].dims:
        # A dimension is not guaranteed to have a coordinate variable of its
        # own; skip it rather than failing on the lookup below.
        if coord_id not in ds.coords:
            continue

        # Only touch coordinates that are defined on their own dimension.
        if ds.coords[coord_id].dims == (coord_id,):
            ds[coord_id].encoding["_FillValue"] = None

    return ds


def remove_coord_attr(ds_id, ds, **operands):
"""
:param ds: Xarray DataSet
Expand Down
11 changes: 8 additions & 3 deletions daops/fix_utils/decadal_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,17 @@
}


def get_time_calendar(ds_id, ds):
    """
    Return the calendar of the dataset's time axis.

    The calendar is read from the ``calendar`` attribute of the first value
    in ``ds.time.values`` (presumably cftime datetime objects - confirm
    against how the dataset is opened).

    :param ds_id: Dataset identifier (only used in the error message)
    :param ds: Xarray Dataset with a ``time`` coordinate
    :return: the ``calendar`` attribute of the first time value
    :raises IndexError: if the time axis has no first element
    """
    try:
        first_time = ds.time.values[0]
    except IndexError as err:
        # Keep the original exception type, but say which dataset failed.
        raise IndexError(
            f"Cannot determine the calendar of {ds_id!r}: "
            "its time axis has no values"
        ) from err

    return first_time.calendar


def get_lead_times(ds_id, ds):

start_date = datetime.fromisoformat(get_start_date(ds_id, ds))

times = ds.time.values
cal = times[0].calendar
cal = get_time_calendar(ds_id, ds)
reftime = cftime.datetime(
start_date.year,
start_date.month,
Expand All @@ -46,7 +51,7 @@ def get_lead_times(ds_id, ds):

lead_times = []
# calculate leadtime from reftime and valid times
for time in times:
for time in ds.time.values:
td = time - reftime
days = td.days
lead_times.append(days)
Expand Down
22 changes: 0 additions & 22 deletions tests/test_data_utils/test_attr_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from daops.data_utils.attr_utils import add_global_attrs_if_needed
from daops.data_utils.attr_utils import edit_global_attrs
from daops.data_utils.attr_utils import edit_var_attrs
from daops.data_utils.attr_utils import remove_fill_values
from tests._common import CMIP6_DECADAL
from tests._common import MINI_ESGF_MASTER_DIR

Expand Down Expand Up @@ -135,24 +134,3 @@ def test_add_global_attrs_if_needed(load_esgf_test_data):
== "EC-EARTH model output prepared for CMIP5 historical"
)
assert ds_change_global_attrs.attrs["test"] == "this is a new test attribute"


def test_remove_fill_values(load_esgf_test_data):
    """Check that ``remove_fill_values`` sets the lat ``_FillValue`` encoding to None."""
    dataset_id = (
        "cmip5.output1.ICHEC.EC-EARTH.historical.mon.atmos.Amon.r1i1p1.latest.tas"
    )
    dataset = xr.open_mfdataset(
        f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip5/data/cmip5/output1/ICHEC/EC-EARTH/historical/mon/atmos/Amon/r1i1p1/latest/tas/tas_Amon_EC-EARTH_historical_r1i1p1_185001-185912.nc",
        combine="by_coords",
        use_cftime=True,
    )

    # Before the fix: no _FillValue entry in the lat encoding at all.
    # NOTE(review): this lat check was previously repeated three times
    # verbatim; lon and time may have been intended - confirm.
    fill_before = dataset.lat.encoding.get("_FillValue", "")
    assert fill_before == ""

    dataset = remove_fill_values(dataset_id, dataset)

    # After the fix: the entry exists and is explicitly None.
    fill_after = dataset.lat.encoding.get("_FillValue", "")
    assert fill_after is None
102 changes: 15 additions & 87 deletions tests/test_fixes_applied.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,9 @@ def test_fixes_applied_decadal_MOHC_mon(tmpdir, load_esgf_test_data):
# check AddScalarCoord Fix is applied
assert "reftime" in ds.coords
assert ds.reftime.dims == ()
assert ds.reftime.encoding["calendar"] == ds.time.values[0].calendar
assert ds.reftime.values == np.array(
cftime.DatetimeGregorian(2004, 11, 1, 0, 0, 0, 0), dtype=object
cftime.Datetime360Day(2004, 11, 1, 0, 0, 0, 0), dtype=object
)

# check AddCoordFix is applied
Expand All @@ -79,15 +80,6 @@ def test_fixes_applied_decadal_MOHC_mon(tmpdir, load_esgf_test_data):
== "For more information on the ripf, refer to the variant_label, initialization_description, physics_description and forcing_description global attributes"
)

# check RemoveFillValuesFix is applied
# assert no fill value for coordinate variables
assert ds.lon.encoding.get("_FillValue") is None
assert ds.lat.encoding.get("_FillValue") is None
assert ds.time.encoding.get("_FillValue") is None

# compare to e.g. lon_bnds where the fill value is added by xarray but we haven't removed it
assert math.isnan(ds.lon_bnds.encoding.get("_FillValue"))

# check coordinate attribute removed from realization and bounds variables
assert ds.realization.encoding.get("coordinates") is None
assert ds.lon_bnds.encoding.get("coordinates") is None
Expand Down Expand Up @@ -142,8 +134,9 @@ def test_fixes_applied_decadal_MOHC_day(tmpdir, load_esgf_test_data):
# check AddScalarCoord Fix is applied
assert "reftime" in ds.coords
assert ds.reftime.dims == ()
assert ds.reftime.encoding["calendar"] == ds.time.values[0].calendar
assert ds.reftime.values == np.array(
cftime.DatetimeGregorian(1960, 11, 1, 0, 0, 0, 0), dtype=object
cftime.Datetime360Day(1960, 11, 1, 0, 0, 0, 0), dtype=object
)

# check AddCoordFix is applied
Expand All @@ -163,15 +156,6 @@ def test_fixes_applied_decadal_MOHC_day(tmpdir, load_esgf_test_data):
== "For more information on the ripf, refer to the variant_label, initialization_description, physics_description and forcing_description global attributes"
)

# check RemoveFillValuesFix is applied
# assert no fill value for coordinate variables
assert ds.lon.encoding.get("_FillValue") is None
assert ds.lat.encoding.get("_FillValue") is None
assert ds.time.encoding.get("_FillValue") is None

# compare to e.g. lon_bnds where the fill value is added by xarray but we haven't removed it
assert math.isnan(ds.lon_bnds.encoding.get("_FillValue"))

# check coordinate attribute removed from realization and bounds variables
assert ds.realization.encoding.get("coordinates") is None
assert ds.lon_bnds.encoding.get("coordinates") is None
Expand Down Expand Up @@ -225,8 +209,9 @@ def test_fixes_applied_decadal_EC_Earth_mon(tmpdir, load_esgf_test_data):
# check AddScalarCoord Fix is applied
assert "reftime" in ds.coords
assert ds.reftime.dims == ()
assert ds.reftime.encoding["calendar"] == ds.time.values[0].calendar
assert ds.reftime.values == np.array(
cftime.DatetimeGregorian(1960, 11, 1, 0, 0, 0, 0), dtype=object
cftime.DatetimeProlepticGregorian(1960, 11, 1, 0, 0, 0, 0), dtype=object
)

# check AddCoordFix is applied
Expand All @@ -246,15 +231,6 @@ def test_fixes_applied_decadal_EC_Earth_mon(tmpdir, load_esgf_test_data):
== "For more information on the ripf, refer to the variant_label, initialization_description, physics_description and forcing_description global attributes"
)

# check RemoveFillValuesFix is applied
# assert no fill value for coordinate variables
assert ds.lon.encoding.get("_FillValue") is None
assert ds.lat.encoding.get("_FillValue") is None
assert ds.time.encoding.get("_FillValue") is None

# compare to e.g. lon_bnds where the fill value is added by xarray but we haven't removed it
assert math.isnan(ds.lon_bnds.encoding.get("_FillValue"))

# check coordinate attribute removed from realization and bounds variables
assert ds.realization.encoding.get("coordinates") is None
assert ds.lon_bnds.encoding.get("coordinates") is None
Expand Down Expand Up @@ -308,8 +284,9 @@ def test_fixes_applied_decadal_EC_Earth_day(tmpdir, load_esgf_test_data):
# check AddScalarCoord Fix is applied
assert "reftime" in ds.coords
assert ds.reftime.dims == ()
assert ds.reftime.encoding["calendar"] == ds.time.values[0].calendar
assert ds.reftime.values == np.array(
cftime.DatetimeGregorian(1961, 11, 1, 0, 0, 0, 0), dtype=object
cftime.DatetimeProlepticGregorian(1961, 11, 1, 0, 0, 0, 0), dtype=object
)

# check AddCoordFix is applied
Expand All @@ -329,15 +306,6 @@ def test_fixes_applied_decadal_EC_Earth_day(tmpdir, load_esgf_test_data):
== "For more information on the ripf, refer to the variant_label, initialization_description, physics_description and forcing_description global attributes"
)

# check RemoveFillValuesFix is applied
# assert no fill value for coordinate variables
assert ds.lon.encoding.get("_FillValue") is None
assert ds.lat.encoding.get("_FillValue") is None
assert ds.time.encoding.get("_FillValue") is None

# compare to e.g. lon_bnds where the fill value is added by xarray but we haven't removed it
assert math.isnan(ds.lon_bnds.encoding.get("_FillValue"))

# check coordinate attribute removed from realization and bounds variables
assert ds.realization.encoding.get("coordinates") is None
assert ds.lon_bnds.encoding.get("coordinates") is None
Expand All @@ -352,7 +320,7 @@ def test_fixes_applied_decadal_EC_Earth_day(tmpdir, load_esgf_test_data):
def test_fixes_applied_decadal_EC_Earth_url_fix(tmpdir, load_esgf_test_data):
# change fix index to test index which holds these decadal fixes
fix_index = CONFIG["elasticsearch"]["fix_store"]
test_fix_index = "c3s-roocs-fix-for-tests"
test_fix_index = "c3s-roocs-fix"
CONFIG["elasticsearch"]["fix_store"] = test_fix_index

# don't use catalog - decadal datasets not in current catalog
Expand Down Expand Up @@ -391,6 +359,7 @@ def test_fixes_applied_decadal_EC_Earth_url_fix(tmpdir, load_esgf_test_data):
# check AddScalarCoord Fix is applied
assert "reftime" in ds.coords
assert ds.reftime.dims == ()
assert ds.reftime.encoding["calendar"] == ds.time.values[0].calendar
assert ds.reftime.values == np.array(
cftime.DatetimeGregorian(1960, 11, 1, 0, 0, 0, 0), dtype=object
)
Expand All @@ -412,15 +381,6 @@ def test_fixes_applied_decadal_EC_Earth_url_fix(tmpdir, load_esgf_test_data):
== "For more information on the ripf, refer to the variant_label, initialization_description, physics_description and forcing_description global attributes"
)

# check RemoveFillValuesFix is applied
# assert no fill value for coordinate variables
assert ds.lon.encoding.get("_FillValue") is None
assert ds.lat.encoding.get("_FillValue") is None
assert ds.time.encoding.get("_FillValue") is None

# compare to e.g. lon_bnds where the fill value is added by xarray but we haven't removed it
assert math.isnan(ds.lon_bnds.encoding.get("_FillValue"))

# check coordinate attribute removed from realization and bounds variables
assert ds.realization.encoding.get("coordinates") is None
assert ds.lon_bnds.encoding.get("coordinates") is None
Expand Down Expand Up @@ -475,6 +435,7 @@ def test_fixes_applied_decadal_MPI_M_mon(tmpdir, load_esgf_test_data):
# check AddScalarCoord Fix is applied
assert "reftime" in ds.coords
assert ds.reftime.dims == ()
assert ds.reftime.encoding["calendar"] == ds.time.values[0].calendar
assert ds.reftime.values == np.array(
cftime.DatetimeGregorian(1960, 11, 1, 0, 0, 0, 0), dtype=object
)
Expand All @@ -496,15 +457,6 @@ def test_fixes_applied_decadal_MPI_M_mon(tmpdir, load_esgf_test_data):
== "For more information on the ripf, refer to the variant_label, initialization_description, physics_description and forcing_description global attributes"
)

# check RemoveFillValuesFix is applied
# assert no fill value for coordinate variables
assert ds.lon.encoding.get("_FillValue") is None
assert ds.lat.encoding.get("_FillValue") is None
assert ds.time.encoding.get("_FillValue") is None

# compare to e.g. lon_bnds where the fill value is added by xarray but we haven't removed it
assert math.isnan(ds.lon_bnds.encoding.get("_FillValue"))

# check coordinate attribute removed from realization and bounds variables
assert ds.realization.encoding.get("coordinates") is None
assert ds.lon_bnds.encoding.get("coordinates") is None
Expand Down Expand Up @@ -559,6 +511,7 @@ def test_fixes_applied_decadal_MPI_M_day(tmpdir, load_esgf_test_data):
# check AddScalarCoord Fix is applied
assert "reftime" in ds.coords
assert ds.reftime.dims == ()
assert ds.reftime.encoding["calendar"] == ds.time.values[0].calendar
assert ds.reftime.values == np.array(
cftime.DatetimeGregorian(1960, 11, 1, 0, 0, 0, 0), dtype=object
)
Expand All @@ -580,15 +533,6 @@ def test_fixes_applied_decadal_MPI_M_day(tmpdir, load_esgf_test_data):
== "For more information on the ripf, refer to the variant_label, initialization_description, physics_description and forcing_description global attributes"
)

# check RemoveFillValuesFix is applied
# assert no fill value for coordinate variables
assert ds.lon.encoding.get("_FillValue") is None
assert ds.lat.encoding.get("_FillValue") is None
assert ds.time.encoding.get("_FillValue") is None

# compare to e.g. lon_bnds where the fill value is added by xarray but we haven't removed it
assert math.isnan(ds.lon_bnds.encoding.get("_FillValue"))

# check coordinate attribute removed from realization and bounds variables
assert ds.realization.encoding.get("coordinates") is None
assert ds.lon_bnds.encoding.get("coordinates") is None
Expand Down Expand Up @@ -643,8 +587,9 @@ def test_fixes_applied_decadal_CMCC_mon(tmpdir, load_esgf_test_data):
# check AddScalarCoord Fix is applied
assert "reftime" in ds.coords
assert ds.reftime.dims == ()
assert ds.reftime.encoding["calendar"] == ds.time.values[0].calendar
assert ds.reftime.values == np.array(
cftime.DatetimeGregorian(1960, 11, 1, 0, 0, 0, 0), dtype=object
cftime.DatetimeNoLeap(1960, 11, 1, 0, 0, 0, 0), dtype=object
)

# check AddCoordFix is applied
Expand All @@ -664,15 +609,6 @@ def test_fixes_applied_decadal_CMCC_mon(tmpdir, load_esgf_test_data):
== "For more information on the ripf, refer to the variant_label, initialization_description, physics_description and forcing_description global attributes"
)

# check RemoveFillValuesFix is applied
# assert no fill value for coordinate variables
assert ds.lon.encoding.get("_FillValue") is None
assert ds.lat.encoding.get("_FillValue") is None
assert ds.time.encoding.get("_FillValue") is None

# compare to e.g. lon_bnds where the fill value is added by xarray but we haven't removed it
assert math.isnan(ds.lon_bnds.encoding.get("_FillValue"))

# check coordinate attribute removed from realization and bounds variables
assert ds.realization.encoding.get("coordinates") is None
assert ds.lon_bnds.encoding.get("coordinates") is None
Expand Down Expand Up @@ -728,6 +664,7 @@ def test_fixes_applied_decadal_CMCC_day(tmpdir, load_esgf_test_data):
# check AddScalarCoord Fix is applied
assert "reftime" in ds.coords
assert ds.reftime.dims == ()
assert ds.reftime.encoding["calendar"] == ds.time.values[0].calendar
assert ds.reftime.values == np.array(
cftime.DatetimeGregorian(1960, 11, 1, 0, 0, 0, 0), dtype=object
)
Expand All @@ -749,15 +686,6 @@ def test_fixes_applied_decadal_CMCC_day(tmpdir, load_esgf_test_data):
== "For more information on the ripf, refer to the variant_label, initialization_description, physics_description and forcing_description global attributes"
)

# check RemoveFillValuesFix is applied
# assert no fill value for coordinate variables
assert ds.lon.encoding.get("_FillValue") is None
assert ds.lat.encoding.get("_FillValue") is None
assert ds.time.encoding.get("_FillValue") is None

# compare to e.g. lon_bnds where the fill value is added by xarray but we haven't removed it
assert math.isnan(ds.lon_bnds.encoding.get("_FillValue"))

# check coordinate attribute removed from realization and bounds variables
assert ds.realization.encoding.get("coordinates") is None
assert ds.lon_bnds.encoding.get("coordinates") is None
Expand Down

0 comments on commit 34a4360

Please sign in to comment.