Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix reftime calendar and remove fill value fix #89

Merged
merged 3 commits into from
Jan 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 0 additions & 16 deletions daops/data_utils/attr_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,22 +54,6 @@ def add_global_attrs_if_needed(ds_id, ds, **operands):
return ds


def remove_fill_values(ds_id, ds, **operands):
    """
    Remove _FillValue attribute that is added by xarray.

    :param ds_id: Dataset identifier (not used by this fix)
    :param ds: Xarray Dataset
    :param operands: sequence of arguments (accepted so the signature matches
        the other fix functions in this module; not used here)
    :return: Xarray Dataset

    For each dimension of the main variable, if the coordinate of the same
    name is one-dimensional along that dimension, its ``_FillValue`` encoding
    is set to None. Other variables (e.g. bounds) are left untouched.
    """
    main_var = xu.get_main_variable(ds)

    for coord_id in ds[main_var].dims:
        # Only handle coordinates whose sole dimension is themselves,
        # e.g. ``lat`` with dims ``("lat",)``.
        if ds.coords[coord_id].dims == (coord_id,):
            ds[coord_id].encoding["_FillValue"] = None

    return ds


def remove_coord_attr(ds_id, ds, **operands):
"""
:param ds: Xarray DataSet
Expand Down
11 changes: 8 additions & 3 deletions daops/fix_utils/decadal_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,17 @@
}


def get_time_calendar(ds_id, ds):
    """
    Return the calendar of the first value of the dataset's time coordinate.

    :param ds_id: Dataset identifier, used only in the error message
    :param ds: Xarray Dataset with a ``time`` coordinate whose values expose
        a ``calendar`` attribute (as read from ``ds.time.values[0].calendar``)
    :return: the ``calendar`` attribute of the first time value
    :raises IndexError: if the time coordinate has no first element
    """
    times = ds.time.values
    try:
        first_time = times[0]
    except IndexError as err:
        # Same exception type as plain indexing would raise, but the message
        # identifies the dataset that could not be processed.
        raise IndexError(
            f"Cannot determine time calendar for {ds_id}: "
            "time coordinate has no values to index"
        ) from err
    return first_time.calendar


def get_lead_times(ds_id, ds):

start_date = datetime.fromisoformat(get_start_date(ds_id, ds))

times = ds.time.values
cal = times[0].calendar
cal = get_time_calendar(ds_id, ds)
reftime = cftime.datetime(
start_date.year,
start_date.month,
Expand All @@ -46,7 +51,7 @@ def get_lead_times(ds_id, ds):

lead_times = []
# calculate leadtime from reftime and valid times
for time in times:
for time in ds.time.values:
td = time - reftime
days = td.days
lead_times.append(days)
Expand Down
22 changes: 0 additions & 22 deletions tests/test_data_utils/test_attr_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from daops.data_utils.attr_utils import add_global_attrs_if_needed
from daops.data_utils.attr_utils import edit_global_attrs
from daops.data_utils.attr_utils import edit_var_attrs
from daops.data_utils.attr_utils import remove_fill_values
from tests._common import CMIP6_DECADAL
from tests._common import MINI_ESGF_MASTER_DIR

Expand Down Expand Up @@ -135,24 +134,3 @@ def test_add_global_attrs_if_needed(load_esgf_test_data):
== "EC-EARTH model output prepared for CMIP5 historical"
)
assert ds_change_global_attrs.attrs["test"] == "this is a new test attribute"


def test_remove_fill_values(load_esgf_test_data):
    """Check remove_fill_values sets the _FillValue encoding of the coordinates to None."""
    ds = xr.open_mfdataset(
        f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip5/data/cmip5/output1/ICHEC/EC-EARTH/historical/mon/atmos/Amon/r1i1p1/latest/tas/tas_Amon_EC-EARTH_historical_r1i1p1_185001-185912.nc",
        combine="by_coords",
        use_cftime=True,
    )

    ds_id = "cmip5.output1.ICHEC.EC-EARTH.historical.mon.atmos.Amon.r1i1p1.latest.tas"

    # The original asserted ds.lat three times; check each coordinate once.
    # NOTE(review): assumes the main variable's dimensions are lat, lon and
    # time, as in the integration tests - confirm against the test data.
    coord_names = ("lat", "lon", "time")

    # before the fix: no _FillValue entry in the encoding
    for name in coord_names:
        assert ds[name].encoding.get("_FillValue", "") == ""

    operands = {}
    ds = remove_fill_values(ds_id, ds, **operands)

    # after the fix: _FillValue is explicitly set to None
    for name in coord_names:
        assert ds[name].encoding.get("_FillValue", "") is None
102 changes: 15 additions & 87 deletions tests/test_fixes_applied.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,9 @@ def test_fixes_applied_decadal_MOHC_mon(tmpdir, load_esgf_test_data):
# check AddScalarCoord Fix is applied
assert "reftime" in ds.coords
assert ds.reftime.dims == ()
assert ds.reftime.encoding["calendar"] == ds.time.values[0].calendar
assert ds.reftime.values == np.array(
cftime.DatetimeGregorian(2004, 11, 1, 0, 0, 0, 0), dtype=object
cftime.Datetime360Day(2004, 11, 1, 0, 0, 0, 0), dtype=object
)

# check AddCoordFix is applied
Expand All @@ -79,15 +80,6 @@ def test_fixes_applied_decadal_MOHC_mon(tmpdir, load_esgf_test_data):
== "For more information on the ripf, refer to the variant_label, initialization_description, physics_description and forcing_description global attributes"
)

# check RemoveFillValuesFix is applied
# assert no fill value for coordinate variables
assert ds.lon.encoding.get("_FillValue") is None
assert ds.lat.encoding.get("_FillValue") is None
assert ds.time.encoding.get("_FillValue") is None

# compare to e.g. lon_bnds where the fill value is added by xarray but we haven't removed it
assert math.isnan(ds.lon_bnds.encoding.get("_FillValue"))

# check coordinate attribute removed from realization and bounds variables
assert ds.realization.encoding.get("coordinates") is None
assert ds.lon_bnds.encoding.get("coordinates") is None
Expand Down Expand Up @@ -142,8 +134,9 @@ def test_fixes_applied_decadal_MOHC_day(tmpdir, load_esgf_test_data):
# check AddScalarCoord Fix is applied
assert "reftime" in ds.coords
assert ds.reftime.dims == ()
assert ds.reftime.encoding["calendar"] == ds.time.values[0].calendar
assert ds.reftime.values == np.array(
cftime.DatetimeGregorian(1960, 11, 1, 0, 0, 0, 0), dtype=object
cftime.Datetime360Day(1960, 11, 1, 0, 0, 0, 0), dtype=object
)

# check AddCoordFix is applied
Expand All @@ -163,15 +156,6 @@ def test_fixes_applied_decadal_MOHC_day(tmpdir, load_esgf_test_data):
== "For more information on the ripf, refer to the variant_label, initialization_description, physics_description and forcing_description global attributes"
)

# check RemoveFillValuesFix is applied
# assert no fill value for coordinate variables
assert ds.lon.encoding.get("_FillValue") is None
assert ds.lat.encoding.get("_FillValue") is None
assert ds.time.encoding.get("_FillValue") is None

# compare to e.g. lon_bnds where the fill value is added by xarray but we haven't removed it
assert math.isnan(ds.lon_bnds.encoding.get("_FillValue"))

# check coordinate attribute removed from realization and bounds variables
assert ds.realization.encoding.get("coordinates") is None
assert ds.lon_bnds.encoding.get("coordinates") is None
Expand Down Expand Up @@ -225,8 +209,9 @@ def test_fixes_applied_decadal_EC_Earth_mon(tmpdir, load_esgf_test_data):
# check AddScalarCoord Fix is applied
assert "reftime" in ds.coords
assert ds.reftime.dims == ()
assert ds.reftime.encoding["calendar"] == ds.time.values[0].calendar
assert ds.reftime.values == np.array(
cftime.DatetimeGregorian(1960, 11, 1, 0, 0, 0, 0), dtype=object
cftime.DatetimeProlepticGregorian(1960, 11, 1, 0, 0, 0, 0), dtype=object
)

# check AddCoordFix is applied
Expand All @@ -246,15 +231,6 @@ def test_fixes_applied_decadal_EC_Earth_mon(tmpdir, load_esgf_test_data):
== "For more information on the ripf, refer to the variant_label, initialization_description, physics_description and forcing_description global attributes"
)

# check RemoveFillValuesFix is applied
# assert no fill value for coordinate variables
assert ds.lon.encoding.get("_FillValue") is None
assert ds.lat.encoding.get("_FillValue") is None
assert ds.time.encoding.get("_FillValue") is None

# compare to e.g. lon_bnds where the fill value is added by xarray but we haven't removed it
assert math.isnan(ds.lon_bnds.encoding.get("_FillValue"))

# check coordinate attribute removed from realization and bounds variables
assert ds.realization.encoding.get("coordinates") is None
assert ds.lon_bnds.encoding.get("coordinates") is None
Expand Down Expand Up @@ -308,8 +284,9 @@ def test_fixes_applied_decadal_EC_Earth_day(tmpdir, load_esgf_test_data):
# check AddScalarCoord Fix is applied
assert "reftime" in ds.coords
assert ds.reftime.dims == ()
assert ds.reftime.encoding["calendar"] == ds.time.values[0].calendar
assert ds.reftime.values == np.array(
cftime.DatetimeGregorian(1961, 11, 1, 0, 0, 0, 0), dtype=object
cftime.DatetimeProlepticGregorian(1961, 11, 1, 0, 0, 0, 0), dtype=object
)

# check AddCoordFix is applied
Expand All @@ -329,15 +306,6 @@ def test_fixes_applied_decadal_EC_Earth_day(tmpdir, load_esgf_test_data):
== "For more information on the ripf, refer to the variant_label, initialization_description, physics_description and forcing_description global attributes"
)

# check RemoveFillValuesFix is applied
# assert no fill value for coordinate variables
assert ds.lon.encoding.get("_FillValue") is None
assert ds.lat.encoding.get("_FillValue") is None
assert ds.time.encoding.get("_FillValue") is None

# compare to e.g. lon_bnds where the fill value is added by xarray but we haven't removed it
assert math.isnan(ds.lon_bnds.encoding.get("_FillValue"))

# check coordinate attribute removed from realization and bounds variables
assert ds.realization.encoding.get("coordinates") is None
assert ds.lon_bnds.encoding.get("coordinates") is None
Expand All @@ -352,7 +320,7 @@ def test_fixes_applied_decadal_EC_Earth_day(tmpdir, load_esgf_test_data):
def test_fixes_applied_decadal_EC_Earth_url_fix(tmpdir, load_esgf_test_data):
# change fix index to test index which holds these decadal fixes
fix_index = CONFIG["elasticsearch"]["fix_store"]
test_fix_index = "c3s-roocs-fix-for-tests"
test_fix_index = "c3s-roocs-fix"
CONFIG["elasticsearch"]["fix_store"] = test_fix_index

# don't use catalog - decadal datasets not in current catalog
Expand Down Expand Up @@ -391,6 +359,7 @@ def test_fixes_applied_decadal_EC_Earth_url_fix(tmpdir, load_esgf_test_data):
# check AddScalarCoord Fix is applied
assert "reftime" in ds.coords
assert ds.reftime.dims == ()
assert ds.reftime.encoding["calendar"] == ds.time.values[0].calendar
assert ds.reftime.values == np.array(
cftime.DatetimeGregorian(1960, 11, 1, 0, 0, 0, 0), dtype=object
)
Expand All @@ -412,15 +381,6 @@ def test_fixes_applied_decadal_EC_Earth_url_fix(tmpdir, load_esgf_test_data):
== "For more information on the ripf, refer to the variant_label, initialization_description, physics_description and forcing_description global attributes"
)

# check RemoveFillValuesFix is applied
# assert no fill value for coordinate variables
assert ds.lon.encoding.get("_FillValue") is None
assert ds.lat.encoding.get("_FillValue") is None
assert ds.time.encoding.get("_FillValue") is None

# compare to e.g. lon_bnds where the fill value is added by xarray but we haven't removed it
assert math.isnan(ds.lon_bnds.encoding.get("_FillValue"))

# check coordinate attribute removed from realization and bounds variables
assert ds.realization.encoding.get("coordinates") is None
assert ds.lon_bnds.encoding.get("coordinates") is None
Expand Down Expand Up @@ -475,6 +435,7 @@ def test_fixes_applied_decadal_MPI_M_mon(tmpdir, load_esgf_test_data):
# check AddScalarCoord Fix is applied
assert "reftime" in ds.coords
assert ds.reftime.dims == ()
assert ds.reftime.encoding["calendar"] == ds.time.values[0].calendar
assert ds.reftime.values == np.array(
cftime.DatetimeGregorian(1960, 11, 1, 0, 0, 0, 0), dtype=object
)
Expand All @@ -496,15 +457,6 @@ def test_fixes_applied_decadal_MPI_M_mon(tmpdir, load_esgf_test_data):
== "For more information on the ripf, refer to the variant_label, initialization_description, physics_description and forcing_description global attributes"
)

# check RemoveFillValuesFix is applied
# assert no fill value for coordinate variables
assert ds.lon.encoding.get("_FillValue") is None
assert ds.lat.encoding.get("_FillValue") is None
assert ds.time.encoding.get("_FillValue") is None

# compare to e.g. lon_bnds where the fill value is added by xarray but we haven't removed it
assert math.isnan(ds.lon_bnds.encoding.get("_FillValue"))

# check coordinate attribute removed from realization and bounds variables
assert ds.realization.encoding.get("coordinates") is None
assert ds.lon_bnds.encoding.get("coordinates") is None
Expand Down Expand Up @@ -559,6 +511,7 @@ def test_fixes_applied_decadal_MPI_M_day(tmpdir, load_esgf_test_data):
# check AddScalarCoord Fix is applied
assert "reftime" in ds.coords
assert ds.reftime.dims == ()
assert ds.reftime.encoding["calendar"] == ds.time.values[0].calendar
assert ds.reftime.values == np.array(
cftime.DatetimeGregorian(1960, 11, 1, 0, 0, 0, 0), dtype=object
)
Expand All @@ -580,15 +533,6 @@ def test_fixes_applied_decadal_MPI_M_day(tmpdir, load_esgf_test_data):
== "For more information on the ripf, refer to the variant_label, initialization_description, physics_description and forcing_description global attributes"
)

# check RemoveFillValuesFix is applied
# assert no fill value for coordinate variables
assert ds.lon.encoding.get("_FillValue") is None
assert ds.lat.encoding.get("_FillValue") is None
assert ds.time.encoding.get("_FillValue") is None

# compare to e.g. lon_bnds where the fill value is added by xarray but we haven't removed it
assert math.isnan(ds.lon_bnds.encoding.get("_FillValue"))

# check coordinate attribute removed from realization and bounds variables
assert ds.realization.encoding.get("coordinates") is None
assert ds.lon_bnds.encoding.get("coordinates") is None
Expand Down Expand Up @@ -643,8 +587,9 @@ def test_fixes_applied_decadal_CMCC_mon(tmpdir, load_esgf_test_data):
# check AddScalarCoord Fix is applied
assert "reftime" in ds.coords
assert ds.reftime.dims == ()
assert ds.reftime.encoding["calendar"] == ds.time.values[0].calendar
assert ds.reftime.values == np.array(
cftime.DatetimeGregorian(1960, 11, 1, 0, 0, 0, 0), dtype=object
cftime.DatetimeNoLeap(1960, 11, 1, 0, 0, 0, 0), dtype=object
)

# check AddCoordFix is applied
Expand All @@ -664,15 +609,6 @@ def test_fixes_applied_decadal_CMCC_mon(tmpdir, load_esgf_test_data):
== "For more information on the ripf, refer to the variant_label, initialization_description, physics_description and forcing_description global attributes"
)

# check RemoveFillValuesFix is applied
# assert no fill value for coordinate variables
assert ds.lon.encoding.get("_FillValue") is None
assert ds.lat.encoding.get("_FillValue") is None
assert ds.time.encoding.get("_FillValue") is None

# compare to e.g. lon_bnds where the fill value is added by xarray but we haven't removed it
assert math.isnan(ds.lon_bnds.encoding.get("_FillValue"))

# check coordinate attribute removed from realization and bounds variables
assert ds.realization.encoding.get("coordinates") is None
assert ds.lon_bnds.encoding.get("coordinates") is None
Expand Down Expand Up @@ -728,6 +664,7 @@ def test_fixes_applied_decadal_CMCC_day(tmpdir, load_esgf_test_data):
# check AddScalarCoord Fix is applied
assert "reftime" in ds.coords
assert ds.reftime.dims == ()
assert ds.reftime.encoding["calendar"] == ds.time.values[0].calendar
assert ds.reftime.values == np.array(
cftime.DatetimeGregorian(1960, 11, 1, 0, 0, 0, 0), dtype=object
)
Expand All @@ -749,15 +686,6 @@ def test_fixes_applied_decadal_CMCC_day(tmpdir, load_esgf_test_data):
== "For more information on the ripf, refer to the variant_label, initialization_description, physics_description and forcing_description global attributes"
)

# check RemoveFillValuesFix is applied
# assert no fill value for coordinate variables
assert ds.lon.encoding.get("_FillValue") is None
assert ds.lat.encoding.get("_FillValue") is None
assert ds.time.encoding.get("_FillValue") is None

# compare to e.g. lon_bnds where the fill value is added by xarray but we haven't removed it
assert math.isnan(ds.lon_bnds.encoding.get("_FillValue"))

# check coordinate attribute removed from realization and bounds variables
assert ds.realization.encoding.get("coordinates") is None
assert ds.lon_bnds.encoding.get("coordinates") is None
Expand Down