Skip to content

Commit

Permalink
CDAT Migration: Refactor annual_cycle_zonal_mean set (#798)
Browse files Browse the repository at this point in the history
* Refactor `annual_cycle_zonal_mean` set

* Address PR review comments

* Add lat lon regression testing

* Add debugging scripts

* Update `_open_climo_dataset()` to decode times as workaround to misaligned time coords
- Update `annual_cycle_zonal_mean_plot.py` to convert time coordinates to month integers

* Fix unit tests

* Remove old plotter

* Add script to debug decode_times=True and ncclimo file

* Update plotter time values to month integers

* Fix slow `.load()` and multiprocessing issue
- Due to incorrectly updating `keep_bnds` logic
- Add `_encode_time_coords()` to workaround cftime issue `ValueError: "months since" units only allowed for "360_day" calendar`

* Update `_encode_time_coords()` docstring

* Add AODVIS debug script

* update AODVIS obs datasets; regression test results

---------

Co-authored-by: Tom Vo <[email protected]>
  • Loading branch information
chengzhuzhang and tomvothecoder committed Oct 29, 2024
1 parent eeba1dc commit 7910a39
Show file tree
Hide file tree
Showing 20 changed files with 5,715 additions and 410 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
[#]
sets = ["annual_cycle_zonal_mean"]
case_id = "AOD_550"
variables = ["AODVIS"]
ref_name = "AOD_550"
reference_name = "AERONET-composite AOD"
test_colormap = "Oranges"
reference_colormap = "Oranges"
diff_colormap = "BrBG_r"
contour_levels = [0., 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2]
diff_levels = [-0.5, -0.4, -0.3, -0.2, -0.1, -0.05, -0.02, 0.02, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5]

# [#]
# sets = ["annual_cycle_zonal_mean"]
# case_id = "OMI-MLS"
# variables = ["SCO"]
# ref_name = "OMI-MLS"
# reference_name = "OMI-MLS"
# test_colormap = "WhiteBlueGreenYellowRed.rgb"
# reference_colormap = "WhiteBlueGreenYellowRed.rgb"
# diff_colormap = "diverging_bwr.rgb"
# contour_levels = [180,200,220,240,260,280,300,320,340,360,380]
# diff_levels = [ -40, -30, -20, -15, -10, -5,-2,2, 5, 10, 15, 20, 30,40]

# [#]
# sets = ["annual_cycle_zonal_mean"]
# case_id = "OMI-MLS"
# variables = ["TCO"]
# ref_name = "OMI-MLS"
# reference_name = "OMI-MLS"
# test_colormap = "WhiteBlueGreenYellowRed.rgb"
# reference_colormap = "WhiteBlueGreenYellowRed.rgb"
# diff_colormap = "diverging_bwr.rgb"
# contour_levels = [12,16,20,24,28,32,36,40,44]
# diff_levels = [-20,-15,-10,-5,-2,2,5,10,15,20]

# [#]
# sets = ["annual_cycle_zonal_mean"]
# case_id = "SST_CL_HadISST"
# variables = ["SST"]
# ref_name = "HadISST_CL"
# reference_name = "HadISST (Climatology)"
# contour_levels = [-1, 0, 1, 3, 6, 9, 12, 15, 18, 20, 22, 24, 26, 28, 29]
# diff_levels = [-5, -4, -3, -2, -1, -0.5, -0.2, 0.2, 0.5, 1, 2, 3, 4, 5]


# [#]
# sets = ["annual_cycle_zonal_mean"]
# case_id = "GPCP_v3.2"
# variables = ["PRECT"]
# ref_name = "GPCP_v3.2"
# reference_name = "GPCP_v3.2"
# test_colormap = "WhiteBlueGreenYellowRed.rgb"
# reference_colormap = "WhiteBlueGreenYellowRed.rgb"
# diff_colormap = "BrBG"
# contour_levels = [0.5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16]
# diff_levels = [-5, -4, -3, -2, -1, -0.5, 0.5, 1, 2, 3, 4, 5]

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# %%
from auxiliary_tools.cdat_regression_testing.base_run_script import run_set

SET_NAME = "annual_cycle_zonal_mean"
SET_DIR = (
"/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/669-annual_cycle_zonal_mean"
)
# CFG_PATH = "auxiliary_tools/cdat_regression_testing/669-annual_cycle_zonal_mean/669-annual_cycle_zonal_mean.cfg"
CFG_PATH = "e3sm_diags/driver/default_diags/annual_cycle_zonal_mean_model_vs_obs.cfg"
MULTIPROCESSING = True

run_set(SET_NAME, SET_DIR, CFG_PATH, MULTIPROCESSING)

# %%
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import numpy as np
import xcdat as xc

dev_path = "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/669-annual_cycle_zonal_mean-debug/annual_cycle_zonal_mean/AOD_550/AOD_550-AODVIS-ANNUALCYCLE-global_ref.nc"
main_path = "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/main/annual_cycle_zonal_mean/AOD_550/AOD_550-AODVIS-Annual-Cycle_test.nc"


var_a = xc.open_dataset(dev_path)["AODVIS"]
var_b = xc.open_dataset(main_path)["AODVIS"]

"""
Floating point comparison
AssertionError:
Not equal to tolerance rtol=1e-07, atol=0
Mismatched elements: 1808 / 2160 (83.7%)
Max absolute difference: 0.12250582
Max relative difference: 91.14554689
x: array([[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],...
y: array([[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],...
"""
np.testing.assert_allclose(var_a, var_b)

# Get the max of all values
# -------------------------
# 0.28664299845695496
print(var_a.max().item())
# 0.2866430557436412
print(var_b.max().item())

# Get the min of all values
# -------------------------
# 0.0
print(var_a.min().item())
# 0.0
print(var_b.min().item())

# Get the sum of all values
# -------------------------
# 224.2569122314453
print(var_a.sum().item())
# 224.25691348856003
print(var_b.sum().item())

# Get the mean of all values
# -------------------------
# 0.10382264107465744
print(var_a.mean().item())
# 0.1038226451335926
print(var_b.mean().item())


# %%
# Get the max absolute diff
# -------------------------
# 0.12250582128763199
print((var_a - var_b).max().item())
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# %%
from glob import glob

import cftime
import xcdat as xc

args = {
"add_bounds": ["X", "Y"],
"coords": "minimal",
"compat": "override",
"chunks": "auto",
}

filepath = "/global/cfs/cdirs/e3sm/diagnostics/observations/Atm/climatology/MERRA2_Aerosols/MERRA2_Aerosols_[0-1][0-9]_*climo.nc"
paths = sorted(glob(filepath))

# Check time coordinates are in ascending order with wildcard and sorted glob
ds_mf = xc.open_mfdataset(filepath, decode_times=True, **args)
ds_mf_raw = xc.open_mfdataset(paths, **args, decode_times=True)

# %%

# filepath 1: '/global/cfs/cdirs/e3sm/diagnostics/observations/Atm/climatology/MERRA2_Aerosols/MERRA2_Aerosols_01_198001_202101_climo.nc'
ds_raw_time = xc.open_mfdataset(paths[0], **args, decode_times=False)

# 10782720
time_int = ds_raw_time.time.values.item()
# 'minutes since 1980-01-01 00:30:00'
units = ds_raw_time.time.units
# None, so "standard"
calendar = ds_raw_time.time.attrs.get("calendar", "standard")

# cftime.DatetimeGregorian(2000, 7, 2, 0, 30, 0, 0, has_year_zero=False)
cftime.num2date(time_int, units, calendar=calendar)

# %%
import datetime

first_step = datetime.datetime(1980, 1, 1, hour=0, minute=30)
time_delta = datetime.timedelta(minutes=10782720)

# datetime.datetime(2000, 7, 2, 0, 30)
print(first_step + time_delta)
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# %%
from glob import glob

import xarray as xr
import xcdat as xc

args = {
"decode_times": True,
"add_bounds": ["X", "Y"],
"coords": "minimal",
"compat": "override",
"chunks": "auto",
}

filepath = "/global/cfs/cdirs/e3sm/diagnostics/observations/Atm/climatology/MERRA2_Aerosols/MERRA2_Aerosols_[0-1][0-9]_*climo.nc"
paths = sorted(glob(filepath))

# filepath 1: '/global/cfs/cdirs/e3sm/diagnostics/observations/Atm/climatology/MERRA2_Aerosols/MERRA2_Aerosols_01_198001_202101_climo.nc'
ds_fp1 = xc.open_mfdataset(paths[0], **args)
# filepath 2: '/global/cfs/cdirs/e3sm/diagnostics/observations/Atm/climatology/MERRA2_Aerosols/MERRA2_Aerosols_02_198002_202102_climo.nc'
ds_fp2 = xc.open_mfdataset(paths[1], **args)

# Check the units -- different
# ----------------------------
# 'minutes since 1980-01-01 00:30:00'
ds_fp1.time.attrs["units"]
# 'minutes since 1980-02-01 00:30:00'
ds_fp2.time.attrs["units"]

# Check the time values -- same
# ----------------------------
# 10782720
ds_fp1.time.values[0]
# 10782720
ds_fp2.time.values[0]


# %%
import cftime


def _get_encoded_time(var: xr.DataArray) -> xr.DataArray:
"""Convert `cftime` datetime objects to encoded float objects.
This function is useful for plotters that plot the time axis, which
requires time coordinates to be encoded as floats if arithmetic is
performed.
Parameters
----------
var : xr.DataArray
The variable.
Returns
-------
xr.DataArray
The encoded time coordinates.
"""
time_coords = xc.get_dim_coords(var, axis="T")

for index, time_val in enumerate(time_coords):
new_time_val = cftime.date2num(
time_val.item(),
time_val.encoding["units"],
calendar=time_val.encoding["calendar"],
)
time_coords.values[index] = new_time_val
time_coords[time_coords.name].values[index] = new_time_val

return time_coords


# %%
new_time = _get_encoded_time(ds_fp1)

# %%
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# %%
from glob import glob

import xarray as xr
import xcdat as xc

args = {
"decode_times": False,
"add_bounds": ["X", "Y"],
"coords": "minimal",
"compat": "override",
"chunks": "auto",
}

# %%
filepath = "/global/cfs/cdirs/e3sm/diagnostics/observations/Atm/climatology/AOD_550/AOD_550_[0-1][0-9]_*climo.nc"
filepaths = glob(filepath)
ds = xc.open_mfdataset(filepaths, **args)

# array([ 15.5101, 46.03 , 76.5498, 107.57 , 137.5895, 168.6097,
# 198.6292, 229.6494, 259.6689, 290.6891, 321.7142, 351.2334])
ds.time.values

# %%
ds1 = xc.open_mfdataset(filepaths[0], **args)
ds2 = xc.open_mfdataset(filepaths[1], **args)

# 'days since 2000-03-01'
ds1.time.units
# 'days since 2000-03-01'
ds2.time.units
Loading

0 comments on commit 7910a39

Please sign in to comment.