Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix obs climatology #1135

Draft
wants to merge 5 commits into
base: main-dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 71 additions & 25 deletions pyaerocom/colocation.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from pyaerocom.time_resampler import TimeResampler
from pyaerocom.tstype import TsType
from pyaerocom.variable import Variable
from pyaerocom.stationdata import StationData

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -393,13 +394,22 @@ def check_ts_type(data, ts_type):


def _colocate_site_data_helper(
stat_data, stat_data_ref, var, var_ref, ts_type, resample_how, min_num_obs, use_climatology_ref
stat_data: StationData,
stat_data_ref: StationData,
var: str,
var_ref: str,
ts_type: str,
resample_how: dict or str,
min_num_obs: int,
use_climatology_kwargs: dict,
):
"""
Helper method that colocates two timeseries from 2 StationData objects

Used in main loop of :func:`colocate_gridded_ungridded`



Parameters
----------
stat_data : StationData
Expand All @@ -419,8 +429,13 @@ def _colocate_site_data_helper(
to aggregate from hourly to daily, rather than the mean.
min_num_obs : int or dict, optional
minimum number of observations for resampling of time
use_climatology_ref : bool
if True, climatological timeseries are used from observations
use_climatology_kwargs : dict
if provided, climatology is calculated from obs data following the
kwargs provided in this dict. e.g {'resample_how': 'mean',
'set_year': 2000, 'min_num_obs': 10,
'clim_mincount': 10,
'clim_freq': 'monthly'}
climatology is only calculed use_climatology_kwargs is not None

Raises
------
Expand All @@ -440,8 +455,11 @@ def _colocate_site_data_helper(
var, ts_type=ts_type, how=resample_how, min_num_obs=min_num_obs, inplace=True
)[var]

if use_climatology_ref:
obs_ts = stat_data_ref.calc_climatology(var_ref, min_num_obs=min_num_obs)[var_ref]
if isinstance(use_climatology_kwargs, dict):
clim_min_obs = use_climatology_kwargs.get('min_num_obs', None)
if clim_min_obs is None:
use_climatology_kwargs['min_num_obs'] = min_num_obs
obs_ts = stat_data_ref.calc_climatology(var_ref, **use_climatology_kwargs)[var_ref]
else:
obs_ts = stat_data_ref.resample_time(
var_ref, ts_type=ts_type, how=resample_how, min_num_obs=min_num_obs, inplace=True
Expand All @@ -452,7 +470,14 @@ def _colocate_site_data_helper(


def _colocate_site_data_helper_timecol(
stat_data, stat_data_ref, var, var_ref, ts_type, resample_how, min_num_obs, use_climatology_ref
stat_data: StationData,
stat_data_ref: StationData,
var: str,
var_ref: str,
ts_type: str,
resample_how: dict or str,
min_num_obs: int,
use_climatology_kwargs: dict
):
"""
Helper method that colocates two timeseries from 2 StationData objects
Expand Down Expand Up @@ -482,27 +507,31 @@ def _colocate_site_data_helper_timecol(
to aggregate from hourly to daily, rather than the mean.
min_num_obs : int or dict, optional
minimum number of observations for resampling of time
use_climatology_ref : bool
if True, NotImplementedError is raised
use_climatology_kwargs: dict
if provided, NotImplementedError is raised

Raises
------
TemporalResolutionError
if model or obs sampling frequency is lower than desired output frequency
NotImplementedError
if input arg `use_climatology_ref` is True.
if input arg `use_climatology_kwargs` is provided.

Returns
-------
pandas.DataFrame
dataframe containing the colocated input data (column names are
data and ref)
"""
if use_climatology_ref:
raise NotImplementedError(
print("use_climatology_kwargs", use_climatology_kwargs)
if use_climatology_kwargs is not None:
if use_climatology_kwargs == False:
pass
else:
raise NotImplementedError(
"Using observation climatology in colocation with option "
"colocate_time=True is not available yet ..."
)
"use_climatology_kwargs=True is not available yet ..."
)

grid_tst = stat_data.get_var_ts_type(var)
obs_tst = stat_data_ref.get_var_ts_type(var_ref)
Expand Down Expand Up @@ -587,7 +616,7 @@ def colocate_gridded_ungridded(
update_baseyear_gridded=None,
min_num_obs=None,
colocate_time=False,
use_climatology_ref=False,
use_climatology_kwargs=None,
resample_how=None,
**kwargs,
):
Expand Down Expand Up @@ -649,8 +678,13 @@ def colocate_gridded_ungridded(
if True and if original time resolution of data is higher than desired
time resolution (`ts_type`), then both datasets are colocated in time
*before* resampling to lower resolution.
use_climatology_ref : bool
if True, climatological timeseries are used from observations
use_climatology_kwargs : dict or str, optional. If str = 'default' then default values
are used.
Climatology is calculated from obs data following the
kwargs provided e.g like this {'resample_how': 'mean',
'set_year': 2000, 'min_num_obs': 10,
'clim_mincount': 10,
'clim_freq': 'monthly'}
resample_how : str or dict
string specifying how data should be aggregated when resampling in time.
Default is "mean". Can also be a nested dictionary, e.g.
Expand Down Expand Up @@ -731,11 +765,24 @@ def colocate_gridded_ungridded(
if not colocate_time and ts_type < ts_type_data:
data = data.resample_time(str(ts_type), min_num_obs=min_num_obs, how=resample_how)
ts_type_data = ts_type

if use_climatology_ref:
col_freq = "monthly"
obs_start = const.CLIM_START
obs_stop = const.CLIM_STOP
if use_climatology_kwargs == "default" or use_climatology_kwargs == True:
use_climatology_kwargs = dict()
elif use_climatology_kwargs == False:
use_climatology_kwargs = None
if use_climatology_kwargs is not None:
if use_climatology_kwargs.get("clim_freq") is None:
use_climatology_kwargs["clim_freq"] = const.CLIM_FREQ
if use_climatology_kwargs.get("start") is None:
use_climatology_kwargs["start"] = const.CLIM_START
if use_climatology_kwargs.get("stop") is None:
use_climatology_kwargs["stop"] = const.CLIM_STOP
obs_start = const.CLIM_START
if use_climatology_kwargs.get('resample_how') is None:
use_climatology_kwargs['resample_how'] = const.CLIM_RESAMPLE_HOW
if use_climatology_kwargs.get('clim_mincount') is None:
use_climatology_kwargs['clim_mincount'] = const.CLIM_MIN_COUNT

obs_start, obs_stop, col_freq = use_climatology_kwargs["start"], use_climatology_kwargs["stop"], use_climatology_kwargs["clim_freq"]
else:
col_freq = str(ts_type)
obs_start = start
Expand All @@ -752,7 +799,6 @@ def colocate_gridded_ungridded(

# filter_by_meta wipes is_vertical_profile
data_ref = data_ref.filter_by_meta(latitude=lat_range, longitude=lon_range)

# get timeseries from all stations in provided time resolution
# (time resampling is done below in main loop)
all_stats = data_ref.to_station_data_all(
Expand Down Expand Up @@ -853,7 +899,7 @@ def colocate_gridded_ungridded(
ts_type=col_freq,
resample_how=resample_how,
min_num_obs=min_num_obs,
use_climatology_ref=use_climatology_ref,
use_climatology_kwargs=use_climatology_kwargs,
)
else:
_df = _colocate_site_data_helper(
Expand All @@ -864,7 +910,7 @@ def colocate_gridded_ungridded(
ts_type=col_freq,
resample_how=resample_how,
min_num_obs=min_num_obs,
use_climatology_ref=use_climatology_ref,
use_climatology_kwargs=use_climatology_kwargs,
)

# this try/except block was introduced on 23/2/2021 as temporary fix from
Expand Down Expand Up @@ -918,7 +964,7 @@ def colocate_gridded_ungridded(
"from_files": files,
"from_files_ref": None,
"colocate_time": colocate_time,
"obs_is_clim": use_climatology_ref,
"obs_is_clim": use_climatology_kwargs is not None,
"pyaerocom": pya_ver,
"min_num_obs": min_num_obs,
"resample_how": resample_how,
Expand Down
14 changes: 7 additions & 7 deletions pyaerocom/colocation_3d.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def _colocate_vertical_profile_gridded(
var_ref=None,
min_num_obs=None,
colocate_time=False,
use_climatology_ref=False,
use_climatology_kwargs=None,
resample_how=None,
layer_limits: dict[str, dict[str, float]] = None,
obs_stat_data=None,
Expand Down Expand Up @@ -197,7 +197,7 @@ def _colocate_vertical_profile_gridded(
ts_type=col_freq,
resample_how=resample_how,
min_num_obs=min_num_obs,
use_climatology_ref=use_climatology_ref,
use_climatology_kwargs=use_climatology_kwargs,
)
else:
_df = _colocate_site_data_helper(
Expand All @@ -208,7 +208,7 @@ def _colocate_vertical_profile_gridded(
ts_type=col_freq,
resample_how=resample_how,
min_num_obs=min_num_obs,
use_climatology_ref=use_climatology_ref,
use_climatology_kwargs=use_climatology_kwargs,
)

try:
Expand Down Expand Up @@ -258,7 +258,7 @@ def _colocate_vertical_profile_gridded(
"from_files": files,
"from_files_ref": None,
"colocate_time": colocate_time,
"obs_is_clim": use_climatology_ref,
"obs_is_clim": use_climatology_kwargs,
"pyaerocom": pya_ver,
"min_num_obs": min_num_obs,
"resample_how": resample_how,
Expand Down Expand Up @@ -308,7 +308,7 @@ def colocate_vertical_profile_gridded(
update_baseyear_gridded: int = None,
min_num_obs: int | dict | None = None,
colocate_time: bool = False,
use_climatology_ref: bool = False,
use_climatology_kwargs: bool = False,
resample_how: str | dict = None,
colocation_layer_limits: list[dict] = None,
profile_layer_limits: list[dict] = None,
Expand Down Expand Up @@ -409,7 +409,7 @@ def colocate_vertical_profile_gridded(
data = data.resample_time(str(ts_type), min_num_obs=min_num_obs, how=resample_how)
ts_type_data = ts_type

if use_climatology_ref: # pragma: no cover
if use_climatology_kwargs: # pragma: no cover
col_freq = "monthly"
obs_start = const.CLIM_START
obs_stop = const.CLIM_STOP
Expand Down Expand Up @@ -464,7 +464,7 @@ def colocate_vertical_profile_gridded(
var_ref=var_ref,
min_num_obs=min_num_obs,
colocate_time=colocate_time,
use_climatology_ref=use_climatology_ref,
use_climatology_kwargs=use_climatology_kwargs,
resample_how=resample_how,
layer_limits=layer_limits,
obs_stat_data=all_stats["stats"],
Expand Down
11 changes: 6 additions & 5 deletions pyaerocom/colocation_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,10 @@ class ColocationSetup(BrowseDict):
obs_data_dir : str, optional
location of obs data. If None, attempt to infer obs location based on
obs ID.
obs_use_climatology : bool
BETA if True, pyaerocom default climatology is computed from observation
stations (so far only possible for unrgidded / gridded colocation).
obs_use_climatology : dict
dictionary with keyword arguments that are passed used when caluclating
climatology for obs data. Climatology is only calculated if obs_use_climatology
is provided.
obs_vert_type : str
AeroCom vertical code encoded in the model filenames (only AeroCom 3
and later). Specifies which model file should be read in case there are
Expand Down Expand Up @@ -354,7 +355,7 @@ def __init__(
self.obs_name = None
self.obs_data_dir = None

self.obs_use_climatology = False
self.obs_use_climatology = None

self._obs_cache_only = False # only relevant if obs is ungridded
self.obs_vert_type = None
Expand Down Expand Up @@ -1487,7 +1488,7 @@ def _prepare_colocation_args(self, model_var: str, obs_var: str):
args.update(
ts_type=ts_type,
var_ref=obs_var,
use_climatology_ref=self.obs_use_climatology,
use_climatology_kwargs=self.obs_use_climatology,
)
else:
ts_type = self._get_colocation_ts_type(model_data.ts_type, obs_data.ts_type)
Expand Down
2 changes: 1 addition & 1 deletion pyaerocom/combine_vardata_ungridded.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def _combine_2_sites(
to_ts_type,
resample_how=resample_how,
min_num_obs=min_num_obs,
use_climatology_ref=False,
use_climatology_kwargs=None,
)

# remove timestamps where both observations are NaN
Expand Down
8 changes: 4 additions & 4 deletions tests/test_colocation.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def test__regrid_gridded(data_tm5):


@pytest.mark.parametrize(
"stat_data,stat_data_ref,var,var_ref,ts_type,resample_how,min_num_obs, use_climatology_ref,num_valid",
"stat_data,stat_data_ref,var,var_ref,ts_type,resample_how,min_num_obs, use_climatology_kwargs,num_valid",
[
(S4, S3, "concpm10", "concpm10", "monthly", "mean", {"monthly": {"daily": 25}}, False, 10),
(S3, S4, "concpm10", "concpm10", "monthly", "mean", {"monthly": {"daily": 25}}, False, 24),
Expand All @@ -91,7 +91,7 @@ def test__colocate_site_data_helper_timecol(
ts_type,
resample_how,
min_num_obs,
use_climatology_ref,
use_climatology_kwargs,
num_valid,
):
result = _colocate_site_data_helper_timecol(
Expand All @@ -102,7 +102,7 @@ def test__colocate_site_data_helper_timecol(
ts_type,
resample_how,
min_num_obs,
use_climatology_ref,
use_climatology_kwargs,
)

assert isinstance(result, pd.DataFrame)
Expand Down Expand Up @@ -162,7 +162,7 @@ def test_colocate_gridded_ungridded_new_var(data_tm5, aeronetsunv3lev2_subset):
(
dict(
filter_name=f"{ALL_REGION_NAME}-noMOUNTAINS",
use_climatology_ref=True,
use_climatology_kwargs={'clim_freq':'monthly'},
min_num_obs=const.OBS_MIN_NUM_RESAMPLE,
),
"monthly",
Expand Down
6 changes: 3 additions & 3 deletions tests/test_colocation_3d.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def example_earlinet_ungriddeddata():


@pytest.mark.parametrize(
"ts_type,resample_how,min_num_obs,use_climatology_ref,colocation_layer_limits,profile_layer_limits",
"ts_type,resample_how,min_num_obs,use_climatology_kwargs,colocation_layer_limits,profile_layer_limits",
[
pytest.param(
"daily",
Expand All @@ -83,7 +83,7 @@ def test_colocate_vertical_profile_gridded(
ts_type,
resample_how,
min_num_obs,
use_climatology_ref,
use_climatology_kwargs,
colocation_layer_limits,
profile_layer_limits,
):
Expand All @@ -93,7 +93,7 @@ def test_colocate_vertical_profile_gridded(
ts_type=ts_type,
resample_how=resample_how,
min_num_obs=min_num_obs,
use_climatology_ref=use_climatology_ref,
use_climatology_kwargs=use_climatology_kwargs,
colocation_layer_limits=colocation_layer_limits,
profile_layer_limits=profile_layer_limits,
)
Expand Down
Loading
Loading