From d8f4f9b00d6933ee745000f31422ed0004409c3d Mon Sep 17 00:00:00 2001
From: Fabien Collas
Date: Thu, 2 Nov 2023 16:20:53 +0100
Subject: [PATCH 1/3] Added compatibility for get_cmems import function

---
 wavy/insitu_readers.py | 62 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 54 insertions(+), 8 deletions(-)

diff --git a/wavy/insitu_readers.py b/wavy/insitu_readers.py
index 11ecc53d..b49e5c42 100755
--- a/wavy/insitu_readers.py
+++ b/wavy/insitu_readers.py
@@ -18,7 +18,7 @@
 import dotenv
 import xarray as xr
 import logging
-#logging.basicConfig(level=logging.DEBUG)
+# logging.basicConfig(level=logging.DEBUG)
 logging.basicConfig(level=30)
 logger = logging.getLogger(__name__)
 
@@ -44,10 +44,12 @@ variables_frost = load_or_default('variables_frost.yaml')
 
 # ---------------------------------------------------------------------#
 
+
 def get_typeid(insitu_dict: dict, s: str) -> str:
     typeid = insitu_dict[s].get('typeids', 22)
     return typeid
 
+
 def make_frost_reference_time_period(sdate, edate):
     sdate = parse_date(sdate)
     edate = parse_date(edate)
@@ -56,9 +58,10 @@ def make_frost_reference_time_period(sdate, edate):
                             edate.strftime(formatstr))
     return refstr
 
+
 def call_frost_api(
-    sdate: datetime, edate: datetime,
-    nID: str, varstr: str, sensor: str) -> 'requests.models.Response':
+        sdate: datetime, edate: datetime,
+        nID: str, varstr: str, sensor: str) -> 'requests.models.Response':
     """
     make frost api call
     """
@@ -82,6 +85,7 @@ def call_frost_api(
     else:
         return r
 
+
 def call_frost_api_v1(
         nID: str, varstr: str,
         frost_reference_time: str, client_id: str, sensor: str)\
@@ -104,6 +108,7 @@ def call_frost_api_v1(
     print('parameters forst api call: ', parameters)
     return requests.get(endpoint, parameters, auth=(client_id, client_id))
 
+
 def find_preferred(idx, sensors, refs, pref):
     sensorsU = np.unique(sensors)
     preferred_idx = []
@@ -118,6 +123,7 @@ def find_preferred(idx, sensors, refs, pref):
             preferred_idx.append(list(idx_1)[0])
     return preferred_idx
 
+
 def get_frost_df_v1(r: 'requests.models.Response')\
         -> 'pandas.core.frame.DataFrame':
     """
@@ -223,6 +229,7 @@ def get_frost(**kwargs):
     ds = build_xr_ds(var_tuple, varnames)
     return ds
 
+
 def get_nc_thredds(**kwargs):
     sd = kwargs.get('sd')
     ed = kwargs.get('ed')
@@ -248,6 +255,7 @@ def get_nc_thredds(**kwargs):
     var_sliced = ds_sliced[[ncvar, lonstr, latstr]]
     return var_sliced
 
+
 def get_nc_thredds_static_coords(**kwargs):
     sd = kwargs.get('sd')
     ed = kwargs.get('ed')
@@ -294,6 +302,7 @@ def get_nc_thredds_static_coords(**kwargs):
 
     return ds_combined
 
+
 def get_nc_thredds_static_coords_single_file(**kwargs):
     sd = kwargs.get('sd')
     ed = kwargs.get('ed')
@@ -340,6 +349,7 @@ def get_nc_thredds_static_coords_single_file(**kwargs):
 
     return ds_combined
 
+
 def get_cmems(**kwargs):
     sd = kwargs.get('sd')
     ed = kwargs.get('ed')
@@ -376,11 +386,12 @@ def get_cmems(**kwargs):
 
             # builds the dictionary given as an argument to
             dict_var = {coord: ds.coords[coord].values
-                        for coord in list(ds.coords)}
+                        for coord in list(ds.coords) if coord
+                        in [lonstr, latstr, timestr]}
 
-            dict_var.update({var: ds[[var]]
-                            .isel({fixed_dim_str: fixed_dim_idx})
-                            .to_array().values[0] for var in list(ds.data_vars)})
+            dict_var.update({var: build_ts_from_dim_lvls(ds,
+                            fixed_dim_str, var)
+                            for var in list(ds.data_vars)})
 
             # build an xr.dataset with timestr as the only coordinate
             # using build_xr_ds function
@@ -388,7 +399,7 @@ def get_cmems(**kwargs):
         except Exception as e:
             logger.exception(e)
-
+
     ds_combined = xr.concat(ds_list,
                             timestr,
                             coords='minimal',
                             data_vars='minimal',
@@ -401,7 +412,41 @@ def get_cmems(**kwargs):
 
     return ds_sliced
 
+
+def build_ts_from_dim_lvls(ds, fixed_dim_str, var):
+
+    lvl_nb = len(ds[fixed_dim_str].data)
+
+    if lvl_nb==1:
+        ts = list(ds.isel({fixed_dim_str: 0})[var].data)
+
+    elif lvl_nb > 1:
+
+        lvl_not_nan = []
+        for i in range(lvl_nb):
+
+            if not np.isnan(ds.isel({fixed_dim_str: i})[var].data).all():
+                lvl_not_nan.append(i)
+
+        if len(lvl_not_nan)==1:
+            ts= list(ds.isel({fixed_dim_str: lvl_not_nan[0]})[var].data)
+
+        else:
+
+            ts = ds.isel({fixed_dim_str: 0})[var].data
+            dict_not_nan = {}
+            for i in range(1, lvl_nb):
+
+                nan_val_tmp = np.isnan(ds.isel({fixed_dim_str: i})[var].data)
+                not_nan_idx = [j for j in range(len(nan_val_tmp))
+                               if not nan_val_tmp[j]]
+                ts[not_nan_idx] = ds.isel({fixed_dim_str: i})[var].data[not_nan_idx]
+
+    return np.array(ts, dtype='f')
+
+
 def build_xr_ds_cmems(dict_var, var_name_ref):
+
     ds = xr.Dataset({
         var_name: xr.DataArray(
             data=dict_var[var_name],
@@ -409,4 +454,5 @@ def build_xr_ds_cmems(dict_var, var_name_ref):
             dims=[var_name_ref],
             coords={var_name_ref: dict_var[var_name_ref]}
         ) for var_name in dict_var.keys()},
         attrs={'title': 'wavy dataset'})
+    return ds

From 2354d581966de0d5d55f2734f42b722b14c6323f Mon Sep 17 00:00:00 2001
From: Fabien Collas
Date: Fri, 3 Nov 2023 15:28:42 +0100
Subject: [PATCH 2/3] Changed get_remote_files_cmems so it is also compatible
 with use of twinID

---
 wavy/insitu_collectors.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/wavy/insitu_collectors.py b/wavy/insitu_collectors.py
index dd1b65ab..2f98bf88 100755
--- a/wavy/insitu_collectors.py
+++ b/wavy/insitu_collectors.py
@@ -87,7 +87,8 @@ def get_remote_files_cmems(**kwargs):
 
     # check if search str template
     file_search_template = cfg.download['ftp']\
-        .get('search_str', '%Y%m%dT')
+        .get('search_str', '%Y%m%dT').replace('name',
+                                              name)
 
     # credentials
     server = insitu_dict[product]['download']['ftp']['server']
@@ -137,6 +138,7 @@ def get_remote_files_cmems(**kwargs):
             matchingtmp = [s for s in content
                            if tmpdate_new.strftime(file_search_template)
                            in s]
+
             tmplst = tmplst + matchingtmp
             tmpdate_new = tmpdate_new + timedelta(minutes=twin)
     matching = np.unique(tmplst)

From 7b244aa982237b4dbfaf78532dcb47b0a234d808 Mon Sep 17 00:00:00 2001
From: Fabien Collas
Date: Mon, 6 Nov 2023 10:19:30 +0100
Subject: [PATCH 3/3] Changed function name and added explanation for
 rebuild_split_variable

---
 wavy/insitu_readers.py | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/wavy/insitu_readers.py b/wavy/insitu_readers.py
index b49e5c42..22c28b65 100755
--- a/wavy/insitu_readers.py
+++ b/wavy/insitu_readers.py
@@ -389,7 +389,7 @@ def get_cmems(**kwargs):
                         for coord in list(ds.coords) if coord
                         in [lonstr, latstr, timestr]}
 
-            dict_var.update({var: build_ts_from_dim_lvls(ds,
+            dict_var.update({var: rebuild_split_variable(ds,
                             fixed_dim_str, var)
                             for var in list(ds.data_vars)})
 
@@ -413,8 +413,21 @@ def get_cmems(**kwargs):
     return ds_sliced
 
 
-def build_ts_from_dim_lvls(ds, fixed_dim_str, var):
-
+def rebuild_split_variable(ds, fixed_dim_str, var):
+    '''
+    Gather values of a given variable, for which
+    values are split between several levels of
+    a given dimension of a dataset.
+
+    Args:
+        ds (xarray dataset): dataset
+        fixed_dim_str (string): name of the dimension
+        var (string): name of the variable
+
+    Returns:
+        1D numpy array, returns the complete variable
+        serie of values on a single dimension
+    '''
     lvl_nb = len(ds[fixed_dim_str].data)
 
     if lvl_nb==1:
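
Note (not part of the patches): below is a minimal, self-contained sketch of the level-merging idea that rebuild_split_variable implements, i.e. collapsing a variable whose valid values are spread over the levels of an extra dimension into a single 1D series. The toy dataset and the names TIME, DEPTH, VHM0 and merge_levels are made up for illustration; the patched function itself operates on the datasets opened inside get_cmems and additionally short-circuits the single-level and single-non-NaN-level cases.

# Illustrative sketch only -- toy data, hypothetical names (TIME, DEPTH, VHM0)
import numpy as np
import xarray as xr

time = np.arange(6)
vhm0 = np.full((2, 6), np.nan)
vhm0[0, [0, 2, 4]] = [1.0, 1.2, 1.1]   # valid values stored on DEPTH level 0
vhm0[1, [1, 3, 5]] = [0.9, 1.3, 1.0]   # valid values stored on DEPTH level 1

ds = xr.Dataset(
    {'VHM0': (('DEPTH', 'TIME'), vhm0)},
    coords={'DEPTH': [0.0, 1.0], 'TIME': time},
)

def merge_levels(ds, fixed_dim_str, var):
    # start from level 0 and fill the gaps, index by index, with the
    # non-NaN values found on the remaining levels
    ts = ds.isel({fixed_dim_str: 0})[var].data.copy()
    for i in range(1, len(ds[fixed_dim_str].data)):
        lvl = ds.isel({fixed_dim_str: i})[var].data
        mask = ~np.isnan(lvl)
        ts[mask] = lvl[mask]
    return np.array(ts, dtype='f')

print(merge_levels(ds, 'DEPTH', 'VHM0'))   # expected: [1.  0.9 1.2 1.3 1.1 1. ]

The sketch copies level 0 before filling it, so the source dataset is left untouched; apart from that it mirrors the per-level NaN masking done in the patch's else branch.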