From 3eed56a56fd9045cc136ef3addf5737e33049591 Mon Sep 17 00:00:00 2001 From: Fabien Collas Date: Wed, 22 Nov 2023 15:18:11 +0100 Subject: [PATCH] Added filter_runmean function and test --- tests/test_filtermod.py | 73 +++++++++++++++++------------------------ wavy/filtermod.py | 44 +++++++++++++++++++++++-- 2 files changed, 71 insertions(+), 46 deletions(-) diff --git a/tests/test_filtermod.py b/tests/test_filtermod.py index d9dc14cb..3a323ae3 100644 --- a/tests/test_filtermod.py +++ b/tests/test_filtermod.py @@ -1,43 +1,30 @@ -#import pytest -#from datetime import datetime, timedelta -#import yaml -#import numpy as np -#import os -#from copy import deepcopy -# -#from wavy.insitumod import insitu_class as ic -#from wavy.filtermod import apply_land_mask -# -#sd = "2021-8-2 01" -#ed = "2021-8-3 12" -# -##ico = ic(nID,sensor,sd,ed) -##test_dict = deepcopy(ico.vars()) -# -##@pytest.fixture -##def test_data(): -## return os.path.abspath(os.path.join(\ -## os.path.dirname( __file__ ),'data')) -# -#def test_landmask(): -# vardict = { 'latitude':[60.12,62.24, 64.08,65.08, 67.65,68.95], -# 'longitude':[-23.47,-21.54, -19.32,-17.8, -13.97,-10.99]} -# d,m = apply_land_mask(vardict) -# assert len(m[m==False]) == int(2) -# -##def test_cleaners(): -## nID = 'D_Breisundet_wave' -## sensor = 'wavescan' -## ico = ic(nID,sensor,sd,ed,priorOp='square',cleaner='linearGAM',postOp='root',date_incr=1,filterData=True) -## assert len(vars(ico).keys()) == 16 -## assert 'filter' in vars(ico).keys() -## assert 'filterSpecs' in vars(ico).keys() -# -#def test_smoothers(): -# nID = 'D_Breisundet_wave' -# sensor = 'wavescan' -# ico = ic(nID,sd,ed,smoother='blockMean',date_incr=1,filterData=True,sensor=sensor) -# assert len(vars(ico).keys()) == 16 -# assert 'filter' in vars(ico).keys() -# assert 'filterSpecs' in vars(ico).keys() -# +import pytest +from datetime import datetime, timedelta +import yaml +import numpy as np +import os +from copy import deepcopy +from wavy.insitu_module import 
insitu_class as ic + + +def test_filter_runmean(test_data): + varalias = 'Hs' # default + sd = "2023-8-20 00" + ed = "2023-8-21 00" + nID = 'MO_Draugen_daily' + name = 'Draugen' + ico = ic(nID=nID, sd=sd, ed=ed, varalias=varalias, name=name) + print(ico) + print(vars(ico).keys()) + + ico = ico.populate(path=str(test_data/"insitu/daily/Draugen")) + new = ico.filter_runmean(window=3, + chunk_min=3, + sampling_rate_Hz=1/600) + print(new.vars.time) + print(new.vars.Hs) + assert len(new.vars.time) == 6 + assert not all(np.isnan(v) for v in ico.vars['Hs']) + print(ico.vars.Hs[1:4]) + print(np.mean(ico.vars.Hs[1:4])) + assert new.vars.Hs[2] == np.mean(ico.vars.Hs[1:4]) diff --git a/wavy/filtermod.py b/wavy/filtermod.py index e6150f70..4a50f35a 100644 --- a/wavy/filtermod.py +++ b/wavy/filtermod.py @@ -105,7 +105,7 @@ def filter_distance_to_coast(self, llim=0, ulim=100000000, **kwargs): coast_sdef, points_sdef, 10000000, neighbours=1) # get rid of infs mask = np.where((distance_array > llim) & (distance_array < ulim))[0] - #new.dist_to_coast = distance_array[mask] + # new.dist_to_coast = distance_array[mask] # impose on dataset ds = new.vars.isel(time=mask) # add to dataset @@ -119,6 +119,7 @@ def filter_distance_to_coast(self, llim=0, ulim=100000000, **kwargs): return new def filter_blockMean(self, **kwargs): + print('Apply blockMean') return self def filter_lanczos(self, **kwargs): @@ -161,6 +162,43 @@ def filter_lanczos(self, **kwargs): new.vars[new.varalias].values = flatten(ts_lst) return new + def filter_runmean(self, **kwargs): + print('Apply running mean filter') + from wavy.utils import runmean + new = deepcopy(self) + + # apply slider if needed + win = kwargs.get('slider', len(new.vars.time)) + ol = kwargs.get('overlap', 0) + indices = new.slider_chunks(slider=win, overlap=ol) + + ts_lst = [] + tgc_idx_lst = [] + for i, j in indices: + tmp_idx = range(i, j) + # create tmp dataset reduced to i:j + tmp_ds = new.vars.isel(time=tmp_idx) + # apply gap chunks if needed 
+            pdtimes = tmp_ds.time.to_pandas() +            tgc_indices = new.time_gap_chunks(pdtimes, **kwargs) +            for k, l in tgc_indices: +                tmp_tgc_idx = range(k, l+1) +                # apply min chunk size +                if len(tmp_tgc_idx) > kwargs.get("chunk_min", 5): +                    y = tmp_ds[new.varalias].values[tmp_tgc_idx] +                    window = kwargs.get('window') +                    ts, _ = runmean(y, window, +                                    mode='centered') +                    ts_lst.append(ts) +                    tgc_idx_lst.append(np.array(tmp_idx)[tmp_tgc_idx]) +                else: +                    print("Chunk size too small -> not filtered and rejected") +                    pass + +        new.vars = new.vars.isel(time=flatten(tgc_idx_lst)) +        new.vars[new.varalias].values = flatten(ts_lst) +        return new + def filter_GP(self, **kwargs): print('Apply GPR filter') new = deepcopy(self) @@ -210,7 +248,7 @@ def filter_linearGAM(self, **kwargs): ol = kwargs.get('overlap', 0) indices = new.slider_chunks(slider=win, overlap=ol) - ts_lst = [] + ts_lst = [] tgc_idx_lst = [] for i, j in indices: tmp_idx = range(i, j+1) @@ -263,7 +301,7 @@ def despike_blockStd(self, **kwargs): tgc_idx_lst = [] for i, j in indices: - tmp_idx = range(i, j+1) + tmp_idx = range(i, j) print('tmp_idx', tmp_idx) # create tmp dataset reduced to i:j tmp_ds = new.vars.isel(time=tmp_idx)