diff --git a/wwdata/Class_HydroData.py b/wwdata/Class_HydroData.py index 0881772f7..c1e770b72 100644 --- a/wwdata/Class_HydroData.py +++ b/wwdata/Class_HydroData.py @@ -1555,7 +1555,7 @@ def detect_drift(self, data_name, arange, max_slope=None, period=None, plot=Fals Parameters ---------- - data : str + data_name : str name of the column containing the data to detect drift arange : 2-element array of ints the range in which to apply the function @@ -1573,7 +1573,6 @@ def detect_drift(self, data_name, arange, max_slope=None, period=None, plot=Fals !!Doesn't check the last day mentioned in the arange!! """ - from scipy import signal series = self.data[data_name][arange[0]:arange[1]].copy() @@ -1596,70 +1595,119 @@ def detect_drift(self, data_name, arange, max_slope=None, period=None, plot=Fals line_segment = series - detrended_values[:] slope = (int(line_segment[-1]) - int(line_segment[0])) / (arange[1].day - arange[0].day + 1) - if slope > max_slope: + if slope > max_slope or slope < -max_slope: print('Based on the specified maximum slope, a drift was' ' detected with a slope higher than the maximum one. \n' - 'Slope detected: {}, maximum slope: {}'.format(slope, max_slope)) + 'Slope detected: {}, maximum slope:+/- {}'.format(slope, max_slope)) + self.line_segment = line_segment else: + plot = False print('No drift detected.') - # detrend_values=pd.DataFrame(detrended_values, index=series.index) --> dataframe of detrended values if plot is True: fig = plt.figure(figsize=(16, 6)) ax = fig.add_subplot(111) - ax.plot(line_segment, 'b-',label='slope') ax.plot(series, 'g--', label='original data') - ax.plot(series.index, detrended_values, 'r', label='detrended values') - ax.plot(series-(line_segment-line_segment[0]), 'm', label='without drift(?)') #some interesting plot/data + if slope > max_slope and slope < -max_slope: + ax.plot(line_segment, 'b-',label='slope') + ax.plot(series.index, detrended_values, 'r', label='detrended values') + ax.plot(series-(line_segment-line_segment[0]), 'm', label='without drift(?)') #some interesting plot/data ax.legend(fontsize=16) ax.set_xlabel(self.timename, fontsize=20) ax.set_ylabel(data_name, fontsize=20) ax.tick_params(labelsize=15) + ax.legend(loc='upper right', shadow=True) else: - if type(period) is int: - start_index = 0 - end_index = 0 - new_index = end_index - if plot is True: - fig = plt.figure(figsize=(16,6)) - ax = fig.add_subplot(111) - ax.plot(series, 'g--', label='original data') - - while series.index.day[new_index] + period <= series.index.day[len(series)-1]: - checked = False - while series.index.day[end_index] < (series.index.day[start_index] + period): - if series.index.day[end_index] == (series.index.day[start_index] + 1): - if checked is False: - new_index = end_index - checked = True - end_index += 1 - if end_index == len(series)-1: - break - - detrended_values = signal.detrend(series[start_index:(end_index-1)]) - line_segment = series[start_index:(end_index-1)] - detrended_values[:] - slope = (int(line_segment[-1]) - int(line_segment[0])) / ( - arange[1].day - arange[0].day + 1) - - if slope > max_slope: - print('Drift detected in period {} to {}. \n' - 'Slope detected: {}, maximum slope: {}'.format - (series.index.day[start_index], series.index.day - [end_index-1],slope, max_slope)) - - if plot is True: - detrended_values = pd.DataFrame(detrended_values, - index=series.index[start_index:(end_index-1)]) - ax.plot(line_segment, 'b-', label='slope') - ax.plot(detrended_values, 'r--', label='detrended values') - ax.plot(series[start_index:(end_index-1)]-(line_segment-line_segment[0]), 'm-') #bad visualisation - - start_index = new_index - end_index = new_index - else: - return ValueError('period must be an integer') + if type(period) is not int: + return ValueError('the period must be a integer') + + start_index = 0 + end_index = 0 + new_index = end_index + n = 0 + m = 0 + list_value = [] + + while series.index.day[new_index] + period <= series.index.day[len(series)-1]: + checked = False + while series.index.day[end_index] < (series.index.day[start_index] + period): + if series.index.day[end_index] == (series.index.day[start_index] + 1): + if checked is False: + new_index = end_index + checked = True + end_index += 1 + if end_index == len(series)-1: + break + + detrended_values = signal.detrend(series[start_index:(end_index-1)]) + line_segment = series[start_index:(end_index-1)] - detrended_values[:] + slope = (int(line_segment[-1]) - int(line_segment[0])) / ( + arange[1].day - arange[0].day + 1) + + if slope > max_slope: + n += 1 + print('Drift detected in period {} to {}, slope: {}'.format + (series.index.day[start_index], series.index.day + [end_index-1], slope)) + if n == 1: + start_value = series.index[start_index] + end_value = series.index[end_index] + else: + if n > 0: + list_value.append([start_value, end_value]) + n = 0 + + if -max_slope > slope: + m += 1 + print('Drift detected in period {} to {}, slope: {}'.format + (series.index.day[start_index], series.index.day + [end_index - 1], slope)) + if m == 1: + start_value = series.index[start_index] + end_value = series.index[end_index] + else: + if m > 0: + list_value.append([start_value, end_value]) + m = 0 + + if series.index.day[end_index] == series.index.day[-1] and (n > 0 or m > 0): + list_value.append([start_value, end_value]) + start_index = new_index + end_index = new_index + + if len(list_value) == 0: + plot = False + print('No drift detected') + + if plot is True: + detrended_values = pd.DataFrame() + fig = plt.figure(figsize=(16, 6)) + ax = fig.add_subplot(111) + ax.plot(series, 'g--', label='original data') + for l in range(len(list_value)-1): + if list_value[l][1] > list_value[l+1][0]: + ind = len(series[:list_value[l][1]]) + list_value[l+1][0] = series.index[ind] + + for value in list_value: + detrend = signal.detrend(series[value[0]:value[1]]) + df = pd.DataFrame(detrend, index=series.index[len(series[:value[0]])-1:len(series[:value[1]])]) + detrended_values.append(df) + line_segment = series[value[0]:value[1]] - detrend[:] + ax.plot(line_segment, 'b-', label='slope') + ax.plot(df, 'r--', label='detrended values') + + if line_segment[0] < line_segment[-1]: + #ax.plot(series[value[0]:value[1]]-(line_segment-line_segment[0]), 'm-', label='without drift(?)') + series[value[0]:value[1]] = series[value[0]:value[1]]-(line_segment-line_segment[0]) + else: + #ax.plot(series[value[0]:value[1]]-(line_segment-line_segment[-1]), 'm-', label='without drift(?)') + series[value[0]:value[1]] = series[value[0]:value[1]]-(line_segment-line_segment[-1]) + #ax.plot(series, 'k--') + self.list_value = list_value + def remove_drift(self, data_name, arange, max_slope, period=None, plot=False): """ @@ -1669,7 +1717,7 @@ def remove_drift(self, data_name, arange, max_slope, period=None, plot=False): Parameters ---------- - data : str + data_name : str name of the column containing the data to remove drift arange : 2-element array of ints the range in which to apply the function @@ -1678,13 +1726,60 @@ def remove_drift(self, data_name, arange, max_slope, period=None, plot=False): period : int the period, in days, which a certain slope is allowed plot : bool - if true, a plot is made, ... + if true, a plot is made... Returns ---------- the fixed dataset without drift """ - pass + from scipy import signal + org_dat = self.data[data_name][arange[0]:arange[1]].copy()#for plotting + + self.detect_drift(data_name=data_name, arange=arange, max_slope= + max_slope, period=period, plot=False) + series = self.data[data_name][arange[0]:arange[1]].copy() + + if period is None or period is arange[1].day - arange[0].day + 1: + new_data = series - self.line_segment + self.line_segment[0] + self.data[data_name].update(new_data) + if plot is True: + fig = plt.figure(figsize=(16, 6)) + ax = fig.add_subplot(111) + ax.plot(series, 'm--', label='original data') + ax.plot(self.data[data_name], 'r--', label='new data') + ax.legend(loc='upper right', shadow=True) + + else: + for n in range(len(self.list_value)-1): + if self.list_value[n][1] > self.list_value[n+1][0]: + ind = len(series[:self.list_value[n][1]]) + self.list_value[n+1][0] = series.index[ind] + + detrended_values = pd.DataFrame() + for value in self.list_value: + detrend = signal.detrend(series[value[0]:value[1]]) + df = pd.DataFrame(detrend, index=series.index[len(series[:value[0]])-1:len(series[:value[1]])]) + detrended_values.append(df) + line_segment = series[value[0]:value[1]] - detrend[:] + if line_segment[0] < line_segment[-1]: + series[value[0]:value[1]] = series[value[0]:value[1]]-(line_segment-line_segment[0]) + else: + series[value[0]:value[1]] = series[value[0]:value[1]]-(line_segment-line_segment[-1]) + self.data[data_name].update(series) + + if plot is True: + plt.figure(1, figsize=(16, 6)) + plt.subplot(211) + plt.plot(org_dat, 'k--', label='original data') + plt.subplot(212) + plt.plot(self.data[data_name][arange[0]:arange[1]], 'g--', label='new data') + plt.show() + + #ax.plot(series, ) + #ab = fig.add_subplot(212) + #ab.plot(self.data[data_name], ) + #ax.legend(loc='upper right', shadow=True) + #ab.legend(loc='upper right', shadow=True) #============================================================================== # DAILY PROFILE CALCULATION