-
Notifications
You must be signed in to change notification settings - Fork 2
/
dataset_fill_timestamps.m
114 lines (101 loc) · 4.09 KB
/
dataset_fill_timestamps.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
function ds_filled = dataset_fill_timestamps( ds, t_var, varargin )
% DATASET_FILL_TIMESTAMPS - fill in missing timestamps in a dataset containing a
% regularly-spaced time series and discard duplicate timestamps.
%
% FIXME - Deprecated. This function is being superseded by
% 'table_fill_timestamps.m'
%
% A complete, regularly spaced series of timestamps running from t_min to
% t_max in steps of delta_t is built and left-outer-joined to ds on t_var.
% Timestamps that were absent from ds therefore appear in the output with
% every other variable set to the join's missing value (NaN for numeric
% variables).  Timestamps are matched through their 'mm/dd/yyyy HH:MM:SS'
% string representation, i.e. at one-second resolution.
%
% If a timestamp occurs more than once, the first row is kept and subsequent
% rows discarded.
%
% USAGE:
%    ds_filled = dataset_fill_timestamps( ds, t_var )
%    ds_filled = dataset_fill_timestamps( ds, t_var, delta_t )
%    ds_filled = dataset_fill_timestamps( ds, t_var, delta_t, ...
%                                         tstamps_as_strings )
%    ds_filled = dataset_fill_timestamps( ..., 't_min', t_min, ...
%                                              't_max', t_max )
%
% INPUTS:
%    ds: dataset array; the data to be filled
%    t_var: string containing the name of the time variable in ds
%        (e.g. 'TIMESTAMP').  ds.( t_var ) must contain the timestamps for
%        the data as a vector of Matlab serial datenumbers.
%
% OPTIONAL POSITIONAL INPUTS
%    delta_t: numeric; interval of the time series, in days.  e.g., 30
%        mins should have delta_t value of 1/48.  Defaults to 1/48.
%    tstamps_as_strings: true|{false}: if true, return timestamps as a cell
%        array of 'mm/dd/yyyy HH:MM:SS' strings.  If false (the default)
%        return timestamps as Matlab serial datenumbers.
%
% PARAMETER-VALUE PAIRS
%    t_min: Matlab serial datenumber; timestamp at which to begin filling.
%        Defaults to the earliest timestamp in the dataset.
%    t_max: Matlab serial datenumber; timestamp at which to end filling.
%        Defaults to the latest timestamp in the dataset.
%
% OUTPUTS:
%    ds_filled: dataset array with one row per timestamp of the regular
%        series, sorted chronologically, duplicates removed.
%
% SEE ALSO
%    dataset, datenum, datestr, join
%
% author: Timothy W. Hilton, UNM, Dec. 2011

warning( 'This function ( dataset_fill_timestamps.m ) is deprecated' );

% -----
% define optional inputs, with defaults
% -----
p = inputParser;
p.addRequired( 'ds' ); %, @( x ) isa( x, 'dataset' ) );
p.addRequired( 't_var', @ischar );
p.addOptional( 'delta_t', ( 1 / 48), @isnumeric );
p.addOptional( 'tstamps_as_strings', false, @islogical );
p.addParamValue( 't_min', ...
                 NaN, ...
                 @( x ) isnumeric( x ) );
p.addParamValue( 't_max', ...
                 NaN, ...
                 @( x ) isnumeric( x ) );

% parse optional inputs
p.parse( ds, t_var, varargin{ : } );

ds = p.Results.ds;
t_var = p.Results.t_var;
delta_t = p.Results.delta_t;
t_min = p.Results.t_min;
t_max = p.Results.t_max;
tstamps_as_strings = p.Results.tstamps_as_strings;

% NaN is the "not specified" sentinel for t_min and t_max: fall back to the
% range spanned by the data
if isnan( t_min )
    t_min = min( ds.( t_var ) );
end
if isnan( t_max )
    t_max = max( ds.( t_var ) );
end

% -----
% build the complete series and express both sets of timestamps as strings
% so the join compares them at one-second resolution rather than comparing
% floating point datenums for exact equality
% -----
tstamp_fmt = 'mm/dd/yyyy HH:MM:SS';
full_ts = ( t_min : delta_t : t_max )';
full_ts = cellstr( datestr( full_ts, tstamp_fmt ) );
ds.( t_var ) = cellstr( datestr( ds.( t_var ), tstamp_fmt ) );

% create a dataset containing the filled timestamps
ds_filled = dataset( { full_ts, t_var } );

% fill in the timestamps in ds, adding NaNs in all variables where
% missing timestamps were added
ds_filled = join( ds_filled, ...
                  ds, ...
                  'Keys', t_var, ...
                  'Type', 'LeftOuter', ...
                  'MergeKeys', true );

% timestamps (they're strings now) got sorted lexicographically -- sort
% them now by the actual date
dn = datenum( ds_filled.( t_var ), tstamp_fmt );
[ dn_sorted, idx ] = sort( dn );
ds_filled = ds_filled( idx, : );

if ~tstamps_as_strings
    ds_filled.( t_var ) = dn_sorted;
end

% -----
% remove duplicate timestamps
%
% Duplicates are detected on the sorted numeric datenums, not on
% ds_filled.( t_var ): when tstamps_as_strings is true that variable is a
% cell array of strings, which diff() cannot process.
% -----
dup_tol = 0.00000001;  %floating point tolerance
dup_idx = find( diff( dn_sorted ) < dup_tol ) + 1;
ds_filled( dup_idx, : ) = [];