-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtsmExtend.R
executable file
·204 lines (172 loc) · 7.94 KB
/
tsmExtend.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
tsmExtend<- function(X,yrX,yrsp,N1,N2){
# Extend time series matrix on recent end by quantile method
# D. Meko; last revised 2022-01-26
#
# X, [matrix]r the time series; assumed nX columns, no "time" column
# yrX [matrix]i 1-col year matrix for X
# yrsp: desired stop year of extended time series matrix (tsm)
# If yrsp=NA, extend X so that all series have data up to year of most recent series in X
# If yrsp=nnnn, where nnnn is any year, extend all series to be full through year nnnn, as long as
# nnn is not later than yrX(length(yrX)). If n>yrX(length(yrX)), treat as if yrsp=NA
# If yrsp=-1, truncate X to end with the last year with data at all series in X.
# N1: common period to all series in X should be at least this many years (e.g., 50)
# N2: each series in X must overlap somewhere with each other series by at least N2 years (e.g., 100)
#
# Returns list Output, with fields:
# Y: the extended version of input X
# yrY: the extended version of input yrX
# khow: how the extension was made
# =1 no NA in any column of X; no action needed; return Y as X
# =2 successful extension, but note that input yrsp later than last year with
# data for any series
# =3 successful extension as requested
# =4 aborted because common period of all X <N1 years
# =5 aborted because for some year i of series A needing extension, no
# other series that has data in year i overlaps A by N>=N2 years
# =6 aborted because yrX not continuous
# =7 aborted because first year of X has a NA in some column
#
# N1: the common period of N>=N1 years is used for the spearman correlation (r) matrix to set
# order of series examined for a quantile prediction
# N2: a period N>=N2 of overlap of series A and B is examined to find the quantile from that
# period of N years from which to pull an extension value
# Method. Overall common period is used to get spearman r matrix. That matrix allows
# for each column of X and ordering of the other series from most-correlated to least-correlated.
# For given year i of series A needing "filling" all other series are examined to find which of
# the other series has data in year i. Then, in order of decreasing r, the two-series overlap
# of N>=N2 years is used to pull the overlap. The most highly correlated series that also has data
# in year i provides the estimate. The value of B in year i is compared with the values of
# B in the N>=N2 overlap to get a quantile, or non-exceedance probability. The "prediction" is linearly
# interpolated from the sorted values of A in the N>=N2 overlap such that the estimate has the same
# non-exceedance probability in the overlap.
# Debugging stressors
#X <- X[216:316,,drop=FALSE]; yrX <- yrX[216:316,,drop=FALSE]
# X <- X[-250,,drop=FALSE]; yrX <- yrX[-250,,drop=FALSE]
# X [1,4]<- NA
mX <- dim(X)[1] # number of rows in X
nX <- dim(X)[2] # number of cols in X
Ifill<-matrix(0,mX,nX) # to hold either 0 or the column of X used as predictor
# Input X should be a tsm that increments by 1
L1 <- !all(diff(yrX)==1)
if (L1) {
khow=6;
Output <- list("Y"=NA,"yrY"=NA,"khow"=khow)
return(Output) # abort year vector, yrX, not continuous
}
#--- All columns of input tsm X should have valid data in first years
xtemp<-X[1,]
if (any(is.na(xtemp))){
khow<-7
warning('First row of input tsm X must not have any NA')
Output <- list("Y"=NA,"yrY"=NA,"khow"=khow)
return(Output) # abort; a NA in first year of X, some column
}
# Check that matrix already not without NA and that yrsp not later than last
# tree-ring data in any series.
khow<-3 # initialize as successful extension, no unusual circumstances
if (!any(is.na(X))){
khow<-1
Y<-X; yrY<-yrX
} else if (yrsp> yrX[length(yrX),1]) {
yrsp<-NA
khow=2
}
yrab <- yrX
if (khow==1){
# no action needed; Y, yrY, khow already set
} else {
#--- FIND COMMON PERIOD OF X AND COMPUTE SPEARMAN r MATRIX
L <- complete.cases(X)
U <- X[L,,drop="FALSE"]
yrU <- yrX[L,,drop="FALSE"]
mU <- dim(U)[1]; nU <- dim(U)[2]
L1 <- dim(U)[1]<N1
if (L1) {
khow=4;
Output <- list("Y"=NA,"yrY"=NA,"khow"=khow)
return(Output) # abort because common period too short
}
R <- cor(U,method="spearman") # Spearman r matrix, common period
#--- MAKE POINTER MATRIX THAT FOR EACH COL OF INPUT X GIVES THE HIGHEST TO
# LOWEST SISTER SERIES FROM LEFT TO RIGHT
H <- matrix(NA,nrow=nX,ncol=(nX-1)) # allocate for index
# Loop over rows of r matrix, sorting high to low and filling rows of X
j1 <- 1:nX
for (n in 1:nX){
jthis <- j1[-n]
r <-R[n,]
r <- r[-n]
tmp<-sort(r,decreasing=TRUE,index.return=TRUE)
H[n,]<-jthis[tmp$ix]
}
#--- IDENTIFY ANALOG AND FILL IN
#
# Loop over all series (cols) of U. For a series, start with most recent year of missing
# data and work backwards, filling in values using as predictor series that most highly
# correlated chronology that has data for tha year.
Y<-X; yrY<-yrX # to hold filled in version of X
for (n in 1:nX){
ncheck<-n
ix <- H[n,] # cols of predictor series, L-R from most to least preferred
xthis <- X[,n,drop=FALSE] # 1-col matrix of key series (needing filling in)
if (!is.na(xthis[length(xthis)])){
# no action needed
} else {
# work back until a year in which data not missing
kwh<-TRUE
j<-mX
while (kwh){
u <- X[j,ix] # vector of row of data for the other series for year needing filling in series n
jgood <- !is.na(u);
jfind <- which(jgood);
jfind <- jfind[1];
ifind <-ix[jfind]; # predictor col of X
# Get common period of columns n and ifind of X
a <- X[,n]
b <- X[,ifind]
L <- !is.na(a) & !is.na(b) # mark rows with data for both series
a <- a[L]; b<-b[L]; yrab <- yrX[L,1,drop=FALSE]
nab <- length(a)
if (nab<N2){
warning('Insufficient overlap (N<N2) of good data at predictor and predictand')
khow<-5;
Output <- list("Y"=NA,"yrY"=NA,"khow"=khow)
return(Output) # abort because too short overlap of predictor and predictand
}
# Get non-exceedance probabiiity of sorted series of length nab
p = (1:nab)/(nab+1);
asort<-sort(a) # ascending sort
bsort<-sort(b)
# Get current value of predictor series; if outside the range of predictor
# series in common period, set estimate as the max or min of predictand series
# for the common period
xhave <- X[j,ifind]
if (xhave >= max(bsort)){
apred <- max(asort)
} else if (xhave<=min(bsort)){
apred<-min(asort)
} else {
# interpolate by non-exceedance probability
#phave<- interp1(bsort,p,xhave,method="linear")
#apred <- interp1(bsort,asort,xhave,method="linear")
apredList <- approx(bsort,asort,xhave,method="linear",ties="ordered")
apred <- apredList$y
} # end if (xhave >= max(bsort))
Y[j,n] <- apred # filled in value
Ifill[j,n] <- ifind # corresponding column of predictor
j<-j-1 # will check next year back
# if (j==305){
# print(j)
# browser()
# }
if (is.na(X[j,n])){
# proceed with the estimation
} else {
kwh <- FALSE # will exit the while loop
}
} # end while
} # if (!is.nan(uthis[length(uthis)]))
} # for (n in 1:nX)
} # if khow==1
Output <- list("Y"=Y,"yrY"=yrY,"khow"=khow)
}