-
Notifications
You must be signed in to change notification settings - Fork 0
/
precursor_archive.py
256 lines (219 loc) · 8.02 KB
/
precursor_archive.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
import os, sys, copy
import requests
import rasterio as rio
import xml.etree.ElementTree as ET
import logging as log
from util import *
from gimms import Gimms
load_env()
class PrecursorArchive:
def __init__(self, root_dir='./precursors', year_maxes_root='./graph_data', default_file_ext='img', api=None):
self.api = api or Gimms()
self._jds = ALL_MODIS_JULIAN_DAYS
self._intervals = INTERVALS
self._default_file_ext = default_file_ext
self._default_max_prefix = 'maxMODIS'
self._default_maxmax_prefix = 'maxMODISmax'
self._root_dir = os.path.realpath(root_dir)
self._year_maxes_root = os.path.realpath(year_maxes_root)
self._init_state()
self._update_state()
def update(self):
log.info('Updating precursor archive...')
all_updated = list(self._update_all())
return all_updated
def _update_all(self):
self._update_state()
for d, y, jd in self._walk_state():
out_path, ptype, updated = self._update_date(y, jd)
if updated:
yield y, jd, out_path, ptype
self._clean()
self._update_state()
def _update_date(self, y, jd):
out_path, ptype, updated = self._update_24day_max(y, jd)
return out_path, ptype, updated
def _update_24day_max(self, y, jd):
std_path = self._get_file_path(y, jd, nrt=False, is_maxmax=True)
nrt_path = self._get_file_path(y, jd, nrt=True, is_maxmax=True)
y2, jd2 = self._get_previous_date(y, jd)
y3, jd3 = self._get_previous_date(y2, jd2)
dates = [ (y,jd), (y2,jd2), (y3,jd3) ]
inputs_are_std = False
try:
paths, nrt = self._get_24day_max_input_paths(y, jd)
if not nrt:
inputs_are_std = True
except FileNotFoundError:
pass
inputs_updated = False
for _y, _jd, in dates:
path, ptype, updated = self._update_8day_max(_y, _jd)
if ptype == None:
return None, None, False
inputs_updated = inputs_updated or updated
if not inputs_updated and os.path.exists(std_path):
return std_path, 'std', False
log.info(f'Updating 24-day max for {y} / {jd}...')
paths, nrt = self._get_24day_max_input_paths(y, jd)
ptype = 'nrt' if nrt else 'std'
if inputs_updated and os.path.exists(std_path) and ptype == 'std':
log.info(f'Removing outdated 24-day std max {std_path}...')
os.remove(std_path)
if inputs_updated and os.path.exists(nrt_path) and ptype == 'nrt':
log.info(f'Removing outdated 24-day nrt max {nrt_path}...')
os.remove(nrt_path)
ptype = 'nrt' if nrt else 'std'
out_path = nrt_path if nrt else std_path
cmd = f'''
gdal_calc.py
--calc="
maximum(maximum((A<251)*A,(B<251)*B),(C<251)*C)
+((A==254)|(B==254)|(C==254))*254
+((A==255)&(B==255)&(C==255))*255
"
--NoDataValue=252
--format=HFA
--co "STATISTICS=YES"
--co "COMPRESSED=YES"
--outfile={out_path}
-A {paths[0]}
-B {paths[1]}
-C {paths[2]}
--type=Byte
--debug
'''
try:
run_process(cmd)
except:
return None, None, False
return out_path, ptype, True
def _update_8day_max(self, y, jd):
_dir = self._get_dir(jd)
try:
std_path = self._get_file_path(y, jd, nrt=False)
if os.path.exists(std_path):
log.debug(f'Found std file at {std_path}...')
return std_path, 'std', False
std_path = self.api.get(y, jd, out_dir=_dir, check=True)
return std_path, 'std', True
except DataNotFoundError as e:
log.info('No std data available, trying nrt instead...')
nrt_path = self._get_file_path(y, jd, nrt=True)
if os.path.exists(nrt_path):
return nrt_path, 'nrt', False
nrt_path = self.api.get(y, jd, out_dir=_dir, nrt=True, check=True)
return nrt_path, 'nrt', True
except DataNotFoundError as e:
return None, None, False
def _get_24day_max_input_paths(self, y, jd):
y1, jd1 = y, jd
p1, p1nrt = self._get_best_8day_max_path(y, jd)
y2, jd2 = self._get_previous_date(y1, jd1)
p2, p2nrt = self._get_best_8day_max_path(y2, jd2)
y3, jd3 = self._get_previous_date(y2, jd2)
p3, p3nrt = self._get_best_8day_max_path(y3, jd3)
nrt = True in [ p1nrt, p2nrt, p3nrt ]
return (p1, p2, p3), nrt
def _get_dir(self, jd):
p = os.path.join(self._root_dir, jd)
return p
def _get_best_8day_max_path(self, y, jd, std_only=False):
std_ok = self._check(y, jd)
if std_ok:
nrt = False
return self._get_file_path(y, jd), nrt
if not std_ok and std_only:
raise FileNotFoundError(f"No 8-day STD max found for {y} / {jd}.")
nrt_ok = self._check(y, jd, nrt=True)
if nrt_ok:
return self._get_file_path(y, jd, nrt=True), nrt_ok
raise FileNotFoundError(f"No 8-day max found for {y} / {jd}.")
def _get_previous_date(self, y, jd):
try:
interval = [ d for d in self._intervals if d[0] == jd ][0]
except:
raise Exception(f"Bad year ({year}) or julian day ({jd})")
ints = [ int(v) for v in interval ]
p_y = str(int(y)-1) if ints[0] < ints[1] else y
p_jd_i = self._jds.index(jd)-1
p_jd = self._jds[p_jd_i]
return p_y, p_jd
def _check(self, y, jd, nrt=False, is_maxmax=False):
try:
self._get_file_path(y, jd, nrt=nrt, is_maxmax=is_maxmax, check=True)
except FileNotFoundError:
return False
return True
def _clean(self):
self._update_state()
for d, y, jd in self._walk_state():
if d['max']['std'] and d['max']['nrt']:
path = self._get_file_path(y=y, jd=jd, nrt=True, is_maxmax=False)
os.remove(path)
if d['maxmax']['std'] and d['maxmax']['nrt']:
path = self._get_file_path(y=y, jd=jd, nrt=True, is_maxmax=True)
log.info(f'Removing {path}...')
os.remove(path)
self._update_state()
def _update_state(self):
for d, y, jd in self._walk_state():
d['max']['std'] = self._check(y, jd, nrt=False, is_maxmax=False)
d['maxmax']['std'] = self._check(y, jd, nrt=False, is_maxmax=True)
d['max']['nrt'] = self._check(y, jd, nrt=True, is_maxmax=False)
d['maxmax']['nrt'] = self._check(y, jd, nrt=True, is_maxmax=True)
def _walk_state(self):
for jd in self._state.keys():
for y in self._state[jd].keys():
yield self._state[jd][y], y, jd
def _init_state(self):
'''Example state dict:
{
'001': {
'2003'': {
'max': { 'nrt': True, 'std': False }
'maxmax': { 'nrt': True, 'std': True }
},
...
'2021': { ... },
},
...
'353': { ... },
}
'''
day_delta = 8
state = {}
for jd in self._jds:
state[jd] = {}
for y in get_all_modis_data_years():
dt = datetime.datetime.strptime('{y}{jd}'.format(y=y, jd=jd), '%Y%j')
today = datetime.datetime.today()
if dt > today - datetime.timedelta(days=day_delta):
# skip unavailable dates
continue
s = { 'std': False, 'nrt': False }
state[jd][y] = {}
state[jd][y]['max'] = s
state[jd][y]['maxmax'] = s.copy()
self._state = state
def _get_file_path(self, y, jd=None, is_maxmax=False, nrt=False, check=False, year_only=False, ext=None):
filename = self._filename_template(y, jd, year_only=year_only, ext=ext, nrt=nrt, is_maxmax=is_maxmax)
if not year_only:
full_path = os.path.join(self._root_dir, jd, filename)
else:
full_path = os.path.join(self._year_maxes_root, filename)
if check and not os.path.exists(full_path):
raise FileNotFoundError(f'{full_path} does not exist on the file system.')
real_path = os.path.realpath(full_path)
return real_path
def _filename_template(self, y, jd, is_maxmax=False, nrt=False, year_only=False, ext=None):
ext = ext or self._default_file_ext
max_prefix = self._default_max_prefix
maxmax_prefix=self._default_maxmax_prefix
prefix = maxmax_prefix if is_maxmax else max_prefix
ptype = 'nrt' if nrt else 'std'
if not year_only:
filename = f'{prefix}.{y}.{jd}.{ptype}.{ext}'
else:
filename = f'{prefix}.{y}.{ptype}.{ext}'
return filename