-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathsatellite_utils.py
412 lines (344 loc) · 15.9 KB
/
satellite_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
from __future__ import annotations
from collections.abc import Mapping
from typing import Any, Optional
import ee
import pandas as pd
import time
from tqdm.auto import tqdm
# taken from https://github.com/sustainlab-group/africa_poverty (slightly modified)
def df_to_fc(df: pd.DataFrame, lat_colname: str = 'lat',
lon_colname: str = 'lon') -> ee.FeatureCollection:
'''Create a ee.FeatureCollection from a pd.DataFrame.
Args
- csv_path: str, path to CSV file that includes at least two columns for
latitude and longitude coordinates
- lat_colname: str, name of latitude column
- lon_colname: str, name of longitude column
Returns: ee.FeatureCollection, contains one feature per row in the CSV file
'''
# convert values to Python native types
# see https://stackoverflow.com/a/47424340
df = df.astype('object')
ee_features = []
for i in range(len(df)):
props = df.iloc[i].to_dict()
# oddly EE wants (lon, lat) instead of (lat, lon)
_geometry = ee.Geometry.Point([
props[lon_colname],
props[lat_colname],
])
ee_feat = ee.Feature(_geometry, props)
ee_features.append(ee_feat)
return ee.FeatureCollection(ee_features)
def decode_qamask(img: ee.Image) -> ee.Image:
'''
Args
- img: ee.Image, Landsat 5/7/8 image containing 'pixel_qa' band
Returns
- masks: ee.Image, contains 5 bands of masks
Pixel QA Bit Flags (universal across Landsat 5/7/8)
Bit Attribute
0 Fill
1 Clear
2 Water
3 Cloud Shadow
4 Snow
5 Cloud
'''
qa = img.select('pixel_qa')
clear = qa.bitwiseAnd(2).neq(0) # 0 = not clear, 1 = clear
clear = clear.updateMask(clear).rename(['pxqa_clear'])
water = qa.bitwiseAnd(4).neq(0) # 0 = not water, 1 = water
water = water.updateMask(water).rename(['pxqa_water'])
cloud_shadow = qa.bitwiseAnd(8).eq(0) # 0 = shadow, 1 = not shadow
cloud_shadow = cloud_shadow.updateMask(cloud_shadow).rename(['pxqa_cloudshadow'])
snow = qa.bitwiseAnd(16).eq(0) # 0 = snow, 1 = not snow
snow = snow.updateMask(snow).rename(['pxqa_snow'])
cloud = qa.bitwiseAnd(32).eq(0) # 0 = cloud, 1 = not cloud
cloud = cloud.updateMask(cloud).rename(['pxqa_cloud'])
masks = ee.Image.cat([clear, water, cloud_shadow, snow, cloud])
return masks
def mask_qaclear(img: ee.Image) -> ee.Image:
'''
Args
- img: ee.Image, Landsat 5/7/8 image containing 'pixel_qa' band
Returns
- img: ee.Image, input image with cloud-shadow, snow, cloud, and unclear
pixels masked out
'''
qam = decode_qamask(img)
cloudshadow_mask = qam.select('pxqa_cloudshadow')
snow_mask = qam.select('pxqa_snow')
cloud_mask = qam.select('pxqa_cloud')
return img.updateMask(cloudshadow_mask).updateMask(snow_mask).updateMask(cloud_mask)
def add_latlon(img: ee.Image) -> ee.Image:
'''Creates a new ee.Image with 2 added bands of longitude and latitude
coordinates named 'LON' and 'LAT', respectively
'''
latlon = ee.Image.pixelLonLat().select(
opt_selectors=['longitude', 'latitude'],
opt_names=['LON', 'LAT'])
return img.addBands(latlon)
def composite_nl(year: int) -> ee.Image:
'''Creates a median-composite nightlights (NL) image.
Args
- year: int, start year of survey
Returns: ee.Image, contains a single band named 'NIGHTLIGHTS'
'''
if year <= 2013:
img_col = ee.ImageCollection('NOAA/DMSP-OLS/NIGHTTIME_LIGHTS')
else:
img_col = ee.ImageCollection('NOAA/VIIRS/DNB/MONTHLY_V1/VCMSLCFG')
start_date, end_date = f'{year}-01-01', f'{year}-12-31'
return img_col.filterDate(start_date, end_date).median().select([0], ['NIGHTLIGHTS'])
def tfexporter(collection: ee.FeatureCollection, export: str, prefix: str,
fname: str, selectors: Optional[ee.List] = None,
dropselectors: Optional[ee.List] = None,
bucket: Optional[str] = None) -> ee.batch.Task:
'''Creates and starts a task to export a ee.FeatureCollection to a TFRecord
file in Google Drive or Google Cloud Storage (GCS).
GCS: gs://bucket/prefix/fname.tfrecord
Drive: prefix/fname.tfrecord
Args
- collection: ee.FeatureCollection
- export: str, 'drive' for Drive, 'gcs' for GCS
- prefix: str, folder name in Drive or GCS to export to, no trailing '/'
- fname: str, filename
- selectors: None or ee.List of str, names of properties to include in
output, set to None to include all properties
- dropselectors: None or ee.List of str, names of properties to exclude
- bucket: None or str, name of GCS bucket, only used if export=='gcs'
Returns
- task: ee.batch.Task
'''
if dropselectors is not None:
if selectors is None:
selectors = collection.first().propertyNames()
selectors = selectors.removeAll(dropselectors)
if export == 'gcs':
task = ee.batch.Export.table.toCloudStorage(
collection=collection,
description=fname,
bucket=bucket,
fileNamePrefix=f'{prefix}/{fname}',
fileFormat='TFRecord',
selectors=selectors)
elif export == 'drive':
task = ee.batch.Export.table.toDrive(
collection=collection,
description=fname,
folder=prefix,
fileNamePrefix=fname,
fileFormat='TFRecord',
selectors=selectors)
else:
raise ValueError(f'export "{export}" is not one of ["gcs", "drive"]')
task.start()
return task
def sample_patch(point: ee.Feature, patches_array: ee.Image,
scale: float) -> ee.Feature:
'''Extracts an image patch at a specific point.
Args
- point: ee.Feature
- patches_array: ee.Image, Array Image
- scale: int or float, scale in meters of the projection to sample in
Returns: ee.Feature, 1 property per band from the input image
'''
arrays_samples = patches_array.sample(
region=point.geometry(),
scale=scale,
projection='EPSG:3857',
factor=None,
numPixels=None,
dropNulls=False,
tileScale=12)
return arrays_samples.first().copyProperties(point)
def get_array_patches(img: ee.Image,
scale: float,
ksize: float,
points: ee.FeatureCollection,
export: str,
prefix: str,
fname: str,
selectors: Optional[ee.List] = None,
dropselectors: Optional[ee.List] = None,
bucket: Optional[str] = None
) -> ee.batch.Task:
'''Creates and starts a task to export square image patches in TFRecord
format to Google Drive or Google Cloud Storage (GCS). The image patches are
sampled from the given ee.Image at specific coordinates.
Args
- img: ee.Image, image covering the entire region of interest
- scale: int or float, scale in meters of the projection to sample in
- ksize: int or float, radius of square image patch
- points: ee.FeatureCollection, coordinates from which to sample patches
- export: str, 'drive' for Google Drive, 'gcs' for GCS
- prefix: str, folder name in Drive or GCS to export to, no trailing '/'
- fname: str, filename for export
- selectors: None or ee.List, names of properties to include in output,
set to None to include all properties
- dropselectors: None or ee.List, names of properties to exclude
- bucket: None or str, name of GCS bucket, only used if export=='gcs'
Returns: ee.batch.Task
'''
kern = ee.Kernel.square(radius=ksize, units='pixels')
patches_array = img.neighborhoodToArray(kern)
# ee.Image.sampleRegions() does not cut it for larger collections,
# using mapped sample instead
samples = points.map(lambda pt: sample_patch(pt, patches_array, scale))
# export to a TFRecord file which can be loaded directly in TensorFlow
return tfexporter(collection=samples, export=export, prefix=prefix,
fname=fname, selectors=selectors,
dropselectors=dropselectors, bucket=bucket)
def wait_on_tasks(tasks: Mapping[Any, ee.batch.Task],
show_probar: bool = True,
poll_interval: int = 20,
) -> None:
'''Displays a progress bar of task progress.
Args
- tasks: dict, maps task ID to a ee.batch.Task
- show_progbar: bool, whether to display progress bar
- poll_interval: int, # of seconds between each refresh
'''
remaining_tasks = list(tasks.keys())
done_states = {ee.batch.Task.State.COMPLETED,
ee.batch.Task.State.FAILED,
ee.batch.Task.State.CANCEL_REQUESTED,
ee.batch.Task.State.CANCELLED}
progbar = tqdm(total=len(remaining_tasks))
while len(remaining_tasks) > 0:
new_remaining_tasks = []
for taskID in remaining_tasks:
status = tasks[taskID].status()
state = status['state']
if state in done_states:
progbar.update(1)
if state == ee.batch.Task.State.FAILED:
state = (state, status['error_message'])
elapsed_ms = status['update_timestamp_ms'] - status['creation_timestamp_ms']
elapsed_min = int((elapsed_ms / 1000) / 60)
progbar.write(f'Task {taskID} finished in {elapsed_min} min with state: {state}')
else:
new_remaining_tasks.append(taskID)
remaining_tasks = new_remaining_tasks
time.sleep(poll_interval)
progbar.close()
class LandsatSR:
def __init__(self, filterpoly: ee.Geometry, start_date: str,
end_date: str) -> None:
'''
Args
- filterpoly: ee.Geometry
- start_date: str, string representation of start date
- end_date: str, string representation of end date
'''
self.filterpoly = filterpoly
self.start_date = start_date
self.end_date = end_date
self.l8 = self.init_coll('LANDSAT/LC08/C01/T1_SR').map(self.rename_l8).map(self.rescale_l8)
self.l7 = self.init_coll('LANDSAT/LE07/C01/T1_SR').map(self.rename_l57).map(self.rescale_l57)
self.l5 = self.init_coll('LANDSAT/LT05/C01/T1_SR').map(self.rename_l57).map(self.rescale_l57)
self.merged = self.l5.merge(self.l7).merge(self.l8).sort('system:time_start')
def init_coll(self, name: str) -> ee.ImageCollection:
'''
Creates a ee.ImageCollection containing images of desired points
between the desired start and end dates.
Args
- name: str, name of collection
Returns: ee.ImageCollection
'''
return (ee.ImageCollection(name)
.filterBounds(self.filterpoly)
.filterDate(self.start_date, self.end_date))
@staticmethod
def rename_l8(img: ee.Image) -> ee.Image:
'''
Args
- img: ee.Image, Landsat 8 image
Returns
- img: ee.Image, with bands renamed
See: https://developers.google.com/earth-engine/datasets/catalog/LANDSAT_LC08_C01_T1_SR
Name Scale Factor Description
B1 0.0001 Band 1 (Ultra Blue) surface reflectance, 0.435-0.451 um
B2 0.0001 Band 2 (Blue) surface reflectance, 0.452-0.512 um
B3 0.0001 Band 3 (Green) surface reflectance, 0.533-0.590 um
B4 0.0001 Band 4 (Red) surface reflectance, 0.636-0.673 um
B5 0.0001 Band 5 (Near Infrared) surface reflectance, 0.851-0.879 um
B6 0.0001 Band 6 (Shortwave Infrared 1) surface reflectance, 1.566-1.651 um
B7 0.0001 Band 7 (Shortwave Infrared 2) surface reflectance, 2.107-2.294 um
B10 0.1 Band 10 brightness temperature (Kelvin), 10.60-11.19 um
B11 0.1 Band 11 brightness temperature (Kelvin), 11.50-12.51 um
sr_aerosol Aerosol attributes, see Aerosol QA table
pixel_qa Pixel quality attributes, see Pixel QA table
radsat_qa Radiometric saturation QA, see Radsat QA table
'''
newnames = ['AEROS', 'BLUE', 'GREEN', 'RED', 'NIR', 'SWIR1', 'SWIR2',
'TEMP1', 'TEMP2', 'sr_aerosol', 'pixel_qa', 'radsat_qa']
return img.rename(newnames)
@staticmethod
def rescale_l8(img: ee.Image) -> ee.Image:
'''
Args
- img: ee.Image, Landsat 8 image, with bands already renamed
by rename_l8()
Returns
- img: ee.Image, with bands rescaled
'''
opt = img.select(['AEROS', 'BLUE', 'GREEN', 'RED', 'NIR', 'SWIR1', 'SWIR2'])
therm = img.select(['TEMP1', 'TEMP2'])
masks = img.select(['sr_aerosol', 'pixel_qa', 'radsat_qa'])
opt = opt.multiply(0.0001)
therm = therm.multiply(0.1)
scaled = ee.Image.cat([opt, therm, masks]).copyProperties(img)
# system properties are not copied
scaled = scaled.set('system:time_start', img.get('system:time_start'))
return scaled
@staticmethod
def rename_l57(img: ee.Image) -> ee.Image:
'''
Args
- img: ee.Image, Landsat 5/7 image
Returns
- img: ee.Image, with bands renamed
See: https://developers.google.com/earth-engine/datasets/catalog/LANDSAT_LT05_C01_T1_SR
https://developers.google.com/earth-engine/datasets/catalog/LANDSAT_LE07_C01_T1_SR
Name Scale Factor Description
B1 0.0001 Band 1 (blue) surface reflectance, 0.45-0.52 um
B2 0.0001 Band 2 (green) surface reflectance, 0.52-0.60 um
B3 0.0001 Band 3 (red) surface reflectance, 0.63-0.69 um
B4 0.0001 Band 4 (near infrared) surface reflectance, 0.77-0.90 um
B5 0.0001 Band 5 (shortwave infrared 1) surface reflectance, 1.55-1.75 um
B6 0.1 Band 6 brightness temperature (Kelvin), 10.40-12.50 um
B7 0.0001 Band 7 (shortwave infrared 2) surface reflectance, 2.08-2.35 um
sr_atmos_opacity 0.001 Atmospheric opacity; < 0.1 = clear; 0.1 - 0.3 = average; > 0.3 = hazy
sr_cloud_qa Cloud quality attributes, see SR Cloud QA table. Note:
pixel_qa is likely to present more accurate results
than sr_cloud_qa for cloud masking. See page 14 in
the LEDAPS product guide.
pixel_qa Pixel quality attributes generated from the CFMASK algorithm,
see Pixel QA table
radsat_qa Radiometric saturation QA, see Radiometric Saturation QA table
'''
newnames = ['BLUE', 'GREEN', 'RED', 'NIR', 'SWIR1', 'TEMP1', 'SWIR2',
'sr_atmos_opacity', 'sr_cloud_qa', 'pixel_qa', 'radsat_qa']
return img.rename(newnames)
@staticmethod
def rescale_l57(img: ee.Image) -> ee.Image:
'''
Args
- img: ee.Image, Landsat 5/7 image, with bands already renamed
by rename_157()
Returns
- img: ee.Image, with bands rescaled
'''
opt = img.select(['BLUE', 'GREEN', 'RED', 'NIR', 'SWIR1', 'SWIR2'])
atmos = img.select(['sr_atmos_opacity'])
therm = img.select(['TEMP1'])
masks = img.select(['sr_cloud_qa', 'pixel_qa', 'radsat_qa'])
opt = opt.multiply(0.0001)
atmos = atmos.multiply(0.001)
therm = therm.multiply(0.1)
scaled = ee.Image.cat([opt, therm, masks, atmos]).copyProperties(img)
# system properties are not copied
scaled = scaled.set('system:time_start', img.get('system:time_start'))
return scaled