Skip to content

Commit

Permalink
Merge pull request #4 from nemac/efs
Browse files Browse the repository at this point in the history
2.1
  • Loading branch information
Matthew Geiger authored Aug 5, 2021
2 parents 0e63316 + cf59b71 commit 27f2006
Show file tree
Hide file tree
Showing 39 changed files with 2,564 additions and 3,583 deletions.
73 changes: 73 additions & 0 deletions .env
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Text file containing email addresses to send logs to
MAIL_TO_ADDRS_FILE=mail_to.txt

DKR_USER=nappl_fswms
DKR_GROUP=nappl

DOCKER=/usr/bin/docker

DKR_IMAGE_TAG=gdal:2.4.2

DKR_BUILD_DIR_HOST=./gdal_docker

# Name of the running container
DKR_CONTAINER_NAME=fw2_build

# Default current directory for a running container
DKR_BUILD_DIR=/build

# Path to precursor archive (symlink)
PRECURSORS_DIR=./precursors

# Path to graph data archive (symlink)
ALL_YEAR_MAXES_DIR=./graph_data

# Path template for log files
LOG_PATH_TEMPLATE=./log/FW2_{}.txt
# Time format to insert into the log template above
LOG_FILE_TIMESTAMP_FORMAT=%Y%m%d_%I_%M_%S%p

# Email address to send "from"
EMAIL_FROM_ADDRESS=[email protected]

# Meta product types: normal and muted (square root of normal)
FW2_ARCHIVE_DIR_NORMAL=ForWarn2
FW2_ARCHIVE_DIR_MUTED=ForWarn2_Sqrt

# Strings to search for in a filename to determine if a file
# output by the dodate script is either "normal" or "muted"
# (comma-separated for multiple values)
FW2_NORMAL_DODATE_FILENAME_CHECK=ForWarnLAEA,ALCLAEA
FW2_MUTED_DOATE_FILENAME_CHECK=ForWarn2LAEA,ALC2LAEA

# FW2 archive directories for all product types
FW2_PRODUCT_DIR_1YR=X_LC_1YEAR
FW2_PRODUCT_DIR_3YR=X_LC_3YEAR
FW2_PRODUCT_DIR_5YR=X_LC_5YEAR
FW2_PRODUCT_DIR_ALC=X_LC_ALC_1YR
FW2_PRODUCT_DIR_MEDIAN=X_LC_MEDIAN_ALL_YR
FW2_PRODUCT_DIR_10YR=X_LC_90_10_YR
FW2_PRODUCT_DIR_PCTPROGRESS=X_LC_PCTPROGRESS

# Temporary directories used by dodate
FW2_TMP_DIR_1YR=1-yr-max
FW2_TMP_DIR_3YR=3-yr-max
FW2_TMP_DIR_5YR=5-yr-90
FW2_TMP_DIR_ALC=ALC
FW2_TMP_DIR_MEDIAN=median-all-yr-max
FW2_TMP_DIR_PCTPROGRESS=pctprogress
FW2_TMP_DIR_10YR=10-yr-90

# maxMODIS.YYYY.DOY.[std|nrt].[img]
MAX_8DAY_PRECURSOR_FILENAME_TEMPLATE=maxMODIS.{}.{}.{}.{}
MAX_8DAY_PRECURSOR_FILENAME_EXT=img

# maxMODIS.YYYY.std.[tif]
ALL_YEAR_MAXES_PRECURSOR_FILENAME_TEMPLATE=maxMODIS.{}.std.{}
ALL_YEAR_MAXES_PRECURSOR_FILE_EXT=tif

# Path to the fw2 build bash script
DODATE_PATH=./dodate

# Earliest year for which NDVI data is available on GIMMS
MODIS_DATA_YEAR_START=2003
13 changes: 8 additions & 5 deletions .gitignore
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
mail_to_addrs.txt
RUNNING
*.nc
mail_to.txt
*.img
todo_product_days
1-yr-max/*
3-yr-max/*
5-yr-max/*
ALC/*
median-all-yr-max/*
pctprogress/*
logs/*
log/*
*.gz
*.vrt

# vim swap files
*.swp
Expand Down Expand Up @@ -119,3 +118,7 @@ venv.bak/

# mypy
.mypy_cache/
ForWarn2
ForWarn2_Sqrt
precursors
graph_data
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "raster-helpers"]
path = raster-helpers
url = [email protected]:nemac/raster-helpers
34 changes: 0 additions & 34 deletions Config.py

This file was deleted.

19 changes: 14 additions & 5 deletions README.md
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,12 +1,21 @@
# forwarn2_build
ForWarn 2 product build scripts
## ForWarn 2 Production System

Note: the directories `ForWarn2` and `ForWarn2_Sqrt` mirror the directory structure of the final resting place of generated products. They are used for testing purposes only! Do not put products here for production sites!
Requirements:

- Docker

Setup:

- Make a text file called `todo_product_days`. This file contains a list of julian days (see `all_product_days`) that still need products. An automated run of `make_products` with no `-d` argument will automatically write a new `todo_product_days` file and remove days that were completed successfully.
- Set up the precursor archive structure (make a script for this)
- Build gdal docker container
- `mail_to_addrs.txt`
- Set up an SMTP server
- Abstract config for non-localhost case



- Make another text file called `mail_to_addrs.txt`. This file contains a list of email addresses that the system will send logs to. (Use the `--no-email` flag to suppress this function.)
Run:

- Cron job
- Single date

46 changes: 0 additions & 46 deletions all_product_days

This file was deleted.

171 changes: 171 additions & 0 deletions bulk_maxes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@


import rasterio as rio
import xml.etree.ElementTree as ET

from util import *
from precursor_archive import PrecursorArchive


class YearMaxesArchive:
    """Archive of yearly multi-band GeoTIFFs built from 8-day max precursors.

    For each year, the 8-day ``std`` precursor files found on disk are
    stacked (one band per file) into a VRT, which is then converted to a
    single GeoTIFF named ``maxMODIS.<year>.std.tif`` inside the archive
    root directory.
    """

    # Filename template for a yearly file (year, extension). Kept for
    # backward compatibility; the methods below do not reference it.
    _file_tpl = 'maxMODIS.{}.std.{}'

    # Default archive directory, used when no root_dir is passed to __init__.
    _root_dir = './graph_data'

    def __init__(self, precursors=None, root_dir=None, dryrun=False):
        """Load the environment and remember the archive locations.

        Arguments:
          precursors: object whose ``update()`` method is called by
            ``update(update_precursors=True)``. A new ``PrecursorArchive``
            is created when omitted.
          root_dir: directory holding the yearly tifs; defaults to the
            class-level ``_root_dir``.
          dryrun: accepted for interface compatibility; it is not stored,
            dry-run behaviour is selected per call on the other methods.
        """
        load_env()
        self._root_dir = root_dir or self._root_dir
        self.precursors = precursors or PrecursorArchive()

    def update(self, dryrun=False, update_precursors=False):
        """Rebuild the yearly tif for every updated or missing year.

        Arguments:
          dryrun: forwarded to ``build_tif``.
          update_precursors: when true, ``self.precursors.update()`` is run
            first and every year with an updated ``std`` entry is rebuilt.
        """
        all_updated = self.precursors.update() if update_precursors else []
        # Entries are indexed as d[0] (year) and d[-1] (product type);
        # presumably tuples like (year, jd, type) - verify against
        # PrecursorArchive.update().
        std_updated = [d for d in all_updated if d[-1] == 'std']
        years_updated = sorted({d[0] for d in std_updated})
        years_missing = self._get_missing_years()
        # A year can be both freshly updated and missing its tif; drop the
        # duplicates (keeping order) so no year is built twice.
        todo = list(dict.fromkeys(years_updated + years_missing))
        for year in todo:
            self.build_tif(year, dryrun=dryrun)

    def build_tif(self, year, dryrun=False):
        """Build the multi-band tif for ``year``, one band per 8-day NDVI maximum.

        The tif is written to a ``.tmp`` path first and then moved over the
        final path, so an existing tif is only replaced once the new one
        exists. In a dry run no files are created, so the move and the VRT
        cleanup are skipped.
        """
        vrt_filename = self._build_year_vrt(year, dryrun)
        tif_filename = 'maxMODIS.{}.std.tif'.format(year)
        tif_path = os.path.join(self._root_dir, tif_filename)
        new_tif_path_tmp = os.path.join(self._root_dir, '{}.tmp'.format(tif_filename))
        self._gdal_translate_vrt(vrt_filename, new_tif_path_tmp, dryrun=dryrun)
        if dryrun:
            return
        # os.replace overwrites an existing destination, so the previous tif
        # does not have to be deleted beforehand (and survives a failed build).
        os.replace(new_tif_path_tmp, tif_path)
        os.remove(vrt_filename)

    def _get_missing_years(self):
        """Return the years from get_all_modis_data_years() whose tif is absent from the archive."""
        tpl = ALL_YEAR_MAXES_PRECURSOR_FILENAME_TEMPLATE
        ext = ALL_YEAR_MAXES_PRECURSOR_FILE_EXT
        return [
            y for y in get_all_modis_data_years()
            if not os.path.exists(os.path.join(self._root_dir, tpl.format(y, ext)))
        ]

    def _gdal_translate_vrt(self, vrt_path, tif_path, dryrun=False):
        """Use gdal_translate to convert a VRT to a GeoTIFF.

        Used for creating all-year maxes files. The VRT contents are echoed
        to stdout first. In a dry run the command is not executed and a
        missing VRT is tolerated, because a dry run never writes one.
        """
        print(f'Converting VRT to TIF: {vrt_path} {tif_path}')
        if not dryrun or os.path.exists(vrt_path):
            print(f'Here is the VRT:\n')
            with open(vrt_path) as f:
                for line in f:
                    print(line)

        # NOTE(review): the command is a multi-line string; run_process is
        # presumed to split it on whitespace - confirm in util.
        c = f'''gdal_translate
          -of GTiff
          -co TILED=YES
          -co COMPRESS=DEFLATE
          -co BIGTIFF=YES
          {vrt_path}
          {tif_path}
        '''
        if not dryrun:
            run_process(c)

    def _build_year_vrt(self, year, dryrun=False):
        """Write ``maxMODIS.<year>.std.vrt`` stacking every 8-day file of ``year``.

        A single-band VRT is built per source file; the first one becomes
        the document and the ``VRTRasterBand`` element of each later one is
        appended to it with its band number rewritten.

        Returns the VRT filename (relative to the working directory). In a
        dry run only the name is returned and nothing is written.

        Raises:
          ValueError: when no 8-day files exist for ``year`` (not in dry run).
        """
        paths = self._get_vrt_bands(year)
        bounds = self._get_extent(paths, dryrun=dryrun)
        big_vrt_name = 'maxMODIS.{}.std.vrt'.format(year)
        print("Generating VRT {}...".format(big_vrt_name))
        if dryrun:
            return big_vrt_name
        if not paths:
            raise ValueError(
                'No 8-day max files found for year {}; cannot build {}'.format(year, big_vrt_name)
            )
        main_tree = None
        main_root = None
        for band_num, path in enumerate(paths, start=1):
            temp_vrt = self._build_8day_vrt(path, bounds=bounds, dryrun=dryrun)
            try:
                tree = ET.parse(temp_vrt)
                if main_tree is None:
                    # The first single-band VRT doubles as the output document.
                    main_tree = tree
                    main_root = tree.getroot()
                else:
                    band_element = tree.getroot().find('VRTRasterBand')
                    band_element.attrib['band'] = str(band_num)
                    main_root.append(band_element)
            finally:
                # Best-effort cleanup of the intermediate single-band VRT.
                try:
                    os.remove(temp_vrt)
                except OSError:
                    pass
        main_tree.write(big_vrt_name)
        return big_vrt_name

    def _get_vrt_bands(self, year):
        """Get a list of paths to the 8-day max files that exist for some year."""
        f_tpl = MAX_8DAY_PRECURSOR_FILENAME_TEMPLATE
        ext = MAX_8DAY_PRECURSOR_FILENAME_EXT
        bands = []
        for jd in ALL_MODIS_JULIAN_DAYS:
            path = os.path.join(PRECURSORS_DIR, jd, f_tpl.format(year, jd, 'std', ext))
            # Days without a file on disk are simply left out of the stack.
            if os.path.exists(path):
                bands.append(path)
        return bands

    def _build_8day_vrt(self, path, bounds=None, vrtnodata=255, band_num=1, dryrun=False):
        """Wrapper for gdalbuildvrt. Build a 1-band VRT.

        Arguments:
          path: path to the source file
          bounds: a python list of the form [ xmin, ymin, xmax, ymax ].
            These values are joined into a string that is passed to the -te flag.
            An empty or missing list omits the flag.
          vrtnodata: Value to use for the -vrtnodata flag.
          band_num: Band number in the source dataset to use.
          dryrun: when true the command is assembled but not executed.

        Returns the VRT filename: the basename of ``path`` plus ``.vrt``,
        i.e. relative to the working directory.
        """
        vrtnodata = str(vrtnodata)
        band_num = str(band_num)
        temp_vrt = os.path.basename(path) + '.vrt'
        c = f'''gdalbuildvrt
          -vrtnodata {vrtnodata}
          -b {band_num}
          -overwrite
        '''
        if bounds:
            bounds_string = ' '.join(str(num) for num in bounds)
            c += f'-te {bounds_string} \n'
        c += f'{temp_vrt} {path}'
        if not dryrun:
            run_process(c)
        return temp_vrt

    def _get_extent(self, paths, dryrun=False):
        """Returns the maximum value for each extent parameter for a list of rasters.

        The result is ordered [left, bottom, right, top]. An empty list is
        returned in a dry run or when ``paths`` is empty.

        Raises:
          TypeError: when the rasters do not share one projection.
        """
        if dryrun or not paths:
            return []
        self._check_same_proj(paths)
        bounds = []
        for p in paths:
            with rio.Env():
                with rio.open(p) as src:
                    bounds.append(src.bounds)
        # NOTE(review): max() is applied to left and bottom as well; a union
        # extent would use min() for those two. Kept as-is to preserve the
        # existing output - confirm the intent.
        return [
            max(getattr(b, key) for b in bounds)
            for key in ('left', 'bottom', 'right', 'top')
        ]

    def _check_same_proj(self, paths):
        """Raise TypeError unless every raster in ``paths`` has the same proj4 string.

        An empty ``paths`` passes trivially.
        """
        proj_strings = []
        for p in paths:
            with rio.Env():
                with rio.open(p) as src:
                    proj_strings.append(src.profile['crs'].to_proj4())
        if not proj_strings:
            return
        reference = proj_strings[0]
        if any(proj != reference for proj in proj_strings[1:]):
            raise TypeError('All datasets must have the exact same projection!')

Loading

0 comments on commit 27f2006

Please sign in to comment.