Skip to content

Commit

Permalink
Replaced PROJECT_ID with PIXELS_PROJECT_ID in config.
Browse files Browse the repository at this point in the history
  • Loading branch information
yellowcap committed Nov 17, 2020
1 parent 973354e commit 8cdaddc
Show file tree
Hide file tree
Showing 13 changed files with 71 additions and 56 deletions.
5 changes: 3 additions & 2 deletions batch/collect.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@

logging.basicConfig(
format='%(asctime)s %(levelname)s %(message)s',
level=logging.WARNING,
level=logging.INFO,
datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger('botocore').setLevel(logging.ERROR)


def collect():
Expand All @@ -26,7 +27,7 @@ def collect():

# Get setup variables from env.
bucket = os.environ.get('AWS_S3_BUCKET', 'tesselo-pixels-results')
project_id = os.environ.get('PROJECT_ID', 'test')
project_id = os.environ.get('PIXELS_PROJECT_ID', 'test')
local_path = os.environ.get('PIXELS_LOCAL_PATH', None)
array_index = int(os.environ.get('AWS_BATCH_JOB_ARRAY_INDEX', 0))
features_per_job = int(os.environ.get('BATCH_FEATURES_PER_JOB', 100))
Expand Down
2 changes: 1 addition & 1 deletion batch/csv/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
logger.setLevel(logging.INFO)

# Get path from env.
project_id = os.environ.get('PROJECT_ID', 'pge_placer')
project_id = os.environ.get('PIXELS_PROJECT_ID', 'pge_placer')
filename = os.environ.get('GEO_FILE_NAME', 'pge_buff200_placer.gpkg')
bucket = os.environ.get('AWS_S3_BUCKET', 'tesselo-pixels-results')
tile_group_size = int(os.environ.get('TILE_GROUP_SIZE', 20))
Expand Down
4 changes: 2 additions & 2 deletions batch/csv/push_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
logger.setLevel(logging.INFO)

# Get path from env.
project_id = os.environ.get('PROJECT_ID', 'pge_placer')
project_id = os.environ.get('PIXELS_PROJECT_ID', 'pge_placer')
filename = os.environ.get('GEO_FILE_NAME', 'pge_buff200_placer.gpkg')
bucket = os.environ.get('AWS_S3_BUCKET', 'tesselo-pixels-results')
tile_group_size = int(os.environ.get('TILE_GROUP_SIZE', 20))
Expand Down Expand Up @@ -72,7 +72,7 @@
{'name': 'AWS_SECRET_ACCESS_KEY', 'value': os.environ.get('AWS_SECRET_ACCESS_KEY')},
{'name': 'ESA_SCIHUB_USERNAME', 'value': os.environ.get('ESA_SCIHUB_USERNAME')},
{'name': 'ESA_SCIHUB_PASSWORD', 'value': os.environ.get('ESA_SCIHUB_PASSWORD')},
{'name': 'PROJECT_ID', 'value': project_id},
{'name': 'PIXELS_PROJECT_ID', 'value': project_id},
{'name': 'GEO_FILE_NAME', 'value': filename},
{'name': 'TILE_GROUP_SIZE', 'value': str(tile_group_size)},
{'name': 'START_DATE', 'value': start},
Expand Down
2 changes: 1 addition & 1 deletion batch/pack.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
]

# Get path from env.
project_id = os.environ.get('PROJECT_ID', 'test')
project_id = os.environ.get('PIXELS_PROJECT_ID', 'test')
bucket = os.environ.get('AWS_S3_BUCKET', 'tesselo-pixels-results')
train_or_predict = 'predict' if os.environ.get('PREDICT', 'false').lower() == 'true' else 'train'
# Fetch config.
Expand Down
2 changes: 1 addition & 1 deletion batch/predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
s3 = boto3.client('s3')

bucket = os.environ.get('AWS_S3_BUCKET', 'tesselo-pixels-results')
project_id = os.environ.get('PROJECT_ID', 'test')
project_id = os.environ.get('PIXELS_PROJECT_ID', 'test')

config = s3.get_object(Bucket=bucket, Key=project_id + '/config.json')
config = json.loads(config['Body'].read())
Expand Down
6 changes: 3 additions & 3 deletions batch/push.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def push_training_collection(bucket, project_id):
{'name': 'AWS_SECRET_ACCESS_KEY', 'value': os.environ.get('AWS_SECRET_ACCESS_KEY')},
{'name': 'ESA_SCIHUB_USERNAME', 'value': os.environ.get('ESA_SCIHUB_USERNAME')},
{'name': 'ESA_SCIHUB_PASSWORD', 'value': os.environ.get('ESA_SCIHUB_PASSWORD')},
{'name': 'PROJECT_ID', 'value': project_id},
{'name': 'PIXELS_PROJECT_ID', 'value': project_id},
{'name': 'AWS_S3_BUCKET', 'value': bucket},
{'name': 'BATCH_FILE_S3_URL', 'value': 's3://tesselo-pixels-scripts/batch.zip'},
{'name': 'BATCH_FILE_TYPE', 'value': 'zip'},
Expand All @@ -73,8 +73,8 @@ def push_training_collection(bucket, project_id):

# Get path from env.
bucket = os.environ.get('AWS_S3_BUCKET', 'tesselo-pixels-results')
project = os.environ.get('PROJECT_ID')
project = os.environ.get('PIXELS_PROJECT_ID')
if project is None:
raise ValueError('Specify PROJECT_ID env var.')
raise ValueError('Specify PIXELS_PROJECT_ID env var.')
jobid = push_training_collection(bucket, project)
print(jobid)
4 changes: 2 additions & 2 deletions batch/pyramid/push_pyramid.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
logger.setLevel(logging.INFO)

# Get path from env.
project_id = os.environ.get('PROJECT_ID', 'pge_placer')
project_id = os.environ.get('PIXELS_PROJECT_ID', 'pge_placer')
bucket = os.environ.get('AWS_S3_BUCKET', 'tesselo-pixels-results')
tile_group_size = int(os.environ.get('TILE_GROUP_SIZE', 5))

Expand Down Expand Up @@ -77,7 +77,7 @@
{'name': 'AWS_SECRET_ACCESS_KEY', 'value': os.environ.get('AWS_SECRET_ACCESS_KEY')},
{'name': 'ESA_SCIHUB_USERNAME', 'value': os.environ.get('ESA_SCIHUB_USERNAME')},
{'name': 'ESA_SCIHUB_PASSWORD', 'value': os.environ.get('ESA_SCIHUB_PASSWORD')},
{'name': 'PROJECT_ID', 'value': project_id},
{'name': 'PIXELS_PROJECT_ID', 'value': project_id},
{'name': 'TILE_GROUP_SIZE', 'value': str(tile_group_size)},
{'name': 'AWS_S3_BUCKET', 'value': bucket},
{'name': 'BATCH_FILE_S3_URL', 'value': 's3://tesselo-pixels-scripts/batch.zip'},
Expand Down
2 changes: 1 addition & 1 deletion batch/pyramid/pyramid.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
logger.setLevel(logging.INFO)

# Get path from env.
project_id = os.environ.get('PROJECT_ID')
project_id = os.environ.get('PIXELS_PROJECT_ID')

bucket = os.environ.get('AWS_S3_BUCKET', 'tesselo-pixels-results')
tile_group_size = int(os.environ.get('TILE_GROUP_SIZE', 50))
Expand Down
2 changes: 1 addition & 1 deletion batch/pyramid/pyramid_up.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
logger.setLevel(logging.INFO)

# Get path from env.
project_id = os.environ.get('PROJECT_ID')
project_id = os.environ.get('PIXELS_PROJECT_ID')
bucket = os.environ.get('AWS_S3_BUCKET', 'tesselo-pixels-results')
tile_group_size = int(os.environ.get('TILE_GROUP_SIZE', 2))
zoom = int(os.environ.get('TILE_ZOOM', 13))
Expand Down
57 changes: 30 additions & 27 deletions batch/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,24 @@

import boto3
import numpy
import tensorflow
from tensorflow.keras import layers
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.utils import to_categorical

from pixels.clouds import cloud_or_snow_mask

# Setup tensorflow session for model to use GPU.
config = tensorflow.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
session = tensorflow.compat.v1.InteractiveSession(config=config)
### Remove in production.

# Setup boto client.
s3 = boto3.client('s3')
# Fetch all data to memory.
bucket = os.environ.get('AWS_S3_BUCKET', 'tesselo-pixels-results')
project_id = os.environ.get('PROJECT_ID', 'test')
project_id = os.environ.get('PIXELS_PROJECT_ID', 'test')
# config = s3.get_object(Bucket=bucket, Key=project_id + '/config.json')
# config = json.loads(config['Body'].read())
paginator = s3.get_paginator('list_objects_v2')
Expand All @@ -37,19 +44,16 @@
Ys = []
ids = []
valuemap = {}
for path in glob.glob('/home/tam/Desktop/pixels_test/pixels_data/*.npz'):
for path in glob.glob('/home/tam/Desktop/esb/esblandcover/training/*.npz'):
with open(path, 'rb') as fl:
data = numpy.load(fl, allow_pickle=True)
X = data['data']
print('A', X.shape)
# Data shape is ("scenes", bands, height, width)
cloud_mask = cloud_or_snow_mask(X[:, 8], X[:, 7], X[:, 6], X[:, 2], X[:, 1], X[:, 0], X[:, 9])
# Reorder the data to have shape (height, width, "scenes", bands).
X = X.swapaxes(0, 2).swapaxes(1, 3)
print('B', X.shape)
# Flatten the 2D data into pixel level.
X = X.reshape(X.shape[0] * X.shape[1], X.shape[2], X.shape[3])
print('C', X.shape)
# Remove zeros.
X = X[numpy.sum(X, axis=(1, 2)) != 0]
# Compute cloud and snow mask.
Expand Down Expand Up @@ -86,28 +90,27 @@
Y_test = to_categorical(Ys[numpy.logical_not(selector)])

# Build the model.
# model = Sequential()
# model.add(layers.BatchNormalization())
# model.add(layers.Conv1D(filters=64, kernel_size=3, activation='relu'))
# model.add(layers.Dropout(0.5))
# model.add(layers.BatchNormalization())
# model.add(layers.Conv1D(filters=64, kernel_size=3, activation='relu'))
# model.add(layers.Dropout(0.3))
# model.add(layers.BatchNormalization())
# model.add(layers.MaxPooling1D(pool_size=2))
# model.add(layers.Flatten())
# model.add(layers.Dense(100, activation='relu'))
# model.add(layers.Dense(len(valuemap), activation='softmax'))

model = Sequential()
model.add(layers.BatchNormalization())
model.add(layers.Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.BatchNormalization())
model.add(layers.Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(layers.Dropout(0.3))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling1D(pool_size=2))
model.add(layers.Flatten())
model.add(layers.Dense(100, activation='relu'))
model.add(layers.Dense(len(valuemap), activation='softmax'))

model = Sequential()
model.add(layers.BatchNormalization())
model.add(layers.GRU(300, return_sequences=False, return_state=False, dropout=0.5, recurrent_dropout=0.5))
model.add(layers.BatchNormalization())
model.add(layers.Dense(100, activation='relu'))
model.add(layers.Dense(len(valuemap), activation='softmax'))
# model = Sequential()
# model.add(layers.BatchNormalization())
# model.add(layers.GRU(300, return_sequences=False, return_state=False, dropout=0.5, recurrent_dropout=0.5))
# model.add(layers.BatchNormalization())
# model.add(layers.Dense(100, activation='relu'))
# model.add(layers.Dense(len(valuemap), activation='softmax'))

visible = layers.Input(shape=(25, 10))
visible = layers.Input(shape=X_train.shape[1:])
normed = layers.BatchNormalization()(visible)
# first feature extractor
conv1 = layers.Conv1D(filters=64, kernel_size=3, activation='relu')(normed)
Expand Down Expand Up @@ -147,8 +150,8 @@

# Fit the model.
fit_parms = config.get('keras_fit_arguments', {
'epochs': 10,
'batch_size': 1000,
'epochs': 50,
'batch_size': 10000,
'verbose': 1,
})
model.fit(X_train, Y_train, **fit_parms)
Expand Down
13 changes: 8 additions & 5 deletions pixels/mosaic.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def latest_pixel_s2(geojson, date, scale, bands=S2_BANDS, limit=10, clip=False,
# Skip search if list of scenes was provided, otherwise assume input is a
# specific date to search with.
if isinstance(date, (list, tuple)):
logger.info('Latest pixels for {} item.'.format(len(date)))
logger.info('Latest pixels for {} items.'.format(len(date)))
items = date
else:
logger.info('Latest pixels for {}'.format(date))
Expand All @@ -58,6 +58,7 @@ def latest_pixel_s2(geojson, date, scale, bands=S2_BANDS, limit=10, clip=False,

stack = None
for item in items:
logger.info(str(item['id']))
# Prepare band list.
band_list = [(item['assets'][band]['href'], geojson, scale, False, False, False, None) for band in bands]

Expand Down Expand Up @@ -133,10 +134,12 @@ def latest_pixel_s2_stack(geojson, min_date, max_date, scale, interval='weeks',
else:
# Construct array of latest pixel calls with varying dates.
dates = [(geojson, step[1], scale, bands, limit, clip, pool, max_cloud_cover) for step in timeseries_steps(min_date, max_date, interval)]
logger.info('Getting {} {} for this geom.'.format(len(dates), interval))

# Call pixels calls asynchronously.
logger.info('Found {} scenes, getting asynchronously.'.format(len(dates)))
with Pool(len(dates)) as p:
pool_size = min(len(dates), 10)
logger.info('Found {} scenes, processing pool size is {}.'.format(len(dates), pool_size))
with Pool(pool_size) as p:
return p.starmap(latest_pixel_s2, dates)


Expand All @@ -159,8 +162,8 @@ def composite(geojson, start, end, scale, bands=S2_BANDS, limit=10, clip=False,
if 'features' not in response:
raise ValueError('No features in search response.')

print('Found {} input scenes.'.format(len(response['features'])))##
print('Cloud cover is {}.'.format([dat['properties']['eo:cloud_cover'] for dat in response['features']]))##
logger.info('Found {} input scenes.'.format(len(response['features'])))
logger.info('Cloud cover is {}.'.format([dat['properties']['eo:cloud_cover'] for dat in response['features']]))

stack = []
creation_args = None
Expand Down
26 changes: 17 additions & 9 deletions pixels/retrieve.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def retrieve(source, geojson, scale=None, discrete=False, clip=False, all_touche
"""
Get pixels from a source raster over a geojson feature collection.
"""
logger.info('Retrieving {}'.format(source))
logger.debug('Retrieving {}'.format(source))

# Validate geojson by opening it with rasterio CRS class.
dst_crs = CRS.from_dict(geojson['crs'])
Expand All @@ -34,15 +34,15 @@ def retrieve(source, geojson, scale=None, discrete=False, clip=False, all_touche
'Can not auto-determine target scale because'
'the geom crs does not match the source crs.'
)
logger.info('Source CRS is {}.'.format(src.crs))
logger.debug('Source CRS is {}.'.format(src.crs))

# If no band indices were provided, process all bands.
if not bands:
bands = range(1, src.count + 1)

# Prepare target raster transform from the geometry input.
transform, width, height = compute_transform(geojson, scale)
logger.info('Target array shape is ({}, {})'.format(height, width))
logger.debug('Target array shape is ({}, {})'.format(height, width))

# Prepare creation parameters for memory raster.
creation_args = src.meta.copy()
Expand All @@ -54,6 +54,10 @@ def retrieve(source, geojson, scale=None, discrete=False, clip=False, all_touche
'height': height,
})

# Set different band count if bands were given as input.
if bands:
creation_args['count'] = len(bands)

# Open memory destination file.
with MemoryFile() as memfile:
with memfile.open(**creation_args) as dst:
Expand All @@ -78,20 +82,24 @@ def retrieve(source, geojson, scale=None, discrete=False, clip=False, all_touche
proj_args['src_crs'] = src_crs

# Transform raster bands from source to destination.
for band in bands:
for index, band in enumerate(bands):
proj_args.update({
'source': rasterio.band(src, band),
'destination': rasterio.band(dst, band),
'destination': rasterio.band(dst, index + 1),
})
reproject(**proj_args)

# Get pixel values of first band.
pixels = dst.read(1)
# Get pixel values.
pixels = dst.read()

if clip:
mask = compute_mask(geojson, height, width, transform)
mask = compute_mask(geojson, height, width, transform, all_touched=all_touched)
# Apply mask to all bands.
pixels[mask] = NODATA_VALUE
pixels[:, mask] = NODATA_VALUE

# If only one band was requested, reshape result to 2D array.
if len(bands) == 1:
pixels = pixels[bands[0] - 1]

# Return re-creation args and pixel data.
return creation_args, pixels
2 changes: 1 addition & 1 deletion scripts/run_clouds.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy
from PIL import Image

from pixels.clouds import composite_index, cloud_or_snow_mask
from pixels.clouds import cloud_or_snow_mask, composite_index

data = numpy.load('/home/tam/Desktop/esb/combined_data/pixels_9.npz', allow_pickle=True)

Expand Down

0 comments on commit 8cdaddc

Please sign in to comment.