Skip to content

Commit

Permalink
updated v0.2.2
Browse files Browse the repository at this point in the history
- Can now handle generalized metadata (metadata is now a required field)
- Fixed issues with table upload
- Overall code optimization and handling of streaming uploads
  • Loading branch information
samapriya committed May 20, 2019
1 parent 3383306 commit ccc7954
Show file tree
Hide file tree
Showing 8 changed files with 92 additions and 163 deletions.
12 changes: 9 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# geeup: Simple CLI for Earth Engine Uploads with Selenium Support   [![Tweet](https://img.shields.io/twitter/url/http/shields.io.svg?style=social)](https://twitter.com/intent/tweet?text=Use%20porder%20CLI%20with%20@planetlabs%20new%20ordersv2%20API&url=https://github.com/samapriya/geeup)

[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.2678142.svg)](https://doi.org/10.5281/zenodo.2678142)
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.2527157.svg)](https://doi.org/10.5281/zenodo.2527157)
[![PyPI version](https://badge.fury.io/py/geeup.svg)](https://badge.fury.io/py/geeup)
![Build Status](https://img.shields.io/badge/dynamic/json.svg?label=downloads&url=https%3A%2F%2Fpypistats.org%2Fapi%2Fpackages%2Fgeeup%2Frecent%3Fperiod%3Dmonth&query=%24.data.last_month&colorB=blue&suffix=%2fmonth)
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
Expand All @@ -10,8 +10,8 @@ This tool came out of the simple need to handle batch uploads of both image assets t
-If you find this tool useful, star and cite it as below

```
Samapriya Roy. (2019, May 9). samapriya/geeup: geeup: Simple CLI for Earth Engine Uploads (Version 0.1.9). Zenodo.
http://doi.org/10.5281/zenodo.2678142
Samapriya Roy. (2019, April 29). samapriya/geeup: geeup: Simple CLI for Earth Engine Uploads (Version 0.1.6). Zenodo.
http://doi.org/10.5281/zenodo.2653281
```

## Table of contents
Expand Down Expand Up @@ -190,6 +190,12 @@ optional arguments:
```
# Changelog

### v0.2.2

- Can now handle generalized metadata (metadata is now a required field)
- Fixed issues with table upload
- Overall code optimization and handling of streaming uploads

### v0.1.9

- Changes to handle PyDL installation for Py2 and Py3
Expand Down
2 changes: 1 addition & 1 deletion geeup/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@

__author__ = 'Samapriya Roy'
__email__ = '[email protected]'
__version__ = '0.2.1'
__version__ = '0.2.2'
80 changes: 38 additions & 42 deletions geeup/batch_uploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,12 @@
import os
import sys
import time
import json
import requests
import ast
import ee
import requests
import pandas as pd
import subprocess
import retrying
from bs4 import BeautifulSoup
Expand All @@ -74,31 +76,17 @@
sys.path.append(lp)
ee.Initialize()

def upload(user, source_path, destination_path, metadata_path=None, nodata_value=None, bucket_name=None, band_names=[]):
"""
Uploads content of a given directory to GEE. The function first uploads an asset to Google Cloud Storage (GCS)
and then uses ee.data.startIngestion to put it into GEE. Due to the intermediate GCS step, the user is asked for
a Google account name and password.
In case any exception happens during the upload, the function will repeat the call a given number of times, after
which the error will be propagated further.
:param user: name of a Google account
:param source_path: path to a directory
:param destination_path: where to upload (absolute path)
:param metadata_path: (optional) path to file with metadata
:param nodata_value: (optional) value to burn into the raster for missing data in the image
:return:
"""
slist=[]

def upload(user, source_path, destination_path, metadata_path=None, nodata_value=None, bucket_name=None):
submitted_tasks_id = {}

__verify_path_for_upload(destination_path)

path = os.path.join(os.path.expanduser(source_path), '*.tif')
all_images_paths = glob.glob(path)

if len(all_images_paths) == 0:
print(str(path)+' does not contain any tif images.')
print('%s does not contain any tif images.', path)
sys.exit(1)

metadata = load_metadata_from_csv(metadata_path) if metadata_path else None
Expand All @@ -115,7 +103,7 @@ def upload(user, source_path, destination_path, metadata_path=None, nodata_value
no_images = len(images_for_upload_path)

if no_images == 0:
print('No images found that match '+str(path)+' Exiting...')
print('No images found that match %s. Exiting...', path)
sys.exit(1)

failed_asset_writer = FailedAssetsWriter()
Expand All @@ -132,40 +120,48 @@ def upload(user, source_path, destination_path, metadata_path=None, nodata_value
continue

properties = metadata[filename] if metadata else None

try:
if user is not None:
gsid = __upload_file_gee(session=google_session,
file_path=image_path)
else:
gsid = __upload_file_gcs(storage_client, bucket_name, image_path)

asset_request = __create_asset_request(asset_full_path, gsid, properties, nodata_value, band_names)
print('Upload started for: '+str(asset_full_path))
subprocess.call("earthengine upload image "+'"'+gsid+'"'+' --asset_id "'+asset_full_path+'"',shell=True)
df=pd.read_csv(metadata_path)
dd=(df.applymap(type) == str).all(0)
for ind, val in dd.iteritems():
if val==True:
slist.append(ind)
intcol= list(df.select_dtypes(include=['int64']).columns)
floatcol = list(df.select_dtypes(include=['float64']).columns)
with open(metadata_path, 'r') as f:
reader = csv.DictReader(f,delimiter=",")
for i, line in enumerate(reader):
if line["id_no"]==os.path.basename(image_path).split('.')[0]:
j={}
for integer in intcol:
value=integer
j[value]=int(line[integer])
for s in slist:
value=s
j[value]=str(line[s])
for f in floatcol:
value=f
j[value]=float(line[f])
# j['id']=destination_path+'/'+line["id_no"]
# j['tilesets'][0]['sources'][0]['primaryPath']=gsid
json_data = json.dumps(j)
main_payload={"id": asset_full_path,"tilesets": [{"sources": [{"primaryPath": gsid,"additionalPaths": []}]}],"properties": j,"missingData": {"value": nodata_value}}
with open(os.path.join(lp,'data.json'), 'w') as outfile:
json.dump(main_payload, outfile)
subprocess.call("earthengine upload image --manifest "+'"'+os.path.join(lp,'data.json')+'"',shell=True)
except Exception as e:
print(e)
print('Upload of '+str(filename)+' has failed.')
failed_asset_writer.writerow([filename, 0, str(e)])

__check_for_failed_tasks_and_report(tasks=submitted_tasks_id, writer=failed_asset_writer)
failed_asset_writer.close()

# Build the request dictionary for one image asset.
#
# The returned dict carries the asset id, one tileset containing a single
# source (whose "primaryPath" is `gsid` and whose "additionalPaths" is empty),
# the band list, the properties and the missing-data value.
#
# NOTE(review): indentation is not preserved in this view of the code; the
# statements below appear flush-left exactly as in the source text.
# NOTE(review): presumably `gsid` is the storage path returned by the upload
# helpers at the call site -- confirm against the caller.
def __create_asset_request(asset_full_path, gsid, properties, nodata_value, band_names):
# A truthy band_names is rewritten as a list of {'id': name} dicts; a falsy
# value (None or an empty list) is passed through unchanged under "bands".
if band_names:
band_names = [{'id': name} for name in band_names]

# `properties` and `nodata_value` are inserted as given, with no validation
# or defaulting in this function.
return {"id": asset_full_path,
"tilesets": [
{"sources": [
{"primaryPath": gsid,
"additionalPaths": []
}
]}
],
"bands": band_names,
"properties": properties,
"missingData": {"value": nodata_value}
}
__check_for_failed_tasks_and_report(tasks=submitted_tasks_id, writer=failed_asset_writer)
failed_asset_writer.close()

def __verify_path_for_upload(path):
folder = path[:path.rfind('/')]
Expand Down
10 changes: 2 additions & 8 deletions geeup/geeup.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,7 @@ def upload_from_parser(args):
source_path=args.source,
destination_path=args.dest,
metadata_path=args.metadata,
nodata_value=args.nodata,
bucket_name=args.bucket,
band_names=args.bands)
nodata_value=args.nodata)

def seltabup_from_parser(args):
seltabup(uname=args.user,
Expand Down Expand Up @@ -126,14 +124,10 @@ def main(args=None):
required_named = parser_upload.add_argument_group('Required named arguments.')
required_named.add_argument('--source', help='Path to the directory with images for upload.', required=True)
required_named.add_argument('--dest', help='Destination. Full path for upload to Google Earth Engine, e.g. users/pinkiepie/myponycollection', required=True)
required_named.add_argument('-m', '--metadata', help='Path to CSV with metadata.',required=True)
optional_named = parser_upload.add_argument_group('Optional named arguments')
optional_named.add_argument('-m', '--metadata', help='Path to CSV with metadata.')
optional_named.add_argument('--nodata', type=int, help='The value to burn into the raster as NoData (missing data)')
optional_named.add_argument('--bands', type=_comma_separated_strings, help='Comma-separated list of names to use for the image bands. Spaces'
'or other special characters are not allowed.')

required_named.add_argument('-u', '--user', help='Google account name (gmail address).')
optional_named.add_argument('-b', '--bucket', help='Google Cloud Storage bucket name.')

parser_upload.set_defaults(func=upload_from_parser)

Expand Down
50 changes: 0 additions & 50 deletions geeup/manifest_lib.py

This file was deleted.

40 changes: 0 additions & 40 deletions geeup/metadatfix_sr.py

This file was deleted.

Loading

0 comments on commit ccc7954

Please sign in to comment.