This repository has been archived by the owner on Mar 12, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathworkshop_utils.py
69 lines (57 loc) · 2.1 KB
/
workshop_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import urllib, shutil, os, sys
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon
import numpy as np
# Helper functions for queries
"""
This function downloads query results from the publicly accessible S3 bucket
and saves them to this folder as <query id>.csv
Input: The URL of the data on AWS (from the Athena link)
Returns: Pandas dataframe with query results
"""
def load_dataframe_from_s3(link):
base_url = "https://hot-aws-workshop.s3.us-east-2.amazonaws.com/"
if len(link)== 36:
query_id = link
else:
query_id = link.split('/')[-2]
if os.path.isfile(query_id+".csv"):
sys.stderr.write("Found file locally... ")
else:
sys.stderr.write("Downloading from S3... ")
with urllib.request.urlopen(base_url + query_id + ".csv") as response, open(query_id+".csv", 'wb') as out_file:
shutil.copyfileobj(response, out_file)
sys.stderr.write("Query results saved to: \n"+query_id+".csv\n")
sys.stderr.write("Creating dataframe... ")
d = pd.read_csv(query_id+".csv")
sys.stderr.write("done. Found {:,} rows".format(len(d)))
return d
"""
This function takes a string from Athena that represents a Map and returns the Python
dictionary equivalent
"""
def string_to_dict(string):
if type(string)==str:
arr = string[1:-1].split("=")
d = dict()
for k, v in zip(arr[:-1], arr[1:]):
d[k] = v
return d
elif type(string)==dict:
return string
def make_grid(length=0.1, width=0.1, points=None):
    """Build a grid of rectangular polygons spanning the extent of ``points``.

    The bounding box of ``points`` is expanded outward to whole numbers
    (floor of the minimum, ceil of the maximum). Column origins are generated
    from left to right in steps of ``width`` and row origins in steps of
    ``length``, visited from the largest to the smallest. Cells are numbered
    from 1, column by column.

    NOTE(review): each cell spans from its row origin *down* to
    ``origin - length``, so the grid covers ``floor(ymin) - length`` up to the
    largest row origin, and the band just below ``ceil(ymax)`` is not covered.
    This matches the previous behaviour and is kept so existing PolyID /
    geometry pairings do not change -- confirm whether it is intended.

    Args:
        length: Cell size along y, in the units of the point coordinates.
        width: Cell size along x, in the units of the point coordinates.
        points: Object exposing ``total_bounds`` as
            ``(xmin, ymin, xmax, ymax)``, e.g. a GeoDataFrame. When omitted,
            a module-level global named ``points`` is used, as earlier
            versions of this function did.

    Returns:
        geopandas.GeoDataFrame with a ``geometry`` column of cell polygons and
        a ``PolyID`` column of 1-based cell ids.

    Raises:
        NameError: If ``points`` is not given and no module-level ``points``
            exists.
        ValueError: If ``length`` or ``width`` is not positive.
    """
    if points is None:
        # Backward compatibility: the function used to read a global.
        try:
            points = globals()["points"]
        except KeyError:
            raise NameError(
                "name 'points' is not defined; pass it via the `points` argument"
            ) from None
    if length <= 0 or width <= 0:
        raise ValueError("length and width must be positive")

    xmin, ymin, xmax, ymax = points.total_bounds
    cols = np.arange(int(np.floor(xmin)), int(np.ceil(xmax)), width)
    # Reversed so rows are visited from the largest y to the smallest.
    rows = np.arange(int(np.floor(ymin)), int(np.ceil(ymax)), length)[::-1]

    polygons = [
        Polygon([(x, y), (x + width, y), (x + width, y - length), (x, y - length)])
        for x in cols
        for y in rows
    ]
    ids = list(range(1, len(polygons) + 1))
    return gpd.GeoDataFrame({'geometry': polygons, 'PolyID': ids})