-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpart1_script.py
207 lines (170 loc) · 6.18 KB
/
part1_script.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
import urllib2
import xml.etree.ElementTree as ET
import copy
import matplotlib.pyplot as plt
from collections import defaultdict
#Generatin a dictionary where the 'key' is the country and the 'value' is the a list of santiation values. Parameters are api url and information to extract.
class AllCountries:
def __init__(self, url, paramC, paramV, paramD):
temp = {}
self.temp = temp
self.url = url
self.paramC = paramC
self.paramV = paramV
self.paramD = paramD
file = urllib2.urlopen(url)
tree = ET.parse(file)
root = tree.getroot()
country = ''
prev_country = ''
value = ''
year = ''
prev_year = ''
values = []
same = False
for child in root:
for child2 in child:
a = str(child2.tag)
if (a == '{http://www.worldbank.org}'+paramC):
if country != child2.text:
if country != '':
prev_country = country
country = child2.text
same = False
else:
same = True
elif (a == '{http://www.worldbank.org}'+paramV):
value = child2.text
elif (a == '{http://www.worldbank.org}'+paramD):
year = child2.text
else:
pass
if value == None:
value = 'None'
if same:
value = year +'-'+value
values.append(value)
temp[country] = values
elif prev_country!='':
new_list = copy.copy(values)
temp[prev_country] = new_list
del values[:]
#Generates a set of countries based on there income level. Parameters are api url and information to extract.
class IncomeCountries:
def __init__(self, url, paramN):
countries = set()
self.url = url
self.paramN = paramN
self.countries = countries
file = urllib2.urlopen(url)
tree = ET.parse(file)
root = tree.getroot()
for child in root:
for child2 in child:
a = str(child2.tag)
if (a == '{http://www.worldbank.org}'+paramN):
countries.add(child2.text)
#Set figure size and text box
fig = plt.figure(figsize=(18,18))
props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
fig.text(0.05, 0.95, 'The general trend for sanitation facilities can be seen in the scatter plot titled "All Countries" this figure shows how the average of the sanitation values across the world were gradually increasing \n till about the mid 2000s then began to decrease. When we look at the average values in figures representing the "Income Group" the low income countries have the lowest average, \n while the high income countries have the highest, and understandably the middle income countries values are between the high income and low income countries.', fontsize='11', verticalalignment='top', bbox=props)
#Creates subplots. Parameters are titles and positions on chart.
class FIGURE:
def __init__(self,T1,POS1,T2,POS2,T3,POS3,T4,POS4):
self.T1 = T1
self.POS1 = POS1
self.T2 = T2
self.POS2 = POS2
self.T3 = T3
self.POS3 = POS3
self.T4 = T4
self.POS4 = POS4
plot1 = fig.add_subplot(POS1)
plot1.set_title(T1)
plot1.set_ylabel('Sanitation Values')
plot1.set_ylim([0, 10000])
plot1.set_xlim([1960,2018])
self.plot1 = plot1
plot2 = fig.add_subplot(POS2)
plot2.set_title(T2)
plot2.set_xlabel('Years')
plot2.set_ylabel('Sanitation Values')
plot2.set_ylim([0, 10000])
plot2.set_xlim([1960,2018])
self.plot2 = plot2
plot3 = fig.add_subplot(POS3)
plot3.set_title(T3)
plot3.set_ylabel('Sanitation Values')
plot3.set_ylim([0, 10000])
plot3.set_xlim([1960,2018])
self.plot3 = plot3
plot4 = fig.add_subplot(POS4)
plot4.set_title(T4)
plot4.set_xlabel('Years')
plot4.set_ylabel('Sanitation Values')
plot4.set_ylim([0, 10000])
plot4.set_xlim([1960,2018])
self.plot4 = plot4
#Creates scatter plot. Parameters are dictionary, graph instance, marker, and color for plot.
class XY:
def __init__(self, dictionary, graph, m, c):
x = []
y = []
self.dictionary = dictionary
self.graph = graph
self.m = m
self.c = c
#fills graph with average santiation values of that year
for year in dictionary:
avg = sum(dictionary[year])/float(len(dictionary[year]))
y.append(avg)
x.append(year)
graph.scatter(x,y, marker=m, color=c)
def main():
#Dictionary with key = country abd value = list of sanitation values.
ALL = AllCountries('http://api.worldbank.org/v2/countries/all/indicators/SH.STA.ACSN?date=1960:2018&per_page=17000', 'country', 'value', 'date')
#Sets of countries based on income level
low_income_countries = IncomeCountries('http://api.worldbank.org/v2/countries?incomeLevel=LIC','name')
middle_income_countries = IncomeCountries('http://api.worldbank.org/v2/countries?incomeLevel=MIC&per_page=150','name')
high_income_countries = IncomeCountries('http://api.worldbank.org/v2/countries?incomeLevel=HIC&per_page=100','name')
#dictionaries
temp_lic = defaultdict(list)
temp_mic = defaultdict(list)
temp_hic = defaultdict(list)
temp_world = defaultdict(list)
#fills above 4 dictionaries where key is year, and value is list of sanitation values of that year. Adds to appropriate dictionary based on income level of country
for country in ALL.temp:
for value in ALL.temp[country]:
parts = value.split('-')
year = parts[0]
sanitation = parts[1]
if (sanitation!='None'):
temp_world[year].append(float(sanitation))
if country in low_income_countries.countries:
parts = value.split('-')
year = parts[0]
sanitation = parts[1]
if (sanitation!='None'):
temp_lic[year].append(float(sanitation))
elif country in middle_income_countries.countries:
parts = value.split('-')
year = parts[0]
sanitation = parts[1]
if (sanitation!='None'):
temp_mic[year].append(float(sanitation))
elif country in high_income_countries.countries:
parts = value.split('-')
year = parts[0]
sanitation = parts[1]
if (sanitation!='None'):
temp_hic[year].append(float(sanitation))
#generates subplots
g = FIGURE("Low Income Countries",221,"Middle Income Countries",223,"High Income Countries",222,"All Countries",224)
#fills subplots with scatter plots of average values for relevant years from relevant income group dinctionary
l = XY(temp_lic, g.plot1,'o','red')
m = XY(temp_mic, g.plot2,'o','orange')
h = XY(temp_hic, g.plot3,'o', 'green')
w = XY(temp_world, g.plot4,'*','blue')
fig.savefig('part1_output.pdf')
if __name__ == "__main__":
main()