-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_functions.R
182 lines (144 loc) · 6.58 KB
/
get_functions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
######### get functions #############
# here we implement functions for getting things from the dataset
# REMARK: here are the "CountryName" values you can pass in the argument
# "myAggregate" if you want to do the analysis at an aggregated level
#
#
# [1] "Arab World"
# [2] "Caribbean small states"
# [3] "Central Europe and the Baltics"
# [4] "East Asia & Pacific (all income levels)"
# [5] "East Asia & Pacific (developing only)"
# [6] "Euro area"
# [7] "Europe & Central Asia (all income levels)"
# [8] "Europe & Central Asia (developing only)"
# [9] "European Union"
# [10] "Fragile and conflict affected situations"
# [11] "Heavily indebted poor countries (HIPC)"
# [12] "High income"
# [13] "High income: nonOECD"
# [14] "High income: OECD"
# [15] "Latin America & Caribbean (all income levels)"
# [16] "Latin America & Caribbean (developing only)"
# [17] "Least developed countries: UN classification"
# [18] "Low & middle income"
# [19] "Low income"
# [20] "Lower middle income"
# [21] "Middle East & North Africa (all income levels)"
# [22] "Middle East & North Africa (developing only)"
# [23] "Middle income"
# [24] "North America"
# [25] "OECD members"
# [26] "Other small states"
# [27] "Pacific island small states"
# [28] "Small states"
# [29] "South Asia"
# [30] "Sub-Saharan Africa (all income levels)"
# [31] "Sub-Saharan Africa (developing only)"
# [32] "Upper middle income"
# [33] "World"
get_Indicators <- function(myTopic = NULL,
                           myYear = NULL,
                           myRegion = NULL,
                           myCountries = NULL,
                           myAggregate = NULL,
                           myInd.Name = NULL,
                           ind = Indicators,
                           ser = Series,
                           count = Country,
                           clear_name = FALSE) {
  # get_Indicators extracts the desired indicators from the long-format
  # dataframe `ind` and reshapes them to one column per indicator.
  #
  # Every filter below is optional (NULL = criterion not active) and the
  # active ones are applied in sequence, so they combine as a logical AND.
  #
  #INPUT:
  # myTopic     = indicators topic, matched against ser$Topic
  #               [vector of strings]
  # myYear      = indicators year, matched against ind$Year [vector of int]
  # myRegion    = country geographical region, matched against count$Region
  #               [vector of strings]
  # myCountries = country names, matched against count$TableName
  #               [vector of strings]
  # myAggregate = aggregated states, matched against ind$CountryName
  #               [vector of strings]
  #               if this argument is NULL the aggregated countries
  #               (rows of `count` whose Region is "") are eliminated
  # myInd.Name  = indicator names, matched against ind$IndicatorName
  #               [vector of strings]
  # ind         = "Indicators" dataframe; the code reads the columns
  #               CountryName, CountryCode, IndicatorName, IndicatorCode,
  #               Year and Value
  # ser         = "Series" dataframe; the code reads SeriesCode and Topic
  # count       = "Country" dataframe; the code reads CountryCode,
  #               TableName and Region
  # clear_name  = TRUE if you don't want the unit of measure in the
  #               indicator names, FALSE otherwise (FALSE default)
  #
  #OUTPUT:
  # dataframe with one row per (CountryCode, Year) pair and one column per
  # indicator name, holding the corresponding "Value".
  # example:
  #   CountryCode Year   GDP population growth
  #   FRA         2000    17      12545      6
  #   ITA         2000    18      75485      7
  #
  #ERRORS / WARNINGS:
  # stop()    if a requested indicator name, year, aggregate name or
  #           country name is not found (years and aggregate names are
  #           checked against the rows that survived the previous filters)
  # warning() if myTopic is NULL, and if myAggregate is NULL
  #
  # The two packages are attached here, as in the original implementation,
  # so code that relies on them being attached after a call keeps working.
  library(reshape2)
  library(dplyr)

  # The local names below are deliberately different from `Indicators`,
  # `Series` and `Country`: the default arguments are evaluated lazily in
  # this function's frame, so a local variable with one of those names
  # would shadow the global dataframe the default is meant to pick up.
  selected <- ind

  ####### extract the indicators ###########
  if (!is.null(myInd.Name)) { # IndicatorName is an activated criteria
    if (!all(myInd.Name %in% selected$IndicatorName)) {
      stop("ERROR: at least one Indicator name in input is not present in the dataframe")
    }
    selected <- selected %>%
      filter(IndicatorName %in% myInd.Name)
  }

  if (!is.null(myTopic)) { # Topic is an activated criteria
    topic_series <- ser %>%
      filter(Topic %in% myTopic)
    selected <- selected %>%
      filter(IndicatorCode %in% topic_series$SeriesCode)
  } else {
    # if there's no Topic in input, warn the user that Indicators
    # are selected from all the Topics
    warning("WARNING:no Topic has been specified, I selected them all")
  }

  if (!is.null(myYear)) { # Year is an activated criteria
    if (!all(myYear %in% selected$Year)) {
      stop("ERROR: at least one year is not present in the dataframe")
    }
    selected <- selected %>%
      filter(Year %in% myYear)
  }

  if (!is.null(myRegion)) { # Region is an activated criteria
    region_countries <- count %>%
      filter(Region %in% myRegion)
    selected <- selected %>%
      filter(CountryCode %in% region_countries$CountryCode)
  }

  if (!is.null(myAggregate)) { # looking for aggregate countries
    # check for misspelling
    if (!all(myAggregate %in% selected$CountryName)) {
      stop("ERROR: aggregate country name not valid")
    }
    selected <- selected %>%
      filter(CountryName %in% myAggregate)
  } else { # remove aggregated countries
    aggregate_codes <- count %>%
      filter(Region == "") %>% # agg countries have "Region" blank
      select(CountryCode)
    selected <- selected %>%
      filter(!(CountryCode %in% aggregate_codes$CountryCode))
    warning("I've eliminated aggregated countries from the dataframe")
  }

  if (!is.null(myCountries)) { # country name is an activated criteria
    # check that all the country names in input are ok
    if (!all(myCountries %in% count$TableName)) {
      stop("Error: some Countries in input are not in TableName")
    }
    named_countries <- count %>%
      filter(TableName %in% myCountries)
    selected <- selected %>%
      filter(CountryCode %in% named_countries$CountryCode)
  }

  ######## indicators from observation to variables ############
  # for each (CountryCode, Year) let IndicatorName be a variable (column)
  selected <- dcast(selected,
                    formula = CountryCode + Year ~ IndicatorName,
                    value.var = "Value")

  # clear unit of measure from indicator name: drop everything from the
  # first "(" to the end of the name.
  # NOTE: a space preceding the "(" is kept (e.g. "GDP (current US$)"
  # becomes "GDP "), and indicators that differ only in their unit end up
  # with the same column name.
  if (clear_name) {
    names(selected) <- gsub("\\(.*$", "", names(selected))
  }

  return(selected)
}