forked from raenb0/madagascar
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path_parking_lot_code.txt
153 lines (111 loc) · 5.28 KB
/
_parking_lot_code.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# Convert rice prices to Madagascar currency ------------------------------
#load data if needed
matched <- read_csv('outputs/mahalanobis_matched_13July2022.csv') #update date
names(matched)
#drop extra years and re-order data
# cfm_pa_data_subs <- cfm_pa_data_90m_no_na %>%
# dplyr::select(CFM, PA, cfm_id, pa_id, dist_cart, dist_road, dist_urb, DVSP, dist_vil, elev, precip, rice, slope, veg_type, edge10:edge14, fordens2005:fordens2014, for2005:for2017, drght2005:drght2017, pop2005:pop2017, precip2005:precip2017, riceav2005:riceav2017, ricesd2005:ricesd2017, temp2005:temp2017, wind2005:wind2017)
#names(cfm_pa_data_subs)
matched_subs <- matched %>%
dplyr::select(CFM, PA, cfm_id, pa_id, dist_cart, dist_road, dist_urb, DVSP, dist_vil, elev, precip, rice, slope, veg_type, edge10:edge14, fordens2005:fordens2014, for2005:for2017, drght2005:drght2017, pop2005:pop2017, precip2005:precip2017, riceav2005:riceav2017, ricesd2005:ricesd2017, temp2005:temp2017, wind2005:wind2017)
names(matched_subs)
#rename "precip" variable so it doesn't cause issues later
#cfm_pa_data_subs <- rename(cfm_pa_data_subs,
# precip_av = precip)
matched_subs <- rename(matched_subs,
precip_av = precip)
#subset data to split up PA and CFM data
#for PA data:
PA_data <- matched_subs %>%
filter(CFM==0)
#for CFM data:
CFM_data <- matched_subs %>%
filter(CFM==1)
#add unique ID columns to each subset:
PA_data$ID <- seq.int(nrow(PA_data))
CFM_data$ID <- seq.int(nrow(CFM_data))
#add new variable PA_CFM
PA_data$PA_CFM <- "PA"
CFM_data$PA_CFM <- "CFM"
#create character strings for new unique IDs
PA_data$UID <- do.call(paste0, PA_data[c("PA_CFM", "ID")])
CFM_data$UID <- do.call(paste0, CFM_data[c("PA_CFM", "ID")])
#check to make sure they look ok
View(PA_data)
View(CFM_data)
#join the tables back together
matched_UID <- rbind(PA_data, CFM_data)
View(matched_UID)
names(matched_UID)
#pivot longer
matched_longer <- matched_UID %>%
pivot_longer(
cols = for2005:wind2017,
names_to = c("timevariant", "year"),
names_pattern = "([A-Za-z]+)(\\d+)",
values_to = "timevariant_values"
)
#View
View(matched_longer)
#now pivot wider to get time variant variables as columns instead of rows
matched_wider <- matched_longer %>%
pivot_wider(
names_from = "timevariant",
values_from = "timevariant_values"
)
#View
View(matched_wider) #WORKED!
matched_wider <- matched_wider %>% mutate(year = as.double(year)) #convert year to numeric format
#load exchange rate data
MDG_exchange <- read_csv("data/MDG_exchange_rates.csv")
matched_exch <- left_join(matched_wider, MDG_exchange, by = c("year" = "Year")) %>%
mutate(riceav_mdg = riceav*Exchange_rate) %>%
mutate(ricesd_mdg = ricesd*Exchange_rate)
names(matched_exch)
#write to CSV
write_csv(matched_exch, 'outputs/matched_exch_25July2022.csv') #update date
#Clustered SE approach using lmtest package
#Stephen Parry from CSCU suggested this: https://stats.stackexchange.com/questions/10017/standard-error-clustering-in-r-either-manually-or-in-plm
# (doesn't work, because with this approach plm index variable must be used to cluster, and we want to cluster on "cluster", not "UID")
library(lmtest)
library(sandwich)
startTime <- Sys.time()
coeftest(did_m1, vcov=vcovHC(did_m1,type="HC0",cluster="cluster")) #Error in match.arg(cluster) : 'arg' should be one of “group”, “time”
endTime <- Sys.time()
print(endTime - startTime)
#hint: issue explained https://stats.stackexchange.com/questions/477332/r-plm-cluster-robust-standard-errors-with-multiple-imputations
#It is possible to cluster standard errors at a level other than the "group" or "time" indices of the plm model (e.g. industry).
#You would need to use the vcovCR function in the clubSandwich R package. Here is some example code I used: vcovCR(mypanelmodel, PanelData$Owner_ID,type="CR2").
#code adapted from Ranaivo
#try the vcovCR approach
library(tidyverse)
library(dplyr)
#library(lmtest)
#library(sandwich)
library(clubSandwich)
#load data if needed
matched_wider <- read_csv('outputs/matched_wider_26July2022.csv') #update date
#add a new alphanumeric variable: "cluster" with the format cfm00pa00
names(matched_wider)
matched_wider$cluster <- do.call(paste0, c("cfm", matched_wider["cfm_id"], "pa", matched_wider["pa_id"]))
matched_wider$cluster <- as.factor(matched_wider$cluster) #convert "cluster" to a factor
view(matched_wider)
#write to CSV
write_csv(matched_wider,'outputs/matched_wider_cluster_26July2022.csv') #update date
memory.limit(size=5000000)
#clustering SE #takes a long time
#doesn't work due to memory issues: "Error: cannot allocate vector of size 4.0 Gb"
startTime <- Sys.time() #to record time required to run this
V_CR2 <- vcovCR(did_m1, cluster=matched_wider$cluster, type="CR2") # clustering SE. "clusterID": CFM site or PA identification code in the data
endTime <- Sys.time()
print(endTime - startTime) #took 1.6 hours, then threw an error
#p-values
startTime <- Sys.time()
coef_test(did_m1, vcov=V_CR2, test="Satterthwaite") # p-values, also took a long time to run
endTime <- Sys.time()
print(endTime - startTime)
#95% CI
startTime <- Sys.time()
conf_int(did_m1, vcov=V_CR2, test="Satterthwaite") # 95% CI. I personally prefer presenting CI because of all the controversies surrounding p-values
endTime <- Sys.time()
print(endTime - startTime)