-
Notifications
You must be signed in to change notification settings - Fork 0
/
LSA Final Assesment.R
260 lines (260 loc) · 10.7 KB
/
LSA Final Assesment.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
# Setup ----
# Load the Brazilian municipality data used throughout Section 1.
# NOTE(review): the path is machine-specific; the script only runs where the
# file exists at exactly this location.
data <- read.csv("/Users/kamalshashwat/Desktop/LSA Final Assesment/brazil.csv")

# View() opens a data viewer, so only call it from an interactive session.
if (interactive()) {
  View(data)
}

# Install a package only when it is missing, rather than re-installing it
# (and requiring a CRAN mirror and network access) on every run.
for (pkg in c("ggplot2", "textreg")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(ggplot2)
library(textreg)

# Relative output paths written later in the script ("results.txt",
# "regression_plot.png") resolve against this directory.
# NOTE(review): this is the "Midterm" folder while the data above is read from
# the "Final" folder -- confirm that is intended.
setwd("/Users/kamalshashwat/Desktop/LSA Midterm Assesment")
# Question 1 ----
# a. Number of municipalities with no recorded health-council age
council_age <- data[["council.age"]]
no_data <- sum(is.na(council_age))
print(no_data)

# b. Box plot of the health-council age
boxplot(
  council_age,
  main = "Box Plot of Health Council Age",
  ylab = "Council Age",
  col = "lightblue"
)
# Optional: dashed red reference line at the median (missing ages ignored)
abline(h = median(council_age, na.rm = TRUE), col = "red", lwd = 2, lty = 2)

# c. Mean and median of the corruption variable
corruption_values <- data[["corruption"]]
corruption <- mean(corruption_values)
print(corruption)
corruption_median <- median(corruption_values)
print(corruption_median)
# Question 2 ----
# a. Linear regression with the corruption index as the outcome
# Install stargazer only when it is missing instead of on every run.
if (!requireNamespace("stargazer", quietly = TRUE)) {
  install.packages("stargazer")
}
library(stargazer)

# Bivariate model: corruption regressed on health-council age
munc <- lm(corruption ~ council.age, data = data)
summary(munc)
# b.
# Question 3 ----
# a. Linear regression adding margin, reelected and poverty as covariates
multi <- lm(corruption ~ council.age + margin + reelected + poverty, data = data)
summary(multi)

# d. Predicted corruption index for a municipality whose health council is
#    10 years old
# Bivariate model of corruption on poverty (not referenced again in this script)
simple <- lm(corruption ~ poverty, data = data)
# Same specification as `multi`; kept under its own name for the prediction below
munc_model <- lm(corruption ~ council.age + margin + reelected + poverty, data = data)
summary(munc_model)

# Side-by-side table of the bivariate and multiple regression models; printed
# as text and also written to results.txt in the working directory.
stargazer(munc, multi,
  type = "text",
  title = "Comparison of Linear Regression Models",
  column.labels = c("Model 1", "Model 2"),
  dep.var.labels = "Corruption Index",
  covariate.labels = c("Council Age", "Margin", "Reelected", "Poverty"),
  out = "results.txt"
)
# summary() takes a single model; `multi` and `munc_model` are each summarised
# above, so no combined summary call is made here.

# Covariate profile to predict for
new_data <- data.frame(
  council.age = 10, # 10-year-old council
  margin = 12,      # mayor won by 12 percentage points
  reelected = 1,    # re-elected mayor (1 = yes)
  poverty = 50      # poverty level is 50
)
# Predict the corruption index score for that profile
predicted_corruption <- predict(munc_model, newdata = new_data)
# Print the predicted corruption index score
predicted_corruption
# Question 4 ----
# a. Models without and with the council.age x reelected interaction
multi <- lm(corruption ~ council.age + margin + reelected + poverty, data = data)
# `council.age * reelected` already expands to both main effects plus their
# interaction, so `reelected` does not need to be listed a second time.
multi_health <- lm(corruption ~ council.age * reelected + margin + poverty,
  data = data
)

# Present the models side by side
stargazer(multi, multi_health,
  type = "text",
  column.labels = c("Without Interaction", "With Interaction"),
  dep.var.labels = "Corruption Index",
  title = "Comparison of Models with and without Interaction"
)

# c. Coefficient table of the interaction model (fitted once, above)
summary(multi_health)
# e. Fitted corruption by council age, for re-elected and not re-elected mayors
# Interaction model used for the fitted values
multi_health <- lm(corruption ~ council.age * reelected + margin + poverty, data = data)

# Council ages from 0 to 20 years, in one-year steps
council_ages <- seq(0, 20, by = 1)

# Prediction grid: every council age once for reelected = 0 and once for
# reelected = 1, with margin held at 10 and poverty at 50
fitted_values <- data.frame(
  council.age = rep(council_ages, times = 2),
  reelected = rep(c(0, 1), each = length(council_ages)),
  margin = 10,
  poverty = 50
)

# Fitted values from the interaction model for each grid row
fitted_values$predicted_corruption <- predict(multi_health, newdata = fitted_values)

# ggplot2 is needed for the plot; it is attached here without re-installing it
library(ggplot2)

# One line per reelected status (0 labelled "No", 1 labelled "Yes")
ggplot(
  fitted_values,
  aes(x = council.age, y = predicted_corruption, color = factor(reelected))
) +
  geom_line(linewidth = 1.5) +
  labs(
    title = "Fitted Values of Corruption vs. Council Age",
    x = "Council Age (years)",
    y = "Predicted Corruption",
    color = "Reelected Mayor"
  ) +
  scale_color_manual(values = c("blue", "red"), labels = c("No", "Yes")) +
  theme_minimal()
# SECTION 2 ----
# NOTE(review): machine-specific path; the file must exist at this location.
peace <- read.csv("/Users/kamalshashwat/Downloads/trading.csv")
# View() opens a data viewer, so only call it from an interactive session.
if (interactive()) {
  View(peace)
}

# Question 1 ----
# a.
# Column totals of palstock and isrstock
# (presumably 0/1 assignment indicators -- TODO confirm against the codebook)
palestine <- sum(peace$palstock)
print(palestine)
israel <- sum(peace$isrstock)
print(israel)

# Rows with assettreat == 1 but neither isrstock nor palstock set
none <- sum(peace$assettreat == 1 & peace$isrstock == 0 & peace$palstock == 0)
print(none)

# Analysis sample: drop the rows counted in `none`
subset_data <- subset(peace, !(assettreat == 1 & isrstock == 0 & palstock == 0))
total_individuals <- nrow(subset_data)
print(total_individuals)

# Mean of asset_comp among rows with assettreat == 1, overall and by stock type
treated_data <- subset(subset_data, assettreat == 1)
overall_uptake <- mean(treated_data$asset_comp)
print(overall_uptake)
uptake_irstock <- mean(treated_data$asset_comp[treated_data$isrstock == 1])
print(uptake_irstock)
uptake_palstock <- mean(treated_data$asset_comp[treated_data$palstock == 1])
print(uptake_palstock)
uptake_difference <- uptake_irstock - uptake_palstock
print(uptake_difference)

# Share of all rows in `peace` with assettreat == 1 and tradestock6all == 1
took <- mean(peace$assettreat == 1 & peace$tradestock6all == 1)
print(took)
# NOTE(review): the next two are shares of all rows with assettreat == 1 and the
# given stock indicator set; they do not involve tradestock6all -- confirm this
# is the intended quantity.
took_israel <- mean(peace$assettreat == 1 & peace$isrstock == 1)
print(took_israel)
took_palestine <- mean(peace$assettreat == 1 & peace$palstock == 1)
print(took_palestine)
diff <- took_palestine - took_israel
print(diff)
# Box plots of the peace index in 2013 and 2015 (uses the `peace` data frame)
library(ggplot2)

# Stack the two index columns into long format: the 2013 values first, then the
# 2015 values, each labelled with its year
peace_long <- data.frame(
  Year = rep(c("2013", "2015"), each = nrow(peace)),
  Peace_Index = c(peace$p_index_2013, peace$p_index_2015)
)
# Keep only rows with a finite index value (drops NA/NaN/Inf). This has to run
# after peace_long exists, and the trailing comma selects rows, not columns.
peace_long <- peace_long[is.finite(peace_long$Peace_Index), ]

# Create the boxplot
ggplot(peace_long, aes(x = Year, y = Peace_Index)) +
  geom_boxplot(fill = "lightblue", color = "darkblue") +
  labs(
    title = "Distribution of Peace Index (2013 vs 2015)",
    x = "Year",
    y = "Peace Index"
  ) +
  theme_minimal()
# Question 2 ----
# Keep rows of the analysis sample with a non-missing 2015 peace index
peace_clean <- subset_data[!is.na(subset_data$p_index_2015), ]

# Group means of the 2015 index and their difference (treatment minus control)
mean_treatment <- mean(peace_clean$p_index_2015[peace_clean$assettreat == 1])
print(mean_treatment)
mean_control <- mean(peace_clean$p_index_2015[peace_clean$assettreat == 0])
print(mean_control)
diff_in_means <- mean_treatment - mean_control
print(diff_in_means)

# Perform the t-test
t_test_result <- t.test(peace_clean$p_index_2015 ~ peace_clean$assettreat)
# Print the results of the t-test
print(t_test_result)

# Take the t statistic from the test object instead of hard-coding it.
# `t-value` is not a valid R name (it parses as `t - value`), hence `t_value`.
t_value <- unname(t_test_result$statistic)
# The formula interface contrasts the first group level with the second, while
# diff_in_means is treatment minus control, so the two can differ in sign;
# abs() keeps the standard error non-negative.
SE <- abs(diff_in_means / t_value)
# c
# Balance checks on the analysis sample (subset_data): Welch t-tests comparing
# each 2013 covariate across the assettreat groups.

# Political orientation (left_2013)
t_test_left_2013 <- t.test(left_2013 ~ assettreat, data = subset_data)
# Display the results
print(t_test_left_2013)

# Peace index in 2013 (p_index_2013)
t_test_p_index_2013 <- t.test(p_index_2013 ~ assettreat, data = subset_data)
# Display the results
print(t_test_p_index_2013)

# Age
t_test_age <- t.test(age ~ assettreat, data = subset_data)
# Display the results
print(t_test_age)

# Family income (faminc)
t_test_faminc <- t.test(faminc ~ assettreat, data = subset_data)
# Display the results
print(t_test_faminc)
# b.
# Standard error and 99% confidence interval for diff_in_means, computed by
# hand from the 2015 peace index in peace_clean.
is_control <- peace_clean$assettreat == 0
is_treatment <- peace_clean$assettreat == 1

# Group standard deviations and sample sizes
sd_control <- sd(peace_clean$p_index_2015[is_control])
sd_treatment <- sd(peace_clean$p_index_2015[is_treatment])
n_control <- sum(is_control)
n_treatment <- sum(is_treatment)

# Standard error of the difference from the two group variances (unpooled)
SE <- sqrt((sd_control^2 / n_control) + (sd_treatment^2 / n_treatment))
print(paste("Standard Error:", SE))

# Two-sided critical t value at the 99% confidence level
alpha <- 0.01
# NOTE(review): n1 + n2 - 2 is the pooled-variance degrees of freedom, while SE
# above is the unpooled form -- confirm which convention is wanted.
df <- n_control + n_treatment - 2
t_critical <- qt(1 - alpha/2, df = df)

# Interval bounds: estimate minus / plus the margin of error
margin_of_error <- t_critical * SE
CI_lower <- diff_in_means - margin_of_error
CI_upper <- diff_in_means + margin_of_error

# Display the confidence interval
cat("99% Confidence Interval: [", CI_lower, ", ", CI_upper, "]\n")
# Question 3 ----
# Balance checks on the full `peace` data: t-tests of each covariate across the
# assettreat groups. t_test_left_2013 and t_test_age are reassigned here.

# right_2013
t_test_right_2013 <- t.test(peace$right_2013 ~ peace$assettreat)
writeLines("T-test for 'right_2013':")
print(t_test_right_2013)

# left_2013
t_test_left_2013 <- t.test(peace$left_2013 ~ peace$assettreat)
writeLines("T-test for 'left_2013':")
print(t_test_left_2013)

# age
t_test_age <- t.test(peace$age ~ peace$assettreat)
writeLines("T-test for 'age':")
print(t_test_age)
# c.
# Rows where isrstock or palstock equals 1
israel_vs_palestine <- subset(peace, isrstock == 1 | palstock == 1)

# Label each row by stock type: "Israeli" when isrstock == 1, otherwise
# "Palestinian"
israel_vs_palestine$stock_type <- ifelse(
  israel_vs_palestine$isrstock == 1,
  "Israeli",
  "Palestinian"
)

# t-test of the 2015 peace index between the two stock types
# (this reassigns t_test_result)
t_test_result <- t.test(p_index_2015 ~ stock_type, data = israel_vs_palestine)
# Print the results of the t-test
print(t_test_result)
# Question 4 ----
# 2015 peace index on assettreat, palstock and their interaction, controlling
# for the 2013 peace index
hypo <- lm(p_index_2015 ~ assettreat * palstock + p_index_2013, data = subset_data)
summary(hypo)

# Step 1: Combined treatment group variable -- "Control" when assettreat == 0,
# otherwise "Palestinian_Stock" when palstock == 1, else "Israeli_Stock"
subset_data$treatment_group <- factor(
  ifelse(subset_data$assettreat == 0, "Control",
    ifelse(subset_data$palstock == 1, "Palestinian_Stock", "Israeli_Stock")
  )
)

# Step 2: Fit the linear model using the combined treatment group variable
simple_model <- lm(p_index_2015 ~ treatment_group + p_index_2013, data = subset_data)
# Step 3: Summarize the model to see the differences between groups
summary(simple_model)

# Install sjPlot only when it is missing instead of on every run.
if (!requireNamespace("sjPlot", quietly = TRUE)) {
  install.packages("sjPlot")
}
library(sjPlot)

# Coefficient plot of the model estimates; keep the plot object so the exact
# plot that is displayed is also the one saved, rather than relying on
# ggsave()'s implicit "last plot".
regression_plot <- plot_model(simple_model,
  type = "est", show.values = TRUE, value.offset = .3
)
print(regression_plot)
# Save the plot to a file
ggsave("regression_plot.png", plot = regression_plot, width = 8, height = 6)

# Confidence intervals for the model coefficients
confint(simple_model)
# Question 5 ----
# Difference-in-differences of the peace index (2013 -> 2015) by assettreat.

# Mean of one peace-index column within one assettreat arm, ignoring NAs
arm_mean <- function(column, arm) {
  mean(peace[[column]][peace$assettreat == arm], na.rm = TRUE)
}

# Mean Peace Index for each group and time period
mean_pre_treatment <- arm_mean("p_index_2013", 1)
mean_post_treatment <- arm_mean("p_index_2015", 1)
mean_pre_control <- arm_mean("p_index_2013", 0)
mean_post_control <- arm_mean("p_index_2015", 0)

# Change over time within each group
change_treatment <- mean_post_treatment - mean_pre_treatment
change_control <- mean_post_control - mean_pre_control

# DiD estimate: treatment-group change minus control-group change
DiD_estimate <- change_treatment - change_control
print(DiD_estimate)

# Per-row change in the Peace Index
peace[["change"]] <- with(peace, p_index_2015 - p_index_2013)

# t-test of the per-row change across assettreat groups
# NOTE(review): the means above drop NAs per column, while `change` is NA when
# either year is missing, so the two may be computed on different rows --
# confirm.
t_test_result <- t.test(change ~ assettreat, data = peace)
print(t_test_result)