-
Notifications
You must be signed in to change notification settings - Fork 25
/
0_ACS_download.R
301 lines (256 loc) · 12.2 KB
/
0_ACS_download.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
###################################################
### This script downloads ACS data for many variables at the block group level using the `acs` R package.
### For each ACS table listed below and each county, it saves one file holding that table's pre-processed data for that county.
### Information on the ACS variables can be found here: https://www.socialexplorer.com/data/ACS2015_5yr/metadata/?ds=ACS15_5yr
### Variables:
# #Minutes to get to work
# B08303 Travel Time to Work
#
# #Percent Bachelor's Degree
# B15003 Educational Attainment for the Population 25 Years and Over [25]
#
# #Median Household Income (same as before)
# B19013 Median Household Income in the Past 12 Months (In 2015 Inflation-Adjusted Dollars) [1]
#
# #Per Capita Income
# B19301 Per Capita Income in the Past 12 Months (In 2015 Inflation-Adjusted Dollars) [1]
#
# #Percent below poverty level
# C17002 Ratio of Income to Poverty Level in the Past 12 Months [8]
#
# #Percent households receiving food stamps
# B22010 Receipt of Food Stamps/Snap in the Past 12 Months by Disability Status for Households [7]
#
# #Rent as a percentage of income
# B25071 Median Gross Rent as a Percentage of Household Income in the Past 12 Months (Dollars) [1]
#
# #Number of Housing Units
# B25001 Housing Units [1]
#
# #Percent housing units vacant
# B25002 Occupancy Status [3] --are houses occupied
#
# # Year Structure built
# B25035 Median Year Structure Built [1]
#
# # Number of Rooms
# B25017 Rooms [10]
#
# #Median Value
# B25077 Median Value (Dollars) [1]
###################################################
# Remove every object from the current workspace before the script runs.
rm(list=ls())
library(maps)
library(acs)
library(parallel)
library(foreach)
library(doParallel)
library(here)

## Import config.R to set filepaths
# The code root is read from the MOSAIKS_CODE environment variable; when that
# is unset, fall back to here("code").
mosaiks_code <- Sys.getenv("MOSAIKS_CODE")
if (mosaiks_code == "") {
  mosaiks_code <- here("code")
}
source(file.path(mosaiks_code, "mosaiks", "config.R"))

## Source the necessary helper files
# NOTE(review): utils_dir is not defined in this script; presumably config.R
# sets it -- confirm.
source(file.path(utils_dir, "R_utils.R"))

###---###
library(acs)
### Get your own key here: https://api.census.gov/data/key_signup.html
keyString <- "your key here"
# Abort with an explicit error while the placeholder (or an empty key) is still
# in place. The previous `break` was used outside of any loop, which only
# halted the script through an unrelated "no loop for break/next" error.
if (!nzchar(keyString) || keyString == "your key here") {
  stop("update key: set keyString to your Census API key ",
       "(https://api.census.gov/data/key_signup.html)", call. = FALSE)
}
api.key.install(key = keyString)
acs.tables.install()
###---###
# Number of worker processes used for the parallel per-county downloads.
no_cores <- 15

### Split the combined codes in county.fips (maps package) into a state part
### and a county part, one entry per county.
# The last three characters of each code are the county code.
tmp <- as.character(county.fips$fips)
code_len <- nchar(tmp)
countyfips <- as.numeric(substr(x = tmp, start = code_len - 2, stop = code_len))

# Whatever precedes the county code is the state code: one character for
# 4-character codes, two characters for 5-character codes. Codes of any other
# length are left whole.
is_short <- code_len == 4
is_long <- code_len == 5
statefips <- tmp
statefips[is_short] <- substr(x = tmp[is_short], start = 1, stop = 1)
statefips[is_long] <- substr(x = tmp[is_long], start = 1, stop = 2)
statefips <- as.numeric(statefips)

# Single-county example settings. Numeric literals do not keep leading zeros,
# so these are simply 6 and 19. Neither `state` nor `county` is referenced
# again in this script, and `tableNumber` is reassigned by the table loop.
state <- 6
county <- 19
tableNumber <- "B19013" #Income

# Passed to acs.fetch() as `endyear` for every download.
endYear <- 2015
# ACS tables to download, keyed by the short variable name used for each one.
# Keeping code and name side by side guarantees the two vectors below stay the
# same length and in the same order.
acs_tables <- c(
  MinToWork       = "B08303",
  PctBachDeg      = "B15003",
  MedHHIncome     = "B19013",
  MedPerCapIncome = "B19301",
  PctBelowPov     = "C17002",
  PctFoodStamp    = "B22010",
  PctIncomeRent   = "B25071",
  NumHouseUnits   = "B25001",
  PctVacant       = "B25002",
  YrBuilt         = "B25035",
  NumHouseRooms   = "B25017",
  MedHouseValue   = "B25077"
)

# Plain (unnamed) character vectors, in matching order.
ACStableNumbers <- unname(acs_tables)
ACSvariableNames <- names(acs_tables)
# For each table, download the ACS data for every county in parallel and write
# one CSV per (table, county). A county whose file already exists is skipped,
# so the script can be re-run to fill in whatever is still missing.
for (j in seq_along(ACStableNumbers)) {
  tableNumber <- ACStableNumbers[j]
  print(tableNumber)
  print(ACSvariableNames[j])

  ### Now I'm going to download and save a file for each county to a single folder:
  ### Then I can load that folder later.
  # Create the per-table folder once, before the workers start. recursive = TRUE
  # also creates missing parent folders, and showWarnings = FALSE keeps re-runs
  # quiet when the folder already exists.
  out_dir <- file.path(data_dir, "raw/applications/ACS/data_by_county", tableNumber)
  dir.create(path = out_dir, recursive = TRUE, showWarnings = FALSE)

  cl <- makeCluster(no_cores)
  registerDoParallel(cl)

  # Each iteration returns its index i; `numbers` collects them via c().
  numbers <- foreach(i = seq_along(countyfips), .combine = 'c',
                     .export = c(),
                     .packages = c("acs"), .errorhandling = 'pass') %dopar% {
    print(i)
    print("for county:")
    print(paste(statefips[i], countyfips[i]))
    fn <- file.path(out_dir, paste0(statefips[i], "_", countyfips[i], ".csv"))

    if (!file.exists(fn)) {
      tryCatch(
        {
          ### Download the data: every block group of every tract in the county
          blockGroupForCounty <- acs.fetch(
            geography = geo.make(state = statefips[i], county = countyfips[i],
                                 tract = "*", block.group = "*"),
            table.number = tableNumber, endyear = endYear)
          df <- data.frame(estimate(blockGroupForCounty))

          # Wrap a vector of values in a one-column data frame named "Val"
          # that keeps the row names of the raw table.
          as_val <- function(v, raw) {
            out <- data.frame(v)
            rownames(out) <- rownames(raw)
            colnames(out) <- "Val"
            out
          }

          ### Do any simple pre-processing
          ### This will end up with a data frame with a single variable column named "Val"
          # A table without a rule below is kept as downloaded.
          df <- switch(tableNumber,
            B08303 = {
              #MinToWork
              # Raw data is the number of people in each travel-time bin.
              # Everyone in a bin is assumed to have travelled the midpoint of
              # that bin, e.g. (5+9)/2 for the 5-9 minute bin; the open-ended
              # top bin is counted as 90 minutes. Total travel time divided by
              # the number of people gives the average for each row.
              bin_midpoints <- c(2.5,
                                 (5 + 9) / 2,
                                 (10 + 14) / 2,
                                 (15 + 19) / 2,
                                 (20 + 24) / 2, # was (10+24)/2: wrong lower bound
                                 (25 + 29) / 2,
                                 (30 + 34) / 2,
                                 (35 + 39) / 2,
                                 (40 + 44) / 2,
                                 (45 + 59) / 2,
                                 (60 + 89) / 2,
                                 (90 + 90) / 2)
              time_cols <- sprintf("B08303_%03d", 2:13)
              total_minutes <- as.vector(as.matrix(df[time_cols]) %*% bin_midpoints)
              as_val(total_minutes / df$B08303_001, df)
            },
            #PctBachDeg
            B15003 = as_val(df$B15003_022 / df$B15003_001 * 100, df),
            #MedHHIncome
            B19013 = as_val(df$B19013_001, df),
            #MedPerCapIncome
            B19301 = as_val(df$B19301_001, df),
            #PctBelowPov
            C17002 = as_val((df$C17002_003 + df$C17002_002) / df$C17002_001 * 100, df),
            #PctFoodStamp
            B22010 = as_val(df$B22010_002 / df$B22010_001 * 100, df),
            #PctIncomeRent
            B25071 = as_val(df$B25071_001, df),
            #NumHouseUnits
            B25001 = as_val(df$B25001_001, df),
            #PctVacant
            B25002 = as_val(df$B25002_003 / df$B25002_001 * 100, df),
            #YrBuilt
            B25035 = as_val(df$B25035_001, df),
            B25017 = {
              #NumHouseRooms
              # Average room count: columns _002.._010 are weighted 1..9, so
              # the last bin is counted as 9 rooms.
              room_cols <- sprintf("B25017_%03d", 2:10)
              total_rooms <- as.vector(as.matrix(df[room_cols]) %*% (1:9))
              as_val(total_rooms / df$B25017_001, df)
            },
            #MedHouseValue
            B25077 = as_val(df$B25077_001, df),
            df
          )

          df$state <- statefips[i]
          df$county <- countyfips[i]
          df$tract <- blockGroupForCounty@geography$tract
          df$blockgroup <- blockGroupForCounty@geography$blockgroup

          print(fn)
          # write.csv2 uses ";" as separator and "," as decimal mark, so the
          # files have to be read back with read.csv2.
          write.csv2(x = df, file = fn)
          print("saved")
        },
        # A failed download or write is reported and the county is skipped;
        # because no file is written, a later run will retry it.
        error = function(cond) {
          message(cond)
          message(paste0(" with i = ",i))
          return(NA)
        }
        #warning = function(cond){
        # message(cond)
        #return(NA)},
      ) #end TryCatch
    } #end if file exists

    i
  }

  # Shut the workers down before the next table starts a fresh cluster;
  # otherwise every pass through this loop leaves no_cores processes running.
  stopCluster(cl)
}
print("DONE DONE DONE")