-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain-bks.R
162 lines (105 loc) · 6.28 KB
/
main-bks.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- #
# Name: main-bks.R
# Description: main script for the bikeshare R analysis; loads packages, sets paths, and runs the pipeline scripts
#
# -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- #
# ---- Opening ----
#-------------#
# packages #
#-------------#
# Bootstrap pacman if it is missing, then hand the package list to
# pacman::p_load(), which installs anything not yet available and attaches
# every listed package.
# requireNamespace() is used for the availability check instead of scanning
# the whole installed.packages() matrix.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman", dependencies = TRUE)
}

# Order is kept as in the original list because attach order decides which
# package masks which (e.g. data.table and lubridate after tidyverse).
pacman::p_load(
  tidyverse,
  readstata13,
  data.table,
  leaflet,
  sf,
  gdata,
  lubridate,
  janitor,
  assertthat
)
#-------------#
# Set User #
#-------------#
# 1 buscandoaverroes
# 2 6k
# Active user profile; it decides which root directories are used below.
user <- 1

if (user == 1) {
  # scripts
  repo <- "/Volumes/Al-Hakem-II/Scripts/bikeshare"
  # data
  # NOTE(review): `data` shadows utils::data(); the name is kept because other
  # scripts in the project presumably refer to it -- confirm before renaming.
  data <- "/Volumes/Al-Hakem-II/Datasets/bks"
}

# Fail early with a readable message when the selected user has no paths.
# Without this guard an unconfigured user hits "object 'repo' not found" (or a
# coercion error, because `data` would resolve to the utils::data function).
if (!is.character(get0("repo")) || !is.character(get0("data"))) {
  stop(
    "No project paths configured for user ", user,
    ": define `repo` and `data` for this user in main-bks.R.",
    call. = FALSE
  )
}

# same no matter the user.
scripts   <- file.path(repo, "code")
shiny     <- file.path(repo, "visuals/shiny")
raw       <- file.path(data, "raw")
processed <- file.path(data, "bks")
keys      <- file.path(processed, "keys")
plato     <- file.path(processed, "data/plato")

# values
crs <- 4326             # main crs for project
bike_metro_dist <- 250  # distance in meters; determines if bike station is
                        # "near" a metro station.
#-------------#
# run scripts #
#-------------#
# main scripts
# Run switches: set a flag to 1 to source the matching script further down,
# leave it at 0 to skip that step.

# main scripts
s1 <- 0   # import.R: variable harmonization and append; no data wrangling.
          #   makes: bks-import.csv
s2 <- 0   # stations.R: builds the old/new station number dictionary and adds
          #   station features.
          #   makes: station_key.Rda, station-geo-objects.Rdata
s3 <- 0   # construct.R: takes bks.Rda and makes other files; runs
          #   station-number.R.
          #   makes: bks-full.Rda, bks-full.csv
s4 <- 0   # query.R: filters/queries the main database and exports files.
          #   makes: bks_2020.Rda, bks1720.Rda

# Stats Processing (each step takes its years from the query output,
# processes them, adds station info and stats; ~20 min per step)
s5 <- 0   # stats10-14.R: years 10-14.  makes: stats10-14.Rdata
s6 <- 0   # stats15-16.R: years 15-16.  makes: stats15-16.Rdata
s7 <- 0   # stats17-21.R: years 17-21.  makes: stats17-21.Rdata

# recollection
r1 <- 0   # recollect.R: takes the 'parallel processed' stats files and
          #   reassembles them into days, station-sum and rides .Rda files
          #   under the /plato directory.

# utilities
u1 <- 0   # weather.R: queries weather data from NOAA to create a by-day
          #   weather dictionary.
          #   makes: data/weather/weather-daily.Rda
u2 <- 0   # names.R: creates a tibble of all key variable names and
          #   text/labels for graphs.

# rmarkdown
md1 <- 0  # descriptives01.Rmd: exploration markdown of basic plots
md2 <- 0  # regressions01.Rmd: basic regressions

# main scripts ----------------------------------------------------------------
if (s1 == 1) {
  source(file.path(scripts, "import.R"))
}
if (s2 == 1) {
  source(file.path(scripts, "stations.R"))
}
if (s3 == 1) {
  source(file.path(scripts, "construct.R"))
}
if (s4 == 1) {
  source(file.path(scripts, "query.R"))
}
if (s5 == 1) {
  source(file.path(scripts, "stats10-14.R"))
}
if (s6 == 1) {
  source(file.path(scripts, "stats15-16.R"))
}
if (s7 == 1) {
  source(file.path(scripts, "stats17-21.R"))
}
if (r1 == 1) {
  source(file.path(scripts, "recollect.R"))
}

# utilities -------------------------------------------------------------------
if (u1 == 1) {
  source(file.path(scripts, "weather.R"))
}
if (u2 == 1) {
  source(file.path(scripts, "names.R"))
}

# markdown --------------------------------------------------------------------
# NOTE(review): source() parses its input as R code, so pointing it at an
# R Markdown file will most likely fail; rendering the document (e.g. with
# rmarkdown::render()) is probably what was intended -- confirm.
# NOTE(review): "regeressions01.rmd" does not match the "regressions01.Rmd"
# name used in the flag comment above -- verify the file name on disk.
if (md1 == 1) {
  source(file.path(scripts, "analysis/Descriptives01.rmd"))
}
if (md2 == 1) {
  source(file.path(scripts, "analysis/regeressions01.rmd"))
}
# credits: =======================================================================================
# OpenStreetMap, GADM, Dominic Royé, https://dominicroye.github.io/en/2018/accessing-openstreetmap-data-with-r/
# Matthias: https://www.gis-blog.com/nearest-neighbour-search-for-spatial-points-in-r/
# bzki: https://stackoverflow.com/questions/21977720/r-finding-closest-neighboring-point-and-number-of-neighbors-within-a-given-rad
# https://stackoverflow.com/questions/6778908/transpose-a-data-frame
# https://stackoverflow.com/questions/12925063/numbering-rows-within-groups-in-a-data-frame
# https://stackoverflow.com/questions/22337394/dplyr-mutate-with-conditional-values
# https://stackoverflow.com/questions/15344092/creating-a-new-variables-with-missing-values
# https://stackoverflow.com/questions/35697940/append-suffix-to-colnames
# https://stackoverflow.com/questions/22959635/remove-duplicated-rows-using-dplyr
# https://stackoverflow.com/questions/54734771/sf-write-lat-long-from-geometry-into-separate-column-and-keep-id-column
# https://stackoverflow.com/questions/32766325/fastest-way-of-determining-most-frequent-factor-in-a-grouped-data-frame-in-dplyr
# https://stackoverflow.com/questions/14800161/select-the-top-n-values-by-group
#
# ideas: map to a/g mobility data (use package covid19mobility?)