-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathatps.Rmd
90 lines (65 loc) · 2 KB
/
atps.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
---
title: "FAA ATP Issuances, 2012-2018"
author: "Matthew Hull"
date: "`r format(Sys.Date())`"
output: github_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, fig.retina = 2)
```
```{r libraries, message=FALSE}
library(readxl)
library(ggplot2)
```
```{r extract}
# bring in available data from last 5 years from original data files:
yrs <- as.character(seq(2013,2018,1))
if(!dir.exists("airmandata")) {
dir.create("airmandata")
}
for (y in yrs){
url <- paste("https://www.faa.gov/data_research/aviation_data_statistics/civil_airmen_statistics/media/",y,"-civil-airmen-stats.xlsx",sep="")
destfile <- paste("airmandata/",y,"airmandata.xlsx",sep="")
download.file(url, destfile)
}
airman_files <- dir(path="airmandata",pattern="*.xlsx")
```
```{r transform, message=FALSE, warning=FALSE}
# add in yr 2012, now archived
yrs <- as.character(seq(2012,2018,1))
df <- data.frame( matrix(c(yrs,rep(NA,length(yrs))), nrow=length(yrs), ncol=2) )
colnames(df) <- c("year", "atps")
df$atps <- as.numeric(df$atps)
df$year <- as.numeric(as.character(df$year))
for (a in airman_files) {
yr <- substr(a,1,4)
fn <- paste("airmandata/",a,sep="")
d <- read_xlsx( fn
, sheet = "Table 16"
, col_types = "guess")
cnames <- d$`Table 16`
d <- as.data.frame(t(d$...3))
colnames(d) <- cnames
atp_total <- as.numeric(as.character(d$`Airline Transport`))
idx <- which(df$year==as.numeric(yr))
df$atps[idx] <- atp_total
}
```
```{r plots}
title <- paste(yrs[1],"-",tail(yrs,1),"Original ATP Issuances")
ggplot(df, aes(x=year,y=atps)) +
geom_bar(stat="identity", aes(fill=year), show.legend = F, width=.5) +
ylim(0, max(df$atps)+2500) +
annotate("text"
,label=df$atps
,x=df$year
,y=df$atps+300) +
annotate("text"
,label=paste("Total ATPs: ",sum(df$atps))
, x=max(df$year)-.25
, y=max(df$atps)+2000) +
xlab("Year") +
ylab("Count") +
scale_fill_viridis_c() +
ggtitle(title)
```