-
Notifications
You must be signed in to change notification settings - Fork 0
/
plumber.R
68 lines (57 loc) · 2.13 KB
/
plumber.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
library(tidyverse)
library(tidyxl)
library(readxl)
library(rvest)
nms <- c("group", "sub_group", "sub_sub_group", "sub_sub_sub_group")
parse_sheet <- function(f, s) {
excel <- read_excel(f, s, col_names = FALSE)
cells <- xlsx_cells(f)
fmts <- xlsx_formats(f)
data <-
excel |>
mutate(id = row_number(),
indent = head(fmts$local$alignment$indent[filter(cells, sheet == s, col == 1)$local_format_id], n()),
fg = head(fmts$local$fill$patternFill$fgColor$rgb[filter(cells, sheet == s, col == 1)$local_format_id], n()),
hdr = nms[1+indent],
.before = 1) |>
filter(fg == "FFDCE6F1", !is.na(...1)) |>
pivot_wider(names_from = hdr, values_from = ...1)
for (i in 1:nrow(data)) {
if (data$indent[i] > 0) data$group[i] <- data$group[i-1]
if (data$indent[i] > 1) data$sub_group[i] <- data$sub_group[i-1]
if (data$indent[i] > 2) data$sub_sub_group[i] <- data$sub_sub_group[i-1]
}
data <-
data |>
select(-c(id, indent, fg)) |>
set_names(c(as.character(excel[first(data$id)-1,]), intersect(nms, names(data)))[-1]) |>
filter(!is.na(Total))
if (all(names(data) %in% c("Total", nms))) {
data <- data |> select(-Total, n = Total) |> mutate(n = parse_number(n))
} else {
data <-
data |>
select(-Total) |>
type_convert(na = "-") |>
pivot_longer(where(is.numeric), names_to = "month", values_to = "n") |>
mutate(month = as.Date(as.numeric(month), origin = "1899-12-30"))
}
data
}
aor <- function() {
tmpf <- fs::file_temp(ext = "xlsx")
r <- session("https://ohss.dhs.gov/topics/immigration/refugees-and-asylees/asylum-cohort")
href <- r |> html_elements("a[href$='xlsx']") |> html_attr("href") |> first()
r |> session_jump_to(href, httr::write_disk(tmpf))
excel_sheets(tmpf) |>
set_names() |>
discard(\(x) x %in% c("TOC", "Family AMI Cases")) |>
map(\(s) parse_sheet(tmpf, s)) |>
list_rbind(names_to = "sheet") |>
select(sheet, !!nms, month, n) |>
mutate(across(where(is.character),
\(x) str_replace(x, "\\d+$", "")))
}
#* @get /aor.csv
#* @serializer csv
aor