forked from nstrayer/cv
-
Notifications
You must be signed in to change notification settings - Fork 0
/
parsing_functions.R
89 lines (82 loc) · 2.49 KB
/
parsing_functions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# Regex that locates inline markdown links of the form `[title](url)` in text.
# It is built with `comments = TRUE`, so the whitespace and `#` comments inside
# the pattern string document the pattern and are not matched literally.
# NOTE(review): both `.+?` parts are lazy, but the title part can still span
# extra `[` / `]` characters when an unrelated `[` precedes the real link --
# confirm the inputs never contain stray square brackets.
find_link <- regex("
\\[ # Grab opening square bracket
.+? # Find smallest internal text as possible
\\] # Closing square bracket
\\( # Opening parenthesis
.+? # Link text, again as small as possible
\\) # Closing parenthesis
",
comments = TRUE)
# Replace inline markdown links in `text` with the link title followed by a
# numbered superscript, so the URLs can be listed at the end of the document.
#
# Only active when the global `PDF_EXPORT` is TRUE; otherwise `text` is
# returned untouched. Every URL found is appended to the global `links` vector
# (an intentional side effect), and the superscript number is that URL's
# position in `links`.
#
# @param text A string that may contain `[title](url)` links.
# @return `text` with each link replaced by `title<sup>n</sup>`.
sanitize_links <- function(text) {
  if (PDF_EXPORT) {
    found <- str_extract_all(text, find_link) %>% pluck(1)
    # A missing `text` produces an NA "match"; drop it so that no bogus NA
    # entry is appended to `links` (which would also shift later numbering).
    found <- found[!is.na(found)]

    for (link_from_text in found) {
      # Split at the first `](`: everything before it is the title, everything
      # after it (up to the closing parenthesis) is the URL. Capturing both
      # parts in one anchored match keeps parentheses inside the title and
      # brackets inside the URL from leaking into the other part.
      parts <- str_match(link_from_text, "^\\[(.+?)\\]\\((.+)\\)$")
      title <- parts[1, 2]
      link <- parts[1, 3]

      # Record the URL in the global list used for the end-of-document links.
      links <<- c(links, link)

      # The superscript is the index of the URL that was just recorded.
      new_text <- paste0(title, "<sup>", length(links), "</sup>")

      # fixed(): the matched link is replaced literally, not as a regex.
      text <- str_replace(text, fixed(link_from_text), new_text)
    }
  }
  text
}
# Run `sanitize_links()` over the selected columns of a data frame, one cell
# at a time.
#
# Cells are visited row by row and, within a row, in the order given by
# `cols_to_strip`, so links that belong to the same row are processed
# right next to each other.
#
# @param data A data frame (e.g. the positions table).
# @param cols_to_strip Character vector naming the columns to sanitize.
# @return `data` with each selected cell replaced by the result of
#   `sanitize_links()`.
strip_links_from_cols <- function(data, cols_to_strip) {
  # seq_len() instead of 1:nrow(): for a zero-row input 1:0 would iterate over
  # c(1, 0) and touch rows that do not exist.
  for (i in seq_len(nrow(data))) {
    for (col in cols_to_strip) {
      # `[[` hands sanitize_links() a plain scalar for data frames and tibbles
      # alike.
      data[[col]][i] <- sanitize_links(data[[col]][i])
    }
  }
  data
}
# Render one section of the positions table as markdown.
#
# Keeps the rows whose `section` equals `section_id`, orders them by `end`
# (descending), gathers every non-missing `description*` column into a bullet
# list, builds a timeline label, strips links from the title and bullets via
# `strip_links_from_cols()`, replaces remaining NA values with "N/A", and
# finally formats each entry with glue.
#
# Note: because NA descriptions are dropped and only the `description_1` row
# of each entry is kept, an entry whose `description_1` is NA is omitted.
#
# @param position_data Data frame with the columns used below: `section`,
#   `title`, `loc`, `institution`, `start`, `end` and `description*`.
# @param section_id Value of `section` selecting the entries to render.
# @return The result of `glue_data()`: one markdown block per entry.
print_section <- function(position_data, section_id) {
  position_data %>%
    filter(section == section_id) %>%
    arrange(desc(end)) %>%
    # row_number() rather than 1:n(): with zero matching rows 1:n() is
    # c(1, 0), whose length does not fit the data and makes mutate() fail.
    mutate(id = row_number()) %>%
    pivot_longer(
      starts_with("description"),
      names_to = "description_num",
      values_to = "description",
      values_drop_na = TRUE
    ) %>%
    group_by(id) %>%
    # Collect all descriptions of an entry into a single list cell.
    mutate(
      descriptions = list(description)
    ) %>%
    ungroup() %>%
    # Back to one row per entry: keep only the first description's row.
    filter(description_num == "description_1") %>%
    mutate(
      # A lone `end` when there is no start or start and end coincide,
      # otherwise "end - start".
      timeline = ifelse(
        is.na(start) | start == end,
        end,
        glue("{end} - {start}")
      ),
      description_bullets = map_chr(
        descriptions,
        ~ paste("-", ., collapse = "\n")
      )
    ) %>%
    strip_links_from_cols(c("title", "description_bullets")) %>%
    mutate_all(~ ifelse(is.na(.), "N/A", .)) %>%
    glue_data(
      "### {title}",
      "\n\n",
      "{loc}",
      "\n\n",
      "{institution}",
      "\n\n",
      "{timeline}",
      "\n\n",
      "{description_bullets}",
      "\n\n\n"
    )
}