-
Notifications
You must be signed in to change notification settings - Fork 3
/
cleanup-data.R
31 lines (27 loc) · 958 Bytes
/
cleanup-data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
### Clean up the covid-data/data folder
# Goal: keep only one file per day per metric -- the most recent one we created.
library(dplyr)
library(tidyr)
# Walk every folder under data/ (list.dirs() is recursive and includes the
# root itself) and delete superseded snapshot files, so that each folder keeps
# only the newest file per day per metric.
#
# Candidate files are those whose name starts with 12 digits; the code below
# splits that prefix into an 8-character date and the remaining time part, and
# treats everything after the first "_" as the metric ("group").
folders <- list.dirs(here::here("data"))
for (f in folders) {
  # list.files() also returns sub-directory names; drop them so only the
  # non-directory entries of this folder are reviewed.
  files <- setdiff(
    list.files(f),
    list.dirs(f, recursive = FALSE, full.names = FALSE)
  )
  reviewfiles <- files[grepl("^\\d{12}", files)]
  if (length(reviewfiles) > 0) {
    to_delete <- tibble(filename = reviewfiles) %>%
      # "<timestamp>_<group>": split on the first "_" only, keeping any further
      # underscores inside the group name.
      separate(filename, into = c("date", "group"),
               sep = "_", extra = "merge", remove = FALSE) %>%
      # First 8 characters are the date, the remainder is the time.
      separate(date, into = c("date", "time"), sep = 8) %>%
      group_by(group, date) %>%
      # Newest first; within each (group, date) the first row is the keeper.
      arrange(desc(time)) %>%
      filter(row_number() > 1) %>%
      ungroup()

    # Always assigned (possibly empty) so the cleanup below never refers to an
    # object that does not exist.
    files_to_delete <- file.path(f, to_delete$filename)
    if (length(files_to_delete) > 0) {
      # unlink() reports failure only through its return value.
      if (unlink(files_to_delete) != 0) {
        warning("Could not delete all superseded files in ", f, call. = FALSE)
      }
    }
    rm(to_delete, files_to_delete)
  }
  rm(files, reviewfiles)
}