forked from bmkramer/AJOL_OpenAlex
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path00_AJOL_journals.R
59 lines (42 loc) · 1.59 KB
/
00_AJOL_journals.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#This script queries the OpenAlex and Crossref APIs to check for presence of AJOL journals in OpenAlex
#More information:
#AJOL: https://www.ajol.info/index.php/ajol
#OpenAlex API: https://docs.openalex.org/api
#rcrossref package: https://cran.r-project.org/web/packages/rcrossref/rcrossref.pdf
#STEP 0 - collect ISSNs through web scraping
#load packages
library(tidyverse)
library(rvest)
source("R/query_AJOL.R")
#set date to date of sampling
#the hard-coded value pins the output folder to the 2022-02-13 sample;
#to collect a new sample, use the Sys.Date() line below instead
#date <- Sys.Date()
date <- "2022-02-13"
#set path, create directory
path <- file.path("data", date)
#recursive = TRUE also creates the parent "data" folder when it is missing,
#showWarnings = FALSE keeps re-runs quiet when the folder already exists
dir.create(path, recursive = TRUE, showWarnings = FALSE)
#AJOL listing page that shows all journals (every starting letter)
url <- "https://www.ajol.info/index.php/ajol/browseBy/alpha?letter=all"
#gather the url of each individual journal
#n=545 journals
journal_urls <- url %>%
  getJournalData()
#progress counter sized to the number of journal urls
#NB presumably read by getURLData_progress() from the global environment
#(helper is defined in R/query_AJOL.R) - confirm before renaming or moving it
pb <- journal_urls %>%
  length() %>%
  progress_estimated()
#collect title and issns per journal url, row-bound into one data frame,
#then pass the raw result through extractData()
data_raw <- journal_urls %>%
  map_dfr(getURLData_progress)
data <- data_raw %>%
  extractData()
#progress counter is no longer needed
rm(pb)
#write the journal data to a dated csv file
filename <- paste0("AJOL_journals_", date, ".csv")
filepath <- file.path(path, filename)
write_csv(data, filepath)
#uncomment to reload the saved file instead of scraping again
#data <- read_csv(filepath)
#raw scrape result is no longer needed
rm(data_raw)
#Reshape data into long format with a unique ISSN list
#per the original notes, transformISSN() keeps selected columns and
#produces long data with one issn column
data_issn <- data %>%
  transformISSN()
#NB Titles with no ISSN are left out: 13 of 545, leaving 532 records
# 667 journal-issn pairs, of which 665 unique issns
# 2 journals (SAFP and SAJCN) have the same issn/eissn
#write the issn list to a dated csv file
filename <- paste0("AJOL_issns_", date, ".csv")
filepath <- file.path(path, filename)
write_csv(data_issn, filepath)
#uncomment to reload the saved file instead of recomputing
#data_issn <- read_csv(filepath)