-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathReferences.R
37 lines (31 loc) · 933 Bytes
/
References.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
library(quanteda)
library(data.table)
library(dplyr)
library('stringr')
library(boot)
# Plain-text papers to scan; list.files() returns the file names only, so
# the directory is prepended again when each file is opened.
list_doc <- list.files(path = "txt_papers/")

# Return the part of `doc_text` that follows the last "References" heading
# (a line consisting only of that word, matched case-insensitively). When no
# such heading exists the text is returned unchanged, so the caller ends up
# counting over the whole document.
# The default regex engine is kept on purpose: with perl = TRUE the `.` would
# stop at newlines and the leading `.*` would no longer swallow the body.
extract_reference_section <- function(doc_text) {
  gsub(".*\nReferences\n", "", doc_text, ignore.case = TRUE)
}

# Count bibliography entries in `ref_text` (a single string).
# Primary marker: a parenthesised four-digit year followed by a period,
# e.g. "(2001).". If that yields nothing, fall back to a four-digit year
# followed by a semicolon, e.g. "2001;".
count_references <- function(ref_text) {
  count_matches <- function(pattern) {
    length(regmatches(ref_text, gregexpr(pattern, ref_text, perl = TRUE))[[1]])
  }
  n_refs <- count_matches("\\(\\d{4}\\)\\.")
  if (n_refs == 0L) {
    n_refs <- count_matches("\\d{4}\\;")
  }
  n_refs
}

# Size the result vectors from the actual number of files. A fixed length
# (previously 159) left placeholder numbers in the unused slots whenever the
# directory held fewer files, and those ended up in the output as bogus rows.
n_docs <- length(list_doc)
names <- character(n_docs)
text <- character(n_docs)
references <- integer(n_docs)

# Read the documents and count the references of each one.
for (i in seq_along(list_doc)) {
  names[i] <- list_doc[i]
  file <- file.path("txt_papers", list_doc[i])
  # Read the whole file as one string; the byte size is an upper bound on
  # the number of characters to read.
  text[i] <- readChar(file, file.info(file)$size)
  references[i] <- count_references(extract_reference_section(text[i]))
}
# Strip everything up to and including the last blank line (two consecutive
# newlines) from `text` and return what remains. Strings without a blank
# line come back unchanged. The result is returned invisibly.
# NOTE(review): the name suggests a figure count, but nothing is counted
# here -- confirm the intended behaviour before relying on it.
numfig <- function(text) {
  invisible(gsub(".*\n\n", "", text, ignore.case = TRUE))
}
# Combine the per-file names and reference counts into one table and save it
# as references.csv in the working directory (row names are written too,
# which is the write.csv default).
textdf <- data.frame(names = names, references = references)
write.csv(textdf, file = "references.csv")