Skip to content

Commit

Permalink
add additional html tags for math symbols & chi
Browse files Browse the repository at this point in the history
* add more HTML character references for <, =, >, -, and chi
* add an HTML test file that uses these alternative character references
* don't commit this test file to git because of copyright restrictions
  • Loading branch information
MicheleNuijten committed Mar 31, 2021
1 parent ba770b5 commit a35b5c3
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 20 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ statcheck.Rproj
# test materials
# can't share because of copyright restrictions
aloe*
chung*
costa*
mausbach*
nuijten*
Expand Down
23 changes: 23 additions & 0 deletions R/file-to-txt.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,43 @@ getHTML <- function(x){
strings <- lapply(strings, gsub, pattern = "<(.|\n)*?>", replacement = "")

# Replace html codes:
# from: https://dev.w3.org/html5/html-author/charref
strings <- lapply(strings, gsub, pattern = "&#60;", replacement = "<", fixed = TRUE)
strings <- lapply(strings, gsub, pattern = "&lt;", replacement = "<", fixed = TRUE)
strings <- lapply(strings, gsub, pattern = "&LT;", replacement = "<", fixed = TRUE)
strings <- lapply(strings, gsub, pattern = "&#x0003C;", replacement = "<", fixed = TRUE)
strings <- lapply(strings, gsub, pattern = "&#x0003c;", replacement = "<", fixed = TRUE)

strings <- lapply(strings, gsub, pattern = "&#61;", replacement = "=", fixed = TRUE)
strings <- lapply(strings, gsub, pattern = "&equals;", replacement = "=", fixed = TRUE)
strings <- lapply(strings, gsub, pattern = "&#x0003D;", replacement = "=", fixed = TRUE)

strings <- lapply(strings, gsub, pattern = "&#62;", replacement = ">", fixed = TRUE)
strings <- lapply(strings, gsub, pattern = "&gt;", replacement = ">", fixed = TRUE)
strings <- lapply(strings, gsub, pattern = "&GT;", replacement = ">", fixed = TRUE)
strings <- lapply(strings, gsub, pattern = "&#x0003E;", replacement = ">", fixed = TRUE)

strings <- lapply(strings, gsub, pattern = "&#40;", replacement = "(", fixed = TRUE)
strings <- lapply(strings, gsub, pattern = "&#41;", replacement = ")", fixed = TRUE)

strings <- lapply(strings, gsub, pattern = "&thinsp;", replacement = " ", fixed = TRUE)
strings <- lapply(strings, gsub, pattern = "&nbsp;", replacement = " ", fixed = TRUE)
strings <- lapply(strings, gsub, pattern = "\n", replacement = "")
strings <- lapply(strings, gsub, pattern = "\r", replacement = "")
strings <- lapply(strings, gsub, pattern = "\\s+", replacement = " ")

strings <- lapply(strings, gsub, pattern = "&minus;", replacement = "-", fixed = TRUE)
strings <- lapply(strings, gsub, pattern = "&#x02212;", replacement = "-", fixed = TRUE)
strings <- lapply(strings, gsub, pattern = "&#8722;", replacement = "-", fixed = TRUE)

strings <- lapply(strings, gsub, pattern = "&chi;", replacement = "X", fixed = TRUE)
strings <- lapply(strings, gsub, pattern = "&#x003C7;", replacement = "X", fixed = TRUE)
strings <- lapply(strings, gsub, pattern = "&#x003c7;", replacement = "X", fixed = TRUE)
strings <- lapply(strings, gsub, pattern = "&#967;", replacement = "X", fixed = TRUE)
strings <- lapply(strings, gsub, pattern = "&Chi;", replacement = "X", fixed = TRUE)
strings <- lapply(strings, gsub, pattern = "&#x003A7;", replacement = "X", fixed = TRUE)
strings <- lapply(strings, gsub, pattern = "&#935;", replacement = "X", fixed = TRUE)

return(strings)
}

Expand Down
25 changes: 5 additions & 20 deletions tests/testthat/test-file-to-txt.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,8 @@ test_that("statistics from all pdfs in a folder are correctly retrieved and pars
test_that("statistics from a html are correctly retrieved and parsed", {

html_file <- system.file("test_materials/nuijten.html",
<<<<<<< HEAD
package = "statcheck")
=======
package = "statcheck")
>>>>>>> e07877171783278043ffecab55fe9f9d784f4ae4


result <- checkHTML(html_file, messages = FALSE)
result_1tailed <- checkHTML(html_file, messages = FALSE, OneTailedTxt = TRUE)

Expand All @@ -80,20 +76,9 @@ test_that("statistics from all htmls in a folder are correctly retrieved and par
html_dir <- system.file("test_materials", package = "statcheck")

result <- checkHTMLdir(html_dir, messages = FALSE, subdir = FALSE)
result_1tailed <- checkHTMLdir(html_dir, messages = FALSE, subdir = FALSE, OneTailedTxt = TRUE)

# extract 6 tests from paper
expect_equal(nrow(result), 6)
expect_equal(as.character(result[[VAR_TYPE]]), c("t", "Chi2", "t", "F", "F", "t"))
expect_equal(result[[VAR_TEST_VALUE]], c(-4.93, 6.9, 2, 1.203, 12.03, 2))

# check errors
expect_equal(result[[VAR_ERROR]], c(FALSE, FALSE, FALSE, TRUE, FALSE, TRUE))
expect_equal(result[[VAR_DEC_ERROR]], c(FALSE, FALSE, FALSE, TRUE, FALSE, TRUE))

# check errors with one-tailed test detection
expect_equal(result_1tailed[[VAR_ERROR]], c(FALSE, FALSE, FALSE, TRUE, FALSE, FALSE))
expect_equal(result_1tailed[[VAR_DEC_ERROR]], c(FALSE, FALSE, FALSE, TRUE, FALSE, FALSE))
# extract 6+33 tests from papers
expect_equal(nrow(result), 39)

})

Expand All @@ -106,7 +91,7 @@ test_that("statistics from all pdfs and htmls in a folder are correctly retrieve

result <- checkdir(dir, subdir = FALSE, messages = FALSE)

# extract 59 tests (6 from html and 53 from pdf)
expect_equal(nrow(result), 59)
# extract 92 tests (39 from html and 53 from pdf)
expect_equal(nrow(result), 92)
})

0 comments on commit a35b5c3

Please sign in to comment.