From a35b5c3c8b3932538b20a6b92043bc1869c29bb9 Mon Sep 17 00:00:00 2001 From: MicheleNuijten Date: Wed, 31 Mar 2021 10:15:09 +0200 Subject: [PATCH] add additional html tags for math symbols & chi * add more html codes for <, =, >, -, and chi * add html test file with these alternative html tags * don't include this test file on git bc of copyright --- .gitignore | 1 + R/file-to-txt.R | 23 +++++++++++++++++++++++ tests/testthat/test-file-to-txt.R | 25 +++++-------------------- 3 files changed, 29 insertions(+), 20 deletions(-) diff --git a/.gitignore b/.gitignore index 64aa92c..8436fa1 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ statcheck.Rproj # test materials # can't share because of copyright restrictions aloe* +chung* costa* mausbach* nuijten* diff --git a/R/file-to-txt.R b/R/file-to-txt.R index 8a7253a..77c1606 100644 --- a/R/file-to-txt.R +++ b/R/file-to-txt.R @@ -16,20 +16,43 @@ getHTML <- function(x){ strings <- lapply(strings, gsub, pattern = "<(.|\n)*?>", replacement = "") # Replace html codes: + # from: https://dev.w3.org/html5/html-author/charref strings <- lapply(strings, gsub, pattern = "<", replacement = "<", fixed = TRUE) strings <- lapply(strings, gsub, pattern = "<", replacement = "<", fixed = TRUE) + strings <- lapply(strings, gsub, pattern = "<", replacement = "<", fixed = TRUE) + strings <- lapply(strings, gsub, pattern = "<", replacement = "<", fixed = TRUE) + strings <- lapply(strings, gsub, pattern = "<", replacement = "<", fixed = TRUE) + strings <- lapply(strings, gsub, pattern = "=", replacement = "=", fixed = TRUE) + strings <- lapply(strings, gsub, pattern = "=", replacement = "=", fixed = TRUE) + strings <- lapply(strings, gsub, pattern = "=", replacement = "=", fixed = TRUE) + strings <- lapply(strings, gsub, pattern = ">", replacement = ">", fixed = TRUE) strings <- lapply(strings, gsub, pattern = ">", replacement = ">", fixed = TRUE) + strings <- lapply(strings, gsub, pattern = ">", replacement = ">", fixed = TRUE) + strings <- lapply(strings, gsub, pattern = ">", replacement = ">", fixed = TRUE) + strings <- lapply(strings, gsub, pattern = "(", replacement = "(", fixed = TRUE) strings <- lapply(strings, gsub, pattern = ")", replacement = ")", fixed = TRUE) + strings <- lapply(strings, gsub, pattern = " ", replacement = " ", fixed = TRUE) strings <- lapply(strings, gsub, pattern = " ", replacement = " ", fixed = TRUE) strings <- lapply(strings, gsub, pattern = "\n", replacement = "") strings <- lapply(strings, gsub, pattern = "\r", replacement = "") strings <- lapply(strings, gsub, pattern = "\\s+", replacement = " ") + strings <- lapply(strings, gsub, pattern = "−", replacement = "-", fixed = TRUE) + strings <- lapply(strings, gsub, pattern = "−", replacement = "-", fixed = TRUE) + strings <- lapply(strings, gsub, pattern = "−", replacement = "-", fixed = TRUE) + strings <- lapply(strings, gsub, pattern = "χ", replacement = "X", fixed = TRUE) + strings <- lapply(strings, gsub, pattern = "χ", replacement = "X", fixed = TRUE) + strings <- lapply(strings, gsub, pattern = "χ", replacement = "X", fixed = TRUE) + strings <- lapply(strings, gsub, pattern = "χ", replacement = "X", fixed = TRUE) + strings <- lapply(strings, gsub, pattern = "Χ", replacement = "X", fixed = TRUE) + strings <- lapply(strings, gsub, pattern = "Χ", replacement = "X", fixed = TRUE) + strings <- lapply(strings, gsub, pattern = "Χ", replacement = "X", fixed = TRUE) + return(strings) } diff --git a/tests/testthat/test-file-to-txt.R b/tests/testthat/test-file-to-txt.R index a15cb89..41d2794 100644 --- a/tests/testthat/test-file-to-txt.R +++ b/tests/testthat/test-file-to-txt.R @@ -50,12 +50,8 @@ test_that("statistics from all pdfs in a folder are correctly retrieved and pars test_that("statistics from a html are correctly retrieved and parsed", { html_file <- system.file("test_materials/nuijten.html", -<<<<<<< HEAD package = "statcheck") -======= - package = "statcheck") ->>>>>>> e07877171783278043ffecab55fe9f9d784f4ae4 - + result <- checkHTML(html_file, messages = FALSE) result_1tailed <- checkHTML(html_file, messages = FALSE, OneTailedTxt = TRUE) @@ -80,20 +76,9 @@ test_that("statistics from all htmls in a folder are correctly retrieved and par html_dir <- system.file("test_materials", package = "statcheck") result <- checkHTMLdir(html_dir, messages = FALSE, subdir = FALSE) - result_1tailed <- checkHTMLdir(html_dir, messages = FALSE, subdir = FALSE, OneTailedTxt = TRUE) - - # extract 6 tests from paper - expect_equal(nrow(result), 6) - expect_equal(as.character(result[[VAR_TYPE]]), c("t", "Chi2", "t", "F", "F", "t")) - expect_equal(result[[VAR_TEST_VALUE]], c(-4.93, 6.9, 2, 1.203, 12.03, 2)) - # check errors - expect_equal(result[[VAR_ERROR]], c(FALSE, FALSE, FALSE, TRUE, FALSE, TRUE)) - expect_equal(result[[VAR_DEC_ERROR]], c(FALSE, FALSE, FALSE, TRUE, FALSE, TRUE)) - - # check errors with one-tailed test detection - expect_equal(result_1tailed[[VAR_ERROR]], c(FALSE, FALSE, FALSE, TRUE, FALSE, FALSE)) - expect_equal(result_1tailed[[VAR_DEC_ERROR]], c(FALSE, FALSE, FALSE, TRUE, FALSE, FALSE)) + # extract 6+33 tests from papers + expect_equal(nrow(result), 39) }) @@ -106,7 +91,7 @@ test_that("statistics from all pdfs and htmls in a folder are correctly retrieve result <- checkdir(dir, subdir = FALSE, messages = FALSE) - # extract 59 tests (6 from html and 53 from pdf) - expect_equal(nrow(result), 59) + # extract 92 tests (39 from html and 53 from pdf) + expect_equal(nrow(result), 92) })