-
Notifications
You must be signed in to change notification settings - Fork 76
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
is_file_text() checks whether a file is text or binary… (#239)
* isfiletext() checks whether a file is text or binary in a cross-platform manner, completing ticket #236. This can be useful when a file extension is missing or ambiguous. * Minor tweak -- removed redundant 'fwf' entry from 'txtformats' in the test for isfiletext(). * Incorporating feedback from #239#pullrequestreview-335144814 and merging in changes to master. * Incorporating review feedback.
- Loading branch information
Showing
5 changed files
with
143 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
#' @title Determine whether a file is "plain-text" or some sort of binary format
#'
#' @description Reads at most \code{maxsize} bytes from \code{file} and checks
#'   whether every byte that was read is one of \code{text_bytes}.
#'
#' @param file Path to the file
#' @param maxsize Maximum number of bytes to read
#' @param text_bytes Which characters are used by normal text (though not
#'                   necessarily just ASCII). To detect just ASCII, the
#'                   following value can be used:
#'                   \code{as.raw(c(7:16, 18, 19, 32:127))}
#'
#' @return A logical: \code{TRUE} if every byte read from \code{file} is found
#'   in \code{text_bytes} (which is also the case when no bytes are read),
#'   \code{FALSE} otherwise.
#' @export
#' @examples
#' library(datasets)
#' export(iris, "iris.yml")
#' is_file_text("iris.yml")
#' ## TRUE
#'
#' export(iris, "iris.sav")
#' is_file_text("iris.sav")
#' ## FALSE
#'
#' # remove the files created by this example
#' unlink(c("iris.yml", "iris.sav"))
#'
is_file_text <- function(file, maxsize = Inf,
                         text_bytes = as.raw(c(0x7:0x10, 0x12, 0x13, 0x20:0xFF))) {

  # Open in binary mode so no newline or encoding translation is applied.
  con <- file(file, "rb")
  # Register the close immediately: the connection must be released even if
  # computing the size or reading the bytes below fails.
  on.exit(close(con), add = TRUE)

  # Never ask for more than the file holds, nor more than the caller allows.
  n_bytes <- min(file.info(file)$size, maxsize)
  bytes <- readBin(con, what = raw(), n = n_bytes)

  # The file counts as text only if no byte falls outside the allowed set.
  all(bytes %in% text_bytes)
}
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
context("correctly identifying files as text vs binary")
# library() fails loudly if the package is unavailable; require() would only
# return FALSE and let the tests continue in a broken state.
library("datasets")

# Formats exported without any optional package.
txtformats <- c("arff", "csv", "csv2", "dump", "fwf", "psv", "r", "tsv", "txt")
binformats <- c("dbf", "dta", "rda", "rdata", "rds", "sas7bdat", "sav", "xlsx",
                "xpt")
# Formats that need an optional package; each is guarded by
# skip_if_not_installed() in its own test below. Listed once so that the
# cleanup at the end stays in sync with the tests.
optformats <- c("csvy", "xml", "html", "json", "yml", "pzfx", "matlab", "ods",
                "fst", "feather")

# Work on a private copy instead of shadowing `iris`. Dots in the column names
# are replaced by underscores (presumably because some export formats do not
# accept dots in variable names -- confirm against the exporters).
iris_data <- datasets::iris
names(iris_data) <- gsub("\\.", "_", names(iris_data))

# Build the path of a fixture file in the session temp directory, so the tests
# do not write into the working directory.
iris_path <- function(ext) {
  file.path(tempdir(), paste0("iris.", ext))
}

test_that("Required text formats recognized as text", {
  for (xx in txtformats) {
    expect_true(is_file_text(export(iris_data, iris_path(xx))),
                label = paste0(xx, " should be text"))
  }
})

test_that("Required non-text formats recognized as non-text", {
  for (xx in binformats) {
    expect_false(is_file_text(export(iris_data, iris_path(xx))),
                 label = paste0(xx, " should not be text"))
  }
})

test_that("csvy recognized as text", {
  skip_if_not_installed(pkg = "csvy")
  expect_true(is_file_text(export(iris_data, iris_path("csvy"))))
})

test_that("xml and html recognized as text", {
  skip_if_not_installed(pkg = "xml2")
  expect_true(is_file_text(export(iris_data, iris_path("xml"))))
  expect_true(is_file_text(export(iris_data, iris_path("html"))))
})

test_that("json recognized as text", {
  skip_if_not_installed(pkg = "jsonlite")
  expect_true(is_file_text(export(iris_data, iris_path("json"))))
})

test_that("yml recognized as text", {
  skip_if_not_installed(pkg = "yaml")
  expect_true(is_file_text(export(iris_data, iris_path("yml"))))
})

test_that("pzfx recognized as text", {
  skip_if_not_installed(pkg = "pzfx")
  # the fifth column (Species) is left out of the pzfx export
  expect_true(is_file_text(export(iris_data[, -5], iris_path("pzfx"))))
})

test_that("matlab recognized as binary", {
  skip_if_not_installed(pkg = "rmatio")
  expect_false(is_file_text(export(iris_data, iris_path("matlab"))))
})

test_that("ods recognized as binary", {
  skip_if_not_installed(pkg = "readODS")
  expect_false(is_file_text(export(iris_data, iris_path("ods"))))
})

test_that("fst recognized as binary", {
  skip_if_not_installed(pkg = "fst")
  expect_false(is_file_text(export(iris_data, iris_path("fst"))))
})

test_that("feather recognized as binary", {
  skip_if_not_installed(pkg = "feather")
  expect_false(is_file_text(export(iris_data, iris_path("feather"))))
})

# Remove every fixture file that may have been written, then the helpers.
unlink(iris_path(c(txtformats, binformats, optformats)))
rm(iris_data, iris_path, txtformats, binformats, optformats)
|