forked from gesistsa/rio
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
isfiletext() checks whether a file is text or binary in a cross platf…
…orm manner, completing ticket gesistsa#236. This can be useful when a file extension is missing or ambiguous
- Loading branch information
Showing
4 changed files
with
151 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
#' Determine whether a file is "plain-text" or some sort of binary format
#'
#' Reads up to `maxsize` bytes from `filename` and checks whether every byte
#' that was read belongs to the set `textbytes`.
#'
#' @param filename Path to the file
#' @param maxsize Maximum number of bytes to read. Only the first `maxsize`
#'                bytes of the file are inspected; the default (`Inf`) reads
#'                the whole file.
#' @param textbytes Which characters are used by normal (though not necessarily
#'                  just ASCII) text. To detect just ASCII, the following value
#'                  can be used: `as.raw(c(7:16,18,19,32:127))`
#' @param tf If `TRUE` (default) simply return `TRUE` when `filename`
#'           references a text-only file and `FALSE` otherwise. If set to
#'           `FALSE` then returns the "non text" bytes found in the file.
#'
#' @return If `tf` is `TRUE`, a single logical value. Otherwise a raw vector
#'         holding each distinct byte found in the file that is not in
#'         `textbytes` (zero-length when there are none).
#' @export
#' @examples
#' library(datasets)
#' export(iris,"iris.yml")
#' isfiletext("iris.yml")
#' ## TRUE
#'
#' export(iris,"iris.sav")
#' isfiletext("iris.sav")
#' ## FALSE
#' isfiletext("iris.sav", tf=FALSE)
#' ## These are the characters found in "iris.sav" that are not printable text
#' ## 02 00 05 03 06 04 01 14 15 11 17 16 1c 19 1b 1a 18 1e 1d 1f
isfiletext <- function(filename, maxsize = Inf,
                       textbytes = as.raw(c(0x7:0x10, 0x12, 0x13, 0x20:0xFF)),
                       tf = TRUE) {
  con <- file(filename, "rb")
  # Register the close immediately so the connection is released even when
  # readBin() fails (for example on an invalid `maxsize`).
  on.exit(close(con), add = TRUE)

  # Never request more bytes than the file holds, nor more than `maxsize`.
  nbytes <- min(file.info(filename)$size, maxsize)
  bytes <- readBin(con, what = raw(), n = nbytes)

  # setdiff() keeps each offending byte value once, in order of appearance.
  nontextbytes <- setdiff(bytes, textbytes)
  if (tf) {
    return(length(nontextbytes) == 0)
  }
  nontextbytes
}
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
context("correctly identifying files as text vs binary")
# library() stops with an error if the package is unavailable, whereas
# require() would only return FALSE and let later tests fail obscurely.
library("datasets")

# Formats the tests in this file expect isfiletext() to report as text.
txtformats <- c("arff", "csv", "csv2", "dump", "fwf", "psv", "r", "tsv", "txt")
# Formats the tests in this file expect isfiletext() to report as non-text.
binformats <- c(
  "dbf", "dta", "rda", "rdata", "rds", "sas7bdat", "sav", "xlsx", "xpt"
)
# Creates a copy of iris in the current environment whose column names have
# every "." replaced by "_".
names(iris) <- gsub("\\.", "_", names(iris))
|
||
# Export iris once per required text format and check that each resulting
# file is classified as text.
test_that("Required text formats recognized as text", {
  for (fmt in txtformats) {
    exported <- export(iris, paste0("iris.", fmt))
    expect_true(isfiletext(exported), label = paste0(fmt, " should be text"))
  }
})
|
||
# Export iris once per required binary format and check that each resulting
# file is classified as non-text.
test_that("Required non-text formats recognized as non-text", {
  for (fmt in binformats) {
    exported <- export(iris, paste0("iris.", fmt))
    # The label must describe the expectation being tested (non-text), so a
    # failure message points at the right problem.
    expect_false(
      isfiletext(exported),
      label = paste0(fmt, " should not be text")
    )
  }
})
|
||
# Formats written by optional packages: each test is skipped when the package
# it names is not installed.
test_that("csvy recognized as text", {
  skip_if_not_installed("csvy")
  exported <- export(iris, "iris.csvy")
  expect_true(isfiletext(exported))
})

test_that("xml and html recognized as text", {
  skip_if_not_installed("xml2")
  xml_file <- export(iris, "iris.xml")
  expect_true(isfiletext(xml_file))
  html_file <- export(iris, "iris.html")
  expect_true(isfiletext(html_file))
})

test_that("json recognized as text", {
  skip_if_not_installed("jsonlite")
  exported <- export(iris, "iris.json")
  expect_true(isfiletext(exported))
})

test_that("yml recognized as text", {
  skip_if_not_installed("yaml")
  exported <- export(iris, "iris.yml")
  expect_true(isfiletext(exported))
})

test_that("pzfx recognized as text", {
  skip_if_not_installed("pzfx")
  # The fifth column of iris is left out of the pzfx export.
  exported <- export(iris[, -5], "iris.pzfx")
  expect_true(isfiletext(exported))
})

# binformats_suggest <- c("matlab","ods","fst","feather")
test_that("matlab recognized as binary", {
  skip_if_not_installed("rmatio")
  exported <- export(iris, "iris.matlab")
  expect_false(isfiletext(exported))
})

test_that("ods recognized as binary", {
  skip_if_not_installed("readODS")
  exported <- export(iris, "iris.ods")
  expect_false(isfiletext(exported))
})

test_that("fst recognized as binary", {
  skip_if_not_installed("fst")
  exported <- export(iris, "iris.fst")
  expect_false(isfiletext(exported))
})

test_that("feather recognized as binary", {
  skip_if_not_installed("feather")
  exported <- export(iris, "iris.feather")
  expect_false(isfiletext(exported))
})
|
||
# Delete the iris.* files written to the working directory by the tests in
# this file, then drop the objects this script created.
unlink(sprintf(
  "iris.%s",
  c(
    txtformats, binformats,
    "csvy", "xml", "html", "json", "yml",
    "pzfx", "matlab", "ods", "fst", "feather"
  )
))
rm(iris, txtformats, binformats)
|