Skip to content

Commit

Permalink
isfiletext() checks whether a file is text or binary in a cross platf…
Browse files Browse the repository at this point in the history
…orm manner, completing ticket gesistsa#236. This can be useful when a file extension is missing or ambiguous.
  • Loading branch information
bokov committed Oct 8, 2019
1 parent f3b3c2d commit 214a07f
Show file tree
Hide file tree
Showing 4 changed files with 151 additions and 0 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ export(get_ext)
export(import)
export(import_list)
export(install_formats)
export(isfiletext)
export(spread_attrs)
importFrom(curl,curl_fetch_memory)
importFrom(curl,parse_headers)
Expand Down
34 changes: 34 additions & 0 deletions R/isfiletext.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#' Determine whether a file is "plain-text" or some sort of binary format
#'
#' Reads up to `maxsize` bytes of `filename` in binary mode and checks whether
#' every byte that was read is a member of `textbytes`. A file from which no
#' bytes are read (zero-length file, or `maxsize = 0`) is reported as text.
#'
#' @param filename Path to the file
#' @param maxsize Maximum number of bytes to read
#' @param textbytes Which characters are used by normal (though not necessarily
#'                  just ASCII) text. To detect just ASCII, the following value
#'                  can be used: `as.raw(c(7:16,18,19,32:127))`
#' @param tf If `TRUE` (default) simply return `TRUE` when `filename`
#'           references a text-only file and `FALSE` otherwise. If set to
#'           `FALSE` then returns the "non text" bytes found in the file.
#'
#' @return If `tf` is `TRUE`, a single logical value. Otherwise a raw vector
#'         holding the distinct bytes found in the file that are not in
#'         `textbytes` (zero-length when there are none).
#' @export
#' @examples
#' library(datasets)
#' yml_file <- tempfile(fileext = ".yml")
#' export(iris, yml_file)
#' isfiletext(yml_file)
#' ## TRUE
#'
#' sav_file <- tempfile(fileext = ".sav")
#' export(iris, sav_file)
#' isfiletext(sav_file)
#' ## FALSE
#' isfiletext(sav_file, tf = FALSE)
#' ## These are the characters found in the .sav file that are not printable text
#' ## 02 00 05 03 06 04 01 14 15 11 17 16 1c 19 1b 1a 18 1e 1d 1f
#'
#' unlink(c(yml_file, sav_file))
isfiletext <- function(filename, maxsize = Inf,
                       textbytes = as.raw(c(0x7:0x10, 0x12, 0x13, 0x20:0xFF)),
                       tf = TRUE) {
  # Never ask for more bytes than the file holds, nor more than the caller allows.
  nbytes <- min(file.info(filename)$size, maxsize)

  con <- file(filename, "rb")
  # Register the close immediately so the connection is released even when
  # readBin() signals an error (e.g. for an invalid `maxsize`).
  on.exit(close(con), add = TRUE)

  bytes <- readBin(con, raw(), n = nbytes)
  nontextbytes <- setdiff(bytes, textbytes)

  if (tf) {
    length(nontextbytes) == 0
  } else {
    nontextbytes
  }
}
45 changes: 45 additions & 0 deletions man/isfiletext.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

71 changes: 71 additions & 0 deletions tests/testthat/test_isfiletext.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
context("correctly identifying files as text vs binary")
# library() errors when the package is missing; require() would only return
# FALSE and let the tests continue without the data set.
library("datasets")

# Extensions exported unconditionally by the two loop-based tests below;
# each extension is listed once.
txtformats <- c("arff", "csv", "csv2", "dump", "fwf", "psv", "r", "tsv", "txt")
binformats <- c("dbf", "dta", "rda", "rdata", "rds", "sas7bdat", "sav", "xlsx",
                "xpt")
# Creates a modified copy of `iris` in the current environment whose column
# names use "_" instead of "." (presumably because some export formats do not
# accept dots in names -- confirm).
names(iris) <- gsub("\\.", "_", names(iris))

test_that("Required text formats recognized as text", {
  # Export iris once per text extension and check the resulting file.
  for (fmt in txtformats) {
    exported <- export(iris, paste0("iris.", fmt))
    expect_true(isfiletext(exported), label = paste0(fmt, " should be text"))
  }
})

test_that("Required non-text formats recognized as non-text", {
  # Export iris once per binary extension and check the resulting file.
  for (fmt in binformats) {
    exported <- export(iris, paste0("iris.", fmt))
    # The label names the expectation that was violated: these formats must
    # NOT be classified as text.
    expect_false(isfiletext(exported),
                 label = paste0(fmt, " should not be text"))
  }
})

test_that("csvy recognized as text", {
  # Exporting this format needs an optional package.
  skip_if_not_installed("csvy")
  expect_true(
    isfiletext(export(iris, "iris.csvy"))
  )
})

test_that("xml and html recognized as text", {
  # Both markup exports are gated on the same optional package.
  skip_if_not_installed("xml2")
  expect_true(
    isfiletext(export(iris, "iris.xml"))
  )
  expect_true(
    isfiletext(export(iris, "iris.html"))
  )
})

test_that("json recognized as text", {
  # Exporting this format needs an optional package.
  skip_if_not_installed("jsonlite")
  expect_true(
    isfiletext(export(iris, "iris.json"))
  )
})

test_that("yml recognized as text", {
  # Exporting this format needs an optional package.
  skip_if_not_installed("yaml")
  expect_true(
    isfiletext(export(iris, "iris.yml"))
  )
})

test_that("pzfx recognized as text", {
  # Exporting this format needs an optional package.
  skip_if_not_installed("pzfx")
  # The fifth column of iris is left out of this export.
  expect_true(
    isfiletext(export(iris[, -5], "iris.pzfx"))
  )
})

# The remaining binary formats (matlab, ods, fst, feather) each get their own
# test so that each can be skipped when its package is not installed.
# binformats_suggest <- c("matlab","ods","fst","feather")
test_that("matlab recognized as binary", {
  skip_if_not_installed("rmatio")
  expect_false(
    isfiletext(export(iris, "iris.matlab"))
  )
})

test_that("ods recognized as binary", {
  # Exporting this format needs an optional package.
  skip_if_not_installed("readODS")
  expect_false(
    isfiletext(export(iris, "iris.ods"))
  )
})

test_that("fst recognized as binary", {
  # Exporting this format needs an optional package.
  skip_if_not_installed("fst")
  expect_false(
    isfiletext(export(iris, "iris.fst"))
  )
})

test_that("feather recognized as binary", {
  # Exporting this format needs an optional package.
  skip_if_not_installed("feather")
  expect_false(
    isfiletext(export(iris, "iris.feather"))
  )
})

# Delete every file the tests above may have written to the working directory;
# unlink() is handed the full list whether or not a given test was skipped.
generated_files <- paste0(
  "iris.",
  c(
    txtformats, binformats,
    "csvy", "xml", "html", "json", "yml", "pzfx",
    "matlab", "ods", "fst", "feather"
  )
)
unlink(generated_files)
# Drop the modified iris copy and the helper vectors created by this script.
rm(iris, txtformats, binformats, generated_files)

0 comments on commit 214a07f

Please sign in to comment.