-
Notifications
You must be signed in to change notification settings - Fork 22
/
Copy pathhashCheck.r
142 lines (129 loc) · 6.01 KB
/
hashCheck.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
##' Check for Changes in List of Objects
##'
##' Given an RDS file name and a list of objects, does the following:
##' * makes a vector of hashes, one for each object. Function objects are run through `deparse` so that the environment of the function will not be considered.
##' * see if the file exists; if not, return a list with result=NULL, `hash` = new vector of hashes, `changed='All'`
##' * if the file exists, read the file and its hash attribute as `prevhash`
##' * if `prevhash` is not identical to hash:
##' if `.print.=TRUE` (default), print to console a summary of what's changed
##' return a list with result=NULL, `hash` = new hash vector, changed
##' * if `prevhash = hash`, return a list with result=file object, `hash`=new hash, changed=''
##'
##' Set `options(debughash=TRUE)` to trace results in `/tmp/debughash.txt`
##' @title hashCheck
##' @param ... a list of objects including data frames, vectors, functions, and all other types of R objects that represent dependencies of a certain calculation
##' @param file name of file in which results are stored
##' @param .print. set to `FALSE` to suppress printing information messages about what has changed
##' @param .names. vector of names of original arguments if not calling `hashCheck` directly
##' @return
##' @author Frank Harrell
##' @md
hashCheck <- function(..., file, .print.=TRUE, .names.=NULL) {
.d. <- list(...)
.nam. <- if(length(.names.)) .names. else as.character(sys.call())[-1]
.nam. <- .nam.[1 : length(.d.)]
names(.d.) <- .nam.
.debug. <- length(.Options$debughash) && .Options$debughash
ct <- if(.debug.)
function(...) cat(..., '\n', file='/tmp/debughash.txt', append=TRUE)
else
function(...) {}
ct(.nam.)
.g. <- function(x) digest::digest(if(is.function(x)) deparse(x) else x)
.hash. <- sapply(.d., .g.)
if(.debug.) prn(.hash., fi='/tmp/debughash.txt')
.prevhash. <- NULL
if(! file.exists(file)) {
ct('no file', file)
return(list(result=NULL, hash=.hash., changed='All'))
}
R <- readRDS(file)
.prevhash. <- attr(R, 'hash')
if(! length(.prevhash.)) {
if(.print.) cat('\nRe-run because of no previous hash\n\n')
ct('no previous hash')
return(list(result=NULL, hash=.hash., changed='No previous hash'))
}
samelen <- length(.hash.) == length(.prevhash.)
if(samelen && all(.hash. == .prevhash.)) {
ct('no change')
return(list(result=R, hash=.hash., changed=''))
}
.s. <- character(0)
if(! samelen) {
.a. <- names(.prevhash.)
.b. <- names(.hash.)
.w. <- setdiff(.a., .b.)
if(length(.w.))
.s. <- c(.s., paste('objects removed:',
paste(.w., collapse=' ')))
.w. <- setdiff(.b., .a.)
if(length(.w.))
.s. <- c(.s., paste('objects added:',
paste(.w., collapse=' ')))
} else
.s. <- c(.s., paste('changes in the following objects:',
paste(.nam.[.hash. != .prevhash.], collapse=' ')))
.s. <- paste(.s., collapse=';')
ct(.s.)
if(.print.) cat('\nRe-run because of', .s., '\n\n')
list(result=NULL, hash=.hash., changed=.s.)
}
##' Re-run Code if an Input Changed
##'
##' Uses `hashCheck` to run a function and save the results if specified inputs have changed, otherwise to retrieve results from a file. This makes it easy to see if any objects changed that require re-running a long simulation, and reports on any changes. The file name is taken as the chunk name appended with `.rds` unless it is given as `file=`. `fun` has no arguments. Set `.inclfun.=FALSE` to not include `fun` in the hash check (for legacy uses). The typical workflow is as follows.
##' ```
##' f <- function( ) {
##' # . . . do the real work with multiple function calls ...
##' }
##' seed <- 3
##' set.seed(seed)
##' w <- runifChanged(f, seed, obj1, obj2, ....)
##' ```
##' `seed, obj1, obj2`, ... are all the objects that `f()` uses that if changed
##' would give a different result of `f()`. This can include functions such as
##' those in a package, and `f` will be re-run if any of the function's code
##' changes. `f` is also re-run if the code inside `f` changes.
##' The result of `f` is stored with `saveRDS` by default in file named `xxx.rds`
##' where `xxx` is the label for the current chunk. To control this use instead
##' `file=xxx.rds` add the file argument to `runifChanged(...)`. If nothing has
##' changed and the file already exists, the file is read to create the result
##' object (e.g., `w` above). If `f()` needs to be run, the hashed input objects
##' are stored as attributes for the result then the enhanced result is written to the file.
##'
##' @title runifChanged
##' @param fun the (usually slow) function to run
##' @param ... input objects the result of running the function is dependent on
##' @param file file in which to store the result of `fun` augmented by attributes containing hash digests
##' @param .print. set to `TRUE` to list which objects changed that neessitated re-running `f`
##' @param .inclfun. set to `FALSE` to not include `fun` in the hash digest, i.e., to not require re-running `fun` if only `fun` itself has changed
##' @return
##' @author Frank Harrell
##' @md
runifChanged <- function(fun, ..., file=NULL, .print.=TRUE, .inclfun.=TRUE) {
if(! length(file)) {
file <- knitr::opts_current$get('label')
if(! length(file))
stop('attempt to run runifChanged without file= outside a knitr chunk')
file <- paste0(file, '.rds')
}
w <- list(...)
.names. <- (as.character(sys.call())[-1])[1 : (length(w) + 1)]
hashobj <- if(! .inclfun.) hashCheck(..., file=file,
.print.=.print., .names.=.names.[-1])
else {
w <- c(list(fun), w)
w$file <- file
w$.print. <- .print.
w$.names. <- .names.
do.call(hashCheck, w)
}
hash <- hashobj$hash
result <- hashobj$result
if(! length(result)) {
result <- fun()
attr(result, 'hash') <- hash
saveRDS(result, file, compress='xz')
}
result
}