#############################################################################################
### Load the necessary libraries for the class
require("microbenchmark")
require("compiler")
require("parallel")
require("doParallel")
require("foreach")
require("snow")
require("snowfall")
# Test the package microbenchmark
x <- runif(100)
sqrt(x)
x ^ 0.5
system.time(sqrt(x))
system.time(x^0.5)
microbenchmark(sqrt(x), x ^ 0.5)
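# Note: system.time() is too coarse to time expressions this fast; microbenchmark() runs
# each expression many times (100 by default) and reports the distribution of timings.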
#############################################################################################
### Extreme dynamism example
f <- function(x) NULL
s3 <- function(x) UseMethod("s3")
s3.integer <- f
A <- setClass("A", representation(a = "list"))
setGeneric("s4", function(x) standardGeneric("s4"))
setMethod(s4, "A", f)
B <- setRefClass("B", methods = list(rc = f))
a <- A()
b <- B$new()
# If this returns an error about negative time, just remove the fun = f() entry from the call below
microbenchmark(
fun = f(),
S3 = s3(1L),
S4 = s4(a),
RC = b$rc()
)
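# Typically the plain function call is fastest, and S3, S4 and RC dispatch each add some
# extra overhead on top of it.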
##############################################################################################
### Name lookup with mutable environments
# Example 1: Each time we print a, it comes from a different environment
a <- 1
f <- function() {
g <- function() {
print(a)
assign("a", 2, envir = parent.frame())
print(a)
a <- 3
print(a)
}
g()
}
f()
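# This should print 1, 2, 3: the first a comes from the global environment, the second
# from f()'s environment (set there by assign), and the third from g()'s own environment.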
# Example 2: This illustrates the cost of looking for something through multiple environments
# This example comes from Wickham's book, but I cannot reproduce the slowdown. It may be
# that this has been improved in R 3.0.3 or later.
f <- function(x, y) {
(x + y) ^ 2
}
random_env <- function(parent = globalenv()) {
letter_list <- setNames(as.list(runif(26)), LETTERS)
list2env(letter_list, envir = new.env(parent = parent))
}
set_env <- function(f, e) {
environment(f) <- e
f
}
f2 <- set_env(f, random_env())
f3 <- set_env(f, random_env(environment(f2)))
f4 <- set_env(f, random_env(environment(f3)))
microbenchmark(
f(1, 2),
f2(1, 2),
f3(1, 2),
f4(1, 2),
times = 10000
)
##############################################################################################
### Lazy evaluation overhead
# As above, I cannot reproduce this example. Computers are much faster now than they were
# when Wickham wrote the book.
f0 <- function() NULL
f1 <- function(a = 1) NULL
f2 <- function(a = 1, b = 1) NULL
f3 <- function(a = 1, b = 2, c = 3) NULL
f4 <- function(a = 1, b = 2, c = 3, d = 4) NULL
f5 <- function(a = 1, b = 2, c = 3, d = 4, e = 5) NULL
microbenchmark(f0(), f1(), f2(), f3(), f4(), f5(), times = 1000)
##############################################################################################
### R implementation example
# All these methods extract the same value, so in principle they should be equally fast,
# but some are clearly faster than others
head(mtcars)
microbenchmark(
mtcars[32, 11],
mtcars[[c(11, 32)]],
mtcars[[11]][32],
mtcars$carb[32],
.subset2(mtcars, 11)[32]
)
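# The differences come mostly from method dispatch and argument checking: [.data.frame
# does a lot of work at the R level, while $ and especially .subset2() go almost
# straight to the underlying C code.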
# Another example showing two known slow functions: ifelse and pmin/pmax
squish_ife <- function(x, a, b) {
ifelse(x <= a, a, ifelse(x >= b, b, x))
}
squish_p <- function(x, a, b) {
pmax(pmin(x, b), a)
}
squish_in_place <- function(x, a, b) {
x[x <= a] <- a
x[x >= b] <- b
x
}
x <- runif(100, -1.5, 1.5)
microbenchmark(
squish_ife(x, -1, 1),
squish_p(x, -1, 1),
squish_in_place(x, -1, 1)
)
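# ifelse() is slow because it builds several full-length intermediate vectors and
# preserves attributes; the in-place subset-assignment version does the least work.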
# EXERCISES:
# Compare the performance of extracting a column from a matrix, a data frame and a list
# How about a row? How do you think this could be improved?
# Compare the performance of the Squish functions for different sizes of x.
###########################################################################################
### Profiling with Rprof
# Functions which form a matrix of powers of the vector x, through degree dg
# powers1() grows objects (more on this later)
powers1 <- function(x,dg){
pw <- matrix(x,nrow=length(x))
prod <- x # current product
for(i in 2:dg){
prod <- prod * x
pw <- cbind(pw, prod)
}
return(pw)
}
# powers2() does the same thing, but declares the objects in advance
powers2 <- function(x,dg){
pw <- matrix(nrow=length(x), ncol=dg)
prod <- x # current product
pw[,1] <- prod
for(i in 2:dg){
prod <- prod * x
pw[,i] <- prod
}
return(pw)
}
# Compare the two with microbenchmark
x <- runif(1e4)
microbenchmark(powers1(x,40),
powers2(x,40))
# Use Rprof() to find bottlenecks
x <- runif(1e6)
Rprof()
invisible(powers1(x,30))
Rprof(NULL)
summaryRprof()
Rprof()
invisible(powers2(x,30))
Rprof(NULL)
summaryRprof()
# EXERCISE (optional):
# Write the 'powers' function using outer() and cumprod(). How do these vectorized functions
# compare with the options above?
###########################################################################################
### Vectorizing
# A trivial example
for_sum <- function(x){
y <- 0 # numeric() has length 0, so y + x[i] would stay empty; start from 0 instead
for(i in 1:length(x))
y <- y + x[i]
y
}
x <- runif(1000)
microbenchmark(for_sum(x), sum(x))
# An easy example using if and ifelse
for_if <- function(x){
y <- numeric(length(x))
for(i in 1:length(x))
if(x[i] < 0) y[i] <- 0 else y[i] <- x[i] # keep x[i] when non-negative, to match ifelse()
y
}
x <- runif(10000)
microbenchmark(for_if(x), ifelse(x<0, 0, x))
# Apply is not really much faster than a for loop, and much slower than a truly vectorized function
for_fun <- function(x){
y <- numeric(dim(x)[2])
for(i in 1:dim(x)[2])
y[i] <- sum(x[,i]) # sum each column, to match apply(x, 2, sum) and colSums(x)
y
}
apply_fun <- function(x){
y <- apply(x,2,sum)
y
}
x <- matrix(runif(500^2),ncol=500)
microbenchmark(for_fun(x), apply_fun(x), colSums(x))
# EXERCISES:
# How do these methods compare as the size of x changes?
# What about other functions from the apply family?
#################################################
# Over-vectorizing
# Higher-order functions for functional programming can be great when developing packages,
# but they do not improve run time, and can even be slower than a plain loop
x <- runif(1e4) # use a plain numeric vector for this comparison
sum_fun <- function(x) {
s <- x[1] + x[2]
for (i in 3:length(x)) s <- s + x[i]
s
}
red_fun <- function(a,b) a + b
microbenchmark(sum_fun(x), Reduce(red_fun,x), cumsum(x))
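# Reduce() calls red_fun once per element at the R level, so it is no faster than the
# explicit loop in sum_fun(); cumsum() does its looping in C.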
###########################################################################################
### Do less work
# Giving a function more information can make a large difference. For example, giving
# factor() the levels in advance.
letter_stuff <- sample(letters, size=1e5, replace=TRUE)
factor_a <- function(x) factor(x)
factor_b <- function(x) factor(x, levels=letters)
microbenchmark(factor_a(letter_stuff), factor_b(letter_stuff))
# Avoiding method dispatch can save a lot of time, but it is dangerous: the stripped-down
# function does no input checking, so bad input can make it fail in spectacular ways
quickdf <- function(l) {
class(l) <- "data.frame"
attr(l, "row.names") <- .set_row_names(length(l[[1]]))
l
}
l <- lapply(1:26, function(i) runif(1e3))
names(l) <- letters
microbenchmark(quickdf(l), as.data.frame.list(l), as.data.frame(l))
quickdf(list(x = 1, y = 1:2)) # This is going to fail
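# It goes wrong because the columns have different lengths (1 and 2): as.data.frame()
# would recycle or complain, but quickdf() skips all checking and returns a corrupt data frame.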
###########################################################################################
### Memory, growing objects
# Growing objects is very slow.
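# Each call to c(y, i) allocates a new vector and copies all the existing elements,
# so the total amount of copying grows quadratically with the length of the result.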
grow_obj <- function(x){
y <- numeric()
for(i in 1:x) y <- c(y,i)
y
}
subscript_obj <- function(x){
y <- numeric(x)
for(i in 1:x) y[i] <- i
y
}
colon_obj <- function(x){
y <- 1:x
y
}
microbenchmark(grow_obj(50), grow_obj(500), grow_obj(5000),
subscript_obj(50), subscript_obj(500), subscript_obj(5000),
colon_obj(50), colon_obj(500), colon_obj(5000)
)
###########################################################################################
### Byte code compilation
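# Note: since R 3.4.0 the JIT byte-code compiler is enabled by default, so sum_f() is
# usually compiled on first use and cmpfun() may give little additional speed-up here.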
x <- runif(1e4)
y <- runif(1e4)
z <- numeric(1e4)
sum_f <- function() for(i in 1:length(x)) z[i] <<- x[i] + y[i]
csum_f <- cmpfun(sum_f)
microbenchmark(x+y,sum_f(),csum_f())
###########################################################################################
### Parallelization
# A matrix inversion example for parallelizing
x <- 1700
mat <- matrix(runif(x^2),ncol=x)
invert_mat <- function(mat){
y <- solve(mat)
y
}
# Check roughly how long a single instance of invert_mat() takes
system.time(invert_mat(mat))
#==================================================
#Example of multiple matrix inversions
# Invert n randomly generated matrices, using several approaches
x <- 1700 #Dimensions of matrix to create and invert
n <- 10 #Number of times to create matrix and do matrix inversion
series <- 1:n
###############################################
# USING A LOOP - Slow (sad face)
loop_output <- vector("list", n) # preallocate a list of length n
time_loop <- FALSE
time_loop <- system.time({
for(i in series){
mat <- matrix(runif(x^2), ncol=x)
loop_output[[i]] <- invert_mat(mat)
}
})
###############################################
#USING foreach IN foreach LIBRARY
# foreach() replaces a for loop directly,
# this simplifies things as you don't strictly need to write a wrapper function
# (although we still use one below to keep the loop body tidy).
time_foreach <- FALSE
time_foreach <- system.time({
foreach_func <- function(i, r) {
mat <- matrix(runif(r^2), ncol=r)
temp_output <- invert_mat(mat)
return(temp_output)
}
# Registering the doParallel backend
cl <- makeCluster(4)
registerDoParallel(cl)
# Call foreach
# Note that the iterators i and r must have the same length (n)
foreach_output <- foreach(i=series, r=rep(x, n)) %dopar%
foreach_func(i, r)
# Stop the cluster
stopCluster(cl)
# This step is VERY IMPORTANT: if you do not stop the cluster, the worker R processes
# keep running in the background and waste memory and CPU.
})
################################################
#USING sfLapply IN snowfall LIBRARY
time_snowfall <- FALSE
time_snowfall <- system.time({
r <- x # sfLapply()'s first argument is itself called 'x', so pass the dimension under a different name
snowfall_func <- function(i, r) {
mat <- matrix(runif(r^2),ncol=r)
inv_mat <- invert_mat(mat)
return(inv_mat)
}
sfInit(parallel=TRUE, cpus=4, type='SOCK')
# After initializing the snowfall instance you must export to the workers all the data
# objects and functions they need
# If you want to export everything in the current session, you can use:
# sfExportAll() # This can be very slow
# Otherwise, you can export only the invert_mat() function defined above
sfExport('invert_mat')
snow_output <- sfLapply(series, fun=snowfall_func, r=r)
#output_snowfall <- unlist(rbind(output))
sfStop()
})
###############################################
# USING mclapply IN parallel LIBRARY
# IMPORTANT: This won't do much if you're on a Windows machine!
?mclapply
time_mclapply <- FALSE
time_mclapply <- system.time({
# For mclapply you must create a wrapper function, which is evaluated once for each
# value of "series"
mclapply.func <- function(i, x) {
mat <- matrix(runif(x^2),ncol=x)
inv.mat <- invert_mat(mat)
return(inv.mat)
}
#Use the detectCores function in "parallel" library to determine number of cores on your computer
n.cores <- detectCores()
print(paste("### CONGRATULATIONS YOU HAVE:", n.cores, "CORES ON YOUR COMPUTER"))
output <- mclapply(series, FUN=mclapply.func, x=x, mc.cores=n.cores)
#Output is in list form... therefore we need to reconstruct it back into a 3d-array
mclapply_output <- array(data=unlist(output), dim=c(x,x,n))
# Alternatively, the list output can be reconstructed with:
#mclapply.output.2 <- unlist(rbind(output))
})
print(time_loop)
print(time_foreach)
print(time_snowfall)
print(time_mclapply)
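###############################################
# USING parLapply IN parallel LIBRARY (a hedged sketch, not part of the timings above)
# parLapply() uses a socket cluster, so unlike mclapply() it also works on Windows.
# The cluster size of 4 and the names time_parlapply / parlapply_output are just
# illustrative choices; it reuses invert_mat(), series and x from above.
time_parlapply <- system.time({
  cl <- makeCluster(4)
  clusterExport(cl, c("invert_mat", "x")) # export the objects the workers need
  parlapply_output <- parLapply(cl, series, function(i) {
    mat <- matrix(runif(x^2), ncol=x)
    invert_mat(mat)
  })
  stopCluster(cl)
})
print(time_parlapply)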
# EXERCISE:
# Compare the scalability of all approaches. How does system.time vary as you change
# the values of n and x? Remember to check first how long it takes to run a single
# instance of invert_mat(); run time does not increase linearly with x.
# If you're feeling brave, also try changing the number of cores you're using.