-
Notifications
You must be signed in to change notification settings - Fork 1k
/
Copy pathmergelist.Rraw
144 lines (133 loc) · 7.44 KB
/
mergelist.Rraw
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
require(methods)
if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) {
if ((tt<-compiler::enableJIT(-1))>0)
cat("This is dev mode and JIT is enabled (level ", tt, ") so there will be a brief pause around the first test.\n", sep="")
} else {
require(data.table)
test = data.table:::test
perhaps.data.table = data.table:::perhaps.data.table
hasindex = data.table:::hasindex
fdistinct = data.table:::fdistinct
forderv = data.table:::forderv
}
addresses = function(x) vapply(x, address, "")
# internal helpers
test(1.01, perhaps.data.table(list()))
test(1.02, perhaps.data.table(list(a=1:2)))
test(1.03, perhaps.data.table(list(a=1:2, b=1:2)))
test(1.04, perhaps.data.table(list(1:2, 1:2)), FALSE)
test(2.01, fdistinct(list(x=c(1L,1:2), b=1:2), on="x", mult="last"), error="must be data.table")
test(2.02, fdistinct(data.table(x=c(1L,1:2)), on="z", mult="last"), error="must be character column names of")
test(2.03, fdistinct(data.table(x=c(1L,1:2)), on="x", mult="last", cols=character()), error="must be non-zero length, non-NA, integer or character columns of")
test(2.04, fdistinct(data.table(x=c(1L,1:2, y=1:3)), on="x", mult="last", copy=NA), error="must be TRUE or FALSE")
d = data.table(x=1:2, y=1:2)
test(2.05, ans<-fdistinct(d, on="x", mult="last"), d)
test(2.06, intersect(addresses(ans), addresses(d)), character())
test(2.07, ans<-fdistinct(d, on="x", mult="last", copy=FALSE), d)
test(2.08, addresses(ans), addresses(d))
d = data.table(x=c(2:1,2L), y=1:3)
test(2.09, fdistinct(d, on="x", mult="first"), data.table(x=2:1, y=1:2))
test(2.10, fdistinct(d, on="x", mult="last"), data.table(x=1:2, y=2:3))
setattr(attr(setattr(d, "index", integer()), "index", TRUE), "__x", forderv(d, "x", retGrp=TRUE)) ## retGrp=T index #4386
test(2.11, fdistinct(d, on="x", mult="first"), data.table(x=2:1, y=1:2))
test(3.01, hasindex(d, "x"))
test(3.02, hasindex(d, "x", retGrp=TRUE))
setattr(attr(setattr(d, "index", integer()), "index", TRUE), "__x", forderv(d, "x")) ## retGrp=F index #4386
test(3.03, hasindex(d, "x"))
test(3.04, !hasindex(d, "x", retGrp=TRUE))
setattr(d, "index", NULL)
test(3.05, !hasindex(d, "x"))
test(3.06, !hasindex(d, "x", retGrp=TRUE))
setattr(d, "index", integer())
test(3.07, !hasindex(d, "x"))
test(3.08, !hasindex(d, "x", retGrp=TRUE))
rm(d)
# cbindlist
l = list(
d1 = data.table(x=1:3, v1=1L),
d2 = data.table(y=3:1, v2=2L),
d3 = data.table(z=2:4, v3=3L)
)
ans = cbindlist(l)
expected = data.table(l$d1, l$d2, l$d3)
test(11.01, ans, expected)
test(11.02, intersect(addresses(ans), addresses(expected)), character())
ans = cbindlist(l, copy=FALSE)
expected = setDT(c(l$d1, l$d2, l$d3))
test(11.03, ans, expected)
test(11.04, length(intersect(addresses(ans), addresses(expected))), ncol(expected))
test(11.05, cbindlist(list(data.table(a=1L), data.table(), data.table(d=2L), data.table(f=3L))), data.table(a=1L,d=2L,f=3L))
rm(expected)
## codecov
test(12.01, cbindlist(data.frame(a=1L), data.frame(b=1L)), error="must be a list")
test(12.02, cbindlist(TRUE, FALSE), error="must be a list")
test(12.03, cbindlist(list(), NA), error="must be TRUE or FALSE")
test(12.04, cbindlist(list(data.table(a=1L), 1L)), error="is not of data.table type")
test(12.05, options = c(datatable.verbose=TRUE), cbindlist(list(data.table(a=1:2), data.table(b=1:2))), data.table(a=1:2, b=1:2), output="cbindlist.*took")
test(12.06, cbindlist(list(data.table(), data.table(a=1:2), data.table(b=1:2))), data.table(a=1:2, b=1:2))
test(12.07, cbindlist(list(data.table(), data.table(a=1:2), list(b=1:2))), data.table(a=1:2, b=1:2))
test(12.08, cbindlist(list(data.table(a=integer()), list(b=integer()))), data.table(a=integer(), b=integer()))
## duplicated names
test(12.09, cbindlist(list(data.table(a=1L, b=2L), data.table(b=3L, d=4L))), data.table(a=1L, b=2L, b=3L, d=4L))
ans = cbindlist(list(setindexv(data.table(a=2:1, b=1:2),"a"), data.table(a=1:2, b=2:1, key="a"), data.table(a=2:1, b=1:2)))
test(12.10, ans, data.table(a=2:1, b=1:2, a=1:2, b=2:1, a=2:1, b=1:2))
test(12.11, indices(ans), NULL)
## recycling, first ensure cbind recycling that we want to match to
test(12.12, cbind(data.table(x=integer()), data.table(a=1:2)), data.table(x=c(NA_integer_,NA), a=1:2))
test(12.13, cbind(data.table(x=1L), data.table(a=1:2)), data.table(x=c(1L,1L), a=1:2))
test(12.14, cbindlist(list(data.table(a=integer()), data.table(b=1:2))), error="recycling.*not yet implemented")
test(12.15, cbindlist(list(data.table(a=1L), data.table(b=1:2))), error="recycling.*not yet implemented")
test(12.16, cbindlist(list(data.table(a=integer()), data.table(b=1:2)), copy=FALSE), error="has to have equal nrow")
test(12.17, cbindlist(list(data.table(a=1L), data.table(b=1:2)), copy=FALSE), error="has to have equal nrow")
## retain indices
d = data.table(x=1:2, y=2:1, z=2:1, v1=1:2) ## ensure setDT will retain key and indices when it is called on the list, bc Ccbindlist returns list
setkeyv(d, "x"); setindexv(d, list("y", "z"))
a = attributes(d)
attributes(d) = a[!names(a) %in% c("class",".internal.selfref","row.names")]
test(13.01, class(d), "list")
setDT(d)
test(13.02, key(d), "x")
test(13.03, hasindex(d, "y") && hasindex(d, "z"))
l = list(
data.table(id1=1:5, id2=5:1, id3=1:5, v1=1:5),
data.table(id4=5:1, id5=1:5, v2=1:5),
data.table(id6=5:1, id7=1:5, v3=1:5),
data.table(id8=5:1, id9=5:1, v4=1:5)
)
setkeyv(l[[1L]], "id1"); setindexv(l[[1L]], list("id1", "id2", "id3", c("id1","id2","id3"))); setindexv(l[[3L]], list("id6", "id7")); setindexv(l[[4L]], "id9")
ii = lapply(l, indices)
ans = cbindlist(l)
test(13.04, key(ans), "id1")
test(13.05, indices(ans), c("id1","id2","id3","id1__id2__id3","id6","id7","id9"))
test(13.06, ii, lapply(l, indices)) ## this tests that original indices have not been touched, shallow_duplicate in mergeIndexAttrib
## fdistinct, another round
dt = data.table(x =
c(74L, 103L, 158L, 250L, 56L, 248L, 260L, 182L, 174L, 17L, 57L,
49L, 189L, 106L, 212L, 137L, 198L, 273L, 105L, 214L, 258L, 59L,
180L, 35L, 74L, 107L, 4L, 106L, 240L, 94L, 133L, 165L, 136L,
52L, 228L, 184L, 219L, 30L, 200L, 114L, 226L, 178L, 216L, 153L,
146L, 218L, 7L, 132L, 202L, 191L, 132L, 237L, 121L, 68L, 20L,
28L, 87L, 143L, 183L, 112L, 252L, 81L, 127L, 92L, 179L, 71L,
132L, 211L, 24L, 241L, 94L, 231L, 96L, 92L, 131L, 246L, 238L,
108L, 214L, 265L, 120L, 196L, 110L, 90L, 209L, 56L, 196L, 34L,
68L, 40L, 66L, 17L, 177L, 241L, 215L, 220L, 126L, 113L, 223L,
167L, 181L, 98L, 75L, 273L, 175L, 59L, 36L, 132L, 255L, 165L,
269L, 202L, 99L, 119L, 41L, 4L, 197L, 29L, 123L, 177L, 273L,
137L, 134L, 48L, 208L, 125L, 141L, 58L, 63L, 164L, 159L, 22L,
10L, 177L, 256L, 165L, 155L, 145L, 271L, 140L, 188L, 166L, 66L,
71L, 201L, 125L, 49L, 206L, 29L, 238L, 170L, 154L, 91L, 125L,
138L, 50L, 146L, 21L, 77L, 59L, 79L, 247L, 123L, 215L, 243L,
114L, 18L, 93L, 200L, 93L, 174L, 232L, 236L, 108L, 105L, 247L,
178L, 204L, 167L, 249L, 81L, 53L, 244L, 139L, 242L, 53L, 209L,
200L, 260L, 151L, 196L, 107L, 28L, 256L, 78L, 163L, 31L, 232L,
88L, 216L, 74L, 61L, 143L, 74L, 50L, 143L, 155L, 36L, 71L, 198L,
265L, 28L, 210L, 261L, 226L, 85L, 179L, 263L, 263L, 94L, 73L,
46L, 89L, 141L, 255L, 141L, 71L, 13L, 115L, 235L, 96L, 37L, 103L,
174L, 108L, 190L, 190L, 153L, 119L, 125L, 85L, 160L, 251L, 40L,
115L, 59L, 118L, 37L, 127L, 260L, 210L, 257L, 130L, 166L, 134L,
30L, 69L, 138L, 103L, 258L, 145L, 88L, 77L, 217L, 194L, 46L,
18L, 208L, 171L, 47L, 18L, 30L, 105L, 47L, 83L)
)
ans = unique(dt, by="x")
test(301.01, data.table(x=unique(dt$x)), ans) ## OK
test(301.02, fdistinct(dt, on="x"), ans) ## force sort=TRUE for the moment