-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathknnGen.R
executable file
·175 lines (147 loc) · 5.17 KB
/
knnGen.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#Run This to accesss hidden functions
fn <- unclass(lsf.str(envir = asNamespace("ArchR"), all = TRUE))
for (i in seq_along(fn)) {
tryCatch({
eval(parse(text = paste0(fn[i], "<-ArchR:::", fn[i])))
}, error = function(x) {
})
}
knnGen2 = function(
ArchRProj = NULL,
reducedDims = "IterativeLSI",
corCutOff = 0.75,
dimsToUse = 1:30,
knnIteration = 500,
k = 100, # automatically set if group is specified
min_knn = 2,
overlapCutoff = 0,
seed = 1,
cellsToUse = NULL
)
{
set.seed (seed)
#Get Reduced Dims
rD <- getReducedDims(ArchRProj, reducedDims = reducedDims, corCutOff = corCutOff, dimsToUse = dimsToUse)
if(!is.null(cellsToUse)){
rD <- rD[cellsToUse, ,drop=FALSE]
}
#Subsample
idx <- sample(seq_len(nrow(rD)), knnIteration, replace = !nrow(rD) >= knnIteration)
#KNN Matrix
knnObj <- .computeKNN(data = rD, query = rD[idx,], k = k)
#Determin Overlap
keepKnn <- determineOverlapCpp(knnObj, floor(overlapCutoff * k))
#Keep Above Cutoff
knnObj <- knnObj[keepKnn==0,, drop=F]
# if number of k doesnt meet minimum
if (sum(keepKnn == 0) < min_knn)
{
slice = function(x,n) {
N = length(x);
lapply(seq(1,N,n),function(i) x[i:min(i+n-1,N)])
}
message ('KNN search failed. Group cells by k')
knnObj = sapply (slice (rownames(rD), k), function(x) match (x,rownames(rD)))
knnObj = knnObj[sapply(knnObj, length) == k]
knnObj = do.call (rbind, knnObj)
}
#Convert To Names List
knnObj <- lapply(seq_len(nrow(knnObj)), function(x){
rownames(rD)[knnObj[x, ]]
}) %>% SimpleList
return (knnObj)
}
### KNN generator function ###
# k # number of neighbors per knnIteration NOTE: this get overwritten if group is specified
# knnIteration # number of cells sampled from which k neighbors are searched NOTE: ignored if gourp is specified
# min.cells_in_group # ignored if group is NULL
# min_knn_cluster # ignored if group is NULL
knnGen = function (
ArchRProj = NULL,
reducedDims = "IterativeLSI",
corCutOff = 0.75,
dimsToUse = 1:30,
knnIteration = 500,
k = 100, # automatically set if group is specified
overlapCutoff = 0.8,
seed = 1,
cellsToUse = NULL,
group = NULL,
min.cells_in_group = 50,
min_knn_cluster = 2,
max_attempts = 1000
)
{
require (progress)
set.seed (seed)
slice = function(x,n) {
N = length(x);
lapply(seq(1,N,n),function(i) x[i:min(i+n-1,N)])
}
#Get Reduced Dims
rD = getReducedDims (ArchRProj, reducedDims = reducedDims, corCutOff = corCutOff, dimsToUse = dimsToUse)
if(!is.null(cellsToUse))
{
rD = rD[cellsToUse, ,drop=FALSE]
rD = as.data.frame (rD)
ArchRProj = ArchRProj[cellsToUse,]
}
if (!is.null (group))
{
meta_group = getCellColData (ArchRProj)[,group]
meta_group = table (meta_group[!meta_group %in% names(table (meta_group)[table(meta_group) <= min.cells_in_group])])
#k = floor (min (meta_group) / min_knn_cluster)
#message (paste('k set to', k, 'to allow at least',min_knn_cluster,'knn group in each cluster'))
knnObj_l = list()
for (i in names(meta_group))
{
keepKnn = NULL
l = 1
message (paste('find at least',min_knn_cluster,'knn in cluster',i))
while (sum(keepKnn == 0) < min_knn_cluster)
{
#Subsample
idx <- unique (sample (seq_len(meta_group[i]), knnIteration, replace = !meta_group[i] >= knnIteration))
#KNN Matrix
rDs = rD[rownames(rD) %in% ArchRProj$cellNames[as.character(ArchRProj@cellColData[,group]) == i],]
knnObj <- .computeKNN (data = rDs, query = rDs[idx,], k = k)
#Determin Overlap
keepKnn <- determineOverlapCpp (knnObj, floor(overlapCutoff * k))
#Keep Above Cutoff
knnObj_group = knnObj[keepKnn==0,, drop=F] # remove knn with too many overlapping cells
l = l + 1
if (max_attempts == l)
{
message ('KNN search failed. Partition the cell group')
knnObj_group = lapply (slice (rownames(rDs), k), function(x) match (x,rownames(rDs)))
knnObj_group = knnObj_group[sapply(knnObj_group, length) == k]
knnObj_group = do.call (rbind, knnObj_group)
break
}
}
knnObj_l[[i]] = apply (knnObj_group, 2, function(x) rownames(rDs)[x])
if (!is.matrix(knnObj_l[[i]])) knnObj_l[[i]] = t(as.matrix (knnObj_l[[i]]) )
rownames (knnObj_l[[i]]) = paste0(i, 'KNN',1:nrow(knnObj_l[[i]]))
}
knnObj = Reduce (rbind, knnObj_l)
knn_names = rownames (knnObj)
knnObj = lapply (seq_len(nrow(knnObj)), function(x) knnObj[x,]) %>% SimpleList
names (knnObj) = knn_names
} else {
message ('Run cluster-agnostic knn')
#Subsample
idx <- sample (seq_len(nrow(rD)), knnIteration, replace = !nrow(rD) >= knnIteration)
#KNN Matrix
knnObj <- .computeKNN (data = rD, query = rD[idx,], k = k)
#Determin Overlap
keepKnn <- determineOverlapCpp (knnObj, floor(overlapCutoff * k))
#Keep Above Cutoff
knnObj <- knnObj[keepKnn==0,] # remove knn with too many overlapping cells
#Convert To Names List
knnObj <- lapply(seq_len(nrow(knnObj)), function(x){
rownames(rD)[knnObj[x, ]]
}) %>% SimpleList
names (knnObj) = paste0 ('KNN_',seq_along (knnObj))
}
return (knnObj)
}