Skip to content

Commit

Permalink
add assignment score
Browse files Browse the repository at this point in the history
  • Loading branch information
niklasmueboe committed Jul 7, 2024
1 parent 87acab7 commit ea4cfdf
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 29 deletions.
17 changes: 13 additions & 4 deletions docs/src/examples/ExampleAnalysis.jl
Original file line number Diff line number Diff line change
Expand Up @@ -110,13 +110,16 @@ adata = getlocalmaxima(AnnData, counts, lm, kernel; genes=names(signatures))
Once we have identified gene signatures from literature, previous experiments,
or by analysing local maxima we can assign a celltype to each pixel. This is usually the
most time-intensive processing step. [`assigncelltype`](@ref) returns 2 matrices;
most time-intensive processing step. [`assigncelltype`](@ref) returns 3 matrices;
a map of assigned celltypes of each pixel as
`CategoricalArrays.CategoricalMatrix` and the cosine similarity of the
assigned cell type for each pixel as matrix.
`CategoricalArrays.CategoricalMatrix`, the cosine similarity of the
assigned cell type for each pixel, and the assignment score as a confidence of the
cell type assignment.
=#

celltype_map, cosine_sim = assigncelltype(counts, signatures, kernel; celltypes=celltypes);
celltype_map, cosine_sim, assignment_conf = assigncelltype(
counts, signatures, kernel; celltypes=celltypes
);

#=
In the final step we can visualize the cell-type map.
Expand Down Expand Up @@ -151,6 +154,12 @@ celltype_img[total_rna .< 1.5] .= background_color;

simshow(imresize(celltype_img; ratio=1 / 45))

#=
The assignment score can be useful to evaluate the confidence of assignment.
=#

simshow(imresize(Gray.(assignment_conf); ratio=1 / 45))

#=
## Utils
Expand Down
25 changes: 19 additions & 6 deletions docs/src/examples/ExampleAnalysis.md

Large diffs are not rendered by default.

82 changes: 63 additions & 19 deletions src/KDE.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@ using BlockArrays: Block, BlockArray, undef_blocks
using CategoricalArrays: CategoricalMatrix, recode, recode!
using DataFrames: AbstractDataFrame, nrow
using ImageFiltering: Algorithm, Fill, Kernel, imfilter, reflect
using LinearAlgebra: norm
using LinearAlgebra: dot, norm
using OffsetArrays: OffsetArray
using SparseArrays: AbstractSparseArray, SparseMatrixCSC, nnz, nonzeros, nzrange, rowvals
using Unzip: unzip

# KDE
"""
Expand Down Expand Up @@ -110,7 +111,32 @@ function chunk(counts, kernel, sx=500, sy=500)
return chunks, rowslices, colslices
end

function calculatecosinesim(counts, signatures, kernel, unpad; log=false)
function findcelltypescore(cosine, upperbound)
max = 0
max2 = 0
argmax = 1
argmax2 = 1
for (i, val) in pairs(cosine)
if val > max2
if val > max
max2 = max
max = val
argmax2 = argmax
argmax = i
else
max2 = val
argmax2 = i
end
end
end
score = (max - max2) / upperbound[argmax, argmax2]
return (max, score, argmax)
end

function calculatecosinesim(
counts, signatures, similaritycorrection, kernel, unpad; log=false
)
# signatures must be normed row-wise (celltype-wise)
n_celltypes = size(signatures, 1)

T = promote_type(eltype(signatures), eltype(kernel))
Expand Down Expand Up @@ -143,20 +169,24 @@ function calculatecosinesim(counts, signatures, kernel, unpad; log=false)

# fastpath if whole chunk was "empty"
if init
return zeros(UInt8, length.(unpad)), kde_norm
return zeros(UInt8, length.(unpad)), kde_norm, zeros(T, length.(unpad))
end

celltype_norm = map(norm, eachslice(signatures; dims=1))
cosine ./= reshape(celltype_norm, 1, 1, n_celltypes)
cosine, celltype = map((x -> dropdims(x; dims=3)), findmax(cosine; dims=3))
cosine, score, celltype = unzip(
map(x -> findcelltypescore(x, similaritycorrection), eachslice(cosine; dims=(1, 2)))
)

@. cosine /= sqrt(kde_norm)
@. cosine[iszero(kde_norm)] = 0
# set empty pixels to zero (otherwise there might be random results)
empty = iszero.(kde_norm)
cosine[empty] .= 0
score[empty] .= 0
celltype[empty] .= 0

celltypemap = map((x -> x[3]), celltype)
@. celltypemap[iszero(cosine)] = 0
@. kde_norm = sqrt(kde_norm)
cosine ./= kde_norm
score ./= kde_norm

return celltypemap, cosine
return celltype, cosine, score
end

function smallestuint(n::Integer)
Expand Down Expand Up @@ -199,36 +229,50 @@ function assigncelltype(counts, signatures, kernel; celltypes=nothing, log=false
T = eltype(kernel)
U = smallestuint(nrow(signatures))

signatures = signatures[!, exist]
signatures = Matrix{T}(signatures[!, exist])
signatures ./= map(norm, eachslice(signatures; dims=1))

signatures = Matrix{T}(signatures)
nsignatures = size(signatures, 1)
sigcorrection = zeros(T, nsignatures, nsignatures)
for (i, j) in Tuple.(CartesianIndices(sigcorrection))
if i != j
s = signatures[i, :] .- signatures[j, :]
@. s[s < 0] = 0
sigcorrection[i, j] = sqrt(dot(s, s))
end
end

chunked_counts, rowslices, colslices = chunk([counts[g] for g in genes], kernel)
rows, cols = length.(rowslices), length.(colslices)

cosine = BlockArray(undef_blocks, Matrix{T}, rows, cols)
score = BlockArray(undef_blocks, Matrix{T}, rows, cols)
celltypemap = BlockArray(undef_blocks, Matrix{U}, rows, cols)

@threads for (i, r) in collect(enumerate(rowslices))
@threads for (j, c) in collect(enumerate(colslices))
idx = Block(i, j)
celltypemap_chunk, cosine_chunk = calculatecosinesim(
pop!(chunked_counts, idx), signatures, kernel, (r, c); log=log
celltypemap[idx], cosine[idx], score[idx] = calculatecosinesim(
pop!(chunked_counts, idx),
signatures,
sigcorrection,
kernel,
(r, c);
log=log,
)
@views celltypemap[idx] = celltypemap_chunk
@views cosine[idx] = cosine_chunk
end
end

cosine = collect(cosine)
cosine = Matrix(cosine)
score = Matrix(score)
celltypemap = CategoricalMatrix{Union{Missing,U},U}(collect(celltypemap))
recode!(celltypemap, 0 => missing)

if !isnothing(celltypes)
celltypemap = recode(celltypemap, Dict(enumerate(celltypes))...)
end

return celltypemap, cosine
return celltypemap, cosine, score
end

end # module KDE

0 comments on commit ea4cfdf

Please sign in to comment.