Skip to content

Commit

Permalink
Merge pull request #9 from niklasmueboe/dev
Browse files Browse the repository at this point in the history
Prepare release
  • Loading branch information
niklasmueboe authored Aug 1, 2024
2 parents ece189f + 8e445c7 commit 253a472
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 46 deletions.
18 changes: 12 additions & 6 deletions src/GridCount.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module GridCount
export GridCounts, crop!, mask!, gridsize, totalrna

using Base.Threads: @threads
using DataFrames: DataFrame, groupby, transform!
using DataFrames: DataFrame, groupby, transform, transform!
using SparseArrays: SparseMatrixCSC, dropzeros!, findnz, nonzeros, sparse

"""
Expand Down Expand Up @@ -48,12 +48,12 @@ Construct from a DataFrame with columns 'x' (`Integer`), 'y' (`Integer`), 'count
'geneID' (`Pool`).
"""
function GridCounts(df::DataFrame)
transform!(df, [:x, :y] .=> (x -> x .- (minimum(x) - one(eltype(x)))) .=> [:x, :y])
df = transform(df, [:x, :y] .=> (x -> x .- (minimum(x) - one(eltype(x)))) .=> [:x, :y])

rows = maximum(df.x)
cols = maximum(df.y)

n = length(df.geneID.pool)
n = length(unique(df.geneID))
genes = Vector{eltype(df.geneID)}(undef, n)
counts = Vector{SparseMatrixCSC{eltype(df.count)}}(undef, n)
@threads for (i, (key, subdf)) in collect(enumerate(pairs(groupby(df, :geneID))))
Expand All @@ -65,15 +65,21 @@ function GridCounts(df::DataFrame)
end

"""
GridCounts(df::DataFrame, binsize::Integer)
GridCounts(df::DataFrame, binsize::Real)
Construct from a DataFrame with columns 'x' (`Real`), 'y' (`Real`), 'count', and
'geneID' (`Pool`) binning the data by `binsize`.
"""
function GridCounts(df::DataFrame, binsize::Integer)
transform!(df, [:x, :y] .=> (x -> x .- minimum(x)) .=> [:x, :y])
function GridCounts(df::DataFrame, binsize::Real)
df = transform(df, [:x, :y] .=> (x -> x .- minimum(x)) .=> [:x, :y])
transform!(df, @. [:x, :y] => (x -> div(x, binsize) + 1) => [:x, :y])

for column in [:x, :y]
if !(eltype(df[!, column]) isa Integer)
df[!, column] = convert.(UInt32, df[!, column])
end
end

return GridCounts(df)
end

Expand Down
87 changes: 47 additions & 40 deletions src/KDE.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ module KDE

export gaussiankernel, kde, assigncelltype

using ..GridCount: GridCounts
using ..GridCount: GridCounts, gridsize

using Base.Broadcast: @__dot__
using Base.Threads: @threads
Expand Down Expand Up @@ -76,39 +76,44 @@ end

# Celltype assignment
function chunk_slices(i, step, n, pad)
bound1 = (i - 1) * step + 1
bound2 = i * step
start = (i - 1) * step + 1
stop = i * step

slice = max(1, bound1 + pad[1]):min(n, bound2 + pad[2])
reslice = range(1 + max(0, bound1 - slice[1]); length=min(step, n - bound1 + 1))
return slice, reslice
slice = max(1, start + pad[1]):min(n, stop + pad[2])
unpad = range(1 + max(0, start - slice[1]); length=min(step, n - start + 1))
return slice, unpad
end

function chunk(counts, kernel, sx=500, sy=500)
m, n = size(first(counts))
pad_x = extrema(axes(kernel, 1))
pad_y = extrema(axes(kernel, 2))
function padinfo(counts, kernel)
padrow = extrema(axes(kernel, 1))
padcol = extrema(axes(kernel, 2))
return size(first(counts)), (padrow, padcol)
end

chunks = Dict{Block{2,Int},Any}()
function chunkinfo(counts, kernel, chunksize)
function chunklengths(n, s, pad)
lengths = Int[]
for i in 1:cld(n, s)
_, unpad = chunk_slices(i, s, n, pad)
push!(lengths, length(unpad))
end
return lengths
end
srow, scol = chunksize
(m, n), (padrow, padcol) = padinfo(counts, kernel)

colslices = UnitRange{Int}[]
rowslices = UnitRange{Int}[]
return chunklengths(m, srow, padrow), chunklengths(n, scol, padcol)
end

for j in 1:cld(n, sx)
slice_x, reslice_x = chunk_slices(j, sx, n, pad_x)
col_chunk = map(x -> x[:, slice_x], counts)
push!(colslices, reslice_x)
function getchunk(counts, kernel, i, j; chunksize=(500, 500))
(m, n), (padrow, padcol) = padinfo(counts, kernel)
srow, scol = chunksize

for i in 1:cld(m, sy)
slice_y, reslice_y = chunk_slices(i, sy, m, pad_y)
chunks[Block(i, j)] = map(x -> x[slice_y, :], col_chunk)
if j == 1
push!(rowslices, reslice_y)
end
end
end
slicerow, unpadrow = chunk_slices(i, srow, m, padrow)
slicecol, unpadcol = chunk_slices(j, scol, n, padcol)
chunk = map(x -> x[slicerow, slicecol], counts)

return chunks, rowslices, colslices
return chunk, (unpadrow, unpadcol)
end

function findcelltypescore(cosine, upperbound)
Expand Down Expand Up @@ -214,7 +219,9 @@ The `eltype(kernel)` will be used for calculations and `signatures` will be cast
- `log::Bool`: whether to log-transform the KDE. Useful if `signatures` are calculated
from log-transformed gene expression.
"""
function assigncelltype(counts, signatures, kernel; celltypes=nothing, log=false)
function assigncelltype(
counts, signatures, kernel; celltypes=nothing, log=false, chunksize=(500, 500)
)
if !isnothing(celltypes) && length(celltypes) != nrow(signatures)
error("Length of 'celltypes' must match number of rows in 'signatures'")
end
Expand Down Expand Up @@ -243,23 +250,23 @@ function assigncelltype(counts, signatures, kernel; celltypes=nothing, log=false
end
end

chunked_counts, rowslices, colslices = chunk([counts[g] for g in genes], kernel)
rows, cols = length.(rowslices), length.(colslices)
m, n = gridsize(counts)

counts = [counts[g] for g in genes]

chunklengths = chunkinfo(counts, kernel, chunksize)
cosine = BlockArray(undef_blocks, Matrix{T}, chunklengths...)
score = BlockArray(undef_blocks, Matrix{T}, chunklengths...)
celltypemap = BlockArray(undef_blocks, Matrix{U}, chunklengths...)

cosine = BlockArray(undef_blocks, Matrix{T}, rows, cols)
score = BlockArray(undef_blocks, Matrix{T}, rows, cols)
celltypemap = BlockArray(undef_blocks, Matrix{U}, rows, cols)
srow, scol = chunksize

@threads for (i, r) in collect(enumerate(rowslices))
@threads for (j, c) in collect(enumerate(colslices))
@threads for i in 1:cld(m, srow)
@threads for j in 1:cld(n, scol)
idx = Block(i, j)
chunk, unpad = getchunk(counts, kernel, i, j; chunksize=chunksize)
celltypemap[idx], cosine[idx], score[idx] = calculatecosinesim(
pop!(chunked_counts, idx),
signatures,
sigcorrection,
kernel,
(r, c);
log=log,
chunk, signatures, sigcorrection, kernel, unpad; log=log
)
end
end
Expand Down

0 comments on commit 253a472

Please sign in to comment.