Skip to content

Commit

Permalink
Merge pull request #2 from niklasmueboe/dev
Browse files Browse the repository at this point in the history
GridCounts
  • Loading branch information
niklasmueboe authored Jun 17, 2024
2 parents 0ca8a7b + d39e5ec commit e3a0e71
Show file tree
Hide file tree
Showing 17 changed files with 299 additions and 190 deletions.
2 changes: 0 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ BlockArrays = "8e7c35d0-a365-5155-bbbb-fb81a777f24e"
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0"
ImageFiltering = "6a3955dd-da59-5b1f-98d4-e7296123deb5"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
Expand All @@ -33,7 +32,6 @@ BlockArrays = "0.16,1"
CSV = "0.10"
CategoricalArrays = "0.9,0.10"
DataFrames = "1"
DimensionalData = "0.25"
ImageFiltering = "0.7"
LinearAlgebra = "1"
Muon = "0.2"
Expand Down
3 changes: 2 additions & 1 deletion docs/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ DocumenterInterLinks = "d12716ef-a0f6-4df4-a9f1-a5a34e75c656"
StereoSSAM = "4c4340ea-7f9e-46b4-8512-61f1f6c41a11"

[compat]
Documenter = "1.3"
Documenter = "1.3"
DocumenterInterLinks = "1"
3 changes: 2 additions & 1 deletion docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ using StereoSSAM

links = InterLinks(
# "CategoricalArrays" => "https://categoricalarrays.juliadata.org/stable/",
# "DimensionalData" => "https://rafaqz.github.io/DimensionalData.jl/dev/",
# "DataFrames" => "https://dataframes.juliadata.org/stable/",
# "OffsetArrays" => "https://juliaarrays.github.io/OffsetArrays.jl/stable/",
"Muon" => "https://scverse.org/Muon.jl/dev/",
"SparseArrays" => (
"https://docs.julialang.org/en/v1/stdlib/SparseArrays/",
Expand Down
1 change: 1 addition & 0 deletions docs/src/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Pages = ["api.md"]

```@autodocs
Modules = [
StereoSSAM.GridCount,
StereoSSAM.IO,
StereoSSAM.Utils,
StereoSSAM.KDE,
Expand Down
11 changes: 5 additions & 6 deletions docs/src/examples/ExampleAnalysis.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,29 +26,28 @@ color_file = joinpath(data_path, "celltype_colors.json");
We read the data from file using [`readstereoseq`](@ref). The input will
usually be a GEM file of a StereoSeq experiment. The output is a
`DimensionalData.DimArray` of gene counts stored as
[`GridCounts`](@ref) of gene counts stored as
[`SparseArrays.SparseMatrixCSC`](@extref). The input data stems from the
original [Stereo-seq publication](https://doi.org/10.1016/j.cell.2022.04.003) and can
be downloaded [here](https://db.cngb.org/stomics/mosta/download/).
=#

counts = readstereoseq(stereoseq_file);
counts = readstereoseq(stereoseq_file)

#=
We also load pre-defined cell-type signatures
=#

using CSV
using DataFrames
using DimensionalData

## read pre-determined cell-type signatures
signatures = CSV.read(signature_file, DataFrame);
celltypes = signatures.Celltype;
select!(signatures, Not(:Celltype));

## remove genes that are not detected in Stereo-seq experiment
signatures = signatures[:, names(signatures) .∈ [dims(counts, 1)]];
signatures = signatures[:, names(signatures) .∈ [keys(counts)]];

print(signatures[1:5, 1:8])

Expand Down Expand Up @@ -104,7 +103,7 @@ as `Muon.AnnData` object.

using Muon

ad = getlocalmaxima(AnnData, counts, lm, kernel; genes=names(signatures))
adata = getlocalmaxima(AnnData, counts, lm, kernel; genes=names(signatures))

#=
### Cell-type map
Expand Down Expand Up @@ -188,4 +187,4 @@ highly variable genes that can be used for analysis. We again here demonstrate i
`Muon.AnnData`.
=#

ad = StereoSSAM.IO.readstereoseqbinned(AnnData, stereoseq_file, 50)
adata = readstereoseqbinned(AnnData, stereoseq_file, 50)
100 changes: 52 additions & 48 deletions docs/src/examples/ExampleAnalysis.md

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion docs/src/examples/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
Colors = "5ae59095-9a9b-59fe-a467-6f913c188581"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0"
ImageIO = "82e4d734-157c-48bb-816b-45c225c6df19"
ImageMagick = "6218d12a-5da1-5696-b52f-db25d2ecc6d1"
ImageShow = "4e3cecfd-b093-5904-9786-8bbb286a6a31"
Expand Down
8 changes: 3 additions & 5 deletions ext/AnnDataExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@ using Muon: AnnData

using StereoSSAM

function StereoSSAM.getlocalmaxima(
T::Type{AnnData}, counts, localmax, kernel; genes=nothing
)
function StereoSSAM.getlocalmaxima(::Type{AnnData}, counts, localmax, kernel; genes=nothing)
X, genes, obs = getlocalmaxima(counts, localmax, kernel; genes=genes)

return AnnData(;
Expand All @@ -17,8 +15,8 @@ function StereoSSAM.getlocalmaxima(
)
end

function StereoSSAM.readstereoseqbinned(T::Type{AnnData}, file, s::Integer)
X, genes, obs = readstereoseqbinned(file, s)
function StereoSSAM.readstereoseqbinned(::Type{AnnData}, file, binsize::Integer)
X, genes, obs = readstereoseqbinned(file, binsize)

return AnnData(;
X=permutedims(X),
Expand Down
6 changes: 3 additions & 3 deletions ext/AutomaticSingleCellToolboxExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ using SparseArrays: nzrange

using StereoSSAM

function StereoSSAM.getlocalmaxima(T::Type{WsObj}, counts, localmax, kernel; genes=nothing)
function StereoSSAM.getlocalmaxima(::Type{WsObj}, counts, localmax, kernel; genes=nothing)
X, genes, coordinates = getlocalmaxima(counts, localmax, kernel; genes=genes)

coordinates[!, "cell_counts"] = vec(sum(X; dims=1))
Expand All @@ -17,8 +17,8 @@ function StereoSSAM.getlocalmaxima(T::Type{WsObj}, counts, localmax, kernel; gen
return WsObj(Dict("raw_dat" => X), coordinates, genes, String[], Dict())
end

function StereoSSAM.readstereoseqbinned(T::Type{WsObj}, file, s::Integer)
X, genes, coordinates = readstereoseqbinned(file, s)
function StereoSSAM.readstereoseqbinned(::Type{WsObj}, file, binsize::Integer)
X, genes, coordinates = readstereoseqbinned(file, binsize)

coordinates[!, "cell_counts"] = vec(sum(X; dims=1))
coordinates[!, "cell_features"] = map(i -> length(nzrange(X, i)), 1:size(X, 2))
Expand Down
6 changes: 3 additions & 3 deletions ext/CellScopesExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ using CellScopes: RawCountObject
using StereoSSAM

function StereoSSAM.getlocalmaxima(
T::Type{RawCountObject}, counts, localmax, kernel; genes=nothing
::Type{RawCountObject}, counts, localmax, kernel; genes=nothing
)
mat, genes, coordinates = getlocalmaxima(counts, localmax, kernel; genes=genes)

Expand All @@ -15,8 +15,8 @@ function StereoSSAM.getlocalmaxima(
return RawCountObject(mat, coordinates.cell_name, genes.gene), coordinates
end

function StereoSSAM.readstereoseqbinned(T::Type{RawCountObject}, file, s::Integer)
counts, genes, coordinates = readstereoseqbinned(file, s)
function StereoSSAM.readstereoseqbinned(::Type{RawCountObject}, file, binsize::Integer)
counts, genes, coordinates = readstereoseqbinned(file, binsize)

rename!(coordinates, Dict(:id => "cell_name"))

Expand Down
6 changes: 3 additions & 3 deletions ext/SingleCellProjectionsExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ using SingleCellProjections: DataMatrix
using StereoSSAM

function StereoSSAM.getlocalmaxima(
T::Type{DataMatrix}, counts, localmax, kernel; genes=nothing
::Type{DataMatrix}, counts, localmax, kernel; genes=nothing
)
return DataMatrix(getlocalmaxima(counts, localmax, kernel; genes=genes))
end

function StereoSSAM.readstereoseqbinned(T::Type{DataMatrix}, file, s::Integer)
return DataMatrix(readstereoseqbinned(file, s))
function StereoSSAM.readstereoseqbinned(::Type{DataMatrix}, file, binsize::Integer)
return DataMatrix(readstereoseqbinned(file, binsize))
end

end # module SingleCellProjectionsExt
163 changes: 163 additions & 0 deletions src/GridCount.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
module GridCount

export GridCounts, crop!, mask!, gridsize, totalrna

using Base.Threads: @threads
using DataFrames: DataFrame, groupby, transform!
using SparseArrays: SparseMatrixCSC, dropzeros!, findnz, nonzeros, sparse

"""
AbstractGridCounts
A stack of genes each consisting of a 2D count array of the same size.
"""
abstract type AbstractGridCounts end

"""
GridCounts{G,T<:Number} <: AbstractGridCounts
A stack of genes`::G` each consisting of a 2D count`::T` array of the same size.
"""
mutable struct GridCounts{G,T<:Number} <: AbstractGridCounts
counts::Dict{G,SparseMatrixCSC{T}}
shape::Tuple{Int,Int}
@doc """
GridCounts(counts::Dict)
All values of the dict must be arrays of the same size.
"""
function GridCounts{G,T}(counts::Dict{G,SparseMatrixCSC{T}}) where {G,T<:Number}
shape = size(first(values(counts)))
for (_, c) in counts
if size(c) != shape
throw(DimensionMismatch("All `counts` must have the same size"))
end
end
return new{G,T}(counts, shape)
end
end

function GridCounts(counts::Dict{G,SparseMatrixCSC{T}}) where {G,T<:Number}
return GridCounts{G,T}(counts)
end

"""
GridCounts(df::DataFrame)
Construct from a DataFrame with columns 'x' (`Integer`), 'y' (`Integer`), 'count', and
'geneID' (`Pool`).
"""
function GridCounts(df::DataFrame)
transform!(df, [:x, :y] .=> (x -> x .- (minimum(x) - one(eltype(x)))) .=> [:x, :y])

rows = maximum(df.x)
cols = maximum(df.y)

n = length(df.geneID.pool)
genes = Vector{eltype(df.geneID)}(undef, n)
counts = Vector{SparseMatrixCSC{eltype(df.count)}}(undef, n)
@threads for (i, (key, subdf)) in collect(enumerate(pairs(groupby(df, :geneID))))
genes[i] = key.geneID
counts[i] = sparse(subdf.x, subdf.y, subdf.count, rows, cols)
end

return GridCounts(Dict(zip(genes, counts)))
end

"""
GridCounts(df::DataFrame, binsize::Integer)
Construct from a DataFrame with columns 'x' (`Real`), 'y' (`Real`), 'count', and
'geneID' (`Pool`) binning the data by `binsize`.
"""
function GridCounts(df::DataFrame, binsize::Integer)
transform!(df, [:x, :y] .=> (x -> x .- minimum(x)) .=> [:x, :y])
transform!(df, @. [:x, :y] => (x -> div(x, binsize) + 1) => [:x, :y])

return GridCounts(df)
end

Base.iterate(iter::GridCounts) = iterate(iter.counts)
Base.iterate(iter::GridCounts, state) = iterate(iter.counts, state)
Base.length(collection::GridCounts) = length(collection.counts)
Base.eltype(type::GridCounts) = eltype(type.counts)
Base.haskey(collection::GridCounts, key) = haskey(collection.counts, key)
Base.getindex(collection::GridCounts, key) = getindex(collection.counts, key)
Base.get(collection::GridCounts, key, default) = get(collection.counts, key, default)
Base.delete!(collection::GridCounts, key) = delete!(collection.counts, key)
Base.pop!(collection::GridCounts) = pop!(collection.counts)
Base.keys(iterator::GridCounts) = keys(iterator.counts)
Base.values(iterator::GridCounts) = values(iterator.counts)
Base.pairs(collection::GridCounts) = pairs(collection.counts)
Base.keytype(type::GridCounts) = keytype(type.counts)
Base.valtype(type::GridCounts) = valtype(type.counts)

function Base.setindex!(collection::GridCounts, value, key)
if size(value) == collection.shape
setindex!(collection.counts, value, key)
else
throw(DimensionMismatch("All `counts` must have the same size"))
end
end

function Base.show(io::IO, x::GridCounts)
type = "GridCounts{$(keytype(x)),$(eltype(valtype(x)))}"
return print(io, type, " ", x.shape, " with ", length(x), " genes")
end
# Base.show(io::IO, m::MIME"text/plain", x::GridCounts) = print(io, x)

"""
gridsize(counts)
The size of the grid of each layer.
"""
gridsize(counts) = counts.shape

"""
crop!(counts, slice)
Crop each gene layer in counts by indexing with `slice`.
"""
function crop!(counts, slice)
for (g, c) in counts
counts.counts[g] = c[slice...]
end
counts.shape = size(first(values(counts)))
return nothing
end

"""
mask!(counts, mask::AbstractMatrix{Bool})
Remove all counts in each gene layer for which `mask` is `false`.
"""
function mask!(counts::GridCounts{<:Any,T}, mask::AbstractMatrix{Bool}) where {T<:Any}
if gridsize(counts) != size(mask)
throw(DimensionMismatch("Size of `mask` must match gridsize of `counts`"))
end

inv_mask = .!mask
z = zero(T)
@threads for c in collect(values(counts))
x, y, _ = findnz(c)
nonzeros(c)[inv_mask[[CartesianIndex(i) for i in zip(x, y)]]] .= z
dropzeros!(c)
end
end

"""
totalrna(counts)
Caclulate the totalrna as sum of all genes for each pixel.
"""
function totalrna(counts)
n = length(counts)
x, y, v = Vector{Vector}(undef, n), Vector{Vector}(undef, n), Vector{Vector}(undef, n)

@threads for (i, c) in collect(enumerate(values(counts)))
x[i], y[i], v[i] = findnz(c)
end
return sparse((reduce(vcat, i) for i in (x, y, v))..., gridsize(counts)...)
end

end # module GridCount
Loading

0 comments on commit e3a0e71

Please sign in to comment.