diff --git a/README.md b/README.md index a9af61e..2b71f75 100644 --- a/README.md +++ b/README.md @@ -8,15 +8,15 @@ This package provides a quick way to assign secondary structure using the [DSSP] This is not a complete implementation of DSSP, as it only assigns '-' for loops, 'H' for alpha helices, and 'E' for beta strands. In spite of that, it matches the original DSSP to a large extent, with the added advantage of being more than 10x faster. For the full DSSP algorithm, check out [BioStructures.jl](https://github.com/BioJulia/BioStructures.jl) or [ProteinSecondaryStructures.jl](https://github.com/m3g/ProteinSecondaryStructures.jl), which both use the [DSSP_jll.jl](https://docs.juliahub.com/General/DSSP_jll/stable/) package that was auto-generated using [BinaryBuilder.jl](https://github.com/JuliaPackaging/BinaryBuilder.jl). ```julia -julia> ss_nums = dssp("test/data/1ASS.pdb") # 1 chain, returns numeric codes -1-element Vector{Vector{Int64}}: - [1, 1, 1, 3, 3, 3, 1, 1, 1, 1 … 3, 3, 3, 3, 3, 3, 3, 1, 1, 1] +julia> dssp("test/data/1ASS.pdb") # 1 chain, returns a vector of SSClass values +1-element Vector{Vector{SSClass}}: + [Loop, Loop, Loop, Strand, Strand, Strand … Strand, Strand, Strand, Loop, Loop, Loop] -julia> join.(sscodes.(ss_nums)) # 1 chain, converted to chars with sscodes, joined into string +julia> string.(dssp("test/data/1ASS.pdb")) # 1 chain, Vector{SSClass} converted to string 1-element Vector{String}: "---EEE-----------EEE-EEEEEE---E" ⋯ 90 bytes ⋯ "--------EEE-EEEEEEE--EEEEEEE---" -julia> join.(sscodes.(dssp("test/data/3GOU.pdb"))) # 4 chains +julia> string.(dssp("test/data/3GOU.pdb")) # 4 chains 4-element Vector{String}: "---HHHHHHHHHHHHHH---HHHHHHHHHHH" ⋯ 79 bytes ⋯ "HH------HHHHHHHHHHHHHHHHHH-----" "---HHHHHHHHHHHHHH---HHHHHHHHHHH" ⋯ 84 bytes ⋯ "---HHHHHHHHHHHHHHHHHH---------H" diff --git a/src/AssigningSecondaryStructure.jl b/src/AssigningSecondaryStructure.jl index d8e2f37..bba4901 100644 --- a/src/AssigningSecondaryStructure.jl +++ 
b/src/AssigningSecondaryStructure.jl @@ -1,8 +1,8 @@ module AssigningSecondaryStructure +include("ssclass.jl") include("utils.jl") include("dssp.jl") include("pdb.jl") -include("ss.jl") end diff --git a/src/dssp.jl b/src/dssp.jl index 33166a6..6b6e1b7 100644 --- a/src/dssp.jl +++ b/src/dssp.jl @@ -7,14 +7,14 @@ const Q1Q2_F = 0.084 * 332 const DEFAULT_CUTOFF = -0.5 const DEFAULT_MARGIN = 1.0 -function _unfold(a::Array, window::Int, axis::Int) +function _unfold(a::AbstractArray, window::Int, axis::Int) axis = axis < 0 ? ndims(a) + axis + 1 : axis idx = (0:window-1) .+ (1:size(a, axis) - window + 1)' unfolded = selectdim(a, axis, idx) return _moveaxis(unfolded, axis, ndims(unfolded)) end -function get_hydrogen_positions(coord::Array{T, 3}) where T <: Real +function get_hydrogen_positions(coord::AbstractArray{T, 3}) where T <: Real vec_cn = coord[2:end, 1, :] .- coord[1:end-1, 3, :] vec_cn ./= mapslices(norm, vec_cn, dims=2) vec_can = coord[2:end, 1, :] .- coord[2:end, 2, :] @@ -25,7 +25,7 @@ function get_hydrogen_positions(coord::Array{T, 3}) where T <: Real end function get_hbond_map( - coord::Array{T, 3}; + coord::AbstractArray{T, 3}; cutoff::Float64 = DEFAULT_CUTOFF, margin::Float64 = DEFAULT_MARGIN, return_e::Bool = false, @@ -75,7 +75,7 @@ function get_hbond_map( end """ - dssp(coords_chains::Vararg{Array{T, 3}, N}) + dssp(coords_chains::Vararg{AbstractArray{T, 3}, N}) Takes a variable number of chains, each of which is a 3D array of shape `(residue_count, 4, 3)`. Returns a vector of vector of integers denoting the secondary structure of each residue in each chain: @@ -84,7 +84,7 @@ Returns a vector of vector of integers denoting the secondary structure of each - `3` for strands Use the `sscodes` function to convert the integers to characters. """ -function dssp(coords_chains::Vararg{Array{T, 3}, N}) where {T, N} +function dssp(coords_chains::Vararg{AbstractArray{T, 3}, N}) where {T, N} chain_lengths = size.(coords_chains, 1) coords = vcat(coords_chains...) 
@@ -127,14 +127,14 @@ function dssp(coords_chains::Vararg{Array{T, 3}, N}) where {T, N} strand = ladder loop = .!helix .& .!strand - ss_nums = findfirst.(eachrow(cat(loop, helix, strand, dims=2))) + classes = SSClass.(findfirst.(eachrow(cat(loop, helix, strand, dims=2)))) - ss_nums_chains = Vector{Int}[] + classes_chains = Vector{SSClass}[] i = 0 for l in chain_lengths - push!(ss_nums_chains, ss_nums[i+1:i+l]) + push!(classes_chains, classes[i+1:i+l]) end - return ss_nums_chains + return classes_chains end diff --git a/src/ss.jl b/src/ss.jl deleted file mode 100644 index 3edf000..0000000 --- a/src/ss.jl +++ /dev/null @@ -1,8 +0,0 @@ -export ss_composition, sscode, sscodes - -ss_composition(ss::Vector{Int}) = [count(==(1), ss) count(==(2), ss) count(==(3), ss)] - -const SSCODES = ['-', 'H', 'E'] - -sscode(ss::Integer) = SSCODES[ss] -sscodes(ss::AbstractVector{<:Integer}) = sscode.(ss) \ No newline at end of file diff --git a/src/ssclass.jl b/src/ssclass.jl new file mode 100644 index 0000000..b99581c --- /dev/null +++ b/src/ssclass.jl @@ -0,0 +1,19 @@ +export SSClass, Loop, Helix, Strand, ss_composition + +struct SSClass + n::Int +end + +const CHAR_VEC = ['-', 'H', 'E'] +Base.convert(::Type{Char}, cls::SSClass) = CHAR_VEC[cls.n] +Base.string(ss::Vector{SSClass}) = join(convert(Char, cls) for cls in ss) + +const Loop = SSClass(1) +const Helix = SSClass(2) +const Strand = SSClass(3) + +const SSCLASS_NAMES = Dict(Loop => "Loop", Helix => "Helix", Strand => "Strand") + +Base.show(io::IO, cls::SSClass) = print(io, get(SSCLASS_NAMES, cls, "SSClass($(cls.n))")) + +ss_composition(ss::Vector{SSClass}) = [count(==(cls), ss) for cls in [Loop, Helix, Strand]] \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index c42fa62..78d9087 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,7 +2,7 @@ using AssigningSecondaryStructure using Test @testset "AssigningSecondaryStructure.jl" begin - @test ss_composition.(dssp("data/1ASS.pdb")) == [[60 53 39]] 
- @test ss_composition.(dssp("data/1ZAK.pdb")) == [[72 116 32], [72 116 32]] - @test ss_composition.(dssp("data/3GOU.pdb")) == [[40 101 0], [44 102 0], [40 101 0], [44 102 0]] + @test ss_composition.(dssp("data/1ASS.pdb")) == [[60, 53, 39]] + @test ss_composition.(dssp("data/1ZAK.pdb")) == [[72, 116, 32], [72, 116, 32]] + @test ss_composition.(dssp("data/3GOU.pdb")) == [[40, 101, 0], [44, 102, 0], [40, 101, 0], [44, 102, 0]] end