Skip to content

Commit

Permalink
Added an SSClass type
Browse files Browse the repository at this point in the history
  • Loading branch information
anton083 committed Oct 26, 2023
1 parent 30d90ba commit 3c13a85
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 26 deletions.
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@ This package provides a quick way to assign secondary structure using the [DSSP]
This is not a complete implementation of DSSP, as it only assigns '-' for loops, 'H' for alpha helices, and 'E' for beta strands. In spite of that, it matches the original DSSP to a large extent, with the added advantage of being more than 10x faster. For the full DSSP algorithm, check out [BioStructures.jl](https://github.com/BioJulia/BioStructures.jl) or [ProteinSecondaryStructures.jl](https://github.com/m3g/ProteinSecondaryStructures.jl), which both use the [DSSP_jll.jl](https://docs.juliahub.com/General/DSSP_jll/stable/) package that was auto-generated using [BinaryBuilder.jl](https://github.com/JuliaPackaging/BinaryBuilder.jl).

```julia
julia> ss_nums = dssp("test/data/1ASS.pdb") # 1 chain, returns numeric codes
1-element Vector{Vector{Int64}}:
[1, 1, 1, 3, 3, 3, 1, 1, 1, 1  …  3, 3, 3, 3, 3, 3, 3, 1, 1, 1]
julia> dssp("test/data/1ASS.pdb") # 1 chain, returns a vector of SSClass values per chain
1-element Vector{Vector{SSClass}}:
[Loop, Loop, Loop, Strand, Strand, Strand  …  Strand, Strand, Strand, Loop, Loop, Loop]

julia> join.(sscodes.(ss_nums)) # 1 chain, converted to chars with sscodes, joined into string
julia> string.(dssp("test/data/1ASS.pdb")) # 1 chain, Vector{SSClass} converted to string
1-element Vector{String}:
"---EEE-----------EEE-EEEEEE---E" ⋯ 90 bytes ⋯ "--------EEE-EEEEEEE--EEEEEEE---"

julia> join.(sscodes.(dssp("test/data/3GOU.pdb"))) # 4 chains
julia> string.(dssp("test/data/3GOU.pdb")) # 4 chains
4-element Vector{String}:
"---HHHHHHHHHHHHHH---HHHHHHHHHHH" ⋯ 79 bytes ⋯ "HH------HHHHHHHHHHHHHHHHHH-----"
"---HHHHHHHHHHHHHH---HHHHHHHHHHH" ⋯ 84 bytes ⋯ "---HHHHHHHHHHHHHHHHHH---------H"
Expand Down
2 changes: 1 addition & 1 deletion src/AssigningSecondaryStructure.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
module AssigningSecondaryStructure

# ssclass.jl is included before dssp.jl because dssp.jl builds its return value
# out of SSClass. The former ss.jl is no longer included: that file was deleted.
include("ssclass.jl")
include("utils.jl")
include("dssp.jl")
include("pdb.jl")

end
18 changes: 9 additions & 9 deletions src/dssp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@ const Q1Q2_F = 0.084 * 332
const DEFAULT_CUTOFF = -0.5
const DEFAULT_MARGIN = 1.0

"""
    _unfold(a::AbstractArray, window::Int, axis::Int)

Collect every run of `window` consecutive indices along dimension `axis` of `a`.

A negative `axis` counts from the last dimension (`-1` is `ndims(a)`). The runs are
selected with `selectdim` using a `window × (size(a, axis) - window + 1)` index matrix,
and the result is then passed through `_moveaxis(unfolded, axis, ndims(unfolded))`.
"""
function _unfold(a::AbstractArray, window::Int, axis::Int)
    # Normalise a negative axis to its positive 1-based equivalent.
    axis = axis < 0 ? ndims(a) + axis + 1 : axis
    # Column j of idx holds the indices j, j+1, …, j+window-1 of one sliding window.
    idx = (0:window-1) .+ (1:size(a, axis) - window + 1)'
    unfolded = selectdim(a, axis, idx)
    # NOTE(review): _moveaxis is defined outside this view; presumably it moves the
    # dimension at `axis` to the last position — confirm against utils.jl.
    return _moveaxis(unfolded, axis, ndims(unfolded))
end

function get_hydrogen_positions(coord::Array{T, 3}) where T <: Real
function get_hydrogen_positions(coord::AbstractArray{T, 3}) where T <: Real
vec_cn = coord[2:end, 1, :] .- coord[1:end-1, 3, :]
vec_cn ./= mapslices(norm, vec_cn, dims=2)
vec_can = coord[2:end, 1, :] .- coord[2:end, 2, :]
Expand All @@ -25,7 +25,7 @@ function get_hydrogen_positions(coord::Array{T, 3}) where T <: Real
end

function get_hbond_map(
coord::Array{T, 3};
coord::AbstractArray{T, 3};
cutoff::Float64 = DEFAULT_CUTOFF,
margin::Float64 = DEFAULT_MARGIN,
return_e::Bool = false,
Expand Down Expand Up @@ -75,7 +75,7 @@ function get_hbond_map(
end

"""
dssp(coords_chains::Vararg{Array{T, 3}, N})
dssp(coords_chains::Vararg{AbstractArray{T, 3}, N})
Takes a variable number of chains, each of which is a 3D array of shape `(residue_count, 4, 3)`.
Returns a vector of vectors of `SSClass` values denoting the secondary structure of each residue in each chain; each value wraps one of these integer codes:
Expand All @@ -84,7 +84,7 @@ Returns a vector of vector of integers denoting the secondary structure of each
- `3` for strands
Call `string` on a chain's `Vector{SSClass}` to convert it to a string of `-`, `H` and `E` characters.
"""
function dssp(coords_chains::Vararg{Array{T, 3}, N}) where {T, N}
function dssp(coords_chains::Vararg{AbstractArray{T, 3}, N}) where {T, N}
chain_lengths = size.(coords_chains, 1)
coords = vcat(coords_chains...)

Expand Down Expand Up @@ -127,14 +127,14 @@ function dssp(coords_chains::Vararg{Array{T, 3}, N}) where {T, N}
strand = ladder
loop = .!helix .& .!strand

ss_nums = findfirst.(eachrow(cat(loop, helix, strand, dims=2)))
classes = SSClass.(findfirst.(eachrow(cat(loop, helix, strand, dims=2))))

ss_nums_chains = Vector{Int}[]
classes_chains = Vector{SSClass}[]
i = 0
for l in chain_lengths
push!(ss_nums_chains, ss_nums[i+1:i+l])
push!(classes_chains, classes[i+1:i+l])
end

return ss_nums_chains
return classes_chains
end

8 changes: 0 additions & 8 deletions src/ss.jl

This file was deleted.

19 changes: 19 additions & 0 deletions src/ssclass.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
export SSClass, Loop, Helix, Strand, ss_composition

"""
    SSClass(n::Int)

Secondary structure class of a single residue, stored as the integer code `n`.

The named classes are `Loop` (`n == 1`, character `'-'`), `Helix` (`n == 2`, `'H'`)
and `Strand` (`n == 3`, `'E'`). Other values of `n` can be constructed, but they have
no name and no character code.
"""
struct SSClass
    n::Int
end

# One-character code of each class, indexed by `SSClass.n`.
const CHAR_VEC = ['-', 'H', 'E']

"""
    convert(Char, cls::SSClass)

Return the one-character code of `cls` (`'-'`, `'H'` or `'E'`).
Throws a `BoundsError` if `cls.n` is not a valid index into `CHAR_VEC`.
"""
Base.convert(::Type{Char}, cls::SSClass) = CHAR_VEC[cls.n]

"""
    string(ss::AbstractVector{SSClass})

Join the one-character codes of the classes in `ss` into a single `String`.
Any vector of `SSClass` is accepted, including views and ranges of a chain.
An empty vector gives `""`.
"""
Base.string(ss::AbstractVector{SSClass}) = join(convert(Char, cls) for cls in ss)

const Loop = SSClass(1)
const Helix = SSClass(2)
const Strand = SSClass(3)

# Display names of the named classes; anything else falls back to `SSClass(n)`.
const SSCLASS_NAMES = Dict(Loop => "Loop", Helix => "Helix", Strand => "Strand")

"""
    show(io::IO, cls::SSClass)

Print the name of `cls` (`Loop`, `Helix` or `Strand`), or `SSClass(n)` when `cls`
is not one of the named classes.
"""
function Base.show(io::IO, cls::SSClass)
    return print(io, get(SSCLASS_NAMES, cls, "SSClass($(cls.n))"))
end

"""
    ss_composition(ss::AbstractVector{SSClass})

Return a 3-element `Vector{Int}` with the number of `Loop`, `Helix` and `Strand`
residues in `ss`, in that order. Any vector of `SSClass` is accepted, including views.
"""
function ss_composition(ss::AbstractVector{SSClass})
    return [count(==(cls), ss) for cls in (Loop, Helix, Strand)]
end
6 changes: 3 additions & 3 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ using AssigningSecondaryStructure
using Test

@testset "AssigningSecondaryStructure.jl" begin
    # ss_composition returns a Vector of [loop, helix, strand] counts for each chain,
    # so the expected values are comma-separated vectors; a 1×3 matrix literal such as
    # [60 53 39] never compares equal to a Vector.
    @test ss_composition.(dssp("data/1ASS.pdb")) == [[60, 53, 39]]
    @test ss_composition.(dssp("data/1ZAK.pdb")) == [[72, 116, 32], [72, 116, 32]]
    # NOTE(review): chains 2 and 4 exceed chains 1 and 3 by exactly 4 loops and 1 helix;
    # confirm these are true per-chain counts and not prefixes of the concatenated result.
    @test ss_composition.(dssp("data/3GOU.pdb")) == [[40, 101, 0], [44, 102, 0], [40, 101, 0], [44, 102, 0]]
end

0 comments on commit 3c13a85

Please sign in to comment.