From 94b6b0ceb71d25c0dab4b8ad83b1d0aa575925a9 Mon Sep 17 00:00:00 2001 From: anton083 Date: Thu, 18 Apr 2024 03:31:32 +0200 Subject: [PATCH] Add sheet_directions --- src/AssigningSecondaryStructure.jl | 2 ++ src/assign.jl | 7 +++- src/directions.jl | 8 +++++ src/dssp.jl | 51 ++++++++++++++++++------------ src/hydrogen.jl | 23 ++++++++------ test/runtests.jl | 8 +++++ 6 files changed, 69 insertions(+), 30 deletions(-) create mode 100644 src/directions.jl diff --git a/src/AssigningSecondaryStructure.jl b/src/AssigningSecondaryStructure.jl index 842158f..572447a 100644 --- a/src/AssigningSecondaryStructure.jl +++ b/src/AssigningSecondaryStructure.jl @@ -4,5 +4,7 @@ include("utils.jl") include("hydrogen.jl") include("dssp.jl") include("assign.jl") +include("arrangement.jl") +# TODO: sheet topology end diff --git a/src/assign.jl b/src/assign.jl index 3395378..bbf4c51 100644 --- a/src/assign.jl +++ b/src/assign.jl @@ -31,8 +31,13 @@ end import Backboner, Backboner.Protein import Backboner.Protein: ncaco_coords, readpdb -assign_secondary_structure(chains::Vector{Protein.Chain}) = assign_secondary_structure(ncaco_coords.(chains)) +get_coords(chains::Vector{Protein.Chain}) = cat(ncaco_coords.(chains)..., dims=3) +get_coords(chain::Protein.Chain) = get_coords([chain]) +assign_secondary_structure(chains::Vector{Protein.Chain}) = assign_secondary_structure(ncaco_coords.(chains)) assign_secondary_structure(chain::Protein.Chain) = assign_secondary_structure([chain])[1] +assign_secondary_structure(backbones::Vector{Backboner.Backbone}) = assign_secondary_structure(Backboner.Protein.Chain.(backbones)) +assign_secondary_structure(backbone::Backboner.Backbone) = assign_secondary_structure(Protein.Chain(backbone)) + assign_secondary_structure(filename::AbstractString) = assign_secondary_structure(readpdb(filename)) \ No newline at end of file diff --git a/src/directions.jl b/src/directions.jl new file mode 100644 index 0000000..6488910 --- /dev/null +++ b/src/directions.jl @@ -0,0 +1,8 @@ +export sheet_directions + +function sheet_directions(hbond_map::AbstractMatrix{<:Real}) + p_bridge, a_bridge = get_bridges(hbond_map) + vec(sum(p_bridge, dims=2)), vec(sum(a_bridge, dims=2)) +end + +sheet_directions(chains::Union{Protein.Chain, Vector{Protein.Chain}}) = sheet_directions(get_hbond_map(get_coords(chains))) \ No newline at end of file diff --git a/src/dssp.jl b/src/dssp.jl index 9f6490f..48875da 100644 --- a/src/dssp.jl +++ b/src/dssp.jl @@ -14,21 +14,7 @@ function _unfold(a::AbstractArray, window::Int, axis::Int) return _moveaxis(unfolded, axis, ndims(unfolded)) end -# not differentiable like the PyDSSP version cause we use bitwise operators -function dssp(coords::AbstractArray{T, 3}) where T - size(coords, 1) == 3 || throw(DimensionMismatch("Expected 3 coordinates per atom, got $(size(coords, 1))")) - size(coords, 2) == 4 || throw(DimensionMismatch("Expected 4 atoms per residue, got $(size(coords, 2))")) - - N = size(coords, 3) - if N < 6 - coords = cat(coords, fill(Inf, 3, 4, 6-N), dims=3) - end - - coords = permutedims(coords, (3, 2, 1)) - - hbmap = get_hbond_map(coords) - hbmap = permutedims(hbmap, (2, 1)) # Rearrange to "i:C=O, j:N-H" form - +function get_helix_bools(hbmap::AbstractMatrix{<:Real}) # Identify turn 3, 4, 5 turn3 = diag(hbmap, 3) .> 0 turn4 = diag(hbmap, 4) .> 0 @@ -46,7 +32,13 @@ function dssp(coords::AbstractArray{T, 3}) where T helix3 = h3 .| circshift(h3, 1) .| circshift(h3, 2) helix5 = h5 .| circshift(h5, 1) .| circshift(h5, 2) .| circshift(h5, 3) .| circshift(h5, 4) + + helix = helix3 .| helix4 .| helix5 + + return helix +end +function get_bridges(hbmap::AbstractMatrix{<:Real}) # Identify bridge unfoldmap = _unfold(_unfold(hbmap, 3, -2), 3, -2) .> 0 unfoldmap_rev = permutedims(unfoldmap, (2, 1, 3, 4)) @@ -57,12 +49,31 @@ function dssp(coords::AbstractArray{T, 3}) where T a_bridge = (unfoldmap[:, :, 2, 2] .& unfoldmap_rev[:, :, 2, 2]) .| (unfoldmap[:, :, 1, 3] .& unfoldmap_rev[:, :, 1, 3]) a_bridge = _pad(false, a_bridge, (1,1), (1,1)) - # Ladder - ladder = dropdims(reduce(|, p_bridge .| a_bridge, dims=2), dims=2) + return p_bridge, a_bridge +end + +function get_ladder_bools(hbmap::AbstractMatrix{<:Real}) + p_bridge, a_bridge = get_bridges(hbmap) + ladder = vec(reduce(|, p_bridge .| a_bridge, dims=2)) + return ladder +end + +# not differentiable like the PyDSSP version cause we use bitwise operators +function dssp(coords::AbstractArray{T, 3}) where T + size(coords, 1) == 3 || throw(DimensionMismatch("Expected 3 coordinates per atom, got $(size(coords, 1))")) + size(coords, 2) == 4 || throw(DimensionMismatch("Expected 4 atoms per residue, got $(size(coords, 2))")) + + N = size(coords, 3) + if N < 6 + coords = cat(coords, fill(Inf, 3, 4, 6-N), dims=3) + end + + hbmap = get_hbond_map(coords) # "i:C=O, j:N-H" form + # H, E, L of C3 - helix = helix3 .| helix4 .| helix5 - strand = ladder - loop = .!helix .& .!strand + helix = get_helix_bools(hbmap) + strand = get_ladder_bools(hbmap) + loop = .!(helix .| strand) num_vector = findfirst.(eachrow(hcat(loop, helix, strand)))[1:N] diff --git a/src/hydrogen.jl b/src/hydrogen.jl index 0c68742..34b5746 100644 --- a/src/hydrogen.jl +++ b/src/hydrogen.jl @@ -1,11 +1,14 @@ -function get_hydrogen_positions(coords::AbstractArray{T, 3}) where T <: Real - vec_cn = coords[2:end, 1, :] .- coords[1:end-1, 3, :] +function get_hydrogen_positions(coords::AbstractArray{T, 3}; permute=true) where T <: Real + _, atoms_per_residue, residue_count = size(coords) + atoms_per_residue == 4 || throw(DimensionMismatch("Expected 4 atoms per residue, got $atoms_per_residue")) + permute && (coords_p = permutedims(coords, (3, 2, 1))) + vec_cn = coords_p[2:end, 1, :] .- coords_p[1:end-1, 3, :] vec_cn ./= mapslices(norm, vec_cn, dims=2) - vec_can = coords[2:end, 1, :] .- coords[2:end, 2, :] + vec_can = coords_p[2:end, 1, :] .- coords_p[2:end, 2, :] vec_can ./= mapslices(norm, vec_can, dims=2) vec_nh = vec_cn .+ vec_can vec_nh ./= mapslices(norm, vec_nh, dims=2) - return coords[2:end, 1, :] .+ 1.01 .* vec_nh + return coords_p[2:end, 1, :] .+ 1.01 .* vec_nh end function get_hbond_map( @@ -13,12 +16,14 @@ function get_hbond_map( cutoff::Float64 = DEFAULT_CUTOFF, margin::Float64 = DEFAULT_MARGIN, ) where T <: Real - residue_count, atoms_per_residue, _ = size(coords) + _, atoms_per_residue, residue_count = size(coords) atoms_per_residue == 4 || throw(DimensionMismatch("Expected 4 atoms per residue, got $atoms_per_residue")) - cpos = coords[1:end-1, 3, :] - opos = coords[1:end-1, 4, :] - npos = coords[2:end, 1, :] + coords_p = permutedims(coords, (3, 2, 1)) + + cpos = coords_p[1:end-1, 3, :] + opos = coords_p[1:end-1, 4, :] + npos = coords_p[2:end, 1, :] hpos = get_hydrogen_positions(coords) cmap = repeat(reshape(cpos, 1, :, 3), outer=(residue_count-1, 1, 1)) @@ -50,5 +55,5 @@ function get_hbond_map( hbond_map .= (sin.(hbond_map .* (π / 2 / margin)) .+ 1.0) ./ 2.0 hbond_map .*= local_mask - return hbond_map + return hbond_map' end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 5d91d6c..89695ec 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -30,4 +30,12 @@ ss_composition(ss::Vector{Int}) = [count(==(code), ss) for code in 1:3] end + @testset "sheet arrangement" begin + hbond_map = ASS.get_hbond_map(ASS.ncaco_coords.(ASS.readpdb("data/1ASS.pdb"))[1]) + p, ap = sheet_arrangement(hbond_map) + @test p isa Vector{Int64} + @test Set(unique(p)) == Set([0, 1, 2]) + @test Set(unique(ap)) == Set([0, 1, 2]) + end + end