Skip to content

Commit

Permalink
Use Backboner for reading PDBs
Browse files Browse the repository at this point in the history
  • Loading branch information
AntonOresten committed Apr 13, 2024
1 parent 4d3ca30 commit cfe493f
Show file tree
Hide file tree
Showing 10 changed files with 74 additions and 155 deletions.
14 changes: 3 additions & 11 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,29 +1,21 @@
name = "AssigningSecondaryStructure"
uuid = "8ed43e74-60fb-4e11-99b9-91deed37aef7"
authors = ["Anton Oresten <[email protected]>", "Shintaro Minami"]
version = "0.3.2"
version = "0.3.3"

[deps]
Backboner = "9ac9c2a2-1cfe-46d3-b3fd-6fa470ea56a7"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
PDBTools = "e29189f1-7114-4dbd-93d0-c5673a921a58"
PaddedViews = "5432bcbf-9aad-5242-b902-cca2824c8663"

[weakdeps]
Backboner = "9ac9c2a2-1cfe-46d3-b3fd-6fa470ea56a7"

[extensions]
BackbonerExt = "Backboner"

[compat]
Backboner = "0.9"
LinearAlgebra = "1"
PDBTools = "1"
PaddedViews = "0.5"
julia = "^1.7"

[extras]
Backboner = "9ac9c2a2-1cfe-46d3-b3fd-6fa470ea56a7"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test", "Backboner"]
test = ["Test"]
14 changes: 0 additions & 14 deletions ext/BackbonerExt.jl

This file was deleted.

2 changes: 1 addition & 1 deletion src/AssigningSecondaryStructure.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
module AssigningSecondaryStructure

include("utils.jl")
include("hydrogen.jl")
include("dssp.jl")
include("assign.jl")
include("pdb.jl")

end
9 changes: 9 additions & 0 deletions src/assign.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,12 @@ function assign_secondary_structure(coords_chains::Vector{<:AbstractArray{T, 3}}

return code_vectors_by_chain
end

import Backboner, Backboner.Protein
import Backboner.Protein: ncaco_coords, readpdb

assign_secondary_structure(chains::Vector{Protein.Chain}) = assign_secondary_structure(ncaco_coords.(chains))

assign_secondary_structure(chain::Protein.Chain) = assign_secondary_structure([chain])[1]

assign_secondary_structure(filename::AbstractString) = assign_secondary_structure(readpdb(filename))
61 changes: 3 additions & 58 deletions src/dssp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,65 +14,10 @@ function _unfold(a::AbstractArray, window::Int, axis::Int)
return _moveaxis(unfolded, axis, ndims(unfolded))
end

function _get_hydrogen_positions(coord::AbstractArray{T, 3}) where T <: Real
vec_cn = coord[2:end, 1, :] .- coord[1:end-1, 3, :]
vec_cn ./= mapslices(norm, vec_cn, dims=2)
vec_can = coord[2:end, 1, :] .- coord[2:end, 2, :]
vec_can ./= mapslices(norm, vec_can, dims=2)
vec_nh = vec_cn .+ vec_can
vec_nh ./= mapslices(norm, vec_nh, dims=2)
return coord[2:end, 1, :] .+ 1.01 .* vec_nh
end

function _get_hbond_map(
coord::AbstractArray{T, 3};
cutoff::Float64 = DEFAULT_CUTOFF,
margin::Float64 = DEFAULT_MARGIN,
) where T <: Real
residue_count, atoms_per_residue, _ = size(coord)
@assert atoms_per_residue == 4

cpos = coord[1:end-1, 3, :]
opos = coord[1:end-1, 4, :]
npos = coord[2:end, 1, :]
hpos = _get_hydrogen_positions(coord)

cmap = repeat(reshape(cpos, 1, :, 3), outer=(residue_count-1, 1, 1))
omap = repeat(reshape(opos, 1, :, 3), outer=(residue_count-1, 1, 1))
nmap = repeat(reshape(npos, :, 1, 3), outer=(1, residue_count-1, 1))
hmap = repeat(reshape(hpos, :, 1, 3), outer=(1, residue_count-1, 1))

d_on = dropdims(sqrt.(sum(abs2.(omap .- nmap), dims=3)), dims=3)
d_ch = dropdims(sqrt.(sum(abs2.(cmap .- hmap), dims=3)), dims=3)
d_oh = dropdims(sqrt.(sum(abs2.(omap .- hmap), dims=3)), dims=3)
d_cn = dropdims(sqrt.(sum(abs2.(cmap .- nmap), dims=3)), dims=3)

arr = Q1Q2_F .* (1.0 ./ d_on .+ 1.0 ./ d_ch .- 1.0 ./ d_oh .- 1.0 ./ d_cn)

e = _pad(0.0, arr, (1,0), (0,1))

local_mask = trues(residue_count, residue_count)
for i in 1:residue_count
local_mask[i, i] = false
if i > 1
local_mask[i, i-1] = false
end
if i > 2
local_mask[i, i-2] = false
end
end

hbond_map = clamp.(cutoff - margin .- e, -margin, margin)
hbond_map .= (sin.(hbond_map ./ margin .* π ./ 2) .+ 1.0) ./ 2.0
hbond_map .*= local_mask

return hbond_map
end

# not differentiable like the PyDSSP version cause we use bitwise operators
function dssp(coords::AbstractArray{T, 3}) where T
@assert size(coords, 1) == 3
@assert size(coords, 2) == 4
size(coords, 1) == 3 || throw(DimensionMismatch("Expected 3 coordinates per atom, got $(size(coords, 1))"))
size(coords, 2) == 4 || throw(DimensionMismatch("Expected 4 atoms per residue, got $(size(coords, 2))"))

N = size(coords, 3)
if N < 6
Expand All @@ -81,7 +26,7 @@ function dssp(coords::AbstractArray{T, 3}) where T

coords = permutedims(coords, (3, 2, 1))

hbmap = _get_hbond_map(coords)
hbmap = get_hbond_map(coords)
hbmap = permutedims(hbmap, (2, 1)) # Rearrange to "i:C=O, j:N-H" form

# Identify turn 3, 4, 5
Expand Down
54 changes: 54 additions & 0 deletions src/hydrogen.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
function get_hydrogen_positions(coords::AbstractArray{T, 3}) where T <: Real
vec_cn = coords[2:end, 1, :] .- coords[1:end-1, 3, :]
vec_cn ./= mapslices(norm, vec_cn, dims=2)
vec_can = coords[2:end, 1, :] .- coords[2:end, 2, :]
vec_can ./= mapslices(norm, vec_can, dims=2)
vec_nh = vec_cn .+ vec_can
vec_nh ./= mapslices(norm, vec_nh, dims=2)
return coords[2:end, 1, :] .+ 1.01 .* vec_nh
end

function get_hbond_map(
coords::AbstractArray{T, 3};
cutoff::Float64 = DEFAULT_CUTOFF,
margin::Float64 = DEFAULT_MARGIN,
) where T <: Real
residue_count, atoms_per_residue, _ = size(coords)
atoms_per_residue == 4 || throw(DimensionMismatch("Expected 4 atoms per residue, got $atoms_per_residue"))

cpos = coords[1:end-1, 3, :]
opos = coords[1:end-1, 4, :]
npos = coords[2:end, 1, :]
hpos = get_hydrogen_positions(coords)

cmap = repeat(reshape(cpos, 1, :, 3), outer=(residue_count-1, 1, 1))
omap = repeat(reshape(opos, 1, :, 3), outer=(residue_count-1, 1, 1))
nmap = repeat(reshape(npos, :, 1, 3), outer=(1, residue_count-1, 1))
hmap = repeat(reshape(hpos, :, 1, 3), outer=(1, residue_count-1, 1))

d_on = dropdims(sqrt.(sum(abs2.(omap .- nmap), dims=3)), dims=3)
d_ch = dropdims(sqrt.(sum(abs2.(cmap .- hmap), dims=3)), dims=3)
d_oh = dropdims(sqrt.(sum(abs2.(omap .- hmap), dims=3)), dims=3)
d_cn = dropdims(sqrt.(sum(abs2.(cmap .- nmap), dims=3)), dims=3)

arr = Q1Q2_F .* (1.0 ./ d_on .+ 1.0 ./ d_ch .- 1.0 ./ d_oh .- 1.0 ./ d_cn)

e = _pad(0.0, arr, (1,0), (0,1))

local_mask = trues(residue_count, residue_count)
for i in 1:residue_count
local_mask[i, i] = false
if i > 1
local_mask[i, i-1] = false
end
if i > 2
local_mask[i, i-2] = false
end
end

hbond_map = clamp.(cutoff - margin .- e, -margin, margin)
hbond_map .= (sin.(hbond_map .*/ 2 / margin)) .+ 1.0) ./ 2.0
hbond_map .*= local_mask

return hbond_map
end
41 changes: 0 additions & 41 deletions src/pdb.jl

This file was deleted.

2 changes: 1 addition & 1 deletion src/utils.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# These functions come from numpy and were used to port the code from python to julia.

function _pad(x::T, arr::AbstractArray{T, N}, paddings::Vararg{Tuple{Int, Int}, N}) where {T, N}
@assert ndims(arr) == length(paddings)
ndims(arr) == length(paddings) || throw(DimensionMismatch("Number of paddings must match the number of dimensions of the array"))
new_size = Int[]
offsets = UnitRange{Int}[]
for (n, (a,b)) in zip(size(arr), paddings)
Expand Down
10 changes: 0 additions & 10 deletions test/BackbonerExt.jl

This file was deleted.

22 changes: 3 additions & 19 deletions test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,30 +1,16 @@
using AssigningSecondaryStructure
using Test

import AssigningSecondaryStructure as ASS

ss_composition(ss::Vector{Int}) = [count(==(code), ss) for code in 1:3]

@testset "AssigningSecondaryStructure.jl" begin

@testset "io.jl" begin

@testset "1ASS" begin
backbone = load_pdb_chains("data/1ASS.pdb")
@test length(backbone) == 1
@test size.(backbone, 3) == [152]
end

@testset "1ZAK" begin
backbone = load_pdb_chains("data/1ZAK.pdb")
@test length(backbone) == 2
@test size.(backbone, 3) == [220, 220]
end

end

@testset "DSSP" begin

@testset "dssp" begin
coords = load_pdb_chains("data/1ASS.pdb")[1]
coords = ASS.ncaco_coords.(ASS.readpdb("data/1ASS.pdb"))[1]
@test AssigningSecondaryStructure.dssp(coords[:, :, 35:39]) == [1, 1, 1, 1, 1] # minimum helix length is 4
@test AssigningSecondaryStructure.dssp(coords[:, :, 35:40]) == [1, 2, 2, 2, 2, 1] # ends don't count towards helix length :shrug:
@test AssigningSecondaryStructure.dssp(coords[:, :, 35:41]) == [1, 2, 2, 2, 2, 2, 1]
Expand All @@ -44,6 +30,4 @@ ss_composition(ss::Vector{Int}) = [count(==(code), ss) for code in 1:3]

end

include("BackbonerExt.jl")

end

2 comments on commit cfe493f

@AntonOresten
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register

Release notes:

  • More robust PDB-reading
  • Remove Backboner extension
  • Import Backboner directly and define assign_secondary_structure for Backboner.Protein.Chain and Vector{Backboner.Protein.Chain}

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/104836

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.3.3 -m "<description of version>" cfe493f0fcbb703e71345279f42cb6f6de66b37f
git push origin v0.3.3

Please sign in to comment.