Skip to content

Commit

Permalink
Argument type changes, oxygen and hydrogen padding
Browse files Browse the repository at this point in the history
  • Loading branch information
AntonOresten committed Jul 19, 2024
1 parent 4b95b53 commit 1a001de
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 27 deletions.
14 changes: 7 additions & 7 deletions src/assign.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ export assign_secondary_structure!, assign_secondary_structure
function assign_secondary_structure! end

"""
assign_secondary_structure(coords_by_chains::Vector{<:AbstractArray{T, 3}}) where T
assign_secondary_structure(chain_backbones::Vector{Array{T,3}}) where T<:Real
Given a vector of chain backbones, each represented as a 3-dimensional array of size 3x3xL, this function assigns the secondary structure to each residue.
Expand All @@ -13,11 +13,11 @@ for the corresponding chain and their respective residues. The integers are enco
- `2`: helix; α, 3_10, or π.
- `3`: strand; part of a β-sheet.
"""
function assign_secondary_structure(coords_by_chains::Vector{<:AbstractArray{<:Real, 3}})
coords = cat(coords_by_chains..., dims=3)
ss_numbers = dssp(coords)
lengths = size.(coords_by_chains, 3)
function assign_secondary_structure(chain_backbones::Vector{Array{T,3}}) where T<:Real
concatenated_backbone = cat(chain_backbones..., dims=3)
secondary_structure = dssp(convert(Array{Float64}, concatenated_backbone))
lengths = size.(chain_backbones, 3)
chain_ends = [0; cumsum(lengths)]
ss_by_chain = [ss_numbers[i+1:j] for (i,j) in zip(chain_ends, chain_ends[2:end])]
return ss_by_chain
secondary_structure_by_chain = [secondary_structure[i+1:j] for (i,j) in zip(chain_ends, chain_ends[2:end])]
return secondary_structure_by_chain
end
9 changes: 3 additions & 6 deletions src/dssp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -48,19 +48,16 @@ function get_strands(Hbonds::AbstractMatrix{Bool})
return mapslices(any, Parallel_Bridge .| Antiparallel_Bridge, dims=1) |> vec |> collect
end

function dssp(coords::Array{<:Real,3})
function dssp(coords::Array{T,3}) where T <: Real
size(coords)[1:2] == (3, 3) || throw(DimensionMismatch("Expected 3x3xL array, got $(size(coords))"))
size(coords, 3) < 5 && return ones(Int, size(coords, 3))
coords = convert(Array{Float32}, coords)

Hbonds = get_Hbonds(coords)

helix = get_helices(Hbonds)
strand = get_strands(Hbonds)
loop = .!(helix .| strand)

# 1 for helix, 2 for strand, 3 for loop
ss_numbers = vec(mapslices(findfirst, [loop helix strand], dims=2))

return ss_numbers
secondary_structure = vec(mapslices(findfirst, [loop helix strand], dims=2))
return secondary_structure
end
30 changes: 17 additions & 13 deletions src/hbonds.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,41 +3,45 @@ using LinearAlgebra: norm, diagind
col_norms(arr::AbstractArray{<:Real}) = mapslices(norm, arr, dims=1)
normalize_cols(arr::AbstractArray{<:Real}) = arr ./ col_norms(arr)

const CO_DISTANCE = 1.23
const NH_DISTANCE = 1.01

const Q1Q2 = 0.42 * 0.20 # charges
const F = 332.0 # dimensional factor

const CUTOFF = -0.5

function get_oxygen_positions(coords::Array{<:Real,3})
function get_oxygen_positions(coords::Array{T,3}) where T<:Real
Cα_pos, C_pos, N_pos = coords[:, 2, 1:end-1], coords[:, 3, 1:end-1], coords[:, 1, 2:end]
CαC_vec = C_pos - Cα_pos
NC_vec = C_pos - N_pos
CO_vec = 1.23 * normalize_cols(normalize_cols(CαC_vec) + normalize_cols(NC_vec))
return C_pos + CO_vec
CO_vec = T(CO_DISTANCE) * normalize_cols(normalize_cols(CαC_vec) + normalize_cols(NC_vec))
O_pos = C_pos + CO_vec
return [O_pos fill(T(NaN), 3)]
end

function get_hydrogen_positions(coords::Array{<:Real,3})
function get_hydrogen_positions(coords::Array{T,3}) where T<:Real
C_pos, N_pos, Cα_pos = coords[:, 3, 1:end-1], coords[:, 1, 2:end], coords[:, 2, 2:end]
CN_vec = N_pos - C_pos
CαN_vec = N_pos - Cα_pos
NH_vec = 1.01 * normalize_cols(normalize_cols(CN_vec) + normalize_cols(CαN_vec))
return N_pos + NH_vec
NH_vec = T(NH_DISTANCE) * normalize_cols(normalize_cols(CN_vec) + normalize_cols(CαN_vec))
H_pos = N_pos + NH_vec
return [fill(T(NaN), 3) H_pos]
end

# i:C=O, j:N-H
function get_Hbonds(coords::Array{<:Real,3}, cutoff::Real=CUTOFF)
pad_pos = fill(NaN, 3)
C_pos = [coords[:, 3, 1:end-1] pad_pos]
O_pos = [get_oxygen_positions(coords) pad_pos]
N_pos = [pad_pos coords[:, 1, 2:end]]
H_pos = [pad_pos get_hydrogen_positions(coords)]
function get_Hbonds(coords::Array{T,3}, cutoff::Real=CUTOFF) where T<:Real
C_pos = [coords[:, 3, 1:end-1] fill(T(NaN), 3)]
O_pos = get_oxygen_positions(coords)
N_pos = [fill(T(NaN), 3) coords[:, 1, 2:end]]
H_pos = get_hydrogen_positions(coords)

ON_dist = col_norms(O_pos .- reshape(N_pos, 3, 1, :))
CH_dist = col_norms(C_pos .- reshape(H_pos, 3, 1, :))
OH_dist = col_norms(O_pos .- reshape(H_pos, 3, 1, :))
CN_dist = col_norms(C_pos .- reshape(N_pos, 3, 1, :))

E = Q1Q2 * F * (1 ./ ON_dist + 1 ./ CH_dist - 1 ./ OH_dist - 1 ./ CN_dist)
E = T(Q1Q2 * F) * (1 ./ ON_dist + 1 ./ CH_dist - 1 ./ OH_dist - 1 ./ CN_dist)
E = dropdims(E, dims=1)

Hbonds = E .< cutoff
Expand Down
2 changes: 1 addition & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ using Test
import AssigningSecondaryStructure as ASS
import Backboner

ss_composition(ss::AbstractVector{Int}) = [count(==(code), ss) for code in 1:3]
ss_composition(secondary_structure::Vector{Int}) = [count(==(ss), secondary_structure) for ss in 1:3]

@testset "AssigningSecondaryStructure.jl" begin

Expand Down

0 comments on commit 1a001de

Please sign in to comment.