diff --git a/src/assign.jl b/src/assign.jl index faef80b..92fda72 100644 --- a/src/assign.jl +++ b/src/assign.jl @@ -3,7 +3,7 @@ export assign_secondary_structure!, assign_secondary_structure function assign_secondary_structure! end """ - assign_secondary_structure(coords_by_chains::Vector{<:AbstractArray{T, 3}}) where T + assign_secondary_structure(chain_backbones::Vector{Array{T,3}}) where T<:Real Given a vector of chain backbones, each represented as a 3-dimensional array of size 3x3xL, this function assigns the secondary structure to each residue. @@ -13,11 +13,11 @@ for the corresponding chain and their respective residues. The integers are enco - `2`: helix; α, 3_10, or π. - `3`: strand; part of a β-sheet. """ -function assign_secondary_structure(coords_by_chains::Vector{<:AbstractArray{<:Real, 3}}) - coords = cat(coords_by_chains..., dims=3) - ss_numbers = dssp(coords) - lengths = size.(coords_by_chains, 3) +function assign_secondary_structure(chain_backbones::Vector{Array{T,3}}) where T<:Real + concatenated_backbone = cat(chain_backbones..., dims=3) + secondary_structure = dssp(convert(Array{Float64}, concatenated_backbone)) + lengths = size.(chain_backbones, 3) chain_ends = [0; cumsum(lengths)] - ss_by_chain = [ss_numbers[i+1:j] for (i,j) in zip(chain_ends, chain_ends[2:end])] - return ss_by_chain + secondary_structure_by_chain = [secondary_structure[i+1:j] for (i,j) in zip(chain_ends, chain_ends[2:end])] + return secondary_structure_by_chain end diff --git a/src/dssp.jl b/src/dssp.jl index 9433e38..96f6048 100644 --- a/src/dssp.jl +++ b/src/dssp.jl @@ -48,11 +48,9 @@ function get_strands(Hbonds::AbstractMatrix{Bool}) return mapslices(any, Parallel_Bridge .| Antiparallel_Bridge, dims=1) |> vec |> collect end -function dssp(coords::Array{<:Real,3}) +function dssp(coords::Array{T,3}) where T <: Real size(coords)[1:2] == (3, 3) || throw(DimensionMismatch("Expected 3x3xL array, got $(size(coords))")) size(coords, 3) < 5 && return ones(Int, size(coords, 3)) - coords = convert(Array{Float32}, coords) - Hbonds = get_Hbonds(coords) helix = get_helices(Hbonds) @@ -60,7 +58,6 @@ function dssp(coords::Array{<:Real,3}) loop = .!(helix .| strand) # 1 for helix, 2 for strand, 3 for loop - ss_numbers = vec(mapslices(findfirst, [loop helix strand], dims=2)) - - return ss_numbers + secondary_structure = vec(mapslices(findfirst, [loop helix strand], dims=2)) + return secondary_structure end \ No newline at end of file diff --git a/src/hbonds.jl b/src/hbonds.jl index 4a6d379..75b66e1 100644 --- a/src/hbonds.jl +++ b/src/hbonds.jl @@ -3,41 +3,45 @@ using LinearAlgebra: norm, diagind col_norms(arr::AbstractArray{<:Real}) = mapslices(norm, arr, dims=1) normalize_cols(arr::AbstractArray{<:Real}) = arr ./ col_norms(arr) +const CO_DISTANCE = 1.23 +const NH_DISTANCE = 1.01 + const Q1Q2 = 0.42 * 0.20 # charges const F = 332.0 # dimensional factor const CUTOFF = -0.5 -function get_oxygen_positions(coords::Array{<:Real,3}) +function get_oxygen_positions(coords::Array{T,3}) where T<:Real Cα_pos, C_pos, N_pos = coords[:, 2, 1:end-1], coords[:, 3, 1:end-1], coords[:, 1, 2:end] CαC_vec = C_pos - Cα_pos NC_vec = C_pos - N_pos - CO_vec = 1.23 * normalize_cols(normalize_cols(CαC_vec) + normalize_cols(NC_vec)) - return C_pos + CO_vec + CO_vec = T(CO_DISTANCE) * normalize_cols(normalize_cols(CαC_vec) + normalize_cols(NC_vec)) + O_pos = C_pos + CO_vec + return [O_pos fill(T(NaN), 3)] end -function get_hydrogen_positions(coords::Array{<:Real,3}) +function get_hydrogen_positions(coords::Array{T,3}) where T<:Real C_pos, N_pos, Cα_pos = coords[:, 3, 1:end-1], coords[:, 1, 2:end], coords[:, 2, 2:end] CN_vec = N_pos - C_pos CαN_vec = N_pos - Cα_pos - NH_vec = 1.01 * normalize_cols(normalize_cols(CN_vec) + normalize_cols(CαN_vec)) - return N_pos + NH_vec + NH_vec = T(NH_DISTANCE) * normalize_cols(normalize_cols(CN_vec) + normalize_cols(CαN_vec)) + H_pos = N_pos + NH_vec + return [fill(T(NaN), 3) H_pos] end # i:C=O, j:N-H -function get_Hbonds(coords::Array{<:Real,3}, cutoff::Real=CUTOFF) - pad_pos = fill(NaN, 3) - C_pos = [coords[:, 3, 1:end-1] pad_pos] - O_pos = [get_oxygen_positions(coords) pad_pos] - N_pos = [pad_pos coords[:, 1, 2:end]] - H_pos = [pad_pos get_hydrogen_positions(coords)] +function get_Hbonds(coords::Array{T,3}, cutoff::Real=CUTOFF) where T<:Real + C_pos = [coords[:, 3, 1:end-1] fill(T(NaN), 3)] + O_pos = get_oxygen_positions(coords) + N_pos = [fill(T(NaN), 3) coords[:, 1, 2:end]] + H_pos = get_hydrogen_positions(coords) ON_dist = col_norms(O_pos .- reshape(N_pos, 3, 1, :)) CH_dist = col_norms(C_pos .- reshape(H_pos, 3, 1, :)) OH_dist = col_norms(O_pos .- reshape(H_pos, 3, 1, :)) CN_dist = col_norms(C_pos .- reshape(N_pos, 3, 1, :)) - E = Q1Q2 * F * (1 ./ ON_dist + 1 ./ CH_dist - 1 ./ OH_dist - 1 ./ CN_dist) + E = T(Q1Q2 * F) * (1 ./ ON_dist + 1 ./ CH_dist - 1 ./ OH_dist - 1 ./ CN_dist) E = dropdims(E, dims=1) Hbonds = E .< cutoff diff --git a/test/runtests.jl b/test/runtests.jl index 23ac3c5..21ae25e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,7 +4,7 @@ using Test import AssigningSecondaryStructure as ASS import Backboner -ss_composition(ss::AbstractVector{Int}) = [count(==(code), ss) for code in 1:3] +ss_composition(secondary_structure::Vector{Int}) = [count(==(ss), secondary_structure) for ss in 1:3] @testset "AssigningSecondaryStructure.jl" begin