From 845d33c638adbaa4346b98c374e6c4a23871c4b7 Mon Sep 17 00:00:00 2001 From: AntonOresten Date: Thu, 7 Nov 2024 01:57:12 +0100 Subject: [PATCH 1/2] add kdtree --- Project.toml | 4 +++ src/assign.jl | 62 ++++++++++++++++++++++++++++++------------- src/hydrogen_bonds.jl | 39 ++++++++++++++++++--------- 3 files changed, 74 insertions(+), 31 deletions(-) diff --git a/Project.toml b/Project.toml index caa6119..28d4c4d 100644 --- a/Project.toml +++ b/Project.toml @@ -5,9 +5,13 @@ version = "0.5.0" [deps] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +NearestNeighbors = "b8a86587-4115-5ab1-83bc-aa920d37bbce" +SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [compat] LinearAlgebra = "1" +NearestNeighbors = "0.4.20" +SparseArrays = "1.11.0" julia = "1" [extras] diff --git a/src/assign.jl b/src/assign.jl index 06d185e..18f9eed 100644 --- a/src/assign.jl +++ b/src/assign.jl @@ -5,44 +5,68 @@ using LinearAlgebra: diag # 5-turn >>555<< # MINIMAL X X X # LONGER GGG HHHH IIIII -function get_helices(Hbonds::AbstractMatrix{Bool}) - turn3 = [diag(Hbonds, 3) .> 0; falses(3)] - turn4 = [diag(Hbonds, 4) .> 0; falses(4)] - turn5 = [diag(Hbonds, 5) .> 0; falses(5)] +function get_helices(Hbonds::SparseMatrixCSC{Bool, Int}) + n = size(Hbonds, 1) - # "Minimal" helices: the previous and current - # residue is bonding to a residue n steps ahead respectively + # Extract diagonals from sparse matrix + turn3 = [Hbonds[i, i+3] for i in 1:(n - 3)] + turn3 = vcat(turn3, falses(3)) + + turn4 = [Hbonds[i, i+4] for i in 1:(n - 4)] + turn4 = vcat(turn4, falses(4)) + + turn5 = [Hbonds[i, i+5] for i in 1:(n - 5)] + turn5 = vcat(turn5, falses(5)) + + # "Minimal" helices h3 = [get(turn3, i-1, false) & turn3[i] for i in eachindex(turn3)] h4 = [get(turn4, i-1, false) & turn4[i] for i in eachindex(turn4)] h5 = [get(turn5, i-1, false) & turn5[i] for i in eachindex(turn5)] - # Longer helices: smearing out the minimal helix - # residues to the residues they bond to - helix4 = reduce(.|, circshift(h4, i) for i in 0:3) + # Longer helices + helix4 = reduce(.|, (circshift(h4, i) for i in 0:3)) mask = .!(helix4 .| circshift(helix4, 1)) h3 = h3 .& mask h5 = h5 .& mask - helix3 = reduce(.|, circshift(h3, i) for i in 0:2) - helix5 = reduce(.|, circshift(h5, i) for i in 0:4) + helix3 = reduce(.|, (circshift(h3, i) for i in 0:2)) + helix5 = reduce(.|, (circshift(h5, i) for i in 0:4)) helix = helix3 .| helix4 .| helix5 return helix |> collect end -function get_bridges(Hbonds::AbstractMatrix{Bool}) - Parallel_Bridge = falses(size(Hbonds)) - Antiparallel_Bridge = falses(size(Hbonds)) - for j in 2:size(Hbonds, 2)-1, i in 2:size(Hbonds, 1)-1 - Parallel_Bridge[i,j] = (Hbonds[i-1,j] & Hbonds[j,i+1]) | - (Hbonds[j-1,i] & Hbonds[i,j+1]) - Antiparallel_Bridge[i,j] = (Hbonds[i,j] & Hbonds[j,i]) | - (Hbonds[i-1,j+1] & Hbonds[j-1,i+1]) +function get_bridges(Hbonds::SparseMatrixCSC{Bool, Int}) + n = size(Hbonds, 1) + Parallel_Bridge = spzeros(Bool, n, n) + Antiparallel_Bridge = spzeros(Bool, n, n) + + is, js, vals = findnz(Hbonds) + + # Iterate over non-zero entries in Hbonds + for (i0, j0) in zip(is, js) + + for (i, j) in Iterators.product(i0-1:i0+1, j0-1:j0+1) + + 1 < i < n && 1 < j < n || continue + + # Parallel bridges + if (Hbonds[i-1, j] && Hbonds[j, i+1]) || (Hbonds[j-1, i] && Hbonds[i, j+1]) + Parallel_Bridge[i, j] = true + end + + # Antiparallel bridges + if (Hbonds[i, j] && Hbonds[j, i]) || (Hbonds[i-1, j+1] && Hbonds[j-1, i+1]) + Antiparallel_Bridge[i, j] = true + end + end end + return Parallel_Bridge, Antiparallel_Bridge end + function get_strands(Hbonds::AbstractMatrix{Bool}) Parallel_Bridge, Antiparallel_Bridge = get_bridges(Hbonds) return mapslices(any, Parallel_Bridge .| Antiparallel_Bridge, dims=1) |> vec |> collect diff --git a/src/hydrogen_bonds.jl b/src/hydrogen_bonds.jl index 1325cea..75aadce 100644 --- a/src/hydrogen_bonds.jl +++ b/src/hydrogen_bonds.jl @@ -29,25 +29,40 @@ function get_hydrogen_positions(coords::Array{T,3}) where T<:Real return [fill(T(NaN), 3) H_pos] end -# i:C=O, j:N-H +using NearestNeighbors +using SparseArrays + function get_hbond_map(coords::Array{T,3}, cutoff::Real=CUTOFF) where T<:Real C_pos = coords[:, 3, :] O_pos = get_oxygen_positions(coords) N_pos = coords[:, 1, :] H_pos = get_hydrogen_positions(coords) - ON_dist = col_norms(O_pos .- reshape(N_pos, 3, 1, :)) - CH_dist = col_norms(C_pos .- reshape(H_pos, 3, 1, :)) - OH_dist = col_norms(O_pos .- reshape(H_pos, 3, 1, :)) - CN_dist = col_norms(C_pos .- reshape(N_pos, 3, 1, :)) + num_residues = size(coords, 3) + Hbonds = spzeros(Bool, num_residues, num_residues) - E = T(Q1Q2 * F) * (1 ./ ON_dist + 1 ./ CH_dist - 1 ./ OH_dist - 1 ./ CN_dist) - E = dropdims(E, dims=1) + # Build KD-tree for N positions + N_tree = KDTree(N_pos) - Hbonds = E .< cutoff - Hbonds[diagind(Hbonds, 0)] .= false - Hbonds[diagind(Hbonds, 1)] .= false - Hbonds[diagind(Hbonds, 2)] .= false + # For each O_i, find N_j within cutoff + for i in 1:num_residues + O_i = O_pos[:, i] + idxs = inrange(N_tree, O_i, 10.0) + for j in idxs + # Exclude self and neighboring residues + if abs(i - j) > 2 # Exclude i == j, i+1, i+2 + # Compute E[i,j] as before + ON_dist = norm(O_pos[:, i] - N_pos[:, j]) + CH_dist = norm(C_pos[:, i] - H_pos[:, j]) + OH_dist = norm(O_pos[:, i] - H_pos[:, j]) + CN_dist = norm(C_pos[:, i] - N_pos[:, j]) + E_ij = T(Q1Q2 * F) * (1 / ON_dist + 1 / CH_dist - 1 / OH_dist - 1 / CN_dist) + if E_ij < cutoff + Hbonds[i, j] = true + end + end + end + end return Hbonds -end \ No newline at end of file +end From a545ba0bcf0626db3e7b521360f8d0b4fddc7843 Mon Sep 17 00:00:00 2001 From: AntonOresten Date: Thu, 7 Nov 2024 02:44:16 +0100 Subject: [PATCH 2/2] Use KDTree --- Project.toml | 4 +++- src/hydrogen_bonds.jl | 22 ++++++++++++++-------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/Project.toml b/Project.toml index 28d4c4d..4a52e12 100644 --- a/Project.toml +++ b/Project.toml @@ -1,17 +1,19 @@ name = "AssigningSecondaryStructure" uuid = "8ed43e74-60fb-4e11-99b9-91deed37aef7" authors = ["Anton Oresten "] -version = "0.5.0" +version = "0.5.1" [deps] LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" NearestNeighbors = "b8a86587-4115-5ab1-83bc-aa920d37bbce" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" [compat] LinearAlgebra = "1" NearestNeighbors = "0.4.20" SparseArrays = "1.11.0" +StaticArrays = "1.9.8" julia = "1" [extras] diff --git a/src/hydrogen_bonds.jl b/src/hydrogen_bonds.jl index 75aadce..47de76b 100644 --- a/src/hydrogen_bonds.jl +++ b/src/hydrogen_bonds.jl @@ -31,11 +31,12 @@ end using NearestNeighbors using SparseArrays +using StaticArrays function get_hbond_map(coords::Array{T,3}, cutoff::Real=CUTOFF) where T<:Real - C_pos = coords[:, 3, :] + C_pos = @views coords[:, 3, :] O_pos = get_oxygen_positions(coords) - N_pos = coords[:, 1, :] + N_pos = @views coords[:, 1, :] H_pos = get_hydrogen_positions(coords) num_residues = size(coords, 3) @@ -44,18 +45,23 @@ function get_hbond_map(coords::Array{T,3}, cutoff::Real=CUTOFF) where T<:Real # Build KD-tree for N positions N_tree = KDTree(N_pos) + C_pos_static = SVector{3, T}.(eachslice(C_pos, dims=2)) + O_pos_static = SVector{3, T}.(eachslice(O_pos, dims=2)) + N_pos_static = SVector{3, T}.(eachslice(N_pos, dims=2)) + H_pos_static = SVector{3, T}.(eachslice(H_pos, dims=2)) + # For each O_i, find N_j within cutoff for i in 1:num_residues - O_i = O_pos[:, i] - idxs = inrange(N_tree, O_i, 10.0) + O_i = O_pos_static[i] + idxs = inrange(N_tree, O_i, 5.0) for j in idxs # Exclude self and neighboring residues if abs(i - j) > 2 # Exclude i == j, i+1, i+2 # Compute E[i,j] as before - ON_dist = norm(O_pos[:, i] - N_pos[:, j]) - CH_dist = norm(C_pos[:, i] - H_pos[:, j]) - OH_dist = norm(O_pos[:, i] - H_pos[:, j]) - CN_dist = norm(C_pos[:, i] - N_pos[:, j]) + ON_dist = norm(O_pos_static[i] - N_pos_static[j]) + CH_dist = norm(C_pos_static[i] - H_pos_static[j]) + OH_dist = norm(O_pos_static[i] - H_pos_static[j]) + CN_dist = norm(C_pos_static[i] - N_pos_static[j]) E_ij = T(Q1Q2 * F) * (1 / ON_dist + 1 / CH_dist - 1 / OH_dist - 1 / CN_dist) if E_ij < cutoff Hbonds[i, j] = true