Skip to content

Commit

Permalink
Rework type interface and internals
Browse files Browse the repository at this point in the history
  • Loading branch information
AntonOresten committed Aug 18, 2024
1 parent 4bf2b78 commit 04c9b5e
Show file tree
Hide file tree
Showing 16 changed files with 454 additions and 321 deletions.
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ version = "0.0.1"
AssigningSecondaryStructure = "8ed43e74-60fb-4e11-99b9-91deed37aef7"
Backboner = "9ac9c2a2-1cfe-46d3-b3fd-6fa470ea56a7"
BioStructures = "de9282ab-8554-53be-b2d6-f6c222edabfc"
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
PeriodicTable = "7b2266bf-644c-5ea3-82d8-af4bbd25a884"

[weakdeps]
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
Expand Down
8 changes: 6 additions & 2 deletions src/ProteinChains.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ using Backboner

include("properties.jl")

include("atom.jl")

include("chain.jl")
export ProteinChain
export countresidues
Expand All @@ -13,6 +15,8 @@ export ProteinStructure

include("idealize.jl")
export BackboneGeometry
export append_residue
export prepend_residue

include("secondary-structure.jl")

Expand All @@ -21,13 +25,13 @@ export assign_bond_lengths!
export assign_bond_angles!
export assign_torsion_angles!
export assign_secondary_structure!
export assign_standard_residue!
export assign_ideal_residue!
export assign_residue_rotations!
export assign_residue_rotations_quat!
export assign_residue_translations!
export assign_is_knotted!

include("io.jl")
include("io/io.jl")
export readchains
export readpdb
export readcif
Expand Down
27 changes: 27 additions & 0 deletions src/atom.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
using PeriodicTable: elements

const ELEMENT_SYMBOL_TO_ATOMIC_NUMBER = Dict(uppercase(elements[atomic_number].symbol) => atomic_number for atomic_number in 1:118)
const ATOMIC_NUMBER_TO_ELEMENT_SYMBOL = Dict(n => s for (s, n) in ELEMENT_SYMBOL_TO_ATOMIC_NUMBER)

element_symbol_to_atomic_number(element_symbol::AbstractString) = ELEMENT_SYMBOL_TO_ATOMIC_NUMBER[uppercase(element_symbol)]
atomic_number_to_element_symbol(atomic_number::Integer) = ATOMIC_NUMBER_TO_ELEMENT_SYMBOL[atomic_number]

encode_atom_name(atom_name::AbstractString) = reinterpret(UInt32, codeunits(lpad(atom_name, 4)))[1]
decode_atom_name(atom_name::UInt32) = String(reinterpret(UInt8, [atom_name]))

mutable struct Atom{T<:AbstractFloat}
atom_name::UInt32
atomic_number::UInt8
x::T
y::T
z::T
end

coords(atom::Atom) = [atom.x, atom.y, atom.z]

Base.summary(atom::Atom) = "$(elements[atom.atomic_number].name) atom at [$(atom.x), $(atom.y), $(atom.z)])"

Atom(atom_name::UInt32, atomic_number::Integer, x::T, y::T, z::T) where T = Atom{T}(atom_name, atomic_number, x, y, z)
Atom(atom_name::UInt32, element_symbol::AbstractString, args...) = Atom(atom_name, element_symbol_to_atomic_number(element_symbol), args...)
Atom(atom_name::AbstractString, args...) = Atom(encode_atom_name(atom_name), args...)
Atom(atom_name::AbstractString, element_symbol, coords::AbstractVector{<:AbstractFloat}) = Atom(atom_name, element_symbol, coords...)
75 changes: 57 additions & 18 deletions src/chain-properties.jl
Original file line number Diff line number Diff line change
@@ -1,40 +1,79 @@
# TODO: store non-backbone atoms

function assign_bond_lengths!(chain::ProteinChain)
chain.bond_lengths = Backboner.get_bond_lengths(Backboner.Backbone(chain.backbone))
end
export assign_property!

function assign_bond_angles!(chain::ProteinChain)
chain.bond_angles = Backboner.get_bond_angles(Backboner.Backbone(chain.backbone))
end
export residue_indexing_function
export ResidueIndexingUndefined

function assign_torsion_angles!(chain::ProteinChain)
chain.torsion_angles = Backboner.get_torsional_angles(Backboner.Backbone(chain.backbone))
end
assign_property!(x, name::Symbol, args...) = assign_property!(x, Val(name), args...)

function assign_secondary_structure!(chain::ProteinChain, dict::Union{Dict{ProteinChain,Any},Nothing}=nothing)
function assign_property!(chain::ProteinChain, ::Val{:secondary_structure}, dict::Union{Dict{ProteinChain,Any},Nothing}=nothing)
number_vector = isnothing(dict) ? ASS.assign_secondary_structure(chain) : dict[chain]
chain.secondary_structure = number_vector_to_code_string(number_vector)
end

function assign_standard_residue!(chain::ProteinChain, standard_residue::Matrix{<:Real}=STANDARD_RESIDUE_ANGSTROM)
chain.standard_residue = standard_residue
function assign_property!(chain::ProteinChain, ::Val{:ideal_residue}, ideal_residue::AbstractMatrix{<:Real}=DEFAULT_IDEAL_RESIDUE)
chain.ideal_residue = collect(ideal_residue)
end

function assign_residue_rotations!(chain::ProteinChain)
frames = Backboner.Frames(Backboner.Backbone(chain.backbone), chain.standard_residue)
function assign_property!(chain::ProteinChain, ::Val{:residue_rotations})
frames = Backboner.Frames(Backboner.Backbone(chain.backbone), chain.ideal_residue)
chain.residue_rotations = frames.rotations
end

function assign_residue_rotations_quat!(chain::ProteinChain)
frames = Backboner.Frames(Backboner.Backbone(chain.backbone), chain.standard_residue)
function assign_property!(chain::ProteinChain, ::Val{:residue_rotations_quat})
frames = Backboner.Frames(Backboner.Backbone(chain.backbone), chain.ideal_residue)
chain.residue_rotations_quat = rotation_matrices_to_quaternions(frames.rotations)
end

function assign_residue_translations!(chain::ProteinChain)
function assign_property!(chain::ProteinChain, ::Val{:residue_translations})
chain.residue_translations = Backboner.centroid(chain.backbone; dims=2)
end

function assign_is_knotted!(chain::ProteinChain)
function assign_property!(chain::ProteinChain, ::Val{:bond_lengths})
chain.bond_lengths = Backboner.get_bond_lengths(Backboner.Backbone(chain.backbone))
end

function assign_property!(chain::ProteinChain, ::Val{:bond_angles})
chain.bond_angles = Backboner.get_bond_angles(Backboner.Backbone(chain.backbone))
end

function assign_property!(chain::ProteinChain, ::Val{:torsion_angles})
chain.torsion_angles = Backboner.get_torsional_angles(Backboner.Backbone(chain.backbone))
end

function assign_property!(chain::ProteinChain, ::Val{:is_knotted})
chain.is_knotted = Backboner.is_knotted(Backboner.Backbone(chain.backbone)[2:3:end])
end


struct ResidueIndexingUndefined <: Exception
name::Symbol
end

ResidueIndexingUndefined(val::Val) = typeof(val).type_parameters[1]

Base.showerror(io::IO, err::ResidueIndexingUndefined) = print(io, "$(typeof(err)): property `:$(err.name)` does not have a defined reorder function. Property needs reassignment.")

residue_indexing_function(name::Symbol) = residue_indexing_function(Val(name))

# move non-properties to getindex(::ProteinChain)
residue_indexing_function(::Val{:id}) = (x, i) -> x[i]
residue_indexing_function(::Val{:sequence}) = (x, i) -> x[i]
residue_indexing_function(::Val{:backbone}) = (x, i) -> x[:, :, i]
residue_indexing_function(::Val{:atoms}) = (x, i) -> x[i]

residue_indexing_function(::Val{:secondary_structure}) = (x, i) -> x[i]
residue_indexing_function(::Val{:residue_rotations}) = (x, i) -> x[:, :, i]
residue_indexing_function(::Val{:residue_rotations_quat}) = (x, i) -> x[:, i]
residue_indexing_function(::Val{:residue_translations}) = (x, i) -> x[:, i]

residue_indexing_function(name::Val{:bond_lengths}) = throw(ResidueIndexingUndefined(name))
residue_indexing_function(name::Val{:bond_angles}) = throw(ResidueIndexingUndefined(name))
residue_indexing_function(name::Val{:torsion_angles}) = throw(ResidueIndexingUndefined(name))
residue_indexing_function(name::Val{:is_knotted}) = throw(ResidueIndexingUndefined(name))

flatten_property(chains::AbstractVector{<:ProteinChain}, ::Val{:sequence}) = join(chain -> chain.sequence, chains)


function extend_property! end
74 changes: 32 additions & 42 deletions src/chain.jl
Original file line number Diff line number Diff line change
@@ -1,64 +1,54 @@
"""
ProteinChain
ProteinChain{T<:AbstractFloat}
## Examples
```jldoctest
julia> structure = pdb"1EYE"
julia> structure = pdb"1EYE";
[ Info: Downloading file from PDB: 1EYE
1-chain ProteinStructure "1EYE.cif" with 0 properties:
256-residue ProteinChain "A" with 0 properties
julia> structure[1]
256-residue ProteinChain "A" with 0 properties:
253-residue ProteinChain "A" with 2 properties:
fields:
id::String = "A"
aminoacids::String = "PVQVMGVLNVTDDSFSDGGCYLDLDDAVKHGLAMAAAGAGIVDVGGETSRVIPVVKELAAQGITVSIDTMRADVARAALQNGAQMVNDVSGGRADPAM…
backbone::Array{Float64, 3} = [45.592 44.171 43.719; -10.864 -10.936 -9.688; 30.192 30.504 31.278;;; 42.568 42.02 40.707; -9.163 …
numbers::Vector{Int64} = [5, 6, 7, 8, 9, 10, 11, 12, 13, 14 … 265, 266, 267, 268, 269, 270, 271, 272, 273, 274]
properties: (none)
sequence::String = "PVQVMGVLNVTDDSFSDGGCYLDLDDAVKHGLAMAAAGAGIVDVGGETSRVIPVVKELAAQGITVIDTMRADVARAALQNGAQMVNDVSGGRADPAMG…
backbone::Array{Float64,3} = [45.592 44.171 43.719; -10.864 -10.936 -9.688; 30.192 30.504 31.278;;; 42.568 42.02 40.707; -9.163 …
atoms::Vector{Vector{ProteinChains.Atom{Float64}}} = Vector{Atom{Float64}}[[Atom{Float64}(0x4f202020, 0x08, 44.41, -9.176, 32.157), Atom{Float64}(0x4243…
properties:
numbering::Vector{Int64} = [5, 6, 7, 8, 9, 10, 11, 12, 13, 14 … 265, 266, 267, 268, 269, 270, 271, 272, 273, 274]
modelnum::Int64 = 1
```
"""
mutable struct ProteinChain
mutable struct ProteinChain{T<:AbstractFloat}
id::String
aminoacids::String
backbone::Array{Float64,3}
numbers::Vector{Int}
properties::Dict{Symbol,Any}

function ProteinChain(id::String, aminoacids::String, backbone::Array{Float64,3}, numbers::Vector{Int}, properties::Dict{Symbol,Any})
chain = new(id, aminoacids, backbone, numbers, properties)
countresidues(chain)
@assert all(!in(fieldnames(ProteinChain)), keys(properties))
return chain
end
sequence::String
backbone::Array{T,3}
atoms::Vector{Vector{Atom{T}}}
properties::Properties
end

Base.getproperty(chain::ProteinChain, property::Symbol) = _getproperty(chain, property)
Base.setproperty!(chain::ProteinChain, property::Symbol, value) = _setproperty!(chain, property, value)

function countresidues(chain::ProteinChain)
@assert length(chain.aminoacids) == size(chain.backbone, 3) == length(chain.numbers)
return length(chain.aminoacids)
@assert length(chain.sequence) == size(chain.backbone, 3)
return length(chain.sequence)
end

function ProteinChain(id::String, aminoacids::String, backbone::Array{Float64,3}, numbers::Vector{Int}; kwargs...)
chain = ProteinChain(id, aminoacids, backbone, numbers, Dict{Symbol,Any}())
!isempty(kwargs) && push!(chain.properties, kwargs...)
function ProteinChain(id::String, sequence::String, backbone::Array{T,3}, atoms::Vector{Vector{Atom{T}}}, properties::Properties) where T
chain = ProteinChain{T}(id, sequence, backbone, atoms, properties)
countresidues(chain)
@assert size(backbone)[1:2] == (3, 3)
return chain
end

function ProteinChain(id::String, sequence::String, backbone::Array{T,3}, atoms::Vector{Vector{Atom{T}}}=Vector{Atom{T}}[]; kwargs...) where T
return ProteinChain(id, sequence, backbone, atoms, Properties(; kwargs...))
end

Base.hasproperty(chain::ProteinChain, name::Symbol) = hasproperty(HasProperties(chain), name)
Base.getproperty(chain::ProteinChain, name::Symbol) = getproperty(HasProperties(chain), name)
Base.setproperty!(chain::ProteinChain, name::Symbol, value) = setproperty!(HasProperties(chain), name, value)
Base.propertynames(chain::ProteinChain) = propertynames(HasProperties(chain))

Base.summary(chain::ProteinChain) = "$(countresidues(chain))-residue ProteinChain \"$(chain.id)\" with $(length(chain.properties)) properties"
# where T <: Union{ProteinChain,ProteinStructure}

function Base.show(io::IO, ::MIME"text/plain", chain::ProteinChain)
context = IOContext(io, :compact => true, :limit => true)
print(context, summary(chain), ":")
printstyled(context, "\n fields:", color=:yellow)
for fieldname in fieldnames(ProteinChain)[1:end-1]
printfield(context, chain, fieldname)
end
printstyled(context, "\n properties:", color=:yellow)
isempty(chain.properties) && print(io, " (none)")
for property in keys(chain.properties)
printfield(context, chain, property)
end
end
Base.show(io::IO, ::MIME"text/plain", chain::ProteinChain) = showproperties(io, chain)
Empty file added src/flatten.jl
Empty file.
80 changes: 67 additions & 13 deletions src/idealize.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,18 @@
"""
BackboneGeometry(;
N_Ca_length = 1.46,
Ca_C_length = 1.52,
C_N_length = 1.33,
N_Ca_C_angle = 1.94,
Ca_C_N_angle = 2.03,
C_N_Ca_angle = 2.13,
)
Define the idealized bond lengths and bond angles of a protein backbone.
"""
Base.@kwdef struct BackboneGeometry
N_Ca_length = 1.46
N_Ca_length = 1.46
Ca_C_length = 1.52
C_N_length = 1.33

Expand All @@ -8,25 +21,66 @@ Base.@kwdef struct BackboneGeometry
C_N_Ca_angle = 2.13
end

# TODO: make this align with default BackboneGeometry
const STANDARD_RESIDUE_ANGSTROM = [
const DEFAULT_BACKBONE_GEOMETRY = BackboneGeometry()

"""
IdealResidue{T<:AbstractFloat} <: AbstractMatrix{T}
IdealResidue{T}(backbone_geometry::BackboneGeometry=DEFAULT_BACKBONE_GEOMETRY)
"""
struct IdealResidue{T<:AbstractFloat} <: AbstractMatrix{T}
backbone_geometry::BackboneGeometry
N_Ca_C_coords::Matrix{T}

function IdealResidue{T}(backbone_geometry::BackboneGeometry=DEFAULT_BACKBONE_GEOMETRY) where T
N_Ca_C_coords = Matrix{T}(undef, 3, 3)
N_pos, Ca_pos, C_pos = eachcol(N_Ca_C_coords)
Θ = backbone_geometry.N_Ca_C_angle - π/2
N_pos .= [0, 0, 0]
Ca_pos .= [backbone_geometry.N_Ca_length, 0, 0]
C_pos .= Ca_pos + backbone_geometry.Ca_C_length * [sin(Θ), cos(Θ), 0]
centroid = (N_pos + Ca_pos + C_pos) / 3
N_Ca_C_coords .-= centroid
new{T}(backbone_geometry, N_Ca_C_coords)
end
end

Base.size(::IdealResidue) = (3,3)
Base.getindex(ideal_residue::IdealResidue, args...) = ideal_residue.N_Ca_C_coords[args...]

const DEFAULT_IDEAL_RESIDUE = IdealResidue{Float64}()

const OLD_STANDARD_RESIDUE_ANGSTROM = [
-1.066 -0.200 1.266;
0.645 -0.527 -0.118;
0.000 0.000 0.000;
] # N Ca Cs

function append_residue(backbone::Backbone, dihedrals::Vector{<:Real}; ideal::BackboneGeometry=BackboneGeometry())
length(dihedrals) == 3 || throw(ArgumentError("length of `dihedrals` must be 3, as only 3 backbone atoms are introduced."))
lengths = [ideal.C_N_length, ideal.N_Ca_length, ideal.Ca_C_length]
angles = [ideal.Ca_C_N_angle, ideal.C_N_Ca_angle, ideal.N_Ca_C_angle]
append_bonds(backbone, Float64.(lengths), Float64.(angles), Float64.(dihedrals))
"""
append_residue(Backbone::Backbone, torsion_angles::Vector{<:Real}; ideal::BackboneGeometry=DEFAULT_BACKBONE_GEOMETRY)
Create a new backbone by appending 3 torsion angles (ψ, ω, ϕ) at the end, using bond lengths and bond angles specified in BackboneGeometry.
"""
function append_residue(backbone::Backbone, torsion_angles::Vector{<:Real}; ideal::BackboneGeometry=DEFAULT_BACKBONE_GEOMETRY)
length(torsion_angles) == 3 || throw(ArgumentError("length of `torsion_angles` must be 3, as only 3 backbone atoms are introduced."))
bond_lengths = [ideal.C_N_length, ideal.N_Ca_length, ideal.Ca_C_length]
bond_angles = [ideal.Ca_C_N_angle, ideal.C_N_Ca_angle, ideal.N_Ca_C_angle]
append_bonds(backbone, Float64.(bond_lengths), Float64.(bond_angles), Float64.(torsion_angles))
end

function prepend_residue(backbone::Backbone, dihedrals::Vector{<:Real}; ideal::BackboneGeometry=BackboneGeometry())
length(dihedrals) == 3 || throw(ArgumentError("length of `dihedrals` must be 3, as only 3 backbone atoms are introduced."))
lengths = [ideal.N_Ca_length, ideal.Ca_C_length, ideal.C_N_length]
angles = [ideal.N_Ca_C_angle, ideal.Ca_C_N_angle, ideal.C_N_Ca_angle]
return prepend_bonds(backbone, Float64.(lengths), Float64.(angles), Float64.(dihedrals))
"""
append_residue(Backbone::Backbone, torsion_angles::Vector{<:Real}; ideal::BackboneGeometry=DEFAULT_BACKBONE_GEOMETRY)
Create a new backbone by prepending 3 torsion angles (ψ, ω, ϕ) at the end, using bond lengths and bond angles specified in BackboneGeometry.
!!! note
The torsion angle order is the same as it would be when appending. The order is *not* reversed.
"""
function prepend_residue(backbone::Backbone, torsion_angles::Vector{<:Real}; ideal::BackboneGeometry=DEFAULT_BACKBONE_GEOMETRY)
length(torsion_angles) == 3 || throw(ArgumentError("length of `torsion_angles` must be 3, as only 3 backbone atoms are introduced."))
bond_lengths = [ideal.N_Ca_length, ideal.Ca_C_length, ideal.C_N_length]
bond_angles = [ideal.N_Ca_C_angle, ideal.Ca_C_N_angle, ideal.C_N_Ca_angle]
return prepend_bonds(backbone, Float64.(bond_lengths), Float64.(bond_angles), Float64.(torsion_angles))
end

# TODO: use cooldown (and Float64?) to increase precision and accuracy
Expand Down
Loading

0 comments on commit 04c9b5e

Please sign in to comment.