Skip to content

Commit

Permalink
Merge pull request #9 from MurrellGroup/simplify-properties
Browse files Browse the repository at this point in the history
Simplify properties
  • Loading branch information
AntonOresten authored Oct 7, 2024
2 parents 51c3c32 + bc36e46 commit 4bb00a0
Show file tree
Hide file tree
Showing 17 changed files with 374 additions and 262 deletions.
8 changes: 4 additions & 4 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
name = "ProteinChains"
uuid = "b8e8f2a5-48d3-44f1-ba0d-c71cb7726ff8"
authors = ["Anton Oresten <[email protected]> and contributors"]
version = "0.2.0"
version = "0.3.0"

[deps]
AssigningSecondaryStructure = "8ed43e74-60fb-4e11-99b9-91deed37aef7"
Backboner = "9ac9c2a2-1cfe-46d3-b3fd-6fa470ea56a7"
BioStructures = "de9282ab-8554-53be-b2d6-f6c222edabfc"
DynamicStructs = "e139c391-eeee-4818-b359-c8725224fb1f"
HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
PeriodicTable = "7b2266bf-644c-5ea3-82d8-af4bbd25a884"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"

[compat]
AssigningSecondaryStructure = "0.5"
Backboner = "0.12"
BioStructures = "4"
DynamicStructs = "0.1"
HDF5 = "0.17"
LinearAlgebra = "1"
PeriodicTable = "1"
julia = "1"
Expand Down
48 changes: 17 additions & 31 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
[![Build Status](https://github.com/MurrellGroup/ProteinChains.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/MurrellGroup/ProteinChains.jl/actions/workflows/CI.yml?query=branch%3Amain)
[![Coverage](https://codecov.io/gh/MurrellGroup/ProteinChains.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/MurrellGroup/ProteinChains.jl)

This Julia package provides implements the `ProteinChain` type: a GPU-friendly structure-of-arrays representation of protein chains.
This Julia package implements the `ProteinChain` type: a chain-level structure-of-arrays representation of proteins, with support for indexing by residue index.

## Installation

Expand All @@ -16,51 +16,37 @@ Pkg.add("ProteinChains")

## Examples

The `ProteinChain` type is meant to only store a set of quintessential fields, from which most other properties can be derived.
The `ProteinChain` type is meant to only store a basic set of fields, from which some other properties might be derived.

```julia
julia> using ProteinChains

julia> structure = pdb"1EYE" # string macro to fetch proteins from the PDB
[ Info: Downloading file from PDB: 1EYE
1-chain ProteinStructure "1EYE.cif"
256-residue ProteinChain{Float64} (A)
256-residue ProteinChain{Float64, @NamedTuple{}} (A)

julia> propertynames(structure["A"])
(:id, :sequence, :backbone, :numbering, :atoms)
(:id, :atoms, :sequence, :numbering)
```
To store additional properties, `AnnotatedProteinChain` can be used to add dynamic properties to the chain:
To store additional properties, `annotate` can be used to attach persistent chain-level properties or indexable residue-level properties:
```julia
julia> annotated_chain = annotate(chain; model=1)
256-residue AnnotatedProteinChain{Float64} (A):
6 fields:
id::String = "A"
sequence::String = <exceeds max length>
backbone::Array{Float64,3} = <exceeds max length>
numbering::Vector{Int64} = <exceeds max length>
atoms::Vector{Vector{ProteinChains.Atom{Float64}}} = <exceeds max length>
indexable_properties::Vector{Symbol} = Symbol[]
1 property:
model::Int64 = 1
```
julia> chain = structure["A"]
256-residue ProteinChain{Float64, @NamedTuple{}} (A)

For properties of type `<:AbstractArray` that represent residue-level information, `annotate_indexable!` will index the last dimension of the property when the chain is indexed:
julia> annotated_chain = annotate(chain; taxid=ChainProperty(83332))
256-residue ProteinChain{Float64, @NamedTuple{taxid::ChainProperty{Int64}}} (A)

```julia
julia> annotate_indexable!(annotated_chain; secondary_structure=assign_secondary_structure(annotated_chain)
256-residue AnnotatedProteinChain{Float64} (A):
6 fields:
id::String = "A"
sequence::String = <exceeds max length>
backbone::Array{Float64,3} = <exceeds max length>
numbering::Vector{Int64} = <exceeds max length>
atoms::Vector{Vector{ProteinChains.Atom{Float64}}} = <exceeds max length>
indexable_properties::Vector{Symbol} = [:secondary_structure]
2 properties:
model::Int64 = 1
secondary_structure::Vector{Int64} = [1, 1, 3, 3, 3, 3, 3, 3, 3, 1 2, 2, 2, 2, 2, 2, 2, 1, 1, 1]
julia> annotated_chain = annotate(annotated_chain; some_residue_property=ResidueProperty(rand(3,256))) # last dimension gets indexed
256-residue ProteinChain{Float64, @NamedTuple{taxid::ChainProperty{Int64}, some_residue_property::ResidueProperty{Matrix{Float64}}}} (A)

julia> annotated_chain[1:100].some_residue_property
3×100 Matrix{Float64}:
0.273545 0.639173 0.92708 0.459441 0.196407 0.880034
0.981498 0.70263 0.279264 0.552049 0.89274 0.0328866
0.169268 0.117848 0.732741 0.301921 0.187094 0.281187
```
## See also
Expand Down
26 changes: 12 additions & 14 deletions src/ProteinChains.jl
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
module ProteinChains

using DynamicStructs
using StaticArrays: SVector

using Backboner
export Backbone
export ChainedBonds
export get_bond_lengths, get_bond_angles, get_torsion_angles
export Frames

include("ideal.jl")
export BackboneGeometry
Expand All @@ -16,23 +12,25 @@ export append_residue
export prepend_residue

include("atom.jl")
export Atom
export encode_atom_name, decode_atom_name
export coords

include("properties.jl")
export ChainProperty, ResidueProperty

include("chain.jl")
export AbstractProteinChain
export ProteinChain
export length
export annotate
export map_atoms!
export psi_angles, omega_angles, phi_angles

include("annotated-chain.jl")
export AnnotatedProteinChain
export annotate!, annotate
export annotate_indexable!, annotate_indexable
export get_atoms, get_backbone

include("structure.jl")
export ProteinStructure

include("secondary-structure.jl")
export assign_secondary_structure, assign_secondary_structure!
include("dataset.jl")
export ProteinDataset

include("io/io.jl")
export readcif, readpdb
Expand Down
52 changes: 0 additions & 52 deletions src/annotated-chain.jl

This file was deleted.

35 changes: 17 additions & 18 deletions src/atom.jl
Original file line number Diff line number Diff line change
@@ -1,36 +1,35 @@
using PeriodicTable: elements

const ELEMENT_SYMBOL_TO_ATOMIC_NUMBER = Dict(uppercase(elements[atomic_number].symbol) => atomic_number for atomic_number in 1:118)
const ATOMIC_NUMBER_TO_ELEMENT_SYMBOL = Dict(n => s for (s, n) in ELEMENT_SYMBOL_TO_ATOMIC_NUMBER)
const ELEMENT_SYMBOL_TO_NUMBER = Dict(uppercase(elements[number].symbol) => number for number in 1:118)
const number_TO_ELEMENT_SYMBOL = Dict(n => s for (s, n) in ELEMENT_SYMBOL_TO_NUMBER)

element_symbol_to_atomic_number(element_symbol::AbstractString) = ELEMENT_SYMBOL_TO_ATOMIC_NUMBER[uppercase(strip(element_symbol))]
atomic_number_to_element_symbol(atomic_number::Integer) = ATOMIC_NUMBER_TO_ELEMENT_SYMBOL[atomic_number]
element_symbol_to_number(element_symbol::AbstractString) = ELEMENT_SYMBOL_TO_NUMBER[uppercase(strip(element_symbol))]
number_to_element_symbol(number::Integer) = number_TO_ELEMENT_SYMBOL[number]

pad_atom_name(name::AbstractString, element_symbol::AbstractString) = rpad(" "^(2-length(strip(element_symbol)))*strip(name), 4)

encode_atom_name(name::AbstractString, element_symbol::AbstractString) = reinterpret(UInt32, codeunits(pad_atom_name(name, element_symbol)))[1]
decode_atom_name(name::UInt32) = String(reinterpret(UInt8, [name]))

mutable struct Atom{T<:AbstractFloat}
struct Atom{T<:AbstractFloat}
name::UInt32
atomic_number::Int8
number::Int8
x::T
y::T
z::T
end

Atom(name::UInt32, atomic_number::Integer, x::T, y::T, z::T) where T = Atom{T}(name, atomic_number, x, y, z)
Atom(name::AbstractString, element_symbol::AbstractString, coords::AbstractVector{<:AbstractFloat}) =
Atom(encode_atom_name(name, element_symbol), element_symbol_to_atomic_number(element_symbol), coords...)
Atom{T}(atom::Atom) where T = Atom{T}(atom.name, atom.number, atom.x, atom.y, atom.z)

coords(atom::Atom) = [atom.x, atom.y, atom.z]
@inline Atom(name::UInt32, number::Integer, x::T, y::T, z::T) where T = Atom{T}(name, number, x, y, z)
@inline Atom(name::AbstractString, element_symbol::AbstractString, x::T, y::T, z::T) where T =
Atom(encode_atom_name(name, element_symbol), element_symbol_to_number(element_symbol), x, y, z)

Base.summary(atom::Atom) = "$(elements[atom.atomic_number].name) atom at [$(atom.x), $(atom.y), $(atom.z)])"
@inline Atom(name, number, coords::AbstractVector{T}) where T = Atom(name, number, coords...)

function offset!(atom::Atom, coords::Vector{<:Real})
@assert length(coords) == 3
atom.x += coords[1]
atom.y += coords[2]
atom.z += coords[3]
return atom
end
coords(atom::Atom) = SVector(atom.x, atom.y, atom.z)

Base.summary(atom::Atom) = "$(elements[atom.number].name) atom at [$(atom.x), $(atom.y), $(atom.z)])"

Base.show(io::IO, atom::Atom{T}) where T = print(io,
"Atom(\"$(decode_atom_name(atom.name))\", \"$(number_to_element_symbol(atom.number))\", $(coords(atom)))")
Loading

0 comments on commit 4bb00a0

Please sign in to comment.