Skip to content

Commit

Permalink
IO overhaul
Browse files Browse the repository at this point in the history
  • Loading branch information
AntonOresten committed Oct 29, 2024
1 parent 02266bf commit 249231d
Show file tree
Hide file tree
Showing 12 changed files with 179 additions and 119 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "ProteinChains"
uuid = "b8e8f2a5-48d3-44f1-ba0d-c71cb7726ff8"
authors = ["Anton Oresten <[email protected]> and contributors"]
version = "0.3.3"
version = "0.4.0"

[deps]
AssigningSecondaryStructure = "8ed43e74-60fb-4e11-99b9-91deed37aef7"
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ julia> chain = structure["A"]
256-residue ProteinChain{Float64} (A)

julia> propertynames(chain)
(:id, :atoms, :sequence, :numbering, :ins_codes, :modelnum, :renumbering)
(:id, :atoms, :sequence, :numbering, :ins_codes, :renumbering)
```
To store additional properties, `addproperties` can be used to attach persistent chain-level properties or indexable residue-level properties:
Expand All @@ -48,7 +48,7 @@ julia> new_chain[1:100].rand3
0.169268 0.117848 0.732741 0.301921 0.187094 0.281187

julia> propertynames(new_chain)
(:id, :atoms, :sequence, :numbering, :ins_codes, :modelnum, :rand3, :renumbering, :taxid)
(:id, :atoms, :sequence, :numbering, :ins_codes, :rand3, :renumbering, :taxid)
```
## See also
Expand Down
4 changes: 2 additions & 2 deletions src/ProteinChains.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ export Atom
@compat public (atom_name, atom_number, atom_coords, atom_symbol)

include("properties.jl")
export PersistentProperty, IndexableProperty
export StandardProperty, IndexableProperty
export addproperties, removeproperties
@compat public (AbstractProperty, NamedProperties)
@compat public AbstractProperty

include("chain.jl")
export ProteinChain
Expand Down
60 changes: 27 additions & 33 deletions src/chain.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
ProteinChain{T<:Real,Ps<:NamedProperties}
ProteinChain{T<:Real}
Represents a protein chain with a basic set of fields from which some other properties might be derived.
The [`addproperties`](@ref) function can be used to instantiate new chains with additional properties.
Expand All @@ -11,7 +11,7 @@ The [`addproperties`](@ref) function can be used to instantiate new chains with
- `numbering::Vector{Int32}`: Residue numbering (author). See [`renumber`](@ref) for renumbering.
- `properties::NamedProperties`: Named properties associated with the chain.
See also [`addproperties`](@ref), [`PersistentProperty`](@ref), [`IndexableProperty`](@ref).
See also [`addproperties`](@ref), [`StandardProperty`](@ref), [`IndexableProperty`](@ref).
```
"""
struct ProteinChain{T<:Real}
Expand All @@ -22,27 +22,30 @@ struct ProteinChain{T<:Real}
properties::NamedProperties

function ProteinChain{T}(
id, atoms::Vector{Vector{Atom{T}}}, sequence::String, numbering::Vector{<:Integer}, properties::NamedProperties,
id::AbstractString,
atoms::Vector{Vector{Atom{T}}},
sequence::AbstractString,
numbering::Vector{Int32},
properties::NamedProperties,
) where T
len = length(atoms)
@assert sizeof(sequence) == len
@assert length(numbering) == len
@assert length(atoms) == sizeof(sequence) == length(numbering)
chain = new{T}(id, atoms, sequence, numbering, sortnames(properties))
for property in properties
property isa IndexableProperty && @assert size(property[], ndims(property[])) == len
checkproperty(chain, property)
end
new{T}(id, atoms, sequence, Int32.(numbering), sortnames(properties))
return chain
end
end

ProteinChain(id, atoms::Vector{Vector{Atom{T}}}, sequence, numbering, properties) where T =
ProteinChain{T}(id, atoms, sequence, numbering, properties)
function ProteinChain(id, atoms::Vector{Vector{Atom{T}}}, sequence, numbering::Vector{<:Integer}, properties::NamedTuple) where T
ProteinChain{T}(id, atoms, sequence, Int32.(numbering), namedproperties(properties))
end

ProteinChain(id, atoms, sequence, numbering) = ProteinChain(id, atoms, sequence, numbering, (;))

Base.convert(::Type{ProteinChain{T}}, chain::ProteinChain) where T =
ProteinChain(chain.id, convert(Vector{Vector{Atom{T}}}, chain.atoms), chain.sequence, chain.numbering, chain.properties)

# No need to call sortnames here: the constructor already stores properties in a consistent (sorted-name) order.
function Base.:(==)(chain1::ProteinChain, chain2::ProteinChain)
propertynames(chain1, false) != propertynames(chain2, false) && return false
!any(getproperty(chain1, name) != getproperty(chain2, name) for name in propertynames(chain1, false))
Expand All @@ -51,32 +54,27 @@ end
Base.length(chain::ProteinChain) = length(chain.atoms)

Base.getproperty(chain::ProteinChain, name::Symbol) =
name in fieldnames(ProteinChain) ? getfield(chain, name) : getfield(getfield(chain, :properties), name)[]
name in fieldnames(ProteinChain) ? getfield(chain, name) : unpack(getfield(getfield(chain, :properties), name))

Base.propertynames(chain::ProteinChain, private::Bool=false) = (setdiff(fieldnames(ProteinChain), private ? () : (:properties,))..., propertynames(chain.properties)...)

function Base.getindex(chain::ProteinChain, i::AbstractVector)
function Base.getindex(chain::ProteinChain, i::Union{AbstractVector,Colon})
properties = map(p -> p[i], chain.properties)
ProteinChain(chain.id, chain.atoms[i], chain.sequence[i], chain.numbering[i], properties)
end

setproperties(chain::ProteinChain, ps::NamedProperties) = ProteinChain(chain.id, chain.atoms, chain.sequence, chain.numbering, ps)
setproperties(chain::ProteinChain, ps::NamedTuple) = ProteinChain(chain.id, chain.atoms, chain.sequence, chain.numbering, ps)

"""
addproperties(chain::ProteinChain; properties...)
Creates a new `ProteinChain` instance with the added properties.
Indexing behavior of property values can be specified by wrapping
them with `PersistentProperty` or `IndexableProperty`.
Values get wrapped by `PersistentProperty` by default.
Indexing of property values can be specified with a wrapper type,
such as `IndexableProperty`.
See also [`removeproperties`](@ref), [`PersistentProperty`](@ref), [`IndexableProperty`](@ref)
See also [`removeproperties`](@ref), [`IndexableProperty`](@ref).
"""
function addproperties(chain::ProteinChain; properties...)
properties = map(p -> p isa AbstractProperty ? p : PersistentProperty(p), NamedTuple(properties))
setproperties(chain, merge(chain.properties, properties))
end
addproperties(chain::ProteinChain; properties...) = setproperties(chain, merge(chain.properties, properties))

"""
removeproperties(chain::ProteinChain, names::Symbol...)
Expand All @@ -85,11 +83,7 @@ Creates a new `ProteinChain` instance with the property names in `names` removed
See also [`addproperties`](@ref)
"""
function removeproperties(chain::ProteinChain, names::Symbol...)
new_propertynames = filter(name -> name ∉ names, propertynames(chain.properties))
properties = NamedTuple{new_propertynames}(chain.properties)
setproperties(chain, properties)
end
removeproperties(chain::ProteinChain, names::Symbol...) = setproperties(chain, NamedTuple{filter(name -> name ∉ names, propertynames(chain.properties))}(chain.properties))

Base.summary(chain::ProteinChain) = "$(length(chain))-residue $(typeof(chain)) ($(chain.id))"

Expand Down Expand Up @@ -163,11 +157,11 @@ addproperties(chain::ProteinChain, names::Symbol...) = addproperties(chain; Name

calculate_property(x, name::Symbol, args...) = calculate_property(x, Val(name), args...)

calculate_property(chain::ProteinChain, ::Val{:ideal_residue}) = collect(STANDARD_RESIDUE) |> PersistentProperty
calculate_property(chain::ProteinChain, ::Val{:bond_lengths}) = Backboner.get_bond_lengths(Backboner.Backbone(get_backbone(chain))) |> PersistentProperty
calculate_property(chain::ProteinChain, ::Val{:bond_angles}) = Backboner.get_bond_angles(Backboner.Backbone(get_backbone(chain))) |> PersistentProperty
calculate_property(chain::ProteinChain, ::Val{:torsion_angles}) = Backboner.get_torsion_angles(Backboner.Backbone(get_backbone(chain))) |> PersistentProperty
calculate_property(chain::ProteinChain, ::Val{:is_knotted}) = Backboner.is_knotted(Backboner.Backbone(get_backbone(chain)[:,2,:])) |> PersistentProperty
calculate_property(chain::ProteinChain, ::Val{:ideal_residue}) = collect(STANDARD_RESIDUE)
calculate_property(chain::ProteinChain, ::Val{:bond_lengths}) = Backboner.get_bond_lengths(Backboner.Backbone(get_backbone(chain)))
calculate_property(chain::ProteinChain, ::Val{:bond_angles}) = Backboner.get_bond_angles(Backboner.Backbone(get_backbone(chain)))
calculate_property(chain::ProteinChain, ::Val{:torsion_angles}) = Backboner.get_torsion_angles(Backboner.Backbone(get_backbone(chain)))
calculate_property(chain::ProteinChain, ::Val{:is_knotted}) = Backboner.is_knotted(Backboner.Backbone(get_backbone(chain)[:,2,:]))

calculate_property(chain::ProteinChain, ::Val{:backbone}) = get_backbone(chain) |> IndexableProperty
calculate_property(chain::ProteinChain, ::Val{:secondary_structure}) = Int8.(assign_secondary_structure(get_backbone(chain))) |> IndexableProperty
Expand Down
21 changes: 11 additions & 10 deletions src/io/read.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Base.convert(::Type{Atom{T}}, atom::BioStructures.Atom) where T = convert(Atom{T
Base.convert(::Type{Atom{T}}, atom::BioStructures.DisorderedAtom) where T = convert(Atom{T}, BioStructures.defaultatom(atom))

function get_atoms(::Type{Atom{T}}, residues::Vector{BioStructures.AbstractResidue}) where T
atoms = Vector{Atom{Float64}}[]
atoms = Vector{Atom{T}}[]
for residue in residues
residue = residue isa BioStructures.DisorderedResidue ? BioStructures.defaultresidue(residue) : residue
residue_atoms = map(atom -> convert(Atom{T}, atom), BioStructures.collectatoms(residue))
Expand All @@ -28,26 +28,27 @@ function ProteinChain{T}(chain::BioStructures.Chain) where T
proteinchain = if isempty(residues)
ProteinChain(BioStructures.chainid(chain), Vector{Atom{T}}[], "", Int[])
else
id = only(unique(map(BioStructures.chainid, residues)))
id = BioStructures.chainid(chain)
atoms = get_atoms(Atom{T}, residues)
sequence = get_sequence(residues)
numbering = map(BioStructures.resnumber, residues)
ProteinChain(id, atoms, sequence, numbering)
end
return addproperties(proteinchain;
modelnum = BioStructures.modelnumber(chain),
ins_codes = IndexableProperty(map(Int8 ∘ BioStructures.inscode, residues)),
)
end

function ProteinStructure{T}(model::BioStructures.Model) where T
return ProteinStructure(
model.structure.name,
map(atom -> convert(Atom{T}, atom), BioStructures.collectatoms(BioStructures.collectresidues(model, !backbone_residue_selector))),
[ProteinChain{T}(chain) for chain in model]
)
end

function ProteinStructure{T}(struc::BioStructures.MolecularStructure; mmcifdict=nothing) where T
proteinchains = ProteinChain{T}[]
atoms = map(atom -> convert(Atom{T}, atom), BioStructures.collectatoms(BioStructures.collectresidues(struc, !backbone_residue_selector)))
for model in struc, chain in model
push!(proteinchains, ProteinChain{T}(chain))
end
proteinstructure = ProteinStructure(struc.name, atoms, proteinchains)
!isnothing(mmcifdict) && renumber!(proteinstructure, mmcifdict)
proteinstructure = ProteinStructure{T}(first(BioStructures.collectmodels(struc)))
return proteinstructure
end

Expand Down
7 changes: 4 additions & 3 deletions src/io/write.jl
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,10 @@ function BioStructures.MolecularStructure(proteinstruc::ProteinStructure)
models = Dict{Int64, BioStructures.Model}()
struc = BioStructures.MolecularStructure(proteinstruc.name, models)
for proteinchain in proteinstruc
modelnum = hasproperty(proteinchain, :modelnum) ? proteinchain.modelnum : 1
modelnum in keys(models) || (models[modelnum] = BioStructures.Model(modelnum, Dict{String, BioStructures.Chain}(), struc))
model = models[modelnum]
modelnum = 1 # only supporting one model for now
model = get!(models, modelnum) do
BioStructures.Model(modelnum, Dict{String, BioStructures.Chain}(), struc)
end
model.chains[proteinchain.id] = BioStructures.Chain(proteinchain, model)
end
return struc
Expand Down
48 changes: 28 additions & 20 deletions src/properties.jl
Original file line number Diff line number Diff line change
@@ -1,27 +1,35 @@
abstract type AbstractProperty end
function setproperties end
function addproperties end
function removeproperties end

Base.getindex(p::AbstractProperty) = p.value
sortnames(np::NamedTuple{names}) where names = NamedTuple{Tuple(sort(collect(names)))}(np)

"""
PersistentProperty(value)
AbstractProperty
A property of arbitrary type that persists after residue indexing of a chain.
Abstract type for wrapped properties associated with a [`ProteinChain`](@ref) to define custom behavior.
"""
abstract type AbstractProperty end

```jldoctest
julia> chain = addproperties(pdb"1ASS"A; x=PersistentProperty(1));
const NamedProperties{names} = NamedTuple{names,<:Tuple{Vararg{AbstractProperty}}}

julia> chain.x == chain[1:10].x
true
```
"""
struct PersistentProperty{T} <: AbstractProperty
namedproperties(properties::NamedTuple) = map(properties) do value
value isa AbstractProperty ? value : StandardProperty(value)
end

checkproperty(::Any, ::AbstractProperty) = nothing

unpack(x) = x
unpack(p::AbstractProperty) = p.value

struct StandardProperty{T} <: AbstractProperty
value::T
end

Base.getindex(p::PersistentProperty, ::AbstractVector) = p
Base.getindex(p::AbstractProperty, ::Any) = unpack(p)

"""
IndexableProperty <: AbstractProperty
IndexableProperty
IndexableProperty(value::AbstractArray)
Expand All @@ -43,11 +51,11 @@ true
struct IndexableProperty{T<:AbstractArray} <: AbstractProperty
value::T
end
Base.getindex(p::IndexableProperty, i::Union{AbstractVector,Colon}) = selectdim(p.value, ndims(p.value), i) |> IndexableProperty

Base.getindex(p::IndexableProperty, i::AbstractVector) = selectdim(p.value, ndims(p.value), i) |> IndexableProperty

const NamedProperties{names} = NamedTuple{names,<:Tuple{Vararg{AbstractProperty}}}

sortnames(np::NamedProperties{names}) where names = NamedTuple{Tuple(sort(collect(names)))}(np)

function addproperties end
function checkproperty(parent, p::IndexableProperty)
if size(p.value, ndims(p.value)) != length(parent)
throw(DimensionMismatch("Property $(p.value) has length $(size(p.value, ndims(p.value))) but parent has length $(length(parent))"))
end
return nothing
end
Loading

0 comments on commit 249231d

Please sign in to comment.