From 51c3c32ea162b83913f36a144b42e84dd373bd10 Mon Sep 17 00:00:00 2001
From: anton083 <anton.oresten42@gmail.com>
Date: Wed, 11 Sep 2024 16:42:00 +0200
Subject: [PATCH] Add AnnotatedProteinChain, cache PDB downloads, include
 disordered atoms/residues

---
 Project.toml               |  2 +-
 README.md                  | 63 +++++++++++++++++++--------------
 src/ProteinChains.jl       | 15 +++++---
 src/annotated-chain.jl     | 52 ++++++++++++++++++++++++++++
 src/atom.jl                | 16 ++++-----
 src/chain.jl               | 71 +++++++++++++++++++-------------------
 src/io/download.jl         | 39 +++++++++++++++++++++
 src/io/io.jl               | 10 +-----
 src/io/read.jl             | 68 +++++++++++++++---------------------
 src/io/write.jl            | 27 ++++++++-------
 src/secondary-structure.jl | 16 +++------
 src/structure.jl           | 34 +++++++++---------
 test/runtests.jl           | 34 ++++++++++--------
 13 files changed, 266 insertions(+), 181 deletions(-)
 create mode 100644 src/annotated-chain.jl
 create mode 100644 src/io/download.jl
diff --git a/Project.toml b/Project.toml
index 9f8cc59..b7e8d3c 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "ProteinChains"
 uuid = "b8e8f2a5-48d3-44f1-ba0d-c71cb7726ff8"
 authors = ["Anton Oresten <anton.oresten42@gmail.com> and contributors"]
-version = "0.1.1"
+version = "0.2.0"
 
 [deps]
 AssigningSecondaryStructure = "8ed43e74-60fb-4e11-99b9-91deed37aef7"
diff --git a/README.md b/README.md
index 43ea5ea..c4249cf 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
 [![Build Status](https://github.com/MurrellGroup/ProteinChains.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/MurrellGroup/ProteinChains.jl/actions/workflows/CI.yml?query=branch%3Amain)
 [![Coverage](https://codecov.io/gh/MurrellGroup/ProteinChains.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/MurrellGroup/ProteinChains.jl)
 
-This Julia package provides implements the `ProteinChain` type: an efficient and GPU-friendly representation of protein chains. 
+This Julia package provides implements the `ProteinChain` type: a GPU-friendly structure-of-arrays representation of protein chains.
 
 ## Installation
 
@@ -16,44 +16,55 @@ Pkg.add("ProteinChains")
 
 ## Examples
 
+The `ProteinChain` type is meant to only store a set of quintessential fields, from which most other properties can be derived.
+
 ```julia
 julia> using ProteinChains
 
-julia> structure = pdb"1EYE" # convenient macro to download proteins from the PDB
+julia> structure = pdb"1EYE" # string macro to fetch proteins from the PDB
 [ Info: Downloading file from PDB: 1EYE
-1-chain ProteinStructure "1EYE.cif":
-  2 fields:
-    name::String = "1EYE.cif"
-    chains::Vector{ProteinChain{Float64}} = <exceeds max length>
-  2 properties:
-    ids::Vector{String} = ["A"]
-    lengths::Vector{Int64} = [253]
+1-chain ProteinStructure "1EYE.cif"
+ 256-residue ProteinChain{Float64} (A)
+
+julia> propertynames(chain)
+(:id, :sequence, :backbone, :numbering, :atoms)
+```
+
+To store additional properties, `AnnotatedProteinChain` can be used to add dynamic properties to the chain:
 
-julia> chain = structure["A"]
-253-residue ProteinChain "A":
-  4 fields:
+```julia
+julia> annotated_chain = annotate(chain; model=1)
+256-residue AnnotatedProteinChain{Float64} (A):
+  6 fields:
     id::String = "A"
     sequence::String = <exceeds max length>
     backbone::Array{Float64,3} = <exceeds max length>
+    numbering::Vector{Int64} = <exceeds max length>
     atoms::Vector{Vector{ProteinChains.Atom{Float64}}} = <exceeds max length>
-  2 properties:
+    indexable_properties::Vector{Symbol} = Symbol[]
+  1 property:
+    model::Int64 = 1
+```
+
+For properties of type `<:AbstractArray` that represent residue-level information, `annotate_indexable!` will index the last dimension of the property when the chain is indexed:
+
+```julia
+julia> annotate_indexable!(annotated_chain; secondary_structure=assign_secondary_structure(annotated_chain)
+256-residue AnnotatedProteinChain{Float64} (A):
+  6 fields:
+    id::String = "A"
+    sequence::String = <exceeds max length>
+    backbone::Array{Float64,3} = <exceeds max length>
     numbering::Vector{Int64} = <exceeds max length>
-    modelnum::Int64 = 1
-
-julia> chain.numbering
-253-element Vector{Int64}:
-   5
-   6
-   7
-   8
-   ⋮
- 271
- 272
- 273
- 274
+    atoms::Vector{Vector{ProteinChains.Atom{Float64}}} = <exceeds max length>
+    indexable_properties::Vector{Symbol} = [:secondary_structure]
+  2 properties:
+    model::Int64 = 1
+    secondary_structure::Vector{Int64} = [1, 1, 3, 3, 3, 3, 3, 3, 3, 1  …  2, 2, 2, 2, 2, 2, 2, 1, 1, 1]
 ```
 
 ## See also
+
 - [Backboner.jl](https://github.com/MurrellGroup/Backboner.jl)
 - [BioStructures.jl](https://github.com/BioJulia/BioStructures.jl)
 - [PDBTools.jl](https://github.com/m3g/PDBTools.jl)
diff --git a/src/ProteinChains.jl b/src/ProteinChains.jl
index 589bee8..0fd42d2 100644
--- a/src/ProteinChains.jl
+++ b/src/ProteinChains.jl
@@ -18,20 +18,25 @@ export prepend_residue
 include("atom.jl")
 
 include("chain.jl")
+export AbstractProteinChain
 export ProteinChain
-export countresidues
+export length
 export psi_angles, omega_angles, phi_angles
 
+include("annotated-chain.jl")
+export AnnotatedProteinChain
+export annotate!, annotate
+export annotate_indexable!, annotate_indexable
+
 include("structure.jl")
 export ProteinStructure
 
 include("secondary-structure.jl")
+export assign_secondary_structure, assign_secondary_structure!
 
 include("io/io.jl")
-export readchains
-export readpdb
-export readcif
-export writechains
+export readcif, readpdb
+export writecif, writepdb
 export PDBFormat, MMCIFFormat
 export pdbentry, @pdb_str
 
diff --git a/src/annotated-chain.jl b/src/annotated-chain.jl
new file mode 100644
index 0000000..9454f14
--- /dev/null
+++ b/src/annotated-chain.jl
@@ -0,0 +1,52 @@
+@dynamic mutable struct AnnotatedProteinChain{T} <: AbstractProteinChain{T}
+    id::String
+    sequence::String
+    backbone::Array{T,3}
+    numbering::Vector{Int64}
+    atoms::Vector{Vector{Atom{T}}}
+    indexable_properties::Vector{Symbol}
+
+    function AnnotatedProteinChain(
+        id::AbstractString, sequence::AbstractString, backbone::AbstractArray{T,3},
+        numbering::AbstractVector{<:Integer}, atoms::Vector{Vector{Atom{T}}},
+        indexable_properties=Symbol[]; kwargs...
+    ) where T
+        @assert length(sequence) == size(backbone, 3) == length(atoms) == length(numbering)
+        AnnotatedProteinChain{T}(id, sequence, backbone, atoms, numbering, indexable_properties; kwargs...)
+    end
+end
+
+ProteinChain(chain::AbstractProteinChain) = ProteinChain(chain.id, chain.sequence, chain.backbone, chain.numbering, chain.atoms)
+AnnotatedProteinChain(chain::AbstractProteinChain) = AnnotatedProteinChain(chain.id, chain.sequence, chain.backbone, chain.numbering, chain.atoms)
+
+function Base.getindex(chain::AnnotatedProteinChain, i::AbstractVector{<:Integer})
+    properties = Dict{Symbol,Any}()
+    for name in getproperties(chain; fields=false)
+        value = getproperty(chain, name)
+        properties[name] = name in chain.indexable_properties ? collect(selectdim(value, ndims(value), i)) : value
+    end
+    return annotate(ProteinChain(chain)[i]; properties...)
+end
+
+function annotate_indexable!(chain::AnnotatedProteinChain; annotations...)
+    for (name, value) in annotations
+        @assert value isa AbstractArray
+        @assert size(value)[end] == length(chain)
+        push!(chain.indexable_properties, name)
+        setproperty!(chain, name, value)
+    end
+    return chain
+end
+
+annotate_indexable(chain::AnnotatedProteinChain; annotations...) = annotate_indexable!(deepcopy(chain); annotations...)
+annotate_indexable(chain::ProteinChain; annotations...) = annotate_indexable!(AnnotatedProteinChain(chain); annotations...)
+
+function annotate!(chain::AnnotatedProteinChain; annotations...)
+    for (name, value) in annotations
+        setproperty!(chain, name, value)
+    end
+    return chain
+end
+
+annotate(chain::AnnotatedProteinChain; annotations...) = annotate(deepcopy(chain); annotations...)
+annotate(chain::ProteinChain; annotations...) = annotate!(AnnotatedProteinChain(chain); annotations...)
diff --git a/src/atom.jl b/src/atom.jl
index 518e936..d893879 100644
--- a/src/atom.jl
+++ b/src/atom.jl
@@ -6,22 +6,22 @@ const ATOMIC_NUMBER_TO_ELEMENT_SYMBOL = Dict(n => s for (s, n) in ELEMENT_SYMBOL
 element_symbol_to_atomic_number(element_symbol::AbstractString) = ELEMENT_SYMBOL_TO_ATOMIC_NUMBER[uppercase(strip(element_symbol))]
 atomic_number_to_element_symbol(atomic_number::Integer) = ATOMIC_NUMBER_TO_ELEMENT_SYMBOL[atomic_number]
 
-pad_atom_name(atom_name::AbstractString, element_symbol::AbstractString) = rpad(" "^(2-length(strip(element_symbol)))*strip(atom_name), 4)
+pad_atom_name(name::AbstractString, element_symbol::AbstractString) = rpad(" "^(2-length(strip(element_symbol)))*strip(name), 4)
 
-encode_atom_name(atom_name::AbstractString, element_symbol::AbstractString) = reinterpret(UInt32, codeunits(pad_atom_name(atom_name, element_symbol)))[1]
-decode_atom_name(atom_name::UInt32) = String(reinterpret(UInt8, [atom_name]))
+encode_atom_name(name::AbstractString, element_symbol::AbstractString) = reinterpret(UInt32, codeunits(pad_atom_name(name, element_symbol)))[1]
+decode_atom_name(name::UInt32) = String(reinterpret(UInt8, [name]))
 
 mutable struct Atom{T<:AbstractFloat}
-    atom_name::UInt32
+    name::UInt32
     atomic_number::Int8
     x::T
     y::T
     z::T
 end
 
-Atom(atom_name::UInt32, atomic_number::Integer, x::T, y::T, z::T) where T = Atom{T}(atom_name, atomic_number, x, y, z)
-Atom(atom_name::AbstractString, element_symbol::AbstractString, coords::AbstractVector{<:AbstractFloat}) =
-    Atom(encode_atom_name(atom_name, element_symbol), element_symbol_to_atomic_number(element_symbol), coords...)
+Atom(name::UInt32, atomic_number::Integer, x::T, y::T, z::T) where T = Atom{T}(name, atomic_number, x, y, z)
+Atom(name::AbstractString, element_symbol::AbstractString, coords::AbstractVector{<:AbstractFloat}) =
+    Atom(encode_atom_name(name, element_symbol), element_symbol_to_atomic_number(element_symbol), coords...)
 
 coords(atom::Atom) = [atom.x, atom.y, atom.z]
 
@@ -33,4 +33,4 @@ function offset!(atom::Atom, coords::Vector{<:Real})
     atom.y += coords[2]
     atom.z += coords[3]
     return atom
-end 
\ No newline at end of file
+end
diff --git a/src/chain.jl b/src/chain.jl
index dbe4644..f2cfc18 100644
--- a/src/chain.jl
+++ b/src/chain.jl
@@ -1,41 +1,53 @@
-@dynamic mutable struct ProteinChain{T<:AbstractFloat}
+"""
+    AbstractProteinChain{T<:AbstractFloat}
+
+An abstract type representing a protein chain, with a type parameter
+`T` that specifies the floating-point type of the coordinates.
+
+## Subtypes
+- `ProteinChain{T}`: A concrete subtype of `AbstractProteinChain` that represents a protein chain with a spe.
+"""
+abstract type AbstractProteinChain{T<:AbstractFloat} end
+
+Base.summary(chain::AbstractProteinChain) = "$(length(chain))-residue $(typeof(chain)) ($(chain.id))"
+Base.length(chain::AbstractProteinChain) = size(chain.backbone, 3)
+
+mutable struct ProteinChain{T} <: AbstractProteinChain{T}
     id::String
     sequence::String
     backbone::Array{T,3}
+    numbering::Vector{Int}
     atoms::Vector{Vector{Atom{T}}}
+
+    function ProteinChain(
+        id::AbstractString, sequence::AbstractString, backbone::AbstractArray{T,3},
+        numbering::AbstractVector{<:Integer} = collect(1:length(sequence)),
+        atoms::Vector{Vector{Atom{T}}} = [Atom{T}[] for _ in sequence],
+    ) where T
+        @assert length(sequence) == size(backbone, 3) == length(numbering) == length(atoms)
+        new{T}(id, sequence, backbone, numbering, atoms)
+    end
 end
 
 """
     ProteinChain{T<:AbstractFloat}
-
-## Examples
-
-```jldoctest
-julia> structure = pdb"1EYE";
-[ Info: Downloading file from PDB: 1EYE
-
-julia> structure[1]
-253-residue ProteinChain "A":
-  4 fields:
-    id::String = "A"
-    sequence::String = <field value exceeds max length>
-    backbone::Array{Float64,3} = <field value exceeds max length>
-    atoms::Vector{Vector{ProteinChains.Atom{Float64}}} = <field value exceeds max length>
-  2 properties:
-    numbering::Vector{Int64} = [5, 6, 7, 8, 9, 10, 11, 12, 13, 14  …  265, 266, 267, 268, 269, 270, 271, 272, 273, 274]
-    modelnum::Int64 = 1
-```
 """
 ProteinChain
 
-countresidues(chain::ProteinChain) = length(chain.sequence)
+Base.getindex(chain::ProteinChain, i::AbstractVector{<:Integer}) =
+    ProteinChain(chain.id, chain.sequence[i], chain.backbone[:,:,i], chain.numbering[i], chain.atoms[i])
+
+import Backboner
 
-Base.getindex(chain::ProteinChain, i::AbstractVector{<:Integer}) = ProteinChain(chain.id, chain.sequence[i], chain.backbone[:,:,i], chain.atoms[i])
+Backboner.Backbone(chain::AbstractProteinChain) = Backbone(chain.backbone)
+Backboner.ChainedBonds(chain::AbstractProteinChain) = ChainedBonds(Backbone(chain))
+Backboner.Frames(chain::AbstractProteinChain, ideal_residue=STANDARD_RESIDUE) = Frames(Backbone(chain), ideal_residue)
 
-Base.summary(chain::ProteinChain) = "$(countresidues(chain))-residue ProteinChain \"$(chain.id)\""
+psi_angles(chain::AbstractProteinChain) = get_torsion_angles(Backbone(chain))[1:3:end]
+omega_angles(chain::AbstractProteinChain) = get_torsion_angles(Backbone(chain))[2:3:end]
+phi_angles(chain::AbstractProteinChain) = get_torsion_angles(Backbone(chain))[3:3:end]
 
-function offset!(chain::ProteinChain, coords::Vector{<:Real})
-    @assert length(coords) == 3
+function offset!(chain::AbstractProteinChain, coords::Vector{<:Real})
     chain.backbone .+= coords
     for residue_atoms in chain.atoms
         for atom in residue_atoms
@@ -44,14 +56,3 @@ function offset!(chain::ProteinChain, coords::Vector{<:Real})
     end
     return chain
 end
-
-import Backboner
-
-Backboner.Backbone(chain::ProteinChain) = Backbone(chain.backbone)
-Backboner.ChainedBonds(chain::ProteinChain) = ChainedBonds(Backbone(chain))
-Backboner.Frames(chain::ProteinChain, ideal_residue=STANDARD_RESIDUE) = Frames(Backbone(chain), ideal_residue)
-
-# TODO: "contiguity" field/property to mask outputs with NaN where residues are not contiguous
-psi_angles(chain::ProteinChain) = get_torsion_angles(Backbone(chain))[1:3:end]
-omega_angles(chain::ProteinChain) = get_torsion_angles(Backbone(chain))[2:3:end]
-phi_angles(chain::ProteinChain) = get_torsion_angles(Backbone(chain))[3:3:end]
\ No newline at end of file
diff --git a/src/io/download.jl b/src/io/download.jl
new file mode 100644
index 0000000..075662d
--- /dev/null
+++ b/src/io/download.jl
@@ -0,0 +1,39 @@
+using Base.Filesystem
+
+const MAX_CACHE_ENTRIES = Ref{Int}(4)
+const CACHE_DIR = Ref{Union{String, Nothing}}(nothing)
+
+function initialize_cache_dir()
+    if CACHE_DIR[] === nothing
+        CACHE_DIR[] = mktempdir(prefix="pdb_cache_")
+        atexit(() -> rm(CACHE_DIR[], recursive=true))
+    end
+end
+
+function manage_cache()
+    entries = readdir(CACHE_DIR[])
+    while length(entries) > MAX_CACHE_ENTRIES[]
+        oldest_entry = entries[argmin([mtime(joinpath(CACHE_DIR[], entry)) for entry in entries])]
+        rm(joinpath(CACHE_DIR[], oldest_entry))
+        entries = readdir(CACHE_DIR[])
+    end
+end
+
+function pdbentry(pdbid::AbstractString; format=BioStructures.MMCIFFormat, kwargs...)
+    initialize_cache_dir()
+    path = BioStructures.downloadpdb(pdbid; dir=CACHE_DIR[], format=format, kwargs...)
+    manage_cache()
+    return read(path, ProteinStructure, format)
+end
+
+macro pdb_str(pdbid::AbstractString)
+    :(pdbentry($(esc(pdbid))))
+end
+
+macro pdb_str(pdbid::AbstractString, chain::AbstractString)
+    :(pdbentry($(esc(pdbid)))[$chain])
+end
+
+macro pdb_str(pdbid::AbstractString, ba_number::Integer)
+    :(pdbentry($(esc(pdbid)), ba_number=$ba_number))
+end
diff --git a/src/io/io.jl b/src/io/io.jl
index a234029..3c53971 100644
--- a/src/io/io.jl
+++ b/src/io/io.jl
@@ -17,12 +17,4 @@ get_format(path::AbstractString) = get(pdbextension_to_format, lowercase(last(sp
 include("renumber.jl")
 include("read.jl")
 include("write.jl")
-
-pdbentry(pdbid::AbstractString; format=BioStructures.MMCIFFormat, kwargs...) = mktempdir() do dir
-    path = BioStructures.downloadpdb(pdbid; dir, format, kwargs...)
-    readchains(path, format)
-end
-
-macro pdb_str(pdbid::AbstractString)
-    :(pdbentry($(esc(pdbid))))
-end
+include("download.jl")
diff --git a/src/io/read.jl b/src/io/read.jl
index 35820f0..6b3eb91 100644
--- a/src/io/read.jl
+++ b/src/io/read.jl
@@ -1,75 +1,61 @@
 backbone_atom_selector(atom::BioStructures.AbstractAtom) = BioStructures.atomnameselector(atom, BACKBONE_ATOM_NAMES)
+
 backbone_residue_selector(residue::BioStructures.AbstractResidue) =
     oneletter_resname(residue) in AMINOACIDS &&
     BioStructures.countatoms(residue, backbone_atom_selector) == 3 &&
-    BioStructures.standardselector(residue) &&
-    !BioStructures.disorderselector(residue) &&
-    !any(atom -> atom isa BioStructures.DisorderedAtom, BioStructures.collectatoms(residue))
+    BioStructures.standardselector(residue)
 
-function get_atom(residue::BioStructures.AbstractResidue, atom_name::AbstractString)
-    selector = atom -> BioStructures.atomnameselector(atom, [atom_name])
-    residue_atoms = BioStructures.collectatoms(residue, selector)
-    return only(residue_atoms)
-end
+get_sequence(residues::Vector{BioStructures.AbstractResidue}) = join(map(oneletter_resname, residues))
 
-function get_backbone(residue::BioStructures.AbstractResidue)
-    atom_name_to_atom_coords = atom_name -> BioStructures.coords(get_atom(residue, atom_name))
-    residue_coords = stack(atom_name_to_atom_coords, BACKBONE_ATOM_NAMES)
-    return Backbone(residue_coords)
-end
+get_atom(residue::BioStructures.AbstractResidue, name::AbstractString) =
+    BioStructures.collectatoms(residue, a -> BioStructures.atomnameselector(a, [name])) |> only
 
-function get_backbone(residues::Vector{BioStructures.AbstractResidue})
-    chain_coords = mapreduce(residue -> get_backbone(residue).coords, hcat, residues; init=Matrix{Float64}(undef, 3, 0))
-    return Backbone(chain_coords)
-end
+get_backbone(residue::BioStructures.AbstractResidue) =
+    stack(name -> BioStructures.coords(get_atom(residue, name)), BACKBONE_ATOM_NAMES)
 
-get_backbone(chain::BioStructures.Chain, selector) = get_backbone(BioStructures.collectresidues(chain, selector))
+get_backbone(residues::Vector{BioStructures.AbstractResidue}) =
+    mapreduce(get_backbone, hcat, residues; init=Matrix{Float64}(undef, 3, 0))
 
-aminoacid_sequence(residues::Vector{BioStructures.AbstractResidue}) = join(oneletter_resname.(residues))
-aminoacid_sequence(chain::BioStructures.Chain, selector) = aminoacid_sequence(BioStructures.collectresidues(chain, selector))
+Atom(atom::BioStructures.Atom) = Atom(atom.name, atom.element, atom.coords)
+Atom(atom::BioStructures.DisorderedAtom) = Atom(BioStructures.defaultatom(atom))
 
 function get_nonbackbone_atoms(residues::Vector{BioStructures.AbstractResidue})
     atoms = Vector{Atom{Float64}}[]
     for residue in residues
-        residue_atoms_bs = BioStructures.collectatoms(residue, a -> !backbone_atom_selector(a) && BioStructures.standardselector(a) && !BioStructures.disorderselector(a))
-        @assert !isempty(residue_atoms_bs)
-        push!(atoms, map(atom_bs -> Atom(atom_bs.name, atom_bs.element, atom_bs.coords), residue_atoms_bs))
+        residue = residue isa BioStructures.DisorderedResidue ? BioStructures.defaultresidue(residue) : residue
+        residue_atoms = map(Atom, BioStructures.collectatoms(residue, !backbone_atom_selector))
+        push!(atoms, residue_atoms)
     end
     return atoms
 end
 
 function ProteinChain(residues::Vector{BioStructures.AbstractResidue})
-    id = only(unique(BioStructures.chainid.(residues)))
-    sequence = aminoacid_sequence(residues)
-    backbone = reshape(get_backbone(residues).coords, 3, 3, :)
+    id = only(unique(map(BioStructures.chainid, residues)))
+    sequence = get_sequence(residues)
+    backbone = reshape(get_backbone(residues), 3, 3, :)
     atoms = get_nonbackbone_atoms(residues)
-    numbering = BioStructures.resnumber.(residues)
-    modelnum = only(unique(BioStructures.modelnumber.(residues)))
-    return ProteinChain(id, sequence, backbone, atoms; numbering, modelnum)
+    numbering = map(BioStructures.resnumber, residues)
+    return ProteinChain(id, sequence, backbone, numbering, atoms)
 end
 
 function ProteinChain(chain::BioStructures.Chain, selector=backbone_residue_selector)
     residues = BioStructures.collectresidues(chain, selector)
-    isempty(residues) && return ProteinChain(BioStructures.chainid(chain), "", zeros(3, 3, 0), Int[])
+    isempty(residues) && return ProteinChain(BioStructures.chainid(chain), "", zeros(3, 3, 0), Int[], Vector{Atom{Float64}}[])
     return ProteinChain(residues)
 end
 
 function ProteinStructure(struc::BioStructures.MolecularStructure, selector=backbone_residue_selector; path=nothing)
-    name = struc.name
-    chains = ProteinChain{Float64}[]
+    chains = ProteinChain[]
     for model in struc, chain in model
-        if !isempty(chain)
-            pc = ProteinChain(chain, selector)
-            countresidues(pc) > 0 && push!(chains, pc)
-        end
+        push!(chains, ProteinChain(chain, selector))
     end
-    structure = ProteinStructure(name, chains; ids=map(chain -> chain.id, chains), lengths=map(countresidues, chains))
+    structure = ProteinStructure(struc.name, chains)
     !isnothing(path) && endswith(path, ".cif") && renumber!(structure, BioStructures.MMCIFDict(path))
     return structure
 end
 
-readchains(path::AbstractString, format::Type{<:ProteinFileFormat}) = ProteinStructure(read(path, format); path)
-readchains(path::AbstractString) = readchains(path, get_format(path))
+Base.read(path::AbstractString, ::Type{ProteinStructure}, format::Type{<:ProteinFileFormat}) = ProteinStructure(read(path, format); path)
+Base.read(path::AbstractString, T::Type{ProteinStructure}) = read(path, T, get_format(path))
 
-readpdb(path::AbstractString) = readchains(path, PDBFormat)
-readcif(path::AbstractString) = readchains(path, MMCIFFormat)
\ No newline at end of file
+readcif(path::AbstractString) = read(path, ProteinStructure, MMCIFFormat)
+readpdb(path::AbstractString) = read(path, ProteinStructure, PDBFormat)
diff --git a/src/io/write.jl b/src/io/write.jl
index 8f7d7ad..3e198ab 100644
--- a/src/io/write.jl
+++ b/src/io/write.jl
@@ -13,8 +13,8 @@ function estimate_last_oxygen_position(backbone::Array{T,3}) where T<:Real
     return O_pos
 end
 
-function BioStructures.Chain(proteinchain::ProteinChain, model::BioStructures.Model)
-    numbering = hasproperty(proteinchain, :numbering) ? proteinchain.numbering : collect(1:countresidues(proteinchain))
+function BioStructures.Chain(proteinchain::AbstractProteinChain, model::BioStructures.Model)
+    numbering = proteinchain.numbering
     residue_list = Vector{String}()
     residues = Dict{String, BioStructures.AbstractResidue}()
     chain = BioStructures.Chain(proteinchain.id, residue_list, residues, model)
@@ -25,22 +25,22 @@ function BioStructures.Chain(proteinchain::ProteinChain, model::BioStructures.Mo
     oxygen_coords[:,end] = estimate_last_oxygen_position(proteinchain.backbone)
 
     atom_serial = 0
-    for residue_index in 1:countresidues(proteinchain)
+    for residue_index in 1:length(proteinchain)
         atom_list = Vector{String}()
         atoms = Dict{String, BioStructures.AbstractAtom}()
         resname = threeletter_resname(proteinchain.sequence[residue_index])
         number = numbering[residue_index]
         residue = BioStructures.Residue(resname, number, ' ', false, atom_list, atoms, chain, '-') # TODO: secondary structure
 
-        for (i, (atom_name, element)) in enumerate(zip(BACKBONE_ATOM_NAMES, BACKBONE_ATOM_SYMBOLS))
+        for (i, (name, element)) in enumerate(zip(BACKBONE_ATOM_NAMES, BACKBONE_ATOM_SYMBOLS))
             atom_serial += 1
             coords = proteinchain.backbone[:, i, residue_index]
-            atom = BioStructures.Atom(atom_serial, atom_name, ' ', coords, 1.0, 0.0, element, "  ", residue)
+            atom = BioStructures.Atom(atom_serial, name, ' ', coords, 1.0, 0.0, element, "  ", residue)
             push!(atom_list, atom.name)
             atoms[atom.name] = atom
         end
 
-        if !any(atom -> atom.atom_name == oxygen_name, proteinchain.atoms[residue_index])
+        if !any(atom -> atom.name == oxygen_name, proteinchain.atoms[residue_index])
             atom_serial += 1
             coords = oxygen_coords[:, residue_index]
             atom = BioStructures.Atom(atom_serial, "O", ' ', coords, 1.0, 0.0, "O", "  ", residue)
@@ -50,10 +50,10 @@ function BioStructures.Chain(proteinchain::ProteinChain, model::BioStructures.Mo
 
         for atom in proteinchain.atoms[residue_index]
             atom_serial += 1
-            atom_name = decode_atom_name(atom.atom_name)
+            name = decode_atom_name(atom.name)
             coords = [atom.x, atom.y, atom.z]
             element = atomic_number_to_element_symbol(atom.atomic_number)
-            atom = BioStructures.Atom(atom_serial, atom_name, ' ', coords, 1.0, 0.0, element, "  ", residue)
+            atom = BioStructures.Atom(atom_serial, name, ' ', coords, 1.0, 0.0, element, "  ", residue)
             push!(atom_list, atom.name)
             atoms[atom.name] = atom
         end
@@ -76,7 +76,7 @@ function BioStructures.MolecularStructure(proteinstruc::ProteinStructure)
     return struc
 end
 
-function writechains(path::AbstractString, proteinstruc::ProteinStructure, format::Type{<:ProteinFileFormat})
+function Base.write(path::AbstractString, proteinstruc::ProteinStructure, format::Type{<:ProteinFileFormat})
     struc = BioStructures.MolecularStructure(proteinstruc)
     if format == MMCIFFormat
         BioStructures.writemmcif(path, struc)
@@ -88,11 +88,12 @@ function writechains(path::AbstractString, proteinstruc::ProteinStructure, forma
     return nothing
 end
 
-function writechains(path::AbstractString, proteinchains::AbstractVector{<:ProteinChain}, format)
+function Base.write(path::AbstractString, proteinchains::AbstractVector{<:AbstractProteinChain}, format=get_format(path))
     proteinstruc = ProteinStructure(basename(path), proteinchains)
-    writechains(path, proteinstruc, format)
+    write(path, proteinstruc, format)
 end
 
-writechains(path::AbstractString, proteinchain::ProteinChain, format) = writechains(path, [proteinchain], format)
+Base.write(path::AbstractString, proteinchain::AbstractProteinChain, format=get_format(path)) = write(path, [proteinchain], format)
 
-writechains(path::AbstractString, arg) = writechains(path, arg, get_format(path))
+writecif(path::AbstractString, x) = write(path, x, MMCIFFormat)
+writepdb(path::AbstractString, x) = write(path, x, PDBFormat)
diff --git a/src/secondary-structure.jl b/src/secondary-structure.jl
index 4af836e..afdff1c 100644
--- a/src/secondary-structure.jl
+++ b/src/secondary-structure.jl
@@ -1,19 +1,13 @@
 import AssigningSecondaryStructure: assign_secondary_structure!, assign_secondary_structure
 
-export assign_secondary_structure!
-
 const SS_CODES = ['-', 'H', 'E']
 
 number_vector_to_codes(v::AbstractVector{<:Integer}) = SS_CODES[v]
-number_vector_to_code_string(v::AbstractVector{<:Integer}) = join(SS_CODES)[v]
 
-function assign_secondary_structure!(chain::ProteinChain)
-    number_vector = assign_secondary_structure(chain.backbone)
-    chain.secondary_structure = number_vector_to_code_string(number_vector)
-    return chain
-end
+assign_secondary_structure(chain::AbstractProteinChain) = assign_secondary_structure(chain.backbone)
 
-function assign_secondary_structure!(structure::AbstractVector{<:ProteinChain})
-    assign_secondary_structure!.(structure)
-    return structure
+function assign_secondary_structure!(chain::AnnotatedProteinChain)
+    number_vector = assign_secondary_structure(chain)
+    annotate_indexable!(chain; secondary_structure=number_vector_to_codes(number_vector))
+    return chain
 end
diff --git a/src/structure.jl b/src/structure.jl
index 1021afd..a4ee53d 100644
--- a/src/structure.jl
+++ b/src/structure.jl
@@ -1,35 +1,33 @@
-@dynamic mutable struct ProteinStructure{T<:AbstractFloat} <: AbstractVector{ProteinChain{T}}
-    name::String
-    chains::Vector{ProteinChain{T}}
-end
-
 """
-    ProteinStructure{T<:AbstractFloat} <: AbstractVector{ProteinChain{T}}
-
-## Examples
+    ProteinStructure <: AbstractVector{AbstractProteinChain}
 
 ```jldoctest
 julia> pdb"1EYE"
-1-chain ProteinStructure "1EYE.cif" with 2 dynamic properties:
-  2 fields:
-    name::String = "1EYE.cif"
-    chains::Vector{ProteinChain{Float64}} = <field value exceeds max length>
-  2 properties:
-    ids::Vector{String} = ["A"]
-    lengths::Vector{Int64} = [253]
+[ Info: Downloading file from PDB: 1EYE
+1-chain ProteinStructure "1EYE.cif"
+ 253-residue AnnotatedProteinChain{Float64} (A)
 ```
 """
-ProteinStructure
+mutable struct ProteinStructure <: AbstractVector{AbstractProteinChain}
+    name::String
+    chains::Vector{AbstractProteinChain}
+end
 
 Base.size(structure::ProteinStructure) = (length(structure.chains),)
 Base.getindex(structure::ProteinStructure, i) = structure.chains[i]
 
 Base.getindex(structure::ProteinStructure, id::AbstractString) = structure[findfirst(c -> c.id == id, structure.chains)]
 
-Base.summary(structure::ProteinStructure) = "$(length(structure))-chain ProteinStructure \"$(structure.name)\" with $(length(getproperties(structure; fields=false))) dynamic properties"
+Base.summary(structure::ProteinStructure) = "$(length(structure))-chain ProteinStructure \"$(structure.name)\""
+
+function Base.show(io::IO, ::MIME"text/plain", structure::ProteinStructure)
+    print(io, summary(structure))
+    for chain in structure
+        print(io, "\n ", summary(chain))
+    end
+end
 
 function offset!(structure::ProteinStructure, coords::Vector{<:Real})
-    @assert length(coords) == 3
     for chain in structure
         offset!(chain, coords)
     end
diff --git a/test/runtests.jl b/test/runtests.jl
index 80f35e5..33017ec 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -4,24 +4,36 @@ using Test
 @testset "ProteinChains.jl" begin
 
     @testset "ProteinChain" begin
-        chain = ProteinChain("A", "AMINO", rand(3, 3, 5), [ProteinChains.Atom{Float64}[] for _ in 1:5]; modelnum=1)
-        @test countresidues(chain) == 5
-        @test summary(chain) == "5-residue ProteinChain \"A\""
-        @test chain[5:-1:1] == ProteinChain("A", "ONIMA", chain.backbone[:,:,5:-1:1], chain.atoms[5:-1:1])
+        chain = ProteinChain("A", "AMINO", rand(3, 3, 5), collect(1:5), [ProteinChains.Atom{Float64}[] for _ in 1:5])
+        @test length(chain) == 5
+        @test summary(chain) == "5-residue ProteinChain{Float64} (A)"
+
+        i = 5:-1:1
+        @test chain[i].backbone == chain.backbone[:,:,i]
     end
 
     @testset "ProteinStructure" begin
-        chain = ProteinChain("A", "AMINO", rand(3, 3, 5), [ProteinChains.Atom{Float64}[] for _ in 1:5])
+        chain = ProteinChain("A", "AMINO", rand(3, 3, 5), collect(1:5), [ProteinChains.Atom{Float64}[] for _ in 1:5])
         structure = ProteinStructure("1CHN", [chain])
         @test structure[1] === chain
         @test structure["A"] === chain
     end
 
+    @testset "AnnotatedProteinChain" begin
+        chain = ProteinChain("A", "AMINO", rand(3, 3, 5), collect(1:5), [ProteinChains.Atom{Float64}[] for _ in 1:5])
+        annotated_chain = annotate(chain; modelnum=1)
+        @test annotated_chain isa AnnotatedProteinChain
+        @test annotated_chain.modelnum == 1
+        annotate_indexable!(annotated_chain; secondary_structure=assign_secondary_structure(annotated_chain))
+        @test length(annotated_chain.secondary_structure) == 5
+        @test length(annotated_chain[1:3].secondary_structure) == 3
+    end
+
     @testset "read/write" begin
 
         @testset "read" begin
             chains_pdb = pdbentry("1ASS"; format=PDBFormat)
-            @test countresidues.(collect(chains_pdb)) == [152]
+            @test length.(collect(chains_pdb)) == [152]
             chains_cif = pdbentry("1ASS"; format=MMCIFFormat)
             @test chains_pdb[1].backbone == chains_cif[1].backbone
         end
@@ -30,18 +42,12 @@ using Test
             chains = pdb"1ASS"
             new_chains = mktempdir() do temp_dir
                 temp_path = joinpath(temp_dir, "temp.pdb")
-                writechains(temp_path, chains)
-                readchains(temp_path)
+                write(temp_path, chains)
+                read(temp_path, ProteinStructure)
             end
             @test chains[1].backbone == new_chains[1].backbone
         end
 
     end
 
-    @testset "secondary structure" begin
-        ss_composition(chain::ProteinChain) = [count(==(ss), chain.secondary_structure) for ss in ['-', 'H', 'E']]
-        structure = assign_secondary_structure!(pdb"1ZAK")
-        @test ss_composition.(collect(structure)) == [[72, 116, 32], [72, 116, 32]]
-    end
-
 end