diff --git a/Project.toml b/Project.toml
index b7e8d3c..eed5f6d 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,21 +1,21 @@
 name = "ProteinChains"
 uuid = "b8e8f2a5-48d3-44f1-ba0d-c71cb7726ff8"
 authors = ["Anton Oresten <anton.oresten42@gmail.com> and contributors"]
-version = "0.2.0"
+version = "0.3.0"
 
 [deps]
 AssigningSecondaryStructure = "8ed43e74-60fb-4e11-99b9-91deed37aef7"
 Backboner = "9ac9c2a2-1cfe-46d3-b3fd-6fa470ea56a7"
 BioStructures = "de9282ab-8554-53be-b2d6-f6c222edabfc"
-DynamicStructs = "e139c391-eeee-4818-b359-c8725224fb1f"
+HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 PeriodicTable = "7b2266bf-644c-5ea3-82d8-af4bbd25a884"
+StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 
 [compat]
-AssigningSecondaryStructure = "0.5"
 Backboner = "0.12"
 BioStructures = "4"
-DynamicStructs = "0.1"
+HDF5 = "0.17"
 LinearAlgebra = "1"
 PeriodicTable = "1"
 julia = "1"
diff --git a/README.md b/README.md
index c4249cf..f5fbc3d 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
 [![Build Status](https://github.com/MurrellGroup/ProteinChains.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/MurrellGroup/ProteinChains.jl/actions/workflows/CI.yml?query=branch%3Amain)
 [![Coverage](https://codecov.io/gh/MurrellGroup/ProteinChains.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/MurrellGroup/ProteinChains.jl)
 
-This Julia package provides implements the `ProteinChain` type: a GPU-friendly structure-of-arrays representation of protein chains.
+This Julia package provides implements the `ProteinChain` type: a chain-level structure-of-arrays type representation of proteins, with support for indexing by residue index.
 
 ## Installation
 
@@ -16,7 +16,7 @@ Pkg.add("ProteinChains")
 
 ## Examples
 
-The `ProteinChain` type is meant to only store a set of quintessential fields, from which most other properties can be derived.
+The `ProteinChain` type is meant to only store a basic set of fields, from which some other properties might be derived.
 
 ```julia
 julia> using ProteinChains
@@ -24,43 +24,29 @@ julia> using ProteinChains
 julia> structure = pdb"1EYE" # string macro to fetch proteins from the PDB
 [ Info: Downloading file from PDB: 1EYE
 1-chain ProteinStructure "1EYE.cif"
- 256-residue ProteinChain{Float64} (A)
+ 256-residue ProteinChain{Float64, @NamedTuple{}} (A)
 
 julia> propertynames(chain)
-(:id, :sequence, :backbone, :numbering, :atoms)
+(:id, :atoms, :sequence, :numbering)
 ```
 
-To store additional properties, `AnnotatedProteinChain` can be used to add dynamic properties to the chain:
+To store additional properties, `annotate` can be used to attach persistent chain-level properties or indexable residue-level properties:
 
 ```julia
-julia> annotated_chain = annotate(chain; model=1)
-256-residue AnnotatedProteinChain{Float64} (A):
-  6 fields:
-    id::String = "A"
-    sequence::String = <exceeds max length>
-    backbone::Array{Float64,3} = <exceeds max length>
-    numbering::Vector{Int64} = <exceeds max length>
-    atoms::Vector{Vector{ProteinChains.Atom{Float64}}} = <exceeds max length>
-    indexable_properties::Vector{Symbol} = Symbol[]
-  1 property:
-    model::Int64 = 1
-```
+julia> chain = structure["A"]
+256-residue ProteinChain{Float64, @NamedTuple{}} (A)
 
-For properties of type `<:AbstractArray` that represent residue-level information, `annotate_indexable!` will index the last dimension of the property when the chain is indexed:
+julia> annotated_chain = annotate(chain; taxid=ChainProperty(83332))
+256-residue ProteinChain{Float64, @NamedTuple{taxid::ChainProperty{Int64}}} (A)
 
-```julia
-julia> annotate_indexable!(annotated_chain; secondary_structure=assign_secondary_structure(annotated_chain)
-256-residue AnnotatedProteinChain{Float64} (A):
-  6 fields:
-    id::String = "A"
-    sequence::String = <exceeds max length>
-    backbone::Array{Float64,3} = <exceeds max length>
-    numbering::Vector{Int64} = <exceeds max length>
-    atoms::Vector{Vector{ProteinChains.Atom{Float64}}} = <exceeds max length>
-    indexable_properties::Vector{Symbol} = [:secondary_structure]
-  2 properties:
-    model::Int64 = 1
-    secondary_structure::Vector{Int64} = [1, 1, 3, 3, 3, 3, 3, 3, 3, 1  …  2, 2, 2, 2, 2, 2, 2, 1, 1, 1]
+julia> annotated_chain = annotate(annotated_chain; some_residue_property=ResidueProperty(rand(3,256))) # last dimension gets indexed
+256-residue ProteinChain{Float64, @NamedTuple{ORGANISM_TAXID::ChainProperty{Int64}, some_residue_property::ResidueProperty{Matrix{Float64}}}} (A)
+
+julia> annotated_chain[1:100].some_residue_property
+3×100 Matrix{Float64}:
+ 0.273545  0.639173  0.92708   …  0.459441  0.196407  0.880034       
+ 0.981498  0.70263   0.279264     0.552049  0.89274   0.0328866      
+ 0.169268  0.117848  0.732741     0.301921  0.187094  0.281187
 ```
 
 ## See also
diff --git a/src/ProteinChains.jl b/src/ProteinChains.jl
index 0fd42d2..07768ae 100644
--- a/src/ProteinChains.jl
+++ b/src/ProteinChains.jl
@@ -1,12 +1,8 @@
 module ProteinChains
 
-using DynamicStructs
+using StaticArrays: SVector
 
 using Backboner
-export Backbone
-export ChainedBonds
-export get_bond_lengths, get_bond_angles, get_torsion_angles
-export Frames
 
 include("ideal.jl")
 export BackboneGeometry
@@ -16,23 +12,25 @@ export append_residue
 export prepend_residue
 
 include("atom.jl")
+export Atom
+export encode_atom_name, decode_atom_name
+export coords
+
+include("properties.jl")
+export ChainProperty, ResidueProperty
 
 include("chain.jl")
-export AbstractProteinChain
 export ProteinChain
-export length
+export annotate
+export map_atoms!
 export psi_angles, omega_angles, phi_angles
-
-include("annotated-chain.jl")
-export AnnotatedProteinChain
-export annotate!, annotate
-export annotate_indexable!, annotate_indexable
+export get_atoms, get_backbone
 
 include("structure.jl")
 export ProteinStructure
 
-include("secondary-structure.jl")
-export assign_secondary_structure, assign_secondary_structure!
+include("dataset.jl")
+export ProteinDataset
 
 include("io/io.jl")
 export readcif, readpdb
diff --git a/src/annotated-chain.jl b/src/annotated-chain.jl
deleted file mode 100644
index 9454f14..0000000
--- a/src/annotated-chain.jl
+++ /dev/null
@@ -1,52 +0,0 @@
-@dynamic mutable struct AnnotatedProteinChain{T} <: AbstractProteinChain{T}
-    id::String
-    sequence::String
-    backbone::Array{T,3}
-    numbering::Vector{Int64}
-    atoms::Vector{Vector{Atom{T}}}
-    indexable_properties::Vector{Symbol}
-
-    function AnnotatedProteinChain(
-        id::AbstractString, sequence::AbstractString, backbone::AbstractArray{T,3},
-        numbering::AbstractVector{<:Integer}, atoms::Vector{Vector{Atom{T}}},
-        indexable_properties=Symbol[]; kwargs...
-    ) where T
-        @assert length(sequence) == size(backbone, 3) == length(atoms) == length(numbering)
-        AnnotatedProteinChain{T}(id, sequence, backbone, atoms, numbering, indexable_properties; kwargs...)
-    end
-end
-
-ProteinChain(chain::AbstractProteinChain) = ProteinChain(chain.id, chain.sequence, chain.backbone, chain.numbering, chain.atoms)
-AnnotatedProteinChain(chain::AbstractProteinChain) = AnnotatedProteinChain(chain.id, chain.sequence, chain.backbone, chain.numbering, chain.atoms)
-
-function Base.getindex(chain::AnnotatedProteinChain, i::AbstractVector{<:Integer})
-    properties = Dict{Symbol,Any}()
-    for name in getproperties(chain; fields=false)
-        value = getproperty(chain, name)
-        properties[name] = name in chain.indexable_properties ? collect(selectdim(value, ndims(value), i)) : value
-    end
-    return annotate(ProteinChain(chain)[i]; properties...)
-end
-
-function annotate_indexable!(chain::AnnotatedProteinChain; annotations...)
-    for (name, value) in annotations
-        @assert value isa AbstractArray
-        @assert size(value)[end] == length(chain)
-        push!(chain.indexable_properties, name)
-        setproperty!(chain, name, value)
-    end
-    return chain
-end
-
-annotate_indexable(chain::AnnotatedProteinChain; annotations...) = annotate_indexable!(deepcopy(chain); annotations...)
-annotate_indexable(chain::ProteinChain; annotations...) = annotate_indexable!(AnnotatedProteinChain(chain); annotations...)
-
-function annotate!(chain::AnnotatedProteinChain; annotations...)
-    for (name, value) in annotations
-        setproperty!(chain, name, value)
-    end
-    return chain
-end
-
-annotate(chain::AnnotatedProteinChain; annotations...) = annotate(deepcopy(chain); annotations...)
-annotate(chain::ProteinChain; annotations...) = annotate!(AnnotatedProteinChain(chain); annotations...)
diff --git a/src/atom.jl b/src/atom.jl
index d893879..37f3b96 100644
--- a/src/atom.jl
+++ b/src/atom.jl
@@ -1,36 +1,35 @@
 using PeriodicTable: elements
 
-const ELEMENT_SYMBOL_TO_ATOMIC_NUMBER = Dict(uppercase(elements[atomic_number].symbol) => atomic_number for atomic_number in 1:118)
-const ATOMIC_NUMBER_TO_ELEMENT_SYMBOL = Dict(n => s for (s, n) in ELEMENT_SYMBOL_TO_ATOMIC_NUMBER)
+const ELEMENT_SYMBOL_TO_NUMBER = Dict(uppercase(elements[number].symbol) => number for number in 1:118)
+const number_TO_ELEMENT_SYMBOL = Dict(n => s for (s, n) in ELEMENT_SYMBOL_TO_NUMBER)
 
-element_symbol_to_atomic_number(element_symbol::AbstractString) = ELEMENT_SYMBOL_TO_ATOMIC_NUMBER[uppercase(strip(element_symbol))]
-atomic_number_to_element_symbol(atomic_number::Integer) = ATOMIC_NUMBER_TO_ELEMENT_SYMBOL[atomic_number]
+element_symbol_to_number(element_symbol::AbstractString) = ELEMENT_SYMBOL_TO_NUMBER[uppercase(strip(element_symbol))]
+number_to_element_symbol(number::Integer) = number_TO_ELEMENT_SYMBOL[number]
 
 pad_atom_name(name::AbstractString, element_symbol::AbstractString) = rpad(" "^(2-length(strip(element_symbol)))*strip(name), 4)
 
 encode_atom_name(name::AbstractString, element_symbol::AbstractString) = reinterpret(UInt32, codeunits(pad_atom_name(name, element_symbol)))[1]
 decode_atom_name(name::UInt32) = String(reinterpret(UInt8, [name]))
 
-mutable struct Atom{T<:AbstractFloat}
+struct Atom{T<:AbstractFloat}
     name::UInt32
-    atomic_number::Int8
+    number::Int8
     x::T
     y::T
     z::T
 end
 
-Atom(name::UInt32, atomic_number::Integer, x::T, y::T, z::T) where T = Atom{T}(name, atomic_number, x, y, z)
-Atom(name::AbstractString, element_symbol::AbstractString, coords::AbstractVector{<:AbstractFloat}) =
-    Atom(encode_atom_name(name, element_symbol), element_symbol_to_atomic_number(element_symbol), coords...)
+Atom{T}(atom::Atom) where T = Atom{T}(atom.name, atom.number, atom.x, atom.y, atom.z)
 
-coords(atom::Atom) = [atom.x, atom.y, atom.z]
+@inline Atom(name::UInt32, number::Integer, x::T, y::T, z::T) where T = Atom{T}(name, number, x, y, z)
+@inline Atom(name::AbstractString, element_symbol::AbstractString, x::T, y::T, z::T) where T =
+    Atom(encode_atom_name(name, element_symbol), element_symbol_to_number(element_symbol), x, y, z)
 
-Base.summary(atom::Atom) = "$(elements[atom.atomic_number].name) atom at [$(atom.x), $(atom.y), $(atom.z)])"
+@inline Atom(name, number, coords::AbstractVector{T}) where T = Atom(name, number, coords...)
 
-function offset!(atom::Atom, coords::Vector{<:Real})
-    @assert length(coords) == 3
-    atom.x += coords[1]
-    atom.y += coords[2]
-    atom.z += coords[3]
-    return atom
-end
+coords(atom::Atom) = SVector(atom.x, atom.y, atom.z)
+
+Base.summary(atom::Atom) = "$(elements[atom.number].name) atom at [$(atom.x), $(atom.y), $(atom.z)])"
+
+Base.show(io::IO, atom::Atom{T}) where T = print(io,
+    "Atom(\"$(decode_atom_name(atom.name))\", \"$(number_to_element_symbol(atom.number))\", $(coords(atom)))")
\ No newline at end of file
diff --git a/src/chain.jl b/src/chain.jl
index f2cfc18..38a69d7 100644
--- a/src/chain.jl
+++ b/src/chain.jl
@@ -1,58 +1,119 @@
 """
-    AbstractProteinChain{T<:AbstractFloat}
+    ProteinChain{T<:Real,Ps<:NamedProperties}
+"""
+struct ProteinChain{T<:Real,Ps<:NamedProperties}
+    id::String
+    atoms::Vector{Vector{Atom{T}}}
+    sequence::String
+    numbering::Vector{Int32}
+    properties::Ps
+end
 
-An abstract type representing a protein chain, with a type parameter
-`T` that specifies the floating-point type of the coordinates.
+function ProteinChain(
+    id, atoms::Vector{Vector{Atom{T}}}, sequence::String, numbering::Vector{<:Integer}, properties::Ps,
+) where {T,Ps<:NamedProperties}
+    len = length(atoms)
+    @assert sizeof(sequence) == len
+    @assert length(numbering) == len
+    for property in properties
+        property isa ResidueProperty && @assert size(property[], ndims(property[])) == len
+    end
+    ProteinChain{T,Ps}(id, atoms, sequence, Int32.(numbering), properties)
+end
 
-## Subtypes
-- `ProteinChain{T}`: A concrete subtype of `AbstractProteinChain` that represents a protein chain with a spe.
-"""
-abstract type AbstractProteinChain{T<:AbstractFloat} end
+ProteinChain(id, atoms, sequence, numbering) = ProteinChain(id, atoms, sequence, numbering, (;))
 
-Base.summary(chain::AbstractProteinChain) = "$(length(chain))-residue $(typeof(chain)) ($(chain.id))"
-Base.length(chain::AbstractProteinChain) = size(chain.backbone, 3)
+Base.:(==)(chain1::ProteinChain, chain2::ProteinChain) = propertynames(chain1) == propertynames(chain2) &&
+    !any(getproperty(chain1, name) != getproperty(chain2, name) for name in propertynames(chain1, false))
 
-mutable struct ProteinChain{T} <: AbstractProteinChain{T}
-    id::String
-    sequence::String
-    backbone::Array{T,3}
-    numbering::Vector{Int}
-    atoms::Vector{Vector{Atom{T}}}
+Base.length(chain::ProteinChain) = length(chain.atoms)
+
+Base.getproperty(chain::ProteinChain, name::Symbol) =
+    name in fieldnames(ProteinChain) ? getfield(chain, name) : getfield(getfield(chain, :properties), name)[]
+
+Base.propertynames(chain::ProteinChain, private::Bool=false) = (setdiff(fieldnames(ProteinChain), private ? () : (:properties,))..., propertynames(chain.properties)...)
+
+function Base.getindex(chain::ProteinChain, i::AbstractVector)
+    properties = map(p -> p[i], chain.properties)
+    ProteinChain(chain.id, chain.atoms[i], chain.sequence[i], chain.numbering[i], properties)
+end
+
+annotate(chain::ProteinChain; annotations...) =
+    ProteinChain(chain.id, chain.atoms, chain.sequence, chain.numbering, merge(chain.properties, annotations))
 
-    function ProteinChain(
-        id::AbstractString, sequence::AbstractString, backbone::AbstractArray{T,3},
-        numbering::AbstractVector{<:Integer} = collect(1:length(sequence)),
-        atoms::Vector{Vector{Atom{T}}} = [Atom{T}[] for _ in sequence],
-    ) where T
-        @assert length(sequence) == size(backbone, 3) == length(numbering) == length(atoms)
-        new{T}(id, sequence, backbone, numbering, atoms)
+Base.summary(chain::ProteinChain) = "$(length(chain))-residue $(typeof(chain)) ($(chain.id))"
+
+# wrap io with IOContext(io, :compact=>false) to make parseable
+function Base.show(io::IO, chain::ProteinChain)
+    print(io, "ProteinChain(")
+    for fieldname in fieldnames(ProteinChain)
+        show(io, getproperty(chain, fieldname))
+        fieldname == :properties || print(io, ", ")
     end
+    print(io, ")")
 end
 
-"""
-    ProteinChain{T<:AbstractFloat}
-"""
-ProteinChain
+function Base.show(io::IO, ::MIME"text/plain", chain::ProteinChain)
+    print(io, summary(chain))
+end
+
+function get_atoms(backbone_coords::Array{T,3}) where T
+    @assert size(backbone_coords)[1:2] == (3,3)
+    atoms = [Vector{Atom{T}}(undef, 3) for _ in 1:size(backbone_coords, 3)]
+    for (i, slice) in enumerate(eachslice(backbone_coords, dims=3))
+        for (j, pos) in enumerate(eachcol(slice))
+            atoms[i][j] = Atom(BACKBONE_ATOM_NAMES[j], BACKBONE_ATOM_SYMBOLS[j], pos)
+        end
+    end
+    return atoms
+end
 
-Base.getindex(chain::ProteinChain, i::AbstractVector{<:Integer}) =
-    ProteinChain(chain.id, chain.sequence[i], chain.backbone[:,:,i], chain.numbering[i], chain.atoms[i])
+get_atoms(backbone::Backbone) = get_atoms(reshape(backbone.coords, 3, 3, :))
+get_atoms(chain::ProteinChain) = chain.atoms
 
-import Backboner
+function get_backbone(atoms::Vector{Vector{Atom{T}}}) where T
+    backbone_coords = Array{T,3}(undef, 3, 3, length(atoms))
+    encoded_names = encode_atom_name.(BACKBONE_ATOM_NAMES, BACKBONE_ATOM_SYMBOLS)
+    for (i, residue_atoms) in enumerate(atoms)
+        for (j, name) in enumerate(encoded_names)
+            backbone_coords[:,j,i] = coords(argmax(atom -> atom.name == name, residue_atoms))
+        end
+    end
+    return backbone_coords
+end
+
+get_backbone(chain::ProteinChain) = hasproperty(chain, :backbone) ? chain.backbone : get_backbone(chain.atoms)
 
-Backboner.Backbone(chain::AbstractProteinChain) = Backbone(chain.backbone)
-Backboner.ChainedBonds(chain::AbstractProteinChain) = ChainedBonds(Backbone(chain))
-Backboner.Frames(chain::AbstractProteinChain, ideal_residue=STANDARD_RESIDUE) = Frames(Backbone(chain), ideal_residue)
+Backboner.Backbone(chain::ProteinChain) = Backbone(get_backbone(chain))
+Backboner.ChainedBonds(chain::ProteinChain) = ChainedBonds(Backbone(chain))
+Backboner.Frames(chain::ProteinChain, ideal_residue=STANDARD_RESIDUE) = Frames(Backbone(chain), ideal_residue)
 
-psi_angles(chain::AbstractProteinChain) = get_torsion_angles(Backbone(chain))[1:3:end]
-omega_angles(chain::AbstractProteinChain) = get_torsion_angles(Backbone(chain))[2:3:end]
-phi_angles(chain::AbstractProteinChain) = get_torsion_angles(Backbone(chain))[3:3:end]
+psi_angles(chain::ProteinChain) = get_torsion_angles(Backbone(chain))[1:3:end]
+omega_angles(chain::ProteinChain) = get_torsion_angles(Backbone(chain))[2:3:end]
+phi_angles(chain::ProteinChain) = get_torsion_angles(Backbone(chain))[3:3:end]
 
-function offset!(chain::AbstractProteinChain, coords::Vector{<:Real})
-    chain.backbone .+= coords
+function map_atoms!(f::Function, chain::ProteinChain, args...)
     for residue_atoms in chain.atoms
-        for atom in residue_atoms
-            offset!(atom, coords)
+        for i in eachindex(residue_atoms)
+            residue_atoms[i] = f(residue_atoms[i], args...)
         end
     end
     return chain
 end
+
+using AssigningSecondaryStructure: assign_secondary_structure
+
+calculate_property(x, name::Symbol, args...) = calculate_property(x, Val(name), args...)
+annotate(chain::ProteinChain, names::Vararg{Symbol}) = annotate(chain; NamedTuple{names}(calculate_property(chain, name) for name in names)...)
+
+calculate_property(chain::ProteinChain, ::Val{:ideal_residue}) = collect(STANDARD_RESIDUE) |> ChainProperty
+calculate_property(chain::ProteinChain, ::Val{:bond_lengths}) = Backboner.get_bond_lengths(Backboner.Backbone(get_backbone(chain))) |> ChainProperty
+calculate_property(chain::ProteinChain, ::Val{:bond_angles}) = Backboner.get_bond_angles(Backboner.Backbone(get_backbone(chain))) |> ChainProperty
+calculate_property(chain::ProteinChain, ::Val{:torsion_angles}) = Backboner.get_torsion_angles(Backboner.Backbone(get_backbone(chain))) |> ChainProperty
+calculate_property(chain::ProteinChain, ::Val{:is_knotted}) = Backboner.is_knotted(Backboner.Backbone(get_backbone(chain)[:,2,:])) |> ChainProperty
+
+calculate_property(chain::ProteinChain, ::Val{:backbone}) = get_backbone(chain) |> ResidueProperty
+calculate_property(chain::ProteinChain, ::Val{:secondary_structure}) = Int8.(assign_secondary_structure(get_backbone(chain))) |> ResidueProperty
+calculate_property(chain::ProteinChain, ::Val{:residue_rotations}) = Backboner.Frames(Backbone(get_backbone(chain)), hasproperty(chain, :ideal_residue) ? chain.ideal_residue : STANDARD_RESIDUE).rotations |> ResidueProperty
+calculate_property(chain::ProteinChain, ::Val{:residue_translations}) = dropdims(Backboner.centroid(get_backbone(chain); dims=2); dims=2) |> ResidueProperty
+calculate_property(chain::ProteinChain{T}, ::Val{:residue_torsion_angles}) where T = [reshape(calculate_property(chain, :torsion_angles)[], 3, :) fill(T(NaN), 3)] |> ResidueProperty
diff --git a/src/dataset.jl b/src/dataset.jl
new file mode 100644
index 0000000..e67dfc3
--- /dev/null
+++ b/src/dataset.jl
@@ -0,0 +1,13 @@
+"""
+    ProteinDataset <: AbstractDict{String,ProteinStructure}
+"""
+struct ProteinDataset <: AbstractDict{String,ProteinStructure}
+    dict::Dict{String,ProteinStructure}
+end
+
+ProteinDataset(structures::AbstractVector{<:ProteinStructure}) =
+    ProteinDataset(Dict(structure.name => structure for structure in structures))
+
+Base.length(d::ProteinDataset) = length(d.dict)
+Base.iterate(d::ProteinDataset, args...) = iterate(d.dict, args...)
+Base.:(==)(d1::ProteinDataset, d2::ProteinDataset) = d1.dict == d2.dict
diff --git a/src/ideal.jl b/src/ideal.jl
index 1af4c6a..e51109b 100644
--- a/src/ideal.jl
+++ b/src/ideal.jl
@@ -34,21 +34,21 @@ the N, Ca, and C atom positions of a residue positioned at the origin.
 struct IdealResidue{T<:AbstractFloat} <: AbstractMatrix{T}
     backbone_geometry::BackboneGeometry
     N_Ca_C_coords::Matrix{T}
+end
 
-    function IdealResidue{T}(backbone_geometry::BackboneGeometry=DEFAULT_BACKBONE_GEOMETRY; template=nothing) where T
-        N_Ca_C_coords = Matrix{T}(undef, 3, 3)
-        N_pos, Ca_pos, C_pos = eachcol(N_Ca_C_coords)
-        Θ = backbone_geometry.N_Ca_C_angle - π/2
-        N_pos .= [0, 0, 0]
-        Ca_pos .= [backbone_geometry.N_Ca_length, 0, 0]
-        C_pos .= Ca_pos + backbone_geometry.Ca_C_length * [sin(Θ), cos(Θ), 0]
-        N_Ca_C_coords .-= Backboner.centroid(N_Ca_C_coords; dims=2)
-        if !isnothing(template)
-            wanted_orientation, current_offset, wanted_offset = Backboner.kabsch_algorithm(N_Ca_C_coords, template)
-            N_Ca_C_coords .= wanted_orientation * (N_Ca_C_coords .- current_offset) .+ wanted_offset
-        end
-        new{T}(backbone_geometry, N_Ca_C_coords)
+function IdealResidue{T}(backbone_geometry::BackboneGeometry=DEFAULT_BACKBONE_GEOMETRY; template=nothing) where T
+    N_Ca_C_coords = Matrix{T}(undef, 3, 3)
+    N_pos, Ca_pos, C_pos = eachcol(N_Ca_C_coords)
+    Θ = backbone_geometry.N_Ca_C_angle - π/2
+    N_pos .= [0, 0, 0]
+    Ca_pos .= [backbone_geometry.N_Ca_length, 0, 0]
+    C_pos .= Ca_pos + backbone_geometry.Ca_C_length * [sin(Θ), cos(Θ), 0]
+    N_Ca_C_coords .-= Backboner.centroid(N_Ca_C_coords; dims=2)
+    if !isnothing(template)
+        wanted_orientation, current_offset, wanted_offset = Backboner.kabsch_algorithm(N_Ca_C_coords, template)
+        N_Ca_C_coords .= wanted_orientation * (N_Ca_C_coords .- current_offset) .+ wanted_offset
     end
+    IdealResidue{T}(backbone_geometry, N_Ca_C_coords)
 end
 
 Base.size(::IdealResidue) = (3,3)
@@ -65,7 +65,7 @@ Thus, we can use this residue as a template for aligning other residues with ver
 geometry to it.
 
 ```jldocotest
-julia> IdealResidue{Float64}(BackboneGeometry(N_Ca_C_angle = 1.93); template=ProteinChains.STANDARD_RESIDUE)
+julia> IdealResidue{Float64}(BackboneGeometry(N_Ca_C_angle = 1.93); template=ProteinChains.STANDARD_RESIDUE_TEMPLATE)
 3×3 IdealResidue{Float64}:
  -1.06447   -0.199174   1.26364
   0.646303  -0.529648  -0.116655
diff --git a/src/io/io.jl b/src/io/io.jl
index 3c53971..458c21f 100644
--- a/src/io/io.jl
+++ b/src/io/io.jl
@@ -5,7 +5,7 @@ const AMINOACIDS = Set("ACDEFGHIKLMNPQRSTVWY")
 const BACKBONE_ATOM_NAMES = ["N", "CA", "C"]
 const BACKBONE_ATOM_SYMBOLS = ["N", "C", "C"]
 
-const aa_to_threeletter = Dict{Char, String}([Char(v) => k for (k, v) in BioStructures.threeletter_to_aa])
+const aa_to_threeletter = Dict{Char,String}([Char(v) => k for (k, v) in BioStructures.threeletter_to_aa])
 threeletter_resname(aa::Char) = get(aa_to_threeletter, aa, "XXX")
 oneletter_resname(threeletter::AbstractString) = Char(get(BioStructures.threeletter_to_aa, threeletter, 'X'))
 oneletter_resname(residue::BioStructures.AbstractResidue) = oneletter_resname(BioStructures.resname(residue))
@@ -18,3 +18,5 @@ include("renumber.jl")
 include("read.jl")
 include("write.jl")
 include("download.jl")
+
+include("serialization.jl")
\ No newline at end of file
diff --git a/src/io/read.jl b/src/io/read.jl
index 6b3eb91..5718d29 100644
--- a/src/io/read.jl
+++ b/src/io/read.jl
@@ -10,20 +10,14 @@ get_sequence(residues::Vector{BioStructures.AbstractResidue}) = join(map(onelett
 get_atom(residue::BioStructures.AbstractResidue, name::AbstractString) =
     BioStructures.collectatoms(residue, a -> BioStructures.atomnameselector(a, [name])) |> only
 
-get_backbone(residue::BioStructures.AbstractResidue) =
-    stack(name -> BioStructures.coords(get_atom(residue, name)), BACKBONE_ATOM_NAMES)
-
-get_backbone(residues::Vector{BioStructures.AbstractResidue}) =
-    mapreduce(get_backbone, hcat, residues; init=Matrix{Float64}(undef, 3, 0))
-
 Atom(atom::BioStructures.Atom) = Atom(atom.name, atom.element, atom.coords)
 Atom(atom::BioStructures.DisorderedAtom) = Atom(BioStructures.defaultatom(atom))
 
-function get_nonbackbone_atoms(residues::Vector{BioStructures.AbstractResidue})
+function get_atoms(residues::Vector{BioStructures.AbstractResidue})
     atoms = Vector{Atom{Float64}}[]
     for residue in residues
         residue = residue isa BioStructures.DisorderedResidue ? BioStructures.defaultresidue(residue) : residue
-        residue_atoms = map(Atom, BioStructures.collectatoms(residue, !backbone_atom_selector))
+        residue_atoms = map(Atom, BioStructures.collectatoms(residue))
         push!(atoms, residue_atoms)
     end
     return atoms
@@ -31,30 +25,31 @@ end
 
 function ProteinChain(residues::Vector{BioStructures.AbstractResidue})
     id = only(unique(map(BioStructures.chainid, residues)))
+    atoms = get_atoms(residues)
     sequence = get_sequence(residues)
-    backbone = reshape(get_backbone(residues), 3, 3, :)
-    atoms = get_nonbackbone_atoms(residues)
     numbering = map(BioStructures.resnumber, residues)
-    return ProteinChain(id, sequence, backbone, numbering, atoms)
+    return ProteinChain(id, atoms, sequence, numbering)
 end
 
-function ProteinChain(chain::BioStructures.Chain, selector=backbone_residue_selector)
-    residues = BioStructures.collectresidues(chain, selector)
-    isempty(residues) && return ProteinChain(BioStructures.chainid(chain), "", zeros(3, 3, 0), Int[], Vector{Atom{Float64}}[])
+function ProteinChain(chain::BioStructures.Chain, backbone_residue_selector=backbone_residue_selector)
+    residues = BioStructures.collectresidues(chain, backbone_residue_selector)
+    isempty(residues) && return ProteinChain(BioStructures.chainid(chain), Vector{Atom{Float64}}[], "", Int[])
     return ProteinChain(residues)
 end
 
-function ProteinStructure(struc::BioStructures.MolecularStructure, selector=backbone_residue_selector; path=nothing)
-    chains = ProteinChain[]
+function ProteinStructure(struc::BioStructures.MolecularStructure, backbone_residue_selector=backbone_residue_selector; mmcifdict=nothing)
+    proteinchains = ProteinChain{Float64}[]
+    atoms = Atom.(BioStructures.collectatoms(BioStructures.collectresidues(struc, !backbone_residue_selector)))
     for model in struc, chain in model
-        push!(chains, ProteinChain(chain, selector))
+        push!(proteinchains, ProteinChain(chain, backbone_residue_selector))
     end
-    structure = ProteinStructure(struc.name, chains)
-    !isnothing(path) && endswith(path, ".cif") && renumber!(structure, BioStructures.MMCIFDict(path))
-    return structure
+    proteinstructure = ProteinStructure(struc.name, atoms, proteinchains)
+    mmcifdict isa BioStructures.MMCIFDict && renumber!(proteinstructure, mmcifdict)
+    return proteinstructure
 end
 
-Base.read(path::AbstractString, ::Type{ProteinStructure}, format::Type{<:ProteinFileFormat}) = ProteinStructure(read(path, format); path)
+Base.read(path::AbstractString, ::Type{ProteinStructure}, format::Type{MMCIFFormat}) = ProteinStructure(read(path, format); mmcifdict=BioStructures.MMCIFDict(path))
+Base.read(path::AbstractString, ::Type{ProteinStructure}, format::Type{<:ProteinFileFormat}) = ProteinStructure(read(path, format))
 Base.read(path::AbstractString, T::Type{ProteinStructure}) = read(path, T, get_format(path))
 
 readcif(path::AbstractString) = read(path, ProteinStructure, MMCIFFormat)
diff --git a/src/io/renumber.jl b/src/io/renumber.jl
index 33d389b..a4f3029 100644
--- a/src/io/renumber.jl
+++ b/src/io/renumber.jl
@@ -7,7 +7,6 @@ function renumber!(structure::ProteinStructure, mmcif_dict::BioStructures.MMCIFD
     label_seq_ids = mmcif_dict["_atom_site.label_seq_id"]
     auth_seq_ids = mmcif_dict["_atom_site.auth_seq_id"]
     auth_asym_ids = mmcif_dict["_atom_site.auth_asym_id"]
-
     reduced = Int[]
     current_auth_seq_id = ""
     for (i, auth_seq_id) in enumerate(auth_seq_ids)
@@ -31,7 +30,7 @@ function renumber!(structure::ProteinStructure, mmcif_dict::BioStructures.MMCIFD
         renum_dict = chainwise_auth_to_label_seq_ids[chain.id]
         numbering_str = map(n -> get(renum_dict, n, "?"), string.(chain.numbering))
         "?" in numbering_str && break
-        chain.numbering = parse.(Int, numbering_str)
+        chain.numbering .= parse.(Int, numbering_str)
     end
 
     return structure
diff --git a/src/io/serialization.jl b/src/io/serialization.jl
new file mode 100644
index 0000000..a0fcfa3
--- /dev/null
+++ b/src/io/serialization.jl
@@ -0,0 +1,98 @@
+import HDF5
+
+function Base.write(parent::Union{HDF5.File,HDF5.Group}, chain::ProteinChain{T}, path::AbstractString=chain.id) where T
+    chain_group = HDF5.create_group(parent, path)
+    HDF5.attributes(chain_group)["T"] = string(T)
+
+    chain_group["id"] = chain.id
+    chain_group["atom_chunk_sizes"] = map(UInt32∘length, chain.atoms)
+    chain_group["atoms_flattened"] = reduce(vcat, chain.atoms)
+    chain_group["sequence"] = chain.sequence
+    chain_group["numbering"] = map(UInt32, chain.numbering)
+
+    indexable = HDF5.create_group(chain_group, "indexable")
+    persistent = HDF5.create_group(chain_group, "persistent")
+    for (name, property) in pairs(chain.properties)
+        g = property isa ResidueProperty ? indexable : persistent
+        g[string(name)] = property[]
+    end
+
+    return parent
+end
+
+function Base.read(group::Union{Union{HDF5.File,HDF5.Group}}, ::Type{ProteinChain})
+    T = eval(Symbol(read(HDF5.attributes(group)["T"])))
+    id = read(group["id"])
+    atom_chunk_sizes = read(group["atom_chunk_sizes"])
+    atoms_flattened = [Atom(nt.name, nt.number, nt.x, nt.y, nt.z) for nt in read(group["atoms_flattened"])]
+    sequence = read(group["sequence"])
+    numbering = read(group["numbering"])
+
+    atoms = Vector{Atom{T}}[]
+    for (i, k) in zip(Iterators.accumulate(+, atom_chunk_sizes), atom_chunk_sizes)
+        residue_atoms = atoms_flattened[i-k+1:i]
+        push!(atoms, residue_atoms)
+    end
+
+    indexable = group["indexable"]
+    persistent = group["persistent"]
+    properties = merge(
+        NamedTuple((Symbol(key) => ResidueProperty(read(indexable[key])) for key in keys(group["indexable"]))),
+        NamedTuple((Symbol(key) => ChainProperty(read(persistent[key])) for key in keys(group["persistent"])))
+    )
+
+    ProteinChain(id, atoms, sequence, numbering, properties)
+end
+
+function Base.write(parent::Union{HDF5.File,HDF5.Group}, structure::ProteinStructure{T}, path::AbstractString=structure.name) where T
+    structure_group = HDF5.create_group(parent, path)
+    HDF5.attributes(structure_group)["T"] = string(T)
+
+    structure_group["name"] = structure.name
+    structure_group["atoms"] = structure.atoms
+    structure_group["numbering"] = structure.numbering
+
+    chains_group = HDF5.create_group(structure_group, "chains")
+
+    for (chain, number) in zip(structure.chains, structure.numbering)
+        write(chains_group, chain, string(number))
+    end
+
+    return parent
+end
+
+function Base.read(group::Union{Union{HDF5.File,HDF5.Group}}, ::Type{ProteinStructure})
+    T = eval(Symbol(read(HDF5.attributes(group)["T"])))
+    name = read(group["name"])
+    atoms = [Atom(nt.name, nt.number, nt.x, nt.y, nt.z) for nt in read(group["atoms"])]
+    numbering = read(group["numbering"])
+
+    chains_group = group["chains"]
+    chains = ProteinChain{T}[]
+    for i in numbering
+        chain = read(chains_group[string(i)], ProteinChain)
+        push!(chains, chain)
+    end
+
+    return ProteinStructure(name, atoms, chains, numbering)
+end
+
+function serialize(path::AbstractString, dataset::ProteinDataset; mode="w")
+    HDF5.h5open(path, mode) do f
+        foreach(((key, structure),) -> write(f, structure, key), dataset)
+    end
+    return path
+end
+
+serialize(path::AbstractString, structures::AbstractVector{<:ProteinStructure}; kwargs...) =
+    serialize(path, ProteinDataset(structures), kwargs...)
+
+function deserialize(path::AbstractString)
+    structures = ProteinStructure[]
+    HDF5.h5open(path, "r") do f
+        for key in keys(f)
+            push!(structures, read(f[key], ProteinStructure))
+        end
+    end
+    return ProteinDataset(structures)
+end
diff --git a/src/io/write.jl b/src/io/write.jl
index 3e198ab..ea2c413 100644
--- a/src/io/write.jl
+++ b/src/io/write.jl
@@ -1,6 +1,6 @@
 using LinearAlgebra: cross, normalize
 
-using AssigningSecondaryStructure: get_oxygen_positions
+_normalize(A::AbstractArray; dims=1) = mapslices(normalize, A; dims)
 
 function estimate_last_oxygen_position(backbone::Array{T,3}) where T<:Real
     N_pos, Ca_pos, C_pos = eachcol(backbone[:,:,end])
@@ -13,16 +13,26 @@ function estimate_last_oxygen_position(backbone::Array{T,3}) where T<:Real
     return O_pos
 end
 
-function BioStructures.Chain(proteinchain::AbstractProteinChain, model::BioStructures.Model)
+function get_oxygen_positions(backbone::Array{T,3}) where T<:Real
+    Ca_pos, C_pos, N_pos = backbone[:, 2, 1:end-1], backbone[:, 3, 1:end-1], backbone[:, 1, 2:end]
+    CaC_vec = C_pos - Ca_pos
+    NC_vec = C_pos - N_pos
+    CO_vec = T(1.23) * _normalize(_normalize(CaC_vec) + _normalize(NC_vec))
+    O_pos = C_pos + CO_vec
+    return [O_pos estimate_last_oxygen_position(backbone)]
+end
+
+function BioStructures.Chain(proteinchain::ProteinChain, model::BioStructures.Model)
     numbering = proteinchain.numbering
     residue_list = Vector{String}()
     residues = Dict{String, BioStructures.AbstractResidue}()
     chain = BioStructures.Chain(proteinchain.id, residue_list, residues, model)
 
-    # some visualization tools require oxygen atoms to be present, so these get used if a residue is missing the oxygen atom
+    # some secondary structure visualization tools require oxygen atoms to be present,
+    # so these get used if a residue is missing the oxygen atom
     oxygen_name = encode_atom_name("O", "O")
-    oxygen_coords = get_oxygen_positions(proteinchain.backbone)
-    oxygen_coords[:,end] = estimate_last_oxygen_position(proteinchain.backbone)
+    backbone_coords = get_backbone(proteinchain)
+    oxygen_coords = get_oxygen_positions(backbone_coords)
 
     atom_serial = 0
     for residue_index in 1:length(proteinchain)
@@ -32,9 +42,12 @@ function BioStructures.Chain(proteinchain::AbstractProteinChain, model::BioStruc
         number = numbering[residue_index]
         residue = BioStructures.Residue(resname, number, ' ', false, atom_list, atoms, chain, '-') # TODO: secondary structure
 
-        for (i, (name, element)) in enumerate(zip(BACKBONE_ATOM_NAMES, BACKBONE_ATOM_SYMBOLS))
+        # each residue has at least N, CA, C atoms
+        for atom in proteinchain.atoms[residue_index]
             atom_serial += 1
-            coords = proteinchain.backbone[:, i, residue_index]
+            name = decode_atom_name(atom.name)
+            coords = [atom.x, atom.y, atom.z]
+            element = number_to_element_symbol(atom.number)
             atom = BioStructures.Atom(atom_serial, name, ' ', coords, 1.0, 0.0, element, "  ", residue)
             push!(atom_list, atom.name)
             atoms[atom.name] = atom
@@ -44,19 +57,10 @@ function BioStructures.Chain(proteinchain::AbstractProteinChain, model::BioStruc
             atom_serial += 1
             coords = oxygen_coords[:, residue_index]
             atom = BioStructures.Atom(atom_serial, "O", ' ', coords, 1.0, 0.0, "O", "  ", residue)
-            push!(atom_list, atom.name)
+            insert!(atom_list, 4, atom.name)
             atoms[atom.name] = atom
         end
 
-        for atom in proteinchain.atoms[residue_index]
-            atom_serial += 1
-            name = decode_atom_name(atom.name)
-            coords = [atom.x, atom.y, atom.z]
-            element = atomic_number_to_element_symbol(atom.atomic_number)
-            atom = BioStructures.Atom(atom_serial, name, ' ', coords, 1.0, 0.0, element, "  ", residue)
-            push!(atom_list, atom.name)
-            atoms[atom.name] = atom
-        end
         key = string(residue.number)
         push!(residue_list, key)
         residues[key] = residue
@@ -85,15 +89,14 @@ function Base.write(path::AbstractString, proteinstruc::ProteinStructure, format
     else
         error("Unsupported format: $format")
     end
-    return nothing
 end
 
-function Base.write(path::AbstractString, proteinchains::AbstractVector{<:AbstractProteinChain}, format=get_format(path))
+function Base.write(path::AbstractString, proteinchains::AbstractVector{<:ProteinChain}, format=get_format(path))
     proteinstruc = ProteinStructure(basename(path), proteinchains)
     write(path, proteinstruc, format)
 end
 
-Base.write(path::AbstractString, proteinchain::AbstractProteinChain, format=get_format(path)) = write(path, [proteinchain], format)
+Base.write(path::AbstractString, proteinchain::ProteinChain, format=get_format(path)) = write(path, [proteinchain], format)
 
 writecif(path::AbstractString, x) = write(path, x, MMCIFFormat)
 writepdb(path::AbstractString, x) = write(path, x, PDBFormat)
diff --git a/src/properties.jl b/src/properties.jl
new file mode 100644
index 0000000..b2d326e
--- /dev/null
+++ b/src/properties.jl
@@ -0,0 +1,17 @@
+abstract type AbstractProperty end
+
+Base.getindex(p::AbstractProperty) = p.value
+
+struct ChainProperty{T} <: AbstractProperty
+    value::T
+end
+
+Base.getindex(prop::ChainProperty, ::AbstractVector) = prop
+
+struct ResidueProperty{T<:AbstractArray} <: AbstractProperty
+    value::T
+end
+
+Base.getindex(prop::ResidueProperty, i::AbstractVector) = selectdim(prop.value, ndims(prop.value), i) |> ResidueProperty
+
+const NamedProperties{names} = NamedTuple{names,<:Tuple{Vararg{AbstractProperty}}}
diff --git a/src/secondary-structure.jl b/src/secondary-structure.jl
deleted file mode 100644
index afdff1c..0000000
--- a/src/secondary-structure.jl
+++ /dev/null
@@ -1,13 +0,0 @@
-import AssigningSecondaryStructure: assign_secondary_structure!, assign_secondary_structure
-
-const SS_CODES = ['-', 'H', 'E']
-
-number_vector_to_codes(v::AbstractVector{<:Integer}) = SS_CODES[v]
-
-assign_secondary_structure(chain::AbstractProteinChain) = assign_secondary_structure(chain.backbone)
-
-function assign_secondary_structure!(chain::AnnotatedProteinChain)
-    number_vector = assign_secondary_structure(chain)
-    annotate_indexable!(chain; secondary_structure=number_vector_to_codes(number_vector))
-    return chain
-end
diff --git a/src/structure.jl b/src/structure.jl
index a4ee53d..0cd564b 100644
--- a/src/structure.jl
+++ b/src/structure.jl
@@ -1,20 +1,22 @@
 """
-    ProteinStructure <: AbstractVector{AbstractProteinChain}
-
-```jldoctest
-julia> pdb"1EYE"
-[ Info: Downloading file from PDB: 1EYE
-1-chain ProteinStructure "1EYE.cif"
- 253-residue AnnotatedProteinChain{Float64} (A)
-```
+    ProteinStructure{T} <: AbstractVector{ProteinChain{T}}
 """
-mutable struct ProteinStructure <: AbstractVector{AbstractProteinChain}
+struct ProteinStructure{T} <: AbstractVector{ProteinChain{T}}
     name::String
-    chains::Vector{AbstractProteinChain}
+    atoms::Vector{Atom{T}}
+    chains::Vector{<:ProteinChain{T}}
+    numbering::Vector{Int}
 end
 
+ProteinStructure(name, atoms, chains) = ProteinStructure(name, atoms, chains, collect(1:length(chains)))
+
 Base.size(structure::ProteinStructure) = (length(structure.chains),)
-Base.getindex(structure::ProteinStructure, i) = structure.chains[i]
+
+Base.getindex(structure::ProteinStructure, i::Integer) = structure.chains[i]
+
+function Base.getindex(structure::ProteinStructure, i::AbstractVector)
+    ProteinStructure(structure.name, structure.atoms, structure.chains[i], structure.numbering[i])
+end
 
 Base.getindex(structure::ProteinStructure, id::AbstractString) = structure[findfirst(c -> c.id == id, structure.chains)]
 
@@ -27,9 +29,15 @@ function Base.show(io::IO, ::MIME"text/plain", structure::ProteinStructure)
     end
 end
 
-function offset!(structure::ProteinStructure, coords::Vector{<:Real})
+function map_atoms!(f::Function, structure::ProteinStructure, args...)
     for chain in structure
-        offset!(chain, coords)
+        map_atoms!(f, chain, args...)
+    end
+    for i in eachindex(structure.atoms)
+        structure.atoms[i] = f(structure.atoms[i], args...)
     end
     return structure
-end 
\ No newline at end of file
+end
+
+annotate(structure::ProteinStructure, names::Vararg{Symbol}) =
+    ProteinStructure(structure.name, structure.atoms, annotate.(structure.chains, names...), structure.numbering)
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index 33017ec..6da8ae4 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -4,38 +4,24 @@ using Test
 @testset "ProteinChains.jl" begin
 
     @testset "ProteinChain" begin
-        chain = ProteinChain("A", "AMINO", rand(3, 3, 5), collect(1:5), [ProteinChains.Atom{Float64}[] for _ in 1:5])
+        chain = ProteinChain("A", get_atoms(ProteinChains.Backbone(rand(3, 3, 5))), "AMINO", collect(1:5))
         @test length(chain) == 5
-        @test summary(chain) == "5-residue ProteinChain{Float64} (A)"
-
-        i = 5:-1:1
-        @test chain[i].backbone == chain.backbone[:,:,i]
     end
 
     @testset "ProteinStructure" begin
-        chain = ProteinChain("A", "AMINO", rand(3, 3, 5), collect(1:5), [ProteinChains.Atom{Float64}[] for _ in 1:5])
-        structure = ProteinStructure("1CHN", [chain])
+        chain = ProteinChain("A", get_atoms(ProteinChains.Backbone(rand(3, 3, 5))), "AMINO", collect(1:5))
+        structure = ProteinStructure("1CHN", Atom{Float64}[], [chain])
         @test structure[1] === chain
         @test structure["A"] === chain
     end
 
-    @testset "AnnotatedProteinChain" begin
-        chain = ProteinChain("A", "AMINO", rand(3, 3, 5), collect(1:5), [ProteinChains.Atom{Float64}[] for _ in 1:5])
-        annotated_chain = annotate(chain; modelnum=1)
-        @test annotated_chain isa AnnotatedProteinChain
-        @test annotated_chain.modelnum == 1
-        annotate_indexable!(annotated_chain; secondary_structure=assign_secondary_structure(annotated_chain))
-        @test length(annotated_chain.secondary_structure) == 5
-        @test length(annotated_chain[1:3].secondary_structure) == 3
-    end
-
     @testset "read/write" begin
 
         @testset "read" begin
             chains_pdb = pdbentry("1ASS"; format=PDBFormat)
             @test length.(collect(chains_pdb)) == [152]
             chains_cif = pdbentry("1ASS"; format=MMCIFFormat)
-            @test chains_pdb[1].backbone == chains_cif[1].backbone
+            @test chains_pdb[1].sequence == chains_cif[1].sequence
         end
 
         @testset "write" begin
@@ -45,9 +31,21 @@ using Test
                 write(temp_path, chains)
                 read(temp_path, ProteinStructure)
             end
-            @test chains[1].backbone == new_chains[1].backbone
+            @test chains[1].sequence == new_chains[1].sequence
         end
 
     end
 
+    @testset "serialization" begin
+        mktempdir() do dir
+            filename = "dataset.h5"
+            path = joinpath(dir, filename)
+            dataset1 = ProteinDataset([annotate(pdb"1EYE", :backbone)])
+            ProteinChains.serialize(path, dataset1)
+            dataset2 = ProteinChains.deserialize(path)
+            @test dataset1 == dataset2
+            @test hasproperty(first(values(dataset1))["A"], :backbone)
+        end
+    end
+
 end