diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 49bc892e5d..7666d2933d 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1 +1 @@ -* @nadove-ucsc +* @hannes-ucsc diff --git a/.github/pull_request_template.md.template.py b/.github/pull_request_template.md.template.py index 61a2d9ac95..eb28c95dc8 100644 --- a/.github/pull_request_template.md.template.py +++ b/.github/pull_request_template.md.template.py @@ -33,6 +33,7 @@ OrderedSet, ) from azul.strings import ( + back_quote as bq, join_grammatically, ) from azul.template import ( @@ -225,10 +226,6 @@ def shared_deploy_target(self, target_branch: str) -> str: return 'apply' + iif(self.shared_deploy_is_two_phase(target_branch), '_keep_unused') -def bq(s): - return '`' + s + '`' - - def main(): path = Path(sys.argv[1]) for t in T: diff --git a/README.md b/README.md index f4f0a9ea08..0261e39932 100644 --- a/README.md +++ b/README.md @@ -1819,14 +1819,14 @@ the private key and can always be regenerated again later using `make config`. ### 9.1.2 Ensuring split tunnel on client -It is important that you configure the client to only route VPC traffic -through the VPN. The VPN server will not forward any other traffic, in what's -commonly referred to as a *split tunnel*. The key indicator of a split tunnel -is that it doesn't set up a default route on the client system. There will -only be a route to the private 172.… subnet of the GitLab VPC but the default -route remains in place. If you configure the VPN connection to set up a -default route, your Internet access will be severed as soon as you establish -the VPN connection. +Except on stable deployments, you should configure the client to only route VPC +traffic through the VPN. The VPN server will not forward any other traffic, in +what's commonly referred to as a *split tunnel*. The key indicator of a split +tunnel is that it doesn't set up a default route on the client system. There +will only be a route to the private 172.… subnet of the GitLab VPC but the +default route remains in place. + +On stable deployments, split tunnels are prohibited. The `make config` step prints instruction on how to configure a split tunnel on Ubuntu. 
diff --git a/deployments/prod/environment.py b/deployments/prod/environment.py index 708536467d..297961d7bf 100644 --- a/deployments/prod/environment.py +++ b/deployments/prod/environment.py @@ -1126,6 +1126,21 @@ def mkdict(previous_catalog: dict[str, str], # @formatter:on ])) +dcp43_sources = mkdict(dcp42_sources, 476, mkdelta([ + # @formatter:off + mksrc('bigquery', 'datarepo-ac7cee91', 'hca_prod_087efc3c26014de6bbe90114593050d1__20241004_dcp2_20241007_dcp43'), + mksrc('bigquery', 'datarepo-e9df1043', 'hca_prod_248c5dc36b754fb4ad8acc771968483f__20240806_dcp2_20241007_dcp43'), + mksrc('bigquery', 'datarepo-65c49269', 'hca_prod_2ef3655a973d4d699b4121fa4041eed7__20220111_dcp2_20241004_dcp43'), + mksrc('bigquery', 'datarepo-456691e5', 'hca_prod_3627473eb6d645c987b5b9f12ce57a10__20241004_dcp2_20241007_dcp43'), + mksrc('bigquery', 'datarepo-c577eed5', 'hca_prod_7f351a4cd24c4fcd9040f79071b097d0__20220906_dcp2_20241004_dcp43'), + mksrc('bigquery', 'datarepo-1dbd3c50', 'hca_prod_ae9f439bbd474d6ebd7232dc70b35d97__20241004_dcp2_20241004_dcp43', ma), # noqa E501 + mksrc('bigquery', 'datarepo-21d1f89b', 'hca_prod_b39381584e8d4fdb9e139e94270dde16__20241004_dcp2_20241004_dcp43'), + mksrc('bigquery', 'datarepo-550c8f98', 'hca_prod_c3dd819dabab4957b20988f1e0900368__20241004_dcp2_20241004_dcp43'), + mksrc('bigquery', 'datarepo-06a00830', 'hca_prod_c5ca43aa3b2b42168eb3f57adcbc99a1__20220118_dcp2_20241004_dcp43'), + mksrc('bigquery', 'datarepo-55151ed4', 'hca_prod_cdabcf0b76024abf9afb3b410e545703__20230201_dcp2_20241008_dcp43') + # @formatter:on +])) + pilot1_sources = mkdict({}, 4, mkdelta([ # @formatter:off mksrc('bigquery', 'datarepo-11e4dc06', 'hca_prod_59b3bfd9cf454d538c8ee240273cba71__20240410_dcp2_20240410_dcpPilot'), # noqa E501 @@ -1171,6 +1186,17 @@ def mkdict(previous_catalog: dict[str, str], mksrc('bigquery', 'datarepo-43814140', 'lungmap_prod_fdadee7e209745d5bf81cc280bd8348e__20240206_20240626_lm7') ])) +lm8_sources = mkdict(lm7_sources, 12, mkdelta([ + mksrc('bigquery', 'datarepo-2b15227b', 'lungmap_prod_1977dc4784144263a8706b0f207d8ab3__20240206_20241002_lm8'), + mksrc('bigquery', 'datarepo-c9158593', 'lungmap_prod_20037472ea1d4ddb9cd356a11a6f0f76__20220307_20241002_lm8'), + mksrc('bigquery', 'datarepo-35a6d7ca', 'lungmap_prod_3a02d15f9c6a4ef7852b4ddec733b70b__20241001_20241002_lm8'), + mksrc('bigquery', 'datarepo-131a1234', 'lungmap_prod_4ae8c5c91520437198276935661f6c84__20231004_20241002_lm8'), + mksrc('bigquery', 'datarepo-3377446f', 'lungmap_prod_6135382f487d4adb9cf84d6634125b68__20230207_20241002_lm8'), + mksrc('bigquery', 'datarepo-3c4905d2', 'lungmap_prod_834e0d1671b64425a8ab022b5000961c__20241001_20241002_lm8'), + mksrc('bigquery', 'datarepo-d7447983', 'lungmap_prod_f899709cae2c4bb988f0131142e6c7ec__20220310_20241002_lm8'), + mksrc('bigquery', 'datarepo-c11ef363', 'lungmap_prod_fdadee7e209745d5bf81cc280bd8348e__20240206_20241002_lm8'), +])) + def env() -> Mapping[str, Optional[str]]: """ @@ -1211,8 +1237,10 @@ def env() -> Mapping[str, Optional[str]]: sources=mklist(sources)) for atlas, catalog, sources in [ ('hca', 'dcp42', dcp42_sources), + ('hca', 'dcp43', dcp43_sources), ('hca', 'pilot1', pilot1_sources), - ('lungmap', 'lm7', lm7_sources) + ('lungmap', 'lm7', lm7_sources), + ('lungmap', 'lm8', lm8_sources) ] for suffix, internal in [ ('', False), ('-it', True) diff --git a/scripts/convert_environment.py b/scripts/convert_environment.py index 65adeee8c0..1e330cd27b 100644 --- a/scripts/convert_environment.py +++ b/scripts/convert_environment.py @@ -23,6 +23,9 @@ from azul.files 
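Editor's note: the new `dcp43_sources` and `lm8_sources` catalogs above follow the existing delta pattern, where each catalog is the previous one plus a set of added or re-snapshotted sources, with an expected total count as a sanity check. Below is a minimal, self-contained sketch of that pattern; the helper names mirror `mkdict`/`mkdelta`/`mksrc`, but their bodies and signatures here are illustrative assumptions, not the real implementations.

```python
from collections.abc import Mapping


def mksrc(project_id: str, spec: str) -> tuple[str, str]:
    # Hypothetical stand-in: the real mksrc() derives the catalog key from
    # the source spec itself; here we pair an explicit key with the spec.
    return project_id, spec


def mkdelta(items: list[tuple[str, str]]) -> dict[str, str]:
    # The added or updated sources of one catalog release.
    return dict(items)


def mkdict(previous: Mapping[str, str],
           expected_total: int,
           delta: Mapping[str, str]
           ) -> dict[str, str]:
    # New catalog = previous catalog with the delta applied on top. The
    # expected total guards against accidentally dropping sources.
    catalog = {**previous, **delta}
    assert len(catalog) == expected_total, (len(catalog), expected_total)
    return catalog


lm7 = mkdict({}, 2, mkdelta([
    mksrc('project-a', 'lungmap_prod_a__20240206_20240626_lm7'),
    mksrc('project-b', 'lungmap_prod_b__20240206_20240626_lm7'),
]))

# lm8 re-snapshots one project and carries the other over unchanged,
# so the total count stays the same.
lm8 = mkdict(lm7, 2, mkdelta([
    mksrc('project-a', 'lungmap_prod_a__20240206_20241002_lm8'),
]))
```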
import ( write_file_atomically, ) +from azul.strings import ( + single_quote as sq, +) class Variable(NamedTuple): @@ -144,7 +147,7 @@ def sub(m: re.Match): return '{{' + m[1] + '}}' value = re.sub(r'\$?{([^}]+)}|\$([_A-Za-z][_A-Za-z0-9]*)', sub, value) - return f"'{value}'" + return sq(value) if __name__ == '__main__': diff --git a/src/azul/bigquery.py b/src/azul/bigquery.py index aa6f166a54..c95179e6f8 100644 --- a/src/azul/bigquery.py +++ b/src/azul/bigquery.py @@ -10,8 +10,8 @@ Union, ) -from azul import ( - require, +from azul.strings import ( + back_quote as bq, ) BigQueryValue = Union[int, float, bool, str, bytes, datetime, None] @@ -42,10 +42,9 @@ def backtick(table_name: str) -> str: >>> backtick('foo-2.bar`s.my_table') Traceback (most recent call last): ... - azul.RequirementError: foo-2.bar`s.my_table + azul.RequirementError: ('`', 'must not occur in', 'foo-2.bar`s.my_table') """ if table_name_re.fullmatch(table_name): return table_name else: - require('`' not in table_name, table_name) - return f'`{table_name}`' + return bq(table_name) diff --git a/src/azul/chalice.py b/src/azul/chalice.py index f7ad22b12c..0405133ff2 100644 --- a/src/azul/chalice.py +++ b/src/azul/chalice.py @@ -61,6 +61,10 @@ copy_json, json_head, ) +from azul.strings import ( + join_words as jw, + single_quote as sq, +) from azul.types import ( JSON, LambdaContext, @@ -189,15 +193,33 @@ def _api_gateway_context_middleware(self, event, get_response): finally: config.lambda_is_handling_api_gateway_request = False + hsts_max_age = 60 * 60 * 24 * 365 * 2 + + # Headers added to every response from the app, as well as canned 4XX and + # 5XX responses from API Gateway. Use of these headers addresses known + # security vulnerabilities. + # + security_headers = { + 'Content-Security-Policy': jw('default-src', sq('self')), + 'Referrer-Policy': 'strict-origin-when-cross-origin', + 'Strict-Transport-Security': jw(f'max-age={hsts_max_age};', + 'includeSubDomains;', + 'preload'), + 'X-Content-Type-Options': 'nosniff', + 'X-Frame-Options': 'DENY', + 'X-XSS-Protection': '1; mode=block' + } + def _security_headers_middleware(self, event, get_response): """ Add headers to the response """ response = get_response(event) - seconds = 60 * 60 * 24 * 365 - response.headers['Strict-Transport-Security'] = f'max-age={seconds}; includeSubDomains' - response.headers['X-Content-Type-Options'] = 'nosniff' - response.headers['X-Frame-Options'] = 'DENY' + response.headers.update(self.security_headers) + # FIXME: Add a CSP header with a nonce value to text/html responses + # https://github.com/DataBiosphere/azul-private/issues/6 + if response.headers.get('Content-Type') == 'text/html': + del response.headers['Content-Security-Policy'] view_function = self.routes[event.path][event.method].view_function cache_control = getattr(view_function, 'cache_control') response.headers['Cache-Control'] = cache_control diff --git a/src/azul/collections.py b/src/azul/collections.py index d996297a9d..9ddc64629a 100644 --- a/src/azul/collections.py +++ b/src/azul/collections.py @@ -131,6 +131,17 @@ def explode_dict(d: Mapping[K, Union[V, list[V], set[V], tuple[V]]] yield dict(zip(d.keys(), t)) +def none_safe_apply(f: Callable[[K], V], o: K | None) -> V | None: + """ + >>> none_safe_apply(str, 123) + '123' + + >>> none_safe_apply(str, None) is None + True + """ + return None if o is None else f(o) + + def none_safe_key(none_last: bool = False) -> Callable[[Any], Any]: """ Returns a sort key that handles None values. 
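Editor's note: the middleware change above centralizes the response headers in a class-level `security_headers` mapping and strips the CSP from `text/html` responses until a nonce-based policy lands. A rough, framework-agnostic sketch of that flow follows; the `Response` class and `get_response` callable are hypothetical stand-ins for Chalice's middleware machinery.

```python
from typing import Callable

hsts_max_age = 60 * 60 * 24 * 365 * 2  # two years, in seconds

security_headers = {
    'Content-Security-Policy': "default-src 'self'",
    'Referrer-Policy': 'strict-origin-when-cross-origin',
    'Strict-Transport-Security': f'max-age={hsts_max_age}; includeSubDomains; preload',
    'X-Content-Type-Options': 'nosniff',
    'X-Frame-Options': 'DENY',
    'X-XSS-Protection': '1; mode=block',
}


class Response:
    # Hypothetical minimal response object standing in for Chalice's.

    def __init__(self, headers: dict[str, str]):
        self.headers = headers


def security_headers_middleware(event,
                                get_response: Callable[[object], Response]
                                ) -> Response:
    response = get_response(event)
    # Apply the shared headers to every response ...
    response.headers.update(security_headers)
    # ... but omit the CSP for HTML responses until a nonce-based policy
    # can be emitted per response.
    if response.headers.get('Content-Type') == 'text/html':
        del response.headers['Content-Security-Policy']
    return response


html = security_headers_middleware(None, lambda _: Response({'Content-Type': 'text/html'}))
assert 'Content-Security-Policy' not in html.headers
assert html.headers['X-Frame-Options'] == 'DENY'
```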
@@ -270,6 +281,10 @@ def adict(seq: Union[Mapping[K, V], Iterable[tuple[K, V]]] = None, return kwargs if seq is None else dict(seq, **kwargs) +def _athing(cls: type, *args): + return cls(arg for arg in args if arg is not None) + + def atuple(*args: V) -> tuple[V, ...]: """ >>> atuple() @@ -281,7 +296,7 @@ def atuple(*args: V) -> tuple[V, ...]: >>> atuple(0, None) (0,) """ - return tuple(arg for arg in args if arg is not None) + return _athing(tuple, *args) def alist(*args: V) -> list[V]: @@ -295,7 +310,21 @@ def alist(*args: V) -> list[V]: >>> alist(0, None) [0] """ - return list(arg for arg in args if arg is not None) + return _athing(list, *args) + + +def aset(*args: V) -> set[V]: + """ + >>> aset() + set() + + >>> aset(None) + set() + + >>> aset(0, None) + {0} + """ + return _athing(set, *args) class NestedDict(defaultdict): diff --git a/src/azul/indexer/index_service.py b/src/azul/indexer/index_service.py index 8fc5140e61..ab9571afdc 100644 --- a/src/azul/indexer/index_service.py +++ b/src/azul/indexer/index_service.py @@ -38,7 +38,6 @@ from more_itertools import ( first, one, - unzip, ) from azul import ( @@ -303,19 +302,21 @@ def transform(self, log.info('Transforming %i entities in partition %s of bundle %s, version %s.', num_entities, partition, bundle.uuid, bundle.version) contributions = [] - replicas = [] + replicas_by_coords = {} for transformer in transformers: - # The cast is necessary because unzip()'s type stub doesn't - # support heterogeneous tuples. - transforms = cast( - tuple[Iterable[Optional[Contribution]], Iterable[Optional[Replica]]], - unzip(transformer.transform(partition)) - ) - if transforms: - contributions_part, replicas_part = transforms - contributions.extend(filter(None, contributions_part)) - replicas.extend(filter(None, replicas_part)) - return contributions, replicas + for document in transformer.transform(partition): + if isinstance(document, Contribution): + contributions.append(document) + elif isinstance(document, Replica): + try: + dup = replicas_by_coords[document.coordinates] + except KeyError: + replicas_by_coords[document.coordinates] = document + else: + dup.hub_ids.extend(document.hub_ids) + else: + assert False, document + return contributions, list(replicas_by_coords.values()) def create_indices(self, catalog: CatalogName): es_client = ESClientFactory.get() diff --git a/src/azul/indexer/transform.py b/src/azul/indexer/transform.py index cd8ab6fae1..4f6efb7f50 100644 --- a/src/azul/indexer/transform.py +++ b/src/azul/indexer/transform.py @@ -11,6 +11,9 @@ import attr +from azul.collections import ( + alist, +) from azul.indexer import ( Bundle, BundleFQID, @@ -34,11 +37,8 @@ ) from azul.types import ( JSON, - MutableJSON, ) -Transform = tuple[Optional[Contribution], Optional[Replica]] - @attr.s(frozen=True, kw_only=True, auto_attribs=True) class Transformer(metaclass=ABCMeta): @@ -55,12 +55,14 @@ def entity_type(cls) -> EntityType: raise NotImplementedError @abstractmethod - def replica_type(self, entity: EntityReference) -> str: + def _replicate(self, entity: EntityReference) -> tuple[str, JSON]: """ - The name of the type of replica emitted by this transformer for a given - entity. + A tuple consisting of: + + 1. The name of the type of replica emitted by this transformer for a + given entity. See :py:attr:`Replica.replica_type`. - See :py:attr:`Replica.replica_type` + 2. The contents of the replica for that entity. 
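Editor's note: the reworked `IndexService.transform` above replaces the `unzip`-based tuple handling with a single pass that sorts each yielded document into either the contributions list or a replica dictionary keyed by coordinates, merging hub IDs when two entities produce the same replica. A stripped-down sketch of that collation step, using simple stand-in classes:

```python
from dataclasses import dataclass, field


@dataclass
class Contribution:
    entity_id: str


@dataclass
class Replica:
    coordinates: str                               # stand-in for ReplicaCoordinates
    hub_ids: list[str] = field(default_factory=list)


def collate(documents):
    contributions, replicas_by_coords = [], {}
    for document in documents:
        if isinstance(document, Contribution):
            contributions.append(document)
        elif isinstance(document, Replica):
            dup = replicas_by_coords.get(document.coordinates)
            if dup is None:
                replicas_by_coords[document.coordinates] = document
            else:
                # Same replica emitted for another hub: merge the hub IDs
                # instead of indexing a duplicate document.
                dup.hub_ids.extend(document.hub_ids)
        else:
            assert False, document
    return contributions, list(replicas_by_coords.values())


docs = [Contribution('c1'),
        Replica('r1', ['file-1']),
        Replica('r1', ['file-2'])]
contributions, replicas = collate(docs)
assert len(replicas) == 1 and replicas[0].hub_ids == ['file-1', 'file-2']
```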
""" raise NotImplementedError @@ -88,7 +90,9 @@ def estimate(self, partition: BundlePartition) -> int: """ @abstractmethod - def transform(self, partition: BundlePartition) -> Iterable[Transform]: + def transform(self, + partition: BundlePartition + ) -> Iterable[Contribution | Replica]: """ Return the contributions by the current bundle to the entities it contains metadata about. More than one bundle can contribute to a @@ -114,9 +118,10 @@ def aggregator(cls, entity_type: EntityType) -> Optional[EntityAggregator]: raise NotImplementedError def _contribution(self, - contents: MutableJSON, - entity: EntityReference + contents: JSON, + entity_id: EntityID ) -> Contribution: + entity = EntityReference(entity_type=self.entity_type(), entity_id=entity_id) coordinates = ContributionCoordinates(entity=entity, bundle=self.bundle.fqid.upcast(), deleted=self.deleted) @@ -126,17 +131,20 @@ def _contribution(self, contents=contents) def _replica(self, - contents: MutableJSON, entity: EntityReference, - hub_ids: list[EntityID] + *, + file_hub: EntityID | None, ) -> Replica: + replica_type, contents = self._replicate(entity) coordinates = ReplicaCoordinates(content_hash=json_hash(contents).hexdigest(), entity=entity) return Replica(coordinates=coordinates, version=None, - replica_type=self.replica_type(entity), + replica_type=replica_type, contents=contents, - hub_ids=hub_ids) + # The other hubs will be added when the indexer + # consolidates duplicate replicas. + hub_ids=alist(file_hub)) @classmethod @abstractmethod diff --git a/src/azul/plugins/metadata/anvil/__init__.py b/src/azul/plugins/metadata/anvil/__init__.py index 32afeb30a1..fe0c0203af 100644 --- a/src/azul/plugins/metadata/anvil/__init__.py +++ b/src/azul/plugins/metadata/anvil/__init__.py @@ -36,7 +36,6 @@ BiosampleTransformer, BundleTransformer, DatasetTransformer, - DiagnosisTransformer, DonorTransformer, FileTransformer, ) @@ -96,7 +95,6 @@ def transformer_types(self) -> Iterable[type[BaseTransformer]]: BiosampleTransformer, BundleTransformer, DatasetTransformer, - DiagnosisTransformer, DonorTransformer, FileTransformer, ) diff --git a/src/azul/plugins/metadata/anvil/bundle.py b/src/azul/plugins/metadata/anvil/bundle.py index ae25f4b423..e288018574 100644 --- a/src/azul/plugins/metadata/anvil/bundle.py +++ b/src/azul/plugins/metadata/anvil/bundle.py @@ -1,21 +1,26 @@ from abc import ( ABC, ) +from collections import ( + defaultdict, +) from typing import ( AbstractSet, Generic, - Iterable, + Mapping, + Self, TypeVar, ) import attrs -from more_itertools import ( - one, -) from azul import ( CatalogName, ) +from azul.collections import ( + aset, + none_safe_apply, +) from azul.indexer import ( BUNDLE_FQID, Bundle, @@ -43,50 +48,76 @@ class KeyReference: entity_type: EntityType -ENTITY_REF = TypeVar('ENTITY_REF', bound=EntityReference | KeyReference) +REF = TypeVar('REF', bound=EntityReference | KeyReference) @attrs.frozen(kw_only=True, order=False) -class Link(Generic[ENTITY_REF]): - inputs: AbstractSet[ENTITY_REF] = attrs.field(factory=frozenset, - converter=frozenset) +class Link(Generic[REF]): + inputs: AbstractSet[REF] = attrs.field(factory=frozenset, + converter=frozenset) - activity: ENTITY_REF | None = attrs.field(default=None) + activity: REF | None = attrs.field(default=None) - outputs: AbstractSet[ENTITY_REF] = attrs.field(factory=frozenset, - converter=frozenset) + outputs: AbstractSet[REF] = attrs.field(factory=frozenset, + converter=frozenset) @property - def all_entities(self) -> AbstractSet[ENTITY_REF]: - return self.inputs 
| self.outputs | (set() if self.activity is None else {self.activity}) + def all_entities(self) -> AbstractSet[REF]: + return self.inputs | self.outputs | aset(self.activity) @classmethod - def from_json(cls, link: JSON) -> 'Link': + def from_json(cls, link: JSON) -> Self: return cls(inputs=set(map(EntityReference.parse, link['inputs'])), - activity=None if link['activity'] is None else EntityReference.parse(link['activity']), + activity=none_safe_apply(EntityReference.parse, link['activity']), outputs=set(map(EntityReference.parse, link['outputs']))) def to_json(self) -> MutableJSON: return { 'inputs': sorted(map(str, self.inputs)), - 'activity': None if self.activity is None else str(self.activity), + 'activity': none_safe_apply(str, self.activity), 'outputs': sorted(map(str, self.outputs)) } @classmethod - def merge(cls, links: Iterable['Link']) -> 'Link': - return cls(inputs=frozenset.union(*[link.inputs for link in links]), - activity=one({link.activity for link in links}), - outputs=frozenset.union(*[link.outputs for link in links])) - - def __lt__(self, other: 'Link') -> bool: + def group_by_activity(cls, links: set[Self]): + """ + Merge links that share the same (non-null) activity. + """ + groups_by_activity: Mapping[KeyReference, set[Self]] = defaultdict(set) + for link in links: + if link.activity is not None: + groups_by_activity[link.activity].add(link) + for activity, group in groups_by_activity.items(): + if len(group) > 1: + links -= group + merged_link = cls(inputs=frozenset.union(*[link.inputs for link in group]), + activity=activity, + outputs=frozenset.union(*[link.outputs for link in group])) + links.add(merged_link) + + def __lt__(self, other: Self) -> bool: return min(self.inputs) < min(other.inputs) +class EntityLink(Link[EntityReference]): + pass + + +class KeyLink(Link[KeyReference]): + + def to_entity_link(self, + entities_by_key: Mapping[KeyReference, EntityReference] + ) -> EntityLink: + lookup = entities_by_key.__getitem__ + return EntityLink(inputs=set(map(lookup, self.inputs)), + activity=none_safe_apply(lookup, self.activity), + outputs=set(map(lookup, self.outputs))) + + @attrs.define(kw_only=True) class AnvilBundle(Bundle[BUNDLE_FQID], ABC): entities: dict[EntityReference, MutableJSON] = attrs.field(factory=dict) - links: set[Link[EntityReference]] = attrs.field(factory=set) + links: set[EntityLink] = attrs.field(factory=set) def reject_joiner(self, catalog: CatalogName): # FIXME: Optimize joiner rejection and re-enable it for AnVIL @@ -103,12 +134,12 @@ def to_json(self) -> MutableJSON: } @classmethod - def from_json(cls, fqid: BUNDLE_FQID, json_: JSON) -> 'AnvilBundle': + def from_json(cls, fqid: BUNDLE_FQID, json_: JSON) -> Self: return cls( fqid=fqid, entities={ EntityReference.parse(entity_ref): entity for entity_ref, entity in json_['entities'].items() }, - links=set(map(Link.from_json, json_['links'])) + links=set(map(EntityLink.from_json, json_['links'])) ) diff --git a/src/azul/plugins/metadata/anvil/indexer/transform.py b/src/azul/plugins/metadata/anvil/indexer/transform.py index bb6b954ff8..20107fb91d 100644 --- a/src/azul/plugins/metadata/anvil/indexer/transform.py +++ b/src/azul/plugins/metadata/anvil/indexer/transform.py @@ -22,6 +22,7 @@ Callable, Collection, Iterable, + Self, ) from uuid import ( UUID, @@ -53,6 +54,7 @@ EntityReference, EntityType, FieldTypes, + Replica, null_bool, null_int, null_str, @@ -60,12 +62,11 @@ pass_thru_json, ) from azul.indexer.transform import ( - Transform, Transformer, ) from 
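Editor's note: `Link.group_by_activity` replaces the old `_simplify_links`/`Link.merge` pair; links that name the same non-null activity are collapsed into a single link whose inputs and outputs are the unions of the group's. A self-contained approximation with a hashable stand-in link type:

```python
from collections import defaultdict
from dataclasses import dataclass


@dataclass(frozen=True)
class Link:
    inputs: frozenset[str] = frozenset()
    activity: str | None = None
    outputs: frozenset[str] = frozenset()


def group_by_activity(links: set[Link]) -> None:
    # Merge links that share the same (non-null) activity, in place.
    groups = defaultdict(set)
    for link in links:
        if link.activity is not None:
            groups[link.activity].add(link)
    for activity, group in groups.items():
        if len(group) > 1:
            links -= group
            links.add(Link(inputs=frozenset.union(*(lk.inputs for lk in group)),
                           activity=activity,
                           outputs=frozenset.union(*(lk.outputs for lk in group))))


links = {
    Link(inputs=frozenset({'file-1'}), activity='align-1', outputs=frozenset({'bam-1'})),
    Link(inputs=frozenset({'file-2'}), activity='align-1', outputs=frozenset({'bam-1'})),
    Link(inputs=frozenset({'donor-1'}), outputs=frozenset({'biosample-1'})),
}
group_by_activity(links)
assert len(links) == 2  # the two 'align-1' links were merged into one
```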
azul.plugins.metadata.anvil.bundle import ( AnvilBundle, - Link, + EntityLink, ) from azul.plugins.metadata.anvil.indexer.aggregate import ( ActivityAggregator, @@ -97,11 +98,17 @@ class LinkedEntities: def __getitem__(self, item: EntityType) -> set[EntityReference]: return self.ancestors[item] | self.descendants[item] + def __iter__(self) -> Iterable[EntityReference]: + for entities in self.ancestors.values(): + yield from entities + for entities in self.descendants.values(): + yield from entities + @classmethod def from_links(cls, origin: EntityReference, - links: Collection[Link[EntityReference]] - ) -> 'LinkedEntities': + links: Collection[EntityLink] + ) -> Self: return cls(origin=origin, ancestors=cls._search(origin, links, from_='outputs', to='inputs'), descendants=cls._search(origin, links, from_='inputs', to='outputs')) @@ -109,7 +116,7 @@ def from_links(cls, @classmethod def _search(cls, entity_ref: EntityReference, - links: Collection[Link[EntityReference]], + links: Collection[EntityLink], entities: EntityRefsByType | None = None, *, from_: str, @@ -133,9 +140,6 @@ def _search(cls, class BaseTransformer(Transformer, metaclass=ABCMeta): bundle: AnvilBundle - def replica_type(self, entity: EntityReference) -> str: - return f'anvil_{entity.entity_type}' - @classmethod def field_types(cls) -> FieldTypes: return { @@ -167,32 +171,24 @@ def aggregator(cls, entity_type) -> EntityAggregator: def estimate(self, partition: BundlePartition) -> int: return sum(map(partial(self._contains, partition), self.bundle.entities)) - def transform(self, partition: BundlePartition) -> Iterable[Transform]: - return ( - self._transform(entity) - for entity in self._list_entities() - if self._contains(partition, entity) - ) + def transform(self, + partition: BundlePartition + ) -> Iterable[Contribution | Replica]: + for entity in self._list_entities(): + if self._contains(partition, entity): + yield from self._transform(entity) def _list_entities(self) -> Iterable[EntityReference]: return self.bundle.entities @abstractmethod - def _transform(self, entity: EntityReference) -> Transform: + def _transform(self, + entity: EntityReference + ) -> Iterable[Contribution | Replica]: raise NotImplementedError - def _add_replica(self, - contribution: MutableJSON | None, - entity: EntityReference, - hub_ids: list[EntityID] - ) -> Transform: - no_replica = not config.enable_replicas or self.entity_type() == 'bundles' - return ( - None if contribution is None else self._contribution(contribution, entity), - None if no_replica else self._replica(self.bundle.entities[entity], - entity, - hub_ids) - ) + def _replicate(self, entity: EntityReference) -> tuple[str, JSON]: + return f'anvil_{entity.entity_type}', self.bundle.entities[entity] def _pluralize(self, entity_type: str) -> str: if entity_type == 'diagnosis': @@ -334,19 +330,6 @@ def get_bound(field_name: str) -> float | None: for field_prefix in field_prefixes } - def _contribution(self, - contents: MutableJSON, - entity: EntityReference, - ) -> Contribution: - # The entity type is used to determine the index name. - # All activities go into the same index, regardless of their polymorphic type. - # Index names use plural forms. 
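Editor's note: with the `Transform = tuple[Optional[Contribution], Optional[Replica]]` alias gone, a transformer's `transform` is now a generator that yields however many `Contribution` and `Replica` documents each entity warrants. A schematic sketch of the new shape; the document classes and the `_contribution`/`_replica` helpers here are simplified stand-ins, not the real ones.

```python
from typing import Iterable, Union


class Contribution(dict):
    pass


class Replica(dict):
    pass


class SketchTransformer:

    def __init__(self, entities, enable_replicas=True):
        self.entities = entities                # {entity_id: contents}
        self.enable_replicas = enable_replicas  # mirrors config.enable_replicas

    def _contribution(self, contents, entity_id) -> Contribution:
        return Contribution(entity_id=entity_id, contents=contents)

    def _replica(self, entity_id, *, file_hub) -> Replica:
        return Replica(entity_id=entity_id,
                       hub_ids=[] if file_hub is None else [file_hub])

    def transform(self) -> Iterable[Union[Contribution, Replica]]:
        # One pass over the entities; each may yield a contribution,
        # a replica, both, or neither.
        for entity_id, contents in self.entities.items():
            yield self._contribution(contents, entity_id)
            if self.enable_replicas:
                yield self._replica(entity_id, file_hub=entity_id)


docs = list(SketchTransformer({'file-1': {'size': 42}}).transform())
assert len(docs) == 2
```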
- entity_type = pluralize('activity' - if entity.entity_type.endswith('activity') else - entity.entity_type) - entity = attr.evolve(entity, entity_type=entity_type) - return super()._contribution(contents, entity) - def _entity(self, entity: EntityReference, field_types: FieldTypes, @@ -473,8 +456,8 @@ def _complete_dataset_keys(cls) -> AbstractSet[str]: class SingletonTransformer(BaseTransformer, metaclass=ABCMeta): - def _contents(self) -> MutableJSON: - return dict( + def _transform(self, entity: EntityReference) -> Iterable[Contribution]: + contents = dict( activities=self._entities(self._activity, chain.from_iterable( self._entities_by_type[activity_type] for activity_type in self._activity_polymorphic_types @@ -485,6 +468,7 @@ def _contents(self) -> MutableJSON: donors=self._entities(self._donor, self._entities_by_type['donor']), files=self._entities(self._file, self._entities_by_type['file']) ) + yield self._contribution(contents, entity.entity_id) @classmethod def field_types(cls) -> FieldTypes: @@ -503,8 +487,11 @@ def _duos_types(cls) -> FieldTypes: def _duos(self, dataset: EntityReference) -> MutableJSON: return self._entity(dataset, self._duos_types()) + def _is_duos(self, dataset: EntityReference) -> bool: + return 'description' in self.bundle.entities[dataset] + def _dataset(self, dataset: EntityReference) -> MutableJSON: - if 'description' in self.bundle.entities[dataset]: + if self._is_duos(dataset): return self._duos(dataset) else: return super()._dataset(dataset) @@ -523,19 +510,17 @@ class ActivityTransformer(BaseTransformer): def entity_type(cls) -> str: return 'activities' - def _transform(self, entity: EntityReference) -> Transform: + def _transform(self, entity: EntityReference) -> Iterable[Contribution]: linked = self._linked_entities(entity) - files = linked['file'] contents = dict( activities=[self._activity(entity)], biosamples=self._entities(self._biosample, linked['biosample']), datasets=[self._dataset(self._only_dataset())], diagnoses=self._entities(self._diagnosis, linked['diagnosis']), donors=self._entities(self._donor, linked['donor']), - files=self._entities(self._file, files), + files=self._entities(self._file, linked['file']) ) - hub_ids = [f.entity_id for f in files] - return self._add_replica(contents, entity, hub_ids) + yield self._contribution(contents, entity.entity_id) class BiosampleTransformer(BaseTransformer): @@ -544,9 +529,8 @@ class BiosampleTransformer(BaseTransformer): def entity_type(cls) -> str: return 'biosamples' - def _transform(self, entity: EntityReference) -> Transform: + def _transform(self, entity: EntityReference) -> Iterable[Contribution]: linked = self._linked_entities(entity) - files = linked['file'] contents = dict( activities=self._entities(self._activity, chain.from_iterable( linked[activity_type] @@ -556,22 +540,9 @@ def _transform(self, entity: EntityReference) -> Transform: datasets=[self._dataset(self._only_dataset())], diagnoses=self._entities(self._diagnosis, linked['diagnosis']), donors=self._entities(self._donor, linked['donor']), - files=self._entities(self._file, files), + files=self._entities(self._file, linked['file']), ) - hub_ids = [f.entity_id for f in files] - return self._add_replica(contents, entity, hub_ids) - - -class DiagnosisTransformer(BaseTransformer): - - def _transform(self, entity: EntityReference) -> Transform: - files = self._linked_entities(entity)['file'] - hub_ids = [f.entity_id for f in files] - return self._add_replica(None, entity, hub_ids) - - @classmethod - def entity_type(cls) -> 
EntityType: - return 'diagnoses' + yield self._contribution(contents, entity.entity_id) class BundleTransformer(SingletonTransformer): @@ -584,11 +555,6 @@ def _singleton(self) -> EntityReference: return EntityReference(entity_type='bundle', entity_id=self.bundle.uuid) - def _transform(self, entity: EntityReference) -> Transform: - contents = self._contents() - hub_ids = [f.entity_id for f in self._entities_by_type['file']] - return self._add_replica(contents, entity, hub_ids) - class DatasetTransformer(SingletonTransformer): @@ -599,17 +565,12 @@ def entity_type(cls) -> str: def _singleton(self) -> EntityReference: return self._only_dataset() - def _transform(self, entity: EntityReference) -> Transform: - contents = self._contents() - # Every file in a snapshot is linked to that snapshot's singular - # dataset, making an explicit list of hub IDs for the dataset both - # redundant and impractically large (we observe that for large - # snapshots, trying to track this many files in a single data structure - # causes a prohibitively high rate of conflicts during replica updates). - # Therefore, we leave the hub IDs field empty for datasets and rely on - # the tenet that every file is an implicit hub of its parent dataset. - hub_ids = [] - return self._add_replica(contents, entity, hub_ids) + def _transform(self, + entity: EntityReference + ) -> Iterable[Contribution | Replica]: + yield from super()._transform(entity) + if self._is_duos(entity): + yield self._replica(entity, file_hub=None) class DonorTransformer(BaseTransformer): @@ -618,9 +579,8 @@ class DonorTransformer(BaseTransformer): def entity_type(cls) -> str: return 'donors' - def _transform(self, entity: EntityReference) -> Transform: + def _transform(self, entity: EntityReference) -> Iterable[Contribution]: linked = self._linked_entities(entity) - files = linked['file'] contents = dict( activities=self._entities(self._activity, chain.from_iterable( linked[activity_type] @@ -630,10 +590,9 @@ def _transform(self, entity: EntityReference) -> Transform: datasets=[self._dataset(self._only_dataset())], diagnoses=self._entities(self._diagnosis, linked['diagnosis']), donors=[self._donor(entity)], - files=self._entities(self._file, files), + files=self._entities(self._file, linked['file']), ) - hub_ids = [f.entity_id for f in files] - return self._add_replica(contents, entity, hub_ids) + yield self._contribution(contents, entity.entity_id) class FileTransformer(BaseTransformer): @@ -642,7 +601,9 @@ class FileTransformer(BaseTransformer): def entity_type(cls) -> str: return 'files' - def _transform(self, entity: EntityReference) -> Transform: + def _transform(self, + entity: EntityReference + ) -> Iterable[Contribution | Replica]: linked = self._linked_entities(entity) contents = dict( activities=self._entities(self._activity, chain.from_iterable( @@ -655,8 +616,16 @@ def _transform(self, entity: EntityReference) -> Transform: donors=self._entities(self._donor, linked['donor']), files=[self._file(entity)], ) - # The result of the link traversal does not include the starting entity, - # so without this step the file itself wouldn't be included in its hubs - files = (entity, *linked['file']) - hub_ids = [f.entity_id for f in files] - return self._add_replica(contents, entity, hub_ids) + yield self._contribution(contents, entity.entity_id) + if config.enable_replicas: + yield self._replica(entity, file_hub=entity.entity_id) + for linked_entity in linked: + yield self._replica( + linked_entity, + # Datasets are linked to every file in their 
snapshot, + # making an explicit list of hub IDs for the dataset both + # redundant and impractically large. Therefore, we leave the + # hub IDs field empty for datasets and rely on the tenet + # that every file is an implicit hub of its parent dataset. + file_hub=None if linked_entity.entity_type == 'dataset' else entity.entity_id, + ) diff --git a/src/azul/plugins/metadata/hca/indexer/transform.py b/src/azul/plugins/metadata/hca/indexer/transform.py index 87cfd380fa..00380d8564 100644 --- a/src/azul/plugins/metadata/hca/indexer/transform.py +++ b/src/azul/plugins/metadata/hca/indexer/transform.py @@ -69,6 +69,7 @@ Nested, NullableString, PassThrough, + Replica, null_bool, null_datetime, null_int, @@ -78,7 +79,6 @@ pass_thru_json, ) from azul.indexer.transform import ( - Transform, Transformer, ) from azul.iterators import ( @@ -457,10 +457,6 @@ class BaseTransformer(Transformer, metaclass=ABCMeta): bundle: HCABundle api_bundle: api.Bundle - def replica_type(self, entity: EntityReference) -> str: - api_entity = self.api_bundle.entities[UUID(entity.entity_id)] - return api_entity.schema_name - @classmethod def aggregator(cls, entity_type: EntityType) -> EntityAggregator | None: if entity_type == 'files': @@ -497,22 +493,13 @@ def aggregator(cls, entity_type: EntityType) -> EntityAggregator | None: else: return SimpleAggregator() - def _add_replica(self, - contribution: MutableJSON, - entity: api.Entity | DatedEntity, - hub_ids: list[EntityID] - ) -> Transform: - entity_ref = EntityReference(entity_id=str(entity.document_id), - entity_type=self.entity_type()) - if not config.enable_replicas: - replica = None - elif self.entity_type() == 'bundles': - links = self.bundle.links - replica = self._replica(links, entity_ref, hub_ids) + def _replicate(self, entity: EntityReference) -> tuple[str, JSON]: + if entity == self.api_bundle.ref: + content = self.bundle.links else: - assert isinstance(entity, api.Entity), entity - replica = self._replica(entity.json, entity_ref, hub_ids) - return self._contribution(contribution, entity_ref), replica + api_entity = self.api_bundle.entities[UUID(entity.entity_id)] + content = api_entity.json + return entity.entity_type, content def _find_ancestor_samples(self, entity: api.LinkedEntity, @@ -1385,10 +1372,18 @@ def visit(self, entity: api.Entity) -> None: # noinspection PyDeprecation file_name = entity.manifest_entry.name is_zarr, zarr_name, sub_name = _parse_zarr_file_name(file_name) - # FIXME: Remove condition once https://github.com/HumanCellAtlas/metadata-schema/issues/623 is resolved + # zarray files no longer exist in DCP2. This condition may no longer + # be needed to support them, but we don't want to risk removing it. if not is_zarr or sub_name.endswith('.zattrs'): self.files[entity.document_id] = entity + @property + def entities(self) -> Iterable[EntityReference]: + for entity_dict in vars(self).values(): + for entity in entity_dict.values(): + yield EntityReference(entity_type=entity.schema_name, + entity_id=str(entity.document_id)) + ENTITY = TypeVar('ENTITY', bound=api.Entity) @@ -1396,7 +1391,9 @@ def visit(self, entity: api.Entity) -> None: class PartitionedTransformer(BaseTransformer, Generic[ENTITY]): @abstractmethod - def _transform(self, entities: Iterable[ENTITY]) -> Iterable[Transform]: + def _transform(self, + entities: Iterable[ENTITY] + ) -> Iterable[Contribution | Replica]: """ Transform the given outer entities into contributions. 
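Editor's note: `FileTransformer` now emits replicas for the file itself and for each linked entity, and the hub-ID policy described in the comments above comes down to this: the file is the hub for every replica except the dataset's (and, on the HCA side, the project's), whose hub list is left empty because every file is an implicit hub of it. A compact sketch of that decision, using a hypothetical `emit_replicas` helper:

```python
from dataclasses import dataclass


@dataclass
class Replica:
    entity_type: str
    entity_id: str
    hub_ids: list[str]


def emit_replicas(file_id: str,
                  linked_entities: list[tuple[str, str]]
                  ) -> list[Replica]:
    # Hypothetical helper: linked_entities is a list of
    # (entity_type, entity_id) pairs reachable from the file.
    replicas = [Replica('file', file_id, [file_id])]
    for entity_type, entity_id in linked_entities:
        # Every file in a snapshot links to the one dataset, so an explicit
        # hub list for the dataset would be redundant and impractically
        # large; leave it empty and rely on the file being an implicit hub
        # of its dataset.
        file_hub = None if entity_type == 'dataset' else file_id
        replicas.append(Replica(entity_type, entity_id,
                                [] if file_hub is None else [file_hub]))
    return replicas


replicas = emit_replicas('file-1', [('biosample', 'b-1'), ('dataset', 'd-1')])
assert [r.hub_ids for r in replicas] == [['file-1'], ['file-1'], []]
```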
""" @@ -1415,7 +1412,9 @@ def _entities_in(self, partition: BundlePartition) -> Iterator[ENTITY]: def estimate(self, partition: BundlePartition) -> int: return ilen(self._entities_in(partition)) - def transform(self, partition: BundlePartition) -> Iterable[Transform]: + def transform(self, + partition: BundlePartition + ) -> Iterable[Contribution | Replica]: return self._transform(generable(self._entities_in, partition)) @@ -1428,12 +1427,15 @@ def entity_type(cls) -> str: def _entities(self) -> Iterable[api.File]: return self.api_bundle.not_stitched(self.api_bundle.files) - def _transform(self, files: Iterable[api.File]) -> Iterable[Contribution]: + def _transform(self, + files: Iterable[api.File] + ) -> Iterable[Contribution | Replica]: zarr_stores: Mapping[str, list[api.File]] = self.group_zarrs(files) for file in files: file_name = file.manifest_entry.name is_zarr, zarr_name, sub_name = _parse_zarr_file_name(file_name) - # FIXME: Remove condition once https://github.com/HumanCellAtlas/metadata-schema/issues/579 is resolved + # zarray files no longer exist in DCP2. This condition may no longer + # be needed to support them, but we don't want to risk removing it. if not is_zarr or sub_name.endswith('.zattrs'): if is_zarr: # This is the representative file, so add the related files @@ -1467,8 +1469,18 @@ def _transform(self, files: Iterable[api.File]) -> Iterable[Contribution]: additional_contents = self.matrix_stratification_values(file) for entity_type, values in additional_contents.items(): contents[entity_type].extend(values) - hub_ids = list(map(str, visitor.files)) - yield self._add_replica(contents, file, hub_ids) + file_id = file.ref.entity_id + yield self._contribution(contents, file_id) + if config.enable_replicas: + yield self._replica(self.api_bundle.ref, file_hub=file_id) + # Projects are linked to every file in their snapshot, + # making an explicit list of hub IDs for the project both + # redundant and impractically large. Therefore, we leave the + # hub IDs field empty for projects and rely on the tenet + # that every file is an implicit hub of its parent project. 
+ yield self._replica(self._api_project.ref, file_hub=None) + for linked_entity in visitor.entities: + yield self._replica(linked_entity, file_hub=file_id) def matrix_stratification_values(self, file: api.File) -> JSON: """ @@ -1563,8 +1575,7 @@ def _transform(self, ), dates=[self._date(cell_suspension)], projects=[self._project(self._api_project)]) - hub_ids = list(map(str, visitor.files)) - yield self._add_replica(contents, cell_suspension, hub_ids) + yield self._contribution(contents, cell_suspension.ref.entity_id) class SampleTransformer(PartitionedTransformer): @@ -1609,8 +1620,7 @@ def _transform(self, samples: Iterable[Sample]) -> Iterable[Contribution]: ), dates=[self._date(sample)], projects=[self._project(self._api_project)]) - hub_ids = list(map(str, visitor.files)) - yield self._add_replica(contents, sample, hub_ids) + yield self._contribution(contents, sample.ref.entity_id) class BundleAsEntity(DatedEntity): @@ -1646,13 +1656,11 @@ def _dated_entities(self) -> Iterable[DatedEntity]: def estimate(self, partition: BundlePartition) -> int: return int(partition.contains(self._singleton_id)) - def transform(self, partition: BundlePartition) -> Iterable[Transform]: + def transform(self, partition: BundlePartition) -> Iterable[Contribution]: if partition.contains(self._singleton_id): yield self._transform() - else: - return () - def _transform(self) -> Transform: + def _transform(self) -> Contribution: # Project entities are not explicitly linked in the graph. The mere # presence of project metadata in a bundle indicates that all other # entities in that bundle belong to that project. Because of that we @@ -1710,12 +1718,7 @@ def _transform(self) -> Transform: contributed_analyses=contributed_analyses, dates=[self._date(self._singleton_entity())], projects=[self._project(self._api_project)]) - hub_ids = self._hub_ids(visitor) - return self._add_replica(contents, self._singleton_entity(), hub_ids) - - @abstractmethod - def _hub_ids(self, visitor: TransformerVisitor) -> list[str]: - raise NotImplementedError + return self._contribution(contents, str(self._singleton_id)) class ProjectTransformer(SingletonTransformer): @@ -1727,24 +1730,12 @@ def _singleton_entity(self) -> DatedEntity: def entity_type(cls) -> str: return 'projects' - def _hub_ids(self, visitor: TransformerVisitor) -> list[str]: - # Every file in a snapshot is linked to that snapshot's singular - # project, making an explicit list of hub IDs for the project both - # redundant and impractically large. Therefore, we leave the hub IDs - # field empty for projects and rely on the tenet that every file is an - # implicit hub of its parent project. 
- return [] - class BundleTransformer(SingletonTransformer): def _singleton_entity(self) -> DatedEntity: return BundleAsEntity(self.api_bundle) - def replica_type(self, entity: EntityReference) -> str: - assert entity.entity_type == self.entity_type(), entity - return 'links' - @classmethod def aggregator(cls, entity_type: EntityType) -> EntityAggregator | None: if entity_type == 'files': @@ -1755,6 +1746,3 @@ def aggregator(cls, entity_type: EntityType) -> EntityAggregator | None: @classmethod def entity_type(cls) -> str: return 'bundles' - - def _hub_ids(self, visitor: TransformerVisitor) -> list[str]: - return list(map(str, visitor.files)) diff --git a/src/azul/plugins/repository/tdr_anvil/__init__.py b/src/azul/plugins/repository/tdr_anvil/__init__.py index a39ce5b3ac..e276fe6b8a 100644 --- a/src/azul/plugins/repository/tdr_anvil/__init__.py +++ b/src/azul/plugins/repository/tdr_anvil/__init__.py @@ -1,7 +1,4 @@ -from collections import ( - defaultdict, -) -import datetime +import datetime from enum import ( Enum, ) @@ -12,7 +9,7 @@ from typing import ( AbstractSet, Callable, - Mapping, + Iterable, ) import attrs @@ -41,9 +38,10 @@ ) from azul.plugins.metadata.anvil.bundle import ( AnvilBundle, + EntityLink, Key, + KeyLink, KeyReference, - Link, ) from azul.plugins.metadata.anvil.schema import ( anvil_schema, @@ -71,7 +69,7 @@ MutableKeys = set[KeyReference] KeysByType = dict[EntityType, AbstractSet[Key]] MutableKeysByType = dict[EntityType, set[Key]] -KeyLinks = set[Link[KeyReference]] +KeyLinks = set[KeyLink] class BundleEntityType(Enum): @@ -153,21 +151,9 @@ def add_entity(self, crc32='') self.entities[entity] = metadata - def add_links(self, - links: KeyLinks, - entities_by_key: Mapping[KeyReference, EntityReference]) -> None: - def key_ref_to_entity_ref(key_ref: KeyReference) -> EntityReference: - return entities_by_key[key_ref] - - def optional_key_ref_to_entity_ref(key_ref: KeyReference | None) -> EntityReference | None: - return None if key_ref is None else key_ref_to_entity_ref(key_ref) - - self.links.update( - Link(inputs=set(map(key_ref_to_entity_ref, link.inputs)), - activity=optional_key_ref_to_entity_ref(link.activity), - outputs=set(map(key_ref_to_entity_ref, link.outputs))) - for link in links - ) + def add_links(self, links: Iterable[EntityLink]): + self.links.update(links) + EntityLink.group_by_activity(self.links) class Plugin(TDRPlugin[TDRAnvilBundle, TDRSourceSpec, TDRSourceRef, TDRAnvilBundleFQID]): @@ -307,7 +293,6 @@ def _primary_bundle(self, bundle_fqid: TDRAnvilBundleFQID) -> TDRAnvilBundle: log.info('Found %i entities linked to bundle %r: %r', len(keys), bundle_fqid.uuid, arg) - self._simplify_links(links) result = TDRAnvilBundle(fqid=bundle_fqid) entities_by_key: dict[KeyReference, EntityReference] = {} for entity_type, typed_keys in sorted(keys_by_type.items()): @@ -328,7 +313,7 @@ def _primary_bundle(self, bundle_fqid: TDRAnvilBundleFQID) -> TDRAnvilBundle: entity = EntityReference(entity_id=row['datarepo_row_id'], entity_type=entity_type) entities_by_key[key] = entity result.add_entity(entity, self._version, row) - result.add_links(links, entities_by_key) + result.add_links((link.to_entity_link(entities_by_key) for link in links)) return result def _supplementary_bundle(self, bundle_fqid: TDRAnvilBundleFQID) -> TDRAnvilBundle: @@ -350,18 +335,15 @@ def _supplementary_bundle(self, bundle_fqid: TDRAnvilBundleFQID) -> TDRAnvilBund SELECT {', '.join(sorted(columns))} FROM {backtick(self._full_table_name(source, linked_entity_type))} '''))) - entities_by_key = 
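Editor's note: `TDRAnvilBundle.add_links` no longer takes the `entities_by_key` mapping; the repository plugin now resolves each `KeyLink` into an `EntityLink` first via `KeyLink.to_entity_link` and passes the resolved links in. A self-contained sketch of that resolution step with simplified reference types (the real method returns an `EntityLink`; this one returns a plain tuple):

```python
from dataclasses import dataclass


@dataclass(frozen=True)
class KeyReference:
    key: str
    entity_type: str


@dataclass(frozen=True)
class EntityReference:
    entity_type: str
    entity_id: str


@dataclass(frozen=True)
class KeyLink:
    inputs: frozenset[KeyReference]
    outputs: frozenset[KeyReference]
    activity: KeyReference | None = None

    def to_entity_link(self,
                       entities_by_key: dict[KeyReference, EntityReference]
                       ) -> tuple:
        # Translate every key-based reference to the entity reference that
        # was recorded for it while the bundle's rows were being fetched.
        lookup = entities_by_key.__getitem__
        return (frozenset(map(lookup, self.inputs)),
                None if self.activity is None else lookup(self.activity),
                frozenset(map(lookup, self.outputs)))


biosample_key = KeyReference('BS_01', 'biosample')
dataset_key = KeyReference('DS_01', 'dataset')
entities_by_key = {
    biosample_key: EntityReference('biosample', 'datarepo-row-1'),
    dataset_key: EntityReference('dataset', 'datarepo-row-2'),
}
link = KeyLink(inputs=frozenset({dataset_key}), outputs=frozenset({biosample_key}))
inputs, activity, outputs = link.to_entity_link(entities_by_key)
assert activity is None and len(inputs) == len(outputs) == 1
```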
{} link_args = {} for entity_type, row, arg in [ (bundle_entity_type, bundle_entity, 'outputs'), (linked_entity_type, linked_entity, 'inputs') ]: entity_ref = EntityReference(entity_type=entity_type, entity_id=row['datarepo_row_id']) - key_ref = KeyReference(key=row[entity_type + '_id'], entity_type=entity_type) - entities_by_key[key_ref] = entity_ref result.add_entity(entity_ref, self._version, row) - link_args[arg] = {key_ref} - result.add_links({Link(**link_args)}, entities_by_key) + link_args[arg] = {entity_ref} + result.add_links({EntityLink(**link_args)}) return result def _duos_bundle(self, bundle_fqid: TDRAnvilBundleFQID) -> TDRAnvilBundle: @@ -410,15 +392,6 @@ def _consolidate_by_type(self, entities: Keys) -> MutableKeysByType: result[e.entity_type].add(e.key) return result - def _simplify_links(self, links: KeyLinks) -> None: - grouped_links: Mapping[KeyReference, KeyLinks] = defaultdict(set) - for link in links: - grouped_links[link.activity].add(link) - for activity, convergent_links in grouped_links.items(): - if activity is not None and len(convergent_links) > 1: - links -= convergent_links - links.add(Link.merge(convergent_links)) - def _follow_upstream(self, source: TDRSourceSpec, entities: KeysByType @@ -478,13 +451,13 @@ def _upstream_from_biosamples(self, for row in rows: downstream_ref = KeyReference(entity_type='biosample', key=row['biosample_id']) - result.add(Link(outputs={downstream_ref}, - inputs={KeyReference(entity_type='dataset', - key=one(row['part_of_dataset_id']))})) + result.add(KeyLink(outputs={downstream_ref}, + inputs={KeyReference(entity_type='dataset', + key=one(row['part_of_dataset_id']))})) for donor_id in row['donor_id']: - result.add(Link(outputs={downstream_ref}, - inputs={KeyReference(entity_type='donor', - key=donor_id)})) + result.add(KeyLink(outputs={downstream_ref}, + inputs={KeyReference(entity_type='donor', + key=donor_id)})) return result else: return set() @@ -546,7 +519,7 @@ def _upstream_from_files(self, ON f.file_id IN UNNEST(a.generated_file_id) ''') return { - Link( + KeyLink( activity=KeyReference(entity_type=row['activity_table'], key=row['activity_id']), # The generated link is not a complete representation of the # upstream activity because it does not include generated files @@ -575,9 +548,9 @@ def _diagnoses_from_donors(self, WHERE dgn.donor_id IN ({', '.join(map(repr, donor_ids))}) ''') return { - Link(inputs={KeyReference(key=row['diagnosis_id'], entity_type='diagnosis')}, - outputs={KeyReference(key=row['donor_id'], entity_type='donor')}, - activity=None) + KeyLink(inputs={KeyReference(key=row['diagnosis_id'], entity_type='diagnosis')}, + outputs={KeyReference(key=row['donor_id'], entity_type='donor')}, + activity=None) for row in rows } else: @@ -620,12 +593,12 @@ def _downstream_from_biosamples(self, WHERE biosample_id IN ({', '.join(map(repr, biosample_ids))}) ''') return { - Link(inputs={KeyReference(key=row['biosample_id'], entity_type='biosample')}, - outputs={ - KeyReference(key=output_id, entity_type='file') - for output_id in row['generated_file_id'] - }, - activity=KeyReference(key=row['activity_id'], entity_type=row['activity_table'])) + KeyLink(inputs={KeyReference(key=row['biosample_id'], entity_type='biosample')}, + outputs={ + KeyReference(key=output_id, entity_type='file') + for output_id in row['generated_file_id'] + }, + activity=KeyReference(key=row['activity_id'], entity_type=row['activity_table'])) for row in rows } else: @@ -666,12 +639,12 @@ def _downstream_from_files(self, WHERE used_file_id IN ({', 
'.join(map(repr, file_ids))}) ''') return { - Link(inputs={KeyReference(key=row['used_file_id'], entity_type='file')}, - outputs={ - KeyReference(key=file_id, entity_type='file') - for file_id in row['generated_file_id'] - }, - activity=KeyReference(key=row['activity_id'], entity_type=row['activity_table'])) + KeyLink(inputs={KeyReference(key=row['used_file_id'], entity_type='file')}, + outputs={ + KeyReference(key=file_id, entity_type='file') + for file_id in row['generated_file_id'] + }, + activity=KeyReference(key=row['activity_id'], entity_type=row['activity_table'])) for row in rows } else: diff --git a/src/azul/plugins/repository/tdr_hca/__init__.py b/src/azul/plugins/repository/tdr_hca/__init__.py index dcc9f733b6..320b0a4f1b 100644 --- a/src/azul/plugins/repository/tdr_hca/__init__.py +++ b/src/azul/plugins/repository/tdr_hca/__init__.py @@ -56,6 +56,9 @@ TDRBundleFQID, TDRPlugin, ) +from azul.strings import ( + single_quote as sq, +) from azul.terra import ( TDRSourceRef, TDRSourceSpec, @@ -439,22 +442,19 @@ def _retrieve_entities(self, table_name = backtick(self._full_table_name(source, entity_type)) entity_id_type = one(set(map(type, entity_ids))) - def quote(s): - return f"'{s}'" - if entity_type == 'links': assert issubclass(entity_id_type, BundleFQID), entity_id_type entity_ids = cast(set[BundleFQID], entity_ids) where_columns = (pk_column, version_column) where_values = ( - (quote(fqid.uuid), f'TIMESTAMP({quote(fqid.version)})') + (sq(fqid.uuid), f'TIMESTAMP({sq(fqid.version)})') for fqid in entity_ids ) expected = {fqid.uuid for fqid in entity_ids} else: assert issubclass(entity_id_type, str), (entity_type, entity_id_type) where_columns = (pk_column,) - where_values = ((quote(entity_id),) for entity_id in entity_ids) + where_values = ((sq(str(entity_id)),) for entity_id in entity_ids) expected = entity_ids query = f''' SELECT {', '.join({pk_column, *non_pk_columns})} diff --git a/src/azul/service/manifest_service.py b/src/azul/service/manifest_service.py index 953c58ece7..c82222fda2 100644 --- a/src/azul/service/manifest_service.py +++ b/src/azul/service/manifest_service.py @@ -140,6 +140,9 @@ StorageObjectNotFound, StorageService, ) +from azul.strings import ( + double_quote as dq, +) from azul.types import ( AnyJSON, FlatJSON, @@ -887,9 +890,8 @@ def _cmd_exe_quote(cls, s: str) -> str: """ Escape a string for insertion into a `cmd.exe` command line """ - assert '"' not in s, s assert '\\' not in s, s - return f'"{s}"' + return dq(s) @classmethod def command_lines(cls, diff --git a/src/azul/strings.py b/src/azul/strings.py index 494880c8e9..72b7995fca 100644 --- a/src/azul/strings.py +++ b/src/azul/strings.py @@ -9,6 +9,10 @@ minmax, ) +from azul import ( + reject, +) + def to_camel_case(text: str) -> str: camel_cased = ''.join(part.title() for part in text.split('_')) @@ -223,3 +227,109 @@ def longest_common_prefix(strings: Iterable[str]) -> Optional[str]: if s2[i] != c: return s1[:i] return s1 + + +def join_lines(*lines: str) -> str: + """ + Join the arguments with a newline character. + + >>> join_lines() + '' + + >>> join_lines('a') + 'a' + + >>> join_lines('a', 'b') + 'a\\nb' + """ + return '\n'.join(lines) + + +def join_words(*words: str) -> str: + """ + Join the arguments with a space character. 
+ + >>> join_words() + '' + + >>> join_words('a') + 'a' + + >>> join_words('a', 'b') + 'a b' + """ + return ' '.join(words) + + +def delimit(s: str, delimiter: str) -> str: + """ + Prepend and append a delimiter to a string after ensuring that the former + does not occur in the latter. + + >>> delimit('foo', "'") + "'foo'" + + >>> delimit("foo's", "'") + Traceback (most recent call last): + ... + azul.RequirementError: ("'", 'must not occur in', "foo's") + """ + reject(delimiter in s, delimiter, 'must not occur in', s) + return delimiter + s + delimiter + + +def back_quote(*words: str) -> str: + """ + Join the arguments with a space character and enclose the result in back + quotes. The arguments must not contain back quotes. + + >>> back_quote() + '``' + + >>> back_quote('foo', 'bar') + '`foo bar`' + + >>> back_quote('foo`s') + Traceback (most recent call last): + ... + azul.RequirementError: ('`', 'must not occur in', 'foo`s') + """ + return delimit(join_words(*words), '`') + + +def single_quote(*words: str) -> str: + """ + Join the arguments with a space character and enclose the result in single + quotes. The arguments must not contain single quotes. + + >>> single_quote() + "''" + + >>> single_quote('foo', 'bar') + "'foo bar'" + + >>> single_quote("foo", "bar's") + Traceback (most recent call last): + ... + azul.RequirementError: ("'", 'must not occur in', "foo bar's") + """ + return delimit(join_words(*words), "'") + + +def double_quote(*words: str) -> str: + """ + Join the arguments with a space character and enclose the result in double + quotes. The arguments must not contain double quotes. + + >>> double_quote() + '""' + + >>> double_quote('foo', 'bar') + '"foo bar"' + + >>> double_quote('foo', 'b"a"r') + Traceback (most recent call last): + ... + azul.RequirementError: ('"', 'must not occur in', 'foo b"a"r') + """ + return delimit(join_words(*words), '"') diff --git a/src/azul/terraform.py b/src/azul/terraform.py index cd9afe287f..f40382e80f 100644 --- a/src/azul/terraform.py +++ b/src/azul/terraform.py @@ -28,6 +28,9 @@ config, require, ) +from azul.chalice import ( + AzulChaliceApp, +) from azul.deployment import ( aws, ) @@ -802,11 +805,34 @@ def tf_config(self, app_name): # Setting this property using AWS API Gateway extensions to the OpenAPI # specification works around this issue. # + # We ran into similar difficulties when using Terraform to configure + # default responses for the API, so we use these extensions for that + # purpose, too. + # + openapi_spec = json.loads(locals[app_name]) rest_api = resources['aws_api_gateway_rest_api'][app_name] assert 'minimum_compression_size' not in rest_api, rest_api - openapi_spec = json.loads(locals[app_name]) key = 'x-amazon-apigateway-minimum-compression-size' openapi_spec[key] = config.minimum_compression_size + assert 'aws_api_gateway_gateway_response' not in resources, resources + openapi_spec['x-amazon-apigateway-gateway-responses'] = { + f'DEFAULT_{response_type}': { + 'responseParameters': { + # Static value response header parameters must be enclosed + # within a pair of single quotes. + # + # https://docs.aws.amazon.com/apigateway/latest/developerguide/request-response-data-mappings.html#mapping-response-parameters + # https://docs.aws.amazon.com/apigateway/latest/developerguide/api-gateway-swagger-extensions-gateway-responses.html + # + # Note that azul.strings.single_quote() is not used here + # since API Gateway allows internal single quotes in the + # value, which that function would prohibit. 
+ # + f'gatewayresponse.header.{k}': f"'{v}'" + for k, v in AzulChaliceApp.security_headers.items() + } + } for response_type in ['4XX', '5XX'] + } locals[app_name] = json.dumps(openapi_spec) return { diff --git a/src/humancellatlas/data/metadata/api.py b/src/humancellatlas/data/metadata/api.py index 26c650c250..bd637f664c 100644 --- a/src/humancellatlas/data/metadata/api.py +++ b/src/humancellatlas/data/metadata/api.py @@ -138,8 +138,9 @@ def _optional_datetime(self, s: str | None) -> datetime | None: return s if s is None else self._datetime(s) @property - def address(self): - return self.schema_name + '@' + str(self.document_id) + def ref(self) -> EntityReference: + return EntityReference(entity_type=self.schema_name, + entity_id=str(self.document_id)) @property def schema_name(self): @@ -202,9 +203,10 @@ def accept(self, visitor: EntityVisitor): class LinkError(RuntimeError): def __init__(self, entity: LinkedEntity, other_entity: Entity, forward: bool) -> None: - super().__init__(entity.address + - ' cannot ' + ('reference ' if forward else 'be referenced by ') + - other_entity.address) + super().__init__( + f'{entity.ref} cannot {"reference" if forward else "be referenced by"} ' + f'{other_entity.ref}' + ) L = TypeVar('L', bound=LinkedEntity) @@ -1042,6 +1044,10 @@ def sequencing_output(self) -> list[SequenceFile]: if isinstance(f, SequenceFile) and any(ps.is_sequencing_process() for ps in f.from_processes.values())] + @property + def ref(self) -> EntityReference: + return EntityReference(entity_type='links', entity_id=str(self.uuid)) + entity_types = { # Biomaterials diff --git a/terraform/browser/browser.tf.json.template.py b/terraform/browser/browser.tf.json.template.py index d104051235..c1818ce65e 100644 --- a/terraform/browser/browser.tf.json.template.py +++ b/terraform/browser/browser.tf.json.template.py @@ -31,6 +31,11 @@ from azul.deployment import ( aws, ) +from azul.strings import ( + double_quote as dq, + join_words as jw, + single_quote as sq, +) from azul.terraform import ( block_public_s3_bucket_access, emit_tf, @@ -370,7 +375,7 @@ def emit(): '--fail', '--silent', gitlab_helper.curl_auth_flags(), - quote(gitlab_helper.tarball_url(site)) + dq(gitlab_helper.tarball_url(site)) ]), ' '.join([ # --transform is specific to GNU Tar, which, on macOS must be installed @@ -492,60 +497,54 @@ def predicate(s): def content_security_policy() -> str: - def q(s: str) -> str: - return f"'{s}'" - - def s(*args: str) -> str: - return ' '.join(args) - - self = q('self') - none = q('none') - unsafe_inline = q('unsafe-inline') - unsafe_eval = q('unsafe-eval') + self = sq('self') + none = sq('none') + unsafe_inline = sq('unsafe-inline') + unsafe_eval = sq('unsafe-eval') return ';'.join([ - s('default-src', self), - s('object-src', none), - s('frame-src', none), - s('frame-ancestors', none), - s('child-src', none), - s('img-src', - self, - 'data:', - 'https://lh3.googleusercontent.com', - 'https://www.google-analytics.com', - 'https://www.googletagmanager.com'), - s('script-src', - self, - unsafe_inline, - unsafe_eval, - 'https://accounts.google.com/gsi/client', - 'https://www.google-analytics.com', - 'https://www.googletagmanager.com'), - s('style-src', - self, - unsafe_inline, - 'https://fonts.googleapis.com', - 'https://p.typekit.net', - 'https://use.typekit.net'), - s('font-src', - self, - 'data:', - 'https://fonts.gstatic.com', - 'https://use.typekit.net/af/'), - s('connect-src', - self, - 'https://www.google-analytics.com', - 'https://www.googleapis.com/oauth2/v3/userinfo', - 
'https://www.googletagmanager.com', - 'https://support.terra.bio/api/v2/', - str(furl(config.sam_service_url, - path='/register/user/v1')), - str(furl(config.sam_service_url, - path='/register/user/v2/self/termsOfServiceDetails')), - str(furl(config.terra_service_url, - path='/api/nih/status')), - str(config.service_endpoint)) + jw('default-src', self), + jw('object-src', none), + jw('frame-src', none), + jw('frame-ancestors', none), + jw('child-src', none), + jw('img-src', + self, + 'data:', + 'https://lh3.googleusercontent.com', + 'https://www.google-analytics.com', + 'https://www.googletagmanager.com'), + jw('script-src', + self, + unsafe_inline, + unsafe_eval, + 'https://accounts.google.com/gsi/client', + 'https://www.google-analytics.com', + 'https://www.googletagmanager.com'), + jw('style-src', + self, + unsafe_inline, + 'https://fonts.googleapis.com', + 'https://p.typekit.net', + 'https://use.typekit.net'), + jw('font-src', + self, + 'data:', + 'https://fonts.gstatic.com', + 'https://use.typekit.net/af/'), + jw('connect-src', + self, + 'https://www.google-analytics.com', + 'https://www.googleapis.com/oauth2/v3/userinfo', + 'https://www.googletagmanager.com', + 'https://support.terra.bio/api/v2/', + str(furl(config.sam_service_url, + path='/register/user/v1')), + str(furl(config.sam_service_url, + path='/register/user/v2/self/termsOfServiceDetails')), + str(furl(config.terra_service_url, + path='/api/nih/status')), + str(config.service_endpoint)) ]) @@ -613,7 +612,7 @@ def curl_auth_flags(self) -> str: token_type, token = 'PRIVATE', config.gitlab_access_token if token is None: token_type, token = 'JOB', os.environ['CI_JOB_TOKEN'] - header = quote(f'{token_type}-TOKEN: {token}') + header = dq(token_type + '-TOKEN:', token) return '--header ' + header def tarball_hash(self, site: config.BrowserSite) -> str: @@ -652,11 +651,6 @@ def tarball_version(self, branch: str) -> str: return branch.replace('/', '_') -def quote(s): - assert '"' not in s, s - return '"' + s + '"' - - gitlab_helper = GitLabHelper() del GitLabHelper diff --git a/terraform/gitlab/gitlab.tf.json.template.py b/terraform/gitlab/gitlab.tf.json.template.py index 6e4f544853..6c5e0f28a6 100644 --- a/terraform/gitlab/gitlab.tf.json.template.py +++ b/terraform/gitlab/gitlab.tf.json.template.py @@ -26,6 +26,10 @@ ) from azul.strings import ( departition, + double_quote as dq, + join_lines as jl, + join_words as jw, + single_quote as sq, ) from azul.terraform import ( chalice, @@ -272,22 +276,6 @@ def merge(sets: Iterable[Iterable[str]]) -> Iterable[str]: return sorted(set(chain(*sets))) -def jw(*words): - return ' '.join(words) - - -def jl(*lines): - return '\n'.join(lines) - - -def qq(*words): - return '"' + jw(*words) + '"' - - -def sq(*words): - return "'" + jw(*words) + "'" - - emit_tf({} if config.terraform_component != 'gitlab' else { 'data': { 'aws_sns_topic': { @@ -1882,7 +1870,7 @@ def sq(*words): str(clamav_image), '/bin/sh', '-c', - qq( + dq( 'freshclam', '&& echo freshclam succeeded', '|| (echo freshclam failed; false)', diff --git a/terraform/shared/shared.tf.json.template.py b/terraform/shared/shared.tf.json.template.py index 129d879c68..0c46a1a467 100644 --- a/terraform/shared/shared.tf.json.template.py +++ b/terraform/shared/shared.tf.json.template.py @@ -385,9 +385,8 @@ def conformance_pack(name: str) -> str: # security boundary, so vulnerabilities that are detected within # them do not need to be addressed with the same urgency. 
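Editor's note: the browser template now builds its Content-Security-Policy from the shared `join_words`/`single_quote` helpers instead of module-local ones. For reference, a tiny sketch of how those helpers compose into the final header value; the directive list is abbreviated and the helpers are simplified (the real `single_quote` also rejects embedded single quotes).

```python
def join_words(*words: str) -> str:
    return ' '.join(words)


def single_quote(*words: str) -> str:
    # Simplified: no rejection of embedded single quotes here.
    return "'" + join_words(*words) + "'"


jw, sq = join_words, single_quote

self_, none = sq('self'), sq('none')
csp = ';'.join([
    jw('default-src', self_),
    jw('object-src', none),
    jw('img-src', self_, 'data:', 'https://www.googletagmanager.com'),
])
assert csp == ("default-src 'self';object-src 'none';"
               "img-src 'self' data: https://www.googletagmanager.com")
```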
# - # Using CF stack because the AWS provider does not support - # Inspector filters and the AWSCC provider fails due to - # https://github.com/hashicorp/terraform-provider-awscc/issues/1364 + # FIXME: Remove workaround for false Terraform bug + # https://github.com/DataBiosphere/azul/issues/6577 'inspector_filters': { 'name': config.qualified_resource_name('inspectorfilters'), 'template_body': json.dumps({ diff --git a/test/hca_metadata_api/test.py b/test/hca_metadata_api/test.py index 6ae9ac7077..bdb1514158 100644 --- a/test/hca_metadata_api/test.py +++ b/test/hca_metadata_api/test.py @@ -437,7 +437,7 @@ def _assert_bundle(self, uuid, version, manifest, metadata, links, self.assertIn(DonorOrganism, root_entity_types) self.assertTrue({DonorOrganism, SupplementaryFile}.issuperset(root_entity_types)) root_entity = next(iter(root_entities)) - self.assertRegex(root_entity.address, 'donor_organism@.*') + self.assertEqual(root_entity.ref.entity_type, 'donor_organism') self.assertIsInstance(root_entity, DonorOrganism) self.assertEqual(root_entity.organism_age_in_seconds, age_range) self.assertTrue(root_entity.sex in ('female', 'male', 'unknown')) diff --git a/test/indexer/data/aaa96233-bf27-44c7-82df-b4dc15ad4d9d.2018-11-02T11:33:44.698028Z.results.json b/test/indexer/data/aaa96233-bf27-44c7-82df-b4dc15ad4d9d.2018-11-02T11:33:44.698028Z.results.json index b1db16dbda..d7b65274b4 100644 --- a/test/indexer/data/aaa96233-bf27-44c7-82df-b4dc15ad4d9d.2018-11-02T11:33:44.698028Z.results.json +++ b/test/indexer/data/aaa96233-bf27-44c7-82df-b4dc15ad4d9d.2018-11-02T11:33:44.698028Z.results.json @@ -1,6 +1,6 @@ [ { - "_id": "bundles_aaa96233-bf27-44c7-82df-b4dc15ad4d9d_50b88c774d130ce5cb9e680241549c2df665efca", + "_id": "links_aaa96233-bf27-44c7-82df-b4dc15ad4d9d_50b88c774d130ce5cb9e680241549c2df665efca", "_index": "azul_v2_dev_test_replica", "_score": 1.0, "_type": "_doc", @@ -3460,7 +3460,7 @@ } }, { - "_id": "files_0c5ac7c0-817e-40d4-b1b1-34c3d5cfecdb_4628c80cede8a4dd39584f8b50619edacc7f62e7", + "_id": "sequence_file_0c5ac7c0-817e-40d4-b1b1-34c3d5cfecdb_4628c80cede8a4dd39584f8b50619edacc7f62e7", "_index": "azul_v2_dev_test_replica", "_score": 1.0, "_source": { @@ -3491,7 +3491,7 @@ "_type": "_doc" }, { - "_id": "cell_suspensions_412898c5-5b9b-4907-b07c-e9b89666e204_f8ca7504548249c223277135af497beb5fba29df", + "_id": "cell_suspension_412898c5-5b9b-4907-b07c-e9b89666e204_f8ca7504548249c223277135af497beb5fba29df", "_index": "azul_v2_dev_test_replica", "_score": 1.0, "_source": { @@ -3530,7 +3530,7 @@ "_type": "_doc" }, { - "_id": "files_70d1af4a-82c8-478a-8960-e9028b3616ca_8ee6fbddb7c933ebe39b31b977c51af21687f036", + "_id": "sequence_file_70d1af4a-82c8-478a-8960-e9028b3616ca_8ee6fbddb7c933ebe39b31b977c51af21687f036", "_index": "azul_v2_dev_test_replica", "_score": 1.0, "_source": { @@ -3561,7 +3561,7 @@ "_type": "_doc" }, { - "_id": "samples_a21dc760-a500-4236-bcff-da34a0e873d2_0bb754590d94de62d6fb15bdbac5f64f54072cb0", + "_id": "specimen_from_organism_a21dc760-a500-4236-bcff-da34a0e873d2_0bb754590d94de62d6fb15bdbac5f64f54072cb0", "_index": "azul_v2_dev_test_replica", "_score": 1.0, "_source": { @@ -3614,7 +3614,7 @@ "_type": "_doc" }, { - "_id": "projects_e8642221-4c2c-4fd7-b926-a68bce363c88_42d51043dcd0030b5ee9ce47fa30c85bcc92bac6", + "_id": "project_e8642221-4c2c-4fd7-b926-a68bce363c88_42d51043dcd0030b5ee9ce47fa30c85bcc92bac6", "_index": "azul_v2_dev_test_replica", "_score": 1.0, "_source": { @@ -3691,5 +3691,175 @@ "hub_ids": [] }, "_type": "_doc" + }, + { + "_id": 
"donor_organism_7b07b9d0-cc0e-4098-9f64-f4a569f7d746_7dd78e18008a0c036bb25e84f7742c075e16873f", + "_index": "azul_v2_dummy_test_replica", + "_score": 1.0, + "_source": { + "contents": { + "biomaterial_core": { + "biomaterial_id": "DID_scRSq06", + "ncbi_taxon_id": [ + 9606 + ] + }, + "death": { + "cause_of_death": "stroke" + }, + "describedBy": "https://schema.humancellatlas.org/type/biomaterial/10.1.2/donor_organism", + "diseases": [ + { + "ontology": "PATO:0000461", + "ontology_label": "normal", + "text": "normal" + } + ], + "genus_species": [ + { + "ontology": "NCBITaxon:9606", + "ontology_label": "Australopithecus", + "text": "Australopithecus" + } + ], + "human_specific": { + "body_mass_index": 29.5, + "ethnicity": [ + { + "ontology": "hancestro:0005", + "ontology_label": "European", + "text": "European" + } + ] + }, + "is_living": "no", + "organism_age": "38", + "organism_age_unit": { + "ontology": "UO:0000036", + "ontology_label": "year", + "text": "year" + }, + "provenance": { + "document_id": "7b07b9d0-cc0e-4098-9f64-f4a569f7d746", + "submission_date": "2018-11-02T10:02:12.191Z", + "update_date": "2018-11-02T10:07:39.622Z" + }, + "schema_type": "biomaterial", + "sex": "female" + }, + "entity_id": "7b07b9d0-cc0e-4098-9f64-f4a569f7d746", + "hub_ids": [ + "0c5ac7c0-817e-40d4-b1b1-34c3d5cfecdb", + "70d1af4a-82c8-478a-8960-e9028b3616ca" + ], + "replica_type": "donor_organism" + }, + "_type": "_doc" + }, + { + "_id": "library_preparation_protocol_9c32cf70-3ed7-4720-badc-5ee71e8a38af_33c8ff808fc57c3dcb4b3872d1dcbc859e45a85a", + "_index": "azul_v2_dummy_test_replica", + "_score": 1.0, + "_source": { + "contents": { + "describedBy": "https://schema.humancellatlas.org/type/protocol/sequencing/4.3.3/library_preparation_protocol", + "end_bias": "full length", + "input_nucleic_acid_molecule": { + "ontology": "OBI:0000869", + "text": "polyA RNA" + }, + "library_construction_approach": { + "ontology": "EFO:0008931", + "ontology_label": "Smart-seq2", + "text": "Smart-seq2" + }, + "library_construction_kit": { + "manufacturer": "Illumina", + "retail_name": "Nextera XT kit" + }, + "nucleic_acid_source": "single cell", + "primer": "poly-dT", + "protocol_core": { + "protocol_id": "library_preparation_protocol_1" + }, + "provenance": { + "document_id": "9c32cf70-3ed7-4720-badc-5ee71e8a38af", + "submission_date": "2018-11-02T10:05:05.547Z", + "update_date": "2018-11-02T10:05:10.360Z" + }, + "schema_type": "protocol", + "strand": "unstranded" + }, + "entity_id": "9c32cf70-3ed7-4720-badc-5ee71e8a38af", + "hub_ids": [ + "0c5ac7c0-817e-40d4-b1b1-34c3d5cfecdb", + "70d1af4a-82c8-478a-8960-e9028b3616ca" + ], + "replica_type": "library_preparation_protocol" + }, + "_type": "_doc" + }, + { + "_id": "sequencing_protocol_61e629ed-0135-4492-ac8a-5c4ab3ccca8a_187e49da61e5d3fbf91f3a6cf626bc4ce3c2d2be", + "_index": "azul_v2_dummy_test_replica", + "_score": 1.0, + "_source": { + "contents": { + "describedBy": "https://schema.humancellatlas.org/type/protocol/sequencing/9.0.3/sequencing_protocol", + "instrument_manufacturer_model": { + "ontology": "EFO:0008566", + "ontology_label": "Illumina NextSeq 500", + "text": "Illumina NextSeq 500" + }, + "paired_end": true, + "protocol_core": { + "protocol_id": "sequencing_protocol_1" + }, + "provenance": { + "document_id": "61e629ed-0135-4492-ac8a-5c4ab3ccca8a", + "submission_date": "2018-11-02T10:05:05.555Z", + "update_date": "2018-11-02T10:05:10.376Z" + }, + "schema_type": "protocol", + "sequencing_approach": { + "ontology": "EFO:0008896", + "ontology_label": "RNA-Seq", + 
"text": "RNA-Seq" + } + }, + "entity_id": "61e629ed-0135-4492-ac8a-5c4ab3ccca8a", + "hub_ids": [ + "0c5ac7c0-817e-40d4-b1b1-34c3d5cfecdb", + "70d1af4a-82c8-478a-8960-e9028b3616ca" + ], + "replica_type": "sequencing_protocol" + }, + "_type": "_doc" + }, + { + "_id": "process_771ddaf6-3a4f-4314-97fe-6294ff8e25a4_2e08df0ed1adfe582cd63f4daef287d9a637cdf1", + "_index": "azul_v2_dummy_test_replica", + "_score": 1.0, + "_source": { + "contents": { + "describedBy": "https://schema.humancellatlas.org/type/process/6.0.2/process", + "process_core": { + "process_id": "SRR3562915" + }, + "provenance": { + "document_id": "771ddaf6-3a4f-4314-97fe-6294ff8e25a4", + "submission_date": "2018-11-02T10:06:37.087Z", + "update_date": "2018-11-02T10:13:43.197Z" + }, + "schema_type": "process" + }, + "entity_id": "771ddaf6-3a4f-4314-97fe-6294ff8e25a4", + "hub_ids": [ + "0c5ac7c0-817e-40d4-b1b1-34c3d5cfecdb", + "70d1af4a-82c8-478a-8960-e9028b3616ca" + ], + "replica_type": "process" + }, + "_type": "_doc" } ] diff --git a/test/indexer/test_anvil.py b/test/indexer/test_anvil.py index bf0ddbed18..3ad0d3b719 100644 --- a/test/indexer/test_anvil.py +++ b/test/indexer/test_anvil.py @@ -204,6 +204,8 @@ def test_dataset_description(self): bundles = [self.primary_bundle(), self.duos_bundle()] for bundle_fqid in bundles: bundle = cast(TDRAnvilBundle, self._load_canned_bundle(bundle_fqid)) + # To simplify the test, we drop all entities from the bundles + # except for the dataset bundle.links.clear() bundle.entities = {dataset_ref: bundle.entities[dataset_ref]} self._index_bundle(bundle, delete=False) @@ -231,5 +233,7 @@ def test_dataset_description(self): self.assertDictEqual(doc_counts, { DocumentType.aggregate: 1, DocumentType.contribution: 2, - **({DocumentType.replica: 2} if config.enable_replicas else {}) + # No replica is emitted for the primary dataset because we dropped + # the files (hubs) from its bundle above + **({DocumentType.replica: 1} if config.enable_replicas else {}) }) diff --git a/test/indexer/test_indexer.py b/test/indexer/test_indexer.py index 34c395d77b..2fbf8206ba 100644 --- a/test/indexer/test_indexer.py +++ b/test/indexer/test_indexer.py @@ -132,28 +132,12 @@ def new_bundle(self): def metadata_plugin(self) -> MetadataPlugin: return MetadataPlugin.load(self.catalog).create() - def _num_replicas(self, *, num_additions: int, num_dups: int = 0) -> int: - """ - The number of replicas the index is expected to contain. - - :param num_additions: Number of addition contributions written to the - indices - - :param num_dups: How many of those contributions had identical contents - with another contribution - """ - if config.enable_replicas: - assert num_dups <= num_additions - return num_additions - num_dups - else: - return 0 - def _assert_hit_counts(self, hits: list[JSON], num_contribs: int, + num_replicas: int, *, num_aggs: Optional[int] = None, - num_replicas: Optional[int] = None ): """ Verify that the indices contain the correct number of hits of each @@ -163,22 +147,19 @@ def _assert_hit_counts(self, :param num_contribs: Expected number of contributions + :param num_replicas: Expected number of replicas, assuming replicas are + enabled. + :param num_aggs: Expected number of aggregates. If unspecified, ``num_contribs`` becomes the default. - - :param num_replicas: Expected number of replicas. If unspecified, - ``num_contribs`` is used to calculate it, assuming - all contributions have distinct contents. """ if num_aggs is None: # By default, assume 1 aggregate per contribution. 
num_aggs = num_contribs - if num_replicas is None: - num_replicas = self._num_replicas(num_additions=num_contribs) expected = { DocumentType.contribution: num_contribs, DocumentType.aggregate: num_aggs, - DocumentType.replica: num_replicas + DocumentType.replica: num_replicas if config.enable_replicas else 0, } actual = dict.fromkeys(expected.keys(), 0) actual |= Counter(self._parse_index_name(h)[1] for h in hits) @@ -235,13 +216,13 @@ def test_indexing(self): if self._parse_index_name(h)[1] is not DocumentType.replica ] self.assertElasticEqual(expected_hits, hits) - contributions = [] - replicas = [] + contributions = set() + replicas = set() for hit in hits: qualifier, doc_type = self._parse_index_name(hit) if doc_type is DocumentType.replica: entity_id = ReplicaCoordinates.from_hit(hit).entity.entity_id - replicas.append(entity_id) + replicas.add(entity_id) if entity_id == bundle.uuid: expected = bundle.links else: @@ -257,11 +238,15 @@ def test_indexing(self): self.assertEqual(expected, actual) elif doc_type is DocumentType.contribution: entity_id = ContributionCoordinates.from_hit(hit).entity.entity_id - contributions.append(entity_id) - contributions.sort() - replicas.sort() - # Every contribution should be replicated - self.assertEqual(contributions if enable_replicas else [], replicas) + contributions.add(entity_id) + # Every contribution should be replicated, but many + # replicated entities never manifest as outer + # entities in the index and thus have no + # corresponding contribution. + if enable_replicas: + self.assertIsSubset(contributions, replicas) + else: + self.assertEqual(set(), replicas) finally: self.index_service.delete_indices(self.catalog) @@ -271,21 +256,21 @@ def test_deletion(self): """ # Ensure that we have a bundle whose documents are written individually # and another one that's written in bulk. - bundle_sizes = { - self.new_bundle: 6, - self.bundle_fqid(uuid='2a87dc5c-0c3c-4d91-a348-5d784ab48b92', - version='2018-03-29T10:39:45.437487Z'): 258 - } + bundle_sizes = [ + (self.new_bundle, 6, 10), + (self.bundle_fqid(uuid='2a87dc5c-0c3c-4d91-a348-5d784ab48b92', + version='2018-03-29T10:39:45.437487Z'), 258, 263) + ] self.assertTrue( - min(bundle_sizes.values()) + bundle_sizes[0][1] < IndexWriter.bulk_threshold - < max(bundle_sizes.values()) + < bundle_sizes[1][1] ) field_types = self.index_service.catalogued_field_types() aggregate_cls = self.metadata_plugin.aggregate_class() - for bundle_fqid, size in bundle_sizes.items(): - with self.subTest(size=size): + for bundle_fqid, num_contribs, num_replicas in bundle_sizes: + with self.subTest(num_contribs=num_contribs): bundle = self._load_canned_bundle(bundle_fqid) bundle = DSSBundle(fqid=bundle_fqid, manifest=bundle.manifest, @@ -295,7 +280,7 @@ def test_deletion(self): try: self._index_bundle(bundle) hits = self._get_all_hits() - self._assert_hit_counts(hits, size) + self._assert_hit_counts(hits, num_contribs, num_replicas) for hit in hits: qualifier, doc_type = self._parse_index_name(hit) if doc_type is DocumentType.aggregate: @@ -315,12 +300,11 @@ def test_deletion(self): hits = self._get_all_hits() # Twice the number of contributions because deletions create # new documents instead of removing them. - num_contribs = size * 2 + num_contribs += num_contribs # The aggregates are removed when the deletions cause their # contents to become empty. num_aggs = 0 # Deletions do not affect the number of replicas. 
- num_replicas = self._num_replicas(num_additions=size) self._assert_hit_counts(hits, num_contribs=num_contribs, num_aggs=num_aggs, @@ -396,10 +380,10 @@ def test_duplicate_notification(self): entity = next(e for e in tallies_1.keys() if e.entity_type != 'bundles') tallies_1.pop(entity) - entity_contents = one( - metadata + replica_ref, entity_contents = one( + (parsed_ref, metadata) for ref, metadata in bundle.metadata.items() - if EntityReference.parse(ref).entity_id == entity.entity_id + if (parsed_ref := EntityReference.parse(ref)).entity_id == entity.entity_id ) coordinates = [ ContributionCoordinates( @@ -411,7 +395,7 @@ def test_duplicate_notification(self): if config.enable_replicas: coordinates.append( ReplicaCoordinates( - entity=entity, + entity=replica_ref, content_hash=json_hash(entity_contents).hexdigest() ).with_catalog(self.catalog) ) @@ -483,9 +467,9 @@ def test_deletion_before_addition(self): def _assert_index_counts(self, *, just_deletion: bool): # Two files, a project, a cell suspension, a sample, and a bundle - num_entities = 6 - num_addition_contribs = 0 if just_deletion else num_entities - num_deletion_contribs = num_entities + num_old_contribs = 6 + num_addition_contribs = 0 if just_deletion else num_old_contribs + num_deletion_contribs = num_old_contribs hits = self._get_all_hits() @@ -500,7 +484,7 @@ def _assert_index_counts(self, *, just_deletion: bool): num_contribs = num_addition_contribs + num_deletion_contribs # These deletion markers do not affect the number of replicas because we # don't support deleting replicas. - num_replicas = self._num_replicas(num_additions=num_addition_contribs) + num_replicas = 0 if just_deletion else 10 self._assert_hit_counts(hits, num_contribs=num_contribs, num_aggs=0, @@ -1135,7 +1119,9 @@ def test_derived_files(self): hits = self._get_all_hits() num_files = 33 num_expected = dict(files=num_files, samples=1, cell_suspensions=1, projects=1, bundles=1) - self._assert_hit_counts(hits, sum(num_expected.values())) + num_contribs = sum(num_expected.values()) + num_replicas = 41 + self._assert_hit_counts(hits, num_contribs, num_replicas) num_contribs, num_aggregates = Counter(), Counter() for hit in hits: qualifier, doc_type = self._parse_index_name(hit) @@ -1202,37 +1188,27 @@ def _assert_old_bundle(self, *, expect_new_version: bool | None) -> HitsById: :return: A dictionary with all hits from the old bundle """ # Two files, a project, a cell suspension, a sample, and a bundle - num_entities = 6 - - # Expect a replica for each entity in the old version - num_additions = num_entities - if expect_new_version is True: - # Expect an updated replica for each entity in the new version - num_additions += num_entities - elif expect_new_version is None: - # Even after the new version is deleted, the updated replicas - # remain, since deletion of replicas is not supported - num_additions += num_entities - # New and old replicas for `links.json` are identical - num_dups = 0 if expect_new_version is False else 1 - num_replicas = self._num_replicas(num_additions=num_additions, - num_dups=num_dups) - + num_new_contribs = 6 + num_contribs = num_new_contribs # Expect the old version's contributions - num_contribs = num_entities - if expect_new_version is True: - # Expect the new version's contributions - num_contribs += num_entities - elif expect_new_version is None: - # Expect the new version's contributions … - num_contribs += num_entities - # as well as deletion markers for them - num_contribs += num_entities + num_aggs = num_new_contribs + # 
Expect a replica for each entity in the old version + num_replicas = 10 + if expect_new_version in (True, None): + # New and old replicas for `links.json` are identical + num_replicas += num_replicas - 1 + # Expect the new version's contributions... + num_contribs += num_new_contribs + if expect_new_version is None: + # ...as well as deletion markers for them + num_contribs += num_new_contribs + # Even after the new version is deleted, the updated replicas + # remain, since deletion of replicas is not supported hits = self._get_all_hits() self._assert_hit_counts(hits, num_contribs=num_contribs, - num_aggs=num_entities, + num_aggs=num_aggs, num_replicas=num_replicas) num_actual_new_contributions = 0 @@ -1272,9 +1248,9 @@ def _assert_old_bundle(self, *, expect_new_version: bool | None) -> HitsById: # We count the deleted contributions here too since they should have a # corresponding addition contribution - self.assertEqual(0 if expect_new_version is False else num_entities, + self.assertEqual(0 if expect_new_version is False else num_new_contribs, num_actual_new_contributions) - self.assertEqual(num_entities if expect_new_version is None else 0, + self.assertEqual(num_new_contribs if expect_new_version is None else 0, num_actual_new_deleted_contributions) return hits_by_id @@ -1297,23 +1273,21 @@ def _assert_new_bundle(self, self.assertTrue(expect_old_version) # Two files, a project, a cell suspension, a sample, and a bundle - num_entities = 6 + num_old_contribs = 6 + num_contribs = num_old_contribs + num_aggs = num_old_contribs + num_replicas = 10 - # Expect an updaded replica for each entity in the new version - num_contribs = num_entities if expect_old_version: - # Expect a replica for each entity in the old version - num_contribs += num_entities - - # New and old replicas for `links.json` are identical - num_dups = 1 if expect_old_version else 0 - num_replicas = self._num_replicas(num_additions=num_contribs, - num_dups=num_dups) + # Expect an updated contribution for each entity in the old version + num_contribs += num_old_contribs + # New and old replicas for `links.json` are identical + num_replicas += num_replicas - 1 hits = self._get_all_hits() self._assert_hit_counts(hits, num_contribs=num_contribs, - num_aggs=num_entities, + num_aggs=num_aggs, num_replicas=num_replicas) num_actual_old_contributions = 0 @@ -1360,7 +1334,7 @@ def _assert_new_bundle(self, elif doc_type is DocumentType.contribution: self.assertNotIn('Farmers Trucks', [c.get('institution') for c in project['contributors']]) - self.assertEqual(num_entities if expect_old_version else 0, + self.assertEqual(num_old_contribs if expect_old_version else 0, num_actual_old_contributions) def _extract_bundle_version(self, doc_type: DocumentType, source: JSON) -> str: @@ -1409,17 +1383,16 @@ def mocked_mget(self, body, _source_includes): hits = self._get_all_hits() file_uuids = set() # Two files, a project, a cell suspension, a sample, and a bundle - num_entities = 6 + num_contribs = 6 # Two bundles - num_contribs = num_entities * 2 + num_contribs = num_contribs * 2 # Both bundles share the same sample and the project, so they get # aggregated only once num_aggs = num_contribs - 2 # The sample contributions from each bundle are identical and yield a # single replica, but the project contributions have different schema # versions and thus yield two. 
- num_replicas = self._num_replicas(num_additions=num_contribs, - num_dups=1) + num_replicas = 14 self._assert_hit_counts(hits, num_contribs=num_contribs, num_aggs=num_aggs, @@ -1501,7 +1474,9 @@ def test_indexing_matrix_related_files(self): self.assertNotIn('!', related_file['name']) def test_indexing_with_skipped_matrix_file(self): - # FIXME: Remove once https://github.com/HumanCellAtlas/metadata-schema/issues/579 is resolved + # zarray files no longer exist in DCP2. This test covers behavior that + # may no longer be needed to support them, but we don't want to risk + # removing it. bundle_fqid = self.bundle_fqid(uuid='587d74b4-1075-4bbf-b96a-4d1ede0481b2', version='2018-10-10T02:23:43.182000Z') self._index_canned_bundle(bundle_fqid) @@ -1519,8 +1494,6 @@ def test_indexing_with_skipped_matrix_file(self): aggregate_file_names.add(one(files)['name']) else: for file in files: - # FIXME: need for one() is odd, file_format is a group field - # https://github.com/DataBiosphere/azul/issues/612 if qualifier == 'bundles': if file['file_format'] == 'matrix': entities_with_matrix_files.add(hit['_source']['entity_id']) @@ -2169,7 +2142,7 @@ def test_mapper_parsing(self): self._index_canned_bundle(bundle_fqid) hits = self._get_all_hits() - self._assert_hit_counts(hits, 21) + self._assert_hit_counts(hits, num_contribs=21, num_replicas=28) def test_disallow_manifest_column_joiner(self): bundle_fqid = self.bundle_fqid(uuid='1b6d8348-d6e9-406a-aa6a-7ee886e52bf9', diff --git a/test/integration_test.py b/test/integration_test.py index 9623692a3f..ee66f595c2 100644 --- a/test/integration_test.py +++ b/test/integration_test.py @@ -104,6 +104,9 @@ AzulClient, AzulClientNotificationError, ) +from azul.chalice import ( + AzulChaliceApp, +) from azul.drs import ( AccessMethod, ) @@ -2042,11 +2045,6 @@ def test_response_security_headers(self): '/oauth2_redirect': {'Cache-Control': 'no-store'}, '/health/basic': {'Cache-Control': 'no-store'} } - global_headers = { - 'Strict-Transport-Security': 'max-age=31536000; includeSubDomains', - 'X-Content-Type-Options': 'nosniff', - 'X-Frame-Options': 'DENY', - } for endpoint in (config.service_endpoint, config.indexer_endpoint): for path, expected_headers in test_cases.items(): with self.subTest(endpoint=endpoint, path=path): @@ -2055,5 +2053,17 @@ def test_response_security_headers(self): else: response = requests.get(str(endpoint / path)) response.raise_for_status() - expected = expected_headers | global_headers + expected = AzulChaliceApp.security_headers | expected_headers + # FIXME: Add a CSP header with a nonce value to text/html responses + # https://github.com/DataBiosphere/azul-private/issues/6 + if path in ['/', '/oauth2_redirect']: + del expected['Content-Security-Policy'] self.assertIsSubset(expected.items(), response.headers.items()) + + def test_default_4xx_response_headers(self): + for endpoint in (config.service_endpoint, config.indexer_endpoint): + with self.subTest(endpoint=endpoint): + response = requests.get(str(endpoint / 'does-not-exist')) + self.assertEqual(403, response.status_code) + self.assertIsSubset(AzulChaliceApp.security_headers.items(), + response.headers.items()) diff --git a/test/service/data/verbatim/hca/pfb_entities.json b/test/service/data/verbatim/hca/pfb_entities.json index cdb1266adb..7b586923c1 100644 --- a/test/service/data/verbatim/hca/pfb_entities.json +++ b/test/service/data/verbatim/hca/pfb_entities.json @@ -12,6 +12,20 @@ "properties": [], "values": {} }, + { + "links": [], + "name": "donor_organism", + "ontology_reference": 
"", + "properties": [], + "values": {} + }, + { + "links": [], + "name": "library_preparation_protocol", + "ontology_reference": "", + "properties": [], + "values": {} + }, { "links": [], "name": "links", @@ -19,6 +33,13 @@ "properties": [], "values": {} }, + { + "links": [], + "name": "process", + "ontology_reference": "", + "properties": [], + "values": {} + }, { "links": [], "name": "project", @@ -33,6 +54,13 @@ "properties": [], "values": {} }, + { + "links": [], + "name": "sequencing_protocol", + "ontology_reference": "", + "properties": [], + "values": {} + }, { "links": [], "name": "specimen_from_organism", @@ -120,6 +148,140 @@ }, "relations": [] }, + { + "id": "61e629ed-0135-4492-ac8a-5c4ab3ccca8a", + "name": "sequencing_protocol", + "object": { + "describedBy": "https://schema.humancellatlas.org/type/protocol/sequencing/9.0.3/sequencing_protocol", + "instrument_manufacturer_model": { + "ontology": "EFO:0008566", + "ontology_label": "Illumina NextSeq 500", + "text": "Illumina NextSeq 500" + }, + "paired_end": true, + "protocol_core": { + "protocol_id": "sequencing_protocol_1" + }, + "provenance": { + "document_id": "61e629ed-0135-4492-ac8a-5c4ab3ccca8a", + "submission_date": "2018-11-02T10:05:05.555Z", + "update_date": "2018-11-02T10:05:10.376Z" + }, + "schema_type": "protocol", + "sequencing_approach": { + "ontology": "EFO:0008896", + "ontology_label": "RNA-Seq", + "text": "RNA-Seq" + } + }, + "relations": [] + }, + { + "id": "771ddaf6-3a4f-4314-97fe-6294ff8e25a4", + "name": "process", + "object": { + "describedBy": "https://schema.humancellatlas.org/type/process/6.0.2/process", + "process_core": { + "process_id": "SRR3562915" + }, + "provenance": { + "document_id": "771ddaf6-3a4f-4314-97fe-6294ff8e25a4", + "submission_date": "2018-11-02T10:06:37.087Z", + "update_date": "2018-11-02T10:13:43.197Z" + }, + "schema_type": "process" + }, + "relations": [] + }, + { + "id": "7b07b9d0-cc0e-4098-9f64-f4a569f7d746", + "name": "donor_organism", + "object": { + "biomaterial_core": { + "biomaterial_id": "DID_scRSq06", + "ncbi_taxon_id": [ + 9606 + ] + }, + "death": { + "cause_of_death": "stroke" + }, + "describedBy": "https://schema.humancellatlas.org/type/biomaterial/10.1.2/donor_organism", + "diseases": [ + { + "ontology": "PATO:0000461", + "ontology_label": "normal", + "text": "normal" + } + ], + "genus_species": [ + { + "ontology": "NCBITaxon:9606", + "ontology_label": "Australopithecus", + "text": "Australopithecus" + } + ], + "human_specific": { + "body_mass_index": 29.5, + "ethnicity": [ + { + "ontology": "hancestro:0005", + "ontology_label": "European", + "text": "European" + } + ] + }, + "is_living": "no", + "organism_age": "38", + "organism_age_unit": { + "ontology": "UO:0000036", + "ontology_label": "year", + "text": "year" + }, + "provenance": { + "document_id": "7b07b9d0-cc0e-4098-9f64-f4a569f7d746", + "submission_date": "2018-11-02T10:02:12.191Z", + "update_date": "2018-11-02T10:07:39.622Z" + }, + "schema_type": "biomaterial", + "sex": "female" + }, + "relations": [] + }, + { + "id": "9c32cf70-3ed7-4720-badc-5ee71e8a38af", + "name": "library_preparation_protocol", + "object": { + "describedBy": "https://schema.humancellatlas.org/type/protocol/sequencing/4.3.3/library_preparation_protocol", + "end_bias": "full length", + "input_nucleic_acid_molecule": { + "ontology": "OBI:0000869", + "text": "polyA RNA" + }, + "library_construction_approach": { + "ontology": "EFO:0008931", + "ontology_label": "Smart-seq2", + "text": "Smart-seq2" + }, + "library_construction_kit": { + 
"manufacturer": "Illumina", + "retail_name": "Nextera XT kit" + }, + "nucleic_acid_source": "single cell", + "primer": "poly-dT", + "protocol_core": { + "protocol_id": "library_preparation_protocol_1" + }, + "provenance": { + "document_id": "9c32cf70-3ed7-4720-badc-5ee71e8a38af", + "submission_date": "2018-11-02T10:05:05.547Z", + "update_date": "2018-11-02T10:05:10.360Z" + }, + "schema_type": "protocol", + "strand": "unstranded" + }, + "relations": [] + }, { "id": "a21dc760-a500-4236-bcff-da34a0e873d2", "name": "specimen_from_organism", diff --git a/test/service/data/verbatim/hca/pfb_schema.json b/test/service/data/verbatim/hca/pfb_schema.json index af50aa32f7..9a7a044fc4 100644 --- a/test/service/data/verbatim/hca/pfb_schema.json +++ b/test/service/data/verbatim/hca/pfb_schema.json @@ -222,6 +222,365 @@ "name": "cell_suspension", "type": "record" }, + { + "fields": [ + { + "name": "biomaterial_core", + "namespace": "donor_organism", + "type": { + "fields": [ + { + "name": "biomaterial_id", + "namespace": "donor_organism.biomaterial_core", + "type": "string" + }, + { + "name": "ncbi_taxon_id", + "namespace": "donor_organism.biomaterial_core", + "type": { + "items": "long", + "type": "array" + } + } + ], + "name": "donor_organism.biomaterial_core", + "type": "record" + } + }, + { + "name": "death", + "namespace": "donor_organism", + "type": { + "fields": [ + { + "name": "cause_of_death", + "namespace": "donor_organism.death", + "type": "string" + } + ], + "name": "donor_organism.death", + "type": "record" + } + }, + { + "name": "describedBy", + "namespace": "donor_organism", + "type": "string" + }, + { + "name": "diseases", + "namespace": "donor_organism", + "type": { + "items": { + "fields": [ + { + "name": "ontology", + "namespace": "donor_organism.diseases", + "type": "string" + }, + { + "name": "ontology_label", + "namespace": "donor_organism.diseases", + "type": "string" + }, + { + "name": "text", + "namespace": "donor_organism.diseases", + "type": "string" + } + ], + "name": "donor_organism.diseases", + "type": "record" + }, + "type": "array" + } + }, + { + "name": "genus_species", + "namespace": "donor_organism", + "type": { + "items": { + "fields": [ + { + "name": "ontology", + "namespace": "donor_organism.genus_species", + "type": "string" + }, + { + "name": "ontology_label", + "namespace": "donor_organism.genus_species", + "type": "string" + }, + { + "name": "text", + "namespace": "donor_organism.genus_species", + "type": "string" + } + ], + "name": "donor_organism.genus_species", + "type": "record" + }, + "type": "array" + } + }, + { + "name": "human_specific", + "namespace": "donor_organism", + "type": { + "fields": [ + { + "name": "body_mass_index", + "namespace": "donor_organism.human_specific", + "type": "double" + }, + { + "name": "ethnicity", + "namespace": "donor_organism.human_specific", + "type": { + "items": { + "fields": [ + { + "name": "ontology", + "namespace": "donor_organism.human_specific.ethnicity", + "type": "string" + }, + { + "name": "ontology_label", + "namespace": "donor_organism.human_specific.ethnicity", + "type": "string" + }, + { + "name": "text", + "namespace": "donor_organism.human_specific.ethnicity", + "type": "string" + } + ], + "name": "donor_organism.human_specific.ethnicity", + "type": "record" + }, + "type": "array" + } + } + ], + "name": "donor_organism.human_specific", + "type": "record" + } + }, + { + "name": "is_living", + "namespace": "donor_organism", + "type": "string" + }, + { + "name": "organism_age", + "namespace": "donor_organism", + 
"type": "string" + }, + { + "name": "organism_age_unit", + "namespace": "donor_organism", + "type": { + "fields": [ + { + "name": "ontology", + "namespace": "donor_organism.organism_age_unit", + "type": "string" + }, + { + "name": "ontology_label", + "namespace": "donor_organism.organism_age_unit", + "type": "string" + }, + { + "name": "text", + "namespace": "donor_organism.organism_age_unit", + "type": "string" + } + ], + "name": "donor_organism.organism_age_unit", + "type": "record" + } + }, + { + "name": "provenance", + "namespace": "donor_organism", + "type": { + "fields": [ + { + "name": "document_id", + "namespace": "donor_organism.provenance", + "type": "string" + }, + { + "name": "submission_date", + "namespace": "donor_organism.provenance", + "type": "string" + }, + { + "name": "update_date", + "namespace": "donor_organism.provenance", + "type": "string" + } + ], + "name": "donor_organism.provenance", + "type": "record" + } + }, + { + "name": "schema_type", + "namespace": "donor_organism", + "type": "string" + }, + { + "name": "sex", + "namespace": "donor_organism", + "type": "string" + } + ], + "name": "donor_organism", + "type": "record" + }, + { + "fields": [ + { + "name": "describedBy", + "namespace": "library_preparation_protocol", + "type": "string" + }, + { + "name": "end_bias", + "namespace": "library_preparation_protocol", + "type": "string" + }, + { + "name": "input_nucleic_acid_molecule", + "namespace": "library_preparation_protocol", + "type": { + "fields": [ + { + "name": "ontology", + "namespace": "library_preparation_protocol.input_nucleic_acid_molecule", + "type": "string" + }, + { + "name": "text", + "namespace": "library_preparation_protocol.input_nucleic_acid_molecule", + "type": "string" + } + ], + "name": "library_preparation_protocol.input_nucleic_acid_molecule", + "type": "record" + } + }, + { + "name": "library_construction_approach", + "namespace": "library_preparation_protocol", + "type": { + "fields": [ + { + "name": "ontology", + "namespace": "library_preparation_protocol.library_construction_approach", + "type": "string" + }, + { + "name": "ontology_label", + "namespace": "library_preparation_protocol.library_construction_approach", + "type": "string" + }, + { + "name": "text", + "namespace": "library_preparation_protocol.library_construction_approach", + "type": "string" + } + ], + "name": "library_preparation_protocol.library_construction_approach", + "type": "record" + } + }, + { + "name": "library_construction_kit", + "namespace": "library_preparation_protocol", + "type": { + "fields": [ + { + "name": "manufacturer", + "namespace": "library_preparation_protocol.library_construction_kit", + "type": "string" + }, + { + "name": "retail_name", + "namespace": "library_preparation_protocol.library_construction_kit", + "type": "string" + } + ], + "name": "library_preparation_protocol.library_construction_kit", + "type": "record" + } + }, + { + "name": "nucleic_acid_source", + "namespace": "library_preparation_protocol", + "type": "string" + }, + { + "name": "primer", + "namespace": "library_preparation_protocol", + "type": "string" + }, + { + "name": "protocol_core", + "namespace": "library_preparation_protocol", + "type": { + "fields": [ + { + "name": "protocol_id", + "namespace": "library_preparation_protocol.protocol_core", + "type": "string" + } + ], + "name": "library_preparation_protocol.protocol_core", + "type": "record" + } + }, + { + "name": "provenance", + "namespace": "library_preparation_protocol", + "type": { + "fields": [ + { + "name": 
"document_id", + "namespace": "library_preparation_protocol.provenance", + "type": "string" + }, + { + "name": "submission_date", + "namespace": "library_preparation_protocol.provenance", + "type": "string" + }, + { + "name": "update_date", + "namespace": "library_preparation_protocol.provenance", + "type": "string" + } + ], + "name": "library_preparation_protocol.provenance", + "type": "record" + } + }, + { + "name": "schema_type", + "namespace": "library_preparation_protocol", + "type": "string" + }, + { + "name": "strand", + "namespace": "library_preparation_protocol", + "type": "string" + } + ], + "name": "library_preparation_protocol", + "type": "record" + }, { "fields": [ { @@ -310,6 +669,62 @@ "name": "links", "type": "record" }, + { + "fields": [ + { + "name": "describedBy", + "namespace": "process", + "type": "string" + }, + { + "name": "process_core", + "namespace": "process", + "type": { + "fields": [ + { + "name": "process_id", + "namespace": "process.process_core", + "type": "string" + } + ], + "name": "process.process_core", + "type": "record" + } + }, + { + "name": "provenance", + "namespace": "process", + "type": { + "fields": [ + { + "name": "document_id", + "namespace": "process.provenance", + "type": "string" + }, + { + "name": "submission_date", + "namespace": "process.provenance", + "type": "string" + }, + { + "name": "update_date", + "namespace": "process.provenance", + "type": "string" + } + ], + "name": "process.provenance", + "type": "record" + } + }, + { + "name": "schema_type", + "namespace": "process", + "type": "string" + } + ], + "name": "process", + "type": "record" + }, { "fields": [ { @@ -592,6 +1007,117 @@ "name": "sequence_file", "type": "record" }, + { + "fields": [ + { + "name": "describedBy", + "namespace": "sequencing_protocol", + "type": "string" + }, + { + "name": "instrument_manufacturer_model", + "namespace": "sequencing_protocol", + "type": { + "fields": [ + { + "name": "ontology", + "namespace": "sequencing_protocol.instrument_manufacturer_model", + "type": "string" + }, + { + "name": "ontology_label", + "namespace": "sequencing_protocol.instrument_manufacturer_model", + "type": "string" + }, + { + "name": "text", + "namespace": "sequencing_protocol.instrument_manufacturer_model", + "type": "string" + } + ], + "name": "sequencing_protocol.instrument_manufacturer_model", + "type": "record" + } + }, + { + "name": "paired_end", + "namespace": "sequencing_protocol", + "type": "boolean" + }, + { + "name": "protocol_core", + "namespace": "sequencing_protocol", + "type": { + "fields": [ + { + "name": "protocol_id", + "namespace": "sequencing_protocol.protocol_core", + "type": "string" + } + ], + "name": "sequencing_protocol.protocol_core", + "type": "record" + } + }, + { + "name": "provenance", + "namespace": "sequencing_protocol", + "type": { + "fields": [ + { + "name": "document_id", + "namespace": "sequencing_protocol.provenance", + "type": "string" + }, + { + "name": "submission_date", + "namespace": "sequencing_protocol.provenance", + "type": "string" + }, + { + "name": "update_date", + "namespace": "sequencing_protocol.provenance", + "type": "string" + } + ], + "name": "sequencing_protocol.provenance", + "type": "record" + } + }, + { + "name": "schema_type", + "namespace": "sequencing_protocol", + "type": "string" + }, + { + "name": "sequencing_approach", + "namespace": "sequencing_protocol", + "type": { + "fields": [ + { + "name": "ontology", + "namespace": "sequencing_protocol.sequencing_approach", + "type": "string" + }, + { + "name": 
"ontology_label", + "namespace": "sequencing_protocol.sequencing_approach", + "type": "string" + }, + { + "name": "text", + "namespace": "sequencing_protocol.sequencing_approach", + "type": "string" + } + ], + "name": "sequencing_protocol.sequencing_approach", + "type": "record" + } + } + ], + "name": "sequencing_protocol", + "type": "record" + }, { "fields": [ { diff --git a/test/service/test_app_logging.py b/test/service/test_app_logging.py index 2d4251f561..9585bde07f 100644 --- a/test/service/test_app_logging.py +++ b/test/service/test_app_logging.py @@ -15,6 +15,9 @@ from azul.logging import ( configure_test_logging, ) +from azul.strings import ( + single_quote as sq, +) from indexer import ( DCP1CannedBundleTestCase, ) @@ -66,9 +69,12 @@ def test_request_logs(self): 'Returning 200 response with headers {"Access-Control-Allow-Origin": ' '"*", "Access-Control-Allow-Headers": ' '"Authorization,Content-Type,X-Amz-Date,X-Amz-Security-Token,X-Api-Key", ' - '"Strict-Transport-Security": "max-age=31536000; includeSubDomains", ' + f'"Content-Security-Policy": "default-src {sq("self")}", ' + '"Referrer-Policy": "strict-origin-when-cross-origin", ' + '"Strict-Transport-Security": "max-age=63072000; includeSubDomains; preload", ' '"X-Content-Type-Options": "nosniff", ' '"X-Frame-Options": "DENY", ' + '"X-XSS-Protection": "1; mode=block", ' '"Cache-Control": "no-store"}. ' 'See next line for the first 1024 characters of the body.\n' '{"up": true}' diff --git a/test/service/test_manifest.py b/test/service/test_manifest.py index 4513bbbe75..92d0b5546f 100644 --- a/test/service/test_manifest.py +++ b/test/service/test_manifest.py @@ -106,6 +106,9 @@ from azul.service.storage_service import ( StorageService, ) +from azul.strings import ( + single_quote as sq, +) from azul.types import ( JSON, JSONs, @@ -1346,7 +1349,11 @@ def test_verbatim_jsonl_manifest(self): 'project/e8642221-4c2c-4fd7-b926-a68bce363c88', 'sequence_file/70d1af4a-82c8-478a-8960-e9028b3616ca', 'sequence_file/0c5ac7c0-817e-40d4-b1b1-34c3d5cfecdb', - 'specimen_from_organism/a21dc760-a500-4236-bcff-da34a0e873d2' + 'specimen_from_organism/a21dc760-a500-4236-bcff-da34a0e873d2', + 'donor_organism/7b07b9d0-cc0e-4098-9f64-f4a569f7d746', + 'library_preparation_protocol/9c32cf70-3ed7-4720-badc-5ee71e8a38af', + 'sequencing_protocol/61e629ed-0135-4492-ac8a-5c4ab3ccca8a', + 'process/771ddaf6-3a4f-4314-97fe-6294ff8e25a4' ]: expected.append({ 'type': EntityReference.parse(ref).entity_type, @@ -1611,7 +1618,7 @@ def test(*, format: ManifestFormat, fetch: bool, url: Optional[furl] = None): expected_url_for_bash = expected_url else: expected_url = object_url - expected_url_for_bash = f"'{expected_url}'" + expected_url_for_bash = sq(str(expected_url)) if format is ManifestFormat.curl: manifest_options = '--location --fail' file_options = '--fail-early --continue-at - --retry 15 --retry-delay 10' diff --git a/test/test_app_logging.py b/test/test_app_logging.py index 6efda81bf5..2372707577 100644 --- a/test/test_app_logging.py +++ b/test/test_app_logging.py @@ -31,6 +31,9 @@ azul_log_level, configure_test_logging, ) +from azul.strings import ( + single_quote as sq, +) from azul_test_case import ( AlwaysTearDownTestCase, AzulUnitTestCase, @@ -105,9 +108,12 @@ def fail(): self.assertEqual( azul_log.output[2], 'DEBUG:azul.chalice:Returning 500 response with headers {"Content-Type": "text/plain", ' - '"Strict-Transport-Security": "max-age=31536000; includeSubDomains", ' + f'"Content-Security-Policy": "default-src {sq("self")}", ' + '"Referrer-Policy": 
"strict-origin-when-cross-origin", ' + '"Strict-Transport-Security": "max-age=63072000; includeSubDomains; preload", ' '"X-Content-Type-Options": "nosniff", ' '"X-Frame-Options": "DENY", ' + '"X-XSS-Protection": "1; mode=block", ' '"Cache-Control": "no-store"}. ' 'See next line for the first 1024 characters of the body.\n' + response) else: diff --git a/test/test_log_forwarding.py b/test/test_log_forwarding.py index 519fb06144..9994cee56d 100644 --- a/test/test_log_forwarding.py +++ b/test/test_log_forwarding.py @@ -23,6 +23,9 @@ from azul.indexer.log_forwarding_controller import ( LogForwardingController, ) +from azul.strings import ( + double_quote as dq, +) from azul.types import ( JSONs, ) @@ -77,23 +80,23 @@ def test_alb(self): '0.000', '0.002', '0.000', '204', '204', '963', '229', - '"POST ' - f'https://gitlab.dev.singlecell.gi.ucsc.edu:443/api/v4/jobs/request?chars={escaped} ' - 'HTTP/1.1"', - f'"gitlab-runner 15.6.1 (15-6-stable; go1.18.8; linux/amd64; {escaped})"', + dq('POST', + f'https://gitlab.dev.singlecell.gi.ucsc.edu:443/api/v4/jobs/request?chars={escaped}', + 'HTTP/1.1'), + dq(f'gitlab-runner 15.6.1 (15-6-stable; go1.18.8; linux/amd64; {escaped})'), 'ECDHE-RSA-AES128-GCM-SHA256', 'TLSv1.2', 'arn:aws:elasticloadbalancing:us-east-1:122796619775:targetgroup/azul-gitlab-http/136c2d6db59941f6', - '"Root=1-63b0cbd4-7d218b82786295005dbf8b6d"', - '"gitlab.dev.singlecell.gi.ucsc.edu"', - '"arn:aws:acm:us-east-1:122796619775:certificate/81241b8e-c875-4a22-a30e-58003ee139ae"', + dq('Root=1-63b0cbd4-7d218b82786295005dbf8b6d'), + dq('gitlab.dev.singlecell.gi.ucsc.edu'), + dq('arn:aws:acm:us-east-1:122796619775:certificate/81241b8e-c875-4a22-a30e-58003ee139ae'), '0', '2022-12-31T23:55:00.386000Z', - '"forward"', - '"-"', '"-"', - '"172.71.0.215:80"', - '"204"', - '"-"', '"-"', + dq('forward'), + dq('-'), dq('-'), + dq('172.71.0.215:80'), + dq('204'), + dq('-'), dq('-'), ])] expected_output = [{ '_source_bucket': self.log_bucket, @@ -142,16 +145,16 @@ def test_s3(self): 'K829N8AH88F1RX7K', 'REST.PUT.OBJECT', 'health/service', - '"PUT /edu-ucsc-gi-platform-anvil-dev-storage-anvilbox.us-east-1/health/service HTTP/1.1"', + dq('PUT /edu-ucsc-gi-platform-anvil-dev-storage-anvilbox.us-east-1/health/service HTTP/1.1'), '200', '-', '-', '523', '85', '52', - '"-"', - '"Boto3/1.24.94 Python/3.11.5 Linux/4.14.255-301-238.520.amzn2.x86_64 ' - + 'exec-env/AWS_Lambda_python3.11 aws-chalice/1.30.0 Botocore/1.27.94"', + dq('-'), + dq('Boto3/1.24.94 Python/3.11.5 Linux/4.14.255-301-238.520.amzn2.x86_64', + 'exec-env/AWS_Lambda_python3.11 aws-chalice/1.30.0 Botocore/1.27.94'), '-', 'jcmyLMRqqJ7dT4ovtY21rtgwmuTC3qs24vgAtLAkcad9sRV92zC90gf2zGvCkxxsLSaKm48AMjo=', 'SigV4',