fixup! Fix: No replicas for donors in HCA (#6582)

nadove-ucsc committed Sep 28, 2024
1 parent 7d00561 commit e035899
Showing 3 changed files with 27 additions and 21 deletions.
10 changes: 8 additions & 2 deletions src/azul/indexer/transform.py
@@ -11,6 +11,9 @@

 import attr

+from azul.collections import (
+    alist,
+)
 from azul.indexer import (
     Bundle,
     BundleFQID,
@@ -127,7 +130,8 @@ def _contribution(self,

     def _replica(self,
                  entity: EntityReference,
-                 hub_ids: list[EntityID]
+                 *,
+                 file_hub: EntityID | None,
                  ) -> Replica:
         replica_type, contents = self._replicate(entity)
         coordinates = ReplicaCoordinates(content_hash=json_hash(contents).hexdigest(),
@@ -136,7 +140,9 @@ def _replica(self,
                        version=None,
                        replica_type=replica_type,
                        contents=contents,
-                       hub_ids=hub_ids)
+                       # The other hubs will be added when the indexer
+                       # consolidates duplicate replicas.
+                       hub_ids=alist(file_hub))

     @classmethod
     @abstractmethod
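
The effect of this change is that a transformer now passes at most one file hub per replica instead of building the hub_ids list itself. A minimal sketch of the intended call pattern, assuming that azul.collections.alist collects its non-None arguments into a list (the stand-in below is for illustration only, not the actual implementation):

    def alist(*args):
        # Stand-in for azul.collections.alist (assumed behavior): keep only
        # the arguments that are not None.
        return [arg for arg in args if arg is not None]

    # A file passes its own entity ID as the single file hub ...
    assert alist('file-entity-id') == ['file-entity-id']
    # ... while dataset/project replicas pass file_hub=None and therefore
    # start out with an empty hub_ids list.
    assert alist(None) == []
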
20 changes: 11 additions & 9 deletions src/azul/plugins/metadata/anvil/indexer/transform.py
@@ -569,7 +569,7 @@ def _transform(self,
                    ) -> Iterable[Contribution | Replica]:
         yield from super()._transform(entity)
         if self._is_duos(entity):
-            yield self._replica(entity, [])
+            yield self._replica(entity, file_hub=None)


 class DonorTransformer(BaseTransformer):
@@ -617,12 +617,14 @@ def _transform(self,
         )
         yield self._contribution(contents, entity.entity_id)
         if config.enable_replicas:
-            # The other hubs will be added when the indexer consolidates duplicate replicas.
-            yield self._replica(entity, [entity.entity_id])
+            yield self._replica(entity, file_hub=entity.entity_id)
             for linked_entity in linked:
-                # Datasets are linked to every file in their snapshot, making an explicit list
-                # of hub IDs for the dataset both redundant and impractically large. Therefore,
-                # we leave the hub IDs field empty for datasets and rely on the tenet that every
-                # file is an implicit hub of its parent dataset.
-                yield self._replica(linked_entity,
-                                    hub_ids=[] if linked_entity.entity_type == 'dataset' else [entity.entity_id])
+                yield self._replica(
+                    linked_entity,
+                    # Datasets are linked to every file in their snapshot,
+                    # making an explicit list of hub IDs for the dataset both
+                    # redundant and impractically large. Therefore, we leave the
+                    # hub IDs field empty for datasets and rely on the tenet
+                    # that every file is an implicit hub of its parent dataset.
+                    file_hub=None if linked_entity.entity_type == 'dataset' else entity.entity_id,
+                )
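
The conditional keyword argument above replaces the old list-building expression. A hypothetical helper (not part of the codebase) that isolates the same decision, under the tenet that every file is an implicit hub of its parent dataset:

    def file_hub_for(linked_entity_type: str, file_id: str) -> str | None:
        # Hypothetical helper for illustration: datasets get no explicit
        # file hub because every file is an implicit hub of its dataset.
        return None if linked_entity_type == 'dataset' else file_id

    assert file_hub_for('dataset', 'some-file-id') is None
    assert file_hub_for('donor', 'some-file-id') == 'some-file-id'
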
18 changes: 8 additions & 10 deletions src/azul/plugins/metadata/hca/indexer/transform.py
@@ -1472,17 +1472,15 @@ def _transform(self,
             file_id = file.ref.entity_id
             yield self._contribution(contents, file_id)
             if config.enable_replicas:
-                yield self._replica(self.api_bundle.ref,
-                                    # The other hubs will be added when the indexer consolidates duplicate replicas.
-                                    [file_id])
-                yield self._replica(one(self.api_bundle.projects.values()).ref,
-                                    # Projects are linked to every file in their snapshot, making an explicit list
-                                    # of hub IDs for the project both redundant and impractically large. Therefore,
-                                    # we leave the hub IDs field empty for projects and rely on the tenet that every
-                                    # file is an implicit hub of its parent project.
-                                    [])
+                yield self._replica(self.api_bundle.ref, file_hub=file_id)
+                # Projects are linked to every file in their snapshot,
+                # making an explicit list of hub IDs for the project both
+                # redundant and impractically large. Therefore, we leave the
+                # hub IDs field empty for projects and rely on the tenet
+                # that every file is an implicit hub of its parent project.
+                yield self._replica(self._api_project.ref, file_hub=None)
                 for linked_entity in visitor.entities:
-                    yield self._replica(linked_entity, [file_id])
+                    yield self._replica(linked_entity, file_hub=file_id)

     def matrix_stratification_values(self, file: api.File) -> JSON:
         """
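
Both plugins rely on the comment introduced in src/azul/indexer/transform.py: each replica initially carries at most one hub ID, and the remaining hubs appear when the indexer consolidates duplicate replicas. A toy sketch of that consolidation step, assuming duplicates are identified by their coordinates (this is not the indexer's actual code; names are illustrative):

    from collections import defaultdict

    def consolidate(replicas: list[tuple[str, list[str]]]) -> dict[str, list[str]]:
        # Merge the hub_ids of replicas that share the same coordinates.
        merged: dict[str, list[str]] = defaultdict(list)
        for coordinates, hub_ids in replicas:
            merged[coordinates].extend(hub_ids)
        return dict(merged)

    # Two files in the same bundle each emit a replica of the same donor,
    # contributing one file hub apiece; consolidation merges the hubs.
    replicas = [('donor/123', ['file-A']), ('donor/123', ['file-B'])]
    assert consolidate(replicas) == {'donor/123': ['file-A', 'file-B']}
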
