Skip to content

Commit

Permalink
[r] Support for AnVIL duos_id (#6620)
Browse files Browse the repository at this point in the history
  • Loading branch information
dsotirho-ucsc committed Nov 21, 2024
1 parent 93637ff commit 9032559
Show file tree
Hide file tree
Showing 9 changed files with 33 additions and 3 deletions.
5 changes: 5 additions & 0 deletions src/azul/plugins/metadata/anvil/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ def _field_mapping(self) -> MetadataPlugin._FieldMapping:
'registered_identifier',
'title',
'data_modality',
'duos_id',
]
},
'donors': {
Expand Down Expand Up @@ -351,6 +352,10 @@ def verbatim_pfb_schema(self,
is_polymorphic=is_duos_type)
]
if is_duos_type:
field_schemas.append(self._pfb_schema_from_anvil_column(table_name=table_name,
column_name='duos_id',
anvil_datatype='string',
is_polymorphic=True))
field_schemas.append(self._pfb_schema_from_anvil_column(table_name=table_name,
column_name='description',
anvil_datatype='string',
Expand Down
1 change: 1 addition & 0 deletions src/azul/plugins/metadata/anvil/indexer/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,7 @@ def _duos_types(cls) -> FieldTypes:
return {
'document_id': null_str,
'description': null_str,
'duos_id': null_str,
}

def _duos(self, dataset: EntityReference) -> MutableJSON:
Expand Down
1 change: 1 addition & 0 deletions src/azul/plugins/metadata/anvil/service/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ def _non_pivotal_fields_by_entity_type(self) -> dict[str, set[str]]:
},
'datasets': {
'dataset_id',
'duos_id',
'title'
},
'diagnoses': {
Expand Down
4 changes: 3 additions & 1 deletion src/azul/plugins/repository/tdr_anvil/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,14 +457,16 @@ def _supplementary_bundle(self, bundle_fqid: TDRAnvilBundleFQID) -> TDRAnvilBund
def _duos_bundle(self, bundle_fqid: TDRAnvilBundleFQID) -> TDRAnvilBundle:
assert not bundle_fqid.is_batched, bundle_fqid
duos_info = self.tdr.get_duos(bundle_fqid.source)
duos_id = None if duos_info is None else one(duos_info['consentGroups'])['datasetIdentifier']
description = None if duos_info is None else duos_info.get('studyDescription')
ref, row = self._get_dataset(bundle_fqid.source.spec)
expected_entity_id = change_version(bundle_fqid.uuid,
self.bundle_uuid_version,
self.datarepo_row_uuid_version)
assert ref.entity_id == expected_entity_id, (ref, bundle_fqid)
bundle = TDRAnvilBundle(fqid=bundle_fqid)
bundle.add_entity(ref, self._version, {'description': description})
entity_row = {'duos_id': duos_id, 'description': description}
bundle.add_entity(ref, self._version, entity_row)
# Classify as orphan to suppress the emission of a contribution
bundle.add_entity(ref, self._version, dict(row), is_orphan=True)
return bundle
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 6 additions & 2 deletions test/indexer/test_anvil.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def setUpClass(cls) -> None:

mock_duos_url = furl('https:://mock_duos.lan')

duos_id = 'foo'
duos_id = 'DUOS-000000'
duos_description = 'Study description from DUOS'

@classmethod
Expand All @@ -93,6 +93,9 @@ def _patch_duos(cls) -> None:
}
})),
Mock(spec=HTTPResponse, status=200, data=json.dumps({
'consentGroups': [{
'datasetIdentifier': cls.duos_id
}],
'studyDescription': cls.duos_description
}))
]))
Expand Down Expand Up @@ -251,8 +254,9 @@ def test_dataset_description(self):
# These fields are populated only in the primary bundle
self.assertEqual(dataset_ref.entity_id, contents['document_id'])
self.assertEqual(['phs000693'], contents['registered_identifier'])
# This field is populated only in the DUOS bundle
# These fields are populated only in the DUOS bundle
self.assertEqual('Study description from DUOS', contents['description'])
self.assertEqual('DUOS-000000', contents['duos_id'])
else:
self.fail(qualifier)
self.assertDictEqual(doc_counts, {
Expand Down
2 changes: 2 additions & 0 deletions test/service/data/verbatim/anvil/pfb_entities.json
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@
"datarepo_row_id": null,
"dataset_id": null,
"description": "Study description from DUOS",
"duos_id": "DUOS-000000",
"owner": null,
"principal_investigator": null,
"registered_identifier": null,
Expand Down Expand Up @@ -265,6 +266,7 @@
"datarepo_row_id": "2370f948-2783-4eb6-afea-e022897f4dcf",
"dataset_id": "52ee7665-7033-63f2-a8d9-ce8e32666739",
"description": null,
"duos_id": null,
"owner": [
"Debbie Nickerson"
],
Expand Down
8 changes: 8 additions & 0 deletions test/service/data/verbatim/anvil/pfb_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,14 @@
"string"
]
},
{
"name": "duos_id",
"namespace": "anvil_dataset",
"type": [
"null",
"string"
]
},
{
"name": "owner",
"namespace": "anvil_dataset",
Expand Down
6 changes: 6 additions & 0 deletions test/service/test_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1801,6 +1801,12 @@ def test_compact_manifest(self):
'',
''
),
(
'datasets.duos_id',
'',
'',
'',
),
(
'donors.document_id',
'',
Expand Down

0 comments on commit 9032559

Please sign in to comment.