Skip to content

Commit

Permalink
Add new study groups for human data split from Illumina runs
Browse files Browse the repository at this point in the history
In Illumina sequencing runs of non-human DNA, assign any contamination identified as human DNA to a special iRODS group named after the original study, e.g. `ss_1000` -> `ss_1000_human`.
  • Loading branch information
zb32 authored Jan 25, 2024
1 parent e0a3fd4 commit 2d4413c
Show file tree
Hide file tree
Showing 6 changed files with 36 additions and 11 deletions.
6 changes: 3 additions & 3 deletions src/npg_irods/illumina.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright © 2023 Genome Research Ltd. All rights reserved.
# Copyright © 2023, 2024 Genome Research Ltd. All rights reserved.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -234,7 +234,7 @@ def empty_acl(*args):
for fc in flowcells:
secondary_metadata.extend(sample_fn(fc.sample))
secondary_metadata.extend(study_fn(fc.study))
acl.extend(acl_fn(fc.sample, fc.study, zone=zone))
acl.extend(acl_fn(fc.sample, fc.study, subset=c.subset, zone=zone))

# Remove duplicates
secondary_metadata = sorted(set(secondary_metadata))
Expand All @@ -248,7 +248,7 @@ def empty_acl(*args):
cons_update = ensure_consent_withdrawn(item)
elif any(c.contains_nonconsented_human() for c in components): # Illumina specific
log.info("Non-consented human data", path=item)
xahu_update = ensure_consent_withdrawn(item)
xahu_update = update_permissions(item, acl)
else:
perm_update = update_permissions(item, acl)

Expand Down
16 changes: 13 additions & 3 deletions src/npg_irods/metadata/lims.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright © 2021, 2022, 2023 Genome Research Ltd. All rights reserved.
# Copyright © 2021, 2022, 2023, 2024 Genome Research Ltd. All rights reserved.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -38,6 +38,7 @@
from npg_irods.db.mlwh import Sample, Study
from npg_irods.metadata.common import (
SeqConcept,
SeqSubset,
ensure_avus_present,
avu_if_value,
)
Expand Down Expand Up @@ -168,7 +169,9 @@ def make_reduced_study_metadata(study: Study) -> list[AVU]:
return [avu_if_value(TrackedStudy.ID, study.id_study_lims)]


def make_sample_acl(sample: Sample, study: Study, zone=None) -> list[AC]:
def make_sample_acl(
sample: Sample, study: Study, subset: SeqSubset = None, zone=None
) -> list[AC]:
"""Returns an ACL for a given Sample in a Study.
This method takes into account all factors influencing access control, which are:
Expand All @@ -187,12 +190,19 @@ def make_sample_acl(sample: Sample, study: Study, zone=None) -> list[AC]:
sample: A sample, which will be used to confirm consent, which modifies the
ACL.
study: A study, which will provide permissions for the ACL.
subset: Subset of sequence reads.
zone: The iRODS zone.
Returns:
An ACL
"""
irods_group = f"{STUDY_IDENTIFIER_PREFIX}{study.id_study_lims}"
if subset is not None and subset is subset.XAHUMAN:
return []

if subset is not None and subset is subset.HUMAN:
irods_group = f"{STUDY_IDENTIFIER_PREFIX}{study.id_study_lims}_human"
else:
irods_group = f"{STUDY_IDENTIFIER_PREFIX}{study.id_study_lims}"
perm = Permission.NULL if sample.consent_withdrawn else Permission.READ

return [AC(irods_group, perm, zone=zone)]
Expand Down
13 changes: 12 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright © 2020, 2022, 2023 Genome Research Ltd. All rights reserved.
# Copyright © 2020, 2022, 2023, 2024 Genome Research Ltd. All rights reserved.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -1029,6 +1029,17 @@ def illumina_synthetic_irods(tmp_path):
*run_pos,
AVU(tag, 1),
),
"12345/12345#1_xahuman.cram": (
AVU(idp, "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"),
AVU(
cmp, '{"id_run":12345, "position":1, "tag_index":1, "subset":"xahuman"}'
),
AVU(
cmp, '{"id_run":12345, "position":2, "tag_index":1, "subset":"xahuman"}'
),
*run_pos,
AVU(tag, 1),
),
"12345/12345#2.cram": (
AVU(idp, "0b3bd00f1d186247f381aa87e213940b8c7ab7e5"),
AVU(cmp, '{"id_run":12345, "position":1, "tag_index":2}'),
Expand Down
9 changes: 6 additions & 3 deletions tests/test_illumina.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright © 2023 Genome Research Ltd. All rights reserved.
# Copyright © 2023, 2024 Genome Research Ltd. All rights reserved.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -412,7 +412,7 @@ def test_updates_changed_study_permissions(

@m.context("When data are multiplexed")
@m.context("When data contain a human subset")
@m.it("Removes managed access permissions")
@m.it("Updates managed access permissions to restricted human access group")
def test_updates_human_permissions_mx(
self, illumina_synthetic_irods, illumina_synthetic_mlwh
):
Expand All @@ -425,7 +425,10 @@ def test_updates_human_permissions_mx(
AC("irods", perm=Permission.OWN, zone=zone),
AC("ss_4000", perm=Permission.READ, zone=zone),
]
new_permissions = [AC("irods", perm=Permission.OWN, zone=zone)]
new_permissions = [
AC("irods", perm=Permission.OWN, zone=zone),
AC("ss_4000_human", perm=Permission.READ, zone=zone),
]

for obj in [DataObject(path), DataObject(qc_path)]:
obj.add_permissions(*old_permissions)
Expand Down
1 change: 1 addition & 0 deletions tests/test_locate_data_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def test_illumina_updates(
"12345#1.cram",
"12345#1_human.cram",
"12345#1_phix.cram",
"12345#1_xahuman.cram",
"12345#2.cram",
"12345#888.cram",
"12345.cram",
Expand Down
2 changes: 1 addition & 1 deletion tests/test_pacbio.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright © 2023 Genome Research Ltd. All rights reserved.
# Copyright © 2023, 2024 Genome Research Ltd. All rights reserved.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
Expand Down

0 comments on commit 2d4413c

Please sign in to comment.