From c67cfb9233ec7fcae0cf3181b9d61cb6e5a525bb Mon Sep 17 00:00:00 2001 From: kedhammar Date: Fri, 14 Jun 2024 13:40:51 +0000 Subject: [PATCH] make names more intuitive --- anglerfish/anglerfish.py | 8 ++++--- anglerfish/demux/demux.py | 22 +++++++++---------- anglerfish/explore/explore.py | 4 ++-- .../test_anglerfish/test_demux/test_demux.py | 6 ++--- 4 files changed, 21 insertions(+), 19 deletions(-) diff --git a/anglerfish/anglerfish.py b/anglerfish/anglerfish.py index 4b3450d..1243a07 100755 --- a/anglerfish/anglerfish.py +++ b/anglerfish/anglerfish.py @@ -18,7 +18,7 @@ Alignment, categorize_matches, cluster_matches, - parse_reads_alns_from_paf, + map_reads_to_alns, run_minimap2, write_demuxedfastq, ) @@ -139,12 +139,14 @@ def run_demux(args): for i in f: num_fq += 1 num_fq = int(num_fq / 4) - reads_alns: dict[str, list[Alignment]] = parse_reads_alns_from_paf(align_path) + reads_to_alns: dict[str, list[Alignment]] = map_reads_to_alns(align_path) # Make stats log.info(f" Searching for adaptor hits in {adaptor_bc_name}") fragments, singletons, concats, unknowns = categorize_matches( - adaptor_name + "_i5", adaptor_name + "_i7", reads_alns + i5_name=f"{adaptor_name}_i5", + i7_name=f"{adaptor_name}_i7", + reads_to_alns=reads_to_alns, ) stats = AlignmentStat(adaptor_bc_name) stats.compute_pafstats(num_fq, fragments, singletons, concats, unknowns) diff --git a/anglerfish/demux/demux.py b/anglerfish/demux/demux.py index eeb7424..989c3e7 100644 --- a/anglerfish/demux/demux.py +++ b/anglerfish/demux/demux.py @@ -99,7 +99,7 @@ def __init__(self, paf_line: str): self.sample: str | None = None -def parse_reads_alns_from_paf( +def map_reads_to_alns( paf_path: str, min_qual: int = 1, complex_identifier: bool = False ) -> dict[str, list[Alignment]]: """ @@ -107,7 +107,7 @@ def parse_reads_alns_from_paf( Outputs: - reads_alns = { + reads_to_alns = { "read1": [ aln_read1_adaptor1_i5, aln_read1_adaptor1_i7, @@ -124,12 +124,12 @@ def parse_reads_alns_from_paf( } complex_identifier = False (default) - --> The keys will be on the form "{read}". + --> The keys will be on the form "{read_name}". complex_identifier = True - --> The keys will be on the form "{read}_{i5_or_i7}_{strand_str}". + --> The keys will be on the form "{read_name}_{i5_or_i7}_{strand_str}". """ - reads_alns: dict = {} + reads_to_alns: dict = {} with open(paf_path) as paf: for paf_line in paf: try: @@ -153,16 +153,16 @@ def parse_reads_alns_from_paf( log.debug(f"Low quality alignment: {aln.read_name}") continue - if key in reads_alns.keys(): - reads_alns[key].append(aln) + if key in reads_to_alns.keys(): + reads_to_alns[key].append(aln) else: - reads_alns[key] = [aln] + reads_to_alns[key] = [aln] - return reads_alns + return reads_to_alns def categorize_matches( - i5_name: str, i7_name: str, reads_alns: dict[str, list[Alignment]] + i5_name: str, i7_name: str, reads_to_alns: dict[str, list[Alignment]] ) -> tuple[ dict[str, list[Alignment]], dict[str, list[Alignment]], @@ -183,7 +183,7 @@ def categorize_matches( singletons = {} concats = {} unknowns = {} - for read, alns in reads_alns.items(): + for read, alns in reads_to_alns.items(): sorted_alns = [] for i in range(len(alns) - 1): aln_i: Alignment = alns[i] diff --git a/anglerfish/explore/explore.py b/anglerfish/explore/explore.py index 99f3e2c..e39396a 100644 --- a/anglerfish/explore/explore.py +++ b/anglerfish/explore/explore.py @@ -6,7 +6,7 @@ import pandas as pd from anglerfish.demux.adaptor import Adaptor, load_adaptors -from anglerfish.demux.demux import Alignment, parse_reads_alns_from_paf, run_minimap2 +from anglerfish.demux.demux import Alignment, map_reads_to_alns, run_minimap2 from anglerfish.explore.entropy import calculate_relative_entropy logging.basicConfig(level=logging.INFO) @@ -72,7 +72,7 @@ def run_explore( adaptors_included = [] for adaptor, aln_path in adaptors_and_aln_paths: log.info(f"Parsing {adaptor.name}") - reads_alns: dict[str, list[Alignment]] = parse_reads_alns_from_paf( + reads_alns: dict[str, list[Alignment]] = map_reads_to_alns( aln_path, complex_identifier=True ) diff --git a/tests/test_anglerfish/test_demux/test_demux.py b/tests/test_anglerfish/test_demux/test_demux.py index feb8d6b..60669ac 100644 --- a/tests/test_anglerfish/test_demux/test_demux.py +++ b/tests/test_anglerfish/test_demux/test_demux.py @@ -112,7 +112,7 @@ def test_run_minimap2(fixture): def test_parse_alns_from_path(fixture): - reads_alns = to_test.parse_reads_alns_from_paf(fixture["paf_single"]) + reads_alns = to_test.map_reads_to_alns(fixture["paf_single"]) for read_name, alns in reads_alns.items(): assert read_name == "0ad8bdb6-e009-43c5-95b1-d381e699f983" @@ -165,10 +165,10 @@ def test_parse_cs(fixture): def test_layout_matches(fixture): i5_name = "truseq_i5" i7_name = "truseq_i7" - reads_alns = to_test.parse_reads_alns_from_paf(fixture["paf_multiple"]) + reads_alns = to_test.map_reads_to_alns(fixture["paf_multiple"]) layout = to_test.categorize_matches( - i5_name=i5_name, i7_name=i7_name, reads_alns=reads_alns + i5_name=i5_name, i7_name=i7_name, reads_to_alns=reads_alns ) fragments, singletons, concats, unknowns = layout