From 9308f2c29178c501d6c5a76cf862484b8a88122d Mon Sep 17 00:00:00 2001 From: Remi-Andre Olsen Date: Thu, 25 Jan 2024 16:08:02 +0100 Subject: [PATCH 01/10] First UMI support experiment --- anglerfish/config/adaptors.yaml | 23 ++++++++------ anglerfish/demux/demux.py | 21 ++++++++++--- anglerfish/demux/samplesheet.py | 53 ++++++++++++++++++++++++++++----- 3 files changed, 77 insertions(+), 20 deletions(-) diff --git a/anglerfish/config/adaptors.yaml b/anglerfish/config/adaptors.yaml index e7a1380..510b3e9 100644 --- a/anglerfish/config/adaptors.yaml +++ b/anglerfish/config/adaptors.yaml @@ -1,24 +1,29 @@ # More adaptors can be added manually, following the format below. -# The position of an index within an adaptor is represented by the delimiter "-NNN-". +# The position of an index within an adaptor is represented by the delimiter "". +# The position and length of the UMI within an adaptor is represented by the delimiter "" where # is the length of the UMI. # The indexes themselves are represented in the sample sheet. 
# Ilumina unique dual indexes, see https://web.archive.org/web/20231129095351/https://support-docs.illumina.com/SHARE/AdapterSequences/Content/SHARE/AdapterSeq/Illumina_DNA/IlluminaUDIndexes.htm illumina_ud: - i5: AATGATACGGCGACCACCGAGATCTACAC-NNN-TCGTCGGCAGCGTC - i7: CAAGCAGAAGACGGCATACGAGAT-NNN-GTCTCGTGGGCTCGG + i5: AATGATACGGCGACCACCGAGATCTACACTCGTCGGCAGCGTC + i7: CAAGCAGAAGACGGCATACGAGATGTCTCGTGGGCTCGG truseq: i5: AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT - i7: GATCGGAAGAGCACACGTCTGAACTCCAGTCAC-NNN-ATCTCGTATGCCGTCTTCTGCTTG + i7: GATCGGAAGAGCACACGTCTGAACTCCAGTCACATCTCGTATGCCGTCTTCTGCTTG truseq_dual: - i5: AATGATACGGCGACCACCGAGATCTACAC-NNN-ACACTCTTTCCCTACACGACGCTCTTCCGATCT - i7: GATCGGAAGAGCACACGTCTGAACTCCAGTCAC-NNN-ATCTCGTATGCCGTCTTCTGCTTG + i5: AATGATACGGCGACCACCGAGATCTACACACACTCTTTCCCTACACGACGCTCTTCCGATCT + i7: GATCGGAAGAGCACACGTCTGAACTCCAGTCACATCTCGTATGCCGTCTTCTGCTTG + +truseq_umi: + i5: AATGATACGGCGACCACCGAGATCTACACACACTCTTTCCCTACACGACGCTCTTCCGATCT + i7: GATCGGAAGAGCACACGTCTGAACTCCAGTCACATCTCGTATGCCGTCTTCTGCTTG nextera_legacy: i5: AATGATACGGCGACCACCGAGATCTACACGCCTCCCTCGCGCCATCAG - i7: CAAGCAGAAGACGGCATACGAGAT-NNN-CGGTCTGCCTTGCCAGCCCGCTCAG + i7: CAAGCAGAAGACGGCATACGAGATCGGTCTGCCTTGCCAGCCCGCTCAG nextera_dual: - i5: AATGATACGGCGACCACCGAGATCTACAC-NNN-GTCTCGTGGGCTCGG - i7: CAAGCAGAAGACGGCATACGAGAT-NNN-ATCTCGTATGCCGTCTTCTGCTTG + i5: AATGATACGGCGACCACCGAGATCTACACGTCTCGTGGGCTCGG + i7: CAAGCAGAAGACGGCATACGAGATATCTCGTATGCCGTCTTCTGCTTG diff --git a/anglerfish/demux/demux.py b/anglerfish/demux/demux.py index 444bb01..af01ee5 100644 --- a/anglerfish/demux/demux.py +++ b/anglerfish/demux/demux.py @@ -12,13 +12,16 @@ log = logging.getLogger("anglerfish") -def parse_cs(cs_string, index, max_distance): +def parse_cs(cs_string, index, umi_before=0, umi_after=0): """ Parses the CS string of a paf alignment and matches it to the given index using a max Levenshtein distance - TODO / idea: Do something big-brained with ONT squigglies """ nt = 
re.compile("\*n([atcg])") nts = "".join(re.findall(nt, cs_string)) + if umi_before > 0: + nts = nts[umi_before:] + if umi_after > 0: + nts = nts[:-umi_after] # Allow for mismatches return nts, lev.distance(index.lower(), nts) @@ -169,14 +172,24 @@ def cluster_matches( i5_seq = adaptor.i5_index if i5_reversed and i5_seq is not None: i5_seq = str(Seq(i5_seq).reverse_complement()) - fi5, d1 = parse_cs(i5["cs"], i5_seq, max_distance) + fi5, d1 = parse_cs( + i5["cs"], + i5_seq, + umi_before=adaptor.i5_umi_before, + umi_after=adaptor.i5_umi_after, + ) except AttributeError: d1 = 0 # presumably it's single index, so no i5 i7_seq = adaptor.i7_index if i7_reversed and i7_seq is not None: i7_seq = str(Seq(i7_seq).reverse_complement()) - fi7, d2 = parse_cs(i7["cs"], i7_seq, max_distance) + fi7, d2 = parse_cs( + i7["cs"], + i7_seq, + umi_before=adaptor.i7_umi_before, + umi_after=adaptor.i7_umi_after, + ) dists.append(d1 + d2) index_min = min(range(len(dists)), key=dists.__getitem__) diff --git a/anglerfish/demux/samplesheet.py b/anglerfish/demux/samplesheet.py index ed0c2d6..4041d18 100644 --- a/anglerfish/demux/samplesheet.py +++ b/anglerfish/demux/samplesheet.py @@ -13,7 +13,10 @@ assert isinstance(p, os.PathLike) with open(p) as stream: adaptors = yaml.safe_load(stream) -delim = "-NNN-" + +idelim = re.compile(r"\") +udelim = re.compile(r"(\)") +ulen = re.compile(r"\") @dataclass @@ -30,22 +33,58 @@ def __init__(self, adaptor, i7_index=None, i5_index=None): self.i7 = adaptors[adaptor]["i7"] self.i5_index = i5_index self.i7_index = i7_index + self.i5_umi = re.findall(udelim, self.i5) + self.i5_umi_before = 0 + self.i5_umi_after = 0 + self.i7_umi = re.findall(udelim, self.i7) + self.i7_umi_before = 0 + self.i7_umi_after = 0 self.name = f"{adaptor}_len{len(i7_index)}" - if delim in self.i5 and i5_index is None: + if len(self.i5_umi) > 1 or len(self.i7_umi) > 1: + raise UserWarning( + f"Adaptor {adaptor} has more than one UMI in either i5 or i7. This is not supported." 
+ ) + # Check if UMI is before or after i5 index + if len(self.i5_umi) > 0 and ">" + self.i5_umi[0] in self.i5: + self.i5_umi_before = int(re.search(ulen, self.i5_umi[0]).group(1)) + elif len(self.i5_umi) > 0 and self.i5_umi[0] + "<" in self.i5: + self.i5_umi_after = int(re.search(ulen, self.i5_umi[0]).group(1)) + elif len(self.i5_umi) > 0: + raise UserWarning( + f"Adaptor {adaptor} has UMI but it does not flank an index. This is not supported." + ) + # Check if UMI is before or after i7 index + if len(self.i7_umi) > 0 and ">" + self.i7_umi[0] in self.i7: + self.i7_umi_before = int(re.search(ulen, self.i7_umi[0]).group(1)) + elif len(self.i7_umi) > 0 and self.i7_umi[0] + "<" in self.i7: + self.i7_umi_after = int(re.search(ulen, self.i7_umi[0]).group(1)) + elif len(self.i7_umi) > 0: + raise UserWarning( + f"Adaptor {adaptor} has UMI but it does not flank an index. This is not supported." + ) + if re.search(idelim, self.i5) is not None and i5_index is None: raise UserWarning("Adaptor has i5 but no sequence was specified") - if delim in self.i7 and i7_index is None: + if re.search(idelim, self.i7) is not None and i7_index is None: raise UserWarning("Adaptor has i7 but no sequence was specified") def get_i5_mask(self): - if delim in self.i5: - return self.i5.replace(delim, "N" * len(self.i5_index)) + if self.i5_index is not None: + new_i5 = re.sub(idelim, "N" * len(self.i5_index), self.i5) + new_i5 = re.sub( + udelim, "N" * max(self.i5_umi_after, self.i5_umi_before), new_i5 + ) + return new_i5 else: return self.i5 def get_i7_mask(self): - if delim in self.i7: - return self.i7.replace(delim, "N" * len(self.i7_index)) + if self.i7_index is not None: + new_i7 = re.sub(idelim, "N" * len(self.i7_index), self.i7) + new_i7 = re.sub( + udelim, "N" * max(self.i7_umi_after, self.i7_umi_before), new_i7 + ) + return new_i7 else: return self.i7 From a3a7cdf3303c9cdd808e6baf460a5b2cd64c76c3 Mon Sep 17 00:00:00 2001 From: Remi-Andre Olsen Date: Wed, 31 Jan 2024 15:51:09 +0100 
Subject: [PATCH 02/10] Fix umi detection --- anglerfish/demux/demux.py | 3 +-- anglerfish/demux/samplesheet.py | 10 +++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/anglerfish/demux/demux.py b/anglerfish/demux/demux.py index af01ee5..0f33c8c 100644 --- a/anglerfish/demux/demux.py +++ b/anglerfish/demux/demux.py @@ -22,7 +22,6 @@ def parse_cs(cs_string, index, umi_before=0, umi_after=0): nts = nts[umi_before:] if umi_after > 0: nts = nts[:-umi_after] - # Allow for mismatches return nts, lev.distance(index.lower(), nts) @@ -53,7 +52,7 @@ def run_minimap2(fastq_in, indexfile, output_paf, threads): subprocess.run(cmd, stdout=ofile, check=True) -def parse_paf_lines(paf, min_qual=10): +def parse_paf_lines(paf, min_qual=1): """ Read and parse one paf alignment lines. Returns a dict with the import values for later use diff --git a/anglerfish/demux/samplesheet.py b/anglerfish/demux/samplesheet.py index 4041d18..dcc4d37 100644 --- a/anglerfish/demux/samplesheet.py +++ b/anglerfish/demux/samplesheet.py @@ -16,7 +16,7 @@ idelim = re.compile(r"\") udelim = re.compile(r"(\)") -ulen = re.compile(r"\") +ulen = re.compile(r"\") @dataclass @@ -47,18 +47,18 @@ def __init__(self, adaptor, i7_index=None, i5_index=None): ) # Check if UMI is before or after i5 index if len(self.i5_umi) > 0 and ">" + self.i5_umi[0] in self.i5: - self.i5_umi_before = int(re.search(ulen, self.i5_umi[0]).group(1)) - elif len(self.i5_umi) > 0 and self.i5_umi[0] + "<" in self.i5: self.i5_umi_after = int(re.search(ulen, self.i5_umi[0]).group(1)) + elif len(self.i5_umi) > 0 and self.i5_umi[0] + "<" in self.i5: + self.i5_umi_before = int(re.search(ulen, self.i5_umi[0]).group(1)) elif len(self.i5_umi) > 0: raise UserWarning( f"Adaptor {adaptor} has UMI but it does not flank an index. This is not supported." 
) # Check if UMI is before or after i7 index if len(self.i7_umi) > 0 and ">" + self.i7_umi[0] in self.i7: - self.i7_umi_before = int(re.search(ulen, self.i7_umi[0]).group(1)) - elif len(self.i7_umi) > 0 and self.i7_umi[0] + "<" in self.i7: self.i7_umi_after = int(re.search(ulen, self.i7_umi[0]).group(1)) + elif len(self.i7_umi) > 0 and self.i7_umi[0] + "<" in self.i7: + self.i7_umi_before = int(re.search(ulen, self.i7_umi[0]).group(1)) elif len(self.i7_umi) > 0: raise UserWarning( f"Adaptor {adaptor} has UMI but it does not flank an index. This is not supported." From cc7fd2c27c605687dc38ab5a281d8fcf048325b6 Mon Sep 17 00:00:00 2001 From: Remi-Andre Olsen Date: Wed, 31 Jan 2024 16:38:37 +0100 Subject: [PATCH 03/10] minor simplification --- anglerfish/demux/demux.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/anglerfish/demux/demux.py b/anglerfish/demux/demux.py index 0f33c8c..07f4d10 100644 --- a/anglerfish/demux/demux.py +++ b/anglerfish/demux/demux.py @@ -174,8 +174,8 @@ def cluster_matches( fi5, d1 = parse_cs( i5["cs"], i5_seq, - umi_before=adaptor.i5_umi_before, - umi_after=adaptor.i5_umi_after, + adaptor.i5_umi_before, + adaptor.i5_umi_after, ) except AttributeError: d1 = 0 # presumably it's single index, so no i5 @@ -186,8 +186,8 @@ def cluster_matches( fi7, d2 = parse_cs( i7["cs"], i7_seq, - umi_before=adaptor.i7_umi_before, - umi_after=adaptor.i7_umi_after, + adaptor.i7_umi_before, + adaptor.i7_umi_after, ) dists.append(d1 + d2) From 92483fad3951e05ab2a6f459eaa58596dacfe833 Mon Sep 17 00:00:00 2001 From: Remi-Andre Olsen Date: Wed, 31 Jan 2024 17:24:46 +0100 Subject: [PATCH 04/10] Slowly unbreaking merge --- anglerfish/demux/adaptor.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/anglerfish/demux/adaptor.py b/anglerfish/demux/adaptor.py index 69dc0c5..1a54080 100644 --- a/anglerfish/demux/adaptor.py +++ b/anglerfish/demux/adaptor.py @@ -15,10 +15,10 @@ def __init__(self, adaptors, 
delim, adaptor, i7_index=None, i5_index=None): self.i7 = AdaptorPart(adaptors[adaptor]["i5"], adaptor, delim, i5_index) self.i5_index = i5_index self.i7_index = i7_index - self.i5_umi = re.findall(udelim, self.i5) + self.i5_umi = re.findall(udelim, self.i5.sequence) self.i5_umi_before = 0 self.i5_umi_after = 0 - self.i7_umi = re.findall(udelim, self.i7) + self.i7_umi = re.findall(udelim, self.i7.sequence) self.i7_umi_before = 0 self.i7_umi_after = 0 self.name = f"{adaptor}" @@ -29,31 +29,31 @@ def __init__(self, adaptors, delim, adaptor, i7_index=None, i5_index=None): f"Adaptor {adaptor} has more than one UMI in either i5 or i7. This is not supported." ) # Check if UMI is before or after i5 index - if len(self.i5_umi) > 0 and ">" + self.i5_umi[0] in self.i5: + if len(self.i5_umi) > 0 and ">" + self.i5_umi[0] in self.i5.sequence: self.i5_umi_after = int(re.search(ulen, self.i5_umi[0]).group(1)) - elif len(self.i5_umi) > 0 and self.i5_umi[0] + "<" in self.i5: + elif len(self.i5_umi) > 0 and self.i5_umi[0] + "<" in self.i5.sequence: self.i5_umi_before = int(re.search(ulen, self.i5_umi[0]).group(1)) elif len(self.i5_umi) > 0: raise UserWarning( f"Adaptor {adaptor} has UMI but it does not flank an index. This is not supported." ) # Check if UMI is before or after i7 index - if len(self.i7_umi) > 0 and ">" + self.i7_umi[0] in self.i7: + if len(self.i7_umi) > 0 and ">" + self.i7_umi[0] in self.i7.sequence: self.i7_umi_after = int(re.search(ulen, self.i7_umi[0]).group(1)) - elif len(self.i7_umi) > 0 and self.i7_umi[0] + "<" in self.i7: + elif len(self.i7_umi) > 0 and self.i7_umi[0] + "<" in self.i7.sequence: self.i7_umi_before = int(re.search(ulen, self.i7_umi[0]).group(1)) elif len(self.i7_umi) > 0: raise UserWarning( f"Adaptor {adaptor} has UMI but it does not flank an index. This is not supported." 
) - if re.search(idelim, self.i5) is not None and i5_index is None: + if re.search(idelim, self.i5.sequence) is not None and i5_index is None: raise UserWarning("Adaptor has i5 but no sequence was specified") - if re.search(idelim, self.i7) is not None and i7_index is None: + if re.search(idelim, self.i7.sequence) is not None and i7_index is None: raise UserWarning("Adaptor has i7 but no sequence was specified") def get_i5_mask(self): if self.i5_index is not None: - new_i5 = re.sub(idelim, "N" * len(self.i5_index), self.i5) + new_i5 = re.sub(idelim, "N" * len(self.i5_index), self.i5.sequence) new_i5 = re.sub( udelim, "N" * max(self.i5_umi_after, self.i5_umi_before), new_i5 ) @@ -63,7 +63,7 @@ def get_i5_mask(self): def get_i7_mask(self): if self.i7_index is not None: - new_i7 = re.sub(idelim, "N" * len(self.i7_index), self.i7) + new_i7 = re.sub(idelim, "N" * len(self.i7_index), self.i7.sequence) new_i7 = re.sub( udelim, "N" * max(self.i7_umi_after, self.i7_umi_before), new_i7 ) @@ -111,8 +111,9 @@ def load_adaptors(raw=False): return adaptors_raw adaptors = [] for adaptor in adaptors_raw: + # This is now broken, I think adaptors.append( - Adaptor(adaptors_raw, "-NNN-", adaptor, i7_index=None, i5_index=None) + Adaptor(adaptors_raw, "", adaptor, i7_index=None, i5_index=None) ) return adaptors From 7713574e73810012a4758f3ee094720d9482d7ab Mon Sep 17 00:00:00 2001 From: Remi-Andre Olsen Date: Thu, 8 Feb 2024 13:33:14 +0100 Subject: [PATCH 05/10] Merge reconciliation part 2 --- anglerfish/demux/adaptor.py | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/anglerfish/demux/adaptor.py b/anglerfish/demux/adaptor.py index 1a54080..ba0f499 100644 --- a/anglerfish/demux/adaptor.py +++ b/anglerfish/demux/adaptor.py @@ -10,9 +10,13 @@ class Adaptor: - def __init__(self, adaptors, delim, adaptor, i7_index=None, i5_index=None): - self.i5 = AdaptorPart(adaptors[adaptor]["i7"], adaptor, delim, i7_index) - self.i7 = 
AdaptorPart(adaptors[adaptor]["i5"], adaptor, delim, i5_index) + def __init__(self, adaptors, delim, adaptor_type, i7_index=None, i5_index=None): + self.i5 = AdaptorPart( + adaptors[adaptor_type]["i5"], adaptor_type, delim, i5_index + ) + self.i7 = AdaptorPart( + adaptors[adaptor_type]["i7"], adaptor_type, delim, i7_index + ) self.i5_index = i5_index self.i7_index = i7_index self.i5_umi = re.findall(udelim, self.i5.sequence) @@ -21,12 +25,12 @@ def __init__(self, adaptors, delim, adaptor, i7_index=None, i5_index=None): self.i7_umi = re.findall(udelim, self.i7.sequence) self.i7_umi_before = 0 self.i7_umi_after = 0 - self.name = f"{adaptor}" + self.name = f"{adaptor_type}" self.delim = delim if len(self.i5_umi) > 1 or len(self.i7_umi) > 1: raise UserWarning( - f"Adaptor {adaptor} has more than one UMI in either i5 or i7. This is not supported." + f"Adaptor {adaptor_type} has more than one UMI in either i5 or i7. This is not supported." ) # Check if UMI is before or after i5 index if len(self.i5_umi) > 0 and ">" + self.i5_umi[0] in self.i5.sequence: @@ -35,7 +39,7 @@ def __init__(self, adaptors, delim, adaptor, i7_index=None, i5_index=None): self.i5_umi_before = int(re.search(ulen, self.i5_umi[0]).group(1)) elif len(self.i5_umi) > 0: raise UserWarning( - f"Adaptor {adaptor} has UMI but it does not flank an index. This is not supported." + f"Adaptor {adaptor_type} has UMI but it does not flank an index. This is not supported." ) # Check if UMI is before or after i7 index if len(self.i7_umi) > 0 and ">" + self.i7_umi[0] in self.i7.sequence: @@ -44,11 +48,13 @@ def __init__(self, adaptors, delim, adaptor, i7_index=None, i5_index=None): self.i7_umi_before = int(re.search(ulen, self.i7_umi[0]).group(1)) elif len(self.i7_umi) > 0: raise UserWarning( - f"Adaptor {adaptor} has UMI but it does not flank an index. This is not supported." + f"Adaptor {adaptor_type} has UMI but it does not flank an index. This is not supported." 
) - if re.search(idelim, self.i5.sequence) is not None and i5_index is None: + + # Test if the index is specified in the adaptor sequence when it shouldn't be + if has_match(idelim, self.i5.sequence) and i5_index is None: raise UserWarning("Adaptor has i5 but no sequence was specified") - if re.search(idelim, self.i7.sequence) is not None and i7_index is None: + if has_match(idelim, self.i7.sequence) and i7_index is None: raise UserWarning("Adaptor has i7 but no sequence was specified") def get_i5_mask(self): @@ -59,7 +65,7 @@ def get_i5_mask(self): ) return new_i5 else: - return self.i5 + return self.i5.sequence def get_i7_mask(self): if self.i7_index is not None: @@ -69,10 +75,11 @@ def get_i7_mask(self): ) return new_i7 else: - return self.i7 + return self.i7.sequence class AdaptorPart: + # This class is used either the i5 or i7 adaptor def __init__(self, sequence, name, delim, index): self.sequence = sequence self.name = name @@ -98,6 +105,14 @@ def get_mask(self, insert_Ns): return self.sequence +# General function to check if a string contains a pattern +def has_match(delim, seq): + match = re.search(delim, seq) + if match is None: + return False + return True + + # Fetch all adaptors def load_adaptors(raw=False): p = importlib.resources.files("anglerfish.config").joinpath("adaptors.yaml") From 51523bd8614914389bbbe881b1945a2c4fa146ae Mon Sep 17 00:00:00 2001 From: Remi-Andre Olsen Date: Thu, 8 Feb 2024 14:26:49 +0100 Subject: [PATCH 06/10] Added anglerfish-explore endpoint --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 0b50d3f..3cf86dd 100644 --- a/setup.py +++ b/setup.py @@ -38,6 +38,7 @@ entry_points={ "console_scripts": [ "anglerfish=anglerfish.anglerfish:anglerfish", + "anglerfish-explore=anglerfish.explore.cli:main", ], }, zip_safe=False, From 9f065278a3520a737b00eac36b5df95ea88eb895 Mon Sep 17 00:00:00 2001 From: Remi-Andre Olsen Date: Thu, 8 Feb 2024 14:28:40 +0100 Subject: [PATCH 07/10] Merge reconciliation 
part 3 --- anglerfish/demux/adaptor.py | 39 +++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/anglerfish/demux/adaptor.py b/anglerfish/demux/adaptor.py index ba0f499..ea369fb 100644 --- a/anglerfish/demux/adaptor.py +++ b/anglerfish/demux/adaptor.py @@ -51,32 +51,37 @@ def __init__(self, adaptors, delim, adaptor_type, i7_index=None, i5_index=None): f"Adaptor {adaptor_type} has UMI but it does not flank an index. This is not supported." ) + def get_i5_mask(self, insert_Ns=True): + ilen = len(self.i5_index) if self.i5_index is not None and insert_Ns else 0 + ulen = max(self.i5_umi_after, self.i5_umi_before) if insert_Ns else 0 # Test if the index is specified in the adaptor sequence when it shouldn't be - if has_match(idelim, self.i5.sequence) and i5_index is None: + if has_match(idelim, self.i5.sequence) and self.i5_index is None and insert_Ns: raise UserWarning("Adaptor has i5 but no sequence was specified") - if has_match(idelim, self.i7.sequence) and i7_index is None: - raise UserWarning("Adaptor has i7 but no sequence was specified") - - def get_i5_mask(self): - if self.i5_index is not None: - new_i5 = re.sub(idelim, "N" * len(self.i5_index), self.i5.sequence) - new_i5 = re.sub( - udelim, "N" * max(self.i5_umi_after, self.i5_umi_before), new_i5 - ) + if self.i5_index is not None or not insert_Ns: + new_i5 = re.sub(idelim, "N" * ilen, self.i5.sequence) + new_i5 = re.sub(udelim, "N" * ulen, new_i5) return new_i5 else: return self.i5.sequence - def get_i7_mask(self): - if self.i7_index is not None: - new_i7 = re.sub(idelim, "N" * len(self.i7_index), self.i7.sequence) - new_i7 = re.sub( - udelim, "N" * max(self.i7_umi_after, self.i7_umi_before), new_i7 - ) + def get_i7_mask(self, insert_Ns=True): + ilen = len(self.i7_index) if self.i7_index is not None and insert_Ns else 0 + ulen = max(self.i7_umi_after, self.i7_umi_before) if insert_Ns else 0 + # Test if the index is specified in the adaptor sequence when it 
shouldn't be + if has_match(idelim, self.i7.sequence) and self.i7_index is None and insert_Ns: + raise UserWarning("Adaptor has i7 but no sequence was specified") + if self.i7_index is not None or not insert_Ns: + new_i7 = re.sub(idelim, "N" * ilen, self.i7.sequence) + new_i7 = re.sub(udelim, "N" * ulen, new_i7) return new_i7 else: return self.i7.sequence + def get_fastastring(self, insert_Ns=True): + fasta_i5 = f">{self.name}_i5\n{self.get_i5_mask(insert_Ns)}\n" + fasta_i7 = f">{self.name}_i7\n{self.get_i7_mask(insert_Ns)}\n" + return fasta_i5 + fasta_i7 + class AdaptorPart: # This class is used either the i5 or i7 adaptor @@ -128,7 +133,7 @@ def load_adaptors(raw=False): for adaptor in adaptors_raw: # This is now broken, I think adaptors.append( - Adaptor(adaptors_raw, "", adaptor, i7_index=None, i5_index=None) + Adaptor(adaptors_raw, "N", adaptor, i7_index=None, i5_index=None) ) return adaptors From 17a96672b9f2f00cd6fdd150da99b7f4433cdccc Mon Sep 17 00:00:00 2001 From: Remi-Andre Olsen Date: Thu, 8 Feb 2024 14:37:31 +0100 Subject: [PATCH 08/10] Minor cleanup of comments --- anglerfish/demux/adaptor.py | 1 - anglerfish/demux/samplesheet.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/anglerfish/demux/adaptor.py b/anglerfish/demux/adaptor.py index ea369fb..69e7ea1 100644 --- a/anglerfish/demux/adaptor.py +++ b/anglerfish/demux/adaptor.py @@ -131,7 +131,6 @@ def load_adaptors(raw=False): return adaptors_raw adaptors = [] for adaptor in adaptors_raw: - # This is now broken, I think adaptors.append( Adaptor(adaptors_raw, "N", adaptor, i7_index=None, i5_index=None) ) diff --git a/anglerfish/demux/samplesheet.py b/anglerfish/demux/samplesheet.py index 60dc324..f01c9cc 100644 --- a/anglerfish/demux/samplesheet.py +++ b/anglerfish/demux/samplesheet.py @@ -12,7 +12,7 @@ udelim = re.compile(r"(\)") ulen = re.compile(r"\") adaptors = load_adaptors(raw=True) -# Holy merge conflict, Batman! 
Fix this later +# This is some leftover ugliness from a merge conflict to reconcile the old and new adaptor classes delim = "" From a52fd06b12a71e2b864b5f21764a076dbc0ccf52 Mon Sep 17 00:00:00 2001 From: Remi-Andre Olsen Date: Thu, 8 Feb 2024 14:53:46 +0100 Subject: [PATCH 09/10] Keep devcontainer.json ugly --- .prettierignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 .prettierignore diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 0000000..73fcfa6 --- /dev/null +++ b/.prettierignore @@ -0,0 +1 @@ +devcontainer.json From 4b027d83658c287b919ed904fc4a205db0e6061a Mon Sep 17 00:00:00 2001 From: Remi-Andre Olsen Date: Fri, 9 Feb 2024 14:01:18 +0100 Subject: [PATCH 10/10] Changed to anglerfish-explore command --- README.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index be02c73..63bd962 100644 --- a/README.md +++ b/README.md @@ -173,14 +173,12 @@ In folder `anglerfish_????_??_??_?????/` ## Anglerfish Explore (Experimental) -`anglerfish explore` is a command that aims to explore a sequencing pool without a given samplesheet and give hints on what adapter types are present, which index lenghts are used and whether there are any UMIs within the index sequence. The Anglerfish explore command is still under heavy development but can be triggered by running: +`anglerfish-explore` is a command that aims to explore a sequencing pool without a given samplesheet and give hints on what adapter types are present, which index lengths are used and whether there are any UMIs within the index sequence. The Anglerfish explore command is still under heavy development but can be triggered by running, e.g. for help text: ```shell -python anglerfish/explore/cli.py +anglerfish-explore --help ``` -inside the anglerfish directory. 
- ## Credits The Anglerfish code was written by [@remiolsen](https://github.com/remiolsen) but it would not exist without the contributions of [@FranBonath](https://github.com/FranBonath), [@taborsak](https://github.com/taborsak), [@ssjunnebo](https://github.com/ssjunnebo) and Carl Rubin.