From caed1433dfadc0dbd0a480d8b5aea1062da6c6b6 Mon Sep 17 00:00:00 2001 From: Rasheed El-Bouri Date: Wed, 26 Jan 2022 22:54:38 +0000 Subject: [PATCH] Remora motif expand --- megalodon/backends.py | 11 +++++++---- megalodon/mods.py | 13 ++++++------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/megalodon/backends.py b/megalodon/backends.py index d9e1d02..3cd18a6 100755 --- a/megalodon/backends.py +++ b/megalodon/backends.py @@ -1107,8 +1107,6 @@ def pyguppy_set_model_attributes( ) self.is_flipflop = init_called_read.model_type == FF_GUPPY_NAME self.is_crf = init_called_read.model_type == CRF_GUPPY_NAME - if self.is_crf: - LOGGER.info("CRF models are not fully supported.") self.stride = init_called_read.model_stride if remora_model_filename is not None or remora_model_spec is not None: @@ -1116,10 +1114,16 @@ def pyguppy_set_model_attributes( from remora import model_util if remora_model_filename is not None: + LOGGER.debug( + f"Loading Remora model from file: {remora_model_filename}" + ) _, remora_metadata = model_util.load_model( remora_model_filename, quiet=True ) else: + LOGGER.debug( + f"Loading Remora model from specs: {remora_model_spec}" + ) _, remora_metadata = model_util.load_model( pore=remora_model_spec[0], basecall_model_type=remora_model_spec[1], @@ -1130,8 +1134,7 @@ def pyguppy_set_model_attributes( quiet=True, ) self.ordered_mod_long_names = remora_metadata["mod_long_names"] - motif, motif_offset = remora_metadata["motif"] - can_idx = "ACGT".find(motif[motif_offset]) + 1 + can_idx = "ACGT".find(remora_metadata["can_base"]) + 1 self.output_alphabet = ( "ACGT"[:can_idx] + remora_metadata["mod_bases"] diff --git a/megalodon/mods.py b/megalodon/mods.py index c5022bb..334a8e8 100755 --- a/megalodon/mods.py +++ b/megalodon/mods.py @@ -2298,12 +2298,11 @@ def __init__( self.remora_RemoraRead = data_chunks.RemoraRead self.load_remora_model() self.mod_bases = self.remora_metadata["mod_bases"] - motif, motif_offset = self.remora_metadata["motif"] - can_base = motif[motif_offset] + can_base = self.remora_metadata["can_base"] can_idx = "ACGT".find(can_base) + 1 if can_idx == 0: raise mh.MegaError( - f"Invalid Remora model motif {motif}:{motif_offset}" + f"Invalid Remora model canonical base {can_base}" ) self.alphabet = "ACGT" self.output_alphabet = ( @@ -2322,12 +2321,12 @@ def __init__( for mod_b, mln in self.mod_long_names ) LOGGER.info(f"Loaded Remora model calls modified bases: {mod_str}") - if all_mod_motifs_raw is None: + if all_mod_motifs_raw is not None: LOGGER.info( - f'Setting --mod-motif to "{motif} {motif_offset}" loaded ' - "from Remora model" + "Modified base motifs derived from Remora model. " + "Ignoring --mod-motif." ) - all_mod_motifs_raw = [(self.mod_bases, motif, motif_offset)] + all_mod_motifs_raw = None # only applicable to flip-flop modbases self.can_mods_offsets = None self.str_to_int_mod_labels = None