From f90c836ec5aa6b4bb63eaa61f9a0f7995c17340d Mon Sep 17 00:00:00 2001 From: Oliver Schwengers Date: Tue, 16 Jul 2024 15:52:10 +0200 Subject: [PATCH] Fix CRISPR parser #299 (#302) * fix CRISPR parser #299 The assumed minimal length of CRISPR spacers was reduced from at least 10 down to at least 1 in the regex. * polish code * relax CRISPR spacer length regex to 1 or 2 digits --- bakta/features/crispr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bakta/features/crispr.py b/bakta/features/crispr.py index 17440b29..b15818f2 100644 --- a/bakta/features/crispr.py +++ b/bakta/features/crispr.py @@ -11,7 +11,7 @@ import bakta.utils as bu -RE_CRISPR = re.compile(r'(\d{1,8})\s+(\d{2})\s+(\d{1,3}\.\d)\s+(?:(\d{2})\s+)?([ATGCN]+)?\s+([ATGCN\.-]+)\s*(?:([ATGCN]+))?') +RE_CRISPR = re.compile(r'(\d{1,8})\s+(\d{2})\s+(\d{1,3}\.\d)\s+(?:(\d{1,2})\s+)?([ATGCN]+)?\s+([ATGCN\.-]+)\s*(?:([ATGCN]+))?') log = logging.getLogger('CRISPR') @@ -98,7 +98,7 @@ def predict_crispr(genome: dict, contigs_path: Path): spacer_length = len(spacer_seq) crispr_spacer = OrderedDict() crispr_spacer['strand'] = bc.STRAND_UNKNOWN - crispr_spacer['start'] = position + repeat_length - gap_count + crispr_spacer['start'] = position + repeat_length - gap_count crispr_spacer['stop'] = position + repeat_length + spacer_length - 1 - gap_count crispr_spacer['sequence'] = spacer_seq crispr_array['spacers'].append(crispr_spacer)