From 18dcfe34fac99d24907168d01a50b5b13d2f16ef Mon Sep 17 00:00:00 2001 From: CBeelen Date: Fri, 23 Jul 2021 16:36:14 -0700 Subject: [PATCH 01/23] Use Haploflow instead of IVA --- micall/core/denovo.py | 23 ++++++----------------- requirements.txt | 1 + 2 files changed, 7 insertions(+), 17 deletions(-) diff --git a/micall/core/denovo.py b/micall/core/denovo.py index c3ce3c9c7..807c91935 100644 --- a/micall/core/denovo.py +++ b/micall/core/denovo.py @@ -20,7 +20,7 @@ from micall.core.project_config import ProjectConfig -IVA = "iva" +HAPLOFLOW = "haploflow" DEFAULT_DATABASE = os.path.join(os.path.dirname(__file__), '..', 'blast_db', @@ -201,26 +201,15 @@ def denovo(fastq1_path: str, '--interleave', '-o', joined_path], check=True) - iva_out_path = os.path.join(tmp_dir, 'iva_out') - contigs_fasta_path = os.path.join(iva_out_path, 'contigs.fasta') - iva_args = [IVA, '--fr', joined_path, '-t', '2'] - if merged_contigs_csv is not None: - seeds_fasta_path = os.path.join(tmp_dir, 'seeds.fasta') - with open(seeds_fasta_path, 'w') as seeds_fasta: - SeqIO.write((SeqRecord(Seq(row['contig']), f'seed-{i}', '', '') - for i, row in enumerate(DictReader(merged_contigs_csv))), - seeds_fasta, - 'fasta') - seeds_size = seeds_fasta.tell() - if seeds_size > 0: - iva_args.extend(['--contigs', seeds_fasta_path, '--make_new_seeds']) - iva_args.append(iva_out_path) + haplo_out_path = os.path.join(tmp_dir, 'haplo_out') + contigs_fasta_path = os.path.join(haplo_out_path, 'contigs.fa') + haplo_args = [HAPLOFLOW, '--read-file', joined_path, '--out', haplo_out_path] try: - run(iva_args, check=True, stdout=PIPE, stderr=STDOUT) + run(haplo_args, check=True, stdout=PIPE, stderr=STDOUT) except CalledProcessError as ex: output = ex.output and ex.output.decode('UTF8') if output != 'Failed to make first seed. 
Cannot continue\n': - logger.warning('iva failed to assemble.', exc_info=True) + logger.warning('Haploflow failed to assemble.', exc_info=True) logger.warning(output) with open(contigs_fasta_path, 'a'): pass diff --git a/requirements.txt b/requirements.txt index 377d82966..1f8e66cdb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,3 +14,4 @@ reportlab==3.5.68 pysam==0.16.0.1 git+https://github.com/cfe-lab/genetracks.git@v0.2.dev0 mappy==2.21 +git+https://github.com/hzi-bifo/Haploflow.git From 8eb81be750effb07a677c4fb29f36c1e356dcfd1 Mon Sep 17 00:00:00 2001 From: CBeelen Date: Wed, 4 Aug 2021 14:58:00 -0700 Subject: [PATCH 02/23] Read in Haploflow's input arguments --- micall/core/denovo.py | 21 +++++++++++++++++++-- micall/drivers/sample.py | 10 ++++++---- micall_docker.py | 39 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 63 insertions(+), 7 deletions(-) diff --git a/micall/core/denovo.py b/micall/core/denovo.py index 807c91935..5e727d01b 100644 --- a/micall/core/denovo.py +++ b/micall/core/denovo.py @@ -176,7 +176,8 @@ def denovo(fastq1_path: str, contigs_csv: typing.TextIO, work_dir: str = '.', merged_contigs_csv: typing.TextIO = None, - blast_csv: typing.TextIO = None): + blast_csv: typing.TextIO = None, + haplo_args=None): """ Use de novo assembly to build contigs from reads. 
:param fastq1_path: FASTQ file name for read 1 reads @@ -201,9 +202,25 @@ def denovo(fastq1_path: str, '--interleave', '-o', joined_path], check=True) + + if haplo_args is None: + haplo_args = {'long': 0, + 'filter': 500, + 'thres': -1, + 'strict': 5, + 'error': 2, + 'kmer': 41} haplo_out_path = os.path.join(tmp_dir, 'haplo_out') contigs_fasta_path = os.path.join(haplo_out_path, 'contigs.fa') - haplo_args = [HAPLOFLOW, '--read-file', joined_path, '--out', haplo_out_path] + haplo_args = [HAPLOFLOW, + '--read-file', joined_path, + '--out', haplo_out_path, + '--k', haplo_args['kmer'], + '--error-rate', haplo_args['error'], + '--strict', haplo_args['strict'], + '--filter', haplo_args['filter'], + '--thres', haplo_args['thres'], + '--long', haplo_args['long']] try: run(haplo_args, check=True, stdout=PIPE, stderr=STDOUT) except CalledProcessError as ex: diff --git a/micall/drivers/sample.py b/micall/drivers/sample.py index 714e6cec0..fc605380e 100644 --- a/micall/drivers/sample.py +++ b/micall/drivers/sample.py @@ -109,7 +109,8 @@ def process(self, excluded_seeds=(), excluded_projects=(), force_gzip=False, - use_denovo=False): + use_denovo=False, + haplo_args=None): """ Process a single sample. 
:param pssm: the pssm library for running G2P analysis @@ -171,7 +172,7 @@ def process(self, merged_contigs_csv=merged_contigs_csv) if use_denovo: - self.run_denovo(excluded_seeds) + self.run_denovo(excluded_seeds, haplo_args=haplo_args) else: self.run_mapping(excluded_seeds) @@ -309,7 +310,7 @@ def run_mapping(self, excluded_seeds): scratch_path, debug_file_prefix=debug_file_prefix) - def run_denovo(self, excluded_seeds): + def run_denovo(self, excluded_seeds, haplo_args=None): logger.info('Running de novo assembly on %s.', self) scratch_path = self.get_scratch_path() with open(self.merged_contigs_csv) as merged_contigs_csv, \ @@ -320,7 +321,8 @@ def run_denovo(self, excluded_seeds): contigs_csv, self.scratch_path, merged_contigs_csv, - blast_csv=blast_csv) + blast_csv=blast_csv, + haplo_args=haplo_args) logger.info('Running remap on %s.', self) if self.debug_remap: debug_file_prefix = os.path.join(scratch_path, 'debug') diff --git a/micall_docker.py b/micall_docker.py index 4ada18389..034264aa9 100644 --- a/micall_docker.py +++ b/micall_docker.py @@ -343,6 +343,36 @@ def get_parser(default_max_active): "--project_code", "-p", help="Select primers to trim: HCV or SARSCOV2.") + command_parser.add_argument( + "-haplo_long", + type=int, + default=0, + ) + command_parser.add_argument( + "-haplo_filter", + type=int, + default=500, + ) + command_parser.add_argument( + "-haplo_thres", + type=int, + default=-1, + ) + command_parser.add_argument( + "-haplo_strict", + type=int, + default=5, + ) + command_parser.add_argument( + "-haplo_error", + type=int, + default=2, + ) + command_parser.add_argument( + "-haplo_kmer", + type=int, + default=41, + ) return parser @@ -943,13 +973,20 @@ def process_sample(sample, args, pssm, use_denovo=False): """ sample.debug_remap = args.debug_remap sample.skip = args.skip + args_haplo = {'long': args.haplo_long, + 'filter': args.haplo_filter, + 'thres': args.haplo_thres, + 'strict': args.haplo_strict, + 'error': args.haplo_error, + 'kmer': 
args.haplo_kmer} try: excluded_seeds = [] if args.all_projects else EXCLUDED_SEEDS excluded_projects = [] if args.all_projects else EXCLUDED_PROJECTS sample.process(pssm, excluded_seeds, excluded_projects, - use_denovo=use_denovo) + use_denovo=use_denovo, + haplo_args=args_haplo) except Exception: message = 'Failed to process {}.'.format(sample) logger.error(message, exc_info=True) From 550bb55b6abfcc06a7f48c1b5e8e791f5966af03 Mon Sep 17 00:00:00 2001 From: CBeelen Date: Wed, 4 Aug 2021 15:53:01 -0700 Subject: [PATCH 03/23] Add debug mode to generate graphs --- micall/core/denovo.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/micall/core/denovo.py b/micall/core/denovo.py index 5e727d01b..1c9657fb7 100644 --- a/micall/core/denovo.py +++ b/micall/core/denovo.py @@ -215,12 +215,13 @@ def denovo(fastq1_path: str, haplo_args = [HAPLOFLOW, '--read-file', joined_path, '--out', haplo_out_path, - '--k', haplo_args['kmer'], - '--error-rate', haplo_args['error'], - '--strict', haplo_args['strict'], - '--filter', haplo_args['filter'], - '--thres', haplo_args['thres'], - '--long', haplo_args['long']] + '--k', str(haplo_args['kmer']), + '--error-rate', str(haplo_args['error']), + '--strict', str(haplo_args['strict']), + '--filter', str(haplo_args['filter']), + '--thres', str(haplo_args['thres']), + '--long', str(haplo_args['long']), + '--debug', '1'] try: run(haplo_args, check=True, stdout=PIPE, stderr=STDOUT) except CalledProcessError as ex: From b0a7e0a13651f12d06a10c8b420ee6fbb34da28c Mon Sep 17 00:00:00 2001 From: CBeelen Date: Wed, 4 Aug 2021 16:03:44 -0700 Subject: [PATCH 04/23] Correct default error rate --- micall/core/denovo.py | 2 +- micall_docker.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/micall/core/denovo.py b/micall/core/denovo.py index 1c9657fb7..407956c30 100644 --- a/micall/core/denovo.py +++ b/micall/core/denovo.py @@ -208,7 +208,7 @@ def denovo(fastq1_path: str, 'filter': 500, 'thres': -1, 
'strict': 5, - 'error': 2, + 'error': 0.02, 'kmer': 41} haplo_out_path = os.path.join(tmp_dir, 'haplo_out') contigs_fasta_path = os.path.join(haplo_out_path, 'contigs.fa') diff --git a/micall_docker.py b/micall_docker.py index 034264aa9..3b076169c 100644 --- a/micall_docker.py +++ b/micall_docker.py @@ -365,8 +365,8 @@ def get_parser(default_max_active): ) command_parser.add_argument( "-haplo_error", - type=int, - default=2, + type=float, + default=0.02, ) command_parser.add_argument( "-haplo_kmer", From 7960ef9dab6808711cf9504facc78c4aad27f95c Mon Sep 17 00:00:00 2001 From: CBeelen Date: Mon, 9 Aug 2021 14:19:09 -0700 Subject: [PATCH 05/23] Trim and merge contigs after assembly using IVA's tools --- micall/core/denovo.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/micall/core/denovo.py b/micall/core/denovo.py index 407956c30..ab8b99d48 100644 --- a/micall/core/denovo.py +++ b/micall/core/denovo.py @@ -20,6 +20,12 @@ from micall.core.project_config import ProjectConfig +from tempfile import mkstemp +from shutil import move, copymode +from os import fdopen, remove +from iva.assembly import Assembly +from pyfastaq.tasks import deinterleave + HAPLOFLOW = "haploflow" DEFAULT_DATABASE = os.path.join(os.path.dirname(__file__), '..', @@ -232,6 +238,32 @@ def denovo(fastq1_path: str, with open(contigs_fasta_path, 'a'): pass + fh, abs_path = mkstemp() + i = 0 + with fdopen(fh, 'w') as new_file: + with open(contigs_fasta_path) as old_file: + for line in old_file: + if line.startswith('>'): + new_file.write(f">contig{i}\n") + i += 1 + else: + new_file.write(line) + copymode(contigs_fasta_path, abs_path) + remove(contigs_fasta_path) + move(abs_path, contigs_fasta_path) + print(f"Number of contigs before trimming and joining: {i}") + + haplo_assembly = Assembly(contigs_file=contigs_fasta_path) + reads_prefix = os.path.join(tmp_dir, 'reads') + reads_1 = reads_prefix + '_1.fa' + reads_2 = reads_prefix + '_2.fa' + deinterleave(joined_path, 
reads_1, reads_2, fasta_out=True) + haplo_assembly._trim_strand_biased_ends(reads_prefix, tag_as_trimmed=True) + haplo_assembly._remove_contained_contigs(list(haplo_assembly.contigs.keys())) + haplo_assembly._merge_overlapping_contigs(list(haplo_assembly.contigs.keys())) + contigs_fasta_path = os.path.join(haplo_out_path, 'finalcontigs.fasta') + haplo_assembly.write_contigs_to_file(contigs_fasta_path) + os.chdir(start_dir) duration = datetime.now() - start_time contig_count = write_contig_refs(contigs_fasta_path, From 8a36f336513bc86b467982a2e0c3f1623d28840e Mon Sep 17 00:00:00 2001 From: CBeelen Date: Wed, 11 Aug 2021 13:07:33 -0700 Subject: [PATCH 06/23] Add optional scaffolding and patching --- micall/core/denovo.py | 93 ++++++++++++++++++++++++++++--------------- micall_docker.py | 23 ++++++++++- 2 files changed, 84 insertions(+), 32 deletions(-) diff --git a/micall/core/denovo.py b/micall/core/denovo.py index ab8b99d48..ec1d0bc20 100644 --- a/micall/core/denovo.py +++ b/micall/core/denovo.py @@ -27,6 +27,7 @@ from pyfastaq.tasks import deinterleave HAPLOFLOW = "haploflow" +RAGTAG = ".venv/bin/RagTag/ragtag.py" DEFAULT_DATABASE = os.path.join(os.path.dirname(__file__), '..', 'blast_db', @@ -211,14 +212,18 @@ def denovo(fastq1_path: str, if haplo_args is None: haplo_args = {'long': 0, - 'filter': 500, - 'thres': -1, - 'strict': 5, - 'error': 0.02, - 'kmer': 41} + 'filter': 500, + 'thres': -1, + 'strict': 5, + 'error': 0.02, + 'kmer': 41, + 'merge': False, + 'scaffold': False, + 'patch': False, + 'ref': None} haplo_out_path = os.path.join(tmp_dir, 'haplo_out') contigs_fasta_path = os.path.join(haplo_out_path, 'contigs.fa') - haplo_args = [HAPLOFLOW, + haplo_cmd = [HAPLOFLOW, '--read-file', joined_path, '--out', haplo_out_path, '--k', str(haplo_args['kmer']), @@ -229,7 +234,7 @@ def denovo(fastq1_path: str, '--long', str(haplo_args['long']), '--debug', '1'] try: - run(haplo_args, check=True, stdout=PIPE, stderr=STDOUT) + run(haplo_cmd, check=True, stdout=PIPE, 
stderr=STDOUT) except CalledProcessError as ex: output = ex.output and ex.output.decode('UTF8') if output != 'Failed to make first seed. Cannot continue\n': @@ -238,31 +243,57 @@ def denovo(fastq1_path: str, with open(contigs_fasta_path, 'a'): pass - fh, abs_path = mkstemp() - i = 0 - with fdopen(fh, 'w') as new_file: - with open(contigs_fasta_path) as old_file: - for line in old_file: - if line.startswith('>'): - new_file.write(f">contig{i}\n") - i += 1 - else: - new_file.write(line) - copymode(contigs_fasta_path, abs_path) - remove(contigs_fasta_path) - move(abs_path, contigs_fasta_path) - print(f"Number of contigs before trimming and joining: {i}") + if haplo_args['merge']: + fh, abs_path = mkstemp() + i = 0 + with fdopen(fh, 'w') as new_file: + with open(contigs_fasta_path) as old_file: + for line in old_file: + if line.startswith('>'): + new_file.write(f">contig{i}\n") + i += 1 + else: + new_file.write(line) + copymode(contigs_fasta_path, abs_path) + remove(contigs_fasta_path) + move(abs_path, contigs_fasta_path) + print(f"Number of contigs before trimming and joining: {i}") - haplo_assembly = Assembly(contigs_file=contigs_fasta_path) - reads_prefix = os.path.join(tmp_dir, 'reads') - reads_1 = reads_prefix + '_1.fa' - reads_2 = reads_prefix + '_2.fa' - deinterleave(joined_path, reads_1, reads_2, fasta_out=True) - haplo_assembly._trim_strand_biased_ends(reads_prefix, tag_as_trimmed=True) - haplo_assembly._remove_contained_contigs(list(haplo_assembly.contigs.keys())) - haplo_assembly._merge_overlapping_contigs(list(haplo_assembly.contigs.keys())) - contigs_fasta_path = os.path.join(haplo_out_path, 'finalcontigs.fasta') - haplo_assembly.write_contigs_to_file(contigs_fasta_path) + haplo_assembly = Assembly(contigs_file=contigs_fasta_path) + reads_prefix = os.path.join(tmp_dir, 'reads') + reads_1 = reads_prefix + '_1.fa' + reads_2 = reads_prefix + '_2.fa' + deinterleave(joined_path, reads_1, reads_2, fasta_out=True) + 
haplo_assembly._trim_strand_biased_ends(reads_prefix, tag_as_trimmed=True) + haplo_assembly._remove_contained_contigs(list(haplo_assembly.contigs.keys())) + haplo_assembly._merge_overlapping_contigs(list(haplo_assembly.contigs.keys())) + contigs_fasta_path = os.path.join(haplo_out_path, 'contigs_merged.fasta') + haplo_assembly.write_contigs_to_file(contigs_fasta_path) + + if haplo_args['scaffold']: + new_contigs_fasta_path = os.path.join(haplo_out_path, 'contigs_scaffolded.fasta') + scaffold_cmd = ['python3.8', + RAGTAG, + 'scaffold', + haplo_args['ref'], + contigs_fasta_path, + '-o', new_contigs_fasta_path, + '--aligner', 'nucmer', + '--nucmer_params', "'--maxmatch -l 100 -c 65'"] + run(scaffold_cmd, check=True, stdout=PIPE, stderr=STDOUT) + contigs_fasta_path = new_contigs_fasta_path + + if haplo_args['patch']: + new_contigs_fasta_path = os.path.join(haplo_out_path, 'contigs_patched.fasta') + patch_cmd = ['python3.8', + RAGTAG, + 'patch', + contigs_fasta_path, + haplo_args['ref'], + '-o', new_contigs_fasta_path, + '--nucmer_params', "'--maxmatch -l 100 -c 65'"] + run(patch_cmd, check=True, stdout=PIPE, stderr=STDOUT) + contigs_fasta_path = new_contigs_fasta_path os.chdir(start_dir) duration = datetime.now() - start_time diff --git a/micall_docker.py b/micall_docker.py index 3b076169c..1ba0cd594 100644 --- a/micall_docker.py +++ b/micall_docker.py @@ -373,6 +373,23 @@ def get_parser(default_max_active): type=int, default=41, ) + command_parser.add_argument( + "-denovo_merge", + action='store_true', + ) + command_parser.add_argument( + "-scaffold", + action='store_true', + ) + command_parser.add_argument( + "-patch", + action='store_true', + ) + command_parser.add_argument( + "-ref", + type=str, + default=None, + ) return parser @@ -978,7 +995,11 @@ def process_sample(sample, args, pssm, use_denovo=False): 'thres': args.haplo_thres, 'strict': args.haplo_strict, 'error': args.haplo_error, - 'kmer': args.haplo_kmer} + 'kmer': args.haplo_kmer, + 
'merge':args.denovo_merge, + 'scaffold': args.scaffold, + 'patch': args.patch, + 'ref': args.ref} try: excluded_seeds = [] if args.all_projects else EXCLUDED_SEEDS excluded_projects = [] if args.all_projects else EXCLUDED_PROJECTS From d8b1e46cf110d319c0ce4c99102e2673a4b12d44 Mon Sep 17 00:00:00 2001 From: CBeelen Date: Wed, 11 Aug 2021 14:27:23 -0700 Subject: [PATCH 07/23] Correct contig paths --- micall/core/denovo.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/micall/core/denovo.py b/micall/core/denovo.py index ec1d0bc20..c67d91783 100644 --- a/micall/core/denovo.py +++ b/micall/core/denovo.py @@ -271,29 +271,29 @@ def denovo(fastq1_path: str, haplo_assembly.write_contigs_to_file(contigs_fasta_path) if haplo_args['scaffold']: - new_contigs_fasta_path = os.path.join(haplo_out_path, 'contigs_scaffolded.fasta') + scaffolding_path = os.path.join(haplo_out_path, 'scaffolding') scaffold_cmd = ['python3.8', RAGTAG, 'scaffold', haplo_args['ref'], contigs_fasta_path, - '-o', new_contigs_fasta_path, + '-o', scaffolding_path, '--aligner', 'nucmer', '--nucmer_params', "'--maxmatch -l 100 -c 65'"] run(scaffold_cmd, check=True, stdout=PIPE, stderr=STDOUT) - contigs_fasta_path = new_contigs_fasta_path + contigs_fasta_path = os.path.join(scaffolding_path, 'ragtag.scaffold.fasta') if haplo_args['patch']: - new_contigs_fasta_path = os.path.join(haplo_out_path, 'contigs_patched.fasta') + patching_path = os.path.join(haplo_out_path, 'patching') patch_cmd = ['python3.8', RAGTAG, 'patch', contigs_fasta_path, haplo_args['ref'], - '-o', new_contigs_fasta_path, + '-o', patching_path, '--nucmer_params', "'--maxmatch -l 100 -c 65'"] run(patch_cmd, check=True, stdout=PIPE, stderr=STDOUT) - contigs_fasta_path = new_contigs_fasta_path + contigs_fasta_path = os.path.join(patching_path, 'ragtag.patch.fasta') os.chdir(start_dir) duration = datetime.now() - start_time From 06c043ffa1ab1fe20cfa7f4dff22f55ab54ad44d Mon Sep 17 00:00:00 2001 From: CBeelen Date: 
Wed, 11 Aug 2021 16:16:23 -0700 Subject: [PATCH 08/23] Handle unsuccessful scaffolding or patching --- micall/core/denovo.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/micall/core/denovo.py b/micall/core/denovo.py index c67d91783..a43a1e6b4 100644 --- a/micall/core/denovo.py +++ b/micall/core/denovo.py @@ -27,7 +27,7 @@ from pyfastaq.tasks import deinterleave HAPLOFLOW = "haploflow" -RAGTAG = ".venv/bin/RagTag/ragtag.py" +RAGTAG = "/home/charlotte/Documents/Git/MiCall/.venv/bin/RagTag/ragtag.py" DEFAULT_DATABASE = os.path.join(os.path.dirname(__file__), '..', 'blast_db', @@ -279,9 +279,13 @@ def denovo(fastq1_path: str, contigs_fasta_path, '-o', scaffolding_path, '--aligner', 'nucmer', - '--nucmer_params', "'--maxmatch -l 100 -c 65'"] - run(scaffold_cmd, check=True, stdout=PIPE, stderr=STDOUT) - contigs_fasta_path = os.path.join(scaffolding_path, 'ragtag.scaffold.fasta') + '--nucmer-params', "'--maxmatch -l 100 -c 65'"] + try: + run(scaffold_cmd, check=True, stdout=PIPE, stderr=STDOUT) + contigs_fasta_path = os.path.join(scaffolding_path, 'ragtag.scaffold.fasta') + except CalledProcessError as e: + print(e) + pass if haplo_args['patch']: patching_path = os.path.join(haplo_out_path, 'patching') @@ -291,9 +295,13 @@ def denovo(fastq1_path: str, contigs_fasta_path, haplo_args['ref'], '-o', patching_path, - '--nucmer_params', "'--maxmatch -l 100 -c 65'"] - run(patch_cmd, check=True, stdout=PIPE, stderr=STDOUT) - contigs_fasta_path = os.path.join(patching_path, 'ragtag.patch.fasta') + '--nucmer-params', "'--maxmatch -l 100 -c 65'"] + try: + run(patch_cmd, check=True, stdout=PIPE, stderr=STDOUT) + contigs_fasta_path = os.path.join(patching_path, 'ragtag.patch.fasta') + except CalledProcessError as e: + print(e) + pass os.chdir(start_dir) duration = datetime.now() - start_time From 75cb275aa0b0f47f57c59582150013fddb9808ef Mon Sep 17 00:00:00 2001 From: CBeelen Date: Wed, 11 Aug 2021 16:50:33 -0700 Subject: [PATCH 09/23] 
Better handle unsuccessful scaffolding or patching --- micall/core/denovo.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/micall/core/denovo.py b/micall/core/denovo.py index a43a1e6b4..a441bbf8e 100644 --- a/micall/core/denovo.py +++ b/micall/core/denovo.py @@ -280,12 +280,13 @@ def denovo(fastq1_path: str, '-o', scaffolding_path, '--aligner', 'nucmer', '--nucmer-params', "'--maxmatch -l 100 -c 65'"] - try: - run(scaffold_cmd, check=True, stdout=PIPE, stderr=STDOUT) - contigs_fasta_path = os.path.join(scaffolding_path, 'ragtag.scaffold.fasta') - except CalledProcessError as e: - print(e) - pass + run(scaffold_cmd, check=True, stdout=PIPE, stderr=STDOUT) + new_contigs_fasta_path = os.path.join(scaffolding_path, 'ragtag.scaffold.fasta') + if os.path.getsize(new_contigs_fasta_path) > 0: + print('Scaffolding was successful!') + contigs_fasta_path = new_contigs_fasta_path + else: + print('Scaffolding was not successful') if haplo_args['patch']: patching_path = os.path.join(haplo_out_path, 'patching') @@ -296,12 +297,13 @@ def denovo(fastq1_path: str, haplo_args['ref'], '-o', patching_path, '--nucmer-params', "'--maxmatch -l 100 -c 65'"] - try: - run(patch_cmd, check=True, stdout=PIPE, stderr=STDOUT) - contigs_fasta_path = os.path.join(patching_path, 'ragtag.patch.fasta') - except CalledProcessError as e: - print(e) - pass + run(patch_cmd, check=True, stdout=PIPE, stderr=STDOUT) + new_contigs_fasta_path = os.path.join(patching_path, 'ragtag.patch.fasta') + if os.path.getsize(new_contigs_fasta_path) > 0: + print('Patching was successful!') + contigs_fasta_path = new_contigs_fasta_path + else: + print('Patching was not successful') os.chdir(start_dir) duration = datetime.now() - start_time From bdf5704c002b5ec33bf5f236610583e4e3d09beb Mon Sep 17 00:00:00 2001 From: CBeelen Date: Wed, 11 Aug 2021 17:12:08 -0700 Subject: [PATCH 10/23] Correct nucmer options --- micall/core/denovo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/micall/core/denovo.py b/micall/core/denovo.py index a441bbf8e..883d02c22 100644 --- a/micall/core/denovo.py +++ b/micall/core/denovo.py @@ -279,7 +279,7 @@ def denovo(fastq1_path: str, contigs_fasta_path, '-o', scaffolding_path, '--aligner', 'nucmer', - '--nucmer-params', "'--maxmatch -l 100 -c 65'"] + '--nucmer-params', '--maxmatch -l 100 -c 65'] run(scaffold_cmd, check=True, stdout=PIPE, stderr=STDOUT) new_contigs_fasta_path = os.path.join(scaffolding_path, 'ragtag.scaffold.fasta') if os.path.getsize(new_contigs_fasta_path) > 0: @@ -296,7 +296,7 @@ def denovo(fastq1_path: str, contigs_fasta_path, haplo_args['ref'], '-o', patching_path, - '--nucmer-params', "'--maxmatch -l 100 -c 65'"] + '--nucmer-params', '--maxmatch -l 100 -c 65'] run(patch_cmd, check=True, stdout=PIPE, stderr=STDOUT) new_contigs_fasta_path = os.path.join(patching_path, 'ragtag.patch.fasta') if os.path.getsize(new_contigs_fasta_path) > 0: From 89d5c2eb419f32f85a51334af9d1a4b93677e376 Mon Sep 17 00:00:00 2001 From: CBeelen Date: Thu, 12 Aug 2021 14:06:40 -0700 Subject: [PATCH 11/23] Add option for a second try at assembly with filtered reads --- micall/core/denovo.py | 87 +++++++++++++++++++++++++++++++++++++++++-- micall_docker.py | 7 +++- 2 files changed, 90 insertions(+), 4 deletions(-) diff --git a/micall/core/denovo.py b/micall/core/denovo.py index 883d02c22..cb90565e9 100644 --- a/micall/core/denovo.py +++ b/micall/core/denovo.py @@ -25,6 +25,7 @@ from os import fdopen, remove from iva.assembly import Assembly from pyfastaq.tasks import deinterleave +from micall.core.remap import remap, map_to_contigs HAPLOFLOW = "haploflow" RAGTAG = "/home/charlotte/Documents/Git/MiCall/.venv/bin/RagTag/ragtag.py" @@ -178,6 +179,32 @@ def genotype(fasta, db=DEFAULT_DATABASE, blast_csv=None, group_refs=None): return samples +def separate_contigs(contigs_csv, ref_contigs_csv, noref_contigs_csv): + """ Separate contigs into those that mapped to or did not map to a reference. 
+ :param contigs_csv: file with contigs, open in read mode + :param ref_contigs_csv: file for contigs that mapped to a reference, open in write mode + :param noref_contigs_csv: file for contigs that did not map to a reference, open in write mode + """ + threshold = 0.1 + # is a match threshold sufficient or do we need info from blast_csv as well? + fieldnames = ['ref', 'match', 'group_ref', 'contig'] + ref_contig_writer = DictWriter(ref_contigs_csv, fieldnames) + ref_contig_writer.writeheader() + noref_contig_writer = DictWriter(noref_contigs_csv, fieldnames) + noref_contig_writer.writeheader() + contig_reader = DictReader(contigs_csv) + num_total = 0 + num_match = 0 + for row in contig_reader: + num_total += 1 + if float(row['match']) > threshold: + ref_contig_writer.writerow(row) + num_match += 1 + else: + noref_contig_writer.writerow(row) + return num_total - num_match + + def denovo(fastq1_path: str, fastq2_path: str, contigs_csv: typing.TextIO, @@ -220,7 +247,8 @@ def denovo(fastq1_path: str, 'merge': False, 'scaffold': False, 'patch': False, - 'ref': None} + 'ref': None, + 'RP': False} haplo_out_path = os.path.join(tmp_dir, 'haplo_out') contigs_fasta_path = os.path.join(haplo_out_path, 'contigs.fa') haplo_cmd = [HAPLOFLOW, @@ -231,8 +259,7 @@ def denovo(fastq1_path: str, '--strict', str(haplo_args['strict']), '--filter', str(haplo_args['filter']), '--thres', str(haplo_args['thres']), - '--long', str(haplo_args['long']), - '--debug', '1'] + '--long', str(haplo_args['long'])] try: run(haplo_cmd, check=True, stdout=PIPE, stderr=STDOUT) except CalledProcessError as ex: @@ -243,6 +270,60 @@ def denovo(fastq1_path: str, with open(contigs_fasta_path, 'a'): pass + if haplo_args['RP']: + contigs_firstpass = os.path.join(haplo_out_path, "contigs_firstpass.csv") + blast_firstpass = os.path.join(haplo_out_path, "blast_firstpass.csv") + ref_contigs = os.path.join(haplo_out_path, "ref_contigs.csv") + noref_contigs = os.path.join(haplo_out_path, "noref_contigs.csv") + with 
open(contigs_firstpass, 'w') as contigs_firstpass_csv, \ + open(blast_firstpass, 'w') as blast_firstpass_csv: + contig_count = write_contig_refs(contigs_fasta_path, + contigs_firstpass_csv, + blast_csv=blast_firstpass_csv) + with open(contigs_firstpass, 'r') as contigs_firstpass_csv, \ + open(ref_contigs, 'w') as ref_contigs_csv, \ + open(noref_contigs, 'w') as noref_contigs_csv: + num_noref = separate_contigs(contigs_firstpass_csv, ref_contigs_csv, noref_contigs_csv) + print(f"Assembled {contig_count} contigs in the first pass, of which {num_noref} did not map to a reference.") + unmapped1_path = os.path.join(haplo_out_path, 'firstpass_unmapped1.fastq') + unmapped2_path = os.path.join(haplo_out_path, 'firstpass_unmapped2.fastq') + if num_noref: + with open(os.path.join(haplo_out_path, 'firstpass_remap.csv'), 'w') as remap_csv, \ + open(os.path.join(haplo_out_path, 'firstpass_remap_counts.csv'), 'w') as counts_csv, \ + open(os.path.join(haplo_out_path, 'firstpass_remap_conseq.csv'), 'w') as conseq_csv, \ + open(unmapped1_path, 'w') as unmapped1, \ + open(unmapped2_path, 'w') as unmapped2, \ + open(noref_contigs, 'r') as noref_contigs_csv: + map_to_contigs(fastq1_path, + fastq2_path, + noref_contigs_csv, + remap_csv, + counts_csv, + conseq_csv, + unmapped1, + unmapped2, + haplo_out_path, ) + # we want to use the reads that did not map to the contigs that did not blast to the refs + filtered_joined_path = os.path.join(haplo_out_path, 'filtered_joined.fastq') + run(['merge-mates', + unmapped1_path, + unmapped2_path, + '--interleave', + '-o', filtered_joined_path], + check=True) + haplo_out_path = os.path.join(tmp_dir, 'haplo_secondpass_out') + contigs_fasta_path = os.path.join(haplo_out_path, 'contigs.fa') + haplo_cmd = [HAPLOFLOW, + '--read-file', filtered_joined_path, + '--out', haplo_out_path, + '--k', str(haplo_args['kmer']), + '--error-rate', str(haplo_args['error']), + '--strict', str(haplo_args['strict']), + '--filter', str(haplo_args['filter']), + '--thres', 
str(haplo_args['thres']), + '--long', str(haplo_args['long'])] + run(haplo_cmd, check=True, stdout=PIPE, stderr=STDOUT) + if haplo_args['merge']: fh, abs_path = mkstemp() i = 0 diff --git a/micall_docker.py b/micall_docker.py index 1ba0cd594..9ed7fa4cb 100644 --- a/micall_docker.py +++ b/micall_docker.py @@ -390,6 +390,10 @@ def get_parser(default_max_active): type=str, default=None, ) + command_parser.add_argument( + "-RP", + action='store_true', + ) return parser @@ -999,7 +1003,8 @@ def process_sample(sample, args, pssm, use_denovo=False): 'merge':args.denovo_merge, 'scaffold': args.scaffold, 'patch': args.patch, - 'ref': args.ref} + 'ref': args.ref, + 'RP': args.RP} try: excluded_seeds = [] if args.all_projects else EXCLUDED_SEEDS excluded_projects = [] if args.all_projects else EXCLUDED_PROJECTS From 7b75e640593265f164bdcfad6ac66fa9bb52f6f8 Mon Sep 17 00:00:00 2001 From: CBeelen Date: Wed, 18 Aug 2021 18:04:23 -0700 Subject: [PATCH 12/23] Use information from remap.csv to separate reads --- micall/core/denovo.py | 52 +++++++++++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 12 deletions(-) diff --git a/micall/core/denovo.py b/micall/core/denovo.py index cb90565e9..d6f85d4db 100644 --- a/micall/core/denovo.py +++ b/micall/core/denovo.py @@ -205,6 +205,32 @@ def separate_contigs(contigs_csv, ref_contigs_csv, noref_contigs_csv): return num_total - num_match +def separate_reads(remap_csv, ref_reads_file, noref_reads_file, unmapped1, unmapped2): + """ Separate reads from remap.csv file into those that mapped to un unknown partial and the rest. 
+ + :param remap_csv: remap output file created by map_to_contigs, open in read mode + :param ref_reads_file: file to write potentially useful reads (that mapped to useful contigs or that did not map) + :param noref_reads_file: file to write useless reads (that mapped to unknown contig) + :param unmapped1: fasta file 1 of reads that did not map + :param unmapped2: fasta file 2 of reads that did not map + """ + fieldnames = ['qname', 'flag', 'rname', 'pos', 'mapq', 'cigar', 'rnext', 'pnext', 'tlen', 'seq', 'qual'] + remap_reader = DictReader(remap_csv) + for row in remap_reader: + if row['rname'][-16:] == "-unknown-partial": + file_to_write = noref_reads_file + else: + file_to_write = ref_reads_file + file_to_write.write('@'+row['qname']+'\n') + file_to_write.write(row['seq']+'\n') + file_to_write.write('+\n') + file_to_write.write(row['qual']+'\n') + for line in unmapped1: + ref_reads_file.write(line) + for line in unmapped2: + ref_reads_file.write(line) + + def denovo(fastq1_path: str, fastq2_path: str, contigs_csv: typing.TextIO, @@ -287,34 +313,36 @@ def denovo(fastq1_path: str, print(f"Assembled {contig_count} contigs in the first pass, of which {num_noref} did not map to a reference.") unmapped1_path = os.path.join(haplo_out_path, 'firstpass_unmapped1.fastq') unmapped2_path = os.path.join(haplo_out_path, 'firstpass_unmapped2.fastq') + remap_path = os.path.join(haplo_out_path, 'firstpass_remap.csv') if num_noref: - with open(os.path.join(haplo_out_path, 'firstpass_remap.csv'), 'w') as remap_csv, \ + with open(remap_path, 'w') as remap_csv, \ open(os.path.join(haplo_out_path, 'firstpass_remap_counts.csv'), 'w') as counts_csv, \ open(os.path.join(haplo_out_path, 'firstpass_remap_conseq.csv'), 'w') as conseq_csv, \ open(unmapped1_path, 'w') as unmapped1, \ open(unmapped2_path, 'w') as unmapped2, \ - open(noref_contigs, 'r') as noref_contigs_csv: + open(contigs_firstpass, 'r') as contigs_csv: map_to_contigs(fastq1_path, fastq2_path, - noref_contigs_csv, + 
contigs_csv, remap_csv, counts_csv, conseq_csv, unmapped1, unmapped2, haplo_out_path, ) - # we want to use the reads that did not map to the contigs that did not blast to the refs - filtered_joined_path = os.path.join(haplo_out_path, 'filtered_joined.fastq') - run(['merge-mates', - unmapped1_path, - unmapped2_path, - '--interleave', - '-o', filtered_joined_path], - check=True) + # we want to discard the reads that mapped to the contigs that did not blast to the refs + ref_reads_path = os.path.join(haplo_out_path, 'ref_reads.fasta') + noref_reads_path = os.path.join(haplo_out_path, 'noref_reads.fasta') + with open(remap_path, 'r') as remap_csv, \ + open(ref_reads_path, 'w') as ref_reads_file, \ + open(noref_reads_path, 'w') as noref_reads_file, \ + open(unmapped1_path, 'r') as unmapped1, \ + open(unmapped2_path, 'r') as unmapped2: + separate_reads(remap_csv, ref_reads_file, noref_reads_file, unmapped1, unmapped2) haplo_out_path = os.path.join(tmp_dir, 'haplo_secondpass_out') contigs_fasta_path = os.path.join(haplo_out_path, 'contigs.fa') haplo_cmd = [HAPLOFLOW, - '--read-file', filtered_joined_path, + '--read-file', ref_reads_path, '--out', haplo_out_path, '--k', str(haplo_args['kmer']), '--error-rate', str(haplo_args['error']), From cb5949b1b20c53c774606c8315f38ab22b61c4c3 Mon Sep 17 00:00:00 2001 From: CBeelen Date: Wed, 18 Aug 2021 20:29:15 -0700 Subject: [PATCH 13/23] Correct filename --- micall/core/denovo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/micall/core/denovo.py b/micall/core/denovo.py index d6f85d4db..50d80696c 100644 --- a/micall/core/denovo.py +++ b/micall/core/denovo.py @@ -320,10 +320,10 @@ def denovo(fastq1_path: str, open(os.path.join(haplo_out_path, 'firstpass_remap_conseq.csv'), 'w') as conseq_csv, \ open(unmapped1_path, 'w') as unmapped1, \ open(unmapped2_path, 'w') as unmapped2, \ - open(contigs_firstpass, 'r') as contigs_csv: + open(contigs_firstpass, 'r') as contigs_firstpass_csv: map_to_contigs(fastq1_path, 
fastq2_path, - contigs_csv, + contigs_firstpass_csv, remap_csv, counts_csv, conseq_csv, From adf13f59d58f403be9f9a52f8a45f0eaf35ea2c1 Mon Sep 17 00:00:00 2001 From: CBeelen Date: Wed, 1 Sep 2021 11:47:06 -0700 Subject: [PATCH 14/23] Add option for IVA assembly and modify scaffolding parameters --- micall/core/denovo.py | 109 ++++++++++++++++++++++++++---------------- micall_docker.py | 7 ++- 2 files changed, 74 insertions(+), 42 deletions(-) diff --git a/micall/core/denovo.py b/micall/core/denovo.py index 50d80696c..643f727cc 100644 --- a/micall/core/denovo.py +++ b/micall/core/denovo.py @@ -28,6 +28,7 @@ from micall.core.remap import remap, map_to_contigs HAPLOFLOW = "haploflow" +IVA = "iva" RAGTAG = "/home/charlotte/Documents/Git/MiCall/.venv/bin/RagTag/ragtag.py" DEFAULT_DATABASE = os.path.join(os.path.dirname(__file__), '..', @@ -274,33 +275,59 @@ def denovo(fastq1_path: str, 'scaffold': False, 'patch': False, 'ref': None, - 'RP': False} - haplo_out_path = os.path.join(tmp_dir, 'haplo_out') - contigs_fasta_path = os.path.join(haplo_out_path, 'contigs.fa') - haplo_cmd = [HAPLOFLOW, - '--read-file', joined_path, - '--out', haplo_out_path, - '--k', str(haplo_args['kmer']), - '--error-rate', str(haplo_args['error']), - '--strict', str(haplo_args['strict']), - '--filter', str(haplo_args['filter']), - '--thres', str(haplo_args['thres']), - '--long', str(haplo_args['long'])] - try: - run(haplo_cmd, check=True, stdout=PIPE, stderr=STDOUT) - except CalledProcessError as ex: - output = ex.output and ex.output.decode('UTF8') - if output != 'Failed to make first seed. 
Cannot continue\n': - logger.warning('Haploflow failed to assemble.', exc_info=True) - logger.warning(output) - with open(contigs_fasta_path, 'a'): - pass + 'RP': False, + 'IVA': False} + if not haplo_args['IVA']: + assembly_out_path = os.path.join(tmp_dir, 'haplo_out') + contigs_fasta_path = os.path.join(assembly_out_path, 'contigs.fa') + haplo_cmd = [HAPLOFLOW, + '--read-file', joined_path, + '--out', assembly_out_path, + '--k', str(haplo_args['kmer']), + '--error-rate', str(haplo_args['error']), + '--strict', str(haplo_args['strict']), + '--filter', str(haplo_args['filter']), + '--thres', str(haplo_args['thres']), + '--long', str(haplo_args['long'])] + try: + run(haplo_cmd, check=True, stdout=PIPE, stderr=STDOUT) + except CalledProcessError as ex: + output = ex.output and ex.output.decode('UTF8') + if output != 'Failed to make first seed. Cannot continue\n': + logger.warning('Haploflow failed to assemble.', exc_info=True) + logger.warning(output) + with open(contigs_fasta_path, 'a'): + pass + else: + assembly_out_path = os.path.join(tmp_dir, 'iva_out') + contigs_fasta_path = os.path.join(assembly_out_path, 'contigs.fasta') + iva_args = [IVA, '--fr', joined_path, '-t', '2'] + if merged_contigs_csv is not None: + seeds_fasta_path = os.path.join(tmp_dir, 'seeds.fasta') + with open(seeds_fasta_path, 'w') as seeds_fasta: + SeqIO.write((SeqRecord(Seq(row['contig']), f'seed-{i}', '', '') + for i, row in enumerate(DictReader(merged_contigs_csv))), + seeds_fasta, + 'fasta') + seeds_size = seeds_fasta.tell() + if seeds_size > 0: + iva_args.extend(['--contigs', seeds_fasta_path, '--make_new_seeds']) + iva_args.append(assembly_out_path) + try: + run(iva_args, check=True, stdout=PIPE, stderr=STDOUT) + except CalledProcessError as ex: + output = ex.output and ex.output.decode('UTF8') + if output != 'Failed to make first seed. 
Cannot continue\n': + logger.warning('iva failed to assemble.', exc_info=True) + logger.warning(output) + with open(contigs_fasta_path, 'a'): + pass if haplo_args['RP']: - contigs_firstpass = os.path.join(haplo_out_path, "contigs_firstpass.csv") - blast_firstpass = os.path.join(haplo_out_path, "blast_firstpass.csv") - ref_contigs = os.path.join(haplo_out_path, "ref_contigs.csv") - noref_contigs = os.path.join(haplo_out_path, "noref_contigs.csv") + contigs_firstpass = os.path.join(assembly_out_path, "contigs_firstpass.csv") + blast_firstpass = os.path.join(assembly_out_path, "blast_firstpass.csv") + ref_contigs = os.path.join(assembly_out_path, "ref_contigs.csv") + noref_contigs = os.path.join(assembly_out_path, "noref_contigs.csv") with open(contigs_firstpass, 'w') as contigs_firstpass_csv, \ open(blast_firstpass, 'w') as blast_firstpass_csv: contig_count = write_contig_refs(contigs_fasta_path, @@ -311,13 +338,13 @@ def denovo(fastq1_path: str, open(noref_contigs, 'w') as noref_contigs_csv: num_noref = separate_contigs(contigs_firstpass_csv, ref_contigs_csv, noref_contigs_csv) print(f"Assembled {contig_count} contigs in the first pass, of which {num_noref} did not map to a reference.") - unmapped1_path = os.path.join(haplo_out_path, 'firstpass_unmapped1.fastq') - unmapped2_path = os.path.join(haplo_out_path, 'firstpass_unmapped2.fastq') - remap_path = os.path.join(haplo_out_path, 'firstpass_remap.csv') + unmapped1_path = os.path.join(assembly_out_path, 'firstpass_unmapped1.fastq') + unmapped2_path = os.path.join(assembly_out_path, 'firstpass_unmapped2.fastq') + remap_path = os.path.join(assembly_out_path, 'firstpass_remap.csv') if num_noref: with open(remap_path, 'w') as remap_csv, \ - open(os.path.join(haplo_out_path, 'firstpass_remap_counts.csv'), 'w') as counts_csv, \ - open(os.path.join(haplo_out_path, 'firstpass_remap_conseq.csv'), 'w') as conseq_csv, \ + open(os.path.join(assembly_out_path, 'firstpass_remap_counts.csv'), 'w') as counts_csv, \ + 
open(os.path.join(assembly_out_path, 'firstpass_remap_conseq.csv'), 'w') as conseq_csv, \ open(unmapped1_path, 'w') as unmapped1, \ open(unmapped2_path, 'w') as unmapped2, \ open(contigs_firstpass, 'r') as contigs_firstpass_csv: @@ -329,21 +356,21 @@ def denovo(fastq1_path: str, conseq_csv, unmapped1, unmapped2, - haplo_out_path, ) + assembly_out_path, ) # we want to discard the reads that mapped to the contigs that did not blast to the refs - ref_reads_path = os.path.join(haplo_out_path, 'ref_reads.fasta') - noref_reads_path = os.path.join(haplo_out_path, 'noref_reads.fasta') + ref_reads_path = os.path.join(assembly_out_path, 'ref_reads.fasta') + noref_reads_path = os.path.join(assembly_out_path, 'noref_reads.fasta') with open(remap_path, 'r') as remap_csv, \ open(ref_reads_path, 'w') as ref_reads_file, \ open(noref_reads_path, 'w') as noref_reads_file, \ open(unmapped1_path, 'r') as unmapped1, \ open(unmapped2_path, 'r') as unmapped2: separate_reads(remap_csv, ref_reads_file, noref_reads_file, unmapped1, unmapped2) - haplo_out_path = os.path.join(tmp_dir, 'haplo_secondpass_out') - contigs_fasta_path = os.path.join(haplo_out_path, 'contigs.fa') + assembly_out_path = os.path.join(tmp_dir, 'haplo_secondpass_out') + contigs_fasta_path = os.path.join(assembly_out_path, 'contigs.fa') haplo_cmd = [HAPLOFLOW, '--read-file', ref_reads_path, - '--out', haplo_out_path, + '--out', assembly_out_path, '--k', str(haplo_args['kmer']), '--error-rate', str(haplo_args['error']), '--strict', str(haplo_args['strict']), @@ -376,11 +403,11 @@ def denovo(fastq1_path: str, haplo_assembly._trim_strand_biased_ends(reads_prefix, tag_as_trimmed=True) haplo_assembly._remove_contained_contigs(list(haplo_assembly.contigs.keys())) haplo_assembly._merge_overlapping_contigs(list(haplo_assembly.contigs.keys())) - contigs_fasta_path = os.path.join(haplo_out_path, 'contigs_merged.fasta') + contigs_fasta_path = os.path.join(assembly_out_path, 'contigs_merged.fasta') 
haplo_assembly.write_contigs_to_file(contigs_fasta_path) if haplo_args['scaffold']: - scaffolding_path = os.path.join(haplo_out_path, 'scaffolding') + scaffolding_path = os.path.join(assembly_out_path, 'scaffolding') scaffold_cmd = ['python3.8', RAGTAG, 'scaffold', @@ -388,7 +415,7 @@ def denovo(fastq1_path: str, contigs_fasta_path, '-o', scaffolding_path, '--aligner', 'nucmer', - '--nucmer-params', '--maxmatch -l 100 -c 65'] + '--nucmer-params', '--maxmatch -l 30 -c 20'] run(scaffold_cmd, check=True, stdout=PIPE, stderr=STDOUT) new_contigs_fasta_path = os.path.join(scaffolding_path, 'ragtag.scaffold.fasta') if os.path.getsize(new_contigs_fasta_path) > 0: @@ -398,14 +425,14 @@ def denovo(fastq1_path: str, print('Scaffolding was not successful') if haplo_args['patch']: - patching_path = os.path.join(haplo_out_path, 'patching') + patching_path = os.path.join(assembly_out_path, 'patching') patch_cmd = ['python3.8', RAGTAG, 'patch', contigs_fasta_path, haplo_args['ref'], '-o', patching_path, - '--nucmer-params', '--maxmatch -l 100 -c 65'] + '--nucmer-params', '--maxmatch -l 30 -c 20'] run(patch_cmd, check=True, stdout=PIPE, stderr=STDOUT) new_contigs_fasta_path = os.path.join(patching_path, 'ragtag.patch.fasta') if os.path.getsize(new_contigs_fasta_path) > 0: diff --git a/micall_docker.py b/micall_docker.py index f33ea7c6c..2b0c3fae0 100644 --- a/micall_docker.py +++ b/micall_docker.py @@ -394,6 +394,10 @@ def get_parser(default_max_active): "-RP", action='store_true', ) + command_parser.add_argument( + "-IVA", + action='store_true', + ) return parser @@ -1004,7 +1008,8 @@ def process_sample(sample, args, pssm, use_denovo=False): 'scaffold': args.scaffold, 'patch': args.patch, 'ref': args.ref, - 'RP': args.RP} + 'RP': args.RP, + 'IVA': args.IVA} try: excluded_seeds = [] if args.all_projects else EXCLUDED_SEEDS excluded_projects = [] if args.all_projects else EXCLUDED_PROJECTS From 138dcb20fc9bbece819bb761d091016c3e26ac46 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak 
Date: Mon, 18 Sep 2023 17:51:23 -0700 Subject: [PATCH 15/23] Install Haploflow during docker initialization --- Dockerfile | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Dockerfile b/Dockerfile index 2a0c7ea41..cc4729cca 100644 --- a/Dockerfile +++ b/Dockerfile @@ -83,6 +83,13 @@ RUN apt-get install -q -y zlib1g-dev libncurses5-dev libncursesw5-dev && \ tar -xzf smalt-0.7.6-bin.tar.gz --no-same-owner && \ ln -s /opt/smalt-0.7.6-bin/smalt_x86_64 /bin/smalt +## Installing Haploflow +RUN apt-get install -y build-essential sudo git ronn \ + cd /opt/ && + git clone https://github.com/hzi-bifo/Haploflow \ + cd /opt/Haploflow && sh build.sh \ + ln -s /opt/Haploflow/build/haploflow /bin/haploflow + ## Install dependencies for genetracks/drawsvg RUN apt-get install -q -y libcairo2-dev From c621522561d0e3b97ddfdd663db170db552d52a0 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Mon, 18 Sep 2023 17:59:39 -0700 Subject: [PATCH 16/23] Install Haploflow on CI --- .github/workflows/build-and-test.yml | 9 +++++++++ Dockerfile | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index de5ebe481..edd1309f9 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -16,6 +16,15 @@ jobs: with: go-version: '^1.15.6' + - name: Install Haploflow + run: | + sudo apt-get update + sudo apt-get install -y build-essential git ronn + cd /opt/ + git clone https://github.com/hzi-bifo/Haploflow + cd /opt/Haploflow && sh build.sh + sudo ln -s /opt/Haploflow/build/haploflow ~/bin/haploflow + - name: Install IVA assembler dependencies run: | sudo apt-get install -qq zlib1g-dev libncurses5-dev libncursesw5-dev mummer ncbi-blast+ diff --git a/Dockerfile b/Dockerfile index cc4729cca..ca8c41ee3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -83,7 +83,7 @@ RUN apt-get install -q -y zlib1g-dev libncurses5-dev libncursesw5-dev && \ tar -xzf smalt-0.7.6-bin.tar.gz 
--no-same-owner && \ ln -s /opt/smalt-0.7.6-bin/smalt_x86_64 /bin/smalt -## Installing Haploflow +## Install Haploflow RUN apt-get install -y build-essential sudo git ronn \ cd /opt/ && git clone https://github.com/hzi-bifo/Haploflow \ cd /opt/Haploflow && sh build.sh \ From 272635dd822af6da69279518cc3401ba05e06f35 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Tue, 19 Sep 2023 08:45:16 -0700 Subject: [PATCH 17/23] Pin exact Haploflow version Note that I am using a specific commit version for pinning. Rationale: Haploflow is not actively developed, and because of that, the last release is quite far behind master. Thus, I think it's better to pin its version to the current master than to the latest release tag. --- .github/workflows/build-and-test.yml | 4 +++- Dockerfile | 11 +++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index edd1309f9..62fee2da4 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -22,7 +22,9 @@ jobs: sudo apt-get install -y build-essential git ronn cd /opt/ git clone https://github.com/hzi-bifo/Haploflow - cd /opt/Haploflow && sh build.sh + cd Haploflow + git checkout 9a5a0ff6c3a0435e723e41f98fe82ec2ad19cf50 + sh build.sh sudo ln -s /opt/Haploflow/build/haploflow ~/bin/haploflow - name: Install IVA assembler dependencies diff --git a/Dockerfile b/Dockerfile index ca8c41ee3..181f61910 100644 --- a/Dockerfile +++ b/Dockerfile @@ -84,10 +84,13 @@ RUN apt-get install -q -y zlib1g-dev libncurses5-dev libncursesw5-dev && \ ln -s /opt/smalt-0.7.6-bin/smalt_x86_64 /bin/smalt ## Install Haploflow -RUN apt-get install -y build-essential sudo git ronn \ - cd /opt/ && - git clone https://github.com/hzi-bifo/Haploflow \ - cd /opt/Haploflow && sh build.sh \ +RUN apt-get update && \ + apt-get install -y build-essential sudo git ronn cmake && \ + cd /opt/ && \ + git clone https://github.com/hzi-bifo/Haploflow && \ + cd Haploflow && \ + git
checkout 9a5a0ff6c3a0435e723e41f98fe82ec2ad19cf50 && \ + yes | sh build.sh && \ ln -s /opt/Haploflow/build/haploflow /bin/haploflow ## Install dependencies for genetracks/drawsvg From 5ecb109a9e1cd9613168efb803fb91edd1e0a38e Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Wed, 20 Sep 2023 13:54:30 -0700 Subject: [PATCH 18/23] Switch to Debian in Singularity image --- Singularity | 79 +++++++++++++++++------------------------------------ 1 file changed, 25 insertions(+), 54 deletions(-) diff --git a/Singularity b/Singularity index e32803957..b59394534 100644 --- a/Singularity +++ b/Singularity @@ -1,6 +1,6 @@ # Generate the Singularity container to run MiCall on Kive. Bootstrap: docker -From: centos:7 +From: python:3.8 %help MiCall maps all the reads from a sample against a set of reference @@ -53,48 +53,28 @@ From: centos:7 %post echo ===== Installing Prerequisites ===== >/dev/null - yum update -q -y - - yum groupinstall -q -y 'development tools' - yum install -q -y epel-release - yum install -q -y unzip wget fontconfig bzip2-devel xz-devel openssl-devel \ - libffi-devel sqlite-devel - - echo ===== Installing Python ===== >/dev/null - wget -q https://www.python.org/ftp/python/3.8.3/Python-3.8.3.tar.xz - tar xJf Python* - rm Python*.xz - cd Python* - ./configure --enable-optimizations - make altinstall - cd .. 
- rm -rf Python* - ln -s /usr/local/bin/python3.8 /usr/local/bin/python3 + apt-get update -q + apt-get install -q -y unzip wget echo ===== Installing blast ===== >/dev/null - cd /root - # Saved our own copy, because download was slow from ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.6.0/ncbi-blast-2.6.0+-1.x86_64.rpm - wget -q https://github.com/cfe-lab/MiCall/releases/download/v7.12.dev28/ncbi-blast-2.6.0+-1.x86_64.rpm - yum install -q -y ncbi-blast-2.6.0+-1.x86_64.rpm - rm ncbi-blast-2.6.0+-1.x86_64.rpm - python3 /opt/micall/micall/blast_db/make_blast_db.py + apt-get install -q -y ncbi-blast+ echo ===== Installing Rust and merge-mates ===== >/dev/null - yum install -q -y rust cargo + wget -qO rustup.sh https://sh.rustup.rs + chmod +x /rustup.sh + /rustup.sh -y -q + . /root/.cargo/env + rm rustup.sh cargo install --quiet --root / --git https://github.com/jeff-k/merge-mates.git --rev 2fec61363f645e2008a4adff553d098beae21469 - ## Miniconda (Python 2) (Don't use this) - #wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh - #bash miniconda.sh -b -p /opt/miniconda - echo ===== Installing bowtie2 ===== >/dev/null wget -q -O bowtie2.zip https://github.com/BenLangmead/bowtie2/releases/download/v2.2.8/bowtie2-2.2.8-linux-x86_64.zip - unzip -qq bowtie2.zip -d /opt/ + unzip bowtie2.zip -d /opt/ ln -s /opt/bowtie2-2.2.8/ /opt/bowtie2 rm bowtie2.zip echo ===== Installing IVA dependencies ===== >/dev/null - yum install -q -y tcsh ncurses-devel zlib-devel + apt-get install -q -y zlib1g-dev libncurses5-dev libncursesw5-dev cd /bin wget -q http://sun.aei.polsl.pl/kmc/download-2.1.1/linux/kmc wget -q http://sun.aei.polsl.pl/kmc/download-2.1.1/linux/kmc_dump @@ -122,32 +102,23 @@ From: centos:7 tar -xzf smalt-0.7.6-bin.tar.gz --no-same-owner ln -s /opt/smalt-0.7.6-bin/smalt_x86_64 /bin/smalt + echo ===== Installing Haploflow ===== >/dev/null + apt-get install -q -y libboost-all-dev build-essential sudo git ronn cmake + cd /opt/ + git 
clone https://github.com/hzi-bifo/Haploflow + cd Haploflow + git checkout 9a5a0ff6c3a0435e723e41f98fe82ec2ad19cf50 + yes | sh build.sh + echo ===== Installing Python packages ===== >/dev/null + # Install dependencies for genetracks/drawsvg + apt-get install -q -y libcairo2-dev # Also trigger matplotlib to build its font cache. - wget -q https://bootstrap.pypa.io/get-pip.py - python3 get-pip.py - rm get-pip.py cd /opt - pip install --quiet -r /opt/micall/requirements.txt - ln -s /usr/local/bin/cutadapt /usr/local/bin/cutadapt-1.11 - python3 -c 'import matplotlib; matplotlib.use("Agg"); import matplotlib.pyplot' - - # Install dependencies for genetracks/drawsvg - yum install -q -y cairo-devel cairo cairo-tools zlib-devel - - yum groupremove -q -y 'development tools' - yum remove -q -y epel-release wget unzip - yum autoremove -q -y - yum clean all - - rm -rf /var/cache/yum - - ## CAUTION! This changes the default python command to python3! - ## This breaks many things, including yum! - ## To switch back to python2, use this command: - # sudo alternatives --set python /usr/bin/python2 - alternatives --install /usr/bin/python python /usr/bin/python2 50 - alternatives --install /usr/bin/python python /usr/local/bin/python3 60 + pip install --upgrade pip + pip install -r /opt/micall/requirements-basespace.txt + python -c 'import matplotlib; matplotlib.use("Agg"); import matplotlib.pyplot' + python /opt/micall/micall/blast_db/make_blast_db.py %environment export PATH=/opt/bowtie2:/bin:/usr/local/bin From c958723863f228c09ed2d7274722c0b7ac584b29 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Wed, 6 Nov 2024 13:13:32 -0800 Subject: [PATCH 19/23] Reset to 'master' branch --- .devcontainer/devcontainer.json | 29 + .dockerignore | 5 +- .github/workflows/build-and-test.yml | 81 +- .gitignore | 2 +- CONTRIBUTING.md | 188 +- Dockerfile | 40 +- README.md | 3 - Singularity | 65 +- dev.dockerfile | 21 - docs/Gemfile.lock | 137 +- docs/_config.yml | 2 +- docs/_data/navigation.yml 
| 6 +- docs/admin.md | 159 +- docs/compute_micall_results.ipynb | 3 +- docs/design/stitcher.md | 744 +++++++ docs/design/stitcher_practical_plot.svg | 582 +++++ docs/design/stitcher_rule_1_input.svg | 83 + docs/design/stitcher_rule_1_result.svg | 51 + docs/design/stitcher_rule_2_input.svg | 83 + docs/design/stitcher_rule_2_result.svg | 51 + docs/design/stitcher_rule_3_input.svg | 83 + docs/design/stitcher_rule_3_result.svg | 109 + docs/design/stitcher_rule_4_input.svg | 115 + docs/design/stitcher_rule_4_result.svg | 185 ++ docs/micall_at_bccfe.md | 83 + docs/steps.md | 25 +- gui/README.md | 14 - gui/micall.ico | Bin 293950 -> 0 bytes gui/micall.xcf | Bin 121234 -> 0 bytes micall/core/aln2counts.py | 3 +- micall/core/contig_stitcher.py | 692 ++++++ micall/core/coverage_plots.py | 5 +- micall/core/denovo.py | 483 +---- micall/core/filter_quality.py | 2 +- micall/core/plot_contigs.py | 890 +++++++- micall/core/project_config.py | 5 +- micall/core/remap.py | 6 +- micall/core/sam2aln.py | 2 +- micall/core/trim_fastqs.py | 6 +- micall/data/landmark_reader.py | 11 +- .../data/test_samples_default.csv | 0 micall/drivers/sample.py | 196 +- micall/g2p/fastq_g2p.py | 4 +- micall/main.py | 161 ++ micall/monitor/kive_watcher.py | 29 +- .../monitor/micall_watcher.py | 4 +- micall/monitor/update_qai.py | 28 +- .../data/contig_stitcher_fuzz_nogaps.json | 264 +++ micall/tests/data/exact_parts_contigs.csv | 3 + micall/tests/data/exact_parts_contigs.fasta | 4 + .../tests/data/exact_parts_contigs.plot.svg | 187 ++ .../data/exact_parts_contigs_stitched.csv | 2 + .../test_big_insertion_in_a_single_contig.svg | 45 + ...est_big_insertion_in_a_single_contig_2.svg | 45 + ...test_correct_processing_complex_nogaps.svg | 240 +++ ...wo_overlapping_and_one_separate_contig.svg | 221 ++ ..._overlapping_and_one_separate_contig_2.svg | 221 ++ ...titching_of_one_normal_and_one_unknown.svg | 147 ++ ...g_of_two_partially_overlapping_contigs.svg | 71 + ...partially_overlapping_contigs_real_hcv.svg | 145 
++ ...partially_overlapping_contigs_real_hiv.svg | 187 ++ ...apping_contigs_with_insignificant_gaps.svg | 290 +++ ...ially_overlapping_contigs_with_padding.svg | 183 ++ ...overlapping_different_organism_contigs.svg | 84 + ...cross_alignment_around_small_insertion.svg | 217 ++ .../test_forward_and_reverse_match.svg | 119 ++ .../test_gap_around_big_insertion.svg | 211 ++ .../test_gap_around_small_insertion.svg | 211 ++ ...test_identical_stitching_of_one_contig.svg | 45 + ...st_multiple_reverse_complement_matches.svg | 51 + ...everse_complement_matches_out_of_order.svg | 51 + .../test_overlaping_in_reference_space.svg | 173 ++ .../test_partial_align_consensus.svg | 51 + ...onsensus_multiple_overlaping_sequences.svg | 177 ++ ...ial_align_consensus_multiple_sequences.svg | 71 + .../test_reverse_complement_match.svg | 45 + ..._reverse_complement_match_with_padding.svg | 51 + ...stitching_of_non_overlapping_contigs_1.svg | 71 + ...stitching_of_non_overlapping_contigs_2.svg | 71 + .../test_stitch_cross_alignment.svg | 179 ++ .../test_stitch_with_insertion.svg | 51 + ..._stitching_contig_with_big_covered_gap.svg | 93 + ...itching_contig_with_big_noncovered_gap.svg | 45 + ...ching_contig_with_big_noncovered_gap_2.svg | 71 + ...titching_contig_with_small_covered_gap.svg | 93 + ..._overlapping_contigs_into_one_sequence.svg | 97 + ...tching_of_completely_identical_contigs.svg | 155 ++ .../test_stitching_of_identical_contigs.svg | 155 ++ .../test_stitching_of_zero_contigs.svg | 18 + .../test_stitching_partial_align.svg | 51 + ...ching_partial_align_multiple_sequences.svg | 183 ++ ...gs_completely_covered_by_other_contigs.svg | 211 ++ ...e_contig_completely_covered_by_another.svg | 159 ++ .../test_stitching_with_empty_contigs.svg | 147 ++ ...labeling_of_different_organism_contigs.svg | 500 +++++ micall/tests/microtest/make_sample.py | 10 + micall/tests/test_aln2counts_report.py | 1 - micall/tests/test_consensus_aligner.py | 347 +-- micall/tests/test_contig_stitcher.py | 1887 
+++++++++++++++++ micall/tests/test_contig_stitcher_fuzz.py | 145 ++ micall/tests/test_denovo.py | 241 +-- micall/tests/test_fasta_to_csv.py | 227 ++ micall/tests/test_installation.py | 156 ++ micall/tests/test_kive_watcher.py | 49 +- micall/tests/test_release_test_compare.py | 2 +- micall/tests/test_sample_sheet_parser.py | 46 + micall/tests/test_tests_utils.py | 74 + micall/tests/utils.py | 78 + micall/utils/alignment.py | 60 + micall/utils/aln2counts_simplify.py | 187 ++ micall/utils/consensus_aligner.py | 368 ++-- micall/utils/contig_blaster.py | 4 +- micall/utils/contig_stitcher_context.py | 53 + micall/utils/contig_stitcher_contigs.py | 67 + micall/utils/contig_stitcher_events.py | 305 +++ micall/utils/contig_summary.py | 23 +- micall/utils/denovo_simplify.py | 2 +- .../utils/docker_build.py | 0 micall/utils/externals.py | 9 +- micall/utils/fasta_to_csv.py | 293 +++ micall/utils/find_chimera.py | 2 +- micall/utils/get_list_of_executables.py | 68 + micall/utils/make_fda_refs.py | 6 +- micall/utils/make_hcv_genotype_refs.py | 6 +- .../utils/micall_docker.py | 105 +- micall_kive.py => micall/utils/micall_kive.py | 23 +- .../utils/micall_kive_resistance.py | 7 + .../utils/micall_logging_config.py | 2 +- micall/utils/project_seeds_from_compendium.py | 2 +- micall/utils/ref_aligner.py | 2 +- micall/utils/reference_distances.py | 2 +- .../utils/release_test_compare.py | 42 +- .../utils/release_test_microtest.py | 105 +- .../utils/release_test_publish.py | 0 .../utils/release_test_setup.py | 9 +- micall/utils/remap_fastq_simplify.py | 12 +- micall/utils/report_amino.py | 5 +- micall/utils/sam_g2p_simplify.py | 4 +- micall/utils/sample_fastq.py | 4 +- micall/utils/sample_sheet_parser.py | 7 +- pyproject.toml | 302 +++ pytest.ini | 3 - requirements-basespace.txt | 3 - requirements-dev.txt | 5 - requirements-test.txt | 12 - requirements-watcher.txt | 3 - requirements.txt | 18 - 147 files changed, 15370 insertions(+), 1661 deletions(-) create mode 100644 
.devcontainer/devcontainer.json delete mode 100644 dev.dockerfile create mode 100644 docs/design/stitcher.md create mode 100644 docs/design/stitcher_practical_plot.svg create mode 100644 docs/design/stitcher_rule_1_input.svg create mode 100644 docs/design/stitcher_rule_1_result.svg create mode 100644 docs/design/stitcher_rule_2_input.svg create mode 100644 docs/design/stitcher_rule_2_result.svg create mode 100644 docs/design/stitcher_rule_3_input.svg create mode 100644 docs/design/stitcher_rule_3_result.svg create mode 100644 docs/design/stitcher_rule_4_input.svg create mode 100644 docs/design/stitcher_rule_4_result.svg create mode 100644 docs/micall_at_bccfe.md delete mode 100644 gui/README.md delete mode 100644 gui/micall.ico delete mode 100644 gui/micall.xcf create mode 100644 micall/core/contig_stitcher.py rename test_samples_default.csv => micall/data/test_samples_default.csv (100%) create mode 100644 micall/main.py rename micall_watcher.py => micall/monitor/micall_watcher.py (98%) create mode 100644 micall/tests/data/contig_stitcher_fuzz_nogaps.json create mode 100644 micall/tests/data/exact_parts_contigs.csv create mode 100644 micall/tests/data/exact_parts_contigs.fasta create mode 100644 micall/tests/data/exact_parts_contigs.plot.svg create mode 100644 micall/tests/data/exact_parts_contigs_stitched.csv create mode 100644 micall/tests/data/stitcher_plots/test_big_insertion_in_a_single_contig.svg create mode 100644 micall/tests/data/stitcher_plots/test_big_insertion_in_a_single_contig_2.svg create mode 100644 micall/tests/data/stitcher_plots/test_correct_processing_complex_nogaps.svg create mode 100644 micall/tests/data/stitcher_plots/test_correct_processing_of_two_overlapping_and_one_separate_contig.svg create mode 100644 micall/tests/data/stitcher_plots/test_correct_processing_of_two_overlapping_and_one_separate_contig_2.svg create mode 100644 micall/tests/data/stitcher_plots/test_correct_stitching_of_one_normal_and_one_unknown.svg create mode 100644 
micall/tests/data/stitcher_plots/test_correct_stitching_of_two_partially_overlapping_contigs.svg create mode 100644 micall/tests/data/stitcher_plots/test_correct_stitching_of_two_partially_overlapping_contigs_real_hcv.svg create mode 100644 micall/tests/data/stitcher_plots/test_correct_stitching_of_two_partially_overlapping_contigs_real_hiv.svg create mode 100644 micall/tests/data/stitcher_plots/test_correct_stitching_of_two_partially_overlapping_contigs_with_insignificant_gaps.svg create mode 100644 micall/tests/data/stitcher_plots/test_correct_stitching_of_two_partially_overlapping_contigs_with_padding.svg create mode 100644 micall/tests/data/stitcher_plots/test_correct_stitching_of_two_partially_overlapping_different_organism_contigs.svg create mode 100644 micall/tests/data/stitcher_plots/test_cross_alignment_around_small_insertion.svg create mode 100644 micall/tests/data/stitcher_plots/test_forward_and_reverse_match.svg create mode 100644 micall/tests/data/stitcher_plots/test_gap_around_big_insertion.svg create mode 100644 micall/tests/data/stitcher_plots/test_gap_around_small_insertion.svg create mode 100644 micall/tests/data/stitcher_plots/test_identical_stitching_of_one_contig.svg create mode 100644 micall/tests/data/stitcher_plots/test_multiple_reverse_complement_matches.svg create mode 100644 micall/tests/data/stitcher_plots/test_multiple_reverse_complement_matches_out_of_order.svg create mode 100644 micall/tests/data/stitcher_plots/test_overlaping_in_reference_space.svg create mode 100644 micall/tests/data/stitcher_plots/test_partial_align_consensus.svg create mode 100644 micall/tests/data/stitcher_plots/test_partial_align_consensus_multiple_overlaping_sequences.svg create mode 100644 micall/tests/data/stitcher_plots/test_partial_align_consensus_multiple_sequences.svg create mode 100644 micall/tests/data/stitcher_plots/test_reverse_complement_match.svg create mode 100644 micall/tests/data/stitcher_plots/test_reverse_complement_match_with_padding.svg 
create mode 100644 micall/tests/data/stitcher_plots/test_separate_stitching_of_non_overlapping_contigs_1.svg create mode 100644 micall/tests/data/stitcher_plots/test_separate_stitching_of_non_overlapping_contigs_2.svg create mode 100644 micall/tests/data/stitcher_plots/test_stitch_cross_alignment.svg create mode 100644 micall/tests/data/stitcher_plots/test_stitch_with_insertion.svg create mode 100644 micall/tests/data/stitcher_plots/test_stitching_contig_with_big_covered_gap.svg create mode 100644 micall/tests/data/stitcher_plots/test_stitching_contig_with_big_noncovered_gap.svg create mode 100644 micall/tests/data/stitcher_plots/test_stitching_contig_with_big_noncovered_gap_2.svg create mode 100644 micall/tests/data/stitcher_plots/test_stitching_contig_with_small_covered_gap.svg create mode 100644 micall/tests/data/stitcher_plots/test_stitching_of_all_overlapping_contigs_into_one_sequence.svg create mode 100644 micall/tests/data/stitcher_plots/test_stitching_of_completely_identical_contigs.svg create mode 100644 micall/tests/data/stitcher_plots/test_stitching_of_identical_contigs.svg create mode 100644 micall/tests/data/stitcher_plots/test_stitching_of_zero_contigs.svg create mode 100644 micall/tests/data/stitcher_plots/test_stitching_partial_align.svg create mode 100644 micall/tests/data/stitcher_plots/test_stitching_partial_align_multiple_sequences.svg create mode 100644 micall/tests/data/stitcher_plots/test_stitching_when_multiple_contigs_completely_covered_by_other_contigs.svg create mode 100644 micall/tests/data/stitcher_plots/test_stitching_when_one_contig_completely_covered_by_another.svg create mode 100644 micall/tests/data/stitcher_plots/test_stitching_with_empty_contigs.svg create mode 100644 micall/tests/data/stitcher_plots/test_visualizer_correct_labeling_of_different_organism_contigs.svg create mode 100644 micall/tests/test_contig_stitcher.py create mode 100644 micall/tests/test_contig_stitcher_fuzz.py create mode 100644 
micall/tests/test_fasta_to_csv.py create mode 100644 micall/tests/test_installation.py create mode 100644 micall/tests/test_tests_utils.py create mode 100644 micall/tests/utils.py create mode 100644 micall/utils/alignment.py create mode 100644 micall/utils/aln2counts_simplify.py create mode 100644 micall/utils/contig_stitcher_context.py create mode 100644 micall/utils/contig_stitcher_contigs.py create mode 100644 micall/utils/contig_stitcher_events.py rename docker_build.py => micall/utils/docker_build.py (100%) create mode 100644 micall/utils/fasta_to_csv.py create mode 100644 micall/utils/get_list_of_executables.py rename micall_docker.py => micall/utils/micall_docker.py (95%) rename micall_kive.py => micall/utils/micall_kive.py (89%) rename micall_kive_resistance.py => micall/utils/micall_kive_resistance.py (95%) rename micall_logging_config.py => micall/utils/micall_logging_config.py (98%) rename release_test_compare.py => micall/utils/release_test_compare.py (95%) rename release_test_microtest.py => micall/utils/release_test_microtest.py (90%) rename release_test_publish.py => micall/utils/release_test_publish.py (100%) rename release_test_setup.py => micall/utils/release_test_setup.py (97%) create mode 100644 pyproject.toml delete mode 100644 pytest.ini delete mode 100644 requirements-basespace.txt delete mode 100644 requirements-dev.txt delete mode 100644 requirements-test.txt delete mode 100644 requirements-watcher.txt delete mode 100644 requirements.txt diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 000000000..4d7fc8ea5 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,29 @@ +// For format details, see https://aka.ms/devcontainer.json. 
For config options, see the +// README at: https://github.com/devcontainers/templates/tree/main/src/universal +{ + "name": "MiCall dev", + + // More info: https://containers.dev/guide/dockerfile + "build": { + "dockerfile": "../Dockerfile" + }, + + // Features to add to the dev container. More info: https://containers.dev/features. + // "features": {}, + + // Use 'forwardPorts' to make a list of ports inside the container available locally. + // "forwardPorts": [], + + // Use 'onCreateCommand' to run commands after the container is created, but before anything else starts. + "onCreateCommand": "python -m pip install .[denovo,watcher,dev,test]", + + // Configure tool-specific properties. + "customizations": { + "vscode": { + "extensions": ["ms-python.python"] + } + } + + // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. + // "remoteUser": "root" +} diff --git a/.dockerignore b/.dockerignore index cf4867d76..172286671 100644 --- a/.dockerignore +++ b/.dockerignore @@ -2,6 +2,9 @@ .idea bin docs +*cache* +.github +Dockerfile micall/tests/working micall/tests/microtest/scratch micall/tests/microtest/micall-results @@ -12,4 +15,4 @@ venv_micall */*.pyc */*/*.pyc */*/*/*.pyc -simgs \ No newline at end of file +simgs diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 62fee2da4..c2d870fe0 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -1,31 +1,19 @@ name: Build and Test MiCall on: push jobs: - build: + main: runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2 - name: Set up Python uses: actions/setup-python@v2 with: - python-version: 3.8 + python-version: 3.11 - run: mkdir -p ~/bin - run: echo ~/bin >> $GITHUB_PATH - - uses: actions/setup-go@v2 - with: - go-version: '^1.15.6' - - - name: Install Haploflow - run: | - sudo apt-get update - sudo apt-get install -y build-essential git ronn - cd /opt/ - git clone 
https://github.com/hzi-bifo/Haploflow - cd Haploflow - git checkout 9a5a0ff6c3a0435e723e41f98fe82ec2ad19cf50 - sh build.sh - sudo ln -s /opt/Haploflow/build/haploflow ~/bin/haploflow + - name: Run apt update + run: sudo apt-get update - name: Install IVA assembler dependencies run: | @@ -65,15 +53,54 @@ jobs: echo "/opt/bowtie2" >> $GITHUB_PATH - name: Install MiCall's Python dependencies - # Have to clean up alignment build files. Otherwise, they'll get reused - # in Singularity build with the wrong GLIBC libraries. run: | python -m pip install --upgrade pip - pip install -r requirements-test.txt + pip install .[denovo,watcher,dev,test] + + - name: Run type checker + run: | + mypy + + - name: Run linter + run: | + ruff check - name: Test with pytest run: coverage run --source=micall/core,micall/g2p,micall/resistance,micall/monitor -m pytest + - name: Report code coverage + run: | + rm -rf micall/tests/microtest/scratch + codecov + + docker-test: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + - name: Build docker image + run: docker build --tag micall --file Dockerfile . + - name: Run docker image entrypoint + run: docker run micall --help + - name: Check docker image entrypoint + run: docker run micall --help | grep -i -e 'docker' + + singularity-test: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.11 + + - uses: actions/setup-go@v2 + with: + go-version: '^1.15.6' + + - name: Run apt update + run: sudo apt-get update + - name: Install Singularity run: | sudo apt-get install -qq \ @@ -95,19 +122,15 @@ jobs: cd .. 
rm -rf singularity singularity --version + - name: Singularity build run: sudo singularity build micall.simg Singularity + + - name: Singularity test + run: python micall/main.py release_test_microtest micall.simg + - name: Upload Singularity Image - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: micall.simg path: micall.simg - - - name: Singularity test - run: python release_test_microtest.py micall.simg - - - name: Report code coverage - run: | - rm -rf micall/tests/microtest/scratch - python -m pip install codecov - codecov diff --git a/.gitignore b/.gitignore index 5e2eb7a2e..039db64ff 100755 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -/micall_logging_override.py +/micall/utils/micall_logging_override.py /micall/monitor/HIV1_COM_2015_genome_DNA.csv /micall/monitor/compare_454_samples.csv /micall/monitor/compare_454_samples.png diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8551084f5..7c190d791 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -12,6 +12,23 @@ you planning to submit your own fix in a pull request? [issues]: https://github.com/cfe-lab/MiCall/issues ## Development + +The easiest way to start developing MiCall is by using DevContainers. + +1. **Open Project**: + - If you're using Visual Studio Code on your local machine, open the MiCall project folder and select the "Reopen in Container" prompt to initialize the DevContainer environment. Make sure you have the necessary DevContainer extension installed beforehand, available [here](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers). + - For a web-based development environment, you can develop directly on GitHub using GitHub Codespaces by navigating to the MiCall repository on GitHub and selecting "Code" > "Open with Codespaces" to launch a pre-configured environment. + +2. 
**Dependency Installation**: All required dependencies will be automatically installed whether you are using a local DevContainer or GitHub Codespace. + +3. **Interpreter Selection**: + - Following the setup, if you encounter dependency errors, manually select the Python interpreter version `3.8`. This is necessary because the container includes multiple Python versions, and the default selection might not be appropriate. + - This issue is tracked and further details can be found [here](https://github.com/cfe-lab/MiCall/issues/1033). + +4. **Verification**: To ensure that the environment is correctly configured, execute `pytest` within the DevContainer or Codespace. All tests should pass, indicating that the setup is successful. + +### Local install + To see how all the tools should be installed, follow the steps in `Dockerfile` and `dev.dockerfile`. If you prefer, you can run your development environment under docker, as described in `dev.dockerfile`. The same installation steps are @@ -24,39 +41,29 @@ in our [milestones]. [milestones]: https://github.com/cfe-lab/MiCall/milestones ### Python + Check that Python is already installed. - python --version +```shell +python --version +``` -We have tested with Python 3.8. +We have tested with Python `3.11`. ### BaseSpace -Set up the [native apps virtual machine][bsvm], and configure a shared folder -called MiCall that points to the source code. To get the shared folder working, -you'll probably need to [update the VBox guest additions][vbox guest] and add the -basespace user to the `vboxsf` group. Then, run - sudo mount -t vboxsf MiCall /media/sf_MiCall +Set up the [native apps virtual machine][bsvm]. Make sure you have a +developer account on illumina.com. The first time you run this, you +will have to log in to your account using -Make sure you have a developer account on illumina.com. 
The first time you run -this, you will have to log in to your account using - - sudo docker login docker.illumina.com - -Use the `docker_build.py` script to build a Docker image and push it to -BaseSpace. If you add `-t vX.Y`, it will add a tag to the Docker image. If you -add `-a `, it will launch the spacedock tool to process samples as a -local agent. You can also set the `BASESPACE_AGENT_ID` environment variable so -you don't have to supply it every time. You can get the agent id from the Form -Builder page on BaseSpace. - - sudo python3 /media/sf_MiCall/docker_build.py -a abcde12345 +```shell +sudo docker login docker.illumina.com +``` [bsvm]: https://developer.basespace.illumina.com/docs/content/documentation/native-apps/setup-dev-environment -[vbox guest]: https://linuxize.com/post/how-to-install-virtualbox-guest-additions-in-ubuntu/ ### Test data -If you want to run `micall_watcher.py`, you have to set up data folders for raw +If you want to run `micall/monitor/micall_watcher.py`, you have to set up data folders for raw data and for the working folders. You'll also need to set up the QAI project and the MiseqQCReport so you can download QC data and upload results. @@ -79,7 +86,7 @@ and the MiseqQCReport so you can download QC data and upload results. 10. Run the Ruby console for QAI and `LabMiseqRun.import('01-Jan-2000')` for the date of your sample run. 11. Upload the projects to a micall pipelines in QAI, use `micall.utils.projects_upload` to create a new pipeline in QAI -11. Run micall_watcher.py, it does need arguments. Look up the container app ids from Kive, check the Kive server URL and ports as well as QAI server and port +11. Run `micall/monitor/micall_watcher.py`, it does need arguments. 
Look up the container app ids from Kive, check the Kive server URL and ports as well as QAI server and port [cifs]: https://wiki.ubuntu.com/MountWindowsSharesPermanently @@ -88,7 +95,9 @@ When you don't understand the pipeline's output, it can be helpful to look at the raw reads in a sequence viewer like [Tablet][tablet]. Run the `micall_docker` script on a run folder or a single sample, like this: - python micall_docker.py folder --debug_remap --all_projects --keep_scratch /path/to/run +```shell +python micall/utils/micall_docker.py folder --debug_remap --all_projects --keep_scratch /path/to/run +``` The options tell it to write the debug files, use all projects, and save the scratch folder that holds all the debug files. Look through the scratch folders @@ -114,18 +123,24 @@ out the web site locally before publishing it. 1. Install Ruby 2.6, preferably with [Ruby Version Manager]. - rvm install 2.6 - rvm use 2.6 + ```shell + rvm install 2.6 + rvm use 2.6 + ``` 2. Install the gems for the web site. - cd MiCall/docs - gem install bundler - bundle install + ```shell + cd MiCall/docs + gem install bundler + bundle install + ``` 3. Serve the web site. - bundle exec jekyll serve + ```shell + bundle exec jekyll serve + ``` What changes might you want to make? The web site is based on the [Bulma Clean Theme], so read through the documentation there to see if it @@ -150,13 +165,13 @@ similar steps to setting up a development workstation. Follow these steps: 1. Check that all the issues in the current milestone are closed, and make sure the code works in your development environment. Run all the unit tests as described above, process the microtest data set with - `release_test_microtest.py`. + `micall/utils/release_test_microtest.py`. 2. Check if the kiveapi package needs a new release by looking for new commits. Make sure you tested with the latest version. 3. Determine what version number should be used next. -4. 
Use the `projects_dump.py` script for the previous version and compare +4. Use the `micall/utils/projects_dump.py` script for the previous version and compare `projects.json` to check that the projects match, or that the differences - were intended. Test the `projects_upload.py` script with your updated project + were intended. Test the `micall/utils/projects_upload.py` script with your updated project files in your local test QAI. 5. Check the history of the HIV and HCV rules files in the `micall/resistance` folder. If they have changed, create a new display file in the `docs` folder @@ -165,9 +180,9 @@ similar steps to setting up a development workstation. Follow these steps: 5. Check the history of the `micall.alignment` folder. If it has changed since the last release, then update the version number in `setup.py`. 5. Update the change notes in the Singularity file, and commit those changes. -6. [Create a release][release] on Github. Use "vX.Y" as the tag, where X.Y +6. [Create a release][release] on Github. Use "vX.Y.Z" as the tag, where X.Y matches the version you used in QAI. If you have to redo - a release, you can create additional releases with tags vX.Y.1, vX.Y.2, and + a release, you can create additional releases with tags vX.Y.Z.1, vX.Y.Z.2, and so on. Mark the release as pre-release until you finish deploying it. 7. Rebuild the Singularity image, and upload it to your local Kive server. Process the microtest data. @@ -175,31 +190,35 @@ similar steps to setting up a development workstation. Follow these steps: ids of the new apps. 8. Process all the samples from test_samples.csv on the Kive test server, and run the micall_watcher service on a VirtualBox. Use the - `release_test_*.py` scripts to compare the results of the new release with + `micall/utils/release_test_*.py` scripts to compare the results of the new release with the previous version. 
Also run the internal scripts `miseq_gen_results.rb` and `miseq_compare_results.rb` to look for differences. Get the comparison signed off to begin the release process. 8. Upload the Singularity image to the main Kive server, and record the id of the new apps. -8. Upload the pipeline definitions to QAI, using the `projects_upload.py` +8. Upload the pipeline definitions to QAI, using the `micall/utils/projects_upload.py` script. There is no need to create the new pipeline version in QAI beforehand, the script will do this for you - just remember to update the `Order by` field afterwards. 8. Stop the micall_watcher service on the main Kive server after you check that it's not processing any important runs. - ssh user@server - tail /var/log/micall/micall.log - sudo systemctl stop micall_watcher + ```shell + ssh user@server + tail /var/log/micall/micall.log + sudo systemctl stop micall_watcher + ``` 9. Get the code from Github into the server's environment. - ssh user@server - cd /usr/local/share/MiCall - git fetch - git checkout tags/vX.Y + ```shell + ssh user@server + cd /usr/local/share/MiCall + git fetch + git checkout tags/vX.Y.Z + ``` -10. Look for changes in [`micall_watcher.py`'s `parse_args()` function][parse_args]. +10. Look for changes in [`micall/monitor/micall_watcher.py`'s `parse_args()` function][parse_args]. Either look at the blame annotations at the link above, or review the changes in the new release. If there are new or changed settings, adjust the configuration in `/etc/systemd/system/micall_watcher.service` or @@ -207,33 +226,48 @@ similar steps to setting up a development workstation. Follow these steps: 11. Update the container app ids and pipeline version number in `/etc/systemd/system/micall_watcher.service`. If you change the configuration, reload it: - sudo systemctl daemon-reload + ```shell + sudo systemctl daemon-reload + ``` 12. Check that the kiveapi package is the same version you tested with. If not, do a Kive release first. 
- cd /usr/local/share/Kive - /usr/local/share/venv-micall/bin/pip show kiveapi - cat api/setup.py + ```shell + cd /usr/local/share/Kive + /usr/local/share/venv-micall/bin/pip show kiveapi + cat api/setup.py + ``` 13. Start the micall_watcher service, and tail the log to see that it begins processing all the runs with the new version of the pipeline. - sudo systemctl start micall_watcher - sudo systemctl status micall_watcher - tail -f /var/log/micall/micall.log + ```shell + sudo systemctl start micall_watcher + sudo systemctl status micall_watcher + tail -f /var/log/micall/micall.log + ``` If the log doesn't help, look in `/var/log/messages` on CentOS or `/var/log/syslog` on Ubuntu. -14. Launch the basespace virtual machine (see BaseSpace section above), and build - a new Docker image from GitHub. Tag it with the release number. +14. Launch the basespace virtual machine (see BaseSpace section above) and copy + MiCall source files into it. The easiest way to copy is via scp: - cd /media/sf_micall - sudo python3 docker_build.py -t vX.Y --nopush + ```shell + scp -P 2222 /path/to/micall/on/host basespace@localhost:MiCall + # (password is "basespace") + ``` - The script is able to push the docker image to the illumina repo and launch - spacedock as well, but that is currently broken because of the old docker version + Then login to virtual machine and build the docker image: + + ```shell + ssh basespace@localhost -p2222 # (password is "basespace") + sudo python3 MiCall/micall/utils/docker_build.py -t vX.Y.Z --nopush + ``` + + The script is able to push the docker image to the illumina repo and launch + spacedock as well, but that is currently broken because of the old docker version in the VM. If this is ever updated, or we build our own VM, you won't have to do these steps manually anymore and can remove the `--nopush`. @@ -241,31 +275,35 @@ similar steps to setting up a development workstation. 
Follow these steps: Unfortunately, the old version of docker that comes with the basespace virtual machine [can't log in] to docker hub or illumina, so you'll have to save it to a tar file and - load that into your host system's version of docker. Before pushing it anywhere, + load that into your host system's version of docker. Before pushing it anywhere, check that the docker image works by running the microtests. If the docker push fails with mysterious error messages (access to the resource is denied), try `docker logout` and `docker login` again, and make sure you are on the owner team of cfelab on [docker hub]. - ssh basespace@localhost -p2222 - sudo su - cd /media/sf_micall - sudo docker save cfelab/micall:vX.Y >micall-vX.Y.tar - exit (twice) - sudo docker load micall-vX.Y.Z.tar + exit # Exit the root shell. + exit # Exit the virtual machine. + sudo docker load < micall-vX.Y.Z.tar + sudo docker login docker.illumina.com + sudo docker tag docker.illumina.com/cfe_lab/micall:vX.Y.Z cfelab/micall:vX.Y.Z + sudo docker push docker.illumina.com/cfe_lab/micall:vX.Y.Z + rm micall-vX.Y.Z.tar + ``` 16. Duplicate the MiCall form in the revisions section of the form builder, then - edit the `callbacks.js` in the form builder itself, and add the `:vX.Y` tag to the - `containerImageId` field. In My Apps, create a new version of the App with the new - version number. Record the new agent ID (click the arrow on the bottom right of the + edit the `callbacks.js` in the form builder itself, and add the `:vX.Y.Z` tag to the + `containerImageId` field. In My Apps, create a new version of the App with the new + version number. Record the new agent ID (click the arrow on the bottom right of the form builder). 17. Launch the spacedock version by running this in your basespace VM: - sudo spacedock -a [agent ID] -m https://mission.basespace.illumina.com + ```shell + sudo spacedock -a [agent ID] -m https://mission.basespace.illumina.com + ``` 18. 
Check that the new MiCall version works as expected by processing some of the microtests in BaseSpace. @@ -283,7 +321,7 @@ similar steps to setting up a development workstation. Follow these steps: Zenodo version. [release]: https://help.github.com/categories/85/articles -[parse_args]: https://github.com/cfe-lab/MiCall/blame/master/micall_watcher.py +[parse_args]: https://github.com/cfe-lab/MiCall/blame/master/micall/monitor/micall_watcher.py [Zenodo]: https://doi.org/10.5281/zenodo.2644171 [can't log in]: https://www.docker.com/blog/registry-v1-api-deprecation/ -[docker hub]: https://hub.docker.com/orgs/cfelab/members \ No newline at end of file +[docker hub]: https://hub.docker.com/orgs/cfelab/members diff --git a/Dockerfile b/Dockerfile index 181f61910..6c65f2eb2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -24,7 +24,7 @@ # If you omit the `--target` tag altogether, `docker build` will build # the development image. -FROM python:3.8 +FROM python:3.11 MAINTAINER BC CfE in HIV/AIDS https://github.com/cfe-lab/MiCall @@ -83,41 +83,17 @@ RUN apt-get install -q -y zlib1g-dev libncurses5-dev libncursesw5-dev && \ tar -xzf smalt-0.7.6-bin.tar.gz --no-same-owner && \ ln -s /opt/smalt-0.7.6-bin/smalt_x86_64 /bin/smalt -## Install Haploflow -RUN apt-get update && \ - apt-get install -y build-essential sudo git ronn cmake && \ - cd /opt/ && \ - git clone https://github.com/hzi-bifo/Haploflow && \ - cd Haploflow && \ - git checkout 9a5a0ff6c3a0435e723e41f98fe82ec2ad19cf50 && \ - yes | sh build.sh && \ - ln -s /opt/Haploflow/build/haploflow /bin/haploflow - ## Install dependencies for genetracks/drawsvg RUN apt-get install -q -y libcairo2-dev +RUN pip install --upgrade pip -COPY requirements.txt requirements-basespace.txt /opt/micall/ - -## Python packages, plus trigger matplotlib to build its font cache -WORKDIR /opt -RUN pip install --upgrade pip && \ - pip install -r /opt/micall/requirements-basespace.txt && \ - python -c 'import matplotlib; matplotlib.use("Agg"); import 
matplotlib.pyplot' - -## MiCall -COPY micall_docker.py micall_kive.py micall_kive_resistance.py version.tx[t] /opt/micall/ -COPY micall/__init__.py micall/project* /opt/micall/micall/ +COPY . /opt/micall/ -COPY micall/blast_db/make_blast_db.py /opt/micall/micall/blast_db/make_blast_db.py -COPY micall/core /opt/micall/micall/core/ -COPY micall/data /opt/micall/micall/data/ -COPY micall/drivers /opt/micall/micall/drivers/ -COPY micall/g2p /opt/micall/micall/g2p/ -COPY micall/resistance /opt/micall/micall/resistance/ -COPY micall/monitor /opt/micall/micall/monitor/ -COPY micall/utils /opt/micall/micall/utils/ +RUN pip install /opt/micall[denovo,basespace] +RUN micall make_blast_db -RUN python /opt/micall/micall/blast_db/make_blast_db.py +## Trigger matplotlib to build its font cache +RUN python -c 'import matplotlib; matplotlib.use("Agg"); import matplotlib.pyplot' WORKDIR /data -ENTRYPOINT ["python", "/opt/micall/micall_docker.py"] +ENTRYPOINT ["micall", "micall_docker"] diff --git a/README.md b/README.md index 1778dc796..a928002f5 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,9 @@ # MiCall # ## Processing FASTQ data from an Illumina MiSeq ## -[![Build Status]][travis] [![Code Coverage]][codecov] [![DOI]][zenodo] -[Build Status]: https://travis-ci.com/cfe-lab/MiCall.svg?branch=master -[travis]: https://travis-ci.com/cfe-lab/MiCall [Code Coverage]: https://codecov.io/github/cfe-lab/MiCall/coverage.svg?branch=master [codecov]: https://codecov.io/github/cfe-lab/MiCall?branch=master [DOI]: https://zenodo.org/badge/DOI/10.5281/zenodo.1289989.svg diff --git a/Singularity b/Singularity index b59394534..ef346e276 100644 --- a/Singularity +++ b/Singularity @@ -1,6 +1,6 @@ # Generate the Singularity container to run MiCall on Kive. 
Bootstrap: docker -From: python:3.8 +From: python:3.11 %help MiCall maps all the reads from a sample against a set of reference @@ -9,7 +9,8 @@ From: python:3.8 This Singularity container can be run on Kive: http://cfe-lab.github.io/Kive - Change Notes: Fix alignment bugs, and updated to HIVdb 9.4. + Change Notes: Comprehensive updates to the contig stitcher, + including bug fixes and visualization enhancements. %labels MAINTAINER BC CfE in HIV/AIDS https://github.com/cfe-lab/MiCall @@ -27,35 +28,23 @@ From: python:3.8 # Unneeded once Singularity creates parent dirs: # https://github.com/singularityware/singularity/issues/1549 mkdir ${SINGULARITY_ROOTFS}/opt/micall - mkdir ${SINGULARITY_ROOTFS}/opt/micall/micall %files - ## MiCall - micall_docker.py /opt/micall/ - micall_kive.py /opt/micall/ - micall_kive_resistance.py /opt/micall/ - micall/__init__.py /opt/micall/micall/ - micall/project* /opt/micall/micall/ - - micall/core /opt/micall/micall/core - micall/data /opt/micall/micall/data - micall/drivers /opt/micall/micall/drivers - micall/g2p /opt/micall/micall/g2p - micall/resistance /opt/micall/micall/resistance - micall/monitor /opt/micall/micall/monitor - micall/utils /opt/micall/micall/utils - - requirements.txt /opt/micall/ - requirements-basespace.txt /opt/micall/ - - ## HCV genotyping database - micall/blast_db /opt/micall/micall/blast_db + ## These files will be deleted after the install. + . /opt/micall/ %post echo ===== Installing Prerequisites ===== >/dev/null apt-get update -q apt-get install -q -y unzip wget + echo ===== Saving git version ===== >/dev/null + # Git is expected to be already installed. 
+ mkdir -p /etc/micall + git -C /opt/micall/ rev-parse HEAD > /etc/micall/git-version + git -C /opt/micall/ -c 'core.fileMode=false' describe --tags --dirty 1>&2 > /etc/micall/git-describe || true + git -C /opt/micall/ log -n 10 > /etc/micall/git-log + echo ===== Installing blast ===== >/dev/null apt-get install -q -y ncbi-blast+ @@ -102,30 +91,25 @@ From: python:3.8 tar -xzf smalt-0.7.6-bin.tar.gz --no-same-owner ln -s /opt/smalt-0.7.6-bin/smalt_x86_64 /bin/smalt - echo ===== Installing Haploflow ===== >/dev/null - apt-get install -q -y libboost-all-dev build-essential sudo git ronn cmake - cd /opt/ - git clone https://github.com/hzi-bifo/Haploflow - cd Haploflow - git checkout 9a5a0ff6c3a0435e723e41f98fe82ec2ad19cf50 - yes | sh build.sh - echo ===== Installing Python packages ===== >/dev/null # Install dependencies for genetracks/drawsvg apt-get install -q -y libcairo2-dev - # Also trigger matplotlib to build its font cache. - cd /opt + # Install micall main executable. pip install --upgrade pip - pip install -r /opt/micall/requirements-basespace.txt + pip install /opt/micall[denovo] + micall make_blast_db + # Also trigger matplotlib to build its font cache. python -c 'import matplotlib; matplotlib.use("Agg"); import matplotlib.pyplot' - python /opt/micall/micall/blast_db/make_blast_db.py + + # Cleanup. + rm -rf /opt/micall %environment export PATH=/opt/bowtie2:/bin:/usr/local/bin export LANG=en_US.UTF-8 %runscript - python /opt/micall/micall_kive.py "$@" + micall micall_kive "$@" %apphelp filter_quality Post-processing of short-read alignments. 
@@ -137,7 +121,7 @@ From: python:3.8 KIVE_MEMORY 200 %apprun filter_quality - PYTHONPATH=/opt/micall python -m micall.core.filter_quality "$@" + micall filter_quality "$@" %apphelp resistance Combine HCV results with HCV-Midi results, and generate resistance @@ -151,10 +135,10 @@ From: python:3.8 KIVE_MEMORY 200 %apprun resistance - python /opt/micall/micall_kive_resistance.py "$@" + micall micall_kive_resistance "$@" %apprun denovo - python /opt/micall/micall_kive.py --denovo "$@" + micall micall_kive --denovo "$@" %applabels denovo KIVE_INPUTS sample_info_csv fastq1 fastq2 bad_cycles_csv @@ -163,7 +147,8 @@ From: python:3.8 failed_csv cascade_csv nuc_csv amino_csv insertions_csv conseq_csv \ conseq_all_csv concordance_csv concordance_seed_csv failed_align_csv \ coverage_scores_csv coverage_maps_tar aligned_csv g2p_aligned_csv \ - genome_coverage_csv genome_coverage_svg genome_concordance_svg contigs_csv \ + genome_coverage_csv genome_coverage_svg genome_concordance_svg \ + unstitched_cascade_csv unstitched_conseq_csv unstitched_contigs_csv contigs_csv \ read_entropy_csv conseq_region_csv conseq_stitched_csv KIVE_THREADS 2 KIVE_MEMORY 6000 diff --git a/dev.dockerfile b/dev.dockerfile deleted file mode 100644 index ca63d75ba..000000000 --- a/dev.dockerfile +++ /dev/null @@ -1,21 +0,0 @@ -# Dockerized version of MiCall development tools. -# To build this image, first build the production image in Dockerfile, probably -# using the docker_build.py script. Use the docker images command to see that -# docker.illumina.com/cfe_lab/micall:latest is the image you want to use, then -# build this image, using a command like this: -# -# docker build -t micall:dev --file dev.dockerfile . 
-# -# To test out the image, run the test suite, with a command like this: -# -# docker run --rm -it --entrypoint pytest -w /opt/micall \ -# --volume ~/git/micall:/opt/micall micall:dev -# -# That lets you edit the source code on your host system, but run it under -# docker with all the tools installed for you. - -FROM docker.illumina.com/cfe_lab/micall:latest - -## Add the dev packages. -COPY requirements-test.txt requirements-watcher.txt requirements-dev.txt /opt/micall/ -RUN pip install -r /opt/micall/requirements-dev.txt diff --git a/docs/Gemfile.lock b/docs/Gemfile.lock index 46a78a557..036f1aa2f 100644 --- a/docs/Gemfile.lock +++ b/docs/Gemfile.lock @@ -1,46 +1,56 @@ GEM remote: https://rubygems.org/ specs: - activesupport (7.0.4.3) + activesupport (7.1.3.2) + base64 + bigdecimal concurrent-ruby (~> 1.0, >= 1.0.2) + connection_pool (>= 2.2.5) + drb i18n (>= 1.6, < 2) minitest (>= 5.1) + mutex_m tzinfo (~> 2.0) - addressable (2.8.1) - public_suffix (>= 2.0.2, < 6.0) + addressable (2.8.7) + public_suffix (>= 2.0.2, < 7.0) + base64 (0.2.0) + bigdecimal (3.1.6) coffee-script (2.4.1) coffee-script-source execjs - coffee-script-source (1.11.1) + coffee-script-source (1.12.2) colorator (1.1.0) - commonmarker (0.23.9) - concurrent-ruby (1.2.2) - dnsruby (1.61.9) - simpleidn (~> 0.1) + commonmarker (0.23.10) + concurrent-ruby (1.3.4) + connection_pool (2.4.1) + csv (3.3.0) + dnsruby (1.71.0) + simpleidn (~> 0.2.1) + drb (2.2.1) em-websocket (0.5.3) eventmachine (>= 0.12.9) http_parser.rb (~> 0) ethon (0.16.0) ffi (>= 1.15.0) eventmachine (1.2.7) - execjs (2.8.1) - faraday (2.7.4) - faraday-net_http (>= 2.0, < 3.1) - ruby2_keywords (>= 0.0.4) - faraday-net_http (3.0.2) - ffi (1.15.5) + execjs (2.9.1) + faraday (2.9.0) + faraday-net_http (>= 2.0, < 3.2) + faraday-net_http (3.1.0) + net-http + ffi (1.17.0) forwardable-extended (2.6.0) - gemoji (3.0.1) - github-pages (228) - github-pages-health-check (= 1.17.9) - jekyll (= 3.9.3) - jekyll-avatar (= 0.7.0) - 
jekyll-coffeescript (= 1.1.1) - jekyll-commonmark-ghpages (= 0.4.0) - jekyll-default-layout (= 0.1.4) - jekyll-feed (= 0.15.1) + gemoji (4.1.0) + github-pages (232) + github-pages-health-check (= 1.18.2) + jekyll (= 3.10.0) + jekyll-avatar (= 0.8.0) + jekyll-coffeescript (= 1.2.2) + jekyll-commonmark-ghpages (= 0.5.1) + jekyll-default-layout (= 0.1.5) + jekyll-feed (= 0.17.0) jekyll-gist (= 1.5.0) - jekyll-github-metadata (= 2.13.0) + jekyll-github-metadata (= 2.16.1) jekyll-include-cache (= 0.2.1) jekyll-mentions (= 1.6.0) jekyll-optional-front-matter (= 0.3.2) @@ -67,30 +77,32 @@ GEM jekyll-theme-tactile (= 0.2.0) jekyll-theme-time-machine (= 0.2.0) jekyll-titles-from-headings (= 0.5.3) - jemoji (= 0.12.0) - kramdown (= 2.3.2) + jemoji (= 0.13.0) + kramdown (= 2.4.0) kramdown-parser-gfm (= 1.1.0) liquid (= 4.0.4) mercenary (~> 0.3) minima (= 2.5.1) - nokogiri (>= 1.13.6, < 2.0) - rouge (= 3.26.0) + nokogiri (>= 1.16.2, < 2.0) + rouge (= 3.30.0) terminal-table (~> 1.4) - github-pages-health-check (1.17.9) + webrick (~> 1.8) + github-pages-health-check (1.18.2) addressable (~> 2.3) dnsruby (~> 1.60) - octokit (~> 4.0) - public_suffix (>= 3.0, < 5.0) + octokit (>= 4, < 8) + public_suffix (>= 3.0, < 6.0) typhoeus (~> 1.3) html-pipeline (2.14.3) activesupport (>= 2) nokogiri (>= 1.4) http_parser.rb (0.8.0) - i18n (1.12.0) + i18n (1.14.6) concurrent-ruby (~> 1.0) - jekyll (3.9.3) + jekyll (3.10.0) addressable (~> 2.4) colorator (~> 1.0) + csv (~> 3.0) em-websocket (~> 0.5) i18n (>= 0.7, < 2) jekyll-sass-converter (~> 1.0) @@ -101,27 +113,28 @@ GEM pathutil (~> 0.9) rouge (>= 1.7, < 4) safe_yaml (~> 1.0) - jekyll-avatar (0.7.0) + webrick (>= 1.0) + jekyll-avatar (0.8.0) jekyll (>= 3.0, < 5.0) - jekyll-coffeescript (1.1.1) + jekyll-coffeescript (1.2.2) coffee-script (~> 2.2) - coffee-script-source (~> 1.11.1) + coffee-script-source (~> 1.12) jekyll-commonmark (1.4.0) commonmarker (~> 0.22) - jekyll-commonmark-ghpages (0.4.0) - commonmarker (~> 0.23.7) - jekyll (~> 3.9.0) 
+ jekyll-commonmark-ghpages (0.5.1) + commonmarker (>= 0.23.7, < 1.1.0) + jekyll (>= 3.9, < 4.0) jekyll-commonmark (~> 1.4.0) rouge (>= 2.0, < 5.0) - jekyll-default-layout (0.1.4) - jekyll (~> 3.0) - jekyll-feed (0.15.1) + jekyll-default-layout (0.1.5) + jekyll (>= 3.0, < 5.0) + jekyll-feed (0.17.0) jekyll (>= 3.7, < 5.0) jekyll-gist (1.5.0) octokit (~> 4.2) - jekyll-github-metadata (2.13.0) + jekyll-github-metadata (2.16.1) jekyll (>= 3.4, < 5.0) - octokit (~> 4.0, != 4.4.0) + octokit (>= 4, < 7, != 4.4.0) jekyll-include-cache (0.2.1) jekyll (>= 3.7, < 5.0) jekyll-mentions (1.6.0) @@ -192,41 +205,43 @@ GEM jekyll (>= 3.3, < 5.0) jekyll-watch (2.2.1) listen (~> 3.0) - jemoji (0.12.0) - gemoji (~> 3.0) + jemoji (0.13.0) + gemoji (>= 3, < 5) html-pipeline (~> 2.2) jekyll (>= 3.0, < 5.0) - kramdown (2.3.2) + kramdown (2.4.0) rexml kramdown-parser-gfm (1.1.0) kramdown (~> 2.0) liquid (4.0.4) - listen (3.8.0) + listen (3.9.0) rb-fsevent (~> 0.10, >= 0.10.3) rb-inotify (~> 0.9, >= 0.9.10) mercenary (0.3.6) - mini_portile2 (2.8.1) + mini_portile2 (2.8.6) minima (2.5.1) jekyll (>= 3.5, < 5.0) jekyll-feed (~> 0.9) jekyll-seo-tag (~> 2.1) - minitest (5.18.0) - nokogiri (1.14.3) - mini_portile2 (~> 2.8.0) + minitest (5.22.2) + mutex_m (0.2.0) + net-http (0.4.1) + uri + nokogiri (1.16.5) + mini_portile2 (~> 2.8.2) racc (~> 1.4) octokit (4.25.1) faraday (>= 1, < 3) sawyer (~> 0.9) pathutil (0.16.2) forwardable-extended (~> 2.6) - public_suffix (4.0.7) - racc (1.6.2) + public_suffix (5.1.1) + racc (1.7.3) rb-fsevent (0.11.2) - rb-inotify (0.10.1) + rb-inotify (0.11.1) ffi (~> 1.0) - rexml (3.2.5) - rouge (3.26.0) - ruby2_keywords (0.0.5) + rexml (3.3.9) + rouge (3.30.0) rubyzip (2.3.2) safe_yaml (1.0.5) sass (3.7.4) @@ -241,14 +256,16 @@ GEM unf (~> 0.1.4) terminal-table (1.8.0) unicode-display_width (~> 1.1, >= 1.1.1) - typhoeus (1.4.0) + typhoeus (1.4.1) ethon (>= 0.9.0) tzinfo (2.0.6) concurrent-ruby (~> 1.0) unf (0.1.4) unf_ext - unf_ext (0.0.8.2) + unf_ext (0.0.9.1) 
unicode-display_width (1.8.0) + uri (0.13.0) + webrick (1.8.2) PLATFORMS ruby diff --git a/docs/_config.yml b/docs/_config.yml index f8f858e88..1b1548bf2 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -1,2 +1,2 @@ -remote_theme: chrisrhymes/bulma-clean-theme +remote_theme: chrisrhymes/bulma-clean-theme@v0.14.0 title: MiCall diff --git a/docs/_data/navigation.yml b/docs/_data/navigation.yml index b59910515..b2df72767 100644 --- a/docs/_data/navigation.yml +++ b/docs/_data/navigation.yml @@ -11,9 +11,13 @@ link: /design/remap.html - name: De Novo Assembly link: /design/assembly.html + - name: Contig stitcher + link: /design/stitcher.html - name: Resistance link: /design/resistance.html - name: Genome Coverage link: /genome_coverage.html +- name: MiCall at BC CfE + link: /micall_at_bccfe.html - name: GitHub - link: https://github.com/cfe-lab/MiCall \ No newline at end of file + link: https://github.com/cfe-lab/MiCall diff --git a/docs/admin.md b/docs/admin.md index b039fc166..92637ddc8 100644 --- a/docs/admin.md +++ b/docs/admin.md @@ -61,91 +61,126 @@ If a MiCall sample finishes processing and the number of active samples dips bel that limit, MiCall Watcher looks at its list of Samples That Need Reprocessing and starts the next one, moving it from that list to Samples In Progress. +### Installing base packages ### + +MiCall is written in python, thus we need the following packages: + +```shell +apt-get install -y python3 python3-venv git # on Ubuntu & Debian. +``` + ### Installing MiCall Watcher ### + Install the MiCall source code in a shared location: - $ cd /usr/local/share - $ sudo git clone https://github.com/cfe-lab/MiCall.git +```shell +cd /usr/local/share +sudo git clone https://github.com/cfe-lab/MiCall.git +``` -Create a Python 3.6 virtual environment to run MiCall. +Create a Python virtual environment to run MiCall. 
- $ cd /usr/local/share - $ sudo python3.6 -m venv venv-micall - $ cd MiCall - $ sudo ../venv-micall/bin/pip install -r requirements-watcher.txt +```shell +sudo python3 -m venv venv-micall +``` + +Configure micall logging, and then install micall package: + +```shell Copy the logging configuration if you want to change any of the settings. - $ cp micall_logging_config.py micall_logging_override.py +```shell +sudo cp micall/utils/micall_logging_config.py micall/utils/micall_logging_override.py +sudo emacs micall/utils/micall_logging_override.py +sudo venv-micall/bin/pip install ./MiCall[watcher] +``` Read the instructions in the file, and edit the override copy. If the default settings are fine, you don't need the override file. -It should be run as a service, under its own user account, so first create the -new user: - - $ sudo useradd --system micall - -A system user won't show up in the list of accounts to log in, and it won't get -a home folder. - Depending on how you configured the logging, you'll probably need to create a log folder and grant access to the micall user. - $ sudo mkdir /var/log/micall - $ sudo chown micall:micall /var/log/micall +```shell +sudo mkdir /var/log/micall +sudo chown micall:micall /var/log/micall +``` + +MiCall watcher should be run as a service, under its own user account, +so first create the new user: + +```shell +sudo useradd --system micall +sudo su micall # switch to micall account. +. venv-micall/bin/activate # activate the virtual environment. +``` Test that everything is installed with the right permissions: - $ sudo su -c "/usr/local/share/venv-micall/bin/python micall_watcher.py --help" micall +```shell +micall micall_watcher --help +``` -Look at the options you can give to the `micall_watcher.py` script when you +Look at the options you can give to the `micall_watcher` script when you configure the service file in the next step. Now configure the service using a systemd [service unit] configuration. 
Here's an example configuration, in `/etc/systemd/system/micall_watcher.service`: - [Unit] - Description=micall_watcher +```toml +[Unit] +Description=micall_watcher - [Service] - ExecStart=/usr/local/share/venv-micall/bin/python3.6 \ - /usr/local/share/MiCall/micall_watcher.py \ - --pipeline_version=8.0 --raw_data=/data/raw \ - --micall_filter_quality_pipeline_id=100 --micall_main_pipeline_id=101 \ - --micall_resistance_pipeline_id=102 - EnvironmentFile=/etc/micall/micall.conf - User=micall +[Service] +ExecStart=/usr/local/share/venv-micall/bin/micall micall_watcher +EnvironmentFile=/etc/micall/micall.conf +User=micall - # Allow the process to log its exit. - KillSignal=SIGINT +# Allow the process to log its exit. +KillSignal=SIGINT - [Install] - WantedBy=multi-user.target - -The settings can either be given on the command line or set as -environment variables. Environment variables are a better option for -sensitive parameters like passwords, because the command line is visible to all -users. Environment variables go in the configuration file listed in the +[Install] +WantedBy=multi-user.target +``` + +Micall watcher accepts multiple settings which can be passed +directly as command line arguments, or as environment variables. +Environment variables are a better option for sensitive parameters like passwords, +because the command line is visible to all users. +Environment variables go in the configuration file listed in the `EnvironmentFile=` setting. In this example, it's `/etc/micall/micall.conf` - $ sudo mkdir /etc/micall - $ sudo emacs -nw /etc/micall/micall.conf - $ sudo chmod 600 /etc/micall/micall.conf +```shell +exit # logout from "micall" account. +sudo mkdir /etc/micall +sudo emacs /etc/micall/micall.conf +sudo chmod 600 /etc/micall/micall.conf +``` Make sure you reduce the read permissions on the `.conf` file so other users can't read it. 
The environment variable names are the same as the command options, but they add a `MICALL_` prefix, if it's not already there. +To list all the available options, run `micall micall_watcher --help`. +Below is the example config: + +```shell +# This is an example of /etc/micall/micall.conf +# You can add comment lines that start with # +MICALL_KIVE_SERVER=https://kive.example.com +MICALL_KIVE_USER=kiveusername +MICALL_KIVE_PASSWORD=kivepassword + +MICALL_QAI_SERVER=https://qai.example.com +MICALL_QAI_USER=qaiuser +MICALL_QAI_PASSWORD=qaipassword - # This is an example of /etc/micall/micall.conf - # You can add comment lines that start with # - MICALL_KIVE_SERVER=https://example.com - MICALL_KIVE_USER=badexample - MICALL_KIVE_PASSWORD=badexample +MICALL_RAW_DATA=/data/raw - MICALL_QAI_SERVER=https://example.com - MICALL_QAI_USER=badexample - MICALL_QAI_PASSWORD=badexample +MICALL_MAIN_PIPELINE_ID=100 +MICALL_FILTER_QUALITY_PIPELINE_ID=101 +MICALL_RESISTANCE_PIPELINE_ID=102 +``` Don't put the environment variables directly in the `.service` file, because its contents are visible to all users with `systemctl show micall_watcher`. @@ -154,10 +189,12 @@ Once you write the configuration file, you have to enable and start the service. From then on, it will start automatically when the server boots up. - $ sudo systemctl daemon-reload - $ sudo systemctl enable micall_watcher - $ sudo systemctl start micall_watcher - $ sudo systemctl status micall_watcher +```shell +sudo systemctl daemon-reload +sudo systemctl enable micall_watcher +sudo systemctl start micall_watcher +sudo systemctl status micall_watcher +``` If the service fails to start, look for detailed messages in the log file, in `/var/log/syslog`, or in `/var/log/messages`. 
@@ -167,9 +204,11 @@ If the service fails to start, look for detailed messages in the log file, in ### Restarting the MiCall Watcher ### If you installed it as a service as described above, then it's easy: - sudo systemctl restart micall_watcher +```shell +sudo systemctl restart micall_watcher +``` -Don't launch the `micall_watcher.py` script on its own, or the service will run +Don't launch the `micall/monitor/micall_watcher.py` script on its own, or the service will run won't know that it's running. That can end up running two copies of the watcher process, and it gets confused. @@ -215,9 +254,13 @@ in a run under `Data/Intensities/BaeCalls/L001/*/*.bcl`. You can see how much space they take within a run folder: - find -name "*.bcl" -print0 | du -ch --files0-from - +```shell +find -name "*.bcl" -print0 | du -ch --files0-from - +``` We usually keep the last year's worth of BCL files around, so to delete all the BCL files from before May 2022, we ran this command in the runs folder: - find */Data/Intensities/BaseCalls/L001 -name "*.bcl" -not -newer 220527_M04401_0226_000000000-K5YRD/SampleSheet.csv -print -delete +```shell +find */Data/Intensities/BaseCalls/L001 -name "*.bcl" -not -newer 220527_M04401_0226_000000000-K5YRD/SampleSheet.csv -print -delete +``` diff --git a/docs/compute_micall_results.ipynb b/docs/compute_micall_results.ipynb index 73e4e6d22..bd25163b5 100644 --- a/docs/compute_micall_results.ipynb +++ b/docs/compute_micall_results.ipynb @@ -66,7 +66,6 @@ "import os\n", "import csv\n", "import pandas as pd\n", - "import yaml\n", "import numpy as np\n", "import statistics\n", "from operator import itemgetter" @@ -144,7 +143,7 @@ " nucsfile = nucsfile[\n", " ~nucsfile['region'].str.contains('nsp')\n", " ]\n", - " nucsfile['mixtures'] = nucsfile.apply(compute, axis=1)\n", + " nucsfile['mixtures'] = nucsfile.apply(compute, axis=1) # noqa\n", " nucsfile = nucsfile[\n", " ~nucsfile['mixtures'].isnull()\n", " ]['mixtures']\n", diff --git 
a/docs/design/stitcher.md b/docs/design/stitcher.md new file mode 100644 index 000000000..52e14e930 --- /dev/null +++ b/docs/design/stitcher.md @@ -0,0 +1,744 @@ +--- +title: Contig Stitching in MiCall +--- + +DeNovo assembly does not invariably translate input reads into a +single contiguous sequence akin to a genomic consensus. Typically, +errors in input data lead to fragmented sequences — referred to as +contigs — which furthermore may overlap, thus encoding the same region +of a genome more than once. Assembling a unified consensus sequence +necessitates the systematic arrangement of these contigs while +addressing discrepancies within overlapping regions. That is the +Stitcher's function. + +# Structure + +The Stitcher is a specialized component within the MiCall system. It +is designed to operate as an independent module which processes the +assembled contigs, generally derived from DeNovo assembler outputs, +and produce a singular, coherent sequence. + +## Modular Aspect + +The Stitcher maintains a distinct and autonomous role within +MiCall. Its implementation is fully isolated to the +`contig_stitcher*.py` files within the MiCall's source code. The +stitcher module can be run as a CLI script, separately from the rest +of the pipeline. The following command runs the Stitcher: + +```sh +micall contig_stitcher --help +``` + + + + + + + + + + +## Interaction + +Stitching is initiated either as a pipeline step in MiCall, or as a +command line call given above. In each case: + +**Input:** The Stitcher receives a single input file in CSV +format. This file contains 1 or more contigs that are the outcomes of +the previous assembly step, together with associated reference genome +information. These contigs are essentially segments of DNA +sequences. They can vary significantly in length. + +**Output:** The sole output from the Stitcher is a CSV +file. 
This file holds the stitched sequences -- longer or fully +continuous sequences that represent the genomic consensus formed by +merging the initial fragmented contigs, and additional metadata, +such as the inferred reference genome's name. + + + +# Operational procedure + +To clarify operations of the Stitcher, the subsequent section +introduces a vocabulary that is necessary for a precise description. + +## Definitions + +- An **input nucleotide** refers to a nucleotide of an initial + assembly contig sequence. +- A **reference nucleotide** refers to a nucleotide of a reference + genome sequence. +- A **non-conflicting nucleotide** is a **reference nucleotide** that + has at most one candidate **input nucleotide**. +- A **non-ambiguous nucleotide** is an **input nucleotide**, which has + a clear positioning with respect to all **input nucleotides** of all + other contigs associated with the same reference genome. In + particular, all **conflicting nucleotides** are **ambiguous + nucleotides** because they do not have a clear positioning with + respect to their competing **conflicting nucleotide**. +- An **overlap** is a continuous segment of **conflicting + nucleotides**. +- **Multidirectional alignment** is a property of a contig such that: + 1. the contig has aligned in multiple parts. + 2. some parts have been aligned to the forward strand, and some to + the reverse strand of the reference genome. +- **Cross-alignment** is a property of a contig such that: + 1. the contig has aligned in multiple parts. + 2. the contig-order of the aligned parts does not agree with the + reference-order of the aligned parts. +- A **non-aligned contig** is a contig that has been assigned a + reference sequence, but did not align to it. +- An **invalid contig** is a contig with **multidirectional + alignment**. +- A **stitched consensus** is a **valid contig** in the output of the + Stitcher. 
+- The **final output** refers to the contents of the only output CSV + file produced by the Stitcher. + +## Principles + +The reason the Stitcher operates effectively is due to its utilization +of reference genomes as an additional source of truth. More precisely, +the Stitcher integrates two sets of data: + +1. Sequences generated by the initial assembly. +2. Sequences of reference genomes to which assembled contigs get aligned. + +We will say that 1. is the assembler's data, and 2. is the aligner's. + +The core belief is that a reference genome can be used to enhance the +quality of and resolve conflicts within initial assembly contigs. + +In applying this approach, the Stitcher is guided by the following principles: + +### Principle of Scale-Dependent Credibility + +The reliability of sequence alignments increases as the length of the +aligned segment increases. +Therefore: + +- **Micro Scale**: For shorter segments, the assembler's findings are more + reliable, because of the expected abundance of small, local mutations + not present in the reference genome. + +- **Macro Scale**: For longer segments, the aligner's interpretations + are prioritized. The exponential decrease in alignment errors with + increased sequence length makes long alignments particularly + trustworthy. + +### Principle of Length Prioritization + +A longer contig typically arises from a greater number of reads +spanning a larger genomic region. While this does not imply more reads +per individual position, it suggests that the initial set of reads has +successfully assembled over a more extensive sequence, reflecting a +broader and more robust dataset. Moreover, aligning a longer sequence +to the reference genome is statistically less probable, compared to a +shorter sequence. This means that a successful alignment of a longer +contig to the reference genome provides further confidence in its +accuracy. 
+ +Therefore in scenarios where multiple contigs cover the same region of +the reference genome, longer contigs are prioritized over shorter +ones. + +### Ambiguity Omission Principle + +To mitigate the potential propagation of uncertainties, any data that +lacks a definitive, unambiguous position within the reference genome +should be entirely excluded. This approach acknowledges that absolute +certainty in complex genomic datasets is often unattainable, and tries +to establish a reasonable default. + +## Regulations + +Guided by the previously outlined principles, +several precise regulations governing the Stitcher can be extracted: + +1. For every reference genome, at most one **stitched consensus** + must result. +2. No **ambiguous, non-conflicting nucleotide** + shall be included into the **final output**. +3. Every **non-conflicting-** and **non-ambiguous-** nucleotide + pertaining to a **valid contig** is required to be included in the + **stitched consensus** for the associated reference genome. +4. The relative positions of **non-conflicting-** and + **non-ambiguous-** nucleotides must be preserved in the **final output**. +5. All nucleotides present in the **final output** must exclusively + originate from the initial assembly data. + +## Setup + +The setup process for the Stitcher ensures that each contig is +properly aligned and prepared for the stitching process. The steps are +as follows: + +1. **Align Contigs**: Align each contig to its corresponding reference + genome to approximate their positions within a global reference + framework, allowing for spatial comparison between different contigs. + +2. **Split Multi-Alignment Contigs**: Split contigs that align to + multiple distinct parts of the reference genome into separate + segments. + +3. **Handle Reverse Complement**: Reverse complement contigs that + align to the reverse strand of the reference genome to ensure all + sequences are oriented in the same direction. + +4. 
**Sort Contigs**: Arrange the contigs based on their starting + positions along the reference genome. + +5. **Group by Reference**: Group contigs such that all contigs + associated with the same reference genome are processed together. + +These setup steps perform minimal alteration to the original contigs +and are primarily guided by straightforward, logical +considerations. Therefore, they do not require extensive +rationalization compared to the subsequent rules. + +## Rules of operation + +Stitching is an iterative process, governed by the following rules: + +### Rule 1: Merge Non-Overlapping Contigs + +1. **Verify Non-Overlap**: Ensure that the end of the first contig is + less than or equal to the start of the second contig according to their + positions on the reference genome. + +2. **Delete adjacent non-aligned parts**: Filter out any non-aligned + nucleotides positioned after the first contig's aligned part and + before the second contig's aligned part. + +3. **Concatenate Sequences**: Directly join the end of the first + contig to the start of the second contig. + +#### Example: + +**Input:** + +![non overlapping example input illustration](stitcher_rule_1_input.svg) + +- Contig 1: Sequence = `GG[ATGCCC]AA`, aligned to Reference X at + position 10, with first two and last two nucleotides not aligned. +- Contig 2: Sequence = `AC[TTAG]TA`, aligned to Reference X at position + 30, with first two and last two nucleotides not aligned. + +**Procedure:** +- Verify that Contig 1 ends before Contig 2 begins. +- Delete non-aligned nucleotides resulting in Contig 1 = `GG[ATGCCC]` and Contig 2 = `[TTAG]TA`. +- Concatenate Contig 1 and Contig 2 to form `GG[ATGCCC][TTAG]TA`. + +**Result:** + +![non overlapping example result illustration](stitcher_rule_1_result.svg) + +- The new sequence, `GG[ATGCCCTTAG]TA`, spans positions 10 to 34 on the reference genome. + +#### Rationale + +There aren't many alternative actions available to us in these circumstances. 
+This enables us to consider all of them: + +1. **Leaving contigs as separate**: + + Separate contigs would result in multiple consensus outputs for one genome. + Thus it fails to comply with **regulation 1**. + +2. **Omitting the strip step**: + + Note that the adjacent non-aligned nucleotides of the two sequences + are **ambiguous, non-conflicting nucleotides**. Therefore, leaving + them in place violates **regulation 2**. + +3. **Introducing additional modifications**: + + Since given contigs do not overlap, every nucleotide in them is **non-conflicting**. + Additionally, we have stripped all the **ambiguous nucleotides**. + Therefore, all modifications that can be introduced + would either violate **regulation 3**, **regulation 4** or **regulation 5**. + +### Rule 2: Merge Overlapping Contigs + +1. **Verify Overlap**: Check if the ending position of the first + contig is greater than the starting position of the second contig. + +2. **Delete adjacent non-aligned parts**: Filter out any non-aligned + nucleotides positioned after the first contig's aligned part and + before the second contig's aligned part. + +3. **Align Overlapping Regions**: + - Extract the sequences from the overlapping region in both + contigs. + - Use a global alignment method to align these overlapping + sub-sequences. + +4. **Calculate Concordance Scores**: + - Compute concordance scores for each position within the + overlapping region. Importantly, the concordance calculation is + done purely between the aligned overlapping subsequences of the + contigs, with no regard to the reference genome sequence. The + concordance score represents how well the nucleotides from the + two contigs match at each position. + - The score is calculated using a sliding average approach, + emphasizing regions with high sequence agreement. + +5. 
**Determine Optimal Cut Point**: + - Identify the cut point based on the concordance scores such that + it lies in the middle of regions with the highest + concordance. + - This means making cuts as far away from disagreeing nucleotides + as possible. + +6. **Segment and Combine**: + - Segment the overlapping sequences at the determined cut point. + - Concatenate the non-overlapping parts of the contigs with the + segmented parts from the overlapping region. + +#### Example + +**Input:** + +![overlapping example input illustration](stitcher_rule_2_input.svg) + +- Contig 1: Sequence = `G[GGCC A--TAC]T T`, aligned to Reference X from positions 10 to 19. +- Contig 2: Sequence = `--CCAC[AAATAC C]GGG`, aligned to Reference X from positions 14 to 20. + +**Procedure:** + +1. **Verify Overlap**: + - Contig 1 ends at position 19, and Contig 2 starts at position 14 + (both on Reference X), resulting in an overlap from positions 14 + to 19. + +2. **Delete adjacent non-aligned parts**: Contig 1 is right-stripped + to become `G[GGCCA--TAC]`, Contig 2 is left-stripped to become + `[AAATACC]GGG`. + +3. **Align Overlapping Regions**: + - The overlapping sequence is `A--TAC` from Contig 1, and `AAATAC` + from Contig 2. + - Align them globally to produce the following alignments: `--ATAC` + and `AAATAC` + +4. **Calculate Concordance**: + - Calculate concordance scores for positions 14 to 19, considering + only the overlap between the two aligned sequences. + - Approximate concordance: `[0.1, 0.2, 0.3, 0.8, 0.8, 0.3]`. + +5. **Determine Cut Point**: + - Use the computed concordance scores to identify the cut point. + - In this example, the highest concordance scores are around + positions with the score 0.8, so choose it as the cut point. + + ``` + Aligned sequences: + + 1: --ATAC + 2: AAATAC + + Concordance: + 0.1 0.2 0.3 0.8 0.8 0.3 + + Based on the concordance, cut between the positions: + 1: --AT|AC + 2: AAAT|AC + ``` + +6. 
**Segment and Combine**: + - Cut the sequences at the determined cut points. + - Combine sequence parts: `G[GGCC][--AT][AC][C]GGG`. + +**Result:** + +![overlapping example result illustration](stitcher_rule_2_result.svg) + +- The new sequence `G[GGCC--ATACC]GGG` spans positions 10 to 20 on Reference X, + representing the most accurate combined sequence. + +#### Rationale + +This rule is similar to Rule 1, but deals with overlapping +regions. When contigs overlap, there is a need to choose a cut point +due to: + +1. **Aligner Constraints**: The aligner constrains the size of the + overlapping sequence (by the **Principle of Scale-Dependent + Credibility**), making it impossible to keep both versions of the + overlapping region simultaneously. +2. **Small scale adjustments**: Overlaps are usually small enough that + assembler data is the highest quality data we have for the + nucleotide positions within it. Thus interleaving segments from + both contigs would again violate the **Principle of Scale-Dependent + Credibility**. + +We base the choice on concordance +scores, which measure the degree of agreement between the overlapping +sequences of the two contigs. We look for the highest concordance +because: + +**Choice of Cut Point**: +- If a cut point is chosen where concordance is lower than the + maximum, it implies that in the neighboring region around the cut + point, either to the left or right, there will almost certainly be + some incorrect nucleotides due to disagreement between the contigs. +- Conversely, if the concordance is high at the chosen cut point, the + neighboring region is similar between the two contigs. The selected + extensions (left of the cut point from the left contig and right of + the cut point from the right contig) are longer than the alternative + from the conflicting contig, ensuring greater trust in these regions + based on their length (by the **Principle of Length Prioritization**). 
+ +While this method of choosing the cut point based on concordance +scores aligns with the Principles, we acknowledge that there might be +other ways to determine the optimal cut point. However, given the +complexity of overlapping regions and the necessity to preserve +relative ordering, this concordance-based approach is the best we have +identified so far. + +### Rule 3: Split Contigs with Gaps Which Are Covered by Other Contigs + +1. **Identify Large Gaps**: + - For each contig, identify regions within its alignment to the + reference genome that lack coverage, i.e., gaps. Both small gaps + resulting from sequencing errors and large gaps are recognized. + - Significant gaps are determined based on a pre-defined + threshold. In the context of HIV genome analysis, a gap size of + greater than 21 nucleotides is considered significant due to + common RNA secondary structure phenomena. + +2. **Verify Coverage by Other Contigs**: + - For each identified significant gap, check if other contigs span + or cover this gap. Specifically, check if other contigs have + aligned reference coordinates that overlap with the coordinates + of the gap. + +3. **Split Contig at Gap Midpoint**: + - If a significant gap is covered by another contig, split the + contig containing the gap into two separate contigs at the + midpoint of the gap. + - Left-trim the new right contig segment and right-trim the new + left contig segment to remove ambiguity from their ends. + +4. **Update Contig List**: + - Replace the original contig with its two new segments in the list + of contigs. + +#### Example + +**Input:** + +![gap example input illustration](stitcher_rule_3_input.svg) + +- Contig 1: Sequence = `AGC[TTAC---------------------GGCACATATCATA]CTA`, + aligned to Reference X from positions 10 to 48. +- Contig 2: Sequence = `G[TGAC-----GGACG-TCGTCG--TACGATCAG]G`, + aligned to Reference X from positions 8 to 40. + +**Procedure:** + +1. 
**Identify Large Gaps**: + - Contig 1 has a significant gap between positions 14 and 35. + +2. **Verify Coverage by Other Contigs**: + - Contig 2 covers the gap region from positions 8 to 40. + +3. **Split Contig at Gap Midpoint**: + - Split Contig 1 into two parts at the midpoint of the gap (i.e., position 24). + This creates two new contigs: + - Contig 1a: Sequence = `AGC[TTAC----------]`, + aligned to Reference X from positions 10 to 24. + - Contig 1b: Sequence = `[-----------GGCACATATCATA]CTA`, + aligned to Reference X from positions 25 to 48. + - Trim the new segments: + - Contig 1a becomes `AGC[TTAC]`. + - Contig 1b becomes `[GGCACATATCATA]CTA`. + +4. **Update Contig List**: + - Discard the original Contig 1 and add Contig 1a and Contig 1b to + the list of contigs. + +**Result:** + +![gap example result illustration](stitcher_rule_3_result.svg) + +- Modified list of contigs now includes Contig 2, Contig 1a, and Contig 1b. + +#### Rationale + +The decision to split contigs at large gaps covered by other contigs +is grounded in the **Principle of Scale-Dependent +Credibility**. Assemblers can occasionally join sequence fragments +incorrectly if the end of one segment appears similar to the start of +another. Relying on the aligner's macro-scale credibility helps +identify these erroneous joins. Large gaps within a contig are +suspicious and suggest potential assembler errors, whereas small gaps +are generally due to sequencing errors or micro-scale mutations and do +not warrant splitting. By leveraging the aligner's high reliability on +a macro scale, we can effectively pinpoint these errors. If other +contigs cover large gaps, it confirms the aligner's indication that +the assembly might have joined unrelated segments. Splitting contigs +at the midpoint of significant gaps ensures that only those segments +supported by both the assembler's micro-scale data and the aligner's +macro-scale alignment are included in the final stitched consensus. 
+ +The threshold for considering a gap significant is set at 21 +nucleotides. This value was chosen because it correlates with the +average pitch of the RNA helix, which reflects how reverse +transcription periodic deletions are structured around 21 nucleotides +in HIV sequences. Choosing this cutoff recognizes that deletions of +approximately this length are a common feature due to RNA secondary +structures and should not automatically warrant a split. This way, we +avoid splitting on every small gap, which is expected given the nature +of micro-scale mutations, but effectively identify and act on larger, +suspect gaps indicative of potential assembler errors. + +### Rule 4: Discard Contigs That Are Fully Covered By Other Contigs + +1. **Identify Covered Contigs**: + - For each contig in the input set, calculate its aligned interval on the reference genome. + - Identify intervals (regions) that are completely covered by input contigs. + +2. **Compare Intervals**: + - Assess the intervals of each contig to find any contig that falls entirely within the span of other contig intervals. + These are the contigs that are fully covered by others. + +3. **Discard Fully Covered Contigs**: + - Once identified, remove the covered contigs. + +#### Example + +**Input:** + +![covered example input illustration](stitcher_rule_4_input.svg) + +- Contig 1: Sequence = `A[ATCGA]GCT`, aligned to Reference X from positions 10 to 15. +- Contig 2: Sequence = `C[TAGTTG]A`, aligned to Reference X from positions 14 to 19. +- Contig 3: Sequence = `G[CGTACC]G`, aligned to Reference X from positions 12 to 17. + +**Procedure:** + +1. **Identify Covered Contigs**: + - Calculate the intervals: + - Contig 1: `[10-15]` + - Contig 2: `[14-19]` + - Contig 3: `[12-17]` + +2. **Compare Intervals**: + - Assess intervals and find Contig 3: `[12-17]` is completely within the intervals `[10-15]` of Contig 1 and `[14-19]` of Contig 2. + +3. 
**Discard Fully Covered Contigs**: + - Remove Contig 3 from the analysis. + +**Result:** + +![covered example result illustration](stitcher_rule_4_result.svg) + +- Unchanged remaining contigs Contig 1 and Contig 2. + +#### Rationale + +The underlying idea for this rule is founded on the two following principles: + +1. **Principle of Length Prioritization**: longer contigs are + inherently more reliable. + +2. **Principle of Scale-Dependent Credibility**: Fully covered contigs + might introduce small-scale inconsistencies that the longer + contig can resolve more credibly, given the enhanced reliability + associated with its length and alignment. + +Moreover, keeping all contigs would violate **Regulation 1**. + +--- + +**Note**: rules apply to contigs that are in the same group. + +# Diagnostics + +The Stitcher includes diagnostic tools to ensure transparency and +correctness throughout the stitching process. Two primary methods are +used for diagnostics: visualizer plots and traditional log +files. These tools help users understand and verify the decisions made +by the Stitcher during the stitching process. + +## The Optional Visualizer Tool + +The visualizer can be enabled through the `--plot` flag when running +the Stitcher executable. Running the Stitcher with this flag will +produce an SVG file that visualizes the stitching process, helping to +confirm and debug the Stitcher's operations. + +To use the visualizer, run the Stitcher with an additional argument +specifying the path to the output plot file. Here's an example of how +to stitch contigs and retrieve a visualizer plot: + +```sh +PYTHONPATH="/path/to/micall/repository" python3 -m micall.core.contig_stitcher "contigs.csv" "stitched_contigs.csv" --plot "visualized.svg" +``` + +**Command Line Arguments:** + +- `contigs.csv`: Input file in CSV format containing assembled + contigs and related information. +- `stitched_contigs.csv`: Output CSV file that will contain the + stitched contigs. 
+- `--plot visualized.svg`: The optional argument to generate a visual + representation of the stitching process, saved as `visualized.svg`. + +### Understanding the Output + +In practice, a visualizer plot might look something like this: + +![practical visualizer plot](stitcher_practical_plot.svg) + +From such a diagram, you can gain insights into the following aspects +of the stitching process: + +- **Reference genome**: The best matching reference genome for this + group of contigs was determined to be `HIV1-A1-RW-KF716472`. + +- **Dropped Contigs**: Contigs that were dropped due to being fully + covered by other contigs, as per Rule 4. In the example plot: + - Contigs 2, 4, 6, 7, and 8 were dropped. + +- **Split Contigs**: Contigs split at large gaps covered by other + contigs, according to Rule 3. The resulting parts are shown as + individual segments. + - Contig 1 was split around Contig 3, producing segments labeled as + 1.1 and 1.3. + +- **Joined Contigs**: Contigs that were merged due to overlap: + - Contigs 1 and 3 were joined as per Rule 2; the discarded + **ambiguous, non-conflicting** nucleotides are shown as + segments labeled 1.2 and 3.1. + +- **Unaligned Contigs**: Contigs that failed to align to the reference + genome during the alignment step of the setup. + - Contig 5 failed to align. + +- **Contigs without a Reference**: Contigs for which a reference + genome could not be determined during the reference detection step + of the setup. + - No reference genome could be determined for Contigs 9 and 10. + +Understanding these basics will help to interpret other scenarios +displayed by the visualizer plot. + +## Traditional Logs + +In addition to visual tools, the Stitcher produces traditional log +files that provide textual details of the stitching process. These +logs are crucial for debugging and understanding the sequence of +operations performed by the Stitcher. 
The verbosity of logs can be +adjusted using command-line options (`--verbose`, `--debug`, `--quiet`). + +Here is an example of typical log entries: + +```text +DEBUG:micall.core.contig_stitcher:Introduced contig 'contig.00001' (seq = TA...CA) of ref 'HIV1-C-BR-JX140663-seed', group_ref HIV1-A1-RW-KF716472-seed (seq = GA...AC), and length 7719. +DEBUG:micall.core.contig_stitcher:Introduced contig 'contig.00002' (seq = CG...AG) of ref 'HIV1-A1-RW-KF716472-seed', group_ref HIV1-A1-RW-KF716472-seed (seq = GA...AC), and length 1634. +... +DEBUG:micall.core.contig_stitcher:Contig 'contig.00006' produced 1 aligner hits. After connecting them, the number became 1. +DEBUG:micall.core.contig_stitcher:Part 0 of contig 'contig.00006' re-aligned as (5) at 7M...3D@[8,1433]->[7461,8946]. +DEBUG:micall.core.contig_stitcher:Part 0 of contig 'contig.00007' aligned at 76M...3D@[0,732]->[5536,6277]. +DEBUG:micall.core.contig_stitcher:Contig 'contig.00007' produced 1 aligner hits. After connecting them, the number became 1. +DEBUG:micall.core.contig_stitcher:Part 0 of contig 'contig.00007' re-aligned as (6) at 76M...3D@[0,732]->[5536,6277]. +... +DEBUG:micall.core.contig_stitcher:Ignored insignificant gap of (5), 3D@[790,789]->[8280,8282]. +DEBUG:micall.core.contig_stitcher:Ignored insignificant gap of (5), 19D@[1324,1323]->[8817,8835]. +DEBUG:micall.core.contig_stitcher:Ignored insignificant gap of (5), 2D@[1354,1353]->[8866,8867]. +... +DEBUG:micall.core.contig_stitcher:Created contigs (8) at 24M...1I@[14,3864]->[0,4558] and (9) at 708D...92I@[3865,7691]->[4559,9032] by cutting (1) at 24M...1I@[14,7691]->[0,9032] at cut point = 4558.5. +DEBUG:micall.core.contig_stitcher:Doing rstrip of (8) at 24M...1I@[14,3864]->[0,4558] (len 7719) resulted in (10) at 24M...1I@[14,3864]->[0,3850] (len 3865). +DEBUG:micall.core.contig_stitcher:Doing lstrip of (9) at 708D...92I@[3865,7691]->[4559,9032] (len 7719) resulted in (11) at 14M...1I@[0,3734]->[5267,9032] (len 3762). 
+DEBUG:micall.core.contig_stitcher:Split contig (1) at 24M...1I@[14,7691]->[0,9032] around its gap at [3864, 3863]->[3851, 5266]. Left part: (10) at 24M...1I@[14,3864]->[0,3850], right part: (11) at 14M...1I@[0,3734]->[5267,9032]. +... +DEBUG:micall.core.contig_stitcher:Created a frankenstein (34) at 24M...1I@[14,4185]->[0,4171] (len 4186) from [(26) at 24M...1I@[14,3041]->[0,3027] (len 3042), (28) at 271M2D3M2I395M@[0,670]->[3028,3698] (len 671), (30) at 152M@[0,151]->[3699,3850] (len 152), (31) at 321M@[0,320]->[3851,4171] (len 321)]. +DEBUG:micall.core.plot_contigs:Contig name (26) is displayed as '1.1'. +DEBUG:micall.core.plot_contigs:Contig name (36) is displayed as '1.3'. +DEBUG:micall.core.plot_contigs:Contig name 'contig.00002' is displayed as '2'. +DEBUG:micall.core.plot_contigs:Contig name (2) is displayed as '2'. +DEBUG:micall.core.plot_contigs:Contig name 'contig.00003' is displayed as '3'. +DEBUG:micall.core.plot_contigs:Contig name (31) is displayed as '3.2'. +DEBUG:micall.core.plot_contigs:Contig name 'contig.00004' is displayed as '4'. +DEBUG:micall.core.plot_contigs:Contig name (4) is displayed as '4'. +DEBUG:micall.core.plot_contigs:Contig name 'contig.00005' is displayed as '5'. +DEBUG:micall.core.plot_contigs:Contig name 'contig.00006' is displayed as '6'. +DEBUG:micall.core.plot_contigs:Contig name (5) is displayed as '6'. +DEBUG:micall.core.plot_contigs:Contig name 'contig.00007' is displayed as '7'. +DEBUG:micall.core.plot_contigs:Contig name (6) is displayed as '7'. +DEBUG:micall.core.plot_contigs:Contig name 'contig.00008' is displayed as '8'. +DEBUG:micall.core.plot_contigs:Contig name (7) is displayed as '8'. +DEBUG:micall.core.plot_contigs:Contig name 'contig.00009' is displayed as '9'. +DEBUG:micall.core.plot_contigs:Contig name 'contig.00010' is displayed as '10'. 
+``` + +The following points illustrate how these logs can facilitate +understanding the stitching process: + +- **Contig Introduction**: Provides details about the contigs + introduced for stitching. + - `Introduced contig 'contig.00001'...` + +- **Alignment Details**: Shows the alignment results for each contig. + - `Part 0 of contig 'contig.00006' re-aligned as (5) at + 7M...3D@[8,1433]->[7461,8946].` + +- **Gap Handling**: Indicates which gaps were ignored as + insignificant. + - `Ignored insignificant gap of (5), 3D@[790,789]->[8280,8282].` + +- **Splitting and Merging Contigs**: Documents the splitting of + contigs at identified gaps and merging of overlapping segments. + - `Split contig (1) at 24M...1I@[14,7691]->[0,9032]...` + - `Created a frankenstein (34) at 24M...1I@[14,4185]->[0,4171]...` + +- **Visualizer Compatibility**: The visualizer diagrams are produced + exclusively from these logs, ensuring compatibility and consistency + between the logs and visual output. + +# Limitations + +The following limitations stem from the choice of principles and various +assumptions that guide the Stitcher's operation. Understanding them +allows users to better interpret the results and apply post-processing +steps to mitigate potential issues. + +One of the critical challenges is the handling of ambiguous +nucleotides. The Stitcher's **Ambiguity Omission Principle**, which +aims to avoid propagating uncertainties, might lead to the exclusion +of significant sequence data, resulting in the loss of potentially +valuable variations or mutations. + +Moreover, the calculation of concordance in overlapping regions +assumes that local concordance is the best indicator of the correct +sequence. This approach may not fully account for complex genomic +rearrangements or context outside the overlap, potentially +compromising the accuracy of the stitched sequence. 
+ +The predefined threshold for significant gaps, based on specific +assumptions about RNA secondary structures of organisms like HIV, +might not generalize well to other organisms or genomic regions. This +can lead to over-splitting or under-splitting contigs, further +fragmenting the consensus sequence. + +Additionally, the Stitcher's principle of scale-dependent credibility +might overlook important small-scale variations, such as single +nucleotide polymorphisms (SNPs) or small indels, especially if they +are lost in longer contigs deemed more reliable. + +Another critical limitation arises in the context of pipelines dealing +with proviral sequences. The Stitcher might attempt to "fix" sequences +that are inherently "broken", such as those that are scrambled, +contain long deletions, or exhibit hypermutation. In such cases, the +tool's corrective measures may not be desirable, as they risk +introducing inaccuracies. This limitation makes the Stitcher +unsuitable for certain pipelines where the integrity of such broken +sequences should be preserved without alteration. + +Finally, the handling of multidirectional and cross-alignments may +fall short when addressing complex genomic rearrangements, such as +translocations or inversions, potentially resulting in misalignments +and stitching errors in the consensus sequence. 
diff --git a/docs/design/stitcher_practical_plot.svg b/docs/design/stitcher_practical_plot.svg new file mode 100644 index 000000000..c2a7587f7 --- /dev/null +++ b/docs/design/stitcher_practical_plot.svg @@ -0,0 +1,582 @@ + + + + + + + + +5' LTR + + + + + +gag + + + + + +vif + + + + + +tat + + + + + +nef + + + + + + + +tat + + + + + +vpu + + + + + +rev + + + + + +3' LTR + + + + + + + +pol + + + + + +vpr + + + + + +rev + + + + + +env + + + + + + + +PR + + + + + +RT + + + + + +INT + + + + + +V3 + + + + + +GP41 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +HIV1-A1-RW-KF716472-seed + + + + + + + +1.1 + + + + + +3.2 + + + + + +1.3 + + + + + + + + + + + + + + + + + + + +1.1 + + + + + + + + +1.3 + + + + + + + + + + + + + +3.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +discards: + + + + + +1.2 + + + + + + +2 + + + + + + +3.1 + + + + + + +4 + + + + + + + + + + +6 + + + + + + + + + + +7 + + + + + + + + + + +8 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +anomaly: + + + + + + +5 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +unknown: + + + + + + +9 + + + + + + + +10 + + + + \ No newline at end of file diff --git a/docs/design/stitcher_rule_1_input.svg b/docs/design/stitcher_rule_1_input.svg new file mode 100644 index 000000000..1916fe621 --- /dev/null +++ b/docs/design/stitcher_rule_1_input.svg @@ -0,0 +1,83 @@ + + + + + + + + + + + + + + + + + + + + + + + + +Reference X + + + + + + + +1 + + + + + +2 + + + + + + + + + + + + + + + + + + +1 + + + + + + + + + + + + + + + + + + + +2 + + + + \ No newline at end of file diff --git 
a/docs/design/stitcher_rule_1_result.svg b/docs/design/stitcher_rule_1_result.svg new file mode 100644 index 000000000..f0adf1210 --- /dev/null +++ b/docs/design/stitcher_rule_1_result.svg @@ -0,0 +1,51 @@ + + + + + + + + + + + + + + + + + + +Reference X + + + + + + + +1 + + + + + + + + + + + + + + + + + + +1 + + + + \ No newline at end of file diff --git a/docs/design/stitcher_rule_2_input.svg b/docs/design/stitcher_rule_2_input.svg new file mode 100644 index 000000000..f6fdba0f2 --- /dev/null +++ b/docs/design/stitcher_rule_2_input.svg @@ -0,0 +1,83 @@ + + + + + + + + + + + + + + + + + + + + + + + + +Reference X + + + + + + + +1 + + + + + +2 + + + + + + + + + + + + + + + + + + +1 + + + + + + + + + + + + + + + + + + + +2 + + + + \ No newline at end of file diff --git a/docs/design/stitcher_rule_2_result.svg b/docs/design/stitcher_rule_2_result.svg new file mode 100644 index 000000000..cfec558b1 --- /dev/null +++ b/docs/design/stitcher_rule_2_result.svg @@ -0,0 +1,51 @@ + + + + + + + + + + + + + + + + + + +Reference X + + + + + + + +1 + + + + + + + + + + + + + + + + + + +1 + + + + \ No newline at end of file diff --git a/docs/design/stitcher_rule_3_input.svg b/docs/design/stitcher_rule_3_input.svg new file mode 100644 index 000000000..be44cfe22 --- /dev/null +++ b/docs/design/stitcher_rule_3_input.svg @@ -0,0 +1,83 @@ + + + + + + + + + + + + + + + + + + + + + + + + +Reference X + + + + + + + +2 + + + + + +1 + + + + + + + + + + + + + + + + + + +1 + + + + + + + + + + + + + + + + + + + +2 + + + + \ No newline at end of file diff --git a/docs/design/stitcher_rule_3_result.svg b/docs/design/stitcher_rule_3_result.svg new file mode 100644 index 000000000..f9289a002 --- /dev/null +++ b/docs/design/stitcher_rule_3_result.svg @@ -0,0 +1,109 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Reference X + + + + + + + +1 + + + + + +3 + + + + + +2 + + + + + + + + + + + + + + + +1 + + + + + + + + + + + + + + + + +2 + + + + + + + + + + + + + + + + + + + +3 + + + + \ 
No newline at end of file diff --git a/docs/design/stitcher_rule_4_input.svg b/docs/design/stitcher_rule_4_input.svg new file mode 100644 index 000000000..e7acfad78 --- /dev/null +++ b/docs/design/stitcher_rule_4_input.svg @@ -0,0 +1,115 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Reference X + + + + + + + +1 + + + + + +3 + + + + + +2 + + + + + + + + + + + + + + + + + + +1 + + + + + + + + + + + + + + + + + + + +2 + + + + + + + + + + + + + + + + + + + +3 + + + + \ No newline at end of file diff --git a/docs/design/stitcher_rule_4_result.svg b/docs/design/stitcher_rule_4_result.svg new file mode 100644 index 000000000..a29e15d60 --- /dev/null +++ b/docs/design/stitcher_rule_4_result.svg @@ -0,0 +1,185 @@ + + + + + + + + + + + + + + + + + + + + + + + + +Reference X + + + + + + + +1 + + + + + +3 + + + + + + + + + + + + + + + + + + +1 + + + + + + + + + + + + + + + + + + + +3 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +discarded: + + + + + + +2 + + + + \ No newline at end of file diff --git a/docs/micall_at_bccfe.md b/docs/micall_at_bccfe.md new file mode 100644 index 000000000..5d7e96850 --- /dev/null +++ b/docs/micall_at_bccfe.md @@ -0,0 +1,83 @@ + +# MiCall at BC CfE + +The **MiCall** tool is designed to process **FASTQ** data generated by +the **Illumina MiSeq** sequencing platform. MiCall is primarily +utilized for **HIV** resistance testing, alongside research into +various types of sequence analysis, such as the examination of +**proviral** sequences for defects and other characteristics. As +open-source software, MiCall offers transparency and adaptability, +meeting diverse research needs by supporting the sequencing of various +organisms and accommodating the unique requirements of different +studies. 
The tool is specialized in deep sequencing of viral samples, +differentiating it from the lab's other sequencing tool, **ReCall**, +which is employed for population-level genomic sequencing. Our +laboratory uses MiCall for sequencing **HIV**, **HCV**, and +**SARS-CoV-2**. + +While the results obtained from MiCall are mainly intended for +research purposes and are not used in clinical settings, there is an +exception for specific **V3 loop** analysis requests. This particular +analysis aids in drug prescription decisions based on mutations in the +**V3** region of **HIV**. However, for general clinical results, the +lab relies on **ReCall**. MiCall remains an invaluable research tool, +offering comprehensive deep sequencing capabilities and robust +analysis of **proviral** sequences. + +### MiCall operates in two modes: + +- **Remapping Mode** + In this mode, MiCall maps all reads from a sample against a set of + reference sequences, updates the sequences, remaps the reads, then + combines the reads into consensus sequences and coverage maps. This + mode is typically used for clinical decisions. + +- **De-Novo Assembly Mode** + This mode assembles sequences from scratch without relying on a + reference genome, then uses them to provide the same kinds of + consensus sequences and coverage maps. Currently, it is used + exclusively for research purposes. + +The operational behavior of MiCall is highly automated to ensure that +results are generated without the need for manual intervention. This +automation allows researchers to focus on interpreting sequencing data +rather than managing the pipeline itself. This is particularly +beneficial for labs with high throughput, providing timely and +consistent data processing. + +### MiCall programmatically interacts with several systems: + +- **QAI** - our general **Laboratory Information Management System**. 
+ Files that define individual runs (these are the `SampleSheet.xml` + files) are produced via QAI's graphical interface and placed in a + specific network location (`/MiSeq/runs/` directory on the + network-mounted **RAW_DATA** drive) monitored by + MiCall. Additionally, MiCall utilizes QAI's web server **REST** + interface to update the database with new run results. + +- **CFE-scripts** - a collection of scripts responsible for producing resistance interpretation reports. + These scripts monitor available and unprocessed results produced by + MiCall, extract and reshape them, and upload them to the + laboratory's database. More specifically, the `miseq_gen_results.rb` + script polls MiCall's resistance interpretation scores, stored in + the same location as all other inputs and outputs, and uploads all + fresh ones. + +- **Kive** - our platform for version control of bioinformatic pipelines. + MiCall uses the **Python** interface of Kive to initiate new jobs, + upload inputs, and download processing results. + +- **MiSeq Hardware** - the physical machines that perform sequencing for the laboratory. + Internally, MiCall consistently monitors specific network locations + for new data from the **MiSeq** sequencers. This data, in the form + of **FASTQ** files (and supporting files, like those containing the + read quality information), triggers MiCall to interact with Kive to + initiate and manage analysis workflows. + +MiCall incorporates several features that enhance the quality and +reliability of sequence analysis. It can be executed using **Docker** +for simpler setups or via **Singularity** containers within the +**Kive** platform for production use. Singularity provides lightweight +and reproducible environments suited for high-performance computing +contexts. Combined with Kive, this ensures that MiCall can operate +efficiently at scale, facilitating reliable and scalable deployment. 
diff --git a/docs/steps.md b/docs/steps.md index 57d0695c9..bed3813e1 100644 --- a/docs/steps.md +++ b/docs/steps.md @@ -44,8 +44,9 @@ Individual files are described after the list of steps. * in - fastq1 * in - fastq2 * in - merged_contigs.csv - * contigs.csv - the assembled contigs, plus any merged contigs, including + * unstitched_contigs.csv - the assembled contigs, plus any merged contigs, including the best blast results + * contigs.csv - stitched version of `unstitched_contigs.csv` * blast.csv - multiple blast results for each contig * `remap`: iteratively use consensus from previous mapping as reference to try and map more reads. See [remap design] for more details. (The denovo version @@ -58,6 +59,8 @@ Individual files are described after the list of steps. each stage. * remap_conseq.csv - downloaded - consensus sequence that reads were mapped to on the final iteration + * unstitched_conseq.csv - downloaded - consensus sequence that reads were + mapped to when remapping against the unstitched contigs. * unmapped1.fastq - FASTQ format (unstructured text) reads that didn't map to any of the final references. * unmapped2.fastq - FASTQ @@ -178,7 +181,7 @@ Individual files are described after the list of steps. * remap - mapped to other references after remapping * aligned - aligned with a reference and merged with mate * conseq.csv - * region - seed region it mapped to + * region - the name of the contig. Includes the name of the reference seed, plus an optional prefix, which is a number that makes the name unique. * q-cutoff - minimum quality score * consensus-percent-cutoff - to be included in a mixture, a variant must make up at least this fraction of the total valid counts @@ -215,11 +218,15 @@ Individual files are described after the list of steps. 
* pos - 1-based position in the consensus sequence that this insertion follows * insert - the nucleotide sequence that was inserted * qual - the Phred quality scores for the inserted sequence -* contigs.csv - * genotype - the reference name with the best BLAST result +* unstitched_contigs.csv + * ref - the reference name with the best BLAST result * match - the fraction of the contig that matched in BLAST, negative for reverse-complemented matches + * group_ref - the reference name chosen to best match all of + the contigs in a sample * contig - the nucleotide sequence of the assembled contig +* contigs.csv + Same as `unstitched_contigs.csv`, but contigs are stitched by `micall/core/contig_stitcher.py`. * coverage_scores.csv * project - the project this score is defined by * region - the region being displayed @@ -343,6 +350,16 @@ Individual files are described after the list of steps. * remap_conseq.csv * region - the region mapped to * sequence - the consensus sequence used +* unstitched_conseq.csv + * region - the region mapped to + * sequence - the consensus sequence used +* unstitched_cascade.csv - number of read pairs that flow through the pipeline steps + * demultiplexed - count from the raw FASTQ + * v3loop - aligned with V3LOOP + * g2p - valid reads to count in G2P + * prelim_map - mapped to other references on first pass + * remap - mapped to other references after remapping + * aligned - aligned with a reference and merged with mate * resistance.csv * region - the region code, like PR or RT * drug_class - the drug class code from the HIVdb rules, like NRTI diff --git a/gui/README.md b/gui/README.md deleted file mode 100644 index 77700267e..000000000 --- a/gui/README.md +++ /dev/null @@ -1,14 +0,0 @@ -To create an icon, create a GIMP image with the [following sizes][sizes], then export as a .ico file with the listed colour settings for each layer. 
- - - 256x256 will be saved as 32bpp 8bit alpha - - 48x48 will be saved as 32bpp 8bit alpha - - 48x48 will be saved as 8bpp 1bit alpha - - 32x32 will be saved as 32bpp 8bit alpha - - 32x32 will be saved as 8bpp 1bit alpha - - 32x32 will be saved as 4bpp 1bit alpha - - 16x16 will be saved as 32bpp 8bit alpha - - 16x16 will be saved as 8bpp 1bit alpha - - 16x16 will be saved as 4bpp 1bit alpha - -[sizes]: http://stackoverflow.com/a/10819673/4794 - diff --git a/gui/micall.ico b/gui/micall.ico deleted file mode 100644 index d603925258ab2f41e9e90305d660ff2cd1c0dd96..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 293950 zcmeEP2VfP&)*gFBk%tuvg4mx5HdJ^hV!;BUbW~8Pf>e~|`+wie&hG5qdz0{zJSBft53_g6%PMb3uAhJqdZsK zewGlUg}`rt-xrEj=LwOHW9oNl(dt|wZoNQ=(xuh)2hJ8^=q3EEu8+M?i1#kjze|f) z)N|w2Lfm(seIDml+@^oyd^sW7K7;s}qU(NNpDXNgoiV&svJ4>E*?gsM=!D!q#Hb#-U=vhN)uTp5@}$ zvHh|}OK)Z5cGIze0|yQoII!mNe&KhH95t}r@yNH&A3hxAq{q%nuGPKW@u*VwFTTHF zyW=UB-naDXM(vKzJ?EVJUVXpa@wDyRw{Ls3-SN!rJFwI4_}cARS=%$)9pAnqE9=O1 z97l=c$EIgyZp&P4S@+5ftqYNX@$GZEbdC_4>-onCgP+ZI&N=6ZOE0}dlq`9E*^g99`V?tcZo+Iy-!qntb%y)c_C`N zATZ7qM9mk4s8v&lr=F@Ho~vFSbia7z<$e2nhY+te zdRDY~yM$=c_zCewlNzGydzXl=T`m#byIv{Y?{=l=*}atL`Tq5ySC5-Tr;g8w-aX6U znkPi(cb^k|dfg`a_P$N@>vI>5y(0Scy<2?H_g>NegL}n?ACwm#_OBp%bgzf<0WqNe zgJQsk4~UOGd`Ju&@G$5l5f^=-2p>~lL`^Oao~S1#j;kT2$CnZb@z;x~lb;q-COs#@ zKkEQqxmzTI5~h_G<0CqW*yx&KX3|4qR`O$F_KYebDWML^>S9`aa}gKYL!{1oNuH&^%08~wG+#hbPyTo!>~VCEY27rRxax;K3~yStXTe;SP5Fc zwvWhMJw&Wp86nnZ4ijH&2oq~oM~RK=KNB0)PZVEm{sd)$*t}`7_zfkPkI}hbD z&ZtQi>xDCLEFTS zS=+?VKYuTN0UbTMPyG7pA#vijpTwVkp2&vlicbAAN8sX1F1bie@kr>rrS823j`(!Um`_Ix?A^Vx5H~k$-0bbP z@4nmi?WRo{am23_CB+r^uLMdV@>kB5W&=5G(2N37I#9Dw0GVq+=2?(&7UY=)8D>F-S&(6pxX)vdcNS!zycRMVnn-Ps%@$;(1$k~k zK3b5E7G$~w`Da0P%6-U23-Vpo2YG2hmOJ-DrdyDW{B1#oTab-%AF|kj?3DYE$FhX{ zL><*F=m7qf8+3sMS!zK?S z1-)cJw^@**7W9|}on}FPTF`YC^cK$FZ$Z~skkuCSjdMTrjRjeazP}>df(*By&n)Oq z3$ogR{33p!QqLkC)r@fP%s1=(*w zr#bgSw^`5u7UaDJnQcLKTaeWjWU>YMY(ZwrW03t8^r{8jU_lo;_d_RI&>48vL<@QV 
z*RHT2+bzg*N!*A0x1b*^=ni=;bf|lO{|7AS77Ka=_b#)b4=m^a3v%Cr{Kx(_d5q_H z4fK%(o$K5W9c)1t$@-yNEa(sm`oe;)ki>l+<2m+$*Fs-A_j4b5%!2N+pqDJ@9Sgd~ zf}WAYeIDaE=r(yRbiH#w?2PTu!+5WQE$CSbIu&D`U_oa};y#Z-4_na77WB2%=Iwv& zB=gmN8KSAe#plAdc^Nj2B-lJJ!}jqIkLmNg_Q8iQIIs3AwwwX#Pure@{YM!^ySFsx zaZq-4?-eD4c;*=)n!GN=`0%S?%RDTir?i9}^14Wy*F!8{+70%_2(e{LEcAb>IB;OS z)Z2fa`U5qyt$^)v9MOc`pNtJZ&r$}m`uAST`sMy7tJ&KF_up(;WgmC|KcD8IC!Z{D zS^wgGixy9}Xodq7aK3uhoYq;Mk-#QlQeF%fJao z!qbn0uREN0U_e!zAJ{7^Jn_ib^dl1v4aE6dyNu7;x-RS69Y-SOkHPP9-Nt9ZGSABT zAufE(o0fH3ctqCPtRL29ZJo9)T%I2rKP`1lYTC4ES@Ji3rlzgQOkEHk-VxW2jZaO> z%*@sSwSZ3DKclG10MoaWQhJ z5T6Va;3aC&Z-5RmHTJd&R`@b;R`8`^2OP zbw$*qnj#^tf|xq>X%Q7!UnIugFD6fDBBCa~A)=?gERqu*5;4)Qi0H}9#LUEsA~vSB zn3ePh{EIpwc1jB|8@@+;TwReeqmr0Dt$~=GR9&PbKPRS4?F=90O)+Q2^CCI<6)|__ z3nDtEi%6W_Ow5~AL&U{(6Um7!#LQWB#DbKU#H`tM#k6Vfi{52mGC$V%3UXV%4$%;`5d8 zeU^VDRh7 zV$a?UV$c57;_$w8V(&iqLGX$8?f*jTKk%hEaB!12ba=Bk{KHq`ha+FZH`*zVWPJnQ zXqU+P@mu&uyTy+`Z52QL^qusRj{dS&9HXCvSi|vO4~XB69~37}91 z+G|Uk&kKrOS+-ocyYIN8dUYtk%8ysChW~E8=Z>;vZ@lVKb^g{0m7l`VTD9ubs`YZ+ z`n9T7tyb-^d+siGRWY8w^qL3WPoF>h*_Vf;r+-kl{)F`TT|StffGZxjO`b1S=7HAf z=}~nX_eoD5)3hTU>hjMz_ut8J&$^!dHIeVm>?t6#s->FLqE)6#pjZS$nM z|KaxCI?YH=j~zKVeSUh(C-G_NiM@sl>D{gEldAnEJ9OzjH67>D($iDY`0s>XeR}ok z($T*Eg>F51j7dwIoRmI4En@U+Tr_mh=!EEQU23cQYj*4TL0H(3!9zY7GkVk~5u-;B z88URl{M4RZYpV0Ld-duyaPW|kqee$WM2roefQyC<9{AA*yX*q~m$Y8U7D7i-e%qoG4S867cUQdCS-Ol)N2xN)C+GBPaegSXWE zO+FYnWXPy7<0GSD;-)7|kDNMb+^7h&(5s<({wBRX=IxPF6XFt*6BCnAMfm85Az=g5 z{g=1+DD2};A|_6aNlctQJ1HeOAvSV+#Hb+y!&<2O8w?B^88Lc7faAO^z@@O zYSeh8UV{epUU|Moji(=d=)r&AAcu#SoFDYR^LQ$!jkdx+$4)7Irif$)%>M5LB#~n2t7DgWv^uwsX!qPqcaQ1@U9kH-*)_X~7WduJNMy z32n7S>x}W}?XoATh_xNt3v2vXVMR?6R&>L9GDPlyd@_;TGi$`TFgiJwOdwcDL~ z9qzrwnihMvY7gxsPJa|}oJv-5!sFJAgsRqz#3#LkV?39X@E>_C`!+oe{Xre9D;9M| zM-{V14ii?x`r;VdG}~{~KpY!A>QZ?vpKWGhRm7g2MZBq+Y-eWDGgiv1TGqUjde;0o z4Xin{U%~H}B+W~yk7Etxxmhz_ux2J#$9>PpekP&c3Gol0AODthkNNa+>(z$hw_xpm zHu`!wj_`jeKIFD->kKPpW=+}Nj%_LWu(kQiNoey`NeB0@bB?e1d?3zE#P3tq&p++5 
z`+;$h{kY!>A9GX4_9MdYz!+AMaV1^h4~I9SjYf!ty>4yX5Uvj+4zt`^xS)lkZChtK z$G3i)jJQ&N`TMWG{*+@fd&Y~hAACRK#@-pS{Rt5jFy_x$b5rU-cK$Bw`2FNDD{Vn@ zYhh|DYsIoYPCJb0WTbbHv~j~&=lG$68?B6m@8JGF<*`*OK9GHwgLlWeCyu*6Wcw4x zKPubi^ZxwPZl}(b%Rj)FbVO|LJ?oF(f0f7o`s)w$rHB!z0nW9*{g#DmJIJ^Z z4>HEIVBQ<@J@L6FO{f&I{mGGxIlhj#PIGJTo)u1=Uu}s+J3Y`|Z|kQYcR0tt+&IBn zwc?|@tCR17?W|oh!rHlgo^$+%!&|Hk>qbl3xA$}B_@3{USz9(uk-w>fIc|J+ ztUETkR>=0_qU%^o7k9B%EbWQs9pLcLAAkIYSnj8YZHCL(q;BZ=uRmB{Zk#A-_pU|y zu#6G%`ctR=lE?U-SSRa_kF6K7{b{icB~S5vu30t6`t{gBr_QXbt=6`!v#o>RZ*$8! z@-4=Co{TH%<3IkmUG{a`w%LetrdS&>maLQfIz9fiknK;8Zz8W-^ZCbetU11V(|FXs zS)M%cvx8If_`w6~txaD{g1i`qcQZ!T#X46(E+kHWGi3XS0pPh++q`ejcS~jcY@4y) zUw+x+jOm|$o&-i#BF?+q;C;*DJK6e8vPI00KY#w|JL~Y_O{x#58?l9u?I$O`Wo51z zF8S$;jpHFlqfk$reCJys<92MDYwg;VF0uLD?hI?UCcOLa!E@j5UMjDlJY)=;?UM&6 zi#Dzwi#`m8ZfG5{{Ta#cfX_zBaohamWV`JoYa8C-&g~0i+u!eAB4sb1eed2?inxz) z;5qh>*Ydr8i&*qmTjJ$&@LiB6XU%9Avi(^z+DpFSyF>g{J_GAvteeluHi)zR`!ePG zIJkcU$_*X_3rCOcm&AP@<2l&}T)P+dNPa*a97onkex5g{ zd&u_ZrgX#H4aYta?`{tG7rc(=;PY^N`HXCb^B}(-J8T_4e#H6>#C;y)IbOqSc@OVp z9jxn{uM=e5^XK-+rv2`?CI)<$B*%?1m3&VA;&XAF`K)Y@`tBrb1KJ1N=P{n+HN2Mh z@LtxzcfdMX_x!m%L$*I}&ihhs5I2;mlJEAcl4DO^{U4$Gz3rbw`zLeRK4n3ye*Rz4J|ff3 zcJ@m(#g>T^8X4n1M~6cKje%*pIwdrA>a6~3p)OjC%$IAQcHY;mOXRFrAv!? 
z&E6EJh7YY~O&Hh6nmD1cHECiq*ek89sZnjLn5iA$Q*^fC)CkVNY3Xqf8lr2z%gm_0^&(=Bp=Ft16NTf~r&&lhH=Cz?T2M zBU*3&1b=K7=2J2XelF!4w!`+=Hs@22Wg(h?n70>lI}1U9QbLp~E5u!Q3Q@7*S)y{K z3&el^bFrLHsfJN~?r9;Ot$u;1g&eOJYq;|(^=b=Izm5bi3*@P(Z@mKM#0Xw?#q z`C9_HUT2ASTAw4@wmDz4d*?#YzU?K>989MUCB(ZOON!2&t`hIPd$s7&`C8<4l@i^$ zT#tOP8<7uo6Y{t26{ANA5jLQ*hzQ60%vd2NPZXF#5y;sR&b$ogWb}Lt=V7MDUnA#W zIRCA2IA@~g zO6JdhRnC)ej)e0goEy>eB6?0_aYkF@fOQc|7PoiiN0u+??wKQT=0`9$vgY%?V$I4h zIX|*?^~at$lGUrj+HqpjhLQi>+{w3bInJN# z-ZfXwp?tS{UbgcnhxV@!hxV?`pZS!Zw`D)4a{Q;Fd6kfW_nuh%58T9J{3miIr|V~5 za2dD7#fn{4jHUc#|Hc9QzVxySQK`M1cjc8=mAtxS$*ZosvLrVCk|p-=|J5b0zWOR2 zIp4nFtdgbfy63*~_m#ir5p0z!R(`bdV-+8I_~D1K`$&aH9=WId1NS^~&t2D*w5z}9 z%6sm+r+j%{`^dwUt2|bvQl-jOo~iWAqm>@3RJn3xR`TzPxa7)<)eXh3E`Q&Bl^?16 zaOH=qRC=aLrK*ottM*K_$E#I+rdqWsRVqDJ@xh9fE8O=;`BHZM#jimP6)RP$gi0Sr zmL9gJpRMuibI(2Xt*wffV~Re9{0 zO4qCIh|6xQ^30RBpGaPRGQuXJad%VgGy8cqg7&3Ci8G&FfQwYGQD4i z4juSPQromxkdYDA`1LpH*1k#Ce?#5ZUw^$x98O1eXw#tsF31S)+P!P9blljt4Z8JN zlje;ZG`Ptg{}MO6{%VuvEjivPZMt^tK9e{0?$NC~hIrxd_K_JGiA|d{eZ5g>UH^?u z8#is*ayTx_7|`?mo-^yvC-=QgdHw`|f>Uti*;rp=nSY4a}MFs{g8wTn=4FLhA&ZauoUY2UVO z(`Gl@<9}J1maW>f>Chtu7bnY`QWr)%dUK*Eodbj)7 z1q!uw)c#)AuKjy|IH>=?p<$m!gby7) zY{c+kpMDxPeBhw|pJk+`zSp(Q+s^f+(fJ--`}ZF-AZ+-sVZ%p_8WAxnB78(d_^_~{ zVPS&?MMg&U?%Jkp8GHPTmu}O!YmfeYKO8VHENtYch*9C#K8pw+^J#eauwh{X2KDcO zXMVR$S*QP<-s{%8Z@&Ry1BVR@|1|uwaS;RoCZ|>8l zKe{tAd_4M}m^3G8(L%JaFl9#ajL66d<3@!K2NU(~Z})%k<)wS{W&Lsae47e@~ zoZj=QOYIk2^6vH@f*VGS8#e(j3)QEfKlnE#F)=1;(zu9G!-fw0u>GA^o?~CX_{uA9 zy17i5vSn|%<(6_thQM~)ZTuHMaHwqAGG%VQ>B^F}y5mhHuFP>OajvetKuhJP2|NGM zEq_Jo#}zjTapFR3_X+{O3f`3vyI5j|O_rg4>*Kn_^%=ZR|Mr*u`huK4)9%;fC0Byr z`TpedV?6!i?e$Yj^Qxll%oqVg4zN&Ai0eSNfXe;N#6GYu>=XNz-NqQ%&7e0xLqN#* z6yyJ9VjtKS_KAJ-#PRUk+-}5=9=DVgnTV$yLGHnEA}>&3EV{R-IB}2WSWz9Q%cIV*e2U%A3jB2+M-YF+l@Dhc=Vi0o&4K%&ps`- zB3^4ne5U(-7V5fO<)zqtjz@bkPekXY$h?$?G0#|0<%~>!3^^gBN`ZuP%qvNpR?*30 zah{9M6NBeL?g+<6<*{6ju{&2j$GC9C57n!So#;3slL1?)y0t}C8?0%X8pZK0hB~fA zeLA0pW6XRS=F-^tEz_CbQpK0!QVqE%Pbo@5NwR;>NL0Bg%!gra1@BEl9egI4|AM+z 
zJ`QqVoP3;XWnZRFK1=ciVxqsG=hp}H9?w*_j`;B%tgniha+Tr{nFoZN9K3Vpv~b*# zFotYb<>a8R$a|TYguEH#z|2a10rQP7Vx1JJhBbSpP29&Zp5rwtUxv9h$eH0Y$>-s- z;QjF3pe?o;&s-iphy6am2{Rf}L#gvtN%{n%&ABFiS&gG_HUS%_sYJT+lk+Pbn=7d&aRDq*2NsucJmVC3~Z2j zCCvXIrOtm-&PkgWZT&V$KHGP@)1AXde)!6h%ap92L%xrDt#Ji5-v zZPE>;r?#->%N#1?GogRq?_R7=$&$~QHouvNj$*FQ+?e;ndzlxdw?FUOk>qdiCF{U?d|ka$u3a+}IX+F~xH4CXbNBvj*^=(Kw~a$)eh;r@ zE(Pyhv8<2hR;-7TyvN)#nMZ{_5F_kUyS6!xKl@Ky5ogR-Bad*-&fK_Wjk!7Lsc%_} z(%Q&84f8zrzd-)L;zjSuTps2^ee+ek{}SXV^_00r%$L#ImQ7JI_lbFPY@2SnFWbaU)n*UJuanFV zTDGJc`uM(O_JP+i?}xnMQeU~IP}jk7=Z^WZ4ii6|FJ|r>bIR0nye9j?K6Pl9Gykyv zI{%MyV_|AbnImGh%Un3Vd(sNzTCG^t7r8sj%u6soe*k>-kxc_o4v_f*=0(gKU>#p= ziFOX3LjKI+jLzVz*0OE(LA{Fx=>OR{%76A>VY8VW2V#(UQ2vd%UCi^7`E=+f^LNaR z^ZZ;JoB26uu%!LS2{AW5 z$_M7I5%(GBLwYLm2_dUGwm&z=@n`?}?sy;N$I``J@s4|V^3~V|^GtP)pX%c$o<8uJ z?~xC-VclmojX^$5pq$;ZIaOSxcgRV^eOMo6 z^LSsG2c_$0dHnbfa$K2X#oRUW(=qUaxpD3A7aJ$YJS65KF(-@IF}Gc?CHQWb%cQrT ze%dK9MQpGS> z2lFHJhwq^$`hQuj`me@)fRZi9VO*E_iRAHLk!xvg#N5e~DnHshuJ%vKd?4bSycTTx z4R#)L@0hD6`3pI`%vsy?MWn=&`wRmlFTC3^=l&BT%+Fqpd|KYG`_6YvUQs+sKK)I8 zD}cF{d=3+r>@Rb=n2Si-hxPw%zO{N1{^mL6Ei&hlxn}(KfZmuJ#`BO-mmUE^L?3P$T8owbD@+i>@#gg z+WC};%*Ui2k~y*{W$rG|;Tm2mb7t`z%voh!Du*~pwn1KEn`~QQnsRBl)wxq{`hS_h z^W$U=Dsw#fekt#_emhg~3-Uxcj^AOtnY$`^2Yk*v&i(t=S_k&6Lr$el*yk~xBNliq z@8P|ygWv8T5AvO|jjzxaF>U6AlaD%gDw*T>Q$|r%GJjg~4P*jk1anl`cfR9Y7&G#V zGv*jy_Kp2zKJFpNd)k(VZQ?$U@tn?G=KIm_VmGmZI$1Z{U|UKqO_8#dePN$++kfg3 z>iW&FSNJYvelM`<@cT~Wk7MlEKF9L_#+Lb`vd_q6{o(Lea!mp42TeT2JXc=BYxzFp zyRi9b7wRNG%4a}ZY*X?P`k?djk;j&c@h>jNp1en$>x{kR`FRp|lrNIk<+!8o7+2!% zi0t<_QV!yK4>C8G`MKO@uJVx|zLD4PTKO*IGw2+0$Sh!b$F_NrkKA_<<=`OK{m0mE zf=;Au=ox#wW4>$pD#RmsM)n;#it?WFn>n{!N5ypnKmTkK_i>Epcnz;TjC*)5>tJ21 zlXVl%-ggjWV9_tI#*sqYXn!K#W9ejfEWiB%N{0sa` z>L0GfA#tC_c#gKEd>4EMz6aDnouP8*@eUx*I8*jsh1r&L|u^k{%>agiQTW*{~2;DOy1YHRlKuR^0ZnfaM0p3sMFFEhT;3L59|y3l*9O6i}-mKa)#x#PXAE{ z-=*q5WIk+w!2a)&_#}U*{vSo3Pe{A@6l_s`PwM2!qw*ZDkysAaf7GpXvfKY}g7=^E zk^b!p%wPP_sY7vZ|MmDA{1e#!opMY$wj5(Q{=XlWV=u=a`?()~%|Fz|-tnjWtY25A 
zJaFx{e>)HBsW)-HZvEPDoByZf%>U3yO7?*NAt&kMDE>cc@c&VHjOQp%i4|hzNVfcs z{%igR7B{Smuv)bcR?Ql6Ed;(46u8}fyXeh1GJaQV&FV3b{RVy!C!ozpy~{WhaLu>X{ySi^*SS%+FjMIB({7j0=B#y(`O3Ae&N zdQ{He-Fc_@FtF#&k&B;sT71rD;5;6&(7Dq!z|FOwQdXDFHz<1VMtrlbG^mW#tqZ9v zsGQZk>#a83juO<(Chkj8=Xovf>56+<2kYvBI(7YQi*2&)P8~|hzOYZ2yW<@2rA}{y z?_B(7MG@AhzSz%u{t1M&LI=?&&cWx@cmETej{}@5{yT|%D72gMmqkGSkjyKpf;=rg zIWi7p-ce5ZRCr6fczsdn)8ptLZ{}%iuwTbIMm>>U7$kYEj*9a7~{r9fX$dB>Yd`zC!{7qgb&y)B6KW-ZH91o5Q z$N6H=C7@y;tOyasL0r$KiGQcEh>FJGOyL2|w=Buaz8;F7b53)dG zp0bb6&F}tlEI2m&d&F0Rxc>GA5WhuxvrT1C;;$VQQPCKjaXdiY)qG5zCU29+$?N2K z@;)&@ED#fg)h?iXqny)vp0beNI_BRu=eXPgx&w3%s66Na&_ke#phs+a6lD<=jlr41 z1DbEi!{lZ1GCx80c9Le(nhdu$ZL`?nDZ2{BXXI$f7Aj-2lK=gsCfoKyo0kr{j z0rdrOe1?HWfj$F`1&squ08O+h5@it;jlr411DbEi!{b5xdvWA#@;G^&Jl_vQz4s32 z4G?uA{ey==#0;@h5=1Of2Al)>C(8i#kNsqSX~WzMx(8GlR0H%Hh`8?#`Vho%7z5(? zM1f*JaiD3S1W=+)Nhph`XbjFY9?<+tUM5eIx5?w=b;^@x2#KYuL6?EF48ZO` zW}|O%9*BCM*r&`Q_G$0e1-%XG0irK71~eJOXQw>pcyMg^ep78qL#b&Y_KT=!49-*@ z(0t5N^EY{&JWt*y2I4`K1GEP|2K5BB2Gs{W4WbMnrid-=3lMYvgpK|Pv41hBB#8Yi z4|)pJ5Ja1%4~RN{Jc#y>#yw@@0#L@e=ZXy_ONi}dZx;LSy+>p{^pH6E*kj_?N|nU% zA}Sh#GnNOy!xh2H_uVUgyrrx-aOIU^=XvLeFTnFlY~CjZv>b@VJ~2XDpf9K`s1b-V zfclhn0R8GqK*ZeN9SdOl=YlQ*T>-iYMC{XUX#i>m>IeD+Gzml>XeNlVFYUq$#ip`l z#E)DO_!}&5HW606I>M?}Ram#(Dt@`-67e;# zK--3LfY=~LCWA(SJ^*q4kb0E1G-W_(PzezAEwM)o{$1PIXqf&8{gUfIcY&&aYJ*yX z`hrG+CV{4dW`P!5e!2MO!3V^NdUajAH*fmS_?se4{~>?dq^KW-mmiwrZvasikQ&qz z)}xPz-%FGbJAe)P2($$fK&A|!4NX7#3DDi3QlQI0#2zvDcWjh9ls}h%t_GC@Jp!r) zY6B+qpNW73gKqqo7+s#2jM*f0qrQbwBO?(x3{U>Yz74T|q-Zks#{*1=n3Gj?}8D zo|VsB#2-atP{ac+4`?|+-{9t(#V^22x{aMkl*2&XK}|u=ffx_G8B_wq9Kmxy|I5wP z{nw!3J3y5|^*|j!13+UzjPa-a`)+Zp$?MAADSGGbJ1-iGqOtJvfVPS0AD?rMSPcAd zZXg0Q5Y!P=A4DIUzV@{s+5rFSSRi#jY0v|p=RwUuv{h)cOhNybSA1BUq@Poy z^NV=h&+A3MgY$&81)iuP{=~dkCa^RW#CTvI5Pj{h@GdZ2@5h{74S3>G#Kxe6bF zCShLl2>qDTvH3Yhj1jQpGr!YXSRL94t7Cg%bppw<1NQSs@8W)kI%1#1V|qW|%d@P` z9S!P)eG-r9{h;MJIA73oeuy?g9nX*c94#@w>z@zaq7!~^n&jsaUSpfC{gYB(Rrxqw?hB|-lyAAq+1#ULH`djk8ffx3W( 
zGl%chhH73vH_u4CQ@+2|Qdm9T7uN70!ixAzSdrs|HE9B9qOc}wio`xBO4B6lgQl1? z8NZ{1744yO@cSGqChC09MWA9<%#=$(ms_!tN}w#MXzEo~?9^){#i5iGeVr8-eLd(# zN%1HZl}1_Gii;tYv8k*TA44jqC>Etdw_#t=wAkA%m+rKt#erh&GVU(?HtFBi^f*P+ z>{9=hWxRb}mvQ%6)8p^SXF?mget+rf6Z`0r`5gNB^z*u7pvOg(#+d1G#F)l8#IaW6 z?!F84PSktCyP8_cCiM>0JH@-z?|BN}`Q?%aBrlLBByXHAd4>FHznEpO5P`r5Wz)6b2~sIVQ?G*@xE=)Dn?%e@ zDg*q37o%^nsT@iWcvjJEC>6!W+zxqQ6L22yp}T?oyKxM-2}T|~X>8pKj0L0naQr^V z3%BHcUdXMF?Xliq?doIpGqBHm7TtI3$Kx5*bMjd=8DpWxN0xf*^jO-ZGu9k)j=d(l z6Z0KmpYJV}?@+!^{cf+tyC&hCN8#Pud=L$uFnJ_8iac|H!$(slfv5b~_v4=Uw=u7= zFZtKdW8~Qh5yBccK<7^OIHrl`^}bnV zcigCxZLxl{P3&v=V9K08Ibq9!VDe2K)AE2N#w>>8X2?u?Tpig;IqQ&qANKp1YRhL^ z9w<2gSuo`WTOOFQfO6n!$OA14$S0;O$Ok!~dD`G{@-S_JuJ0+`o*Nlp`vE@!YXd>d z3495p=K&Z4AU287g4$U3`5@*U(D#2B^a`lMdFP2}$|pU4ncedc`|o#Cc9FD?jPY^g zCh(sV?v*^IuFoF#O2$*qYhCZYf2kjA%zO2RtK*f9(LCnUM<(7izdE{I@pNdNuCdKB zn7%f!CUH&75`!jo@3#`-AFz+HRK#QIxGMGU|F%^1vR$_g_n5aojtuh13D+jct!xOe z5hxQJ8$sI#EVXSwnV{bjOUi>_vViiPMB6~|f@2d%IiO^LR}K^dpPg@$vMqD82Q&{G zx=iaisq1(gyx+H1?qmS{fcnUpM~>heV2*QbbwG^2F$TbR+y$V5#y{I&TepH3e_+l} zXRPDdnj7ph{?V*Qd_+>w6dX_Itp4@#}hR*9FBq?ZIGrUh|4$Q<8_W z!8>`8n3s6hSf`C^+rGN=>2$9xZeu(Mt`*L6iQj3s&Y_2}pWkHEZ_AcYvV?L$$sWI4 z;BU$=n{2;C+Yes9!Z(H(OZrDjCTN>V+XslL`0WGAgj~x4uaDrB1tp~2=#>S*ZGw>T z0NSml46ym0GJr%L@6$7PGJrO~y?2Ylz#QXk4M3GZw}Gw%F$P%B_-7kzi)}LJk8_cO zo_R`~%E|Vp-f#DguqGn5C&vTNF0pTqP44x5PPh-I>xuaw{=5hChVIZhMV5-clcYZt%WGhi8Lv2h+q~Fiby;*!gIDru>$X7X_`QI)(cgk4?P{+OmI~%!m%n5uIR0VVgh?u+xRFHWMa-Q#_{qo5O*WcH+{^{ag>3Xkk@7nl`<*9e)#PWjK_jdfwqazH=lYbOx z{chsjr{g`CH~n;tcazt>_PL`&C5D1w+{8Aqsc~uJbUI2BaV+;AMOje9HdwcA%Wl_^ zA-Zp-jPT0`K4YM~%U&*+zJr#nUU@*9OUq*?e&F#HY+2xm8R&QpV}~Ra=XGpDJ8r1? 
zNyZ86n4yvbI!5H!AE*a#}#XvhQYZ^=^GW?os~iy3%q0wZ2v%K`F?KTe?R0_qqQBcR@K=MA(R zV4MIpYKS=l+WQXKwl6V3zqVf=l_QW{{ERZ6x@?RppUr;m46e{0O!SPzQxKa*%9+zd)~F-Z5?L#!(O~oFKRMv zbQ9+Wt|Yc|#&@!ff0iy)!9Iz{^#19V=Dlv+lsCGa>}8K3AEc~spHs;Pa}4Zpab$v$ zkIGiCo&i9FY(bM|onJr8ivMKTxYZ^i$`7mHC1>u2x#)AncH6La!%hkM%j zetqx8^c>t9aeT+l_xtsRPw!yfvG09)KByn>*YRq;&BeT-tAKkK@4@xD*G4z79$bfO zoVjIo7=9eM%#NytI3MbGc6EjBhn5d!>6H`4b4$O(Z!>7QV9Ep?Q!vNd>od5vf&I<` zWP#T%2o^6Oj|87j)GRqsYa9JH=zsbc zj{RTqUm}Wq$*HeHK34dbIv+AlKf9dE3!1ZI>U;WIDy}E7A2N>5cND1WgT?V2Jx{&h z;NFPcdFJE5XW9<+`0v_=XQ}b7d6%WD?>u(9t*=c?xcD}8uEtn!9L`7}ZmU`>gVU2Z zZc`#klK!pB?2bG2@;+}Hx?Prvg7s1N-z=4!;msRqbm!7+XQMJAy5{8$FyxJ;{&!F2(SmhuK{v^IxR4#P-4NhhnZtM3NiAg zO|r)nZGaoEmurIl*ZQCMr>{Z(zv?wtiRo|V&{v-4Gf}qjS#{oo&efxz^k3Qej;=Rw z@7VcuVM%+FC^Ikpg;GMWPFz@Pj8}ml2ow{9N-n4tYe6IOE&?a~BGsD*5e#|NC z0*4y2PFc-3#{KMRX5v#puL*VBt&cK9x24+%B2%Pn(Q-kT?(+pcJLQA&DFS4IC&pmf z36!s1dBAt$&P5tB+mr=H{LteckRKc$LC-0w{DR;%0eOz3<_(l@;Lj-X&Ou6WzjfhHEX@71jm=N@cpteNFg7V(;EO2W08cpvw58R~cr_0O=+vvg&I z*+5!f+iJ6`CYGjQXCrVOAhp#1{ovnaht9XTF(!MTY6mMbg2Azlrb{5$cVOj|_b zJXp!|%=727X`S!tdjt2hmvXf8wcPdE`Cc18v|sPw-mvc-U2o`lo$KQ8l%wZ;n3pj+ zSI28T=IS^T>mhJXY)G7#*x`N%d~3WqjgL2m zp=Cl4`(Qf9o$^4x6WU+e|I)UBlHIN>4b*D<%oCktH!*EciT=5g7W|!PCOAy5?7&c{VdLBL&-RoTwPtmGAP;C1e`PPHTKya z>kZm&uzu=iFiJy41j+@|R?xD5{(~M*Eelu%kq3^iplyTP%L2zHu;qX&3(Poy;xCm` zK;J;+h`BKWJ-?g_IS@29KzrbPxjryg`u_~VfB0yuzsH(;CG&J_&!_MG@qCGU!_IeN z`D(t#&C{WcZ`k?v`czlfyYqFfu6N_e%BR=1z2+IMQvxwhAJx_GCf*-6;$&K9=Mca%&isKt$0#Q`fO$k68}P>mLf8X3 zHo$LFOpG``clQ68g#R(;A$LNpt(3XC($44i7|eCI#yWdP=lkt^6_e3xZXDca6VG!n zFJpNc_l8gJ*o>huAIgV!V|01JyALlx@#V!d@u@KS0?H6H!#FzLGxT*&12XIUYX#v54`aMR~9Jw?D}LnPf5?m_2+Ho$MvOdNMnp~L^!F&842lKS4K^Uc_v{JxH% z?>(`6GcVWF^|a6My&F3ROy%GzeXrl88^1R(A0&on*!J4Kbajl@REB=UCRTQa~(C;xUu-IrEtRKJRaT zo91ubKfBLTRw$X^wI}?t!59132jTnG?txL46srV~cTwY>b^QPui zhlfqR#(n_a71ljC*ZMVEjC(QcVz;K1G7B`rSMr#om+Vsi)@7*UzIx1hxo^mroMnV5 z6S!~M3R?Era={Z{F#QHw9_aB#>5&CaJkdYrsNbh43$%ZrWPxK7*trF+958Ky)0G28 
zUZGzO1eq(4*ueMaBF9~Bwaj515a<8!y`3e9{htx>|FNGH2gdPTJ;%q_wtg^uuVZ_V zqt4vD9gp?f`A$sFmDg^pm$;{%*Sh{uJ64Q$ZeU*9i%QoA!Mu#mhw$M8e0Ll30e-sH zgFYMt;M}XbHO3rFoA`DxP5cIJfIxl_}lWpl?8lPunjzOjrzS(7MOAX`_9@Zy`Bm>!k9~R`N{7a z`0Rn8enAd-L}m^VbRueunv;a&UTKCEk~ShOp1NpKky8EH*hZB9vT!Owi+H_zwYbM@J@T z`@ky;W*RY9B@2{1aAO9pP2iOW+8)qziLP&uy&N#-4{e@xzW1ir9C>q#74Jj;8S4x9 z{nE(sR~0_|hhGVs-q!i1tsfNk{1&ch>ofkU;xBR07(<8dK%I=c+zrC|Z%wiH!fwFz zk}BX{exvSwNz>!>x{-%4=SxD)n50Kw6I3zm$!d;X$rvD%ek{_OX4-TIqud~OSPkIcDqg8wkrMo zGy8Av^Z5*#fqjkzk1+MuLeGY z+4^b@)ztYq4^!u7YMq~y@R+r6!w75hmt(EXn<8u)XKmS}X}q;%GiVcO0`@gc#J;9T z?7Q^UmPkv}qO_KhCp3R4+#8sr&hh#14&KS19?ZKwI&I$ha~hzXN!B-C#TXQe{cPy# zIO`ipxW?Jn$5i>PeSGnvwo>oTO##Lw)j^qeq6}D+_LlYS*YP%a+hzN@55fDG&GXq~ zk&)gSWA~zM@7Q*bFTSA9^BpZ(*vcjRPIssU_BA!fJ}9-Rwa}qAvHu3%MLp#kz(-K_ zfR+WWP2kQYsyKm~L$qxIJ4WEn8|2O}$c-G}_v8yB{>O!13!RK_xKrNQI$!2b7~jux zanJAB>zF=dD%aaWf76dY`pfrLL2cyUMC6pYxpcPPaxkysb++Fc1n(;DZrHoD;Snbz z{!g6vv7owcZP_we#`))_)CKPATJvl&_w})PDfOK5?lIouJp^zCYW-4xa(C~ZWfB{duapg z+qW{$eGFB>&p+=%Kb5Va;}TBnfpWl(Yf>(6-l$(S4bary=8GKZdK zk8*$c@4x?QWu&)}_@^Amo?Z=cKKSuDvtB`*A};rxIn68C_eBfek~nu`7AjWB@zb(^ zGIaggVW(Mp@XI%D_*BJu-`X#5Z!v7DD`&hWxt=^39Ixd*<_OAK$i{y!m?WJRajM^HN^6e*gVg@RynoM>AJ{ z1bYxMX=&qYUGK%cqvN%1f_~II>crW7u{Y|lB^e#0-g+}(}A(HAFAFPPWZ`t{g> zP)+Cg_*Yw|D*Vp{hUdPXe*}ACNk&KOk3W9Pb00%hz`h5@FVq-1@&LBLf_bl7zy5kS zR5iIio^|AF_+;+9t(w=dLg2c}HAqtM}hV#IBjD^>F$ zuFhwC&(`_2t?ztm!-spFo1ZxCF}%0V`SAXOSMe8OfSm89{ZDip3}#S`xe;s5dYtPQ*iSaf1vM+ z7qx@zCGK_n!W=^-4;XvdxITa8fWFx9naoF2Yo?fwtmk;tT<>Yg0eiiH$|>~c5e174 zaGpTV6=*qtxo%)&O4MD25C7wU|LN*Eb$m~Lcf;@F+qS;e_Zs({i{acX$9MkRMgH+|I1_uGFy z```2d)@9~T>^+pI%UnIk_Fovk;CPYDv1}yc8_Sn=!x;V*s+wFM-@R*r)c;PNk)3m> z)55Cm)sn$^@pW6KE zkHG<#3m3FN4DitDb}84_bDoa4Nt@ryN6qn@WFFVYf*m)nYX)E94ahFy?nJKpa=I5X z7n{8GyTmW`~m1hLEzYZ`@ zk9MXP^BV6g{g}7;lRC@5yc2^%++X|R#Abfvu!VZoeS25H{%-<|0Mn2)c_&>@bL7sq zvW_0z8>;C%A7_pc{R|bu;Fzg6rbixN{OBY6fE?buZj1q@!Pk-aXN(u`ocl@+FsF-o zoqC<3-ySgKfIojguN7Bvz`M@Khz&UT#BR<&A}|s?<=#T4|0Bl({|UCd!}qqN&Nt%v 
z%Fd^~WPC%<#=d_&4)MSF%kg=A{UPsX43NAhbx}w?Ppk*xoxH02@4)yQ{cy^T{E1%$ z?>^&b3m3GKxL=ss68kpkeU^G(pJN&7xITxHHoyVQj}+9#9Ao+|T2^{Wdt5oboDbZ& zV@{sC!8Hvlmi2IAJt~L8&Dr+Kftu1MbYcXiU%*^(XU@>}3!tmCy~;deHiPhiRb z*aAtQm?`B8A^z{d7^oa66Z@{8AE@*FxL0}k)e#4%X02G>GtcioR0V$_2Do%_XW+lC zPuBzQ^hpieJ35~H86@8B#NK3V?M-?A#tmaaHJ#_iwplt!4Js{@`bPfUS0aFer zf57V#sQ8t#2ZGB1u5pc-QlXII-;RR@*!w|zeB0Ldti|KJEORy>Ytt4qK@9NM{9u8g zb*x`I9I^j8z^f6%b8P$Ub-cv9jdwQ=r~GO1_KIb_^CgEZXvcAoaXR|?>1l6+-m**X zgHr8Mo-_7?9p|xB-VZ{VirB%1b@>x}57qazYlcC-HulCUAq%t|<@iwsuUOU#YZCto zRZXss??x`kEX4hs93$p%K@Pa<5Q5kP0dYd#d_fRfz$*vzdSjhWtbKw?(2Z?3hcZT9oeJ9GV>PxnHuGm-euE#@WO zIj^SU=#019F*N$(jJ=^uMY(8Un|#S(3-wIjemz~bou1Yj*v&6`TiO!DT|veG*>}cJ zWchyfl6H3Ydno|W}2u&?90Do@jy_sLcc&=%14fIl`sn*h4X zlmh|rK{Gbs+5*Hz;lzJ=_(SSB^_-skK3kCZp3?cWrR*4s{4N}1ti9fzF&SR7ch8bs zzvt7v@X(yj%+}CBOfFYSFDM-8I$&Gk5l%kn!?TQ*P1^ZO4~0rT|9gA)JXHS(LbL0w&xJ?1^JHyK}3 z{y6>GT|4H5YC6xyfBEG*;Iu6;{*I6IF_xNmO{nAk``?kV#e)Y5E{FN((eI_NA>$W} zJqFBSD0x61VAswCc^)gq09P*Wqij9Idfi+@zZ_70fnMjO;{)Cpp;|AdVuXG<5EvT> zmNPH|7>S!&vGC!4;semx_PnSW*VpT<~EAlI`t z;tFbBSAF+D=XCnzKyZ7&kpqrTzLO@iy}HsuleUGzQ2xowC>41N-?wN`|n`jT;JX{RQR1dgMqmUc&KH zq{oz{8f(6jRVzNoml)vpyVGs`&seX{?O^G*2lN^*uK6?yUSj0&-ppd zcNf&gd3xHJGH1^5=aruK+V+}1gX3Mt;1&Tp^tt(aNyfX@Z@=a}hb`2zZ^b+v+gZG* zgO}2MzxDat9LF`Z0hH`I8LH_#AK$t)(THDi4#J*?kn+GDYuHR8%m@wL8q zPp!A7?&rKMzZ+3d+potCOTLh?D4#zs=f!E?XNP$iSJ#+V@iy&`(+AkKE484yj_-Bu zfsN7GS^|tOS%i}Nr%kvQc8aWnrM=I1;h}u^4ri!`KYDbJj8`bx$?>!2DqzoOTZ-dO zA7J;cg`uj+^>M}kKVR7&_7eV5xnB2HC_2Rct`z4%j-&@d8}1rB%7r!fURObF zi~+7(*2nfmX?JS--qc0Prg!wbC+4m(?~lK=mwdc=Q)EGP-Qs#$+UeBsOQ5e!qW(62 z>-|v2^?5Bni2t42=N4Sw5d&O-aaUui=O%19D(xuBgVxah&GC00@@EXUY?=gI+iMRv z-y>y!Uk+#)pko8tFQ7eO*a9*(puZ2L<$&Vr&^ZGN8?%r@7#~|W@t-n7**w|B_Dx&g zzy6+Mtk&dkyz9%njm4PfD#-6rG1f&r^O5_|d_=ij!w7G%@eS-xkx)1`$JC z@0uUqvI_;ojqAguoxgN(7v$CEkuq>Uh_WC9{m-x1d+5H8mNHV#TW~BfUhX`lk_D8< zYgP@+ml)ul?=qy0=UflxdL7D@9B}6fRNeq(fjLj0Wq@8=7=*alkCkmnd zuLPg|dEdOA*7=5?&vjS<-`!AgSG^ujt-ZC^+ap(u^W6ot?c6cfh(#&vySOJGYP_@5 zn0N8c*t^8MjxCX=% 
z8&GRQgXa%1Ht3HJ2C)Tf8NfNiX|Yuc9sZ|GBL3~U5ZL=}jL-kgeErQG`uWPH(zxgN zD(ut#BB^iF7UZ|-nWx7&Z-sf=29>(rz`ZlSu5iz|v>SK##?VY(ntJre4|$JYg~I>A z{cENDzYOd46?HdC?wh}PjXtK2;}~Rwd%yZkd9ZwGj{^OcDf`Ybm*XgHCa+9Yc0pUn zh&EQ%k#9l`L9UN82Do@pdsha)=2Q7zhArUu1@^a99XX)+QTqe52eeP1;#F=Aq3zdt z;sWlshf;uzX>m^$KKw^jh8$FDsRLqsjPLtm`^whW_P)lwUVBeJ3^EvTTFQum{5Jis zn4@E?T;pHG?|}UP%$xISCg$C^nf9Mm2|F70!fwnx7u3c$+Oj3@+qh<_Xt`bL-?|KS z+^xfv4}6BhhrcYSu3N{B?t`z(i?mar+ z9%s(a*z0nbtHtki71YKU;O8p`NIjD~%-ivD8Fw>d?keU+A6ok2DCrk$*<5h*7mSP0 zPN!a{zW%RiiE%Gv2lFC1r;s1{x1rwKAAg)c{PIJIe>I1p#?>PWFn*j5*s^&_sA^8@ z_?FF+?Qf?r#{+Y|a@~<$d!*MOn)ZN>5Blc}b$rl`3wYv#K7W8QFPY;4Y|Mcim>&Oh zp~HXllq#6J_s8~4Utih#9_)L*z2WM7g?+BUrC;_s;!^JxXbf=E#tAb1r*)3&&ucx; z`Encgo;fy+`S%p&ofw?*!x>BCde?%C0e-P@ytLt$FYO83Dq3Oe>tifK9XGF0GNh-B z8!%>GklV%$W2OJ^v7791)^b3_QHSJD4DkCUQvWl@L+2TXk^@d`0JeadE6B+wP&ot6 zx*}5sKnLpY4pB!E|E*dG>&+&@YWhZSYTis(<;saALWpXhe}k?8T>?4>R8ZU5sQ6+K z+qw%>_4?~Yaz&zK9-}0Wr77B+uw#@-n*-kHmdK>$> zIqqJwd@1%p^a+&gD!3RR^DilL1Lr06c&j`m%A}L|@VAwJ{BgU=FP6C;dd-1t3&0*w z>yT}GK+Y3n=MykS7(6cE$N+y1v5k#{_-6_s{+~chgma>~(fNAZfPHD}YwUZ!v7^`J z@O!rf`E7dU=~2&UoulT|H4n1X^XYny&5W~iU&qi~|5?S_-j{lmYtRd7<61qoMLDC0 z*w_1J8SFTZg(Av?eux3){crJvdiF!;JI7qjKWM*6k2Op64k(wI!<*NQF+k2Is2oq7 z(;Z33bxOd~`Cg$Ng(|?wvd}`LS z=evUYj=@pH(N-+$Yh#|}=@MmxzTcD=ePleLAaj`c_n0|W#6QQ=orlzN7A5l$5J%ku z+dZ!vV}SfShiS+wroG45qO%6k?-Sq~#O`-VRBXWX33Pl)SsTRm@b+>FL59lqC9P#gVo z@`b`be0lJki+hcETgOXX9|H4E49@k%$Y+CN`yrpvCF*ZY`ndkh zlE>WNfpUXB!0ufIH-Etx;F?u~6#n&GCCAq%3+N+!y(KnZUjJ8HqGi6Pv*tj`09y_~ z2Dq^Sd+oQ35eByfsJ9dk2igMe8e`*|Lx>3wBZR~M|7@3NgUEf6Fe^vv2-#lEnYYuJqOLzWA;R^4s)WXQJ?L`}0!I=N9vf zt(&;lJ~i#>-qzWio)7h7pYc7CKelhrxiS~ama$sr(?@f^x1qjGTae$TXPzE?f2HrWeXn)BF+Xn1 zt+|-jcxP$)(6&tt*^reF@vBfw&b4~P$12Dfmp+v9>*_c6^*MdqS1z}Iv$Xdq4?bV{ z5x!+waNjXt-x)u3=OB%-wrwri1oWeb-MnrmPyPa%tUKbp#+nYUIg)D+?R8(C*Z}2# z`X)rcTw%~Tf?)H6@PDBXnKL+lPJR3hfWnIZMnUwx!o45+rp}kxmpWgKvy1yy(k@eL zL7U;*^vQXB{UPt?-tf$ti!0aPb1i5~i{I-isEskewQGjkx=8b#&Zp6~eGYowj;HBZoA$*i 
zSHAta0ROfwVu0&1Kau$VeC0sMg43ao?CV+DWXlW64cZmof48`xhF!*V7{8Qx2hLoi zJ>G_`MIYfCd^f%Z@!uqs@Z%w&OAfvdav)qG1qdJ`AFUIw@E(DpBNx)G3t2c6*w}0 z>pkQeWLpk6J^^`9%K?{nTo{w*u7&gVNa{WPu>qrF&A+xJ)_PW&sr^JAXa*SJ@9eJH$3|Jm`QXk^73$e{?M##Ha6av{X{C7&iZ3k!>p#DaR`<~)N%iH z?r$X*GFK1Dm%qoIUEgW{2hT^)@1eJ}%ZS~)Zj1p^#w!`XH6D}!+1mn6Ou%CcDE?J) zK-&Rk{;(|vq%WYbK^<8r@gMU%@UOn3VERE?=esdJ=2gJYcXYlRa7$4Mg1Ue=_pTMvM;M0=V)!JfjOhCrKG4DMi@s+}-|6^W&9c0)&@cA|N zbsnFd+jFr`Ki~EB19iS$hp)b|LtB7r#tLd<43N1xO8-;Wo4GVv*9XJA((@{Q=8wH8 zpPD|wHhlA@pf=_K(oR>H9*WXWlmWqx@pvdwvVd{Gz27e@sIDWXa}szQX^*!aXFc|= z9AGZh)^8H?r5}uG(B{|q-joAM2EZP0YytQM9)AF_0bgv;%^T9TfIn~0D+3qGqWzppD}`;@2lH#Y5W?5S^X@LOB@dpq>iI?CUlLr)as-=`#B zNL$orThBuunlW-6E0b||PyEg6gRfmPB42XYLScyC8lt_-#L(Yfj zoE~#dAMrammzs^e?|xg8*f+nqqt@inCM(Eq)AR355vR%y)j2WRr+4SkgJGWWbvLGF z`qavvrk(JA?R^WlWL0%$MKl^2^H8HkNg`&+e>Htws0L&}?VFO?b((9f$qE_chL!xXN;`xzEpMaNs*KI}KijIC@q$Xn@cd;cwEl7;s(v?HV7Wz;sLD}3iSfw=e#C>*jMcU z_B<7AeEX=E&7c22^0zqvRZwqWBWUt1CVss#XC(}w?j1^-x|DOT(L@_D)aKn$=_ zxNE(4)a3%_@qEX6MA!q^f{*C;g1t#rLIXrD5jeM=L+Sa-W)FZ1FebpY2p)iCnv`(? 
z(*#o8U|BDe<_eGptQH?RkN7`K@&5+3b6E|)VjuNWi1WL#KF%XGKKS+&>*q5#QeU3U zx&zmp)eU_nz<<0B#cxAb&vN7H>-#YeeXWec;p0|9ziNGU%o@V~Tz~#&C#-!n*NJ>* zxy)|Z{j?u=%?%fSRqXBqt$)4j`>((LH0AuMPKkAi?%dx%);wx{fo^YXd#|A&3+e@ku@_D(*h)bghMUOTtf z(f3v31MU;w$M;}8d8sEC_uW3j+4Sf$g}(t`KhM|0M1D`)cl~?~^Rxlr<9Ph88SZJ0 zK4xeD>(AdWF4!FZd-vx)TaoO*g%^BoMfT%v0T;8z!22zZl6WfAAK`nTEym-}2Up@8 z!VY6LgU)M1uc7NV?s5UA3n&+$4Pc(o>w;yS0OJA7v3P^*0rrYH#Qz_Qf4BPR*8&phyzIJbXScWRy)tVSI92>d%!B6_xDVXEg?Yr!c?|v;d5uqvZpUc##hz(y86D4^-Y(7 z-%r@b_XIwGd0n%*Jt(ufpfMW$iFq7@zgJ%0<@WINCEm%O$*J+2>#jL*R&QPVBf--R z&-=@9n$>4F_7nVndZzgc=$Gh|H2=@@PFs-d0P=z>rK4VX<&hfqqaT2MpXz6n3k1%Y z%_DR?pqd|mCh#`#g;?k%q$VKxw_xK`iHDCl`c-p>|HBXcQ~BKZtkC%r=TErDc}cN; zK3AWyKJ2XL`+g2b=*17;6MXJ0H9%-x;G7=wsGF(z`WEK%xY{^2>;dZZRzkmO84=VO zgX05B)2;p0AHpZr>;~TZwO1eC`nM}^|3flw@S6eujIFwM05Je;+)A9on{Pfx@Q?ZF zq2D;p9GJuf+6@4tWh3gD&w5}R&3 zGvNOK!Sl4MWi$5I*PONO0yhHZhn>|rMRKkfdkG&9T>!S|zWcV-PrwxSLIX7Hr(R=R zYcU@1nt+c7Vs60ffaM0v2KYLG$OA&(0QkVX;{WthO!olqNq!%B{kq@x*vH=0N$1P? zzG2_@v)~z=#`|C#@0X#3w9^rN7Y=G!a zNfW>~;QzIA=l_p5^v@(Gf_jJ&`<5@&+@A0x(@)6vt)?gC z_C$w?vA&Se;MRu^`#j6Nbup_0IW%5}$=5SaE_w8rJENb+k+}DmPkb7D9M74x zj&{+72MT_%zNj@pIHtwNCLK%L0h{oJna%^nc|pv@^X`wH4^S%4aeT zfNP7_8RG$G2fQW_xd3v6P7|1r8-O-&?04o6|A+l~z&|-Z{Qh#VFZkE_PsBd^z%id5 zJ)s|vSn=RxfB&r)cu+JzXpHA&njWxEem@EGz`x*Ld}bMg1OJ;hU()*5%f2761?r3p z`(M&{19gFjJJK?{VE1=_PWHpw2(brZzgEKUwmu~07$jR;t7SGm0Bv@qv9P@!kE1&*?#s#zN1AtGv z^711C{tpt&*Os>hc-;T)e@?am_T<6~K0n^FUf|ZzM-FMw% z9w6{~$wgltunv;vBpcy>HrtWx0Y2X>xeM$$-ZSuz^PXy$tsa^20KOOGI9EgilzJ>) zTXsw{N6hymJK%GKNfV$RV08kqe<;QRsYc**@sY=^dfnXl|0557xy1JPoHea4`TS0P zFVys?-;dbG`KI6Zb9fB<)|?nL|zeOo(Kj1#Yewx9B-q19I$9h8Hhi5sL3i<-Dfa}kE{t(=!c$wN8_%mbi9f2X@ z^LxV8VFSPghO7;e^UxJ}LCzgu$L_lGTE6^Dj@QVZpg7-FgJ?PnawfK@!|ba$LaRqw zz6OYX0`N(jGr+k|v;(zxAjAT4{TdfwZXom!G8Tw+LZ%6>mEM8lkKJ?r@PEXsrAESP zp%DA9jvuuWU7epAzvNc5?vj2Vv3~OViSO^D-wWq)U_J!su^D@~Z*mx=N3Y=?b#uTE9z$+!mYjv}*&lD;e5qjn8^!^|mcgF2SKUtF{a-CPQN%^F zyutsgt<~CP)*~~|!n{l312j%SQvz$%uuXA8_(@kedt{k;=Ux+z#&+WahC+5{}20r~= 
z!T%k1EPo$s|GtL)46PhC4E%oMhJzamb^(tS_CdG0P6&DO)&KkHh2JL z4M|LV!m)20zhTeOc(>P##;<#AeeJRPX#Cvgj@FDuqZiBiMOn{~wX>|9WX^@ZX)oAqq^@_N7f?pxFknjJV$>uVzJ1I|6>@w4XGgafQZ&1(66ruiMUfj5g^ zMBJ=;fM6Z@wvB#mkl($12-?A{b$@=2tFAo8Ft7E?L0?rH;Os!`Yxep;(gsrfP+2?F zXCv(T!Znp_0PO?f0onk>1C9J(FAsQ+`pCJ&|B=8yw9i@}599rc-(Lpy5&NT81e(;2 z=F`X-ZN2M8(VtII?4&xGZtTC|;;*T_*b%;y7@Bm%EN^gT@BzhoRT~f;KRXgBg0&fF2|1i}I`1&F1?JjvjvH^Y$Y0?BFE_Lxh z$_*qvpq>}7SOA>(gyY^ce&g%s%>N7i*GWADIe+H&#U@tc{U-J~_m!N#^!xnItVbL* zIvTa6M<3yie(%`F3^pfeE0#+&8xZV3853k&An5{m?NFZ$z;$H41l-bQJ>r`4 zJYYyHfOx?4K(7bbTruc@e=YrO?-TsLdHlvX`#C(H*6(hXv*}ydS=_(!X8^O;EM8(wGIOck4zT7T8gkejb3D!4r;q%N*mM@jmJ!8~Oc7*pK-=VBd7UG!sg`r=ash zE+;X-EN9cN{8heq#nu_P>#iF#o=Z6ti>FvS*^UWwe!+y7Yo zklFyR8J2p0=vR`vp42Y@Eo?sV|2p~XsOJOjmyZ3khn}Cki|+#8i}`&&gA3dr_F&et z>08(9;{KIA1Gh@80h%Dt6SNwEt{#x*2Gf z_t;0?2j3&?I5^{u)&N)b%3rxxgEl$KIsGg5PP?yk-8Cm^jRf;2T6e7dN!EXmbXLR# z$io8vr9S9$g=Kyq)eo080M}892gn1Af7Wt`bsJFT1xycMjUasFiO2uN{Nev7;2$;7 z4bES!>tk+TX6*~VV!WSb?mNFsVK(4w_6=zqP>%)RBj*$Udr15P z?Sopsuj{MT_7MAdO<&6GuVnql8&L$Eg=_sw#q*25F&^>?*R>Ju*8c$m~0 ztgk=eVS4YDEmvp{qSYx|4RU4!ikQIcK+*@bx0+la%Mm1PFz+MC=L-=3bshj;lwtv^ z4=_ET%LBl1StA4-P!lw4oOHt5=g|MZUOqGPAN^_KEJ%fG?gzpUm^>yLdO!q4TmNI9m~B_LZ%BY)ds}e zU~hfUUgMKac<0>V|Cpoql)5OyJg{{w>`NbCr=FiWe+~O-<~}+9`txlrl=X*VCQqvU z!u2yN-D{KdjKBxoAsqGLe7jS63$DEUD8c?QvQ~xTF~VPNc-X#e=#|$=|Bd>6)FEf} z$LK?#9jL?v+3cboF5uz;vjK<+KIeLhhsFZnrT>`K2z7aY^{jX22ka$$cyHMwcTVxY z4lz%TeXH-WUdkHwTlsx&gNc3kc*gsP;|%*2+gZQJfwDj4oin-4H{W)#*2|+-e#_>| zX7WFl`=Y4jyYAZ6ih0blKpzd3*@0D}k6g3de=(E42R}jo!KJeQH<$J!#2y6114T^W z;sMy@q!0Gl0H+JOIswxJzypTb0Mi0|jZoe%VAuOuv0s2%fW*TRBcFWYU(TWbe}nLQ z`<&Lpo9Fqm9^TNu+t&GqaDMaqX(rDBn)96%*G11B{1fzPT%(w)@o=l@#ate6{;Q>4 zpV;?5UnMpH^Ah22A1*h{??jA(^Lp9FL$xo7=Q#EyWp<#43y405oPhQUIL?o2B=LUa z0J>PmUaj;w(El$uFYV>%@6YQ4Brg#6DE4)NWsP9c1JO5h@`>-7L;SxGKAvkFbH8tKn$3d(_GKOymeziXT^Fqfd|)N^g{@)Z?z=b1HTunPjUJ{p z;3nB;3%Rz3!)^PvTO;PtuTxvF1&9k!=f4s&GnV_gQLlmCeZ&4?nzMinF#MaHH9p|` 
z5fKlN3rJ70(*{v9i24D{Cyq;a8@L-5`ZLnHNPdt|^M%fqyuj)?#Q&;e-YBs>`Xh6{4_(xHcqPZLejokUE$nyWeb4zb z?8h1VhJCa7pC4a4qrG*}Uxb|6Wik&C{b^X2%C!od-Mw|wa-p!SzsGD6@P7Dw!~OAc zEEjA7?gKYirMR2r{)v-55V+H(P3Ne;=lR5}MLowj&lN79e4xPvUAUVYME&7J>WbwSN-P*p6-%e{d=;F>f|U>Y0XxTOihSx>h3O}wmmYDZiw-5v4cO1F+yMG?ur`ZeR1DII={d5-g3!Dr++OVNGM>tnF zcUNG^@3xFf85amPAjSmH26HY@iw6+51G~$;LD#`GW%;JOZ;01x(gO0{VfG4ia{}12 zY9I0c@0mONANw8PAAHBp>PXw;(eLB+u{6H-;w4-*nDk#XQ}~F z_t`AkPt=1p=-953<2X{wiD4-xR5<=lWH z)FdFkk8_4|qvt1_qqZGjOyKQ+*#OKg%W?#%V1K@jH{_P>LN!W)3^(M?0m~XKd->1GGdTD=MFR`Eee$e`z&R_HU>=)k< zdd0{Gct7&P@C~-Cj>KwAgPyEh5;4eB(nBvjXFKeDFcNA2tIR#C&7a9|7m^_lEn`%KcF@ z3tqD1b&dRO$9{-C2;AcO>o8mQe5rf7O8qLdM$G6ztYGgM?;GzI>we|l&=t{(fjv&Z zFK8Q~JL)+K=LGlbQZ5kqK$Qzn7oa}a&J*JM#d%(d8*&rCdR`x3Hh}oYG4TMa4bm8w zHo(mdpcX!__&*QYXVCkze!eR9TQNW5eT(_Ke&1sL((eQ3<-A?n??Up$T1yI?W3FwE zarhEq9Qt$A_avTYT$*qXzh7eCVd+W#$<^3wQ)RCu)wY z2FceVSUr;3fnWn*8_f=c^BTBS9upK?AZml22OMHEOr>uC=N&d_xi|1c_Z6)>T_qjE9_?aCQ3AOrJyi z9b@qDv;5x1dce5gPw|eUTgU_Ocm9kcf97M=W5<4!wt@D5HX&&TY99h$;OD$XKC`ZH z9ug<=_CU^Gv;!3`Am^RB0KOB`2Yo!?^MoNkAT}O3+LfrwfR2#rhRRxDvjK?*__@NY zWlwq_;{fCb=M(>bA^87zz`nzO)%RPiRnq!vHT@3zsND`)fA04!=7;8=xP1fr>i1b+ zN}uns5007pe8YXfe&DL{dd+YTzh*u@VqdR+%e5IM0$!XZ7kPKpuh%gS>=MuLw=1*K zYew5}?WqN)DC|KNGX(uWxkA@2u!-rKJt#dcZeUL<4-_ z{(F>D`W_S00zNCefSO>^0yHNO_a0zvP_O}KnHB&|;MCRcn>+u1+$xX%dW}!!_jPuU z#Zujv-(mkk*7xK(f7){+VSi)8@28j^_$T(ocX+-Z^7+Q~MUM_VRQZ04^AXF_=kxjn z?mf>BelPQf@a6L!p_P0sc2ksO0vsfRRzAoTC=lfCH15eGgy^`x2?#X|Z=PSP_?umJi`>u~C zU(fIkoH}en8?d!T<8Z{{OJ97g!gkO$WXBX=G#{%cX7U4cWvyO&$4T{^!M=fA%<6<54>{$Cjs*~2A^t}cmB8V z>1}MU72L~`Yjm20YdSt3f3Hhyh}7ly|K)8?#~zUn;21ws-40mIK@k_k`ecinJRj)T z0K{IMdI7W9Sv(MI0Bk<;qvP??^R2WU$OEsJ8534_mimagIe~=#zBpih@&69-|Cal# z)%cMgrI?@C$3D+xeP8nXsPALUU&X%Ud)itb`F`E+=QFa zy~9Zr_sRcud^_+8A1_CbUE~dxyX-a3NcMoXp~ESRCq$bNd4baoI(8s%p1!ys^dxk( z0j~=P{-X`>dBU6rq`Kj6DWAJ%yX07<{-AeBtswHSSje}Q3-wU8|Darx-~if72z{Dy zfUgahWCP4Eo_5OLivNGiTod-xcQ^(|+g^L@nljPq+;pSYJ}&EHosZ@j+B=ZQnbd<*Ai zY7P#0xFxR}HF*&)#ePz%g+ 
z0yQlFKJv6v{%%h3k2*-s?MA*&ey`;EdiB0)%+Gi~&++Hj5BQJuJBJkUzU7UQ#viU7 zhyCR9%UGYdw|pMBWz_VmT))fnGmal(`OMGv;vE>xG0wdEa*y6_;I-oK9lPM{L1qgQ zZ%KCGv>4me?Evcyi}{d zxSJ`^uLW?HfaC;=e8BwT|DEFhf&Z58XUtE{kDPx}%n$5$^L&PV)bTOL@BO~Re$@C) zGmd`0sq-3xhHE zeXJeo{2uo6tKN^ljVsmi@c%_ypzkH;An|=q#Rq~u!1INCp)S~gTAsi-fYuXbwSzt$ z@Hs-n1GMw#BeePe-#e802kUkq%@N|BBi;@OFO3`kdZ5**PzxBA6a1w3|M$%nXMjaWa+=>z-birb8>41bwc2Dv;j39U|b-x0m}Dt&Tm}6?>FN28%j2y#sP4B z>$Tp@2YhmT`l)k@|MTGgJ^u4LA7X!au8+Lm#r$C}dhhoW_KAPi^{}qbVPE)S)cGXd zPn@6is$IWN-^84rVc%=|f$zr{KV0`E?jy(dzBJ(~$D7Aw4dcLcv7|=;eFB44_P4dy z$9=Q~a&-0};{`!GxKWPA37m}!pFhO~-VQKVK#kDH1c3`=nn0N&w0I!T4>t6Gm=|Cy z5Nd^0FY>W~i~GgjpsqCSH-uOKD`-=J2haxeaR9FafFqxN>Ide||DSN&yTtB=c*n>5 z>ienrK`V87f9&Z?dLPblU++%z{aVwX=lHAneP7q(vER}867P?BJ-Plo_9O1WUt`T) zxTZ6%Z!xjw`L-vx?F()`-*E47m2>nGlL70B?{fmiaXd%+Y|v|iRsP<0Kc1UbVK+Lw zqQ@2#T0uQFn7{=zCTPV2Lp6a`EI{4Q_Yapd1WFE&W{T1VWP1z5dBSB*&}P(HAA8CN z&;}qkIPduH&Gk>r^&#Hsi}^k0_c4E8j?er;^!tka(65`-^ib#XvA)EnWgU;_`yTg! z>(_L>$nyioPq=US`2vpx&JEY+3g*L-K4bJ6ch`AxAC`Lzf5YGT_r>dH974H*w?|!D z5IKR%C8!;UaY4`trJvCFz$7k^v_YRA=++4Y9uRW_g$+xC?H{cYaQJU{$@f1W@2eTV<3^%MKT`=j26 zI1s#l680;8-^cn<-}f4yT)UxuCvjWW(FOk7iSJ8)U&DRwKPd4I%yP3t!eNVWb<=XW)I(eIZUzr~*2SfBA~iG6Z? 
z;6GtM`umW#Yy10zdEhPaci=Cq=)Wi9dIJtyM}`W{k@5qUvr2bhNlIziPAlw1Hg zk&Yb*=Q+j$$p#=6OLYR&X-v1VzN|7YV0xfxf`JE2%n306FEyoR1N=VZ-Ufs{hhsj# zY64QPFgQTq0Z|LIx#DY1+j}nk|N8S2-zoL}uI87ppJIOK{ieJ6I-lhGk+aEr`16?G za(u`EG0zt{zpv+w*th&rjQ2CY&w3uj`VRY^?`Qg+>EIUUL*FZPJ^Fj{=p6U-wTk)R z-zVV|b2+#$(>q@mgx_)G&-NVr4W7%twPW43B%Tmr24@ckrWzpT1*rw3+JJ68ppOH9BhM%P-!0nS zXT<-@-2SNfvBsCVe)#?Ae1DF8!++k>L#@A+=d-yIxz3;R%605Bzt@lbGR}`azl`yz zqpPM?!+pS36Zbh*4d=wQJIcPFigg@ytq(`{9JK|t-zVFIXbZ?of-RV22L^Egi9E_LJ}L=K3P`U5yXEC)W02{=V9B#{ACjTOVJ6{Q-VI z*Z9LV7<^NV^~LW;>{se`BG*^kJAWUv`(eJ`FyHa%5#zviTG;CwzvGye>}PQcm)-CG zCjCr%5snqUP;!x|7rN)n_-<$eP=lGr0>K8zcU;B- zX@-Esg<2n|wLyJafaL>l4VGF!9|vGil5$Y*vEz{p{%l z_lNDQ)cLqs9ufbH`8%__a=nk7ADUm}{V~ty^ZPBGzvTR7tS{H5V!smSXFk8e^#%LH zNk7N;xDS3h`f`tL{7h_zBlfSseY%g$MeZm+^ZxR0;_u0pm|gH3!E=K=PKa7T-~`46 zM8}vE6U4d!i3zBiAf5{OfxM3ZcG&b+trv_uAn5@qFHrOiRr&^F91!XQ{aoS11AJY8 z*8=joplUv#-~e6=1V>(b`iJJw{~P|xxWBK?k9~bL%`al#`TjQVr`B(Je;50r4?Ev) zoL_RvwRk_~_2rt&{62NQ3D_^We&+MZ^K%TrwhTfeK%!K`rq z-12vOEWCEr9=P`;+@Pixs2zxL0qY7vtfO&Hg$rah!0Z5PCBe0L{;h{VzK^I0CLU1B z4ff{*$N}W&`UJW;LByJ712#;_0az1M;{fxD{}Yb~{xKJc_5JYwE#HrPAM1Rm`PqK% z#c-Rpk7AKEL$$ zz=m+;oJX5qHGNJn=Hhna@bu+48it8o9D(o7%J#U!o_Abf@4MRH1Ute15$yr_M6g4V z7ibJp=L9h>h}u9UCh+=TtQ{(NfN}vJ3xJCy8{lIBvjKUHP|XI^;(%lWpar(-gCz!t z`G9U+(08>ikXisZZaW_cF0l6W56_+dKk@kY7Wj8{epBZAI@lM@zt8s*`@WAi>V47g z+x4NozZd)9nudRk_e0-~a#WwwpMd==kJrIH{J+C~#_yevk363HR0h09j1${-v^_5Q z^CgP?&9}*3n6h98{Nu?k;Q3uUl59bX7kExUeKPTy6c?~A5nRCO0>K94+92}-Yhuqt zH9y$o0cPLXx0P%F^gwEY>=l6j4;&!G0nn484RCu3)Ov!OS|H}tRp$hl4lv*N-%oR% z&A4CZcrxy1ou9>1oZa1>;Wq^PE#9AMdk`NE!oKJHg8k_C3+zXYk9luu`_b=17lY4F zm@wbp#eMYkjM1a7FK`)sIWTLuCcf*(4T^o;|HMBZ`=La;V74K*2lD!nC-~SQ*@8?v z3^|1|E=V>2=OV-e9bF*hNs&MCHG>ul82{2(prQ$y9*7!&nie2)$b%kO>lN}d#F!H# z{vMJc9luYXW<1w4zAo3#;`^$%VOvS$!}w|$TJSb0n~q019!hny|w=mg{g-~zeLOWHu&2Ed*w7sxaL zXe~(-)Ea@B4M2T1`-el%P^Jfl8RK0WKrO(or_u(XugvOAx>_LSf>^CeyC$fn10Y6T zyXNobkpBb!$b0&@AHDt5{H?|}#r;0tXZ?N2_Xq98@%~z!PtN;oc2}48Q|lLQnDKsS 
z{m_)b_Y2OCTr_oljr9ZmJ@$QW&ue_u9DeZm0r$DT@5ep;wC!P5Vz%7JEZ@`2_BXp! zaF695%YQEIhyEV0Z9vlf1*Y;eNB+~f5HD5bB%xQ--Fo*?OGrD{>gQIjh2_xv%bg0`zwll=JJAX)ZDy}?_FH)xP1%rIfjYrzT;&9`+d*JZ3DSN zvEzsP+g`2dvivfft{#W}i9q|L+rDzrW`ES?6E+e#`ek_xHZva;wJs z%XvP)zQcdD&L_<5)cRiQ1L^So==Y_zpS(Zj`G|eY>dF1S*ZJw6tS3Hjejn?1{62hs z!ad^Un1k2&KKRmrdvNZA`{d{QFt2-+V!zUIVgFLzce{Js-!G5fuB&DnJU>Y9CBDCC ztH=i!7Zi4YpI>W@Q%s;a0-qm)<5g}{F;=QmA%AGj84{2lg%n;}<|&F^!*zsvin_q$$R zVjn((oL}O}G^49h*H`8I-B{o2d@Ph%f`|irEU?baAw(|);sA>U z5)UYBK;!|=1{7KVwLn}ewRz$(A5hE-sn!IAeBclrfHi@{#u;lqGI##p@Q=Kw;op3} z#Qt&r9@hGGeLr+n%dxcM{@z?adB1Qx`u=*JKk)vJ?{|LR=l6w&PV4s#`{MUw?T%nS z3fZ2j9J}B*g#sxVS(DP8? z0Z9)yH_RY$HlUIlw7fvj14C^~J6v#xUY`JapD-tJyc zf9C%Q#|9^e`aqUTOnJjtL!?|F=mKg7OcUXGn-mLJPC#-4nipv3fymh^pMwoR9N_8$ z3l3mfAhiJN56R*H+JHg}ta1R(2%jPb02erO?L7MbgnxX-dhE|yzuFw{w(s{dJ*fG| zIzRgUsQFn9NS5nwV!s>n$Nt@*N83!U6z^MIPdC43^*rYH6ZYZzL!DlU{g9(e*e^JK zhI@z6D$cJO-?61{U7bC?B|I0$j@^Jg$!tPy2e!mm#JEan2Xc)taDf%H0ptNG4k&sC zvpAs42b8_SJ`S*10h}LfIsp5FOouA9K;{8(%yR*5R^WQm0T}}r{y#d0{2%x~psM-P z_qTPwG}EWA&M*3Y*Wc6h{ZaFS?+<-FLwSG1zVLtg{vyXm|CQJAu|LOf|2^vD5&IJ7 z$J{(~cQu|5%(XG!!TKG7?`hW-*o0&k%oY^(z`qCDBaI7uY~bv`ZNg_HCO~bH+JW%- zQ=TyK0IvtsV*%J)=z*pQvNnMDM_!ONAddsAMtCv@fL5I90j3;$w&srd^po9z3U>z|h7jP+xT-@rZbMa(%rpW(fM^E>7Kp$oPG&&_N`)gFXcf$u54N3l;?TtMBz>w~Nt z@Y*1BP@f~56bnqU0jX~|vjGJMsMQ8mwLs|ADcV=fAWccI|lw+ zaes&VclPY9)cH^4`+dF-GyEoKe$Mw#iur3by&n51-Y@HU^1PnK`i}2MJ%1Y9*KiJe z+roa8eGAXS<9vR{Zj`(LzpGB*V*?kP1Uryw30yo7w82g+Ao-A6MH66dz+wTj0Vx(J zY(NnQFei9@D-Iz3Lrnnp6tUi*Tn8Zjp(&O739=@*$^kelaEcfJG4g!k|L=P7Z`>b# zKkVfZ@gMU2s`+EK4`=z(_gjs&elOJgsrgAg*OGjH(br?zGIM#3v$+53~KhyYXxDRoD$KNMzo_sxVU&Hxz&d2)rv2P7$b#`-oiTCGvAL4z+{M7ma@9)&{CC<;7+3?cv`N97Zf5|^P z+;^~kz2JYTE7*--3p%`@X$KnGKvp+keMf2sXafpOuv;TU9$@uBHc!ac2C+}ba)Q(Y zsRgDuAoO*V`9O&cksFFJfbR|H)CK2#L4!Cz=nDeh#k}x526(jKU*Z78K5Bj({^|QG z_^0lVbF8sHxqqe3pPD~39Da{kzEA9ZJ?=-HPu|n3zCUVy@ck**r@TLl`F)O`^}Md8 zPq82Bcsucah?mt@ChW%;zr=lv>B;B4e=o6qgWzA*T>}@M8<&4A+5#UV1WthW6W*ih 
zhJ9S1W&;HOv9B<406BMz17J_%OyXbzdTNC8d;sbKhG~JE8OZC#bih^7|7#2&b*1n* z?Ofnw4FG&M?HAxUfW*k_&iIEpjQ`*7{C|#r&h%!!)cX8dn!lgtn>2s$GOgJp_m9|5 zzTbL4UEDvx_Y?b3^LM;I#r%nrc4B^sW4kqdmgDQj`sB%i{lND%5AQH#KA#*s;6D0# zkJB2?ZxsC7+8U1dz4fy_*S(I~1O5+ZYob;_JCI@%+OGI{d_2&!0X{bf+lib&(gMr| zkOSDhMKulpu2#kYw?+qFh0}Rpu;roghAmabAk;2yjv0>nii1TfJt!x;y0NaqB=mMCLq%SYjZ-V1Gsu1<^qL(rg_0>K2Uc) z2=jntEs%==WHtyi@^xo?eEg<8=M?|Q0Sw{(m7IT_`wJJN?w8Hh$iAW5oUhpC5UB z^6xp`HwpgJy4(Hi@6UfO?1i%@$rg|g;0Qawyn)$(ZamOu1E2*2J%Cz(*8^E27Yl*@SoC`tGea+qzqheT&cQ*og4jSiwwuCHxmN{pkC__p*8)*WbrFzu4QC=lklK zpXvQI-fw>2^M2P)ORdl6_gdHot`z^l=c>=I;~xIqR*Cgn1p7mm`&+RIi5t)k+!XYL zV1uF^2=6oF0x6EkY6YAg;OhfAdBLa$LJO$p1g%fV=L3ZY6g2_WTma{Vl`%k(2f#HI z_YGklfI47S4+wp#XHKwa+ueQu#sJ{|bL#(3BL1OuP9OWHncfy}#XbB=-A{XbIm;XV z|M)QbjdlLOe!xF@zxV?`$CJ8WJ>NIU_lG=xQPW@7`w;Il=J$L*`F*X&>EIriGwsfJ zzWMqb^Ie>86YOu>Heum89UBpBL1qW=KgAZrxPW$mF@fm9=M1BHZ*Dh zp$D!z^AqDY%^Ck+?eO2K_s?hi4CDUoKEG7!Pu;(t`&+)hi~W%E>(%_2>kGUeIc&%K zeGgyK`vmu~H@Aj;ukTlUe$?|yUvIcqtOqO`=5H134_VGW@b5EwKwH500ydzq15uAG zHGzr^i2a13fkYeNY6PhTWE>#n1E>Qa7VB~V;6K=aqAobg1(Y0Mhr|H&et`ML|3R7d zmumj}jIWgUBlqvc9X|GzBXU)1^7Y+jqCpEN(y`waV>(c%1l z(D_q-Zvys#H^qOzzQ*$f?$ysHyl)rmZ{K#?fR+9x*^34z$oPQSftWX(WCNr=z+wU6 z0WJ>kHh^_OIFHH$oDJyH0c!a`)&p2waF`RK902_Qp(h~A1yTnz4aw>OdSZY$BhY$+ z83Rn02e5ta=hy#x{QG{NI{t^{{kVrG`ZYPpy=9^Yb?Z>S4_F4WZh==+6VQuFK7 z_-1oFyxw2b_hm7^VE{V+5bUd;4Y-e*Uci3N?G3|Syl)o#4_UdL$m{^@O5_9N0@Mde z8vuRK=LU+rfUgmv4Tv1T^$T_IkGi0tI)HJ2EEgcN!qorcyRXIowfP{87@+72=3D@3 zfYagsi>i^$C;s;m-2?cy*q^$8XTEQi_bcc7q2@Q${7lXL8TZ59Gxpa$AIJTv`*k#b z(fwV%zpMGDd|y-Zi`X~cpKJZ5mnWS+=lqDXyFUMB#bS!-;p0m`@9}=SJT7Z{IF>)- zcU}H(k}b&WK!^=YPZT>4^97|1@Ol99gJuJm+srutVxl|_Ff9tYPDe){dfMPZvYk?UTuRHS}=P>@SsP(M74titKWO|O z`&*Hp^FALqj9foqHTijmdEkENinaiM4||Ya+uMPZPk;^ZJb<-Bm` zd;swuYXa)I0IdgLE>ONZtJe-1fYt&-njJ*8DN{Pc=W#{!888`F`0;nss61{;K&yGot?|{+;hn+`onW zO1`h*`<3^Hy1vZ!EB1rVA7lLj`w{ov=hJuBv3}3?E#rIck%ePr!9Ltmvk@_FFgxIF 
zKo$>VHo)Zwvp9e}An1V_2juyH4*rW?Va^Py_XryYuo(fNUjx|pHNmZVKiuQbM^C-~(sX~hZs0$ZOCsmz>-;C>`;+fadVi|%o5=eseqVEY-tPzOC%zwWU%_s~ zJaB){9X%_z4IQqaoPhSl+krw8$ZP<3fX@xOoIoun=r}-43$%P-=n>9xft(?ZJb={$ z4ao%#iUDFC&^SOD1C)8d%VZ9{%>}5;1!4?valH;;{_*en{RZGapYxMqf2;Ww?XT1C zGr8u6+#lb)`hV8^h=0g({`CD--LK~ROWn`c`FXwHxPR{Z!TXDtKl**q`K#C`C$D2a z;of8Z&hfqX*t#bi`8$reeMt5ovjx!(v~7T|7fLw+#sahft}Z}wAaRxuZ9volSr-T$ zAj}P8E`U0K)CFg`zzH!xnFr8%VCDhn|8+J<$^+y*fz!+cnA{IY4d79U|2XR-<^G1o z{=`4IKfa@^*S8h>_x1VqaQ{BtPxsu7{XKr3N6YtHollqd=e0e=K4NXg`Y9d<_H(Y^ z#eKm1y>h(w`#lSfC)<#0fyD~jC0>YjV1NxUO|Yg1690`jpo9OE3-Gf;%NU^B7v$>! zD;fZ5g0&vl=K|)DZKa<}yAW(f z#tC2t^8c5#LDmg07HH^!u1>h41(E|aYJyTd05|~W1Thzg`2dLnWHA7Bz$t5iI3t)E zfXxL#9He^KWX<9_j(?Agh9e;fOy&sRV0d_8a;*L_>xRbAfT+a^?bhVgTYF-(k`LD;xm+zflK( zYe)6t*b`_q07(O63;_R+Rn7#Nwim#B<5~0Q|4)Yh|4QNims0oR`+YIvmvuiYkAKzv zME6hHpU?aC)%qgtXY7xhw)ubJKa2ab+WtE4H{b7Tdh&R`5BnMBORV2_x8R?vum!z}=WLf~?fR+~MbpSU$4@{0S@+9*J+0@5_)pwFY5zs+FZmnw|IGVq%}?Iz zuX#W8hgP*eU_aITY3wi8nD+Sg$=Nu7Y%0M~!y@&Qc_;Bo;&bU@Yv>)h~|2auXT zx0i702M`XRbKBtm!``7f*U{#JsQ(u|D4z|;xj=pHsS_z)M6UTj0N}`vs>;xxcdRH`o58_Fuj$?fFyf zpZG_Q$EjNH9rJ!t2cFOQLETTE_ScI2t=3lGo4W#}b`prsdfKdYoy+EoxRb~Q4{$Jz(m;;Ra0^~VBX#X4L znEzMtuem?T`(NVs`RMWgou1!z&tIwiF9-iw-hWE`*SWuF|9bs1=Lg?^lg;fR{+qtv zVsGT}03Qojjew5g;yclvK;!}aY_MVY*ZJUOKQQZnI`aUCf6D`E9V#?H_5m;lxUs;0)Bte3 z*J}XG|9@)k^8crt{6YE5(En<=|5W!g(f?=o_u7A&_hGX>eck`S+<#g3a)Bu?SWF0`%0BiWiT%cjSK-D@x z*9VYl0N4k>93XQ5$N`2~K$5o}QUh>Fr~y*{ul#?0@!zfe$#egC-M{Fkat>AhXYQZ# zz9jFWxxZH3KkNQr!^5mU`v1W_e^L88PkjpZ^htYpf%^ykFWR4<>pNBKPyCbn3r4ig zPiy_i`-ARJ?jLG=GVDXEkG{W#ePaH;@q-U`*8>mSCw2h~&ohotw*f^Q;OYVxn^+IA z(*a!!z+B+e8bIs`OtXQ^|6iOY`hZdm(ER%Uy{rB|#r_TcAMqdbzb5{R+#m5@$^9w*F$*@-{imKk)Bd_W zKVJK9<^8GsS&#U1-!~VA9zm@CvW8d3@WCP@H91k#E z5O&4K0ptLv4JvB_z*o940P!Dd0XiB$nh{oVfKE>!IY6cX!vA;c096CvY~W@NAmjj5 z17sbr)Bwq~q;tVz{6Dl02>u^yXdjTo|BpQXf8>z;Kgj<}|6k&t{-3%3-2bQCKkNQG zxqqksWx4;pnIGy8lJ-9l|F^Vz{k7&-`c_Q0LQiWk<41CThW|#d&xF`NasQUypZk7` 
z^MQZw^Y0f9P+Q?Sjt9h8Ad3Tp1BkteIsiEUV}ME?zSaL38zOmKHixw0ObGZS?%Nh$^T+S91{te&XkNpS42KYsLU^XD}fTRWFIRU2w zl=#PcEi^#GKQ#c<0famt@h^3d)6@cI8er!Chv6TbFYXVf_(u;=tq;ig|Ecf~4Z!d} zWB(8Q|AH*+$%VbSut(RdZg}kzf z`=I~darpOn|AzKA82?H0BmNV=C&q#M2X#FZj>#6_IkW+$1vEK;s|V=j0l@z&dB7eG za3%3y%>j`AL+1tl>HpaWSnUHCj{mX-VEQ=#OZNYBkAH9~ z2ckA$3jVLPf5USB8vDojzM^kK`=|e}*8PMzzGeJBHU6RZTij1P8|OFdKlIRp<7}xN zz+P;{fEpn2PyY`N6f}Sg|2Suy3p7OyKsgU6s{tVX zOa4C*|L6mn+y~4W;O>3^L*sw=Mb!UHrT;$y{r|&=|Cj#X_5H4d|JV3G)V4?d&z_&i z|7-pq+CNsnk>DS_x6s+{Mc%)Y_X8%A?q@lEANK>-#`lT)@%SOJ0a#`Wj1$lXB>aO% zFc&Zh|9K7o@4bP4$#;}FfDHdQXVcVK>O?Dham zt^pp3fB65eJW}xg^AZRAw`V?cbi~s1zhS*Uk4pYu`uK;8etcCa2` zsQ*WPLODQ*{{{aa)B`vr{|EkmQx<$NG&yKZNvf!KHqi1!)YyY6EACvVnvVK+Yf5?u&|09+E=k-4|{Xffi_C>o*YIz7fYkeE{=c3F z6fXb&s0I3?`TJBOK+O-_-{<}Ma{pmAD78P-Dirm9#6PwFy#62czhM@5ssFoKz{BeQ zEA@Xe1EBQ(3IC}54fVgA0mgj*P&*d$f0Oxtng2)bYWn*Bsq%j(3I_j!tX*UQlfdSz zZ#&69K%F7<#-9`Y|DZ|uf8A?G#^W#D7oyKduE` zW&f|6|DpZAocqiEU+MqDeiZ2c{nzaOLmhr;1CkD~U2)7>pgb0Eb|B;l)AJJcq5Wrh zf7BbZ_K&__>Jlbuf707P{I_y{u#svnHTN%bz_QvO%mC{4|D_p#A^)%a|C|AW+<%FG z*8ZUeIPd>!_5bGmKcWABLjAwte@OqIumAtb_{_DR7{B55qw#L98I6IR+S+6H(fGN~ z9jy`!LiB${}BJ#{10dV)6f4|GXABnESvqy{(tfRHv6xx0Wb$h{;xH_tO1NQz-9x6XaJbE z7xMsuKby_V{(to5xi~=LcINJr9za{*?LoND<@&%O%?fpgnkNKaN*E+@gpPnAT`uyMe3;@ig#Qe|tzCXrmX#+A2z&zl@ z86f8WJqM`g0&*LW&jF%6fRCr2=aG-ev4Q=_`%!;D-`|P*;s0BEfk6B7^Z$nR{xIx4q?w=_a5_p)5QMR z3)IeA9{=~x|14|&d+{&dIc7g&28{OpS9|}k2OxTXYV$vO_5ka}zxjUc{V(C0hHmN z{y)qCfJPSQ|7ZBm=YZAY0O$dw9++%^<^~kw;P$}=#NWa5x$rlWurE3Ps_*a4{XtxV z`KIRkz5f>-gxY@@`)`5$47ODF0tUab*uOUax0?I!Xn)i6{u2Lb{(rd#@Z{c~(*HaB zUmWKD2LHcN;^Sk#^X^{V4_E@$e*49vhXi}ill5<8?TEae{G03sdxLs|AKP`;(RU{4 z6^Liv{jO(?FTCIY?Qx3yU(P|R2Eb+jNe)GG05(@qXRJ<|0anfcDr*2_&MP$ls{v^3 z|AYNMx*8y^GiXgqnE`4XfOPUEl}ftk_W&J(0A*Rn9l7% z()Ru1$>(Q&-^cued_QLX^!fho+@FbkKccs)_UE|2`TlbMpYy`(@7n%9!T-ztf9C%= z`z!SQmAStd``2fG)qDS^+y4i({~z4zdFm^H^VS0Xf8pmx7Xy2;cC_!8f2p^~0iO88 z(VHe|0QfB60AKvVA4^X38y53~7@!~jhw2;^G04;1lrsRe|(K(zsu6No&Z z*A7%LAGp5Z9{U92^<2D}J{GINHb5U~tEbpIk 
ze?#Yf4Z;7U*gw<$OZ>|mA>-}0uN z$Cw9t`K8|s=K%P}g8qkdG&~2uJpj1}5bS{N1GLl}K&%5kTe%}@M-u+S9Ply^C_EH= zbkIHkaTbthfm#dfVgStrrhb4f2Otly+1i$8tMh;|9$=1;$7l-z-s3FKu$KsZzU}+X zJYV7Wr@;QOxF5P-d+$GDA2nB)y$=4ROZWY#)crB`XWtL!ehk$9sr_}ef6V^J-V>F* ze~5qW`**QF@sDd<_x>>a!w;^Lc|Y%Z=QAhze#O7|&s}#NeG8bAwIenE4)DVrhy$QM zJXc_(QBB4#?^OG!E#u0Z9|Icp$d}tQBBR{6}XZwS0fj{%lXh#@^q}nLmjAechjN|5W!oMEi&CkGVfm z`z!H3wLk9tSH*v?_MiMeY`_(leM|h{hsN)G`!goS{EB_?{eSR3M-M&jaid=im;?46 z4mWU+$36Y&qff)XPvQXhEPUSQKlexD_2(ZT=K{789LYGqq#WRq=K!zdK7dUPpwTP|E&AZ_x|>? zzsddK*ACWe{ku}{^VKiEYP{#`A2Z1B12eme?|=5QwdM~o_rEspFYONA&(oj(pGUV3 z;Q;tNh!;@j@XohCP0q!u$7iqGS9F0xWX9*=5?9H>@$w^tXB{c)sPWYj?_Yh@x5XB( z(tTL3z4}s7n<~W}$YqiY!K1H-$SvQ;}-}&j1<69$j{%1&@ z4@>fWo7RbsI7`;qGVA*sS?9_%1de~6==%Z+H;rCMSzxiUxL1A4Y zIsOfzQ*D$bxxOt}mr0KQa#>eMJbRV+wyPz#CF}dr^QY@N@r78|i@tY*=;Aj@o=^7t zyL*%9d!qGkyG3+8F46k73(w!aS+q7;_iPs2-Y!^``G2zRyY~)Rcgm95-}|tj^_$** zzr^zQ-+#AT+hi{f^abLGegHZ4EIcnQz7BN){vXsFMBgWJzvux$9%zkRD_F=8+4X~K z={mWlaBb-|hHFi(Ib3_V2C*kFuF0#fI!3QgT&uWdaqZeQY}fMTmmWGk@7(=G=X;~% z_@ARb9~b}@CM{s<#V;D&Cm2J$|HBRYv;{l;;17=e?Dt>NUk_A%R&W8>fj92?*zt$< z{^jw3U-*;p5r_T7__$;LW_;oadyh{#;e%m)XnfL%AC_LCzt^?;qQ9VMJq7C@$ETk9$?@r@eM;8}PB3=cET?e_LmKe!T9C18kis$1}em>%j54bq9{mTK7d+Uy^mu`0TT=zAWo2 z<8yRTCDxYXUKPo?{}@P&&u~M-?=PY18Yu` zYeB9FyFPG@d`hnwy>@W@;98Pv>Xg-Xec844QM>l!`a4;!L%SwFEZ3=Aqq=a-?ma&K z*uNVeb>ut82Yu<~TZsaOa=)@ zFL}WWMmIzMz-L@sCyR5ixCYWSKz=NB^joE$X3xREdSpHENir|^gTMPbqwhl(fE`$9 z1L+!A;Nj7AF2IF>0r%$@ml}$myX82``e@2h3oGlfd7ek2Yu!9Jx*qryyT~TYIMM_{HM_k zzxO{zWAqV&8^9(m*5Vw@(>3t2KUY6L{`FrQZI;=c-`IKQ(aQuAn5DhA*%^4|V;?*E zuTOpI=w(0tl+oLyj_BR8-n&?fbMOeh2Hx}3Cy(APeD_s5KVkG+a+&Wc>)~3zyRbux zGq5-Vi!-n|1B)}TI0K6_us8#YGq5-Vi!-n|1B)}TI0K6_us8#YGq5-Vi!-n|1B)}T zI0K6_us8#YGq5-Vi!-n|1B)}TI0K6_us8#YGq5-Vi!-n|1B)}TI0KJ>GcaD<7H43> z8TfwvI{p#N z{eoxxc>CYm1+;j)eFkuSv>#oZzs31kTptU3w9mi-9}9f6ua1TPSm0xUj|D#3XJCPk z1wPtW$HIRs@Ug(h0w3)&u)xOxAMLAS;XfAmn16ii)ds*0(ce#-@c(-rAC!N%uKoIh z@*|V-FU!dP{50@2DZex+Kh-0^e;$6H 
z`Vh$e;Ni!E9}54$L(e=MhCdNyrd;;yb1y#@zTnv>AA30L`?D`w_E6}Pk3IR!g3I`? zA3XC=NbrKI{`o5YZ;Fenja%PG(~v2e%KHnZzo4csJ^NH}=)+GuwBTyV3x9lhsy`Zk z`@!>1Jp9~(ho1=jQ~0yg{>f#_o(+G_BOtY&{3r1j-=<^-;~(3sDAunOW!77Ya?<09 za`v5ya_QBIay4vwYmlNWbx9lGJL&p!=@@vA* zGOqDUp?)C@{hN&+c|FH;J=b)7lIi*sToc`wjGsW$J+m+1&-71SEPi|B1~2tV-@mDE z(Eqz2%53FLB?S~ufAjAvH}8#(-n)6_M{mO8u8T~_sp*=S=&H#{jQokB{CIEt@rt&A zfwqd{@q4dRlxug!q!m^-H`f%V#q7KmmfI8&pOIf&oSzXNvFS&O^23!|4ke~#A5Tw= z_|3W>D#{ffy{cTL{OF&RJJvt_$(4%oy*GZZeDX7G&5zD{_TIO?rzjUbef7DYUvb%* z1wXpxw@+TADCY*B@uN5A{QR%VZ%?`RL0IoQcPT%ANVzLq`GfMuTjA+*u3qq~-(G&t z9hcwvy@glLfpZTBoqOeVH!NIu!*y4l7o;puZ2$hFJ8!@JwtrLq?M6no}L`{h@!k#S>N6_>U56wwbxg^uPE$yp>y)MW zD|6slucy7BB;BPQTex7&D;Y5Pt0}i$QhNQ5E3WwEtCrhnhG@$S8A@7{L*{@pJs%EQ}s?TOsCfB(M7J-fC+ z{)>LLaN*A^KT{XFFIG-bY;9Q+4C;bCI?iX8Ux4K_~|F~5xNOK!w+JDIZ)EM&r6NdVHV<_WSz^{yJDKCb4 zz#o18763n)8R|1beP*bC^H86K?0(IU{s)b9D*cRqvyoK!Khs#B?Kl2sFCxTDTAxYl zGiiOQw7vkNgMJu(g0A_Ir;`7#O6y?D|4miF)02Sl@8t81iv{>iKS=8*_4Tz?Wrb)O zyKm>lm+0=ZUvJ$TbvQXar=Ym3qN1$0ASWaF@WH)XzkZ(XKDBD&j)>^^&-hO zqGFRWl3DWbk+jsc>Qsb}>m#;{*+w)soy6sr{fUfuK+mX;zvi!qkxI4b| z-4BAGE4b=M&McRYXN+7-(`d^ZGlNB{WxyMOD{_3DF#I@5=$4%O&-?W%*(E8cyb z_rpCezV^OTAOCVy1gNfGe?-?SzI5s9UVHIgVtnV5FRak@#*ddD()F^hzRJ?|v@N>p zjTfH$9o@b4p~v^?dd7QyIj-voAAbzW+hg_iXC8m(w{-XC4~A#x`i@uMsMGaz@4Y*q zyWh{%k3SOr;4O6b`g;~P>iRp+yfCUeUjOs^y59G6o4zyn{(EkuyFdEvJ$8L0{7-M{ zdh^pyuh#X#$DR5+_x|p;*U;VXFMLwhOBX-5O4rkuEsN6ieededdvE{sO_$T%^R8L0 z>-+EgeYCD`y#M|@UH{8^cf+r5y8fDfqPuf1x%TGY-13WG|L)%Vet*|pfB56Qx83^d zo3Fq22bY`zpBEu-L=Ixqa*AjqBH~{UqcfJhnIAxvsRLrlF<1yKiV@!e*No8S3k9Z)vEhC|&cs`Q`$hYR?7Tt%s$s-N);QGA z)L2tpQSWx;XYUgP_GD(~HMv~vLvG#K(mbK-qm^a#1Krt~5p;j-zFp~A+14fcy_I>cjx7`>UvX&?n+Be+$QdCI-FRk>n$0_+H}4C z_;JuLt<^`4Bp%)(?r(^VYr%j^P3wo}XQV@eW9@odLR{=dasTV+7zBazBguA9O~oi1 zKQgSB9g2xwC+@G=7mY}9I6h6+2a}QtbiFgtrDsG(?OQAEuiA@o-W?N*JI#j=SL=FJ zhVF{qAGvq6xc}9zTttim2Qi|HV`AEMJ*!wBh}^qp*9vj}i*36j_C-cSMn%UQjKY7S zF)4t%JGOmE_s8!2c=Oh6JK+3#*9959A*UpIW_f^OkMfckJA? 
zYqz?a?Hb*=WBax(n>Vh0`scXswB8mHIxo~2a=R7d)jfOOym{`El$-e|TX@U7TU-li zlnpHi`_wzgz-QPq&X$MqVRqO0vMP9SM^FFIC=IjGq5hr@_;FR)rr(22^WsD0bicFL zGt35hJL!IT-2FcH2jnpHyB}+&Z#U65+Q(v;Hng^P^$re?j*X8F5B7Gow>DrH8~0;M zE2oHC{yLx(id%8N?Tbt^uF$`nq{_x_c!V+Y$hDMCCPS!cp(caeD)PR&$QW#Buk;VJ- z3rowZYU&|`rFEpa0r{@Hv@k!4?hY;9ms3!L+Yq6(3B!yvkJi;xHMm`cxd#OQi0s^g z7MH7&MpBFG8-Rdoh5|@xfq3l{qBs+>`{!e8a~SU z>fr1Bg*CN>nHhTogPo}vnHXjHg;k(kT#R(loA1_(Gg5bn``eRK;J|}9x%C)-1qB#O zEoHhZBPDr0!C(edIrW)ab_ko$m-DB zk`m%Ji~H+i;|4MEj;1(3H66Kc{OGV=aTude++P(cEBi5P!F$;pMf-j(dqGY`c? ztrz##>^lm<;^J_pH8BxmsX9w{9Xfbm-&%2h<=%WDqJ)#+d6~w|=JX za{mz}t4l4@&(dv`n4&sqn65@KNqqVUU5%YCug3rPUk#E*BhcgITwF~QVES8otRH4@ z4m_d34$jja*&RDC*16}=)5$`XGtQoOw(AUwav>GO3ydFz-&EoMS5Mr5E&n%Fg+E)~ z#QO%_xL$zY^n(ij9Qm+s{Oz2J@%Nbuf2P8psqkki{Fw@Wrox}8@MkLgnF@cV!v9}i z>;I=!__I*q|Chh7qpF<#O;zI4-;95kHW?QS@SA>6;$J$3-}G;R`1vlqc$g{iXG;8; z5`U(|pDFQYO8l7;f2PErDe-4Y{QrNK_*bDQrXN0uf9+5IM@{0>qJHDw)oI3slou8L z)fK*fySjt^{|zkY9z6Z5v(G*M{0kObbkW6^e)qeVSuSJW9lrG9i!NGl!TIN(d-hrL zuoS=Yx)po&#bEtPc791^Z4<2z7{bf{uJ)$d%94WY^rP`H`}VB59!nuAS8d(BFDBt= zdUio+CDx?(3=EHs4iEHTWqMU9ArpYyx&{v^<)?1kwr4*H1=~LNOt3Rb6vO&%ns&=*U1%M{~U)ZY;#zAn0z`x;ru^p0CuesBLWP>KPau9O&t4 zYpku{EBE7LB6l0uQ`ggyow%gb%v`J&s%yf6h@KuShG@b&=Azup)TFqB5xX|8Bf7n( zuG_R@?}0-JM>AmEikkYSRxEw!#S7b-ijsURk54>wAY#X+uLXAPhHVh|a8g=kP9a_r z*EKY$O}%*aTTxn=lbM!u7}njkaShq5_te$vNC*&4&&tKRi^}R+wYImW600q8v(OQT zq9S%~Ubl*4PhPop({>P!JCc%qoYq*BSEv=e<+QTmczWs)BD;Ol+7$x(e zPba!_KU)6P>h+s<02~vSkd&N~ny#kzrllm~6>aQ+$e5C%f(>6S|A=E}{q5t=R;*dS zY1^)deFtL>AImTHr=PFr!ChD5sIKV__pwqJeh>BU z+`4JY<_)Vp`SAU>PbR{FuLJuDR!_~+-D7%yJ{-5<>s4QU_Q{7Iy!{#hCM>VK{`LnS zZpMNWyQ?Sx@3K|hVJ}Lq#_!={YgT@-{9{00e|ZjJtT16noYp=cWRvKM(eDJ~h?>FLVsQ9&Q=$q#s zeJJF?`#AdUKRgf|OPJPYUwrw`wYYK@zRBKny)}W$;kXy^y1ngB;lU65;cf&e`|ZD5 zw78tm#~*+4ndirGWsiB~m6w;3iff+4%AEAz2Ny5;-5rGe^=)_EJwk}chaP_HT@ruo zbI&}pi=eMPGOp{J@4xr%yKeh6ppJ#N-1;a1K3*CUPUdpI@)&!}oq@*A!SFZnYvSd5 zes}w=w=5*^Pj0-4_}Q2J;r<7Y;-0%dObv6l;A*Jw!Hrnf@Vh&1{pC$J{DiPSy!QGi 
z+_3fE`TM&UapejgWDmMRoP>^Cl!T=USgQ83>#zMG(3KZoe$^LPd9(UAzrFp===k`A zkM3Et=%Iv!`1t4VTp6+VJ>agp>hg<$>^t*;;!z`NM0l{%s)^z}@mo%P;$H`NhpQExhsi z>#q6Xm6t9!`y_@uF~Cllf59c+yYh!uU;C4vUVp<4H)=Ny-f;a-ue(pmjs4)t%f5HX1?SG2rM$tuuY@VCpZ+0uL9pw- zpTZT!eia<@^s~=B_x$rOuou)9wU^k-+Dqz->I>|7_MH9Mr$d5o#R9$3%YxrKUS3&K zPpiwid-?~lf{hmUU=`b7KgHhW#(D(5><@xzDPZYsVYww(tA?d!1{y1Y>~^dL8Xv-C` zdiqH433Z}}R`&JbV<{{u;#w9;N%-QqT0?L$A*DkyASns4YpW};NXUfdYVzfLkYxyX ztX6_Wy2yYec3H6pT~JucMQLh z-95d1YF|%pPdAnf@}H}>FVkz%F90f3^_^9Jx^sGhaiQ;uu3Ipx3o1js0}^!jctuJHI)@*SjXhb%Q>FO z(PQ^KmBHa%4(1r{tr>9Xy1To!wv~e6z(j3%S!u7X4+EZI;Gay-$jZ*iqhRkEu;~^o zj_h_LlMRlSl@ym07v&$%%1D>kv@}r1qDJI02Xfg^Z6S2Z0U|B!X%cq)o+puyGcrpk zyt%sS+j%x~bvE|l4?`^ldAZr(oR*qmpr1%i0ZE>6+!Y0dg%oXF_Nro5>~>LrYssy5 zIL1IdC0VveI&w5Mtra(IV<;)|sfD|zkQKTLc#Hb%4y>_EOFeod=_w)66A6iE(nl@2 zk7wuPP^NS@<*__>5eZeAg@s=H*6LD%NeglM~|OVq;{3=;+u(6#Z>O$wyOC`l&%@nwsXO zG%#3q6sdU_I|p(O79?U>UGt&XnCPejPY977 zi`*A=&<2>j@^E}Y22k#TBkYK~obag$sE0~oq7O#xi{yCQJ&)~;*q2B;XCI0?Tuktk zM3%^tm~-MVtQgTxt*>9TQ8L+&vzr#<_=ZHP9Jbtm_Vr-n;v8 z63BMXqdRx);f^tW;NZb%p>ZsWb;VIP4^%}%ZV7D?!d1?R&Lm|dCOM5 zmUZ{;J#3E^D0TM`hi%=mdDDhfp|=w9+~7w)TMw)N@7T$9T6d{C*^Z&@+qP~Y-1^TR z4ZZ*g&T(UKaA?>CVUEz?;GZEax-P!?_64`QZn*@HacPt?uU{OxAk?+^dfc1%b!gbL z&%f}(i|j@AlKS$jSJ=xcQYL%B{`|9H;7jQ;^n+|X5;Wj(qZ84XR-*EBiPflx$+Tj$ zp|-j*=dYnWUEaPdA5n_XZ5(Y%I|%okeEldwVZop7gifW4o}~yVX!jrij*i$D}DYC)1i5~932|;pb^1exNkc2RJ0|!#3c8?Ak_pS%~Rs`$X#MW+lqi*z|UHa5!BB`Jwzvy|qGC7MbIdSave*V{YbNtakA z&jqQgu0&D5(Y=eFMgD^cAqwxGzWzbWU@xtx2VqYle%`Q1VG(5oBF(|QeYE(#4_~-+ ziqs0iWkrP?-LvRfO2kAOML`>KA9AbO%X<2-N*DAycc>k#y|1mcrKzF5u9~OTf;^7yU-V2a z9v&eJrPhX~=2o?pwP~&W%>=E1olEghnJX#hAZt^qtRl&gxSLzl7S`O?*gz6j)pmDv zmLLl~M;7T>^bC?2k|k&(S=QCJwly_q4ZY26&B&}(6_hNIvX67{*rI2$cxvUzvae>4 zl4WmweVa&@_0SK?gNO5SvNH|*)5kK2JWrN`JXyB(P_lH6BeAm5@sdJdk4bO_)-vbd zflg-2`eJBB)G{)jC1J-GJ)M>Tie(fQ-CYfxMz(C?*|N2;09t`udYVM1q=6()1MVt3 zdy^P8dv#eU9zBsPTk{)`EHjT~q@@@QoHnC&>m0NXRKXanz%Z zC?r+N^73*?%0X9lCX!_}!4o)sV$qY7Tq(WS3-QHM6;N(hj+*0c$CY#RSPp)T=Ot5i 
zeVQafgE(YY`bx>>N=JI7rzH1irkd%dXV1Z|^m6psQA)0HvOz2|Qav>oMx}s{CD5Rg zk}J=Z^{LJ1(nKWJL$OAKCl1COitD8Y?MIT6v922p>e5)6I}c(G79}HBwu1bjn1fFV zk)A+ujpZs=Cng=q1j=1>lpS?f5nd?fPa6QMj${%dGx__7O@Il@CjZ$d6~UUJJFcmC6z zu3IkUiSjzd9T0dypgRDMwa(dpZPyp*&d!hKA|<+JXX%&duH3m`=YB)~uI_%17IIrl zaOtYFVsX6dv>v)1IE}8)?xX9WbLjfqetv!4%=Lev9fbY4&CjVOzV+vl{mA2w4oZhhoV~)Pu*Xe^lyLw^pM{B;m;A{JukAg2$sl;Nl&bj2$ z3odnBa?UKKgwPbZoI|HM|5=#x{~xui2Rn?&f49aNS5jb1`rn%C`?p(b>HlXY{Ws1` z`eS0acV^OmX3~FV(tl>se`eBuX40R}G|;@n%%nff3eHUW&rJHyO#07E`r|2aX3~FV z(!X|Q(tl>se`eBuX3~FV(tl>se`eDEzw?F1f5oIf&Gpj{|Ih3n{rJCO^IhUj|EBtw z*Vqf3ZU;xwe}$l2V$AvnXDh7o75u#KdoKhVbpLs+qI`9qqU^&yKgaPQaL>;bW_?Iu z3sx)a=l`Ox`|}j`^m__>Z@0p>99G!DFoh-IcNXpYue@QsSZT6cbOgc%484hsPe@F{OJ%I{ zO3zR(Q0jlTdiM@2jiF^Xc;yEi7Al#6IG3Py&I>!YVsQ@NDsdXrxk}BPw|8vXBDRe4 zCFW(S0QS=Mjhp0Z9KO_N-+r)+HOr=)qg3C$ZKDrv3ei%Ovz4lIR|6*r?%6Y?4jdT1 zb%O*>RyF}vfl*R%Vn*fc&o>yz9sXwVYC_2Nf7!HNBKf){$sFsVMtq@_4{YI3V-=F2 z_W>*;GwaRSKPQO#BBQ24zP^!Ynr(@V|NU*rX};x$jEI=r(L(vR{~@Hu@YLaxdIWGLSfY!asZw%?;T2(x;PXd_+b01v&%Cqlj3bXb!`sa82Cb1WhB} zOr^XL+5M4X`@zYhGx?}Me!YIZ*XyyCk|U8V%sp=)!QMQMy+i)cA>@?=$}1eX{%b?f zZCFn-+4e$EUnJa#kW&o#q+}qg-dMl(Ymf4tRiYH`j1U@6Y3&Vl3%2{4cg9&~{F7qS zF1Yr$i>xr+VVUo8m_4L(2kY{e1yFH6Jmb;DVTWB#4t zA;A|0&sUrmE)EO6IQUG(dC`*4;7fwPgNp}4gD(x9r#R1892z_;*s=71#Sbi*zvK+X z8E{)za8U5XlEn|bwPsHY7SO7PS-h6O5?PY_NV1mfKB}CqxULHi4h$Y$y7;LL8Q2Z9 zva*_0t2OT0I#$Q(Sp&B1Y+}u<#odYnot*QRgaii!k1q*Xk&9(_rDbXvD`yp~l2vOp ztd`ZOXyI-I(sfHXfVKx8jw&xL!D2Vf56J3O0-mZk&s-W3Yz-b=@<>8?adAn>6hws> zpm}3BA(w_mmzj`ig-*>g+|F>H}-a3Qbrksu}9KFcefr9(*pZ*cYl+x~pr1 zT9T~mrzFA11KZ0AeX(8(5L9hERdM}WSgjmhf6$gZ(m!`*Y3J{VHJTskIX$1lHd>LMpSFmT4!B-Jr<5R8=FX9 z*SXW+Np_3$J~w^pAn2b6FZ)RSR@5b0xb`V07G)9`R+&;yv(3G7d>gb!j_FKjCjAeA zy>k4glm1q6{NrvyA%r4>^pff1Hio7tNKNO3G=$vK4GB(uLOQKy$ekE(7kC=dCmy0M zPTomyRycJ8Q0uvi!wJ6`OLk$6-w5@LG#_lSp9rcaVv9lSB^&PxHKMC|987^#OVu(J ziGWAqX|bvZY?A|_TJLOVY^1PYm;pklQ<_ZW5gv(STC7^(tgJT2?GuB|gG+6YsyRa9 ziwj^fwb)4s$v+NLmh**@v~J%#XYzg%wy?lQGu4zksVAsJa9ZYl=ZyKHM)>i+{UJEy 
ztPox!+#9wmJS_C=P}B(L|2lYCcuM!0KPSm=eJR3khP9(r*o)d<1i zp_hbGjj%L4^wLnO5$+EUofYZ`S@OV=;Q7H+Bis=l8WcJn{J_KStc{309IwW+1T|4j zvKswrpS1IuNhfuXjL2cFrKSy8Q4GoI&Jy@o7Db%EA;I?96Ju+V_e!Qik}`B=GM zR!&){vWitPA2@1_UoHdAvGmdCO0Q9YQawZw@0+PAI4dM9)Ea6Fek`fdToG`JB719; zI|=K$aT%Z!A>pwVKGlPXyG3A6L0#|%PIt-I6(}`4<%?>8{F~BrOd_^+EkH8kSg6=rM$UkEn1?rST1QvtmEzXMcR+g_q8J=>o-Jy=`@q zey)C=;yn4eCjA2a9L0Ik+XMOn{cOc`Tbq87ewN~T#jRhgpQ*S`+M-{oe@AiQ*n#ir z^Awl5UH_hr1g^cKU#27XYJbqbuj3hBo!^5C!YaUJxX z@BQmuajnie^ZN@U1HwJ>UEH*=JpGv&7T&KBMjbmFqX~(Z|^5 zUwrx1S1VSoT(z35?q9QZEvCLOPqh(W5pTtu7TeBt4AK!g^p!GZHR%&9ZNGXzjj~49 z9LvNPdi2qJ0X`lqMzMsduCj`1jyil+*+e@Ww6k^>_FQ1yBRD;#k8jj4G&IbH)e-mT z2peg_yg9zY^C3jhma0P#VzndqoU@UW;0qHP>t`{aD7aW2SNFbzmBX(tXi;hWLX0==k`AjoDN?ORt@RJ&nE^ zVLftxjN)D|0j~ZiZ|}BaTM(B1?ldG22}~BiRxQGU+yEeeO#>)X1WyogTm&M+5sabWGMsUW zu~16UyvK;b=pN z`;sZ3sH58WiKAhY&uH?rm%-FB3j0kON~PtjyvuJe8GYc1#^?qzqBdHTDhOopw4To= z_B{#F8xN?x=mLaenbe0z)RE>{~N(!L?YxkBjq9Q4oqM-5QQv_tr-dhdXsi2VMkk&tHYRz(3 z-k4W?>D1T>UHS|3X(f$|xCq1aH~us&(e>me%aSO@E3-PRthw~f{+ zm|f4z8Pf-(`7l_^WK1V=x;wh4d01t}4qmu~VJ1^eRnydTEyHq5%~X$T*=kNe zu9j!X4=m6MS*zCOZtozKwQj1xRMdd#02^dOY}kT2h>cq&EVcl<>KG#vQOVZYreS|C zYGOeXwbw1YMAU885p6VR+=NxzNHT#XpHxv|svxfpTS4A3Zk!$iz)VM+b(ocncRu5J)g!S5j!4^bU zV6}-ro0F_S@=786346gowuz}sCZtdYoD|^*%^-$45n$8otjX<9Tom?HnF~oY-YY7k zw%SePofNucFM4e!$urQ~At>B?}D_S6+9`szXjm>Ii?j48pIpdrEiie$OzzXIkO!|sc#U^YX|ZK<_K`P5LbOi zi?G}@)>dI{li}2MCnW>fMU<*i7jL6q{rZ`Faj{f$GrAY^46)2;uM6)I@dJg~`lg z-^+!9iWqM61Z4pi)7bF~vmy?9<)ZO#u~}d-kMe#kykkK1j86&yGR#emM!sQOLwr%~ z?xtoDY$aTpiNZ38BZ#XQ%0V_CFgB?&r3ewjy^$`^6GCOUm4v-P+&|oUN1(t84b7oB zU;^8X+K_h4GOJvS$@#&8sBg+Uz6zRYQ;M=6>`%yqCcD#1Sy+SSA*>^o(ZF#ZsK{p` z>Pxx;Y8|zX<86s2pLvrMi7PT6Ap=LOqa)*^qsaYUhcc&0)dSXsg8;^xBbgY?_En8^ zO%pEa@SG7Rb|S^P5p$NLZX@%0p&FobM~pHd1Z6_#!eCJ%STj^A5wZeNBCuQ)C4yFP zav^QziRjR&qN-s%79R98&XB=2BUyPCRYw9~ zB5lGccKzi0gn~3f(c|vv6-W#BLPBcjQI1qwjS%G{gvB6fCaEfi{&Z#B*gQuk54^h$c<=S$OiEsoh&ovUGf@kYy5^V)tKxxSP~+xj z7$YKzQ3c?pEm|jwYnFk)6j&+wPl8L6NmEGLR4ZsFf(y9?+>^XbT9WC!#Jy;SU`lzx 
zDu}y{vYvaS^a)aw`j4o4O9N^^o9uZ#*dI?dqr~%sJxLzn$AZ2XBW66e&Iu$U(+dui zKq=?C^vN>GaY#DI&Jwwha=TN9hmjg$=n6A=#i9vPp*ERGF!?62q@hHMfN!#s)LZt0 z+272)CcRkbEYOz8E`&}o(keZHevj3B%^FaoLtQBtKR9g5QC!SIa%Rj8yIhLQqM{G)iT z;$d&XHZj4UB87L7RXsgMYEe)paRd0Hp?^Lh0f?%~qn`?@fs}G5?BkrKRLd)Xm!vup zG#W5Y1TCiF?ImdCaZekWMauIuW>!Y^u+^Z9Wn|8%z0t%{IENr6vmI4ZbC9|D1D#BY zLNZCsI!MXb0?Bwfe(8;qZqn)Hl{H$$rhyi|#V&32H@;{0?p@dxcL&~E^>5p{b@S#; z8#iuPzkc1s9aqgm|Cd82BXc* zjg8niw6?aUsEaVrMuEn z@$9961g5QeF@PnFQ7*R>q1h0DlPJtLBsWADg6j;TjP)l`&b2V9PP%)Afa(STdVvQL zHp6zTCm)-slNnlDxEfpwvjoy~Mtkh-4&R0K*?lO141Ljdeuib<_3 z+p!EXBW7Dw-h+*_@jEuC#hECA0`;D-0`EOm(S=p$9z%L`z+@-m1ZPBLa3U^(VFgIwbs(<}!heJjCMIw+2u)U0#|~Qz*hB!Lr-Z0 zJ8BPG`ouU&d}DDpY+4u%6OyO~H-VPj7P>Od@zZ!PF0ajzEEkJCfTCCCm~Gm^|s* zw1)uwJ_M)}SWnrvRsjUwRG@=d2++U@3*VlE(GSuSWIuR^lP5Ax*$?PNJ6=Nd@@^2W zC_Ra_SD#=!dCchRiZr4n0g7rewomVh^eR@2r#53Kc*nEog$9ZzZ9Y*E`x<)$z*usU zABzELj0J@EuD;1bdeVTTG0oC79uWpchAAQ#L6L{FDsM=eN&|!himBsxkV8Bg_l!i= zQ{w4Cb3~c43(`hWVIiVsiE+%W*KO**h7(2tYTd^L7qQeB0jzz{Pjpt%;YkT5>!o4* zVmShm$}vtrbe;@E{=%1*6nWFRP#!6X3IcU-n8$MBPr94ZK|Q)pW_E8*!Z6IDl$4eR ztD}J9dDSU$S|KRN&htc)>BezdswXWQ>e)V-%YsnI|E5{0DWxATKR=U7+^#8Li2F zhW>c}W_Ca99d5Q@DpsUuC|(Q&UkBVKxRek!(96iW*sw zm`rD?rG)WQQ!wGJ_2PX_H|uf}W7ak%M;+PSNpiD_b}w%q$y2cHyp=b`6gt{g`ARKK z?pp=Wh;gQ+r1S*#xVve_f;Sx%<}xZdgKwMQTU6jzNTnR5cW2$0@#)ao3rJrw7PS>w z&nO+td}Rrhauk!b8gHn!74wGNQq)j%lA)`)c;f}+QRS%byDLkfwZNLM;8h53Ani@`r4@y)?k0x?4KNYJ1)>J(kb4m0+uTFK z?xKag2}OhpTYsjVh1G{aiU^WiYEHZ)DPr1HgfXj>6(x> z?!I2?XQ{upiN(Z)8C=MXoRs8zn>dLJ*5SKdm=S_Hc>M*@L?hYj!kxTq3=eUCn-My}(~QcPqUZYF6FC*k(?XHS&>bn@Mru&%RzZdBlkE$Xu9SHTl1< zCyM}Oc7F^QYJh1nj}HEp#pH`QLZAhSggIg#70g=uk4cPqFWfAVH$-^jD~~8vP!~bl z9D&TCkm9BY5w;*c+9?+LhX)zE3C!Kr-DIQ4iFq!Lma0x041L3_Z~T_I!x%#d-&m-? 
zQ;3X${^485OerK_!h-xk0+R^RcGNx6cAl(&#TBMhGks1_n}Q6Smn7NOYy+9U#-6$r-JV?^WOw^kcS)+AymVqOEycA`Y6U;H&H99h9M1$fY zaSEW2Qss$LaAu8jeNd7u4RbGB6<-DL)hxzqs*?+~0xe(7(@+suv$ZU|tU895R_QEF zOQmXnQWX}g7*i;ib%jC}s4nWDPLh?ik;yR^$#XU){h$jb#zm3UN$lx>9ks>XJYfY8 zZ-F=v(~_JP%}c>G3?OC_k;m|qOf(5fJucbeom;oNs}r-vf-SJ_W;iJMzQ8hIiL95u zsMRz9#d;7xg*Ilgc&XOq?xZz4hOBt^*p_8wuq!Y7?!9))g3Z=Y)TBb3j zm!`WNr~Hwi0Y(CsL@|eMvXt8Cq$dRdrQ}VqNIZ{`1ekF)^v5KX*H%Q=>Iq!*@a7el zj5P@?0;veR;&DxCM{u9+C)y(K5?qKv-9~a#p)XxQ8pGXUWL!aA3M1fn%V0PJ!BrT9 z!>M+Va50_dYZd7eyrU_ji(-S)hF5xbKYw_T45VwiTindZm{ihuTlh;P8#r1ho8zqz zPk&yO$@;#YB6Oh0>(;?U6abm#;aHEqF$fiwWJgs z?-Gm<09iO#L_aZFs}nAxN=}!l3ke)-nshiXpB^ zl5$*oBMXfV!iiXA9@2QDgHeRs=ut>*37j_gdBZOsKFye+qMR>T%kXC&$AY!1*)|ct zks?qF3lZ}v0yPimGEI2Hv=K_=*rKe5^#nYhP!OSfLMg{lYWh~@au z$8bT}H}N;^H*r{Ki#B%C957)O1@&_SB}aKaVa>)5^W;ft^rUMtQ# z^SQkr_i>^Yoj=L8J9ck)P*u)XYEuO4FsN`M9uyrZxpnKVch16ocr^x!^NNj$!a-i( zMYK3-%ckw0oP(Y2s?9Kinh$K#w)bbzR(Dh2NH!0-O&hmA1RP7LnNo+{yA8IRHg4E( z1?`EKDk(}^nAE^gmYV?F@&bUB9w4_Gj5Zm#e(m|RQ$p zpN=l`ScjOe-}KZZq%e>fxrHO&nS_j`-sL9uS{!F{Zu~otEVatt54fOvaC92=APKsD z<0lqnzEXihY0=weml05t-PdoBP_pQWpd0+4I55x8_$C2-sN$@=!-ox@HH>P4dSn${ z8c*lsnLX1B-7tyjv`}AJ|62w4N4j7N@AtSBI@a6;$w@dr*slwGe84c4k6TaUf>S{a z9~2S9@GSWCbWrmU*t(SufvKT>9*QJpw0$w@eC7#;9JufkT~uCb6VCVEuf6 zLIuvy6%q)tGFJFO%~8c5TQL>XJF+>|b?c|0x(|mPntN*_HnW=ym662^gDGIRkp!oxz;2JGzPhf_;giK8OnsRx1sf}LTvTd|FeKM*@F zm1C0}cPaLQvlo}8Kl324uFwljP_-62tC@IX8-ij-*^xgj!5(FaC z&qUe~&=q!(KYA*wIEqu3ErsZ=uuB1VH#K7NN0`yyF@z|N;`Bdx@lG=1RQ@3){}02z z3u&+ddP?V;n)nQwT;)G(aO!?fce-gcz|`(y?tU`)i3gppIAh*X$EFzi{?hx6qsgxB zOWhAnW7=;VeIs53K?-iHSS@s#b#;*JJibh28P(75Q^+C%tf1{H3U=R_BFG9q*L>yh z4hd{CSxtLo8iAYWPJp{ZX2vmnut4JH9eE7i~QNvkP=tQLFZRZlXj zy`a$SI7X!~@60(=DTLVnc-IXV^GX35jfF1@zwg()QV6w&EepAeR|=sx&{+1 z3y*smE3=hX6>WNGDPhB?Krms|T5RQ1Euoe&M^(}@OR)EvGyG1f5Ijh1SmcAGy2DYC z@!V2iUEx$AaHv;8L6nbXY|2*ZC_nm0aHtj7-%yRv%o=^6kb!X`j z+6>SYeleg9yy-Ugzq!juHAiXspF$w8E1Z&oozPxykP>_2gQN7=Bi{h`b_h}b7I1e3 
zHU|L;53qj5Bmy_buHw>(~Kk4&&{SA9YSt6E6q-us-?If6!}1PO~+D#r}#$uEo}fRZd!YCkFv5vKRVTgX)F(FWFvv z>cL-Js45GT^K?h|wm;6s{P{oW&Yt(r#@zRXx^v=-`Iut9Sa%IQggJ#vb=Q{~W)g5@ zQ{q`D55A|n8qY;_aGCBp8i=Cc`?_nZ1$q5)-SvuwoPD|Ow0&Pi?z}>G?hR0p!LHC9 z?w?p0BJ7pAbJr{jzi>FN3{;hqlppAhzs|+YxpY;0T9C@G?CvY*k8_lN)*biLANkcA z^XFLjm3{U3rvxx%Hg4PZUNR5&`DOI?&zhr&%fmlB?<5Pqv?cub!c#$O7QnWopI&l$ zpmBZV#_!DwGOm*se*er_s=yrm`Q`IH*Ee5r)@?qc@Wwhq(y2FFwr-#HYzZKHJxJq#oZY+eHmJ|{RFj0n zBtdk|+I8#X$r{s9CFJX0uQB*NP?s zpR;`Y&L9DD?De&)i9Zf2VVhb#R6VBR0^lGHBKn$uPChcj=ina$xq9td@5v&5`Wr+! za?L8vpAI(R`itX{#2F}_6JQAWszgp>R6$fASFTzk)Sq-($^p+=R2;b)dn|g*FVq*i zDtdG}6i3KEPlXgmQkjg(k*ii1L^p6rw@o^OB^GC4`6B&HO4j%AC*&)uzFM&gl78(u zuf(4y9k9ibE3t9pWSx8@jWv*8u2{j522ncnX(vBg#W;k8%uh65Uh(BLMCF-NUgQ^F z8APe`d6$|c6-sfMUty2SDJ9*#R~`oz6%Az5OTI|I-e(auVSgg}(wDSz<|L}-L5-n8 z$QOZxsIU}`6O2<+ppXZR?LPoy!;4>hHU&}dKw%LM5`Yu+LRYW-+C2O`{tqZ2U-%rS zsZADDQsrii^h5gfsx&J%7SVkEGdiNpL=y8JV<0X!WbM8!!!gnZ@`d79nly?%VR<_9|uz)r`^>>`pM~WFeOWh^RWnk@G20$C0{P3>^mYhI3j8KiNz&1IEE}->* zPK6acCa_Lm0j+!DZ#XNEL@@Q3FfLb(({*jx3TW-)f2DH;eZ@7q7stA{06O$I7G`*` z%h~cy(Z8IqbQ`?$1GMt7zwqMkKieh#g9=br8@G$@_{e9~a2avN*6k{LR;iq*UMQSZp~Y1^drxIj}bDjn~XbZhntV zfn3>V;9h@K01Y#Vev%4(wtSPFy#IZ9TsGZ3Ao=Soz z>Ppo((aR5e*cWT;Vs41Q>ZPpq5D#!_0VV-e^Z6;5MDehfNE=Af9=xjQU>H2PawH~B z>)iDol>jX;L-D+7oKhzApAxE?&gc^SiK^u8;3OGs7RYh_BqeTwaFVxbJ|TzQX}b25IrwUxP~A|=8>>bz;ZdRpimfGHGEiT?8P0*H{+zM zshq9qp*Io&oEgYU%mlr_F)vUkLKH<;6KCe3Xcs`1XSR*SIPXNQcGq|;Gahm%FNZT1 zP`ns3Xp*_rYN?COzzLET?q;fqx5PO(j^z?g^>mA1O~+z+7*B-6=3%4P+&nKuGiMWW z7)aXDRai*&HCae4Cm|*4(U90&i4)9ym65m+!g+zLz^e`QwY$p3-O{nPOna7Ei*y>D3MFSmkb{>>H1SkRX%0;#y;oW5> z6+Jk?l#F5JJ*AdP!E7LQj8oH09cAUzm1(txZW5iU5CVDBTP!gIT1X1#pi#pO-*VW~rWDkiV9FejZ zqy$I2E=z+QOD9VScYUOt)>>xoBW9>kSeDsyGzB+bdEBY2?y5L{)3ZLxgMfl z$#|~(Iuuz_7hA~w4BFF|n0t{L{L)#F*Jc7bIFO~QK0p zP;*_`Z6=T_WM7)qz=$p(Qm&kS6UG5dYMRA|l9uDj8#iI>A(3gRsbHsi*jch2IoZW7 z6Ua#*mTK{UEO23GPIk3VAs3mJ;sLQ^8t*FC@m2%le^`v?oKCzOK@Os_gw6nkFiNEjW^iB}mVo1w6st*)zZcHb62?4$VikA>b 
zgY@D6!g{fGAptAG@&TUHdC_4=fOiKtN=ZuKBq&Z!G@wS^0Vv*xaH>AgqDfGaK$t+D z*GJX@Rf4LM1dvq4(SDPlo`TOXEXitA(H<;HQ;;>47F+^huE|&_f>ca&7}1p$aD>Gd z@q(8N>#0LbR82_nY)~ZffIm&6;w4Ez6i}2^Cc8=}AW^4*3avzMNl7JlJCP)s?jkH! zG*wIM5f*CWUNa^1(n@)Ki8T+$G5g!cC@0x(n`; zfYM0BoL6g{xl#5KYr7!HYenAkvX6xCnULdQhf%>3cP~-VNh;?qc|#eF<5if^?Gh&o zP|=Ho6SWXCSx9JO)Sm>L2s4%3W&;VN$eOw^i1?IE;)Z zHz6PaW$o$9Sgn%eAf2epB|UG6O7cmVUBEmwugBI@>AR~uOvl6GOvN^86t0Jrcc04Z zSVJ_RL><73QU9#C$-SQO9^Wl9oo1Yw*j>Ar$3mFG^Op!VajH4UAEcLM9dcX%wymvWEeb z4WVRtD}*wYw?rsmv9u}RQDd<@}h9I1yP?mkf)fpC@g&I-?TdSvk$f}MKS@Q|uqO+*hB%IVj zEeI%dRLF`?C<+w8gmls()mj%A1fQ^KfUQM2aN8Ksq=#$i%~0MG|@8OK1xeJH!$cH{Gh{qQAYO~rB(8`inas9qic-@ThoJUK%)!o zSkjPWDWz?vx>mHL!zat(S2!_?m{LHJEPcRZ8KXN`kqgaOsgsnX%2=z?Y65EQVucb*8X(Jvqnw0>ET@+@<&tua)KcRu z18WB34jY0>$7=^i!!**S{Ake9GKS*Qigp22eKl2wh@sHX5Osp4&B&%KXUyP;j-)p! zZqZh)uu-p6R#aLm*&&a_HedK|`)8Zzx%h z$_E@YB;G2Hlhz=17g%EdGS-r%O z`o@PP%diF6RKC?m%EQ3m%rO`35EVggzcI=p)#+VS69uASad^ zhx-CW68ll3SE;gdQbWe)k7@Rho|6KLT8pFjf-7zqLrmxgw-qyl6C@|`6VE5u%%~u! 
zu)hcks%Y^MHww8rF|(PP(LAXv*6Cc1fC5`#0hZxJ3zHZfC~QJQi>4V7vxu2!WC-QJ?wk{gsak&mT0f|Y1RgONmZa2I8%?zD7j#&~tg zvV(BESZ7`?E$E?E+=Q~vSdu&WC_BnhoMaH1P2|j|GIZgAY)%e~>=EsxImCD(dr&i$ z%F;Bn61*(gvvS7h>|QC~h@q7spl=%$vyGOlpzLm};GrhyD1!;PKTB{YCaFnI(n;{K zfR6^9Tv?e}fmyBMlwfW@vr@tol9QTdMJvm(S($7vix}lA(0Jd%1KFW*G6R=Q<@#U~ zOB~G>fEKP}8OH*THPM8F@SYLUOe{&6Bl?9KIKVP zVKp^XOFKEeAB%U4vyMp%;MEu#84T*Pc&@*eq@n?Fu%-p2vt16}f<+9N6ybxO#W_h6 z-pB$j8XCD$Q&QE`pftAAE;zA>G1!^zv5 zCs1SEhvHZqJFLdz8|lPZNkPeM+vxTkwC0OjnMJTjn@}d2MLVf$$kr?_=&%bt$`VgX z8sdwNJgTS>zuM99F#Xo^z6}^>l5rzcV zz92TcX~eD1)ot#9vZO=t2>}VG>$b$b{N!u4-w}DZd{{q2cMKQC#l>6WPt%8OF??BE zWJFBGgg#$)v?e0VXopYJ?IV$UBlbqH2v_cieva~ zx??aAA&nh6O`mApv3t+X^idt_&^r+4SnO%Kt#;e4odquaLfu|^09$9Ev7>m)jtZRU zZnq`EWAF!O=B6r~?Cz-D7Zr{FbtRSIe0Rr0+<^o7DZ0zmh|}Gjc@dHN4Z5>^Pd@z` z&2-?G%k9wLP~OxhyoDs4@r1o>I_Qii)pHsWop@mxo;BhFwD6Ph=-4D|gM-71u)7XU zEVK++hjF+9uPw1@B|oo_ae~<0juRxD23)|f#&OJ!gka%>yB^~)~8Jsmlt6=cehnJpEmtY#S&_K^RXdS`_ z5Jr70iK!l977()z%o-#NNk->jFsi_DQlJX?iJ=9Q0>O~pEYPu3QhL>c(IAwifA)Z# z9zaMWVF;3(Kx7CF$7xvl0>F&kk$9O&3^5a!+5LpT2ZbJ1R!&9j3gC#qK5M_~M?)Z3 zS0E79-Z_2F+PXUW{??Ctc zen5|_x2M^?`t;XAmy8&IZ zaaXn_@#(Gx=9cz=j=5bHeov|J_UYKi${(eM{@G^dGX$W_jeT4{Aww3>sbQhLiF{vODm|19-E4JS3? z`9(I+sslmHf(C&NuEvJOz($%%7b4I-D}QJ_r9mwXFi2_DLHH|%95fE7cQw>E1U9%! 
zN~xJ<@H(kxxUK<*N47LK*4N>L;5qdwR#ec_M?jgo0-siD=yz57dORIEL)OFh^ z_t`oyLalYD=}uP-Jq$X^YkC|=ea@l!np#WkY5J(G5>JVEif*$};+tro2kFyv*LWE| z%5s*~3?b{eCh9RDPSf4PWontL*^Zp&9^rqn_2KEg!;OsR>aND4^l7@ItE8-3N5XT} zSJ%`~!fP$=rG(c9ucw4p-%Sb6QH%ehgf~bDuf3vzGTm@lJN>e^xOl>I@r37YY^EeZ zCmd1C#)f+Q5IieCGARJbPQ!tQbey0>@f^XAYf=#^>0l*7QA$L>^`V5Fk}9&2hasiSK)_}X+mVF%RFISk+sYYI5h!Rd0GXBNB!eGM68xk;N&%GM zf{+I%aGDEE40#k3q6@JCc!HY^Qas4>;6)OV2@r!IxImmV=)^G&wC{+aod5}m1UF0n z$%8x;nWUln8nSl)q_OtT8N@f^yuH;pNlJyZbS7>9lA5)D)*wDJgoRZa+ah;D2yx%q z7ldQBEQl!H`x-w3i;jH`=(YCE?swPI7xo4NUjjkBL0~{jAmn{IsZQ#``+GK;lcOsHok_&K6P0ko&L4cR40lX{G~!;)ATW z0l>iPY<^wUqGaQ$aV;K}QQ15nPfJlX#4>eIW#_fd_)ZI^k<^R zu-!g~S`E>G0gyBpwlCpX*=;D{3rovniXH0s($h%e@>kkmfSliH5&^Pa7fL!bpG7AE6S9$(4hOn z_wG?F^326Muw+)Sx$&X~bJ%x>jNWB3WmYdcvDm)dK*01W8?)>5^23;Q)}QGLlQDx? zY-D40`UV-Zo65Gy=IdatcabbLWt{5nxN@du) zTQ5-|9XJE3m54ZdhkA)bx{!#fJ5KMYqC!-}yP{nQI3q=CpF31wFbXqx+Sqw|xN#p6 z4DAS@II7mxgV*X34OBhTt~&#{fypVS(T=FP?G>7bEDOGjZRDX{5|@IiE)9K~8YH)R zxrEVfmmRm)3wJ3Zm2=wh&B07zc~KupG&>j2O%^Q9-gd{TmVL-ZL`4_m<>j>DtIT7y zmJqEIU0!#`Hal(mSZ!A@f)LcXkLv7Q_F<&XQn6E_*VYj#&a^}p`2-Jig%%a?$W|YZ zQ#D`U>NiA6qug-^@)C=xkQ#|VuOl@3lK&D+6|T7B%i$f_MBVYQ#5_^eOUmh}Q%Hp= zbFy3fE$U{x(HJ5{Re>U>C0=xnuE4vl!0cvxEg{WdX`S>QpzD)*Bs7d>h}-~=a~FpT z1~&Pd;9&D8eDBWQ;^oAvhe6Klu3?a?BrgmKU0!Uu2(Oq58K61_{@%}YlPG#)Bg2Bu zFeI6jk|M{#$i_q-CC(YDQ6sO1qaiJbS3|c>Vxu4Rhw%A3hkeKygny$x$9PTh4@gO^@ZhSV~O;v3OXTrw07*+8u(%I}=foUu4eS1W3+P{$`{ zg3y#79KY5gr;nzH3XZTGJk-EYF3WIA4aH%L<2JWoj0z4G6*{BuIiu6g1W;23c@GqH z(oWu`ho?k}sy;NzLYp#Lo=!DRtq%IY}I!BlCLee~?; z0q`h%A22KACEZb6A-6Uja&Ul~ltUH-%84>djS)qn&~ygfbOx!ix<~N`CnN0ew{Q_%L2Re{}#OaE8Z`VAD}(Oa&vfoUW)0EZb2GIaD=} z;ncuFDI^L*N&r$F*;eejA9LQunUlU!4Q%mvikTB142aVU8NPuWtYcegpLsgk3gnI%|T?}%am;rpeC&k@D2rYQRA%pj}#4>MJ!$0$5kAPz|7}W4$DR z2atq2kpT!cm&60DV+KfOfIXo_mdFeenkOJq?(sUQ;^Owc;z3YHXSl}jSmtWJSk{9G zV_PT`TO24hiVUFnf^^BKD-sP1c47aJk(5MgAgLScHU!UZ6WeG-xC;kQz}# zL`om2M5UvVJ=kyV+jF3+Fuu@$2xKvGG)Dxo4#z~Amg>L{Iq=ZFy}ORt1^xnGzLDqa 
zhrW&cL#p#;IQSr11n(#bBhTT_3jppaL(FwB7e~8TmxGAh@85?SwtKdp4d(@*h@Fe~ zLB{|M+G|RCQb!yWN4gF}A9`dwWcRKe9pRjT47TV|8LYcVy~TgnYPQga>br}5_xA4D zy?f`r-Y{|-etzB3kELhk6wtS!FfSAOry&DnryNCEgH$6y(2c70V&JarsWO}$$kwxZ zGt<-as~bAVJ=Wy)P;X~rc?Kob@Q36~NjuQ3=Cm6vlD8Iz!&yd_kr_-smUrqJ1%1rk z#+*Y(#1X+Am9INuoR-Qvwwwuvk@BFYomSM;7k(go*K9~Vs5GKSC-#ZGcSxt5J9li~ z+7S+C7&{QC=C5cF^&!DY1Yx|zVJ7~H$wS) z_#wJ%w&TjV+?)v;9HeJ)#twfoe6#299xsn=z}(%kY5m6^W9=`(Qc+!OaOCX{)aSQ%Y!};YMHHL+P>vZLsn3NU z3isJr+b}ueDYo2I!#1p6AO5f5(D|+OJFyd+@6w2kAbbpp?Afzb-`b60`RHt|T3dg- zF8sZ4XXfUu)bDz;^XvML*R5MizaQVkzoN1-_ra#0*WmZ-mr%9AWx-%H$4=-m?x5K= zx-n6Q18Vbvc!|@^Pvgc5fyplV=* zXcRgos0{!@Z0t_QGm1J}tM)R~3@OUab@lZ-NcEuqHIhTDokF?12!&!$qB#NG9*7m9 zVweJQJKV3&=g~}SMh2=Ixg!roiF2n(9=F}p9pCL^X-Ez+BLc-T0tgwXTLw5qL8Ov~ z{S8fzIEki&TfUpf9Cabt!c)_*Qc4!B6zvQAxBNgLF9d{IbV4|LB((t;006aW@`D8e zVvZ&>ZzV)r4BYfH3*>!xl}T4p133j&LtIR_fdCvhAlQbdpp=Mm&@LkQ=Ak16C0%Ps?DT_*qz4G|Stl?a2OPif~h|1}>##6s;S zb>5@V1dN7+YyK;~j(BE|Ob{mL3O5?`YXd+FaKjT~uEx>cjw;SxNx0(2K($FFVss$x zcU+MjS2_|p5-}JVAZCoj3$L0FVg7*~13VO34q}UR$_3H!K)cTwLp&7)VHH&I{W424 zBHSWl&fUjjWb5TtQq+iyBaYg+0quyLjs=4XH70KH29qu8oVOvcTpc`O}OM|3PNQ(f<)2% z!b{Z4v%S12M?G(&#UBA6`ozMJ(r65~3~UKB`!LjSn5rF_b^3C+#YcaOE%8j&8<8)< zKpd;X?#Zg_3?up#X!2oPJVYW?2W5vT`JIlego&pLre=sW^NU{9Y3g_{O8-NiP# zv7r%hGz9-cmzpm&TxdY{%Hiy6I3GdTpu54In{7y*G&EeG z)zTr(UmrSssx=(Gm~b)J*wB2fN0NUKy3u?N5mge5V{zE%nWs8|@q?k+aOtKApTLwi z8aV63OsIzHsh)5c`JXk8YuwjdY!DjO!mIPy3lUI-s7x8eE@@K|0!!Pb_%3x<#jUh zGI$O(wh6@*N!nWHtDHzE=rWFG?UcXPsyTO)?064; zq0xZV*w-n;)Kqu6j`p5DY}8Qquj>@t3LPV8Plrd&Y0-%W#-w(@+wKx;#$G>#^ z%j&2-RZBn}Tf*;$?QZ--ZT1ZG(u1B(*rzSEr;$IkIzlFyxDz|40Qlz*qtu& zKfLx_FVtvGxJf3`agDGVB!X0=6~)Kmro3_UhVMp#!vI+?Ac%pBu$ykg-$;aw)yM|( zW79kgkUqL0yKZz1WD$gL8K_wx_0)7fn3G$B)RDuSY#fQ#ng)Y2Qc)O39s+S@^T;&& zV%GrJZt6WvLy=s9UNjWBdkh8H*OkDdk+NdW-qGUX0Qe0syD>C1l>7qp()Po!8q8Ve zDZ#L}AlZbEZm^Ne_VrF26aj*auh#BY#+PMNmOIcH4;qfAxn=l3DAjTYGkhyVXu*iQnuA0AyWmDWw5mlz>kP`sD54wqH3_!{`Lg^ofs-Z zhaA`i`#hrKiS3CX!6d`FxLUx{ogdUb047(54&L8!o0O7ITKktGM8%KFylbez-S%Y= 
z5P`-hMIZ$*>f8g$KEqNe^R0f@&JAh{K#C8)lWiuqhw!*ddA5xn8k+z5zz+#0#}2!6 zadt2)G!9WX{Gz|x=Pvs%lP3|+WM%TPw3rkbO?m!atKNzRwSm?EM&pnM%r}ij9t-B! zI@1(xO=yj88Ndo5Z^a6cakT2&$ks|^mIg+MFsK+uhZE|%!Yv6cgB?>#0yTtV$rgv| zd%{RtJb*!*)5TE^JQ$>;c%(RE)}IZ9o9}DxZn}sQTCKQRf-@Gdu!MqHb-0Ln>!M92 zTQv2xbll)mhkV8&)O)iXSsbcEtk7iO1?Ok*4Bc#2))yHuw(45_ zz0+NQAw7jelzH)THyPKUywQ06!iDoBnoO_`X0WcX>1Ja~w=%7*=a@`BgRkC%XAnll zO1mg=MB_nN=k~Mbange{ArlXWAbe53Xf?L>D%0wkZXeu@lWnlA+C{Um;R+d7OSV#C z(O|zZL*aT%nci_B{%c!5l|GRv}#?$2Xb16f$vt;{vs)03isO{}z=>*Lw(YA3DQ z3*BsB>&f23pd!L*cfr6)m|>j1ht<^(KP3xWhie45SBifd^zYCaWs^_(YEfOc83+Bz z9-s8r+BMkUT31d|u-(AFMGY))H(fm^s$N4WGm&$i;*A`rqwpjLWIP?&sBP1RjogYz zK{-(E_HN_Wdx%Y7^GDUgnEn1?GOJcNvap8b47wwP8-#C83K#oXfQ-++>Q z7m7)7h%-k1o?W{<@?)VKlu`}pJ|`a`2c>`$&yHp4%pbEN`5&)e#JaM44!pTc_jx+} zv8sS-=+7iF&&MpPw`-QOR4Bx{kf3K(I2~*HMUU!}Pe{H46&dIIFMmucEDscMR}PCd zc^=j0uxr<`1|t1Du%3siTJr5ckVd@#P+;x41*|M_CFB>sRjhymrMTmDmnnx2DO82< znb>+8Y&UKsu4~pRw#=0p5`NLUs<^IMJD)W`NZjS>ysB>dKzgmQScvtkaiI$NzhUY? zMe2EX{new2z5wd6Lerx{<*Iqa^QxT*tddU=M;96W3*=QL=zFZJ=Sd;t=vk>K(-kWj8dR?zBDZ(49;B(Ug@}m{+vI-PNWmUZj&wqhEEA_DA0n(4C=`L0Bf%RidS zT)~s#*v!-W1wwGsW$Hj}3ct57G_nBE_nIg5(Q>g|&LO_qfeVCm=TA{b&kL{;vs%7v zIU~h^^ASy>)8B|lW<)-qvx#Yw3Ti{Tqv;j|C%w~$cysA8#@=z5y6fNJD1i4GYsCsI zN0HcBya=92xl7eg0n~m~(8#3Js}i&RD?dWjTl8`La3*tgrteJ3o&UNu&QcNb0|(0M zq<~6KJX7!dfn4&z45cfc7Qa~TOBj9pSX5@}K&d1Bl_rM|y+{sq@sjDp>AvvQGppCp zYItZU@;VL0MnNlF<*t8Z+#;q84qveku2#dMJQpooG!+9uytjpB7{fD4>{uI}{<5Xq z@OI&Xg;RLsz0)gJD_z9$JG40HhKpvn3lz04|05!5{Wt)V+Vyh?ENc<>O6oC zR3F{<=R4HiO?cvp7~%W=y!YSXv7H^OocF=yy8sP%#XS3MCWq>l)7pvUHzQ=Czc+6Z zlg*rEOueC~C{oMRci$s2C984R78`tP>vH1J?VxC4)Z|yZYrh3tuk##~pmwSq>^v;B z2C5(MT!P-B7as2hKLCaAWTp<1pmXQU`JaGxQ$8RDc@0QsT^G!s$H>`pCIWg}y>IS= zLvdT;sh)t+bncwlvt|?Y4yOm`tx?jie-1f!_AD{Wo&anQOOHT1B2Z`g@6Me)d)CZZ z)GSx5y$92vo}pOYPKqvZ+96{~@69FoGiS^kN6gg8Z)M+mD;uP?hRWmIMz|9R$~=;It0=&B-JNNwJwRu|3V;b zn?!$>dMrzuS|>%CJkoibsB}>iI#E+JYS?hMqhW{+P*ICV;!BGaRp4`2N)2%5vDjrr z8J_-JDTuCGlzBSL;qMK9ZT;lis*|28ouYYa`RO=_8KWvUno?Z7ijsJO7J6o-ww^R0 
zipjc9l>XFXanY40oL4li^m-Y_ET_9fO7a&Y-6cW=#X5uz=@rpV^nZt;YU-Y(7u`~$`V!ERVcfvRm3Q#R1+NT zSE>;&y~4`z&cM;11fseTKL=DhiUm2#p=RnnFJjgY7YGG{_RSL{j24T8g~}?t;_eFv zucJ@^&QZ|=Cx9q3E3!Qb-50hlRiB`uYP{TE1d!*r=$b><%PjBs!3e6H;^AtxbWY1G zi$&Ny0I0`F{0WfdQDJb{>a0>4>a3ymIe={8;%aywh=Ep$EBLV-%(GQEr>Le#N4fd| zC=*rd01W0JO3czsj}n7$R+<5>2vHI$E{R2iIxFccq*_Kg{Pj76*^C+=3_z(U07S7F z!y|zFuXU=@h0qx_I-Xjb;n75Wl{Cy%ttd^0&dT+JsrQFY2a?7FB5|U~jAh1eS$~vX zWO*gpHK_WhJ5R`69#23rOrgzO{<1Uh~C{$Q< z%p)txMnOmtj{U`8Y6fnc|>t5FgN6n zbU7m2u9bpUfKFg1y^X^tlS1AT+a5q&!}kSHm1HL_38m$2kSxk`8(na^SAWVu=fN*2T}0~+3zydG^n zNe5ZHE9NdPEGuHPG0=z~O_`AX4+0gDy=-v}BigRhRl)J65bKS>y7Pi#5%Y8@C~Osb zb>$QdEr82W83=)RaX%nUDOOE3*q9PI@gAO@UU#ufDMZ~5s}e-sxhtkdf1vxEh@;ot z1xwx+c;vIU>%OiYw+eG7XpslX-fsWd({nOvgJ^!>g{s#?iFX}%nvinKJv^iDDam&1 zEd$xt?uS$h|8F5JBnr5#qxz$A90xEZBMxud%apuH{^N^y+dOdr$M_3?%6*S&Nesz@ z_HeY|AkuX%=TD!FG|xx*+8$(o$+SMo&491%!Fea`K5u39V{uN)U&=$d8oJIG`OqGY zp$n0l8;+aiOq$F*oM;bJrP9=sfsf zVi(Nu`y#whQ@qN`^RROItg013RZjLH=^T9wip~b9^|0$|QP7yIEiIfc7M2(?f;iWK zI?_Bt5=I`nM;yGlt`mI0g$ykL$l=b7L0Feh)KW<)2B7BY=K%5omz-h^3|LRgJ09Mg zd76ilt*5j7q4LUDfZYSSayf}TZa`G#cr<`Bcm23KLv^8`KR~u}m0<`#(MrB>P#(*{ zJWoC=y@SL+!;OR(06B9f9RP8l%K>J2wnqta<{04KX%J1;u3iv5PI9gU_6$5F_X33SQ9bPF{ZT%aJ1O$} z%+f3m#@#m1ywFjgTzN;a7@ZFW(fdK7l;TE-mFZzaK@nFJUB@6czE}v|{?XavQ`(S& z*$+bJkLWc~Ga?_%Adfi1BPhy4iO~QdpV*6WyifXjP*Wm7{XwoOYC?J`>a3DOobHi@ zHl_bpwhR6DdHvtr4)*`l^X1lGQ>URt7g*V0$h~-7r%Mm!E-ocC=LmMTJNk=3GiJ_M7G%t&KaF{vL8R2gw`koR&8N?pvNarl%9s8$`ZdNx6?dQE z^mj59_{3vua6P-?}ur_t`daxbrz5me&geC0u;6WW!TVRV%RkoLduXNKt>;B z#O2=&9WrDn%OQ2KdhTNweZm0@dG%FBL{w2s-k8wAfWF_p`U)e&up5pXegYp*#mnX^#6=9d1q>w7l)z`ys&FFoZ2RYzUwr9hAe1V!a%$<44>8^T=NH9`R|yeO z#o>`$c z*s|~dDA@kXKm8Ga=U;>jFN;_AGr8-3{P8)alMarSVwe~%hm5|RIq}DCmf@($ zt09vwGr;=!v(Ex@-DTpcG32d#Oz-&lv(EqlAu%joT7m+E55@)m`_oT9^9(V#6~o~9 z-AwL#{13nX!_!RfcId3;^QBDgc>G^}M|1!be_E)&ygcL*Q+xmEcfSSM8Q-H6c;Uqt z-n_@;EC2M{-~9G>JU*)53(w8RS;YtAZvW(|r+)KW8t$mq2l0P?{+aDez4h3WPZIri zzmL#Epg&X0)SLhKHmYZ~pw3&R}Bwn_<07y#0fp{q$##KLO6f8ft|={q+pg?07Kl*7tw< 
z6Ga4PMYW##`9`MoegCn?e)7|w65}2<7Ha)GhiP}e`{<*O{e*{#C+@1Tzy3`l)9iYfK;0+QbkV^qjm>A{4O4iq}&VSN#4T7@hl zgCCW3pa8e5RS{Lyo(s8%iqcjO#!Na;kQV{Dddt!-4+g?e(<1^B#X48Ll@<3qAS-Eq zejXKhRpN+~UMf#W52O!}+*}VQ0*O*tOs3vdI;22~lPL7_?5xtFOCF4wv_CH=SLC|l z2qa2mF?j<~KKpP*-UIR#8O5@+*8{PV_R)o7&EcaL`iexcRa7lK7!w$(Fs$Wjk-1sL zMP%lqlJ@3i_s7V>77rwtv^OU!2BR11xV255fXk+jFkX<3w?r$F#XTO7MIhWa&(@%k zjvBlKGK&f;r3Yjt?a8K!xVU=l(OVYO!AQH(LWJoL5`{9a$pf;I_GFhWMggmFQ*Fb~DGd@HX)dN9GH-I-KfSIZPxxBW9h28{%-oz3>48{DJJV55RtFc6sUR}D1~%K()dYYl&T8p8>!N3i9GThRL0CyU z(oud^i$Jt&D<|`c2Vo}dI7Ss}y%3q(<3WT7;e{AkvdHuRAcU^AYs^E-(6PEQ;{*(M zRL|{cRDL;%u(HxGcmSa!02D%ps9;*pu>S1m(a$uDHd2i=nP+;4X40ph9Y!?(4TDKDnNkUx9n}id!$&ocX|R=+ za>hfolC~T@EDr0=V42eHA%;LiA!Kow2K!Pp)MCEvA!F>}L*kIeqx~r|$Mg_Q5D$W? zW3c|2o+3Z1fYFXBwHaj#B4jN^9;((6fB;EY%XjaHWHbJH;%sPf^pvyxgu-~p`R?;Ra!=N9~m~0(AaJoNL z1C^L-hvmV`9->UzwBNU11I;)jk9K<~X41y}`}P|=P8_lhW`sP%5E1ubB+0Z6?JtG} zk7@&&*cSjz9F+S{c}St84Jcg!q8~)-;QmVy~<2p7ZOv1BK#$D+f>->;3Q7j9a zJ(v*Ke1T_k-2*X`_rd#7K8q_1j!j9g2NOzW?}*fS~ACV5M1u$DG{CcTbaeKgP?TV zd5nD^HJ<#JQ^>afZ$-9U4?(g1H021BmB(bI<-0zDGx#W~3&HtdzeOCAsg;hQj*zto**bRg zjE5?dw;u5wfpfvrk}0l(pyX~na@e8jsq)xu&$v+X7F0h1Q9nv6Vx{D`&ViB%syOTi z6+X+6GRIC6i3YMaL_`5%y@zNeZzf+z$4Kp{m2#xbLo-1;c!>N3B9gD7+zllUju1II z`r*J42|p!D!Z3D%$V>4Zk%vnhTdj22OdgQN1N9^F;AsyLx*R;9u>+$Y79d{s5JSoE zeq8rQKP-=Qdx%!@C$95j95N3dN_YJOl4*jtZ@C_mPS%gMp+!)rEd+_*_>5}#=J-Q2qQU)(wQJY8L=8moiCkZf zLM!$;d6t+zI;Ehc)_Mxv)KTd`6h#2RfaA*}VQE5l#GmGOfcVO;J>T#x#Q zFk+Qht*;Sl^>uP}9RYf`G2qs!<%C$VQmoVf(ZI%9x4MxK*EVbb3wiOw<=S%XBe6nT zDOPDVr_{AlvpYkrveh{+lcie%#FqKkP%(5 zP%IRSbnw!kl3Yj+Ltdc^RnVnj?!5QkpD*T%1)2jAAIQZ`1UdbIf)vhtZ{EE3#rtBu z29O3$a``O+Oae~u9iT5>#lkb)S6YRj1$R$hQNrui%qYS5*Bj$>CHHVp;NAHF_ zHGi?TI58Z`n@ky1F`GadgUt8mEn$VT<}K0|C5C%snf(X4LEJ4m}SnM_5Mvl zrO$d#e=i{%+&TdnRS!(e&}UlHSF+MuX1}Yy8xN%yPehti!L+H`G!iN&?`EYxoIO{c z8xN(YOhlgd?RUgG+GH_Bq2${IglV5MdyYOQJ{+olb0V^#Vv=}UyrTo9Pqn5^J`YIf z#8lkk!{Ww=?`4cex|>qHgww!HpCV`9AeB?6%oH;%+_%HF>E5Q%NI(-%yroUj02Gt0 zx0jlP+c;&0F(ZI);`?KffF3_VOb`<_popEEu$@%8w{YroV|pOmdwnA3G!;x^nDzF! 
z9Ku|lHg%dYEfBWqUK{&{ctd+rjMK)82|7S>!Wkff)o&rYKV_=BgT z6r=4iW5rkvIDMQoe#~Z4srS9N-uAuiM}7CPNOFqT#Oq?T#@w_wt#Pkqvf7j0n&g}0 z58pjGj8dVLMs{J^8*+3#DOB~wL~WvYOHOCHEw;G497zOY&I5^}BjFx$myBdt+M z^f{MhBgY2D`oh6Oe@4!iFxqfpr;m_h>1!?bC65V=fkMmvOgUc{q9uuBYuGGO*<3ne z^y{O2qm6JdIO%yxx{6oD5N)UdPFB*ZYXGr&Cy#tB@R||s=@|OLOO%);UJ7Ile>sI9 zEjTd{7-fV*)z7_vX{0&D^Wp`9T7Q9LVrsoSIYi(kLz1*4J2^>D($N9u z&1Wd5irn%G;sp(G4GPFVe?)?NCk`DRKODY9==Qa?OPAU^(Qxh&R2}PXs;|Y#^ivJD z;76Eu8ft5BeAIW!sy)>KH^P*c@q@qCKxMNV@E}5c=Z>StX+o_4;+&*{WWA@4*XT6^ zYwaF53|8-{Y8*}Vp>d|H;r=-GnX2O_bo}DSFKy%=bisOCeJ84qQ?pE84Z6LbXeG1W zQwM^13zu*}@tp|N$P<_7_qH1RF54@s^=bpZqfPtq%k+B_{cbi;M_e2r1jR(L_ja1A zeXa)P*OGPAIu!MyAxBoT;SSXi#q6u*E}yl~wu>kn~Zj^;%katF`(BvAR%JQNbjm+N!FziE;rHvhKBI6?%ogGJwe9Mf`4> zr;gJ`-Ri3>tMFCCSL#)=q5}lJ+XPlO(Gs*G0D@Ij*-ZnkS5{N(UtTUQ)63$)N*)i= z@0Zl?=DA{Y5!K5QDzr+gqK;&nXR#cx$d;@rF4annvP5J+$q0nZT17GY%1TN~jZ$Bk zze1MMa@jqYGNjxnD$z@PrT#L?nz9+|tUwRd0b#SX5FO_Yrpd}a43RBmOp#{_izz

MI<8RO4mRsizc^#)#{A`2{)%@gT5mxkjszfO0@NNb?#6 zexyQW2}{3NggJv!my@sO`!Gf;vI-DL1h-Q}iii9P3To>4vLL^}C@>I_ z+*^>Fr|0?d0~Cn6L~Ssy4qBo$eWD+OvuQ)W)Cp6dq*h0BZXk+6n* zCdt}NWMh38W+2*zv?oZl`GrOy43#X+LZeYXXeNrip9aY?%LdmFps)3O*kK&>- zn|w$6PJzDwBRey)vW;v5V+FE>{ORw=E%1RPFQy~H=gaox7`d`QX_AA|))-Wio|&a* z`O(+Y%0?~|ik!%$qSsKQC>@DR12i3>GR7CKM z3_nNEkgEj!cvg--2ew2Oq6e^%?gLS#*O16i5f;M-n$?q$c8sE5KE&SSG5W)mw{uYJ z8pE!oph2hsnmi+{#5xH?KVQ8Jgn9+N@Y6cPt*qRZw=KpxiyUq!vnkHKPhY5BUU6>d!k0GNkmgunP&`TLU6OFh})EH!w z_{a8IG9VpQFhfR70-GT~o9S2cBF^XI_N%WZfZyq3Y}prT@m;rMeH9xhG&X>NQpTg6 zC?-r-^>G6>#b0C9o?zqATT2w#O_qpkl)uKNXK!`YFt7H?s%kPrY@$xG^=Lc6BVfH; z3uPTYOW!c70Ti+xFkWPb0>@=F|54ylMGaez>dLAB2*z<)bp-?LUR?I}z=j zt>e{fJi4k+kQtI~rImVRBFvzyVdHUG{cfEvEw3~x2USTnTITs`7$-6uwWZ}+xq%VN zYK6>`iekcSm6hw|J~C+KY`1zbT{0XcFhV-IH&YIcWXtPx)fR@MO&KU(d7x5Olk`vv zU4Mlf3EQQS5kj8NDregzVY@^X_HgoqYoW@O@segOmWeD!8Jj2{q`9^WZ7M*3?IJ_t zFO#LDK(Mn67i253k&~^3=ud!-24%8}zuwJduyzmv%T-LB3y`!zo()6lsS|uT@)?$_ zE=2q50d!iYgf=6VdAtm3k`Ojwxr&O7;(;(wWVd8bIhkKJTx8J_OKfF3Xh8sP$%B2; zi~MBK%1L@(6ZQQrErS$UpN9qoJ`lAMs~CARu!H&1OXINsRp=9LFi{qg{W4*{N=ikk zU4osCb+0JDK!e@!A%`Uk$c)){Fn#ds%7w8alLTX>%-Gc;7;aEt#+0$bAsfex6*9%F z%fb00gGae$>|_!809feMg5;Pb%X45I@D<0o8fC{UvyxT;yO&-%y@U@n*{eKdJ}_UL z1}efxc|98@3!ivSCh@-KYCh~7ObDzQ+8X2rz*e#`3pJ7{$AW+ed#1xG`SXxH%0;#d z3j=%REA->jR7y5W&k5wBakOjCXfdq9f&%PZEHY{u><^o*Z2G&gZx;j#pbQL}V5

wlNQLuuxs3>nM{_IovF-f7E@#+HEJe{q$A;z%ckRe zW=;SmNuEe^jFp*{5iw+e95Nb($I|`j0WwwTwX8!?7HkzLy*Sd7kS<||j0`I?o$Q!} zUN20PS5sl)22g@0V#eah23@kBp5nqXn3FAd^ZRem~rKb1PT+M|B~mybvnq;Kk?-UO7`;Qc`sm;mogvJMZn> zsG04y=4zE!x}&WTdd8ff2MB^eu8caAJf|3mb*}{iV&E`Wnkhr2R_j@ z$#vvy-41R>BvjQbTGIof4#dL1^LyO5bh*Afv~20pg~ve~v|j(%T(@=$ zy~lT|KS1RvP*8rU6|?~#`_`FjS92)zPR0^N`C!QkI@XuCF0fXvI7TYlyFQ>Hs2H_y zE2ZfZ*ZS93tCv?2$G&wRFlEW&_fzpn46G5W?KP`?tF2WZHG(d$Ev2~W60z7?G_MMP z_|^VZVx_fW4Sk>Q)GtJoZ!y*Ln%@GVZ&kudvBFxum0|(Gy!lKK3$6J}?|`Z!EE`y% ze5$MHXhja$}iAcVwXH>GvooitwEE-dxJbCoc0XwU%roC*e-^ocZs+KaVO^Exrv> z{8Imi)&~m@kQzJY;&<_$c-NYRcw&55e25^g_F({%mh+2faPT9~g};KgE*{D7b2shM+$AZA;$W?Temz@ol|ixw`N zzm}9TGp5g(Ls8FJ){OVNK}}d_scFmi_ChncWjd(#EQ-ucT_phsERgdT%r~$o`552V z_aOqJD3K**$anU^6Y$RuU|r@?{LJi_%>-?_oKy@F;;EQ_0+wbzDQ4DALA@+7T}-p4 zPHqH!(0k^)b5^idllIQ^>4<8ErcRkW=N3o<-VM!tckav`&?va+ooUmiPDS9;nzT%U zmN3_tBd6sM$G%0AiGWbyJL7l3TMEpPv**m#XUpldVGFiTn>=OmWP~sg0xSTTCN^7} z?Vn}MoJ%fY=)}ay@4WLi!k3fega!b#nV5O}Ol#Uw(n_X`e_I31d~4$Lo1hMuVNRbh zc?+%5$KxrS`4&QgZ+;+&gjvZ+85m^s&7&Bmm?$R5G27r)CQQ?(%1Nh)U;Ctqh+Iw( zkA|(i8P@%(}?B0*<*zDc$w zczN8|H^z=d^wJuglnoM8rU>Fh198r`$w3TNjT$><%xHuLUmr?j*zcrfSd@G$B`O$%7=}_HJs$+q@-b#Pfr+Q zj(%+>`D50;AtMmR6iMdrVWYc13XG;0C3~0A_h3bdiA9`P_1q# zBM{z0oX;9Qg%#QL!cdAPBCeNwofw=;icgY5mXaciUPL%jd&PYD`6;A`oq;%>e0e|V z(mU>@S6;z|81a&PZY8vdTl;(g4|0W34M$zC`oU1#X zh^^eITQI5Ez|>%;Mg8sIX=@cQyq$G*rvs>c z=mofl{Rwtn0^zh8lPu|#+PcI#9|Dj!c`OwYLaeP4+ZNc-+Ee$RqDUjSULcB&VHHiV zUZ5nR^?|Ip0-A*&B=#!UpHy4{?aSEF(cg<@*V9IY?!e}z_u}yg#yBWG- zE{iU}2;I+DJD^oh88$08rdQOCU%!Oq!gUV(+(}VHipkx=&xw@?K=Rg0HdCyPE*g{t z36Iq}M`BMGm6Tv3CW>T@t$xQzMm#mMio$I8x!@dGr;0v=V9Ux%_yP5lVC!Ol)CSU* z%IHQ02`=Q8aJTSVe3@N}s3J{LZMvEYu_v(#u_?kv4$~RP*Dvrs)iI&qxDCQUR7R4x zaY5263X5>W5Elc^!j{~JpUF1aJ&1Pt@Z)7NFeUejv3n{h#u-FNzed4G9H`5`3EE(F zp8|{5h}|J#j#hpV-5VQ(-wW$8ZESY%EAM@5!ecJ9g9w>r`2_{o#L1j`*bDrOyArh2 z1&FX&dD*S_@xCICSxf9}5VOP%=>}OAR6Vi^R9vtj2O&2rx3CYt58`uzkcS-&VwJfF z+1dCxzOb(VdKKn!YSt{t^XYXnaT61wk=DJ+}`BK6A2p 
z!(?7a&&tZwGp&rAJ^%*f(0M-0y_z<0ILTIGM#3W3$Xcx}BkC$h2B~atGjN z`d-L8nHdQErP4($E!%{8tVoVhfI^ixx^|4d--%F#V|>=xG-}0X9m~3hp9Mwxa6B^8 z%*sqZP6|}gt`7(GQ;TdGhKQpImFFMxACoCH_@>ioNh8C|%s582Oy1^59M0cg1m3nY z5;A1w9a@+66zWr_Abkw09;2+cX@l&mbl28=%OvDu_2L5Ya@`(Ml;~ z)6&fMGc?>ANI6H#5Xz;vqdaq=YO6^aIuZJ}78YAS=R z?!Ar+D6TIrcC%eMS9{XwH-qyj8nT-0XY1?FpmC0U4RpJKiPV{f?8$}G^#khV>23=2 zHL^8Bnua@)hVAS1gV83Skqybk8W=O`bz`@pdxaIgdKyG=+Ga!2M0HN6a)x!hmJP|( zI+XtpAPU({D}Rw{m>Br#BJ2i3Qg`}3ge)o82pZQn(J`>9nM_IDX&+h&q0s;7Iv*g{ z0I?8`#Ft#dxA__%57b#F@jI5xK0}pB^y4_XP9`N01!wHKlLi8m%CgiQk61Ulh5Z0J z5!CRPD_9GwN9GGw0Hy`&2%AJ77G>|MDx2`#msrL7YdN?mZ&o2A29s8P6$F^H(`;0( zkVW%VVJxgft#v{L6`Nttd{vY_f<>t{kV2zvDROiEDytl}4A>f3%`X^YZ8&&eS$2^= z^;jUu`7yf}(MZ_CQo8y~R%|d?()tgH0?iV~c`E7^JEwr^%vNEoD@wPL#7X zgGs@9HP8y5Ek^=LFSkk&`or;Rn0r~JOiE38C8CUM(qK{WGl$fAE6YVWf_No#pLq~W zHXZv1|Ldg`g2Rwutz+1V@v~J7 zvxaCO=0%gRE6e?SZXc^q&-5agH;h9cU^=P~c4k#E8$>d22yT{@mkq?s=`FE)N<@hd zQ9rthJQ!9Shw?GUYlVo%!5|irg~2&|gcwUHvdI?CY$_-&rm$RLNf6{g_#O8PAsq(J zC^8Gl!X<`LMG!X5=6MuRC=XG-sG)Wt(6xckRM2F3zJHrEE>`1V6C2H(oRHDX?BA7Vz4P%F&9aASy zWM%u5rQ;tROIJ*mPJ=b0oxy-ynSB+EV~iZk0!*;7avd4yo{B@;Fmw1BW>$p*&!;1} zN!BfwKJbZ9v%hu2ua0dCl5N9(v61FN5MzQmd(C_kf2Own^tu!9XYNvV8Ey)(+>lidF(u`mzhr_nM!iwPEwRG z6;Ea4{^6P-!WA&)aIwCfU z+8sSfx~ln&Aj4#oC?Tk727Rd{X9*_5RoOi#3Mst(_G^G8gj?Hbwh*Y;D1l(}&fAkF zjZ%oJcNGAU99PbiDFk@SMWoCnjcFBTJCnog2r%)jk>C*@zGQEDH-eanAPP-iAtp>5 zp&)lYBo>fCLZ%~P;s}|DawEwK((*1NNR1dCfQh389{e;sQh*pYE=fTS&tyfJ z1?hpcfTm!95ClO2;TE}mmePYXjY7Z#JO1Gcb7v7WrSF_js*$#&E(J^gd42Q{@bQP6<=UyN3oYzBlfZ>B zZ!qSy*I!kj+wV_{EG;X;QzFc0R3v$YL3T3&Syy=Z=`C=Uh9WJaU#IYg{j%a!`nGyT z^q}8D+(+rmpphd+zNA3=C#fe*!uTOTX~k$1LLvn6g|4fpG=W^q5x?ik1AC zQJ43QbCz;EmfO{86yhhqXv6G30cC_Q%jM(gnK)}O7K=$*ky^u1Cd3*t^pA|Yczfmq z7p2x*2_)FCp(tMRuMD#<;)Gx)ttUNUJQ^=EIdllZF}6ZpEFPm|GLD6^YQzquPF{ZH zIc9iu_h>aoT6tOsXr@#%0iytpO+Y=|XfGW@ZyrgFaAW0$5bmWH|MCpu`fg7h%c~(q zcSQNIRE2xt#Xm66Zp4v6977!4rq&iK;>`6`1dd*M5tU99u(1%Ky6Cz=JGCT`%CJ8_ 
z|9fV5bH@lZ2YP*&8OpHGT)~alq4deW0H}o3>B|-JNyXv+;hKSq}@158`Y*Fd9He|l6ou`IPskQ6eBMc409LRd9uztNI}Ah zXYGGxic^oBA^UPDq+3Adho=IK9(KiI!wY^Y&4u@u8vNjj|$){&WF;?ENF_rLr7FBxj( zy+S}S^cJhBX71?OOX30j?Qj1FV=ruZncr!ek9sr&bXgMcH@|rT$iA@s-YakL%<`F$_M_B&msiz3r8SWnYDv5T1j%@MlZA|?SzXQ*wo_g}h{|=}OH-Mvf<<850 zQO^g9L<`^J@FeJy+LQLv)8J>-rP7kdS&j*Vnp6ns6 zz&`Q#af100ETFVX zN>%Yh@bRDh^d~=ggft%hoCe@El1`6ZJtGD(&ptvv_Sg>r)t>97`M5Y&_%GFz?lQ~A z9)0u&fZqB|1nH{TsV2`e`YBjG`sj~;Owij;;l<>MJM$60?xa;>Aj<~uM?d;Lpj10w zN3CsDid;h=Pb(A!W^A!-+TeX26v{8~mgH7^XlJNujH7T!0YOGadx&5I*Pjh;&*FSt z>ZmA4RM6&8lu%Vp;)H@I;3V=xdHG(L(XO|JE;m<{mSTTKD3Yf!$|Diy4DUDyKl4Fl z5S+|Qje*Gaq7ttpjkYhb5VL_IqqyMhi1{1@J7K#P@kcL22$M^qqe-{Rn`;zGKJY>{PV$AZlE{31GRLXU_v##xjv zKv@^^sRDq8y(q;;QGo@ZP-fO)k3gJ{u}^Rb0EZQ!RDdrQW(G?N3VC0vCPyd@VZd0F zeG8jqnm=i-Afb%RLoou)wmigvH4g~2W)L6jhdP@Jl;G(pR;YB65lYYS!Vsjhk8>F# zXMUs;>bKx-??I1DWQl^s`S~~nq*ehJn!gxzD-S4ltrRaUJ#;KRmYHl*dv8)pvC`7T zF*E%@tjIGsmxP0y^o%v^ZBb^0;-p>>4y@Q!IXLl2n{5}P9Sfx$i$%#`5f~B36^HsU zX=Xo^bqmpAC5@(sLN)+}Qq%hJx=ev=;zaAIKqA#l+wV~xXIJVob8>iJPl_uPqO4w! 
z4&^Ip?@yd)awJYq4W*>UqGYfD2VY$f4q*Z$Qq0tS9)UQDbCZJNwB)#Pj&&lGo?^v< zWNU^4A&pQUCe`Q1p7~->b1(hWtjLSxRX7&g5<*N|7VuXq^ zDH=d~VmR41Im%8DQU~QfFV(ek4~7~9)$$%H{>2mR}ppt>geF3AGTsK)=hO9HqFNh<6`<@fs4$$ zVi?Iwbi|4zkv6z}epve)#;i*@MiF}>)z2EEkj$ab!9!j~X4o!GjX-c;91wBPJhamz zEEw*~PUSGZ9||i`+QHC)gI-+S*VviYvpgQkbwcjbjts$#Q8BsH)Y_-!6Mb+w7RklA5Qf0QO zJT`en#73i*Dc~1=^9Ff9KUM#}uD4{UAtA}#( zVj;4DBL7i@o@b?c)F5}LHU^+vyjV!E5?KPp14Zv(>ehm)LE z4V`Z&@J$k~gLC)*1cLc++cLJ=JHmgD+^ zXD(U>%ICnHh{b47xe7-XT(=E1 zbYSGI{T?g zG>Q0zqdcE$ydISqaO&t81=wLjiJp|u(UiS0C>hMh`3TZR@zGM?X7$6sT_Y~>yfEZo z>PM|uF4pCA$}A~4sK*0D=*ZDMv6AYKp}tdqSB@ZXppKX^m{8a*$Nec%#lW!zg7}VD zyFFa2a4j^i;{JA{=5h?D+Qh%A>vtt^8Gw4 zaIPd3m&}N+d_(PkfLpi21F?@p0OVhyf+3tT58XPFLWP`&fpQ)7{h@vPy@>x)jST*s z&%Zak`2XWO@L-(%okzqYy+8W#kE!T3mGc%)SU-Pk9qM2`7}xpjAN=qi9$}hWk^8YM zMfujl5C8BV6wRsD{qqK<-2LYF|DI?VCVqU63w=NKuRToa`A^^b9@7*t1mcgzhwq1= z>;LhghrS!7{rJ&GA706XzQ6mtotaTA6b7 zKYa6BL~=Mn&7Up*c>Vef0yTj&loZl8KN#2bH{bY%LV>x`=^^ou^KVs< zGw!O;b(+e3yq+{`dRmTx;1>6-xLbfEdgWmb-<14F#Y6UY(NG&2NE2+;@2_xVuz~R1n{g-~3kG_u_7Vud^cjOnBtOad#p=D|s{~ z?gw$VMI7d$Sg{^I!V>hV9{~Jn+>hdHp$pwk93%!=_dW1H++zTQ{0V~w-WNA35RP-p z@tP=+yBjrfb=1h!_sc!C_*->pD4)A$x7e+Lh-d?dEgqto^zoiu4%OHv_uur;LP;O* z-nDDD#zVzEYi~-(L$!!1cImryFqXUX@n`Ew3?;4GwR5LK6no{)8V}JTqS%S6ce{-} za%Z!LWP-EOsVx4@L`uMZClH&=bSWeW!C&Ojg7V%S%Q4fhSVhAsbei`WuiqK~-> zB7S)&i0076pKjiwF_rj|M)$>8i7zG%H-=k7W`2qiaXP35Psg`Y5LJ8nuS6aC<|nvg zNK}2BkH>x~PChYWxPQ3$(H0b-)0~k8<|1~2_E0jZGi36XPsAsNhxny9>r1avO9(S* z`Q}XyF&;9F?PACMaq>wT7aaP=hEGtlP7PI5(7yFok~id?O`qU4qQ-3FKlN=B+wYIF ze9tg(=#-5_)R~w7whzR~?$F!(ER*8lU@B;3CV4jVFT#F%loQ1#^r z_Qbfa#K|8g4<9gG4Ex&9ug1xrjTru&;a`n2^~Z4V;lB*Wd9=SDl05ud!yk+@AADv+ z((rE%|J%5b?~%WZ`0Ma*4F7su@V*~E|5yC~zZ?FIxM2LZp7`U?zm9lt#D9(p8V~&E zA3XZA$G`gcEZ=PYKB#J*o2Xp3&NVW{A-Vsg^OvmZXKvb0^}2P`>()`syVXMrC2yi) z-Wn58>HAQ(hiE2mbV_y;V{890_xnIxL&+QW?%A_f!$(KoFAh-MZuNyw`fS*{M}3jh z=PF7HC2!cX+x?aa{#-?g77+zLIU4>n<$*gfEb*CBUn8+k9;`jBogQ3IeS^ z$6vf}uOEZmKU+2V_51;}_uxaP`0D$O1I9u7P~2c=w3W=CI(Milm+rDQL8Nb8JbzI1 
zeMA)pT`Gps*G}6N;P0ftWS3|rtM8r8RMN_~-#!o#8%o9}Z^upzpE1SU{aUFNT)Puj zk~REc`gi+yO6pTZ#85ImW!vy6q)%V~UlKKA`|E!V{8g^mL7yp*v|S0j82pj?S`x;Y zNh^1Jx(y#8{v5MjZjd#9J*8i&R%ERNTuKT<9ZL?~#Ht&lh zFS3PfBy3qI8lk0@6faU_%f_25k0qB9NNhtPX@enYXvk6$AT3L~#z zYmz4Y+I;yircIN!AtVqs<3*Zz@2S6YXKaS{ulDO7Ez9@KyXSY$z4z&!^Uk~PEROxh zyoxt@t=^2|3 zI$>QOE#t`Np}#VSGL)Nv-GRPFPEYDKJV{zi7X1}5GsOq_-$wnDy`MQd1T98XXev;Z zaYodVK8$M|`P{&uJg8(4NjY>9Ez$c&1A|0ObtT4Zob*EkHp}4pG=*QP5;y4E?aC@0iyH>XD=U_nbI+ z>g1{HQ}TTUl6hO7*XdKz-BUzeDx*|J^);8=r^|doE zTS%(vl0M|NoE6od>^uGB2t4M?&~lk6&f;~S*n8s9XMHbTnJ=ApOP9HxyW-ZiqYwVs zKoE}LC1`pdsG_p1oyQ+~VlX)MkJsi`&Sy#QaXo9%de82=A3pGhcjiFfcLm;?Lf*=9j?Wdy}b-{8gJ;_TTgHx1Jx1OrC$?<$w73o9}*j^;+Sz z9O-SYx(Zj_+T8sIfBeYvbyFpa1gXfAReAm@g1QTIqsFp7a~o=B@=x*KKb8)R9j=@W^A2 zKlSvpc;ucP$REg;-r|}gcVTJe?X6uscb$0f^p_re?5kh@`Zw~wQ6T+}D>iq*lCsLJ zP1|=LIC}i7!~#o*~~{ql(82tM)uxV;13SGH^nNO-{04x^%!tEokuK`zXdhNQa~<%$-Uh@B0%+A_o2hK!}9#KKW(AfVkwl| zW$oU>_ZXcgM8wad-3R4^$|36@a$I~kcYGTaSW7__F|xVy&&c=w>ZkV|&<+^z1_bWf zuRB^%X^o(4C)>q4lun~-dkDDXFAgAgp>02(pZ#G|tEjlfwzC~16O?K?OlLArAp`G1&w*hZTSefSW26(fvZLF2Iv34$? 
z#*RA&@le&F%&l=e*OT{lB7>oA58GmH^)$0)8F=1mwYBqhwq4$#weW3ntJ=oft&T1Y zw-vY6!y2r{Ce~y%x1`W1?lLB#?2;eGxnrk& z=PaEp(9r^~=V_i?faO-DO>MVh@z!=YfnF6S79T;m7@nZ&Hl~>zKV4A!^Zj^zvw`6T&o2;s8jspa?h1VD~ zB&$=wO$LTHsm-}9Y$Myms$@X;7OSQ<#YU}{JqjRZQ)3fvGT`LbtT)fW(~wS;*hYg6 zW{4}tz`T}YU>!Zw2EEbNWYKv9t0ca>idV~G=oH9auX%L*;Tm`YF9(hboJEHmN;L;J zF1RUmE-b^U_o$v(I$)r~kQ9`rssbqkDc35uD|K1*tcbpB zj+gQ>oGcNDZICf2fjBsso2xg|xCWddYm^#iEn8u&DrEw0tu1He1m}Qr!0pw_X8RVU z#@d480=7b&st{kk)*`;D;LA5DRjz8sW@C{!RUjBEl?95psoc*PAE#t2+h{=a=t;Ykb$;gY}_MWbqz-fI44?+LkTV>K%6CA&K<1~DUk_O4^oj9kt#db@PqlWYIH>3cccjUeMU?purl#%F=wrSZi7B9IRC|e%C>V+FrAb%fRLnpHQJyrTA|&>G zsxE<#5L57CLFZqGB2g6xEpEm{Z0@h(Q38#?w zq;1kJ2==HW#^P2io)fo%A$v#*>k&uP9ZSYz_*aYbpc8F2v=zjxKu`|4LN;9HMvWNi zJivJbFF$THIU@_AYCsKUwP=OhuZdXI^wVc9{ zFy5k6p!8sVC@%~xD&tbe;caeQ^=abTM180wAyvF&*a9{j5d}jv8wmY?$UAmw$hr_Uo^gRw=gA{!!`R%di%Bs8ZfVBGrP1;-1Z{C0wNhdN_o z&noB584;2D22yU>Ks8^F;rkYxwvZ&zn0Y}&4qlu=k}Q0O{&5WTM`WZtR1rU&ikU`a zw*3O_(wC^Z2<1>@GiLnn#WsDJa%@LU$7Z}6Pu7H z)VMyy5JgRqedZfe=N;!kxfDiu5NzHl8rQr)`*ikIe8t1Z!1L;Pe$jj>I$;L@dp>^Q z;-$Px$=5HvGCkv%Nd`lq2+D`JKvZKn6Bd2hGiRvgodFjwAvW$bXWNvCuXef9ETTYE zvL@_eysW7NTJawWy+BgbSoVZFo;!s-PnVDl)HTVPFyj;PthhxV?C@0(GN4BFSiuA~ z`$XIwhrB3ZO|&hYu&BZc@F?6tje67;TQ&i4P#_|7>5LUk&nZW%d32AkD(y$i13$!8NdD`gI}ad(H0;HWDTGm) z)`LB)$2!s@_ZWwJDdnH>{jUAU9?ka}-CaK=#0Q5C>xYd4Cq=@GbDfj{x;wdN_hCu~ zV7}IVNIPU4?EV^JUz1<%L~sk+!*&}Tr;x%q~jep168fCt_7#w04=> zA4jaF`R#`N`hK(PZ$UJF+1({~!4hnZqOW0{Y}}{qGxi>&$Yt|yc62HTHZZqS+63c+ z?j~few)S@OZlmj7%5G^6wWY%rj2(O5Ok^dDpEWn_(e@;}?(+M64xcv8#*DjnbnJpn zy3^RvK{?I&agLlm+waaZvtFp~#*}e-#EZ@Plo9)}k1Z;+3 z)(AV!ldShNHgx87njM`TtOLg2PPRkd&f8)6wZgh<;R0zO6roe4kf6>EtwRNhr%^EM z1j=e^B!(7+V3@l)b~$&cI~8DLaN?~pFsvCiqZku5WSw)TwnN#jV1h)2C5drhE@DP7 z9qSUe*CHGOwx0tKrA-Dzy~zOP7$C4RYpt4gd%M}z-lk!hd>brZF$T<~n-ExfTa>m$ zE8;5JS~c7Yy+sCyhT#e%mX`B>YuFeHfX@V(; zzAE~1L4zTlY(l1ck0>1f2XXlOqKM`z6f}-DC!vd82~EKOou)~czlsX)UNia@k!GjDa9cJeUlLmA)ks{0bvSj! 
z%rcF)!$dMCUwqB|ntn0)J(vP8%*;3?oYvu6WlV>{u=k8eG#YV5)UXweY4G^AB4IV` z3_0N|ZB514U4dM*+r~Bg=I09iPPFS|lx5Q)Sh{_%0>M&2n|W>&-{#Dbb0hfV z;!BLfIP}4uvlqWKA}H@pz~L;F zHplq8Y1-}StU2Z{qeMX+UfQlNQASi)dlQs1$Na}A@*PmkIohtt4^Ws7_Nh5R#nsKf ziJsTbQwd&d);Hp4$F>$CW6WKMVMfWB=~rmCCNEA1&n(!1V!yrz7`C{gV5V`7%imjI*hikXIKzjK&4P;Npl8siI##9Na7_v4>X@)Onl0OPLvs!B}J{ z2OEo$jW}gZLwf9SHyqvIVm1xKl6>pX>O!kI5ubp`DV)!8FwrS&`J4&7p~3r|QGq-% z72N`sYStoo@d5!bv1hs1)hsMbMFXbg#Nr7(rU{9$LK90TY(ivIgbEaj`ROTnfYB*r z3ic=_O>#xt@S+Ae!zb{VH4)2=xr8K<|GpIfh8E@&I9=dKJ|JY+zbud+O`?A^VvFeT z$R`=H`~atg+YTpx4Q;Z&0Fs5bIf#MkcSW-C+m;rg0*I^)@}fa99APsQ4!I#e#9c{C zhwjT4@`X1nwBp4Ko81}@*+RKOSX>aEoM0jlgy9%)g6f9Zlfq^NOn(6HKiLoO+BWr%| zCN3RKiovfD&x(o->&pN&D2|ml&ZUv8{w+Y$@uWZsXnA=FuyV`KMa=2VSkjxQ6hnI8 zuLSPjDK1v5IvsO*11AQ?2~~Mw?V7T(<$%tLHRJa1`s=BsAdEG2e=2d7b!|gAJ=aT1 zmI2Ee$#jHGNQPD-wQ5%V&PQ`3rlqS^ECM!-L=5eXwXo{9{|j>2GQ!IB0!H_n#{RHs zQ))RhHhgzx=_$QzNgdZlj3Ht-3LEYCSYd z!Yy1_m<{9{{jJ5mU{&v??>Kh3J@m(6 z8-AU}eKfLQ=+I>Hg@SFyD}F_`S896VHl|jBQ60K0Es=!13Hyu|%gWwLuYo{fZr~&{ zz~JkZvf)x7!HK4oT7XWVWs)%Shz%<>aVyxe_r)DfG14_)R!G83gm;=4h!A3N`PtcS zhl9ARlE{Wkb!SUHNKxq~iTQOkQ0YB1RnxeDhZ!|Fe6e*%^=Et2A82sYAQY|6+lH+jFCyS zqByotM0gckru^K;#*@B*4Bj87D&s~b5muaxN#R>RCMEkg-anJ$XES>XDshHG!nizU zWD*&O_>wP%g`?6~Vsz|GM&A@)niCP_Q6rOxlIrM5%c0ce@@V4RXeJrH2bj~1QQ;kM zE;;&q25%T?I6+B|ERW%k?wpZHr`~WDz>45!H7cKDBgu2$={o|i9`jfk{1R9O9Ur#O zDI-QES;ljZrVR%G5akgzoE*udGr=4m@xh!r%frTV8Qf{)8$@|T`#2Gw{e_1HI;Td~ zhmB`5I^*a@j0^Xw5q&r@IQ&eSSb}-qm{f|$2aTbpy}c?HwF*RdOyN$D`N%LQcv20b zMCQ=9$VbXNx`b8@0KV}H#_}K^FoqvJOvIi%>W#I)lL`IgL3to~cJTfla)LVAZ3od9 z6M5rr(3OG2*@36i^J;?=13v$Yt_r&Hv&P`Vf^T0HNbudjfOf%EKASi*@aO2K^;*T$ z4V`SDDCi`gF;9`lmf_tcQ@3+!nk1j)XN-X_^q`a3w?WF0lr%R5gyh*X4-p0Pbb@4z z43EH9Nf8{CGs%Hx{;&rQO=e%Elq!kJ0cMOO;IB&vwe9LnfSVIN$?XA)VV0M1M@N=GBWlvE52Oag0Ul3|`wtOId} zOP0`lLb+uOfUhGWfE2Du7#*IBP;VJzA`){V!rcj)aUg@5$y&y1*vF};6`k&zQc*a! 
zWb``qF%CBo9PekGbY}*C92|?WsXtK@W8<0R8$<;|%R+Sy^dI#qUbBx>3|=&8-lSRK z^aPQHdOVv!PZ6mvsYf=N{1}x9&592jf~t`6{D^f*H^OlVT=^(;8+7MKRMKrS-aJXM zLc@Uid6g{BiES*XE> zdub*8l~59-2IWx|L#N|%R}!MTCK;qN@y3y&wWeW4)w8_hc=>9-%|)&>_^eSLhIU#q}TBDs+gK_ZfHA3Kh~n z*LQSp^BPhi#dV3AjiqS~^8Yaqd?=@B24EmXB+EDD^l3fCy(Tv|uWc+^c#_x*x z*(<$`>r)+Y@sq=Mlo#2lH!hw#=vh}FE;h_xfBS(u>(&(57)ZF9KDB3S=>nau*3Vyi z?<@Co+_o}DOV* z&RPu{vTM`NPl>JQF-{l5K}g-el~((3sSjUS(T>fz(tZ(Es*E>5Xz{_^ZA z{^!1Oa-V1YLKlW~N~QCcFMlsM@c8Gw?NuxDY_jB%R?WXZ`|3<+@X6D^|Eb#5;Kk6m zV*b6vzq~jZ8hYwW_jKQ}VR5$3CAYL}{+-LOefRuC0L`D>-+b$eJO|Kjsp#69S3i93 zH^2DlU%haCGBW=B6OY_`;P-AVDR5%UEU5@Vp6oB*xiB*s@ee=!wJ+Sg@As-!6}mJI z`76S)&)YN8ld-UG__-&({J`-&O;sgDS$N6FkqQxg_f{z28yi0J^fw>rP@$-D_gNBKijDTUnCWlyuWJY%H>NJ6)nhd!H5U None: + context.get().emit(e) + logger.debug("%s", e) + + +def cut_query(self: GenotypedContig, cut_point: float) -> Tuple[GenotypedContig, GenotypedContig]: + """ Cuts query sequence in two parts with cut_point between them. """ + + cut_point = max(0.0, cut_point) + left = replace(self, name=None, seq=self.seq[:ceil(cut_point)]) + right = replace(self, name=None, seq=self.seq[ceil(cut_point):]) + return left, right + + +def cut_reference(self: AlignedContig, cut_point: float) -> Tuple[AlignedContig, AlignedContig]: + """ Cuts this alignment in two parts with cut_point between them. """ + + alignment_left, alignment_right = self.alignment.cut_reference(cut_point) + left = replace(self, name=None, alignment=alignment_left) + right = replace(self, name=None, alignment=alignment_right) + log(events.Cut(self, left, right, cut_point)) + return left, right + + +def lstrip(self: AlignedContig) -> AlignedContig: + """ + Trims the query sequence of the contig from its beginning up to the start of the + alignment. The CIGAR alignment is also updated to reflect the trimming. 
+ """ + + alignment = self.alignment.lstrip_reference().lstrip_query() + q_remainder, query = cut_query(self, alignment.q_st - 0.5) + alignment = alignment.translate(0, -1 * alignment.q_st) + result = AlignedContig.make(query, alignment, self.strand) + log(events.LStrip(self, result)) + return result + + +def rstrip(self: AlignedContig) -> AlignedContig: + """ + Trims the query sequence of the contig from its end based on the end of the + alignment. The CIGAR alignment is also updated to reflect the trimming. + """ + + alignment = self.alignment.rstrip_reference().rstrip_query() + query, q_remainder = cut_query(self, alignment.q_ei + 0.5) + result = AlignedContig.make(query, alignment, self.strand) + log(events.RStrip(self, result)) + return result + + +def overlap(a: AlignedContig, b: AlignedContig) -> bool: + def intervals_overlap(x, y): + return x[0] <= y[1] and x[1] >= y[0] + + if a.group_ref != b.group_ref: + return False + + return intervals_overlap((a.alignment.r_st, a.alignment.r_ei), + (b.alignment.r_st, b.alignment.r_ei)) + + +def munge(self: AlignedContig, other: AlignedContig) -> AlignedContig: + """ + Combines two adjacent contigs into a single contig by joining their + query sequences and alignments. 
+ """ + + match_fraction = min(self.match_fraction, other.match_fraction) + ref_name = max([self, other], key=lambda x: x.alignment.ref_length).ref_name + query = GenotypedContig(seq=self.seq + other.seq, + name=None, + ref_name=ref_name, + group_ref=self.group_ref, + ref_seq=self.ref_seq, + match_fraction=match_fraction) + + self_alignment = self.alignment + other_alignment = \ + other.alignment.translate( + query_delta=(-1 * other.alignment.q_st + self.alignment.q_ei + 1), + reference_delta=0) + alignment = self_alignment.connect(other_alignment) + + ret = AlignedContig.make(query=query, alignment=alignment, strand=self.strand) + log(events.Munge(self, other, ret)) + return ret + + +def sliding_window(sequence: Iterable[T]) -> Iterable[Tuple[Optional[T], T, Optional[T]]]: + """ + Generate a three-element sliding window of a sequence. + + Each element generated contains a tuple with the previous item (None if the first item), + the current item, and the next item (None if the last item) in the sequence. + """ + + a, b, c = tee(sequence, 3) + prevs = chain([None], a) + nexts = chain(islice(c, 1, None), [None]) + return zip(prevs, b, nexts) + + +def combine_contigs(parts: List[AlignedContig]) -> AlignedContig: + """ + Combine a list of contigs into a single AlignedContig by trimming and merging overlapping parts. + + Left-trimming and right-trimming occur at any shared overlapping points + between adjacent parts. munge() is used to combine contiguous parts without overlap. + """ + + stripped_parts = [] + for prev_part, part, next_part in sliding_window(parts): + if prev_part is not None: + part = lstrip(part) + if next_part is not None: + part = rstrip(part) + stripped_parts.append(part) + + ret = reduce(munge, stripped_parts) + log(events.Combine(stripped_parts, ret)) + return ret + + +def align_to_reference(contig: GenotypedContig) -> Iterable[GenotypedContig]: + """ + Align a single Contig to its reference sequence, producing potentially multiple aligned contigs. 
+ + If the reference sequence (ref_seq) is unavailable, the contig is returned unaltered. + Otherwise, alignments are performed and contigs corresponding to each alignment are yielded. + """ + + if contig.ref_seq is None: + log(events.NoRef(contig)) + yield contig + return + + alignments, _algo = align_consensus(contig.ref_seq, contig.seq) + hits = [x.to_cigar_hit() for x in alignments] + strands: List[Literal["forward", "reverse"]] = ["forward" if x.strand == 1 else "reverse" for x in alignments] + + for i, (hit, strand) in enumerate(zip(hits, strands)): + log(events.InitialHit(contig, i, hit, strand)) + + if not hits: + log(events.ZeroHits(contig)) + yield contig + return + + if len(set(strands)) > 1: + log(events.StrandConflict(contig)) + yield contig + return + + strand = strands[0] + if strand == "reverse": + rc = str(Seq.Seq(contig.seq).reverse_complement()) + original_contig = contig + new_contig = replace(contig, seq=rc) + contig = new_contig + hits = [replace(hit, q_st=len(rc)-hit.q_ei-1, q_ei=len(rc)-hit.q_st-1) for hit in hits] + + log(events.ReverseComplement(original_contig, new_contig)) + for i, (hit, strand) in enumerate(zip(hits, strands)): + log(events.InitialHit(contig, i, hit, strand)) + + def quality(x: CigarHit): + mlen = sum(1 for x in x.cigar.relax().iterate_operations() + if x == CigarActions.MATCH) + return (mlen, x.ref_length) + + filtered = list(drop_overlapping_cigar_hits(hits, quality)) + connected = list(connect_nonoverlapping_cigar_hits(filtered)) + log(events.HitNumber(contig, list(zip(hits, strands)), connected)) + + for i, single_hit in enumerate(connected): + query = replace(contig, name=None) + part = AlignedContig.make(query, single_hit, strand) + log(events.ConnectedHit(contig, part, i)) + yield part + + +def strip_conflicting_mappings(contigs: Iterable[GenotypedContig]) -> Iterable[GenotypedContig]: + contigs = list(contigs) + names = {contig.id: contig for contig in contigs} + + def get_indexes(id: int) -> Tuple[int, int]: + 
contig = names[id] + if isinstance(contig, AlignedContig): + return contig.alignment.q_st, contig.alignment.r_st + else: + return -1, -1 + + reference_sorted = list(sorted(names.keys(), key=lambda id: get_indexes(id)[1])) + query_sorted = list(sorted(names.keys(), key=lambda id: get_indexes(id)[0])) + + def is_out_of_order(id: int) -> bool: + return reference_sorted.index(id) != query_sorted.index(id) + + sorted_by_query = sorted(contigs, key=lambda contig: get_indexes(contig.id)) + for prev_contig, contig, next_contig in sliding_window(sorted_by_query): + if isinstance(contig, AlignedContig): + original = contig + start = prev_contig.alignment.q_ei + 1 if isinstance(prev_contig, AlignedContig) else 0 + end = next_contig.alignment.q_st - 1 if isinstance(next_contig, AlignedContig) else len(contig.seq) - 1 + + if prev_contig is not None or is_out_of_order(original.id): + contig = lstrip(contig) + log(events.InitialStrip(original, start, original.alignment.q_st - 1)) + if next_contig is not None or is_out_of_order(original.id): + contig = rstrip(contig) + log(events.InitialStrip(original, original.alignment.q_ei + 1, end)) + + yield contig + + +def align_all_to_reference(contigs: Iterable[GenotypedContig]) -> Iterable[GenotypedContig]: + """ + Align multiple contigs to their respective reference sequences. + + Applies align_to_reference to each contig in the given collection, + flattening the result into a single list. + """ + + groups = map(align_to_reference, contigs) + groups = map(strip_conflicting_mappings, groups) + for group in groups: + yield from group + + +def align_queries(seq1: str, seq2: str) -> Tuple[str, str]: + """ + Globally align two query sequences against each other + and return the resulting aligned sequences in MSA format. 
+ """ + + gap_open_penalty = 15 + gap_extend_penalty = 3 + use_terminal_gap_penalty = 1 + aseq1, aseq2, score = \ + align_it( + seq1, seq2, + gap_open_penalty, + gap_extend_penalty, + use_terminal_gap_penalty) + + return aseq1, aseq2 + + +def find_all_overlapping_contigs(self: AlignedContig, aligned_contigs): + """ + Yield all contigs from a collection that overlap with a given contig. + Contigs are considered overlapping if they have overlapping intervals on the same reference genome. + """ + + for other in aligned_contigs: + if overlap(self, other): + yield other + + +def find_overlapping_contig(self: AlignedContig, aligned_contigs): + """ + Find the single contig in a collection that overlaps the most with a given contig. + It returns the contig with the maximum overlapped reference length with the given contig (self). + """ + + every = find_all_overlapping_contigs(self, aligned_contigs) + return max(every, key=lambda other: other.alignment.ref_length if other else 0, default=None) + + +def calculate_concordance(left: str, right: str) -> List[Fraction]: + """ + Calculate concordance for two given sequences using a sliding average. + + The function compares the two strings character by character, simultaneously from + both left to right and right to left, calculating a score that represents a moving + average of matches at each position. If characters match at a given position, + a score of 1 is added; otherwise, a score of 0 is added. The score is then + averaged with the previous scores using a weighted sliding average where the + current score has a weight of 1/3 and the accumulated score has a weight of 2/3. + This sliding average score is halved and then processed again, but in reverse direction. 
+ + :param left: string representing first sequence + :param right: string representing second sequence + :return: list representing concordance ratio for each position + """ + + if len(left) != len(right): + raise ValueError("Can only calculate concordance for same sized sequences") + + result: List[Fraction] = [Fraction(0)] * len(left) + + def slide(start, end): + scores_sum = Fraction(0) + inputs = list(zip(left, right)) + increment = 1 if start <= end else -1 + + for i in range(start, end, increment): + (a, b) = inputs[i] + current = Fraction(1) if a == b else Fraction(0) + scores_sum = (scores_sum * 2 / 3 + current * 1 / 3) + result[i] += scores_sum / 2 + + # Slide forward, then in reverse, adding the scores at each position. + slide(0, len(left)) + slide(len(left) - 1, -1) + + return result + + +def disambiguate_concordance(concordance: List[Fraction]) -> Iterable[Tuple[Fraction, int]]: + for i, x in enumerate(concordance): + global_rank = i if i < len(concordance) / 2 else len(concordance) - i - 1 + yield x, global_rank + + +def concordance_to_cut_points(left_overlap, right_overlap, aligned_left, aligned_right, concordance): + """ Determine optimal cut points for stitching based on sequence concordance in the overlap region. 
""" + + concordance_d = list(disambiguate_concordance(concordance)) + sorted_concordance_indexes = [i for i, v in sorted(enumerate(concordance_d), + key=itemgetter(1), + reverse=True, + )] + + def remove_dashes(s: str): + return s.replace('-', '') + + for max_concordance_index in sorted_concordance_indexes: + aligned_left_q_index = len(remove_dashes(aligned_left[:max_concordance_index])) + aligned_right_q_index = right_overlap.alignment.query_length - \ + len(remove_dashes(aligned_right[max_concordance_index:])) + 1 + aligned_left_r_index = left_overlap.alignment.coordinate_mapping.query_to_ref.left_max(aligned_left_q_index) + if aligned_left_r_index is None: + aligned_left_r_index = left_overlap.alignment.r_st - 1 + aligned_right_r_index = right_overlap.alignment.coordinate_mapping.query_to_ref.right_min(aligned_right_q_index) + if aligned_right_r_index is None: + aligned_right_r_index = right_overlap.alignment.r_ei + 1 + if aligned_right_r_index > aligned_left_r_index: + return aligned_left_r_index + 0.5, aligned_right_r_index - 0.5, max_concordance_index + + return left_overlap.alignment.r_st - 1 + 0.5, right_overlap.alignment.r_ei + 1 - 0.5, 0 + + +def stitch_2_contigs(left, right): + """ + Stitch two contigs together into a single coherent contig. + + The function handles the overlap by cutting both contigs into segments, aligning the + overlapping segments, and then choosing the optimal stitching points based on sequence + concordance. Non-overlapping segments are retained as is. + """ + + # Cut in 4 parts. 
+ left_remainder, left_overlap = cut_reference(left, right.alignment.r_st - 0.5) + right_overlap, right_remainder = cut_reference(right, left.alignment.r_ei + 0.5) + left_overlap = lstrip(rstrip(left_overlap)) + right_overlap = lstrip(rstrip(right_overlap)) + left_remainder = rstrip(left_remainder) + right_remainder = lstrip(right_remainder) + log(events.StitchCut(left, right, left_overlap, right_overlap, left_remainder, right_remainder)) + + # Align overlapping parts, then recombine based on concordance. + aligned_left, aligned_right = align_queries(left_overlap.seq, right_overlap.seq) + concordance = calculate_concordance(aligned_left, aligned_right) + aligned_left_cutpoint, aligned_right_cutpoint, max_concordance_index = \ + concordance_to_cut_points(left_overlap, right_overlap, aligned_left, aligned_right, concordance) + left_overlap_take, left_overlap_drop = cut_reference(left_overlap, aligned_left_cutpoint) + right_overlap_drop, right_overlap_take = cut_reference(right_overlap, aligned_right_cutpoint) + + # Log it. + average_concordance = Fraction(sum(concordance) / (len(concordance) or 1)) + cut_point_location_scaled = max_concordance_index / (((len(concordance) or 1) - 1) or 1) + log(events.Overlap(left, right, left_overlap, right_overlap, + left_remainder, right_remainder, left_overlap_take, + right_overlap_take, concordance, average_concordance, + max_concordance_index, cut_point_location_scaled)) + + return combine_contigs([left_remainder, left_overlap_take, right_overlap_take, right_remainder]) + + +def combine_overlaps(contigs: List[AlignedContig]) -> Iterable[AlignedContig]: + """ + Repeatedly combine all overlapping aligned contigs into an iterable collection of contiguous AlignedContigs. + It proceeds by iterating through sorted contigs and stitching any overlapping ones until none are left. + """ + + # Going left-to-right through aligned contigs. 
+ contigs = list(sorted(contigs, key=lambda x: x.alignment.r_st)) + while contigs: + current = contigs.pop(0) + + # Find overlap. If there isn't one - we are done with the current contig. + overlapping_contig = find_overlapping_contig(current, contigs) + if not overlapping_contig: + log(events.NoOverlap(current)) + yield current + continue + + # Replace two contigs by their stitched version, then loop with it. + new_contig = stitch_2_contigs(current, overlapping_contig) + contigs.remove(overlapping_contig) + contigs.insert(0, new_contig) + log(events.Stitch(current, overlapping_contig, new_contig)) + + +def merge_intervals(intervals: List[Tuple[int, int]]) -> List[Tuple[int, int]]: + """ + Merge overlapping and adjacent intervals. + Note that intervals are inclusive. + + :param intervals: A list of intervals [start, end] where 'start' and 'end' are integers. + :return: A list of merged intervals. + """ + + if not intervals: + return [] + + # Sort intervals by their starting values + sorted_intervals = sorted(intervals, key=lambda x: x[0]) + + merged_intervals = [sorted_intervals[0]] + for current in sorted_intervals[1:]: + current_start, current_end = current + last_start, last_end = merged_intervals[-1] + if current_start <= last_end + 1: + # Extend the last interval if there is an overlap or if they are adjacent + merged_intervals[-1] = (min(last_start, current_start), max(last_end, current_end)) + else: + # Add the current interval if there is no overlap + merged_intervals.append(current) + + return merged_intervals + + +def find_covered_contig(contigs: List[AlignedContig]) -> Tuple[Optional[AlignedContig], List[AlignedContig]]: + """ + Find and return the first contig that is completely covered by other contigs. + + :param contigs: List of all aligned contigs to be considered. + :return: An AlignedContig if there is one completely covered by others, None otherwise. 
+ """ + + def calculate_cumulative_coverage(others) -> List[Tuple[int, int]]: + intervals = [(contig.alignment.r_st, contig.alignment.r_ei) for contig in others] + merged_intervals = merge_intervals(intervals) + return merged_intervals + + for current in contigs: + current_interval = (current.alignment.r_st, current.alignment.r_ei) + + # Create a map of cumulative coverage for contigs + overlaping_contigs = [x for x in contigs if x.id != current.id and overlap(current, x)] + cumulative_coverage = calculate_cumulative_coverage(overlaping_contigs) + + # Check if the current contig is covered by the cumulative coverage intervals + if any((cover_interval[0] <= current_interval[0] and cover_interval[1] >= current_interval[1]) + for cover_interval in cumulative_coverage): + return current, overlaping_contigs + + return None, [] + + +def drop_completely_covered(contigs: List[AlignedContig]) -> List[AlignedContig]: + """ Filter out all contigs that are contained within other contigs. """ + + contigs = contigs[:] + while contigs: + covered, covering = find_covered_contig(contigs) + if covered: + contigs.remove(covered) + log(events.Drop(covered, covering)) + else: + break + + return contigs + + +def split_contigs_with_gaps(contigs: List[AlignedContig]) -> List[AlignedContig]: + """ + Split contigs at large gaps if those gaps are covered by other contigs in the list. + + A gap within a contig is considered large based on a pre-defined threshold. If another contig aligns + within that gap's range, the contig is split into two around the midpoint of the gap. + """ + + def covered_by(gap, other): + # Check if any 1 reference coordinate in gap is mapped in `other`. 
+ gap_coords = gap.coordinate_mapping.ref_to_query.domain + cover_coords = set(other.alignment.coordinate_mapping.ref_to_query.keys()) + return not gap_coords.isdisjoint(cover_coords) + + def covered(self, gap): + return any(covered_by(gap, other) for other in contigs if other != self) + + def significant(gap): + # noinspection PyLongLine + # The size of the gap is unavoidably, to some point, arbitrary. Here we tried to adjust it to common gaps in HIV, as HIV is the primary test subject in MiCall. A notable feature of HIV-1 reverse transcription is the appearance of periodic deletions of approximately 21 nucleotides. These deletions have been reported to occur in the HIV-1 genome and are thought to be influenced by the structure of the viral RNA. Specifically, the secondary structures and foldings of the RNA can lead to pause sites for the reverse transcriptase, resulting in staggered alignment when the enzyme slips. This misalignment can cause the reverse transcriptase to "jump," leading to deletions in the newly synthesized DNA. The unusually high frequency of about 21-nucleotide deletions is believed to correspond to the pitch of the RNA helix, which reflects the spatial arrangement of the RNA strands. The 21 nucleotide cycle is an average measure and is thought to be associated with the length of one turn of the RNA helix, meaning that when reverse transcriptase slips and reattaches, it often does so one helical turn away from the original site. # noqa: E501 + return gap.ref_length > 21 + + def try_split(self: AlignedContig): + for gap in self.alignment.deletions(): + if not significant(gap): + # Really we do not want to split on every little deletion + # because that would mean that we would need to stitch + # overlaps around them. + # And we are likely to lose quality with every stitching operation. + # By skipping we assert that this gap is aligner's fault. 
+ log(events.IgnoreGap(self, gap)) + continue + + if covered(self, gap): + midpoint = gap.r_st + (gap.r_ei - gap.r_st) / 2 + self.alignment.epsilon + left_part, right_part = cut_reference(self, midpoint) + left_part = rstrip(left_part) + right_part = lstrip(right_part) + + contigs.remove(self) + contigs.append(left_part) + contigs.append(right_part) + process_queue.put(right_part) + log(events.SplitGap(self, gap, left_part, right_part)) + return + + process_queue: LifoQueue = LifoQueue() + for contig in contigs: + process_queue.put(contig) + + while not process_queue.empty(): + contig = process_queue.get() + try_split(contig) + + return contigs + + +def stitch_contigs(contigs: Iterable[GenotypedContig]) -> Iterable[GenotypedContig]: + contigs = list(contigs) + for contig in contigs: + log(events.Intro(contig)) + contig.register() + + maybe_aligned = list(align_all_to_reference(contigs)) + + # Contigs that did not align do not need any more processing + yield from (x for x in maybe_aligned if not isinstance(x, AlignedContig)) + aligned = [x for x in maybe_aligned if isinstance(x, AlignedContig)] + + aligned = split_contigs_with_gaps(aligned) + aligned = drop_completely_covered(aligned) + yield from combine_overlaps(aligned) + + +GroupRef = Optional[str] + + +def stitch_consensus(contigs: Iterable[GenotypedContig]) -> Iterable[GenotypedContig]: + contigs = list(stitch_contigs(contigs)) + consensus_parts: Dict[GroupRef, List[AlignedContig]] = defaultdict(list) + + for contig in contigs: + if isinstance(contig, AlignedContig): + consensus_parts[contig.group_ref].append(contig) + else: + yield contig + + def combine(group_ref): + ctgs = sorted(consensus_parts[group_ref], key=lambda x: x.alignment.r_st) + result = combine_contigs(ctgs) + log(events.FinalCombine(ctgs, result)) + return result + + yield from map(combine, consensus_parts) + + +def write_contigs(output_csv: TextIO, contigs: Iterable[GenotypedContig]): + writer = csv.DictWriter(output_csv, + ['ref', 'match', 
'group_ref', 'contig'], + lineterminator=os.linesep) + writer.writeheader() + for contig in contigs: + writer.writerow(dict(ref=contig.ref_name, + match=contig.match_fraction, + group_ref=contig.group_ref, + contig=contig.seq)) + + output_csv.flush() + + +def read_contigs(input_csv: TextIO) -> Iterable[GenotypedContig]: + projects = ProjectConfig.loadDefault() + + for row in csv.DictReader(input_csv): + seq = row['contig'] + ref_name = row['ref'] + group_ref = row['group_ref'] + match_fraction = float(row['match']) + + try: + ref_seq = projects.getGenotypeReference(group_ref) + except KeyError: + try: + ref_seq = projects.getReference(group_ref) + except KeyError: + ref_seq = None + + yield GenotypedContig(name=None, + seq=seq, + ref_name=ref_name, + group_ref=group_ref, + ref_seq=str(ref_seq) if ref_seq is not None else None, + match_fraction=match_fraction) + + +def contig_stitcher(input_csv: TextIO, output_csv: TextIO, stitcher_plot_path: Optional[str]) -> int: + with StitcherContext.fresh() as ctx: + contigs = list(read_contigs(input_csv)) + + if output_csv is not None or stitcher_plot_path is not None: + contigs = list(stitch_consensus(contigs)) + + if output_csv is not None: + write_contigs(output_csv, contigs) + + if stitcher_plot_path is not None: + plot_stitcher_coverage(ctx.events, stitcher_plot_path) + + return len(contigs) + + +def main(argv: Sequence[str]): + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument('contigs', type=argparse.FileType('r'), help="Input CSV file with assembled contigs.") + parser.add_argument('stitched_contigs', type=argparse.FileType('w'), + help="Output CSV file with stitched contigs.") + parser.add_argument('--plot', type=argparse.FileType('w'), + help="Output SVG image visualizing the stitching process.") + verbosity_group = parser.add_mutually_exclusive_group() + verbosity_group.add_argument('--verbose', action='store_true', help='Increase output verbosity.') + 
verbosity_group.add_argument('--no-verbose', action='store_true', help='Normal output verbosity.', default=True) + verbosity_group.add_argument('--debug', action='store_true', help='Maximum output verbosity.') + verbosity_group.add_argument('--quiet', action='store_true', help='Minimize output verbosity.') + + args = parser.parse_args(argv) + + if args.quiet: + logger.setLevel(logging.ERROR) + elif args.verbose: + logger.setLevel(logging.INFO) + elif args.debug: + logger.setLevel(logging.DEBUG) + else: + logger.setLevel(logging.WARN) + + logging.basicConfig(level=logger.level) + + plot_path = args.plot.name if args.plot is not None else None + contig_stitcher(args.contigs, args.stitched_contigs, plot_path) + + +if __name__ == '__main__': + import sys + main(sys.argv[1:]) diff --git a/micall/core/coverage_plots.py b/micall/core/coverage_plots.py index 5773717f5..4d8c5fc74 100644 --- a/micall/core/coverage_plots.py +++ b/micall/core/coverage_plots.py @@ -223,7 +223,7 @@ def concordance_plot(concordance_csv, plot_path=None, filetype='png', concordanc if plot_path is None: plot_path, _ = os.path.split(concordance_csv.name) reader = DictReader(concordance_csv) - fig, ax = plt.subplots(figsize=(4, 3), dpi=100) + fig, ax = plt.subplots(figsize=(4, 3), dpi=100, layout='tight') paths = [] for (reference, region), group in itertools.groupby(reader, itemgetter('reference', 'region')): @@ -250,7 +250,6 @@ def concordance_plot(concordance_csv, plot_path=None, filetype='png', concordanc plt.ylim([0, 110]) plt.xlabel('Reference coordinates (AA)', fontsize=9) plt.ylabel('20-base window average', fontsize=9) - plt.tight_layout() figname_parts = ['concordance', reference, region, filetype] if concordance_prefix: figname_parts.insert(0, concordance_prefix) @@ -284,7 +283,7 @@ def make_tar_path(tar_path): def parse_args(): parser = argparse.ArgumentParser(description='Generate coverage plots from MiCall outputs.') - parser.add_argument('amino_csv', type=argparse.FileType('rU'), + 
parser.add_argument('amino_csv', type=argparse.FileType('r'), help=' CSV containing amino acid frequency outputs.') parser.add_argument('coverage_scores_csv', type=argparse.FileType('w'), help=' CSV coverage scores.') diff --git a/micall/core/denovo.py b/micall/core/denovo.py index 643f727cc..5e32f6acb 100644 --- a/micall/core/denovo.py +++ b/micall/core/denovo.py @@ -1,451 +1,90 @@ import argparse import logging import os -import typing -from collections import Counter -from csv import DictWriter, DictReader +from typing import Optional, TextIO, cast, BinaryIO +from csv import DictReader from datetime import datetime from glob import glob -from io import StringIO -from itertools import groupby -from operator import itemgetter -from shutil import rmtree -from subprocess import run, PIPE, CalledProcessError, STDOUT +from shutil import rmtree, copyfileobj +from subprocess import PIPE, CalledProcessError, STDOUT +import subprocess from tempfile import mkdtemp from Bio import SeqIO -from Bio.Blast.Applications import NcbiblastnCommandline from Bio.Seq import Seq from Bio.SeqRecord import SeqRecord -from micall.core.project_config import ProjectConfig -from tempfile import mkstemp -from shutil import move, copymode -from os import fdopen, remove -from iva.assembly import Assembly -from pyfastaq.tasks import deinterleave -from micall.core.remap import remap, map_to_contigs - -HAPLOFLOW = "haploflow" IVA = "iva" -RAGTAG = "/home/charlotte/Documents/Git/MiCall/.venv/bin/RagTag/ragtag.py" -DEFAULT_DATABASE = os.path.join(os.path.dirname(__file__), - '..', - 'blast_db', - 'refs.fasta') logger = logging.getLogger(__name__) -def write_contig_refs(contigs_fasta_path, - contigs_csv, - merged_contigs_csv=None, - blast_csv=None): - """ Run BLAST search to identify contig sequences. 
- - :param str contigs_fasta_path: path to file to read contig sequences from - and append merged contigs to - :param contigs_csv: open file to write assembled contigs to - :param merged_contigs_csv: open file to read contigs that were merged from - amplicon reads - :param blast_csv: open file to write BLAST search results for each contig - """ - writer = DictWriter(contigs_csv, - ['ref', 'match', 'group_ref', 'contig'], - lineterminator=os.linesep) - writer.writeheader() - with open(contigs_fasta_path, 'a') as contigs_fasta: - if merged_contigs_csv is not None: - contig_reader = DictReader(merged_contigs_csv) - for i, row in enumerate(contig_reader, 1): - contig_name = f'merged-contig-{i}' - contigs_fasta.write(f">{contig_name}\n{row['contig']}\n") - group_refs = {} - genotypes = genotype(contigs_fasta_path, - blast_csv=blast_csv, - group_refs=group_refs) - genotype_count = 0 - for i, record in enumerate(SeqIO.parse(contigs_fasta_path, "fasta")): - (ref_name, match_fraction) = genotypes.get(record.name, ('unknown', 0)) - seq = record.seq - if match_fraction < 0: - seq = seq.reverse_complement() - match_fraction *= -1 - writer.writerow(dict(ref=ref_name, - match=match_fraction, - group_ref=group_refs.get(ref_name), - contig=seq)) - genotype_count += 1 - return genotype_count - - -def genotype(fasta, db=DEFAULT_DATABASE, blast_csv=None, group_refs=None): - """ Use Blastn to search for the genotype of a set of reference sequences. - - :param str fasta: file path of the FASTA file containing the query - sequences - :param str db: file path of the database to search for matches - :param blast_csv: open file to write the blast matches to, or None - :param dict group_refs: {contig_ref: group_ref} or None. The dictionary - will get filled in with the mapping from each contig's reference name - to the best matched reference for the whole seed group. 
- :return: {query_name: (ref_name, matched_fraction)} where query_name is a - sequence header from the query sequences FASTA file, ref_name is the - name of the best match from the database, and matched_fraction is the - fraction of the query that aligned against the reference (matches and - mismatches). - """ - contig_nums = {} # {contig_name: contig_num} - with open(fasta) as f: - for line in f: - if line.startswith('>'): - contig_name = line[1:-1] - contig_nums[contig_name] = len(contig_nums) + 1 - blast_columns = ['qaccver', - 'saccver', - 'pident', - 'score', - 'qcovhsp', - 'qstart', - 'qend', - 'sstart', - 'send'] - cline = NcbiblastnCommandline(query=fasta, - db=db, - outfmt=f'"10 {" ".join(blast_columns)}"', - evalue=0.0001, - gapopen=5, - gapextend=2, - penalty=-3, - reward=1, - max_target_seqs=5000) - stdout, _ = cline() - samples = {} # {query_name: (subject_name, matched_fraction)} - matches = sorted(DictReader(StringIO(stdout), blast_columns), - key=lambda row: (row['qaccver'], float(row['score']))) - if not blast_csv: - blast_writer = None - else: - blast_writer = DictWriter(blast_csv, - ['contig_num', - 'ref_name', - 'score', - 'match', - 'pident', - 'start', - 'end', - 'ref_start', - 'ref_end'], - lineterminator=os.linesep) - blast_writer.writeheader() - contig_top_matches = {match['qaccver']: match['saccver'] - for match in matches} - top_refs = set(contig_top_matches.values()) - projects = ProjectConfig.loadDefault() - match_scores = Counter() - for contig_name, contig_matches in groupby(matches, itemgetter('qaccver')): - contig_top_ref = contig_top_matches[contig_name] - contig_seed_group = projects.getSeedGroup(contig_top_ref) - for match in contig_matches: - ref_name = match['saccver'] - if ref_name not in top_refs: - continue - match_seed_group = projects.getSeedGroup(ref_name) - if match_seed_group == contig_seed_group: - match_scores[ref_name] += float(match['score']) - - if group_refs is not None: - group_top_refs = 
{projects.getSeedGroup(ref_name): ref_name - for ref_name, count in reversed(match_scores.most_common())} - for ref_name in contig_top_matches.values(): - group_refs[ref_name] = group_top_refs[projects.getSeedGroup(ref_name)] - - for match in matches: - matched_fraction = float(match['qcovhsp']) / 100 - if int(match['send']) < int(match['sstart']): - matched_fraction *= -1 - pident = round(float(match['pident'])) - contig_name = match['qaccver'] - samples[contig_name] = (match['saccver'], matched_fraction) - if blast_writer: - blast_writer.writerow(dict(contig_num=contig_nums[contig_name], - ref_name=match['saccver'], - score=match['score'], - match=matched_fraction, - pident=pident, - start=match['qstart'], - end=match['qend'], - ref_start=match['sstart'], - ref_end=match['send'])) - return samples - - -def separate_contigs(contigs_csv, ref_contigs_csv, noref_contigs_csv): - """ Separate contigs into those that mapped to or did not map to a reference. - :param contigs_csv: file with contigs, open in read mode - :param ref_contigs_csv: file for contigs that mapped to a reference, open in write mode - :param noref_contigs_csv: file for contigs that did not map to a reference, open in write mode - """ - threshold = 0.1 - # is a match threshold sufficient or do we need info from blast_csv as well? 
- fieldnames = ['ref', 'match', 'group_ref', 'contig'] - ref_contig_writer = DictWriter(ref_contigs_csv, fieldnames) - ref_contig_writer.writeheader() - noref_contig_writer = DictWriter(noref_contigs_csv, fieldnames) - noref_contig_writer.writeheader() - contig_reader = DictReader(contigs_csv) - num_total = 0 - num_match = 0 - for row in contig_reader: - num_total += 1 - if float(row['match']) > threshold: - ref_contig_writer.writerow(row) - num_match += 1 - else: - noref_contig_writer.writerow(row) - return num_total - num_match - - -def separate_reads(remap_csv, ref_reads_file, noref_reads_file, unmapped1, unmapped2): - """ Separate reads from remap.csv file into those that mapped to un unknown partial and the rest. - - :param remap_csv: remap output file created by map_to_contigs, open in read mode - :param ref_reads_file: file to write potentially useful reads (that mapped to useful contigs or that did not map) - :param noref_reads_file: file to write useless reads (that mapped to unknown contig) - :param unmapped1: fasta file 1 of reads that did not map - :param unmapped2: fasta file 2 of reads that did not map - """ - fieldnames = ['qname', 'flag', 'rname', 'pos', 'mapq', 'cigar', 'rnext', 'pnext', 'tlen', 'seq', 'qual'] - remap_reader = DictReader(remap_csv) - for row in remap_reader: - if row['rname'][-16:] == "-unknown-partial": - file_to_write = noref_reads_file - else: - file_to_write = ref_reads_file - file_to_write.write('@'+row['qname']+'\n') - file_to_write.write(row['seq']+'\n') - file_to_write.write('+\n') - file_to_write.write(row['qual']+'\n') - for line in unmapped1: - ref_reads_file.write(line) - for line in unmapped2: - ref_reads_file.write(line) +def count_fasta_sequences(file_path): + with open(file_path, 'r') as file: + return sum(1 for line in file if line.startswith('>')) def denovo(fastq1_path: str, fastq2_path: str, - contigs_csv: typing.TextIO, + fasta: TextIO, work_dir: str = '.', - merged_contigs_csv: typing.TextIO = None, - 
blast_csv: typing.TextIO = None, - haplo_args=None): + merged_contigs_csv: Optional[TextIO] = None, + ): """ Use de novo assembly to build contigs from reads. - :param fastq1_path: FASTQ file name for read 1 reads - :param fastq2_path: FASTQ file name for read 2 reads - :param contigs_csv: open file to write assembled contigs to + :param fastq1: FASTQ file for read 1 reads + :param fastq2: FASTQ file for read 2 reads + :param fasta: file to write assembled contigs to :param work_dir: path for writing temporary files :param merged_contigs_csv: open file to read contigs that were merged from amplicon reads - :param blast_csv: open file to write BLAST search results for each contig """ + old_tmp_dirs = glob(os.path.join(work_dir, 'assembly_*')) for old_tmp_dir in old_tmp_dirs: rmtree(old_tmp_dir, ignore_errors=True) tmp_dir = mkdtemp(dir=work_dir, prefix='assembly_') + start_time = datetime.now() start_dir = os.getcwd() joined_path = os.path.join(tmp_dir, 'joined.fastq') - run(['merge-mates', - fastq1_path, - fastq2_path, - '--interleave', - '-o', joined_path], - check=True) - - if haplo_args is None: - haplo_args = {'long': 0, - 'filter': 500, - 'thres': -1, - 'strict': 5, - 'error': 0.02, - 'kmer': 41, - 'merge': False, - 'scaffold': False, - 'patch': False, - 'ref': None, - 'RP': False, - 'IVA': False} - if not haplo_args['IVA']: - assembly_out_path = os.path.join(tmp_dir, 'haplo_out') - contigs_fasta_path = os.path.join(assembly_out_path, 'contigs.fa') - haplo_cmd = [HAPLOFLOW, - '--read-file', joined_path, - '--out', assembly_out_path, - '--k', str(haplo_args['kmer']), - '--error-rate', str(haplo_args['error']), - '--strict', str(haplo_args['strict']), - '--filter', str(haplo_args['filter']), - '--thres', str(haplo_args['thres']), - '--long', str(haplo_args['long'])] - try: - run(haplo_cmd, check=True, stdout=PIPE, stderr=STDOUT) - except CalledProcessError as ex: - output = ex.output and ex.output.decode('UTF8') - if output != 'Failed to make first seed. 
Cannot continue\n': - logger.warning('Haploflow failed to assemble.', exc_info=True) - logger.warning(output) - with open(contigs_fasta_path, 'a'): - pass - else: - assembly_out_path = os.path.join(tmp_dir, 'iva_out') - contigs_fasta_path = os.path.join(assembly_out_path, 'contigs.fasta') - iva_args = [IVA, '--fr', joined_path, '-t', '2'] - if merged_contigs_csv is not None: - seeds_fasta_path = os.path.join(tmp_dir, 'seeds.fasta') - with open(seeds_fasta_path, 'w') as seeds_fasta: - SeqIO.write((SeqRecord(Seq(row['contig']), f'seed-{i}', '', '') - for i, row in enumerate(DictReader(merged_contigs_csv))), - seeds_fasta, - 'fasta') - seeds_size = seeds_fasta.tell() - if seeds_size > 0: - iva_args.extend(['--contigs', seeds_fasta_path, '--make_new_seeds']) - iva_args.append(assembly_out_path) - try: - run(iva_args, check=True, stdout=PIPE, stderr=STDOUT) - except CalledProcessError as ex: - output = ex.output and ex.output.decode('UTF8') - if output != 'Failed to make first seed. Cannot continue\n': - logger.warning('iva failed to assemble.', exc_info=True) - logger.warning(output) - with open(contigs_fasta_path, 'a'): - pass - - if haplo_args['RP']: - contigs_firstpass = os.path.join(assembly_out_path, "contigs_firstpass.csv") - blast_firstpass = os.path.join(assembly_out_path, "blast_firstpass.csv") - ref_contigs = os.path.join(assembly_out_path, "ref_contigs.csv") - noref_contigs = os.path.join(assembly_out_path, "noref_contigs.csv") - with open(contigs_firstpass, 'w') as contigs_firstpass_csv, \ - open(blast_firstpass, 'w') as blast_firstpass_csv: - contig_count = write_contig_refs(contigs_fasta_path, - contigs_firstpass_csv, - blast_csv=blast_firstpass_csv) - with open(contigs_firstpass, 'r') as contigs_firstpass_csv, \ - open(ref_contigs, 'w') as ref_contigs_csv, \ - open(noref_contigs, 'w') as noref_contigs_csv: - num_noref = separate_contigs(contigs_firstpass_csv, ref_contigs_csv, noref_contigs_csv) - print(f"Assembled {contig_count} contigs in the first 
pass, of which {num_noref} did not map to a reference.") - unmapped1_path = os.path.join(assembly_out_path, 'firstpass_unmapped1.fastq') - unmapped2_path = os.path.join(assembly_out_path, 'firstpass_unmapped2.fastq') - remap_path = os.path.join(assembly_out_path, 'firstpass_remap.csv') - if num_noref: - with open(remap_path, 'w') as remap_csv, \ - open(os.path.join(assembly_out_path, 'firstpass_remap_counts.csv'), 'w') as counts_csv, \ - open(os.path.join(assembly_out_path, 'firstpass_remap_conseq.csv'), 'w') as conseq_csv, \ - open(unmapped1_path, 'w') as unmapped1, \ - open(unmapped2_path, 'w') as unmapped2, \ - open(contigs_firstpass, 'r') as contigs_firstpass_csv: - map_to_contigs(fastq1_path, - fastq2_path, - contigs_firstpass_csv, - remap_csv, - counts_csv, - conseq_csv, - unmapped1, - unmapped2, - assembly_out_path, ) - # we want to discard the reads that mapped to the contigs that did not blast to the refs - ref_reads_path = os.path.join(assembly_out_path, 'ref_reads.fasta') - noref_reads_path = os.path.join(assembly_out_path, 'noref_reads.fasta') - with open(remap_path, 'r') as remap_csv, \ - open(ref_reads_path, 'w') as ref_reads_file, \ - open(noref_reads_path, 'w') as noref_reads_file, \ - open(unmapped1_path, 'r') as unmapped1, \ - open(unmapped2_path, 'r') as unmapped2: - separate_reads(remap_csv, ref_reads_file, noref_reads_file, unmapped1, unmapped2) - assembly_out_path = os.path.join(tmp_dir, 'haplo_secondpass_out') - contigs_fasta_path = os.path.join(assembly_out_path, 'contigs.fa') - haplo_cmd = [HAPLOFLOW, - '--read-file', ref_reads_path, - '--out', assembly_out_path, - '--k', str(haplo_args['kmer']), - '--error-rate', str(haplo_args['error']), - '--strict', str(haplo_args['strict']), - '--filter', str(haplo_args['filter']), - '--thres', str(haplo_args['thres']), - '--long', str(haplo_args['long'])] - run(haplo_cmd, check=True, stdout=PIPE, stderr=STDOUT) - - if haplo_args['merge']: - fh, abs_path = mkstemp() - i = 0 - with fdopen(fh, 'w') as 
new_file: - with open(contigs_fasta_path) as old_file: - for line in old_file: - if line.startswith('>'): - new_file.write(f">contig{i}\n") - i += 1 - else: - new_file.write(line) - copymode(contigs_fasta_path, abs_path) - remove(contigs_fasta_path) - move(abs_path, contigs_fasta_path) - print(f"Number of contigs before trimming and joining: {i}") - - haplo_assembly = Assembly(contigs_file=contigs_fasta_path) - reads_prefix = os.path.join(tmp_dir, 'reads') - reads_1 = reads_prefix + '_1.fa' - reads_2 = reads_prefix + '_2.fa' - deinterleave(joined_path, reads_1, reads_2, fasta_out=True) - haplo_assembly._trim_strand_biased_ends(reads_prefix, tag_as_trimmed=True) - haplo_assembly._remove_contained_contigs(list(haplo_assembly.contigs.keys())) - haplo_assembly._merge_overlapping_contigs(list(haplo_assembly.contigs.keys())) - contigs_fasta_path = os.path.join(assembly_out_path, 'contigs_merged.fasta') - haplo_assembly.write_contigs_to_file(contigs_fasta_path) - - if haplo_args['scaffold']: - scaffolding_path = os.path.join(assembly_out_path, 'scaffolding') - scaffold_cmd = ['python3.8', - RAGTAG, - 'scaffold', - haplo_args['ref'], - contigs_fasta_path, - '-o', scaffolding_path, - '--aligner', 'nucmer', - '--nucmer-params', '--maxmatch -l 30 -c 20'] - run(scaffold_cmd, check=True, stdout=PIPE, stderr=STDOUT) - new_contigs_fasta_path = os.path.join(scaffolding_path, 'ragtag.scaffold.fasta') - if os.path.getsize(new_contigs_fasta_path) > 0: - print('Scaffolding was successful!') - contigs_fasta_path = new_contigs_fasta_path - else: - print('Scaffolding was not successful') - - if haplo_args['patch']: - patching_path = os.path.join(assembly_out_path, 'patching') - patch_cmd = ['python3.8', - RAGTAG, - 'patch', - contigs_fasta_path, - haplo_args['ref'], - '-o', patching_path, - '--nucmer-params', '--maxmatch -l 30 -c 20'] - run(patch_cmd, check=True, stdout=PIPE, stderr=STDOUT) - new_contigs_fasta_path = os.path.join(patching_path, 'ragtag.patch.fasta') - if 
os.path.getsize(new_contigs_fasta_path) > 0: - print('Patching was successful!') - contigs_fasta_path = new_contigs_fasta_path - else: - print('Patching was not successful') + subprocess.run(['merge-mates', + fastq1_path, + fastq2_path, + '--interleave', + '-o', joined_path], + check=True) + iva_out_path = os.path.join(tmp_dir, 'iva_out') + contigs_fasta_path = os.path.join(iva_out_path, 'contigs.fasta') + iva_args = [IVA, '--fr', joined_path, '-t', '2'] + if merged_contigs_csv is not None: + seeds_fasta_path = os.path.join(tmp_dir, 'seeds.fasta') + with open(seeds_fasta_path, 'w') as seeds_fasta: + SeqIO.write((SeqRecord(Seq(row['contig']), f'seed-{i}', '', '') + for i, row in enumerate(DictReader(merged_contigs_csv))), + seeds_fasta, + 'fasta') + seeds_size = seeds_fasta.tell() + if seeds_size > 0: + iva_args.extend(['--contigs', seeds_fasta_path, '--make_new_seeds']) + iva_args.append(iva_out_path) + try: + subprocess.run(iva_args, check=True, stdout=PIPE, stderr=STDOUT) + except CalledProcessError as ex: + output = ex.output and ex.output.decode('UTF8') + if output != 'Failed to make first seed. 
Cannot continue\n': + logger.warning('iva failed to assemble.', exc_info=True) + logger.warning(output) + with open(contigs_fasta_path, 'a'): + pass + + with open(contigs_fasta_path) as reader: + copyfileobj(cast(BinaryIO, reader), fasta) os.chdir(start_dir) duration = datetime.now() - start_time - contig_count = write_contig_refs(contigs_fasta_path, - contigs_csv, - blast_csv=blast_csv) + contig_count = count_fasta_sequences(contigs_fasta_path) logger.info('Assembled %d contigs in %s (%ds) on %s.', contig_count, duration, @@ -455,10 +94,24 @@ def denovo(fastq1_path: str, if __name__ == '__main__': logging.basicConfig(level=logging.INFO) - parser = argparse.ArgumentParser() - parser.add_argument('fastq1') - parser.add_argument('fastq2') - parser.add_argument('contigs', type=argparse.FileType('w')) + parser = argparse.ArgumentParser( + description="A script to perform de novo assembly of reads to build contigs." + ) + parser.add_argument( + 'fastq1', + type=argparse.FileType('r'), + help="Path to the FASTQ file containing read 1 of paired-end sequencing data." + ) + parser.add_argument( + 'fastq2', + type=argparse.FileType('r'), + help="Path to the FASTQ file containing read 2 of paired-end sequencing data." + ) + parser.add_argument( + 'fasta', + type=argparse.FileType('w'), + help="Path to the output FASTA file where assembled contigs will be written." 
+ ) args = parser.parse_args() - denovo(args.fastq1, args.fastq2, args.contigs) + denovo(args.fastq1.name, args.fastq2.name, args.fasta) diff --git a/micall/core/filter_quality.py b/micall/core/filter_quality.py index 3bc080610..e4a4deb25 100755 --- a/micall/core/filter_quality.py +++ b/micall/core/filter_quality.py @@ -13,7 +13,7 @@ def parse_args(): description='Post-processing of short-read alignments.') parser.add_argument('quality_csv', - type=argparse.FileType('rU'), + type=argparse.FileType('r'), help='QC error rate data, grouped by tile') parser.add_argument('bad_cycles_csv', type=argparse.FileType('w'), diff --git a/micall/core/plot_contigs.py b/micall/core/plot_contigs.py index 35fee6ace..f0187de32 100644 --- a/micall/core/plot_contigs.py +++ b/micall/core/plot_contigs.py @@ -1,23 +1,45 @@ import typing +from typing import Dict, Tuple, List, Set, Iterable, NoReturn from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, FileType from collections import Counter, defaultdict from csv import DictReader from io import StringIO from itertools import groupby -from math import log10, copysign +from math import log10, copysign, floor from operator import itemgetter, attrgetter from pathlib import Path +import logging import yaml +from aligntools import CigarHit from genetracks import Figure, Track, Multitrack, Coverage # noinspection PyPep8Naming import drawsvg as draw -from genetracks.elements import Element +from genetracks.elements import Element, Label from matplotlib import cm, colors from matplotlib.colors import Normalize from micall.core.project_config import ProjectConfig from micall.utils.alignment_wrapper import align_nucs +from micall.utils.contig_stitcher_contigs import Contig, GenotypedContig, AlignedContig +from micall.utils.contig_stitcher_context import StitcherContext +import micall.utils.contig_stitcher_events as events +from micall.data.landmark_reader import LandmarkReader + + +logger = logging.getLogger(__name__) + + +class 
LeftLabel(Label): + """Like Label, but anchored to the left, instead of the middle. + """ + def draw(self, *args, **kwargs): + d = super().draw(*args, **kwargs) + assert len(d.children) == 1 + text = d.children[0] + text.args['text-anchor'] = 'left' + # text.args['fill'] = 'red' # works by the way + return d class SmoothCoverage(Coverage): @@ -153,17 +175,19 @@ def draw(self, x=0, y=0, xscale=1.0): group.append(draw.Line(line_start, arrow_y, arrow_start, arrow_y, stroke='black')) - group.append(draw.Circle(centre, h/2, r, fill='ivory', stroke='black')) + if self.label is not None: + group.append(draw.Circle(centre, h/2, r, fill='ivory', stroke='black')) group.append(draw.Lines(arrow_end, arrow_y, arrow_start, arrow_y + arrow_size/2, arrow_start, arrow_y - arrow_size/2, arrow_end, arrow_y, fill='black')) - group.append(draw.Text(self.label, - font_size, - centre, h / 2, - text_anchor='middle', - dy="0.35em")) + if self.label is not None: + group.append(draw.Text(self.label, + font_size, + centre, h / 2, + text_anchor='middle', + dy="0.35em")) return group @@ -376,6 +400,856 @@ def build_coverage_figure(genome_coverage_csv, blast_csv=None, use_concordance=F return f +def plot_stitcher_coverage(logs: Iterable[events.EventType], genome_coverage_svg_path: str): + with StitcherContext.stage(): + f = build_stitcher_figure(logs) + f.show(w=970).save_svg(genome_coverage_svg_path, context=draw.Context(invert_y=True)) + return f + + +def build_stitcher_figure(logs: Iterable[events.EventType]) -> Figure: + complete_contig_map: Dict[int, GenotypedContig] = {} + name_map: Dict[int, str] = {} + complete_parent_graph: Dict[int, List[int]] = {} + alive_set: Set[int] = set() + morphism_graph: Dict[int, List[int]] = {} + reduced_parent_graph: Dict[int, List[int]] = {} + transitive_parent_graph: Dict[int, List[int]] = {} + discarded: List[int] = [] + unknown: List[int] = [] + anomaly: List[int] = [] + unaligned_map: Dict[int, List[CigarHit]] = {} + overlaps_list: List[int] = [] + 
overlap_leftparent_map: Dict[int, int] = {} + overlap_rightparent_map: Dict[int, int] = {} + overlap_lefttake_map: Dict[int, int] = {} + overlap_righttake_map: Dict[int, int] = {} + overlap_left_sibling: Dict[int, int] = {} + overlap_right_sibling: Dict[int, int] = {} + combine_left_edge: Dict[int, int] = {} + combine_right_edge: Dict[int, int] = {} + children_join_points: List[int] = [] + query_position_map: Dict[int, Tuple[int, int]] = {} + lstrip_map: Dict[int, int] = {} + rstrip_map: Dict[int, int] = {} + + def remove_intermediate_edges(graph): + tr_cl = transitive_closure(graph) + ret = {} + for parent, children in graph.items(): + lst = [] + for child in children: + if all(other not in tr_cl.get(child, []) for other in children): + lst.append(child) + ret[parent] = lst + return ret + + def remove_transitive_edges(graph): + tr_cl = transitive_closure(graph) + ret = {} + for parent, children in graph.items(): + lst = [] + for child in children: + is_transitive = any(child in tr_cl.get(other_node, []) + for other_node in children + if other_node != child) + if not is_transitive: + lst.append(child) + ret[parent] = lst + return ret + + def remove_duplicate_edges(graph): + ret = {} + for parent, children in graph.items(): + lst = [] + for child in children: + if child not in lst: + lst.append(child) + ret[parent] = lst + return ret + + def get_transitive_children(recur, lst, graph, current): + for child in graph.get(current, []): + if child not in recur: + recur.add(child) + lst.append(child) + get_transitive_children(recur, lst, graph, child) + + def transitive_closure(graph): + ret = {} + for parent in graph: + children = [] + get_transitive_children(set(), children, graph, parent) + ret[parent] = children + return ret + + def copy_graph(graph): + ret = {} + for parent, children in graph.items(): + ret[parent] = children[:] + return ret + + def reflexive_closure(graph): + ret = copy_graph(graph) + for parent, children in ret.items(): + if parent not in children: 
+ children.append(parent) + for child in children[:]: + if child not in ret: + ret[child] = [] + lst = ret[child] + if child not in lst: + ret[child].append(child) + return ret + + def inverse_graph(graph): + ret = {} + for parent, children in graph.items(): + for child in children: + if child not in ret: + ret[child] = [] + ret[child].append(parent) + return ret + + def graph_sum(graph_a, graph_b): + ret = copy_graph(graph_a) + for key, values in graph_b.items(): + if key not in ret: + ret[key] = [] + for value in values: + lst = ret[key] + if value not in lst: + lst.append(value) + return ret + + def symmetric_closure(graph): + return graph_sum(graph, inverse_graph(graph)) + + def record_contig(contig: GenotypedContig, parents: Iterable[GenotypedContig]): + complete_contig_map[contig.id] = contig + if [contig.id] != [parent.id for parent in parents]: + for parent in parents: + complete_contig_map[parent.id] = parent + if contig.id not in complete_parent_graph: + complete_parent_graph[contig.id] = [] + + complete_parent_graph[contig.id].append(parent.id) + + def record_alive(contig: Contig): + alive_set.add(contig.id) + + def record_bad_contig(contig: GenotypedContig, lst: List[int]): + complete_contig_map[contig.id] = contig + if contig.id not in lst: + lst.append(contig.id) + + def record_lstrip(result: AlignedContig, original: AlignedContig): + lstrip_map[result.id] = original.id + + def record_rstrip(result: AlignedContig, original: AlignedContig): + rstrip_map[result.id] = original.id + + def hit_to_insertions(contig: GenotypedContig, hit: CigarHit): + yield CigarHit.from_default_alignment(q_st=0, q_ei=hit.q_st - 1, r_st=hit.r_st, r_ei=hit.r_st - 1) + yield from hit.insertions() + yield CigarHit.from_default_alignment(q_st=hit.q_ei + 1, q_ei=len(contig.seq) - 1, + r_st=hit.r_ei + 1, r_ei=hit.r_ei) + + def hits_to_insertions(contig: GenotypedContig, hits: List[CigarHit]): + for hit in hits: + yield from hit_to_insertions(contig, hit) + + def 
record_initial_hit(contig: GenotypedContig, hits: List[CigarHit]): + insertions = [gap for gap in hits_to_insertions(contig, hits)] + unaligned_map[contig.id] = insertions + + for event in logs: + if isinstance(event, events.FinalCombine): + record_contig(event.result, event.contigs) + record_alive(event.result) + elif isinstance(event, events.SplitGap): + record_contig(event.left, [event.contig]) + record_contig(event.right, [event.contig]) + record_alive(event.left) + record_alive(event.right) + elif isinstance(event, events.Intro): + record_contig(event.contig, []) + record_alive(event.contig) + elif isinstance(event, events.ConnectedHit): + record_contig(event.part, [event.contig]) + record_alive(event.part) + elif isinstance(event, events.NoRef): + record_bad_contig(event.contig, unknown) + record_alive(event.contig) + elif isinstance(event, events.ZeroHits): + record_bad_contig(event.contig, anomaly) + record_alive(event.contig) + elif isinstance(event, events.StrandConflict): + record_bad_contig(event.contig, anomaly) + record_alive(event.contig) + elif isinstance(event, events.ReverseComplement): + record_contig(event.result, [event.contig]) + record_alive(event.result) + elif isinstance(event, events.HitNumber): + record_initial_hit(event.contig, event.connected) + record_alive(event.contig) + elif isinstance(event, events.Munge): + record_contig(event.result, [event.left, event.right]) + elif isinstance(event, events.LStrip): + record_contig(event.result, [event.original]) + record_lstrip(event.result, event.original) + elif isinstance(event, events.RStrip): + record_contig(event.result, [event.original]) + record_rstrip(event.result, event.original) + elif isinstance(event, events.InitialStrip): + pass + elif isinstance(event, events.Overlap): + overlaps_list.append(event.left_overlap.id) + overlaps_list.append(event.right_overlap.id) + overlap_leftparent_map[event.left_remainder.id] = event.left.id + overlap_rightparent_map[event.right_remainder.id] = 
event.right.id + overlap_lefttake_map[event.left_remainder.id] = event.left_take.id + overlap_righttake_map[event.right_remainder.id] = event.right_take.id + overlap_left_sibling[event.left_remainder.id] = event.right_remainder.id + overlap_right_sibling[event.right_remainder.id] = event.left_remainder.id + elif isinstance(event, events.Drop): + record_bad_contig(event.contig, discarded) + record_alive(event.contig) + elif isinstance(event, events.StitchCut): + record_contig(event.left_overlap, [event.left]) + record_contig(event.left_remainder, [event.left]) + record_contig(event.right_overlap, [event.right]) + record_contig(event.right_remainder, [event.right]) + elif isinstance(event, events.Stitch): + record_contig(event.result, [event.left, event.right]) + record_alive(event.result) + elif isinstance(event, events.NoOverlap): + record_alive(event.contig) + elif isinstance(event, events.Cut): + record_contig(event.left, [event.original]) + record_contig(event.right, [event.original]) + elif isinstance(event, events.Combine): + record_alive(event.result) + record_contig(event.result, event.contigs) + if event.contigs: + combine_left_edge[event.result.id] = event.contigs[0].id + combine_right_edge[event.result.id] = event.contigs[-1].id + elif isinstance(event, (events.IgnoreGap, events.InitialHit)): + pass + else: + _x: NoReturn = event + raise RuntimeError(f"Unrecognized action or event: {event}") + + notransitive_parent_graph = remove_transitive_edges(complete_parent_graph) + nodup_parent_graph = remove_duplicate_edges(notransitive_parent_graph) + + # Close alive set by parents + def extend_alive(contig_id): + alive_set.add(contig_id) + for parent_id in nodup_parent_graph.get(contig_id, []): + extend_alive(parent_id) + + for contig_id in alive_set.copy(): + extend_alive(contig_id) + + parent_graph: Dict[int, List[int]] = {} + for contig_id in nodup_parent_graph: + if contig_id in alive_set: + parent_graph[contig_id] = nodup_parent_graph[contig_id] + + 
contig_map: Dict[int, GenotypedContig] = {k: v for k, v in complete_contig_map.items() if k in alive_set} + bad_contigs = anomaly + discarded + unknown + group_refs = {contig.group_ref: len(contig.ref_seq) for contig in contig_map.values() if contig.ref_seq} + children_graph = inverse_graph(parent_graph) + transitive_parent_graph = transitive_closure(parent_graph) + transitive_children_graph = transitive_closure(children_graph) + reduced_parent_graph = remove_intermediate_edges(transitive_parent_graph) + sorted_roots = list(sorted(parent_id for + parent_id in contig_map + if parent_id not in parent_graph)) + sorted_sinks = list(sorted(child_id for + child_id in contig_map + if child_id not in children_graph)) + + lstrip_set = set(lstrip_map.keys()) + rstrip_set = set(rstrip_map.keys()) + + for contig_id, parents in parent_graph.items(): + if len(parents) == 1: + morphism_graph[parents[0]] = [contig_id] + + transitive_morphism_graph = transitive_closure(morphism_graph) + reduced_morphism_graph = remove_intermediate_edges(transitive_morphism_graph) + eqv_morphism_graph = reflexive_closure(symmetric_closure(transitive_morphism_graph)) + + for contig_id, parents in parent_graph.items(): + if len(parents) > 1: + children_join_points.append(contig_id) + + def set_query_position(contig_id: int) -> None: + contig = contig_map[contig_id] + children_ids = children_graph.get(contig.id, []) + + def copy_from_parent(contig: AlignedContig, parent_id: int) -> None: + if parent_id in query_position_map: + (original_q_st, original_q_ei) = query_position_map[parent_id] + (current_q_st, current_q_ei) = (contig.alignment.q_st, contig.alignment.q_ei) + current_query_len = abs(current_q_st - current_q_ei) + + if contig_id in lstrip_map: + query_position_map[contig.id] = (original_q_ei - current_query_len, original_q_ei) + elif contig_id in rstrip_map: + query_position_map[contig.id] = (original_q_st, original_q_st + current_query_len) + else: + query_position_map[contig_id] = 
query_position_map[parent_id] + + if contig_id not in query_position_map: + if isinstance(contig, AlignedContig): + regular_parents_ids = parent_graph.get(contig_id, []) + regular_parents_ids = [name for name in regular_parents_ids if name in query_position_map] + strip_parents_ids = lstrip_map.get(contig_id, None) or rstrip_map.get(contig_id, None) + parents_ids = (strip_parents_ids and [strip_parents_ids]) or regular_parents_ids + if parents_ids: + for parent_id in parents_ids: + copy_from_parent(contig, parent_id) + else: + query_position_map[contig_id] = (contig.alignment.q_st, contig.alignment.q_ei) + + for child_id in children_ids: + set_query_position(child_id) + + for contig_id in sorted_roots: + set_query_position(contig_id) + + def copy_takes_one_side(edge_table, overlap_xtake_map, overlap_xparent_map, overlap_xsibling, xstrip_set): + for parent in edge_table: + child_remainder = edge_table[parent] + for child_remainder_morph in eqv_morphism_graph.get(child_remainder, [child_remainder]): + for parent_morph in eqv_morphism_graph.get(parent, [parent]): + if child_remainder_morph in xstrip_set: + xstrip_set.add(parent_morph) + if parent_morph in xstrip_set: + xstrip_set.add(child_remainder_morph) + + if child_remainder_morph in overlap_xtake_map: + continue + for parent_remainder in overlap_xparent_map: + if overlap_xparent_map[parent_remainder] == parent_morph: + overlap_xtake_map[child_remainder_morph] = overlap_xtake_map[parent_remainder] + overlap_xsibling[child_remainder_morph] = overlap_xsibling[parent_remainder] + yield True + + # Closing `takes` by parents + while list(copy_takes_one_side(combine_right_edge, overlap_lefttake_map, + overlap_leftparent_map, overlap_left_sibling, rstrip_set)): + pass + while list(copy_takes_one_side(combine_left_edge, overlap_righttake_map, + overlap_rightparent_map, overlap_right_sibling, lstrip_set)): + pass + + final_nodes: List[int] = [] + final_parts: Dict[int, bool] = {} + final_children_mapping: Dict[int, 
List[int]] = {} + + def add_join_parents(join_id): + if join_id in children_join_points: + for contig_id in parent_graph.get(join_id, [join_id]): + add_join_parents(contig_id) + else: + final_nodes.append(join_id) + + for join_id in children_join_points + sorted_sinks: + add_join_parents(join_id) + + def is_ancestor(contig_id, other_ids): + for other in other_ids: + if other == contig_id: + continue + + if contig_id in transitive_children_graph.get(other, []): + return True + return False + + for contig_id in final_nodes[:]: + if is_ancestor(contig_id, final_nodes): + final_nodes.remove(contig_id) + + for contig_id in final_nodes: + if any(contig_id in eqv_morphism_graph.get(bad, []) for bad in bad_contigs): + continue + + if any(contig_id in eqv_morphism_graph.get(temp_id, [temp_id]) for temp_id in overlaps_list): + continue + + final_parts[contig_id] = True + + for contig_id in bad_contigs: + final_parts[contig_id] = True + + for parent_id in sorted_roots: + children = [] + for final_contig in final_parts: + if final_contig == parent_id or \ + parent_id in reduced_parent_graph.get(final_contig, [final_contig]): + children.append(final_contig) + + final_children_mapping[parent_id] = children + + aligned_size_map: Dict[int, Tuple[int, int]] = {} + full_size_map: Dict[int, Tuple[int, int]] = {} + + def get_neighbours(part, lookup): + for clone in eqv_morphism_graph.get(part.id, [part.id]): + maybe_id = lookup.get(clone, None) + if maybe_id is not None: + yield contig_map[maybe_id] + + def get_final_version(contig): + [name] = reduced_morphism_graph.get(contig.id, [contig.id]) + return contig_map[name] + + def get_neighbour(part, lookup): + if not part: + return None + lst = list(get_neighbours(part, lookup)) + ret = max(map(get_final_version, lst), key=lambda contig: contig.alignment.ref_length, default=None) + return ret + + def get_contig_coordinates(contig: GenotypedContig) -> Tuple[int, int, int, int]: + if isinstance(contig, AlignedContig) and 
contig.alignment.ref_length > 0: + r_st = contig.alignment.r_st + r_ei = contig.alignment.r_ei + if contig.id in aligned_size_map: + a_r_st, a_r_ei = aligned_size_map[contig.id] + else: + a_r_st = r_st + a_r_ei = r_ei + if contig.id in full_size_map: + f_r_st, f_r_ei = full_size_map[contig.id] + else: + f_r_st = r_st - contig.alignment.q_st + f_r_ei = r_ei + (len(contig.seq) - contig.alignment.q_ei) + else: + f_r_st = 0 + f_r_ei = len(contig.seq) + a_r_st = f_r_st + a_r_ei = f_r_ei + return (a_r_st, a_r_ei, f_r_st, f_r_ei) + + for parent_id in sorted_roots: + parts_ids = final_children_mapping[parent_id] + for part_id in parts_ids: + part = contig_map[part_id] + if not isinstance(part, AlignedContig): + continue + + prev_part = get_neighbour(part, overlap_righttake_map) + next_part = get_neighbour(part, overlap_lefttake_map) + + if prev_part is not None: + r_st = prev_part.alignment.r_st + elif part_id in lstrip_set: + r_st = part.alignment.r_st + else: + start_delta = -1 * part.alignment.q_st + r_st = part.alignment.r_st + start_delta + + if next_part is not None: + r_ei = next_part.alignment.r_ei + elif part_id in rstrip_set: + r_ei = part.alignment.r_ei + else: + end_delta = len(part.seq) - 1 - part.alignment.q_ei + r_ei = part.alignment.r_ei + end_delta + + aligned_size_map[part.id] = (r_st, r_ei) + + sibling_left_id = ([overlap_left_sibling[name] + for name in eqv_morphism_graph.get(part.id, [part.id]) + if name in overlap_left_sibling] or [0])[0] + sibling_left = sibling_left_id and contig_map[sibling_left_id] + sibling_right_id = ([overlap_right_sibling[name] + for name in eqv_morphism_graph.get(part.id, [part.id]) + if name in overlap_right_sibling] or [0])[0] + sibling_right = sibling_right_id and contig_map[sibling_right_id] + prev_part = get_neighbour(sibling_right, overlap_lefttake_map) + next_part = get_neighbour(sibling_left, overlap_righttake_map) + + if prev_part is not None: + r_st = prev_part.alignment.r_st + else: + r_st = part.alignment.r_st + + 
if next_part is not None: + r_ei = next_part.alignment.r_ei + else: + r_ei = part.alignment.r_ei + + full_size_map[part.id] = (r_st, r_ei) + + def carve_gap(gap: CigarHit, aligned_parts: Iterable[AlignedContig]): + for contig in aligned_parts: + (a_r_st, a_r_ei, f_r_st, f_r_ei) = get_contig_coordinates(contig) + other_coords = query_position_map.get(contig.id, (-1, -2)) + + other_q_st = min(other_coords) - max(0, abs(f_r_st - a_r_st)) + other_q_ei = max(other_coords) + max(0, abs(a_r_ei - f_r_ei)) + + if gap.q_st <= other_q_st and gap.q_ei >= other_q_st: + q_st = gap.q_st + q_ei = other_q_st - 1 + elif gap.q_ei >= other_q_ei and gap.q_ei <= other_q_ei: + q_st = other_q_ei + 1 + q_ei = gap.q_ei + elif gap.q_st >= other_q_st and gap.q_ei <= other_q_ei: + return None + else: + continue + + if q_st >= other_q_st and q_ei <= other_q_ei: + return None + + if q_st > q_ei: + return None + + gap = CigarHit.from_default_alignment(q_st=q_st, q_ei=q_ei, r_st=gap.r_st, r_ei=gap.r_ei) + + if gap.query_length > 0: + return gap + + def collect_gaps(root: int, children_ids: List[int]): + all_children = [contig_map[name] for name in children_ids] + children = [child for child in all_children if isinstance(child, AlignedContig)] + for name in unaligned_map: + if reduced_parent_graph.get(name, [name]) == [root]: + for gap in unaligned_map[name]: + carved = carve_gap(gap, children) + if carved is not None: + yield carved + + carved_unaligned_parts: Dict[int, List[int]] = {} + for root in sorted_roots: + existing: Set[Tuple[int, int]] = set() + children = final_children_mapping[root] + for gap in collect_gaps(root, children): + coords = (gap.q_st, gap.q_ei) + if coords not in existing: + existing.add(coords) + if root not in carved_unaligned_parts: + carved_unaligned_parts[root] = [] + fake = Contig(name=None, seq="") + carved_unaligned_parts[root].append(fake.id) + query_position_map[fake.id] = coords + + merged_unaligned_parts: Dict[int, List[int]] = {} + for root in sorted_roots: + 
children = final_children_mapping[root] + unaligned_children = carved_unaligned_parts.get(root, []) + todo = children + unaligned_children + todo = list(sorted(todo, key=lambda name: query_position_map.get(name, (-1, -1)))) + current_group = [] + for child_id in todo + [None]: + if child_id in unaligned_children: + coords = query_position_map[child_id] + current_group.append(coords) + elif current_group: + coords = (min(q_st for q_st, q_ei in current_group), + max(q_ei for q_st, q_ei in current_group)) + if root not in merged_unaligned_parts: + merged_unaligned_parts[root] = [] + fake = Contig(name=None, seq="") + query_position_map[fake.id] = coords + merged_unaligned_parts[root].append(fake.id) + current_group = [] + + name_map = {} + for i, root in enumerate(sorted_roots): + children = final_children_mapping[root] + unaligned_children = merged_unaligned_parts.get(root, []) + + name_map[root] = f"{i + 1}" + + todo_ids = children + unaligned_children + todo_ids = list(sorted(todo_ids, key=lambda name: query_position_map.get(name, (-1, -1)))) + for k, child_id in enumerate(todo_ids): + if len(todo_ids) > 1: + name_map[child_id] = f"{i + 1}.{k + 1}" + else: + name_map[child_id] = f"{i + 1}" + + for bad_id in bad_contigs: + if bad_id not in children: + if bad_id in transitive_parent_graph \ + and root in transitive_parent_graph[bad_id]: + k += 1 + name_map[bad_id] = f"{i + 1}.{k + 1}" + + for contig_id, name in name_map.items(): + if contig_id in complete_contig_map: + contig = complete_contig_map[contig_id] + logger.debug(f"Contig name {contig.unique_name} is displayed as {name!r}.") + + def get_tracks(parts: Iterable[GenotypedContig]) -> Iterable[Track]: + for part in parts: + name = name_map[part.id] + (a_r_st, a_r_ei, f_r_st, f_r_ei) = get_contig_coordinates(part) + + if a_r_st < f_r_st: + yield Track(min(a_r_st, f_r_st) + position_offset, + max(a_r_st, f_r_st) + position_offset, color="yellow") + + if a_r_ei > f_r_ei: + yield Track(min(a_r_ei, f_r_ei) + 
position_offset, + max(a_r_ei, f_r_ei) + position_offset, color="yellow") + + if isinstance(part, AlignedContig): + colour = 'lightgrey' + else: + colour = "yellow" + + yield Track(f_r_st + position_offset, f_r_ei + position_offset, label=f"{name}", color=colour) + + def get_arrows(parts: Iterable[GenotypedContig], labels: bool) -> Iterable[Arrow]: + for part in parts: + name = name_map[part.id] if labels else None + height = 20 if labels else 1 + elevation = 1 if labels else -20 + (a_r_st, a_r_ei, f_r_st, f_r_ei) = get_contig_coordinates(part) + + if isinstance(part, AlignedContig) and part.strand == "reverse": + tmp = a_r_st + a_r_st = a_r_ei + a_r_ei = tmp + + yield Arrow(a_r_st + position_offset, a_r_ei + position_offset, + elevation=elevation, + h=height, + label=name) + + def make_ray() -> Element: + screen_size = (max_position - min_position) + position_offset / 2 + single_size = 0.02 * screen_size + + def generate_beams(): + for i in range(floor(screen_size / single_size) + 1): + if i % 2 == 0: + yield Track(i * single_size + min_position + position_offset / 2, + (i + 1) * single_size + min_position + position_offset / 2, + h=0.1, color="green") + + return Multitrack(list(generate_beams())) + + def add_section(title: str) -> None: + label = LeftLabel(text=title, x=0, font_size=12) + pos = position_offset / 2 + figure.add(Arrow(pos, pos, h=0)) + figure.add(make_ray()) + figure.add(Arrow(pos, pos, h=0)) + figure.add(Track(pos, pos, label=label, h=0)) + + min_position = 0 + max_position = max(group_refs.values(), default=1) + for contig_id in final_parts: + contig = contig_map[contig_id] + if isinstance(contig, AlignedContig): + positions = get_contig_coordinates(contig) + max_position = max(max_position, max(positions)) + min_position = min(min_position, min(positions)) + else: + max_position = max(max_position, len(contig.seq)) + + position_offset = -1 * min_position + 0.05 * (max_position - min_position) + + ################ + # Drawing part # + 
################ + + landmark_reader = LandmarkReader.load() + figure = Figure() + for group_ref in group_refs: + try: + if group_ref is not None: + landmarks = landmark_reader.get_landmarks(group_ref) + else: + landmarks = None + except ValueError: + landmarks = None + + ############# + # Landmarks # + ############# + + if landmarks: + # Filling out missing ends. + prev_landmark = None + for landmark in sorted(landmarks, key=itemgetter('start')): + landmark.setdefault('frame', 0) + if prev_landmark and 'end' not in prev_landmark: + prev_landmark['end'] = landmark['start'] - 1 + prev_landmark = landmark + + # Computing the stretching factor. + landmark_max = 0 + for landmark in landmarks: + landmark_max = max(landmark_max, landmark['end']) + + stretch_c = group_refs[group_ref] / landmark_max + + # Drawing the landmarks. + for frame, frame_landmarks in groupby(landmarks, itemgetter('frame')): + subtracks = [] + for landmark in frame_landmarks: + landmark_colour = landmark.get('colour') + if landmark_colour is None: + continue + subtracks.append(Track(landmark['start'] * stretch_c + position_offset, + landmark['end'] * stretch_c + position_offset, + label=landmark['name'], + color=landmark_colour)) + figure.add(Multitrack(subtracks)) + + ############# + # Reference # + ############# + + r_st = 0 + r_ei = group_refs[group_ref] + reference_tracks = [] + reference_min = r_st + position_offset + reference_max = r_ei + position_offset + reference_tracks.append(Track(r_st + position_offset, r_ei + position_offset, color="red")) + + for contig_id in final_parts: + contig = contig_map[contig_id] + if contig.group_ref != group_ref: + continue + + if not isinstance(contig, AlignedContig): + continue + + if contig_id in bad_contigs: + continue + + (a_r_st, a_r_ei, f_r_st, f_r_ei) = get_contig_coordinates(contig) + reference_tracks.append(Track(a_r_st + position_offset, a_r_ei + position_offset, color="yellow")) + reference_min = min(a_r_st + position_offset, reference_min) + 
reference_max = max(a_r_ei + position_offset, reference_max) + + for contig_id in final_parts: + contig = contig_map[contig_id] + if contig.group_ref != group_ref: + continue + + if not isinstance(contig, AlignedContig): + continue + + if contig_id in bad_contigs: + continue + + (a_r_st, a_r_ei, f_r_st, f_r_ei) = get_contig_coordinates(contig) + reference_tracks.append(Track(f_r_st + position_offset, f_r_ei + position_offset, color="lightgray")) + reference_min = min(f_r_st + position_offset, reference_min) + reference_max = max(f_r_ei + position_offset, reference_max) + + figure.add(Multitrack(reference_tracks)) + midpoint = round((reference_max - reference_min) / 2 + reference_min) + figure.add(Track(midpoint, midpoint, label=group_ref, color="transparent", h=-11.5)) + + ########## + # Arrows # + ########## + + ref_arrows: List[Arrow] = [] + for root in sorted_roots: + parts_ids = final_children_mapping[root] + parts_ids = [name for name in parts_ids if name not in bad_contigs] + parts = [contig_map[name] for name in parts_ids] + parts = [part for part in parts if part.group_ref == group_ref] + ref_arrows.extend(get_arrows(parts, labels=True)) + + if ref_arrows: + figure.add(ArrowGroup(ref_arrows)) + + ########### + # Contigs # + ########### + + for root in sorted_roots: + parts_ids = final_children_mapping[root] + parts_ids = [name for name in parts_ids if name not in bad_contigs] + parts = [contig_map[name] for name in parts_ids] + parts = [part for part in parts if part.group_ref == group_ref] + if parts: + figure.add(ArrowGroup(list(get_arrows(parts, labels=False)))) + figure.add(Multitrack(list(get_tracks(parts)))) + + ############# + # Discarded # + ############# + + def get_group_discards(group_ref): + for root in sorted_roots: + if contig_map[root].group_ref != group_ref: + continue + + parts_ids = final_children_mapping[root] + parts_ids = [id for id in parts_ids if id in discarded] + unaligned_parts = merged_unaligned_parts.get(root, []) + for id in 
sorted(parts_ids + unaligned_parts, + key=lambda x: name_map[x.id] if isinstance(x, Contig) else name_map[x]): + if id in unaligned_parts: + (q_st, q_ei) = query_position_map[id] + label = name_map[id] + yield Track(position_offset, position_offset + abs(q_ei - q_st), + label=label, color="yellow") + else: + part = contig_map[id] + yield Multitrack(list(get_tracks([part]))) + + disc = list(get_group_discards(group_ref)) + if disc: + add_section("discards:") + for element in disc: + figure.add(element) + + ############# + # Anomalies # + ############# + + def get_group_anomalies(group_ref): + for root in sorted_roots: + parts_ids = final_children_mapping[root] + parts_ids = [name for name in parts_ids if name in anomaly] + parts = [contig_map[name] for name in parts_ids] + parts = [part for part in parts if part.group_ref == group_ref] + for part in parts: + yield Multitrack(list(get_tracks([part]))) + + anom = list(get_group_anomalies(group_ref)) + if anom: + add_section("anomaly:") + for element in anom: + figure.add(element) + + ########### + # Unknown # + ########### + + if unknown: + add_section("unknown:") + for parent_id in sorted_roots: + parts_ids = final_children_mapping[parent_id] + parts_ids = [name for name in parts_ids if name in unknown] + parts = [contig_map[name] for name in parts_ids] + for part in parts: + figure.add(Multitrack(list(get_tracks([part])))) + + if not figure.elements: + figure.add(Track(0, max_position, label='.', color='none')) + figure.add(Track(0, max_position * 3 / 2, label='No contigs found.', color='none', h=-10)) + return figure + + def map_references(contig_ref_name: str, coordinates_name: str, projects: ProjectConfig) -> typing.Mapping[int, int]: diff --git a/micall/core/project_config.py b/micall/core/project_config.py index 2fea21b39..b336254f9 100644 --- a/micall/core/project_config.py +++ b/micall/core/project_config.py @@ -1,5 +1,6 @@ import json import os +from typing import Dict, List G2P_SEED_NAME = 
"HIV1-CON-XX-Consensus-seed" @@ -53,7 +54,7 @@ def writeSeedFasta(self, fasta_file, excluded_seeds=None): if excluded_seeds: seed_region_set.difference_update(excluded_seeds) seed_region_list = list(seed_region_set) - seed_name_map = {} # {sequence: name} + seed_name_map: Dict[str, str] = {} # {sequence: name} seed_region_list.sort() for name in seed_region_list: region = self.config['regions'][name] @@ -157,7 +158,7 @@ def getProjectRegions(self, seed_name, coordinate_name, excluded_projects=None): project_names = set(self.config['projects']) if excluded_projects is not None: project_names.difference_update(excluded_projects) - project_names = sorted(project_names) + project_names: List[str] = sorted(project_names) # type: ignore[no-redef] for project_name in project_names: project = self.config['projects'][project_name] for region in project['regions']: diff --git a/micall/core/remap.py b/micall/core/remap.py index 5940eda50..268689bc5 100644 --- a/micall/core/remap.py +++ b/micall/core/remap.py @@ -631,7 +631,7 @@ def remap(fastq1: str, stderr, callback) new_counts.update(split_counts) - with open(samfile, 'rU') as f: + with open(samfile, 'r') as f: for fields in splitter.walk(f): remap_writer.writerow(dict(zip(SAM_FIELDS, fields))) @@ -771,7 +771,7 @@ def map_to_contigs(fastq1, stderr, callback) new_counts.update(split_counts) - with open(samfile, 'rU') as f: + with open(samfile, 'r') as f: for fields in splitter.walk(f): write_remap_row(remap_writer, fields) @@ -1199,7 +1199,7 @@ def main(): parser.add_argument('fastq1', help=' FASTQ containing forward reads') parser.add_argument('fastq2', help=' FASTQ containing reverse reads') parser.add_argument('contigs_csv', - type=argparse.FileType('rU'), + type=argparse.FileType('r'), help=' CSV containing assembled contigs') parser.add_argument('remap_csv', type=argparse.FileType('w'), diff --git a/micall/core/sam2aln.py b/micall/core/sam2aln.py index 8312f79b3..81da1bdab 100755 --- a/micall/core/sam2aln.py +++ 
b/micall/core/sam2aln.py @@ -26,7 +26,7 @@ def parse_args(): parser = argparse.ArgumentParser( description='Conversion of SAM data into aligned format.') parser.add_argument('remap_csv', - type=argparse.FileType('rU'), + type=argparse.FileType('r'), help=' SAM output of bowtie2 in CSV format') parser.add_argument('aligned_csv', type=argparse.FileType('w'), diff --git a/micall/core/trim_fastqs.py b/micall/core/trim_fastqs.py index ff063b835..8e30f3b72 100755 --- a/micall/core/trim_fastqs.py +++ b/micall/core/trim_fastqs.py @@ -60,9 +60,9 @@ def trim(original_fastq_filenames: typing.Sequence[str], bad_cycles_filename: str, trimmed_fastq_filenames: typing.Sequence[str], use_gzip: bool = True, - summary_file: typing.TextIO = None, - skip: typing.Tuple[str] = (), - project_code: str = None): + summary_file: typing.Optional[typing.TextIO] = None, + skip: typing.Iterable[str] = (), + project_code: typing.Optional[str] = None): """ :param original_fastq_filenames: sequence of two filenames, containing diff --git a/micall/data/landmark_reader.py b/micall/data/landmark_reader.py index 0dbe456d5..9c01b6031 100644 --- a/micall/data/landmark_reader.py +++ b/micall/data/landmark_reader.py @@ -1,5 +1,5 @@ import re -import typing +from typing import Optional, TextIO, Dict from operator import itemgetter from pathlib import Path @@ -10,7 +10,7 @@ class LandmarkReader: @classmethod - def load(cls, f: typing.TextIO = None): + def load(cls, f: Optional[TextIO] = None): """ Load an instance of this class from an open JSON file. :param f: The file to load from, or None to load from the default. 
@@ -70,6 +70,13 @@ def get_coordinates(self, seed_name: str) -> str: return genotype_landmarks['coordinates'] raise ValueError(f'No landmarks match {seed_name!r}.') + def get_landmarks(self, seed_name: str) -> Dict[str, object]: + for genotype_landmarks in self.landmarks: + seed_pattern = genotype_landmarks['seed_pattern'] + if re.fullmatch(seed_pattern, seed_name): + return genotype_landmarks['landmarks'] + raise ValueError(f'No landmarks match {seed_name!r}.') + def get_region(self, ref_name, position): match_regions = [] matches = [entry diff --git a/test_samples_default.csv b/micall/data/test_samples_default.csv similarity index 100% rename from test_samples_default.csv rename to micall/data/test_samples_default.csv diff --git a/micall/drivers/sample.py b/micall/drivers/sample.py index 5ad9b27af..c63c3353c 100644 --- a/micall/drivers/sample.py +++ b/micall/drivers/sample.py @@ -9,6 +9,7 @@ from micall.core.aln2counts import aln2counts from micall.core.amplicon_finder import write_merge_lengths_plot, merge_for_entropy from micall.core.cascade_report import CascadeReport +from micall.core.contig_stitcher import contig_stitcher from micall.core.coverage_plots import coverage_plot, concordance_plot from micall.core.plot_contigs import plot_genome_coverage from micall.core.prelim_map import prelim_map @@ -19,11 +20,17 @@ from micall.core.denovo import denovo from micall.g2p.fastq_g2p import fastq_g2p, DEFAULT_MIN_COUNT, MIN_VALID, MIN_VALID_PERCENT from micall.utils.driver_utils import makedirs +from micall.utils.fasta_to_csv import fasta_to_csv from contextlib import contextmanager logger = logging.getLogger(__name__) +def prepend_prefix_to_basename(prefix: str, path: str): + dir_name, base_name = os.path.split(path) + return os.path.join(dir_name, prefix + base_name) + + @contextmanager def open_files(**files): """ Context manager that will open files and close them at the end. 
@@ -61,10 +68,10 @@ def open_files(**files): raise IOError -def exclude_extra_seeds(excluded_seeds: typing.Sequence[str], - project_code: str = None) -> typing.Sequence[str]: +def exclude_extra_seeds(excluded_seeds: typing.Iterable[str], + project_code: typing.Optional[str] = None) -> typing.Sequence[str]: if project_code == 'HIVGHA': - return excluded_seeds + return tuple(excluded_seeds) projects = ProjectConfig.loadDefault() hivgha_seeds = projects.getProjectSeeds('HIVGHA') extra_exclusions = {seed @@ -81,7 +88,7 @@ def __init__(self, rank=None, debug_remap=False, scratch_path=None, - skip: typing.Tuple[str] = (), + skip: typing.Iterable[str] = (), **paths): """ Record the details. @@ -98,13 +105,13 @@ def __init__(self, fastq1 = paths.get('fastq1') if 'fastq2' in paths: pass - elif 'fastq1' in paths: + elif fastq1: if '_R1_' not in fastq1: raise ValueError( "fastq2 not given, and fastq1 does not contain '_R1_'.") paths['fastq2'] = fastq1.replace('_R1_', '_R2_') if fastq1: - self.name = '_'.join(os.path.basename(fastq1).split('_')[:2]) + self.name: typing.Optional[str] = '_'.join(os.path.basename(fastq1).split('_')[:2]) else: self.name = None self.basespace_id = basespace_id @@ -158,11 +165,10 @@ def get_scratch_path(self): def process(self, pssm, - excluded_seeds=(), - excluded_projects=(), + excluded_seeds: typing.Iterable[str] = (), + excluded_projects: typing.Iterable[str] = (), force_gzip=False, - use_denovo=False, - haplo_args=None): + use_denovo=False): """ Process a single sample. 
:param pssm: the pssm library for running G2P analysis @@ -226,16 +232,36 @@ def process(self, merged_contigs_csv=merged_contigs_csv) if use_denovo: - self.run_denovo(excluded_seeds, haplo_args=haplo_args) + self.run_denovo(excluded_seeds) else: self.run_mapping(excluded_seeds) + self.process_post_assembly(prefix="", + use_denovo=use_denovo, + excluded_projects=excluded_projects) + + if use_denovo: + self.process_post_assembly(prefix="unstitched_", + use_denovo=use_denovo, + excluded_projects=excluded_projects) + + logger.info('Finished sample %s.', self) + + def process_post_assembly(self, + use_denovo: bool, + excluded_projects: typing.Iterable[str], + prefix: str, + ): + + def with_prefix(path): + return prepend_prefix_to_basename(prefix, path) + logger.info('Running sam2aln on %s.', self) - with open(self.remap_csv) as remap_csv, \ - open(self.aligned_csv, 'w') as aligned_csv, \ - open(self.conseq_ins_csv, 'w') as conseq_ins_csv, \ - open(self.failed_csv, 'w') as failed_csv, \ - open(self.clipping_csv, 'w') as clipping_csv: + with open(with_prefix(self.remap_csv)) as remap_csv, \ + open(with_prefix(self.aligned_csv), 'w') as aligned_csv, \ + open(with_prefix(self.conseq_ins_csv), 'w') as conseq_ins_csv, \ + open(with_prefix(self.failed_csv), 'w') as failed_csv, \ + open(with_prefix(self.clipping_csv), 'w') as clipping_csv: sam2aln(remap_csv, aligned_csv, @@ -244,32 +270,35 @@ def process(self, clipping_csv=clipping_csv) logger.info('Running aln2counts on %s.', self) - with open_files(aligned_csv=(self.aligned_csv, 'r'), + with open_files(aligned_csv=(with_prefix(self.aligned_csv), 'r'), + + # Does not need a prefix because it is produced before the denovo/remap split. 
g2p_aligned_csv=(self.g2p_aligned_csv, 'r'), - clipping_csv=(self.clipping_csv, 'r'), - nuc_csv=(self.nuc_csv, 'w'), - conseq_ins_csv=(self.conseq_ins_csv, 'r'), - remap_conseq_csv=(self.remap_conseq_csv, 'r'), - contigs_csv=(self.contigs_csv, 'r') if use_denovo else None, - nuc_detail_csv=(self.nuc_details_csv, 'w') if use_denovo else None, - amino_csv=(self.amino_csv, 'w'), - amino_detail_csv=(self.amino_details_csv, 'w') if use_denovo else None, - insertions_csv=(self.insertions_csv, 'w'), - conseq_csv=(self.conseq_csv, 'w'), - conseq_region_csv=(self.conseq_region_csv, 'w') if use_denovo else None, - failed_align_csv=(self.failed_align_csv, 'w'), - coverage_summary_csv=(self.coverage_summary_csv, 'w'), - genome_coverage_csv=(self.genome_coverage_csv, 'w'), - conseq_all_csv=(self.conseq_all_csv, 'w'), - conseq_stitched_csv=(self.conseq_stitched_csv, 'w') if use_denovo else None, - minimap_hits_csv=(self.minimap_hits_csv, 'w'), - alignments_csv=(self.alignments_csv, 'w'), - alignments_unmerged_csv=(self.alignments_unmerged_csv, 'w'), - alignments_intermediate_csv=(self.alignments_intermediate_csv, 'w'), - alignments_overall_csv=(self.alignments_overall_csv, 'w'), - concordance_csv=(self.concordance_csv, 'w'), - concordance_detailed_csv=(self.concordance_detailed_csv, 'w'), - concordance_seed_csv=(self.concordance_seed_csv, 'w')) as opened_files: + + clipping_csv=(with_prefix(self.clipping_csv), 'r'), + nuc_csv=(with_prefix(self.nuc_csv), 'w'), + conseq_ins_csv=(with_prefix(self.conseq_ins_csv), 'r'), + remap_conseq_csv=(with_prefix(self.remap_conseq_csv), 'r'), + contigs_csv=(with_prefix(self.contigs_csv), 'r') if use_denovo else None, + nuc_detail_csv=(with_prefix(self.nuc_details_csv), 'w') if use_denovo else None, + amino_csv=(with_prefix(self.amino_csv), 'w'), + amino_detail_csv=(with_prefix(self.amino_details_csv), 'w') if use_denovo else None, + insertions_csv=(with_prefix(self.insertions_csv), 'w'), + conseq_csv=(with_prefix(self.conseq_csv), 'w'), + 
conseq_region_csv=(with_prefix(self.conseq_region_csv), 'w') if use_denovo else None, + failed_align_csv=(with_prefix(self.failed_align_csv), 'w'), + coverage_summary_csv=(with_prefix(self.coverage_summary_csv), 'w'), + genome_coverage_csv=(with_prefix(self.genome_coverage_csv), 'w'), + conseq_all_csv=(with_prefix(self.conseq_all_csv), 'w'), + conseq_stitched_csv=(with_prefix(self.conseq_stitched_csv), 'w') if use_denovo else None, + minimap_hits_csv=(with_prefix(self.minimap_hits_csv), 'w'), + alignments_csv=(with_prefix(self.alignments_csv), 'w'), + alignments_unmerged_csv=(with_prefix(self.alignments_unmerged_csv), 'w'), + alignments_intermediate_csv=(with_prefix(self.alignments_intermediate_csv), 'w'), + alignments_overall_csv=(with_prefix(self.alignments_overall_csv), 'w'), + concordance_csv=(with_prefix(self.concordance_csv), 'w'), + concordance_detailed_csv=(with_prefix(self.concordance_detailed_csv), 'w'), + concordance_seed_csv=(with_prefix(self.concordance_seed_csv), 'w')) as opened_files: aln2counts(opened_files['aligned_csv'], opened_files['nuc_csv'], @@ -299,46 +328,47 @@ def process(self, concordance_seed_csv=opened_files['concordance_seed_csv']) logger.info('Running coverage_plots on %s.', self) - os.makedirs(self.coverage_maps) - with open(self.amino_csv) as amino_csv, \ - open(self.coverage_scores_csv, 'w') as coverage_scores_csv: + os.makedirs(with_prefix(self.coverage_maps)) + with open(with_prefix(self.amino_csv)) as amino_csv, \ + open(with_prefix(self.coverage_scores_csv), 'w') as coverage_scores_csv: coverage_plot(amino_csv, coverage_scores_csv, coverage_maps_path=self.coverage_maps, coverage_maps_prefix=self.name, excluded_projects=excluded_projects) - with open(self.genome_coverage_csv) as genome_coverage_csv, \ - open(self.minimap_hits_csv) as minimap_hits_csv: + with open(with_prefix(self.genome_coverage_csv)) as genome_coverage_csv, \ + open(with_prefix(self.minimap_hits_csv)) as minimap_hits_csv: if not use_denovo: minimap_hits_csv = 
None plot_genome_coverage(genome_coverage_csv, minimap_hits_csv, - self.genome_coverage_svg) + with_prefix(self.genome_coverage_svg)) - with open(self.genome_coverage_csv) as genome_coverage_csv, \ - open(self.minimap_hits_csv) as minimap_hits_csv: + with open(with_prefix(self.genome_coverage_csv)) as genome_coverage_csv, \ + open(with_prefix(self.minimap_hits_csv)) as minimap_hits_csv: if not use_denovo: minimap_hits_csv = None plot_genome_coverage(genome_coverage_csv, minimap_hits_csv, - self.genome_concordance_svg, + with_prefix(self.genome_concordance_svg), use_concordance=True) - with open(self.concordance_detailed_csv) as concordance_detailed_csv: - concordance_plot(concordance_detailed_csv, plot_path=self.coverage_maps, concordance_prefix=self.name) + with open(with_prefix(self.concordance_detailed_csv)) as concordance_detailed_csv: + concordance_plot(concordance_detailed_csv, + plot_path=with_prefix(self.coverage_maps), + concordance_prefix=self.name) logger.info('Running cascade_report on %s.', self) with open(self.g2p_summary_csv) as g2p_summary_csv, \ - open(self.remap_counts_csv) as remap_counts_csv, \ - open(self.aligned_csv) as aligned_csv, \ - open(self.cascade_csv, 'w') as cascade_csv: + open(with_prefix(self.remap_counts_csv)) as remap_counts_csv, \ + open(with_prefix(self.aligned_csv)) as aligned_csv, \ + open(with_prefix(self.cascade_csv), 'w') as cascade_csv: cascade_report = CascadeReport(cascade_csv) cascade_report.g2p_summary_csv = g2p_summary_csv cascade_report.remap_counts_csv = remap_counts_csv cascade_report.aligned_csv = aligned_csv cascade_report.generate() - logger.info('Finished sample %s.', self) def load_sample_info(self): path = Path(self.sample_info_csv) @@ -383,28 +413,42 @@ def run_mapping(self, excluded_seeds): scratch_path, debug_file_prefix=debug_file_prefix) - def run_denovo(self, excluded_seeds, haplo_args=None): + def run_denovo(self, excluded_seeds): logger.info('Running de novo assembly on %s.', self) scratch_path = 
self.get_scratch_path() - with open(self.merged_contigs_csv) as merged_contigs_csv, \ - open(self.contigs_csv, 'w') as contigs_csv, \ - open(self.blast_csv, 'w') as blast_csv: + + with open(self.unstitched_contigs_fasta, 'w') as unstitched_contigs_fasta, \ + open(self.merged_contigs_csv, 'r') as merged_contigs_csv: denovo(self.trimmed1_fastq, self.trimmed2_fastq, - contigs_csv, + unstitched_contigs_fasta, self.scratch_path, merged_contigs_csv, - blast_csv=blast_csv, - haplo_args=haplo_args) + ) + + with open(self.unstitched_contigs_csv, 'w') as unstitched_contigs_csv, \ + open(self.merged_contigs_csv, 'r') as merged_contigs_csv, \ + open(self.blast_csv, 'w') as blast_csv: + fasta_to_csv(self.unstitched_contigs_fasta, + unstitched_contigs_csv, + merged_contigs_csv, + blast_csv=blast_csv, + ) + + with open(self.unstitched_contigs_csv, 'r') as unstitched_contigs_csv, \ + open(self.contigs_csv, 'w') as contigs_csv: + contig_stitcher(unstitched_contigs_csv, contigs_csv, self.stitcher_plot_svg) + logger.info('Running remap on %s.', self) if self.debug_remap: debug_file_prefix = os.path.join(scratch_path, 'debug') else: debug_file_prefix = None + with open(self.contigs_csv) as contigs_csv, \ open(self.remap_csv, 'w') as remap_csv, \ open(self.remap_counts_csv, 'w') as counts_csv, \ - open(self.remap_conseq_csv, 'w') as conseq_csv, \ + open(self.remap_conseq_csv, 'w') as remap_conseq_csv, \ open(self.unmapped1_fastq, 'w') as unmapped1, \ open(self.unmapped2_fastq, 'w') as unmapped2: @@ -413,9 +457,31 @@ def run_denovo(self, excluded_seeds, haplo_args=None): contigs_csv, remap_csv, counts_csv, - conseq_csv, + remap_conseq_csv, unmapped1, unmapped2, scratch_path, debug_file_prefix=debug_file_prefix, excluded_seeds=excluded_seeds) + + def with_prefix(path): + return path and prepend_prefix_to_basename("unstitched_", path) + + with open(self.unstitched_contigs_csv) as contigs_csv, \ + open(with_prefix(self.remap_csv), 'w') as remap_csv, \ + 
open(with_prefix(self.remap_counts_csv), 'w') as counts_csv, \ + open(with_prefix(self.remap_conseq_csv), 'w') as remap_conseq_csv, \ + open(with_prefix(self.unmapped1_fastq), 'w') as unmapped1, \ + open(with_prefix(self.unmapped2_fastq), 'w') as unmapped2: + + map_to_contigs(self.trimmed1_fastq, + self.trimmed2_fastq, + contigs_csv, + remap_csv, + counts_csv, + remap_conseq_csv, + unmapped1, + unmapped2, + scratch_path, + debug_file_prefix=with_prefix(debug_file_prefix), + excluded_seeds=excluded_seeds) diff --git a/micall/g2p/fastq_g2p.py b/micall/g2p/fastq_g2p.py index 9235b3a48..c89276dbc 100755 --- a/micall/g2p/fastq_g2p.py +++ b/micall/g2p/fastq_g2p.py @@ -36,9 +36,9 @@ def parse_args(): parser = argparse.ArgumentParser(description='Calculate g2p scores from amino acid sequences.') - parser.add_argument('fastq1', type=argparse.FileType('rU'), + parser.add_argument('fastq1', type=argparse.FileType('r'), help=' FASTQ file containing read 1 reads') - parser.add_argument('fastq2', type=argparse.FileType('rU'), + parser.add_argument('fastq2', type=argparse.FileType('r'), help=' FASTQ file containing read 2 reads') parser.add_argument('g2p_csv', type=argparse.FileType('w'), help=' CSV containing g2p predictions.') diff --git a/micall/main.py b/micall/main.py new file mode 100644 index 000000000..d1c86f6a8 --- /dev/null +++ b/micall/main.py @@ -0,0 +1,161 @@ +#! /usr/bin/env python + +""" +This script serves as the primary command-line interface for MiCall, +enabling users to execute various MiCall components directly +from the terminal. +""" + +import sys +import argparse +import os +import runpy +from typing import Sequence +from pathlib import Path +from importlib.metadata import version + +# Run micall/utils/get_list_of_executables.py to get the up-to-date list of these executables. 
+# The consistency of this list is verified in micall/tests/test_installation.py +EXECUTABLES = [ + "micall/main.py", + "micall/resistance/genreport.py", + "micall/resistance/resistance.py", + "micall/resistance/pdfreport.py", + "micall/core/filter_quality.py", + "micall/core/sam2aln.py", + "micall/core/denovo.py", + "micall/core/trim_fastqs.py", + "micall/core/plot_contigs.py", + "micall/core/cascade_report.py", + "micall/core/remap.py", + "micall/core/prelim_map.py", + "micall/core/aln2counts.py", + "micall/core/contig_stitcher.py", + "micall/core/coverage_plots.py", + "micall/core/plot_simple.py", + "micall/tests/test_installation.py", + "micall/tests/test_hcv_rules_import.py", + "micall/g2p/fastq_g2p.py", + "micall/blast_db/make_blast_db.py", + "micall/utils/concordance_evaluation.py", + "micall/utils/basespace_upload.py", + "micall/utils/release_test_publish.py", + "micall/utils/compare_mapping.py", + "micall/utils/project_seeds_from_compendium.py", + "micall/utils/fasta_to_csv.py", + "micall/utils/hcv_rules_import.py", + "micall/utils/dd.py", + "micall/utils/find_reads_in_sam.py", + "micall/utils/hcv_rules_display.py", + "micall/utils/coverage_data.py", + "micall/utils/find_by_coverage.py", + "micall/utils/primer_locations.py", + "micall/utils/fetch_sequences.py", + "micall/utils/sam_g2p_simplify.py", + "micall/utils/contig_summary.py", + "micall/utils/micall_kive.py", + "micall/utils/compare_454_samples.py", + "micall/utils/genreport_rerun.py", + "micall/utils/remove_dupe_dirs.py", + "micall/utils/find_missing_samples.py", + "micall/utils/release_test_microtest.py", + "micall/utils/denovo_simplify.py", + "micall/utils/sort_sam.py", + "micall/utils/sample_fastq.py", + "micall/utils/sample_sheet_parser.py", + "micall/utils/projects_upload.py", + "micall/utils/projects_dump.py", + "micall/utils/find_chimera.py", + "micall/utils/docker_build.py", + "micall/utils/probe_finder.py", + "micall/utils/aln2counts_simplify.py", + "micall/utils/release_test_setup.py", + 
"micall/utils/samples_from_454.py", + "micall/utils/amplicon_finder.py", + "micall/utils/driver_utils.py", + "micall/utils/seed_alignments.py", + "micall/utils/release_test_compare.py", + "micall/utils/remap_fastq_simplify.py", + "micall/utils/contig_counts.py", + "micall/utils/micall_kive_resistance.py", + "micall/utils/ref_aligner.py", + "micall/utils/scan_run_folders.py", + "micall/utils/contig_blaster.py", + "micall/utils/micall_docker.py", + "micall/utils/hcv_reference_tree.py", + "micall/utils/sample_project_summary.py", + "micall/utils/get_list_of_executables.py", + "micall/monitor/update_qai.py", + "micall/monitor/micall_watcher.py", + "micall/tcr/igblast.py", +] + + +def executable_name(path: str) -> str: + file_name = Path(path).name + name, extension = os.path.splitext(file_name) + return name + + +def executable_module(path: str) -> str: + noext, extension = os.path.splitext(path) + pythized = noext.replace(os.path.sep, '.') + return pythized + + +EXECUTABLES_MAP = {executable_name(path): path for path in EXECUTABLES} + + +def execute_module_as_main(module_name: str, arguments: Sequence[str]) -> int: + sys.argv = [module_name] + list(arguments) + micall_directory = str(Path(__file__).parent.parent) + if micall_directory not in sys.path: + sys.path.append(micall_directory) + runpy.run_module(module_name, run_name='__main__', alter_sys=True) + return 0 + + +def get_version() -> str: + if __package__ is None: + return "development" + else: + return str(version(__package__)) + + +def get_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(description="Run MiCall script.", add_help=False) + parser.add_argument("--version", action="store_true", help="Print version and exit.") + parser.add_argument('--help', action='store_true', help='Show this help message and exit.') + parser.add_argument("program", nargs='?', choices=EXECUTABLES_MAP.keys(), help="Program name.") + parser.add_argument("arguments", nargs=argparse.REMAINDER, help="Program 
arguments.") + return parser + + +def main(argv: Sequence[str]) -> int: + parser = get_parser() + args = parser.parse_args(argv) + + if args.version: + print(get_version()) + return 0 + + elif args.help: + parser.print_help() + return 0 + + elif EXECUTABLES_MAP.get(args.program): + path = EXECUTABLES_MAP[args.program] + mod = executable_module(path) + return execute_module_as_main(mod, args.arguments) + + else: + parser.print_help() + return 1 + + +def cli() -> int: + return main(sys.argv[1:]) + + +if __name__ == '__main__': + exit(cli()) diff --git a/micall/monitor/kive_watcher.py b/micall/monitor/kive_watcher.py index 0e0dbd032..003a6526d 100644 --- a/micall/monitor/kive_watcher.py +++ b/micall/monitor/kive_watcher.py @@ -59,6 +59,9 @@ 'resistance_consensus_csv', 'wg_fasta', 'mid_fasta', + 'unstitched_cascade_csv', + 'unstitched_conseq_csv', + 'unstitched_contigs_csv', 'contigs_csv', 'alignment_svg', 'alignment_png', @@ -72,7 +75,8 @@ 'contigs_primers_csv', 'table_precursor_csv', 'proviral_landscape_csv', - 'hivseqinr_results_tar'] + 'hivseqinr_results_tar', # TODO: remove when proviral is finally updated. 
+ 'detailed_results_tar'] # noinspection PyArgumentList FolderEventType = Enum('FolderEventType', 'ADD_SAMPLE FINISH_FOLDER') @@ -779,7 +783,7 @@ def run_pipeline(self, run = self.run_proviral_pipeline( sample_watcher, folder_watcher, - 'Proviral HIVSeqinR') + 'Proviral') return run if pipeline_type == PipelineType.RESISTANCE: run = self.run_resistance_pipeline( @@ -936,13 +940,15 @@ def run_proviral_pipeline(self, sample_watcher, folder_watcher, description): run_dataset['argument_name']: run_dataset['dataset'] for run_dataset in main_run['datasets'] if run_dataset['argument_name'] in ('sample_info_csv', - 'conseq_csv', - 'contigs_csv', - 'cascade_csv')} + 'unstitched_cascade_csv', + 'unstitched_conseq_csv', + 'unstitched_contigs_csv')} input_datasets = { argument_name: self.kive_retry(lambda: self.session.get(url).json()) for argument_name, url in input_dataset_urls.items()} - input_datasets['conseqs_csv'] = input_datasets.pop('conseq_csv') + input_datasets['cascade_csv'] = input_datasets.pop('unstitched_cascade_csv') + input_datasets['conseqs_csv'] = input_datasets.pop('unstitched_conseq_csv') + input_datasets['contigs_csv'] = input_datasets.pop('unstitched_contigs_csv') run = self.find_or_launch_run( pipeline_id, input_datasets, @@ -980,9 +986,14 @@ def find_or_launch_run(self, for run_dataset in run_datasets): run = None if run is None: - run_datasets = [dict(argument=app_arg, - dataset=inputs[name]['url']) - for name, app_arg in app_args.items()] + try: + run_datasets = [dict(argument=app_arg, + dataset=inputs[name]['url']) + for name, app_arg in app_args.items()] + except KeyError as e: + raise ValueError(f"Pipeline input error: {repr(e)}." + f" The specified app with id {pipeline_id} appears to expect a different set of inputs." 
+ f" Does the run name {repr(run_name)} make sense for it?") run_params = dict(name=run_name, batch=run_batch['url'], groups_allowed=ALLOWED_GROUPS, diff --git a/micall_watcher.py b/micall/monitor/micall_watcher.py similarity index 98% rename from micall_watcher.py rename to micall/monitor/micall_watcher.py index 763e6b2e2..5461270b6 100644 --- a/micall_watcher.py +++ b/micall/monitor/micall_watcher.py @@ -9,9 +9,9 @@ from micall.monitor.kive_watcher import find_samples, KiveWatcher, FolderEventType from micall.monitor import update_qai try: - from micall_logging_override import LOGGING + from micall.utils.micall_logging_override import LOGGING except ImportError: - from micall_logging_config import LOGGING + from micall.utils.micall_logging_config import LOGGING POLLING_DELAY = 10 # seconds between scans for new samples or finished runs logger = logging.getLogger(__name__) diff --git a/micall/monitor/update_qai.py b/micall/monitor/update_qai.py index 860035ff4..28cbe6466 100644 --- a/micall/monitor/update_qai.py +++ b/micall/monitor/update_qai.py @@ -8,10 +8,11 @@ from collections import defaultdict from datetime import datetime import logging +from functools import partial from pathlib import Path from micall.monitor.sample_watcher import PipelineType -from operator import itemgetter +from operator import itemgetter, getitem import os from micall.monitor import qai_helper @@ -24,6 +25,7 @@ def parse_args(): import argparse + pipeline_parser = partial(getitem, PipelineType) parser = argparse.ArgumentParser( description="Update the Oracle database with conseq information", formatter_class=argparse.ArgumentDefaultsHelpFormatter) @@ -34,7 +36,7 @@ def parse_args(): help='version suffix for batch names and folder names') parser.add_argument('--pipeline_group', default=PipelineType.MAIN, - type=PipelineType, + type=pipeline_parser, choices=(PipelineType.MAIN, PipelineType.DENOVO_MAIN, PipelineType.PROVIRAL), @@ -164,10 +166,16 @@ def 
build_review_decisions(coverage_file, collated_counts_file, cascade_file, # noinspection PyTypeChecker sample_names = dict(map(itemgetter('tags', 'filename'), sample_sheet['DataSplit'])) + def read_int(table, name): + ret = float(table[name]) + if float(int(ret)) != ret: + raise ValueError(f"Bad value for {name!r}: {ret!r}. Expected an integer.") + return int(ret) + counts_map = {} # {tags: raw, (tags, seed): mapped]} # sample,type,count for counts in csv.DictReader(collated_counts_file): - count = int(counts['count']) + count = read_int(counts, 'count') tags = sample_tags[counts['sample']] count_type = counts['type'] if count_type not in ('raw', 'unmapped'): @@ -178,11 +186,11 @@ def build_review_decisions(coverage_file, collated_counts_file, cascade_file, unreported_tags = set() for counts in csv.DictReader(cascade_file): tags = sample_tags[counts['sample']] - counts_map[tags] = int(counts['demultiplexed']) * 2 + counts_map[tags] = read_int(counts, 'demultiplexed') * 2 unreported_tags.add(tags) key = tags, G2P_SEED_NAME - counts_map[key] = int(counts['v3loop']) * 2 + counts_map[key] = read_int(counts, 'v3loop') * 2 sequencing_map = defaultdict(dict) # {tags: {project: sequencing}} for sequencing in sequencings: @@ -201,9 +209,9 @@ def build_review_decisions(coverage_file, collated_counts_file, cascade_file, "tagged layouts missing?" 
% (tags, coverage_file.name)) sequencing = project_map.get(coverage['project']) if sequencing is not None: - score = int(coverage['on.score']) + score = read_int(coverage, 'on.score') else: - score = int(coverage['off.score']) + score = read_int(coverage, 'off.score') first_project = sorted(project_map.keys())[0] sequencing = project_map[first_project] project_region_id = project_region_map[(coverage['project'], @@ -226,8 +234,8 @@ def build_review_decisions(coverage_file, collated_counts_file, cascade_file, 'seed_region_id': seed_region_id, 'sample_name': coverage['sample'], 'score': score, - 'min_coverage': int(coverage['min.coverage']), - 'min_coverage_pos': int(coverage['which.key.pos']), + 'min_coverage': read_int(coverage, 'min.coverage'), + 'min_coverage_pos': read_int(coverage, 'which.key.pos'), 'raw_reads': raw_count, 'mapped_reads': mapped_count } @@ -365,7 +373,7 @@ def find_pipeline_id(session, pipeline_version): def load_ok_sample_regions(result_folder): ok_sample_regions = set() coverage_file = os.path.join(result_folder, 'coverage_scores.csv') - with open(coverage_file, "rU") as f: + with open(coverage_file, "r") as f: reader = csv.DictReader(f) for row in reader: if row['on.score'] == '4': diff --git a/micall/tests/data/contig_stitcher_fuzz_nogaps.json b/micall/tests/data/contig_stitcher_fuzz_nogaps.json new file mode 100644 index 000000000..754796324 --- /dev/null +++ b/micall/tests/data/contig_stitcher_fuzz_nogaps.json @@ -0,0 +1,264 @@ +[{ "type": "contig", "fields": { "start": 502, "end": 540, "name": "a" } }, { "type": "contig", "fields": { "start": 196, "end": 890, "name": "b" } }, { "type": "contig", "fields": { "start": 202, "end": 664, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 196, "end": 890, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 502, "end": 540, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 227, "end": 338, "name": "a" } }, { "type": "contig", 
"fields": { "start": 109, "end": 110, "name": "b" } }, { "type": "contig", "fields": { "start": 912, "end": 972, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 109, "end": 110, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 227, "end": 338, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 550, "end": 637, "name": "a" } }, { "type": "contig", "fields": { "start": 541, "end": 548, "name": "b" } }, { "type": "contig", "fields": { "start": 415, "end": 897, "name": "c" } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 541, "end": 548, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 550, "end": 637, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 75, "end": 570, "name": "a" } }, { "type": "contig", "fields": { "start": 65, "end": 74, "name": "b" } }, { "type": "contig", "fields": { "start": 394, "end": 692, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 65, "end": 74, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 75, "end": 570, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 892, "end": 988, "name": "a" } }, { "type": "contig", "fields": { "start": 420, "end": 554, "name": "b" } }, { "type": "contig", "fields": { "start": 77, "end": 900, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 77, "end": 900, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 892, "end": 988, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 565, "end": 767, "name": "a" } }, { "type": "contig", "fields": { "start": 765, "end": 825, "name": "b" } }, { "type": "contig", "fields": { "start": 136, "end": 295, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 136, "end": 295, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 565, "end": 767, 
"name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 960, "end": 997, "name": "a" } }, { "type": "contig", "fields": { "start": 886, "end": 903, "name": "b" } }, { "type": "contig", "fields": { "start": 1, "end": 347, "name": "c" } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 1, "end": 347, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 960, "end": 997, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 857, "end": 882, "name": "a" } }, { "type": "contig", "fields": { "start": 687, "end": 795, "name": "b" } }, { "type": "contig", "fields": { "start": 298, "end": 856, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 298, "end": 856, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 857, "end": 882, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 374, "end": 644, "name": "a" } }, { "type": "contig", "fields": { "start": 557, "end": 761, "name": "b" } }, { "type": "contig", "fields": { "start": 106, "end": 861, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 106, "end": 861, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 557, "end": 761, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 513, "end": 923, "name": "a" } }, { "type": "contig", "fields": { "start": 314, "end": 937, "name": "b" } }, { "type": "contig", "fields": { "start": 227, "end": 260, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 227, "end": 260, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 314, "end": 937, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 647, "end": 657, "name": "a" } }, { "type": "contig", "fields": { "start": 694, "end": 937, "name": "b" } }, { "type": "contig", "fields": { "start": 522, "end": 530, "name": "c" } }, { "type": "kiss", "fields": { "a": { "type": "contig", 
"fields": { "start": 522, "end": 530, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 694, "end": 937, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 255, "end": 770, "name": "a" } }, { "type": "contig", "fields": { "start": 245, "end": 360, "name": "b" } }, { "type": "contig", "fields": { "start": 136, "end": 244, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 136, "end": 244, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 245, "end": 360, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 840, "end": 936, "name": "a" } }, { "type": "contig", "fields": { "start": 154, "end": 969, "name": "b" } }, { "type": "contig", "fields": { "start": 812, "end": 839, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 812, "end": 839, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 840, "end": 936, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 154, "end": 969, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 840, "end": 936, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 615, "end": 772, "name": "a" } }, { "type": "contig", "fields": { "start": 289, "end": 740, "name": "b" } }, { "type": "contig", "fields": { "start": 94, "end": 288, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 94, "end": 288, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 289, "end": 740, "name": "b" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 289, "end": 740, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 615, "end": 772, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 147, "end": 735, "name": "a" } }, { "type": "contig", "fields": { "start": 97, "end": 100, "name": "b" } }, { "type": "contig", "fields": { "start": 25, 
"end": 225, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 25, "end": 225, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 147, "end": 735, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 97, "end": 100, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 147, "end": 735, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 624, "end": 903, "name": "a" } }, { "type": "contig", "fields": { "start": 203, "end": 571, "name": "b" } }, { "type": "contig", "fields": { "start": 420, "end": 623, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 420, "end": 623, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 624, "end": 903, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 203, "end": 571, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 624, "end": 903, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 725, "end": 901, "name": "a" } }, { "type": "contig", "fields": { "start": 566, "end": 703, "name": "b" } }, { "type": "contig", "fields": { "start": 215, "end": 950, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 215, "end": 950, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 566, "end": 703, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 566, "end": 703, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 725, "end": 901, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 610, "end": 809, "name": "a" } }, { "type": "contig", "fields": { "start": 564, "end": 596, "name": "b" } }, { "type": "contig", "fields": { "start": 363, "end": 563, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 363, "end": 563, "name": 
"c" } }, "b": { "type": "contig", "fields": { "start": 564, "end": 596, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 564, "end": 596, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 610, "end": 809, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 158, "end": 710, "name": "a" } }, { "type": "contig", "fields": { "start": 115, "end": 153, "name": "b" } }, { "type": "contig", "fields": { "start": 993, "end": 998, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 115, "end": 153, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 158, "end": 710, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 115, "end": 153, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 158, "end": 710, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 873, "end": 945, "name": "a" } }, { "type": "contig", "fields": { "start": 493, "end": 872, "name": "b" } }, { "type": "contig", "fields": { "start": 323, "end": 383, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 493, "end": 872, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 873, "end": 945, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 493, "end": 872, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 873, "end": 945, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 92, "end": 880, "name": "a" } }, { "type": "contig", "fields": { "start": 14, "end": 18, "name": "b" } }, { "type": "contig", "fields": { "start": 54, "end": 752, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 54, "end": 752, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 92, "end": 880, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": 
"contig", "fields": { "start": 14, "end": 18, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 92, "end": 880, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 894, "end": 993, "name": "a" } }, { "type": "contig", "fields": { "start": 418, "end": 430, "name": "b" } }, { "type": "contig", "fields": { "start": 759, "end": 860, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 759, "end": 860, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 894, "end": 993, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 418, "end": 430, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 894, "end": 993, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 264, "end": 597, "name": "a" } }, { "type": "contig", "fields": { "start": 20, "end": 207, "name": "b" } }, { "type": "contig", "fields": { "start": 21, "end": 263, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 21, "end": 263, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 264, "end": 597, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 20, "end": 207, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 264, "end": 597, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 812, "end": 898, "name": "a" } }, { "type": "contig", "fields": { "start": 427, "end": 568, "name": "b" } }, { "type": "contig", "fields": { "start": 281, "end": 761, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 281, "end": 761, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 427, "end": 568, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 427, "end": 568, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 812, "end": 898, "name": "a" } } } }] 
+[{ "type": "contig", "fields": { "start": 474, "end": 511, "name": "a" } }, { "type": "contig", "fields": { "start": 275, "end": 436, "name": "b" } }, { "type": "contig", "fields": { "start": 273, "end": 273, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 273, "end": 273, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 275, "end": 436, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 275, "end": 436, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 474, "end": 511, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 510, "end": 962, "name": "a" } }, { "type": "contig", "fields": { "start": 260, "end": 396, "name": "b" } }, { "type": "contig", "fields": { "start": 168, "end": 259, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 168, "end": 259, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 260, "end": 396, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 260, "end": 396, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 510, "end": 962, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 912, "end": 962, "name": "a" } }, { "type": "contig", "fields": { "start": 162, "end": 989, "name": "b" } }, { "type": "contig", "fields": { "start": 281, "end": 974, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 162, "end": 989, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 912, "end": 962, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 281, "end": 974, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 912, "end": 962, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 302, "end": 374, "name": "a" } }, { "type": "contig", "fields": { "start": 61, "end": 301, 
"name": "b" } }, { "type": "contig", "fields": { "start": 185, "end": 563, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 61, "end": 301, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 302, "end": 374, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 185, "end": 563, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 302, "end": 374, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 438, "end": 590, "name": "a" } }, { "type": "contig", "fields": { "start": 840, "end": 899, "name": "b" } }, { "type": "contig", "fields": { "start": 435, "end": 839, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 435, "end": 839, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 840, "end": 899, "name": "b" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 435, "end": 839, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 438, "end": 590, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 398, "end": 723, "name": "a" } }, { "type": "contig", "fields": { "start": 161, "end": 947, "name": "b" } }, { "type": "contig", "fields": { "start": 179, "end": 191, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 161, "end": 947, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 398, "end": 723, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 179, "end": 191, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 398, "end": 723, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 114, "end": 842, "name": "a" } }, { "type": "contig", "fields": { "start": 50, "end": 112, "name": "b" } }, { "type": "contig", "fields": { "start": 79, "end": 82, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { 
"type": "contig", "fields": { "start": 50, "end": 112, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 114, "end": 842, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 79, "end": 82, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 114, "end": 842, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 961, "end": 963, "name": "a" } }, { "type": "contig", "fields": { "start": 827, "end": 960, "name": "b" } }, { "type": "contig", "fields": { "start": 364, "end": 767, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 827, "end": 960, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 961, "end": 963, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 364, "end": 767, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 961, "end": 963, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 627, "end": 654, "name": "a" } }, { "type": "contig", "fields": { "start": 412, "end": 733, "name": "b" } }, { "type": "contig", "fields": { "start": 53, "end": 428, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 53, "end": 428, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 412, "end": 733, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 53, "end": 428, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 627, "end": 654, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 92, "end": 428, "name": "a" } }, { "type": "contig", "fields": { "start": 74, "end": 74, "name": "b" } }, { "type": "contig", "fields": { "start": 21, "end": 73, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 21, "end": 73, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 74, "end": 74, "name": 
"b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 21, "end": 73, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 92, "end": 428, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 429, "end": 706, "name": "a" } }, { "type": "contig", "fields": { "start": 369, "end": 594, "name": "b" } }, { "type": "contig", "fields": { "start": 170, "end": 380, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 369, "end": 594, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 429, "end": 706, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 170, "end": 380, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 429, "end": 706, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 947, "end": 975, "name": "a" } }, { "type": "contig", "fields": { "start": 689, "end": 732, "name": "b" } }, { "type": "contig", "fields": { "start": 219, "end": 535, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 689, "end": 732, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 947, "end": 975, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 219, "end": 535, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 947, "end": 975, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 9, "end": 498, "name": "a" } }, { "type": "contig", "fields": { "start": 7, "end": 9, "name": "b" } }, { "type": "contig", "fields": { "start": 9, "end": 9, "name": "c" } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 7, "end": 9, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 9, "end": 498, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 9, "end": 9, "name": "c" } }, "b": { "type": "contig", "fields": { 
"start": 9, "end": 498, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 751, "end": 836, "name": "a" } }, { "type": "contig", "fields": { "start": 451, "end": 750, "name": "b" } }, { "type": "contig", "fields": { "start": 436, "end": 663, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 451, "end": 750, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 751, "end": 836, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 436, "end": 663, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 751, "end": 836, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 696, "end": 917, "name": "a" } }, { "type": "contig", "fields": { "start": 187, "end": 619, "name": "b" } }, { "type": "contig", "fields": { "start": 209, "end": 350, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 209, "end": 350, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 696, "end": 917, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 209, "end": 350, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 696, "end": 917, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 566, "end": 784, "name": "a" } }, { "type": "contig", "fields": { "start": 915, "end": 945, "name": "b" } }, { "type": "contig", "fields": { "start": 258, "end": 565, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 258, "end": 565, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 566, "end": 784, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 258, "end": 565, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 566, "end": 784, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 68, "end": 251, "name": "a" } }, { "type": "contig", 
"fields": { "start": 7, "end": 860, "name": "b" } }, { "type": "contig", "fields": { "start": 6, "end": 42, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 6, "end": 42, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 7, "end": 860, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 6, "end": 42, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 68, "end": 251, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 965, "end": 967, "name": "a" } }, { "type": "contig", "fields": { "start": 988, "end": 988, "name": "b" } }, { "type": "contig", "fields": { "start": 312, "end": 763, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 312, "end": 763, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 988, "end": 988, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 312, "end": 763, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 965, "end": 967, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 512, "end": 738, "name": "a" } }, { "type": "contig", "fields": { "start": 120, "end": 387, "name": "b" } }, { "type": "contig", "fields": { "start": 62, "end": 119, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 62, "end": 119, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 120, "end": 387, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 62, "end": 119, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 512, "end": 738, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 214, "end": 530, "name": "a" } }, { "type": "contig", "fields": { "start": 107, "end": 213, "name": "b" } }, { "type": "contig", "fields": { "start": 9, "end": 213, "name": "c" } }, { "type": "tangent", "fields": { 
"a": { "type": "contig", "fields": { "start": 107, "end": 213, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 214, "end": 530, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 9, "end": 213, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 214, "end": 530, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 38, "end": 360, "name": "a" } }, { "type": "contig", "fields": { "start": 32, "end": 652, "name": "b" } }, { "type": "contig", "fields": { "start": 0, "end": 84, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 32, "end": 652, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 38, "end": 360, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 0, "end": 84, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 32, "end": 652, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 485, "end": 835, "name": "a" } }, { "type": "contig", "fields": { "start": 409, "end": 484, "name": "b" } }, { "type": "contig", "fields": { "start": 352, "end": 541, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 409, "end": 484, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 485, "end": 835, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 352, "end": 541, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 409, "end": 484, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 668, "end": 974, "name": "a" } }, { "type": "contig", "fields": { "start": 945, "end": 963, "name": "b" } }, { "type": "contig", "fields": { "start": 116, "end": 964, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 116, "end": 964, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 668, "end": 974, 
"name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 116, "end": 964, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 945, "end": 963, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 63, "end": 974, "name": "a" } }, { "type": "contig", "fields": { "start": 5, "end": 294, "name": "b" } }, { "type": "contig", "fields": { "start": 2, "end": 62, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 2, "end": 62, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 63, "end": 974, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 2, "end": 62, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 5, "end": 294, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 250, "end": 287, "name": "a" } }, { "type": "contig", "fields": { "start": 14, "end": 779, "name": "b" } }, { "type": "contig", "fields": { "start": 11, "end": 11, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 14, "end": 779, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 250, "end": 287, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 11, "end": 11, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 14, "end": 779, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 600, "end": 802, "name": "a" } }, { "type": "contig", "fields": { "start": 535, "end": 584, "name": "b" } }, { "type": "contig", "fields": { "start": 222, "end": 443, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 535, "end": 584, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 600, "end": 802, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 222, "end": 443, "name": "c" } }, "b": { "type": 
"contig", "fields": { "start": 535, "end": 584, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 114, "end": 826, "name": "a" } }, { "type": "contig", "fields": { "start": 46, "end": 113, "name": "b" } }, { "type": "contig", "fields": { "start": 41, "end": 43, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 46, "end": 113, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 114, "end": 826, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 41, "end": 43, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 46, "end": 113, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 750, "end": 755, "name": "a" } }, { "type": "contig", "fields": { "start": 773, "end": 773, "name": "b" } }, { "type": "contig", "fields": { "start": 69, "end": 770, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 69, "end": 770, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 750, "end": 755, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 69, "end": 770, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 773, "end": 773, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 765, "end": 899, "name": "a" } }, { "type": "contig", "fields": { "start": 970, "end": 989, "name": "b" } }, { "type": "contig", "fields": { "start": 337, "end": 622, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 337, "end": 622, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 765, "end": 899, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 337, "end": 622, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 970, "end": 989, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 521, "end": 673, "name": 
"a" } }, { "type": "contig", "fields": { "start": 826, "end": 974, "name": "b" } }, { "type": "contig", "fields": { "start": 353, "end": 520, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 353, "end": 520, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 521, "end": 673, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 353, "end": 520, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 826, "end": 974, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 663, "end": 756, "name": "a" } }, { "type": "contig", "fields": { "start": 24, "end": 727, "name": "b" } }, { "type": "contig", "fields": { "start": 20, "end": 24, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 24, "end": 727, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 663, "end": 756, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 20, "end": 24, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 24, "end": 727, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 44, "end": 758, "name": "a" } }, { "type": "contig", "fields": { "start": 5, "end": 39, "name": "b" } }, { "type": "contig", "fields": { "start": 1, "end": 2, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 5, "end": 39, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 44, "end": 758, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 1, "end": 2, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 5, "end": 39, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 223, "end": 843, "name": "a" } }, { "type": "contig", "fields": { "start": 146, "end": 221, "name": "b" } }, { "type": "contig", "fields": { "start": 122, "end": 141, "name": "c" } }, { 
"type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 146, "end": 221, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 223, "end": 843, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 122, "end": 141, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 146, "end": 221, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 116, "end": 524, "name": "a" } }, { "type": "contig", "fields": { "start": 76, "end": 115, "name": "b" } }, { "type": "contig", "fields": { "start": 27, "end": 31, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 76, "end": 115, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 116, "end": 524, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 27, "end": 31, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 76, "end": 115, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 499, "end": 728, "name": "a" } }, { "type": "contig", "fields": { "start": 757, "end": 927, "name": "b" } }, { "type": "contig", "fields": { "start": 196, "end": 663, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 196, "end": 663, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 499, "end": 728, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 196, "end": 663, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 757, "end": 927, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 892, "end": 963, "name": "a" } }, { "type": "contig", "fields": { "start": 162, "end": 293, "name": "b" } }, { "type": "contig", "fields": { "start": 67, "end": 76, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 67, "end": 76, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 
892, "end": 963, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 67, "end": 76, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 162, "end": 293, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 33, "end": 113, "name": "a" } }, { "type": "contig", "fields": { "start": 508, "end": 950, "name": "b" } }, { "type": "contig", "fields": { "start": 3, "end": 32, "name": "c" } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 3, "end": 32, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 33, "end": 113, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 3, "end": 32, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 508, "end": 950, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 301, "end": 649, "name": "a" } }, { "type": "contig", "fields": { "start": 446, "end": 545, "name": "b" } }, { "type": "contig", "fields": { "start": 108, "end": 300, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 108, "end": 300, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 301, "end": 649, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 108, "end": 300, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 446, "end": 545, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 642, "end": 724, "name": "a" } }, { "type": "contig", "fields": { "start": 367, "end": 631, "name": "b" } }, { "type": "contig", "fields": { "start": 98, "end": 131, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 98, "end": 131, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 367, "end": 631, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 98, "end": 131, "name": "c" } }, "b": { "type": 
"contig", "fields": { "start": 367, "end": 631, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 389, "end": 566, "name": "a" } }, { "type": "contig", "fields": { "start": 148, "end": 730, "name": "b" } }, { "type": "contig", "fields": { "start": 9, "end": 147, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 9, "end": 147, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 148, "end": 730, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 9, "end": 147, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 148, "end": 730, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 931, "end": 944, "name": "a" } }, { "type": "contig", "fields": { "start": 141, "end": 930, "name": "b" } }, { "type": "contig", "fields": { "start": 140, "end": 140, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 141, "end": 930, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 931, "end": 944, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 140, "end": 140, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 141, "end": 930, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 476, "end": 573, "name": "a" } }, { "type": "contig", "fields": { "start": 476, "end": 652, "name": "b" } }, { "type": "contig", "fields": { "start": 464, "end": 475, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 464, "end": 475, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 476, "end": 573, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 464, "end": 475, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 476, "end": 652, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 183, "end": 500, "name": "a" } }, 
{ "type": "contig", "fields": { "start": 23, "end": 25, "name": "b" } }, { "type": "contig", "fields": { "start": 17, "end": 404, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 17, "end": 404, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 183, "end": 500, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 17, "end": 404, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 23, "end": 25, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 23, "end": 25, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 183, "end": 500, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 444, "end": 949, "name": "a" } }, { "type": "contig", "fields": { "start": 170, "end": 219, "name": "b" } }, { "type": "contig", "fields": { "start": 167, "end": 443, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 167, "end": 443, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 444, "end": 949, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 167, "end": 443, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 170, "end": 219, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 170, "end": 219, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 444, "end": 949, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 276, "end": 591, "name": "a" } }, { "type": "contig", "fields": { "start": 85, "end": 185, "name": "b" } }, { "type": "contig", "fields": { "start": 2, "end": 385, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 2, "end": 385, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 276, "end": 591, "name": "a" } } } }, { "type": "nooverlap", 
"fields": { "a": { "type": "contig", "fields": { "start": 85, "end": 185, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 276, "end": 591, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 85, "end": 185, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 276, "end": 591, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 145, "end": 146, "name": "a" } }, { "type": "contig", "fields": { "start": 75, "end": 107, "name": "b" } }, { "type": "contig", "fields": { "start": 20, "end": 144, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 20, "end": 144, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 145, "end": 146, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 75, "end": 107, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 145, "end": 146, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 75, "end": 107, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 145, "end": 146, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 49, "end": 385, "name": "a" } }, { "type": "contig", "fields": { "start": 41, "end": 43, "name": "b" } }, { "type": "contig", "fields": { "start": 8, "end": 216, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 8, "end": 216, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 41, "end": 43, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 41, "end": 43, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 49, "end": 385, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 41, "end": 43, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 49, "end": 385, "name": "a" } } } }] +[{ "type": 
"contig", "fields": { "start": 8, "end": 660, "name": "a" } }, { "type": "contig", "fields": { "start": 4, "end": 4, "name": "b" } }, { "type": "contig", "fields": { "start": 1, "end": 3, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 1, "end": 3, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 4, "end": 4, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 4, "end": 4, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 8, "end": 660, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 4, "end": 4, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 8, "end": 660, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 642, "end": 949, "name": "a" } }, { "type": "contig", "fields": { "start": 272, "end": 641, "name": "b" } }, { "type": "contig", "fields": { "start": 302, "end": 845, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 272, "end": 641, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 642, "end": 949, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 302, "end": 845, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 642, "end": 949, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 272, "end": 641, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 642, "end": 949, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 366, "end": 398, "name": "a" } }, { "type": "contig", "fields": { "start": 323, "end": 342, "name": "b" } }, { "type": "contig", "fields": { "start": 272, "end": 351, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 323, "end": 342, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 366, "end": 398, 
"name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 272, "end": 351, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 366, "end": 398, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 323, "end": 342, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 366, "end": 398, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 806, "end": 894, "name": "a" } }, { "type": "contig", "fields": { "start": 391, "end": 805, "name": "b" } }, { "type": "contig", "fields": { "start": 508, "end": 763, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 391, "end": 805, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 806, "end": 894, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 508, "end": 763, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 806, "end": 894, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 391, "end": 805, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 806, "end": 894, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 141, "end": 479, "name": "a" } }, { "type": "contig", "fields": { "start": 36, "end": 111, "name": "b" } }, { "type": "contig", "fields": { "start": 18, "end": 115, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 18, "end": 115, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 36, "end": 111, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 18, "end": 115, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 141, "end": 479, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 36, "end": 111, "name": "b" } }, "b": { "type": "contig", "fields": { 
"start": 141, "end": 479, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 571, "end": 901, "name": "a" } }, { "type": "contig", "fields": { "start": 402, "end": 448, "name": "b" } }, { "type": "contig", "fields": { "start": 47, "end": 401, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 47, "end": 401, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 402, "end": 448, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 47, "end": 401, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 571, "end": 901, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 402, "end": 448, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 571, "end": 901, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 441, "end": 666, "name": "a" } }, { "type": "contig", "fields": { "start": 395, "end": 440, "name": "b" } }, { "type": "contig", "fields": { "start": 287, "end": 440, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 395, "end": 440, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 441, "end": 666, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 287, "end": 440, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 441, "end": 666, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 395, "end": 440, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 441, "end": 666, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 142, "end": 212, "name": "a" } }, { "type": "contig", "fields": { "start": 51, "end": 141, "name": "b" } }, { "type": "contig", "fields": { "start": 20, "end": 756, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 51, "end": 141, 
"name": "b" } }, "b": { "type": "contig", "fields": { "start": 142, "end": 212, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 20, "end": 756, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 51, "end": 141, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 51, "end": 141, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 142, "end": 212, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 738, "end": 896, "name": "a" } }, { "type": "contig", "fields": { "start": 642, "end": 681, "name": "b" } }, { "type": "contig", "fields": { "start": 572, "end": 903, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 572, "end": 903, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 738, "end": 896, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 572, "end": 903, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 642, "end": 681, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 642, "end": 681, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 738, "end": 896, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 385, "end": 920, "name": "a" } }, { "type": "contig", "fields": { "start": 193, "end": 247, "name": "b" } }, { "type": "contig", "fields": { "start": 55, "end": 384, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 55, "end": 384, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 385, "end": 920, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 55, "end": 384, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 193, "end": 247, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 
193, "end": 247, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 385, "end": 920, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 602, "end": 905, "name": "a" } }, { "type": "contig", "fields": { "start": 310, "end": 315, "name": "b" } }, { "type": "contig", "fields": { "start": 141, "end": 277, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 310, "end": 315, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 602, "end": 905, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 141, "end": 277, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 310, "end": 315, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 310, "end": 315, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 602, "end": 905, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 400, "end": 825, "name": "a" } }, { "type": "contig", "fields": { "start": 337, "end": 399, "name": "b" } }, { "type": "contig", "fields": { "start": 319, "end": 327, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 337, "end": 399, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 400, "end": 825, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 319, "end": 327, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 337, "end": 399, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 337, "end": 399, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 400, "end": 825, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 217, "end": 462, "name": "a" } }, { "type": "contig", "fields": { "start": 201, "end": 205, "name": "b" } }, { "type": "contig", "fields": { "start": 112, "end": 147, "name": "c" } }, { "type": 
"nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 112, "end": 147, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 217, "end": 462, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 112, "end": 147, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 201, "end": 205, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 201, "end": 205, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 217, "end": 462, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 513, "end": 701, "name": "a" } }, { "type": "contig", "fields": { "start": 355, "end": 512, "name": "b" } }, { "type": "contig", "fields": { "start": 64, "end": 354, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 355, "end": 512, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 513, "end": 701, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 64, "end": 354, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 355, "end": 512, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 355, "end": 512, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 513, "end": 701, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 950, "end": 955, "name": "a" } }, { "type": "contig", "fields": { "start": 950, "end": 950, "name": "b" } }, { "type": "contig", "fields": { "start": 615, "end": 949, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 615, "end": 949, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 950, "end": 955, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 615, "end": 949, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 950, "end": 950, 
"name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 950, "end": 950, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 950, "end": 955, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 167, "end": 317, "name": "a" } }, { "type": "contig", "fields": { "start": 12, "end": 779, "name": "b" } }, { "type": "contig", "fields": { "start": 7, "end": 11, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 7, "end": 11, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 12, "end": 779, "name": "b" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 12, "end": 779, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 167, "end": 317, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 7, "end": 11, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 167, "end": 317, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 479, "end": 987, "name": "a" } }, { "type": "contig", "fields": { "start": 185, "end": 299, "name": "b" } }, { "type": "contig", "fields": { "start": 136, "end": 244, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 136, "end": 244, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 185, "end": 299, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 185, "end": 299, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 479, "end": 987, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 136, "end": 244, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 479, "end": 987, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 130, "end": 305, "name": "a" } }, { "type": "contig", "fields": { "start": 125, "end": 126, "name": "b" } }, { 
"type": "contig", "fields": { "start": 106, "end": 124, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 106, "end": 124, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 125, "end": 126, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 125, "end": 126, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 130, "end": 305, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 106, "end": 124, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 130, "end": 305, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 505, "end": 869, "name": "a" } }, { "type": "contig", "fields": { "start": 102, "end": 575, "name": "b" } }, { "type": "contig", "fields": { "start": 94, "end": 384, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 102, "end": 575, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 505, "end": 869, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 94, "end": 384, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 102, "end": 575, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 94, "end": 384, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 505, "end": 869, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 785, "end": 840, "name": "a" } }, { "type": "contig", "fields": { "start": 719, "end": 784, "name": "b" } }, { "type": "contig", "fields": { "start": 659, "end": 777, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 719, "end": 784, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 785, "end": 840, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 659, "end": 
777, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 719, "end": 784, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 659, "end": 777, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 785, "end": 840, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 410, "end": 583, "name": "a" } }, { "type": "contig", "fields": { "start": 287, "end": 409, "name": "b" } }, { "type": "contig", "fields": { "start": 158, "end": 286, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 287, "end": 409, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 410, "end": 583, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 158, "end": 286, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 287, "end": 409, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 158, "end": 286, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 410, "end": 583, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 802, "end": 896, "name": "a" } }, { "type": "contig", "fields": { "start": 29, "end": 981, "name": "b" } }, { "type": "contig", "fields": { "start": 318, "end": 801, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 318, "end": 801, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 802, "end": 896, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 29, "end": 981, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 802, "end": 896, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 318, "end": 801, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 802, "end": 896, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 584, "end": 641, 
"name": "a" } }, { "type": "contig", "fields": { "start": 258, "end": 771, "name": "b" } }, { "type": "contig", "fields": { "start": 110, "end": 257, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 110, "end": 257, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 258, "end": 771, "name": "b" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 258, "end": 771, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 584, "end": 641, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 110, "end": 257, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 584, "end": 641, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 763, "end": 775, "name": "a" } }, { "type": "contig", "fields": { "start": 407, "end": 752, "name": "b" } }, { "type": "contig", "fields": { "start": 688, "end": 762, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 688, "end": 762, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 763, "end": 775, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 407, "end": 752, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 763, "end": 775, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 688, "end": 762, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 763, "end": 775, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 44, "end": 168, "name": "a" } }, { "type": "contig", "fields": { "start": 20, "end": 33, "name": "b" } }, { "type": "contig", "fields": { "start": 3, "end": 24, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 3, "end": 24, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 20, "end": 33, "name": "b" } } } }, { "type": 
"nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 20, "end": 33, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 44, "end": 168, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 3, "end": 24, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 44, "end": 168, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 230, "end": 327, "name": "a" } }, { "type": "contig", "fields": { "start": 208, "end": 215, "name": "b" } }, { "type": "contig", "fields": { "start": 114, "end": 207, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 114, "end": 207, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 208, "end": 215, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 208, "end": 215, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 230, "end": 327, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 114, "end": 207, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 230, "end": 327, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 775, "end": 869, "name": "a" } }, { "type": "contig", "fields": { "start": 106, "end": 454, "name": "b" } }, { "type": "contig", "fields": { "start": 289, "end": 556, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 106, "end": 454, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 775, "end": 869, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 106, "end": 454, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 775, "end": 869, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 289, "end": 556, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 775, "end": 869, "name": "a" } 
} } }] +[{ "type": "contig", "fields": { "start": 736, "end": 935, "name": "a" } }, { "type": "contig", "fields": { "start": 234, "end": 735, "name": "b" } }, { "type": "contig", "fields": { "start": 261, "end": 668, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 234, "end": 735, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 736, "end": 935, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 234, "end": 735, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 736, "end": 935, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 261, "end": 668, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 736, "end": 935, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 227, "end": 344, "name": "a" } }, { "type": "contig", "fields": { "start": 159, "end": 159, "name": "b" } }, { "type": "contig", "fields": { "start": 80, "end": 102, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 80, "end": 102, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 227, "end": 344, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 159, "end": 159, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 227, "end": 344, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 80, "end": 102, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 227, "end": 344, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 523, "end": 734, "name": "a" } }, { "type": "contig", "fields": { "start": 271, "end": 466, "name": "b" } }, { "type": "contig", "fields": { "start": 185, "end": 522, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 185, "end": 522, "name": "c" } }, "b": { "type": "contig", 
"fields": { "start": 523, "end": 734, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 271, "end": 466, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 523, "end": 734, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 185, "end": 522, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 523, "end": 734, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 158, "end": 772, "name": "a" } }, { "type": "contig", "fields": { "start": 107, "end": 149, "name": "b" } }, { "type": "contig", "fields": { "start": 30, "end": 121, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 30, "end": 121, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 107, "end": 149, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 107, "end": 149, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 158, "end": 772, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 30, "end": 121, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 158, "end": 772, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 54, "end": 175, "name": "a" } }, { "type": "contig", "fields": { "start": 17, "end": 19, "name": "b" } }, { "type": "contig", "fields": { "start": 12, "end": 14, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 12, "end": 14, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 17, "end": 19, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 17, "end": 19, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 54, "end": 175, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 12, "end": 14, "name": "c" } }, "b": { "type": "contig", 
"fields": { "start": 54, "end": 175, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 72, "end": 953, "name": "a" } }, { "type": "contig", "fields": { "start": 39, "end": 72, "name": "b" } }, { "type": "contig", "fields": { "start": 1, "end": 38, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 1, "end": 38, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 39, "end": 72, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 39, "end": 72, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 72, "end": 953, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 1, "end": 38, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 72, "end": 953, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 110, "end": 505, "name": "a" } }, { "type": "contig", "fields": { "start": 27, "end": 396, "name": "b" } }, { "type": "contig", "fields": { "start": 13, "end": 103, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 27, "end": 396, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 110, "end": 505, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 13, "end": 103, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 110, "end": 505, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 13, "end": 103, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 110, "end": 505, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 930, "end": 943, "name": "a" } }, { "type": "contig", "fields": { "start": 280, "end": 818, "name": "b" } }, { "type": "contig", "fields": { "start": 351, "end": 645, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 280, "end": 818, "name": 
"b" } }, "b": { "type": "contig", "fields": { "start": 930, "end": 943, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 351, "end": 645, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 930, "end": 943, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 351, "end": 645, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 930, "end": 943, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 745, "end": 786, "name": "a" } }, { "type": "contig", "fields": { "start": 530, "end": 744, "name": "b" } }, { "type": "contig", "fields": { "start": 568, "end": 703, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 530, "end": 744, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 745, "end": 786, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 568, "end": 703, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 745, "end": 786, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 568, "end": 703, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 745, "end": 786, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 283, "end": 654, "name": "a" } }, { "type": "contig", "fields": { "start": 236, "end": 814, "name": "b" } }, { "type": "contig", "fields": { "start": 126, "end": 274, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 126, "end": 274, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 236, "end": 814, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 126, "end": 274, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 283, "end": 654, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { 
"start": 126, "end": 274, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 283, "end": 654, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 28, "end": 86, "name": "a" } }, { "type": "contig", "fields": { "start": 21, "end": 587, "name": "b" } }, { "type": "contig", "fields": { "start": 0, "end": 20, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 0, "end": 20, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 21, "end": 587, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 0, "end": 20, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 28, "end": 86, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 0, "end": 20, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 28, "end": 86, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 750, "end": 946, "name": "a" } }, { "type": "contig", "fields": { "start": 297, "end": 749, "name": "b" } }, { "type": "contig", "fields": { "start": 184, "end": 749, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 297, "end": 749, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 750, "end": 946, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 184, "end": 749, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 750, "end": 946, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 184, "end": 749, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 750, "end": 946, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 332, "end": 620, "name": "a" } }, { "type": "contig", "fields": { "start": 80, "end": 443, "name": "b" } }, { "type": "contig", "fields": { "start": 76, "end": 158, "name": "c" } }, { "type": "overlap", "fields": { 
"a": { "type": "contig", "fields": { "start": 80, "end": 443, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 332, "end": 620, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 76, "end": 158, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 80, "end": 443, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 76, "end": 158, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 332, "end": 620, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 769, "end": 793, "name": "a" } }, { "type": "contig", "fields": { "start": 717, "end": 768, "name": "b" } }, { "type": "contig", "fields": { "start": 392, "end": 766, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 717, "end": 768, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 769, "end": 793, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 392, "end": 766, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 717, "end": 768, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 392, "end": 766, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 769, "end": 793, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 365, "end": 618, "name": "a" } }, { "type": "contig", "fields": { "start": 59, "end": 871, "name": "b" } }, { "type": "contig", "fields": { "start": 47, "end": 364, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 47, "end": 364, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 365, "end": 618, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 47, "end": 364, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 59, "end": 871, "name": "b" } } } }, { "type": "kiss", 
"fields": { "a": { "type": "contig", "fields": { "start": 47, "end": 364, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 365, "end": 618, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 75, "end": 769, "name": "a" } }, { "type": "contig", "fields": { "start": 62, "end": 931, "name": "b" } }, { "type": "contig", "fields": { "start": 14, "end": 59, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 62, "end": 931, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 75, "end": 769, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 14, "end": 59, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 62, "end": 931, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 14, "end": 59, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 75, "end": 769, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 816, "end": 844, "name": "a" } }, { "type": "contig", "fields": { "start": 698, "end": 748, "name": "b" } }, { "type": "contig", "fields": { "start": 264, "end": 644, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 698, "end": 748, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 816, "end": 844, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 264, "end": 644, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 698, "end": 748, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 264, "end": 644, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 816, "end": 844, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 855, "end": 922, "name": "a" } }, { "type": "contig", "fields": { "start": 156, "end": 854, "name": "b" } }, { "type": "contig", "fields": { "start": 84, 
"end": 86, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 156, "end": 854, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 855, "end": 922, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 84, "end": 86, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 156, "end": 854, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 84, "end": 86, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 855, "end": 922, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 22, "end": 771, "name": "a" } }, { "type": "contig", "fields": { "start": 827, "end": 843, "name": "b" } }, { "type": "contig", "fields": { "start": 17, "end": 18, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 17, "end": 18, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 22, "end": 771, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 17, "end": 18, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 827, "end": 843, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 17, "end": 18, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 22, "end": 771, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 869, "end": 983, "name": "a" } }, { "type": "contig", "fields": { "start": 998, "end": 999, "name": "b" } }, { "type": "contig", "fields": { "start": 793, "end": 868, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 793, "end": 868, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 869, "end": 983, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 793, "end": 868, "name": "c" } }, "b": { "type": "contig", "fields": { 
"start": 998, "end": 999, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 793, "end": 868, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 869, "end": 983, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 935, "end": 990, "name": "a" } }, { "type": "contig", "fields": { "start": 572, "end": 934, "name": "b" } }, { "type": "contig", "fields": { "start": 381, "end": 571, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 572, "end": 934, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 935, "end": 990, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 381, "end": 571, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 572, "end": 934, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 381, "end": 571, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 935, "end": 990, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 685, "end": 956, "name": "a" } }, { "type": "contig", "fields": { "start": 685, "end": 922, "name": "b" } }, { "type": "contig", "fields": { "start": 524, "end": 684, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 524, "end": 684, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 685, "end": 956, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 524, "end": 684, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 685, "end": 922, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 524, "end": 684, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 685, "end": 956, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 418, "end": 658, "name": "a" } }, { "type": "contig", "fields": { "start": 112, 
"end": 622, "name": "b" } }, { "type": "contig", "fields": { "start": 33, "end": 417, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 33, "end": 417, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 418, "end": 658, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 112, "end": 622, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 418, "end": 658, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 33, "end": 417, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 112, "end": 622, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 844, "end": 869, "name": "a" } }, { "type": "contig", "fields": { "start": 543, "end": 933, "name": "b" } }, { "type": "contig", "fields": { "start": 289, "end": 942, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 543, "end": 933, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 844, "end": 869, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 289, "end": 942, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 844, "end": 869, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 289, "end": 942, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 543, "end": 933, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 735, "end": 936, "name": "a" } }, { "type": "contig", "fields": { "start": 404, "end": 734, "name": "b" } }, { "type": "contig", "fields": { "start": 36, "end": 915, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 404, "end": 734, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 735, "end": 936, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", 
"fields": { "start": 36, "end": 915, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 735, "end": 936, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 36, "end": 915, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 404, "end": 734, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 283, "end": 721, "name": "a" } }, { "type": "contig", "fields": { "start": 266, "end": 282, "name": "b" } }, { "type": "contig", "fields": { "start": 129, "end": 282, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 266, "end": 282, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 283, "end": 721, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 129, "end": 282, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 283, "end": 721, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 129, "end": 282, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 266, "end": 282, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 649, "end": 727, "name": "a" } }, { "type": "contig", "fields": { "start": 284, "end": 994, "name": "b" } }, { "type": "contig", "fields": { "start": 250, "end": 274, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 284, "end": 994, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 649, "end": 727, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 250, "end": 274, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 649, "end": 727, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 250, "end": 274, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 284, "end": 994, "name": "b" } } } }] +[{ "type": "contig", 
"fields": { "start": 399, "end": 905, "name": "a" } }, { "type": "contig", "fields": { "start": 159, "end": 229, "name": "b" } }, { "type": "contig", "fields": { "start": 49, "end": 92, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 159, "end": 229, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 399, "end": 905, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 49, "end": 92, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 399, "end": 905, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 49, "end": 92, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 159, "end": 229, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 965, "end": 989, "name": "a" } }, { "type": "contig", "fields": { "start": 257, "end": 964, "name": "b" } }, { "type": "contig", "fields": { "start": 225, "end": 233, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 257, "end": 964, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 965, "end": 989, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 225, "end": 233, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 965, "end": 989, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 225, "end": 233, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 257, "end": 964, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 821, "end": 861, "name": "a" } }, { "type": "contig", "fields": { "start": 820, "end": 931, "name": "b" } }, { "type": "contig", "fields": { "start": 566, "end": 820, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 566, "end": 820, "name": "c" } }, "b": { "type": "contig", "fields": { 
"start": 821, "end": 861, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 820, "end": 931, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 821, "end": 861, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 566, "end": 820, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 820, "end": 931, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 894, "end": 922, "name": "a" } }, { "type": "contig", "fields": { "start": 886, "end": 996, "name": "b" } }, { "type": "contig", "fields": { "start": 66, "end": 885, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 66, "end": 885, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 886, "end": 996, "name": "b" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 886, "end": 996, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 894, "end": 922, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 66, "end": 885, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 886, "end": 996, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 952, "end": 996, "name": "a" } }, { "type": "contig", "fields": { "start": 167, "end": 571, "name": "b" } }, { "type": "contig", "fields": { "start": 43, "end": 166, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 43, "end": 166, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 167, "end": 571, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 167, "end": 571, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 952, "end": 996, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 43, "end": 166, "name": "c" } }, "b": { "type": 
"contig", "fields": { "start": 167, "end": 571, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 48, "end": 958, "name": "a" } }, { "type": "contig", "fields": { "start": 29, "end": 29, "name": "b" } }, { "type": "contig", "fields": { "start": 1, "end": 14, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 29, "end": 29, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 48, "end": 958, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 29, "end": 29, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 48, "end": 958, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 1, "end": 14, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 29, "end": 29, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 451, "end": 615, "name": "a" } }, { "type": "contig", "fields": { "start": 284, "end": 450, "name": "b" } }, { "type": "contig", "fields": { "start": 209, "end": 210, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 284, "end": 450, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 451, "end": 615, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 284, "end": 450, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 451, "end": 615, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 209, "end": 210, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 284, "end": 450, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 42, "end": 687, "name": "a" } }, { "type": "contig", "fields": { "start": 10, "end": 38, "name": "b" } }, { "type": "contig", "fields": { "start": 9, "end": 10, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 9, "end": 10, 
"name": "c" } }, "b": { "type": "contig", "fields": { "start": 42, "end": 687, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 10, "end": 38, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 42, "end": 687, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 9, "end": 10, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 10, "end": 38, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 92, "end": 840, "name": "a" } }, { "type": "contig", "fields": { "start": 91, "end": 91, "name": "b" } }, { "type": "contig", "fields": { "start": 1, "end": 91, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 1, "end": 91, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 92, "end": 840, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 91, "end": 91, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 92, "end": 840, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 1, "end": 91, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 91, "end": 91, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 613, "end": 679, "name": "a" } }, { "type": "contig", "fields": { "start": 271, "end": 432, "name": "b" } }, { "type": "contig", "fields": { "start": 142, "end": 192, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 142, "end": 192, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 271, "end": 432, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 271, "end": 432, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 613, "end": 679, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 142, "end": 192, "name": "c" 
} }, "b": { "type": "contig", "fields": { "start": 271, "end": 432, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 202, "end": 432, "name": "a" } }, { "type": "contig", "fields": { "start": 197, "end": 199, "name": "b" } }, { "type": "contig", "fields": { "start": 84, "end": 196, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 84, "end": 196, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 197, "end": 199, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 197, "end": 199, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 202, "end": 432, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 84, "end": 196, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 197, "end": 199, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 272, "end": 569, "name": "a" } }, { "type": "contig", "fields": { "start": 514, "end": 531, "name": "b" } }, { "type": "contig", "fields": { "start": 175, "end": 513, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 175, "end": 513, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 514, "end": 531, "name": "b" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 175, "end": 513, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 272, "end": 569, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 175, "end": 513, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 514, "end": 531, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 466, "end": 763, "name": "a" } }, { "type": "contig", "fields": { "start": 444, "end": 621, "name": "b" } }, { "type": "contig", "fields": { "start": 101, "end": 172, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", 
"fields": { "start": 444, "end": 621, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 466, "end": 763, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 101, "end": 172, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 466, "end": 763, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 101, "end": 172, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 444, "end": 621, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 918, "end": 934, "name": "a" } }, { "type": "contig", "fields": { "start": 352, "end": 569, "name": "b" } }, { "type": "contig", "fields": { "start": 181, "end": 306, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 352, "end": 569, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 918, "end": 934, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 181, "end": 306, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 918, "end": 934, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 181, "end": 306, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 352, "end": 569, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 633, "end": 980, "name": "a" } }, { "type": "contig", "fields": { "start": 370, "end": 632, "name": "b" } }, { "type": "contig", "fields": { "start": 44, "end": 116, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 370, "end": 632, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 633, "end": 980, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 44, "end": 116, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 633, "end": 980, "name": "a" } } } }, { "type": "kiss", "fields": 
{ "a": { "type": "contig", "fields": { "start": 44, "end": 116, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 370, "end": 632, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 119, "end": 708, "name": "a" } }, { "type": "contig", "fields": { "start": 78, "end": 433, "name": "b" } }, { "type": "contig", "fields": { "start": 53, "end": 77, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 53, "end": 77, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 78, "end": 433, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 53, "end": 77, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 119, "end": 708, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 53, "end": 77, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 78, "end": 433, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 524, "end": 671, "name": "a" } }, { "type": "contig", "fields": { "start": 351, "end": 859, "name": "b" } }, { "type": "contig", "fields": { "start": 347, "end": 350, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 351, "end": 859, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 524, "end": 671, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 347, "end": 350, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 524, "end": 671, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 347, "end": 350, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 351, "end": 859, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 510, "end": 804, "name": "a" } }, { "type": "contig", "fields": { "start": 17, "end": 356, "name": "b" } }, { "type": "contig", "fields": { "start": 12, "end": 14, "name": 
"c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 17, "end": 356, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 510, "end": 804, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 12, "end": 14, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 510, "end": 804, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 12, "end": 14, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 17, "end": 356, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 738, "end": 757, "name": "a" } }, { "type": "contig", "fields": { "start": 63, "end": 294, "name": "b" } }, { "type": "contig", "fields": { "start": 11, "end": 12, "name": "c" } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 63, "end": 294, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 738, "end": 757, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 11, "end": 12, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 738, "end": 757, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 11, "end": 12, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 63, "end": 294, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 606, "end": 664, "name": "a" } }, { "type": "contig", "fields": { "start": 26, "end": 605, "name": "b" } }, { "type": "contig", "fields": { "start": 20, "end": 20, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 26, "end": 605, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 606, "end": 664, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 20, "end": 20, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 606, "end": 664, "name": "a" } } } 
}, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 20, "end": 20, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 26, "end": 605, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 873, "end": 950, "name": "a" } }, { "type": "contig", "fields": { "start": 640, "end": 870, "name": "b" } }, { "type": "contig", "fields": { "start": 140, "end": 554, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 140, "end": 554, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 873, "end": 950, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 140, "end": 554, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 873, "end": 950, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 140, "end": 554, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 640, "end": 870, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 649, "end": 893, "name": "a" } }, { "type": "contig", "fields": { "start": 741, "end": 963, "name": "b" } }, { "type": "contig", "fields": { "start": 623, "end": 648, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 623, "end": 648, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 649, "end": 893, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 623, "end": 648, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 649, "end": 893, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 623, "end": 648, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 741, "end": 963, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 126, "end": 131, "name": "a" } }, { "type": "contig", "fields": { "start": 178, "end": 763, "name": "b" } }, { "type": "contig", 
"fields": { "start": 22, "end": 77, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 22, "end": 77, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 178, "end": 763, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 22, "end": 77, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 126, "end": 131, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 22, "end": 77, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 178, "end": 763, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 265, "end": 905, "name": "a" } }, { "type": "contig", "fields": { "start": 14, "end": 926, "name": "b" } }, { "type": "contig", "fields": { "start": 12, "end": 13, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 12, "end": 13, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 14, "end": 926, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 12, "end": 13, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 265, "end": 905, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 12, "end": 13, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 14, "end": 926, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 951, "end": 982, "name": "a" } }, { "type": "contig", "fields": { "start": 950, "end": 950, "name": "b" } }, { "type": "contig", "fields": { "start": 91, "end": 950, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 950, "end": 950, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 951, "end": 982, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 91, "end": 950, "name": "c" } }, "b": { "type": "contig", 
"fields": { "start": 951, "end": 982, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 91, "end": 950, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 950, "end": 950, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 455, "end": 772, "name": "a" } }, { "type": "contig", "fields": { "start": 347, "end": 900, "name": "b" } }, { "type": "contig", "fields": { "start": 335, "end": 341, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 347, "end": 900, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 455, "end": 772, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 335, "end": 341, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 347, "end": 900, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 335, "end": 341, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 347, "end": 900, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 235, "end": 631, "name": "a" } }, { "type": "contig", "fields": { "start": 52, "end": 156, "name": "b" } }, { "type": "contig", "fields": { "start": 8, "end": 40, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 52, "end": 156, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 235, "end": 631, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 8, "end": 40, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 52, "end": 156, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 8, "end": 40, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 52, "end": 156, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 771, "end": 932, "name": "a" } }, { "type": "contig", "fields": { "start": 380, 
"end": 770, "name": "b" } }, { "type": "contig", "fields": { "start": 333, "end": 333, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 380, "end": 770, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 771, "end": 932, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 333, "end": 333, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 380, "end": 770, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 333, "end": 333, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 380, "end": 770, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 703, "end": 936, "name": "a" } }, { "type": "contig", "fields": { "start": 966, "end": 966, "name": "b" } }, { "type": "contig", "fields": { "start": 681, "end": 854, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 681, "end": 854, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 703, "end": 936, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 681, "end": 854, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 966, "end": 966, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 681, "end": 854, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 966, "end": 966, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 512, "end": 939, "name": "a" } }, { "type": "contig", "fields": { "start": 472, "end": 496, "name": "b" } }, { "type": "contig", "fields": { "start": 388, "end": 413, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 388, "end": 413, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 512, "end": 939, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", 
"fields": { "start": 388, "end": 413, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 472, "end": 496, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 388, "end": 413, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 472, "end": 496, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 534, "end": 837, "name": "a" } }, { "type": "contig", "fields": { "start": 616, "end": 949, "name": "b" } }, { "type": "contig", "fields": { "start": 357, "end": 533, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 357, "end": 533, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 534, "end": 837, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 357, "end": 533, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 616, "end": 949, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 357, "end": 533, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 616, "end": 949, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 725, "end": 903, "name": "a" } }, { "type": "contig", "fields": { "start": 670, "end": 724, "name": "b" } }, { "type": "contig", "fields": { "start": 517, "end": 669, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 670, "end": 724, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 725, "end": 903, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 517, "end": 669, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 670, "end": 724, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 517, "end": 669, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 670, "end": 724, "name": "b" } } } }] +[{ "type": "contig", "fields": { 
"start": 591, "end": 896, "name": "a" } }, { "type": "contig", "fields": { "start": 591, "end": 701, "name": "b" } }, { "type": "contig", "fields": { "start": 213, "end": 590, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 213, "end": 590, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 591, "end": 896, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 213, "end": 590, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 591, "end": 701, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 213, "end": 590, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 591, "end": 701, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 806, "end": 994, "name": "a" } }, { "type": "contig", "fields": { "start": 108, "end": 208, "name": "b" } }, { "type": "contig", "fields": { "start": 40, "end": 912, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 40, "end": 912, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 806, "end": 994, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 40, "end": 912, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 108, "end": 208, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 108, "end": 208, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 806, "end": 994, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 108, "end": 208, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 806, "end": 994, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 785, "end": 990, "name": "a" } }, { "type": "contig", "fields": { "start": 654, "end": 686, "name": "b" } }, { "type": "contig", "fields": { "start": 351, "end": 
784, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 351, "end": 784, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 785, "end": 990, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 351, "end": 784, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 654, "end": 686, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 654, "end": 686, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 785, "end": 990, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 654, "end": 686, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 785, "end": 990, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 904, "end": 983, "name": "a" } }, { "type": "contig", "fields": { "start": 397, "end": 713, "name": "b" } }, { "type": "contig", "fields": { "start": 222, "end": 813, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 222, "end": 813, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 397, "end": 713, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 397, "end": 713, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 904, "end": 983, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 222, "end": 813, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 904, "end": 983, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 397, "end": 713, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 904, "end": 983, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 827, "end": 865, "name": "a" } }, { "type": "contig", "fields": { "start": 178, "end": 719, "name": "b" } }, { "type": "contig", 
"fields": { "start": 39, "end": 177, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 39, "end": 177, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 178, "end": 719, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 178, "end": 719, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 827, "end": 865, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 39, "end": 177, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 827, "end": 865, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 178, "end": 719, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 827, "end": 865, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 937, "end": 986, "name": "a" } }, { "type": "contig", "fields": { "start": 878, "end": 936, "name": "b" } }, { "type": "contig", "fields": { "start": 235, "end": 881, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 878, "end": 936, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 937, "end": 986, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 235, "end": 881, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 878, "end": 936, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 235, "end": 881, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 937, "end": 986, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 878, "end": 936, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 937, "end": 986, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 309, "end": 686, "name": "a" } }, { "type": "contig", "fields": { "start": 82, "end": 308, "name": 
"b" } }, { "type": "contig", "fields": { "start": 24, "end": 81, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 82, "end": 308, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 309, "end": 686, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 24, "end": 81, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 82, "end": 308, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 24, "end": 81, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 309, "end": 686, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 82, "end": 308, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 309, "end": 686, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 821, "end": 923, "name": "a" } }, { "type": "contig", "fields": { "start": 37, "end": 820, "name": "b" } }, { "type": "contig", "fields": { "start": 24, "end": 921, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 37, "end": 820, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 821, "end": 923, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 24, "end": 921, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 821, "end": 923, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 24, "end": 921, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 37, "end": 820, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 37, "end": 820, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 821, "end": 923, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 219, "end": 718, "name": "a" } }, { "type": "contig", "fields": { "start": 186, "end": 
218, "name": "b" } }, { "type": "contig", "fields": { "start": 89, "end": 218, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 186, "end": 218, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 219, "end": 718, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 89, "end": 218, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 219, "end": 718, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 89, "end": 218, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 186, "end": 218, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 186, "end": 218, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 219, "end": 718, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 684, "end": 943, "name": "a" } }, { "type": "contig", "fields": { "start": 575, "end": 670, "name": "b" } }, { "type": "contig", "fields": { "start": 344, "end": 382, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 575, "end": 670, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 684, "end": 943, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 344, "end": 382, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 684, "end": 943, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 344, "end": 382, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 575, "end": 670, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 575, "end": 670, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 684, "end": 943, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 802, "end": 971, "name": "a" } }, { "type": "contig", 
"fields": { "start": 447, "end": 801, "name": "b" } }, { "type": "contig", "fields": { "start": 364, "end": 390, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 447, "end": 801, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 802, "end": 971, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 364, "end": 390, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 802, "end": 971, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 364, "end": 390, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 447, "end": 801, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 447, "end": 801, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 802, "end": 971, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 618, "end": 832, "name": "a" } }, { "type": "contig", "fields": { "start": 241, "end": 348, "name": "b" } }, { "type": "contig", "fields": { "start": 66, "end": 617, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 66, "end": 617, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 618, "end": 832, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 66, "end": 617, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 241, "end": 348, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 241, "end": 348, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 618, "end": 832, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 66, "end": 617, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 618, "end": 832, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 268, "end": 690, "name": 
"a" } }, { "type": "contig", "fields": { "start": 265, "end": 266, "name": "b" } }, { "type": "contig", "fields": { "start": 179, "end": 267, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 179, "end": 267, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 268, "end": 690, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 265, "end": 266, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 268, "end": 690, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 265, "end": 266, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 268, "end": 690, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 179, "end": 267, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 268, "end": 690, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 28, "end": 978, "name": "a" } }, { "type": "contig", "fields": { "start": 3, "end": 5, "name": "b" } }, { "type": "contig", "fields": { "start": 0, "end": 8, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 0, "end": 8, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 3, "end": 5, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 3, "end": 5, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 28, "end": 978, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 3, "end": 5, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 28, "end": 978, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 0, "end": 8, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 28, "end": 978, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 503, "end": 659, "name": "a" } }, 
{ "type": "contig", "fields": { "start": 417, "end": 491, "name": "b" } }, { "type": "contig", "fields": { "start": 6, "end": 416, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 6, "end": 416, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 417, "end": 491, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 417, "end": 491, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 503, "end": 659, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 417, "end": 491, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 503, "end": 659, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 6, "end": 416, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 503, "end": 659, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 932, "end": 967, "name": "a" } }, { "type": "contig", "fields": { "start": 366, "end": 547, "name": "b" } }, { "type": "contig", "fields": { "start": 46, "end": 748, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 366, "end": 547, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 932, "end": 967, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 46, "end": 748, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 932, "end": 967, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 366, "end": 547, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 932, "end": 967, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 46, "end": 748, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 932, "end": 967, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 296, "end": 809, 
"name": "a" } }, { "type": "contig", "fields": { "start": 223, "end": 295, "name": "b" } }, { "type": "contig", "fields": { "start": 61, "end": 277, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 223, "end": 295, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 296, "end": 809, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 61, "end": 277, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 296, "end": 809, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 223, "end": 295, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 296, "end": 809, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 61, "end": 277, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 296, "end": 809, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 584, "end": 776, "name": "a" } }, { "type": "contig", "fields": { "start": 32, "end": 371, "name": "b" } }, { "type": "contig", "fields": { "start": 8, "end": 414, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 8, "end": 414, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 32, "end": 371, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 8, "end": 414, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 584, "end": 776, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 32, "end": 371, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 584, "end": 776, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 8, "end": 414, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 584, "end": 776, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 793, 
"end": 988, "name": "a" } }, { "type": "contig", "fields": { "start": 751, "end": 766, "name": "b" } }, { "type": "contig", "fields": { "start": 640, "end": 750, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 640, "end": 750, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 751, "end": 766, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 640, "end": 750, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 793, "end": 988, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 751, "end": 766, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 793, "end": 988, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 640, "end": 750, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 793, "end": 988, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 264, "end": 348, "name": "a" } }, { "type": "contig", "fields": { "start": 139, "end": 263, "name": "b" } }, { "type": "contig", "fields": { "start": 24, "end": 263, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 139, "end": 263, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 264, "end": 348, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 24, "end": 263, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 264, "end": 348, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 139, "end": 263, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 264, "end": 348, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 24, "end": 263, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 264, "end": 348, "name": "a" } } } }] +[{ "type": "contig", 
"fields": { "start": 46, "end": 161, "name": "a" } }, { "type": "contig", "fields": { "start": 45, "end": 45, "name": "b" } }, { "type": "contig", "fields": { "start": 23, "end": 46, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 45, "end": 45, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 46, "end": 161, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 23, "end": 46, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 45, "end": 45, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 45, "end": 45, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 46, "end": 161, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 23, "end": 46, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 46, "end": 161, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 68, "end": 511, "name": "a" } }, { "type": "contig", "fields": { "start": 43, "end": 58, "name": "b" } }, { "type": "contig", "fields": { "start": 0, "end": 67, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 0, "end": 67, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 68, "end": 511, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 0, "end": 67, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 43, "end": 58, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 43, "end": 58, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 68, "end": 511, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 0, "end": 67, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 68, "end": 511, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 
312, "end": 971, "name": "a" } }, { "type": "contig", "fields": { "start": 52, "end": 88, "name": "b" } }, { "type": "contig", "fields": { "start": 9, "end": 40, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 52, "end": 88, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 312, "end": 971, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 9, "end": 40, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 52, "end": 88, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 52, "end": 88, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 312, "end": 971, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 9, "end": 40, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 312, "end": 971, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 575, "end": 761, "name": "a" } }, { "type": "contig", "fields": { "start": 236, "end": 574, "name": "b" } }, { "type": "contig", "fields": { "start": 165, "end": 203, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 236, "end": 574, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 575, "end": 761, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 165, "end": 203, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 236, "end": 574, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 236, "end": 574, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 575, "end": 761, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 165, "end": 203, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 575, "end": 761, "name": "a" } } } }] +[{ "type": "contig", "fields": { 
"start": 406, "end": 942, "name": "a" } }, { "type": "contig", "fields": { "start": 234, "end": 306, "name": "b" } }, { "type": "contig", "fields": { "start": 214, "end": 215, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 214, "end": 215, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 406, "end": 942, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 214, "end": 215, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 234, "end": 306, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 234, "end": 306, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 406, "end": 942, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 214, "end": 215, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 406, "end": 942, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 161, "end": 596, "name": "a" } }, { "type": "contig", "fields": { "start": 96, "end": 160, "name": "b" } }, { "type": "contig", "fields": { "start": 77, "end": 95, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 96, "end": 160, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 161, "end": 596, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 77, "end": 95, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 96, "end": 160, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 96, "end": 160, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 161, "end": 596, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 77, "end": 95, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 161, "end": 596, "name": "a" } } } }] +[{ "type": "contig", 
"fields": { "start": 927, "end": 966, "name": "a" } }, { "type": "contig", "fields": { "start": 927, "end": 927, "name": "b" } }, { "type": "contig", "fields": { "start": 814, "end": 926, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 814, "end": 926, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 927, "end": 966, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 814, "end": 926, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 927, "end": 927, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 927, "end": 927, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 927, "end": 966, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 814, "end": 926, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 927, "end": 966, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 909, "end": 909, "name": "a" } }, { "type": "contig", "fields": { "start": 78, "end": 945, "name": "b" } }, { "type": "contig", "fields": { "start": 54, "end": 77, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 54, "end": 77, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 78, "end": 945, "name": "b" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 78, "end": 945, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 909, "end": 909, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 54, "end": 77, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 909, "end": 909, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 54, "end": 77, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 909, "end": 909, "name": "a" } } } }] +[{ 
"type": "contig", "fields": { "start": 734, "end": 777, "name": "a" } }, { "type": "contig", "fields": { "start": 287, "end": 377, "name": "b" } }, { "type": "contig", "fields": { "start": 23, "end": 422, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 23, "end": 422, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 287, "end": 377, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 287, "end": 377, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 734, "end": 777, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 23, "end": 422, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 734, "end": 777, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 23, "end": 422, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 734, "end": 777, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 54, "end": 688, "name": "a" } }, { "type": "contig", "fields": { "start": 32, "end": 33, "name": "b" } }, { "type": "contig", "fields": { "start": 14, "end": 31, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 14, "end": 31, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 32, "end": 33, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 32, "end": 33, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 54, "end": 688, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 14, "end": 31, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 54, "end": 688, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 14, "end": 31, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 54, "end": 688, "name": "a" } 
} } }] +[{ "type": "contig", "fields": { "start": 301, "end": 551, "name": "a" } }, { "type": "contig", "fields": { "start": 106, "end": 632, "name": "b" } }, { "type": "contig", "fields": { "start": 26, "end": 258, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 106, "end": 632, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 301, "end": 551, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 26, "end": 258, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 106, "end": 632, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 26, "end": 258, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 301, "end": 551, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 26, "end": 258, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 301, "end": 551, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 739, "end": 753, "name": "a" } }, { "type": "contig", "fields": { "start": 294, "end": 738, "name": "b" } }, { "type": "contig", "fields": { "start": 15, "end": 677, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 294, "end": 738, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 739, "end": 753, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 15, "end": 677, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 294, "end": 738, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 15, "end": 677, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 739, "end": 753, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 15, "end": 677, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 739, 
"end": 753, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 25, "end": 788, "name": "a" } }, { "type": "contig", "fields": { "start": 12, "end": 24, "name": "b" } }, { "type": "contig", "fields": { "start": 0, "end": 11, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 12, "end": 24, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 25, "end": 788, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 0, "end": 11, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 12, "end": 24, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 0, "end": 11, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 25, "end": 788, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 0, "end": 11, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 25, "end": 788, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 583, "end": 649, "name": "a" } }, { "type": "contig", "fields": { "start": 406, "end": 975, "name": "b" } }, { "type": "contig", "fields": { "start": 230, "end": 582, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 230, "end": 582, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 583, "end": 649, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 406, "end": 975, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 583, "end": 649, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 230, "end": 582, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 406, "end": 975, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 230, "end": 582, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 
583, "end": 649, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 849, "end": 883, "name": "a" } }, { "type": "contig", "fields": { "start": 612, "end": 848, "name": "b" } }, { "type": "contig", "fields": { "start": 11, "end": 848, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 612, "end": 848, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 849, "end": 883, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 11, "end": 848, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 849, "end": 883, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 11, "end": 848, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 612, "end": 848, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 11, "end": 848, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 849, "end": 883, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 905, "end": 960, "name": "a" } }, { "type": "contig", "fields": { "start": 240, "end": 961, "name": "b" } }, { "type": "contig", "fields": { "start": 96, "end": 168, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 240, "end": 961, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 905, "end": 960, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 96, "end": 168, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 905, "end": 960, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 96, "end": 168, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 240, "end": 961, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 96, "end": 168, "name": "c" } }, "b": { "type": "contig", 
"fields": { "start": 905, "end": 960, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 950, "end": 997, "name": "a" } }, { "type": "contig", "fields": { "start": 183, "end": 630, "name": "b" } }, { "type": "contig", "fields": { "start": 31, "end": 87, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 183, "end": 630, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 950, "end": 997, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 31, "end": 87, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 950, "end": 997, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 31, "end": 87, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 183, "end": 630, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 31, "end": 87, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 950, "end": 997, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 916, "end": 936, "name": "a" } }, { "type": "contig", "fields": { "start": 74, "end": 915, "name": "b" } }, { "type": "contig", "fields": { "start": 57, "end": 68, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 74, "end": 915, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 916, "end": 936, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 57, "end": 68, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 916, "end": 936, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 57, "end": 68, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 74, "end": 915, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 57, "end": 68, "name": "c" } }, "b": { 
"type": "contig", "fields": { "start": 916, "end": 936, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 508, "end": 897, "name": "a" } }, { "type": "contig", "fields": { "start": 242, "end": 502, "name": "b" } }, { "type": "contig", "fields": { "start": 91, "end": 241, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 91, "end": 241, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 242, "end": 502, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 242, "end": 502, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 508, "end": 897, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 242, "end": 502, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 508, "end": 897, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 91, "end": 241, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 242, "end": 502, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 404, "end": 565, "name": "a" } }, { "type": "contig", "fields": { "start": 9, "end": 164, "name": "b" } }, { "type": "contig", "fields": { "start": 7, "end": 9, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 9, "end": 164, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 404, "end": 565, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 7, "end": 9, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 404, "end": 565, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 9, "end": 164, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 404, "end": 565, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 7, "end": 9, "name": "c" } }, 
"b": { "type": "contig", "fields": { "start": 9, "end": 164, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 704, "end": 975, "name": "a" } }, { "type": "contig", "fields": { "start": 78, "end": 703, "name": "b" } }, { "type": "contig", "fields": { "start": 77, "end": 77, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 78, "end": 703, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 704, "end": 975, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 77, "end": 77, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 704, "end": 975, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 78, "end": 703, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 704, "end": 975, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 77, "end": 77, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 78, "end": 703, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 838, "end": 910, "name": "a" } }, { "type": "contig", "fields": { "start": 666, "end": 767, "name": "b" } }, { "type": "contig", "fields": { "start": 660, "end": 665, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 660, "end": 665, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 666, "end": 767, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 660, "end": 665, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 838, "end": 910, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 666, "end": 767, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 838, "end": 910, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 660, "end": 665, 
"name": "c" } }, "b": { "type": "contig", "fields": { "start": 666, "end": 767, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 130, "end": 888, "name": "a" } }, { "type": "contig", "fields": { "start": 129, "end": 129, "name": "b" } }, { "type": "contig", "fields": { "start": 70, "end": 129, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 129, "end": 129, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 130, "end": 888, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 70, "end": 129, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 130, "end": 888, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 129, "end": 129, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 130, "end": 888, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 70, "end": 129, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 129, "end": 129, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 276, "end": 436, "name": "a" } }, { "type": "contig", "fields": { "start": 159, "end": 226, "name": "b" } }, { "type": "contig", "fields": { "start": 137, "end": 154, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 159, "end": 226, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 276, "end": 436, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 137, "end": 154, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 159, "end": 226, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 159, "end": 226, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 276, "end": 436, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { 
"start": 137, "end": 154, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 159, "end": 226, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 912, "end": 951, "name": "a" } }, { "type": "contig", "fields": { "start": 731, "end": 911, "name": "b" } }, { "type": "contig", "fields": { "start": 569, "end": 658, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 731, "end": 911, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 912, "end": 951, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 569, "end": 658, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 731, "end": 911, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 731, "end": 911, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 912, "end": 951, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 569, "end": 658, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 731, "end": 911, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 522, "end": 593, "name": "a" } }, { "type": "contig", "fields": { "start": 378, "end": 414, "name": "b" } }, { "type": "contig", "fields": { "start": 315, "end": 325, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 315, "end": 325, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 522, "end": 593, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 315, "end": 325, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 378, "end": 414, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 378, "end": 414, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 522, "end": 593, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { 
"type": "contig", "fields": { "start": 315, "end": 325, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 378, "end": 414, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 420, "end": 450, "name": "a" } }, { "type": "contig", "fields": { "start": 3, "end": 419, "name": "b" } }, { "type": "contig", "fields": { "start": 0, "end": 2, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 3, "end": 419, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 420, "end": 450, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 0, "end": 2, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 3, "end": 419, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 3, "end": 419, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 420, "end": 450, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 0, "end": 2, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 3, "end": 419, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 308, "end": 679, "name": "a" } }, { "type": "contig", "fields": { "start": 308, "end": 308, "name": "b" } }, { "type": "contig", "fields": { "start": 111, "end": 307, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 111, "end": 307, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 308, "end": 679, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 111, "end": 307, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 308, "end": 308, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 308, "end": 308, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 308, "end": 679, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { 
"type": "contig", "fields": { "start": 111, "end": 307, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 308, "end": 308, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 239, "end": 779, "name": "a" } }, { "type": "contig", "fields": { "start": 45, "end": 386, "name": "b" } }, { "type": "contig", "fields": { "start": 44, "end": 44, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 44, "end": 44, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 45, "end": 386, "name": "b" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 45, "end": 386, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 239, "end": 779, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 44, "end": 44, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 239, "end": 779, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 44, "end": 44, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 45, "end": 386, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 25, "end": 407, "name": "a" } }, { "type": "contig", "fields": { "start": 9, "end": 13, "name": "b" } }, { "type": "contig", "fields": { "start": 5, "end": 8, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 5, "end": 8, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 9, "end": 13, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 9, "end": 13, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 25, "end": 407, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 5, "end": 8, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 25, "end": 407, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { 
"type": "contig", "fields": { "start": 5, "end": 8, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 9, "end": 13, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 174, "end": 175, "name": "a" } }, { "type": "contig", "fields": { "start": 103, "end": 173, "name": "b" } }, { "type": "contig", "fields": { "start": 34, "end": 102, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 103, "end": 173, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 174, "end": 175, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 34, "end": 102, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 103, "end": 173, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 34, "end": 102, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 174, "end": 175, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 34, "end": 102, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 103, "end": 173, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 463, "end": 474, "name": "a" } }, { "type": "contig", "fields": { "start": 462, "end": 921, "name": "b" } }, { "type": "contig", "fields": { "start": 78, "end": 462, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 78, "end": 462, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 463, "end": 474, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 462, "end": 921, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 463, "end": 474, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 78, "end": 462, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 463, "end": 474, "name": "a" } } } }, { "type": "kiss", "fields": 
{ "a": { "type": "contig", "fields": { "start": 78, "end": 462, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 462, "end": 921, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 110, "end": 850, "name": "a" } }, { "type": "contig", "fields": { "start": 19, "end": 684, "name": "b" } }, { "type": "contig", "fields": { "start": 6, "end": 18, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 6, "end": 18, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 19, "end": 684, "name": "b" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 19, "end": 684, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 110, "end": 850, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 6, "end": 18, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 110, "end": 850, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 6, "end": 18, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 19, "end": 684, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 385, "end": 576, "name": "a" } }, { "type": "contig", "fields": { "start": 307, "end": 346, "name": "b" } }, { "type": "contig", "fields": { "start": 216, "end": 306, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 216, "end": 306, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 307, "end": 346, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 307, "end": 346, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 385, "end": 576, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 216, "end": 306, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 385, "end": 576, "name": "a" } } } }, { "type": "kiss", 
"fields": { "a": { "type": "contig", "fields": { "start": 216, "end": 306, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 307, "end": 346, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 593, "end": 980, "name": "a" } }, { "type": "contig", "fields": { "start": 26, "end": 272, "name": "b" } }, { "type": "contig", "fields": { "start": 1, "end": 24, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 26, "end": 272, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 593, "end": 980, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 26, "end": 272, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 593, "end": 980, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 1, "end": 24, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 593, "end": 980, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 1, "end": 24, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 26, "end": 272, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 781, "end": 819, "name": "a" } }, { "type": "contig", "fields": { "start": 293, "end": 780, "name": "b" } }, { "type": "contig", "fields": { "start": 76, "end": 109, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 293, "end": 780, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 781, "end": 819, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 293, "end": 780, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 781, "end": 819, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 76, "end": 109, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 781, "end": 819, "name": "a" } } } }, { "type": "kiss", 
"fields": { "a": { "type": "contig", "fields": { "start": 76, "end": 109, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 293, "end": 780, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 882, "end": 927, "name": "a" } }, { "type": "contig", "fields": { "start": 519, "end": 621, "name": "b" } }, { "type": "contig", "fields": { "start": 292, "end": 359, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 292, "end": 359, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 882, "end": 927, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 519, "end": 621, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 882, "end": 927, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 292, "end": 359, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 882, "end": 927, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 292, "end": 359, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 519, "end": 621, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 128, "end": 665, "name": "a" } }, { "type": "contig", "fields": { "start": 127, "end": 127, "name": "b" } }, { "type": "contig", "fields": { "start": 86, "end": 127, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 86, "end": 127, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 128, "end": 665, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 127, "end": 127, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 128, "end": 665, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 86, "end": 127, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 128, "end": 665, "name": "a" } } } }, { 
"type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 86, "end": 127, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 127, "end": 127, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 361, "end": 945, "name": "a" } }, { "type": "contig", "fields": { "start": 117, "end": 352, "name": "b" } }, { "type": "contig", "fields": { "start": 91, "end": 110, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 91, "end": 110, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 117, "end": 352, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 117, "end": 352, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 361, "end": 945, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 91, "end": 110, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 361, "end": 945, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 91, "end": 110, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 117, "end": 352, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 76, "end": 739, "name": "a" } }, { "type": "contig", "fields": { "start": 47, "end": 63, "name": "b" } }, { "type": "contig", "fields": { "start": 21, "end": 46, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 21, "end": 46, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 47, "end": 63, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 47, "end": 63, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 76, "end": 739, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 21, "end": 46, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 76, "end": 739, "name": "a" } } } }, { 
"type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 21, "end": 46, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 47, "end": 63, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 783, "end": 801, "name": "a" } }, { "type": "contig", "fields": { "start": 304, "end": 922, "name": "b" } }, { "type": "contig", "fields": { "start": 222, "end": 276, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 304, "end": 922, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 783, "end": 801, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 222, "end": 276, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 783, "end": 801, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 222, "end": 276, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 783, "end": 801, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 222, "end": 276, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 304, "end": 922, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 727, "end": 973, "name": "a" } }, { "type": "contig", "fields": { "start": 77, "end": 668, "name": "b" } }, { "type": "contig", "fields": { "start": 2, "end": 73, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 77, "end": 668, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 727, "end": 973, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 2, "end": 73, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 727, "end": 973, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 2, "end": 73, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 727, "end": 973, "name": "a" } 
} } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 2, "end": 73, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 77, "end": 668, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 328, "end": 740, "name": "a" } }, { "type": "contig", "fields": { "start": 50, "end": 327, "name": "b" } }, { "type": "contig", "fields": { "start": 3, "end": 41, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 50, "end": 327, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 328, "end": 740, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 3, "end": 41, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 328, "end": 740, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 3, "end": 41, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 328, "end": 740, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 3, "end": 41, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 50, "end": 327, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 54, "end": 916, "name": "a" } }, { "type": "contig", "fields": { "start": 46, "end": 151, "name": "b" } }, { "type": "contig", "fields": { "start": 12, "end": 45, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 12, "end": 45, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 46, "end": 151, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 12, "end": 45, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 54, "end": 916, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 12, "end": 45, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 54, "end": 916, "name": "a" } } } }, { 
"type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 12, "end": 45, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 46, "end": 151, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 748, "end": 939, "name": "a" } }, { "type": "contig", "fields": { "start": 747, "end": 747, "name": "b" } }, { "type": "contig", "fields": { "start": 297, "end": 747, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 747, "end": 747, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 748, "end": 939, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 297, "end": 747, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 748, "end": 939, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 297, "end": 747, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 748, "end": 939, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 297, "end": 747, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 747, "end": 747, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 458, "end": 945, "name": "a" } }, { "type": "contig", "fields": { "start": 262, "end": 466, "name": "b" } }, { "type": "contig", "fields": { "start": 34, "end": 186, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 262, "end": 466, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 458, "end": 945, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 34, "end": 186, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 262, "end": 466, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 34, "end": 186, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 458, "end": 945, "name": 
"a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 34, "end": 186, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 262, "end": 466, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 731, "end": 757, "name": "a" } }, { "type": "contig", "fields": { "start": 654, "end": 675, "name": "b" } }, { "type": "contig", "fields": { "start": 355, "end": 472, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 654, "end": 675, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 731, "end": 757, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 355, "end": 472, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 654, "end": 675, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 355, "end": 472, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 731, "end": 757, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 355, "end": 472, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 654, "end": 675, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 970, "end": 970, "name": "a" } }, { "type": "contig", "fields": { "start": 10, "end": 969, "name": "b" } }, { "type": "contig", "fields": { "start": 6, "end": 7, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 10, "end": 969, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 970, "end": 970, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 6, "end": 7, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 10, "end": 969, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 6, "end": 7, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 970, "end": 970, 
"name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 6, "end": 7, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 10, "end": 969, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 164, "end": 171, "name": "a" } }, { "type": "contig", "fields": { "start": 103, "end": 128, "name": "b" } }, { "type": "contig", "fields": { "start": 38, "end": 54, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 38, "end": 54, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 164, "end": 171, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 38, "end": 54, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 103, "end": 128, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 38, "end": 54, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 164, "end": 171, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 38, "end": 54, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 103, "end": 128, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 844, "end": 927, "name": "a" } }, { "type": "contig", "fields": { "start": 862, "end": 881, "name": "b" } }, { "type": "contig", "fields": { "start": 280, "end": 843, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 280, "end": 843, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 844, "end": 927, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 280, "end": 843, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 862, "end": 881, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 280, "end": 843, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 844, 
"end": 927, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 280, "end": 843, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 862, "end": 881, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 372, "end": 845, "name": "a" } }, { "type": "contig", "fields": { "start": 211, "end": 371, "name": "b" } }, { "type": "contig", "fields": { "start": 139, "end": 210, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 211, "end": 371, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 372, "end": 845, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 139, "end": 210, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 211, "end": 371, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 139, "end": 210, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 372, "end": 845, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 139, "end": 210, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 211, "end": 371, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 144, "end": 728, "name": "a" } }, { "type": "contig", "fields": { "start": 144, "end": 877, "name": "b" } }, { "type": "contig", "fields": { "start": 132, "end": 143, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 132, "end": 143, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 144, "end": 728, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 132, "end": 143, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 144, "end": 877, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 132, "end": 143, "name": "c" } }, "b": { "type": "contig", 
"fields": { "start": 144, "end": 728, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 132, "end": 143, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 144, "end": 877, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 709, "end": 751, "name": "a" } }, { "type": "contig", "fields": { "start": 496, "end": 809, "name": "b" } }, { "type": "contig", "fields": { "start": 291, "end": 356, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 496, "end": 809, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 709, "end": 751, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 291, "end": 356, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 709, "end": 751, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 291, "end": 356, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 496, "end": 809, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 291, "end": 356, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 496, "end": 809, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 647, "end": 745, "name": "a" } }, { "type": "contig", "fields": { "start": 561, "end": 561, "name": "b" } }, { "type": "contig", "fields": { "start": 151, "end": 167, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 561, "end": 561, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 647, "end": 745, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 151, "end": 167, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 647, "end": 745, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 151, "end": 167, "name": 
"c" } }, "b": { "type": "contig", "fields": { "start": 561, "end": 561, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 151, "end": 167, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 561, "end": 561, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 165, "end": 417, "name": "a" } }, { "type": "contig", "fields": { "start": 115, "end": 164, "name": "b" } }, { "type": "contig", "fields": { "start": 93, "end": 96, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 115, "end": 164, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 165, "end": 417, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 93, "end": 96, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 165, "end": 417, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 93, "end": 96, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 115, "end": 164, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 93, "end": 96, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 115, "end": 164, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 957, "end": 981, "name": "a" } }, { "type": "contig", "fields": { "start": 519, "end": 596, "name": "b" } }, { "type": "contig", "fields": { "start": 436, "end": 956, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 436, "end": 956, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 957, "end": 981, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 436, "end": 956, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 519, "end": 596, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 
519, "end": 596, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 957, "end": 981, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 519, "end": 596, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 957, "end": 981, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 436, "end": 956, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 957, "end": 981, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 526, "end": 895, "name": "a" } }, { "type": "contig", "fields": { "start": 66, "end": 408, "name": "b" } }, { "type": "contig", "fields": { "start": 37, "end": 278, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 37, "end": 278, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 66, "end": 408, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 66, "end": 408, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 526, "end": 895, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 37, "end": 278, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 526, "end": 895, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 66, "end": 408, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 526, "end": 895, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 37, "end": 278, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 526, "end": 895, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 859, "end": 932, "name": "a" } }, { "type": "contig", "fields": { "start": 85, "end": 609, "name": "b" } }, { "type": "contig", "fields": { "start": 2, "end": 84, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": 
{ "start": 2, "end": 84, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 85, "end": 609, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 85, "end": 609, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 859, "end": 932, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 2, "end": 84, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 859, "end": 932, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 85, "end": 609, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 859, "end": 932, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 2, "end": 84, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 859, "end": 932, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 374, "end": 842, "name": "a" } }, { "type": "contig", "fields": { "start": 82, "end": 373, "name": "b" } }, { "type": "contig", "fields": { "start": 25, "end": 244, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 82, "end": 373, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 374, "end": 842, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 25, "end": 244, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 82, "end": 373, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 25, "end": 244, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 374, "end": 842, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 82, "end": 373, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 374, "end": 842, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 25, "end": 
244, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 374, "end": 842, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 695, "end": 930, "name": "a" } }, { "type": "contig", "fields": { "start": 174, "end": 694, "name": "b" } }, { "type": "contig", "fields": { "start": 162, "end": 173, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 174, "end": 694, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 695, "end": 930, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 162, "end": 173, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 174, "end": 694, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 162, "end": 173, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 695, "end": 930, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 174, "end": 694, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 695, "end": 930, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 162, "end": 173, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 695, "end": 930, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 526, "end": 605, "name": "a" } }, { "type": "contig", "fields": { "start": 392, "end": 525, "name": "b" } }, { "type": "contig", "fields": { "start": 105, "end": 525, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 392, "end": 525, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 526, "end": 605, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 105, "end": 525, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 526, "end": 605, "name": "a" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", 
"fields": { "start": 105, "end": 525, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 392, "end": 525, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 392, "end": 525, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 526, "end": 605, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 105, "end": 525, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 526, "end": 605, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 201, "end": 724, "name": "a" } }, { "type": "contig", "fields": { "start": 20, "end": 193, "name": "b" } }, { "type": "contig", "fields": { "start": 6, "end": 18, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 20, "end": 193, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 201, "end": 724, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 6, "end": 18, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 201, "end": 724, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 6, "end": 18, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 20, "end": 193, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 20, "end": 193, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 201, "end": 724, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 6, "end": 18, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 201, "end": 724, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 352, "end": 433, "name": "a" } }, { "type": "contig", "fields": { "start": 215, "end": 351, "name": "b" } }, { "type": "contig", "fields": { "start": 170, "end": 208, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": 
"contig", "fields": { "start": 215, "end": 351, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 352, "end": 433, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 170, "end": 208, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 352, "end": 433, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 170, "end": 208, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 215, "end": 351, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 215, "end": 351, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 352, "end": 433, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 170, "end": 208, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 352, "end": 433, "name": "a" } } } }] +[{ "type": "contig", "fields": { "start": 315, "end": 808, "name": "a" } }, { "type": "contig", "fields": { "start": 25, "end": 211, "name": "b" } }, { "type": "contig", "fields": { "start": 5, "end": 24, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 5, "end": 24, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 25, "end": 211, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 25, "end": 211, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 315, "end": 808, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 5, "end": 24, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 315, "end": 808, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 25, "end": 211, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 315, "end": 808, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", 
"fields": { "start": 5, "end": 24, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 25, "end": 211, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 329, "end": 667, "name": "a" } }, { "type": "contig", "fields": { "start": 20, "end": 328, "name": "b" } }, { "type": "contig", "fields": { "start": 5, "end": 19, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 20, "end": 328, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 329, "end": 667, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 5, "end": 19, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 20, "end": 328, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 5, "end": 19, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 329, "end": 667, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 20, "end": 328, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 329, "end": 667, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 5, "end": 19, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 20, "end": 328, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 821, "end": 986, "name": "a" } }, { "type": "contig", "fields": { "start": 514, "end": 661, "name": "b" } }, { "type": "contig", "fields": { "start": 86, "end": 425, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 514, "end": 661, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 821, "end": 986, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 86, "end": 425, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 821, "end": 986, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": 
"contig", "fields": { "start": 86, "end": 425, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 514, "end": 661, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 514, "end": 661, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 821, "end": 986, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 86, "end": 425, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 514, "end": 661, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 604, "end": 714, "name": "a" } }, { "type": "contig", "fields": { "start": 2, "end": 603, "name": "b" } }, { "type": "contig", "fields": { "start": 0, "end": 0, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 2, "end": 603, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 604, "end": 714, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 0, "end": 0, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 604, "end": 714, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 0, "end": 0, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 2, "end": 603, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 2, "end": 603, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 604, "end": 714, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 0, "end": 0, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 2, "end": 603, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 309, "end": 781, "name": "a" } }, { "type": "contig", "fields": { "start": 204, "end": 245, "name": "b" } }, { "type": "contig", "fields": { "start": 127, "end": 203, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": 
"contig", "fields": { "start": 127, "end": 203, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 204, "end": 245, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 204, "end": 245, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 309, "end": 781, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 204, "end": 245, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 309, "end": 781, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 127, "end": 203, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 309, "end": 781, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 127, "end": 203, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 204, "end": 245, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 77, "end": 383, "name": "a" } }, { "type": "contig", "fields": { "start": 55, "end": 57, "name": "b" } }, { "type": "contig", "fields": { "start": 39, "end": 49, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 55, "end": 57, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 77, "end": 383, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 39, "end": 49, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 77, "end": 383, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 55, "end": 57, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 77, "end": 383, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 39, "end": 49, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 77, "end": 383, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { 
"start": 39, "end": 49, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 55, "end": 57, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 45, "end": 474, "name": "a" } }, { "type": "contig", "fields": { "start": 27, "end": 44, "name": "b" } }, { "type": "contig", "fields": { "start": 6, "end": 21, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 27, "end": 44, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 45, "end": 474, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 6, "end": 21, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 45, "end": 474, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 27, "end": 44, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 45, "end": 474, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 6, "end": 21, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 45, "end": 474, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 6, "end": 21, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 27, "end": 44, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 34, "end": 319, "name": "a" } }, { "type": "contig", "fields": { "start": 24, "end": 34, "name": "b" } }, { "type": "contig", "fields": { "start": 19, "end": 23, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 19, "end": 23, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 24, "end": 34, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 19, "end": 23, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 34, "end": 319, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 24, 
"end": 34, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 34, "end": 319, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 19, "end": 23, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 34, "end": 319, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 19, "end": 23, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 24, "end": 34, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 953, "end": 995, "name": "a" } }, { "type": "contig", "fields": { "start": 952, "end": 952, "name": "b" } }, { "type": "contig", "fields": { "start": 386, "end": 952, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 952, "end": 952, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 953, "end": 995, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 386, "end": 952, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 953, "end": 995, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 952, "end": 952, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 953, "end": 995, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 386, "end": 952, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 953, "end": 995, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 386, "end": 952, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 952, "end": 952, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 864, "end": 940, "name": "a" } }, { "type": "contig", "fields": { "start": 554, "end": 770, "name": "b" } }, { "type": "contig", "fields": { "start": 273, "end": 313, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { 
"start": 554, "end": 770, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 864, "end": 940, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 273, "end": 313, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 554, "end": 770, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 554, "end": 770, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 864, "end": 940, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 273, "end": 313, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 864, "end": 940, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 273, "end": 313, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 554, "end": 770, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 692, "end": 807, "name": "a" } }, { "type": "contig", "fields": { "start": 656, "end": 691, "name": "b" } }, { "type": "contig", "fields": { "start": 234, "end": 460, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 656, "end": 691, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 692, "end": 807, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 234, "end": 460, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 656, "end": 691, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 656, "end": 691, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 692, "end": 807, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 234, "end": 460, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 692, "end": 807, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 234, 
"end": 460, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 656, "end": 691, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 236, "end": 384, "name": "a" } }, { "type": "contig", "fields": { "start": 213, "end": 215, "name": "b" } }, { "type": "contig", "fields": { "start": 4, "end": 59, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 4, "end": 59, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 236, "end": 384, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 4, "end": 59, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 213, "end": 215, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 213, "end": 215, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 236, "end": 384, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 4, "end": 59, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 236, "end": 384, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 4, "end": 59, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 213, "end": 215, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 188, "end": 885, "name": "a" } }, { "type": "contig", "fields": { "start": 67, "end": 187, "name": "b" } }, { "type": "contig", "fields": { "start": 24, "end": 66, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 67, "end": 187, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 188, "end": 885, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 24, "end": 66, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 67, "end": 187, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 67, 
"end": 187, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 188, "end": 885, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 24, "end": 66, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 188, "end": 885, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 24, "end": 66, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 67, "end": 187, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 419, "end": 653, "name": "a" } }, { "type": "contig", "fields": { "start": 419, "end": 419, "name": "b" } }, { "type": "contig", "fields": { "start": 405, "end": 418, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 405, "end": 418, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 419, "end": 653, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 405, "end": 418, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 419, "end": 419, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 419, "end": 419, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 419, "end": 653, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 405, "end": 418, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 419, "end": 653, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 405, "end": 418, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 419, "end": 419, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 345, "end": 700, "name": "a" } }, { "type": "contig", "fields": { "start": 256, "end": 823, "name": "b" } }, { "type": "contig", "fields": { "start": 188, "end": 255, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { 
"start": 188, "end": 255, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 256, "end": 823, "name": "b" } } } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 256, "end": 823, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 345, "end": 700, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 188, "end": 255, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 345, "end": 700, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 188, "end": 255, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 345, "end": 700, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 188, "end": 255, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 256, "end": 823, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 901, "end": 923, "name": "a" } }, { "type": "contig", "fields": { "start": 222, "end": 368, "name": "b" } }, { "type": "contig", "fields": { "start": 171, "end": 221, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 171, "end": 221, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 222, "end": 368, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 222, "end": 368, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 901, "end": 923, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 171, "end": 221, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 901, "end": 923, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 171, "end": 221, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 901, "end": 923, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { 
"start": 171, "end": 221, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 222, "end": 368, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 36, "end": 927, "name": "a" } }, { "type": "contig", "fields": { "start": 27, "end": 35, "name": "b" } }, { "type": "contig", "fields": { "start": 8, "end": 26, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 27, "end": 35, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 36, "end": 927, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 8, "end": 26, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 27, "end": 35, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 8, "end": 26, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 36, "end": 927, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 8, "end": 26, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 36, "end": 927, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 8, "end": 26, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 27, "end": 35, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 169, "end": 916, "name": "a" } }, { "type": "contig", "fields": { "start": 108, "end": 316, "name": "b" } }, { "type": "contig", "fields": { "start": 100, "end": 105, "name": "c" } }, { "type": "overlap", "fields": { "a": { "type": "contig", "fields": { "start": 108, "end": 316, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 169, "end": 916, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 100, "end": 105, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 169, "end": 916, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", 
"fields": { "start": 100, "end": 105, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 108, "end": 316, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 100, "end": 105, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 169, "end": 916, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 100, "end": 105, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 108, "end": 316, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 291, "end": 774, "name": "a" } }, { "type": "contig", "fields": { "start": 27, "end": 150, "name": "b" } }, { "type": "contig", "fields": { "start": 22, "end": 22, "name": "c" } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 27, "end": 150, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 291, "end": 774, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 22, "end": 22, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 291, "end": 774, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 22, "end": 22, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 27, "end": 150, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 22, "end": 22, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 291, "end": 774, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 22, "end": 22, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 27, "end": 150, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 455, "end": 789, "name": "a" } }, { "type": "contig", "fields": { "start": 440, "end": 454, "name": "b" } }, { "type": "contig", "fields": { "start": 408, "end": 414, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": 
"contig", "fields": { "start": 440, "end": 454, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 455, "end": 789, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 408, "end": 414, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 455, "end": 789, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 408, "end": 414, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 440, "end": 454, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 408, "end": 414, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 455, "end": 789, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 408, "end": 414, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 440, "end": 454, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 555, "end": 727, "name": "a" } }, { "type": "contig", "fields": { "start": 121, "end": 199, "name": "b" } }, { "type": "contig", "fields": { "start": 81, "end": 120, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 81, "end": 120, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 121, "end": 199, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 121, "end": 199, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 555, "end": 727, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 81, "end": 120, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 555, "end": 727, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 121, "end": 199, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 555, "end": 727, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": 
"contig", "fields": { "start": 81, "end": 120, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 555, "end": 727, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 81, "end": 120, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 121, "end": 199, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 700, "end": 754, "name": "a" } }, { "type": "contig", "fields": { "start": 440, "end": 699, "name": "b" } }, { "type": "contig", "fields": { "start": 260, "end": 439, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 440, "end": 699, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 700, "end": 754, "name": "a" } } } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 260, "end": 439, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 440, "end": 699, "name": "b" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 260, "end": 439, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 700, "end": 754, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 440, "end": 699, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 700, "end": 754, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 260, "end": 439, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 700, "end": 754, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 260, "end": 439, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 440, "end": 699, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 950, "end": 990, "name": "a" } }, { "type": "contig", "fields": { "start": 140, "end": 750, "name": "b" } }, { "type": "contig", "fields": { "start": 49, "end": 92, "name": "c" } }, { "type": "nooverlap", 
"fields": { "a": { "type": "contig", "fields": { "start": 140, "end": 750, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 950, "end": 990, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 49, "end": 92, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 950, "end": 990, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 49, "end": 92, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 140, "end": 750, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 140, "end": 750, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 950, "end": 990, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 49, "end": 92, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 950, "end": 990, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 49, "end": 92, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 140, "end": 750, "name": "b" } } } }] +[{ "type": "contig", "fields": { "start": 676, "end": 749, "name": "a" } }, { "type": "contig", "fields": { "start": 67, "end": 675, "name": "b" } }, { "type": "contig", "fields": { "start": 36, "end": 53, "name": "c" } }, { "type": "tangent", "fields": { "a": { "type": "contig", "fields": { "start": 67, "end": 675, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 676, "end": 749, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 36, "end": 53, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 676, "end": 749, "name": "a" } } } }, { "type": "nooverlap", "fields": { "a": { "type": "contig", "fields": { "start": 36, "end": 53, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 67, "end": 675, "name": "b" } } } }, { "type": "kiss", "fields": { "a": { 
"type": "contig", "fields": { "start": 67, "end": 675, "name": "b" } }, "b": { "type": "contig", "fields": { "start": 676, "end": 749, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 36, "end": 53, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 676, "end": 749, "name": "a" } } } }, { "type": "kiss", "fields": { "a": { "type": "contig", "fields": { "start": 36, "end": 53, "name": "c" } }, "b": { "type": "contig", "fields": { "start": 67, "end": 675, "name": "b" } } } }] diff --git a/micall/tests/data/exact_parts_contigs.csv b/micall/tests/data/exact_parts_contigs.csv new file mode 100644 index 000000000..cb9745df6 --- /dev/null +++ b/micall/tests/data/exact_parts_contigs.csv @@ -0,0 +1,3 @@ +ref,match,group_ref,contig +HIV1-B-FR-K03455-seed,1.0,HIV1-B-FR-K03455-seed,GGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACAT +HIV1-B-FR-K03455-seed,1.0,HIV1-B-FR-K03455-seed,ACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAG diff --git a/micall/tests/data/exact_parts_contigs.fasta b/micall/tests/data/exact_parts_contigs.fasta new file mode 100644 index 000000000..963f29c3f --- /dev/null +++ b/micall/tests/data/exact_parts_contigs.fasta @@ -0,0 +1,4 @@ +>gag1 
+GGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACAT +>gag2 +ACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAG diff --git a/micall/tests/data/exact_parts_contigs.plot.svg b/micall/tests/data/exact_parts_contigs.plot.svg new file mode 100644 index 000000000..7bd2b78a5 --- /dev/null +++ b/micall/tests/data/exact_parts_contigs.plot.svg @@ -0,0 +1,187 @@ + + + + + + + + +5' LTR + + + + + +gag + + + + + +vif + + + + + +tat + + + + + +nef + + + + + + + +tat + + + + + +vpu + + + + + +rev + + + + + +3' LTR + + + + + + + +pol + + + + + +vpr + + + + + +rev + + + + + +env + + + + + + + +PR + + + + + +RT + + + + + +INT + + + + + +V3 + + + + + +GP41 + + + + + + + + + + + + + + + + + + + + + + + +HIV1-B-FR-K03455-seed + + + + + + + +1 + + + + + +2 + + + + + + + + + + + + +1 + + + + + + + + + + + + + +2 + + + + \ No newline at end of file diff --git a/micall/tests/data/exact_parts_contigs_stitched.csv b/micall/tests/data/exact_parts_contigs_stitched.csv new file mode 100644 index 000000000..c586ada9c --- /dev/null +++ b/micall/tests/data/exact_parts_contigs_stitched.csv @@ -0,0 +1,2 @@ +ref,match,group_ref,contig 
+HIV1-B-FR-K03455-seed,1.0,HIV1-B-FR-K03455-seed,GGCCTGAAAATCCATACAATACTCCAGTATTTGCCATAAAGAAAAAAGACAGTACTAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAAGACTTCTGGGAAGTTCAATTAGGAATACCACATCCCGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACTGGATGTGGGTGATGCATATTTTTCAGTTCCCTTAGATGAAGACTTCAGGAAGTATACTGCATTTACCATACCTAGTATAAACAATGAGACACCAGGGATTAGATATCAGTACAATGTGCTTCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGAGCCTTTTAGAAAACAAAATCCAGACATAGTTATCTATCAATACATGGATGATTTGTATGTAGGATCTGACTTAGAAATAGGGCAGCATAGAACAAAAATAGAGGAGCTGAGACAACATCTGTTGAGGTGGGGACTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTGGATGGGTTATGAACTCCATCCTGATAAATGGACAGTACAGCCTATAGTGCTGCCAGAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGGAAATTGAATTGGGCAAGTCAGATTTACCCAGGGATTAAAGTAAGGCAATTATGTAAACTCCTTAG diff --git a/micall/tests/data/stitcher_plots/test_big_insertion_in_a_single_contig.svg b/micall/tests/data/stitcher_plots/test_big_insertion_in_a_single_contig.svg new file mode 100644 index 000000000..f3571995d --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_big_insertion_in_a_single_contig.svg @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + + + + + +testref + + + + + + + +1 + + + + + + + + + + + + +1 + + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_big_insertion_in_a_single_contig_2.svg b/micall/tests/data/stitcher_plots/test_big_insertion_in_a_single_contig_2.svg new file mode 100644 index 000000000..7d250074d --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_big_insertion_in_a_single_contig_2.svg @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + + + + + +testref + + + + + + + +1 + + + + + + + + + + + + +1 + + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_correct_processing_complex_nogaps.svg b/micall/tests/data/stitcher_plots/test_correct_processing_complex_nogaps.svg new file mode 100644 index 000000000..60521ecf3 --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_correct_processing_complex_nogaps.svg @@ -0,0 +1,240 @@ + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +testref-1 + + + + + + + +1 + + + + + +2 + + + + + +3 + + + + + +4 + + + + + + + + + + + + +1 + + + + + + + + + + + + + +2 + + + + + + + + + + + + + +3 + + + + + + + + + + + + + +4 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +testref-2 + + + + + + + +5 + + + + + +6 + + + + + +7 + + + + + +8 + + + + + + + + + + + + +5 + + + + + + + + + + + + + +6 + + + + + + + + + + + + + +7 + + + + + + + + + + + + + +8 + + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_correct_processing_of_two_overlapping_and_one_separate_contig.svg b/micall/tests/data/stitcher_plots/test_correct_processing_of_two_overlapping_and_one_separate_contig.svg new file mode 100644 index 000000000..49aa21a16 --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_correct_processing_of_two_overlapping_and_one_separate_contig.svg @@ -0,0 +1,221 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +testref + + + + + + + +1.1 + + + + + +2.2 + + + + + +3.2 + + + + + + + + + + + + + + + +1.1 + + + + + + + + + + + + + +2.2 + + + + + + + + + + + + + + + + +3.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +discards: + + + + + +1.2 + + + + + +2.1 + + + + + +2.3 + + + + + +3.1 + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_correct_processing_of_two_overlapping_and_one_separate_contig_2.svg b/micall/tests/data/stitcher_plots/test_correct_processing_of_two_overlapping_and_one_separate_contig_2.svg new file mode 100644 index 000000000..62cf27e71 --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_correct_processing_of_two_overlapping_and_one_separate_contig_2.svg @@ -0,0 +1,221 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +testref + + + + + + + +1.1 + + + + + +2.2 + + + + + +3.2 + + + + + 
+ + + + + + + + + + +1.1 + + + + + + + + + + + + + +2.2 + + + + + + + + + + + + + + + + +3.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +discards: + + + + + +1.2 + + + + + +2.1 + + + + + +2.3 + + + + + +3.1 + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_correct_stitching_of_one_normal_and_one_unknown.svg b/micall/tests/data/stitcher_plots/test_correct_stitching_of_one_normal_and_one_unknown.svg new file mode 100644 index 000000000..48dbf9a57 --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_correct_stitching_of_one_normal_and_one_unknown.svg @@ -0,0 +1,147 @@ + + + + + + + + + + + + + + + + + + +testref + + + + + + + +1 + + + + + + + + + + + + +1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +unknown: + + + + + + +2 + + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_correct_stitching_of_two_partially_overlapping_contigs.svg b/micall/tests/data/stitcher_plots/test_correct_stitching_of_two_partially_overlapping_contigs.svg new file mode 100644 index 000000000..cf5086981 --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_correct_stitching_of_two_partially_overlapping_contigs.svg @@ -0,0 +1,71 @@ + + + + + + + + + + + + + + + + + + + + + + + + +testref + + + + + + + +1 + + + + + +2 + + + + + + + + + + + + +1 + + + + + + + + + + + + + +2 + + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_correct_stitching_of_two_partially_overlapping_contigs_real_hcv.svg b/micall/tests/data/stitcher_plots/test_correct_stitching_of_two_partially_overlapping_contigs_real_hcv.svg new file mode 100644 index 000000000..eb0036e0e --- /dev/null +++ 
b/micall/tests/data/stitcher_plots/test_correct_stitching_of_two_partially_overlapping_contigs_real_hcv.svg @@ -0,0 +1,145 @@ + + + + + + + + +5' + + + + + +C + + + + + +E1 + + + + + +E2 + + + + + +p7 + + + + + +NS2 + + + + + +NS3 + + + + + +NS4b + + + + + +NS4a + + + + + +NS5a + + + + + +NS5b + + + + + +3' + + + + + + + + + + + + + + + + + + + + + + + +HCV-1a + + + + + + + +1 + + + + + +2 + + + + + + + + + + + + +1 + + + + + + + + + + + + + +2 + + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_correct_stitching_of_two_partially_overlapping_contigs_real_hiv.svg b/micall/tests/data/stitcher_plots/test_correct_stitching_of_two_partially_overlapping_contigs_real_hiv.svg new file mode 100644 index 000000000..662638fc6 --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_correct_stitching_of_two_partially_overlapping_contigs_real_hiv.svg @@ -0,0 +1,187 @@ + + + + + + + + +5' LTR + + + + + +gag + + + + + +vif + + + + + +tat + + + + + +nef + + + + + + + +tat + + + + + +vpu + + + + + +rev + + + + + +3' LTR + + + + + + + +pol + + + + + +vpr + + + + + +rev + + + + + +env + + + + + + + +PR + + + + + +RT + + + + + +INT + + + + + +V3 + + + + + +GP41 + + + + + + + + + + + + + + + + + + + + + + + +HIV1-B-ZA-KP109515-seed + + + + + + + +1 + + + + + +2 + + + + + + + + + + + + +1 + + + + + + + + + + + + + +2 + + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_correct_stitching_of_two_partially_overlapping_contigs_with_insignificant_gaps.svg b/micall/tests/data/stitcher_plots/test_correct_stitching_of_two_partially_overlapping_contigs_with_insignificant_gaps.svg new file mode 100644 index 000000000..2c555bf9d --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_correct_stitching_of_two_partially_overlapping_contigs_with_insignificant_gaps.svg @@ -0,0 +1,290 @@ + + + + + + + + +5' LTR + + + + + +gag + + + + + +vif + + + + + +tat + + + + + +nef + + + + + + + +tat + + + + + +vpu + + + + + +rev + + + + + +3' 
LTR + + + + + + + +pol + + + + + +vpr + + + + + +rev + + + + + +env + + + + + + + +PR + + + + + +RT + + + + + +INT + + + + + +V3 + + + + + +GP41 + + + + + + + + + + + + + + + + + + + + + + + +HIV1-B-FR-K03455-seed + + + + + + + +1.1 + + + + + +2 + + + + + + + + + + + + + + + +1.1 + + + + + + + + + + + + + +2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +discards: + + + + + +1.2 + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_correct_stitching_of_two_partially_overlapping_contigs_with_padding.svg b/micall/tests/data/stitcher_plots/test_correct_stitching_of_two_partially_overlapping_contigs_with_padding.svg new file mode 100644 index 000000000..473330d16 --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_correct_stitching_of_two_partially_overlapping_contigs_with_padding.svg @@ -0,0 +1,183 @@ + + + + + + + + + + + + + + + + + + + + + + + + +testref + + + + + + + +1.1 + + + + + +2.2 + + + + + + + + + + + + + + + +1.1 + + + + + + + + + + + + + + + + +2.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +discards: + + + + + +1.2 + + + + + +2.1 + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_correct_stitching_of_two_partially_overlapping_different_organism_contigs.svg b/micall/tests/data/stitcher_plots/test_correct_stitching_of_two_partially_overlapping_different_organism_contigs.svg new file mode 100644 index 000000000..00499e4f3 --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_correct_stitching_of_two_partially_overlapping_different_organism_contigs.svg @@ -0,0 +1,84 @@ + + + + + + + + + + + + + + + + + + +testref-1 + + + + + + + +1 + + + + + + + + + + + + +1 + + + + + + + + + + + + + + + + + +testref-2 
+ + + + + + + +2 + + + + + + + + + + + + +2 + + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_cross_alignment_around_small_insertion.svg b/micall/tests/data/stitcher_plots/test_cross_alignment_around_small_insertion.svg new file mode 100644 index 000000000..bc367f670 --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_cross_alignment_around_small_insertion.svg @@ -0,0 +1,217 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +testref + + + + + + + +1.4 + + + + + +2.2 + + + + + +1.2 + + + + + + + + + + + + + + + + +1.4 + + + + + +1.2 + + + + + + + + + + + + + +2.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +discards: + + + + + +1.1 + + + + + +1.3 + + + + + +1.5 + + + + + +2.1 + + + + + +2.3 + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_forward_and_reverse_match.svg b/micall/tests/data/stitcher_plots/test_forward_and_reverse_match.svg new file mode 100644 index 000000000..63ad69129 --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_forward_and_reverse_match.svg @@ -0,0 +1,119 @@ + + + + + + + + + + + + +testref + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +anomaly: + + + + + + +1 + + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_gap_around_big_insertion.svg b/micall/tests/data/stitcher_plots/test_gap_around_big_insertion.svg new file mode 100644 index 000000000..77435ae1e --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_gap_around_big_insertion.svg @@ -0,0 +1,211 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +testref + + + + + + + +1.1 + + + + + +2.2 + + + + + +1.3 + + + + + + + + + + + + + + + + + + + +1.1 + + + + + + + + 
+1.3 + + + + + + + + + + + + + +2.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +discards: + + + + + +1.2 + + + + + +2.1 + + + + + +2.3 + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_gap_around_small_insertion.svg b/micall/tests/data/stitcher_plots/test_gap_around_small_insertion.svg new file mode 100644 index 000000000..7cf51be1b --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_gap_around_small_insertion.svg @@ -0,0 +1,211 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +testref + + + + + + + +1.1 + + + + + +2.2 + + + + + +1.3 + + + + + + + + + + + + + + + + + + + +1.1 + + + + + + + + +1.3 + + + + + + + + + + + + + +2.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +discards: + + + + + +1.2 + + + + + +2.1 + + + + + +2.3 + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_identical_stitching_of_one_contig.svg b/micall/tests/data/stitcher_plots/test_identical_stitching_of_one_contig.svg new file mode 100644 index 000000000..ff865ed0e --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_identical_stitching_of_one_contig.svg @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + + + + + +testref + + + + + + + +1 + + + + + + + + + + + + +1 + + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_multiple_reverse_complement_matches.svg b/micall/tests/data/stitcher_plots/test_multiple_reverse_complement_matches.svg new file mode 100644 index 000000000..17bf82242 --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_multiple_reverse_complement_matches.svg @@ -0,0 +1,51 @@ + + + + + + + + + + + + + + + + + + +testref + + + + + + + +1 + + + + + + + + + + + + + + + + + + 
+1 + + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_multiple_reverse_complement_matches_out_of_order.svg b/micall/tests/data/stitcher_plots/test_multiple_reverse_complement_matches_out_of_order.svg new file mode 100644 index 000000000..17bf82242 --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_multiple_reverse_complement_matches_out_of_order.svg @@ -0,0 +1,51 @@ + + + + + + + + + + + + + + + + + + +testref + + + + + + + +1 + + + + + + + + + + + + + + + + + + +1 + + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_overlaping_in_reference_space.svg b/micall/tests/data/stitcher_plots/test_overlaping_in_reference_space.svg new file mode 100644 index 000000000..6d416d0a5 --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_overlaping_in_reference_space.svg @@ -0,0 +1,173 @@ + + + + + + + + + + + + + + + + + + + + + + + + +testref + + + + + + + +1.1 + + + + + +1.3 + + + + + + + + + + + + + + + + + + + +1.1 + + + + + + + + +1.3 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +discards: + + + + + +1.2 + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_partial_align_consensus.svg b/micall/tests/data/stitcher_plots/test_partial_align_consensus.svg new file mode 100644 index 000000000..fa876a30d --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_partial_align_consensus.svg @@ -0,0 +1,51 @@ + + + + + + + + + + + + + + + + + + +testref + + + + + + + +1 + + + + + + + + + + + + + + + + + + +1 + + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_partial_align_consensus_multiple_overlaping_sequences.svg b/micall/tests/data/stitcher_plots/test_partial_align_consensus_multiple_overlaping_sequences.svg new file mode 100644 index 000000000..92a38ddf4 --- /dev/null +++ 
b/micall/tests/data/stitcher_plots/test_partial_align_consensus_multiple_overlaping_sequences.svg @@ -0,0 +1,177 @@ + + + + + + + + + + + + + + + + + + + + + + + + +testref + + + + + + + +1.1 + + + + + +2 + + + + + + + + + + + + + + + +1.1 + + + + + + + + + + + + + + + + +2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +discards: + + + + + +1.2 + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_partial_align_consensus_multiple_sequences.svg b/micall/tests/data/stitcher_plots/test_partial_align_consensus_multiple_sequences.svg new file mode 100644 index 000000000..533ef9bba --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_partial_align_consensus_multiple_sequences.svg @@ -0,0 +1,71 @@ + + + + + + + + + + + + + + + + + + + + + + + + +testref + + + + + + + +1 + + + + + +2 + + + + + + + + + + + + +1 + + + + + + + + + + + + + +2 + + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_reverse_complement_match.svg b/micall/tests/data/stitcher_plots/test_reverse_complement_match.svg new file mode 100644 index 000000000..1e61ce1b3 --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_reverse_complement_match.svg @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + + + + + +testref + + + + + + + +1 + + + + + + + + + + + + +1 + + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_reverse_complement_match_with_padding.svg b/micall/tests/data/stitcher_plots/test_reverse_complement_match_with_padding.svg new file mode 100644 index 000000000..cba7989be --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_reverse_complement_match_with_padding.svg @@ -0,0 +1,51 @@ + + + + + + + + + + + + + + + + + + +testref + + + + + + + +1 + + + + + + + + + + + + + + + + + + +1 + + + + \ No newline at end of file diff --git 
a/micall/tests/data/stitcher_plots/test_separate_stitching_of_non_overlapping_contigs_1.svg b/micall/tests/data/stitcher_plots/test_separate_stitching_of_non_overlapping_contigs_1.svg new file mode 100644 index 000000000..79a8d0e22 --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_separate_stitching_of_non_overlapping_contigs_1.svg @@ -0,0 +1,71 @@ + + + + + + + + + + + + + + + + + + + + + + + + +testref + + + + + + + +1 + + + + + +2 + + + + + + + + + + + + +1 + + + + + + + + + + + + + +2 + + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_separate_stitching_of_non_overlapping_contigs_2.svg b/micall/tests/data/stitcher_plots/test_separate_stitching_of_non_overlapping_contigs_2.svg new file mode 100644 index 000000000..fddc82407 --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_separate_stitching_of_non_overlapping_contigs_2.svg @@ -0,0 +1,71 @@ + + + + + + + + + + + + + + + + + + + + + + + + +testref + + + + + + + +2 + + + + + +1 + + + + + + + + + + + + +1 + + + + + + + + + + + + + +2 + + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_stitch_cross_alignment.svg b/micall/tests/data/stitcher_plots/test_stitch_cross_alignment.svg new file mode 100644 index 000000000..75e2a1f5a --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_stitch_cross_alignment.svg @@ -0,0 +1,179 @@ + + + + + + + + + + + + + + + + + + + + + + + + +testref + + + + + + + +1.4 + + + + + +1.2 + + + + + + + + + + + + + + + + +1.4 + + + + + +1.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +discards: + + + + + +1.1 + + + + + +1.3 + + + + + +1.5 + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_stitch_with_insertion.svg b/micall/tests/data/stitcher_plots/test_stitch_with_insertion.svg new file mode 100644 index 000000000..50bfa70b5 --- 
/dev/null +++ b/micall/tests/data/stitcher_plots/test_stitch_with_insertion.svg @@ -0,0 +1,51 @@ + + + + + + + + + + + + + + + + + + +testref + + + + + + + +1 + + + + + + + + + + + + + + + + + + +1 + + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_stitching_contig_with_big_covered_gap.svg b/micall/tests/data/stitcher_plots/test_stitching_contig_with_big_covered_gap.svg new file mode 100644 index 000000000..21629cb29 --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_stitching_contig_with_big_covered_gap.svg @@ -0,0 +1,93 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +testref + + + + + + + +1.1 + + + + + +2 + + + + + +1.2 + + + + + + + + + + + + + + + + +1.1 + + + + + +1.2 + + + + + + + + + + + + + +2 + + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_stitching_contig_with_big_noncovered_gap.svg b/micall/tests/data/stitcher_plots/test_stitching_contig_with_big_noncovered_gap.svg new file mode 100644 index 000000000..7c692ab46 --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_stitching_contig_with_big_noncovered_gap.svg @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + + + + + +testref + + + + + + + +1 + + + + + + + + + + + + +1 + + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_stitching_contig_with_big_noncovered_gap_2.svg b/micall/tests/data/stitcher_plots/test_stitching_contig_with_big_noncovered_gap_2.svg new file mode 100644 index 000000000..c0e986950 --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_stitching_contig_with_big_noncovered_gap_2.svg @@ -0,0 +1,71 @@ + + + + + + + + + + + + + + + + + + + + + + + + +testref + + + + + + + +1 + + + + + +2 + + + + + + + + + + + + +1 + + + + + + + + + + + + + +2 + + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_stitching_contig_with_small_covered_gap.svg b/micall/tests/data/stitcher_plots/test_stitching_contig_with_small_covered_gap.svg new 
file mode 100644 index 000000000..1323c199e --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_stitching_contig_with_small_covered_gap.svg @@ -0,0 +1,93 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +testref + + + + + + + +1.1 + + + + + +2 + + + + + +1.2 + + + + + + + + + + + + + + + + +1.1 + + + + + +1.2 + + + + + + + + + + + + + +2 + + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_stitching_of_all_overlapping_contigs_into_one_sequence.svg b/micall/tests/data/stitcher_plots/test_stitching_of_all_overlapping_contigs_into_one_sequence.svg new file mode 100644 index 000000000..87a968b81 --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_stitching_of_all_overlapping_contigs_into_one_sequence.svg @@ -0,0 +1,97 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +testref + + + + + + + +1 + + + + + +2 + + + + + +3 + + + + + + + + + + + + +1 + + + + + + + + + + + + + +2 + + + + + + + + + + + + + +3 + + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_stitching_of_completely_identical_contigs.svg b/micall/tests/data/stitcher_plots/test_stitching_of_completely_identical_contigs.svg new file mode 100644 index 000000000..52229e76d --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_stitching_of_completely_identical_contigs.svg @@ -0,0 +1,155 @@ + + + + + + + + + + + + + + + + + + +testref + + + + + + + +3 + + + + + + + + + + + + +3 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +discards: + + + + + + +1 + + + + + + + +2 + + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_stitching_of_identical_contigs.svg b/micall/tests/data/stitcher_plots/test_stitching_of_identical_contigs.svg new file mode 100644 index 000000000..52229e76d --- /dev/null +++ 
b/micall/tests/data/stitcher_plots/test_stitching_of_identical_contigs.svg @@ -0,0 +1,155 @@ + + + + + + + + + + + + + + + + + + +testref + + + + + + + +3 + + + + + + + + + + + + +3 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +discards: + + + + + + +1 + + + + + + + +2 + + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_stitching_of_zero_contigs.svg b/micall/tests/data/stitcher_plots/test_stitching_of_zero_contigs.svg new file mode 100644 index 000000000..3a07b1711 --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_stitching_of_zero_contigs.svg @@ -0,0 +1,18 @@ + + + + + + + +. + + + + + +No contigs found. + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_stitching_partial_align.svg b/micall/tests/data/stitcher_plots/test_stitching_partial_align.svg new file mode 100644 index 000000000..fa876a30d --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_stitching_partial_align.svg @@ -0,0 +1,51 @@ + + + + + + + + + + + + + + + + + + +testref + + + + + + + +1 + + + + + + + + + + + + + + + + + + +1 + + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_stitching_partial_align_multiple_sequences.svg b/micall/tests/data/stitcher_plots/test_stitching_partial_align_multiple_sequences.svg new file mode 100644 index 000000000..2d4ff8f91 --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_stitching_partial_align_multiple_sequences.svg @@ -0,0 +1,183 @@ + + + + + + + + + + + + + + + + + + + + + + + + +testref + + + + + + + +1.1 + + + + + +2.2 + + + + + + + + + + + + + + + +1.1 + + + + + + + + + + + + + + + + +2.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +discards: + + + + + 
+1.2 + + + + + +2.1 + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_stitching_when_multiple_contigs_completely_covered_by_other_contigs.svg b/micall/tests/data/stitcher_plots/test_stitching_when_multiple_contigs_completely_covered_by_other_contigs.svg new file mode 100644 index 000000000..3b2f6aaf5 --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_stitching_when_multiple_contigs_completely_covered_by_other_contigs.svg @@ -0,0 +1,211 @@ + + + + + + + + + + + + + + + + + + + + + + + + +testref + + + + + + + +1.1 + + + + + +2.2 + + + + + + + + + + + + + + + +1.1 + + + + + + + + + + + + + + + + +2.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +discards: + + + + + +1.2 + + + + + +2.1 + + + + + + + + + + + + +3 + + + + + + + + + + + + + +4 + + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_stitching_when_one_contig_completely_covered_by_another.svg b/micall/tests/data/stitcher_plots/test_stitching_when_one_contig_completely_covered_by_another.svg new file mode 100644 index 000000000..d7ed69a7a --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_stitching_when_one_contig_completely_covered_by_another.svg @@ -0,0 +1,159 @@ + + + + + + + + + + + + + + + + + + +testref + + + + + + + +2 + + + + + + + + + + + + + + + + + + +2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +discards: + + + + + + + + + + + + +1 + + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_stitching_with_empty_contigs.svg b/micall/tests/data/stitcher_plots/test_stitching_with_empty_contigs.svg new file mode 100644 index 000000000..4f3fa7839 --- /dev/null +++ 
b/micall/tests/data/stitcher_plots/test_stitching_with_empty_contigs.svg @@ -0,0 +1,147 @@ + + + + + + + + + + + + + + + + + + +testref + + + + + + + +1 + + + + + + + + + + + + +1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +anomaly: + + + + + + +2 + + + + \ No newline at end of file diff --git a/micall/tests/data/stitcher_plots/test_visualizer_correct_labeling_of_different_organism_contigs.svg b/micall/tests/data/stitcher_plots/test_visualizer_correct_labeling_of_different_organism_contigs.svg new file mode 100644 index 000000000..e550e9a1a --- /dev/null +++ b/micall/tests/data/stitcher_plots/test_visualizer_correct_labeling_of_different_organism_contigs.svg @@ -0,0 +1,500 @@ + + + + + + + + + + + + + + + + + + +testref-1 + + + + + + + +1 + + + + + + + + + + + + +1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +anomaly: + + + + + + +3 + + + + + + + + + + + + + + + + + +testref-2 + + + + + + + +2 + + + + + + + + + + + + +2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +discards: + + + + + + +4 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +anomaly: + + + + + + +5 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +unknown: + + + + + + +6 + + + + \ No newline at end of file diff --git a/micall/tests/microtest/make_sample.py 
b/micall/tests/microtest/make_sample.py index 41fa7e26e..bc8d64bf4 100644 --- a/micall/tests/microtest/make_sample.py +++ b/micall/tests/microtest/make_sample.py @@ -106,6 +106,14 @@ def main(): 315, 20, (CodonMutation(207, 'TCT'),)))), + # FastqFile('2060A-V3LOOP_S8_L001_R1_001.fastq', + # '02060', + # False, + # [FastqSection('HIV1-B-FR-K03455-seed', 7106, 7169, 10)]), + # FastqFile('2060A-V3LOOP_S8_L001_R2_001.fastq', + # '02060', + # True, + # [FastqSection('HIV1-B-FR-K03455-seed', 7160, 7217, 10)]), FastqFile('2070A-PR_S9_L001_R1_001.fastq', '2070', False, @@ -268,6 +276,8 @@ def main(): True, sections_2210_2)] for fastq_file in fastq_files: + # if not fastq_file.name.startswith('2060'): + # continue with open(fastq_file.name, 'w') as f: next_cluster = 1 for section in fastq_file.sections: diff --git a/micall/tests/test_aln2counts_report.py b/micall/tests/test_aln2counts_report.py index e1c5c5a69..1a869a261 100644 --- a/micall/tests/test_aln2counts_report.py +++ b/micall/tests/test_aln2counts_report.py @@ -2796,7 +2796,6 @@ def test_write_sequence_coverage_counts_with_unaligned_middle(projects, hcv_ref = projects.getReference('HCV-1a') seq = ref[:100] + hcv_ref[1000:1100] + ref[1000:1100] expected_ref_positions = (list(range(1, 101)) + - list(range(501, 601)) + list(range(1001, 1101))) expected_query_positions = list(range(1, 301)) diff --git a/micall/tests/test_consensus_aligner.py b/micall/tests/test_consensus_aligner.py index af13d86d8..0857a1f9b 100644 --- a/micall/tests/test_consensus_aligner.py +++ b/micall/tests/test_consensus_aligner.py @@ -1,27 +1,46 @@ import math import typing +from typing import Iterable, Tuple +import random from io import StringIO from pytest import approx from micall.core.aln2counts import SeedAmino, ReportAmino -from micall.utils.consensus_aligner import ConsensusAligner, AlignmentWrapper, CigarActions, AminoAlignment +from micall.utils.consensus_aligner import ConsensusAligner, Alignment, AminoAlignment +from aligntools import 
CigarActions, Cigar from micall.core.project_config import ProjectConfig # noinspection PyUnresolvedReferences from micall.tests.test_remap import load_projects +from micall.tests.utils import fixed_random_seed from micall.utils.report_amino import ReportNucleotide +def mutate_sequence(rate, seq): + def mutate(x): + if random.random() >= rate: + return x + + while True: + y = random.choice(['A', 'C', 'G', 'T']) + if y != x: return y + + with fixed_random_seed(42): + return ''.join(map(mutate, seq)) + + def assert_alignments(aligner: ConsensusAligner, - *expected_alignments: AlignmentWrapper): + *expected_alignments: Alignment): __tracebackhide__ = True - wrapped_alignments = tuple(AlignmentWrapper.wrap(alignment) + wrapped_alignments = tuple(Alignment.coerce(alignment) for alignment in aligner.alignments) if repr(wrapped_alignments) != repr(expected_alignments): assert wrapped_alignments == expected_alignments for i, (wrapped_alignment, expected_alignment) in enumerate( zip(wrapped_alignments, expected_alignments)): - for field_name in AlignmentWrapper.init_fields: + for field_name in dir(expected_alignment): + if callable(getattr(expected_alignment, field_name)) or field_name.startswith('_'): + continue wrapped = (i, field_name, getattr(wrapped_alignment, field_name)) expected = (i, field_name, getattr(expected_alignment, field_name)) assert wrapped == expected @@ -70,6 +89,36 @@ def create_reading_frames(consensus: str) -> typing.Dict[int, return reading_frames +def make_alignment( + ctg='', + ctg_len=0, + r_st=0, + r_en=0, + strand=1, + q_st=0, + q_en=0, + mapq=0, + cigar: Iterable[Tuple[int, CigarActions]] = tuple(), + cigar_str=None) -> Alignment: + + cigar = list(cigar) + if not cigar: + cigar = [(max(q_en-q_st, r_en-r_st), CigarActions.MATCH)] + if cigar_str is None: + cigar_str = str(Cigar(cigar)) + + return Alignment(ctg=ctg, + ctg_len=ctg_len, + r_st=r_st, + r_en=r_en, + strand=strand, + q_st=q_st, + q_en=q_en, + mapq=mapq, + cigar=cigar, + 
cigar_str=cigar_str) + + def test_create_reading_frames(): reading_frames = create_reading_frames('AAACCCTTTGGG') @@ -84,30 +133,22 @@ def test_create_reading_frames(): def test_alignment_repr(): - alignment = AlignmentWrapper('R1', 0, 1001, 1100, 1, 1, 100) - - assert repr(alignment) == "AlignmentWrapper('R1', 0, 1001, 1100, 1, 1, 100)" - + alignment = make_alignment('R1', 0, 1001, 1100, 1, 1, 100) -def test_wrap_overrides(): - alignment1 = AlignmentWrapper(r_st=100, r_en=200) - alignment2 = AlignmentWrapper.wrap(alignment1, r_en=300, blen=200, cigar=[]) - expected_alignment = AlignmentWrapper(r_st=100, r_en=300, cigar=[]) - - assert alignment2 == expected_alignment + assert repr(alignment) == "Alignment(ctg='R1', ctg_len=0, r_st=1001, r_en=1100, strand=1, q_st=1, q_en=100, mapq=0, cigar=[(99, CigarActions.MATCH)], cigar_str='99M')" def test_start_contig(projects): seed_name = 'SARS-CoV-2-seed' seed_seq = projects.getReference(seed_name) consensus = seed_seq[1000:2000] - expected_alignment = AlignmentWrapper(ctg='N/A', - ctg_len=len(seed_seq), - r_st=1000, - r_en=2000, - q_st=0, - q_en=1000, - mapq=60) + expected_alignment = make_alignment(ctg='N/A', + ctg_len=len(seed_seq), + r_st=1000, + r_en=2000, + q_st=0, + q_en=1000, + mapq=60) aligner = ConsensusAligner(projects) aligner.start_contig(seed_name, consensus) @@ -121,27 +162,27 @@ def test_start_contig_multiple_sections(projects): seed_name = 'SARS-CoV-2-seed' seed_seq = projects.getReference(seed_name) consensus = seed_seq[6000:6500] + seed_seq[3000:3500] + seed_seq[1000:2000] - expected_alignments = [AlignmentWrapper(ctg='N/A', - ctg_len=len(seed_seq), - r_st=6000, - r_en=6500, - q_st=0, - q_en=500, - mapq=60), - AlignmentWrapper(ctg='N/A', - ctg_len=len(seed_seq), - r_st=3000, - r_en=3500, - q_st=500, - q_en=1000, - mapq=60), - AlignmentWrapper(ctg='N/A', - ctg_len=len(seed_seq), - r_st=1000, - r_en=2000, - q_st=1000, - q_en=2000, - mapq=60)] + expected_alignments = [make_alignment(ctg='N/A', + 
ctg_len=len(seed_seq), + r_st=6000, + r_en=6500, + q_st=0, + q_en=500, + mapq=60), + make_alignment(ctg='N/A', + ctg_len=len(seed_seq), + r_st=3000, + r_en=3500, + q_st=500, + q_en=1000, + mapq=60), + make_alignment(ctg='N/A', + ctg_len=len(seed_seq), + r_st=1000, + r_en=2000, + q_st=1000, + q_en=2000, + mapq=60)] aligner = ConsensusAligner(projects) aligner.start_contig(seed_name, consensus) @@ -371,12 +412,12 @@ def test_start_contig_short_consensus(projects): start = 1560 end = 1617 consensus = seed_seq[start:end] - expected_alignment = AlignmentWrapper(ctg='N/A', - ctg_len=len(seed_seq), - r_st=start, - r_en=end, - q_st=0, - q_en=end-start) + expected_alignment = make_alignment(ctg='N/A', + ctg_len=len(seed_seq), + r_st=start, + r_en=end, + q_st=0, + q_en=end-start) aligner = ConsensusAligner(projects) aligner.start_contig(seed_name, consensus) @@ -389,17 +430,16 @@ def test_start_contig_deletion_minimap2(projects): seed_name = 'SARS-CoV-2-seed' seed_seq = projects.getReference(seed_name) consensus = seed_seq[2000:2030] + seed_seq[2031:2060] - expected_alignment = AlignmentWrapper(ctg='N/A', - ctg_len=len(seed_seq), - r_st=2000, - r_en=2060, - q_st=0, - q_en=59, - mapq=9, - cigar=[[30, CigarActions.MATCH], - [1, CigarActions.DELETE], - [29, CigarActions.MATCH]], - NM=1) + expected_alignment = make_alignment(ctg='N/A', + ctg_len=len(seed_seq), + r_st=2000, + r_en=2060, + q_st=0, + q_en=59, + mapq=9, + cigar=[(30, CigarActions.MATCH), + (1, CigarActions.DELETE), + (29, CigarActions.MATCH)]) aligner = ConsensusAligner(projects) aligner.start_contig(seed_name, consensus) @@ -411,25 +451,26 @@ def test_start_contig_deletion_minimap2(projects): def test_start_contig_big_deletion_minimap2(projects): seed_name = 'HCV-1a' seed_seq = projects.getReference(seed_name) - consensus = seed_seq[340:920] + seed_seq[3000:9000] - expected_alignment = [AlignmentWrapper(ctg='N/A', - ctg_len=len(seed_seq), - r_st=340, - r_en=920, - q_st=0, - q_en=580, - mapq=60, - cigar=[[580, 
CigarActions.MATCH]], - NM=0), - AlignmentWrapper(ctg='N/A', - ctg_len=len(seed_seq), - r_st=3000, - r_en=9000, - q_st=580, - q_en=6580, - mapq=60, - cigar=[[6000, CigarActions.MATCH]], - NM=0)] + seed_seq = mutate_sequence(seq=seed_seq, rate=0.04) + consensus = seed_seq[290:983] + seed_seq[3000:9269] + + expected_alignment = [make_alignment(ctg='N/A', + ctg_len=len(seed_seq), + r_st=290, + r_en=983, + q_st=0, + q_en=693, + mapq=60, + cigar=[(693, CigarActions.MATCH)]), + make_alignment(ctg='N/A', + ctg_len=len(seed_seq), + r_st=3000, + r_en=9269, + q_st=693, + q_en=6962, + mapq=60, + cigar=[(6269, CigarActions.MATCH)])] + aligner = ConsensusAligner(projects) aligner.start_contig(seed_name, consensus) @@ -442,17 +483,16 @@ def test_start_contig_deletion_gotoh(projects): seed_name = 'SARS-CoV-2-seed' seed_seq = projects.getReference(seed_name) consensus = seed_seq[2000:2030] + seed_seq[2031:2050] - expected_alignment = AlignmentWrapper(ctg='N/A', - ctg_len=len(seed_seq), - r_st=2000, - r_en=2050, - q_st=0, - q_en=49, - mapq=0, - cigar=[[30, CigarActions.MATCH], - [1, CigarActions.DELETE], - [19, CigarActions.MATCH]], - NM=0) + expected_alignment = make_alignment(ctg='N/A', + ctg_len=len(seed_seq), + r_st=2000, + r_en=2050, + q_st=0, + q_en=49, + mapq=0, + cigar=[(30, CigarActions.MATCH), + (1, CigarActions.DELETE), + (19, CigarActions.MATCH)]) aligner = ConsensusAligner(projects) aligner.start_contig(seed_name, consensus) @@ -466,15 +506,14 @@ def test_start_contig_matched_deletion_gotoh(projects): seed_name = 'SARS-CoV-2-seed' seed_seq = projects.getReference(seed_name) consensus = seed_seq[2000:2030] + '-' + seed_seq[2031:2050] - expected_alignment = AlignmentWrapper(ctg='N/A', - ctg_len=len(seed_seq), - r_st=2000, - r_en=2050, - q_st=0, - q_en=50, - mapq=0, - cigar=[[50, CigarActions.MATCH]], - NM=0) + expected_alignment = make_alignment(ctg='N/A', + ctg_len=len(seed_seq), + r_st=2000, + r_en=2050, + q_st=0, + q_en=50, + mapq=0, + cigar=[(50, 
CigarActions.MATCH)]) aligner = ConsensusAligner(projects) aligner.start_contig(seed_name, consensus) @@ -488,17 +527,16 @@ def test_start_contig_insertion_minimap2(projects): seed_name = 'SARS-CoV-2-seed' seed_seq = projects.getReference(seed_name) consensus = seed_seq[2000:2030] + 'ACT' + seed_seq[2030:2060] - expected_alignment = AlignmentWrapper(ctg='N/A', - ctg_len=len(seed_seq), - r_st=2000, - r_en=2060, - q_st=0, - q_en=63, - mapq=9, - cigar=[[30, CigarActions.MATCH], - [3, CigarActions.INSERT], - [30, CigarActions.MATCH]], - NM=3) + expected_alignment = make_alignment(ctg='N/A', + ctg_len=len(seed_seq), + r_st=2000, + r_en=2060, + q_st=0, + q_en=63, + mapq=8, + cigar=[(30, CigarActions.MATCH), + (3, CigarActions.INSERT), + (30, CigarActions.MATCH)]) aligner = ConsensusAligner(projects) aligner.start_contig(seed_name, consensus) @@ -511,17 +549,16 @@ def test_start_contig_insertion_gotoh(projects): seed_name = 'SARS-CoV-2-seed' seed_seq = projects.getReference(seed_name) consensus = seed_seq[2000:2030] + 'T' + seed_seq[2030:2050] - expected_alignment = AlignmentWrapper(ctg='N/A', - ctg_len=len(seed_seq), - r_st=2000, - r_en=2050, - q_st=0, - q_en=51, - mapq=0, - cigar=[[30, CigarActions.MATCH], - [1, CigarActions.INSERT], - [20, CigarActions.MATCH]], - NM=0) + expected_alignment = make_alignment(ctg='N/A', + ctg_len=len(seed_seq), + r_st=2000, + r_en=2050, + q_st=0, + q_en=51, + mapq=0, + cigar=[(30, CigarActions.MATCH), + (1, CigarActions.INSERT), + (20, CigarActions.MATCH)]) aligner = ConsensusAligner(projects) aligner.start_contig(seed_name, consensus) @@ -535,13 +572,13 @@ def test_start_contig_with_only_primary_matches(projects): seed_name = 'HIV1-B-FR-K03455-seed' seed_seq = projects.getReference(seed_name) consensus = seed_seq[:500] - expected_alignment = AlignmentWrapper(ctg='N/A', - ctg_len=len(seed_seq), - r_st=0, - r_en=500, - q_st=0, - q_en=500, - mapq=60) + expected_alignment = make_alignment(ctg='N/A', + ctg_len=len(seed_seq), + r_st=0, + 
r_en=500, + q_st=0, + q_en=500, + mapq=60) aligner = ConsensusAligner(projects) aligner.start_contig(seed_name, consensus) @@ -556,13 +593,13 @@ def test_start_contig_reading_frames(projects): reading_frames = create_reading_frames(expected_consensus) seed_name = 'HCV-6t' seed_seq = projects.getReference(seed_name) - expected_alignment = AlignmentWrapper(ctg='N/A', - ctg_len=len(seed_seq), - r_st=4798, - r_en=4807, - q_st=0, - q_en=9, - mapq=0) + expected_alignment = make_alignment(ctg='N/A', + ctg_len=len(seed_seq), + r_st=4798, + r_en=4807, + q_st=0, + q_en=9, + mapq=0) aligner = ConsensusAligner(projects) aligner.start_contig(seed_name, reading_frames=reading_frames) @@ -844,7 +881,7 @@ def test_count_coord_concordance(): aligner = ConsensusAligner(projects) aligner.consensus = "AGATTTCGATGATTCAGAAGATAAGCA" aligner.coordinate_name = 'test-region' - aligner.alignments = [AlignmentWrapper(r_st=0, r_en=27, q_st=0, q_en=27, cigar=[[27, CigarActions.MATCH]])] + aligner.alignments = [make_alignment(r_st=0, r_en=27, q_st=0, q_en=27, cigar=[(27, CigarActions.MATCH)])] expected_concordance_list = [1.0]*len(aligner.consensus) @@ -864,7 +901,7 @@ def test_count_coord_concordance_mismatch(): aligner.consensus = "AGATTTCGATGATTCAGAAGATTTGCA" # changed nucs: ^^ aligner.coordinate_name = 'test-region' - aligner.alignments = [AlignmentWrapper(r_st=0, r_en=27, q_st=0, q_en=27, cigar=[[27, CigarActions.MATCH]])] + aligner.alignments = [make_alignment(r_st=0, r_en=27, q_st=0, q_en=27, cigar=[(27, CigarActions.MATCH)])] # At the end of the consensus, the size of the averaging window for the concordance decreases from 20 to 11. 
# The concordance therefore decreases from 18/20 to 9/11 @@ -887,7 +924,7 @@ def test_count_coord_concordance_short_match(): aligner.consensus = "AGATTTCGATGATTCTCTTCTAAACGT" # last match position: ^ aligner.coordinate_name = 'test-region' - aligner.alignments = [AlignmentWrapper(r_st=0, r_en=15, q_st=0, q_en=15, cigar=[[15, CigarActions.MATCH]])] + aligner.alignments = [make_alignment(r_st=0, r_en=15, q_st=0, q_en=15, cigar=[(15, CigarActions.MATCH)])] # We start out with 100% match for the first 6 positions expected_concordance_list = [1.0] * 6 # After that, the averaging window (whose size is still increasing) starts to slide past the match: @@ -915,8 +952,8 @@ def test_count_coord_concordance_two_matches(): aligner = ConsensusAligner(projects) aligner.consensus = "AGATTTCGATGATTCAGAAGATTTGCATTT" aligner.coordinate_name = 'test-region' - aligner.alignments = [AlignmentWrapper(r_st=0, r_en=12, q_st=0, q_en=12, cigar=[[12, CigarActions.MATCH]]), - AlignmentWrapper(r_st=15, r_en=30, q_st=15, q_en=30, cigar=[[15, CigarActions.MATCH]])] + aligner.alignments = [make_alignment(r_st=0, r_en=12, q_st=0, q_en=12, cigar=[(12, CigarActions.MATCH)]), + make_alignment(r_st=15, r_en=30, q_st=15, q_en=30, cigar=[(15, CigarActions.MATCH)])] expected_concordance_list = [1.0] * 3 + [12/13, 12/14, 12/15, 13/16, 14/17, 15/18, 16/19] + [17/20]*11 + \ [16/19, 15/18, 15/17, 15/16] + [1.0]*5 @@ -937,9 +974,9 @@ def test_count_coord_concordance_with_insertion(): aligner.consensus = "AGATTTCGACCCTGATTCAGAAGATTTGCA" # insertion: ^^^ aligner.coordinate_name = 'test-region' - aligner.alignments = [AlignmentWrapper(r_st=0, r_en=27, q_st=0, q_en=30, cigar=[[9, CigarActions.MATCH], - [3, CigarActions.INSERT], - [18, CigarActions.MATCH]])] + aligner.alignments = [make_alignment(r_st=0, r_en=27, q_st=0, q_en=30, cigar=[(9, CigarActions.MATCH), + (3, CigarActions.INSERT), + (18, CigarActions.MATCH)])] # the window size increases from 10 to 20, while the averaging window slides over the insertion 
expected_concordance_list = [9/10, 9/11, 9/12, 10/13, 11/14, 12/15, 13/16, 14/17, 15/18, 16/19] # for 10 positions in the middle, the insertion is included in the full window size fo 20 @@ -963,9 +1000,9 @@ def test_count_coord_concordance_with_deletion(): aligner.consensus = "AGATTTCGATTCAGAAGATTTGCA" # deletion behind this pos: ^ aligner.coordinate_name = 'test-region' - aligner.alignments = [AlignmentWrapper(r_st=0, r_en=27, q_st=0, q_en=30, cigar=[[9, CigarActions.MATCH], - [3, CigarActions.DELETE], - [15, CigarActions.MATCH]])] + aligner.alignments = [make_alignment(r_st=0, r_en=27, q_st=0, q_en=30, cigar=[(9, CigarActions.MATCH), + (3, CigarActions.DELETE), + (15, CigarActions.MATCH)])] # the deletion does not decrease the concordance expected_concordance_list = [1.0]*len(aligner.consensus) @@ -984,7 +1021,7 @@ def test_count_seed_region_concordance(projects): seed_name = 'test-seed' seed_ref = "AGATTTCGATGATTCAGAAGATTTGCA" region = 'test-region' - seed_alignments = [AlignmentWrapper(r_st=0, r_en=27, q_st=0, q_en=27, cigar=[[27, CigarActions.MATCH]])] + seed_alignments = [make_alignment(r_st=0, r_en=27, q_st=0, q_en=27, cigar=[(27, CigarActions.MATCH)])] expected_file = """\ seed_name,contig,region,pct_concordance,pct_covered @@ -1007,7 +1044,7 @@ def test_count_seed_region_concordance_mismatch(projects): seed_name = 'test-seed' seed_ref = "AGATTTCGATGATTCAGAAGATTTGCATGA" region = 'test-region' - seed_alignments = [AlignmentWrapper(r_st=0, r_en=30, q_st=0, q_en=30, cigar=[[30, CigarActions.MATCH]])] + seed_alignments = [make_alignment(r_st=0, r_en=30, q_st=0, q_en=30, cigar=[(30, CigarActions.MATCH)])] expected_file = """\ seed_name,contig,region,pct_concordance,pct_covered @@ -1029,7 +1066,7 @@ def test_count_seed_region_concordance_seed_not_aligned(projects): seed_name = 'test-seed' seed_ref = "AGATTTCGATGATTCAGAAGATTTGCATGA" region = 'test-region' - seed_alignments = [AlignmentWrapper(r_st=0, r_en=15, q_st=0, q_en=15, cigar=[[15, CigarActions.MATCH]])] + 
seed_alignments = [make_alignment(r_st=0, r_en=15, q_st=0, q_en=15, cigar=[(15, CigarActions.MATCH)])] expected_file = """\ seed_name,contig,region,pct_concordance,pct_covered @@ -1051,7 +1088,7 @@ def test_count_seed_region_concordance_larger_match(projects): seed_name = 'test-seed' seed_ref = "AGATTTCGATGATTCAGAAGATTTGCATGA" region = 'test-region' - seed_alignments = [AlignmentWrapper(r_st=0, r_en=30, q_st=0, q_en=30, cigar=[[30, CigarActions.MATCH]])] + seed_alignments = [make_alignment(r_st=0, r_en=30, q_st=0, q_en=30, cigar=[(30, CigarActions.MATCH)])] expected_file = """\ seed_name,contig,region,pct_concordance,pct_covered @@ -1074,9 +1111,9 @@ def test_count_seed_region_concordance_insertion(projects): seed_name = 'test-seed' seed_ref = "AGATTTCGATGATTCAGAAGATTTGCA" region = 'test-region' - seed_alignments = [AlignmentWrapper(r_st=0, r_en=27, q_st=0, q_en=30, cigar=[[9, CigarActions.MATCH], - [3, CigarActions.INSERT], - [18, CigarActions.MATCH]])] + seed_alignments = [make_alignment(r_st=0, r_en=27, q_st=0, q_en=30, cigar=[(9, CigarActions.MATCH), + (3, CigarActions.INSERT), + (18, CigarActions.MATCH)])] expected_file = """\ seed_name,contig,region,pct_concordance,pct_covered @@ -1099,9 +1136,9 @@ def test_count_seed_region_concordance_deletion(projects): seed_name = 'test-seed' seed_ref = "AGATTTCGATGATTCAGAAGATTTGCATGA" region = 'test-region' - seed_alignments = [AlignmentWrapper(r_st=0, r_en=30, q_st=0, q_en=27, cigar=[[9, CigarActions.MATCH], - [3, CigarActions.DELETE], - [18, CigarActions.MATCH]])] + seed_alignments = [make_alignment(r_st=0, r_en=30, q_st=0, q_en=27, cigar=[(9, CigarActions.MATCH), + (3, CigarActions.DELETE), + (18, CigarActions.MATCH)])] expected_file = """\ seed_name,contig,region,pct_concordance,pct_covered diff --git a/micall/tests/test_contig_stitcher.py b/micall/tests/test_contig_stitcher.py new file mode 100644 index 000000000..a3f8c5393 --- /dev/null +++ b/micall/tests/test_contig_stitcher.py @@ -0,0 +1,1887 @@ +import random 
+from dataclasses import dataclass +import logging +import os +import pytest +from typing import Tuple, List + +from aligntools import CigarActions, CigarHit, Cigar + +import micall.core.contig_stitcher as stitcher +from micall.core.contig_stitcher import ( + split_contigs_with_gaps, + stitch_contigs, + GenotypedContig, + merge_intervals, + find_covered_contig, + stitch_consensus, + calculate_concordance, + align_all_to_reference, + disambiguate_concordance, + lstrip, + rstrip, +) +from micall.core.plot_contigs import plot_stitcher_coverage +from micall.tests.utils import mock_align_consensus, MockAlignment, fixed_random_seed +from micall.tests.test_fasta_to_csv import check_hcv_db, DEFAULT_DATABASE # activates the fixture +from micall.tests.test_remap import load_projects # activates the "projects" fixture + + +logging.getLogger("micall.core.contig_stitcher").setLevel(logging.DEBUG) +logging.getLogger("micall.core.plot_contigs").setLevel(logging.DEBUG) + + +# make linters not complain about unused imports. 
+assert check_hcv_db is not None +assert DEFAULT_DATABASE is not None +assert load_projects is not None + + +@pytest.fixture() +def exact_aligner(monkeypatch): + monkeypatch.setattr("micall.core.contig_stitcher.align_consensus", mock_align_consensus) + + +@pytest.fixture +def visualizer(request, tmp_path): + stitcher.context.set(stitcher.StitcherContext.make()) + test_name = request.node.name + plot_name = test_name + ".svg" + pwd = os.path.dirname(__file__) + plots_dir = os.path.join(pwd, "data", "stitcher_plots") + os.makedirs(plots_dir, exist_ok=True) + path_to_expected = os.path.join(plots_dir, plot_name) + path_to_produced = os.path.join(tmp_path, plot_name) + + def check(): + logs = stitcher.context.get().events + figure = plot_stitcher_coverage(logs, path_to_produced) + + with open(path_to_produced, "r") as produced_file: + produced_data = produced_file.read() + with open(path_to_expected, "r") as expected_file: + expected_data = expected_file.read() + + assert produced_data == expected_data, ( + "The contents of the stitched contigs plot" + " does not match the expected contents." + ) + + return figure + + return check + + +def test_identical_stitching_of_one_contig(exact_aligner, visualizer): + # Scenario: When stitching one contig, it remains the same. + + contigs = [ + GenotypedContig( + name="a", + seq="ACTGACTG" * 100, + ref_name="testref", + group_ref="testref", + ref_seq="T" * 20 + "ACTGACTG" * 110 + "T" * 20, + match_fraction=1.0, + ), + ] + + results = list(stitch_contigs(contigs)) + assert len(results) == 1 + assert results[0].seq == contigs[0].seq + + assert len(visualizer().elements) > len(contigs) + + +def test_separate_stitching_of_non_overlapping_contigs_1(exact_aligner, visualizer): + # Scenario: When stitching multiple non-overlapping contigs, the order doesn't matter. 
+ + ref_seq = "A" * 100 + "C" * 100 + + contigs = [ + GenotypedContig( + name="a", + seq="A" * 70, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + GenotypedContig( + name="b", + seq="C" * 70, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + ] + + results = list(stitch_contigs(contigs)) + + # No claims about the output order, so wrap into set() + assert {contig.seq for contig in contigs} == {contig.seq for contig in results} + + assert len(visualizer().elements) > len(contigs) + + +def test_separate_stitching_of_non_overlapping_contigs_2(exact_aligner, visualizer): + # Scenario: When stitching multiple non-overlapping contigs, + # the order doesn't matter. + + ref_seq = "A" * 100 + "C" * 100 + + contigs = [ + GenotypedContig( + name="b", + seq="C" * 70, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + GenotypedContig( + name="a", + seq="A" * 70, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + ] + + results = list(stitch_contigs(contigs)) + + # No claims about the output order, so wrap into set() + assert {contig.seq for contig in contigs} == {contig.seq for contig in results} + + assert len(visualizer().elements) > len(contigs) + + +def test_correct_stitching_of_two_partially_overlapping_contigs( + exact_aligner, visualizer +): + # Scenario: Two partially overlapping contigs are stitched correctly into a single sequence. 
+ + ref_seq = "A" * 100 + "C" * 100 + + contigs = [ + GenotypedContig( + name="a", + seq="A" * 50 + "C" * 20, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + GenotypedContig( + name="b", + seq="A" * 20 + "C" * 50, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + ] + + results = list(stitch_contigs(contigs)) + assert len(results) == 1 + + result = results[0] + + assert 100 == len(result.seq) + assert result.seq == "A" * 50 + "C" * 50 + + assert len(visualizer().elements) > len(contigs) + + +def test_correct_stitching_of_two_partially_overlapping_contigs_with_padding( + exact_aligner, visualizer +): + # Scenario: Two partially overlapping contigs are stitched correctly into a single sequence. + + ref_seq = "A" * 100 + "C" * 100 + + contigs = [ + GenotypedContig( + name="a", + seq="M" * 10 + "A" * 50 + "C" * 20 + "Z" * 10, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + GenotypedContig( + name="b", + seq="J" * 10 + "A" * 20 + "C" * 50 + "N" * 10, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + ] + + results = list(stitch_contigs(contigs)) + assert len(results) == 1 + + result = results[0] + + assert 120 == len(result.seq) + assert result.seq == "M" * 10 + "A" * 50 + "C" * 50 + "N" * 10 + + assert len(visualizer().elements) > len(contigs) + + +def test_correct_stitching_of_two_partially_overlapping_contigs_real_hiv( + projects, visualizer +): + # Scenario: Two partially overlapping contigs are stitched + # correctly into a single sequence. Not using exact aligner this time. 
+ + ref_name = "HIV1-B-ZA-KP109515-seed" + ref = projects.getReference(ref_name) + + contigs = [ + GenotypedContig( + name="a", + seq=ref[1700:2000], + ref_name=ref_name, + group_ref=ref_name, + ref_seq=ref, + match_fraction=0.5, + ), + GenotypedContig( + name="b", + seq=ref[1900:2200], + ref_name=ref_name, + group_ref=ref_name, + ref_seq=ref, + match_fraction=0.5, + ), + ] + + results = list(stitch_contigs(contigs)) + assert len(results) == 1 + + result = results[0] + + assert 500 == len(result.seq) + assert result.seq == ref[1700:2200] + + assert len(visualizer().elements) > len(contigs) + + +def test_correct_stitching_of_two_partially_overlapping_contigs_real_hcv( + projects, visualizer +): + # Scenario: Two partially overlapping contigs are stitched + # correctly into a single sequence. Not using exact aligner this time. + + ref_name = "HCV-1a" + ref = projects.getReference(ref_name) + group_ref = ref_name + + contigs = [ + GenotypedContig( + name="a", + seq=ref[1700:2000], + ref_name=ref_name, + group_ref=group_ref, + ref_seq=ref, + match_fraction=0.5, + ), + GenotypedContig( + name="b", + seq=ref[1900:2200], + ref_name=ref_name, + group_ref=group_ref, + ref_seq=ref, + match_fraction=0.5, + ), + ] + + results = list(stitch_contigs(contigs)) + assert len(results) == 1 + + result = results[0] + + assert 500 == len(result.seq) + assert result.seq == ref[1700:2200] + + assert len(visualizer().elements) > len(contigs) + + +def test_correct_stitching_of_two_partially_overlapping_contigs_with_insignificant_gaps( + projects, visualizer +): + # Scenario: Two partially overlapping contigs are stitched + # correctly into a single sequence, with insignificant gaps. 
+ + hxb2_name = "HIV1-B-FR-K03455-seed" + ref = projects.getReference(hxb2_name) + gap_ref = "".join(c if i % 30 > 2 else "" for i, c in enumerate(ref)) + + contigs = [ + GenotypedContig( + name="a", + seq=gap_ref[1700:2000], + ref_name=hxb2_name, + group_ref=hxb2_name, + ref_seq=ref, + match_fraction=0.5, + ), + GenotypedContig( + name="b", + seq=gap_ref[1900:2200], + ref_name=hxb2_name, + group_ref=hxb2_name, + ref_seq=ref, + match_fraction=0.5, + ), + ] + + results = list(stitch_contigs(contigs)) + assert len(results) == 1 + + result = results[0] + + assert 500 == len(result.seq) + assert result.seq == gap_ref[1700:2200] + + assert len(visualizer().elements) > len(contigs) + + +def test_correct_processing_of_two_overlapping_and_one_separate_contig( + exact_aligner, visualizer +): + # Scenario: Two overlapping contigs are stitched together, the non-overlapping is kept separate. + # One contig on the right, and two on the left. + + ref_seq = "Z" * 5 + "A" * 100 + "C" * 100 + "T" * 100 + "Y" * 5 + + contigs = [ + GenotypedContig( + name="a", + seq="M" * 5 + "A" * 50 + "C" * 20 + "J" * 5, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + GenotypedContig( + name="b", + seq="Q" * 5 + "A" * 20 + "C" * 50 + "I" * 5, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + GenotypedContig( + name="c", + seq="N" * 5 + "C" * 20 + "T" * 50 + "H" * 5, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + ] + + results = list(stitch_consensus(contigs)) + assert len(results) == 1 + assert results[0].seq == contigs[0].seq.rstrip("J") + "C" * 30 + contigs[ + 2 + ].seq.lstrip("N") + assert len(visualizer().elements) > len(contigs) + + +def test_correct_processing_of_two_overlapping_and_one_separate_contig_2( + exact_aligner, visualizer +): + # Scenario: Two overlapping contigs are stitched together, the non-overlapping is kept separate. 
+ # One contig on the left, and two on the right. + + ref_seq = "Z" * 5 + "A" * 100 + "C" * 100 + "T" * 100 + "Y" * 5 + + contigs = [ + GenotypedContig( + name="a", + seq="N" * 5 + "A" * 50 + "C" * 20 + "H" * 5, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + GenotypedContig( + name="b", + seq="M" * 5 + "C" * 50 + "T" * 20 + "J" * 5, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + GenotypedContig( + name="c", + seq="Q" * 5 + "C" * 20 + "T" * 50 + "I" * 5, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + ] + + results = list(stitch_consensus(contigs)) + assert len(results) == 1 + assert results[0].seq == contigs[0].seq.rstrip("H") + "C" * 30 + contigs[ + 2 + ].seq.lstrip("Q") + assert len(visualizer().elements) > len(contigs) + + +def test_stitching_of_all_overlapping_contigs_into_one_sequence( + exact_aligner, visualizer +): + # Scenario: All contigs have some overlapping parts, resulting in one continuous sequence after stitching. + + ref_seq = "A" * 100 + "C" * 100 + "T" * 100 + + contigs = [ + GenotypedContig( + name="a", + seq="A" * 50 + "C" * 20, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + GenotypedContig( + name="b", + seq="A" * 20 + "C" * 100 + "T" * 20, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + GenotypedContig( + name="c", + seq="C" * 20 + "T" * 50, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + ] + + results = list(stitch_contigs(contigs)) + assert len(results) == 1 + + result = results[0] + + assert 200 == len(result.seq) + assert result.seq == "A" * 50 + "C" * 100 + "T" * 50 + + assert len(visualizer().elements) > len(contigs) + + +def test_stitching_with_empty_contigs(exact_aligner, visualizer): + # Scenario: The function is able to handle and ignore empty contigs. 
+ + ref_seq = "A" * 100 + + contigs = [ + GenotypedContig( + name="a", + seq=ref_seq, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + GenotypedContig( + name="b", + seq="", + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + ] + + results = list(stitch_contigs(contigs)) + assert {contig.seq for contig in contigs} == {contig.seq for contig in results} + + assert len(visualizer().elements) > len(contigs) + + +def test_stitching_of_identical_contigs(exact_aligner, visualizer): + # Scenario: The function correctly handles and avoids duplication when identical contigs are stitched together. + + contigs = [ + GenotypedContig( + name=name, + seq="ACTGACTG" * 100, + ref_name="testref", + group_ref="testref", + ref_seq="ACTGACTG" * 100, + match_fraction=1.0, + ) + for name in ["a", "b", "c"] + ] + + results = list(stitch_contigs(contigs)) + assert len(results) == 1 + assert results[0].seq == contigs[2].seq + + assert len(visualizer().elements) > len(contigs) + + +def test_stitching_of_completely_identical_contigs(exact_aligner, visualizer): + # Scenario: The function correctly handles and avoids duplication when completely identical contigs + # are stitched together. + + contigs = [ + GenotypedContig( + name="x", + seq="ACTGACTG" * 100, + ref_name="testref", + group_ref="testref", + ref_seq="ACTGACTG" * 100, + match_fraction=1.0, + ) + for copy in [1, 2, 3] + ] + + results = list(stitch_contigs(contigs)) + assert len(results) == 1 + assert results[0].seq == contigs[2].seq + + assert len(visualizer().elements) > len(contigs) + + +def test_stitching_of_zero_contigs(exact_aligner, visualizer): + # Scenario: The function does not crash if no contigs given. 
+ + contigs = [] + results = list(stitch_contigs(contigs)) + assert results == contigs + + assert len(visualizer().elements) > 0 + + +def test_correct_stitching_of_two_partially_overlapping_different_organism_contigs( + exact_aligner, visualizer +): + # Scenario: Two partially overlapping contigs, but which come from different organism, + # are not stitched into a single sequence. + + ref_seq = "A" * 100 + "C" * 100 + + contigs = [ + GenotypedContig( + name="a", + seq="A" * 50 + "C" * 20, + ref_name="testref-1", + group_ref="testref-1", + ref_seq=ref_seq, + match_fraction=0.5, + ), + GenotypedContig( + name="b", + seq="A" * 20 + "C" * 50, + ref_name="testref-2", + group_ref="testref-2", + ref_seq=ref_seq, + match_fraction=0.5, + ), + ] + + results = list(stitch_contigs(contigs)) + assert len(results) == 2 + + assert {contig.seq for contig in contigs} == {contig.seq for contig in results} + + assert len(visualizer().elements) > len(contigs) + + +def test_correct_processing_complex_nogaps(exact_aligner, visualizer): + # Scenario: There are two reference organisms. + # Each with 4 contigs. + # For each, three overlapping contigs are stitched together, the non-overlapping is kept separate. 
+ + ref_seq = "A" * 100 + "C" * 100 + "T" * 100 + "G" * 100 + + contigs = [ + [ + GenotypedContig( + name="a" + ref_name, + seq="A" * 50 + "C" * 20, + ref_name=ref_name, + group_ref=ref_name, + ref_seq=ref_seq, + match_fraction=0.5, + ), + GenotypedContig( + name="b" + ref_name, + seq="A" * 20 + "C" * 50, + ref_name=ref_name, + group_ref=ref_name, + ref_seq=ref_seq, + match_fraction=0.5, + ), + GenotypedContig( + name="c" + ref_name, + seq="C" * 70 + "T" * 20, + ref_name=ref_name, + group_ref=ref_name, + ref_seq=ref_seq, + match_fraction=0.5, + ), + GenotypedContig( + name="d" + ref_name, + seq="T" * 20 + "G" * 50, + ref_name=ref_name, + group_ref=ref_name, + ref_seq=ref_seq, + match_fraction=0.5, + ), + ] + for ref_name in ["testref-1", "testref-2"] + ] + + contigs = sum(contigs, start=[]) + + results = list(stitch_contigs(contigs)) + assert len(results) == 4 + + assert 170 == len(results[0].seq) + assert results[0].seq == "A" * 50 + "C" * 100 + "T" * 20 + assert results[0].group_ref == "testref-1" + + assert 170 == len(results[1].seq) + assert results[1].seq == "A" * 50 + "C" * 100 + "T" * 20 + assert results[1].group_ref == "testref-2" + + assert results[2].seq == contigs[3].seq + assert results[3].seq == contigs[7].seq + + assert len(visualizer().elements) > len(contigs) + + +def test_stitching_when_one_contig_completely_covered_by_another( + exact_aligner, visualizer +): + # Scenario: If one contig is completely covered by another contig, + # the completely covered contig must be dropped. 
+ + ref_seq = "A" * 100 + "C" * 100 + + contigs = [ + GenotypedContig( + name="a", + seq="M" * 10 + "A" * 20 + "C" * 20 + "O" * 10, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + GenotypedContig( + name="b", + seq="P" * 10 + "A" * 50 + "C" * 50 + "Z" * 10, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + ] + + results = list(stitch_contigs(contigs)) + assert len(results) == 1 + + # Test to ensure that the final result contains the contig 'b' and + # does not contain the completely covered contig 'a'. + assert results[0].seq == contigs[1].seq + + assert len(visualizer().elements) > len(contigs) + + +def test_stitching_when_multiple_contigs_completely_covered_by_other_contigs( + exact_aligner, visualizer +): + # Scenario: If two contigs are completely covered by another two contigs. + + ref_seq = "A" * 100 + "B" * 100 + "C" * 100 + "D" * 100 + + contigs = [ + GenotypedContig( + name="a", + seq="M" * 10 + "A" * 20 + "B" * 100 + "C" * 20 + "O" * 10, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + GenotypedContig( + name="b", + seq="K" * 10 + "B" * 20 + "C" * 100 + "D" * 20 + "J" * 10, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + GenotypedContig( + name="c", + seq="I" * 10 + "B" * 60 + "C" * 80 + "P" * 10, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + GenotypedContig( + name="d", + seq="Z" * 10 + "B" * 80 + "C" * 60 + "F" * 10, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + ] + + results = list(stitch_contigs(contigs)) + assert len(results) == 1 + + assert len(visualizer().elements) > len(contigs) + + +def test_stitching_contig_with_big_noncovered_gap(exact_aligner, visualizer): + # Scenario: One contig has a big gap, which is however not covered by anything else. 
+ + ref_seq = "A" * 100 + "C" * 100 + "T" * 100 + + contigs = [ + GenotypedContig( + name="a", + seq="A" * 50 + "T" * 50, # mind the C gap + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + ] + + results = list(stitch_contigs(contigs)) + + assert {contig.seq for contig in contigs} == {contig.seq for contig in results} + + assert len(visualizer().elements) > len(contigs) + + +def test_stitching_contig_with_big_noncovered_gap_2(exact_aligner, visualizer): + # Scenario: One contig has a big gap, which is however not covered by anything else. + + ref_seq = "A" * 100 + "C" * 100 + "T" * 100 + "G" * 100 + + contigs = [ + GenotypedContig( + name="a", + seq="A" * 50 + "T" * 50, # mind the C gap + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + GenotypedContig( + name="B", + seq="G" * 50, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + ] + + results = list(stitch_contigs(contigs)) + + assert {contig.seq for contig in contigs} == {contig.seq for contig in results} + + assert len(visualizer().elements) > len(contigs) + + +def test_stitching_contig_with_big_covered_gap(exact_aligner, visualizer): + # Scenario: If one contig has a big gap covered by another contig. 
+ + ref_seq = "G" * 100 + "A" * 100 + "C" * 100 + "T" * 100 + "G" * 100 + + contigs = [ + GenotypedContig( + name="a", + seq="G" * 50 + "A" * 50 + "T" * 100, # mind the gap + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + GenotypedContig( + name="b", + seq="A" * 100 + "C" * 100 + "T" * 50, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + ] + + contigs = list(align_all_to_reference(contigs)) + assert len(contigs) == 2 + assert len(list(contigs[0].alignment.deletions())) == 1 + assert len(list(contigs[1].alignment.deletions())) == 0 + + results = list(split_contigs_with_gaps(contigs)) + assert len(results) == 3 + assert all(list(contig.alignment.deletions()) == [] for contig in results) + + assert len(visualizer().elements) > len(contigs) + + +def test_stitching_contig_with_small_covered_gap(exact_aligner, visualizer): + # Scenario: If one contig has a small gap covered by another contig. + + ref_seq = "G" * 100 + "A" * 29 + "C" * 100 + "T" * 100 + + contigs = [ + GenotypedContig( + name="a", + seq="G" * 100 + "A" * 0 + "C" * 100, # mind the gap + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + GenotypedContig( + name="b", + seq="A" * 29 + "C" * 50, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + ] + + contigs = list(align_all_to_reference(contigs)) + assert len(contigs) == 2 + assert len(list(contigs[0].alignment.deletions())) == 1 + assert len(list(contigs[1].alignment.deletions())) == 0 + results = list(split_contigs_with_gaps(contigs)) + assert len(results) == 3 + + assert len(visualizer().elements) > len(contigs) + + assert all(x.seq == lstrip(rstrip(x)).seq for x in results) + assert {contig.seq for contig in contigs} == {contig.seq for contig in results} + + +def test_stitching_partial_align(exact_aligner, visualizer): + # Scenario: A single contig has a sequence that partially aligns to 
the reference sequence. + + contigs = [ + GenotypedContig( + name="a", + seq="T" * 10 + "C" * 20 + "A" * 10, + ref_name="testref", + group_ref="testref", + ref_seq="A" * 20 + "C" * 20 + "T" * 20, + match_fraction=0.3, + ), + ] + + results = list(stitch_contigs(contigs)) + assert len(results) == len(contigs) + for result in results: + assert any(result.seq in contig.seq for contig in contigs) + + assert len(visualizer().elements) > len(contigs) + + assert all(x.seq != lstrip(rstrip(x)).seq for x in results) + + assert {contig.seq for contig in contigs} != { + lstrip(rstrip(contig)).seq for contig in results + } + + +def test_partial_align_consensus(exact_aligner, visualizer): + # Scenario: A single contig partially aligns to the reference sequence, and a consensus sequence is being stitched. + + contigs = [ + GenotypedContig( + name="a", + seq="T" * 10 + "C" * 20 + "A" * 10, + ref_name="testref", + group_ref="testref", + ref_seq="A" * 20 + "C" * 20 + "T" * 20, + match_fraction=0.3, + ), + ] + + results = list(stitch_consensus(contigs)) + assert len(results) == len(contigs) + assert {contig.seq for contig in contigs} == {contig.seq for contig in results} + + assert len(visualizer().elements) > len(contigs) + + +def test_stitching_partial_align_multiple_sequences(exact_aligner, visualizer): + # Scenario: Multiple contigs have sequences that partially align to the same reference sequence. 
+ + ref_seq = "A" * 20 + "C" * 20 + "T" * 20 + + contigs = [ + GenotypedContig( + name="a", + seq="Z" * 5 + "C" * 20 + "T" * 5 + "U" * 5, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.3, + ), + GenotypedContig( + name="b", + seq="M" * 5 + "C" * 5 + "T" * 10 + "G" * 10, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.3, + ), + ] + + results = list(stitch_contigs(contigs)) + assert len(results) == 1 + assert results[0].seq == "Z" * 5 + "C" * 20 + "T" * 10 + "G" * 10 + assert len(visualizer().elements) > len(contigs) + + +def test_partial_align_consensus_multiple_sequences(exact_aligner, visualizer): + # Scenario: Multiple contigs partially align to the same reference sequence, + # and a consensus sequence is being stitched from them. + + ref_seq = "A" * 20 + "C" * 20 + "T" * 20 + + contigs = [ + GenotypedContig( + name="a", + seq="A" * 20, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.3, + ), + GenotypedContig( + name="b", + seq="T" * 20, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.3, + ), + ] + + results = list(stitch_consensus(contigs)) + assert len(results) == 1 + assert results[0].seq == contigs[0].seq + contigs[1].seq + + assert len(visualizer().elements) > len(contigs) + + +def test_partial_align_consensus_multiple_overlaping_sequences( + exact_aligner, visualizer +): + # Scenario: Multiple contigs partially align to the same reference sequence, + # and a consensus sequence is being stitched from them. 
+ + ref_seq = "A" * 20 + "C" * 20 + "T" * 20 + + contigs = [ + GenotypedContig( + name="a", + seq="T" * 10 + "A" * 5 + "C" * 20 + "A" * 10, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.3, + ), + GenotypedContig( + name="b", + seq="C" * 20 + "T" * 5 + "A" * 10 + "G" * 10, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.3, + ), + ] + + results = list(stitch_consensus(contigs)) + assert len(results) == 1 + assert ( + results[0].seq == "T" * 10 + "A" * 5 + "C" * 20 + "T" * 5 + "A" * 10 + "G" * 10 + ) + assert results[0].seq == contigs[0].seq[:-10] + contigs[1].seq[20:] + + assert len(visualizer().elements) > len(contigs) + + +def test_big_insertion_in_a_single_contig(projects, visualizer): + # Scenario: Single contig produces many alignments. + + hxb2_name = "HIV1-B-FR-K03455-seed" + ref_seq = projects.getReference(hxb2_name) + seq = ref_seq[2000:3000] + 'C' * 300 + ref_seq[3100:4000] + + contigs = [ + GenotypedContig( + name="a", + seq=seq, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.3, + ), + ] + + results = list(stitch_consensus(contigs)) + assert len(results) == 1 + assert results[0].seq == contigs[0].seq + + assert len(visualizer().elements) > len(contigs) + + +def test_big_insertion_in_a_single_contig_2(exact_aligner, visualizer): + # Scenario: Single contig produces many alignments. + + ref_seq = "A" * 10 + "B" * 20 + "C" * 10 + + contigs = [ + GenotypedContig( + name="a", + seq="A" * 10 + "D" * 100 + "C" * 10, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.3, + ), + ] + + results = list(stitch_consensus(contigs)) + assert len(results) == 1 + assert results[0].seq == contigs[0].seq + + assert len(visualizer().elements) > len(contigs) + + +def test_gap_around_small_insertion(exact_aligner, visualizer): + # Scenario: Contig is split around its gap, then stripped. 
+ + ref_seq = "A" * 10 + "B" * 29 + "C" * 10 + + contigs = [ + GenotypedContig( + name="a", + seq="P" * 5 + "A" * 10 + "D" * 6 + "C" * 10 + "Z" * 5, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.3, + ), + GenotypedContig( + name="b", + seq="Q" * 5 + "B" * 29 + "J" * 5, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.3, + ), + ] + + results = list(stitch_consensus(contigs)) + assert len(results) == 1 + assert results[0].seq == "P" * 5 + "A" * 10 + "B" * 29 + "C" * 10 + "Z" * 5 + assert len(visualizer().elements) > len(contigs) + + +def test_gap_around_big_insertion(exact_aligner, visualizer): + # Scenario: Contig is split around its gap, then stripped. + + ref_seq = "A" * 10 + "B" * 29 + "C" * 10 + + contigs = [ + GenotypedContig( + name="a", + seq="P" * 5 + "A" * 10 + "D" * 100 + "C" * 10 + "Z" * 5, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.3, + ), + GenotypedContig( + name="b", + seq="Q" * 5 + "B" * 29 + "J" * 5, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.3, + ), + ] + + results = list(stitch_consensus(contigs)) + assert len(results) == 1 + assert results[0].seq == "P" * 5 + "A" * 10 + "B" * 29 + "C" * 10 + "Z" * 5 + assert len(visualizer().elements) > len(contigs) + + +def test_stitch_with_insertion(exact_aligner, visualizer): + # Scenario: Contig is aligned with multiple hits, and the borders are correctly handled. 
+ + ref_seq = "X" * 5 + "A" * 10 + "B" * 20 + "C" * 10 + "M" * 5 + + contigs = [ + GenotypedContig( + name="a", + seq="P" * 5 + "A" * 10 + "D" * 6 + "C" * 10 + "Z" * 5, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.3, + ), + ] + + results = list(stitch_consensus(contigs)) + assert len(results) == 1 + assert results[0].seq == "PPPPPAAAAAAAAAADDDDDDCCCCCCCCCCZZZZZ" + assert len(visualizer().elements) > len(contigs) + + +def test_stitch_cross_alignment(exact_aligner, visualizer): + # Scenario: Single contig is cross-aligned. + + ref_seq = "X" * 5 + "A" * 10 + "B" * 20 + "C" * 10 + "M" * 5 + + contigs = [ + GenotypedContig( + name="a", + seq="P" * 5 + "C" * 10 + "D" * 6 + "A" * 10 + "Z" * 5, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.3, + ), + ] + + results = list(stitch_consensus(contigs)) + assert len(results) == 1 + assert results[0].seq == "AAAAAAAAAACCCCCCCCCC" + assert len(visualizer().elements) > len(contigs) + + +def test_cross_alignment_around_small_insertion(exact_aligner, visualizer): + # Scenario: Single contig is cross-aligned, then combined with another contig that is between its aligned parts. + + ref_seq = "X" * 5 + "A" * 10 + "B" * 20 + "C" * 10 + "M" * 5 + + contigs = [ + GenotypedContig( + name="a", + seq="P" * 5 + "C" * 10 + "D" * 6 + "A" * 10 + "Z" * 5, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.3, + ), + GenotypedContig( + name="b", + seq="Q" * 5 + "B" * 20 + "J" * 5, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.3, + ), + ] + + results = list(stitch_consensus(contigs)) + assert len(results) == 1 + assert results[0].seq == "A" * 10 + "B" * 20 + "C" * 10 + assert len(visualizer().elements) > len(contigs) + + +def test_reverse_complement_match(projects, visualizer): + # Scenario: Single contig is aligned in the reverse strand. 
+ + from mappy import revcomp + + hxb2_name = "HIV1-B-FR-K03455-seed" + ref = projects.getReference(hxb2_name) + ref_part = ref[2000:2200] + seq = revcomp(ref_part) + + contigs = [ + GenotypedContig( + name="a", + seq=seq, + ref_name="testref", + group_ref="testref", + ref_seq=ref, + match_fraction=0.3, + ), + ] + + results = list(stitch_consensus(contigs)) + assert len(results) == 1 + assert results[0].seq == ref_part + assert len(visualizer().elements) > len(contigs) + + +def test_reverse_complement_match_with_padding(projects, visualizer): + # Scenario: Single contig is aligned in the reverse strand. + + from mappy import revcomp + + hxb2_name = "HIV1-B-FR-K03455-seed" + ref = projects.getReference(hxb2_name) + ref_part = "T" * 24 + ref[2000:2200] + "G" * 27 + seq = revcomp(ref_part) + + contigs = [ + GenotypedContig( + name="a", + seq=seq, + ref_name="testref", + group_ref="testref", + ref_seq=ref, + match_fraction=0.3, + ), + ] + + results = list(stitch_consensus(contigs)) + assert len(results) == 1 + assert results[0].seq == ref_part + assert len(lstrip(results[0]).seq) == len(ref_part) - 24 + assert len(rstrip(results[0]).seq) == len(ref_part) - 27 + assert rstrip(results[0]).seq == ref_part[:-27] # 27 Gs on the right + assert lstrip(results[0]).seq == ref_part[24:] # 24 Ts on the left + assert len(visualizer().elements) > len(contigs) + + +def test_multiple_reverse_complement_matches(projects, visualizer): + # Scenario: Single contig is aligned in the reverse strand in multiple places. 
+ + from mappy import revcomp + + hxb2_name = "HIV1-B-FR-K03455-seed" + ref = projects.getReference(hxb2_name) + ref_part = ( + "T" * 24 + + ref[2000:2600] + + "A" * 9 + + ref[3000:3600] + + "T" * 9 + + ref[4000:4600] + + "G" * 27 + ) + seq = revcomp(ref_part) + + contigs = [ + GenotypedContig( + name="a", + seq=seq, + ref_name="testref", + group_ref="testref", + ref_seq=ref, + match_fraction=0.3, + ), + ] + + results = list(stitch_consensus(contigs)) + assert len(results) == 1 + assert len(results[0].seq) == len(ref_part) + assert results[0].seq == ref_part + assert len(lstrip(results[0]).seq) == len(ref_part) - 24 + assert len(rstrip(results[0]).seq) == len(ref_part) - 27 + assert lstrip(results[0]).seq == ref_part[24:] + assert rstrip(results[0]).seq == ref_part[:-27] + + assert len(visualizer().elements) > len(contigs) + + +def test_multiple_reverse_complement_matches_out_of_order(projects, visualizer): + # Scenario: Single contig is aligned in the reverse strand in multiple places, producing an out of order alignment. + + from mappy import revcomp + + hxb2_name = "HIV1-B-FR-K03455-seed" + ref = projects.getReference(hxb2_name) + ref_part = ( + "T" * 24 + + ref[2000:2600] + + "A" * 9 + + ref[3000:3600] + + "T" * 9 + + ref[4000:4600] + + "G" * 27 + ) + seq = revcomp(ref_part) + + contigs = [ + GenotypedContig( + name="a", + seq=seq, + ref_name="testref", + group_ref="testref", + ref_seq=ref, + match_fraction=0.3, + ), + ] + + results = list(stitch_consensus(contigs)) + assert len(results) == 1 + assert len(results[0].seq) == len(ref_part) + assert results[0].seq == ref_part + assert len(lstrip(results[0]).seq) == len(ref_part) - 24 + assert len(rstrip(results[0]).seq) == len(ref_part) - 27 + assert lstrip(results[0]).seq == ref_part[24:] + assert rstrip(results[0]).seq == ref_part[:-27] + assert len(visualizer().elements) > len(contigs) + + +def test_forward_and_reverse_match(projects, visualizer): + # Scenario: Single contig is aligned in both strands. 
+ + from mappy import revcomp + + hxb2_name = "HIV1-B-FR-K03455-seed" + ref = projects.getReference(hxb2_name) + seq = ref[1000:1100] + revcomp(ref[2000:2200]) + + contigs = [ + GenotypedContig( + name="a", + seq=seq, + ref_name="testref", + group_ref="testref", + ref_seq=ref, + match_fraction=0.3, + ), + ] + + results = list(stitch_consensus(contigs)) + assert len(results) == 1 + assert results[0].seq == seq + assert len(visualizer().elements) > len(contigs) + + +def test_overlaping_in_reference_space(projects, visualizer, monkeypatch): + # Scenario: Single contig is aligned in two parts that overlap in reference space. + + def mock_align(reference_seq: str, consensus: str) -> Tuple[List[MockAlignment], str]: + alignments = [MockAlignment(ctg="N/A", ctg_len=0, + strand=1, mapq=60, + is_primary=True, + q_st=100, q_en=300, + r_st=200, r_en=400, + cigar=[(200, CigarActions.MATCH)], + cigar_str="200M"), + MockAlignment(ctg="N/A", ctg_len=0, + strand=1, mapq=60, + is_primary=True, + q_st=300, q_en=500, + r_st=300, r_en=500, + cigar=[(200, CigarActions.MATCH)], + cigar_str="200M"), + ] + algorithm = 'mock' + return (alignments, algorithm) + + monkeypatch.setattr("micall.core.contig_stitcher.align_consensus", mock_align) + + ref = 'A' * 700 + seq = 'C' * 600 + + contigs = [ + GenotypedContig( + name="a", + seq=seq, + ref_name="testref", + group_ref="testref", + ref_seq=ref, + match_fraction=0.3, + ), + ] + + results = list(stitch_consensus(contigs)) + assert len(results) == 1 + assert results[0].seq == 'C' * 500 + + assert isinstance(results[0], stitcher.AlignedContig) + assert results[0].alignment == CigarHit(Cigar.parse('300M'), r_st=200, r_ei=499, q_st=100, q_ei=399) + + assert len(visualizer().elements) > len(contigs) + + +def test_correct_stitching_of_one_normal_and_one_unknown(exact_aligner, visualizer): + # Scenario: Two partially overlapping contigs are stitched correctly into a single sequence. 
+ + ref_seq = "A" * 100 + "C" * 100 + + contigs = [ + GenotypedContig( + name="a", + seq="A" * 50 + "C" * 20, + ref_name="testref", + group_ref="testref", + ref_seq=ref_seq, + match_fraction=0.5, + ), + GenotypedContig( + name="b", + seq="A" * 20 + "C" * 50, + ref_name=None, + group_ref=None, + ref_seq=None, + match_fraction=0.5, + ), + ] + + results = list(stitch_contigs(contigs)) + assert len(results) == 2 + + assert 70 == len(results[0].seq) + assert 70 == len(results[1].seq) + + assert {result.seq for result in results} == {contig.seq for contig in contigs} + + assert len(visualizer().elements) > len(contigs) + + +def test_main_invocation(exact_aligner, tmp_path, hcv_db): + pwd = os.path.dirname(__file__) + contigs = os.path.join(pwd, "data", "exact_parts_contigs.csv") + stitched_contigs = os.path.join(tmp_path, "stitched.csv") + stitcher.main([contigs, stitched_contigs]) + + assert os.path.exists(contigs) + assert os.path.exists(stitched_contigs) + + # Check the contents of stitched_contigs + with open(stitched_contigs, "r") as stitched_file: + stitched_data = stitched_file.read() + + expected_file_path = os.path.join(pwd, "data", "exact_parts_contigs_stitched.csv") + with open(expected_file_path, "r") as expected_file: + expected_data = expected_file.read() + + assert ( + stitched_data == expected_data + ), "The contents of the stitched contigs file do not match the expected contents." 
+ + +def test_visualizer_simple(exact_aligner, tmp_path, hcv_db): + pwd = os.path.dirname(__file__) + contigs = os.path.join(pwd, "data", "exact_parts_contigs.csv") + stitched_contigs = os.path.join(tmp_path, "stitched.csv") + plot = os.path.join(tmp_path, "exact_parts_contigs.plot.svg") + stitcher.main([contigs, stitched_contigs, "--debug", "--plot", plot]) + + assert os.path.exists(contigs) + assert os.path.exists(stitched_contigs) + + # Check the contents of stitched_contigs + with open(stitched_contigs, "r") as stitched_file: + stitched_data = stitched_file.read() + + expected_file_path = os.path.join(pwd, "data", "exact_parts_contigs_stitched.csv") + with open(expected_file_path, "r") as expected_file: + expected_data = expected_file.read() + assert ( + stitched_data == expected_data + ), "The contents of the stitched contigs file do not match the expected contents." + + # Check the contents of stitched_contigs + expected_plot = os.path.join(pwd, "data", "exact_parts_contigs.plot.svg") + with open(plot, "r") as stitched_file, open(expected_plot, "r") as expected_file: + stitched_data = stitched_file.read() + expected_data = expected_file.read() + assert ( + stitched_data == expected_data + ), "The contents of the stitched plot file do not match the expected contents." + + +def test_visualizer_correct_labeling_of_different_organism_contigs( + exact_aligner, visualizer +): + # Scenario: Some discarded and anomaly contigs correctly labelled. 
+ + ref_seq = "A" * 100 + "C" * 100 + + contigs = [ + GenotypedContig( + name="a", + seq="A" * 50, + ref_name="testref-1", + group_ref="testref-1", + ref_seq=ref_seq, + match_fraction=0.5, + ), + GenotypedContig( + name="b", + seq="C" * 50, + ref_name="testref-2", + group_ref="testref-2", + ref_seq=ref_seq, + match_fraction=0.5, + ), + GenotypedContig( + name="a_anomaly", + seq="D" * 50, + ref_name="testref-1", + group_ref="testref-1", + ref_seq=ref_seq, + match_fraction=0.5, + ), + GenotypedContig( + name="b_discarded", + seq="C" * 20, + ref_name="testref-2", + group_ref="testref-2", + ref_seq=ref_seq, + match_fraction=0.5, + ), + GenotypedContig( + name="some_anomaly", + seq="T" * 20, + ref_name='unknown', + group_ref=None, + ref_seq=ref_seq, + match_fraction=0.5, + ), + GenotypedContig( + name="some_unknown", + seq="T" * 20, + ref_name='unknown', + group_ref=None, + ref_seq=None, + match_fraction=0.5, + ), + ] + + results = list(stitch_contigs(contigs)) + assert len(results) == 5 + + assert len(visualizer().elements) > len(contigs) + + +# _ _ _ _ _ _ +# | | | |_ __ (_) |_ | |_ ___ ___| |_ ___ +# | | | | '_ \| | __| | __/ _ \/ __| __/ __| +# | |_| | | | | | |_ | || __/\__ \ |_\__ \ +# \___/|_| |_|_|\__| \__\___||___/\__|___/ +# + + +@pytest.mark.parametrize( + "intervals, expected", + [ + ([], []), + ([(1, 3)], [(1, 3)]), + # Non-overlapping intervals + ([(1, 3), (5, 6)], [(1, 3), (5, 6)]), + # Directly overlapping intervals + ([(1, 3), (2, 5)], [(1, 5)]), + # Adjacent intervals that exactly touch each other + ([(1, 2), (3, 4)], [(1, 4)]), + # Nested intervals + ([(1, 10), (2, 5)], [(1, 10)]), + # Multiple merged intervals + ([(1, 3), (2, 4), (6, 8), (10, 11), (11, 12)], [(1, 4), (6, 8), (10, 12)]), + # Intervals out of initial order + ([(4, 6), (1, 2)], [(1, 2), (4, 6)]), + # Overlapping intervals with out of order inputs + ([(1, 4), (3, 5), (2, 3), (7, 10), (9, 12)], [(1, 5), (7, 12)]), + # Large set of intervals with various overlaps + ( + [(1, 4), (2, 6), (5, 
8), (7, 8), (10, 15), (11, 12), (13, 14), (17, 18)], + [(1, 8), (10, 15), (17, 18)], + ), + # Intervals where end is less than start should return + # as is or be handled explicitly depending on implementation + ([(5, 3), (1, 2)], [(1, 2), (5, 3)]), + # Intervals that are exactly one after the other in sequence / Intervals that are completely disjoint + ([(1, 2), (4, 5), (7, 8)], [(1, 2), (4, 5), (7, 8)]), + # Overlapping intervals that merge into one large interval + ([(2, 6), (4, 10), (5, 15), (14, 20)], [(2, 20)]), + # Same interval repeated multiple times + ([(1, 5), (1, 5), (1, 5)], [(1, 5)]), + # Single point intervals + ([(1, 1), (5, 5), (3, 3)], [(1, 1), (3, 3), (5, 5)]), + ([(1, 1), (5, 5), (3, 3), (1, 1), (1, 1)], [(1, 1), (3, 3), (5, 5)]), + ([(1, 1), (2, 3)], [(1, 3)]), + # Intervals that start with negative numbers + ([(-5, 0), (-2, 3), (1, 7), (9, 12)], [(-5, 7), (9, 12)]), + ], +) +def test_merge_intervals(intervals, expected): + assert merge_intervals(intervals) == expected + + +@dataclass +class TestMockAlignment: + r_st: int + r_ei: int + + +class MockAlignedContig: + def __init__(self, ref_name, group_ref, r_st, r_ei, name="contig"): + self.ref_name = ref_name + self.group_ref = group_ref + self.alignment = TestMockAlignment(r_st, r_ei) + self.name = name + self.id = id(self) + + +# Simple function to create mock AlignedContig objects for testing, including ref_name. +def create_mock_aligned_contig(ref_name, r_st, r_ei, name="contig"): + return MockAlignedContig(ref_name, ref_name, r_st, r_ei, name) + + +@pytest.mark.parametrize( + "contigs, expected_covered_name", + [ + # No contigs are completely covered. + ([("ref1", 0, 100), ("ref1", 101, 200)], None), + ([("ref1", 0, 50), ("ref1", 51, 100)], None), + # A single contig is completely covered by one other contig. 
+ ([("ref1", 0, 100), ("ref1", 0, 200)], "contig1"), + ([("ref1", 50, 150), ("ref1", 0, 200)], "contig1"), + # A single contig completely covers another, but with different reference names. + ([("ref1", 0, 50), ("ref2", 0, 100)], None), + # Single coverage with exact match. + ([("ref1", 0, 100), ("ref1", 0, 100)], "contig1"), + # A single contig is completely covered at the beginning by one and at the end by another contig. + ([("ref1", 0, 50), ("ref1", 50, 100), ("ref1", 25, 75)], "contig3"), + # Contigs overlap but none are completely covered. + ([("ref1", 0, 50), ("ref1", 40, 90), ("ref1", 80, 120)], None), + # Multiple contigs with some covered completely by a single other contig. + ([("ref1", 0, 200), ("ref1", 10, 30), ("ref1", 170, 190)], "contig2"), + # Multiple contigs with complex overlaps and one completely covered. + ( + [("ref1", 30, 60), ("ref1", 0, 50), ("ref1", 20, 70), ("ref1", 60, 90)], + "contig1", + ), + # Edge case where a contig starts where another ends. + ([("ref1", 0, 50), ("ref1", 50, 100)], None), + # Contigs are completely covered in a nested fashion. + ([("ref1", 0, 200), ("ref1", 50, 150), ("ref1", 100, 125)], "contig2"), + # Contigs are adjacent and cover each other completely. + ([("ref1", 0, 100), ("ref1", 101, 200), ("ref1", 0, 200)], "contig1"), + # Single large contig covers several smaller non-adjacent contigs. + ( + [ + ("ref1", 0, 500), + ("ref1", 50, 100), + ("ref1", 200, 250), + ("ref1", 300, 350), + ], + "contig2", + ), + # Single large contig covers several smaller adjacent contigs. + ( + [ + ("ref1", 50, 100), + ("ref1", 70, 300), + ("ref1", 101, 199), + ("ref1", 200, 350), + ], + "contig2", + ), + # Single small contig is covered by several larger contigs. + ( + [ + ("ref1", 0, 250), + ("ref1", 200, 300), + ("ref1", 600, 800), + ("ref1", 250, 700), + ], + "contig2", + ), + # Complex case with multiple contigs and complete coverage by combinations. 
+ ( + [ + ("ref1", 0, 100), + ("ref1", 30, 130), + ("ref1", 60, 160), + ("ref1", 90, 190), + ("ref1", 120, 220), + ], + "contig2", + ), + # Contigs with same start but different end, where one is covered. + ([("ref1", 0, 100), ("ref1", 0, 50)], "contig2"), + # Contigs with same end but different start, where one is covered. + ([("ref1", 50, 100), ("ref1", 0, 100)], "contig1"), + # Contig covered by two overlapping contigs that don't individually cover the whole range. + ([("ref1", 0, 75), ("ref1", 25, 100), ("ref1", 0, 100)], "contig1"), + # Two contigs are covered completely by one large contig. + ([("ref1", 0, 300), ("ref1", 50, 100), ("ref1", 200, 250)], "contig2"), + # No contigs at all. + ([], None), + ], +) +def test_find_covered(contigs, expected_covered_name): + mock_contigs = [ + create_mock_aligned_contig(ref_name, r_st, r_ei, f"contig{i+1}") + for i, (ref_name, r_st, r_ei) in enumerate(contigs) + ] + covered, covering = find_covered_contig(mock_contigs) + if expected_covered_name is None: + assert covered is None + else: + assert covered is not None + assert covered.name == expected_covered_name + + +def test_concordance_same_length_inputs(): + with pytest.raises(ValueError): + calculate_concordance("abc", "ab") + + +def test_concordance_completely_different_strings(): + result = calculate_concordance("a" * 30, "b" * 30) + assert all(n == 0 for n in result) + + +def generate_random_string_pair(length): + left = "".join(random.choice("ACGT") for _ in range(length)) + right = "".join(random.choice("ACGT") for _ in range(length)) + return left, right + + +@pytest.mark.parametrize( + "left, right, expected", + [ + ("aaaaa", "aaaaa", [0.6, 0.68, 0.7, 0.68, 0.6]), + ("abcdd", "abcdd", [0.6, 0.68, 0.7, 0.68, 0.6]), + ("aaaaaaaa", "baaaaaab", [0.3, 0.62, 0.71, 0.75, 0.75, 0.71, 0.62, 0.3]), + ("aaaaaaaa", "aaaaaaab", [0.64, 0.73, 0.79, 0.8, 0.79, 0.73, 0.64, 0.31]), + ("aaaaaaaa", "aaaaaaab", [0.64, 0.73, 0.79, 0.8, 0.79, 0.73, 0.64, 0.31]), + ("aaaaaaaa", 
"aaaaabbb", [0.6, 0.68, 0.7, 0.68, 0.6, 0.29, 0.19, 0.13]), + ("aaaaaaaa", "aaabbaaa", [0.56, 0.63, 0.62, 0.39, 0.39, 0.62, 0.63, 0.56]), + ("aaaaa", "bbbbb", [0] * 5), + ("", "", []), + ], +) +def test_concordance_simple(left, right, expected): + result = [round(float(x), 2) for x in calculate_concordance(left, right)] + assert result == expected + + +@pytest.mark.parametrize( + "left, right, expected", + [ + ("a" * 128, "a" * 128, 64), + ("a" * 128, "a" * 64 + "b" * 64, 32), + ("a" * 128, "a" * 64 + "ba" * 32, 32), + ("a" * 128, "a" * 54 + "b" * 20 + "a" * 54, 28), # two peaks + ("a" * 128, "a" * 63 + "b" * 2 + "a" * 63, 32), # two peaks + ("a" * 1280, "b" * 640 + "a" * 640, round(1280 * 3 / 4)), + ("a" * 128, "b" * 48 + "a" * 32 + "b" * 48, 64), + ( + "a" * 128, + "b" * 48 + "a" * 15 + "ab" + "a" * 15 + "b" * 48, + 48 + 16 // 2, + ), # two peaks - choosing 1nd + ( + "a" * 128, + "b" * 48 + "a" * 15 + "ba" + "a" * 15 + "b" * 48, + 48 + 15 + 16 // 2, + ), # two peaks - choosing 2nd + ( + "a" * 128, + "b" * 48 + "a" * 15 + "bb" + "a" * 15 + "b" * 48, + 48 + 15 // 2, + ), # two peaks - choosing 1st + ], +) +def test_concordance_simple_index(left, right, expected): + concordance = calculate_concordance(left, right) + concordance_d = list(disambiguate_concordance(concordance)) + index = max(range(len(concordance)), key=lambda i: concordance_d[i]) + if abs(index - expected) > 1: + assert index == expected + + +def generate_test_cases(num_cases): + with fixed_random_seed(42): + length = random.randint(1, 80) + return [generate_random_string_pair(length) for _ in range(num_cases)] + + +concordance_cases = generate_test_cases(num_cases=100) + + +@pytest.mark.parametrize("left, right", concordance_cases) +def test_concordance_output_range(left, right): + result = calculate_concordance(left, right) + assert all( + 0 <= n <= 1 for n in result + ), "All values in result should be between 0 and 1" + + +@pytest.mark.parametrize("left, right", concordance_cases) +def 
test_concordance_higher_if_more_matches_added(left, right): + # Insert exact matches in the middle + matching_sequence = "A" * 30 + insert_position = len(left) // 2 + new_left = ( + left[:insert_position] + + matching_sequence + + left[insert_position + len(matching_sequence):] + ) + new_right = ( + right[:insert_position] + + matching_sequence + + right[insert_position + len(matching_sequence):] + ) + + old_conc = calculate_concordance(left, right) + new_conc = calculate_concordance(new_left, new_right) + old_average = sum(old_conc) / len(old_conc) + new_average = sum(new_conc) / len(new_conc) + assert old_average <= new_average + + +@pytest.mark.parametrize("left, right", concordance_cases) +def test_concordance_higher_in_matching_areas(left, right): + # Insert exact matches in the middle + matching_sequence = "A" * 30 + insert_position = len(left) // 2 + new_left = ( + left[:insert_position] + + matching_sequence + + left[insert_position + len(matching_sequence):] + ) + new_right = ( + right[:insert_position] + + matching_sequence + + right[insert_position + len(matching_sequence):] + ) + + concordance_scores = calculate_concordance(new_left, new_right) + + # Check concordance in the matching area + matching_area_concordance = concordance_scores[ + insert_position:insert_position + len(matching_sequence) + ] + + # Calculate average concordance inside and outside the matching area + average_inside = sum(matching_area_concordance) / len(matching_sequence) + average_outside = (sum(concordance_scores) - sum(matching_area_concordance)) / ( + len(concordance_scores) - len(matching_sequence) + ) + + # Assert that the concordance is indeed higher in the matching area + assert ( + average_inside > average_outside + ), "Concordance in matching areas should be higher than in non-matching areas" diff --git a/micall/tests/test_contig_stitcher_fuzz.py b/micall/tests/test_contig_stitcher_fuzz.py new file mode 100644 index 000000000..dde2af769 --- /dev/null +++ 
b/micall/tests/test_contig_stitcher_fuzz.py @@ -0,0 +1,145 @@ +import pytest +import json +import os +from micall.core.contig_stitcher import ( + GenotypedContig, + AlignedContig, + stitch_consensus, + stitch_contigs, + drop_completely_covered, + StitcherContext, +) +import micall.core.contig_stitcher as stitcher +from micall.core.plot_contigs import build_stitcher_figure +from aligntools import CigarHit, Cigar, CigarActions +from typing import Dict, List +from collections import defaultdict + + +@pytest.fixture +def no_aligner(monkeypatch): + monkeypatch.setattr("micall.core.contig_stitcher.align_to_reference", lambda x: [x]) + + +@pytest.fixture(autouse=True) +def stitcher_context(): + stitcher.context.set(StitcherContext.make()) + + +def read_contigs(line): + array = json.loads(line) + contig_descriptions = [obj["fields"] for obj in array if obj["type"] == "contig"] + for description in contig_descriptions: + start = description["start"] + end = description["end"] + name = description["name"] + length = end - start + 1 + assert length > 0 + + ref_seq = "A" * 1000 # it does not matter + seq = "C" * 10 + "A" * length + "T" * 10 + query = GenotypedContig( + name=name, + seq=seq, + ref_name="commonref", + group_ref="commongroup", + ref_seq=ref_seq, + match_fraction=2 / 3, + ) + alignment = CigarHit( + Cigar([(length, CigarActions.MATCH)]), + q_st=20, + q_ei=20 + length - 1, + r_st=start, + r_ei=end, + ) + contig = AlignedContig.make(query=query, alignment=alignment, strand="forward") + aidee = f"{start:03d}-{end:03d}" + yield {"contig": contig, "id": aidee} + + +def get_case_descriptions(): + pwd = os.path.dirname(__file__) + jsonfile = os.path.join(pwd, "data", "contig_stitcher_fuzz_nogaps.json") + with open(jsonfile, "r", encoding="utf8") as reader: + for line in reader: + read = list(read_contigs(line)) + contigs = [x["contig"] for x in read] + ids = [x["id"] for x in read] + aidee = ",".join(ids) + yield {"contigs": contigs, "id": aidee} + + 
+all_case_descriptions = list(get_case_descriptions()) +all_case_ids = [x["id"] for x in all_case_descriptions] + + +@pytest.mark.parametrize("description", all_case_descriptions, ids=all_case_ids) +def test_contig_number_prop(no_aligner, description): + contigs = description["contigs"] + stitched = list(stitch_consensus(contigs)) + assert len(stitched) <= len(contigs) + + +@pytest.mark.parametrize("description", all_case_descriptions, ids=all_case_ids) +def test_contig_number_prop2(no_aligner, description): + contigs = description["contigs"] + consensus = list(stitch_consensus(contigs)) + stitched = list(stitch_contigs(contigs)) + uncovered = list(drop_completely_covered(contigs)) + assert len(consensus) <= len(stitched) <= len(uncovered) <= len(contigs) + + +def test_contig_number_prop2_existential(): + # This test is just to confirm that our cases cover all sub-actions. + + contig_sets = [x["contigs"] for x in all_case_descriptions] + + assert any( + len(list(stitch_contigs(contigs))) > len(list(stitch_consensus(contigs))) + for contigs in contig_sets + ) + + assert any( + len(list(drop_completely_covered(contigs))) > len(list(stitch_contigs(contigs))) + for contigs in contig_sets + ) + + assert any( + len(list(contigs)) > len(list(drop_completely_covered(contigs))) + for contigs in contig_sets + ) + + +def get_all_reference_positions(contigs: List[GenotypedContig]): + ret: Dict[int, int] = defaultdict(lambda: 0) + for contig in contigs: + if isinstance(contig, AlignedContig): + for i in contig.alignment.coordinate_mapping.ref_to_query.domain: + ret[i] += 1 + + return ret + + +@pytest.mark.parametrize("description", all_case_descriptions, ids=all_case_ids) +def test_stitching_intervals_prop(no_aligner, description): + contigs = description["contigs"] + stitched = list(stitch_contigs(contigs)) + initial_positions = get_all_reference_positions(contigs) + stitched_positions = get_all_reference_positions(stitched) + + # Checks that no reference position has been 
lost, and no new positions "created" + assert set(initial_positions.keys()) == set(stitched_positions.keys()) + + # Checks that there are no overlaps between contigs + assert all(v == 1 for (k, v) in stitched_positions.items()) + + +@pytest.mark.parametrize("description", all_case_descriptions, ids=all_case_ids) +def test_visualizer_simple(no_aligner, description): + contigs = description["contigs"] + with StitcherContext.fresh() as ctx: + list(stitch_consensus(contigs)) + assert len(ctx.events) >= len(contigs) + figure = build_stitcher_figure(ctx.events) + assert len(figure.elements) > len(contigs) + 1 diff --git a/micall/tests/test_denovo.py b/micall/tests/test_denovo.py index a7fbff009..12e2d791d 100644 --- a/micall/tests/test_denovo.py +++ b/micall/tests/test_denovo.py @@ -1,233 +1,32 @@ from io import StringIO from pathlib import Path +import re -from Bio import SeqIO -from pytest import fixture, mark +from pytest import mark -from micall.core.denovo import write_contig_refs, denovo, DEFAULT_DATABASE, genotype -from micall.blast_db.make_blast_db import make_blast_db, DEFAULT_PROJECTS +from micall.core.denovo import denovo +from micall.tests.test_fasta_to_csv import check_hcv_db, DEFAULT_DATABASE # activates the fixture +# make linters not complain about unused imports. 
+assert check_hcv_db is not None +assert DEFAULT_DATABASE is not None -@fixture(scope='session', name='hcv_db') -def check_hcv_db(): - db_path = Path(DEFAULT_DATABASE) - index_path = db_path.parent / "refs.fasta.nin" - build_needed = not index_path.exists() - if not build_needed: - projects_date = Path(DEFAULT_PROJECTS).stat().st_mtime - index_date = index_path.stat().st_mtime - build_needed = index_date < projects_date - if build_needed: - with open(DEFAULT_PROJECTS) as projects_json, \ - open(DEFAULT_DATABASE, 'w') as refs_fasta: - make_blast_db(projects_json, refs_fasta) - assert index_path.exists() - return db_path - -def test_make_blast_db_excludes_hivgha(hcv_db): - fasta_path = Path(DEFAULT_DATABASE) - with fasta_path.open() as f: - for reference in SeqIO.parse(f, 'fasta'): - # Exclude the Ghana project, because they're recombinant. - assert reference.name != 'HIV1-CRF02_AG-GH-AB286855-seed' - - -def test_write_contig_refs_two_sequences(tmpdir, hcv_db): - contigs_fasta = Path(tmpdir) / "contigs.fasta" - contigs_fasta.write_text("""\ ->foo -TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA ->bar -CAGGGCTCCAGGACTGCACCATGCTCGTGTGTGGCGACGAC -""") - contigs_csv = StringIO() - expected_contigs_csv = """\ -ref,match,group_ref,contig -HCV-1a,1.0,HCV-1a,TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA -HCV-1a,1.0,HCV-1a,CAGGGCTCCAGGACTGCACCATGCTCGTGTGTGGCGACGAC -""" - - write_contig_refs(str(contigs_fasta), contigs_csv) - - assert expected_contigs_csv == contigs_csv.getvalue() - - -def test_write_contig_refs_two_groups(tmpdir, hcv_db): - contigs_fasta = Path(tmpdir) / "contigs.fasta" - contigs_fasta.write_text("""\ ->foo -ACCCGCCCCTAATAGGGGCGACACTCCGCCATGAATC ->bar -ACCATGGATCACTCCCCTGTGAGGAACTACTGTCTT ->baz -TGCAATGACAGCTTACAGACGGGTTTCCTCGCTTCCTTGTTTTACACCCA -""") - contigs_csv = StringIO() - expected_contigs_csv = """\ -ref,match,group_ref,contig -HCV-2a,1.0,HCV-2b,ACCCGCCCCTAATAGGGGCGACACTCCGCCATGAATC -HCV-1g,1.0,HCV-1g,ACCATGGATCACTCCCCTGTGAGGAACTACTGTCTT 
-HCV-2b,1.0,HCV-2b,TGCAATGACAGCTTACAGACGGGTTTCCTCGCTTCCTTGTTTTACACCCA -""" - - write_contig_refs(str(contigs_fasta), contigs_csv) - - assert expected_contigs_csv == contigs_csv.getvalue() - - -def test_write_contig_refs_not_found(tmpdir, hcv_db): - contigs_fasta = Path(tmpdir) / "contigs.fasta" - contigs_fasta.write_text("""\ ->foo -CATCACATAGGAGA -""") - contigs_csv = StringIO() - expected_contigs_csv = """\ -ref,match,group_ref,contig -unknown,0,,CATCACATAGGAGA -""" - - write_contig_refs(str(contigs_fasta), contigs_csv) - - assert expected_contigs_csv == contigs_csv.getvalue() - - -def test_write_contig_refs_partial_match(tmpdir, hcv_db): - contigs_fasta = Path(tmpdir) / "contigs.fasta" - contigs_fasta.write_text("""\ ->foo -TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA ->bar -CATCACATAGGAGACAGGGCTCCAGGACTGCACCATGCTCGTGTGTGGCGACGAC -""") - contigs_csv = StringIO() - expected_contigs_csv = """\ -ref,match,group_ref,contig -HCV-1a,1.0,HCV-1a,TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA -HCV-1a,0.75,HCV-1a,CATCACATAGGAGACAGGGCTCCAGGACTGCACCATGCTCGTGTGTGGCGACGAC -""" - - write_contig_refs(str(contigs_fasta), contigs_csv) - - assert expected_contigs_csv == contigs_csv.getvalue() - - -def test_write_contig_refs_reversed_match(tmpdir, hcv_db): - """ If BLAST match is reversed, then reverse the contig before reporting. 
""" - contigs_fasta = Path(tmpdir) / "contigs.fasta" - contigs_fasta.write_text("""\ ->foo -TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA ->bar -GTCGTCGCCACACACGAGCATGGTGCAGTCCTGGAGCCCTGTCTCCTATGTGATG -""") - contigs_csv = StringIO() - expected_contigs_csv = """\ -ref,match,group_ref,contig -HCV-1a,1.0,HCV-1a,TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA -HCV-1a,0.75,HCV-1a,CATCACATAGGAGACAGGGCTCCAGGACTGCACCATGCTCGTGTGTGGCGACGAC -""" - - write_contig_refs(str(contigs_fasta), contigs_csv) - - assert expected_contigs_csv == contigs_csv.getvalue() - - -def test_genotype(tmpdir, hcv_db): - contigs_fasta = Path(tmpdir) / "contigs.fasta" - contigs_fasta.write_text("""\ ->foo -TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA ->bar -CATCACATAGGAGACAGGGCTCCAGGACTGCACCATGCTCGTGTGTGGCGACGAC -""") - blast_csv = StringIO() - expected_blast_csv = """\ -contig_num,ref_name,score,match,pident,start,end,ref_start,ref_end -2,HCV-1g,37,0.67,100,19,55,8506,8542 -2,HCV-1a,41,0.75,100,15,55,8518,8558 -1,HCV-1a,41,1.0,100,1,41,8187,8227 -""" - - genotype(str(contigs_fasta), blast_csv=blast_csv) - - assert expected_blast_csv == blast_csv.getvalue() - - -def test_write_contig_refs(tmpdir, hcv_db): - contigs_fasta = Path(tmpdir) / "contigs.fasta" - contigs_fasta.write_text("""\ ->foo -TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA ->bar -CATCACATAGGAGACAGGGCTCCAGGACTGCACCATGCTCGTGTGTGGCGACGAC -""") - contigs_csv = StringIO() - expected_contigs_csv = """\ -ref,match,group_ref,contig -HCV-1a,1.0,HCV-1a,TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA -HCV-1a,0.75,HCV-1a,CATCACATAGGAGACAGGGCTCCAGGACTGCACCATGCTCGTGTGTGGCGACGAC -""" - blast_csv = StringIO() - expected_blast_csv = """\ -contig_num,ref_name,score,match,pident,start,end,ref_start,ref_end -2,HCV-1g,37,0.67,100,19,55,8506,8542 -2,HCV-1a,41,0.75,100,15,55,8518,8558 -1,HCV-1a,41,1.0,100,1,41,8187,8227 -""" - - write_contig_refs(str(contigs_fasta), contigs_csv, blast_csv=blast_csv) - - assert expected_contigs_csv == contigs_csv.getvalue() - assert 
expected_blast_csv == blast_csv.getvalue() - - -def test_write_contig_refs_none(tmpdir, hcv_db): - contigs_fasta = Path(tmpdir) / 'contigs.fasta' - assert not contigs_fasta.exists() - - contigs_csv = StringIO() - expected_contigs_csv = """\ -ref,match,group_ref,contig -""" - - write_contig_refs(str(contigs_fasta), contigs_csv) - - assert expected_contigs_csv == contigs_csv.getvalue() - - -def test_merged_contig(tmpdir, hcv_db): - contigs_fasta = Path(tmpdir) / 'contigs.fasta' - assert not contigs_fasta.exists() - - merged_contigs_path = Path(tmpdir) / 'merged_contigs.csv' - merged_contigs_path.write_text("""\ -contig -TGCACAAGACCCAACAACAATACAAGAAAAAGTATAAGGATAGGACCAGGA -""") - - contigs_csv = StringIO() - expected_contigs_csv = """\ -ref,match,group_ref,contig -HIV1-C-BR-JX140663-seed,1.0,HIV1-C-BR-JX140663-seed,TGCACAAGACCCAACAACAATACAAGAAAAAGTATAAGGATAGGACCAGGA -""" - - with merged_contigs_path.open() as merged_contigs_csv: - write_contig_refs(str(contigs_fasta), - contigs_csv, - merged_contigs_csv=merged_contigs_csv) - - assert expected_contigs_csv == contigs_csv.getvalue() +def normalize_fasta(content: str) -> str: + result = re.sub(r'^>.*$', '>', + content, + flags=re.MULTILINE) + result = ''.join(result.split('\n')) + return result @mark.iva() # skip with -k-iva def test_denovo_iva(tmpdir, hcv_db): microtest_path = Path(__file__).parent / 'microtest' - contigs_csv = StringIO() - expected_contigs_csv = """\ -ref,match,group_ref,contig -HCV-2a,1.0,HCV-2a,TGAGGGCCAAAAAGGTAACTTTTGATAGGATGCAAGTGC\ + contigs_fasta = StringIO() + expected_contigs_fasta = """\ +>contig.00001 +TGAGGGCCAAAAAGGTAACTTTTGATAGGATGCAAGTGC\ TCGACGCTCATTACGACTCAGTCTTAAAGGACATCAAGCTAGCGGCCTCCAAGGTCTCCG\ CGAGGCTCCTCACCCTGGAGGAGGCATGCCAGCTAACTCCACCCCATTCTGCAAGATCCAAATATGGGTTTGGGGCTA\ AGGAGGTGCGCAGCTTGTCCGGGAGGGCCGTTAACCACATCAAGTCCGTGTGGAAGGACCTCCTGGAAGACTCACAAA\ @@ -238,7 +37,9 @@ def test_denovo_iva(tmpdir, hcv_db): denovo(str(microtest_path / '2160A-HCV_S19_L001_R1_001.fastq'), 
str(microtest_path / '2160A-HCV_S19_L001_R2_001.fastq'), - contigs_csv, + contigs_fasta, tmpdir) - assert contigs_csv.getvalue() == expected_contigs_csv + result = contigs_fasta.getvalue() + expected = expected_contigs_fasta + assert normalize_fasta(result) == normalize_fasta(expected) diff --git a/micall/tests/test_fasta_to_csv.py b/micall/tests/test_fasta_to_csv.py new file mode 100644 index 000000000..4ab951b6d --- /dev/null +++ b/micall/tests/test_fasta_to_csv.py @@ -0,0 +1,227 @@ +from io import StringIO +from pathlib import Path + +from Bio import SeqIO +import pytest + +from micall.utils.fasta_to_csv import default_database, genotype, fasta_to_csv +from micall.blast_db.make_blast_db import make_blast_db, DEFAULT_PROJECTS + + +@pytest.fixture(scope='session') +def DEFAULT_DATABASE(): + with default_database() as ret: + yield ret + + +@pytest.fixture(scope='session', name='hcv_db') +def check_hcv_db(DEFAULT_DATABASE): + db_path = Path(DEFAULT_DATABASE) + index_path = db_path.parent / "refs.fasta.nin" + build_needed = not index_path.exists() + if not build_needed: + projects_date = Path(DEFAULT_PROJECTS).stat().st_mtime + index_date = index_path.stat().st_mtime + build_needed = index_date < projects_date + if build_needed: + with open(DEFAULT_PROJECTS) as projects_json, \ + open(DEFAULT_DATABASE, 'w') as refs_fasta: + make_blast_db(projects_json, refs_fasta) + assert index_path.exists() + return db_path + + +def test_make_blast_db_excludes_hivgha(hcv_db, DEFAULT_DATABASE): + fasta_path = Path(DEFAULT_DATABASE) + with fasta_path.open() as f: + for reference in SeqIO.parse(f, 'fasta'): + # Exclude the Ghana project, because they're recombinant. 
+ assert reference.name != 'HIV1-CRF02_AG-GH-AB286855-seed' + + +def test_genotype(tmpdir, hcv_db): + contigs_fasta = Path(tmpdir) / "contigs.fasta" + contigs_fasta.write_text("""\ +>foo +TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA +>bar +CATCACATAGGAGACAGGGCTCCAGGACTGCACCATGCTCGTGTGTGGCGACGAC +""") + blast_csv = StringIO() + expected_blast_csv = """\ +contig_num,ref_name,score,match,pident,start,end,ref_start,ref_end +2,HCV-1g,37,0.67,100,19,55,8506,8542 +2,HCV-1a,41,0.75,100,15,55,8518,8558 +1,HCV-1a,41,1.0,100,1,41,8187,8227 +""" + + genotype(str(contigs_fasta), blast_csv=blast_csv) + + assert expected_blast_csv == blast_csv.getvalue() + + +def test_fasta_to_csv_two_sequences(tmpdir, hcv_db): + contigs_fasta = Path(tmpdir) / "contigs.fasta" + contigs_fasta.write_text("""\ +>foo +TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA +>bar +CAGGGCTCCAGGACTGCACCATGCTCGTGTGTGGCGACGAC +""") + contigs_csv = StringIO() + expected_contigs_csv = """\ +ref,match,group_ref,contig +HCV-1a,1.0,HCV-1a,TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA +HCV-1a,1.0,HCV-1a,CAGGGCTCCAGGACTGCACCATGCTCGTGTGTGGCGACGAC +""" + + fasta_to_csv(str(contigs_fasta), contigs_csv) + + assert expected_contigs_csv == contigs_csv.getvalue() + + +def test_fasta_to_csv_two_groups(tmpdir, hcv_db): + contigs_fasta = Path(tmpdir) / "contigs.fasta" + contigs_fasta.write_text("""\ +>foo +ACCCGCCCCTAATAGGGGCGACACTCCGCCATGAATC +>bar +ACCATGGATCACTCCCCTGTGAGGAACTACTGTCTT +>baz +TGCAATGACAGCTTACAGACGGGTTTCCTCGCTTCCTTGTTTTACACCCA +""") + contigs_csv = StringIO() + expected_contigs_csv = """\ +ref,match,group_ref,contig +HCV-2a,1.0,HCV-2b,ACCCGCCCCTAATAGGGGCGACACTCCGCCATGAATC +HCV-1g,1.0,HCV-1g,ACCATGGATCACTCCCCTGTGAGGAACTACTGTCTT +HCV-2b,1.0,HCV-2b,TGCAATGACAGCTTACAGACGGGTTTCCTCGCTTCCTTGTTTTACACCCA +""" + + fasta_to_csv(str(contigs_fasta), contigs_csv) + + assert expected_contigs_csv == contigs_csv.getvalue() + + +def test_fasta_to_csv_not_found(tmpdir, hcv_db): + contigs_fasta = Path(tmpdir) / "contigs.fasta" + 
contigs_fasta.write_text("""\ +>foo +CATCACATAGGAGA +""") + contigs_csv = StringIO() + expected_contigs_csv = """\ +ref,match,group_ref,contig +unknown,0,,CATCACATAGGAGA +""" + + fasta_to_csv(str(contigs_fasta), contigs_csv) + + assert expected_contigs_csv == contigs_csv.getvalue() + + +def test_fasta_to_csv_partial_match(tmpdir, hcv_db): + contigs_fasta = Path(tmpdir) / "contigs.fasta" + contigs_fasta.write_text("""\ +>foo +TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA +>bar +CATCACATAGGAGACAGGGCTCCAGGACTGCACCATGCTCGTGTGTGGCGACGAC +""") + contigs_csv = StringIO() + expected_contigs_csv = """\ +ref,match,group_ref,contig +HCV-1a,1.0,HCV-1a,TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA +HCV-1a,0.75,HCV-1a,CATCACATAGGAGACAGGGCTCCAGGACTGCACCATGCTCGTGTGTGGCGACGAC +""" + + fasta_to_csv(str(contigs_fasta), contigs_csv) + + assert expected_contigs_csv == contigs_csv.getvalue() + + +def test_fasta_to_csv_reversed_match(tmpdir, hcv_db): + """ If BLAST match is reversed, then reverse the contig before reporting. 
""" + contigs_fasta = Path(tmpdir) / "contigs.fasta" + contigs_fasta.write_text("""\ +>foo +TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA +>bar +GTCGTCGCCACACACGAGCATGGTGCAGTCCTGGAGCCCTGTCTCCTATGTGATG +""") + contigs_csv = StringIO() + expected_contigs_csv = """\ +ref,match,group_ref,contig +HCV-1a,1.0,HCV-1a,TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA +HCV-1a,0.75,HCV-1a,CATCACATAGGAGACAGGGCTCCAGGACTGCACCATGCTCGTGTGTGGCGACGAC +""" + + fasta_to_csv(str(contigs_fasta), contigs_csv) + + assert expected_contigs_csv == contigs_csv.getvalue() + + +def test_fasta_to_csv(tmpdir, hcv_db): + contigs_fasta = Path(tmpdir) / "contigs.fasta" + contigs_fasta.write_text("""\ +>foo +TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA +>bar +CATCACATAGGAGACAGGGCTCCAGGACTGCACCATGCTCGTGTGTGGCGACGAC +""") + contigs_csv = StringIO() + expected_contigs_csv = """\ +ref,match,group_ref,contig +HCV-1a,1.0,HCV-1a,TCACCAGGACAGCGGGTTGAATTCCTCGTGCAAGCGTGGAA +HCV-1a,0.75,HCV-1a,CATCACATAGGAGACAGGGCTCCAGGACTGCACCATGCTCGTGTGTGGCGACGAC +""" + blast_csv = StringIO() + expected_blast_csv = """\ +contig_num,ref_name,score,match,pident,start,end,ref_start,ref_end +2,HCV-1g,37,0.67,100,19,55,8506,8542 +2,HCV-1a,41,0.75,100,15,55,8518,8558 +1,HCV-1a,41,1.0,100,1,41,8187,8227 +""" + + fasta_to_csv(str(contigs_fasta), contigs_csv, blast_csv=blast_csv) + + assert expected_contigs_csv == contigs_csv.getvalue() + assert expected_blast_csv == blast_csv.getvalue() + + +def test_fasta_to_csv_none(tmpdir, hcv_db): + contigs_fasta = Path(tmpdir) / 'contigs.fasta' + assert not contigs_fasta.exists() + + contigs_csv = StringIO() + expected_contigs_csv = """\ +ref,match,group_ref,contig +""" + + fasta_to_csv(str(contigs_fasta), contigs_csv) + + assert expected_contigs_csv == contigs_csv.getvalue() + + +def test_merged_contig(tmpdir, hcv_db): + contigs_fasta = Path(tmpdir) / 'contigs.fasta' + assert not contigs_fasta.exists() + + merged_contigs_path = Path(tmpdir) / 'merged_contigs.csv' + merged_contigs_path.write_text("""\ +contig 
+TGCACAAGACCCAACAACAATACAAGAAAAAGTATAAGGATAGGACCAGGA +""") + + contigs_csv = StringIO() + expected_contigs_csv = """\ +ref,match,group_ref,contig +HIV1-C-BR-JX140663-seed,1.0,HIV1-C-BR-JX140663-seed,TGCACAAGACCCAACAACAATACAAGAAAAAGTATAAGGATAGGACCAGGA +""" + + with merged_contigs_path.open() as merged_contigs_csv: + fasta_to_csv(str(contigs_fasta), + contigs_csv, + merged_contigs_csv=merged_contigs_csv) + + assert expected_contigs_csv == contigs_csv.getvalue() diff --git a/micall/tests/test_installation.py b/micall/tests/test_installation.py new file mode 100644 index 000000000..b9971876e --- /dev/null +++ b/micall/tests/test_installation.py @@ -0,0 +1,156 @@ +#! /usr/bin/env python + +""" + +This test is supposed to verify that installation of MiCall is not broken. + +This tests assumes Debian-compatible operating system, such as Ubuntu. +It also assumes that python3 and python3-venv are installed. + +It then: + 1. Creates a temporary virtual environment. + 2. Activates the environment. + 3. Installs MiCall via pip. + 4. Runs various shell commands to check the installation. + +""" + + +import subprocess +import venv +from pathlib import Path +from typing import Sequence +import pytest +import shlex +import re +import os +from itertools import groupby +from micall.utils.get_list_of_executables import iterate_executables +from micall.main import EXECUTABLES + + +# Function to quote shell arguments. +def quote(s: object) -> str: + return shlex.quote(str(s)) + + +@pytest.fixture(scope="session") +def temp_venv(tmpdir_factory): + """ + Fixture for creating and cleaning up a virtual environment. + + This fixture creates a virtual environment in a temporary directory, + provides context to run commands in this environment, and cleans up after the test. 
+ """ + + # Create the virtual environment + venv_dir = tmpdir_factory.mktemp("venv") + venv.create(venv_dir, with_pip=True) + + # Yield the environment setup to the test function + yield venv_dir / "bin" / "activate" + + +@pytest.fixture(scope="session") +def micall_installation(temp_venv: Path): + """ + Ensures an installed micall executable. + """ + + q = quote + + # Check that MiCall is not installed. + stdout, stderr, returncode = run_command(f"export PATH= ; . {q(temp_venv)} && command -v micall") + assert returncode != 0, "Unexpected MiCall installation." + + # Path to MiCall directory (3 levels up from the current script file) + script_path = Path(__file__).resolve() + micall_path = script_path.parent.parent.parent + + # Install MiCall using pip from the local path + stdout, stderr, returncode = run_command(f". {q(temp_venv)} && pip install -- {q(micall_path)}") + assert returncode == 0, f"Failed to install MiCall:\n{stderr}" + + yield "micall" + + +def run_command(command: Sequence[str]): + """Executes a shell command within a provided environment and returns output, error, and return code.""" + + result = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + return result.stdout.decode('utf-8').strip(), result.stderr.decode('utf-8').strip(), result.returncode + + +def test_micall_installation(temp_venv, micall_installation): + """ + Test to verify installation of MiCall. + + This test installs MiCall in an isolated virtual environment and verifies the installation + by executing the command `command -v micall`. + """ + + # Check MiCall executable path to verify installation + q = quote + stdout, stderr, returncode = run_command(f"export PATH= ; . {q(temp_venv)} && command -v micall") + assert returncode == 0, f"Cound not find MiCall installation:\n{stderr}" + assert stdout.endswith('micall'), "Unexpected output for micall path check." 
+ + +def test_micall_version(temp_venv, micall_installation): + """ + Test to verify installation of MiCall. + + This test installs MiCall in an isolated virtual environment and verifies the installation + by executing the command `micall --version`. + """ + + # Check MiCall version to verify installation + q = quote + stdout, stderr, returncode = run_command(f"export PATH= ; . {q(temp_venv)} && micall --version") + assert returncode == 0, f"MiCall version command failed:\n{stderr}" + lines = [line.strip() for line in stdout.split('\n')] + first_line = lines[0].strip() + assert re.match(r'(\d+[.]\d+[.]\d+)|development', first_line), "Unexpected output for micall version check." + + +def test_micall_help(temp_venv, micall_installation): + """ + Test to verify installation of MiCall. + + This test installs MiCall in an isolated virtual environment and verifies the installation + by executing the command `micall --help`. + """ + + # These are supposed to be listed in output of --help. + executables = [os.path.splitext(path.name)[0] for path in iterate_executables()] + + # Check MiCall help to verify installation + q = quote + stdout, stderr, returncode = run_command(f"export PATH= ; . {q(temp_venv)} && micall --help") + assert returncode == 0, f"MiCall help command failed:\n{stderr}" + + for executable in executables: + assert executable in stdout, f"Executable {executable!r} not listed in micall --help." + + +def test_executables_names(): + """ + Verify that all and only those executables found by `iterate_executables()` are used in micall/main.py. + """ + + assert set(EXECUTABLES) == set(map(str, iterate_executables())) + + +def test_executables_duplicates(): + """ + Verify that there is no duplication in names of executables. 
+ """ + + def get_name(path: Path) -> str: + return os.path.splitext(path.name)[0] + + executables = list(iterate_executables()) + + for key, group in groupby(executables, key=get_name): + paths = list(map(str, group)) + assert len(paths) == 1, f"Scripts {group!r} share the same executable name." diff --git a/micall/tests/test_kive_watcher.py b/micall/tests/test_kive_watcher.py index 929b30671..18d2f977b 100644 --- a/micall/tests/test_kive_watcher.py +++ b/micall/tests/test_kive_watcher.py @@ -23,7 +23,7 @@ trim_run_name, compress_old_versions from micall.monitor.sample_watcher import PipelineType, ALLOWED_GROUPS, FolderWatcher, SampleWatcher from micall.monitor.find_groups import SampleGroup -from micall_watcher import parse_args +from micall.monitor.micall_watcher import parse_args class DummyDataset: @@ -1759,13 +1759,13 @@ def test_launch_proviral_run(raw_data_with_two_samples, mock_open_kive): argument_name='sample_info_csv'), dict(dataset='/datasets/111/', argument_type='O', - argument_name='contigs_csv'), + argument_name='unstitched_contigs_csv'), dict(dataset='/datasets/112/', argument_type='O', - argument_name='conseq_csv'), + argument_name='unstitched_conseq_csv'), dict(dataset='/datasets/113/', argument_type='O', - argument_name='cascade_csv')]] # run datasets + argument_name='unstitched_cascade_csv')]] # run datasets mock_session.get.return_value.json.side_effect = [ dict(url='/datasets/110/', id=110), dict(url='/datasets/111/', id=111), @@ -1784,7 +1784,7 @@ def test_launch_proviral_run(raw_data_with_two_samples, mock_open_kive): dataset='/datasets/112/'), dict(argument='/containerargs/106', dataset='/datasets/113/')], - name='Proviral HIVSeqinR on 2120A', + name='Proviral on 2120A', batch='/batches/101', groups_allowed=['Everyone'])) @@ -3007,7 +3007,7 @@ def test_collate_denovo_results(raw_data_with_two_samples, default_config, mock_ expected_cascade_path = version_folder / "denovo" / "cascade.csv" expected_done_path = version_folder / "denovo" / 
"doneprocessing" - proviral_path = version_folder / "denovo" / "hivseqinr_results" + proviral_path = version_folder / "denovo" / "detailed_results" main_scratch_path = version_folder / "scratch" main_scratch_path.mkdir() @@ -3160,3 +3160,40 @@ def test_collate_csv_with_sample_already_filled(): KiveWatcher.extract_csv(source2, target, 'ignored', source_count=1) assert target.getvalue() == expected_target + +def test_launch_main_good_pipeline_id(mock_open_kive, default_config): + _mock_session = mock_open_kive.return_value + kive_watcher = KiveWatcher(default_config) + kive_watcher.app_urls = { + default_config.micall_filter_quality_pipeline_id: '/containerapps/102'} + kive_watcher.app_args = { + default_config.micall_filter_quality_pipeline_id: dict( + quality_csv='/containerargs/103')} + + inputs = {'quality_csv': {'url': '/datasets/104', 'id': 104}} + run_batch = {'url': '/batches/101'} + kive_watcher.find_or_launch_run(pipeline_id=42, + inputs=inputs, + run_name='MiCall filter quality on 140101_M01234', + run_batch=run_batch) + +def test_launch_main_bad_pipeline_id(mock_open_kive, default_config): + _mock_session = mock_open_kive.return_value + kive_watcher = KiveWatcher(default_config) + kive_watcher.app_urls = { + default_config.micall_filter_quality_pipeline_id: '/containerapps/102'} + kive_watcher.app_args = { + default_config.micall_filter_quality_pipeline_id: dict( + quality_csv='/containerargs/103')} + + inputs = {'quality_csv': {'bad_argument': 777, 'id': 104}} + run_batch = {'url': '/batches/101'} + pipeline_id = 42 + expected_msg = f'The specified app with id {pipeline_id}' \ + ' appears to expect a different set of inputs' + + with pytest.raises(ValueError, match=expected_msg) as _excinfo: + kive_watcher.find_or_launch_run(pipeline_id=pipeline_id, + inputs=inputs, + run_name='MiCall filter quality on 140101_M01234', + run_batch=run_batch) diff --git a/micall/tests/test_release_test_compare.py b/micall/tests/test_release_test_compare.py index 
823bd03b3..cc6a8608a 100644 --- a/micall/tests/test_release_test_compare.py +++ b/micall/tests/test_release_test_compare.py @@ -4,7 +4,7 @@ import typing -from release_test_compare import compare_sample, SampleFiles, Sample, \ +from micall.utils.release_test_compare import compare_sample, SampleFiles, Sample, \ MiseqRun, Scenarios, ConsensusDistance, group_samples_file, \ group_nucs_file, compare_consensus diff --git a/micall/tests/test_sample_sheet_parser.py b/micall/tests/test_sample_sheet_parser.py index 8364aeed1..ed401d451 100644 --- a/micall/tests/test_sample_sheet_parser.py +++ b/micall/tests/test_sample_sheet_parser.py @@ -730,6 +730,52 @@ def test_extra_commas(self): ss = sample_sheet_parser(StringIO(stub_sample_sheet)) self.assertEqual(ss["Experiment Name"], "10-Jul-2014") + def test_underscores_in_sample_name(self): + """ + Extracts the correct project code and sample name in presence of underscores. + """ + + stub_sample_sheet = """ +[Header] +IEMFileVersion,3 +Investigator Name,RL +Project Name,10-Jul-2014_v1test +Experiment Name,10-Jul-2014_v1test +Date,07/10/2014 +Workflow,GenerateFASTQ +Assay,Nextera +Description,Nextera +Chemistry,Amplicon +[Reads] +251 +251 +[Settings] +[Data] +Sample_ID,Sample_Name,Sample_Plate,Sample_Well,index,index2,Sample_Project,Description,GenomeFolder +CFE_SomeId_10-Jul-2014_N501-N701_Sample1_Proj1,Sample1_Proj1,10-Jul-2014_testing,N/A,ACGTACGT,TGCATGCA,\ +10-Jul-2014_testing,Research:Sample1_Proj1:TRUE Comments:Sample1_Proj1:thisiscommentone \ +Disablecontamcheck:Sample1_Proj1:FALSE, +CFE_SomeId_10-Jul-2014_N501-N702_Sample2_Proj2,Sample2_Proj2,10-Jul-2014_testing,N/A,AAAAGGGG,CCCCTTTT,\ +10-Jul-2014_testing,Research:Sample2_Foo_Proj2:FALSE Comments:Sample2_Foo_Proj2:thisiscommenttwo \ +Chemistry:Sample2_Foo_Proj2:BreakingBad Disablecontamcheck:Sample2_Foo_Proj2:TRUE, +""" + + ss = sample_sheet_parser(StringIO(stub_sample_sheet)) + split_rows = ss['DataSplit'] + assert len(split_rows) == 2 + + assert 
split_rows[0]['filename'] == 'Sample1-Proj1_S1' + assert split_rows[1]['filename'] == 'Sample2-Proj2_S2' + + assert split_rows[0]['project'] == 'Proj1' + assert split_rows[1]['project'] == 'Proj2' + + assert split_rows[0]['sample'] == 'Sample1' + assert split_rows[1]['sample'] == 'Sample2' + + assert split_rows[0]['sample_number'] == 'S1' + assert split_rows[1]['sample_number'] == 'S2' + def test_read_sample_sheet_overrides(tmpdir): sample_sheet_path = Path(str(tmpdir)) / 'SampleSheet.csv' diff --git a/micall/tests/test_tests_utils.py b/micall/tests/test_tests_utils.py new file mode 100644 index 000000000..3e0c62415 --- /dev/null +++ b/micall/tests/test_tests_utils.py @@ -0,0 +1,74 @@ + +from micall.tests.utils import MockAligner, MockAlignment + +def test_basic_mapping(): + + aligner = MockAligner('acgt' + 'a' * 20 + 'acgt') + + alignment = list(aligner.map('a' * 10)) + + assert len(alignment) == 5 + + alignment = alignment[0] + + assert isinstance(alignment, MockAlignment) + assert alignment.mapq == 60 + assert alignment.strand == 1 + assert alignment.r_st == 4 + assert alignment.r_en == 14 + assert alignment.q_st == 0 + assert alignment.q_en == 10 + + +def test_exact_match(): + aligner = MockAligner("abcdefg") + alignments = list(aligner.map("abc")) + assert len(alignments) == 1 + assert alignments[0].r_st == 0 + assert alignments[0].r_en == 3 + + +def test_no_match(): + aligner = MockAligner("abcdefg") + alignments = list(aligner.map("xyz")) + assert len(alignments) == 0 + + +def test_partial_match(): + aligner = MockAligner("abcdefg") + alignments = list(aligner.map("abxyabc")) + assert len(alignments) == 1 + assert alignments[0].r_st == 0 + assert alignments[0].r_en == 3 + + +def test_multiple_matches(): + aligner = MockAligner("A" * 40) + alignments = list(aligner.map("A" * 20)) + assert len(alignments) == 5 + assert alignments[0].r_st == 0 + assert alignments[0].r_en == 20 + assert alignments[1].r_st == 20 + assert alignments[1].r_en == 40 + + +def 
test_multiple_matches_bigger_query(): + aligner = MockAligner("A" * 40) + alignments = list(aligner.map("A" * 50)) + assert len(alignments) == 5 + assert alignments[0].r_st == 0 + assert alignments[0].r_en == 40 + assert alignments[1].r_st == 0 + assert alignments[1].r_en == 40 + + +def test_empty_reference(): + aligner = MockAligner("A" * 0) + alignments = list(aligner.map("A" * 20)) + assert len(alignments) == 0 + + +def test_empty_query(): + aligner = MockAligner("A" * 40) + alignments = list(aligner.map("A" * 0)) + assert len(alignments) == 0 diff --git a/micall/tests/utils.py b/micall/tests/utils.py new file mode 100644 index 000000000..9b76cf0a2 --- /dev/null +++ b/micall/tests/utils.py @@ -0,0 +1,78 @@ +from contextlib import contextmanager +from dataclasses import dataclass +import random +from aligntools import CigarActions +from typing import Tuple, List, Iterator + +from micall.utils.alignment import Alignment + + +def find_all_occurrences(s, substring): + start = 0 + while True: + start = s.find(substring, start) + if start == -1: # no more occurrences found + return + yield start + start += len(substring) + + +@dataclass(frozen=True) +class MockAlignment(Alignment): + is_primary: bool + + +class MockAligner: + """ + Mock for the mappy's aligner class. + Only reports exact matches. + """ + + def __init__(self, seq, *args, **kwargs): + self.seq = seq + self.max_matches = 5 + self.min_length = 3 + + def map(self, seq) -> Iterator[Alignment]: + max_matches = self.max_matches + returned = set() + for length in range(len(seq), self.min_length - 1, -1): + for start in range(len(seq) - length + 1): + end = start + length + substring = seq[start:end] + for r_st in find_all_occurrences(self.seq, substring): + mapq = 60 + strand = 1 # Doesn't handle reverse complements in this mock. 
+ r_en = r_st + len(substring) + q_st = start + q_en = end + cigar = [(q_en - q_st, CigarActions.MATCH)] + cigar_str = f'{(q_en - q_st)}M' + if (q_st, q_en, r_st, r_en) not in returned: + returned.add((q_st, q_en, r_st, r_en)) + yield MockAlignment(ctg='N/A', ctg_len=0, + strand=strand, mapq=mapq, + cigar=cigar, cigar_str=cigar_str, + q_st=q_st, q_en=q_en, + r_st=r_st, r_en=r_en, + is_primary=True) + + max_matches -= 1 + if max_matches < 1: + return + + +def mock_align_consensus(reference_seq: str, consensus: str) -> Tuple[List[Alignment], str]: + alignments = list(MockAligner(reference_seq).map(consensus)) + algorithm = 'mock' + return (alignments, algorithm) + + +@contextmanager +def fixed_random_seed(seed): + original_state = random.getstate() + random.seed(seed) + try: + yield + finally: + random.setstate(original_state) diff --git a/micall/utils/alignment.py b/micall/utils/alignment.py new file mode 100644 index 000000000..876b4077d --- /dev/null +++ b/micall/utils/alignment.py @@ -0,0 +1,60 @@ +from typing import Tuple, Sequence, Union, NoReturn +from dataclasses import dataclass + +from aligntools import CigarActions, Cigar, CigarHit +import mappy + + +@dataclass(frozen=True) +class Alignment: + """ + Our representation of mappy's Alignment object. 
+ """ + + ctg: str + ctg_len: int + r_st: int + r_en: int + strand: int + q_st: int + q_en: int + mapq: int + cigar: Sequence[Tuple[int, CigarActions]] + cigar_str: str + + @staticmethod + def coerce(obj: Union['Alignment', mappy.Alignment]) -> 'Alignment': + if isinstance(obj, Alignment): + return obj + elif isinstance(obj, mappy.Alignment): + cigar = [(size, CigarActions(action)) + for (size, action) in obj.cigar] + return Alignment(ctg=obj.ctg, + ctg_len=obj.ctg_len, + r_st=obj.r_st, r_en=obj.r_en, + strand=obj.strand, + q_st=obj.q_st, q_en=obj.q_en, + mapq=obj.mapq, + cigar=cigar, + cigar_str=obj.cigar_str, + ) + else: + _: NoReturn = obj + raise TypeError(f"Cannot coerce from {obj!r}.") + + def to_cigar_hit(self) -> CigarHit: + return CigarHit(Cigar(self.cigar), + r_st=self.r_st, r_ei=self.r_en - 1, + q_st=self.q_st, q_ei=self.q_en - 1) + + @staticmethod + def from_cigar_hit(hit: CigarHit, ctg: str, ctg_len: int, strand: int, mapq: int) -> 'Alignment': + return Alignment(ctg=ctg, + ctg_len=ctg_len, + r_st=hit.r_st, r_en=hit.r_ei + 1, + strand=strand, + q_st=hit.q_st, q_en=hit.q_ei + 1, + mapq=mapq, + cigar=list(hit.cigar._data), + cigar_str=str(hit.cigar), + ) diff --git a/micall/utils/aln2counts_simplify.py b/micall/utils/aln2counts_simplify.py new file mode 100644 index 000000000..8a93f2bdc --- /dev/null +++ b/micall/utils/aln2counts_simplify.py @@ -0,0 +1,187 @@ +import sys +from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter +from collections import defaultdict +import logging +import os +import re +from csv import DictReader +from typing import Union + +from micall.core.trim_fastqs import trim +from micall.utils.dd import DD +from micall.core.aln2counts import aln2counts + +logger = logging.getLogger(__name__) + +ALIGNED_CSV_HEADER = 'refname,qcut,rank,count,offset,seq' +SUBSEQ_ENV_VARNAME = 'MICALL_DD_SUBSEQ' + +def parse_args(argv): + parser = ArgumentParser( + description='Find the simplest list of aligned reads that reproduces a chosen 
problem.', + formatter_class=ArgumentDefaultsHelpFormatter) + parser.add_argument('filename', + help='Input file with the initial aligned reads') + parser.add_argument('simple', + help='Output file with the simplified aligned reads') + parser.add_argument('--test', + help='name of the test to run', + choices=MicallDD.test_names, + default=MicallDD.test_names[0]) + + return parser.parse_args(argv) + + +class MicallDD(DD): + test_names = ['subseq'] + + def __init__(self, + filename, + simple, + test_name): + super(MicallDD, self).__init__() + self.filename = filename + self.simple = simple + self.get_result = getattr(self, 'check_' + test_name) + self.reads = read_aligned(self.filename) + + expected_subsequence = os.environ.get(SUBSEQ_ENV_VARNAME, None) + if expected_subsequence is None: + raise RuntimeError(f"Expected ${SUBSEQ_ENV_VARNAME!r} environment variable to be set for the 'subseq' test") + + self.expected_subsequence_re = re.compile(expected_subsequence) + + def _test(self, read_indexes): + read_count = len(read_indexes) + self.write_simple_aligned(self.simple, read_indexes) + workdir = os.path.dirname(self.simple) + + def writer(filename): + return open(os.path.join(workdir, filename), 'w+') + + with open(self.simple, 'r') as aligned_csv, \ + writer('nuc.csv') as nuc_csv, \ + writer('amino.csv') as amino_csv, \ + writer('insertions.csv') as insertions_csv, \ + writer('conseq.csv') as conseq_csv, \ + writer('failed_align.csv') as failed_align_csv, \ + writer('nuc_detail.csv') as nuc_detail_csv, \ + writer('stitched.csv') as stitched_csv: + + # noinspection PyBroadException + try: + aln2counts(# Inputs # + aligned_csv, + nuc_csv, + amino_csv, + insertions_csv, + conseq_csv, + failed_align_csv, + # Outputs # + nuc_detail_csv=nuc_detail_csv, + conseq_stitched_csv=stitched_csv, + ) + + exception = None + except Exception as ex: + logger.warning(f'Read counting failed: {ex!r}.', exc_info=True) + exception = ex + + stitched_csv.seek(0) + result = 
self.get_result(stitched_csv, read_count, exception) + if result == DD.FAIL: + save_best(aligned_csv) + save_best(nuc_csv) + save_best(amino_csv) + save_best(insertions_csv) + save_best(conseq_csv) + save_best(failed_align_csv) + save_best(stitched_csv) + + return result + + def check_subseq(self, stitched_csv, read_count, exception): + if exception is not None: + return DD.UNRESOLVED + + simple_count = len(stitched_csv.readlines()) - 1 + + logger.debug('Result: %d stitched sequences from %d selected reads.', + simple_count, read_count) + + stitched_csv.seek(0) + success = self.expected_subsequence_re.search(stitched_csv.read()) + + return DD.FAIL if success else DD.PASS + + def write_simple_aligned(self, filename, read_indexes): + selected_reads = (self.reads[i] for i in read_indexes) + with open(filename, 'w') as f: + f.write(ALIGNED_CSV_HEADER) + f.write('\n') + for line in selected_reads: + f.write(line) + + def coerce(self, c): + if c is None: + return 'None' + blocks = [] # [[first, last]] indexes for all contiguous blocks + for i in c: + if (not blocks) or blocks[-1][-1] != i-1: + blocks.append([i, i]) + else: + blocks[-1][-1] = i + return '[' + ', '.join(str(block[0]) if block[0] == block[1] + else '{}-{}'.format(*block) + for block in blocks) + ']' + + +def save_best(file: Union[str, '_io.TextIOWrapper']): + """ Save the current best version of a file. + """ + + filename = file if type(file) is str else file.name + base, ext = os.path.splitext(filename) + best = base + '_best' + ext + + os.rename(filename, best) + + +def read_aligned(filename): + """ Load all the reads from an aligned reads file into a dictionary. + + @param filename: the aligned.csv file to open + @param reads: dict({index: line}) + """ + + with open(filename, 'r') as f: + header = next(f) + + # Sanity check that may detect instances where an incorrect file has been passed as input. 
+ if header.strip() != ALIGNED_CSV_HEADER.strip(): + raise ValueError(f'Aligned reads file {filename!r} does not start with a known header') + + return f.readlines() + + +def main(argv): + logging.basicConfig( + level=logging.DEBUG, + format='%(asctime)s[%(levelname)s]%(module)s:%(lineno)d - %(message)s', + stream=sys.stdout) + args = parse_args(argv) + try: + logger.info('Starting.') + dd = MicallDD(args.filename, + args.simple, + args.test) + read_indexes = list(range(len(dd.reads))) + min_indexes = dd.ddmin(read_indexes) + dd.test(min_indexes) + logger.info('Done.') + except Exception as ex: + logger.error('Failed.', exc_info=ex) + + +if __name__ == '__main__': + main(sys.argv[1:]) diff --git a/micall/utils/consensus_aligner.py b/micall/utils/consensus_aligner.py index ab0bbdbc9..35fbe4cee 100644 --- a/micall/utils/consensus_aligner.py +++ b/micall/utils/consensus_aligner.py @@ -1,18 +1,20 @@ -import typing +from typing import Dict, List, Optional, Set, Iterator, Iterable, Tuple from dataclasses import dataclass, replace -from enum import IntEnum -from itertools import count +from itertools import count, groupby from operator import attrgetter import csv import os import logging +from aligntools import CigarActions, Cigar, CigarHit, connect_nonoverlapping_cigar_hits from gotoh import align_it, align_it_aa -from mappy import Alignment, Aligner +from mappy import Aligner +import mappy from micall.core.project_config import ProjectConfig from micall.utils.report_amino import SeedAmino, ReportAmino, ReportNucleotide, SeedNucleotide from micall.utils.translation import translate +from micall.utils.alignment import Alignment logger = logging.getLogger(__name__) @@ -23,10 +25,115 @@ # Most codons in an insertion or deletion that is still aligned in amino acids. 
MAXIMUM_AMINO_GAP = 10 -CigarActions = IntEnum( - 'CigarActions', - 'MATCH INSERT DELETE SKIPPED SOFT_CLIPPED HARD_CLIPPED', - start=0) + +# +# Alignments with deletions larger than MAX_GAP_SIZE +# will be split around those deletions into multiple +# separate alignments. +# +MAX_GAP_SIZE = 600 # TODO: make this smaller? + + + +def align_gotoh(coordinate_seq: str, consensus: str) -> Optional[Alignment]: + gap_open_penalty = 15 + gap_extend_penalty = 3 + use_terminal_gap_penalty = 1 + assert '&' not in consensus, "Consensus contains forbidden character '&'" + consensus = ''.join('&' if x == '-' else x for x in consensus) + aligned_coordinate, aligned_consensus, score = align_it( + coordinate_seq, + consensus, + gap_open_penalty, + gap_extend_penalty, + use_terminal_gap_penalty) + + if min(len(coordinate_seq), len(consensus)) < score: + cigar = Cigar.from_msa(aligned_coordinate, aligned_consensus) + cigar = cigar.relax() # turn '=' and 'X' into 'M'. + hit = CigarHit(cigar, + q_st=0, q_ei=len(consensus)-1, + r_st=0, r_ei=len(coordinate_seq)-1) + hit = hit.lstrip_query().lstrip_reference().rstrip_query().rstrip_reference() + return Alignment.from_cigar_hit( + hit, + ctg='N/A', + ctg_len=len(coordinate_seq), + strand=1, + mapq=0) + else: + return None + + +def connect_alignments(alignments: Iterable[Alignment]) -> Iterator[Alignment]: + stranded = groupby(alignments, key=lambda x: (x.strand, x.ctg, x.ctg_len)) + for (strand, ctg, ctg_len), group_iter in stranded: + group = list(group_iter) + hits = list(map(Alignment.to_cigar_hit, group)) + connected_hits = connect_nonoverlapping_cigar_hits(hits) + mapq = min(x.mapq for x in group) + for hit in connected_hits: + yield Alignment.from_cigar_hit(hit, + ctg=ctg, ctg_len=ctg_len, + strand=strand, mapq=mapq) + + +def collect_big_gaps_cut_points(alignment: Alignment) -> Iterator[float]: + hit = alignment.to_cigar_hit() + for deletion in hit.deletions(): + if deletion.ref_length > MAX_GAP_SIZE: + midpoint = deletion.r_st + 
deletion.ref_length / 2 + yield int(midpoint) + hit.epsilon + + +def cut_hit_into_multiple_parts(hit: CigarHit, cut_points: Iterable[float]) -> Iterator[CigarHit]: + for cut_point in cut_points: + left, right = hit.cut_reference(cut_point) + left = left.rstrip_reference() + right = right.lstrip_reference() + yield left + hit = right + yield hit + + +def split_around_big_gaps(alignments: Iterable[Alignment]) -> Iterator[Alignment]: + for alignment in alignments: + cut_points = list(collect_big_gaps_cut_points(alignment)) + if cut_points: + hit = alignment.to_cigar_hit() + for part in cut_hit_into_multiple_parts(hit, cut_points): + yield Alignment.from_cigar_hit(part, + ctg=alignment.ctg, + ctg_len=alignment.ctg_len, + strand=alignment.strand, + mapq=alignment.mapq) + else: + yield alignment + + +def align_consensus(coordinate_seq: str, consensus: str) -> Tuple[List[Alignment], str]: + aligner = Aligner(seq=coordinate_seq, bw=500, bw_long=500, preset='map-ont') + mappy_alignments: List[mappy.Alignment] = list(aligner.map(consensus)) + if mappy_alignments or 10_000 < len(consensus): + algorithm = 'minimap2' + alignments = [Alignment.coerce(alignment) + for alignment in mappy_alignments + if alignment.is_primary] + + # Following code will connect non-overlapping alignments + # that mappy outputs sometimes. 
+ alignments = list(connect_alignments(reversed(alignments))) + else: + algorithm = 'gotoh' + gotoh_alignment = align_gotoh(coordinate_seq, consensus) + if gotoh_alignment: + alignments = [gotoh_alignment] + else: + alignments = [] + + alignments = list(split_around_big_gaps(alignments)) + alignments.sort(key=attrgetter('q_st')) + return (alignments, algorithm) def align_aminos(reference: str, @@ -62,109 +169,6 @@ def map_amino_sequences(from_seq: str, to_seq: str): return seq_map -class AlignmentWrapper(Alignment): - init_fields = ( - 'ctg ctg_len r_st r_en strand q_st q_en mapq cigar is_primary mlen ' - 'blen NM trans_strand read_num cs MD').split() - - @classmethod - def wrap(cls, source: Alignment, **overrides): - """ Wrap an Alignment object to make it easier to compare and display. - - Mostly used when testing. - """ - args = [getattr(source, field_name) - for field_name in cls.init_fields] - for name, value in overrides.items(): - i = cls.init_fields.index(name) - args[i] = value - return cls(*args) - - # noinspection PyPep8Naming - def __new__(cls, - ctg='', - ctg_len=0, - r_st=0, - r_en=0, - strand=1, - q_st=0, - q_en=0, - mapq=0, - cigar: typing.Iterable[typing.List[int]] = tuple(), - is_primary=True, - mlen=0, - blen=0, - NM=0, - trans_strand=0, - read_num=1, - cs='', - MD=''): - """ Create an instance. - - :param ctg: name of the reference sequence the query is mapped to - :param ctg_len: total length of the reference sequence - :param r_st and r_en: start and end positions on the reference - :param strand: +1 if on the forward strand; -1 if on the reverse strand - :param q_st and q_en: start and end positions on the query - :param mapq: mapping quality - :param cigar: CIGAR returned as an array of shape (n_cigar,2). The two - numbers give the length and the operator of each CIGAR operation. 
- :param is_primary: if the alignment is primary (typically the best and - the first to generate) - :param mlen: length of the matching bases in the alignment, excluding - ambiguous base matches. - :param blen: length of the alignment, including both alignment matches - and gaps but excluding ambiguous bases. - :param NM: number of mismatches, gaps and ambiguous positions in the - alignment - :param trans_strand: transcript strand. +1 if on the forward strand; -1 - if on the reverse strand; 0 if unknown - :param read_num: read number that the alignment corresponds to; 1 for - the first read and 2 for the second read - :param cs: the cs tag. - :param MD: the MD tag as in the SAM format. It is an empty string unless - the MD argument is applied when calling mappy.Aligner.map(). - """ - cigar = list(cigar) - if not mlen: - mlen = min(q_en-q_st, r_en-r_st) - if not blen: - blen = max(q_en-q_st, r_en-r_st) - if not cigar: - cigar = [[max(q_en-q_st, r_en-r_st), CigarActions.MATCH]] - return super().__new__(cls, - ctg, - ctg_len, - r_st, - r_en, - strand, - q_st, - q_en, - mapq, - cigar, - is_primary, - mlen, - blen, - NM, - trans_strand, - read_num-1, - cs, - MD) - - def __eq__(self, other: Alignment): - for field_name in self.init_fields: - self_value = getattr(self, field_name) - other_value = getattr(other, field_name) - if self_value != other_value: - return False - return True - - def __repr__(self): - return (f'AlignmentWrapper({self.ctg!r}, {self.ctg_len}, ' - f'{self.r_st}, {self.r_en}, {self.strand}, ' - f'{self.q_st}, {self.q_en})') - - class ConsensusAligner: def __init__(self, projects: ProjectConfig, @@ -178,14 +182,14 @@ def __init__(self, self.coordinate_name = self.consensus = self.amino_consensus = '' self.algorithm = '' self.consensus_offset = 0 - self.alignments: typing.List[Alignment] = [] - self.reading_frames: typing.List[typing.List[SeedAmino]] = [] - self.seed_nucs: typing.List[SeedNucleotide] = [] - self.amino_alignments: 
typing.List[AminoAlignment] = [] + self.alignments: List[Alignment] = [] + self.reading_frames: Dict[int, List[SeedAmino]] = {} + self.seed_nucs: List[SeedNucleotide] = [] + self.amino_alignments: List[AminoAlignment] = [] self.contig_name = contig_name # consensus nucleotide positions that were inserts - self.inserts: typing.Set[int] = set() + self.inserts: Set[int] = set() if alignments_file is not None: self.alignments_writer = self._create_alignments_writer(alignments_file) @@ -247,11 +251,9 @@ def _create_alignments_writer(alignments_file, different_columns=None): return writer def start_contig(self, - coordinate_name: str = None, - consensus: str = None, - reading_frames: typing.Dict[ - int, - typing.List[SeedAmino]] = None): + coordinate_name: Optional[str] = None, + consensus: Optional[str] = None, + reading_frames: Optional[Dict[int, List[SeedAmino]]] = None): self.clear() if consensus: @@ -276,17 +278,8 @@ def start_contig(self, coordinate_seq = self.projects.getGenotypeReference(coordinate_name) except KeyError: coordinate_seq = self.projects.getReference(coordinate_name) - aligner = Aligner(seq=coordinate_seq, preset='map-ont') - self.alignments = list(aligner.map(self.consensus)) - if self.alignments or 10_000 < len(self.consensus): - self.algorithm = 'minimap2' - else: - self.algorithm = 'gotoh' - self.align_gotoh(coordinate_seq, self.consensus) - self.alignments = [alignment - for alignment in self.alignments - if alignment.is_primary] - self.alignments.sort(key=attrgetter('q_st')) + + self.alignments, self.algorithm = align_consensus(coordinate_seq, self.consensus) if self.overall_alignments_writer is not None: for alignment in self.alignments: @@ -300,54 +293,12 @@ def start_contig(self, "cigar_str": alignment.cigar_str} self.overall_alignments_writer.writerow(row) - def align_gotoh(self, coordinate_seq, consensus): - gap_open_penalty = 15 - gap_extend_penalty = 3 - use_terminal_gap_penalty = 1 - aligned_coordinate, aligned_consensus, score = 
align_it( - coordinate_seq, - consensus, - gap_open_penalty, - gap_extend_penalty, - use_terminal_gap_penalty) - if min(len(coordinate_seq), len(consensus)) < score: - ref_start = len(aligned_consensus) - len(aligned_consensus.lstrip('-')) - aligned_consensus: str = aligned_consensus[ref_start:] - aligned_coordinate: str = aligned_coordinate[ref_start:] - aligned_consensus = aligned_consensus.rstrip('-') - ref_index = ref_start - consensus_index = 0 - cigar = [] - for ref_nuc, nuc in zip(aligned_coordinate, aligned_consensus): - expected_nuc = consensus[consensus_index] - ref_index += 1 - consensus_index += 1 - expected_action = CigarActions.MATCH - if nuc == '-' and nuc != expected_nuc: - expected_action = CigarActions.DELETE - consensus_index -= 1 - if ref_nuc == '-': - expected_action = CigarActions.INSERT - ref_index -= 1 - if cigar and cigar[-1][1] == expected_action: - cigar[-1][0] += 1 - else: - cigar.append([1, expected_action]) - self.alignments.append(AlignmentWrapper( - 'N/A', - len(coordinate_seq), - ref_start, - ref_index, - q_st=0, - q_en=consensus_index, - cigar=cigar)) - def find_amino_alignments(self, start_pos: int, end_pos: int, - repeat_pos: typing.Optional[int], - skip_pos: typing.Optional[int], - amino_ref: str): + repeat_pos: Optional[int], + skip_pos: Optional[int], + amino_ref: Optional[str]): translations = { reading_frame: translate( '-'*(reading_frame + self.consensus_offset) + @@ -526,11 +477,11 @@ def report_region( self, start_pos: int, end_pos: int, - report_nucleotides: typing.List[ReportNucleotide], - report_aminos: typing.List[ReportAmino] = None, - repeat_position: int = None, - skip_position: int = None, - amino_ref: str = None): + report_nucleotides: List[ReportNucleotide], + report_aminos: Optional[List[ReportAmino]] = None, + repeat_position: Optional[int] = None, + skip_position: Optional[int] = None, + amino_ref: Optional[str] = None): """ Add read counts to report counts for a section of the reference. 
:param start_pos: 1-based position of first nucleotide to report in @@ -564,7 +515,7 @@ def report_region( self.build_nucleotide_report(start_pos, end_pos, report_nucleotides) - else: + elif amino_ref is not None: report_aminos.extend(ReportAmino(SeedAmino(None), i + 1) for i in range(len(amino_ref))) self.build_amino_report(start_pos, @@ -592,13 +543,13 @@ def get_deletion_coverage(self, consensus_nuc_index): def build_amino_report(self, start_pos: int, end_pos: int, - report_nucleotides: typing.List[ReportNucleotide], - report_aminos: typing.List[ReportAmino] = None, - repeat_position: int = None, - skip_position: int = None, - amino_ref: str = None): + report_nucleotides: List[ReportNucleotide], + report_aminos: Optional[List[ReportAmino]] = None, + repeat_position: Optional[int] = None, + skip_position: Optional[int] = None, + amino_ref: Optional[str] = None): """ Add read counts to report counts for a section of the reference. - + Used for regions that translate to amino acids. :param start_pos: 1-based position of first nucleotide to report in @@ -656,13 +607,13 @@ def build_amino_report(self, @staticmethod def update_report_amino(coord_index: int, - report_aminos: typing.List[ReportAmino], - report_nucleotides: typing.List[ReportNucleotide], + report_aminos: List[ReportAmino], + report_nucleotides: List[ReportNucleotide], seed_amino: SeedAmino, start_pos: int, - repeat_position: int = None, - skip_position: int = None, - skipped_nuc=None): + repeat_position: Optional[int] = None, + skip_position: Optional[int] = None, + skipped_nuc: Optional[SeedAmino] =None): report_amino = report_aminos[coord_index] report_amino.seed_amino.add(seed_amino) ref_nuc_pos = coord_index * 3 + start_pos @@ -847,7 +798,7 @@ def count_match(self, def build_nucleotide_report(self, start_pos: int, end_pos: int, - report_nucleotides: typing.List[ReportNucleotide]): + report_nucleotides: List[ReportNucleotide]): """ Add read counts to report counts for a section of the reference. 
Used for regions that don't translate to amino acids. @@ -903,8 +854,8 @@ def seed_concordance(self, seed_name, projects, seed_coordinates, excluded_regio if self.seed_concordance_writer is None: return seed_ref = self.projects.getReference(seed_name) - seed_aligner = Aligner(seq=seed_ref, preset='map-ont') - seed_alignments = list(seed_aligner.map(self.consensus)) + seed_aligner = mappy.Aligner(seq=seed_ref, bw=500, bw_long=500, preset='map-ont') + seed_alignments: List[mappy.Alignment] = list(seed_aligner.map(self.consensus)) regions = projects.getCoordinateReferences(seed_name) for region in regions: @@ -920,14 +871,14 @@ def seed_concordance(self, seed_name, projects, seed_coordinates, excluded_regio continue self.region_seed_concordance(region, seed_name, seed_alignments, seed_ref, start_pos, end_pos) - def coord_concordance(self, half_window_size=10): + def coord_concordance(self, half_window_size: int = 10) -> List[float]: coord_alignments = self.alignments try: coord_ref = self.projects.getGenotypeReference(self.coordinate_name) except KeyError: coord_ref = self.projects.getReference(self.coordinate_name) query_matches = [0] * len(self.consensus) - concordance_list: typing.List[typing.Any] = [None] * len(self.consensus) + concordance_list: List[float] = [0] * len(self.consensus) for alignment in coord_alignments: ref_progress = alignment.r_st @@ -1018,11 +969,11 @@ class AminoAlignment: ref_end: int action: CigarActions reading_frame: int - query: str = None # Amino sequence - ref: str = None # Amino sequence - aligned_query: str = None - aligned_ref: str = None - ref_amino_start: int = None + query: Optional[str] = None # Amino sequence + ref: Optional[str] = None # Amino sequence + aligned_query: Optional[str] = None + aligned_ref: Optional[str] = None + ref_amino_start: Optional[int] = None def has_overlap(self, start_pos: int, end_pos: int) -> bool: before_end = self.ref_start < end_pos @@ -1072,8 +1023,15 @@ def size(self): def amino_size(self): return 
(self.size + 2) // 3 - def map_amino_sequences(self) -> typing.Dict[int, int]: + def map_amino_sequences(self) -> Dict[int, int]: """ Map reference amino indexes to query amino indexes. """ + + assert self.aligned_ref is not None, "For this operation, aligned_ref must not be None" + assert self.aligned_query is not None, "For this operation, aligned_query must not be None" + assert self.query is not None, "For this operation, query must not be None" + assert self.ref is not None, "For this operation, ref must not be None" + assert self.ref_amino_start is not None, "For this operation, ref_amino_start must not be None" + seq_map = {} query_offset = (self.query_start + self.reading_frame) // 3 ref_index = query_index = 0 diff --git a/micall/utils/contig_blaster.py b/micall/utils/contig_blaster.py index 3a872f8af..ccdbacd0a 100644 --- a/micall/utils/contig_blaster.py +++ b/micall/utils/contig_blaster.py @@ -5,7 +5,7 @@ from operator import itemgetter from tempfile import NamedTemporaryFile -from micall.core.denovo import write_contig_refs +from micall.utils.fasta_to_csv import fasta_to_csv def parse_args(): @@ -44,7 +44,7 @@ def main(): fasta_file.flush() new_contigs_csv = StringIO() blast_csv = StringIO() - write_contig_refs(fasta_file.name, new_contigs_csv, blast_csv=blast_csv) + fasta_to_csv(fasta_file.name, new_contigs_csv, blast_csv=blast_csv) blast_csv.seek(0) for source_contig_num, contig_rows in groupby(DictReader(blast_csv), itemgetter('contig_num')): diff --git a/micall/utils/contig_stitcher_context.py b/micall/utils/contig_stitcher_context.py new file mode 100644 index 000000000..7672733da --- /dev/null +++ b/micall/utils/contig_stitcher_context.py @@ -0,0 +1,53 @@ +from typing import List, Dict +from contextvars import ContextVar +from contextlib import contextmanager +from dataclasses import dataclass +from copy import deepcopy + +import micall.utils.contig_stitcher_events as st_events + + +@dataclass +class StitcherContext: + uniq_dict: Dict[object, 
Dict[object, int]] + events: List[st_events.EventType] + + def register(self, key: object, value: object) -> int: + if value not in self.uniq_dict: + self.uniq_dict[value] = {} + + existing = self.uniq_dict[value] + if key not in existing: + existing[key] = len(existing) + 1 + + return existing[key] + + def emit(self, event: st_events.EventType) -> None: + self.events.append(event) + + @staticmethod + def make() -> 'StitcherContext': + return StitcherContext(events=[], uniq_dict={}) + + @staticmethod + @contextmanager + def fresh(): + ctx = StitcherContext.make() + token = context.set(ctx) + try: + yield ctx + finally: + context.reset(token) + + @staticmethod + @contextmanager + def stage(): + ctx = deepcopy(context.get()) + token = context.set(ctx) + try: + yield ctx + finally: + context.reset(token) + + +context: ContextVar[StitcherContext] = ContextVar("StitcherContext") diff --git a/micall/utils/contig_stitcher_contigs.py b/micall/utils/contig_stitcher_contigs.py new file mode 100644 index 000000000..a05c5bdc4 --- /dev/null +++ b/micall/utils/contig_stitcher_contigs.py @@ -0,0 +1,67 @@ +from dataclasses import dataclass +from typing import Optional, Literal +from functools import cached_property +from aligntools import CigarHit + + +ID_STATE = 0 + +def generate_new_id() -> int: + global ID_STATE + ID_STATE += 1 + return ID_STATE + + +@dataclass(frozen=True) +class Contig: + name: Optional[str] + seq: str + + @cached_property + def id(self) -> int: + return generate_new_id() + + @cached_property + def unique_name(self) -> str: + index = self.register() + unqualified = repr(self.name) if self.name is not None else "" + if index == 1 and self.name: + return unqualified + else: + return unqualified + f'({index})' + + def register(self) -> int: + from micall.utils.contig_stitcher_context import context + ctx = context.get() + return ctx.register(key=self.id, value=self.name) + + +@dataclass(frozen=True) +class GenotypedContig(Contig): + ref_name: str + group_ref: 
Optional[str] + + # The sequence of self.group_ref. None in cases where the reference organism is unknown. + ref_seq: Optional[str] + + # Approximated overall concordance between `seq` and `ref_seq`. + # It is calculated by BLAST as qcovhsp/100, where qcovhsp means Query Coverage Per HSP. + match_fraction: float + + +@dataclass(frozen=True) +class AlignedContig(GenotypedContig): + alignment: CigarHit + strand: Literal["forward", "reverse"] + + @staticmethod + def make(query: GenotypedContig, alignment: CigarHit, strand: Literal["forward", "reverse"]): + return AlignedContig( + alignment=alignment, + strand=strand, + seq=query.seq, + name=query.name, + ref_name=query.ref_name, + group_ref=query.group_ref, + ref_seq=query.ref_seq, + match_fraction=query.match_fraction) diff --git a/micall/utils/contig_stitcher_events.py b/micall/utils/contig_stitcher_events.py new file mode 100644 index 000000000..bfa5c5ed2 --- /dev/null +++ b/micall/utils/contig_stitcher_events.py @@ -0,0 +1,305 @@ +from typing import Union, List, Tuple, Literal +from dataclasses import dataclass +from fractions import Fraction +from aligntools import CigarHit + +from micall.utils.contig_stitcher_contigs import GenotypedContig, AlignedContig + + +@dataclass(frozen=True) +class Cut: + original: AlignedContig + left: AlignedContig + right: AlignedContig + cut_point: float + + def __str__(self) -> str: + return ( + f"Created contigs {self.left.unique_name} at {self.left.alignment} and " + f"{self.right.unique_name} at {self.right.alignment} by cutting " + f"{self.original.unique_name} at {self.original.alignment} at cut point = " + f"{round(self.cut_point, 1)}." 
+ ) + + +@dataclass(frozen=True) +class LStrip: + original: AlignedContig + result: AlignedContig + + def __str__(self) -> str: + return ( + f"Doing lstrip of {self.original.unique_name} at {self.original.alignment} (len " + f"{len(self.original.seq)}) resulted in {self.result.unique_name} at " + f"{self.result.alignment} (len {len(self.result.seq)})." + ) + + +@dataclass(frozen=True) +class RStrip: + original: AlignedContig + result: AlignedContig + + def __str__(self) -> str: + return ( + f"Doing rstrip of {self.original.unique_name} at {self.original.alignment} (len " + f"{len(self.original.seq)}) resulted in {self.result.unique_name} at " + f"{self.result.alignment} (len {len(self.result.seq)})." + ) + + +@dataclass(frozen=True) +class Munge: + left: AlignedContig + right: AlignedContig + result: AlignedContig + + def __str__(self) -> str: + return ( + f"Munged contigs {self.left.unique_name} at {self.left.alignment} with " + f"{self.right.unique_name} at {self.right.alignment} resulting in " + f"{self.result.unique_name} at {self.result.alignment}." + ) + + +@dataclass(frozen=True) +class Combine: + contigs: List[AlignedContig] + result: AlignedContig + + def __str__(self) -> str: + contigs_str = ', '.join( + [f"{x.unique_name} at {x.alignment} (len {len(x.seq)})" for x in self.contigs]) + return ( + f"Created a frankenstein {self.result.unique_name} at {self.result.alignment} " + f"(len {len(self.result.seq)}) from [{contigs_str}]." + ) + + +@dataclass(frozen=True) +class NoRef: + contig: GenotypedContig + + def __str__(self) -> str: + return f"Contig {self.contig.unique_name} not aligned - no reference." + + +@dataclass(frozen=True) +class InitialHit: + contig: GenotypedContig + index: int + hit: CigarHit + strand: Literal["forward", "reverse"] + + def __str__(self) -> str: + strand_info = '' if self.strand == 'forward' else ' (rev)' + return ( + f"Part {self.index} of contig {self.contig.unique_name} aligned at {self.hit}" + f"{strand_info}." 
+ ) + + +@dataclass(frozen=True) +class ZeroHits: + contig: GenotypedContig + + def __str__(self) -> str: + return f"Contig {self.contig.unique_name} not aligned - backend's choice." + + +@dataclass(frozen=True) +class StrandConflict: + contig: GenotypedContig + + def __str__(self) -> str: + return ( + f"Discarding contig {self.contig.unique_name} because it aligned both in forward " + "and reverse sense." + ) + + +@dataclass(frozen=True) +class ReverseComplement: + contig: GenotypedContig + result: GenotypedContig + + def __str__(self) -> str: + return f"Reverse complemented contig {self.contig.unique_name}." + + +@dataclass(frozen=True) +class HitNumber: + contig: GenotypedContig + initial: List[Tuple[CigarHit, Literal["reverse", "forward"]]] + connected: List[CigarHit] + + def __str__(self) -> str: + return ( + f"Contig {self.contig.unique_name} produced {len(self.initial)} aligner hits. " + f"After connecting them, the number became {len(self.connected)}." + ) + + +@dataclass(frozen=True) +class ConnectedHit: + contig: GenotypedContig + part: AlignedContig + index: int + + def __str__(self) -> str: + part_strand_info = '' if self.part.strand == 'forward' else ' (rev)' + return ( + f"Part {self.index} of contig {self.contig.unique_name} re-aligned as " + f"{self.part.unique_name} at {self.part.alignment}{part_strand_info}." + ) + + +@dataclass(frozen=True) +class InitialStrip: + contig: AlignedContig + q_st: int + q_ei: int + + def __str__(self) -> str: + return ( + f"Trimming (strip) contig {self.contig.unique_name} from {self.q_st} to " + f"{self.q_ei}." 
+ ) + + +@dataclass(frozen=True) +class StitchCut: + left: AlignedContig + right: AlignedContig + left_overlap: AlignedContig + right_overlap: AlignedContig + left_remainder: AlignedContig + right_remainder: AlignedContig + + def __str__(self) -> str: + return ( + f"Stitching {self.left.unique_name} at {self.left.alignment} (len {len(self.left.seq)}) " + f"with {self.right.unique_name} at {self.right.alignment} (len {len(self.right.seq)}). " + f"The left_overlap {self.left_overlap.unique_name} is at {self.left_overlap.alignment} " + f"(len {len(self.left_overlap.seq)}) and the right_overlap {self.right_overlap.unique_name} is " + f"at {self.right_overlap.alignment} (len {len(self.right_overlap.seq)})." + ) + + +@dataclass(frozen=True) +class Overlap: + left: AlignedContig + right: AlignedContig + left_overlap: AlignedContig + right_overlap: AlignedContig + left_remainder: AlignedContig + right_remainder: AlignedContig + left_take: AlignedContig + right_take: AlignedContig + concordance: List[Fraction] + average: Fraction + cut_point: int + cut_point_scaled: float + + def __str__(self) -> str: + average_concordance = round(self.average * 100) + cut_point_location_scaled = round(self.cut_point_scaled * 100) + concordance_str = ', '.join(str(int(round(x * 100)) / 100) for x in self.concordance) + return ( + f"Created overlap contigs {self.left_take.unique_name} at {self.left_overlap.alignment} and " + f"{self.right_take.unique_name} at {self.right_take.alignment} based on parts of " + f"{self.left.unique_name} and {self.right.unique_name}, with avg. concordance {average_concordance}%, " + f"cut point at {cut_point_location_scaled}%, and full concordance [{concordance_str}]." + ) + + +@dataclass(frozen=True) +class NoOverlap: + contig: AlignedContig + + def __str__(self) -> str: + return f"Nothing overlaps with {self.contig.unique_name}." 
+ + +@dataclass(frozen=True) +class Stitch: + left: AlignedContig + right: AlignedContig + result: AlignedContig + + def __str__(self) -> str: + return ( + f"Stitching {self.left.unique_name} with {self.right.unique_name} results in " + f"{self.result.unique_name} at {self.result.alignment} (len {len(self.result.seq)})." + ) + + +@dataclass(frozen=True) +class Drop: + contig: AlignedContig + covering: List[AlignedContig] + + def __str__(self) -> str: + covering_contig_names = ', '.join(repr(x.unique_name) for x in self.covering) + return ( + f"Dropped contig {self.contig.unique_name} as it is completely covered by these contigs: " + f"{covering_contig_names}." + ) + + +@dataclass(frozen=True) +class IgnoreGap: + contig: AlignedContig + gap: CigarHit + + def __str__(self) -> str: + return f"Ignored insignificant gap of {self.contig.unique_name}, {self.gap}." + + +@dataclass(frozen=True) +class SplitGap: + contig: AlignedContig + gap: CigarHit + left: AlignedContig + right: AlignedContig + + def __str__(self) -> str: + return ( + f"Split contig {self.contig.unique_name} at {self.contig.alignment} around its gap at " + f"[{self.gap.q_st}, {self.gap.q_ei}]->[{self.gap.r_st}, {self.gap.r_ei}]. Left part: " + f"{self.left.unique_name} at {self.left.alignment}, right part: {self.right.unique_name} at " + f"{self.right.alignment}." + ) + + +@dataclass(frozen=True) +class Intro: + contig: GenotypedContig + + def __str__(self) -> str: + return ( + f"Introduced contig {self.contig.unique_name} (seq = {self.contig.seq}) of ref " + f"{self.contig.ref_name!r}, group_ref {self.contig.group_ref} (seq = {self.contig.ref_seq}), " + f"and length {len(self.contig.seq)}." 
+ ) + + +@dataclass(frozen=True) +class FinalCombine: + contigs: List[AlignedContig] + result: AlignedContig + + def __str__(self) -> str: + contigs_str = [f"{x.unique_name} at {x.alignment} (len {len(x.seq)})" for x in self.contigs] + contigs_format = ', '.join(contigs_str) + return ( + f"Combining these contigs for final output for {self.result.group_ref}: " + f"[{contigs_format}]." + ) + + +AlignmentEvent = Union[NoRef, InitialHit, ZeroHits, StrandConflict, ReverseComplement, + HitNumber, ConnectedHit] +ModifyEvent = Union[LStrip, RStrip] +EventType = Union[Cut, ModifyEvent, Munge, Combine, AlignmentEvent, InitialStrip, StitchCut, + Overlap, NoOverlap, Stitch, Drop, IgnoreGap, SplitGap, Intro, FinalCombine] diff --git a/micall/utils/contig_summary.py b/micall/utils/contig_summary.py index a798139d7..2df68bdf9 100644 --- a/micall/utils/contig_summary.py +++ b/micall/utils/contig_summary.py @@ -5,7 +5,7 @@ from Bio.Blast.Applications import NcbiblastnCommandline -from micall.core.denovo import DEFAULT_DATABASE +from micall.utils.fasta_to_csv import default_database import matplotlib matplotlib.use('Agg') @@ -61,16 +61,17 @@ def main(): print(sample_dir, contigs_fasta_paths) continue contigs_fasta_path, = contigs_fasta_paths - cline = NcbiblastnCommandline(query=str(contigs_fasta_path), - db=DEFAULT_DATABASE, - outfmt=blast_format, - evalue=0.0001, - gapopen=5, - gapextend=2, - penalty=-3, - reward=1, - max_target_seqs=5000) - stdout, _ = cline(stderr=False) + with default_database() as DEFAULT_DATABASE: + cline = NcbiblastnCommandline(query=str(contigs_fasta_path), + db=DEFAULT_DATABASE, + outfmt=blast_format, + evalue=0.0001, + gapopen=5, + gapextend=2, + penalty=-3, + reward=1, + max_target_seqs=5000) + stdout, _ = cline(stderr=False) plot_contigs(sample_dir, stdout) plot_path = contig_plots_path / (sample_dir.name + '.png') plt.savefig(str(plot_path)) diff --git a/micall/utils/denovo_simplify.py b/micall/utils/denovo_simplify.py index 9bbfe83e6..6786a6458 
100644 --- a/micall/utils/denovo_simplify.py +++ b/micall/utils/denovo_simplify.py @@ -90,7 +90,7 @@ def _test(self, read_indexes): exception = None # noinspection PyBroadException try: - denovo(trimmed_filename1, trimmed_filename2, contigs_csv, workdir) + denovo(trimmed_filename1, trimmed_filename2, contigs_csv, None, workdir) except Exception as ex: logger.warning('Assembly failed.', exc_info=True) exception = ex diff --git a/docker_build.py b/micall/utils/docker_build.py similarity index 100% rename from docker_build.py rename to micall/utils/docker_build.py diff --git a/micall/utils/externals.py b/micall/utils/externals.py index e0f1bf01d..9bc08dee5 100644 --- a/micall/utils/externals.py +++ b/micall/utils/externals.py @@ -3,6 +3,7 @@ import sys import re from subprocess import CalledProcessError +from pathlib import Path class AssetWrapper(object): @@ -10,10 +11,10 @@ class AssetWrapper(object): def __init__(self, path, **kwargs): # noinspection PyArgumentList super(AssetWrapper, self).__init__(**kwargs) - app_dir = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) - local_path = os.path.join(app_dir, path) - if os.path.exists(local_path): - self.path = local_path + app_dir = Path(__file__).parent.parent / "assets" + local_path = app_dir / path + if local_path.exists(): + self.path = str(local_path) else: self.path = os.path.join(getattr(sys, '_MEIPASS', ''), path) diff --git a/micall/utils/fasta_to_csv.py b/micall/utils/fasta_to_csv.py new file mode 100644 index 000000000..892291a98 --- /dev/null +++ b/micall/utils/fasta_to_csv.py @@ -0,0 +1,293 @@ +import argparse +import logging +import os +import typing +from typing import Optional, TextIO, Iterable, Dict, cast, Sequence, Iterator +from collections import Counter +from csv import DictWriter, DictReader +from itertools import groupby +from operator import itemgetter +from pathlib import Path +import contextlib + +from io import StringIO +import importlib.resources as resources + +from Bio import 
SeqIO +from Bio.Blast.Applications import NcbiblastnCommandline + +from micall.core.project_config import ProjectConfig +from micall.utils.contig_stitcher_contigs import GenotypedContig + + +@contextlib.contextmanager +def reference_dir() -> Iterator[Path]: + """ + A context manager handling reference sequences paths packaged with MiCall. + + The complexity of the function arises from the need to maintain compatibility with + multiple python versions due to changes in APIs of the `importlib.resources` package. + + It first tries to fetch the resource using `resources.files` function introduced in + Python 3.9. If it fails, it falls back on `resources.path`. + It further ensures that the obtained resource is returned + as a Path instance regardless of it being a string, Path, or contextlib context-manager instance. + + Note: `resources.path` is set to be deprecated in future Python versions, hence the + intended primary method is using `resources.files`. + + Yields: + Path: A path-like object pointing to the reference directory within 'micall'. + """ + + try: + ret = resources.as_file(resources.files('micall').joinpath('blast_db')) # type: ignore + except AttributeError: + ret = resources.path('micall', 'blast_db') # type: ignore + + if isinstance(ret, str): + yield Path(ret) + elif isinstance(ret, Path): + yield ret + else: + with ret as path: + yield path + + +@contextlib.contextmanager +def default_database() -> Iterator[str]: + with reference_dir() as blast_db: + yield str(blast_db / "refs.fasta") + + +def read_assembled_contigs(group_refs: Dict[str, str], + genotypes: Dict[str, typing.Tuple[str, float]], + contigs_fasta_path: str) -> Iterable[GenotypedContig]: + """Read assembled contigs and generate GenotypedContig objects. + + Args: + group_refs (Dict[str, str]): Mapping of reference names to group references. + genotypes (Dict[str, Tuple[str, float]]): Mapping of contig names to (reference name, match fraction). 
+ contigs_fasta_path (str): Path to the FASTA file containing contig sequences. + + Returns: + Iterable[GenotypedContig]: An iterable of GenotypedContig objects. + """ + projects = ProjectConfig.loadDefault() + + for i, record in enumerate(SeqIO.parse(contigs_fasta_path, "fasta")): + (ref_name, match_fraction) = genotypes.get(record.name, ('unknown', 0)) + seq = record.seq + if match_fraction < 0: + seq = seq.reverse_complement() + match_fraction *= -1 + + group_ref = group_refs.get(ref_name) + try: + ref_seq = projects.getGenotypeReference(group_ref) + except KeyError: + try: + ref_seq = projects.getReference(group_ref) + except KeyError: + ref_seq = None + + yield GenotypedContig(name=record.name, + seq=str(seq), + ref_name=ref_name, + group_ref=group_ref, + ref_seq=str(ref_seq) if ref_seq is not None else None, + match_fraction=match_fraction) + + +def init_contigs_refs(contigs_csv: TextIO) -> DictWriter: + """Initialize a CSV writer with header for contig references. + + Args: + contigs_csv (TextIO): Open file object to write the contig references. + + Returns: + DictWriter: A CSV DictWriter object initialized with the headers. + """ + writer = DictWriter(contigs_csv, + ['ref', 'match', 'group_ref', 'contig'], + lineterminator=os.linesep) + writer.writeheader() + return writer + + +def write_contigs(writer: DictWriter, + group_refs: Dict[str, str], + genotypes: Dict[str, typing.Tuple[str, float]], + contigs_fasta_path: str): + """Write contigs to a CSV file. + + Args: + writer (DictWriter): CSV writer to write contigs. + group_refs (Dict[str, str]): Mapping of reference names to group references. + genotypes (Dict[str, Tuple[str, float]]): Mapping of contig names to (reference name, match fraction). + contigs_fasta_path (str): Path to the FASTA file containing contig sequences. 
+ """ + for contig in read_assembled_contigs(group_refs, genotypes, contigs_fasta_path): + writer.writerow(dict(ref=contig.ref_name, + match=contig.match_fraction, + group_ref=contig.group_ref, + contig=contig.seq)) + + +def genotype(fasta: str, db: Optional[str] = None, + blast_csv: Optional[TextIO] = None, + group_refs: Optional[Dict[str, str]] = None) -> Dict[str, typing.Tuple[str, float]]: + """Use Blastn to search for the genotype of a set of reference sequences. + + Args: + fasta (str): File path of the FASTA file containing the query sequences. + db (Optional[str]): File path of the database to search for matches. + blast_csv (Optional[TextIO]): Open file to write the blast matches to, or None. + group_refs (Optional[Dict[str, str]]): Dictionary to fill with the mapping from + each contig's reference name to the best matched reference for the whole seed group. + + Returns: + Dict[str, Tuple[str, float]]: Mapping of query name to (reference name, matched fraction). + """ + + contig_nums: Dict[str, int] = {} # {contig_name: contig_num} + with open(fasta) as f: + for line in f: + if line.startswith('>'): + contig_name = line[1:-1] + contig_nums[contig_name] = len(contig_nums) + 1 + blast_columns = ['qaccver', + 'saccver', + 'pident', + 'score', + 'qcovhsp', + 'qstart', + 'qend', + 'sstart', + 'send'] + + def invoke_blast(db: str) -> str: + cline = NcbiblastnCommandline(query=fasta, + db=db, + outfmt=f'"10 {" ".join(blast_columns)}"', + evalue=0.0001, + gapopen=5, + gapextend=2, + penalty=-3, + reward=1, + max_target_seqs=5000) + stdout, _ = cline() + return stdout + + if db is None: + with default_database() as db: + stdout = invoke_blast(db) + else: + stdout = invoke_blast(db) + + samples = {} # {query_name: (subject_name, matched_fraction)} + matches = sorted(DictReader(StringIO(stdout), blast_columns), + key=lambda row: (row['qaccver'], float(row['score']))) + if not blast_csv: + blast_writer = None + else: + blast_writer = DictWriter(blast_csv, + 
['contig_num', + 'ref_name', + 'score', + 'match', + 'pident', + 'start', + 'end', + 'ref_start', + 'ref_end'], + lineterminator=os.linesep) + blast_writer.writeheader() + contig_top_matches = {match['qaccver']: match['saccver'] + for match in matches} + top_refs = set(contig_top_matches.values()) + projects = ProjectConfig.loadDefault() + match_scores: typing.Counter[str] = Counter() + for contig_name, contig_matches in groupby(matches, itemgetter('qaccver')): + contig_top_ref = contig_top_matches[contig_name] + contig_seed_group = projects.getSeedGroup(contig_top_ref) + for match in contig_matches: + ref_name = match['saccver'] + if ref_name not in top_refs: + continue + match_seed_group = projects.getSeedGroup(ref_name) + if match_seed_group == contig_seed_group: + match_scores[ref_name] += float(match['score']) # type: ignore[assignment] + + if group_refs is not None: + group_top_refs = {projects.getSeedGroup(ref_name): ref_name + for ref_name, count in reversed(match_scores.most_common())} + for ref_name in contig_top_matches.values(): + group_refs[ref_name] = group_top_refs[projects.getSeedGroup(ref_name)] + + for match in matches: + matched_fraction = float(match['qcovhsp']) / 100 + if int(match['send']) < int(match['sstart']): + matched_fraction *= -1 + pident = round(float(match['pident'])) + contig_name = match['qaccver'] + samples[contig_name] = (match['saccver'], matched_fraction) + if blast_writer: + blast_writer.writerow(dict(contig_num=contig_nums[contig_name], + ref_name=match['saccver'], + score=match['score'], + match=matched_fraction, + pident=pident, + start=match['qstart'], + end=match['qend'], + ref_start=match['sstart'], + ref_end=match['send'])) + return samples + + +def fasta_to_csv(contigs_fasta_path: str, + contigs_csv: TextIO, + merged_contigs_csv: Optional[TextIO] = None, + blast_csv: Optional[TextIO] = None) -> None: + """Run BLAST search to identify contig sequences and write them to CSV. 
+ + Args: + contigs_fasta_path (str): Path to the FASTA file containing contig sequences. + contigs_csv (TextIO): Open file to write assembled contigs to. + merged_contigs_csv: open file to read contigs that were merged from amplicon reads. + blast_csv (Optional[TextIO]): Open file to write BLAST search results for each contig. + """ + + with open(contigs_fasta_path, 'a') as contigs_fasta: + if merged_contigs_csv is not None: + contig_reader = DictReader(merged_contigs_csv) + for i, row in enumerate(contig_reader, 1): + contig_name = f'merged-contig-{i}' + contigs_fasta.write(f">{contig_name}\n{row['contig']}\n") + + writer = init_contigs_refs(cast(TextIO, contigs_csv)) + group_refs: Dict[str, str] = {} + + genotypes = genotype(contigs_fasta_path, blast_csv=blast_csv, group_refs=group_refs) + + write_contigs(writer, group_refs, genotypes, contigs_fasta_path) + contigs_csv.flush() + + +def main(argv: Sequence[str]): + logging.basicConfig(level=logging.INFO) + parser = argparse.ArgumentParser(description="Convert contigs from FASTA to CSV format with BLAST annotations.") + parser.add_argument('contigs_fasta', help="Input FASTA file with contig sequences.") + parser.add_argument('contigs_csv', type=argparse.FileType('w'), + help="Output CSV file to write assembled contigs.") + parser.add_argument('--merged_contigs_csv', type=argparse.FileType('r'), + help="Optional CSV file with contigs that were merged from amplicon reads.") + parser.add_argument('--blast_csv', type=argparse.FileType('w'), + help="Optional CSV file to write BLAST search results.") + args = parser.parse_args(argv) + fasta_to_csv(args.contigs_fasta, args.contigs_csv, args.merged_contigs_csv, args.blast_csv) + + +if __name__ == "__main__": + import sys + main(sys.argv[1:]) diff --git a/micall/utils/find_chimera.py b/micall/utils/find_chimera.py index 3df477e36..3b636aa71 100644 --- a/micall/utils/find_chimera.py +++ b/micall/utils/find_chimera.py @@ -100,7 +100,7 @@ def process_file(sample_name, 
projects, args): nuc_counts = defaultdict(dict) # {(source, dest): {pos: (agree, disagree)}} nucleotide_path = os.path.join(args.results, 'nuc.csv') - with open(nucleotide_path, 'rU') as nuc_csv: + with open(nucleotide_path, 'r') as nuc_csv: reader = DictReader(nuc_csv) sample_rows = (row for row in reader if row['sample'] == sample_name) diff --git a/micall/utils/get_list_of_executables.py b/micall/utils/get_list_of_executables.py new file mode 100644 index 000000000..d43206b0f --- /dev/null +++ b/micall/utils/get_list_of_executables.py @@ -0,0 +1,68 @@ +#! /usr/bin/env python3 + +import argparse +import sys +from typing import Sequence, Iterator +import os +from pathlib import Path +import re + + +def is_executable_script(content: str) -> bool: + if content.startswith("#!"): + return True + + if re.findall(r'__name__\s*==\s*[\'"]__main__', content): + return True + + if 'import argparse' in content: + return True + + if 'from argparse' in content: + return True + + return False + + +def iterate_executables() -> Iterator[Path]: + script_path: Path = Path(__file__).resolve() + base_dir = script_path.parent.parent.parent + + # Iterate over all files in the base directory. + for root, _, files in os.walk(base_dir): + for file in files: + + # Process only files with a .py extension. + if not file.endswith('.py'): + continue + + file_path = Path(root) / file + content = file_path.read_text() + + if is_executable_script(content): + relative = file_path.relative_to(base_dir) + yield relative + + +def main(argv: Sequence[str]) -> int: + """ + Main function to list the script files. + + Args: + argv: A list of command-line arguments. + + Returns: + An exit status code (0 for success). 
+ """ + + parser = argparse.ArgumentParser(description="List executable Python scripts.") + parser.parse_args(argv) + + for path in iterate_executables(): + print(path) + + return 0 + + +if __name__ == "__main__": + exit(main(sys.argv[1:])) diff --git a/micall/utils/make_fda_refs.py b/micall/utils/make_fda_refs.py index e2155a908..8ee471ce9 100644 --- a/micall/utils/make_fda_refs.py +++ b/micall/utils/make_fda_refs.py @@ -10,11 +10,11 @@ hyphy = HyPhy._THyPhy (os.getcwd(), 1) # instance of HyPhy hyphyAlign.change_settings(hyphy) # default settings -handle = open('fda_hcv_polyprotein.fa', 'rU') +handle = open('fda_hcv_polyprotein.fa', 'r') fasta = convert_fasta(handle) handle.close() -handle = open('/Users/art/git/MiseqPipeline/projects.json', 'rU') +handle = open('/Users/art/git/MiseqPipeline/projects.json', 'r') proj = json.load(handle) handle.close() @@ -35,4 +35,4 @@ outfile.close() - \ No newline at end of file + diff --git a/micall/utils/make_hcv_genotype_refs.py b/micall/utils/make_hcv_genotype_refs.py index e2705c2fd..7a04cc969 100644 --- a/micall/utils/make_hcv_genotype_refs.py +++ b/micall/utils/make_hcv_genotype_refs.py @@ -26,7 +26,7 @@ def convert_fasta (lines): sequence += i.strip('\n') try: blocks.append([h,sequence]) # handle last entry - except: + except RuntimeError: raise Exception("convert_fasta(): Error appending to blocks [{},{}]".format(h, sequence)) return blocks @@ -44,7 +44,7 @@ def convert_fasta (lines): noTerminalPenalty = 1) -with open('HCV_REF_2012_genome.fasta', 'rU') as handle: +with open('HCV_REF_2012_genome.fasta', 'r') as handle: genomes = convert_fasta(handle) # keep one per genotype projects = ProjectConfig.loadDefault() @@ -58,7 +58,7 @@ def convert_fasta (lines): if subtype in processed_subtypes: continue for region, refseq in h77.iteritems(): - print subtype, region + print(subtype, region) aquery, aref, ascore = hyphyAlign.pair_align(hyphy, refseq, genome) left, right = hyphyAlign.get_boundaries(aref) 
outfile.write('>%s-%s\n%s\n' % (subtype, diff --git a/micall_docker.py b/micall/utils/micall_docker.py similarity index 95% rename from micall_docker.py rename to micall/utils/micall_docker.py index fd963fdc3..c13383fd1 100644 --- a/micall_docker.py +++ b/micall/utils/micall_docker.py @@ -1,3 +1,9 @@ +#! /usr/bin/env python + +""" +Entry script that serves as an entry point of MiCall's Docker image. +""" + from argparse import ArgumentParser import csv import errno @@ -343,61 +349,6 @@ def get_parser(default_max_active): "--project_code", "-p", help="Select primers to trim: HCV, HIVB, HIVGHA, or SARSCOV2.") - command_parser.add_argument( - "-haplo_long", - type=int, - default=0, - ) - command_parser.add_argument( - "-haplo_filter", - type=int, - default=500, - ) - command_parser.add_argument( - "-haplo_thres", - type=int, - default=-1, - ) - command_parser.add_argument( - "-haplo_strict", - type=int, - default=5, - ) - command_parser.add_argument( - "-haplo_error", - type=float, - default=0.02, - ) - command_parser.add_argument( - "-haplo_kmer", - type=int, - default=41, - ) - command_parser.add_argument( - "-denovo_merge", - action='store_true', - ) - command_parser.add_argument( - "-scaffold", - action='store_true', - ) - command_parser.add_argument( - "-patch", - action='store_true', - ) - command_parser.add_argument( - "-ref", - type=str, - default=None, - ) - command_parser.add_argument( - "-RP", - action='store_true', - ) - command_parser.add_argument( - "-IVA", - action='store_true', - ) return parser @@ -639,18 +590,22 @@ def process_run(run_info, args): logger.info('Summarizing run.') run_summary = summarize_run(run_info) - with ProcessPoolExecutor(max_workers=args.max_active) as pool: - for _ in pool.map(functools.partial(process_sample, - args=args, - pssm=pssm, - use_denovo=run_info.is_denovo), - run_info.get_all_samples()): - pass + def runner(func, inputs): + inputs = list(inputs) + if args.max_active > 1 and len(inputs) > 1: + with 
ProcessPoolExecutor(max_workers=args.max_active) as pool: + list(pool.map(func, inputs)) + else: + list(map(func, inputs)) + + runner(functools.partial(process_sample, + args=args, + pssm=pssm, + use_denovo=run_info.is_denovo), + run_info.get_all_samples()) - for _ in pool.map(functools.partial(process_resistance, - run_info=run_info), - run_info.sample_groups): - pass + runner(functools.partial(process_resistance, run_info=run_info), + run_info.sample_groups) collate_samples(run_info) if run_summary is not None: @@ -998,26 +953,13 @@ def process_sample(sample, args, pssm, use_denovo=False): """ sample.debug_remap = args.debug_remap sample.skip = args.skip - args_haplo = {'long': args.haplo_long, - 'filter': args.haplo_filter, - 'thres': args.haplo_thres, - 'strict': args.haplo_strict, - 'error': args.haplo_error, - 'kmer': args.haplo_kmer, - 'merge':args.denovo_merge, - 'scaffold': args.scaffold, - 'patch': args.patch, - 'ref': args.ref, - 'RP': args.RP, - 'IVA': args.IVA} try: excluded_seeds = [] if args.all_projects else EXCLUDED_SEEDS excluded_projects = [] if args.all_projects else EXCLUDED_PROJECTS sample.process(pssm, excluded_seeds, excluded_projects, - use_denovo=use_denovo, - haplo_args=args_haplo) + use_denovo=use_denovo) except Exception: message = 'Failed to process {}.'.format(sample) logger.error(message, exc_info=True) @@ -1163,7 +1105,8 @@ def collate_samples(run_info: RunInfo): 'concordance.csv', 'concordance_seed.csv'] if run_info.is_denovo: - filenames += ['conseq_stitched.csv', 'conseq_region.csv'] + filenames += ['conseq_stitched.csv', 'conseq_region.csv', + 'unstitched_cascade.csv', 'unstitched_conseq.csv', 'unstitched_contigs.csv'] for filename in filenames: out_path = run_info.output_path with open(os.path.join(out_path, filename), 'w') as fout: diff --git a/micall_kive.py b/micall/utils/micall_kive.py similarity index 89% rename from micall_kive.py rename to micall/utils/micall_kive.py index 3d49b0e39..b846acd2a 100644 --- a/micall_kive.py 
+++ b/micall/utils/micall_kive.py @@ -1,3 +1,10 @@ +#! /usr/bin/env python + +""" +Entry script that serves as an entry point of MiCall's Singularity image. +This file is run by Kive. +""" + import logging import shutil import tarfile @@ -80,9 +87,18 @@ def parse_args(): action='store_true', help='Use de novo assembly instead of mapping to ' 'reference sequences.') - parser.add_argument('contigs_csv', + parser.add_argument('unstitched_cascade_csv', + nargs='?', + help='count of reads at each step') + parser.add_argument('unstitched_conseq_csv', + nargs='?', + help='CSV containing mapping unstitched consensus sequences') + parser.add_argument('unstitched_contigs_csv', nargs='?', help='CSV containing contigs built by de novo assembly') + parser.add_argument('contigs_csv', + nargs='?', + help='CSV containing contigs built by de novo assembly and stitched by our stitcher') parser.add_argument('read_entropy_csv', nargs='?', help='CSV containing read pair length counts') @@ -124,6 +140,8 @@ def load_sample(args): coverage_scores_csv=args.coverage_scores_csv, aligned_csv=args.aligned_csv, g2p_aligned_csv=args.g2p_aligned_csv, + unstitched_conseq_csv=args.unstitched_conseq_csv, + unstitched_contigs_csv=args.unstitched_contigs_csv, contigs_csv=args.contigs_csv, genome_coverage_csv=args.genome_coverage_csv, genome_coverage_svg=args.genome_coverage_svg, @@ -153,4 +171,5 @@ def main(): tar.add(image_path, archive_path) -main() +if __name__ == '__main__': + main() diff --git a/micall_kive_resistance.py b/micall/utils/micall_kive_resistance.py similarity index 95% rename from micall_kive_resistance.py rename to micall/utils/micall_kive_resistance.py index 74008623c..c2ce3ddec 100644 --- a/micall_kive_resistance.py +++ b/micall/utils/micall_kive_resistance.py @@ -1,3 +1,10 @@ +#! /usr/bin/env python + +""" +Entry script that serves as an entry point of MiCall's Singularity image. +This file is run by Kive. 
+""" + import logging import shutil from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter diff --git a/micall_logging_config.py b/micall/utils/micall_logging_config.py similarity index 98% rename from micall_logging_config.py rename to micall/utils/micall_logging_config.py index 0e67e7897..fce41308f 100644 --- a/micall_logging_config.py +++ b/micall/utils/micall_logging_config.py @@ -13,7 +13,7 @@ # Production server probably needs /var/log/micall/micall.log # Don't forget to create the folder and change owner to micall. -LOG_FILE = 'micall.log' +LOG_FILE = '/tmp/micall.log' LOGGING = { # This is the default logger. Probably want to switch console to mail. diff --git a/micall/utils/project_seeds_from_compendium.py b/micall/utils/project_seeds_from_compendium.py index 6b3acdaa0..fd5b759a7 100644 --- a/micall/utils/project_seeds_from_compendium.py +++ b/micall/utils/project_seeds_from_compendium.py @@ -82,7 +82,7 @@ def main(): clean_count = 0 dirty_count = 0 recombinant_names = [] - with open(filename, 'rU') as f: + with open(filename, 'r') as f: reader = csv.reader(f) for description, seed_seq in reader: seed_seq = seed_seq.replace('-', '') diff --git a/micall/utils/ref_aligner.py b/micall/utils/ref_aligner.py index 41b99e6d0..67601cd91 100644 --- a/micall/utils/ref_aligner.py +++ b/micall/utils/ref_aligner.py @@ -44,7 +44,7 @@ def __init__(self, group_seeds.append(seed) self.seeds = group_seeds self.aligner_preset = aligner_preset - self.aligner = Aligner(seq=self.main_reference, preset=aligner_preset) + self.aligner = Aligner(seq=self.main_reference, bw=500, bw_long=500, preset=aligner_preset) self.num_warning_seeds = 0 self.verbose = verbose self.warning_seeds = {'insertions': [], 'deletions': [], 'frameshift': [], 'coverage': [], 'alignment': []} diff --git a/micall/utils/reference_distances.py b/micall/utils/reference_distances.py index 375ee3fa5..1c5a3783d 100644 --- a/micall/utils/reference_distances.py +++ b/micall/utils/reference_distances.py 
@@ -56,7 +56,7 @@ def calculate_distance(source, destination): def plot_distances(projects_filename): - with open(projects_filename, 'rU') as f: + with open(projects_filename, 'r') as f: config = json.load(f) populate_key_references(config['regions']) groups = defaultdict(list) diff --git a/release_test_compare.py b/micall/utils/release_test_compare.py similarity index 95% rename from release_test_compare.py rename to micall/utils/release_test_compare.py index d8914460e..eb10bb4dd 100644 --- a/release_test_compare.py +++ b/micall/utils/release_test_compare.py @@ -10,6 +10,7 @@ from glob import glob from operator import itemgetter import os +import logging import matplotlib.pyplot as plt import pandas as pd @@ -20,9 +21,14 @@ from micall.utils.primer_tracker import PrimerTracker from micall.utils.report_amino import SeedNucleotide, MAX_CUTOFF from micall.utils.translation import translate -from micall_docker import get_available_memory +from micall.utils.micall_docker import get_available_memory MICALL_VERSION = '7.15' +# ^^^^^^ Version of the MiCall release being tested. +# This is the new version against which older versions are compared. +# The version for the older revision is determined dynamically in the `find_runs` function. +# The source folder is inspected to find all previous result versions for each run. +# These versions are then sorted and the latest one is selected for comparison. 
MiseqRun = namedtuple('MiseqRun', 'source_path target_path is_done') MiseqRun.__new__.__defaults__ = (None,) * 3 @@ -39,6 +45,8 @@ 'scenarios', # {Scenarios: [description]} 'consensus_distances']) # [ConsensusDistance] +logger = logging.getLogger(__name__) + class Scenarios(IntEnum): NONE = 0 @@ -70,6 +78,13 @@ def parse_args(default_max_active): default=default_max_active, type=int, help='Number of parallel workers to process the samples.') + + verbosity_group = parser.add_mutually_exclusive_group() + verbosity_group.add_argument('--verbose', action='store_true', help='Increase output verbosity.') + verbosity_group.add_argument('--no-verbose', action='store_true', help='Normal output verbosity.', default=True) + verbosity_group.add_argument('--debug', action='store_true', help='Maximum output verbosity.') + verbosity_group.add_argument('--quiet', action='store_true', help='Minimize output verbosity.') + return parser.parse_args() @@ -81,10 +96,10 @@ def find_runs(source_folder, target_folder, use_denovo): target_path = os.path.join(run_path, 'Results', 'version_' + MICALL_VERSION) + done_path = os.path.join(target_path, 'doneprocessing') is_done = os.path.exists(done_path) - if use_denovo: - target_path = os.path.join(target_path, 'denovo') + source_results_path = os.path.join(source_folder, 'MiSeq', 'runs', @@ -97,6 +112,12 @@ def find_runs(source_folder, target_folder, use_denovo): message = f'Unexpected results file name in {run_name}.' 
raise ValueError(message) from ex source_path = os.path.join(source_results_path, source_versions[-1]) + + if use_denovo: + target_path = os.path.join(target_path, 'denovo') + source_path = os.path.join(source_path, 'denovo') + + logger.debug("Comparing %r with %r.", source_path, target_path) yield MiseqRun(source_path, target_path, is_done) @@ -104,7 +125,9 @@ def parse_version(version_name): version_text = version_name.split('_')[-1] if version_text.endswith('.zip'): version_text = version_text[:-4] - return tuple(map(int, version_text.split('.'))) + version_text, possible_dash, possible_modifiers = version_text.partition("-") + version_numbers = tuple(map(int, version_text.split('.'))) + return (version_numbers, possible_modifiers) def report_source_versions(runs): @@ -462,6 +485,8 @@ def compare_consensus(sample: Sample, continue source_nucs = [] target_nucs = [] + + # Note: if either source or target region is missing, it might be because its coverage score is below 4. if source_details is None: has_big_change = True target_nucs = [nuc for nuc, row in target_details] @@ -677,6 +702,15 @@ def main(): default_max_active = max(1, available_memory // recommended_memory) args = parse_args(default_max_active) + if args.quiet: + logger.setLevel(logging.ERROR) + elif args.verbose: + logger.setLevel(logging.INFO) + elif args.debug: + logger.setLevel(logging.DEBUG) + else: + logger.setLevel(logging.WARN) + with ProcessPoolExecutor() as pool: runs = find_runs(args.source_folder, args.target_folder, args.denovo) runs = report_source_versions(runs) diff --git a/release_test_microtest.py b/micall/utils/release_test_microtest.py similarity index 90% rename from release_test_microtest.py rename to micall/utils/release_test_microtest.py index 407a9ba0b..0c76d0aa5 100644 --- a/release_test_microtest.py +++ b/micall/utils/release_test_microtest.py @@ -179,8 +179,7 @@ def check_2100(self): if self.is_denovo: expected_regions = {'HIV1-CON-XX-Consensus-seed', 
'1-HIV1-B-FR-K03455-seed', - '2-HCV-1a', - '3-HIV1-B-FR-K03455-seed'} + '2-HCV-1a'} else: expected_regions = {'HIV1-CON-XX-Consensus-seed', 'HCV-1a', @@ -247,7 +246,7 @@ def check_2120(self): def check_2130(self): conseq_rows = list(self.read_file('2130A-HCV_S15', 'conseq.csv')) regions = set(map(itemgetter('region'), conseq_rows)) - expected_regions = ({'1-HCV-2a', '2-HCV-2a'} + expected_regions = ({'1-HCV-2a'} if self.is_denovo else {'HCV-2a'}) assert regions == expected_regions, regions @@ -255,7 +254,7 @@ def check_2130(self): def check_2130midi(self): conseq_rows = list(self.read_file('2130AMIDI-MidHCV_S16', 'conseq.csv')) regions = set(map(itemgetter('region'), conseq_rows)) - expected_regions = ({'1-HCV-2a', '2-HCV-2a'} + expected_regions = ({'1-HCV-2a'} if self.is_denovo else {'HCV-2a'}) assert regions == expected_regions, regions @@ -439,29 +438,70 @@ def process_sample(self, fastq_file: Path): fields = sections[0].split('-') project_code = fields[-1] writer.writerow(dict(sample=sample_name, project=project_code)) - output_names = ['g2p.csv', - 'g2p_summary.csv', - 'remap_counts.csv', - 'remap_conseq.csv', - 'unmapped1.fastq', - 'unmapped2.fastq', - 'conseq_ins.csv', - 'failed.csv', - 'cascade.csv', - 'nuc.csv', - 'amino.csv', - 'insertions.csv', - 'conseq.csv', - 'conseq_all.csv', - 'conseq_region.csv', - 'conseq_stitched.csv' - 'failed_align.csv', - 'coverage_scores.csv', - 'coverage_maps.tar', - 'aligned.csv', - 'g2p_aligned.csv', - 'genome_coverage.csv', - 'genome_coverage.svg'] + + if self.is_denovo: + output_names = [ + 'g2p.csv', + 'g2p_summary.csv', + 'remap_counts.csv', + 'remap_conseq.csv', + 'unmapped1.fastq', + 'unmapped2.fastq', + 'conseq_ins.csv', + 'failed.csv', + 'cascade.csv', + 'nuc.csv', + 'amino.csv', + 'insertions.csv', + 'conseq.csv', + 'conseq_all.csv', + 'concordance.csv', + 'concordance_seed.csv', + 'failed_align.csv', + 'coverage_scores.csv', + 'coverage_maps.tar', + 'aligned.csv', + 'g2p_aligned.csv', + 'genome_coverage.csv', + 
'genome_coverage.svg', + 'genome_concordance.svg', + 'unstitched_cascade.csv', + 'unstitched_conseq.csv', + 'unstitched_contigs.csv', + 'contigs.csv', + 'read_entropy.csv', + 'conseq_region.csv', + 'conseq_stitched.csv', + ] + + else: + output_names = [ + 'g2p.csv', + 'g2p_summary.csv', + 'remap_counts.csv', + 'remap_conseq.csv', + 'unmapped1.fastq', + 'unmapped2.fastq', + 'conseq_ins.csv', + 'failed.csv', + 'cascade.csv', + 'nuc.csv', + 'amino.csv', + 'insertions.csv', + 'conseq.csv', + 'conseq_all.csv', + 'concordance.csv', + 'concordance_seed.csv', + 'failed_align.csv', + 'coverage_scores.csv', + 'coverage_maps.tar', + 'aligned.csv', + 'g2p_aligned.csv', + 'genome_coverage.csv', + 'genome_coverage.svg', + 'genome_concordance.svg', + ] + output_paths = [output_path/name for name in output_names] app_name = 'denovo' if self.is_denovo else None run_with_retries(self.build_command([sample_info_path, @@ -470,6 +510,15 @@ def process_sample(self, fastq_file: Path): self.bad_cycles_path], output_paths, app_name)) + + for path in output_paths: + + if path == (output_path/"conseq_ins.csv"): + # This file is special. See https://github.com/cfe-lab/MiCall/issues/1085 + path = output_path/"scratch"/"conseq_ins.csv" + + assert os.path.exists(path), f"Expected output file {path!r} to be created." 
+ return sample_name def process_resistance(self, sample_group: SampleGroup): @@ -578,7 +627,7 @@ def main(): type=Path, help='Singularity image to run tests in.') args = parser.parse_args() - source_path: Path = Path(__file__).parent / 'micall' / 'tests' / 'microtest' + source_path: Path = Path(__file__).parent.parent / 'tests' / 'microtest' if args.sandbox is None: sandbox_path = source_path shutil.rmtree(source_path / 'scratch', ignore_errors=True) diff --git a/release_test_publish.py b/micall/utils/release_test_publish.py similarity index 100% rename from release_test_publish.py rename to micall/utils/release_test_publish.py diff --git a/release_test_setup.py b/micall/utils/release_test_setup.py similarity index 97% rename from release_test_setup.py rename to micall/utils/release_test_setup.py index 60480cbea..9d2df4578 100644 --- a/release_test_setup.py +++ b/micall/utils/release_test_setup.py @@ -173,9 +173,14 @@ def setup_run(self): os.symlink(interop_source, interop_target) for filename in ('RunInfo.xml', 'SampleSheet.csv', + 'SampleSheetOverrides.csv', 'needsprocessing'): - copy(os.path.join(self.run_name, filename), - os.path.join(target_run_path, filename)) + + source = os.path.join(self.run_name, filename) + target = os.path.join(target_run_path, filename) + if os.path.exists(source): + copy(source, target) + results_path = os.path.join(target_run_path, 'Results', 'version_' + self.config.pipeline_version) diff --git a/micall/utils/remap_fastq_simplify.py b/micall/utils/remap_fastq_simplify.py index d0c8a6af4..7eac913e7 100644 --- a/micall/utils/remap_fastq_simplify.py +++ b/micall/utils/remap_fastq_simplify.py @@ -60,7 +60,7 @@ def filter_fastqs(self, filename1): devnull, devnull, devnull) - with open(remap_filename, 'rU') as remap_csv: + with open(remap_filename, 'r') as remap_csv: print('Filtering.') reader = DictReader(remap_csv) mapped_qnames = {row['qname'] @@ -72,7 +72,7 @@ def filter_fastqs(self, filename1): return filter_name1 def 
filter_reads(self, filename, filter_name, qnames): - with open(filename, 'rU') as fin, open(filter_name, 'w') as fout: + with open(filename, 'r') as fin, open(filter_name, 'w') as fout: for read in zip(fin, fin, fin, fin): qname = read[0].split()[0][1:] if qname in qnames: @@ -92,12 +92,12 @@ def _test(self, read_indexes, debug_file_prefix=None): trimmed_filename2 = os.path.join(workdir, 'rerun.trimmed2.fastq') prelim_censored_filename = os.path.join(workdir, 'rerun_censored.prelim.csv') prelim_trimmed_filename = os.path.join(workdir, 'rerun_trimmed.prelim.csv') - with open(self.bad_cycles_filename, 'rU') as bad_cycles: + with open(self.bad_cycles_filename, 'r') as bad_cycles: bad_cycles = list(csv.DictReader(bad_cycles)) - with open(simple_filename1, 'rU') as simple1, \ + with open(simple_filename1, 'r') as simple1, \ open(censored_filename1, 'w') as censored1: censor(simple1, bad_cycles, censored1, use_gzip=False) - with open(simple_filename2, 'rU') as simple2, \ + with open(simple_filename2, 'r') as simple2, \ open(censored_filename2, 'w') as censored2: censor(simple2, bad_cycles, censored2, use_gzip=False) with open(prelim_censored_filename, 'w+') as prelim_censored_csv, \ @@ -205,7 +205,7 @@ def read_fastq(filename, reads): @param filename: the FASTQ file to open @param reads: defaultdict({qname: [line1, line2, line3, line4, line1, line2, line3, line4]} """ - with open(filename, 'rU') as f: + with open(filename, 'r') as f: for line1, line2, line3, line4 in zip(f, f, f, f): qname = line1.split()[0] lines = reads[qname] diff --git a/micall/utils/report_amino.py b/micall/utils/report_amino.py index 917614915..8c7e70380 100644 --- a/micall/utils/report_amino.py +++ b/micall/utils/report_amino.py @@ -1,4 +1,5 @@ from collections import Counter +from typing import Optional from micall.utils.translation import translate, ambig_dict @@ -19,7 +20,7 @@ def __init__(self, consensus_nuc_index, counts=None): if consensus_nuc_index is not None: 
self.all_consensus_nuc_indexes.add(consensus_nuc_index) self.counts = counts or Counter() # {amino: count} - self.codon_counts = Counter() # {codon_nucs: count} + self.codon_counts: Counter = Counter() # {codon_nucs: count} self.nucleotides = [] for i in range(3): seed_nuc = SeedNucleotide() @@ -239,7 +240,7 @@ def count_overlap(self, other): class ReportNucleotide: - def __init__(self, position: int, seed_nucleotide: SeedNucleotide = None): + def __init__(self, position: int, seed_nucleotide: Optional[SeedNucleotide] = None): self.position = position if seed_nucleotide is None: self.seed_nucleotide = SeedNucleotide() diff --git a/micall/utils/sam_g2p_simplify.py b/micall/utils/sam_g2p_simplify.py index e573e355d..637db00bc 100644 --- a/micall/utils/sam_g2p_simplify.py +++ b/micall/utils/sam_g2p_simplify.py @@ -34,7 +34,7 @@ def test(remap_lines, temp_prefix, pssm, ruby_script, delete_results=True): try: check_call([ruby_script, remap_file.name, nuc_filename, ruby_out_filename], cwd=ruby_path) - with open(nuc_filename, 'rU') as nuc_csv, \ + with open(nuc_filename, 'r') as nuc_csv, \ open(python_out_filename, 'wb') as g2p_csv: # TODO: update this to the new arguments. 
@@ -83,7 +83,7 @@ def ddmin(remap_lines, temp_prefix, pssm, ruby_script): return header + remap_lines def compare_conseqs(txtfilename, ruby_script, pssm): - with open(txtfilename, 'rU') as remap_file: + with open(txtfilename, 'r') as remap_file: remap_lines = remap_file.readlines() simple_prefix = os.path.splitext(txtfilename)[0] + '_simple' if test(remap_lines, simple_prefix, pssm, ruby_script) != 'PASS': diff --git a/micall/utils/sample_fastq.py b/micall/utils/sample_fastq.py index f1df838b2..35d2aa56e 100755 --- a/micall/utils/sample_fastq.py +++ b/micall/utils/sample_fastq.py @@ -8,10 +8,10 @@ def parse_args(): parser = argparse.ArgumentParser( description="Randomly sample reads from FASTQ files for quick processing.") parser.add_argument('fastq1', - type=argparse.FileType('rU'), + type=argparse.FileType('r'), help='original FASTQ file of forward reads') parser.add_argument('fastq2', - type=argparse.FileType('rU'), + type=argparse.FileType('r'), help='original FASTQ file of reverse reads') parser.add_argument('short_fastq1', type=argparse.FileType('w'), diff --git a/micall/utils/sample_sheet_parser.py b/micall/utils/sample_sheet_parser.py index 45ce675dd..f6edfec57 100755 --- a/micall/utils/sample_sheet_parser.py +++ b/micall/utils/sample_sheet_parser.py @@ -179,9 +179,12 @@ def sample_sheet_parser(handle): samp, proj, val = None, None, None if sample_sheet_version == 1: sj, val = elem.split(':') - samp, proj = sj.split(project_delimiter_v1) + components = sj.split(project_delimiter_v1) + samp, proj = (project_delimiter_v1.join(components[:-1]), components[-1]) elif sample_sheet_version == 2: - samp, proj, val = elem.split(project_delimiter_v2) + components = elem.split(project_delimiter_v2) + samp, proj, val = (project_delimiter_v2.join(components[:-2]), + components[-2], components[-1]) if samp == entry['sample'] and proj == entry['project']: if name == 'Research': diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..a6e44d0c3 
--- /dev/null +++ b/pyproject.toml @@ -0,0 +1,302 @@ +[project] +name = "micall" +version = "7.17.0" +description = "Pipeline for processing FASTQ data from an Illumina MiSeq to genotype human RNA viruses like HIV and hepatitis C." +authors = [ + { name = "British Columbia Centre for Excellence in HIV/AIDS", email = "vmysak@bccfe.ca" }, +] +license = {text = "AGPL-3.0"} +license-files = { paths = ["LICENSE.txt"] } +readme = "README.md" +classifiers = [ + "License :: OSI Approved :: GNU Affero General Public License v3", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.11", + "Operating System :: OS Independent", + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Science/Research", + "Intended Audience :: Developers", + "Topic :: Scientific/Engineering :: Bio-Informatics", +] + +dependencies = [ + # Requirements for running the bare pipeline under Kive. + "pytz==2024.2", + "biopython==1.84", + "gotoh @ git+https://github.com/cfe-lab/gotoh.git@v0.3.0#egg=gotoh&subdirectory=alignment/gotoh", + "pyvdrm @ git+https://github.com/cfe-lab/pyvdrm.git@v0.3.2", + "numpy==2.1.3", + "scipy==1.14.1", + "matplotlib==3.9.2", + "cutadapt==4.9", + "python-Levenshtein==0.26.1", + "PyYAML==6.0.2", + "reportlab==4.2.5", + "pysam==0.22.1", + "genetracks @ git+https://github.com/cfe-lab/genetracks.git@v0.4", + "mappy==2.28", + "drawsvg==2.4.0", + "cairosvg==2.7.1", + "aligntools==1.2.1", +] + +[project.optional-dependencies] +test = [ + # Dependencies required for running the test suite + "pytest==8.3.3", + "coverage==7.6.4", + "pandas==2.2.3", + "seaborn==0.13.2", + "ete3", + # For reading HCV rules from an Excel file. + "openpyxl==3.1.5", + + # Also includes watcher's dependencies. + "kiveapi @ git+https://github.com/cfe-lab/Kive.git@v0.15#egg=kiveapi&subdirectory=api", + "requests==2.32.3", +] +dev = [ + # Dependencies required for development (linting, type checking, etc.) 
+ "ruff==0.7.2", + "mypy==1.13.0", + "mypy-extensions==1.0.0", + "gprof2dot==2024.6.6", + "codecov==2.1.13", # For reporting the code coverage. +] +denovo = [ + # Requirements for running De-Novo pipeline (only problematic ones). + "iva @ git+https://github.com/cfe-lab/iva.git@v1.1.1", +] +watcher = [ + # Requirements for running the MISEQ_MONITOR.py script + "kiveapi @ git+https://github.com/cfe-lab/Kive.git@v0.15#egg=kiveapi&subdirectory=api", + "requests==2.32.3", +] +basespace = [ + # Requirements for running the micall_basespace.py script + "requests==2.32.3", +] + +[project.scripts] +micall = "micall.main:cli" + +[tool.hatch.build] +include = [ + "micall/", +] +exclude = [ + "micall/tests/", +] +skip-excluded-dirs = true + +[project.urls] +homepage = "https://cfe-lab.github.io/MiCall" +repository = "https://github.com/cfe-lab/MiCall" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.metadata] +allow-direct-references = true + +[tool.pytest.ini_options] +markers = [ + "iva: test requires IVA (deselect with '-m \"not iva\"')" +] + +[tool.ruff.lint.per-file-ignores] +# TODO: resolve these warnings insteads of ignoring them. 
+ +# Syntax error: ignore unexpected token in this file +"micall/utils/make_hcv_genotype_refs.py" = ["E999"] + +# File with unused imports and local variable assignments +"micall/core/plot_simple.py" = ["F401", "F841"] + +# Ignore bare except usage in project configuration +"micall/core/project_config.py" = ["E722"] + +# Unused import in remap module +"micall/core/remap.py" = ["F401"] + +# Do not compare types and bare excepts in pssm_lib module +"micall/g2p/pssm_lib.py" = ["E722", "E721"] + +# F-string errors without placeholders in tests +"micall/tests/svg_differ.py" = ["F541"] + +# Unused import in test_aln2counts_report module +"micall/tests/test_aln2counts_report.py" = ["F401"] + +# Multiple issues in test_consensus_aligner: unused imports, multi-statement line, and unused local variable +"micall/tests/test_consensus_aligner.py" = ["F401", "E701", "F841"] + +# Unused import in test_primer_tracker module +"micall/tests/test_primer_tracker.py" = ["F401"] + +# F-string errors without placeholders in test_remap module +"micall/tests/test_remap.py" = ["F541"] + +# Unused import in test_sample module +"micall/tests/test_sample.py" = ["F401"] + +# Unused imports, undefined name, and type comparison in aln2counts_simplify module +"micall/utils/aln2counts_simplify.py" = ["F401", "F821", "E721"] + +# Unused local variable in compare_mapping module +"micall/utils/compare_mapping.py" = ["F841"] + +# Multiple issues in conseq_compare: unused sys, redefinitions, and unused local variables +"micall/utils/conseq_compare.py" = ["F811", "F401", "F841"] + +# Comparisons to None should use `is` or `is not` in dd module +"micall/utils/dd.py" = ["E711"] + +# Unused imports and local variable in find_reads_in_sam module +"micall/utils/find_reads_in_sam.py" = ["F401", "F841"] + +# F-string errors without placeholders in primer_tracker module +"micall/utils/primer_tracker.py" = ["F541"] + +# F-string errors without placeholders in ref_aligner module +"micall/utils/ref_aligner.py" = 
["F541"] + +# Unused local variable in release_test_compare script +"release_test_compare.py" = ["F841"] + +[tool.mypy] +files = ["micall/"] + +exclude = '''(?x) + ^micall/core/aln2counts[.]py$| + ^micall/core/amplicon_finder[.]py$| + ^micall/core/cascade_report[.]py$| + ^micall/core/consensus_builder[.]py$| + ^micall/core/coverage_plots[.]py$| + ^micall/core/plot_simple[.]py$| + ^micall/core/prelim_map[.]py$| + ^micall/core/remap[.]py$| + ^micall/core/sam2aln[.]py$| + ^micall/core/trim_fastqs[.]py$| + ^micall/drivers/run_info[.]py$| + ^micall/drivers/sample_group[.]py$| + ^micall/drivers/sample[.]py$| + ^micall/g2p/fastq_g2p[.]py$| + ^micall/g2p/pssm_lib[.]py$| + ^micall/monitor/find_groups[.]py$| + ^micall/monitor/kive_watcher[.]py$| + ^micall/monitor/qai_helper[.]py$| + ^micall/monitor/sample_watcher[.]py$| + ^micall/monitor/update_qai[.]py$| + ^micall/monitor/micall_watcher[.]py$| + ^micall/resistance/asi_algorithm[.]py$| + ^micall/resistance/genreport[.]py$| + ^micall/resistance/pdfreport[.]py$| + ^micall/resistance/resistance[.]py$| + ^micall/tests/.*| + ^micall/utils/aln2counts_simplify[.]py$| + ^micall/utils/amplicon_finder[.]py$| + ^micall/utils/big_counter[.]py$| + ^micall/utils/compare_454_samples[.]py$| + ^micall/utils/compare_mapping[.]py$| + ^micall/utils/concordance_evaluation[.]py$| + ^micall/utils/conseq_compare[.]py$| + ^micall/utils/contig_blaster[.]py$| + ^micall/utils/contig_counts[.]py$| + ^micall/utils/contig_summary[.]py$| + ^micall/utils/convert_primers_cutadapt[.]py$| + ^micall/utils/coverage_data[.]py$| + ^micall/utils/dd[.]py$| + ^micall/utils/denovo_simplify[.]py$| + ^micall/utils/externals[.]py$| + ^micall/utils/fetch_sequences[.]py$| + ^micall/utils/find_by_coverage[.]py$| + ^micall/utils/find_chimera[.]py$| + ^micall/utils/find_missing_samples[.]py$| + ^micall/utils/find_reads_in_sam[.]py$| + ^micall/utils/genreport_rerun[.]py$| + ^micall/utils/hcv_reference_tree[.]py$| + ^micall/utils/hcv_rules_display[.]py$| + 
^micall/utils/hcv_rules_import[.]py$| + ^micall/utils/make_fda_refs[.]py$| + ^micall/utils/make_hcv_genotype_refs[.]py$| + ^micall/utils/primer_tracker[.]py$| + ^micall/utils/probe_finder[.]py$| + ^micall/utils/projects_dump[.]py$| + ^micall/utils/project_seeds_from_compendium[.]py$| + ^micall/utils/projects_upload[.]py$| + ^micall/utils/ref_aligner[.]py$| + ^micall/utils/reference_distances[.]py$| + ^micall/utils/remap_fastq_simplify[.]py$| + ^micall/utils/sam_g2p_simplify[.]py$| + ^micall/utils/sample_fastq[.]py$| + ^micall/utils/sample_project_summary[.]py$| + ^micall/utils/samples_from_454[.]py$| + ^micall/utils/sample_sheet_parser[.]py$| + ^micall/utils/scan_run_folders[.]py$| + ^micall/utils/seed_alignments[.]py$| + ^micall/utils/spring_beads[.]py$| + ^micall/utils/v3loop_alignment_scores/plot[.]py$| + ^micall/utils/release_test_compare[.]py$| + ^micall/utils/release_test_microtest[.]py$| + ^micall/utils/release_test_setup[.]py$| + ^micall/utils/micall_kive[.]py$| + ^micall/utils/micall_kive_resistance[.]py$| + ^micall/utils/micall_docker[.]py$ +''' + +check_untyped_defs = true + +[[tool.mypy.overrides]] +module = "micall.core.plot_contigs" +check_untyped_defs = false + +[[tool.mypy.overrides]] +module = "mappy" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "gotoh" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "yaml" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "Bio" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "Bio.Seq" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "Bio.Blast.Applications" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "Bio.SeqRecord" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "genetracks" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "genetracks.elements" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "drawsvg" +ignore_missing_imports = true + 
+[[tool.mypy.overrides]] +module = "matplotlib" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "matplotlib.colors" +ignore_missing_imports = true diff --git a/pytest.ini b/pytest.ini deleted file mode 100644 index e556e61ca..000000000 --- a/pytest.ini +++ /dev/null @@ -1,3 +0,0 @@ -[pytest] -markers = - iva: test requires IVA (deselect with '-m "not iva"') diff --git a/requirements-basespace.txt b/requirements-basespace.txt deleted file mode 100644 index 67a1f20c2..000000000 --- a/requirements-basespace.txt +++ /dev/null @@ -1,3 +0,0 @@ -# Requirements for running the micall_basespace.py script --r requirements.txt -requests==2.31.0 diff --git a/requirements-dev.txt b/requirements-dev.txt deleted file mode 100644 index be9f95100..000000000 --- a/requirements-dev.txt +++ /dev/null @@ -1,5 +0,0 @@ -# Requirements for running the tests, doing development, and using utilities --r requirements-test.txt --r requirements-watcher.txt -# Used for plotting profiling results. -gprof2dot==2022.7.29 diff --git a/requirements-test.txt b/requirements-test.txt deleted file mode 100644 index d0ab5b830..000000000 --- a/requirements-test.txt +++ /dev/null @@ -1,12 +0,0 @@ -# Requirements for running the tests - --r requirements.txt --r requirements-watcher.txt -pytest==7.4.0 -coverage==7.2.7 -pandas==2.0.2 -seaborn==0.12.2 -ete3 - -# For reading HCV rules from an Excel file. 
-openpyxl==3.1.2 diff --git a/requirements-watcher.txt b/requirements-watcher.txt deleted file mode 100644 index 65613ba4c..000000000 --- a/requirements-watcher.txt +++ /dev/null @@ -1,3 +0,0 @@ -# Requirements for running the MISEQ_MONITOR.py script -git+https://github.com/cfe-lab/Kive.git@v0.15#egg=kiveapi&subdirectory=api -requests==2.31.0 diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 3da10e7cf..000000000 --- a/requirements.txt +++ /dev/null @@ -1,18 +0,0 @@ -# Requirements for running the bare pipeline under Kive or BaseSpace -pytz==2023.3 -git+https://github.com/cfe-lab/iva.git@v1.1.1 -biopython==1.81 -git+https://github.com/cfe-lab/gotoh.git@v0.3.0#egg=gotoh&subdirectory=alignment/gotoh -git+https://github.com/cfe-lab/pyvdrm.git@v0.3.2 -numpy==1.24.3 -scipy==1.10.1 -matplotlib==3.7.2 -cutadapt==3.7 -python-Levenshtein==0.21.1 -PyYAML==6.0.1 -reportlab==4.0.4 -pysam==0.21.0 -git+https://github.com/cfe-lab/genetracks.git@v0.4 -mappy==2.17 -drawsvg==2.2.0 -cairosvg==2.7.0 \ No newline at end of file From e229317c8ee7a34366ff240b69ba82ab31bacc5e Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Wed, 6 Nov 2024 14:16:42 -0800 Subject: [PATCH 20/23] Replace IVA by Haploflow This change does not handle `merged_contigs_csv`. 
--- .github/workflows/build-and-test.yml | 29 +++++---------- Dockerfile | 37 +++++-------------- README.md | 2 +- Singularity | 35 ++++-------------- micall/core/denovo.py | 53 ++++++++++++++++------------ pyproject.toml | 5 +-- 6 files changed, 58 insertions(+), 103 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index c2d870fe0..30fb68015 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -15,27 +15,16 @@ jobs: - name: Run apt update run: sudo apt-get update - - name: Install IVA assembler dependencies + - name: Install Haploflow run: | - sudo apt-get install -qq zlib1g-dev libncurses5-dev libncursesw5-dev mummer ncbi-blast+ - cd ~/bin - wget -q http://sun.aei.polsl.pl/kmc/download-2.1.1/linux/kmc - wget -q http://sun.aei.polsl.pl/kmc/download-2.1.1/linux/kmc_dump - # Server doesn't support HTTPS, so check for changed files. - echo "\ - db1935884aec2d23d4d623ff85eb4eae8d7a946c9ee0c33ea1818215c40d3099 kmc - 34a97db2dab5fdae0276d2589c940142813e9cd87ae10e5e2dd37ed3545b4436 kmc_dump" | sha256sum --check - chmod +x kmc kmc_dump - wget -q https://github.com/samtools/samtools/releases/download/1.3.1/samtools-1.3.1.tar.bz2 - tar -xf samtools-1.3.1.tar.bz2 --no-same-owner --bzip2 - cd samtools-1.3.1 - ./configure --prefix=$HOME - make - make install - cd ~ - wget -q https://downloads.sourceforge.net/project/smalt/smalt-0.7.6-bin.tar.gz - tar -xzf smalt-0.7.6-bin.tar.gz - ln -s ~/smalt-0.7.6-bin/smalt_x86_64 ~/bin/smalt + sudo apt-get update + sudo apt-get install -y build-essential git ronn + cd /opt/ + git clone https://github.com/hzi-bifo/Haploflow + cd Haploflow + git checkout 9a5a0ff6c3a0435e723e41f98fe82ec2ad19cf50 + sh build.sh + sudo ln -s /opt/Haploflow/build/haploflow ~/bin/haploflow - name: Install Rust and merge-mates run: | diff --git a/Dockerfile b/Dockerfile index 6c65f2eb2..b2edb1e77 100644 --- a/Dockerfile +++ b/Dockerfile @@ -54,34 +54,15 @@ RUN wget -q -O 
bowtie2.zip https://github.com/BenLangmead/bowtie2/releases/downl ENV PATH $PATH:/opt/bowtie2 -## Installing IVA dependencies -RUN apt-get install -q -y zlib1g-dev libncurses5-dev libncursesw5-dev && \ - cd /bin && \ - wget -q http://sun.aei.polsl.pl/kmc/download-2.1.1/linux/kmc && \ - wget -q http://sun.aei.polsl.pl/kmc/download-2.1.1/linux/kmc_dump && \ - chmod +x kmc kmc_dump && \ - cd /opt && \ - wget -q https://sourceforge.net/projects/mummer/files/mummer/3.23/MUMmer3.23.tar.gz && \ - tar -xzf MUMmer3.23.tar.gz --no-same-owner && \ - cd MUMmer3.23 && \ - make --quiet install && \ - rm -r docs src ../MUMmer3.23.tar.gz && \ - ln -s /opt/MUMmer3.23/nucmer \ - /opt/MUMmer3.23/delta-filter \ - /opt/MUMmer3.23/show-coords \ - /bin && \ - cd /opt && \ - wget -q https://github.com/samtools/samtools/releases/download/1.3.1/samtools-1.3.1.tar.bz2 && \ - tar -xf samtools-1.3.1.tar.bz2 --no-same-owner --bzip2 && \ - cd samtools-1.3.1 && \ - ./configure --quiet --prefix=/ && \ - make --quiet && \ - make --quiet install && \ - cd /opt && \ - rm -rf samtools-1.3.1* && \ - wget -q http://downloads.sourceforge.net/project/smalt/smalt-0.7.6-bin.tar.gz && \ - tar -xzf smalt-0.7.6-bin.tar.gz --no-same-owner && \ - ln -s /opt/smalt-0.7.6-bin/smalt_x86_64 /bin/smalt +## Install Haploflow +RUN apt-get update && \ + apt-get install -y build-essential sudo git ronn cmake && \ + cd /opt/ && \ + git clone https://github.com/hzi-bifo/Haploflow && \ + cd Haploflow && \ + git checkout 9a5a0ff6c3a0435e723e41f98fe82ec2ad19cf50 && \ + yes | sh build.sh && \ + ln -s /opt/Haploflow/build/haploflow /bin/haploflow ## Install dependencies for genetracks/drawsvg RUN apt-get install -q -y libcairo2-dev diff --git a/README.md b/README.md index a928002f5..2482dcc1e 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ Requests is distributed under the Apache 2.0 license. Python 3 is distributed under the [Python 3 license][python]. 
-Bowtie2, IVA, and Python-Levenshtein are distributed under the GNU General +Bowtie2, Haploflow, and Python-Levenshtein are distributed under the GNU General Public License (GPL). Matplotlib is distributed under the [Matplotlib license][matplotlib]. diff --git a/Singularity b/Singularity index ef346e276..6db037015 100644 --- a/Singularity +++ b/Singularity @@ -62,34 +62,13 @@ From: python:3.11 ln -s /opt/bowtie2-2.2.8/ /opt/bowtie2 rm bowtie2.zip - echo ===== Installing IVA dependencies ===== >/dev/null - apt-get install -q -y zlib1g-dev libncurses5-dev libncursesw5-dev - cd /bin - wget -q http://sun.aei.polsl.pl/kmc/download-2.1.1/linux/kmc - wget -q http://sun.aei.polsl.pl/kmc/download-2.1.1/linux/kmc_dump - chmod +x kmc kmc_dump - cd /opt - wget -q https://sourceforge.net/projects/mummer/files/mummer/3.23/MUMmer3.23.tar.gz - tar -xzf MUMmer3.23.tar.gz --no-same-owner - cd MUMmer3.23 - make --quiet install - rm -r docs src ../MUMmer3.23.tar.gz - ln -s /opt/MUMmer3.23/nucmer \ - /opt/MUMmer3.23/delta-filter \ - /opt/MUMmer3.23/show-coords \ - /bin - cd /opt - wget -q https://github.com/samtools/samtools/releases/download/1.3.1/samtools-1.3.1.tar.bz2 - tar -xf samtools-1.3.1.tar.bz2 --no-same-owner --bzip2 - cd samtools-1.3.1 - ./configure --quiet --prefix=/ - make --quiet - make --quiet install - cd /opt - rm -rf samtools-1.3.1* - wget -q http://downloads.sourceforge.net/project/smalt/smalt-0.7.6-bin.tar.gz - tar -xzf smalt-0.7.6-bin.tar.gz --no-same-owner - ln -s /opt/smalt-0.7.6-bin/smalt_x86_64 /bin/smalt + echo ===== Installing Haploflow ===== >/dev/null + apt-get install -q -y libboost-all-dev build-essential sudo git ronn cmake + cd /opt/ + git clone https://github.com/hzi-bifo/Haploflow + cd Haploflow + git checkout 9a5a0ff6c3a0435e723e41f98fe82ec2ad19cf50 + yes | sh build.sh echo ===== Installing Python packages ===== >/dev/null # Install dependencies for genetracks/drawsvg diff --git a/micall/core/denovo.py b/micall/core/denovo.py index 
5e32f6acb..693b41bbd 100644 --- a/micall/core/denovo.py +++ b/micall/core/denovo.py @@ -2,7 +2,6 @@ import logging import os from typing import Optional, TextIO, cast, BinaryIO -from csv import DictReader from datetime import datetime from glob import glob from shutil import rmtree, copyfileobj @@ -10,12 +9,8 @@ import subprocess from tempfile import mkdtemp -from Bio import SeqIO -from Bio.Seq import Seq -from Bio.SeqRecord import SeqRecord - -IVA = "iva" +HAPLOFLOW = "haploflow" logger = logging.getLogger(__name__) @@ -40,6 +35,10 @@ def denovo(fastq1_path: str, amplicon reads """ + if merged_contigs_csv is not None: + # TODO: implement this. + logger.error("Haploflow implementation does not support contig extensions yet.") + old_tmp_dirs = glob(os.path.join(work_dir, 'assembly_*')) for old_tmp_dir in old_tmp_dirs: rmtree(old_tmp_dir, ignore_errors=True) @@ -55,26 +54,36 @@ def denovo(fastq1_path: str, '--interleave', '-o', joined_path], check=True) - iva_out_path = os.path.join(tmp_dir, 'iva_out') - contigs_fasta_path = os.path.join(iva_out_path, 'contigs.fasta') - iva_args = [IVA, '--fr', joined_path, '-t', '2'] - if merged_contigs_csv is not None: - seeds_fasta_path = os.path.join(tmp_dir, 'seeds.fasta') - with open(seeds_fasta_path, 'w') as seeds_fasta: - SeqIO.write((SeqRecord(Seq(row['contig']), f'seed-{i}', '', '') - for i, row in enumerate(DictReader(merged_contigs_csv))), - seeds_fasta, - 'fasta') - seeds_size = seeds_fasta.tell() - if seeds_size > 0: - iva_args.extend(['--contigs', seeds_fasta_path, '--make_new_seeds']) - iva_args.append(iva_out_path) + + haplo_args = {'long': 0, + 'filter': 500, + 'thres': -1, + 'strict': 5, + 'error': 0.02, + 'kmer': 41, + 'merge': False, + 'scaffold': False, + 'patch': False, + 'ref': None, + 'RP': False, + } + assembly_out_path = os.path.join(tmp_dir, 'haplo_out') + contigs_fasta_path = os.path.join(assembly_out_path, 'contigs.fa') + haplo_cmd = [HAPLOFLOW, + '--read-file', joined_path, + '--out', assembly_out_path, 
+ '--k', str(haplo_args['kmer']), + '--error-rate', str(haplo_args['error']), + '--strict', str(haplo_args['strict']), + '--filter', str(haplo_args['filter']), + '--thres', str(haplo_args['thres']), + '--long', str(haplo_args['long'])] try: - subprocess.run(iva_args, check=True, stdout=PIPE, stderr=STDOUT) + subprocess.run(haplo_cmd, check=True, stdout=PIPE, stderr=STDOUT) except CalledProcessError as ex: output = ex.output and ex.output.decode('UTF8') if output != 'Failed to make first seed. Cannot continue\n': - logger.warning('iva failed to assemble.', exc_info=True) + logger.warning('Haploflow failed to assemble.', exc_info=True) logger.warning(output) with open(contigs_fasta_path, 'a'): pass diff --git a/pyproject.toml b/pyproject.toml index a6e44d0c3..4324e3eb1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,10 +64,7 @@ dev = [ "gprof2dot==2024.6.6", "codecov==2.1.13", # For reporting the code coverage. ] -denovo = [ - # Requirements for running De-Novo pipeline (only problematic ones). - "iva @ git+https://github.com/cfe-lab/iva.git@v1.1.1", -] +denovo = [] watcher = [ # Requirements for running the MISEQ_MONITOR.py script "kiveapi @ git+https://github.com/cfe-lab/Kive.git@v0.15#egg=kiveapi&subdirectory=api", From 7f0e351d615df037fa11cb8ff2f2e8975e9ff69e Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Wed, 6 Nov 2024 13:45:56 -0800 Subject: [PATCH 21/23] Update comment that mentions dropped IVA --- micall/core/consensus_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/micall/core/consensus_builder.py b/micall/core/consensus_builder.py index f0762c68f..b8fac9443 100644 --- a/micall/core/consensus_builder.py +++ b/micall/core/consensus_builder.py @@ -30,7 +30,7 @@ def get_consensus(self): def get_consensus_for_length(self, length): nucleotides = self.length_nucleotides[length] - # IVA can't handle seeds with mixtures, so always avoid them. 
+ # Many assemblers (such as IVA) can't handle seeds with mixtures, so always avoid them. return ''.join(nucleotides[i].get_consensus(FIRST_CUTOFF) for i in range(length)) From e03ab9749f9548e60a4674f1ce59bae709c84738 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Wed, 6 Nov 2024 13:46:32 -0800 Subject: [PATCH 22/23] Do not mention IVA in pyproject.toml --- pyproject.toml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4324e3eb1..3556f8314 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -98,11 +98,6 @@ build-backend = "hatchling.build" [tool.hatch.metadata] allow-direct-references = true -[tool.pytest.ini_options] -markers = [ - "iva: test requires IVA (deselect with '-m \"not iva\"')" -] - [tool.ruff.lint.per-file-ignores] # TODO: resolve these warnings insteads of ignoring them. From f6e6ec214d5173af6d9f1ce31931dbb56045856f Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Thu, 7 Nov 2024 11:17:51 -0800 Subject: [PATCH 23/23] Ensure that output file is created in denovo.py --- micall/core/denovo.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/micall/core/denovo.py b/micall/core/denovo.py index 693b41bbd..42a6eefd6 100644 --- a/micall/core/denovo.py +++ b/micall/core/denovo.py @@ -69,6 +69,9 @@ def denovo(fastq1_path: str, } assembly_out_path = os.path.join(tmp_dir, 'haplo_out') contigs_fasta_path = os.path.join(assembly_out_path, 'contigs.fa') + with open(contigs_fasta_path, 'w'): + pass + haplo_cmd = [HAPLOFLOW, '--read-file', joined_path, '--out', assembly_out_path, @@ -85,8 +88,6 @@ def denovo(fastq1_path: str, if output != 'Failed to make first seed. Cannot continue\n': logger.warning('Haploflow failed to assemble.', exc_info=True) logger.warning(output) - with open(contigs_fasta_path, 'a'): - pass with open(contigs_fasta_path) as reader: copyfileobj(cast(BinaryIO, reader), fasta)