From beaf35d4232b56497b295ddbe246318e783fe110 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Thu, 24 Oct 2024 15:35:07 -0700 Subject: [PATCH 1/3] Optimize docker commands order --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index af327bbd0..9ba16134e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -85,10 +85,10 @@ RUN apt-get install -q -y zlib1g-dev libncurses5-dev libncursesw5-dev && \ ## Install dependencies for genetracks/drawsvg RUN apt-get install -q -y libcairo2-dev +RUN pip install --upgrade pip COPY . /opt/micall/ -RUN pip install --upgrade pip RUN pip install /opt/micall[basespace] RUN micall make_blast_db From f939791e02241e3a711e1986f1b4414b4f8e7a27 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Fri, 25 Oct 2024 13:25:05 -0700 Subject: [PATCH 2/3] Specify aligntools version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6e3db0585..8e42bf72f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,7 @@ dependencies = [ "mappy==2.17", "drawsvg==2.3.0", "cairosvg==2.7.1", - "aligntools", + "aligntools==1.0.8", ] [project.optional-dependencies] From 8fb0a334b31be972a76f346512b523280a8594f4 Mon Sep 17 00:00:00 2001 From: Vitaliy Mysak Date: Fri, 25 Oct 2024 13:40:54 -0700 Subject: [PATCH 3/3] Update aligntools --- micall/core/contig_stitcher.py | 32 ++++++++++++++++---------------- pyproject.toml | 2 +- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/micall/core/contig_stitcher.py b/micall/core/contig_stitcher.py index 31094ee76..723ff10ec 100644 --- a/micall/core/contig_stitcher.py +++ b/micall/core/contig_stitcher.py @@ -12,13 +12,13 @@ import logging from fractions import Fraction from operator import itemgetter -from aligntools import connect_cigar_hits, CigarHit +from aligntools import CigarHit, connect_nonoverlapping_cigar_hits, drop_overlapping_cigar_hits from micall.core.project_config import ProjectConfig from micall.core.plot_contigs import plot_stitcher_coverage from micall.utils.contig_stitcher_context import context, StitcherContext from micall.utils.contig_stitcher_contigs import GenotypedContig, AlignedContig -from micall.utils.alignment import Alignment, align_consensus +from micall.utils.alignment import align_consensus import micall.utils.contig_stitcher_events as events @@ -163,41 +163,41 @@ def align_to_reference(contig: GenotypedContig) -> Iterable[GenotypedContig]: yield contig return - def init_hit(x: Alignment) -> Tuple[CigarHit, Literal["forward", "reverse"]]: - cigar = x.to_cigar_hit() - return cigar, "forward" if x.strand == 1 else "reverse" - alignments, _algo = align_consensus(contig.ref_seq, contig.seq) - hits_array = [init_hit(x) for x in alignments] + hits = [x.to_cigar_hit() for x in alignments] + strands: List[Literal["forward", "reverse"]] = ["forward" if x.strand == 1 else "reverse" for x in alignments] - for i, (hit, strand) in enumerate(hits_array): + for i, (hit, strand) in enumerate(zip(hits, strands)): log(events.InitialHit(contig, i, hit, strand)) - if not hits_array: + if not hits: log(events.ZeroHits(contig)) yield contig return - if len(set(strand for hit, strand in hits_array)) > 1: + if len(set(strands)) > 1: log(events.StrandConflict(contig)) yield contig return - strand = hits_array[0][1] + strand = strands[0] if strand == "reverse": rc = str(Seq.Seq(contig.seq).reverse_complement()) original_contig = contig new_contig = replace(contig, seq=rc) contig = new_contig - hits_array = [(replace(hit, q_st=len(rc)-hit.q_ei-1, q_ei=len(rc)-hit.q_st-1), strand) - for hit, strand in hits_array] + hits = [replace(hit, q_st=len(rc)-hit.q_ei-1, q_ei=len(rc)-hit.q_st-1) for hit in hits] log(events.ReverseComplement(original_contig, new_contig)) - for i, (hit, strand) in enumerate(hits_array): + for i, (hit, strand) in enumerate(zip(hits, strands)): log(events.InitialHit(contig, i, hit, strand)) - connected = connect_cigar_hits([hit for hit, strand in hits_array]) if hits_array else [] - log(events.HitNumber(contig, hits_array, connected)) + def quality(x: CigarHit): + return x.ref_length + + filtered = list(drop_overlapping_cigar_hits(hits, quality)) + connected = list(connect_nonoverlapping_cigar_hits(filtered)) + log(events.HitNumber(contig, list(zip(hits, strands)), connected)) for i, single_hit in enumerate(connected): query = replace(contig, name=None) diff --git a/pyproject.toml b/pyproject.toml index 8e42bf72f..b2a817763 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,7 @@ dependencies = [ "mappy==2.17", "drawsvg==2.3.0", "cairosvg==2.7.1", - "aligntools==1.0.8", + "aligntools==1.1.1", ] [project.optional-dependencies]