From 114f8aff91498c00c626b3ceab4f370830e06e8b Mon Sep 17 00:00:00 2001 From: Haibao Tang Date: Tue, 7 Jan 2025 18:20:47 -0800 Subject: [PATCH] Dev (#739) * update help string * Add --tandem_Nmax * only keep lower and upper version --- .github/workflows/build.yml | 2 +- src/jcvi/compara/blastfilter.py | 2 +- src/jcvi/compara/catalog.py | 15 +++++++++++---- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 65af5d3e..1f74df33 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -9,7 +9,7 @@ jobs: max-parallel: 4 matrix: os: [ubuntu-latest, macos-latest] - python-version: ["3.8", "3.10", "3.12"] + python-version: ["3.8", "3.12"] steps: - uses: actions/checkout@v4 diff --git a/src/jcvi/compara/blastfilter.py b/src/jcvi/compara/blastfilter.py index 6b53324e..7a4f102d 100755 --- a/src/jcvi/compara/blastfilter.py +++ b/src/jcvi/compara/blastfilter.py @@ -307,7 +307,7 @@ def main(args): type=float, default=0.7, help="retain hits that have good bitscore. a value of 0.5 means " - "keep all values that are 50% or greater of the best hit. " + "keep all values that are 50%% or greater of the best hit. " "higher is more stringent", ) p.add_argument("--exclude", help="Remove anchors from a previous run") diff --git a/src/jcvi/compara/catalog.py b/src/jcvi/compara/catalog.py index fe2d4247..f17ba088 100644 --- a/src/jcvi/compara/catalog.py +++ b/src/jcvi/compara/catalog.py @@ -199,8 +199,6 @@ def enrich(args): (k, [x for x in leftover if info[x] == k]) for k in missing_taxa ) - # print genes, leftover - # print leftover_sorted_by_taxa solutions = [] for solution in product(*leftover_sorted_by_taxa.values()): score = sum(weights.get((a, b), 0) for a in solution for b in genes) @@ -619,6 +617,12 @@ def ortholog(args): action="store_true", help="Run in full 1x1 mode, including blocks and RBH", ) + p.add_argument( + "--tandem_Nmax", + type=int, + default=10, + help="merge tandem genes within distance", + ) p.add_argument("--cscore", default=0.7, type=float, help="C-score cutoff") p.add_argument( "--dist", default=20, type=int, help="Extent of flanking regions to search" @@ -714,8 +718,11 @@ def ortholog(args): filtered_last = last + ".filtered" if need_update(last, filtered_last, warn=True): - # If we are doing filtering based on another file then we don't run cscore anymore - dargs = [last, "--cscore={}".format(ccscore)] + dargs = [ + last, + f"--cscore={ccscore}", + f"--tandem_Nmax={opts.tandem_Nmax}", + ] if exclude: dargs += ["--exclude={}".format(exclude)] if opts.no_strip_names: