From 88826c1a78f3c26f8049cf4925f2d2893a998019 Mon Sep 17 00:00:00 2001 From: Glenn Hickey Date: Tue, 12 Mar 2024 19:44:19 -0400 Subject: [PATCH] prep release v2.8.0 --- BIN-INSTALL.md | 10 +++++----- ReleaseNotes.md | 14 ++++++++++++-- doc/progressive.md | 4 ++-- setup.py | 2 +- src/cactus/shared/common.py | 2 +- 5 files changed, 21 insertions(+), 11 deletions(-) diff --git a/BIN-INSTALL.md b/BIN-INSTALL.md index cb7447d6c..43e8ef72a 100644 --- a/BIN-INSTALL.md +++ b/BIN-INSTALL.md @@ -6,17 +6,17 @@ pre-compile binary, static linked distribution. ## Extracting If you have not already extract the distribution and cd into the cactus directory: ``` -tar -xzf cactus-bin-v2.7.2.tar.gz -cd cactus-bin-v2.7.2 +tar -xzf cactus-bin-v2.8.0.tar.gz +cd cactus-bin-v2.8.0 ``` ## Setup To build a python virtualenv and activate, do the following steps. This requires Python version >= 3.7 (so Ubuntu 18.04 users should use `-p python3.8` below): ``` -virtualenv -p python3 venv-cactus-v2.7.2 -printf "export PATH=$(pwd)/bin:\$PATH\nexport PYTHONPATH=$(pwd)/lib:\$PYTHONPATH\n" >> venv-cactus-v2.7.2/bin/activate -source venv-cactus-v2.7.2/bin/activate +virtualenv -p python3 venv-cactus-v2.8.0 +printf "export PATH=$(pwd)/bin:\$PATH\nexport PYTHONPATH=$(pwd)/lib:\$PYTHONPATH\n" >> venv-cactus-v2.8.0/bin/activate +source venv-cactus-v2.8.0/bin/activate python3 -m pip install -U setuptools pip wheel python3 -m pip install -U . python3 -m pip install -U -r ./toil-requirement.txt diff --git a/ReleaseNotes.md b/ReleaseNotes.md index 13237e7ca..8d4303bd1 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -1,4 +1,14 @@ -# Release 2.7.2 2023-02-23 +# Release 2.8.0 2024-03-13 + +This release significantly changes the preprocessor step of Progressive Cactus in order to be more robust and efficient in the presence of unmasked repeats, something that seems more prevalent with newer, T2T assemblies. 
+

- Replace lastz repeatmasking with REpeat Detector ([RED](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-015-0654-5#Sec41)) in the Progressive Cactus preprocessor. RED is more sensitive and orders of magnitude faster than the old lastz masking pipeline. Crucially, it is able to mask regions that would slip by RepeatMasker/WindowMasker/lastz in new T2T ape genomes that would otherwise break Cactus downstream. Tests so far show this change to make Cactus much faster and more robust. The old lastz pipeline can still be toggled back on in the config. 
- Delete many unneeded files that previously collected in the jobstore directory until the end of execution. This was a particular issue in large `cactus-pangenome` runs where the jobstore would creep up to several terabytes for HPRC-sized inputs. 
- No longer require manually editing the blast chunksize in the config when running on Slurm (to reduce the number of jobs). It's now scaled up automatically on slurm environments (by a factor controlled in the config). 
- Fix bug introduced in last release where Cactus would not work on AWS/MESOS clusters unless `--defaultMemory` and `--maxMemory` options were specified (and in bytes). 
- Update to the latest `taffy` and `vg` + +# Release 2.7.2 2024-02-23 This release improves MAF output, along with some other fixes @@ -8,7 +18,7 @@ This release improves MAF output, along with some other fixes - Duplicating filtering now done automatically in `cactus-maf2bigmaf`. - Disable support for multifurcations (aka polytomies or internal nodes with more than 2 children) in Progressive Cactus. I'm doing this because I got spooked by a drop in coverage I noticed recently in a 4-child alignment. This regression appears to be linked to the new PAF chaining logic that's been added over the past several months. Until that's resolved, Cactus will exit with an error if it sees degree > 2 in the tree. 
This behaviour can, however, be overridden in the XML configuration file. -# Release 2.7.1 2023-01-19 +# Release 2.7.1 2024-01-19 This release adds some options to tune outgroup selection, as well as updates many included dependencies and tools diff --git a/doc/progressive.md b/doc/progressive.md index cb35352cf..cd8993f0e 100644 --- a/doc/progressive.md +++ b/doc/progressive.md @@ -199,12 +199,12 @@ The Cactus Docker image contains everything you need to run Cactus (python envir ``` wget -q https://raw.githubusercontent.com/ComparativeGenomicsToolkit/cactus/master/examples/evolverMammals.txt -O evolverMammals.txt -docker run -v $(pwd):/data --rm -it quay.io/comparative-genomics-toolkit/cactus:v2.7.2 cactus /data/jobStore /data/evolverMammals.txt /data/evolverMammals.hal +docker run -v $(pwd):/data --rm -it quay.io/comparative-genomics-toolkit/cactus:v2.8.0 cactus /data/jobStore /data/evolverMammals.txt /data/evolverMammals.hal ``` Or you can proceed interactively by running ``` -docker run -v $(pwd):/data --rm -it quay.io/comparative-genomics-toolkit/cactus:v2.7.2 bash +docker run -v $(pwd):/data --rm -it quay.io/comparative-genomics-toolkit/cactus:v2.8.0 bash cactus /data/jobStore /data/evolverMammals.txt /data/evolverMammals.hal ``` diff --git a/setup.py b/setup.py index a275391f4..734cf98f5 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ def run(self): setup( name = "Cactus", - version = "2.7.2", + version = "2.8.0", author = "Benedict Paten", package_dir = {'': 'src'}, packages = find_packages(where='src'), diff --git a/src/cactus/shared/common.py b/src/cactus/shared/common.py index c8382f8a8..dc8877656 100644 --- a/src/cactus/shared/common.py +++ b/src/cactus/shared/common.py @@ -318,7 +318,7 @@ def getDockerTag(gpu=False): return "latest" else: # must be manually kept current with each release - return 'v2.7.2' + ('-gpu' if gpu else '') + return 'v2.8.0' + ('-gpu' if gpu else '') def getDockerImage(gpu=False): """Get fully specified Docker image 
name."""