From 3798044c86d3ca655a99b75332499a1bfe31347d Mon Sep 17 00:00:00 2001 From: Cecilia-Sensalari <57489957+Cecilia-Sensalari@users.noreply.github.com> Date: Tue, 6 Apr 2021 10:59:20 +0200 Subject: [PATCH] PAML installation from source (#7) * Change PAML installation (from source) * Paml instructions in installation.rst * Improve container docs (installation.rst) * Update comments in Dockerfile and Singularity file * Remove squared brakets in NF command * Rename Paralog --> Ortholog in ortholog TSV file * Remove redundant color argument in modeling.py * Update to tool name "ksrates" in the license * Rename "wgd" module to "wgd_ksrates" * Replace "wgd" with "wgd_ksrates" in container file * Update license (author and year) * Update installation.rst (link to singularity docs) * Add note in local installation about Windows users * Remove installation commands through bioconda --- .github/workflows/test_pipeline.yml | 1 + Dockerfile | 15 ++++- LICENSE | 6 +- Singularity | 17 ++++-- doc/source/installation.rst | 77 +++++++++++++------------ doc/source/usage.rst | 2 +- ksrates/fc_lognormal_mixture.py | 6 +- ksrates/fc_wgd.py | 9 +-- setup.py | 2 +- {wgd => wgd_ksrates}/__init__.py | 0 {wgd => wgd_ksrates}/alignment.py | 0 {wgd => wgd_ksrates}/blast_mcl.py | 0 {wgd => wgd_ksrates}/codeml.py | 0 {wgd => wgd_ksrates}/colinearity.py | 0 {wgd => wgd_ksrates}/diamond.py | 0 {wgd => wgd_ksrates}/ks_distribution.py | 0 {wgd => wgd_ksrates}/modeling.py | 2 +- {wgd => wgd_ksrates}/phy.py | 0 {wgd => wgd_ksrates}/pre.py | 0 {wgd => wgd_ksrates}/utils.py | 0 {wgd => wgd_ksrates}/viz.py | 0 21 files changed, 80 insertions(+), 57 deletions(-) rename {wgd => wgd_ksrates}/__init__.py (100%) rename {wgd => wgd_ksrates}/alignment.py (100%) rename {wgd => wgd_ksrates}/blast_mcl.py (100%) rename {wgd => wgd_ksrates}/codeml.py (100%) rename {wgd => wgd_ksrates}/colinearity.py (100%) rename {wgd => wgd_ksrates}/diamond.py (100%) rename {wgd => wgd_ksrates}/ks_distribution.py (100%) rename {wgd 
=> wgd_ksrates}/modeling.py (99%) rename {wgd => wgd_ksrates}/phy.py (100%) rename {wgd => wgd_ksrates}/pre.py (100%) rename {wgd => wgd_ksrates}/utils.py (100%) rename {wgd => wgd_ksrates}/viz.py (100%) diff --git a/.github/workflows/test_pipeline.yml b/.github/workflows/test_pipeline.yml index 8ffa70f..2161924 100644 --- a/.github/workflows/test_pipeline.yml +++ b/.github/workflows/test_pipeline.yml @@ -40,4 +40,5 @@ jobs: nextflow run ../main.nf --config config_elaeis.txt -with-docker ksrates - name: Visualize output files + if: ${{ always() }} run: ls -l test/rate_adjustment/elaeis diff --git a/Dockerfile b/Dockerfile index 1bbc832..6df0b01 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,13 +6,22 @@ ENV LC_ALL=C.UTF-8 ENV LANG=C.UTF-8 ENV DEBIAN_FRONTEND=noninteractive +# Install PAML from source + +RUN apt-get install -y wget && wget http://abacus.gene.ucl.ac.uk/software/paml4.9j.tgz && \ + tar -xzf paml4.9j.tgz && cd paml4.9j/src && make -f Makefile && mv codeml /bin && cd / + +# Install Python3, wgd dependencies... 
+ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -yq install python3-pip python3-tk git curl \ - default-jdk build-essential mcl ncbi-blast+ muscle mafft prank fasttree phyml paml + default-jdk build-essential mcl ncbi-blast+ muscle mafft prank fasttree phyml + ADD /requirements.txt /install/requirements.txt ADD /setup.py /install/setup.py ADD /ksrates /install/ksrates -ADD /wgd /install/wgd +ADD /wgd_ksrates /install/wgd_ksrates ADD /README.md /install/README.md ADD /ksrates_cli.py /install/ksrates_cli.py -RUN python3 -m pip install /install \ No newline at end of file +# Install ksrates and requirements from requirements.txt +RUN python3 -m pip install /install diff --git a/LICENSE b/LICENSE index 0c8ea9b..4fe69c9 100644 --- a/LICENSE +++ b/LICENSE @@ -631,8 +631,8 @@ to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. - Ks Rate-Correction - Copyright (C) 2020 Cecilia SENSALARI + ksrates + Copyright (C) 2021 Evolutionary Systems Biology lab at VIB/UGent This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -652,7 +652,7 @@ Also add information on how to contact you by electronic and paper mail. If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode: - Ks Rate-Correction Copyright (C) 2020 Cecilia SENSALARI + ksrates Copyright (C) 2021 Evolutionary Systems Biology lab at VIB/UGent This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. 
diff --git a/Singularity b/Singularity index 7c3d6b8..70d032d 100644 --- a/Singularity +++ b/Singularity @@ -2,7 +2,7 @@ Bootstrap: docker From: vibpsb/i-adhore %labels - AUTHOR cesen@psb.vib-ugent.be + AUTHOR cecilia.sensalari@psb.vib-ugent.be %environment export LC_ALL=C.UTF-8 @@ -13,13 +13,22 @@ From: vibpsb/i-adhore requirements.txt install/requirements.txt setup.py install/setup.py ksrates install/ksrates - wgd install/wgd + wgd_ksrates install/wgd_ksrates README.md install/README.md ksrates_cli.py install/ksrates_cli.py %post - # install python, git, etc. + + # Install PAML from source + + apt-get install -y wget && wget http://abacus.gene.ucl.ac.uk/software/paml4.9j.tgz && \ + tar -xzf paml4.9j.tgz && cd paml4.9j/src && make -f Makefile && mv codeml /bin && cd / + + # Install Python3, wgd dependencies... + apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -yq install python3-pip python3-tk git curl \ - default-jdk build-essential mcl ncbi-blast+ muscle mafft prank fasttree phyml paml + default-jdk build-essential mcl ncbi-blast+ muscle mafft prank fasttree phyml + + # Install ksrates and requirements from requirements.txt python3 -m pip install /install diff --git a/doc/source/installation.rst b/doc/source/installation.rst index 805614e..6ab1800 100644 --- a/doc/source/installation.rst +++ b/doc/source/installation.rst @@ -6,7 +6,7 @@ Installation Container availability ====================== -Singularity runs natively only on Linux; on Windows it requires either WSL2 (see Note below) or a virtual machine (VM); on macOS it is available as a beta version or it requires a VM. +Singularity runs natively only on Linux; on Windows it requires either WSL2 (suggested; see Note below) or a virtual machine (VM); on macOS it is available as a beta version or it requires a VM. Singularity has the advantage over Docker of always producing output files with non-root permissions. .. 
note:: @@ -32,9 +32,9 @@ The table below summarizes relevant differences between Singularity and Docker c Singularity (suggested) ----------------------- -The machine where *ksrates* will be executed (either local computer or remote compute cluster) needs to have Singularity installed. More information can be found in the Singularity 3.7 installation `page `__ or in the documentation `history `__ for up-to-date and version-specific instructions. - -The Singularity container will be downloaded from ``vibpsb/ksrates`` repository on Docker Hub when launching the Nextflow pipeline. Successive runs will use the local copy. +The machine where *ksrates* will be executed (either local computer or remote compute cluster) needs to have Singularity installed. More information can be found in the Singularity 3.7 installation `page `__. +For Linux installation we suggest to follow the *Install from Source* `section `__ (*Install Dependencies*, *Install Go*, *Download Singularity from a release* and *Compile Singularity*). +For up-to-date and version-specific instructions, please refer to this `page `__. When using the *ksrates* Nextflow pipeline, the only other dependency that must be installed is Nextflow (for more information see its official installation `page `__). 
@@ -42,53 +42,45 @@ When using the *ksrates* Nextflow pipeline, the only other dependency that must sudo apt-get install default-jdk -* Then install Nextflow in one of the following ways: - - * Through ``wget``:: - - wget -qO- https://get.nextflow.io | bash +* Then install Nextflow:: - * Through ``bioconda`` (for more info on how to setup ``bioconda`` see this `page `__):: - - conda install nextflow + wget -qO- https://get.nextflow.io | bash * Optionally make Nextflow accessible by your ``$PATH`` variable, for example:: - mv nextflow /usr/local/bin + sudo mv nextflow /usr/local/bin +When launching the Nextflow pipeline with Singularity, the container will be downloaded from ``vibpsb/ksrates`` repository on Docker Hub and the local copy will be used for successive runs. Docker ------ The machine where *ksrates* will be executed (either local computer or remote compute cluster) needs to have Docker installed. More information can be found on the Docker installation `page `__. -The Docker container will be downloaded from ``vibpsb/ksrates`` repository on Docker Hub when launching the Nextflow pipeline. Successive runs will use the local copy. - When using the *ksrates* Nextflow pipeline, the only other dependency that must be installed is Nextflow (for more information see its official installation `page `__). * First install Java 8 or later. 
``default-djk`` works as well:: sudo apt-get install default-jdk -* Then install Nextflow in one of the following ways: +* Then install Nextflow:: - * Through ``wget``:: - - wget -qO- https://get.nextflow.io | bash - - * Through ``bioconda`` (for more info on how to setup ``bioconda`` see this `page `__):: - - conda install nextflow + wget -qO- https://get.nextflow.io | bash * Optionally make Nextflow accessible by your ``$PATH`` variable, for example:: - mv nextflow /usr/local/bin + sudo mv nextflow /usr/local/bin + +When launching the Nextflow pipeline with Docker, the container will be downloaded from ``vibpsb/ksrates`` repository on Docker Hub and the local copy will be used for successive runs. Local installation ================== -Without the use of a container the installation of *ksrates* and its dependencies has to be carried out manually. +Without the use of a container the installation of *ksrates* and its dependencies has to be carried out manually. The following commands guide through the installation on a Linux machine; Windows users can carry out the installation with the same commands by using either WSL2 (suggested; see Note below) or a virtual machine (VM) with Linux installed. + +.. note:: + WSL2 (Windows Subsystem for Linux 2) is a native Windows 10 feature that allows to run a GNU/Linux terminal without the use of a VM. It can be installed following the official `documentation `__. 1. Clone the *ksrates* repository from `GitHub `__:: @@ -99,23 +91,34 @@ Without the use of a container the installation of *ksrates* and its dependencie cd ksrates pip3 install . -3. Non-Python dependencies can be installed in two possible ways. +3. 
Most of the non-Python dependencies can be installed with the following commands:: - * Through ``apt-get`` and ``wget``:: + sudo apt-get -yq install default-jdk build-essential ncbi-blast+ muscle mafft prank fasttree mcl phyml + wget -qO- https://get.nextflow.io | bash - sudo apt-get -yq install default-jdk build-essential ncbi-blast+ muscle mafft prank fasttree mcl phyml paml - wget -qO- https://get.nextflow.io | bash + Optionally make Nextflow accessible through your ``$PATH`` variable, for example:: - * Through ``apt-get`` and ``bioconda`` (more info on how to setup ``bioconda`` `here `__):: - - sudo apt-get -yq install default-jdk build-essential - conda install muscle blast mafft prank fasttree mcl phyml paml nextflow + sudo mv nextflow /usr/local/bin + +4. Install PAML 4.9j from source (for more information see PAML installation `page `__) to avoid compatibility issues:: - Optionally make Nextflow accessible by your ``$PATH`` variable, for example:: + wget http://abacus.gene.ucl.ac.uk/software/paml4.9j.tgz + tar -xzf paml4.9j.tgz + cd paml4.9j/src && make -f Makefile - mv nextflow /usr/local/bin + Then make the executable ``codeml`` available through the ``$PATH`` variable (the downloaded PAML directory can be deleted): + + * Either move ``codeml`` to a directory already present in ``$PATH``, e.g. ``/usr/local/bin``:: + + sudo mv codeml /usr/local/bin + + * Or move ``codeml`` to another directory (here assumed to be ``~/bin``) and add this directory to ``$PATH``, for the Bash shell by copying the following line to the shell initialization file (e.g. ``.bashrc``):: + + export PATH=$PATH:~/bin + + Please refer to PAML `website `__ for more information about its installation. -4. Install I-ADHoRe 3.0 from its GitHub `page `__ (required only for collinearity analysis of genome data). +5. Install I-ADHoRe 3.0 from its GitHub `page `__ (required only for collinearity analysis of genome data). 
Testing your installation @@ -141,4 +144,4 @@ Testing your installation * With local installation:: - nextflow run VIB-PSB/ksrates --config ./config_elaeis.txt \ No newline at end of file + nextflow run VIB-PSB/ksrates --config ./config_elaeis.txt diff --git a/doc/source/usage.rst b/doc/source/usage.rst index 9bd8984..232bde3 100644 --- a/doc/source/usage.rst +++ b/doc/source/usage.rst @@ -34,7 +34,7 @@ The *ksrates* pipeline can be automatically run through Nextflow with a few prep 3. Launch *ksrates* through the following command line:: - nextflow run VIB-PSB/ksrates --config ./config_elaeis.txt [-c ./custom_nextflow.config] + nextflow run VIB-PSB/ksrates --config ./config_elaeis.txt -c ./custom_nextflow.config The ``--config`` option takes the *ksrates configuration file*, while ``-c`` takes the optional *Nextflow configuration file*. If the Nextflow-reserved ``nextflow.config`` name is used, this latter file is automatically recognized without explicitly calling it in the command line. 
diff --git a/ksrates/fc_lognormal_mixture.py b/ksrates/fc_lognormal_mixture.py index f545dd8..344695d 100644 --- a/ksrates/fc_lognormal_mixture.py +++ b/ksrates/fc_lognormal_mixture.py @@ -6,10 +6,10 @@ import ksrates.fc_plotting as fcPlot from ksrates.fc_exp_log_mixture import deconvolute_data from numpy import exp, sqrt, linspace, arange, log, argmin, mean, around, array -from wgd.modeling import mixture +from wgd_ksrates.modeling import mixture import scipy.stats as ss -from wgd.modeling import plot_aic_bic -from wgd.modeling import plot_mixture, plot_probs +from wgd_ksrates.modeling import plot_aic_bic +from wgd_ksrates.modeling import plot_mixture, plot_probs import ksrates.fc_exp_log_mixture as fcEM from matplotlib.patches import Patch from ksrates.fc_cluster_anchors import subfolder diff --git a/ksrates/fc_wgd.py b/ksrates/fc_wgd.py index 6abf1a1..f7e501a 100755 --- a/ksrates/fc_wgd.py +++ b/ksrates/fc_wgd.py @@ -5,10 +5,10 @@ import subprocess import pandas as pd from numpy import zeros -from wgd.utils import read_fasta -from wgd.blast_mcl import run_mcl_ava, ava_blast_to_abc, get_one_v_one_orthologs_rbh -from wgd.ks_distribution import ks_analysis_paranome, ks_analysis_one_vs_one -from wgd.colinearity import gff_parser +from wgd_ksrates.utils import read_fasta +from wgd_ksrates.blast_mcl import run_mcl_ava, ava_blast_to_abc, get_one_v_one_orthologs_rbh +from wgd_ksrates.ks_distribution import ks_analysis_paranome, ks_analysis_one_vs_one +from wgd_ksrates.colinearity import gff_parser from ksrates.utils import merge_dicts, concat_files, can_i_run_software, translate_cds, write_fasta _OUTPUT_BLAST_FILE_PATTERN = '{}.blast.tsv' @@ -383,6 +383,7 @@ def ks_orthologs(species1, species2, cds_fasta1, cds_fasta2, base_dir='.', eval_ if isinstance(results_df, type(None)) or results_df.empty: logging.warning('No ortholog Ks data computed, will write empty ortholog Ks file!') with open(os.path.join(output_dir, output_ks_file), 'w+') as o: + results_df = 
results_df.rename(columns={"Paralog1":"Ortholog1", "Paralog2":"Ortholog2"}) o.write(results_df.round(5).to_csv(sep='\t')) # change back to current directory as tmp dir got deleted and subsequent os.getcwd() may fail os.chdir(cw_dir) diff --git a/setup.py b/setup.py index fd256f9..757c86e 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setup( name='ksrates', version='0.1', - packages=['ksrates', 'wgd'], + packages=['ksrates', 'wgd_ksrates'], url='https://github.com/VIB-PSB/ksrates', license='GNU GPL v3.0', author='Cecilia Sensalari, Steven Maere, Rolf Lohaus', diff --git a/wgd/__init__.py b/wgd_ksrates/__init__.py similarity index 100% rename from wgd/__init__.py rename to wgd_ksrates/__init__.py diff --git a/wgd/alignment.py b/wgd_ksrates/alignment.py similarity index 100% rename from wgd/alignment.py rename to wgd_ksrates/alignment.py diff --git a/wgd/blast_mcl.py b/wgd_ksrates/blast_mcl.py similarity index 100% rename from wgd/blast_mcl.py rename to wgd_ksrates/blast_mcl.py diff --git a/wgd/codeml.py b/wgd_ksrates/codeml.py similarity index 100% rename from wgd/codeml.py rename to wgd_ksrates/codeml.py diff --git a/wgd/colinearity.py b/wgd_ksrates/colinearity.py similarity index 100% rename from wgd/colinearity.py rename to wgd_ksrates/colinearity.py diff --git a/wgd/diamond.py b/wgd_ksrates/diamond.py similarity index 100% rename from wgd/diamond.py rename to wgd_ksrates/diamond.py diff --git a/wgd/ks_distribution.py b/wgd_ksrates/ks_distribution.py similarity index 100% rename from wgd/ks_distribution.py rename to wgd_ksrates/ks_distribution.py diff --git a/wgd/modeling.py b/wgd_ksrates/modeling.py similarity index 99% rename from wgd/modeling.py rename to wgd_ksrates/modeling.py index 4fd1b3b..01fc1c3 100644 --- a/wgd/modeling.py +++ b/wgd_ksrates/modeling.py @@ -253,7 +253,7 @@ def plot_mixture(model, data, ax, l=0, u=5, color='black', alpha=0.2, else: curve = ss.norm.pdf( x, loc=means[k], scale=np.sqrt(varcs[k])) * weights[k] - ax.plot(x, curve, 
'--k', color='black', alpha=0.4) + ax.plot(x, curve, color='black', ls="--", alpha=0.4) if first: mix = curve first = False diff --git a/wgd/phy.py b/wgd_ksrates/phy.py similarity index 100% rename from wgd/phy.py rename to wgd_ksrates/phy.py diff --git a/wgd/pre.py b/wgd_ksrates/pre.py similarity index 100% rename from wgd/pre.py rename to wgd_ksrates/pre.py diff --git a/wgd/utils.py b/wgd_ksrates/utils.py similarity index 100% rename from wgd/utils.py rename to wgd_ksrates/utils.py diff --git a/wgd/viz.py b/wgd_ksrates/viz.py similarity index 100% rename from wgd/viz.py rename to wgd_ksrates/viz.py