From dd6cb91716730ac1a5d37a11fc06002ee1685f64 Mon Sep 17 00:00:00 2001 From: "Bruno P. Kinoshita" Date: Sat, 24 Jul 2021 11:28:01 +1200 Subject: [PATCH 1/6] Remove duplicate entry --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index 60825a8a..df8ad452 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,6 @@ modules = ["janis_assistant." + p for p in sorted(find_packages("./janis_assistant"))] -fixed_unix_version = f"janis-pipelines.unix==" + JANIS_UNIX_VERSION setup( name="janis pipelines", version=__version__, From c50d220a04fefc30bf6a296e66d35d1bfab7672c Mon Sep 17 00:00:00 2001 From: "Bruno P. Kinoshita" Date: Sat, 24 Jul 2021 11:28:18 +1200 Subject: [PATCH 2/6] Fix typos in tutorial 0 --- docs/tutorials/tutorial0.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/tutorials/tutorial0.md b/docs/tutorials/tutorial0.md index bda92deb..826f852b 100644 --- a/docs/tutorials/tutorial0.md +++ b/docs/tutorials/tutorial0.md @@ -6,7 +6,7 @@ Janis was designed with a few points in mind: - Workflows should be easy to build, - Workflows and tools must be easily shared (portable), -- Workflows should be able to execute on HPCs and cloud environments. +- Workflows should be able to execute on HPCs and cloud environments, - Workflows should be reproducible and re-runnable. Janis uses an *abstracted execution environment*, which removes the shared file system in favour of you specifiying all the files you need up front and passing them around as a File object. This allows the same workflow to be executable on your local machine, HPCs and cloud, and we let the `execution engine` handle moving our files. This also means that we can use file systems like ``S3``, ``GCS``, ``FTP`` and more without any changes to our workflow. 
@@ -63,7 +63,7 @@ We'll install Janis in a virtual environment as it preserves versioning of Janis pip install cwltool ``` -Test that CWLTool has installed correctly with: +Test that CWLTool has been installed correctly with: ```bash cwltool --version @@ -152,7 +152,7 @@ janis run --engine cwltool -o tutorial0-override hello --inp "Hello, $(whoami)" ### Running Janis in the background -You may want to run Janis in the background as it's own process. You could do this with `nohup [command] &`, however we can also run Janis with the `--background` flag and capture the workflow ID to watch, eg: +You may want to run Janis in the background as its own process. You could do this with `nohup [command] &`, however we can also run Janis with the `--background` flag and capture the workflow ID to watch, eg: ```bash wid=$(janis run \ @@ -165,7 +165,7 @@ janis watch $wid ## Summary -- Setup a virtualenv +- Set up a virtualenv - Installed Janis and CWLTool - Ran a small workflow with custom inputs From 7042fdc7dd7ba268680e88f0a24c7f880a3bc2a9 Mon Sep 17 00:00:00 2001 From: "Bruno P. 
Kinoshita" Date: Sat, 24 Jul 2021 11:28:39 +1200 Subject: [PATCH 3/6] Fix tutorial 2 curl/wget examples (data not gzipped) --- docs/tutorials/tutorial1.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/tutorials/tutorial1.md b/docs/tutorials/tutorial1.md index f868393b..f70068c8 100644 --- a/docs/tutorials/tutorial1.md +++ b/docs/tutorials/tutorial1.md @@ -21,10 +21,10 @@ To prepare for this tutorial, we're going to create a folder and download some d mkdir janis-tutorials && cd janis-tutorials # If WGET is installed -wget -q -O- "https://github.com/PMCC-BioinformaticsCore/janis-workshops/raw/master/janis-data.tar" | tar -xz +wget -q -O- "https://github.com/PMCC-BioinformaticsCore/janis-workshops/raw/master/janis-data.tar" | tar -x # If CURL is installed -curl -Ls "https://github.com/PMCC-BioinformaticsCore/janis-workshops/raw/master/janis-data.tar" | tar -xz +curl -Ls "https://github.com/PMCC-BioinformaticsCore/janis-workshops/raw/master/janis-data.tar" | tar -x ``` From 8d260501dcc9b987ad3e690e0a0dfe496931161e Mon Sep 17 00:00:00 2001 From: "Bruno P. Kinoshita" Date: Sat, 24 Jul 2021 16:04:59 +1200 Subject: [PATCH 4/6] Tutorial 1 typos and re-word --- docs/tutorials/tutorial1.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/tutorials/tutorial1.md b/docs/tutorials/tutorial1.md index f70068c8..f8d37628 100644 --- a/docs/tutorials/tutorial1.md +++ b/docs/tutorials/tutorial1.md @@ -60,7 +60,7 @@ from janis_bioinformatics.data_types import FastqGzPairedEnd, FastaWithDict ### Tools -We've discussed the tools we're going to use. The documentation for each tool has a row in the tbale caled "Python" that gives you the import statement. This is how we'll import how tools: +We've discussed the tools we're going to use. The documentation for each tool has a row in the table called "Python" that gives you the import statement. 
This is how we'll import these tools: ```python @@ -129,7 +129,7 @@ Workflow.step( ) ``` -We provide a identifier for the step (unique amongst the other nodes in the workflow), and intialise our tool, passing our inputs of the step as parameters. +We provide an identifier for the step (unique amongst the other nodes in the workflow), and initialise our tool, passing our inputs of the step as parameters. We can refer to an input (or previous result) using the dot notation. For example, to refer to the `fastq` input, we can use `w.fastq`. @@ -212,7 +212,7 @@ w.output("out", source=w.sortsam.out) ## Workflow + Translation -Hopefully you have a workflow that looks like the following! +Hopefully now you have a workflow that looks like the following! ```python from janis_core import WorkflowBuilder, String From e0f3145ffa7a1b37021c013958435a284f8fb04a Mon Sep 17 00:00:00 2001 From: "Bruno P. Kinoshita" Date: Sat, 24 Jul 2021 16:21:25 +1200 Subject: [PATCH 5/6] In tutorial1, the normal run must output to tutorial1 folder, so tutorial2 can use its output --- docs/tutorials/tutorial1.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorials/tutorial1.md b/docs/tutorials/tutorial1.md index f8d37628..61bb4e0d 100644 --- a/docs/tutorials/tutorial1.md +++ b/docs/tutorials/tutorial1.md @@ -272,7 +272,7 @@ janis translate tools/alignment.py wdl We'll run the workflow against the current directory. ```bash -janis run -o . --engine cwltool \ +janis run -o tutorial1 --engine cwltool \ tools/alignment.py \ --fastq data/BRCA1_R*.fastq.gz \ --reference reference/hg38-brca1.fasta \ From f0814d5d5b50397ad5ef8704bac3cffb973ee9d7 Mon Sep 17 00:00:00 2001 From: "Bruno P. 
Kinoshita" Date: Sat, 24 Jul 2021 18:45:20 +1200 Subject: [PATCH 6/6] Typos and use janis-tutorials for tutorials base folder --- docs/tutorials/tutorial0.md | 13 ++++++++++--- docs/tutorials/tutorial1.md | 15 +++++++-------- docs/tutorials/tutorial2.md | 6 +++--- docs/tutorials/tutorial3.md | 8 ++++---- 4 files changed, 24 insertions(+), 18 deletions(-) diff --git a/docs/tutorials/tutorial0.md b/docs/tutorials/tutorial0.md index 826f852b..c2890352 100644 --- a/docs/tutorials/tutorial0.md +++ b/docs/tutorials/tutorial0.md @@ -80,6 +80,13 @@ mkdir ~/janis cd ~/janis ``` +Let's also create a directory to store the files for the tutorials. + +```bash +mkdir janis-tutorials +cd janis-tutorials +``` + You can test run an example workflow with Janis and CWLTool with the following command: ```bash @@ -112,7 +119,7 @@ janis watch d909df # Name: hello # Engine: cwltool # -# Task Dir: $HOME/janis/tutorial0 +# Task Dir: $HOME/janis/janis-tutorials/tutorial0 # Exec Dir: None # # Status: Completed @@ -125,13 +132,13 @@ janis watch d909df # [✓] hello (1s) # # Outputs: -# - out: $HOME/janis/tutorial0/out +# - out: $HOME/janis/janis-tutorials/tutorial0/out ``` There is a single output `out` from the workflow, cat-ing this result we get: ```bash -cat $HOME/janis/tutorial0/out +cat $HOME/janis/janis-tutorials/tutorial0/out # Hello, World ``` diff --git a/docs/tutorials/tutorial1.md b/docs/tutorials/tutorial1.md index 61bb4e0d..517860bc 100644 --- a/docs/tutorials/tutorial1.md +++ b/docs/tutorials/tutorial1.md @@ -1,5 +1,7 @@ # Tutorial 1 - Building a Workflow +> This tutorial uses directories created in [Tutorial 0](https://janis.readthedocs.io/en/latest/tutorials/tutorial0.html). + In this stage, we're going to build a simple workflow to align short reads of DNA. 1. 
Start with a pair of compressed `FASTQ` files, @@ -15,10 +17,10 @@ These tools already exist within the Janis Tool Registry, you can see their docu ## Preparation -To prepare for this tutorial, we're going to create a folder and download some data: +To prepare for this tutorial, we're going to need to download some data first: ```bash -mkdir janis-tutorials && cd janis-tutorials +cd ~/janis/janis-tutorials # If WGET is installed wget -q -O- "https://github.com/PMCC-BioinformaticsCore/janis-workshops/raw/master/janis-data.tar" | tar -x @@ -280,17 +282,14 @@ janis run -o tutorial1 --engine cwltool \ --read_group "@RG\tID:NA12878\tSM:NA12878\tLB:NA12878\tPL:ILLUMINA" ``` -After the workflow has run, you'll see the outputs in the current directory: +After the workflow has run, you'll see the outputs in the tutorial1 directory: ```bash -ls +ls ~/janis/janis-tutorials/tutorial1 -# drwxr-xr-x mfranklin 1677682026 160B data # drwxr-xr-x mfranklin 1677682026 256B janis # -rw-r--r-- mfranklin wheel 2.7M out.bam # -rw-r--r-- mfranklin wheel 296B out.bam.bai -# drwxr-xr-x mfranklin 1677682026 320B reference -# drwxr-xr-x mfranklin 1677682026 128B tools ``` ### OPTIONAL: Run with Cromwell @@ -298,7 +297,7 @@ ls If you have `java` installed, Janis can run the workflow in the Crowmell execution engine by using the `--engine cromwell` parameter: ```bash -janis run -o run-with-cromwell --engine cromwell \ +janis run -o tutorial1-run-with-cromwell --engine cromwell \ tools/alignment.py \ --fastq data/BRCA1_R*.fastq.gz \ --reference reference/hg38-brca1.fasta \ diff --git a/docs/tutorials/tutorial2.md b/docs/tutorials/tutorial2.md index 1dbc05ad..f3d05e18 100644 --- a/docs/tutorials/tutorial2.md +++ b/docs/tutorials/tutorial2.md @@ -73,7 +73,7 @@ ToolName = CommandToolBuilder( Let's start by creating a file with this template inside a second output directory: ```bash -mkdir -p tools +cd ~/janis/janis-tutorials vim tools/samtoolsflagstat.py ``` @@ -280,13 +280,13 @@ Jobs: [✓] 
samtoolsflagstat (N/A) Outputs: - - stats: $HOME/janis-tutorials/tutorial2/stats.txt + - stats: $HOME/janis/janis-tutorials/tutorial2/stats ``` Janis (and CWLTool) said the tool executed correctly, let's check the output file: ```bash -cat tutorial2/stats.txt +cat tutorial2/stats ``` ``` diff --git a/docs/tutorials/tutorial3.md b/docs/tutorials/tutorial3.md index 212420ed..72ffd9ba 100644 --- a/docs/tutorials/tutorial3.md +++ b/docs/tutorials/tutorial3.md @@ -16,7 +16,7 @@ This tutorial uses the workflow build in [Tutorial 1](https://janis.readthedocs. ## Output name -Simply put, `output_name` is the dervied filename of the output without the extension. By default, this is the `tag` of the output. +Simply put, `output_name` is the derived filename of the output without the extension. By default, this is the `tag` of the output. You can specify a new output name in 2 ways: @@ -30,17 +30,17 @@ You should make the following considerations: - The input you select should be a string, or - If the output you're naming is an array, the input you select should either be: - - singular + - singular or - have the same number of elements in it. - Janis will either fall back to the first element if it's a list, or default to the output tag. This may cause outputs to override each other. +Janis will either fall back to the first element if it's a list, or default to the output tag. This may cause outputs to override each other. ## Output folder Similar to the output name, the `output_folder` is folder, or group of nested folders into which your output will be written. By default, this field has no value and outputs are linked directly into the output directory. -If the output_folder field is an array, a nested folder is created for each element in ascending order (eg: `["parent", "child", "child_of_child"]`). +If the `output_folder` field is an array, a nested folder is created for each element in ascending order (eg: `["parent", "child", "child_of_child"]`). 
There are multiple ways to specify output directories: