-
Notifications
You must be signed in to change notification settings - Fork 0
/
Snakefile
executable file
·105 lines (83 loc) · 4.25 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
## Pipeline to get serotype of Salmonella samples from fastq files
## The pipeline uses SeqSero2 as its main tool
# Snakemake rules (in order of execution):
# 1 SeqSero2_Serotype: predicts serotype using SeqSero2 package
# 2 salmonella_multi_report: generates one salmonella_multi_report.csv file with the results of all samples
##
#import pathlib
#import pprint
import yaml
# Configuration options for Snakemake. Both files are merged into the global
# `config` dictionary.
configfile: 'config/config.yaml'
configfile: 'config/parameters.yaml'

# Load sample list (YAML file with form: sample > read number > file).
# An empty sample sheet parses to None, so fall back to an empty mapping
# instead of letting None leak into the rule definitions below.
with open(config["sample_sheet"]) as sample_sheet_file:
    SAMPLES = yaml.safe_load(sample_sheet_file) or {}

# Easy access output directory
OUT = config["output_dir"]
#@################################################################################
#@#### Processes #####
#@################################################################################
#############################################################################
##### Salmonella Serotyping #####
#############################################################################
# Pull in the rule definitions from separate rule files. Judging by the file
# names, the first holds the per-sample SeqSero2 serotyping rule and the second
# the rule that builds the combined multi-sample report (not visible here).
include: "bin/rules/seqsero2_senterica_serotype.smk"
include: "bin/rules/salmonella_serotype_multireport.smk"
#@################################################################################
#@#### The `onstart` checker codeblock #####
#@################################################################################
onstart:
    # Imported locally so this handler does not rely on these modules already
    # being present in the Snakefile namespace.
    import os
    import sys

    # Pre-flight check: stop with exit code 1 at the first required file that
    # does not exist, before any job is started.
    print("Checking if all specified files are accessible...")
    important_files = [config["sample_sheet"]]
    for filename in important_files:
        if not os.path.exists(filename):
            print("This file is not available or accessible: %s" % filename)
            sys.exit(1)
    print("\tAll specified files are present!")

    # Write the audit logs (git fingerprint, Conda package list, config dump)
    # into {OUT}/results.
    # The script is a raw string so that backslash escapes reach the shell
    # unchanged: `echo -e` expands \n and \t itself, and \" stays an escaped
    # quote. In a non-raw string Python would turn \" into a bare quote, which
    # ends the shell string early and leaves ${file} unquoted.
    shell(r"""
        mkdir -p '{OUT}/results'
        echo -e "\nLogging pipeline settings..."

        echo -e "\tGenerating methodological hash (fingerprint)..."
        echo -e "This is the link to the code used for this analysis:\thttps://gitl01-int-p.rivm.nl/hernanda/test1_salmonellaserotyper/tree/$(git log -n 1 --pretty=format:"%H")" > '{OUT}/results/junoSalmonella_log_git.txt'
        echo -e "This code with unique fingerprint $(git log -n1 --pretty=format:"%H") was committed by $(git log -n1 --pretty=format:"%an <%ae>") at $(git log -n1 --pretty=format:"%ad")" >> '{OUT}/results/junoSalmonella_log_git.txt'

        echo -e "\tGenerating full software list of current Conda environment (\"Juno_master\")..."
        conda list > '{OUT}/results/junoSalmonella_log_conda.txt'

        echo -e "\tGenerating config file log..."
        # Start from a clean file because the loop below appends to it.
        rm -f '{OUT}/results/junoSalmonella_log_config.txt'
        for file in config/*.yaml
        do
            echo -e "\n==> Contents of file \"${{file}}\": <==" >> '{OUT}/results/junoSalmonella_log_config.txt'
            cat "${{file}}" >> '{OUT}/results/junoSalmonella_log_config.txt'
            echo -e "\n\n" >> '{OUT}/results/junoSalmonella_log_config.txt'
        done
    """)
#@################################################################################
#@#### These are the finalizing rules #####
#@################################################################################
#onerror:
# shell("""""")
onsuccess:
    # After a successful run: unlock the working directory, then write the
    # Snakemake HTML report to {OUT}/results.
    # {OUT} is single-quoted everywhere so an output path containing spaces is
    # passed to snakemake as one argument.
    # The script is a raw string so `echo -e` receives \t and expands it itself.
    # NOTE(review): the first message announces an HTML index of the log files,
    # but no command in this handler creates one -- confirm whether that step
    # was dropped or lives elsewhere.
    # NOTE(review): the unlock is presumably needed because the directory is
    # still locked by the run that triggered this handler -- confirm.
    shell(r"""
        echo -e "\tGenerating HTML index of log files..."
        echo -e "\tGenerating Snakemake report..."
        snakemake --profile config --config output_dir='{OUT}' --unlock
        snakemake --profile config --config output_dir='{OUT}' --report '{OUT}/results/snakemake_report.html'
        echo -e "Finished"
    """)
#################################################################################
##### Specify final output: #####
#################################################################################
# Local rules
# Rules listed here are declared as local rules.
localrules: all, salmonella_serotype_multireport
# Final targets of the workflow: one SeqSero result table for every entry in
# SAMPLES, plus the combined multi-sample report.
rule all:
    input:
        expand(OUT + "/{sample}_serotype/SeqSero_result.tsv", sample=SAMPLES),
        OUT + "/salmonella_multi_report.csv"