|
| 1 | +#!/usr/bin/env bash |
| 2 | +########################################################################################## |
| 3 | +##### MetaFX feature_analysis module – analyze selected feature in multiple samples ##### |
| 4 | +########################################################################################## |
| 5 | + |
#######################################
# Print the usage text of the feature_analysis module.
# Calls `metafx -v` to show the installed version on the second line.
# Arguments: none
# Outputs:   help text on stdout
#######################################
help_message () {
    # One printf with a repeated '%s\n' format prints each argument on its
    # own line; an empty argument yields a blank line.
    printf '%s\n' \
        "" \
        "$(metafx -v)" \
        "MetaFX feature_analysis module – pipeline to build de Bruijn graphs for samples with selected feature and visualize them in BandageNG (https://github.com/ctlab/BandageNG)" \
        "Usage: metafx feature_analysis [<Launch options>] [<Input parameters>]" \
        "" \
        "Launch options:" \
        " -h | --help show this help message and exit" \
        " -t | --threads <int> number of threads to use [default: all]" \
        " -m | --memory <MEM> memory to use (values with suffix: 1500M, 4G, etc.) [default: 90% of free RAM]" \
        " -w | --work-dir <dirname> working directory [default: workDir/]" \
        "" \
        "Input parameters:" \
        " -k | --k <int> k-mer size to build de Bruijn graphs (in nucleotides, maximum value is 31) [mandatory]" \
        " -f | --feature-dir <dirname> directory containing folders with contigs for each category, feature_table.tsv and categories_samples.tsv files. Usually, it is workDir from other MetaFX modules (unique, stats, colored, metafast, metaspades) [mandatory]" \
        " -n | --feature-name <string> name of the feature of interest (should be one of the values from first column of feature_table.tsv) [mandatory]" \
        " -r | --reads-dir <dirname> directory containing files with reads for samples. FASTQ, FASTA, gzip- or bzip2-compressed [mandatory]" \
        " --relab <float> minimal relative abundance of feature in sample to include sample for further analysis [optional, default: 0.1]" \
        ""
}
| 25 | + |
| 26 | + |
# Paths to pipelines and scripts.
# The helper directories are resolved relative to the directory that holds
# the `metafx` launcher found on PATH.
# `command -v` is the shell builtin lookup; `which` is an external tool whose
# presence and output format vary between systems.
mfx_path=$(command -v metafx)
bin_path="${mfx_path%/*}"            # strip the trailing "/metafx" component
SOFT="${bin_path}/metafx-scripts"    # helper scripts (pretty_print.py, ...)
PIPES="${bin_path}/metafx-modules"   # pipeline wrappers (metacherchant.sh, ...)
pwd=$(dirname -- "$0")               # directory this script was invoked from
| 33 | + |
# Print message $1 through pretty_print.py using "-" as the second argument.
# ${SOFT} is quoted so an install path containing whitespace still resolves.
comment () { "${SOFT}/pretty_print.py" "$1" "-"; }
# Print message $1 through pretty_print.py using "*" as the second argument.
# ${SOFT} is quoted so an install path containing whitespace still resolves.
warning () { "${SOFT}/pretty_print.py" "$1" "*"; }
# Print message $1 through pretty_print.py (second argument "*") and
# terminate the script with exit status 1.
# ${SOFT} is quoted so an install path containing whitespace still resolves.
error () { "${SOFT}/pretty_print.py" "$1" "*"; exit 1; }
| 37 | + |
| 38 | + |
# ---- Command-line parsing ----
# Defaults: only the working directory has one; every other option stays
# unset until supplied.
w="workDir"
POSITIONAL=()
while [[ $# -gt 0 ]]; do
    key="$1"
    case "$key" in
        -h|--help)
            help_message
            exit 0
            ;;
        -k|--k|-f|--feature-dir|-n|--feature-name|-r|--reads-dir|--relab|-m|--memory|-t|--threads|-w|--work-dir)
            # Every option in this arm consumes exactly one value. Fail
            # loudly when it is missing instead of storing an empty string.
            if [[ $# -lt 2 ]]; then
                error "Option '${key}' requires a value"
            fi
            case "$key" in
                -k|--k)            k="$2" ;;
                -f|--feature-dir)  featDir="$2" ;;
                -n|--feature-name) featName="$2" ;;
                -r|--reads-dir)    readsDir="$2" ;;
                --relab)           relab="$2" ;;
                -m|--memory)       m="$2" ;;
                -t|--threads)      p="$2" ;;   # forwarded later as "-p"
                -w|--work-dir)     w="$2" ;;
            esac
            shift 2
            ;;
        *)  # unknown option
            POSITIONAL+=("$1")  # save it in an array for later
            shift
            ;;
    esac
done
set -- "${POSITIONAL[@]}"  # restore positional parameters
| 96 | + |
| 97 | + |
| 98 | + |
| 99 | + |
# ---- Input validation ----
# The expansions are quoted on purpose: with an omitted option the unquoted
# form collapses to `[ ! -d ]`, which tests the literal string "-d" and is
# always false, so a missing mandatory directory would pass unnoticed.
if [[ ! -d "${featDir}" ]]; then
    error "Invalid directory with features provided"
fi

if [[ ! -d "${readsDir}" ]]; then
    error "Invalid directory with samples' reads provided"
fi

if [[ ! -f "${featDir}/feature_table.tsv" ]]; then
    error "feature_table.tsv file missing in ${featDir}"
fi
| 111 | + |
# ==== Step 1 ====
# Select the samples that contain the requested feature and extract the
# feature's nucleotide sequence into the working directory.
comment "Running step 1: selecting samples containing feature '${featName}'"

# Count rows of feature_table.tsv whose first column equals the feature name.
# "CNT+0" makes awk print 0 instead of an empty string when nothing matches.
cnt=$(awk -v var="${featName}" '$1==var {CNT++} END{ print CNT+0 }' "${featDir}/feature_table.tsv")
if [[ "${cnt}" -ne 1 ]]; then
    error "Cannot find feature '${featName}' in ${featDir}/feature_table.tsv"
fi

# -p: do not fail when the working directory already exists (re-runs).
mkdir -p "${w}" || error "Cannot create working directory ${w}"

# The command is kept in an array so arguments containing whitespace survive
# intact when it is executed.
cmd1=(python "${SOFT}/select_samples_by_feature.py"
      --work-dir "${featDir}" --feature "${featName}" --res-dir "${w}")
if [[ -n "${relab}" ]]; then
    # Was appended to a misspelled variable before, so --relab had no effect.
    cmd1+=(--board "${relab}")
fi

echo "${cmd1[*]}"
if "${cmd1[@]}"; then
    # Read via stdin so wc prints only the number, not the file name.
    n_selected=$(wc -l < "${w}/samples_list_feature_${featName}.txt")
    echo "Total ${n_selected//[[:space:]]/} samples were selected"
    echo "List of samples containing feature '${featName}' saved to ${w}/samples_list_feature_${featName}.txt"
    echo "Nucleotide sequence for feature '${featName}' saved to ${w}/seq_feature_${featName}.fasta"
    comment "Step 1 finished successfully!"
else
    error "Error during step 1!"
fi
| 136 | + |
| 137 | + |
# ===== Step 2 =====
# Run metacherchant.sh once per selected sample, seeded with the feature
# sequence produced in step 1, and collect links to the resulting graphs in
# ${w}/graphs.

comment "Running step 2: constructing de Bruijn graphs for each selected sample"

# Arguments shared by every sample; an array keeps each argument intact.
cmd2=("${PIPES}/metacherchant.sh")
if [[ -n "${k}" ]]; then
    cmd2+=(-k "${k}")
fi
if [[ -n "${m}" ]]; then
    cmd2+=(-m "${m}")
fi
if [[ -n "${p}" ]]; then
    cmd2+=(-p "${p}")
fi

cmd2+=(--coverage 1 --maxradius 1000 --bothdirs true --chunklength 10 --merge true)
cmd2+=(--seq "${w}/seq_feature_${featName}.fasta")

# -p: tolerate an existing directory when the module is re-run.
mkdir -p "${w}/graphs" || error "Cannot create directory ${w}/graphs"

log_file="${w}/metacherchant.log"
abs_w=$(realpath "${w}")    # symlink targets must be absolute

# The sample list is read on file descriptor 3 so that commands started inside
# the loop cannot consume it through their stdin. The "|| [[ -n ... ]]" part
# also processes a final line that lacks a trailing newline.
while read -r sample <&3 || [[ -n "${sample}" ]]; do
    [[ -n "${sample}" ]] || continue    # ignore blank lines

    # Reads files are named "<sample>_*" or "<sample>.*" inside readsDir.
    # Unmatched patterns reach find literally; its complaint is discarded.
    mapfile -t readsFiles < <(find "${readsDir}/${sample}"_* "${readsDir}/${sample}".* 2>/dev/null)
    if [[ ${#readsFiles[@]} -eq 0 ]]; then
        error "No reads files found for sample ${sample} in ${readsDir}"
    fi

    cmd2_i=("${cmd2[@]}"
            -w "${w}/wd_${sample}"
            -o "${w}/wd_${sample}/output"
            --reads "${readsFiles[@]}")
    echo -n "Processing sample ${sample} (log saved to ${log_file}) ... "

    echo "${cmd2_i[*]}" >> "${log_file}"
    if "${cmd2_i[@]}" &>> "${log_file}"; then
        echo "DONE"
    else
        error "Error during step 2!"
    fi

    # -f: replace a link left over from a previous run.
    ln -sf "${abs_w}/wd_${sample}/output/merged/graph.gfa" "${w}/graphs/${sample}.gfa" \
        || error "Error during step 2!"
done 3< "${w}/samples_list_feature_${featName}.txt"

# Every failure above terminates the script through error(), so reaching this
# point means all samples were processed.
comment "All de Bruijn graphs saved to: ${w}/graphs/. To visualise them simultaneously in BandageNG follow instructions from https://github.com/ctlab/BandageNG/wiki#multigraph-mode"
comment "Step 2 finished successfully!"


comment "MetaFX feature_analysis module finished successfully!"
exit 0
0 commit comments