Merge pull request #71 from pinellolab/v216

Release v2.1.6
pinellolab · Nov 27, 2024 · 58c0961
2 parents 0aae818 + 91cba0c
commit 58c0961
Show file tree

Hide file tree

Showing 49 changed files with 4,218 additions and 2,109 deletions.
diff --git a/.editorconfig b/.editorconfig
diff --git a/.gitignore b/.gitignore
@@ -7,10 +7,8 @@ VCFs/
 Gencode/
 Cache/
 *nohup*
-Annotations/
 PAMs/*
 samplesIDs/*
-guides/
 *.csv
 *.pkl
 *.zip

diff --git a/Dockerfile b/Dockerfile
@@ -6,7 +6,7 @@ FROM mambaorg/micromamba
 
 # Set the variables for version control during installation
 ARG crispritz_version=2.6.6
-ARG crisprme_version=2.1.5
+ARG crisprme_version=2.1.6
 
 # set the shell to bash
 ENV SHELL bash

diff --git a/LICENSE b/LICENSE
@@ -1,2 +1,16 @@
-CRISRPme has a dual license. It is made available for free to academic researchers under the Affero License (https://www.gnu.org/licenses/agpl-3.0.en.html).
-If you plan to use the CRISRPme for-profit, you will need to purchase a license. Please contact [email protected] and [email protected] for more information.
+CRISPRme is distributed under a dual-license model:  
+
+1. Academic Use 
+    CRISPRme is freely available for academic research under the GNU Affero 
+    General Public License v3.0 (AGPL-3.0)
+    (https://www.gnu.org/licenses/agpl-3.0.en.html). 
+
+2. Commercial Use 
+    For-profit institutions or users intending to use CRISPRme for commercial 
+    purposes must acquire a commercial license. For inquiries and licensing 
+    details, please contact:
+        - Luca Pinello: [email protected]  
+        - Rosalba Giugno: [email protected]
+
+For more information on licensing terms and conditions, please reach out to the 
+contacts above.
diff --git a/PostProcess/analisi_indels_NNN.sh b/PostProcess/analisi_indels_NNN.sh
@@ -1,6 +1,5 @@
 #!/bin/bash
 
-set -e  # trace all errors 
 
 # Script per l'analisi dei targets della ricerca REF e ENR con PAM NNN
 # Il file dei targets della ricerca sul genoma reference si chiama $REFtargets  -> INPUT $1
@@ -47,10 +46,8 @@ touch $REFtargets.corrected
 
 # 1) Rimozione duplicati, estrazione semicommon e unique e creazione file total
 #echo 'Creazione file .total.txt'
-./extraction.sh "$REFtargets.corrected" "$ENRtargets" "$jobid" || {
-	echo "CRISPRme ERROR: indels analysis failed (script: ${0} line $((LINENO-1)))" >&2
-	exit 1
-} # OUTPUT    $jobid.common_targets.txt -> Non usato
+./extraction.sh "$REFtargets.corrected" "$ENRtargets" "$jobid" 
+# OUTPUT    $jobid.common_targets.txt -> Non usato
 #           $jobid.semi_common_targets.txt
 #           $jobid.unique_targets.txt
 
@@ -73,10 +70,7 @@ rm "$jobid.semi_common_targets.minmaxdisr.txt"
 
 #echo 'Creazione cluster del file .total.txt'
 # 3) Clustering
-./cluster.dict.py "$jobid.total.txt" 'no' 'True' 'True' "$guide_file" 'total' 'orderChr' || {
-	echo "CRISPRme ERROR: indels clustering failed (script: ${0} line $((LINENO-1)))" >&2
-	exit 1
-} # OUTPUT     $jobid.total.cluster.txt
+./cluster.dict.py "$jobid.total.txt" 'no' 'True' 'True' "$guide_file" 'total' 'orderChr'  # OUTPUT     $jobid.total.cluster.txt
 
 #sed -i ':a;N;$!ba;s/\n/\tn\tn\tn\n/g' $jobid.total.cluster.txt
 #sed -i '$s/$/\tn\tn\tn/g' $jobid.total.cluster.txt
@@ -104,10 +98,7 @@ rm "$jobid.total.txt"
 
 #echo 'Estrazione sample dal file .total.cluster.txt'
 
-./analisi_indels_NNN.py "$annotationfile" "$jobid.total.cluster.txt" "$jobid" "$dictionaries" "$pam_file" "$mismatch" "$referencegenome" "$guide_file" $bulgesDNA $bulgesRNA || {
-	echo "CRISPRme ERROR: indels analysis failed (script: ${0} line $((LINENO-1)))" >&2
-	exit 1
-}
+./analisi_indels_NNN.py "$annotationfile" "$jobid.total.cluster.txt" "$jobid" "$dictionaries" "$pam_file" "$mismatch" "$referencegenome" "$guide_file" $bulgesDNA $bulgesRNA 
 # OUTPUT    $jobid.bestCFD_INDEL.txt
 #           $jobid.CFDGraph.txt     (per fare l'area graph dei CFD REF vs ENR)
 # NOTA AnnotatorAllTargets.py salva su disco SOLO il target con CFD più alto nel cluster e tra le scomposizioni esistenti
@@ -133,37 +124,19 @@ echo 'Sorting and adjusting results'
 # #tail file w/o header and sort for realguide,chr,cluster_pos,score
 # tail -n +2 $jobid.bestCRISTA_INDEL.txt | LC_ALL=C sort -k15,15 -k4,4 -k6,6n -k21,21rg -T ./ >>$jobid.tmp && mv $jobid.tmp $jobid.bestCRISTA_INDEL.txt
 
-./adjust_cols.py "$jobid.bestCFD_INDEL.txt" || {
-	echo "CRISPRme ERROR: CFD indels report failed (script: ${0} line $((LINENO-1)))" >&2
-	exit 1
-}
-./adjust_cols.py "$jobid.bestCRISTA_INDEL.txt" || {
-	echo "CRISPRme ERROR: CRISTA indels report failed (script: ${0} line $((LINENO-1)))" >&2
-	exit 1
-}
-./adjust_cols.py "$jobid.bestmmblg_INDEL.txt" || {
-	echo "CRISPRme ERROR: mismatch+bulges indels report failed (script: ${0} line $((LINENO-1)))" >&2
-	exit 1
-}
+./adjust_cols.py "$jobid.bestCFD_INDEL.txt" 
+./adjust_cols.py "$jobid.bestCRISTA_INDEL.txt" 
+./adjust_cols.py "$jobid.bestmmblg_INDEL.txt" 
 
 # sed -i '1s/.*/MMBLG_#Bulge_type\tMMBLG_crRNA\tMMBLG_DNA\tMMBLG_Reference\tMMBLG_Chromosome\tMMBLG_Position\tMMBLG_Cluster_Position\tMMBLG_Direction\tMMBLG_Mismatches\tMMBLG_Bulge_Size\tMMBLG_Total\tMMBLG_PAM_gen\tMMBLG_Var_uniq\tMMBLG_Samples\tMMBLG_Annotation_Type\tMMBLG_Real_Guide\tMMBLG_rsID\tMMBLG_AF\tMMBLG_SNP\tMMBLG_#Seq_in_cluster\tMMBLG_CFD\tMMBLG_CFD_ref/' $jobid.bestmmblg_INDEL.txt
 # sed -i '1s/.*/MMBLG_#Bulge_type\tMMBLG_crRNA\tMMBLG_DNA\tMMBLG_Reference\tMMBLG_Chromosome\tMMBLG_Position\tMMBLG_Cluster_Position\tMMBLG_Direction\tMMBLG_Mismatches\tMMBLG_Bulge_Size\tMMBLG_Total\tMMBLG_PAM_gen\tMMBLG_Var_uniq\tMMBLG_Samples\tMMBLG_Annotation_Type\tMMBLG_Real_Guide\tMMBLG_rsID\tMMBLG_AF\tMMBLG_SNP\tMMBLG_#Seq_in_cluster\tMMBLG_CFD\tMMBLG_CFD_ref/' $jobid.altmmblg.txt
 
 # pr -m -t -J $jobid.bestCFD_INDEL.txt $jobid.bestmmblg_INDEL.txt >$jobid.bestMerge.txt
 # pr -m -t -J $jobid.altCFD.txt $jobid.altmmblg.txt >$jobid.altMerge.txt
 
-./remove_bad_indel_targets.py "$jobid.bestCFD_INDEL.txt" || {
-	echo "CRISPRme ERROR: CFD indels report cleaning failed (script: ${0} line $((LINENO-1)))" >&2
-	exit 1
-}
-./remove_bad_indel_targets.py "$jobid.bestCRISTA_INDEL.txt" || {
-	echo "CRISPRme ERROR: CRISTA indels report cleaning failed (script: ${0} line $((LINENO-1)))" >&2
-	exit 1
-}
-./remove_bad_indel_targets.py "$jobid.bestmmblg_INDEL.txt" || {
-	echo "CRISPRme ERROR: mismatch+bulges indels report cleaning failed (script: ${0} line $((LINENO-1)))" >&2
-	exit 1
-}
+./remove_bad_indel_targets.py "$jobid.bestCFD_INDEL.txt" 
+./remove_bad_indel_targets.py "$jobid.bestCRISTA_INDEL.txt" 
+./remove_bad_indel_targets.py "$jobid.bestmmblg_INDEL.txt" 
 
 #merge targets in same chr when they are at distance 3 from each other (inclusive) preserving the highest scoring one
 # ./merge_close_targets_cfd.sh $jobid.bestCFD_INDEL.txt $jobid.bestCFD_INDEL.txt.trimmed 3 'score'

diff --git a/PostProcess/extraction.sh b/PostProcess/extraction.sh
@@ -1,7 +1,6 @@
 #!/bin/bash
 
 ##NOTE AWK & GREP REPORT NO STDOUT IF NO MATCHES ARE FOUND (AWK DO NOT PRODUCE ANY OUTPUT)
-# set -e # trace all errors
 
 #PARAM $1 is ref targets file
 #PARAM $2 is var targets file

diff --git a/PostProcess/merge_alt_chr.sh b/PostProcess/merge_alt_chr.sh
@@ -1,7 +1,5 @@
 #!/bin/bash
 
-set -e # trace all failures
-
 dir=$(dirname $1)
 fileIn=$1
 fileOut=$2
@@ -15,7 +13,8 @@ head -1 $fileIn >$fileOut
 for chrom in ${chroms[@]}; do
 
     echo $chrom
-    awk "/${chrom}\t/" test.targets.txt >$fileIn.$chrom.ref
+    # awk "/${chrom}\t/" test.targets.txt >$fileIn.$chrom.ref
+    grep -F -w "$chrom" $fileIn >$fileIn.$chrom.ref
     cut -f 3 $fileIn.$chrom.ref | LC_ALL=C sort -T "$dir" | uniq >$fileIn.$chrom.ref.targets
     awk -v chrom="$chrom" '$0 ~ chrom"_" {print($0)}' $fileIn >$fileIn.$chrom.alt
     awk 'NR==FNR{a[$0];next} !($0 in a)' $fileIn.$chrom.ref.targets $fileIn.$chrom.alt >$fileIn.$chrom.merged

diff --git a/PostProcess/merge_close_targets_cfd.sh b/PostProcess/merge_close_targets_cfd.sh
@@ -1,7 +1,5 @@
 #!/bin/bash
 
-# set -e  # capture any failure
-
 fileIn=$1
 fileOut=$2
 thresh=$3 #threshold to use in order to merge near targets
@@ -27,10 +25,16 @@ echo "Sorting done in $(($ENDTIME - $STARTTIME)) seconds"
 # echo -e $header | cat - $fileIn.sorted.tmp > $fileIn.sorted
 # rm $fileIn.sorted.tmp
 echo "Merging contiguous targets"
+
+if [[ "${sort_pivot}" == "score" ]]; then
+    criteria=$sorting_criteria_scoring
+else
+    criteria=$sorting_criteria
+fi
+python merge_contiguous_targets.py $fileIn $fileOut $thresh $chrom $position $total $true_guide $snp_info $cfd $sort_pivot $criteria
 # python remove_contiguous_samples_cfd.py $fileIn.sorted $fileOut $thresh $chrom $position $total $true_guide $snp_info $cfd
-python remove_contiguous_samples_cfd.py $fileIn $fileOut $thresh $chrom $position $total $true_guide $snp_info $cfd $sort_pivot $sorting_criteria_scoring $sorting_criteria
-# python remove_contiguous_samples_cfd_new.py $fileIn $fileOut $thresh $chrom $position $total $true_guide $snp_info $cfd $sort_pivot $sorting_criteria_scoring $sorting_criteria || {
-#     echo "CRISPRme ERROR: contigous SNP removal failed (script: ${0} line $((LINENO-1)))" >&2
-# 	exit 1
-# }
+
+
+# python remove_contiguous_samples_cfd.py $fileIn $fileOut $thresh $chrom $position $total $true_guide $snp_info $cfd $sort_pivot
+# python remove_contiguous_samples_cfd.py $fileIn $fileOut $thresh $chrom $position $total $true_guide $snp_info $cfd $sort_pivot $sorting_criteria_scoring $sorting_criteria
 # rm $fileIn.sorted