Skip to content

Commit

Permalink
Merge pull request #71 from pinellolab/v216
Browse files Browse the repository at this point in the history
Release v2.1.6
  • Loading branch information
ManuelTgn authored Nov 27, 2024
2 parents 0aae818 + 91cba0c commit 58c0961
Show file tree
Hide file tree
Showing 49 changed files with 4,218 additions and 2,109 deletions.
Empty file removed .editorconfig
Empty file.
2 changes: 0 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,8 @@ VCFs/
Gencode/
Cache/
*nohup*
Annotations/
PAMs/*
samplesIDs/*
guides/
*.csv
*.pkl
*.zip
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ FROM mambaorg/micromamba

# Set the variables for version control during installation
ARG crispritz_version=2.6.6
ARG crisprme_version=2.1.5
ARG crisprme_version=2.1.6

# set the shell to bash
ENV SHELL bash
Expand Down
18 changes: 16 additions & 2 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -1,2 +1,16 @@
CRISRPme has a dual license. It is made available for free to academic researchers under the Affero License (https://www.gnu.org/licenses/agpl-3.0.en.html).
If you plan to use the CRISRPme for-profit, you will need to purchase a license. Please contact [email protected] and [email protected] for more information.
CRISPRme is distributed under a dual-license model:

1. Academic Use
CRISPRme is freely available for academic research under the GNU Affero
General Public License v3.0 (AGPL-3.0)
(https://www.gnu.org/licenses/agpl-3.0.en.html).

2. Commercial Use
For-profit institutions or users intending to use CRISPRme for commercial
purposes must acquire a commercial license. For inquiries and licensing
details, please contact:
- Luca Pinello: [email protected]
- Rosalba Giugno: [email protected]

For more information on licensing terms and conditions, please reach out to the
contacts above.
47 changes: 10 additions & 37 deletions PostProcess/analisi_indels_NNN.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#!/bin/bash

set -e # trace all errors

# Script for analyzing the targets of the REF and ENR searches with an NNN PAM
# The targets file from the search on the reference genome is called $REFtargets -> INPUT $1
Expand Down Expand Up @@ -47,10 +46,8 @@ touch $REFtargets.corrected

# 1) Remove duplicates, extract semi-common and unique targets, and create the total file
#echo 'Creazione file .total.txt'
./extraction.sh "$REFtargets.corrected" "$ENRtargets" "$jobid" || {
echo "CRISPRme ERROR: indels analysis failed (script: ${0} line $((LINENO-1)))" >&2
exit 1
} # OUTPUT $jobid.common_targets.txt -> Non usato
./extraction.sh "$REFtargets.corrected" "$ENRtargets" "$jobid"
# OUTPUT $jobid.common_targets.txt -> Not used
# $jobid.semi_common_targets.txt
# $jobid.unique_targets.txt

Expand All @@ -73,10 +70,7 @@ rm "$jobid.semi_common_targets.minmaxdisr.txt"

#echo 'Creazione cluster del file .total.txt'
# 3) Clustering
./cluster.dict.py "$jobid.total.txt" 'no' 'True' 'True' "$guide_file" 'total' 'orderChr' || {
echo "CRISPRme ERROR: indels clustering failed (script: ${0} line $((LINENO-1)))" >&2
exit 1
} # OUTPUT $jobid.total.cluster.txt
./cluster.dict.py "$jobid.total.txt" 'no' 'True' 'True' "$guide_file" 'total' 'orderChr' # OUTPUT $jobid.total.cluster.txt

#sed -i ':a;N;$!ba;s/\n/\tn\tn\tn\n/g' $jobid.total.cluster.txt
#sed -i '$s/$/\tn\tn\tn/g' $jobid.total.cluster.txt
Expand Down Expand Up @@ -104,10 +98,7 @@ rm "$jobid.total.txt"

#echo 'Estrazione sample dal file .total.cluster.txt'

./analisi_indels_NNN.py "$annotationfile" "$jobid.total.cluster.txt" "$jobid" "$dictionaries" "$pam_file" "$mismatch" "$referencegenome" "$guide_file" $bulgesDNA $bulgesRNA || {
echo "CRISPRme ERROR: indels analysis failed (script: ${0} line $((LINENO-1)))" >&2
exit 1
}
./analisi_indels_NNN.py "$annotationfile" "$jobid.total.cluster.txt" "$jobid" "$dictionaries" "$pam_file" "$mismatch" "$referencegenome" "$guide_file" $bulgesDNA $bulgesRNA
# OUTPUT $jobid.bestCFD_INDEL.txt
# $jobid.CFDGraph.txt (used to draw the area graph of CFD REF vs ENR)
# NOTE: AnnotatorAllTargets.py saves to disk ONLY the target with the highest CFD in the cluster and among the existing decompositions
Expand All @@ -133,37 +124,19 @@ echo 'Sorting and adjusting results'
# #tail file w/o header and sort for realguide,chr,cluster_pos,score
# tail -n +2 $jobid.bestCRISTA_INDEL.txt | LC_ALL=C sort -k15,15 -k4,4 -k6,6n -k21,21rg -T ./ >>$jobid.tmp && mv $jobid.tmp $jobid.bestCRISTA_INDEL.txt

./adjust_cols.py "$jobid.bestCFD_INDEL.txt" || {
echo "CRISPRme ERROR: CFD indels report failed (script: ${0} line $((LINENO-1)))" >&2
exit 1
}
./adjust_cols.py "$jobid.bestCRISTA_INDEL.txt" || {
echo "CRISPRme ERROR: CRISTA indels report failed (script: ${0} line $((LINENO-1)))" >&2
exit 1
}
./adjust_cols.py "$jobid.bestmmblg_INDEL.txt" || {
echo "CRISPRme ERROR: mismatch+bulges indels report failed (script: ${0} line $((LINENO-1)))" >&2
exit 1
}
./adjust_cols.py "$jobid.bestCFD_INDEL.txt"
./adjust_cols.py "$jobid.bestCRISTA_INDEL.txt"
./adjust_cols.py "$jobid.bestmmblg_INDEL.txt"

# sed -i '1s/.*/MMBLG_#Bulge_type\tMMBLG_crRNA\tMMBLG_DNA\tMMBLG_Reference\tMMBLG_Chromosome\tMMBLG_Position\tMMBLG_Cluster_Position\tMMBLG_Direction\tMMBLG_Mismatches\tMMBLG_Bulge_Size\tMMBLG_Total\tMMBLG_PAM_gen\tMMBLG_Var_uniq\tMMBLG_Samples\tMMBLG_Annotation_Type\tMMBLG_Real_Guide\tMMBLG_rsID\tMMBLG_AF\tMMBLG_SNP\tMMBLG_#Seq_in_cluster\tMMBLG_CFD\tMMBLG_CFD_ref/' $jobid.bestmmblg_INDEL.txt
# sed -i '1s/.*/MMBLG_#Bulge_type\tMMBLG_crRNA\tMMBLG_DNA\tMMBLG_Reference\tMMBLG_Chromosome\tMMBLG_Position\tMMBLG_Cluster_Position\tMMBLG_Direction\tMMBLG_Mismatches\tMMBLG_Bulge_Size\tMMBLG_Total\tMMBLG_PAM_gen\tMMBLG_Var_uniq\tMMBLG_Samples\tMMBLG_Annotation_Type\tMMBLG_Real_Guide\tMMBLG_rsID\tMMBLG_AF\tMMBLG_SNP\tMMBLG_#Seq_in_cluster\tMMBLG_CFD\tMMBLG_CFD_ref/' $jobid.altmmblg.txt

# pr -m -t -J $jobid.bestCFD_INDEL.txt $jobid.bestmmblg_INDEL.txt >$jobid.bestMerge.txt
# pr -m -t -J $jobid.altCFD.txt $jobid.altmmblg.txt >$jobid.altMerge.txt

./remove_bad_indel_targets.py "$jobid.bestCFD_INDEL.txt" || {
echo "CRISPRme ERROR: CFD indels report cleaning failed (script: ${0} line $((LINENO-1)))" >&2
exit 1
}
./remove_bad_indel_targets.py "$jobid.bestCRISTA_INDEL.txt" || {
echo "CRISPRme ERROR: CRISTA indels report cleaning failed (script: ${0} line $((LINENO-1)))" >&2
exit 1
}
./remove_bad_indel_targets.py "$jobid.bestmmblg_INDEL.txt" || {
echo "CRISPRme ERROR: mismatch+bulges indels report cleaning failed (script: ${0} line $((LINENO-1)))" >&2
exit 1
}
./remove_bad_indel_targets.py "$jobid.bestCFD_INDEL.txt"
./remove_bad_indel_targets.py "$jobid.bestCRISTA_INDEL.txt"
./remove_bad_indel_targets.py "$jobid.bestmmblg_INDEL.txt"

#merge targets in same chr when they are at distance 3 from each other (inclusive) preserving the highest scoring one
# ./merge_close_targets_cfd.sh $jobid.bestCFD_INDEL.txt $jobid.bestCFD_INDEL.txt.trimmed 3 'score'
Expand Down
1 change: 0 additions & 1 deletion PostProcess/extraction.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/bin/bash

##NOTE AWK & GREP REPORT NO STDOUT IF NO MATCHES ARE FOUND (AWK DOES NOT PRODUCE ANY OUTPUT)
# set -e # trace all errors

#PARAM $1 is ref targets file
#PARAM $2 is var targets file
Expand Down
5 changes: 2 additions & 3 deletions PostProcess/merge_alt_chr.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
#!/bin/bash

set -e # trace all failures

dir=$(dirname $1)
fileIn=$1
fileOut=$2
Expand All @@ -15,7 +13,8 @@ head -1 $fileIn >$fileOut
for chrom in ${chroms[@]}; do

echo $chrom
awk "/${chrom}\t/" test.targets.txt >$fileIn.$chrom.ref
# awk "/${chrom}\t/" test.targets.txt >$fileIn.$chrom.ref
grep -F -w "$chrom" $fileIn >$fileIn.$chrom.ref
cut -f 3 $fileIn.$chrom.ref | LC_ALL=C sort -T "$dir" | uniq >$fileIn.$chrom.ref.targets
awk -v chrom="$chrom" '$0 ~ chrom"_" {print($0)}' $fileIn >$fileIn.$chrom.alt
awk 'NR==FNR{a[$0];next} !($0 in a)' $fileIn.$chrom.ref.targets $fileIn.$chrom.alt >$fileIn.$chrom.merged
Expand Down
18 changes: 11 additions & 7 deletions PostProcess/merge_close_targets_cfd.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
#!/bin/bash

# set -e # capture any failure

fileIn=$1
fileOut=$2
thresh=$3 #threshold to use in order to merge near targets
Expand All @@ -27,10 +25,16 @@ echo "Sorting done in $(($ENDTIME - $STARTTIME)) seconds"
# echo -e $header | cat - $fileIn.sorted.tmp > $fileIn.sorted
# rm $fileIn.sorted.tmp
echo "Merging contiguous targets"

if [[ "${sort_pivot}" == "score" ]]; then
criteria=$sorting_criteria_scoring
else
criteria=$sorting_criteria
fi
python merge_contiguous_targets.py $fileIn $fileOut $thresh $chrom $position $total $true_guide $snp_info $cfd $sort_pivot $criteria
# python remove_contiguous_samples_cfd.py $fileIn.sorted $fileOut $thresh $chrom $position $total $true_guide $snp_info $cfd
python remove_contiguous_samples_cfd.py $fileIn $fileOut $thresh $chrom $position $total $true_guide $snp_info $cfd $sort_pivot $sorting_criteria_scoring $sorting_criteria
# python remove_contiguous_samples_cfd_new.py $fileIn $fileOut $thresh $chrom $position $total $true_guide $snp_info $cfd $sort_pivot $sorting_criteria_scoring $sorting_criteria || {
# echo "CRISPRme ERROR: contigous SNP removal failed (script: ${0} line $((LINENO-1)))" >&2
# exit 1
# }


# python remove_contiguous_samples_cfd.py $fileIn $fileOut $thresh $chrom $position $total $true_guide $snp_info $cfd $sort_pivot
# python remove_contiguous_samples_cfd.py $fileIn $fileOut $thresh $chrom $position $total $true_guide $snp_info $cfd $sort_pivot $sorting_criteria_scoring $sorting_criteria
# rm $fileIn.sorted
Loading

0 comments on commit 58c0961

Please sign in to comment.