From 2d4f9ec187c23b0e7b023e051ec02bf398b3edb6 Mon Sep 17 00:00:00 2001 From: DOH-JDJ0303 Date: Sat, 27 Apr 2024 11:55:00 -0700 Subject: [PATCH] fixing ATCG filtering --- bin/input-qc.sh | 2 +- modules/local/bind-clusters.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/input-qc.sh b/bin/input-qc.sh index 46032a8..1434e49 100755 --- a/bin/input-qc.sh +++ b/bin/input-qc.sh @@ -23,7 +23,7 @@ cat ${fasta} | sed 's/>.*$/@&@/g' | tr -d '\n' | tr '@' '\n' | grep -v '>' | tai cat seqs | sort | uniq > f1 #---- FILTER 2: REMOVE SEQUENCES WITH AMBIGUOUS BASES ----# -cat f1 | grep -vE 'R|Y|M|K|S|W|H|B|V|D|N' > f2 +cat f1 | grep -E '^[ATCG]+$' > f2 if [ ! -s f2 ] then echo "Error: All sequences had ambiguous bases!" && exit 1 diff --git a/modules/local/bind-clusters.nf b/modules/local/bind-clusters.nf index 352befa..76e91aa 100644 --- a/modules/local/bind-clusters.nf +++ b/modules/local/bind-clusters.nf @@ -20,7 +20,7 @@ process BIND_CLUSTERS { if [ -f "${prefix}-looseends.csv " ] then max_n=\$(cat *main.csv | cut -f 4 -d ',' | sort -rn) - mv ${prefix}-looseends.csv tmp && cat tmp | grep -v "\$header" | tr ',' '\t' | awk -v max=\${max_n} -v OFS=',' '{print $1,$2,$3,$4+max}' > ${prefix}-looseends.csv + mv ${prefix}-looseends.csv tmp && cat tmp | grep -v "\$header" | tr ',' '\t' | awk -v max=\${max_n} -v OFS=',' '{print \$1,\$2,\$3,\$4+max}' > ${prefix}-looseends.csv rm tmp fi echo \$header > ${prefix}-clusters.csv