Skip to content

Commit

Permalink
Merge pull request #7 from DOH-JDJ0303/image_fix
Browse files (browse the repository at this point in the history)
reducing R memory usage and increasing resources
  • Loading branch information
DOH-JDJ0303 authored Apr 26, 2024
2 parents e36cb4c + ab8581e commit d19a4df
Show file tree
Hide file tree
Showing 5 changed files with 14 additions and 10 deletions.
7 changes: 5 additions & 2 deletions bin/cluster.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,14 @@ if(dist_path == "version"){
library(tidyverse)
library(ggtree)
library(ape)
install.packages("bigmemory")
library(bigmemory)

# set output file name
file.name <- paste(taxa_name,segment_name,sep="-")

#---- LOAD PAIRWISE DISTANCES ----#
dist.df <- read_tsv(dist_path, col_names = c("ID1","ID2","DIST","PVAL","HASHES")) %>%
dist.df <- read_tsv(dist_path, col_names = c("ID1","ID2","DIST")) %>%
select(ID1, ID2, DIST)
dist.mat <- dist.df %>%
pivot_wider(names_from="ID2", values_from="DIST") %>%
Expand All @@ -55,7 +57,8 @@ clusters <- cutree(as.hclust(tree), h = as.numeric(threshold)) %>%
data.frame() %>%
rownames_to_column(var = "seq") %>%
rename(cluster = 2) %>%
mutate(taxa = taxa_name,
mutate(seq = as.numeric(seq),
taxa = taxa_name,
segment = segment_name) %>%
select(seq, taxa, segment, cluster)

Expand Down
2 changes: 1 addition & 1 deletion bin/input-qc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -69,5 +69,5 @@ fi
echo "total,filter1,filter2,filter3,filter4" > ${prefix}-qc-summary.csv
echo "$(cat seqs | wc -l),$(cat f1 | wc -l),$(cat f2 | wc -l),$(cat f3 | wc -l),$(cat f4 | wc -l)" >> ${prefix}-qc-summary.csv
# output cleaned sequences & clean up
cat f3 | awk -v OFS='\n' -v prefix=${prefix} '{print ">"prefix"-"NR, $1}' > ${prefix}.clean.fa
cat f3 | awk -v OFS='\n' -v prefix=${prefix} '{print ">"NR, $1}' > ${prefix}.clean.fa
rm seqs f1 f2 f3 f4
5 changes: 3 additions & 2 deletions conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,15 @@ process {
}
withLabel:process_high {
cpus = { check_max( 10 * task.attempt, 'cpus' ) }
memory = { check_max( 14.GB * task.attempt, 'memory' ) }
memory = { check_max( 16.GB * task.attempt, 'memory' ) }
time = { check_max( 16.h * task.attempt, 'time' ) }
}
withLabel:process_long {
time = { check_max( 20.h * task.attempt, 'time' ) }
}
withLabel:process_high_memory {
memory = { check_max( 32.GB * task.attempt, 'memory' ) }
memory = { check_max( 64.GB * task.attempt, 'memory' ) }
cpus = { check_max( 8 , 'cpus' ) }
}
withLabel:error_ignore {
errorStrategy = 'ignore'
Expand Down
8 changes: 4 additions & 4 deletions modules/local/cluster.nf
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ process CLUSTER {
script:
prefix = "${taxa}-${segment}"
"""
gzip -d ${dist}
zcat ${dist} | cut -f 1,2,3 > dists.txt
# run script
cluster.R *.txt "${taxa}" "${segment}" ${params.dist_threshold}
cluster.R dists.txt "${taxa}" "${segment}" ${params.dist_threshold}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down Expand Up @@ -51,9 +51,9 @@ process CLUSTER_LARGE {
script:
prefix = "${taxa}-${segment}"
"""
gzip -d ${dist}
zcat ${dist} | cut -f 1,2,3 > dists.txt
# run script
cluster.R *.txt "${taxa}" "${segment}" ${params.dist_threshold}
cluster.R dists.txt "${taxa}" "${segment}" ${params.dist_threshold}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
2 changes: 1 addition & 1 deletion modules/local/mash.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
process MASH {
tag "${taxa}-${segment}"
label 'process_medium'
label 'process_high'
conda "bioconda::mash=2.3"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/mash:2.3--he348c14_1' :
Expand Down

0 comments on commit d19a4df

Please sign in to comment.