Skip to content

Commit

Permalink
wrapping things up
Browse files Browse the repository at this point in the history
  • Loading branch information
DOH-JDJ0303 committed May 21, 2024
1 parent 25ec261 commit 1887249
Show file tree
Hide file tree
Showing 28 changed files with 245 additions and 352 deletions.
Binary file added assets/Influenza-A_HA.fa.gz
Binary file not shown.
2 changes: 2 additions & 0 deletions assets/test.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
taxa,segment,assembly,length
Influenza_A,HA,,1950
10 changes: 10 additions & 0 deletions bin/assign-remainder.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
#!/usr/bin/env Rscript
version <- "1.0"

# assign-remainder.R
# Author: Jared Johnson, [email protected]

library(tidyverse)

Expand All @@ -10,6 +14,12 @@ remainder_list_path <- args[3]
taxa_name <- args[4]
segment_name <- args[5]

#---- VERSION ----#
# If reps_path is the literal string "version", print the script version
# (the `version` value set at the top of this script) and exit with status 0
# instead of running the rest of the script.
if(reps_path == "version"){
cat(version, sep = "\n")
quit(status=0)
}

# set output file name
file.name <- paste(taxa_name,segment_name,"assigned",sep="-")

Expand Down
2 changes: 1 addition & 1 deletion bin/assign-remainder.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,4 @@ mash sketch -p $threads -o reps -i reps.fa
mash sketch -p $threads -o remainder -i ${remainder}

mash dist -p $threads reps.msh remainder.msh | awk -v OFS=',' -v t=${threshold} '$3 < t {print $1,$2,$3}' > remainder-mash.csv
cat ${remainder} | grep '>' | tr -d '>' > remainder-list.csv
cat ${remainder} | grep '>' | tr -d '>' > remainder-list.csv
32 changes: 0 additions & 32 deletions bin/consensus.R

This file was deleted.

3 changes: 0 additions & 3 deletions bin/remove-dups.sh

This file was deleted.

11 changes: 8 additions & 3 deletions bin/summary.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ summary_file <- args[1]
fastani_ava_file <- args[2]
fastani_seeds_file <- args[3]
seeds_file <- args[4]
timestamp <- args[5]

#---- VERSION ----#
if(summary_file == "version"){
Expand All @@ -23,7 +24,7 @@ library(tidyverse)
#---- FUNCTIONS ----#
# basename_fa(path): take the file-name part of `path` (via basename()) and
# strip a trailing FASTA extension from it; the stripped name is returned.
# NOTE(review): two str_remove_all() lines appear below; this looks like the
# before/after pair of a diff hunk (".fa$" replaced by ".fa.gz$") -- confirm
# which one is live, since only the first is attached to the pipe.
# NOTE(review): the dots in both patterns are unescaped regex wildcards, so a
# name ending in e.g. "_fa_gz" would also be trimmed; "\\.fa\\.gz$" is
# probably what is meant -- confirm.
basename_fa <- function(path){
result <- basename(path) %>%
str_remove_all(pattern = ".fa$")
str_remove_all(pattern = ".fa.gz$")
return(result)

}
Expand Down Expand Up @@ -98,6 +99,10 @@ fastani_ava <- fastani_ava %>%
filter(query != ref) %>%
group_by(query, taxa, segment) %>%
summarise(min_ani = round(min(ani), digits = 1), max_ani = round(max(ani), digits = 1)) %>%
mutate(min_ani = case_when(min_ani < 80 ~ '< 80',
TRUE ~ as.character(min_ani)),
max_ani = case_when(max_ani < 80 ~ '< 80',
TRUE ~ as.character(max_ani))) %>%
rename(seq = query) %>%
ungroup() %>%
select(-taxa, -segment)
Expand All @@ -107,7 +112,7 @@ fastani_ava <- fastani_ava %>%
clusters %>%
full_join(fastani_ava, by = "seq") %>%
select(seq,taxa,segment,cluster,n,condensed,length,min_ani,max_ani) %>%
write.csv(file = paste0(format(Sys.Date(), "%s"),"-epitome.csv"), quote = F, row.names = F)
write.csv(file = paste0(timestamp,"-summary.csv"), quote = F, row.names = F)

## WITH SEEDS
if(file.exists(fastani_seeds_file) & file.exists(seeds_file)){
Expand All @@ -129,5 +134,5 @@ if(file.exists(fastani_seeds_file) & file.exists(seeds_file)){
full_join(clusters, by = "seq") %>%
left_join(seeds, by = "ref") %>%
select(seq,taxa,segment,cluster,n,length,min_ani,max_ani, seed, seed_ani) %>%
write.csv(file = paste0(format(Sys.Date(), "%s"),"-epitome.csv"), quote = F, row.names = F)
write.csv(file = paste0(timestamp,"-summary.csv"), quote = F, row.names = F)
}
5 changes: 0 additions & 5 deletions bin/test_ani_dist/README.md

This file was deleted.

110 changes: 0 additions & 110 deletions bin/test_ani_dist/test_ani_dist.py

This file was deleted.

9 changes: 7 additions & 2 deletions conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,20 @@ process {
time = { check_max( 4.h * task.attempt, 'time' ) }
}
withLabel:process_medium {
cpus = { check_max( 6 * task.attempt, 'cpus' ) }
cpus = { check_max( 4 * task.attempt, 'cpus' ) }
memory = { check_max( 8.GB * task.attempt, 'memory' ) }
time = { check_max( 8.h * task.attempt, 'time' ) }
}
withLabel:process_high {
cpus = { check_max( 10 * task.attempt, 'cpus' ) }
cpus = { check_max( 8 * task.attempt, 'cpus' ) }
memory = { check_max( 16.GB * task.attempt, 'memory' ) }
time = { check_max( 16.h * task.attempt, 'time' ) }
}
// Resource tier for processes labelled process_very_high: requests start at
// 16 CPUs, 32 GB of memory and 16 h, each multiplied by task.attempt.
// NOTE(review): check_max is defined elsewhere; presumably it caps each
// request at the pipeline's configured maximum -- confirm.
withLabel:process_very_high {
cpus = { check_max( 16 * task.attempt, 'cpus' ) }
memory = { check_max( 32.GB * task.attempt, 'memory' ) }
time = { check_max( 16.h * task.attempt, 'time' ) }
}
withLabel:process_long {
time = { check_max( 20.h * task.attempt, 'time' ) }
}
Expand Down
22 changes: 21 additions & 1 deletion conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,15 @@ process {
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]

// TIMESTAMP: run in the epitome-base container.
// NOTE(review): the publishDir glob "none" only matches a file literally
// named `none`, so this presumably keeps the process outputs from being
// published to params.outdir -- confirm.
withName: TIMESTAMP {
container = 'docker.io/jdj0303/epitome-base:1.0'
publishDir = [
path: { "${params.outdir}/" },
pattern: "none"
]
}
withName: INPUT_QC {
container = 'docker.io/jdj0303/epitome-base:1.0'
publishDir = [
path: { "${params.outdir}/${taxa}/${segment}/qc" },
pattern: "*.csv"
Expand Down Expand Up @@ -49,21 +56,25 @@ process {
]
}
withName: CLUSTER {
container = 'docker.io/jdj0303/epitome-base:1.0'
publishDir = [
path: { "${params.outdir}/${taxa}/${segment}/clusters" }
]
}
withName: CLUSTER_LOOSEENDS {
container = 'docker.io/jdj0303/epitome-base:1.0'
publishDir = [
path: { "${params.outdir}/${taxa}/${segment}/clusters" }
]
}
withName: ASSIGN_REMAINDER {
container = 'docker.io/jdj0303/epitome-base:1.0'
publishDir = [
path: { "${params.outdir}/${taxa}/${segment}/clusters" }
]
}
withName: BIND_CLUSTERS {
container = 'docker.io/jdj0303/epitome-base:1.0'
publishDir = [
path: { "${params.outdir}/${taxa}/${segment}/clusters" }
]
Expand All @@ -87,12 +98,14 @@ process {
]
}
withName: CONSENSUS {
container = 'docker.io/jdj0303/epitome-base:1.0'
publishDir = [
path: { "${params.outdir}/${taxa}/${segment}/consensus" },
pattern: "none"
]
}
withName: CONDENSE {
container = 'docker.io/jdj0303/epitome-base:1.0'
publishDir = [
path: { "${params.outdir}/${taxa}/${segment}/consensus" },
pattern: "*.fa"
Expand All @@ -113,10 +126,17 @@ process {
]
}
withName: SUMMARY {
container = 'docker.io/jdj0303/epitome-base:1.0'
publishDir = [
path: { "${params.outdir}/summary" }
]
}
withName: EXPORT {
container = 'docker.io/jdj0303/epitome-base:1.0'
publishDir = [
path: { "${params.outdir}/" }
]
}
withName: CUSTOM_DUMPSOFTWAREVERSIONS {
publishDir = [
path: { "${params.outdir}/pipeline_info" },
Expand Down
9 changes: 3 additions & 6 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,7 @@ params {
max_time = '6.h'

// Input data
// TODO nf-core: Specify the paths to your test data on nf-core/test-datasets
// TODO nf-core: Give any required params for the test so that command line flags are not needed
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv'

// Genome references
genome = 'R64-1-1'
input = ''
//
max_cluster = 500
}
24 changes: 0 additions & 24 deletions conf/test_full.config

This file was deleted.

Loading

0 comments on commit 1887249

Please sign in to comment.