Skip to content

Commit

Permalink
Merge pull request #610 from AlexsLemonade/allyhawkins/altexp-merged-objects
Browse files Browse the repository at this point in the history

Account for feature data when merging objects
  • Loading branch information
allyhawkins authored Dec 11, 2023
2 parents 504a280 + becec88 commit 081f319
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 14 deletions.
9 changes: 8 additions & 1 deletion bin/merge_sces.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ option_list <- list(
help = "number of high variance genes to use for dimension reduction;
the default is n_hvg = 2000"
),
make_option(
opt_str = c("--include_alt_exp"),
action = "store_true",
default = FALSE,
help = "Keep any altExp present in the merged object."
),
make_option(
opt_str = c("-t", "--threads"),
type = "integer",
Expand Down Expand Up @@ -108,7 +114,8 @@ merged_sce <- scpcaTools::merge_sce_list(
sce_list,
batch_column = "library_id",
preserve_rowdata_cols = "gene_symbol",
cell_id_column = "cell_id"
cell_id_column = "cell_id",
include_alt_exp = include_alt_exp
)


Expand Down
41 changes: 28 additions & 13 deletions merge.nf
Original file line number Diff line number Diff line change
Expand Up @@ -31,21 +31,22 @@ if(param_error){
process merge_sce {
container params.SCPCATOOLS_CONTAINER
label 'mem_16'
publishDir "${params.checkpoints_dir}/merged"
publishDir "${params.results_dir}/merged/${project_id}"
input:
tuple val(project_id), val(library_ids), path(scpca_nf_file)
tuple val(merge_group_id), val(has_adt), val(library_ids), path(scpca_nf_file)
output:
tuple val(project_id), path(merged_sce_file)
tuple val(merge_group_id), val(has_adt), path(merged_sce_file)
script:
input_library_ids = library_ids.join(',')
input_sces = scpca_nf_file.join(',')
merged_sce_file = "${project_id}_merged.rds"
merged_sce_file = "${merge_group_id}_merged.rds"
"""
merge_sces.R \
--input_library_ids "${input_library_ids}" \
--input_sce_files "${input_sces}" \
--output_sce_file "${merged_sce_file}" \
--n_hvg ${params.num_hvg} \
${has_adt ? "--include_alt_exp" : ''} \
--threads ${task.cpus}
"""
stub:
Expand All @@ -62,23 +63,23 @@ process merge_report {
publishDir "${params.results_dir}/merged/${merge_group}"
label 'mem_16'
input:
tuple val(merge_group), path(merged_sce_file)
tuple val(merge_group_id), path(merged_sce_file)
path(report_template)
output:
path(merge_report)
script:
merge_report = "${merge_group}_summary_report.html"
merge_report = "${merge_group_id}_summary_report.html"
"""
Rscript -e "rmarkdown::render( \
'${report_template}', \
output_file = '${merge_report}', \
params = list(merge_group = '${merge_group}', \
params = list(merge_group = '${merge_group_id}', \
merged_sce = '${merged_sce_file}', \
batch_column = 'library_id') \
)"
"""
stub:
merge_report = "${merge_group}_summary_report.html"
merge_report = "${merge_group_id}_summary_report.html"
"""
touch ${merge_report}
"""
Expand All @@ -89,12 +90,20 @@ workflow {
// grab project ids to run
project_ids = params.project?.tokenize(',') ?: []

// read in run metafile, filter to projects of interest, and group by project
grouped_libraries_ch = Channel.fromPath(params.run_metafile)
// read in run metafile and filter to projects of interest
libraries_ch = Channel.fromPath(params.run_metafile)
.splitCsv(header: true, sep: '\t')
// filter to only include specified project ids
.filter{it.scpca_project_id in project_ids}
// only include single-cell/single-nuclei which already contain processed altexps, and ensure we don't try to merge libraries from spatial or bulk data

// get all projects that contain at least one library with CITEseq
adt_projects = libraries_ch
.filter{it.technology.startsWith('CITEseq')}
.collect{it.scpca_project_id}
.unique()

grouped_libraries_ch = libraries_ch
// only include single-cell/single-nuclei libraries, which ensures we don't try to merge libraries from spatial or bulk data
.filter{it.seq_unit in ['cell', 'nucleus']}
// create tuple of [project id, library_id, processed_sce_file]
.map{[
Expand All @@ -104,11 +113,17 @@ workflow {
]}
// only include libraries that have been processed through scpca-nf
.filter{file(it[2]).exists()}
// make sure we don't have any duplicates of the same library ID hanging around
// this shouldn't be the case since we removed CITE-seq and cell-hashing
// keep only one row per library ID; this removes any duplicates that may be present due to CITE-seq/cell hashing
.unique()
// group tuple by project id: [project_id, [library_id1, library_id2, ...], [sce_file1, sce_file2, ...]]
.groupTuple(by: 0)
// add a boolean indicating whether the project contains any samples with ADT data
.map{project_id, library_id_list, sce_file_list -> tuple(
project_id,
project_id in adt_projects, // determines if altExp should be included in the merged object
library_id_list,
sce_file_list
)}

merge_sce(grouped_libraries_ch)

Expand Down

0 comments on commit 081f319

Please sign in to comment.