Skip to content

Commit

Permalink
Merge pull request #801 from AlexsLemonade/jashapiro/filter-merge-count
Browse files Browse the repository at this point in the history
Add minimum number of cells (3) for merge
  • Loading branch information
jashapiro authored Nov 7, 2024
2 parents cc50b09 + cb51115 commit ba74034
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 6 deletions.
13 changes: 13 additions & 0 deletions bin/merge_sces.R
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,19 @@ read_trim_sce <- function(sce_file) {
# get list of sces
sce_list <- purrr::map(input_sce_files, read_trim_sce)

# filter out libraries with fewer than 3 cells (causes errors with PCA)
# Libraries are selected with a logical mask rather than by name: indexing by
# `names(sce_list)` would silently drop every library if the list were unnamed
# and would keep only the first match for each name if names were duplicated.
n_cells <- purrr::map_int(sce_list, ncol)
keep_lib <- n_cells >= 3

# labels used for reporting; fall back to list positions if there are no names
lib_labels <- names(sce_list)
if (is.null(lib_labels)) {
  lib_labels <- as.character(seq_along(sce_list))
}
included_libs <- lib_labels[keep_lib]
lib_diff <- lib_labels[!keep_lib]

# report (on stderr, via message) which libraries are being dropped
if (length(lib_diff) > 0) {
  message(
    "The following libraries have fewer than 3 cells and will be excluded from the merged object: ",
    paste(lib_diff, collapse = ", ")
  )
}
sce_list <- sce_list[keep_lib]


# Add cell type annotation columns where needed -------------------------------

# check for present cell type annotations
Expand Down
4 changes: 4 additions & 0 deletions lib/Utils.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ class Utils {
* @return A value from the metadata, or null if the metadata file does not exist
*/
static def getMetaVal(file, key) {
if (!file.exists()) {
return(null)
}

def obj = new JsonSlurper().parse(file)
def value = obj[key]

Expand Down
22 changes: 16 additions & 6 deletions merge.nf
Original file line number Diff line number Diff line change
Expand Up @@ -199,22 +199,32 @@ workflow {
filtered_libraries_ch.single_sample
.map{[
it.library_id,
file("${params.results_dir}/${it.project_id}/${it.sample_id}/${it.library_id}_processed.rds")
file("${params.results_dir}/${it.project_id}/${it.sample_id}/${it.library_id}_processed.rds"),
file("${params.results_dir}/${it.project_id}/${it.sample_id}/${it.library_id}_metadata.json")
]}
.filter{!(it[1].exists() && it[1].size() > 0)}
.subscribe{
log.warn("Processed files do not exist for ${it[0]}. This library will not be included in the merged object.")
if(!(it[1].exists() && it[1].size() > 0)){
log.warn("Processed files do not exist for ${it[0]}. This library will not be included in the merged object.")
}
else if(!(it[2].exists() && it[2].size() > 0)){
log.warn("Metadata file does not exist for ${it[0]}. This library will not be included in the merged object.")
}
else if (Utils.getMetaVal(it[2], "processed_cells") < 3){
log.warn("Library ${it[0]} has fewer than 3 cells. This library will not be included in the merged object.")
}
}

grouped_libraries_ch = filtered_libraries_ch.single_sample
// create tuple of [project_id, library_id, processed_sce_file, metadata_file]
.map{[
it.project_id,
it.library_id,
file("${params.results_dir}/${it.project_id}/${it.sample_id}/${it.library_id}_processed.rds")
file("${params.results_dir}/${it.project_id}/${it.sample_id}/${it.library_id}_processed.rds"),
file("${params.results_dir}/${it.project_id}/${it.sample_id}/${it.library_id}_metadata.json")
]}
// only include libraries that have been processed through scpca-nf and aren't empty
.filter{it[2].exists() && it[2].size() > 0}
// only include libraries that have been processed through scpca-nf and have at least 3 cells
.filter{it[2].exists() && it[2].size() > 0 && Utils.getMetaVal(it[3], "processed_cells") >= 3}
.map{it[0..2]} // remove metadata file from tuple
// only one row per library ID, this removes all the duplicates that may be present due to CITE/hashing
.unique()
// group tuple by project id: [project_id, [library_id1, library_id2, ...], [sce_file1, sce_file2, ...]]
Expand Down

0 comments on commit ba74034

Please sign in to comment.