Skip to content

Commit

Permalink
Merge pull request #387 from AlexsLemonade/development
Browse files Browse the repository at this point in the history
Merge in `development` for `v0.5.3` release
  • Loading branch information
sjspielman authored Jul 19, 2023
2 parents 4b4de3c + 45fb456 commit bed9eff
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 52 deletions.
90 changes: 56 additions & 34 deletions bin/post_process_sce.R
Original file line number Diff line number Diff line change
Expand Up @@ -96,18 +96,37 @@ metadata(sce)$min_gene_cutoff <- opt$gene_cutoff
# create adt_scpca_filter column, if CITEseq ----------
alt_exp <- opt$adt_name
if (alt_exp %in% altExpNames(sce)) {
sce$adt_scpca_filter <- ifelse(
altExp(sce, alt_exp)$discard,
"Remove",
"Keep"
)

# assign `adt_scpca_filter_method` metadata based on colData contents
if ("sum.controls" %in% names(colData(altExp(sce, alt_exp)))) {
metadata(sce)$adt_scpca_filter_method <- "cleanTagCounts with isotype controls"
} else if ("ambient.scale" %in% names(colData(altExp(sce, alt_exp)))) {
metadata(sce)$adt_scpca_filter_method <- "cleanTagCounts without isotype controls"
# set up filter with all Keep, and then override to Remove where necessary
sce$adt_scpca_filter <- "Keep"
sce$adt_scpca_filter[which(altExp(sce, alt_exp)$discard)] <- "Remove"

# Warnings for different types of failure:
fail_all_removed <- all(sce$adt_scpca_filter == "Remove")
fail_all_na <- all(is.na(altExp(sce, alt_exp)$discard))

# handle failures - warnings and assign method as "No filter"
if (fail_all_removed | fail_all_na) {
metadata(sce)$adt_scpca_filter_method <- "No filter"
if (fail_all_removed) {
sce$adt_scpca_filter <- "Keep"
warning("Filtering on ADTs attempted to remove all cells. No cells will be removed.")
} else {
warning("ADT filtering failed. No cells will be removed.")
}
} else {
# Handle successes - assign `adt_scpca_filter_method` metadata based on colData contents
if ("sum.controls" %in% names(colData(altExp(sce, alt_exp)))) {
metadata(sce)$adt_scpca_filter_method <- "cleanTagCounts with isotype controls"
} else if ("ambient.scale" %in% names(colData(altExp(sce, alt_exp)))) {
metadata(sce)$adt_scpca_filter_method <- "cleanTagCounts without isotype controls"
} else {
stop("Error in ADT filtering.")
}
}

# make extra sure there are no NAs in `adt_scpca_filter`
if (any(is.na(sce$adt_scpca_filter))) {
stop("Error in ADT filtering.")
}
}
Expand Down Expand Up @@ -161,6 +180,7 @@ processed_sce <- scuttle::logNormCounts(processed_sce)
# Try to normalize ADT counts, if present
if (alt_exp %in% altExpNames(processed_sce)) {


# need `all()` since,if present, this is an array
if( !all(is.null(metadata(altExp(processed_sce, alt_exp))$ambient_profile))){
# Calculate median size factors from the ambient profile
Expand All @@ -173,38 +193,40 @@ if (alt_exp %in% altExpNames(processed_sce)) {
altExp(processed_sce, alt_exp)$sizeFactor <- 0
}

# Perform filtering specifically to allow for normalization
adt_sce <- altExp(processed_sce, alt_exp)
adt_sce <- adt_sce[,adt_sce$discard == FALSE]

# If any size factors are not positive, simply use log1p
if ( any( adt_sce$sizeFactor <= 0 ) ) {
# Perform filtering specifically to allow for normalization
if (!is.null(processed_sce$adt_scpca_filter)) {
adt_sce <- adt_sce[,processed_sce$adt_scpca_filter == "Keep"]
}

# If any size factors are not positive or there was no filtering, simply use log1p
if ( any( adt_sce$sizeFactor <= 0 ) | metadata(processed_sce)$adt_scpca_filter_method == "No filter") {
metadata(processed_sce)$adt_normalization <- "log-normalization"
logcounts(adt_sce) <- log1p(counts(adt_sce))
} else {
# Apply normalization using size factors
metadata(processed_sce)$adt_normalization <- "median-based"
adt_sce <- scuttle::logNormCounts(adt_sce)

# Add this logcounts matrix with NA values added for cells not included in normalization

# first, get the counts matrix and make it NA
result_matrix <- counts(altExp(processed_sce, alt_exp))
result_matrix[,] <- NA

# now get the computed logcounts & fill them in
result_matrix[, colnames(adt_sce)] <- logcounts(adt_sce)

# Check correct number of NAs:
observed_na_count <- sum(is.na(result_matrix))
expected_na_count <- nrow(adt_sce) * (ncol(altExp(processed_sce, alt_exp)) - ncol(adt_sce))
if (observed_na_count < expected_na_count) {
stop("Incorrect number of NAs recovered during ADT normalization.")
}

# Add result_matrix back into correct SCE as logcounts assay
logcounts(altExp(processed_sce, alt_exp)) <- result_matrix
}
# Now that we have logcounts, add back to `processed_sce` but
# with NA values for cells not included in normalization

# first, get the counts matrix and make it NA
result_matrix <- counts(altExp(processed_sce, alt_exp))
result_matrix[,] <- NA

# now get the computed logcounts & fill them in
result_matrix[, colnames(adt_sce)] <- logcounts(adt_sce)

# Check correct number of NAs:
observed_na_count <- sum(is.na(result_matrix))
expected_na_count <- nrow(adt_sce) * (ncol(altExp(processed_sce, alt_exp)) - ncol(adt_sce))
if (observed_na_count < expected_na_count) {
stop("Incorrect number of NAs recovered during ADT normalization.")
}

# Add result_matrix back into correct SCE as logcounts assay
logcounts(altExp(processed_sce, alt_exp)) <- result_matrix
}


Expand Down
8 changes: 4 additions & 4 deletions external-instructions.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ nextflow run AlexsLemonade/scpca-nf \
```

Where `<path to config file>` is the **relative** path to the [configuration file](#configuration-files) that you have setup and `<name of profile>` is the name of the profile that you chose when [creating a profile](#setting-up-a-profile-in-the-configuration-file).
This command will pull the `scpca-nf` workflow directly from Github, using the `v0.5.2` version, and run it based on the settings in the configuration file that you have defined.
This command will pull the `scpca-nf` workflow directly from Github, using the `v0.5.3` version, and run it based on the settings in the configuration file that you have defined.

**Note:** `scpca-nf` is under active development.
Using the above command will run the workflow from the `main` branch of the workflow repository.
Expand All @@ -76,7 +76,7 @@ Released versions can be found on the [`scpca-nf` repo releases page](https://gi

```sh
nextflow run AlexsLemonade/scpca-nf \
-r v0.5.2 \
-r v0.5.3 \
-config <path to config file> \
-profile <name of profile>
```
Expand Down Expand Up @@ -280,8 +280,8 @@ If you will be analyzing spatial expression data, you will also need the Cell Ra

If your compute nodes do not have internet access, you will likely have to pre-pull the required container images as well.
When doing this, it is important to be sure that you also specify the revision (version tag) of the `scpca-nf` workflow that you are using.
For example, if you would run `nextflow run AlexsLemonade/scpca-nf -r v0.5.2`, then you will want to set `-r v0.5.2` for `get_refs.py` as well to be sure you have the correct containers.
Be default, `get_refs.py` will download files and images associated with the latest release.
For example, if you would run `nextflow run AlexsLemonade/scpca-nf -r v0.5.3`, then you will want to set `-r v0.5.3` for `get_refs.py` as well to be sure you have the correct containers.
By default, `get_refs.py` will download files and images associated with the latest release.

If your system uses Docker, you can add the `--docker` flag:

Expand Down
2 changes: 1 addition & 1 deletion internal-instructions.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ nextflow run AlexsLemonade/scpca-nf -profile ccdl,batch
When running the workflow for a project or group of samples that is ready to be released on ScPCA portal, please use the tag for the latest release:

```
nextflow run AlexsLemonade/scpca-nf -r v0.5.2 -profile ccdl,batch --project SCPCP000000
nextflow run AlexsLemonade/scpca-nf -r v0.5.3 -profile ccdl,batch --project SCPCP000000
```

### Processing example data
Expand Down
1 change: 1 addition & 0 deletions modules/qc-report.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

process sce_qc_report{
container params.SCPCATOOLS_CONTAINER
label 'mem_8'
tag "${meta.library_id}"
publishDir "${params.results_dir}/${meta.project_id}/${meta.sample_id}", mode: 'copy'
input:
Expand Down
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ manifest{
homePage = 'https://github.com/AlexsLemonade/scpca-nf'
mainScript = 'main.nf'
defaultBranch = 'main'
version = 'v0.5.2'
version = 'v0.5.3'
}

// global parameters for workflows
Expand Down
39 changes: 27 additions & 12 deletions templates/qc_report/cite_qc.rmd
Original file line number Diff line number Diff line change
Expand Up @@ -56,22 +56,25 @@ knitr::kable(antibody_tags, digits = 2) |>

## ADT Post-processing Statistics

Note that low-quality cells as identified by ADT counts are not actually filtered from the SCE object.
Instead, cells that passed the filter threshold are labeled as `"Keep"` within the SCE object, and conversely cells that failed to pass the filtered are labeled as `"Remove"`.

```{r}
if (has_processed) {
filtered_cell_count <- sum(processed_sce$adt_scpca_filter == "Keep")
basic_statistics <- tibble::tibble(
# Note that the adt_scpca_filter_method column is only present in the processed_sce object
"Method used to identify cells to filter" = format(processed_meta$adt_scpca_filter_method),
"Number of cells that pass filtering threshold" = format(filtered_cell_count),
"Percent of cells that pass filtering threshold" =
paste0(round( filtered_cell_count/ncol(processed_sce) * 100, digits = 2), "%"),
"Normalization method" = format(processed_meta$adt_normalization)
) |>
"Method used to identify cells to filter" = format(processed_meta$adt_scpca_filter_method)
)
if (!(processed_meta$adt_scpca_filter_method == "No filter")) {
filtered_cell_count <- sum(processed_sce$adt_scpca_filter == "Keep")
basic_statistics <- basic_statistics |>
# Note that the adt_scpca_filter_method column is only present in the processed_sce object
mutate("Number of cells that pass filtering threshold" = format(filtered_cell_count),
"Percent of cells that pass filtering threshold" = paste0(round( filtered_cell_count/ncol(processed_sce) * 100, digits = 2), "%")
)
}
basic_statistics <- basic_statistics |>
mutate("Normalization method" = format(processed_meta$adt_normalization)) |>
reformat_nulls() |>
t()
Expand All @@ -96,9 +99,13 @@ if (has_processed) {


```{r fig.alt="Cell filtering based on both ADT and RNA counts", results='asis'}
if (has_processed) {
if (has_processed & !(processed_meta$adt_scpca_filter_method == "No filter")) {
glue::glue('
Note that low-quality cells as identified by ADT counts are not actually filtered from the SCE object.
Instead, cells that passed the filter threshold are labeled as `"Keep"` within the SCE object, and conversely cells that failed to pass the filtered are labeled as `"Remove"`.
The plot below displays an overall view of cell filtering based on both ADT and RNA counts. Cells are labeled as one of the following:
- "Keep": This cell is retained based on both RNA and ADT counts.
Expand Down Expand Up @@ -144,6 +151,14 @@ if (has_processed) {
facet_wrap(vars(filter_summary)) +
theme(plot.title = element_text(hjust = 0.5),
legend.position = "none")
} else {
glue::glue("
<div class=\"alert alert-warning\">
No ADT filtering was performed on this library.
</div>
")
}
```

Expand Down

0 comments on commit bed9eff

Please sign in to comment.