Skip to content

Commit

Permalink
Fix bug abundance_info
Browse files Browse the repository at this point in the history
  • Loading branch information
browaeysrobin committed Nov 10, 2023
1 parent 6ee0752 commit b23b735
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 21 deletions.
2 changes: 1 addition & 1 deletion R/expression_processing.R
Original file line number Diff line number Diff line change
Expand Up @@ -597,7 +597,7 @@ get_frac_exprs = function(sce, sample_id, celltype_id, group_id, batches = NA, m
# n = min_sample_prop fraction of samples of the smallest group
# min_sample_prop = 0.5 by default
# fraction_cutoff = 0.05 by default
n_smallest_group_tbl = grouping_df_filtered %>% dplyr::group_by(group, celltype) %>% dplyr::count() %>% dplyr::group_by(celltype) %>% dplyr::summarize(n_smallest_group = min(n)) %>% dplyr::mutate(n_min = min_sample_prop * n_smallest_group) %>% distinct()
n_smallest_group_tbl = grouping_df_filtered %>% dplyr::group_by(group, celltype) %>% dplyr::count() %>% dplyr::group_by(celltype) %>% dplyr::summarize(n_smallest_group = min(n)) %>% dplyr::mutate(n_min = min_sample_prop * n_smallest_group) %>% dplyr::mutate(n_min = pmax(n_min, 2)) %>% distinct()

print(paste0("Genes with non-zero counts in at least ",fraction_cutoff*100, "% of cells of a cell type of interest in a particular sample will be considered as expressed in that sample."))

Expand Down
2 changes: 1 addition & 1 deletion R/pipeline.R
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,7 @@ multi_nichenet_analysis = function(sce,
if(verbose == TRUE){
print("Calculate normalized average and pseudobulk expression")
}
abundance_expression_info = process_abundance_expression_info(sce = sce, sample_id = sample_id, group_id = group_id, celltype_id = celltype_id, min_cells = min_cells, senders_oi = senders_oi, receivers_oi = receivers_oi, lr_network = lr_network, batches = batches, frq_list = frq_list, rel_abundance_df = abundance_info$rel_abundance_df)
abundance_expression_info = process_abundance_expression_info(sce = sce, sample_id = sample_id, group_id = group_id, celltype_id = celltype_id, min_cells = min_cells, senders_oi = senders_oi, receivers_oi = receivers_oi, lr_network = lr_network, batches = batches, frq_list = frq_list, abundance_info = abundance_info)

metadata_combined = SummarizedExperiment::colData(sce) %>% tibble::as_tibble()

Expand Down
13 changes: 8 additions & 5 deletions R/pipeline_wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ get_abundance_info = function(sce, sample_id, group_id, celltype_id, min_cells,
rel_abundance_df = rel_abundance_celltype_vs_group %>% data.frame() %>% tibble::rownames_to_column("group") %>% tidyr::gather(celltype, rel_abundance_scaled, -group) %>% tibble::as_tibble() %>% dplyr::mutate(rel_abundance_scaled = scale_quantile_adapted(rel_abundance_scaled))


return(list(abund_plot_sample = abund_plot, abund_plot_group = abund_plot_boxplot, abund_barplot = abund_barplot, abundance_data = abundance_data, rel_abundance_df = rel_abundance_df))
return(list(abund_plot_sample = abund_plot, abund_plot_group = abund_plot_boxplot, abund_barplot = abund_barplot, abundance_data = abundance_data, abundance_info = abundance_info))

}
#' @title process_abundance_expression_info
Expand Down Expand Up @@ -241,12 +241,12 @@ get_abundance_info = function(sce, sample_id, group_id, celltype_id, min_cells,
#' receivers_oi = SummarizedExperiment::colData(sce)[,celltype_id] %>% unique()
#' abundance_info = get_abundance_info(sce = sce, sample_id = sample_id, group_id = group_id, celltype_id = celltype_id, min_cells = 10, senders_oi = senders_oi, receivers_oi = receivers_oi)
#' frq_list = get_frac_exprs(sce = sce, sample_id = sample_id, celltype_id = celltype_id, group_id = group_id)
#' abundance_celltype_info = process_abundance_expression_info(sce = sce, sample_id = sample_id, group_id = group_id, celltype_id = celltype_id, min_cells = 10, senders_oi = senders_oi, receivers_oi = receivers_oi, lr_network, frq_list = frq_list, rel_abundance_df = abundance_info$rel_abundance_df)
#' abundance_celltype_info = process_abundance_expression_info(sce = sce, sample_id = sample_id, group_id = group_id, celltype_id = celltype_id, min_cells = 10, senders_oi = senders_oi, receivers_oi = receivers_oi, lr_network, frq_list = frq_list, abundance_info = abundance_info)
#' }
#'
#' @export
#'
process_abundance_expression_info = function(sce, sample_id, group_id, celltype_id, min_cells, senders_oi, receivers_oi, lr_network, batches = NA, frq_list, rel_abundance_df){
process_abundance_expression_info = function(sce, sample_id, group_id, celltype_id, min_cells, senders_oi, receivers_oi, lr_network, batches = NA, frq_list, abundance_info){

requireNamespace("dplyr")
requireNamespace("ggplot2")
Expand Down Expand Up @@ -297,7 +297,10 @@ process_abundance_expression_info = function(sce, sample_id, group_id, celltype_

celltype_info$frq_df = frq_list$frq_df
celltype_info$frq_df_group = frq_list$frq_df_group
celltype_info$rel_abundance_df = rel_abundance_df
celltype_info$rel_abundance_df = abundance_info$rel_abundance_df

abundance_data_receiver = abundance_info$abundance_data %>% process_abund_info("receiver")
abundance_data_sender = abundance_info$abundance_data %>% process_abund_info("sender")

### Link LR network to Cell type info
receiver_info_ic = suppressMessages(process_info_to_ic(
Expand All @@ -318,7 +321,7 @@ process_abundance_expression_info = function(sce, sample_id, group_id, celltype_
lr_network = lr_network))


return(list(celltype_info = celltype_info, receiver_info_ic = receiver_info_ic, sender_info_ic = sender_info_ic, sender_receiver_info = sender_receiver_info))
return(list(abundance_data_receiver = abundance_data_receiver, abundance_data_sender = abundance_data_sender, celltype_info = celltype_info, receiver_info_ic = receiver_info_ic, sender_info_ic = sender_info_ic, sender_receiver_info = sender_receiver_info))

}
#' @title get_DE_info
Expand Down
40 changes: 26 additions & 14 deletions vignettes/condition_specific_celltype_MISC.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ min_cells = 10
Now we will calculate abundance and expression information for each cell type / sample / group combination with the following functions. In the output of this function, you can also find some 'Cell type abundance diagnostic plots' that will the users which celltype-sample combinations will be left out later on for DE calculation because the nr of cells is lower than de defined minimum defined here above. If too many celltype-sample combinations don't pass this threshold, we recommend to define your cell types in a more general way (use one level higher of the cell type ontology hierarchy) (eg TH17 CD4T cells --> CD4T cells).

```{r}
abundance_info = make_abundance_plots(sce = sce, sample_id = sample_id, group_id = group_id, celltype_id = celltype_id, min_cells = min_cells, senders_oi = senders_oi, receivers_oi = receivers_oi, batches = batches)
abundance_info = get_abundance_info(sce = sce, sample_id = sample_id, group_id = group_id, celltype_id = celltype_id, min_cells = min_cells, senders_oi = senders_oi, receivers_oi = receivers_oi, batches = batches)
```

First, we will check the cell type abundance diagnostic plots.
Expand All @@ -165,15 +165,17 @@ Given this plot, we can conclude that "L_T_TIM3._CD38._HLADR." cells are our con
We can automatically check for condition_specific_celltypes with the following code:

```{r}
sample_group_celltype_df = abundance_info$abundance_data %>% filter(n > min_cells) %>% ungroup() %>% distinct(sample_id, group_id) %>% cross_join(abundance_info$abundance_data %>% ungroup() %>% distinct(celltype_id)) %>% arrange(sample_id)
abundance_df = sample_group_celltype_df %>% left_join(abundance_info$abundance_data %>% ungroup())
abundance_df$n[is.na(abundance_df$n)] = 0
abundance_df$keep[is.na(abundance_df$keep)] = FALSE
abundance_df_summarized = abundance_df %>% mutate(keep = as.logical(keep)) %>% group_by(group_id, celltype_id) %>% summarise(samples_present = sum((keep)))
celltypes_absent_one_condition = abundance_df_summarized %>% filter(samples_present == 0) %>% pull(celltype_id) %>% unique()
celltypes_present_one_condition = abundance_df_summarized %>% filter(samples_present > 0) %>% pull(celltype_id) %>% unique()
condition_specific_celltypes = intersect(celltypes_absent_one_condition, celltypes_present_one_condition)
condition_specific_celltypes
sample_group_celltype_df = abundance_info$abundance_data %>% filter(n > min_cells) %>% ungroup() %>% distinct(sample_id, group_id) %>% cross_join(abundance_info$abundance_data %>% ungroup() %>% distinct(celltype_id)) %>% arrange(sample_id)
abundance_df = sample_group_celltype_df %>% left_join(abundance_info$abundance_data %>% ungroup())
abundance_df$n[is.na(abundance_df$n)] = 0
abundance_df$keep[is.na(abundance_df$keep)] = FALSE
abundance_df_summarized = abundance_df %>% mutate(keep = as.logical(keep)) %>% group_by(group_id, celltype_id) %>% summarise(samples_present = sum((keep)))
celltypes_absent_one_condition = abundance_df_summarized %>% filter(samples_present == 0) %>% pull(celltype_id) %>% unique()
celltypes_present_one_condition = abundance_df_summarized %>% filter(samples_present > 0) %>% pull(celltype_id) %>% unique()
condition_specific_celltypes = intersect(celltypes_absent_one_condition, celltypes_present_one_condition)
print("condition-specific celltypes:")
print(condition_specific_celltypes)
```

We recommend checking whether the automatically-defined condition-specific celltypes are relevant to you. You can always change `condition_specific_celltypes` manually.
Expand Down Expand Up @@ -217,13 +219,24 @@ For downstream visualizations and linking contrasts to their main group, you nee
contrast_tbl = tibble(contrast =
c("M-(S+A)/2","S-(M+A)/2", "A-(S+M)/2"),
group = c("M","S","A"))
sce = sce[, SummarizedExperiment::colData(sce)[,group_id] %in% contrast_tbl$group] # keep only considered groups/conditions
```


## define expressed genes

### Perform the DE analysis for each cell type.

#### Filtering of expressed genes

We will keep genes that are expressed in at least half of the samples (`min_sample_prop`) of the smallest group. A gene is considered as being expressed in a sample if it has non-zero counts in at least 5% of cells (`fraction_cutoff`)
```{r, error = TRUE}
fraction_cutoff = 0.05
min_sample_prop = 0.50
frq_list = get_frac_exprs(sce = sce, sample_id = sample_id, celltype_id = celltype_id, group_id = group_id, batches = batches, min_cells = min_cells, fraction_cutoff = fraction_cutoff, min_sample_prop = min_sample_prop)
```

```{r, error = TRUE}
DE_info = get_DE_info(sce = sce, sample_id = sample_id, group_id = group_id, celltype_id = celltype_id, batches = batches, covariates = covariates, contrasts_oi = contrasts_oi, min_cells = min_cells)
DE_info = get_DE_info(sce = sce, sample_id = sample_id, group_id = group_id, celltype_id = celltype_id, batches = batches, covariates = covariates, contrasts_oi = contrasts_oi, min_cells = min_cells, expressed_df = frq_list$expressed_df)
```
Logically, DE analysis cannot be performed for the condition-specific cell types. This results here in an error for the "L_T_TIM3._CD38._HLADR." cell type.

Expand Down Expand Up @@ -284,7 +297,7 @@ Crucial note: grouping_tbl: group should be the same as in the contrast_tbl, and
# Step 3: Extract expression information from receiver and sender cell types, and link this expression information for ligands of the sender cell types to the corresponding receptors of the receiver cell types

```{r}
abundance_expression_info = get_abundance_expression_info(sce = sce, sample_id = sample_id, group_id = group_id, celltype_id = celltype_id, min_cells = min_cells, senders_oi = senders_oi, receivers_oi = receivers_oi, lr_network = lr_network, batches = batches)
abundance_expression_info = process_abundance_expression_info(sce = sce, sample_id = sample_id, group_id = group_id, celltype_id = celltype_id, min_cells = min_cells, senders_oi = senders_oi, receivers_oi = receivers_oi, lr_network = lr_network, batches = batches , frq_list = frq_list, abundance_info = abundance_info)
```

# Step 4: Predict NicheNet ligand activities and NicheNet ligand-target links based on these differential expression results
Expand All @@ -298,7 +311,6 @@ NicheNet ligand activity will then be calculated as the enrichment of predicted
```{r}
logFC_threshold = 0.50
p_val_threshold = 0.05
fraction_cutoff = 0.05
```

We will here choose for applying the p-value cutoff on the normal p-values, and not on the p-values corrected for multiple testing. This choice was made here because this dataset has only a few samples per group and we might have a lack of statistical power due to pseudobulking. In case of more samples per group, and a sufficient high number of DE genes per group-celltype (> 50), we would recommend using the adjusted p-values.
Expand Down

0 comments on commit b23b735

Please sign in to comment.