diff --git a/DESCRIPTION b/DESCRIPTION
index 6717042..7ef57cc 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
Package: scMuffin
Title: MUlti-Features INtegrative approach for single-cell data analysis
-Version: 1.1.3
-Date: 2023-11-13
+Version: 1.1.4
+Date: 2023-11-22
Authors@R:
c(person(given = "Valentina",
family = "Nale",
@@ -52,7 +52,8 @@ Imports:
circlize,
Matrix,
pals,
- plotrix
+ plotrix,
+ qvalue
Suggests:
rmarkdown,
knitr,
diff --git a/NAMESPACE b/NAMESPACE
index f4a16db..f05482e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -20,6 +20,7 @@ export(create_scMuffinList)
export(csea)
export(detect_CNV_regions)
export(diff_map)
+export(es)
export(extract_cluster_enrichment_table)
export(extract_cluster_enrichment_tags)
export(filter_gsl)
@@ -90,6 +91,7 @@ importFrom(org.Hs.eg.db,org.Hs.egSYMBOL)
importFrom(pals,alphabet)
importFrom(parallel,mclapply)
importFrom(plotrix,thigmophobe.labels)
+importFrom(qvalue,qvalue)
importFrom(stats,dhyper)
importFrom(stats,glm)
importFrom(stats,median)
diff --git a/NEWS b/NEWS
index 9aaa6c7..1b92052 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,5 @@
+version 1.1.4
+- updates to csea and vignette
version 1.1.3
- improved point text in boxplot_cluster and barplot_cluster
version 1.1.2
diff --git a/R/assess_cluster_enrichment.R b/R/assess_cluster_enrichment.R
index 5597186..f6b0fc4 100644
--- a/R/assess_cluster_enrichment.R
+++ b/R/assess_cluster_enrichment.R
@@ -6,16 +6,17 @@
#' @param min.cells.cluster minimum number of cells of a cluster
#' @param mc.cores number of cores
#' @param csea.k number of CSEA permutations
+#' @param min.k minimum number of valid permutations to support empirical nulls
#' @description Assess cluster enrichment using ORA for categorical features and CSEA for numeric features.
#' @details
-#' The output of CSEA is a table with statistics for every tested gene set (gs_table element) and a table with the results of the leading edge (leading_edge element).
+#' The output of CSEA is a table with statistics for every tested gene set.
#' The output of ORA is composed of a series of tables with enrichment results, one for every possible categorical value. See [extract_cluster_enrichment_table] to extract summary table from CSEA and ORA results.
#'
#' @return scMuffinList with CSEA or ORA elements under scMuffinList$cluster_data for the considered partition
#' @importFrom stats setNames
#' @export
-assess_cluster_enrichment <- function(scMuffinList=NULL, feature_name=NULL, partition_id=NULL, min.cells.feature=100, min.cells.cluster=10, mc.cores=1, csea.k=99){
+assess_cluster_enrichment <- function(scMuffinList=NULL, feature_name=NULL, partition_id=NULL, min.cells.feature=100, min.cells.cluster=10, mc.cores=1, csea.k=99, min.k=10){
if(length(scMuffinList[[feature_name]]$summary) == 0){
stop("Can't find scMuffinList[[feature_name]]$summary\n")
@@ -55,7 +56,7 @@ assess_cluster_enrichment <- function(scMuffinList=NULL, feature_name=NULL, part
cat("CSEA for ", ncol(X), "features\n")
- ans$cluster_csea_res <- cluster_csea(X, cell_clusters = X_clusters, min.cells.feature=min.cells.feature, min.cells.cluster=min.cells.cluster, mc.cores=mc.cores, csea.k=csea.k)
+ ans$cluster_csea_res <- cluster_csea(X, cell_clusters = X_clusters, min.cells.feature=min.cells.feature, min.cells.cluster=min.cells.cluster, mc.cores=mc.cores, csea.k=csea.k, min.k=min.k)
if(length(scMuffinList$cluster_data[partition_id])==0){ # nor ORA neither CSEA
@@ -69,15 +70,24 @@ assess_cluster_enrichment <- function(scMuffinList=NULL, feature_name=NULL, part
}
+ #shared_names <- which(names(scMuffinList$cluster_data[[partition_id]]$CSEA$gs_table) %in% names(ans$cluster_csea_res$gs_table))
+
+ # if(length(shared_names)>0){
+ # scMuffinList$cluster_data[[partition_id]]$CSEA$gs_table[shared_names] <- NULL
+ # scMuffinList$cluster_data[[partition_id]]$CSEA$leading_edge[shared_names] <- NULL
+ # }
+ #
+ # scMuffinList$cluster_data[[partition_id]]$CSEA$gs_table <- c(scMuffinList$cluster_data[[partition_id]]$CSEA$gs_table, ans$cluster_csea_res$gs_table)
+ # scMuffinList$cluster_data[[partition_id]]$CSEA$leading_edge <- c(scMuffinList$cluster_data_full[[partition_id]]$CSEA$leading_edge, ans$cluster_csea_res$leading_edge) ##append
+
+ shared_names <- which(names(scMuffinList$cluster_data[[partition_id]]$CSEA) %in% names(ans$cluster_csea_res))
- shared_names <- which(names(scMuffinList$cluster_data[[partition_id]]$CSEA$gs_table) %in% names(ans$cluster_csea_res$gs_table))
if(length(shared_names)>0){
- scMuffinList$cluster_data[[partition_id]]$CSEA$gs_table[shared_names] <- NULL
- scMuffinList$cluster_data[[partition_id]]$CSEA$leading_edge[shared_names] <- NULL
+ scMuffinList$cluster_data[[partition_id]]$CSEA[shared_names] <- NULL
}
-
- scMuffinList$cluster_data[[partition_id]]$CSEA$gs_table <- c(scMuffinList$cluster_data[[partition_id]]$CSEA$gs_table, ans$cluster_csea_res$gs_table)
- scMuffinList$cluster_data[[partition_id]]$CSEA$leading_edge <- c(scMuffinList$cluster_data_full[[partition_id]]$CSEA$leading_edge, ans$cluster_csea_res$leading_edge) ##append
+
+ scMuffinList$cluster_data[[partition_id]]$CSEA <- c(scMuffinList$cluster_data[[partition_id]]$CSEA, ans$cluster_csea_res)
+
}
}
diff --git a/R/calc_gs_perm.R b/R/calc_gs_perm.R
index 7267d10..ff71d9b 100644
--- a/R/calc_gs_perm.R
+++ b/R/calc_gs_perm.R
@@ -4,14 +4,12 @@
#' @param gs gene set
#' @description Calculate permutations
-calc_gs_perm <- function(rll, perm, gs){
-
- #out <- unlist(lapply(rll, function(x) es(which(perm %in% gs), array(x, dimnames=list(perm)))))
+calc_gs_perm <- function(rll=NULL, perm=NULL, gs=NULL){
out <- setNames(numeric(length(rll)), names(rll))
for(i in 1:length(rll)){
- out[i] <- es(which(perm[[i]] %in% gs), array(rll[[i]], dimnames=list(perm[[i]])))
+ out[i] <- es(which(perm[[i]] %in% gs), array(rll[[i]], dimnames=list(perm[[i]])))[, 1]
}
return(out)
diff --git a/R/cluster_csea.R b/R/cluster_csea.R
index fe7f399..c8a3010 100644
--- a/R/cluster_csea.R
+++ b/R/cluster_csea.R
@@ -6,13 +6,13 @@
#' @param min.cells.cluster minimum number of cells of a cluster
#' @param mc.cores number of cores
#' @param csea.k number of permutations
-#' @param min.k.nes minimum number of not null NES values
+#' @param min.k minimum number of valid permutations to support empirical nulls
#' @return list with two elements: gs_table and leading_edge. See [csea()]
#' @export
#' @description Calculate cluster enrichment by csea approach
-cluster_csea <- function(feature_values=NULL, cell_clusters=NULL, min.cells.feature=100, min.cells.cluster=10, mc.cores=1, csea.k=99, min.k.nes=10){
+cluster_csea <- function(feature_values=NULL, cell_clusters=NULL, min.cells.feature=100, min.cells.cluster=10, mc.cores=1, csea.k=99, min.k=10){
X <- as.matrix(feature_values)
@@ -35,25 +35,12 @@ cluster_csea <- function(feature_values=NULL, cell_clusters=NULL, min.cells.feat
#CSEA process on features-by-cells
gsl <- lapply(split(cell_cluster_ok, cell_cluster_ok), function(x) names(x))
- csea_res <- csea(X, gsl, mc_cores_perm = mc.cores, ord.mode = rep(-1, ncol(X)), k = csea.k, min.size = min.cells.feature, min.k.nes = min.k.nes)
+ csea_res <- csea(X, gsl, mc_cores_perm = mc.cores, ord.mode = rep(-1, ncol(X)), k = csea.k, min.size = min.cells.feature, min.k = min.k)
# insert a row for excluded clusters
if(length(excluded_clusters)>0){
- #debug
- cat(">>>>>>DEBUG<<<<<<")
-
- #gs table
- #csea_res$gs_table <- rbind(csea_res$gs_table, data.frame(id=excluded_clusters, es=0, p_val=1, adj_p_val=1, nes=0, FDRq=1, row.names=excluded_clusters, stringsAsFactors = F)) #old
- csea_res$gs_table <- lapply(csea_res$gs_table, function(x) rbind(x, data.frame(id=excluded_clusters, es=0, p_val=1, adj_p_val=1, nes=0, FDRq=1, n_pos_perm=0, n_neg_perm=0, row.names=excluded_clusters, stringsAsFactors = F)))
-
- csea_res$gs_table <- lapply(csea_res$gs_table, function(x) x[match(names(cluster_size), rownames(x)), ])
-
- #leading_edge
- csea_res$leading_edge <- lapply(csea_res$leading_edge, function(x) rbind(x, data.frame(tags=0, tags_perc=0, list_top=0, list_top_perc=0, lead_edge=0, lead_edge_subset="", row.names=excluded_clusters, stringsAsFactors = F)))
-
- #csea_res$leading_edge <- csea_res$leading_edge[match(names(cluster_size), rownames(csea_res$leading_edge)), ] #old
- csea_res$leading_edge <- lapply(csea_res$leading_edge, function(x) x[match(names(cluster_size), rownames(x)), ])
+ csea_res <- lapply(csea_res, function(x) rbind(x, data.frame(id=excluded_clusters, es=NA, p_val=NA, adj_p_val=NA, nes=NA, FDRq=NA, nperm=NA, tags=NA, tags_perc=NA, list_top=NA, list_top_perc=NA, lead_edge=NA, row.names=excluded_clusters, stringsAsFactors = F)))
}
diff --git a/R/csea.R b/R/csea.R
index 67fd831..7b5fdff 100644
--- a/R/csea.R
+++ b/R/csea.R
@@ -5,10 +5,11 @@
#' @param ord.mode ordering mode: -1 -> descending; 1 ascending; must be of length equal to `ncol(rl)`
#' @param mc_cores_path number of cores to use for parallel calculation of gene set lists; the total number of cpu used will be mc_cores_path * mc_cores_perm
#' @param mc_cores_perm number of cores to use for parallel calculation of ranked list permutations; the total number of cpu used will be mc_cores_path * mc_cores_perm
-#' @param min.k.nes minimum number of not null NES values
+#' @param min.k minimum number of valid permutations to support empirical nulls
#' @param min.size minimum number of cells with a not null value
#' @import parallel
#' @importFrom stats p.adjust
+#' @importFrom qvalue qvalue
#' @return list with two data.frames, gs_table and leading_edge.
#` \enumerate{
#` \item {gs_table} a data.frame with:
@@ -31,7 +32,7 @@
#' @export
-csea <- function(rl, gsl, k=100, min.size=100, ord.mode=-1, min.k.nes=10, mc_cores_path=1, mc_cores_perm=1){
+csea <- function(rl=NULL, gsl=NULL, k=100, min.size=100, ord.mode=-1, min.k=10, mc_cores_path=1, mc_cores_perm=1){
#cheks
if(!is.matrix(rl) | !is.numeric(rl)){
@@ -42,7 +43,7 @@ csea <- function(rl, gsl, k=100, min.size=100, ord.mode=-1, min.k.nes=10, mc_cor
stop("length(ord.mode) must be equal to ncol(rl)")
}
- min.k.nes <- min(min.k.nes, k)
+ min.k <- min(min.k, k)
#create the list of ranked vectors
rll <- vector('list', ncol(rl))
@@ -57,17 +58,17 @@ csea <- function(rl, gsl, k=100, min.size=100, ord.mode=-1, min.k.nes=10, mc_cor
cat("Ranked list that passed the checks:", names(rll), "\n")
#real es
- print("ES...")
- #real_es <- lapply(gsl, function(x) lapply(rll, function(y) es(which(names(y) %in% x), y)))
- #real_es <- do.call(rbind, real_es)
-
- real_es_data <- lapply(gsl, function(x) lapply(rll, function(y) es(which(names(y) %in% x), y, le=T)))
+ cat("ES of input data...\n")
+
+ gsl_size <- lengths(gsl)
+
+ real_es_data <- lapply(gsl, function(x) lapply(rll, function(y) es(which(names(y) %in% x), y)))
real_es <- do.call(rbind, lapply(real_es_data, function(x) unlist(lapply(x, function(y) y$es))))
-
+
leading_edge <- vector("list", length(rll))
names(leading_edge) <- names(rll)
for(i in 1:length(leading_edge)){
- leading_edge[[i]] <- do.call(rbind, lapply(real_es_data, function(x) x[[i]]$lea))
+ leading_edge[[i]] <- do.call(rbind, lapply(real_es_data, function(x) x[[i]][, -1]))
}
#permutations
@@ -81,37 +82,9 @@ csea <- function(rl, gsl, k=100, min.size=100, ord.mode=-1, min.k.nes=10, mc_cor
x_perm[[i]] <- lapply(rll, function(x) sample(rownames(x), length(x)))
}
-
- print("calculating permutations")
- if(mc_cores_path==1){
- if(mc_cores_perm == 1){
- #res <- lapply(gsl, function(x) unlist(lapply(x_perm, function(y) calc_gs_perm(rl, y, x))))
- res <- lapply(gsl, function(x) do.call(rbind, lapply(x_perm, function(y) calc_gs_perm(rll, y, x))))
- }else{
- cat(k, "permutations on", mc_cores_perm, "cores\n")
- #res <- lapply(gsl, function(x) unlist(mclapply(x_perm, function(y) calc_gs_perm(rl, y, x), mc.cores=mc_cores_perm)))
- res <- lapply(gsl, function(x) do.call(rbind, mclapply(x_perm, function(y) calc_gs_perm(rll, y, x), mc.cores=mc_cores_perm)))
- }
- }else{
- cat(length(gsl), "gene sets on", mc_cores_path, "cores\n")
- if(mc_cores_perm == 1){
- #res <- parallel::mclapply(gsl, function(x) unlist(lapply(x_perm, function(y) calc_gs_perm(rl, y, x))), mc.cores = mc_cores_path)
- res <- mclapply(gsl, function(x) do.call(rbind, lapply(x_perm, function(y) calc_gs_perm(rll, y, x))), mc.cores = mc_cores_path)
- }else{
- cat(k, "permutations on", mc_cores_perm, "cores\n")
- #res <- parallel::mclapply(gsl, function(x) unlist(parallel::mclapply(x_perm, function(y) calc_gs_perm(rl, y, x), mc.cores=mc_cores_perm)), mc.cores = mc_cores_path)
- res <- mclapply(gsl, function(x) do.call(rbind, mclapply(x_perm, function(y) calc_gs_perm(rll, y, x), mc.cores=mc_cores_perm)), mc.cores = mc_cores_path)
- }
- }
-
-
- #list of pathwa-by-k matrices of permuted es
- #res <- do.call(rbind, res)
- # if(nrow(res) != length(gsl) | ncol(res) != k){
- # stop("not all the permutations returned a correct value\n")
- # }
- #res <- cbind(real_es, res)
-
+ cat("ES of permutations...\n")
+ res <- mclapply(gsl, function(x) do.call(rbind, mclapply(x_perm, function(y) calc_gs_perm(rll, y, x), mc.cores=mc_cores_perm)), mc.cores = mc_cores_path)
+
temp <- vector('list', length(rll))
names(temp) <- colnames(rl)
for(i in 1:length(rll)){
@@ -121,8 +94,6 @@ csea <- function(rl, gsl, k=100, min.size=100, ord.mode=-1, min.k.nes=10, mc_cor
rm(temp)
-
-
#statistics
print("calculating statistics...")
@@ -133,54 +104,94 @@ csea <- function(rl, gsl, k=100, min.size=100, ord.mode=-1, min.k.nes=10, mc_cor
for(i in 1:length(rll)){
- p_val <- apply(res[[i]], 1, function(x) ifelse(x[1] >= 0, sum(x >= x[1]) / length(x), sum(x <= x[1]) / length(x)))
+ n_pos_perm <- rowSums(res[[i]]>0)
+ n_neg_perm <- rowSums(res[[i]]<0)
+
+ p_val <- apply(res[[i]], 1, function(x) ifelse(x[1] >= 0, sum(x >= x[1]) / length(x[x>=0]), sum(x <= x[1]) / length(x[x<=0])))
+
#p-values for real ES == 0 are set to 1
- p_val[res[[i]][, 1] == 0] <- 1
+ #p_val[res[[i]][, 1] == 0] <- 1
+
+ idx_na <- which(res[[i]][, 1] == 0)
+ if(length(idx_na)>0){
+ cat("\tES==0:\n")
+ cat("\t", rownames(res[[i]])[idx_na], "\n")
+ p_val[idx_na] <- NA ###
+ }
+ idx_na <- which(res[[i]][, 1] > 0 & n_pos_perm < min.k)
+ if(length(idx_na)>0){
+ cat("\tES>0 but less than", min.k, " positive ES in permutations\n")
+ cat("\t", rownames(res[[i]])[idx_na], "\n")
+ p_val[idx_na] <- NA ###
+ }
+ idx_na <- which(res[[i]][, 1] < 0 & n_neg_perm < min.k)
+ if(length(idx_na)>0){
+ cat("\tES<0 but less than", min.k, " negative ES in permutations\n")
+ cat("\t", rownames(res[[i]])[idx_na], "\n")
+ p_val[idx_na] <- NA ###
+ }
#normalized ES
- n_pos_perm <- rowSums(res[[i]]>0)
- n_neg_perm <- rowSums(res[[i]]<0)
means <- t(apply(res[[i]], 1, function(x) c(mean(x[x>0]), abs(mean(x[x<0]))))) #positive, negative
means[is.nan(means)] <- 0 #NaN values are caused by the absence of any positive or negative value
nes <- res[[i]] / means[, 1]
nes_neg <- res[[i]] / means[, 2]
nes[res[[i]] < 0] <- nes_neg[res[[i]] < 0]
- nes[is.nan(nes)] <- 0 #NaN values are caused by 0/0
+ nes[is.nan(nes)] <- NA #NaN values are caused by 0/0
rm(means, nes_neg)
- #if there are not at least min.k.nes the NES is unrelieable
- nes[nes>0 & n_pos_perm < min.k.nes] <- 0
- nes[nes<0 & n_neg_perm < min.k.nes] <- 0
+ #if there are fewer than min.k permutations with the same sign, the NES is unreliable
+ nes[nes>0 & n_pos_perm < min.k] <- NA
+ nes[nes<0 & n_neg_perm < min.k] <- NA
#calculate FDR
all_nes <- as.numeric(nes)
- n_nes_pos <- sum(nes>0)
- n_nes_neg <- sum(nes<0)
- n_real_nes_pos <- sum(nes[,1] > 0)
- n_real_nes_neg <- sum(nes[,1] < 0)
- if((n_nes_pos + n_nes_neg) != (k*length(gsl) + nrow(nes))){
- warning('some nes value is equal to zero')
- }
-
+ n_nes_pos <- sum(nes>0, na.rm = T)
+ n_nes_neg <- sum(nes<0, na.rm = T)
+ n_real_nes_pos <- sum(nes[,1] > 0, na.rm = T)
+ n_real_nes_neg <- sum(nes[,1] < 0, na.rm = T)
+
#FDR: NES* > 0: fdrq = #(all positive NESp >= NES*) / #(all positive NESp) / [ #(all NES* >= NES*) / (all positive NES*) ]
#FDR: NES* < 0: fdrq = #(all negative NESp <= NES*) / #(all negative NESp) / [ #(all NES* <= NES*) / (all negative NES*) ]
- fdrq <- sapply(nes[, 1], function(x) ifelse(x>0, sum(all_nes >= x) / n_nes_pos, sum(all_nes <= x) / n_nes_neg))
- fdrq <- fdrq / sapply(nes[, 1], function(x) ifelse(x>0, sum(nes[, 1] >= x) / n_real_nes_pos, sum(nes[, 1] <= x) / n_real_nes_neg))
+ fdrq <- sapply(nes[, 1], function(x) ifelse(x>0, sum(all_nes >= x, na.rm = T) / n_nes_pos, sum(all_nes <= x, na.rm = T) / n_nes_neg))
+ fdrq <- fdrq / sapply(nes[, 1], function(x) ifelse(x>0, sum(nes[, 1] >= x, na.rm = T) / n_real_nes_pos, sum(nes[, 1] <= x, na.rm = T) / n_real_nes_neg))
#q of nes that are equal to 0 are set to 1
- fdrq[nes[, 1] == 0] <- 1
+ #fdrq[nes[, 1] == 0] <- 1
rm(all_nes)
fdrq[fdrq>1] <- 1
-
+ idx_replace <- which(fdrq
In a real scenario, the functions listed above should be run using appropriate parameter values that take into account the specificities of the dataset under consideration and the objectives of the analysis to be performed. Here we just wanted to provide a means to obtain a dataset to follow this vignette. Please read the Seurat documentation for further details about the functions listed above.
head(scML$partitions[, 1, drop = F])
## global_expr
-## V3 0
-## V4 0
-## V5 7
-## V6 0
-## V7 7
-## V8 7
+## V3 2
+## V4 2
+## V5 6
+## V6 2
+## V7 6
+## V8 6
The function add_features()
can be used to add custom
results to the scMuffinList
. In the following example we
add the cell cycle phase:
## Angiogenesis Apoptosis Cell_Cycle Differentiation DNA_damage
-## V3 -0.089113218 -0.02736876 0.15982556 0.2216575 0.14051428
-## V4 -0.003587037 0.05131560 0.30245301 0.2518166 0.29326775
-## V5 0.122828520 -0.02124108 -0.01387160 0.1500232 0.16742211
-## V6 -0.072394618 -0.01090225 0.38740856 0.2247531 0.19227076
-## V7 0.114843858 0.18815302 -0.11715432 0.1688834 0.03979306
-## V8 0.180094845 0.24514734 -0.07518025 0.1015920 0.11925093
+## Angiogenesis Apoptosis Cell_Cycle Differentiation DNA_damage
+## V3 -0.10918589 -0.027672315 0.14448128 0.2177449 0.10625520
+## V4 -0.02872848 0.065879018 0.28261124 0.2546351 0.28273733
+## V5 0.08863655 -0.003487231 -0.04785548 0.1568680 0.14740373
+## V6 -0.10802785 -0.010414814 0.36535484 0.2201350 0.16137969
+## V7 0.09552221 0.214845193 -0.13056901 0.1659778 0.01677083
+## V8 0.14137853 0.267301590 -0.10286473 0.1054560 0.09103363
+## DNA_repair EMT Hypoxia Inflammation Invasion Metastasis
+## V3 0.148348585 0.05265334 0.09262748 0.009633798 0.3169777 0.2093022
+## V4 0.181192508 0.13265908 0.12957070 0.050349840 0.5034789 0.2555932
+## V5 -0.011344590 0.22409580 0.41523704 0.111271235 0.2040137 0.3528524
+## V6 0.163828298 0.26049885 -0.03046142 0.005814202 0.5683223 0.1564824
+## V7 -0.071774957 0.35638003 0.39819922 0.004184621 0.3586581 0.3512004
+## V8 0.001900824 0.23221609 0.32485036 -0.015495107 0.2552769 0.4519555
+## Proliferation Quiescence Stemness
+## V3 0.19624494 -0.10718702 0.4275265
+## V4 0.31782667 0.14023972 0.6338283
+## V5 0.13657056 0.00557050 0.2942666
+## V6 0.16174960 0.06201775 0.5979552
+## V7 0.17320576 0.03903302 0.2387523
+## V8 -0.03581073 0.07383093 0.2218311
## case case.N case.AV nmark_min avg_control control.AV null_ok
-## V3 0.6209460 40 21 TRUE 0.7100592 100 TRUE
-## V4 0.7282336 40 20 TRUE 0.7318207 100 TRUE
-## V5 0.8418314 40 23 TRUE 0.7190029 100 TRUE
-## V6 0.6632416 40 16 TRUE 0.7356362 100 TRUE
-## V7 0.8363020 40 23 TRUE 0.7214581 100 TRUE
-## V8 0.8964932 40 19 TRUE 0.7163984 100 TRUE
-## avg_delta_score delta_score
-## V3 -0.089113218 -0.089113218
-## V4 -0.003587037 -0.003587037
-## V5 0.122828520 0.122828520
-## V6 -0.072394618 -0.072394618
-## V7 0.114843858 0.114843858
-## V8 0.180094845 0.180094845
+## V3 0.6209460 40 21 TRUE 0.7301319 100 TRUE
+## V4 0.7282336 40 20 TRUE 0.7569621 100 TRUE
+## V5 0.8418314 40 23 TRUE 0.7531949 100 TRUE
+## V6 0.6632416 40 16 TRUE 0.7712694 100 TRUE
+## V7 0.8363020 40 23 TRUE 0.7407798 100 TRUE
+## V8 0.8964932 40 19 TRUE 0.7551147 100 TRUE
+## avg_delta_score delta_score
+## V3 -0.10918589 -0.10918589
+## V4 -0.02872848 -0.02872848
+## V5 0.08863655 0.08863655
+## V6 -0.10802785 -0.10802785
+## V7 0.09552221 0.09552221
+## V8 0.14137853 0.14137853
The values of cell-level scores can be used to color UMAP visualizations, which are automatically generated for every gene set using:
@@ -329,39 +350,36 @@
scML$cluster_data$global_expr$gene_set_scoring$summary
## Angiogenesis Apoptosis Cell_Cycle Differentiation DNA_damage
-## 0 0.047859938 -0.088965155 0.110267077 0.16918450 0.16103980
-## 1 0.056847826 0.036663900 0.006437504 0.16233049 0.18251283
-## 2 0.032503390 -0.034567178 -0.065434867 0.08517490 0.14345603
-## 3 0.056820484 -0.007732416 -0.043253422 0.09640405 0.07634114
-## 4 0.035065898 -0.020565102 -0.069010461 0.09766083 0.13605394
-## 5 0.057012002 -0.007916085 -0.027483616 0.12878842 0.15750102
-## 6 0.009464465 -0.048782422 0.011650033 0.14793156 0.23773401
-## 7 0.180890552 -0.004328227 -0.026837614 0.18052135 0.14210041
-## 8 0.144201389 -0.018635396 0.021841089 0.16421038 0.21497220
-## 9 0.045549704 -0.068738127 0.295309989 0.25482631 0.17238070
-## DNA_repair EMT Hypoxia Inflammation Invasion Metastasis
-## 0 0.098945306 0.1429307 -0.002983632 0.008729023 0.4472744 0.1144382
-## 1 0.009465724 0.2523039 0.333431166 0.152182984 0.2503599 0.3066740
-## 2 -0.025654591 0.1912701 0.210172770 0.038350228 0.1130673 0.2673377
-## 3 -0.011475854 0.2303885 0.167091886 0.005294070 0.1752660 0.2788620
-## 4 0.030970644 0.2275091 0.304570531 0.110102310 0.1778773 0.2702203
-## 5 -0.009794570 0.2157880 0.271551139 0.066140185 0.2598153 0.2959017
-## 6 0.055899011 0.1773771 -0.037244689 0.032686215 0.2602460 0.0970928
-## 7 -0.027613833 0.3274798 0.295935075 0.110663000 0.2846644 0.3815454
-## 8 0.016203928 0.1376003 -0.046727520 -0.001804147 0.2446869 0.1113544
-## 9 0.107220520 0.1154087 0.023735537 0.041855789 0.5635380 0.1552216
-## Proliferation Quiescence Stemness
-## 0 0.17611146 -0.009397549 0.3951332
-## 1 0.01102549 0.068921466 0.2960947
-## 2 -0.05060590 -0.018336789 0.2438539
-## 3 0.01032691 -0.037541588 0.2649289
-## 4 0.12775340 0.153623982 0.3388603
-## 5 0.03528640 -0.031068515 0.2487021
-## 6 0.03657727 -0.077159027 0.3848135
-## 7 -0.03310542 0.051625491 0.2351137
-## 8 0.06128233 0.041054982 0.4134585
-## 9 0.37196567 -0.025262162 0.4170985
+## Angiogenesis Apoptosis Cell_Cycle Differentiation DNA_damage DNA_repair
+## 0 0.005206236 0.019249002 -0.06447346 0.10079093 0.08386659 -0.015634375
+## 1 0.022524297 0.052477813 -0.02173267 0.16058731 0.15353017 -0.016262480
+## 2 0.016285304 -0.077046033 0.09287226 0.16931117 0.14490958 0.104084184
+## 3 -0.011020301 -0.004544935 -0.06073645 0.09458853 0.11932908 -0.001509004
+## 4 0.022337656 0.021403192 -0.04759109 0.14014722 0.12942893 -0.024110561
+## 5 -0.031611017 -0.042869916 0.02839297 0.15244780 0.22201017 0.067711722
+## 6 0.132965431 0.028446423 -0.05078115 0.17077833 0.11716633 -0.035208636
+## 7 0.083809559 -0.027496365 0.01100746 0.15875134 0.20662126 0.027456269
+## 8 0.009743315 -0.072038549 0.28456623 0.24816174 0.16453499 0.118506158
+## EMT Hypoxia Inflammation Invasion Metastasis Proliferation
+## 0 0.1958425 0.152775367 0.007683201 0.02659866 0.24715672 0.01822001
+## 1 0.2445520 0.300890417 0.129862284 0.15558955 0.31501115 0.02251154
+## 2 0.1556018 -0.011922956 -0.003003220 0.38545997 0.10925866 0.16892359
+## 3 0.2323454 0.267548254 0.086860333 0.09265066 0.24611511 0.12064441
+## 4 0.2101481 0.262389267 0.056573420 0.17563272 0.29081642 0.03197286
+## 5 0.1895798 -0.046625305 0.028563842 0.20228425 0.06788279 0.03974853
+## 6 0.3314966 0.287708428 0.080396645 0.20701318 0.37491821 -0.02531225
+## 7 0.1290949 -0.044477324 -0.003007760 0.19486268 0.10988028 0.05104020
+## 8 0.1226556 0.004651646 0.035257815 0.49988275 0.14005943 0.39134694
+## Quiescence Stemness
+## 0 -0.014092199 0.2370518
+## 1 0.086151276 0.2942516
+## 2 0.029009746 0.3854771
+## 3 0.148241530 0.3829096
+## 4 -0.003897101 0.2378097
+## 5 -0.054270405 0.3662737
+## 6 0.073751645 0.2388921
+## 7 0.040777466 0.4215733
+## 8 0.006650415 0.4348696
These mean values are useful to obtain a concise visualization of
gene set expression throughout the dataset, using the function
plot_heatmap_features_by_clusters
:
plot_heatmap_features_by_clusters(scMuffinList = scML,
feature_source = "gss")
## CNV_score
-## V3 42.63922
-## V4 49.41188
-## V5 40.24116
-## V6 45.47030
-## V7 37.84952
-## V8 45.86847
-## V3 V4 V5
-## chr1__LINC00115_826205__UQCRHL_15807161 -0.09649985 -0.1330433 -0.08760797
-## chr1__LINC01128_827797__FLJ37453_15834214 -0.09649985 -0.1330433 -0.08760797
-## chr1__SAMD11_925730__SPEN_15847706 -0.09978001 -0.1363396 -0.08760797
-## chr1__NOC2L_944202__ZBTB17_15941868 -0.09978001 -0.1286003 -0.08760797
-## chr1__HES4_998963__FBXO42_16246839 -0.10306018 -0.1228353 -0.08760797
-## chr1__ISG15_1013496__SZRD1_16367241 -0.10306018 -0.1203667 -0.08760797
-## V6 V7 V8
-## chr1__LINC00115_826205__UQCRHL_15807161 -0.07987470 -0.09295970 -0.1308550
-## chr1__LINC01128_827797__FLJ37453_15834214 -0.07987470 -0.08965322 -0.1308550
-## chr1__SAMD11_925730__SPEN_15847706 -0.07987470 -0.08965322 -0.1250560
-## chr1__NOC2L_944202__ZBTB17_15941868 -0.07657357 -0.08965322 -0.1217379
-## chr1__HES4_998963__FBXO42_16246839 -0.08234611 -0.09295970 -0.1184198
-## chr1__ISG15_1013496__SZRD1_16367241 -0.07904497 -0.09295970 -0.1151017
-## V9 V10 V11
-## chr1__LINC00115_826205__UQCRHL_15807161 -0.1360846 -0.1135812 0.03934666
-## chr1__LINC01128_827797__FLJ37453_15834214 -0.1394048 -0.1135812 0.04272745
-## chr1__SAMD11_925730__SPEN_15847706 -0.1360846 -0.1102118 0.04862384
-## chr1__NOC2L_944202__ZBTB17_15941868 -0.1360846 -0.1135812 0.05452022
-## chr1__HES4_998963__FBXO42_16246839 -0.1360846 -0.1135812 0.05703581
-## chr1__ISG15_1013496__SZRD1_16367241 -0.1360846 -0.1102118 0.06041660
-## V12
-## chr1__LINC00115_826205__UQCRHL_15807161 -0.08363167
-## chr1__LINC01128_827797__FLJ37453_15834214 -0.08363167
-## chr1__SAMD11_925730__SPEN_15847706 -0.08363167
-## chr1__NOC2L_944202__ZBTB17_15941868 -0.08020467
-## chr1__HES4_998963__FBXO42_16246839 -0.08020467
-## chr1__ISG15_1013496__SZRD1_16367241 -0.07423670
-## $chr1__LINC00115_826205__UQCRHL_15807161
-## symbol location
-## 11 LINC00115 826205
-## 12 LINC01128 827797
-## 13 SAMD11 925730
-## 14 NOC2L 944202
-## 15 HES4 998963
-## 16 ISG15 1013496
-## chr start start.loc
-## 1 chr1 chr1__LINC00115_826205__UQCRHL_15807161 826205
-## 4 chr1 chr1__HIPK1_113953876__CHTOP_153633981 113953876
-## 9 chr1 chr1__BLZF1_169367878__CHIT1_203216078 169367878
-## 10 chr1 chr1__LINC00115_826205__UQCRHL_15807161 826205
-## 11 chr1 chr1__LRIG2_113073197__S100A6_153534598 113073197
-## 12 chr1 chr1__ALDH9A1_165662215__LMOD1_201896455 165662215
-## stop stop.loc cluster length
-## 1 chr1__TMEM201_9588910__STMN1_25900115 25900115 0 25073910
-## 4 chr1__RAB13_153981604__C1orf226_162378840 162378840 0 48424964
-## 9 chr1__SLC19A2_169463910__BTG2_203305518 203305518 0 33937640
-## 10 chr1__ZBTB48_6579993__RPL11_23691778 23691778 1 22865573
-## 11 chr1__TPM3_154161812__UAP1_162561439 162561439 1 49488242
-## 12 chr1__METTL18_169792531__ATP2B4_203626831 203626831 1 37964616
+## V3 42.61237
+## V4 49.29867
+## V5 40.00239
+## V6 45.42658
+## V7 37.62340
+## V8 45.69161
+## V3 V4
+## chr1__826205__LINC00115__chr1__15807161__UQCRHL -0.09701447 -0.1335597
+## chr1__827590__LINC01128__chr1__15834214__FLJ37453 -0.09701447 -0.1335597
+## chr1__923922__SAMD11__chr1__15847706__SPEN -0.10029463 -0.1368559
+## chr1__944202__NOC2L__chr1__15941868__ZBTB17 -0.10029463 -0.1291166
+## chr1__998963__HES4__chr1__16246839__FBXO42 -0.10357480 -0.1233517
+## chr1__1013496__ISG15__chr1__16367241__SZRD1 -0.10357480 -0.1208830
+## V5 V6
+## chr1__826205__LINC00115__chr1__15807161__UQCRHL -0.08801707 -0.08020141
+## chr1__827590__LINC01128__chr1__15834214__FLJ37453 -0.08801707 -0.08020141
+## chr1__923922__SAMD11__chr1__15847706__SPEN -0.08801707 -0.08020141
+## chr1__944202__NOC2L__chr1__15941868__ZBTB17 -0.08801707 -0.07690028
+## chr1__998963__HES4__chr1__16246839__FBXO42 -0.08801707 -0.08267282
+## chr1__1013496__ISG15__chr1__16367241__SZRD1 -0.08801707 -0.07937168
+## V7 V8
+## chr1__826205__LINC00115__chr1__15807161__UQCRHL -0.09343743 -0.1312566
+## chr1__827590__LINC01128__chr1__15834214__FLJ37453 -0.09013095 -0.1312566
+## chr1__923922__SAMD11__chr1__15847706__SPEN -0.09013095 -0.1254576
+## chr1__944202__NOC2L__chr1__15941868__ZBTB17 -0.09013095 -0.1221395
+## chr1__998963__HES4__chr1__16246839__FBXO42 -0.09343743 -0.1188214
+## chr1__1013496__ISG15__chr1__16367241__SZRD1 -0.09343743 -0.1155033
+## V9 V10
+## chr1__826205__LINC00115__chr1__15807161__UQCRHL -0.1363821 -0.1140620
+## chr1__827590__LINC01128__chr1__15834214__FLJ37453 -0.1397023 -0.1140620
+## chr1__923922__SAMD11__chr1__15847706__SPEN -0.1363821 -0.1106926
+## chr1__944202__NOC2L__chr1__15941868__ZBTB17 -0.1363821 -0.1140620
+## chr1__998963__HES4__chr1__16246839__FBXO42 -0.1363821 -0.1140620
+## chr1__1013496__ISG15__chr1__16367241__SZRD1 -0.1363821 -0.1106926
+## V11 V12
+## chr1__826205__LINC00115__chr1__15807161__UQCRHL 0.03868385 -0.08410502
+## chr1__827590__LINC01128__chr1__15834214__FLJ37453 0.04206464 -0.08410502
+## chr1__923922__SAMD11__chr1__15847706__SPEN 0.04796102 -0.08410502
+## chr1__944202__NOC2L__chr1__15941868__ZBTB17 0.05385741 -0.08067802
+## chr1__998963__HES4__chr1__16246839__FBXO42 0.05637300 -0.08067802
+## chr1__1013496__ISG15__chr1__16367241__SZRD1 0.05975379 -0.07471005
+## $chr1__826205__LINC00115__chr1__15807161__UQCRHL
+## chr pos symbol
+## X11.1 chr1 826205 LINC00115
+## X12.1 chr1 827590 LINC01128
+## X13.1 chr1 923922 SAMD11
+## X14.1 chr1 944202 NOC2L
+## X15.1 chr1 998963 HES4
+## X16.1 chr1 1013496 ISG15
+## X17.1 chr1 1216930 SDF4
+## X18.1 chr1 1232236 B3GALT6
+## X19.1 chr1 1253911 UBE2J2
+## X110.1 chr1 1280435 SCNN1D
+## X111.1 chr1 1292390 ACAP3
+## X112.1 chr1 1308579 PUSL1
+## X113.1 chr1 1324801 CPTP
+## X114.1 chr1 1335277 DVL1
+## X115.1 chr1 1352688 MXRA8
+## X116.1 chr1 1373735 AURKAIP1
+## X117.1 chr1 1385710 CCNL2
+## X118.1 chr1 1401908 MRPL20
+## X119.1 chr1 1449688 ATAD3C
+## X120.1 chr1 1471764 ATAD3B
+## X121.1 chr1 1512161 ATAD3A
+## X122.1 chr1 1534777 TMEM240
+## X123.1 chr1 1541672 SSU72
+## X124.1 chr1 1615854 MIB2
+## X125.1 chr1 1635224 CDK11B
+## X126.1 chr1 1661477 SLC35E2B
+## X127.1 chr1 1751231 NADK
+## X128.1 chr1 1785284 GNB1
+## X129.1 chr1 2189548 FAAP20
+## X130.1 chr1 2228318 SKI
+## X131.1 chr1 2391840 RER1
+## X132.1 chr1 2403973 PEX10
+## X133.1 chr1 2508536 PANK4
+## X134.1 chr1 2528744 HES5
+## X135.1 chr1 2556364 TNFRSF14
+## X136.1 chr1 3625014 TPRG1L
+## X137.1 chr1 3630769 WRAP73
+## X138.1 chr1 3778558 LRRC47
+## X139.1 chr1 3812085 CEP104
+## X140.1 chr1 3889132 C1orf174
+## X141.1 chr1 6185019 RPL22
+## X142.1 chr1 6221192 ICMT
+## X143.1 chr1 6247352 GPR153
+## X144.1 chr1 6264271 ACOT7
+## X145.1 chr1 6460785 TNFRSF25
+## X146.1 chr1 6467121 PLEKHG5
+## X147.1 chr1 6521346 NOL9
+## X148.1 chr1 6579993 ZBTB48
+## X149.1 chr1 6590723 KLHL21
+## X150.1 chr1 6613730 PHF13
+## X151.1 chr1 6625149 THAP3
+## X152.1 chr1 6634169 DNAJC11
+## X153.1 chr1 6785453 CAMTA1
+## X154.1 chr1 7771295 VAMP3
+## X155.1 chr1 7784428 PER3
+## X156.1 chr1 7961710 PARK7
+## X157.1 chr1 8011726 ERRFI1
+## X158.1 chr1 8352403 RERE
+## X159.1 chr1 8860999 ENO1
+## X160.1 chr1 9148010 MIR34AHG
+## X161.1 chr1 9234773 H6PD
+## X162.1 chr1 9292893 SPSB1
+## X163.1 chr1 9539464 SLC25A33
+## X164.1 chr1 9588910 TMEM201
+## X165.1 chr1 9728925 CLSTN1
+## X166.1 chr1 9848275 CTNNBIP1
+## X167.1 chr1 9922117 LZIC
+## X168.1 chr1 9942922 NMNAT1
+## X169.1 chr1 10032957 UBE4B
+## X170.1 chr1 10210705 KIF1B
+## X171.1 chr1 10399063 PGD
+## X172.1 chr1 10460545 DFFA
+## X173.1 chr1 10474949 PEX14
+## X174.1 chr1 10647210 CASZ1
+## X175.1 chr1 11012653 TARDBP
+## X176.1 chr1 11054589 SRM
+## X177.1 chr1 11066618 EXOSC10
+## X178.1 chr1 11106534 MTOR
+## X179.1 chr1 11273197 UBIAD1
+## X180.1 chr1 11654406 FBXO44
+## X181.1 chr1 11674479 MAD2L2
+## X182.1 chr1 11691709 DRAXIN
+## X183.1 chr1 11736084 AGTRAP
+## X184.1 chr1 11785725 MTHFR
+## X185.1 chr1 11806095 CLCN6
+## X186.1 chr1 11845709 NPPA
+## X187.1 chr1 11919590 KIAA2013
+## X188.1 chr1 11934716 PLOD1
+## X189.1 chr1 11980443 MFN2
+## X190.1 chr1 12019497 MIIP
+## X191.1 chr1 12230029 VPS13D
+## X192.1 chr1 12567909 DHRS3
+## X193.1 chr1 13749414 PRDM2
+## X194.1 chr1 14945918 KAZN
+## X195.1 chr1 15409887 EFHD2
+## X196.1 chr1 15491400 CASP9
+## X197.1 chr1 15526847 DNAJC16
+## X198.1 chr1 15617457 DDI2
+## X199.1 chr1 15684319 PLEKHM2
+## X1100 chr1 15758794 FBLIM1
+## X1101 chr1 15807161 UQCRHL
+## chr start start.loc
+## 1 chr1 chr1__826205__LINC00115__chr1__15807161__UQCRHL 826205
+## 3 chr1 chr1__113929323__HIPK1__chr1__153633981__CHTOP 113929323
+## 5 chr1 chr1__169132530__NME7__chr1__203127725__ADORA1 169132530
+## 6 chr1 chr1__826205__LINC00115__chr1__15807161__UQCRHL 826205
+## 35 chr1 chr1__826205__LINC00115__chr1__15807161__UQCRHL 826205
+## 38 chr1 chr1__93180715__CCDC18__chr1__144887287__SRGAP2B 93180715
+## stop stop.loc cluster length
+## 1 chr1__9588910__TMEM201__chr1__25884178__STMN1 25884178 0 25057973
+## 3 chr1__153990761__RPS27__chr1__162497804__UHMK1 162497804 0 48568481
+## 5 chr1__169792531__METTL18__chr1__203305518__BTG2 203305518 0 34172988
+## 6 chr1__213988532__PROX1__chr1__244653125__DESI2 244653125 1 243826920
+## 35 chr1__9234773__H6PD__chr1__25616790__MAN1C1 25616790 2 24790585
+## 38 chr1__154161812__TPM3__chr1__162561898__UAP1 162561898 2 69381183
Importantly, CNV inference adds the “CNV” partition:
head(scML$partitions)
## global_expr CNV
-## V3 0 2
-## V4 0 3
-## V5 7 0
-## V6 0 3
-## V7 7 0
-## V8 7 1
+## V3 2 1
+## V4 2 1
+## V5 6 0
+## V6 2 1
+## V7 6 0
+## V8 6 2
The calculation can be demanding. For example, it requires approximately 10 minutes on 2 cores (dual Intel(R) Xeon(R), 2.60GHz).
@@ -491,12 +611,12 @@The function plot_profile_CNV
plots the median CNV
profile of a cluster:
plot_profile_CNV(scMuffinList = scML, cluster = 0, cex.points = 0.5)
head(scML$proliferation$summary)
## Proliferation_score
-## V3 0.24639700
-## V4 0.49466334
-## V5 -0.18135986
-## V6 0.35468552
-## V7 -0.07408355
-## V8 -0.11730502
+## V3 0.22333804
+## V4 0.48095696
+## V5 -0.21546692
+## V6 0.33213053
+## V7 -0.08829706
+## V8 -0.11826046
scML$diffusion_map_pseudo_t$summary
## DC1 DC2 dpt branch tips
-## V3 -0.02439247 0.027065472 0.4543284 NA FALSE
-## V4 -0.03067862 0.038183692 0.3605517 1 FALSE
-## V5 0.01926612 0.005728970 1.3628920 2 FALSE
-## V6 -0.03187947 0.047049007 0.3474134 1 FALSE
-## V7 0.01855801 0.004949549 1.3447794 2 FALSE
-## V8 0.01855621 0.005021415 1.3454328 2 FALSE
+## V3 0.02440015 0.027054071 1.3328347 NA FALSE
+## V4 0.03072120 0.038239911 1.4752607 2 FALSE
+## V5 -0.01926577 0.005752026 0.3735439 1 FALSE
+## V6 0.03191479 0.047083928 1.5085980 2 FALSE
+## V7 -0.01855966 0.004971247 0.3738625 1 FALSE
+## V8 -0.01856063 0.005047917 0.3926285 1 FALSE
The full DPT object from destiny is stored in the element
scML$diffusion_map_pseudo_t$full
. This allows the user to
take advantage of destiny functions, such as
@@ -598,17 +718,7 @@
scML$cluster_data$global_expr$CSEA$gs_table$Angiogenesis
-## id es p_val adj_p_val n_pos_perm n_neg_perm nes FDRq
-## 0 0 0.2551729 0.62 0.8857143 100 0 0.9600786 0.946014127
-## 1 1 0.2674230 0.55 0.8857143 100 0 0.9839195 0.989909183
-## 2 2 0.1910471 1.00 1.0000000 100 0 0.6916231 1.000000000
-## 3 3 0.2711880 0.53 0.8857143 98 2 1.0002714 1.000000000
-## 4 4 -0.2306596 0.01 0.0500000 99 1 0.0000000 1.000000000
-## 5 5 0.2985592 0.26 0.6500000 100 0 1.0816991 0.823410696
-## 6 6 0.2494544 0.82 1.0000000 100 0 0.8571037 1.000000000
-## 7 7 0.6061648 0.01 0.0500000 99 1 2.1895320 0.009081736
-## 8 8 0.4113923 0.05 0.1666667 97 3 1.4228520 0.031786075
-## 9 9 0.1897547 0.98 1.0000000 98 2 0.6366039 0.992936428
+## NULL
In this example, we assess cluster enrichment in relation to the
categorical feature CC_Phase
, namely the cell cycle phase
calculated by means of Seurat function CellCycleScoring()
@@ -625,16 +735,15 @@
scML$cluster_data$global_expr$ORA$CC_Phase$G1
## id N wb bb bd wbd exp er p p_adj
-## 0 0 2828 2049 779 499 0 361.54562 0.000000 1.000000e+00 1.000000e+00
-## 1 1 2828 2049 779 475 459 344.15665 1.333695 2.289783e-50 2.289783e-49
-## 2 2 2828 2049 779 378 343 273.87624 1.252391 8.889784e-21 1.777957e-20
-## 3 3 2828 2049 779 309 293 223.88296 1.308720 2.712597e-26 9.041991e-26
-## 4 4 2828 2049 779 296 272 214.46393 1.268279 2.060936e-18 3.434894e-18
-## 5 5 2828 2049 779 257 253 186.20686 1.358704 5.986769e-32 2.993384e-31
-## 6 6 2828 2049 779 185 145 134.03996 1.081767 3.528719e-02 4.410899e-02
-## 7 7 2828 2049 779 172 169 124.62093 1.356112 6.593125e-21 1.648281e-20
-## 8 8 2828 2049 779 142 115 102.88472 1.117756 1.066766e-02 1.523951e-02
-## 9 9 2828 2049 779 115 0 83.32214 0.000000 1.000000e+00 1.000000e+00
+## 0 0 2831 2056 775 588 547 427.03214 1.280934 4.898697e-43 2.204414e-42
+## 1 1 2831 2056 775 538 516 390.71989 1.320639 4.370181e-53 3.933163e-52
+## 2 2 2831 2056 775 500 0 363.12257 0.000000 1.000000e+00 1.000000e+00
+## 3 3 2831 2056 775 331 299 240.38714 1.243827 4.638303e-17 8.348946e-17
+## 4 4 2831 2056 775 257 255 186.64500 1.366230 1.095704e-34 3.287111e-34
+## 5 5 2831 2056 775 186 148 135.08160 1.095634 1.546645e-02 1.988543e-02
+## 6 6 2831 2056 775 176 174 127.81915 1.361298 1.102600e-22 2.480850e-22
+## 7 7 2831 2056 775 140 117 101.67432 1.150733 1.320545e-03 1.980817e-03
+## 8 8 2831 2056 775 115 0 83.51819 0.000000 1.000000e+00 1.000000e+00
The enrichment analysis results appearing in these tables can be
easily extracted and organized in a clusters-by-values table by means of
extract_cluster_enrichment_table
. For instance, here we
@@ -654,39 +763,16 @@
## Angiogenesis Apoptosis Cell_Cycle Differentiation DNA_damage DNA_repair
-## 0 0.946014127 0.00879397 0.003722084 0.02250000 0.690090090 0.004201681
-## 1 0.989909183 0.01470588 1.000000000 0.11800000 0.102102102 1.000000000
-## 2 1.000000000 0.64049414 0.032258065 1.00000000 1.000000000 1.000000000
-## 3 1.000000000 0.09803922 0.032258065 1.00000000 1.000000000 1.000000000
-## 4 1.000000000 0.23743719 0.032258065 1.00000000 1.000000000 1.000000000
-## 5 0.823410696 0.88819095 0.058064516 1.00000000 0.955955956 1.000000000
-## 6 1.000000000 0.01172529 0.579404467 1.00000000 0.009009009 0.111344538
-## 7 0.009081736 0.01470588 0.048387097 0.02666667 1.000000000 0.031250000
-## 8 0.031786075 0.76331658 0.967741935 0.01000000 0.009009009 0.110644258
-## 9 0.992936428 0.00879397 0.003722084 0.01000000 0.315315315 0.004201681
-## EMT Hypoxia Inflammation Invasion Metastasis Proliferation
-## 0 1.000000000 1.000000000 1.000000000 0.007021063 1.000000000 0.005035247
-## 1 0.013513514 0.006024096 0.006079027 0.968906720 0.006024096 1.000000000
-## 2 0.951351351 0.013253012 1.000000000 1.000000000 0.075301205 1.000000000
-## 3 0.012012012 0.952811245 1.000000000 1.000000000 0.065060241 1.000000000
-## 4 0.416666667 0.006024096 0.030395137 1.000000000 0.201807229 0.005035247
-## 5 0.953453453 0.009036145 0.167173252 0.937311936 0.008032129 1.000000000
-## 6 1.000000000 1.000000000 0.954407295 0.546238716 1.000000000 0.992950655
-## 7 0.009009009 0.006024096 0.036474164 0.252758275 0.006024096 1.000000000
-## 8 1.000000000 1.000000000 1.000000000 0.235205617 1.000000000 0.308408862
-## 9 1.000000000 1.000000000 1.000000000 0.007021063 1.000000000 0.005035247
-## Quiescence Stemness
-## 0 1.000000000 0.007021063
-## 1 0.004454343 1.000000000
-## 2 0.040000000 1.000000000
-## 3 1.000000000 1.000000000
-## 4 0.004454343 0.007021063
-## 5 1.000000000 1.000000000
-## 6 0.040000000 0.007021063
-## 7 0.129175947 1.000000000
-## 8 0.534521158 0.007021063
-## 9 0.040000000 0.007021063
+## Angiogenesis
+## 0 NA
+## 1 0.80000000
+## 2 1.00000000
+## 3 NA
+## 4 0.78624535
+## 5 0.66022305
+## 6 0.01204819
+## 7 0.02272727
+## 8 1.00000000
These tables can be plotted with the function
plot_heatmap_features_by_clusters
. In the following example
we plot NES values with asterisks according to their significance:
Similarly, it’s possible to extract the most significant “tags” of any cluster; for example, here we extract the top 3 tags by FDRq (CSEA) and p_adj (ORA):
@@ -712,22 +798,22 @@
head(scML$cluster_data$global_expr$cluster_tags$CSEA)
## $`0`
-## [1] "Cell_Cycle" "DNA_repair" "Invasion"
+## [1] "Cell_Cycle"
##
## $`1`
-## [1] "Apoptosis" "Inflammation" "Hypoxia"
+## [1] "Hypoxia" "Inflammation" "Metastasis"
##
## $`2`
-## [1] "Hypoxia" "Metastasis" "EMT"
+## [1] "DNA_repair" "Invasion" "Proliferation"
##
## $`3`
-## [1] "EMT" "Metastasis" "Apoptosis"
+## [1] "Hypoxia" "Proliferation" "Quiescence"
##
## $`4`
-## [1] "Quiescence" "Hypoxia" "Stemness"
+## [1] "Hypoxia" "Metastasis" "Cell_Cycle"
##
## $`5`
-## [1] "Metastasis" "Hypoxia" "Inflammation"
+## [1] "DNA_damage" "Stemness" "DNA_repair"
The results of cluster enrichment can be visualized by barplots and boxplots for, respectively, categorical values and quantitative values.
@@ -739,7 +825,7 @@
head(scML$cluster_comparison$overlap_matrix)
## CNV_0 CNV_1 CNV_2 CNV_3 CNV_4
-## CNV_0 1.000000000 0.000000000 0.0000000 0.0000000 0.000000000
-## CNV_1 0.000000000 1.000000000 0.0000000 0.0000000 0.000000000
-## CNV_2 0.000000000 0.000000000 1.0000000 0.0000000 0.000000000
-## CNV_3 0.000000000 0.000000000 0.0000000 1.0000000 0.000000000
-## CNV_4 0.000000000 0.000000000 0.0000000 0.0000000 1.000000000
-## global_expr_0 0.004008016 0.004008016 0.5651303 0.5261845 0.008264463
-## global_expr_0 global_expr_1 global_expr_2 global_expr_3
-## CNV_0 0.004008016 0.6042105 0.394179894 0.372168285
-## CNV_1 0.004008016 0.3368421 0.531746032 0.559870550
-## CNV_2 0.565130261 0.0000000 0.007936508 0.006472492
-## CNV_3 0.526184539 0.0000000 0.002645503 0.000000000
-## CNV_4 0.008264463 0.1157025 0.099173554 0.078512397
-## global_expr_0 1.000000000 0.0000000 0.000000000 0.000000000
-## global_expr_4 global_expr_5 global_expr_6 global_expr_7
-## CNV_0 0.14189189 0.657587549 0.00000000 0.7674419
-## CNV_1 0.19594595 0.334630350 0.01621622 0.2325581
-## CNV_2 0.09459459 0.000000000 0.74054054 0.0000000
-## CNV_3 0.02702703 0.000000000 0.22702703 0.0000000
-## CNV_4 0.66115702 0.008264463 0.01621622 0.0000000
-## global_expr_0 0.00000000 0.000000000 0.00000000 0.0000000
-## global_expr_8 global_expr_9
-## CNV_0 0.007042254 0.000000000
-## CNV_1 0.021126761 0.008695652
-## CNV_2 0.605633803 0.200000000
-## CNV_3 0.345070423 0.782608696
-## CNV_4 0.021126761 0.008695652
-## global_expr_0 0.000000000 0.000000000
+## CNV_0 CNV_1 CNV_2 CNV_3 global_expr_0
+## CNV_0 1.0000000 0.000000000 0.0000000 0.00000000 0.389455782
+## CNV_1 0.0000000 1.000000000 0.0000000 0.00000000 0.005102041
+## CNV_2 0.0000000 0.000000000 1.0000000 0.00000000 0.559523810
+## CNV_3 0.0000000 0.000000000 0.0000000 1.00000000 0.082568807
+## global_expr_0 0.3894558 0.005102041 0.5595238 0.08256881 1.000000000
+## global_expr_1 0.6115242 0.000000000 0.3401487 0.07951070 0.000000000
+## global_expr_1 global_expr_2 global_expr_3 global_expr_4
+## CNV_0 0.6115242 0.00400000 0.12688822 0.665369650
+## CNV_1 0.0000000 0.97800000 0.05740181 0.000000000
+## CNV_2 0.3401487 0.00600000 0.17824773 0.326848249
+## CNV_3 0.0795107 0.01834862 0.64525994 0.007782101
+## global_expr_0 0.0000000 0.00000000 0.00000000 0.000000000
+## global_expr_1 1.0000000 0.00000000 0.00000000 0.000000000
+## global_expr_5 global_expr_6 global_expr_7 global_expr_8
+## CNV_0 0.00000000 0.7386364 0.007142857 0.000000000
+## CNV_1 0.79569892 0.0000000 0.850000000 0.973913043
+## CNV_2 0.02150538 0.2613636 0.007142857 0.008695652
+## CNV_3 0.18279570 0.0000000 0.135714286 0.017391304
+## global_expr_0 0.00000000 0.0000000 0.000000000 0.000000000
+## global_expr_1 0.00000000 0.0000000 0.000000000 0.000000000
Nale V, Di Nanni N, Chiodi A, Mosca E (2023). scMuffin: MUlti-Features INtegrative approach for single-cell data analysis. -R package version 1.1.3. +R package version 1.1.4.
@Manual{, title = {scMuffin: MUlti-Features INtegrative approach for single-cell data analysis}, author = {Valentina Nale and Noemi {Di Nanni} and Alice Chiodi and Ettore Mosca}, year = {2023}, - note = {R package version 1.1.3}, + note = {R package version 1.1.4}, }