diff --git a/DESCRIPTION b/DESCRIPTION index 6717042..7ef57cc 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: scMuffin Title: MUlti-Features INtegrative approach for single-cell data analysis -Version: 1.1.3 -Date: 2023-11-13 +Version: 1.1.4 +Date: 2023-11-22 Authors@R: c(person(given = "Valentina", family = "Nale", @@ -52,7 +52,8 @@ Imports: circlize, Matrix, pals, - plotrix + plotrix, + qvalue Suggests: rmarkdown, knitr, diff --git a/NAMESPACE b/NAMESPACE index f4a16db..f05482e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -20,6 +20,7 @@ export(create_scMuffinList) export(csea) export(detect_CNV_regions) export(diff_map) +export(es) export(extract_cluster_enrichment_table) export(extract_cluster_enrichment_tags) export(filter_gsl) @@ -90,6 +91,7 @@ importFrom(org.Hs.eg.db,org.Hs.egSYMBOL) importFrom(pals,alphabet) importFrom(parallel,mclapply) importFrom(plotrix,thigmophobe.labels) +importFrom(qvalue,qvalue) importFrom(stats,dhyper) importFrom(stats,glm) importFrom(stats,median) diff --git a/NEWS b/NEWS index 9aaa6c7..1b92052 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,5 @@ +version 1.1.4 +- updtaes over csea and vignette version 1.1.3 - improved point text in boxplot_cluster and barplot_cluster version 1.1.2 diff --git a/R/assess_cluster_enrichment.R b/R/assess_cluster_enrichment.R index 5597186..f6b0fc4 100644 --- a/R/assess_cluster_enrichment.R +++ b/R/assess_cluster_enrichment.R @@ -6,16 +6,17 @@ #' @param min.cells.cluster minimum number of cells of a cluster #' @param mc.cores number of cores #' @param csea.k number of CSEA permutations +#' @param min.k minimum number of valid permutations to support empirical nulls #' @description Assess cluster enrichment using ORA for categorical features and CSEA for numeric features. #' @details -#' The output of CSEA is a table with statistics for every tested gene set (gs_table element) and a table with the results of the leading edge (leading_edge element). +#' The output of CSEA is a table with statistics for every tested gene set. #' The output of ORA is composed of a series of tables with enrichment results, one for every possible categorical value. See [extract_cluster_enrichment_table] to extract summary table from CSEA and ORA results. #' #' @return scMuffinList with CSEA or ORA elements under scMuffinList$cluster_data for the considered partition #' @importFrom stats setNames #' @export -assess_cluster_enrichment <- function(scMuffinList=NULL, feature_name=NULL, partition_id=NULL, min.cells.feature=100, min.cells.cluster=10, mc.cores=1, csea.k=99){ +assess_cluster_enrichment <- function(scMuffinList=NULL, feature_name=NULL, partition_id=NULL, min.cells.feature=100, min.cells.cluster=10, mc.cores=1, csea.k=99, min.k=10){ if(length(scMuffinList[[feature_name]]$summary) == 0){ stop("Can't find scMuffinList[[feature_name]]$summary\n") @@ -55,7 +56,7 @@ assess_cluster_enrichment <- function(scMuffinList=NULL, feature_name=NULL, part cat("CSEA for ", ncol(X), "features\n") - ans$cluster_csea_res <- cluster_csea(X, cell_clusters = X_clusters, min.cells.feature=min.cells.feature, min.cells.cluster=min.cells.cluster, mc.cores=mc.cores, csea.k=csea.k) + ans$cluster_csea_res <- cluster_csea(X, cell_clusters = X_clusters, min.cells.feature=min.cells.feature, min.cells.cluster=min.cells.cluster, mc.cores=mc.cores, csea.k=csea.k, min.k=min.k) if(length(scMuffinList$cluster_data[partition_id])==0){ # nor ORA neither CSEA @@ -69,15 +70,24 @@ assess_cluster_enrichment <- function(scMuffinList=NULL, feature_name=NULL, part } + #shared_names <- which(names(scMuffinList$cluster_data[[partition_id]]$CSEA$gs_table) %in% names(ans$cluster_csea_res$gs_table)) + + # if(length(shared_names)>0){ + # scMuffinList$cluster_data[[partition_id]]$CSEA$gs_table[shared_names] <- NULL + # scMuffinList$cluster_data[[partition_id]]$CSEA$leading_edge[shared_names] <- NULL + # } + # + # scMuffinList$cluster_data[[partition_id]]$CSEA$gs_table <- c(scMuffinList$cluster_data[[partition_id]]$CSEA$gs_table, ans$cluster_csea_res$gs_table) + # scMuffinList$cluster_data[[partition_id]]$CSEA$leading_edge <- c(scMuffinList$cluster_data_full[[partition_id]]$CSEA$leading_edge, ans$cluster_csea_res$leading_edge) ##append + + shared_names <- which(names(scMuffinList$cluster_data[[partition_id]]$CSEA) %in% names(ans$cluster_csea_res)) - shared_names <- which(names(scMuffinList$cluster_data[[partition_id]]$CSEA$gs_table) %in% names(ans$cluster_csea_res$gs_table)) if(length(shared_names)>0){ - scMuffinList$cluster_data[[partition_id]]$CSEA$gs_table[shared_names] <- NULL - scMuffinList$cluster_data[[partition_id]]$CSEA$leading_edge[shared_names] <- NULL + scMuffinList$cluster_data[[partition_id]]$CSEA[shared_names] <- NULL } - - scMuffinList$cluster_data[[partition_id]]$CSEA$gs_table <- c(scMuffinList$cluster_data[[partition_id]]$CSEA$gs_table, ans$cluster_csea_res$gs_table) - scMuffinList$cluster_data[[partition_id]]$CSEA$leading_edge <- c(scMuffinList$cluster_data_full[[partition_id]]$CSEA$leading_edge, ans$cluster_csea_res$leading_edge) ##append + + scMuffinList$cluster_data[[partition_id]]$CSEA <- c(scMuffinList$cluster_data[[partition_id]]$CSEA, ans$cluster_csea_res) + } } diff --git a/R/calc_gs_perm.R b/R/calc_gs_perm.R index 7267d10..ff71d9b 100644 --- a/R/calc_gs_perm.R +++ b/R/calc_gs_perm.R @@ -4,14 +4,12 @@ #' @param gs gene set #' @description Calculate permutations -calc_gs_perm <- function(rll, perm, gs){ - - #out <- unlist(lapply(rll, function(x) es(which(perm %in% gs), array(x, dimnames=list(perm))))) +calc_gs_perm <- function(rll=NULL, perm=NULL, gs=NULL){ out <- setNames(numeric(length(rll)), names(rll)) for(i in 1:length(rll)){ - out[i] <- es(which(perm[[i]] %in% gs), array(rll[[i]], dimnames=list(perm[[i]]))) + out[i] <- es(which(perm[[i]] %in% gs), array(rll[[i]], dimnames=list(perm[[i]])))[, 1] } return(out) diff --git a/R/cluster_csea.R b/R/cluster_csea.R index fe7f399..c8a3010 100644 --- a/R/cluster_csea.R +++ b/R/cluster_csea.R @@ -6,13 +6,13 @@ #' @param min.cells.cluster minimum number of cells of a cluster #' @param mc.cores number of cores #' @param csea.k number of permutations -#' @param min.k.nes minimum number of not null NES values +#' @param min.k minimum number of valid permutations to support empirical nulls #' @return list with two elements: gs_table and leading_edge. See [csea()] #' @export #' @description Calculate cluster enrichment by csea approach -cluster_csea <- function(feature_values=NULL, cell_clusters=NULL, min.cells.feature=100, min.cells.cluster=10, mc.cores=1, csea.k=99, min.k.nes=10){ +cluster_csea <- function(feature_values=NULL, cell_clusters=NULL, min.cells.feature=100, min.cells.cluster=10, mc.cores=1, csea.k=99, min.k=10){ X <- as.matrix(feature_values) @@ -35,25 +35,12 @@ cluster_csea <- function(feature_values=NULL, cell_clusters=NULL, min.cells.feat #CSEA process on features-by-cells gsl <- lapply(split(cell_cluster_ok, cell_cluster_ok), function(x) names(x)) - csea_res <- csea(X, gsl, mc_cores_perm = mc.cores, ord.mode = rep(-1, ncol(X)), k = csea.k, min.size = min.cells.feature, min.k.nes = min.k.nes) + csea_res <- csea(X, gsl, mc_cores_perm = mc.cores, ord.mode = rep(-1, ncol(X)), k = csea.k, min.size = min.cells.feature, min.k = min.k) # insert a row for excluded clusters if(length(excluded_clusters)>0){ - #debug - cat(">>>>>>DEBUG<<<<<<") - - #gs table - #csea_res$gs_table <- rbind(csea_res$gs_table, data.frame(id=excluded_clusters, es=0, p_val=1, adj_p_val=1, nes=0, FDRq=1, row.names=excluded_clusters, stringsAsFactors = F)) #old - csea_res$gs_table <- lapply(csea_res$gs_table, function(x) rbind(x, data.frame(id=excluded_clusters, es=0, p_val=1, adj_p_val=1, nes=0, FDRq=1, n_pos_perm=0, n_neg_perm=0, row.names=excluded_clusters, stringsAsFactors = F))) - - csea_res$gs_table <- lapply(csea_res$gs_table, function(x) x[match(names(cluster_size), rownames(x)), ]) - - #leading_edge - csea_res$leading_edge <- lapply(csea_res$leading_edge, function(x) rbind(x, data.frame(tags=0, tags_perc=0, list_top=0, list_top_perc=0, lead_edge=0, lead_edge_subset="", row.names=excluded_clusters, stringsAsFactors = F))) - - #csea_res$leading_edge <- csea_res$leading_edge[match(names(cluster_size), rownames(csea_res$leading_edge)), ] #old - csea_res$leading_edge <- lapply(csea_res$leading_edge, function(x) x[match(names(cluster_size), rownames(x)), ]) + csea_res <- lapply(csea_res, function(x) rbind(x, data.frame(id=excluded_clusters, es=NA, p_val=NA, adj_p_val=NA, nes=NA, FDRq=NA, nperm=NA, tags=NA, tags_perc=NA, list_top=NA, list_top_perc=NA, lead_edge=NA, row.names=excluded_clusters, stringsAsFactors = F))) } diff --git a/R/csea.R b/R/csea.R index 67fd831..7b5fdff 100644 --- a/R/csea.R +++ b/R/csea.R @@ -5,10 +5,11 @@ #' @param ord.mode ordering mode: -1 -> descending; 1 ascending; must be of length equal to `ncol(rl)` #' @param mc_cores_path number of cores to use for parallel calculation of gene set lists; the total number of cpu used will be mc_cores_path * mc_cores_perm #' @param mc_cores_perm number of cores to use for parallel calculation of ranked list permutations; the total number of cpu used will be mc_cores_path * mc_cores_perm -#' @param min.k.nes minimum number of not null NES values +#' @param min.k minimum number of valid permutations to support empirical nulls #' @param min.size minimum number of cells with a not null value #' @import parallel #' @importFrom stats p.adjust +#' @importFrom qvalue qvalue #' @return list with two data.frames, gs_table and leading_edge. #` \enumerate{ #` \item {gs_table} a data.frame with: @@ -31,7 +32,7 @@ #' @export -csea <- function(rl, gsl, k=100, min.size=100, ord.mode=-1, min.k.nes=10, mc_cores_path=1, mc_cores_perm=1){ +csea <- function(rl=NULL, gsl=NULL, k=100, min.size=100, ord.mode=-1, min.k=10, mc_cores_path=1, mc_cores_perm=1){ #cheks if(!is.matrix(rl) | !is.numeric(rl)){ @@ -42,7 +43,7 @@ csea <- function(rl, gsl, k=100, min.size=100, ord.mode=-1, min.k.nes=10, mc_cor stop("length(ord.mode) must be equal to ncol(rl)") } - min.k.nes <- min(min.k.nes, k) + min.k <- min(min.k, k) #create the list of ranked vectors rll <- vector('list', ncol(rl)) @@ -57,17 +58,17 @@ csea <- function(rl, gsl, k=100, min.size=100, ord.mode=-1, min.k.nes=10, mc_cor cat("Ranked list that passed the checks:", names(rll), "\n") #real es - print("ES...") - #real_es <- lapply(gsl, function(x) lapply(rll, function(y) es(which(names(y) %in% x), y))) - #real_es <- do.call(rbind, real_es) - - real_es_data <- lapply(gsl, function(x) lapply(rll, function(y) es(which(names(y) %in% x), y, le=T))) + cat("ES of input data...\n") + + gsl_size <- lengths(gsl) + + real_es_data <- lapply(gsl, function(x) lapply(rll, function(y) es(which(names(y) %in% x), y))) real_es <- do.call(rbind, lapply(real_es_data, function(x) unlist(lapply(x, function(y) y$es)))) - + leading_edge <- vector("list", length(rll)) names(leading_edge) <- names(rll) for(i in 1:length(leading_edge)){ - leading_edge[[i]] <- do.call(rbind, lapply(real_es_data, function(x) x[[i]]$lea)) + leading_edge[[i]] <- do.call(rbind, lapply(real_es_data, function(x) x[[i]][, -1])) } #permutations @@ -81,37 +82,9 @@ csea <- function(rl, gsl, k=100, min.size=100, ord.mode=-1, min.k.nes=10, mc_cor x_perm[[i]] <- lapply(rll, function(x) sample(rownames(x), length(x))) } - - print("calculating permutations") - if(mc_cores_path==1){ - if(mc_cores_perm == 1){ - #res <- lapply(gsl, function(x) unlist(lapply(x_perm, function(y) calc_gs_perm(rl, y, x)))) - res <- lapply(gsl, function(x) do.call(rbind, lapply(x_perm, function(y) calc_gs_perm(rll, y, x)))) - }else{ - cat(k, "permutations on", mc_cores_perm, "cores\n") - #res <- lapply(gsl, function(x) unlist(mclapply(x_perm, function(y) calc_gs_perm(rl, y, x), mc.cores=mc_cores_perm))) - res <- lapply(gsl, function(x) do.call(rbind, mclapply(x_perm, function(y) calc_gs_perm(rll, y, x), mc.cores=mc_cores_perm))) - } - }else{ - cat(length(gsl), "gene sets on", mc_cores_path, "cores\n") - if(mc_cores_perm == 1){ - #res <- parallel::mclapply(gsl, function(x) unlist(lapply(x_perm, function(y) calc_gs_perm(rl, y, x))), mc.cores = mc_cores_path) - res <- mclapply(gsl, function(x) do.call(rbind, lapply(x_perm, function(y) calc_gs_perm(rll, y, x))), mc.cores = mc_cores_path) - }else{ - cat(k, "permutations on", mc_cores_perm, "cores\n") - #res <- parallel::mclapply(gsl, function(x) unlist(parallel::mclapply(x_perm, function(y) calc_gs_perm(rl, y, x), mc.cores=mc_cores_perm)), mc.cores = mc_cores_path) - res <- mclapply(gsl, function(x) do.call(rbind, mclapply(x_perm, function(y) calc_gs_perm(rll, y, x), mc.cores=mc_cores_perm)), mc.cores = mc_cores_path) - } - } - - - #list of pathwa-by-k matrices of permuted es - #res <- do.call(rbind, res) - # if(nrow(res) != length(gsl) | ncol(res) != k){ - # stop("not all the permutations returned a correct value\n") - # } - #res <- cbind(real_es, res) - + cat("ES of permutations...\n") + res <- mclapply(gsl, function(x) do.call(rbind, mclapply(x_perm, function(y) calc_gs_perm(rll, y, x), mc.cores=mc_cores_perm)), mc.cores = mc_cores_path) + temp <- vector('list', length(rll)) names(temp) <- colnames(rl) for(i in 1:length(rll)){ @@ -121,8 +94,6 @@ csea <- function(rl, gsl, k=100, min.size=100, ord.mode=-1, min.k.nes=10, mc_cor rm(temp) - - #statistics print("calculating statistics...") @@ -133,54 +104,94 @@ csea <- function(rl, gsl, k=100, min.size=100, ord.mode=-1, min.k.nes=10, mc_cor for(i in 1:length(rll)){ - p_val <- apply(res[[i]], 1, function(x) ifelse(x[1] >= 0, sum(x >= x[1]) / length(x), sum(x <= x[1]) / length(x))) + n_pos_perm <- rowSums(res[[i]]>0) + n_neg_perm <- rowSums(res[[i]]<0) + + p_val <- apply(res[[i]], 1, function(x) ifelse(x[1] >= 0, sum(x >= x[1]) / length(x[x>=0]), sum(x <= x[1]) / length(x[x<=0]))) + #p-values for real ES == 0 are set to 1 - p_val[res[[i]][, 1] == 0] <- 1 + #p_val[res[[i]][, 1] == 0] <- 1 + + idx_na <- which(res[[i]][, 1] == 0) + if(length(idx_na)>0){ + cat("\tES==0:\n") + cat("\t", rownames(res[[i]])[idx_na], "\n") + p_val[idx_na] <- NA ### + } + idx_na <- which(res[[i]][, 1] > 0 & n_pos_perm < min.k) + if(length(idx_na)>0){ + cat("\tES>0 but less than", min.k, " positive ES in permutations\n") + cat("\t", rownames(res[[i]])[idx_na], "\n") + p_val[idx_na] <- NA ### + } + idx_na <- which(res[[i]][, 1] < 0 & n_neg_perm < min.k) + if(length(idx_na)>0){ + cat("\tES<0 but less than", min.k, " negative ES in permutations\n") + cat("\t", rownames(res[[i]])[idx_na], "\n") + p_val[idx_na] <- NA ### + } #normalized ES - n_pos_perm <- rowSums(res[[i]]>0) - n_neg_perm <- rowSums(res[[i]]<0) means <- t(apply(res[[i]], 1, function(x) c(mean(x[x>0]), abs(mean(x[x<0]))))) #positive, negative means[is.nan(means)] <- 0 #NaN values are caused by the absence of any positive or negative value nes <- res[[i]] / means[, 1] nes_neg <- res[[i]] / means[, 2] nes[res[[i]] < 0] <- nes_neg[res[[i]] < 0] - nes[is.nan(nes)] <- 0 #NaN values are caused by 0/0 + nes[is.nan(nes)] <- NA #NaN values are caused by 0/0 rm(means, nes_neg) - #if there are not at least min.k.nes the NES is unrelieable - nes[nes>0 & n_pos_perm < min.k.nes] <- 0 - nes[nes<0 & n_neg_perm < min.k.nes] <- 0 + #if there are not at least min.k the NES is unrelieable + nes[nes>0 & n_pos_perm < min.k] <- NA + nes[nes<0 & n_neg_perm < min.k] <- NA #calculate FDR all_nes <- as.numeric(nes) - n_nes_pos <- sum(nes>0) - n_nes_neg <- sum(nes<0) - n_real_nes_pos <- sum(nes[,1] > 0) - n_real_nes_neg <- sum(nes[,1] < 0) - if((n_nes_pos + n_nes_neg) != (k*length(gsl) + nrow(nes))){ - warning('some nes value is equal to zero') - } - + n_nes_pos <- sum(nes>0, na.rm = T) + n_nes_neg <- sum(nes<0, na.rm = T) + n_real_nes_pos <- sum(nes[,1] > 0, na.rm = T) + n_real_nes_neg <- sum(nes[,1] < 0, na.rm = T) + #FDR: NES* > 0: fdrq = #(all positive NESp >= NES*) / #(all positive NESp) / [ #(all NES* >= NES*) / (all positive NES*) ] #FDR: NES* < 0: fdrq = #(all negative NESp <= NES*) / #(all negative NESp) / [ #(all NES* <= NES*) / (all negative NES*) ] - fdrq <- sapply(nes[, 1], function(x) ifelse(x>0, sum(all_nes >= x) / n_nes_pos, sum(all_nes <= x) / n_nes_neg)) - fdrq <- fdrq / sapply(nes[, 1], function(x) ifelse(x>0, sum(nes[, 1] >= x) / n_real_nes_pos, sum(nes[, 1] <= x) / n_real_nes_neg)) + fdrq <- sapply(nes[, 1], function(x) ifelse(x>0, sum(all_nes >= x, na.rm = T) / n_nes_pos, sum(all_nes <= x, na.rm = T) / n_nes_neg)) + fdrq <- fdrq / sapply(nes[, 1], function(x) ifelse(x>0, sum(nes[, 1] >= x, na.rm = T) / n_real_nes_pos, sum(nes[, 1] <= x, na.rm = T) / n_real_nes_neg)) #q of nes that are equal to 0 are set to 1 - fdrq[nes[, 1] == 0] <- 1 + #fdrq[nes[, 1] == 0] <- 1 rm(all_nes) fdrq[fdrq>1] <- 1 - + idx_replace <- which(fdrq=0){ - - tags <- sum(idx <= wm) - list_top <- wm - lead_edge_subset <- paste0(names(x)[intersect(1:wm, idx)], collapse = ";") - - }else{ - - tags <- sum(idx > wm) - list_top <- N - wm - lead_edge_subset <- paste0(names(x)[intersect(wm:N, idx)], collapse = ";") - - } - - tags_perc <- tags / length(idx) - list_top_perc <- list_top / length(x) + + + if(es >=0){ + + tags <- sum(idx <= wm) + list_top <- wm + lead_edge_subset <- paste0(names(x)[intersect(1:wm, idx)], collapse = ";") + + }else{ + + tags <- sum(idx >= wm) + list_top <- N - (wm-1) + lead_edge_subset <- paste0(names(x)[intersect(wm:N, idx)], collapse = ";") + } + + tags_perc <- tags / length(idx) + list_top_perc <- list_top / length(x) + } - - if(le){ - - lead_edge <- tags_perc * (1-list_top_perc) * N / (N - Nh) - - return(list(es=es, lea=data.frame(tags=tags, tags_perc=tags_perc, list_top=list_top, list_top_perc=list_top_perc, lead_edge=lead_edge, lead_edge_subset=lead_edge_subset, stringsAsFactors = F))) - - }else{ - - return(es) - } - - - - + + lead_edge <- tags_perc * (1-list_top_perc) * N / (N - Nh) + + return(data.frame(es=es, tags=tags, tags_perc=tags_perc, list_top=list_top, list_top_perc=list_top_perc, lead_edge=lead_edge, lead_edge_subset=lead_edge_subset, stringsAsFactors = F)) + } diff --git a/R/extract_cluster_enrichment_table.R b/R/extract_cluster_enrichment_table.R index 91f3d83..4b59e34 100644 --- a/R/extract_cluster_enrichment_table.R +++ b/R/extract_cluster_enrichment_table.R @@ -3,7 +3,7 @@ #' @param partition_id id of the partition to be used #' @param type "CSEA" or "ORA" #' @param quantity possible values are as follows. CSEA: es, enrichment score; p_val empirical p-value; adj_p_val, FDR (BH); nes normalized enrichment score; FDRq empirical FDR. ORA: wbd (white balls drawn), number of cells with the value of interest in the cluster; exp expected number of cells with the value of interest in the cluster; er, enrichment ratio; p, hypergeometric test p-value; p_adj, BH FDR -#' @param feature_id name of the features that will be considered. Must be any of `colnames(scMuffinList$cluster_data$[[parition_id]]$CSEA$gs_table)` or `names(scMuffinList$cluster_data$[[parition_id]]$ORA)` +#' @param feature_id name of the features that will be considered. Must be any of `colnames(scMuffinList$cluster_data$[[parition_id]]$CSEA)` or `names(scMuffinList$cluster_data$[[parition_id]]$ORA)` #' @description Extract cluster enrichment table with the desired quantity from the results generated by means of [assess_cluster_enrichment()] #' @return A cluster enrichment table with the desired quantity #' @importFrom stats setNames @@ -23,11 +23,13 @@ extract_cluster_enrichment_table <- function(scMuffinList, partition_id=NULL, ty if(type == "CSEA" & !is.null(scMuffinList$cluster_data[[partition_id]]$CSEA)){ - ans <- scMuffinList$cluster_data[[partition_id]]$CSEA$gs_table + #ans <- scMuffinList$cluster_data[[partition_id]]$CSEA$gs_table + ans <- scMuffinList$cluster_data[[partition_id]]$CSEA if(!is.null(feature_id)){ if(!any(names(ans) %in% feature_id)){ - stop("Can't find any names of scMuffinList$cluster_data[[partition_id]]$CSEA$gs_table equal to", feature_id, "\n") + #stop("Can't find any names of scMuffinList$cluster_data[[partition_id]]$CSEA$gs_table equal to", feature_id, "\n") + stop("Can't find any names of scMuffinList$cluster_data[[partition_id]]$CSEA equal to", feature_id, "\n") } ans <- ans[names(ans) %in% feature_id] diff --git a/R/plot_heatmap_dataset_comparison.R b/R/plot_heatmap_dataset_comparison.R index 1ac2614..c902c4f 100644 --- a/R/plot_heatmap_dataset_comparison.R +++ b/R/plot_heatmap_dataset_comparison.R @@ -7,12 +7,13 @@ #' @param res image res #' @param outfile File name to save the figure as png file #' @param show.gs.source whether to show or not gene set source +#' @param na_col color for NA values #' @param ... arguments passed to ComplexHeatmap::Heatmap #' @export #' @import ComplexHeatmap pals #' @importFrom circlize colorRamp2 -plot_heatmap_dataset_comparison <- function(dataset_cmp_list=NULL, type="score", show.gs.source=FALSE, outfile=NULL, width=200, height=200, units="mm", res=300, ...){ +plot_heatmap_dataset_comparison <- function(dataset_cmp_list=NULL, type="score", show.gs.source=FALSE, outfile=NULL, width=200, height=200, units="mm", res=300, na_col="black", ...){ type <- match.arg(type, c("score", "significance")) if(!is.null(outfile)){ @@ -31,11 +32,11 @@ plot_heatmap_dataset_comparison <- function(dataset_cmp_list=NULL, type="score", M <- as.matrix(M) X <- adj_outliers_col(as.numeric(M)) - if(any(X<0)){ - X_extreme <- max(abs(min(X)), max(X)) + if(any(X<0, na.rm = T)){ + X_extreme <- max(abs(min(X, na.rm = T)), max(X, na.rm = T), na.rm = T) X_extreme <- c(-X_extreme, X_extreme) }else{ - X_extreme <- max(X) + X_extreme <- max(X, na.rm = T) X_extreme <- c(0, X_extreme) } @@ -49,7 +50,7 @@ plot_heatmap_dataset_comparison <- function(dataset_cmp_list=NULL, type="score", ca <- columnAnnotation(markers=ca, col=list(markers=setNames(brewer.pastel1(length(levels(ca)))[as.numeric(ca)], ca))) } - hm <- Heatmap(as.matrix(M), col=col_fun, right_annotation = ra, top_annotation = ca, name = name, ...) + hm <- Heatmap(as.matrix(M), col=col_fun, right_annotation = ra, top_annotation = ca, name = name, na_col=na_col, ...) draw(hm) if(!is.null(outfile)){ diff --git a/R/plot_heatmap_features_by_clusters.R b/R/plot_heatmap_features_by_clusters.R index 3e29220..4745c99 100644 --- a/R/plot_heatmap_features_by_clusters.R +++ b/R/plot_heatmap_features_by_clusters.R @@ -11,25 +11,28 @@ #' @param partition_id identifier of the partition to be considered #' @param scale whether to scale the features #' @param pal color palette. Default to rev(pals::brewer.rdylbu(10)) (negative values) or pals::brewer.ylorrd(5)) (positive values) +#' @param na_col color for NA values #' @param ... further arguments to ComplexHeatmap::Heatmap #' @export #' @import ComplexHeatmap grDevices pals #' @importFrom circlize colorRamp2 -plot_heatmap_features_by_clusters <- function(scMuffinList=NULL, feature_source=NULL, partition_id=NULL, significance_matrix=NULL, sig_threshold=0.05, file=NULL, width=180, height=180, units="mm", res=300, scale=TRUE, pal=NULL, ...){ +plot_heatmap_features_by_clusters <- function(scMuffinList=NULL, feature_source=NULL, partition_id=NULL, significance_matrix=NULL, sig_threshold=0.05, file=NULL, width=180, height=180, units="mm", res=300, scale=TRUE, pal=NULL, na_col="black", ...){ if(!is.null(significance_matrix)){ cell_fun_asterisk <- function(j, i, x, y, w, h, fill) { - if(sig_mat[i, j] < sig_threshold) { - grid.text("*", x, y) + if(!is.na(sig_mat[i, j])){ + if(sig_mat[i, j] < sig_threshold) { + grid.text("*", x, y) + } } } }else{ cell_fun_asterisk <- NULL } - + if(is.matrix(feature_source)){ X <- t(feature_source) }else{ @@ -51,7 +54,7 @@ plot_heatmap_features_by_clusters <- function(scMuffinList=NULL, feature_source= } } } - + #Detect and remove null features null_rows <- which(rowSums(abs(X), na.rm = T)==0) if(length(null_rows) > 0){ @@ -90,21 +93,21 @@ plot_heatmap_features_by_clusters <- function(scMuffinList=NULL, feature_source= } if(!is.null(file)){ - png(file, width=width, height=height, units=units, res=res) + png(file, width=width, height=height, units=units, res=res) } if(scale){ X <- t(scale(t(X))) } - h_tot_go <- Heatmap(X, show_row_names = T, cell_fun = cell_fun_asterisk, col=pal, ...) + h_tot_go <- Heatmap(X, show_row_names = T, cell_fun = cell_fun_asterisk, col=pal, na_col=na_col, ...) draw(h_tot_go, heatmap_legend_side = "left") if(!is.null(file)){ dev.off() } - + }else{ message("Are all rows empty?\n") } diff --git a/R/scMuffinList_demo.R b/R/scML_demo.R similarity index 61% rename from R/scMuffinList_demo.R rename to R/scML_demo.R index 133fb26..b2b3122 100644 --- a/R/scMuffinList_demo.R +++ b/R/scML_demo.R @@ -1,3 +1,3 @@ -#' scMuffinList_demo +#' scML_demo #' scMuffinList object with a partial dataset used in vignettes. -"scMuffinList_demo" \ No newline at end of file +"scML_demo" \ No newline at end of file diff --git a/data/scML_demo.rda b/data/scML_demo.rda new file mode 100644 index 0000000..45d4371 Binary files /dev/null and b/data/scML_demo.rda differ diff --git a/data/scMuffinList_demo.rda b/data/scMuffinList_demo.rda deleted file mode 100644 index 15d7bf4..0000000 Binary files a/data/scMuffinList_demo.rda and /dev/null differ diff --git a/docs/404.html b/docs/404.html index c54f415..7466335 100644 --- a/docs/404.html +++ b/docs/404.html @@ -24,7 +24,7 @@ scMuffin - 1.1.3 + 1.1.4 + + + + + +
+
+
+ +
+

scML_demo +scMuffinList object with a partial dataset used in vignettes.

+
+ +
+

Usage

+
scML_demo
+
+ +
+

Format

+

An object of class list of length 9.

+
+ +
+ + +
+ + + +
+ + + + + + + diff --git a/docs/reference/sc_create_null.html b/docs/reference/sc_create_null.html index f20f31a..18d8fa6 100644 --- a/docs/reference/sc_create_null.html +++ b/docs/reference/sc_create_null.html @@ -10,7 +10,7 @@ scMuffin - 1.1.3 + 1.1.4