diff --git a/DESCRIPTION b/DESCRIPTION index 1b5c2843..f6fae51b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: SCP Type: Package Title: Single Cell Pipeline -Version: 0.5.2 +Version: 0.5.3 Author: Hao Zhang Maintainer: Hao Zhang Description: An end-to-end Single-Cell Pipeline designed to facilitate comprehensive analysis and exploration of single-cell data. diff --git a/R/SCP-analysis.R b/R/SCP-analysis.R index 9cc13f2a..c6aabf5e 100644 --- a/R/SCP-analysis.R +++ b/R/SCP-analysis.R @@ -1814,7 +1814,7 @@ ListDB <- function(species = "Homo_sapiens", #' Default is c("GO", "GO_BP", "GO_CC", "GO_MF", "KEGG", "WikiPathway", "Reactome", #' "CORUM", "MP", "DO", "HPO", "PFAM", "CSPA", "Surfaceome", "SPRomeDB", "VerSeDa", #' "TFLink", "hTFtarget", "TRRUST", "JASPAR", "ENCODE", "MSigDB", -#' "CellChat", "Chromosome", "GeneType", "Enzyme", "TF"). +#' "CellTalk", "CellChat", "Chromosome", "GeneType", "Enzyme", "TF"). #' @param db_IDtypes A character vector specifying the desired ID types to be used for gene identifiers in the gene annotation databases. Default is c("symbol", "entrez_id", "ensembl_id"). #' @param db_version A character vector specifying the version of the gene annotation databases to be retrieved. Default is "latest". #' @param db_update A logical value indicating whether the gene annotation databases should be forcefully updated. If set to FALSE, the function will attempt to load the cached databases instead. Default is FALSE. @@ -1901,7 +1901,7 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), "CORUM", "MP", "DO", "HPO", "PFAM", "CSPA", "Surfaceome", "SPRomeDB", "VerSeDa", "TFLink", "hTFtarget", "TRRUST", "JASPAR", "ENCODE", - "MSigDB", "CellChat", + "MSigDB", "CellTalk", "CellChat", "Chromosome", "GeneType", "Enzyme", "TF" ), db_IDtypes = c("symbol", "entrez_id", "ensembl_id"), @@ -1913,15 +1913,14 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), db_list <- list() for (sps in species) { message("Species: ", sps) - default_IDtypes <- c( + default_IDtypes <- list( "GO" = "entrez_id", "GO_BP" = "entrez_id", "GO_CC" = "entrez_id", "GO_MF" = "entrez_id", "KEGG" = "entrez_id", "WikiPathway" = "entrez_id", "Reactome" = "entrez_id", "CORUM" = "symbol", "MP" = "symbol", "DO" = "symbol", "HPO" = "symbol", "PFAM" = "entrez_id", "Chromosome" = "entrez_id", "GeneType" = "entrez_id", "Enzyme" = "entrez_id", "TF" = "symbol", "CSPA" = "symbol", "Surfaceome" = "symbol", "SPRomeDB" = "entrez_id", "VerSeDa" = "symbol", "TFLink" = "symbol", "hTFtarget" = "symbol", "TRRUST" = "symbol", "JASPAR" = "symbol", "ENCODE" = "symbol", - "MSigDB" = "symbol", "CellChat" = "symbol" - # "CellTalk" = "symbol", "DGI" = "entrez_id" + "MSigDB" = c("symbol", "ensembl_id"), "CellTalk" = "symbol", "CellChat" = "symbol" ) if (!is.null(custom_TERM2GENE)) { if (length(db) > 1) { @@ -1931,7 +1930,7 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), stop("When building a custom database, 'custom_IDtype', 'custom_species' and 'custom_version' must be provided.") } custom_IDtype <- match.arg(custom_IDtype, choices = c("symbol", "entrez_id", "ensembl_id")) - default_IDtypes[db] <- custom_IDtype + default_IDtypes[[db]] <- custom_IDtype } if (isFALSE(db_update) && is.null(custom_TERM2GENE)) { for (term in db) { @@ -2022,7 +2021,7 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), if (subterm == "GO") { TERM2GENE <- bg[, c("GOALL", org_key)] TERM2NAME <- bg[, c("GOALL", "TERM")] - colnames(TERM2GENE) <- c("Term", default_IDtypes[subterm]) + colnames(TERM2GENE) <- c("Term", default_IDtypes[[subterm]]) colnames(TERM2NAME) <- c("Term", "Name") TERM2NAME[["ONTOLOGY"]] <- bg[["ONTOLOGYALL"]] semData <- NULL @@ -2030,7 +2029,7 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), simpleterm <- unlist(strsplit(subterm, split = "_"))[2] TERM2GENE <- bg[which(bg[["ONTOLOGYALL"]] %in% simpleterm), c("GOALL", org_key)] TERM2NAME <- bg[which(bg[["ONTOLOGYALL"]] %in% simpleterm), c("GOALL", "TERM")] - colnames(TERM2GENE) <- c("Term", default_IDtypes[subterm]) + colnames(TERM2GENE) <- c("Term", default_IDtypes[[subterm]]) colnames(TERM2NAME) <- c("Term", "Name") TERM2NAME[["ONTOLOGY"]] <- simpleterm semData <- suppressMessages(GOSemSim::godata(orgdb, ont = simpleterm)) @@ -2088,7 +2087,7 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), TERM2NAME[, "Name"] <- gsub(pattern = paste0(" - ", paste0(unlist(strsplit(db_species["KEGG"], split = "_")), collapse = " "), ".*$"), replacement = "", x = TERM2NAME[, "Name"]) TERM2NAME <- TERM2NAME[TERM2NAME[, "Pathway"] %in% TERM2GENE[, "Pathway"], , drop = FALSE] - colnames(TERM2GENE) <- c("Term", default_IDtypes["KEGG"]) + colnames(TERM2GENE) <- c("Term", default_IDtypes[["KEGG"]]) colnames(TERM2NAME) <- c("Term", "Name") TERM2GENE <- na.omit(unique(TERM2GENE)) TERM2NAME <- na.omit(unique(TERM2NAME)) @@ -2126,7 +2125,7 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), warning("Use the human annotation to create the WikiPathway database for ", sps, immediate. = TRUE) db_species["WikiPathway"] <- "Homo_sapiens" wiki_sp <- "Homo_sapiens" - gmtfile <- gmtfiles[grep(wiki_sp, gmtfile, fixed = TRUE)] + gmtfile <- gmtfiles[grep(wiki_sp, gmtfiles, fixed = TRUE)] } else { stop("Stop the preparation.") } @@ -2144,7 +2143,7 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), bg <- do.call(rbind.data.frame, wiki_gmt) TERM2GENE <- bg[, c(1, 2)] TERM2NAME <- bg[, c(1, 3)] - colnames(TERM2GENE) <- c("Term", default_IDtypes["WikiPathway"]) + colnames(TERM2GENE) <- c("Term", default_IDtypes[["WikiPathway"]]) colnames(TERM2NAME) <- c("Term", "Name") TERM2GENE <- na.omit(unique(TERM2GENE)) TERM2NAME <- na.omit(unique(TERM2NAME)) @@ -2182,7 +2181,7 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), df$PATHNAME <- gsub(x = df$PATHNAME, pattern = paste0("^", reactome_sp, ": "), replacement = "", perl = TRUE) TERM2GENE <- df[, c(2, 1)] TERM2NAME <- df[, c(2, 3)] - colnames(TERM2GENE) <- c("Term", default_IDtypes["Reactome"]) + colnames(TERM2GENE) <- c("Term", default_IDtypes[["Reactome"]]) colnames(TERM2NAME) <- c("Term", "Name") TERM2GENE <- na.omit(unique(TERM2GENE)) TERM2NAME <- na.omit(unique(TERM2NAME)) @@ -2217,7 +2216,7 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), TERM2GENE <- read.gmt(gsub(".gz", "", temp)) version <- "Harmonizome 3.0" TERM2NAME <- TERM2GENE[, c(1, 1)] - colnames(TERM2GENE) <- c("Term", default_IDtypes["CORUM"]) + colnames(TERM2GENE) <- c("Term", default_IDtypes[["CORUM"]]) colnames(TERM2NAME) <- c("Term", "Name") TERM2GENE <- na.omit(unique(TERM2GENE)) TERM2NAME <- na.omit(unique(TERM2NAME)) @@ -2314,7 +2313,7 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), TERM2GENE <- mp_gene[, c("V5", "symbol")] TERM2NAME <- mp_gene[, c("V5", "MP")] - colnames(TERM2GENE) <- c("Term", default_IDtypes["MP"]) + colnames(TERM2GENE) <- c("Term", default_IDtypes[["MP"]]) colnames(TERM2NAME) <- c("Term", "Name") TERM2GENE <- na.omit(unique(TERM2GENE)) TERM2NAME <- na.omit(unique(TERM2NAME)) @@ -2352,7 +2351,7 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), } TERM2GENE <- do_df[, c("DOID", "DBObjectSymbol")] TERM2NAME <- do_df[, c("DOID", "DOtermName")] - colnames(TERM2GENE) <- c("Term", default_IDtypes["DO"]) + colnames(TERM2GENE) <- c("Term", default_IDtypes[["DO"]]) colnames(TERM2NAME) <- c("Term", "Name") TERM2GENE <- na.omit(unique(TERM2GENE)) TERM2NAME <- na.omit(unique(TERM2NAME)) @@ -2390,7 +2389,7 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), TERM2GENE <- hpo[, c("hpo_id", "gene_symbol")] TERM2NAME <- hpo[, c("hpo_id", "hpo_name")] - colnames(TERM2GENE) <- c("Term", default_IDtypes["HPO"]) + colnames(TERM2GENE) <- c("Term", default_IDtypes[["HPO"]]) colnames(TERM2NAME) <- c("Term", "Name") TERM2GENE <- na.omit(unique(TERM2GENE)) TERM2NAME <- na.omit(unique(TERM2NAME)) @@ -2419,7 +2418,7 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), bg[is.na(bg[["PFAM_name"]]), "PFAM_name"] <- bg[is.na(bg[["PFAM_name"]]), "PFAM"] TERM2GENE <- bg[, c("PFAM", org_key)] TERM2NAME <- bg[, c("PFAM", "PFAM_name")] - colnames(TERM2GENE) <- c("Term", default_IDtypes["PFAM"]) + colnames(TERM2GENE) <- c("Term", default_IDtypes[["PFAM"]]) colnames(TERM2NAME) <- c("Term", "Name") TERM2GENE <- na.omit(unique(TERM2GENE)) TERM2NAME <- na.omit(unique(TERM2NAME)) @@ -2444,7 +2443,7 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), chr[, 2] <- paste0("chr", chr[, 2]) TERM2GENE <- chr[, c(2, 1)] TERM2NAME <- chr[, c(2, 2)] - colnames(TERM2GENE) <- c("Term", default_IDtypes["Chromosome"]) + colnames(TERM2GENE) <- c("Term", default_IDtypes[["Chromosome"]]) colnames(TERM2NAME) <- c("Term", "Name") TERM2GENE <- na.omit(unique(TERM2GENE)) TERM2NAME <- na.omit(unique(TERM2NAME)) @@ -2469,7 +2468,7 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), bg <- suppressMessages(select(orgdb, keys = keys(orgdb), columns = c("GENETYPE", org_key))) TERM2GENE <- bg[, c("GENETYPE", org_key)] TERM2NAME <- bg[, c("GENETYPE", "GENETYPE")] - colnames(TERM2GENE) <- c("Term", default_IDtypes["GeneType"]) + colnames(TERM2GENE) <- c("Term", default_IDtypes[["GeneType"]]) colnames(TERM2NAME) <- c("Term", "Name") TERM2GENE <- na.omit(unique(TERM2GENE)) TERM2NAME <- na.omit(unique(TERM2NAME)) @@ -2516,7 +2515,7 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), bg[, "Name"] <- enzyme[bg[, "ENZYME"], 2] TERM2GENE <- bg[, c("ENZYME", org_key)] TERM2NAME <- bg[, c("ENZYME", "Name")] - colnames(TERM2GENE) <- c("Term", default_IDtypes["Enzyme"]) + colnames(TERM2GENE) <- c("Term", default_IDtypes[["Enzyme"]]) colnames(TERM2NAME) <- c("Term", "Name") TERM2GENE <- na.omit(unique(TERM2GENE)) TERM2NAME <- na.omit(unique(TERM2NAME)) @@ -2596,7 +2595,7 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), TERM2GENE <- rbind(data.frame("Term" = "TF", "symbol" = tf[["Symbol"]]), data.frame("Term" = "TF cofactor", "symbol" = tfco[["Symbol"]])) TERM2NAME <- data.frame("Term" = c("TF", "TF cofactor"), "Name" = c("TF", "TF cofactor")) - colnames(TERM2GENE) <- c("Term", default_IDtypes["TF"]) + colnames(TERM2GENE) <- c("Term", default_IDtypes[["TF"]]) colnames(TERM2NAME) <- c("Term", "Name") TERM2GENE <- na.omit(unique(TERM2GENE)) TERM2NAME <- na.omit(unique(TERM2NAME)) @@ -2635,7 +2634,7 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), ), , drop = FALSE] TERM2GENE <- data.frame("Term" = "SurfaceProtein", "symbol" = surfacepro[["ENTREZ.gene.symbol"]]) TERM2NAME <- data.frame("Term" = "SurfaceProtein", "Name" = "SurfaceProtein") - colnames(TERM2GENE) <- c("Term", default_IDtypes["CSPA"]) + colnames(TERM2GENE) <- c("Term", default_IDtypes[["CSPA"]]) colnames(TERM2NAME) <- c("Term", "Name") TERM2GENE <- na.omit(unique(TERM2GENE)) TERM2NAME <- na.omit(unique(TERM2NAME)) @@ -2671,7 +2670,7 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), unlink(temp) TERM2GENE <- data.frame("Term" = "SurfaceProtein", "symbol" = surfaceome[["UniProt.gene"]]) TERM2NAME <- data.frame("Term" = "SurfaceProtein", "Name" = "SurfaceProtein") - colnames(TERM2GENE) <- c("Term", default_IDtypes["Surfaceome"]) + colnames(TERM2GENE) <- c("Term", default_IDtypes[["Surfaceome"]]) colnames(TERM2NAME) <- c("Term", "Name") TERM2GENE <- na.omit(unique(TERM2GENE)) TERM2NAME <- na.omit(unique(TERM2NAME)) @@ -2706,7 +2705,7 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), unlink(temp) TERM2GENE <- data.frame("Term" = "SecretoryProtein", "entrez_id" = unlist(strsplit(spromedb$Gene_ID, ";"))) TERM2NAME <- data.frame("Term" = "SecretoryProtein", "Name" = "SecretoryProtein") - colnames(TERM2GENE) <- c("Term", default_IDtypes["SPRomeDB"]) + colnames(TERM2GENE) <- c("Term", default_IDtypes[["SPRomeDB"]]) colnames(TERM2NAME) <- c("Term", "Name") TERM2GENE <- na.omit(unique(TERM2GENE)) TERM2NAME <- na.omit(unique(TERM2NAME)) @@ -2747,11 +2746,11 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), close(con) unlink(temp) verseda <- verseda[grep("^>", verseda)] - verseda <- gsub("^>", "", verseda) - verseda_id <- GeneConvert(geneID = verseda, geneID_from_IDtype = "ensembl_peptide_id", geneID_to_IDtype = "symbol", species_from = db_species["VerSeDa"]) + verseda <- gsub("^>|\\.\\d+", "", verseda) + verseda_id <- GeneConvert(geneID = verseda, geneID_from_IDtype = c("ensembl_peptide_id", "refseq_peptide", "refseq_peptide_predicted", "uniprot_isoform", "uniprotswissprot", "uniprotsptrembl"), geneID_to_IDtype = "symbol", species_from = db_species["VerSeDa"]) TERM2GENE <- data.frame("Term" = "SecretoryProtein", "symbol" = unique(verseda_id$geneID_expand$symbol)) TERM2NAME <- data.frame("Term" = "SecretoryProtein", "Name" = "SecretoryProtein") - colnames(TERM2GENE) <- c("Term", default_IDtypes["VerSeDa"]) + colnames(TERM2GENE) <- c("Term", default_IDtypes[["VerSeDa"]]) colnames(TERM2NAME) <- c("Term", "Name") TERM2GENE <- na.omit(unique(TERM2GENE)) TERM2NAME <- na.omit(unique(TERM2NAME)) @@ -2786,7 +2785,7 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), TERM2GENE <- read.gmt(temp) version <- "v1.0" TERM2NAME <- TERM2GENE[, c(1, 1)] - colnames(TERM2GENE) <- c("Term", default_IDtypes["TFLink"]) + colnames(TERM2GENE) <- c("Term", default_IDtypes[["TFLink"]]) colnames(TERM2NAME) <- c("Term", "Name") TERM2GENE <- na.omit(unique(TERM2GENE)) TERM2NAME <- na.omit(unique(TERM2NAME)) @@ -2819,7 +2818,7 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), TERM2GENE <- read.table(temp, header = TRUE, fill = T, sep = "\t") version <- "v1.0" TERM2NAME <- TERM2GENE[, c(1, 1)] - colnames(TERM2GENE) <- c("Term", default_IDtypes["hTFtarget"]) + colnames(TERM2GENE) <- c("Term", default_IDtypes[["hTFtarget"]]) colnames(TERM2NAME) <- c("Term", "Name") TERM2GENE <- na.omit(unique(TERM2GENE)) TERM2NAME <- na.omit(unique(TERM2NAME)) @@ -2838,8 +2837,8 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), if (any(db == "TRRUST") && (!"TRRUST" %in% names(db_list[[sps]]))) { if (!sps %in% c("Homo_sapiens", "Mus_musculus")) { if (isTRUE(convert_species)) { - warning("Use the human annotation to create the hTFtarget database for ", sps, immediate. = TRUE) - db_species["hTFtarget"] <- "Homo_sapiens" + warning("Use the human annotation to create the TRRUST database for ", sps, immediate. = TRUE) + db_species["TRRUST"] <- "Homo_sapiens" } else { warning("hTFtarget database only support Homo_sapiens and Mus_musculus. Consider using convert_species=TRUE", immediate. = TRUE) stop("Stop the preparation.") @@ -2855,7 +2854,7 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), TERM2GENE <- read.table(temp, header = FALSE, fill = T, sep = "\t")[, 1:2] version <- "v2.0" TERM2NAME <- TERM2GENE[, c(1, 1)] - colnames(TERM2GENE) <- c("Term", default_IDtypes["TRRUST"]) + colnames(TERM2GENE) <- c("Term", default_IDtypes[["TRRUST"]]) colnames(TERM2NAME) <- c("Term", "Name") TERM2GENE <- na.omit(unique(TERM2GENE)) TERM2NAME <- na.omit(unique(TERM2NAME)) @@ -2889,7 +2888,7 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), TERM2GENE <- read.gmt(gsub(".gz", "", temp)) version <- "Harmonizome 3.0" TERM2NAME <- TERM2GENE[, c(1, 1)] - colnames(TERM2GENE) <- c("Term", default_IDtypes["JASPAR"]) + colnames(TERM2GENE) <- c("Term", default_IDtypes[["JASPAR"]]) colnames(TERM2NAME) <- c("Term", "Name") TERM2GENE <- na.omit(unique(TERM2GENE)) TERM2NAME <- na.omit(unique(TERM2NAME)) @@ -2923,7 +2922,7 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), TERM2GENE <- read.gmt(gsub(".gz", "", temp)) version <- "Harmonizome 3.0" TERM2NAME <- TERM2GENE[, c(1, 1)] - colnames(TERM2GENE) <- c("Term", default_IDtypes["ENCODE"]) + colnames(TERM2GENE) <- c("Term", default_IDtypes[["ENCODE"]]) colnames(TERM2NAME) <- c("Term", "Name") TERM2GENE <- na.omit(unique(TERM2GENE)) TERM2NAME <- na.omit(unique(TERM2NAME)) @@ -2961,22 +2960,21 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), ), version)] version <- version[length(version)] version <- regmatches(version, m = regexpr("(?<=href\\=\")\\S+(?=/\"\\>)", version, perl = TRUE)) + # version="2023.1.Hs" "2023.2.Hs" url <- paste0("https://data.broadinstitute.org/gsea-msigdb/msigdb/release/", version, "/msigdb.v", version, ".json") download(url = url, destfile = temp) - lines <- readLines(temp, warn = FALSE) + lines <- paste0(readLines(temp, warn = FALSE), collapse = "") lines <- gsub("\"", "", lines) - genesets <- strsplit(lines, "\\{|\\}") + lines <- gsub("^\\{|\\}$", "", lines) + terms <- strsplit(lines, "\\},")[[1]] term_list <- list() - for (idx in seq_along(genesets)) { - term_content <- genesets[[idx]] - if (length(term_content) == 1) { - next - } - term_name <- trimws(gsub(":|_", " ", term_content[[1]])) - term_id <- trimws(regmatches(term_content[[2]], m = regexpr("(?<=systematicName:)\\S+(?=,pmid)", term_content[[2]], perl = TRUE))) - term_gene <- trimws(regmatches(term_content[[2]], m = regexpr(pattern = "(?<=geneSymbols:\\[)\\S+(?=\\],msigdbURL)", term_content[[2]], perl = TRUE))) - term_collection <- trimws(gsub(".*collection:", "", term_content[[2]])) + for (idx in seq_along(terms)) { + term_content <- terms[[idx]] + term_name <- trimws(gsub(":|_|\\{.*", " ", term_content)) + term_id <- trimws(regmatches(term_content, m = regexpr("(?<=systematicName:)\\S+?(?=,)", term_content, perl = TRUE))) + term_gene <- trimws(regmatches(term_content, m = regexpr(pattern = "(?<=geneSymbols:\\[)\\S+?(?=\\],)", term_content, perl = TRUE))) + term_collection <- trimws(regmatches(term_content, m = regexpr(pattern = "(?<=collection:)\\S+?(?=\\,)", term_content, perl = TRUE))) term_collection <- gsub(":.*", "", term_collection) term_list[[idx]] <- c(id = term_id, "name" = term_name, "gene" = term_gene, "collection" = term_collection) } @@ -2987,7 +2985,7 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), TERM2NAME <- df[, c(1, 2, 4)] TERM2GENE <- df[, c(1, 3)] colnames(TERM2NAME) <- c("Term", "Name", "Collection") - colnames(TERM2GENE) <- c("Term", default_IDtypes["MSigDB"]) + colnames(TERM2GENE) <- c("Term", paste0(default_IDtypes[["MSigDB"]], collapse = ".")) TERM2NAME <- na.omit(unique(TERM2NAME)) TERM2GENE <- na.omit(unique(TERM2GENE)) @@ -3003,6 +3001,7 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), for (collection in unique(TERM2NAME[["Collection"]])) { db_species[paste0("MSigDB_", collection)] <- db_species["MSigDB"] + default_IDtypes[[paste0("MSigDB_", collection)]] <- default_IDtypes[["MSigDB"]] TERM2NAME_sub <- TERM2NAME[TERM2NAME[["Collection"]] == collection, , drop = FALSE] TERM2GENE_sub <- TERM2GENE[TERM2GENE[["Term"]] %in% TERM2NAME_sub[["Term"]], , drop = FALSE] db_list[[db_species["MSigDB"]]][[paste0("MSigDB_", collection)]][["TERM2GENE"]] <- TERM2GENE_sub @@ -3018,48 +3017,48 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), } ## CellTalk --------------------------------------------------------------------------- - # if (any(db == "CellTalk") && (!"CellTalk" %in% names(db_list[[sps]]))) { - # if (!sps %in% c("Homo_sapiens", "Mus_musculus")) { - # if (isTRUE(convert_species)) { - # warning("Use the human annotation to create the CellTalk database for ", sps, immediate. = TRUE) - # db_species["CellTalk"] <- "Homo_sapiens" - # } else { - # warning("CellTalk database only support Homo_sapiens and Mus_musculus. Consider using convert_species=TRUE", immediate. = TRUE) - # stop("Stop the preparation.") - # } - # } - # message("Preparing database: CellTalk") - # url <- switch(db_species["CellTalk"], - # "Homo_sapiens" = "https://raw.githubusercontent.com/ZJUFanLab/CellTalkDB/master/database/human_lr_pair.rds", - # "Mus_musculus" = "https://raw.githubusercontent.com/ZJUFanLab/CellTalkDB/master/database/mouse_lr_pair.rds" - # ) - # - # temp <- tempfile() - # download(url = url, destfile = temp) - # lr <- readRDS(temp) - # version <- "v1.0" - # - # lr[["ligand_gene_symbol2"]] <- paste0("ligand_", lr[["ligand_gene_symbol"]]) - # lr[["receptor_gene_symbol2"]] <- paste0("receptor_", lr[["receptor_gene_symbol"]]) - # TERM2GENE <- rbind( - # data.frame("Term" = lr[["ligand_gene_symbol2"]], "symbol" = lr[["receptor_gene_symbol"]]), - # data.frame("Term" = lr[["receptor_gene_symbol2"]], "symbol" = lr[["ligand_gene_symbol"]]) - # ) - # TERM2NAME <- TERM2GENE[, c(1, 1)] - # colnames(TERM2GENE) <- c("Term", default_IDtypes["CellTalk"]) - # colnames(TERM2NAME) <- c("Term", "Name") - # TERM2GENE <- na.omit(unique(TERM2GENE)) - # TERM2NAME <- na.omit(unique(TERM2NAME)) - # db_list[[db_species["CellTalk"]]][["CellTalk"]][["TERM2GENE"]] <- TERM2GENE - # db_list[[db_species["CellTalk"]]][["CellTalk"]][["TERM2NAME"]] <- TERM2NAME - # db_list[[db_species["CellTalk"]]][["CellTalk"]][["version"]] <- version - # if (sps == db_species["CellTalk"]) { - # saveCache(db_list[[db_species["CellTalk"]]][["CellTalk"]], - # key = list(version, as.character(db_species["CellTalk"]), "CellTalk"), - # comment = paste0(version, " nterm:", length(TERM2NAME[[1]]), "|", db_species["CellTalk"], "-CellTalk") - # ) - # } - # } + if (any(db == "CellTalk") && (!"CellTalk" %in% names(db_list[[sps]]))) { + if (!sps %in% c("Homo_sapiens", "Mus_musculus")) { + if (isTRUE(convert_species)) { + warning("Use the human annotation to create the CellTalk database for ", sps, immediate. = TRUE) + db_species["CellTalk"] <- "Homo_sapiens" + } else { + warning("CellTalk database only support Homo_sapiens and Mus_musculus. Consider using convert_species=TRUE", immediate. = TRUE) + stop("Stop the preparation.") + } + } + message("Preparing database: CellTalk") + url <- switch(db_species["CellTalk"], + "Homo_sapiens" = "https://raw.githubusercontent.com/ZJUFanLab/CellTalkDB/master/database/human_lr_pair.rds", + "Mus_musculus" = "https://raw.githubusercontent.com/ZJUFanLab/CellTalkDB/master/database/mouse_lr_pair.rds" + ) + + temp <- tempfile() + download(url = url, destfile = temp) + lr <- readRDS(temp) + version <- "v1.0" + + lr[["ligand_gene_symbol2"]] <- paste0("ligand_", lr[["ligand_gene_symbol"]]) + lr[["receptor_gene_symbol2"]] <- paste0("receptor_", lr[["receptor_gene_symbol"]]) + TERM2GENE <- rbind( + data.frame("Term" = lr[["ligand_gene_symbol2"]], "symbol" = lr[["receptor_gene_symbol"]]), + data.frame("Term" = lr[["receptor_gene_symbol2"]], "symbol" = lr[["ligand_gene_symbol"]]) + ) + TERM2NAME <- TERM2GENE[, c(1, 1)] + colnames(TERM2GENE) <- c("Term", default_IDtypes[["CellTalk"]]) + colnames(TERM2NAME) <- c("Term", "Name") + TERM2GENE <- na.omit(unique(TERM2GENE)) + TERM2NAME <- na.omit(unique(TERM2NAME)) + db_list[[db_species["CellTalk"]]][["CellTalk"]][["TERM2GENE"]] <- TERM2GENE + db_list[[db_species["CellTalk"]]][["CellTalk"]][["TERM2NAME"]] <- TERM2NAME + db_list[[db_species["CellTalk"]]][["CellTalk"]][["version"]] <- version + if (sps == db_species["CellTalk"]) { + saveCache(db_list[[db_species["CellTalk"]]][["CellTalk"]], + key = list(version, as.character(db_species["CellTalk"]), "CellTalk"), + comment = paste0(version, " nterm:", length(TERM2NAME[[1]]), "|", db_species["CellTalk"], "-CellTalk") + ) + } + } ## CellChat --------------------------------------------------------------------------- if (any(db == "CellChat") && (!"CellChat" %in% names(db_list[[sps]]))) { @@ -3111,7 +3110,7 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), TERM2GENE[["symbol"]] <- tolower(TERM2GENE[["symbol"]]) } TERM2NAME <- TERM2GENE[, c(1, 1)] - colnames(TERM2GENE) <- c("Term", default_IDtypes["CellChat"]) + colnames(TERM2GENE) <- c("Term", default_IDtypes[["CellChat"]]) colnames(TERM2NAME) <- c("Term", "Name") TERM2GENE <- na.omit(unique(TERM2GENE)) TERM2NAME <- na.omit(unique(TERM2NAME)) @@ -3158,7 +3157,7 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), } } - # ## MeSH --------------------------------------------------------------------------- + ## MeSH --------------------------------------------------------------------------- # if (any(db == "MeSH") && (!"MeSH" %in% names(db_list[[sps]]))) { # message("Preparing database: MeSH") # # dir.create("~/.cache/R/AnnotationHub",recursive = TRUE,showWarnings = FALSE) @@ -3232,35 +3231,42 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), if (is.na(default_IDtypes[term])) { IDtype <- colnames(TERM2GENE)[2] } else { - IDtype <- default_IDtypes[term] + IDtype <- default_IDtypes[[term]] } - res <- GeneConvert( - geneID = as.character(unique(TERM2GENE[, IDtype])), - geneID_from_IDtype = IDtype, - geneID_to_IDtype = "ensembl_id", - species_from = sp_from, - species_to = sps, - Ensembl_version = Ensembl_version, - mirror = mirror, - biomart = biomart, - max_tries = max_tries - ) - if (is.null(res$geneID_res)) { - warning("Failed to convert species for the database: ", term, immediate. = TRUE) - next + if (grepl("MSigDB_", term)) { + TERM2GENE_map <- db_list[[sps]][["MSigDB"]][["TERM2GENE"]] + TERM2GENE <- TERM2GENE_map[TERM2GENE_map[["Term"]] %in% TERM2GENE[["Term"]], , drop = FALSE] + TERM2NAME <- TERM2NAME[TERM2NAME[["Term"]] %in% TERM2GENE[["Term"]], , drop = FALSE] + } else { + res <- GeneConvert( + geneID = as.character(unique(TERM2GENE[, 2])), + geneID_from_IDtype = IDtype, + geneID_to_IDtype = "ensembl_id", + species_from = sp_from, + species_to = sps, + Ensembl_version = Ensembl_version, + mirror = mirror, + biomart = biomart, + max_tries = max_tries + ) + if (is.null(res$geneID_res)) { + warning("Failed to convert species for the database: ", term, immediate. = TRUE) + next + } + map <- res$geneID_collapse + TERM2GENE[["ensembl_id-converted"]] <- map[as.character(TERM2GENE[, 2]), "ensembl_id"] + TERM2GENE <- unnest(TERM2GENE, cols = "ensembl_id-converted", keep_empty = FALSE) + TERM2GENE <- TERM2GENE[, c("Term", "ensembl_id-converted")] + colnames(TERM2GENE) <- c("Term", "ensembl_id") + TERM2NAME <- TERM2NAME[TERM2NAME[["Term"]] %in% TERM2GENE[["Term"]], , drop = FALSE] } - map <- res$geneID_collapse - TERM2GENE[["ensembl_id-converted"]] <- map[as.character(TERM2GENE[, IDtype]), "ensembl_id"] - TERM2GENE <- unnest(TERM2GENE, cols = "ensembl_id-converted", keep_empty = FALSE) - TERM2GENE <- TERM2GENE[, c("Term", "ensembl_id-converted")] - colnames(TERM2GENE) <- c("Term", "ensembl_id") - TERM2NAME <- TERM2NAME[TERM2NAME[["Term"]] %in% TERM2GENE[["Term"]], , drop = FALSE] + db_info[["TERM2GENE"]] <- unique(TERM2GENE) db_info[["TERM2NAME"]] <- unique(TERM2NAME) version <- paste0(db_info[["version"]], "(converted from ", sp_from, ")") db_info[["version"]] <- version db_list[[sps]][[term]] <- db_info - default_IDtypes[term] <- "ensembl_id" + default_IDtypes[[term]] <- "ensembl_id" ### save cache saveCache(db_list[[sps]][[term]], key = list(version, sps, term), @@ -3279,26 +3285,32 @@ PrepareDB <- function(species = c("Homo_sapiens", "Mus_musculus"), if (is.na(default_IDtypes[term])) { IDtype <- colnames(TERM2GENE)[2] } else { - IDtype <- default_IDtypes[term] + IDtype <- default_IDtypes[[term]] } - res <- GeneConvert( - geneID = as.character(unique(TERM2GENE[, IDtype])), - geneID_from_IDtype = IDtype, - geneID_to_IDtype = IDtypes, - species_from = sps, - species_to = sps, - Ensembl_version = Ensembl_version, - mirror = mirror, - biomart = biomart, - max_tries = max_tries - ) - if (is.null(res$geneID_res)) { - warning("Failed to convert species for the database: ", term, immediate. = TRUE) - next + if (grepl("MSigDB_", term)) { + map <- db_list[[sps]][["MSigDB"]][["TERM2GENE"]][, -1, drop = FALSE] + map <- aggregate(map, by = list(map[[1]]), FUN = function(x) list(unique(x))) + rownames(map) <- map[, 1] + } else { + res <- GeneConvert( + geneID = as.character(unique(TERM2GENE[, 2])), + geneID_from_IDtype = IDtype, + geneID_to_IDtype = IDtypes, + species_from = sps, + species_to = sps, + Ensembl_version = Ensembl_version, + mirror = mirror, + biomart = biomart, + max_tries = max_tries + ) + if (is.null(res$geneID_res)) { + warning("Failed to convert species for the database: ", term, immediate. = TRUE) + next + } + map <- res$geneID_collapse } - map <- res$geneID_collapse for (type in IDtypes) { - TERM2GENE[[type]] <- map[as.character(TERM2GENE[, IDtype]), type] + TERM2GENE[[type]] <- map[as.character(TERM2GENE[, 2]), type] TERM2GENE <- unnest(TERM2GENE, cols = type, keep_empty = TRUE) } db_list[[sps]][[term]][["TERM2GENE"]] <- TERM2GENE @@ -3614,7 +3626,7 @@ RunEnrichment <- function(srt = NULL, group_by = NULL, test.use = "wilcox", DE_t #' #' @inheritParams RunEnrichment #' @param geneScore A numeric vector that specifies the gene scores, for example, the log2(fold change) values of gene expression. -#' @param scoreType This parameter defines the GSEA score type. Possible options are ("std", "pos", "neg"). By default ("std") the enrichment score is computed as in the original GSEA. The "pos" and "neg" score types are intended to be used for one-tailed tests (i.e. when one is interested only in positive ("pos") or negateive ("neg") enrichment). +#' @param scoreType This parameter defines the GSEA score type. Possible options are "std", "pos", "neg". By default ("std") the enrichment score is computed as in the original GSEA. The "pos" and "neg" score types are intended to be used for one-tailed tests (i.e. when one is interested only in positive ("pos") or negateive ("neg") enrichment). #' @returns #' If input is a Seurat object, returns the modified Seurat object with the enrichment result stored in the tools slot. #' diff --git a/R/SCP-plot.R b/R/SCP-plot.R index 989abf5e..14abec00 100644 --- a/R/SCP-plot.R +++ b/R/SCP-plot.R @@ -1327,7 +1327,7 @@ CellDimPlot <- function(srt, group.by, reduction = NULL, dims = c(1, 2), split.b set.seed(seed) if (is.null(split.by)) { - split.by <- "All_cells" + split.by <- "All.groups" srt@meta.data[[split.by]] <- factor("") } for (i in unique(c(group.by, split.by))) { @@ -1432,7 +1432,7 @@ CellDimPlot <- function(srt, group.by, reduction = NULL, dims = c(1, 2), split.b lineages_layers <- lineages_layers[!names(lineages_layers) %in% c("lab_layer", "theme_layer")] } if (!is.null(paga)) { - if (split.by != "All_cells") { + if (split.by != "All.groups") { stop("paga can only plot on the non-split data") } paga_layers <- PAGAPlot(srt, @@ -1449,7 +1449,7 @@ CellDimPlot <- function(srt, group.by, reduction = NULL, dims = c(1, 2), split.b paga_layers <- paga_layers[!names(paga_layers) %in% c("lab_layer", "theme_layer")] } if (!is.null(velocity)) { - if (split.by != "All_cells") { + if (split.by != "All.groups") { stop("velocity can only plot on the non-split data") } velocity_layers <- VelocityPlot(srt, @@ -1582,7 +1582,7 @@ CellDimPlot <- function(srt, group.by, reduction = NULL, dims = c(1, 2), split.b legend.position = legend.position, legend.direction = legend.direction ) - if (split.by != "All_cells") { + if (split.by != "All.groups") { p <- p + facet_grid(. ~ split.by) } @@ -2001,7 +2001,7 @@ FeatureDimPlot <- function(srt, features, reduction = NULL, dims = c(1, 2), spli assay <- assay %||% DefaultAssay(srt) if (is.null(split.by)) { - split.by <- "All_cells" + split.by <- "All.groups" srt@meta.data[[split.by]] <- factor("") } for (i in c(split.by)) { @@ -2054,7 +2054,7 @@ FeatureDimPlot <- function(srt, features, reduction = NULL, dims = c(1, 2), spli features_meta <- features[features %in% colnames(srt@meta.data)] features_embedding <- features[features %in% names(embeddings)] if (length(intersect(features_gene, features_meta)) > 0) { - warning("Features appear in both gene names and metadata names:", paste0(intersect(features_gene, features_meta), collapse = ",")) + warning("Features appear in both gene names and metadata names: ", paste0(intersect(features_gene, features_meta), collapse = ",")) } if (isTRUE(calculate_coexp) && length(features_gene) > 0) { @@ -2156,7 +2156,7 @@ FeatureDimPlot <- function(srt, features, reduction = NULL, dims = c(1, 2), spli dat_all <- cbind(dat_use, dat_exp[row.names(dat_use), features, drop = FALSE]) dat_split <- split.data.frame(dat_all, dat_all[[split.by]]) plist <- lapply(levels(dat_sp[[split.by]]), function(s) { - dat <- dat_split[[ifelse(split.by == "All_cells", 1, s)]][, , drop = FALSE] + dat <- dat_split[[ifelse(split.by == "All.groups", 1, s)]][, , drop = FALSE] for (f in features) { dat[, f][dat[, f] <= bg_cutoff] <- NA if (any(is.infinite(dat[, f]))) { @@ -2259,7 +2259,7 @@ FeatureDimPlot <- function(srt, features, reduction = NULL, dims = c(1, 2), spli labs(title = title, subtitle = s, x = xlab, y = ylab) + scale_x_continuous(limits = c(min(dat_use[, paste0(reduction_key, dims[1])], na.rm = TRUE), max(dat_use[, paste0(reduction_key, dims[1])], na.rm = TRUE))) + scale_y_continuous(limits = c(min(dat_use[, paste0(reduction_key, dims[2])], na.rm = TRUE), max(dat_use[, paste0(reduction_key, dims[2])], na.rm = TRUE))) - if (split.by == "All_cells") { + if (split.by == "All.groups") { p <- p + facet_grid(. ~ features) } else { p <- p + facet_grid(split.by ~ features) @@ -2451,7 +2451,7 @@ FeatureDimPlot <- function(srt, features, reduction = NULL, dims = c(1, 2), spli plist <- lapply(setNames(rownames(comb), rownames(comb)), function(i) { f <- comb[i, "feature"] s <- comb[i, "split"] - dat <- dat_split[[ifelse(split.by == "All_cells", 1, s)]][, c(colnames(dat_use), f), drop = FALSE] + dat <- dat_split[[ifelse(split.by == "All.groups", 1, s)]][, c(colnames(dat_use), f), drop = FALSE] dat[, f][dat[, f] <= bg_cutoff] <- NA if (any(is.infinite(dat[, f]))) { dat[, f][dat[, f] == max(dat[, f], na.rm = TRUE)] <- max(dat[, f][is.finite(dat[, f])], na.rm = TRUE) @@ -2623,7 +2623,7 @@ FeatureDimPlot <- function(srt, features, reduction = NULL, dims = c(1, 2), spli } } if (nrow(dat) > 0) { - if (split.by == "All_cells") { + if (split.by == "All.groups") { p <- p + facet_grid(. ~ features) } else { p <- p + facet_grid(formula(paste0(split.by, "~features"))) @@ -2974,8 +2974,8 @@ FeatureDimPlot3D <- function(srt, features = NULL, reduction = NULL, dims = c(1, } assay <- assay %||% DefaultAssay(srt) if (is.null(split.by)) { - split.by <- "All_cells" - srt@meta.data[[split.by]] <- factor("All_cells") + split.by <- "All.cells" + srt@meta.data[[split.by]] <- factor("All.cells") } for (i in split.by) { if (!i %in% colnames(srt@meta.data)) { @@ -3034,6 +3034,9 @@ FeatureDimPlot3D <- function(srt, features = NULL, reduction = NULL, dims = c(1, features_gene <- features[features %in% rownames(srt@assays[[assay]])] features_meta <- features[features %in% colnames(srt@meta.data)] + if (length(intersect(features_gene, features_meta)) > 0) { + warning("Features appear in both gene names and metadata names: ", paste0(intersect(features_gene, features_meta), collapse = ",")) + } if (isTRUE(calculate_coexp) && length(features_gene) > 0) { if (length(features_meta) > 0) { @@ -3146,9 +3149,9 @@ FeatureDimPlot3D <- function(srt, features = NULL, reduction = NULL, dims = c(1, split_option <- list() genes_option <- list() for (i in 0:nlevels(dat_use[[split.by]])) { - sp <- ifelse(i == 0, "All_cells", levels(dat_use[[split.by]])[i]) + sp <- ifelse(i == 0, "All.cells", levels(dat_use[[split.by]])[i]) ncells <- ifelse(i == 0, nrow(dat_use), table(dat_use[[split.by]])[sp]) - if (i != 0 && sp == "All_cells") { + if (i != 0 && sp == "All.cells") { next } x <- list(dat_use[[paste0(reduction_key, dims[1], sp)]]) @@ -3381,6 +3384,7 @@ FeatureDimPlot3D <- function(srt, features = NULL, reduction = NULL, dims = c(1, #' "Ins1", "Gcg", "Sst", "Ghrl" # Beta, Alpha, Delta, Epsilon #' ), group.by = "SubCellType", plot.by = "feature", stack = TRUE) #' +#' library(Matrix) #' data <- pancreas_sub@assays$RNA@data #' pancreas_sub@assays$RNA@scale.data <- as.matrix(data / rowMeans(data)) #' FeatureStatPlot(pancreas_sub, @@ -3414,6 +3418,12 @@ FeatureStatPlot <- function(srt, stat.by, group.by = NULL, split.by = NULL, bg.b legend.position = "right", legend.direction = "vertical", theme_use = "theme_scp", theme_args = list(), combine = TRUE, nrow = NULL, ncol = NULL, byrow = TRUE, force = FALSE, seed = 11) { + if (is.null(group.by)) { + group.by <- "All.groups" # avoid having the same name with split.by. split.by will be All.groups by default + xlab <- "" + srt[[group.by]] <- factor("All_groups") + } + meta.data <- srt@meta.data meta.data[["cells"]] <- rownames(meta.data) assay <- assay %||% DefaultAssay(srt) @@ -3437,9 +3447,11 @@ FeatureStatPlot <- function(srt, stat.by, group.by = NULL, split.by = NULL, bg.b plist <- list() for (g in unique(meta.reshape[[group.by]])) { if (length(rownames(meta.reshape)[meta.reshape[[group.by]] == g]) > 0) { - meta.reshape[[g]] <- meta.reshape[["Stat.by"]] + meta.use <- meta.reshape + meta.use[[group.by]] <- NULL + colnames(meta.use)[colnames(meta.use) == "Stat.by"] <- g p <- ExpressionStatPlot( - exp.data = exp.data, meta.data = meta.reshape, stat.by = g, group.by = "Features", split.by = split.by, bg.by = NULL, plot.by = "group", fill.by = fill.by, + exp.data = exp.data, meta.data = meta.use, stat.by = g, group.by = "Features", split.by = split.by, bg.by = NULL, plot.by = "group", fill.by = fill.by, cells = rownames(meta.reshape)[meta.reshape[[group.by]] == g], keep_empty = keep_empty, individual = individual, plot_type = plot_type, palette = palette, palcolor = palcolor, alpha = alpha, @@ -3460,7 +3472,6 @@ FeatureStatPlot <- function(srt, stat.by, group.by = NULL, split.by = NULL, bg.b theme_use = theme_use, theme_args = theme_args, force = force, seed = seed ) - meta.reshape[[g]] <- NULL plist <- append(plist, p) } } @@ -3637,14 +3648,17 @@ ExpressionStatPlot <- function(exp.data, meta.data, stat.by, group.by = NULL, sp stop("meta.data is empty.") } if (is.null(group.by)) { - group.by <- "No.group.by" + group.by <- "All.groups" xlab <- "" - meta.data[[group.by]] <- factor("All") + meta.data[[group.by]] <- factor("") } if (is.null(split.by)) { - split.by <- "All_cells" + split.by <- "All.groups" meta.data[[split.by]] <- factor("") } + if (group.by == split.by && group.by == "All.groups") { + legend.position <- "none" + } for (i in unique(c(group.by, split.by, bg.by))) { if (!i %in% colnames(meta.data)) { stop(paste0(i, " is not in the meta.data.")) @@ -3704,6 +3718,9 @@ ExpressionStatPlot <- function(exp.data, meta.data, stat.by, group.by = NULL, sp features_gene <- stat.by[stat.by %in% rownames(exp.data)] features_meta <- stat.by[stat.by %in% colnames(meta.data)] + if (length(intersect(features_gene, features_meta)) > 0) { + warning("Features appear in both gene names and metadata names: ", paste0(intersect(features_gene, features_meta), collapse = ",")) + } if (isTRUE(calculate_coexp) && length(features_gene) > 1) { if (length(features_meta) > 0) { @@ -3826,7 +3843,7 @@ ExpressionStatPlot <- function(exp.data, meta.data, stat.by, group.by = NULL, sp colors <- palette_scp(stat.by, palette = palette, palcolor = palcolor) } if (fill.by == "group") { - if (split.by != "All_cells") { + if (split.by != "All.groups") { colors <- palette_scp(levels(dat_use[[split.by]]), palette = palette, palcolor = palcolor) } else { colors <- palette_scp(levels(dat_use[[g]]), palette = palette, palcolor = palcolor) @@ -3878,14 +3895,14 @@ ExpressionStatPlot <- function(exp.data, meta.data, stat.by, group.by = NULL, sp keynm <- "Features" } if (fill.by == "group") { - dat[, "fill.by"] <- if (split.by == "All_cells") dat[, "group.by"] else dat[, "split.by"] - keynm <- ifelse(split.by == "All_cells", g, split.by) + dat[, "fill.by"] <- if (split.by == "All.groups") dat[, "group.by"] else dat[, "split.by"] + keynm <- ifelse(split.by == "All.groups", g, split.by) } if (fill.by == "expression") { dat[, "fill.by"] <- median_values[paste0(dat[["group.by"]], "-", dat[["split.by"]]), f] keynm <- "Median expression" } - if (split.by != "All_cells") { + if (split.by != "All.groups") { levels_order <- levels(dat[["split.by"]]) } else { levels_order <- levels(dat[["group.by"]]) @@ -4244,6 +4261,7 @@ ExpressionStatPlot <- function(exp.data, meta.data, stat.by, group.by = NULL, sp return(plist) } + #' Statistical plot of cells #' #' @inheritParams StatPlot @@ -4426,12 +4444,12 @@ StatPlot <- function(meta.data, stat.by, group.by = NULL, split.by = NULL, bg.by stop("meta.data is empty.") } if (is.null(group.by)) { - group.by <- "No.group.by" + group.by <- "All.groups" xlab <- "" - meta.data[[group.by]] <- factor("All") + meta.data[[group.by]] <- factor("") } if (is.null(split.by)) { - split.by <- "All_cells" + split.by <- "All.groups" meta.data[[split.by]] <- factor("") } @@ -4517,9 +4535,13 @@ StatPlot <- function(meta.data, stat.by, group.by = NULL, split.by = NULL, bg.by aspect.ratio <- 1 } - if (any(group.by != "No.group.by") && plot_type %in% c("sankey", "chord", "venn", "upset")) { + if (any(group.by != "All.groups") && plot_type %in% c("sankey", "chord", "venn", "upset")) { warning("group.by is not used when plot sankey, chord, venn or upset", immediate. = TRUE) } + if (stat_type == "percent" && plot_type %in% c("sankey", "chord", "venn", "upset")) { + warning("stat_type is forcibly set to 'count' when plot sankey, chord, venn or upset", immediate. = TRUE) + stat_type <- "count" + } dat_all <- meta.data[, unique(c(stat.by, group.by, split.by, bg.by)), drop = FALSE] nlev <- sapply(dat_all, nlevels) nlev <- nlev[nlev > 100] @@ -4578,12 +4600,12 @@ StatPlot <- function(meta.data, stat.by, group.by = NULL, split.by = NULL, bg.by sp <- comb[i, "split_name"] g <- comb[i, "group_name"] single_group <- comb[[i, "group_element"]] - colors_use <- colors[names(colors) %in% dat_split[[ifelse(split.by == "All_cells", 1, sp)]][[stat.by]]] - if (any(is.na(dat_split[[ifelse(split.by == "All_cells", 1, sp)]][[stat.by]])) && isTRUE(NA_stat)) { + colors_use <- colors[names(colors) %in% dat_split[[ifelse(split.by == "All.groups", 1, sp)]][[stat.by]]] + if (any(is.na(dat_split[[ifelse(split.by == "All.groups", 1, sp)]][[stat.by]])) && isTRUE(NA_stat)) { colors_use <- c(colors_use, colors["NA"]) } if (stat_type == "percent") { - dat_use <- dat_split[[ifelse(split.by == "All_cells", 1, sp)]] %>% + dat_use <- dat_split[[ifelse(split.by == "All.groups", 1, sp)]] %>% xtabs(formula = paste0("~", stat.by, "+", g), addNA = NA_stat) %>% as.data.frame() %>% group_by(across(all_of(g)), .drop = FALSE) %>% @@ -4592,7 +4614,7 @@ StatPlot <- function(meta.data, stat.by, group.by = NULL, split.by = NULL, bg.by mutate(value = Freq / groupn) %>% as.data.frame() } else { - dat_use <- dat_split[[ifelse(split.by == "All_cells", 1, sp)]] %>% + dat_use <- dat_split[[ifelse(split.by == "All.groups", 1, sp)]] %>% xtabs(formula = paste0("~", stat.by, "+", g), addNA = NA_stat) %>% as.data.frame() %>% mutate(value = Freq) @@ -4817,7 +4839,7 @@ StatPlot <- function(meta.data, stat.by, group.by = NULL, split.by = NULL, bg.by par(mfrow = c(nrow, ncol)) } for (sp in levels(dat_all[[split.by]])) { - dat_use <- dat_split[[ifelse(split.by == "All_cells", 1, sp)]] + dat_use <- dat_split[[ifelse(split.by == "All.groups", 1, sp)]] if (plot_type == "venn") { check_R(c("ggVennDiagram", "sf")) dat_list <- as.list(dat_use[, stat.by]) @@ -5091,12 +5113,12 @@ FeatureCorPlot <- function(srt, features, group.by = NULL, split.by = NULL, cell } assay <- assay %||% DefaultAssay(srt) if (is.null(split.by)) { - split.by <- "All_cells" + split.by <- "All.groups" srt@meta.data[[split.by]] <- factor("") } if (is.null(group.by)) { - group.by <- "All_cells" - srt@meta.data[[group.by]] <- factor("All_cells") + group.by <- "All.groups" + srt@meta.data[[group.by]] <- factor("") } for (i in c(split.by, group.by)) { if (!i %in% colnames(srt@meta.data)) { @@ -5127,6 +5149,9 @@ FeatureCorPlot <- function(srt, features, group.by = NULL, split.by = NULL, cell features_gene <- features[features %in% rownames(srt@assays[[assay]])] features_meta <- features[features %in% colnames(srt@meta.data)] + if (length(intersect(features_gene, features_meta)) > 0) { + warning("Features appear in both gene names and metadata names: ", paste0(intersect(features_gene, features_meta), collapse = ",")) + } if (isTRUE(calculate_coexp) && length(features_gene) > 0) { if (length(features_meta) > 0) { @@ -5513,7 +5538,7 @@ FeatureCorPlot <- function(srt, features, group.by = NULL, split.by = NULL, cell #' @importFrom patchwork wrap_plots #' @importFrom methods slot #' @export -CellDensityPlot <- function(srt, features, group.by, split.by = NULL, assay = NULL, slot = "data", +CellDensityPlot <- function(srt, features, group.by = NULL, split.by = NULL, assay = NULL, slot = "data", flip = FALSE, reverse = FALSE, x_order = c("value", "rank"), decreasing = NULL, palette = "Paired", palcolor = NULL, cells = NULL, keep_empty = FALSE, @@ -5531,10 +5556,17 @@ CellDensityPlot <- function(srt, features, group.by, split.by = NULL, assay = NU if (!inherits(features, "character")) { stop("'features' is not a character vectors") } + if (is.null(group.by)) { + group.by <- "All.groups" + srt@meta.data[[group.by]] <- factor("") + } if (is.null(split.by)) { - split.by <- "All_cells" + split.by <- "All.groups" srt@meta.data[[split.by]] <- factor("") } + if (group.by == split.by & group.by == "All.groups") { + legend.position <- "none" + } for (i in c(group.by, split.by)) { if (!i %in% colnames(srt@meta.data)) { stop(paste0(i, " is not in the meta.data of srt object.")) @@ -5554,6 +5586,9 @@ CellDensityPlot <- function(srt, features, group.by, split.by = NULL, assay = NU features_gene <- features[features %in% rownames(srt@assays[[assay]])] features_meta <- features[features %in% colnames(srt@meta.data)] + if (length(intersect(features_gene, features_meta)) > 0) { + warning("Features appear in both gene names and metadata names: ", paste0(intersect(features_gene, features_meta), collapse = ",")) + } if (length(features_gene) > 0) { dat_gene <- t(slot(srt@assays[[assay]], slot)[features_gene, , drop = FALSE]) @@ -5652,7 +5687,7 @@ CellDensityPlot <- function(srt, features, group.by, split.by = NULL, assay = NU limits = limits, trans = y.trans, n.breaks = y.nbreaks, expand = c(0, 0) ) - if (split.by != "All_cells") { + if (split.by != "All.groups") { p <- p + facet_grid(. ~ split.by) } p <- p + labs(title = title, subtitle = subtitle, x = f, y = g) @@ -7813,8 +7848,8 @@ GroupHeatmap <- function(srt, features = NULL, group.by = NULL, split.by = NULL, assay <- assay %||% DefaultAssay(srt) if (is.null(group.by)) { - srt@meta.data[["No.group.by"]] <- factor("") - group.by <- "No.group.by" + srt@meta.data[["All.groups"]] <- factor("") + group.by <- "All.groups" } if (any(!group.by %in% colnames(srt@meta.data))) { stop(group.by[!group.by %in% colnames(srt@meta.data)], " is not in the meta data of the Seurat object.") @@ -8126,7 +8161,7 @@ GroupHeatmap <- function(srt, features = NULL, group.by = NULL, split.by = NULL, column_split_list[[cell_group]] <- length(unique(column_split_list[[cell_group]])) } } - if (cell_group != "No.group.by") { + if (cell_group != "All.groups") { funbody <- paste0( " grid.rect(gp = gpar(fill = palette_scp(", paste0("c('", paste0(levels(srt@meta.data[[cell_group]]), collapse = "','"), "')"), ",palette = '", group_palette[i], "',palcolor=c(", paste0("'", paste0(group_palcolor[[i]], collapse = "','"), "'"), "))[nm])) @@ -8177,7 +8212,7 @@ GroupHeatmap <- function(srt, features = NULL, group.by = NULL, split.by = NULL, for (i in seq_along(raw.group.by)) { cell_group <- raw.group.by[i] - if (cell_group != "No.group.by") { + if (cell_group != "All.groups") { lgd[[cell_group]] <- Legend( title = cell_group, labels = levels(srt@meta.data[[cell_group]]), legend_gp = gpar(fill = palette_scp(levels(srt@meta.data[[cell_group]]), palette = raw.group_palette[i], palcolor = group_palcolor[[i]])), border = TRUE @@ -8731,9 +8766,9 @@ GroupHeatmap <- function(srt, features = NULL, group.by = NULL, split.by = NULL, matrix = if (flip) t(mat_list[[cell_group]]) else mat_list[[cell_group]], col = colors, layer_fun = getFunction("layer_fun", where = environment()), - row_title = row_title %||% if (flip) ifelse(cell_group != "No.group.by", cell_group, "") else character(0), + row_title = row_title %||% if (flip) ifelse(cell_group != "All.groups", cell_group, "") else character(0), row_title_side = row_title_side, - column_title = column_title %||% if (flip) character(0) else ifelse(cell_group != "No.group.by", cell_group, ""), + column_title = column_title %||% if (flip) character(0) else ifelse(cell_group != "All.groups", cell_group, ""), column_title_side = column_title_side, row_title_rot = row_title_rot, column_title_rot = column_title_rot, @@ -8989,8 +9024,8 @@ FeatureHeatmap <- function(srt, features = NULL, cells = NULL, group.by = NULL, stop("feature_split must be the same length as features") } if (is.null(group.by)) { - srt@meta.data[["No.group.by"]] <- factor("") - group.by <- "No.group.by" + srt@meta.data[["All.groups"]] <- factor("") + group.by <- "All.groups" } if (any(!group.by %in% colnames(srt@meta.data))) { stop(group.by[!group.by %in% colnames(srt@meta.data)], " is not in the meta data of the Seurat object.") @@ -9254,7 +9289,7 @@ FeatureHeatmap <- function(srt, features = NULL, cells = NULL, group.by = NULL, column_split_list[[cell_group]] <- length(unique(column_split_list[[cell_group]])) } } - if (cell_group != "No.group.by") { + if (cell_group != "All.groups") { funbody <- paste0( " grid.rect(gp = gpar(fill = palette_scp(", paste0("c('", paste0(levels(srt@meta.data[[cell_group]]), collapse = "','"), "')"), ",palette = '", group_palette[i], "',palcolor=c(", paste0("'", paste0(group_palcolor[[i]], collapse = "','"), "'"), "))[nm])) @@ -9304,7 +9339,7 @@ FeatureHeatmap <- function(srt, features = NULL, cells = NULL, group.by = NULL, } for (i in seq_along(raw.group.by)) { cell_group <- raw.group.by[i] - if (cell_group != "No.group.by") { + if (cell_group != "All.groups") { lgd[[cell_group]] <- Legend( title = cell_group, labels = levels(srt@meta.data[[cell_group]]), legend_gp = gpar(fill = palette_scp(levels(srt@meta.data[[cell_group]]), palette = raw.group_palette[i], palcolor = group_palcolor[[i]])), border = TRUE @@ -9664,9 +9699,9 @@ FeatureHeatmap <- function(srt, features = NULL, cells = NULL, group.by = NULL, name = cell_group, matrix = if (flip) t(mat_list[[cell_group]]) else mat_list[[cell_group]], col = colors, - row_title = row_title %||% if (flip) ifelse(cell_group != "No.group.by", cell_group, "") else character(0), + row_title = row_title %||% if (flip) ifelse(cell_group != "All.groups", cell_group, "") else character(0), row_title_side = row_title_side, - column_title = column_title %||% if (flip) character(0) else ifelse(cell_group != "No.group.by", cell_group, ""), + column_title = column_title %||% if (flip) character(0) else ifelse(cell_group != "All.groups", cell_group, ""), column_title_side = column_title_side, row_title_rot = row_title_rot, column_title_rot = column_title_rot, @@ -10040,12 +10075,12 @@ CellCorHeatmap <- function(srt_query, srt_ref = NULL, bulk_ref = NULL, cell_groups <- list() if (is.null(query_group)) { - srt_query@meta.data[["No.group.by"]] <- factor("") - query_group <- "No.group.by" + srt_query@meta.data[["All.groups"]] <- factor("") + query_group <- "All.groups" } if (is.null(ref_group)) { - srt_ref@meta.data[["No.group.by"]] <- factor("") - ref_group <- "No.group.by" + srt_ref@meta.data[["All.groups"]] <- factor("") + ref_group <- "All.groups" } if (!is.factor(srt_query[[query_group, drop = TRUE]])) { srt_query@meta.data[[query_group]] <- factor(srt_query[[query_group, drop = TRUE]], levels = unique(srt_query[[query_group, drop = TRUE]])) @@ -10144,7 +10179,7 @@ CellCorHeatmap <- function(srt_query, srt_ref = NULL, bulk_ref = NULL, lgd[["ht"]] <- Legend(title = exp_name, col_fun = colors, border = TRUE) ha_query_list <- NULL - if (query_group != "No.group.by") { + if (query_group != "All.groups") { if (isFALSE(query_collapsing) && ((isFALSE(flip) & isTRUE(cluster_rows)) || (isTRUE(flip) & isTRUE(cluster_columns)))) { query_cell_annotation <- c(query_group, query_cell_annotation) } else { @@ -10182,7 +10217,7 @@ CellCorHeatmap <- function(srt_query, srt_ref = NULL, bulk_ref = NULL, } ha_ref_list <- NULL - if (ref_group != "No.group.by") { + if (ref_group != "All.groups") { if (isFALSE(ref_collapsing) && ((isFALSE(flip) & isTRUE(cluster_columns)) || (isTRUE(flip) & isTRUE(cluster_rows)))) { ref_cell_annotation <- c(ref_group, ref_cell_annotation) } else { diff --git a/inst/python/SCP_analysis.py b/inst/python/SCP_analysis.py index a7763de8..56201f92 100644 --- a/inst/python/SCP_analysis.py +++ b/inst/python/SCP_analysis.py @@ -265,13 +265,18 @@ def SCVELO(adata=None, h5ad=None, group_by=None, palette=None, if m != "dynamical": scv.tl.rank_velocity_genes(adata, vkey=vkey, groupby=group_by) adata.var[vkey+"_score"]=adata.var["spearmans_score"] - df = scv.get_df(adata.uns["rank_velocity_genes"]["names"]) - adata.uns["rank_"+vkey+"_genes"]=df + df1 = scv.get_df(adata.uns["rank_velocity_genes"]["names"]) + adata.uns["rank_"+vkey+"_genenames"]=df1 + df2 = scv.get_df(adata.uns["rank_velocity_genes"]["scores"]) + adata.uns["rank_"+vkey+"_genescores"]=df2 + del adata.uns["rank_velocity_genes"] else: scv.tl.rank_dynamical_genes(adata, groupby=group_by) - df = scv.get_df(adata.uns['rank_dynamical_genes']['names']) - adata.uns["rank_"+vkey+"_genes"]=df - + df1 = scv.get_df(adata.uns['rank_dynamical_genes']['names']) + adata.uns["rank_"+vkey+"_genenames"]=df1 + df2 = scv.get_df(adata.uns['rank_dynamical_genes']['scores']) + adata.uns["rank_"+vkey+"_genescores"]=df2 + del adata.uns["rank_dynamical_genes"] for cluster in df.columns: #df[0:1].values.ravel()[:12] ### by row diff --git a/man/CellDensityPlot.Rd b/man/CellDensityPlot.Rd index 4ad503a2..38733b4e 100644 --- a/man/CellDensityPlot.Rd +++ b/man/CellDensityPlot.Rd @@ -7,7 +7,7 @@ CellDensityPlot( srt, features, - group.by, + group.by = NULL, split.by = NULL, assay = NULL, slot = "data", diff --git a/man/FeatureStatPlot.Rd b/man/FeatureStatPlot.Rd index be184270..66989ceb 100644 --- a/man/FeatureStatPlot.Rd +++ b/man/FeatureStatPlot.Rd @@ -281,6 +281,7 @@ FeatureStatPlot(pancreas_sub, stat.by = c( "Ins1", "Gcg", "Sst", "Ghrl" # Beta, Alpha, Delta, Epsilon ), group.by = "SubCellType", plot.by = "feature", stack = TRUE) +library(Matrix) data <- pancreas_sub@assays$RNA@data pancreas_sub@assays$RNA@scale.data <- as.matrix(data / rowMeans(data)) FeatureStatPlot(pancreas_sub, diff --git a/man/PrepareDB.Rd b/man/PrepareDB.Rd index d05e4862..7cb6c109 100644 --- a/man/PrepareDB.Rd +++ b/man/PrepareDB.Rd @@ -8,8 +8,8 @@ PrepareDB( species = c("Homo_sapiens", "Mus_musculus"), db = c("GO", "GO_BP", "GO_CC", "GO_MF", "KEGG", "WikiPathway", "Reactome", "CORUM", "MP", "DO", "HPO", "PFAM", "CSPA", "Surfaceome", "SPRomeDB", "VerSeDa", "TFLink", - "hTFtarget", "TRRUST", "JASPAR", "ENCODE", "MSigDB", "CellChat", "Chromosome", - "GeneType", "Enzyme", "TF"), + "hTFtarget", "TRRUST", "JASPAR", "ENCODE", "MSigDB", "CellTalk", "CellChat", + "Chromosome", "GeneType", "Enzyme", "TF"), db_IDtypes = c("symbol", "entrez_id", "ensembl_id"), db_version = "latest", db_update = FALSE, @@ -32,7 +32,7 @@ PrepareDB( Default is c("GO", "GO_BP", "GO_CC", "GO_MF", "KEGG", "WikiPathway", "Reactome", "CORUM", "MP", "DO", "HPO", "PFAM", "CSPA", "Surfaceome", "SPRomeDB", "VerSeDa", "TFLink", "hTFtarget", "TRRUST", "JASPAR", "ENCODE", "MSigDB", -"CellChat", "Chromosome", "GeneType", "Enzyme", "TF").} +"CellTalk", "CellChat", "Chromosome", "GeneType", "Enzyme", "TF").} \item{db_IDtypes}{A character vector specifying the desired ID types to be used for gene identifiers in the gene annotation databases. Default is c("symbol", "entrez_id", "ensembl_id").} diff --git a/man/RunGSEA.Rd b/man/RunGSEA.Rd index 01f4406c..c790f650 100644 --- a/man/RunGSEA.Rd +++ b/man/RunGSEA.Rd @@ -48,7 +48,7 @@ If not specified, the \code{geneID} and \code{geneID_groups} arguments must be p \item{DE_threshold}{A character vector specifying the filter condition for differential expression analysis. This argument is only used if \code{srt} is specified.} -\item{scoreType}{This parameter defines the GSEA score type. Possible options are ("std", "pos", "neg"). By default ("std") the enrichment score is computed as in the original GSEA. The "pos" and "neg" score types are intended to be used for one-tailed tests (i.e. when one is interested only in positive ("pos") or negateive ("neg") enrichment).} +\item{scoreType}{This parameter defines the GSEA score type. Possible options are "std", "pos", "neg". By default ("std") the enrichment score is computed as in the original GSEA. The "pos" and "neg" score types are intended to be used for one-tailed tests (i.e. when one is interested only in positive ("pos") or negateive ("neg") enrichment).} \item{geneID}{A character vector specifying the gene IDs.}