Update readme

zhanghao-njmu · Sep 8, 2023 · c267ef9 · c267ef9
1 parent 004b7ae
commit c267ef9
Show file tree

Hide file tree

Showing 19 changed files with 107 additions and 156 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -4,7 +4,7 @@ Title: Single Cell Pipeline
 Version: 0.5.1
 Author: Hao Zhang
 Maintainer: Hao Zhang <[email protected]>
-Description: SCP provides a comprehensive set of tools for single cell data processing and downstream analysis.
+Description: An end-to-end Single-Cell Pipeline designed to facilitate comprehensive analysis and exploration of single-cell data.
 License: GPL (>= 3)
 Encoding: UTF-8
 LazyData: True

diff --git a/R/SCP-analysis.R b/R/SCP-analysis.R
@@ -5405,11 +5405,12 @@ check_python_element <- function(x, depth = maxDepth(x)) {
 #' @export
 #'
 RunPAGA <- function(srt = NULL, assay_X = "RNA", slot_X = "counts", assay_layers = c("spliced", "unspliced"), slot_layers = "counts",
-                    adata = NULL, group_by = NULL, palette = "Paired", palcolor = NULL,
+                    adata = NULL, group_by = NULL,
                     linear_reduction = NULL, nonlinear_reduction = NULL, basis = NULL,
                     n_pcs = 30, n_neighbors = 30, use_rna_velocity = FALSE, vkey = "stochastic",
                     embedded_with_PAGA = FALSE, paga_layout = "fr", threshold = 0.1, point_size = 20,
                     infer_pseudotime = FALSE, root_group = NULL, root_cell = NULL, n_dcs = 10, n_branchings = 0, min_group_size = 0.01,
+                    palette = "Paired", palcolor = NULL,
                     show_plot = TRUE, dpi = 300, save = FALSE, dirpath = "./", fileprefix = "",
                     return_seurat = !is.null(srt)) {
   check_Python("scanpy")
@@ -5532,7 +5533,7 @@ RunPAGA <- function(srt = NULL, assay_X = "RNA", slot_X = "counts", assay_layers
 #' @export
 #'
 RunSCVELO <- function(srt = NULL, assay_X = "RNA", slot_X = "counts", assay_layers = c("spliced", "unspliced"), slot_layers = "counts",
-                      adata = NULL, group_by = NULL, palette = "Paired", palcolor = NULL,
+                      adata = NULL, group_by = NULL,
                       linear_reduction = NULL, nonlinear_reduction = NULL, basis = NULL,
                       mode = "stochastic", fitting_by = "stochastic",
                       magic_impute = FALSE, knn = 5, t = 2,
@@ -5541,6 +5542,7 @@ RunSCVELO <- function(srt = NULL, assay_X = "RNA", slot_X = "counts", assay_laye
                       arrow_length = 5, arrow_size = 5, arrow_density = 0.5,
                       denoise = FALSE, denoise_topn = 3, kinetics = FALSE, kinetics_topn = 100,
                       calculate_velocity_genes = FALSE, top_n = 6, n_jobs = 1,
+                      palette = "Paired", palcolor = NULL,
                       show_plot = TRUE, dpi = 300, save = FALSE, dirpath = "./", fileprefix = "",
                       return_seurat = !is.null(srt)) {
   check_Python("scvelo")
@@ -5625,13 +5627,14 @@ RunSCVELO <- function(srt = NULL, assay_X = "RNA", slot_X = "counts", assay_laye
 #' @export
 #'
 RunPalantir <- function(srt = NULL, assay_X = "RNA", slot_X = "counts", assay_layers = c("spliced", "unspliced"), slot_layers = "counts",
-                        adata = NULL, group_by = NULL, palette = "Paired", palcolor = NULL,
+                        adata = NULL, group_by = NULL,
                         linear_reduction = NULL, nonlinear_reduction = NULL, basis = NULL,
                         n_pcs = 30, n_neighbors = 30, dm_n_components = 10, dm_alpha = 0, dm_n_eigs = NULL,
                         early_group = NULL, terminal_groups = NULL, early_cell = NULL, terminal_cells = NULL,
                         num_waypoints = 1200, scale_components = TRUE, use_early_cell_as_start = TRUE,
                         adjust_early_cell = FALSE, adjust_terminal_cells = FALSE,
                         max_iterations = 25, n_jobs = 8, point_size = 20,
+                        palette = "Paired", palcolor = NULL,
                         show_plot = TRUE, dpi = 300, save = FALSE, dirpath = "./", fileprefix = "",
                         return_seurat = !is.null(srt)) {
   check_Python("palantir")
@@ -5698,15 +5701,13 @@ RunPalantir <- function(srt = NULL, assay_X = "RNA", slot_X = "counts", assay_la
 #' Run WOT analysis
 #' @inheritParams RunSCVELO
 #'
+#' @examples
 #' @return A \code{anndata} object.
 RunWOT <- function(srt = NULL, assay_X = "RNA", slot_X = "counts", assay_layers = c("spliced", "unspliced"), slot_layers = "counts",
-                   adata = NULL, group_by = NULL, palette = "Paired", palcolor = NULL,
-                   linear_reduction = NULL, nonlinear_reduction = NULL, basis = NULL,
-                   n_pcs = 30, n_neighbors = 30, dm_n_components = 10, dm_alpha = 0, dm_n_eigs = NULL,
-                   early_group = NULL, terminal_groups = NULL, early_cell = NULL, terminal_cells = NULL,
-                   num_waypoints = 1200, scale_components = TRUE, use_early_cell_as_start = TRUE,
-                   adjust_early_cell = FALSE, adjust_terminal_cells = FALSE,
-                   max_iterations = 25, n_jobs = 8, point_size = 20,
+                   adata = NULL, group_by = NULL,
+                   time_field = "Time", growth_iters = 3L, tmap_out = "tmaps/tmap_out",
+                   time_from = 1, time_to = NULL, get_coupling = FALSE, recalculate = FALSE,
+                   palette = "Paired", palcolor = NULL,
                    show_plot = TRUE, dpi = 300, save = FALSE, dirpath = "./", fileprefix = "",
                    return_seurat = !is.null(srt)) {
   check_Python("wot")
@@ -5754,15 +5755,15 @@ RunWOT <- function(srt = NULL, assay_X = "RNA", slot_X = "counts", assay_layers
   args[["palette"]] <- palette_scp(levels(groups) %||% unique(groups), palette = palette, palcolor = palcolor)
 
   SCP_analysis <- reticulate::import_from_path("SCP_analysis", path = system.file("python", package = "SCP", mustWork = TRUE), convert = TRUE)
-  adata <- do.call(SCP_analysis$Palantir, args)
+  adata <- do.call(SCP_analysis$WOT, args)
 
   if (isTRUE(return_seurat)) {
     srt_out <- adata_to_srt(adata)
     if (is.null(srt)) {
       return(srt_out)
     } else {
       srt_out1 <- SrtAppend(srt_raw = srt, srt_append = srt_out)
-      srt_out2 <- SrtAppend(srt_raw = srt_out1, srt_append = srt_out, pattern = "(palantir)|(dm_kernel)|(_diff_potential)", overwrite = TRUE, verbose = FALSE)
+      srt_out2 <- SrtAppend(srt_raw = srt_out1, srt_append = srt_out, pattern = "(trajectory_)|(fates_)|(transition_)|(coupling_)", overwrite = TRUE, verbose = FALSE)
       return(srt_out2)
     }
   } else {

diff --git a/R/SCP-feature_annotation.R b/R/SCP-feature_annotation.R
@@ -46,7 +46,11 @@ AnnotateFeatures <- function(srt, species = "Homo_sapiens", IDtype = c("symbol",
       species = species, db = db, db_update = db_update, db_version = db_version, convert_species = convert_species,
       db_IDtypes = IDtype, Ensembl_version = Ensembl_version, mirror = mirror
     )
-    for (single_db in db) {
+    db_notfound <- setdiff(db, names(db_list[[species]]))
+    if (length(db_notfound) > 0) {
+      warning(paste0("The following databases are not found:", paste0(db_notfound, collapse = ",")))
+    }
+    for (single_db in names(db_list[[species]])) {
       TERM2GENE <- unique(db_list[[species]][[single_db]][["TERM2GENE"]])
       TERM2NAME <- unique(db_list[[species]][[single_db]][["TERM2NAME"]])
       rownames(TERM2NAME) <- TERM2NAME[, 1]
@@ -66,7 +70,7 @@ AnnotateFeatures <- function(srt, species = "Homo_sapiens", IDtype = c("symbol",
         }
         db_sub <- db_df[rownames(db_df) %in% rownames(meta.features), , drop = FALSE]
         if (nrow(db_sub) == 0) {
-          stop(paste0("No db data found in the seurat object. Please check if the species name is correct. The expected feature names are ", paste(head(rownames(db_df), 10), collapse = ","), "."))
+          stop(paste0("No data to append was found in the Seurat object. Please check if the species name is correct. The expected feature names are ", paste(head(rownames(db_df), 10), collapse = ","), "."))
         }
         meta.features <- cbind(meta.features, db_sub[rownames(meta.features), setdiff(colnames(db_sub), colnames(meta.features)), drop = FALSE])
         srt[[assay]]@meta.features <- meta.features

diff --git a/R/utils.R b/R/utils.R
@@ -97,7 +97,8 @@ PrepareEnv <- function(conda = "auto", miniconda_repo = "https://repo.anaconda.c
   }
 
   packages <- c(
-    "numpy==1.21.6", "numba==0.55.2", "scikit-learn==1.1.2", "pandas==1.3.5", "python-igraph==0.10.2", "matplotlib==3.6.3", "palantir==1.0.1",
+    "numpy==1.21.6", "numba==0.55.2", "scikit-learn==1.1.2", "pandas==1.3.5", "python-igraph==0.10.2", "matplotlib==3.6.3",
+    "palantir==1.0.1", "wot==1.0.8.post2",
     "scipy", "versioned-hdf5", "leidenalg", "scanpy", "scvelo"
   )
   check_Python(packages = packages, envname = envname, conda = conda, force = force, ...)

diff --git a/README.Rmd b/README.Rmd
@@ -388,6 +388,8 @@ PAGAPlot(srt = pancreas_sub, reduction = "UMAP", label = TRUE, label_insitu = TR
 
 ### Velocity analysis
 
+> To estimate cell velocity, you need to have both "spliced" and "unspliced" assays in your Seurat object. You can generate these matrices using [velocyto](http://velocyto.org/velocyto.py/index.html), [bustools](https://bustools.github.io/BUS_notebooks_R/velocity.html), or [alevin](https://combine-lab.github.io/alevin-fry-tutorials/2021/alevin-fry-velocity/).
+
 ```{r RunSCVELO}
 pancreas_sub <- RunSCVELO(
   srt = pancreas_sub, group_by = "SubCellType",
@@ -408,11 +410,11 @@ VolcanoPlot(srt = pancreas_sub, group_by = "CellType")
 DEGs <- pancreas_sub@tools$DEtest_CellType$AllMarkers_wilcox
 DEGs <- DEGs[with(DEGs, avg_log2FC > 1 & p_val_adj < 0.05), ]
 # Annotate features with transcription factors and surface proteins
-pancreas_sub <- AnnotateFeatures(pancreas_sub, species = "Mus_musculus", db = c("TF", "SP"))
+pancreas_sub <- AnnotateFeatures(pancreas_sub, species = "Mus_musculus", db = c("TF", "CSPA"))
 ht <- FeatureHeatmap(
   srt = pancreas_sub, group.by = "CellType", features = DEGs$gene, feature_split = DEGs$group1,
   species = "Mus_musculus", db = c("GO_BP", "KEGG", "WikiPathway"), anno_terms = TRUE,
-  feature_annotation = c("TF", "SP"), feature_annotation_palcolor = list(c("gold", "steelblue"), c("forestgreen")),
+  feature_annotation = c("TF", "CSPA"), feature_annotation_palcolor = list(c("gold", "steelblue"), c("forestgreen")),
   height = 5, width = 4
 )
 print(ht$plot)
@@ -443,14 +445,15 @@ EnrichmentPlot(
 )
 ```
 
+> To ensure that labels are visible, you can adjust the size of the viewer panel in the RStudio IDE.
+
 ```{r Enrichment_enrichmap, fig.height=9.5,fig.width=15}
 EnrichmentPlot(
   srt = pancreas_sub, group_by = "CellType", group_use = "Ductal",
   plot_type = "enrichmap"
 )
 ```
 
-
 ```{r Enrichment_comparison, fig.height=6}
 EnrichmentPlot(srt = pancreas_sub, group_by = "CellType", plot_type = "comparison")
 ```
@@ -462,7 +465,7 @@ pancreas_sub <- RunGSEA(
   srt = pancreas_sub, group_by = "CellType", db = "GO_BP", species = "Mus_musculus",
   DE_threshold = "p_val_adj < 0.05"
 )
-GSEAPlot(srt = pancreas_sub, group_by = "CellType", group_use = "Endocrine", geneSetID = "GO:0007186")
+GSEAPlot(srt = pancreas_sub, group_by = "CellType", group_use = "Endocrine", id_use = "GO:0007186")
 ```
 
 ```{r GSEA_comparison, fig.height=6}
@@ -487,7 +490,7 @@ ht <- DynamicHeatmap(
   species = "Mus_musculus", db = "GO_BP", anno_terms = TRUE, anno_keys = TRUE, anno_features = TRUE,
   heatmap_palette = "viridis", cell_annotation = "SubCellType",
   separate_annotation = list("SubCellType", c("Nnat", "Irx1")), separate_annotation_palette = c("Paired", "Set1"),
-  feature_annotation = c("TF", "SP"), feature_annotation_palcolor = list(c("gold", "steelblue"), c("forestgreen")),
+  feature_annotation = c("TF", "CSPA"), feature_annotation_palcolor = list(c("gold", "steelblue"), c("forestgreen")),
   pseudotime_label = 25, pseudotime_label_color = "red",
   height = 5, width = 2
 )

diff --git a/README.md b/README.md
@@ -451,6 +451,13 @@ PAGAPlot(srt = pancreas_sub, reduction = "UMAP", label = TRUE, label_insitu = TR
 
 ### Velocity analysis
 
+> To estimate cell velocity, you need to have both “spliced” and
+> “unspliced” assays in your Seurat object. You can generate these
+> matrices using [velocyto](http://velocyto.org/velocyto.py/index.html),
+> [bustools](https://bustools.github.io/BUS_notebooks_R/velocity.html),
+> or
+> [alevin](https://combine-lab.github.io/alevin-fry-tutorials/2021/alevin-fry-velocity/).
+
 ``` r
 pancreas_sub <- RunSCVELO(
   srt = pancreas_sub, group_by = "SubCellType",
@@ -480,11 +487,11 @@ VolcanoPlot(srt = pancreas_sub, group_by = "CellType")
 DEGs <- pancreas_sub@tools$DEtest_CellType$AllMarkers_wilcox
 DEGs <- DEGs[with(DEGs, avg_log2FC > 1 & p_val_adj < 0.05), ]
 # Annotate features with transcription factors and surface proteins
-pancreas_sub <- AnnotateFeatures(pancreas_sub, species = "Mus_musculus", db = c("TF", "SP"))
+pancreas_sub <- AnnotateFeatures(pancreas_sub, species = "Mus_musculus", db = c("TF", "CSPA"))
 ht <- FeatureHeatmap(
   srt = pancreas_sub, group.by = "CellType", features = DEGs$gene, feature_split = DEGs$group1,
   species = "Mus_musculus", db = c("GO_BP", "KEGG", "WikiPathway"), anno_terms = TRUE,
-  feature_annotation = c("TF", "SP"), feature_annotation_palcolor = list(c("gold", "steelblue"), c("forestgreen")),
+  feature_annotation = c("TF", "CSPA"), feature_annotation_palcolor = list(c("gold", "steelblue"), c("forestgreen")),
   height = 5, width = 4
 )
 print(ht$plot)
@@ -534,6 +541,9 @@ EnrichmentPlot(
 
 <img src="README/README-RunEnrichment-4.png" width="100%" style="display: block; margin: auto;" />
 
+> To ensure that labels are visible, you can adjust the size of the
+> viewer panel in the RStudio IDE.
+
 ``` r
 EnrichmentPlot(
   srt = pancreas_sub, group_by = "CellType", group_use = "Ductal",
@@ -556,7 +566,7 @@ pancreas_sub <- RunGSEA(
   srt = pancreas_sub, group_by = "CellType", db = "GO_BP", species = "Mus_musculus",
   DE_threshold = "p_val_adj < 0.05"
 )
-GSEAPlot(srt = pancreas_sub, group_by = "CellType", group_use = "Endocrine", geneSetID = "GO:0007186")
+GSEAPlot(srt = pancreas_sub, group_by = "CellType", group_use = "Endocrine", id_use = "GO:0007186")
 ```
 
 <img src="README/README-RunGSEA-1.png" width="100%" style="display: block; margin: auto;" />
@@ -597,7 +607,7 @@ ht <- DynamicHeatmap(
   species = "Mus_musculus", db = "GO_BP", anno_terms = TRUE, anno_keys = TRUE, anno_features = TRUE,
   heatmap_palette = "viridis", cell_annotation = "SubCellType",
   separate_annotation = list("SubCellType", c("Nnat", "Irx1")), separate_annotation_palette = c("Paired", "Set1"),
-  feature_annotation = c("TF", "SP"), feature_annotation_palcolor = list(c("gold", "steelblue"), c("forestgreen")),
+  feature_annotation = c("TF", "CSPA"), feature_annotation_palcolor = list(c("gold", "steelblue"), c("forestgreen")),
   pseudotime_label = 25, pseudotime_label_color = "red",
   height = 5, width = 2
 )

diff --git a/README/README-DynamicHeatmap-1.png b/README/README-DynamicHeatmap-1.png
diff --git a/README/README-Enrichment_enrichmap-1.png b/README/README-Enrichment_enrichmap-1.png
diff --git a/README/README-FeatureHeatmap-1.png b/README/README-FeatureHeatmap-1.png
diff --git a/README/README-RunCellQC-1.png b/README/README-RunCellQC-1.png
diff --git a/README/README-RunCellQC-2.png b/README/README-RunCellQC-2.png
diff --git a/README/README-RunCellQC-3.png b/README/README-RunCellQC-3.png
diff --git a/README/README-RunEnrichment-4.png b/README/README-RunEnrichment-4.png
diff --git a/README/README-RunKNNPredict-scrna-1.png b/README/README-RunKNNPredict-scrna-1.png