Skip to content

Commit

Permalink
Merge pull request #159 from zhanghao-njmu/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
zhanghao-njmu authored Sep 8, 2023
2 parents c2f6a55 + c267ef9 commit 8996664
Show file tree
Hide file tree
Showing 24 changed files with 243 additions and 200 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Title: Single Cell Pipeline
Version: 0.5.1
Author: Hao Zhang
Maintainer: Hao Zhang <[email protected]>
Description: SCP provides a comprehensive set of tools for single cell data processing and downstream analysis.
Description: An end-to-end Single-Cell Pipeline designed to facilitate comprehensive analysis and exploration of single-cell data.
License: GPL (>= 3)
Encoding: UTF-8
LazyData: True
Expand Down
152 changes: 121 additions & 31 deletions R/SCP-analysis.R

Large diffs are not rendered by default.

15 changes: 5 additions & 10 deletions R/SCP-app.R
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,9 @@ RunSCExplorer <- function(base_dir = "SCExplorer",
}

main_code <- '
if (!file.exists("Rplots.pdf")) {
file.create("Rplots.pdf")
}
data_group <- rhdf5::h5ls(DataFile)$group
meta_group <- rhdf5::h5ls(MetaFile)$group
group <- intersect(data_group, meta_group)
Expand Down Expand Up @@ -1403,11 +1406,7 @@ RunSCExplorer <- function(base_dir = "SCExplorer",
meta_features_name <- rhdf5::h5read(MetaFile, name = paste0("/", dataset2, "/metadata.stat/asfeatures"))
if (is.null(features2)) {
if (is.null(initial_feature)) {
features2 <- initial_feature
} else {
features2 <- meta_features_name[1]
}
features2 <- initial_feature
}
feature_area2 <- gsub(x = unlist(strsplit(feature_area2, "(\\r)|(\\n)", perl = TRUE)), pattern = " ", replacement = "")
features2 <- c(as.character(features2), as.character(feature_area2))
Expand Down Expand Up @@ -1627,11 +1626,7 @@ RunSCExplorer <- function(base_dir = "SCExplorer",
meta_features_name <- rhdf5::h5read(MetaFile, name = paste0("/", dataset4, "/metadata.stat/asfeatures"))
if (is.null(features4)) {
if (is.null(initial_feature)) {
features4 <- initial_feature
} else {
features4 <- meta_features_name[1]
}
features4 <- initial_feature
}
feature_area4 <- gsub(x = unlist(strsplit(feature_area4, "(\\r)|(\\n)", perl = TRUE)), pattern = " ", replacement = "")
features4 <- c(as.character(features4), as.character(feature_area4))
Expand Down
8 changes: 6 additions & 2 deletions R/SCP-feature_annotation.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,11 @@ AnnotateFeatures <- function(srt, species = "Homo_sapiens", IDtype = c("symbol",
species = species, db = db, db_update = db_update, db_version = db_version, convert_species = convert_species,
db_IDtypes = IDtype, Ensembl_version = Ensembl_version, mirror = mirror
)
for (single_db in db) {
db_notfound <- setdiff(db, names(db_list[[species]]))
if (length(db_notfound) > 0) {
warning(paste0("The following databases are not found:", paste0(db_notfound, collapse = ",")))
}
for (single_db in names(db_list[[species]])) {
TERM2GENE <- unique(db_list[[species]][[single_db]][["TERM2GENE"]])
TERM2NAME <- unique(db_list[[species]][[single_db]][["TERM2NAME"]])
rownames(TERM2NAME) <- TERM2NAME[, 1]
Expand All @@ -66,7 +70,7 @@ AnnotateFeatures <- function(srt, species = "Homo_sapiens", IDtype = c("symbol",
}
db_sub <- db_df[rownames(db_df) %in% rownames(meta.features), , drop = FALSE]
if (nrow(db_sub) == 0) {
stop(paste0("No db data found in the seurat object. Please check if the species name is correct. The expected feature names are ", paste(head(rownames(db_df), 10), collapse = ","), "."))
stop(paste0("No data to append was found in the Seurat object. Please check if the species name is correct. The expected feature names are ", paste(head(rownames(db_df), 10), collapse = ","), "."))
}
meta.features <- cbind(meta.features, db_sub[rownames(meta.features), setdiff(colnames(db_sub), colnames(meta.features)), drop = FALSE])
srt[[assay]]@meta.features <- meta.features
Expand Down
3 changes: 2 additions & 1 deletion R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ PrepareEnv <- function(conda = "auto", miniconda_repo = "https://repo.anaconda.c
}

packages <- c(
"numpy==1.21.6", "numba==0.55.2", "scikit-learn==1.1.2", "pandas==1.3.5", "python-igraph==0.10.2", "matplotlib==3.6.3", "palantir==1.0.1",
"numpy==1.21.6", "numba==0.55.2", "scikit-learn==1.1.2", "pandas==1.3.5", "python-igraph==0.10.2", "matplotlib==3.6.3",
"palantir==1.0.1", "wot==1.0.8.post2",
"scipy", "versioned-hdf5", "leidenalg", "scanpy", "scvelo"
)
check_Python(packages = packages, envname = envname, conda = conda, force = force, ...)
Expand Down
13 changes: 8 additions & 5 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,8 @@ PAGAPlot(srt = pancreas_sub, reduction = "UMAP", label = TRUE, label_insitu = TR

### Velocity analysis

> To estimate cell velocity, you need to have both "spliced" and "unspliced" assays in your Seurat object. You can generate these matrices using [velocyto](http://velocyto.org/velocyto.py/index.html), [bustools](https://bustools.github.io/BUS_notebooks_R/velocity.html), or [alevin](https://combine-lab.github.io/alevin-fry-tutorials/2021/alevin-fry-velocity/).
```{r RunSCVELO}
pancreas_sub <- RunSCVELO(
srt = pancreas_sub, group_by = "SubCellType",
Expand All @@ -408,11 +410,11 @@ VolcanoPlot(srt = pancreas_sub, group_by = "CellType")
DEGs <- pancreas_sub@tools$DEtest_CellType$AllMarkers_wilcox
DEGs <- DEGs[with(DEGs, avg_log2FC > 1 & p_val_adj < 0.05), ]
# Annotate features with transcription factors and surface proteins
pancreas_sub <- AnnotateFeatures(pancreas_sub, species = "Mus_musculus", db = c("TF", "SP"))
pancreas_sub <- AnnotateFeatures(pancreas_sub, species = "Mus_musculus", db = c("TF", "CSPA"))
ht <- FeatureHeatmap(
srt = pancreas_sub, group.by = "CellType", features = DEGs$gene, feature_split = DEGs$group1,
species = "Mus_musculus", db = c("GO_BP", "KEGG", "WikiPathway"), anno_terms = TRUE,
feature_annotation = c("TF", "SP"), feature_annotation_palcolor = list(c("gold", "steelblue"), c("forestgreen")),
feature_annotation = c("TF", "CSPA"), feature_annotation_palcolor = list(c("gold", "steelblue"), c("forestgreen")),
height = 5, width = 4
)
print(ht$plot)
Expand Down Expand Up @@ -443,14 +445,15 @@ EnrichmentPlot(
)
```

> To ensure that labels are visible, you can adjust the size of the viewer panel on Rstudio IDE.
```{r Enrichment_enrichmap, fig.height=9.5,fig.width=15}
EnrichmentPlot(
srt = pancreas_sub, group_by = "CellType", group_use = "Ductal",
plot_type = "enrichmap"
)
```


```{r Enrichment_comparison, fig.height=6}
EnrichmentPlot(srt = pancreas_sub, group_by = "CellType", plot_type = "comparison")
```
Expand All @@ -462,7 +465,7 @@ pancreas_sub <- RunGSEA(
srt = pancreas_sub, group_by = "CellType", db = "GO_BP", species = "Mus_musculus",
DE_threshold = "p_val_adj < 0.05"
)
GSEAPlot(srt = pancreas_sub, group_by = "CellType", group_use = "Endocrine", geneSetID = "GO:0007186")
GSEAPlot(srt = pancreas_sub, group_by = "CellType", group_use = "Endocrine", id_use = "GO:0007186")
```

```{r GSEA_comparison, fig.height=6}
Expand All @@ -487,7 +490,7 @@ ht <- DynamicHeatmap(
species = "Mus_musculus", db = "GO_BP", anno_terms = TRUE, anno_keys = TRUE, anno_features = TRUE,
heatmap_palette = "viridis", cell_annotation = "SubCellType",
separate_annotation = list("SubCellType", c("Nnat", "Irx1")), separate_annotation_palette = c("Paired", "Set1"),
feature_annotation = c("TF", "SP"), feature_annotation_palcolor = list(c("gold", "steelblue"), c("forestgreen")),
feature_annotation = c("TF", "CSPA"), feature_annotation_palcolor = list(c("gold", "steelblue"), c("forestgreen")),
pseudotime_label = 25, pseudotime_label_color = "red",
height = 5, width = 2
)
Expand Down
18 changes: 14 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,13 @@ PAGAPlot(srt = pancreas_sub, reduction = "UMAP", label = TRUE, label_insitu = TR

### Velocity analysis

> To estimate cell velocity, you need to have both “spliced” and
> “unspliced” assays in your Seurat object. You can generate these
> matrices using [velocyto](http://velocyto.org/velocyto.py/index.html),
> [bustools](https://bustools.github.io/BUS_notebooks_R/velocity.html),
> or
> [alevin](https://combine-lab.github.io/alevin-fry-tutorials/2021/alevin-fry-velocity/).
``` r
pancreas_sub <- RunSCVELO(
srt = pancreas_sub, group_by = "SubCellType",
Expand Down Expand Up @@ -480,11 +487,11 @@ VolcanoPlot(srt = pancreas_sub, group_by = "CellType")
DEGs <- pancreas_sub@tools$DEtest_CellType$AllMarkers_wilcox
DEGs <- DEGs[with(DEGs, avg_log2FC > 1 & p_val_adj < 0.05), ]
# Annotate features with transcription factors and surface proteins
pancreas_sub <- AnnotateFeatures(pancreas_sub, species = "Mus_musculus", db = c("TF", "SP"))
pancreas_sub <- AnnotateFeatures(pancreas_sub, species = "Mus_musculus", db = c("TF", "CSPA"))
ht <- FeatureHeatmap(
srt = pancreas_sub, group.by = "CellType", features = DEGs$gene, feature_split = DEGs$group1,
species = "Mus_musculus", db = c("GO_BP", "KEGG", "WikiPathway"), anno_terms = TRUE,
feature_annotation = c("TF", "SP"), feature_annotation_palcolor = list(c("gold", "steelblue"), c("forestgreen")),
feature_annotation = c("TF", "CSPA"), feature_annotation_palcolor = list(c("gold", "steelblue"), c("forestgreen")),
height = 5, width = 4
)
print(ht$plot)
Expand Down Expand Up @@ -534,6 +541,9 @@ EnrichmentPlot(

<img src="README/README-RunEnrichment-4.png" width="100%" style="display: block; margin: auto;" />

> To ensure that labels are visible, you can adjust the size of the
> viewer panel on Rstudio IDE.
``` r
EnrichmentPlot(
srt = pancreas_sub, group_by = "CellType", group_use = "Ductal",
Expand All @@ -556,7 +566,7 @@ pancreas_sub <- RunGSEA(
srt = pancreas_sub, group_by = "CellType", db = "GO_BP", species = "Mus_musculus",
DE_threshold = "p_val_adj < 0.05"
)
GSEAPlot(srt = pancreas_sub, group_by = "CellType", group_use = "Endocrine", geneSetID = "GO:0007186")
GSEAPlot(srt = pancreas_sub, group_by = "CellType", group_use = "Endocrine", id_use = "GO:0007186")
```

<img src="README/README-RunGSEA-1.png" width="100%" style="display: block; margin: auto;" />
Expand Down Expand Up @@ -597,7 +607,7 @@ ht <- DynamicHeatmap(
species = "Mus_musculus", db = "GO_BP", anno_terms = TRUE, anno_keys = TRUE, anno_features = TRUE,
heatmap_palette = "viridis", cell_annotation = "SubCellType",
separate_annotation = list("SubCellType", c("Nnat", "Irx1")), separate_annotation_palette = c("Paired", "Set1"),
feature_annotation = c("TF", "SP"), feature_annotation_palcolor = list(c("gold", "steelblue"), c("forestgreen")),
feature_annotation = c("TF", "CSPA"), feature_annotation_palcolor = list(c("gold", "steelblue"), c("forestgreen")),
pseudotime_label = 25, pseudotime_label_color = "red",
height = 5, width = 2
)
Expand Down
Binary file modified README/README-DynamicHeatmap-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified README/README-Enrichment_enrichmap-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified README/README-FeatureHeatmap-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified README/README-RunCellQC-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified README/README-RunCellQC-2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified README/README-RunCellQC-3.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified README/README-RunEnrichment-4.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified README/README-RunKNNPredict-scrna-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
8 changes: 5 additions & 3 deletions SCP.Rproj
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Version: 1.0

RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
RestoreWorkspace: No
SaveWorkspace: No
AlwaysSaveHistory: Yes

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
Expand All @@ -16,3 +16,5 @@ BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageRoxygenize: rd,collate,namespace,vignette

QuitChildProcessesOnExit: Yes
143 changes: 50 additions & 93 deletions inst/python/SCP_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -768,15 +768,15 @@ def Palantir(adata=None, h5ad=None,group_by=None,palette=None,

return adata

def WOT(adata=None, h5ad=None, group_by=None,palette=None, linear_reduction=None, nonlinear_reduction=None,basis=None,
n_pcs=30,n_neighbors=30, use_rna_velocity=False,vkey="stochastic",
embedded_with_PAGA=False,paga_layout="fr", threshold=0.1, point_size=20,
infer_pseudotime = False,root_cell = None,root_group=None,n_dcs = 10, n_branchings = 0, min_group_size = 0.01,
show_plot=True, dpi=300, save=False, dirpath="./", fileprefix=""):
def WOT(adata=None, h5ad=None, group_by=None,palette=None,
time_field = "Time", growth_iters = 3, tmap_out = "tmaps/tmap_out",
time_from = 1, time_to = None, get_coupling = False,recalculate=False,
show_plot=True, dpi=300, save=False, dirpath="./", fileprefix=""):
import matplotlib.pyplot as plt
import scanpy as sc
import numpy as np
import statistics
import pandas as pd
from math import hypot

import warnings
Expand Down Expand Up @@ -810,101 +810,58 @@ def WOT(adata=None, h5ad=None, group_by=None,palette=None, linear_reduction=None
if group_by is None:
print("group_by must be provided.")
exit()

if linear_reduction is None and nonlinear_reduction is None:
print("linear_reduction or nonlinear_reduction must be provided at least one.")
exit()

if linear_reduction is None:
sc.pp.pca(adata, n_comps = n_pcs)
linear_reduction="X_pca"

if basis is None:
if nonlinear_reduction is not None:
basis=nonlinear_reduction
else:
basis="basis"
adata.obsm["basis"]=adata.obsm[linear_reduction][:,0:2]

if point_size is None:
point_size = min(100000 / adata.shape[0],20)

if infer_pseudotime is True and root_cell is None and root_group is None:
print("root_cell or root_group should be provided.")
exit()

if use_rna_velocity is True:
adata.uns["velocity_graph"]=adata.uns[vkey+"_graph"]
# del adata.uns

adata.obs[group_by] = adata.obs[group_by].astype(dtype = "category")

if "X_diffmap" in adata.obsm_keys():
X_diffmap = adata.obsm['X_diffmap']
del adata.obsm['X_diffmap']
sc.pp.neighbors(adata, n_pcs = n_pcs, use_rep = linear_reduction, n_neighbors = n_neighbors)
adata.obsm['X_diffmap'] = X_diffmap
if pd.api.types.is_categorical_dtype(adata.obs[time_field]):
adata.obs["time_field"] = adata.obs[time_field].cat.codes
elif not pd.api.types.is_numeric_dtype(adata.obs[time_field]):
try:
adata.obs['time_field'] = adata.obs[time_field].astype("float")
except ValueError:
print("Unable to convert column '" + time_field + "' to float type.")
else:
sc.pp.neighbors(adata, n_pcs = n_pcs, use_rep = linear_reduction, n_neighbors = n_neighbors)

sc.tl.paga(adata, groups = group_by, use_rna_velocity = use_rna_velocity)
adata.obs["time_field"] = adata.obs[time_field]

time_dict = dict(zip(adata.obs[time_field],adata.obs["time_field"]))
ot_model <- wot.ot.OTModel(adata, growth_iters = growth_iters, day_field = "time_field")

if use_rna_velocity is True:
sc.pl.paga_compare(adata, basis=basis,palette=palette, threshold = threshold, size = point_size, min_edge_width = 1, node_size_scale = 1,
dashed_edges='connectivities', solid_edges='transitions_confidence', transitions='transitions_confidence',
title = basis,frameon = False, edges = True, save = False, show = False)
if recalculate is True:
ot_model.compute_all_transport_maps(tmap_out = tmap_out)
tmap_model = wot.tmap.TransportMapModel.from_directory(tmap_out)
else:
sc.pl.paga_compare(adata, basis=basis,palette=palette, threshold = threshold, size = point_size, title = basis,frameon = False, edges = True, save = False, show = False)
if show_plot is True:
plt.show()
if save:
plt.savefig('.'.join(filter(None, [fileprefix, "paga_compare.png"])), dpi=dpi)
try:
tmap_model = wot.tmap.TransportMapModel.from_directory(tmap_out)
except ValueError:
ot_model.compute_all_transport_maps(tmap_out = tmap_out)
tmap_model = wot.tmap.TransportMapModel.from_directory(tmap_out)

cell_sets = {}
for k, v in zip(adata.obs[group_by], adata.obs_names):
if k not in cell_sets:
cell_sets[k] = []
cell_sets[k].append(v)

sc.pl.paga(adata, threshold = threshold, layout = paga_layout, title = "PAGA layout: " + paga_layout, frameon = False, save = False, show = False)
if show_plot is True:
plt.show()
if save:
plt.savefig('.'.join(filter(None, [fileprefix, "paga_layout.png"])), dpi=dpi)

sc.tl.draw_graph(adata, init_pos = "paga", layout = paga_layout)
sc.pl.draw_graph(adata, color=group_by,palette=palette, title = "PAGA layout: " + paga_layout, layout = paga_layout, frameon = False, legend_loc="on data",show = False)
if show_plot is True:
plt.show()
if save:
plt.savefig('.'.join(filter(None, [fileprefix, "paga_graph.png"])), dpi=dpi)

if embedded_with_PAGA is True:
umap2d = sc.tl.umap(adata, init_pos = "paga", n_components=2, copy = True)
adata.obsm["PAGAUMAP2D"] = umap2d.obsm["X_umap"]
sc.pl.paga_compare(adata, basis = "PAGAUMAP2D",palette=palette, threshold = threshold, size = point_size, title = "PAGA-initialized UMAP", edges = True, save = False, show = False)
if show_plot is True:
plt.show()
if save:
plt.savefig('.'.join(filter(None, [fileprefix, "paga_umap.png"])), dpi=dpi)

if infer_pseudotime is True:
if root_group is not None and root_cell is None:
cell = adata.obs[group_by].index.values[adata.obs[group_by]==root_group]
root_group_cell=adata.obsm[basis][adata.obs[group_by]==root_group,][:, [0, 1]]
x=statistics.median(root_group_cell[:,0])
y=statistics.median(root_group_cell[:,1])
diff=np.array((x - root_group_cell[:,0], y - root_group_cell[:,1]))
dist=[]
for i in range(diff.shape[1]):
dist.append(hypot(diff[0,i],diff[1,i]))
from_populations = tmap_model.population_from_cell_sets(cell_sets, at_time = time_dict[time_from])

trajectory_ds = tmap_model.trajectories(from_populations)
trajectory_df = pd.DataFrame(trajectory_ds.X, index=trajectory_ds.obs_names, columns=trajectory_ds.var_names)
adata.uns["trajectory_"+str(time_from)]= trajectory_df

root_cell=cell[dist.index(min(dist))]

sc.tl.diffmap(adata, n_comps = n_dcs)
adata.uns['iroot'] = np.flatnonzero(adata.obs_names == root_cell)[0]
sc.tl.dpt(adata, n_dcs = n_dcs, n_branchings = n_branchings, min_group_size = min_group_size)

sc.pl.embedding(adata, basis = basis, color = "dpt_pseudotime", save = False, show = False)
if show_plot is True:
plt.show()
if save:
plt.savefig('.'.join(filter(None, [fileprefix, "dpt_pseudotime.png"])), dpi=dpi)

fates_ds = tmap_model.fates(from_populations)
fates_df = pd.DataFrame(fates_ds.X, index=fates_ds.obs_names, columns=fates_ds.var_names)
adata.uns["fates_"+str(time_from)]= fates_df

# obs_list = wot.tmap.trajectory_trends_from_trajectory(trajectory_ds = trajectory_ds, expression_ds = adata)

if time_to is not None:
to_populations = tmap_model.population_from_cell_sets(cell_sets, at_time = time_dict[time_to])
transition_table = tmap_model.transition_table(from_populations, to_populations)
transition_df = pd.DataFrame(transition_table.X, index=transition_table.obs_names, columns=transition_table.var_names)
adata.uns["transition_"+str(time_from)+"_to_"+str(time_to)]= fates_df
if get_coupling is True:
coupling = tmap_model.get_coupling(time_dict[time_from], time_dict[time_to])
coupling_df = pd.DataFrame(coupling.X, index=coupling.obs_names, columns=coupling.var_names)
adata.uns["coupling_"+str(time_from)+"_to_"+str(time_to)]= coupling_df

finally:
os.chdir(prevdir)
Expand Down
9 changes: 2 additions & 7 deletions man/ListDB.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 8996664

Please sign in to comment.