-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrajectoryTutorial.Rmd
218 lines (172 loc) · 6.21 KB
/
trajectoryTutorial.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
---
title: "Building a trajectory for use with PPR"
author: "Moi Taiga Nicholas"
date: "`r Sys.Date()`"
output: github_document
---
<!-- trajectoryTutorial.md is generated from trajectoryTutorial.Rmd. Please edit that file -->
```{r, include = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
fig.path = "man/figures/TRAJ-",
out.width = "100%",
echo = TRUE,
eval = FALSE
)
```
# Building a trajectory for use with PPR
## Introduction
This tutorial covers how to build a trajectory for use with PPR.
We will use cellxgene to download single-cell RNA-seq data,
We will use `Seurat` to perform basic preprocessing and dimensionality reduction.
We will use the `slingshot` package to infer the trajectory.
We will use `GeneSwitches` to identify switching genes.
## activate the shared conda environment
```{bash}
conda activate /mainfs/ddnb/hack_days/mar25/conda/pprTraj
```
## Load the Required Packages
```{r install_packages, eval=FALSE}
library("Seurat")
library("SingleCellExperiment")
library("slingshot")
library("GeneSwitches")
#library("PathPinpointR")
library(devtools)
load_all()
```
## Downloading Data from CellxGene
Use `cellxgene` to retrieve data manually,
Choose a dataset which represents a simple trajectory,
download the data in the form of a Seurat object.
## load data
```{r load_data}
# load the data
seu <- readRDS("/mainfs/ddnb/hack_days/mar25/data/microgliaCSF1R.rds")
# # set the identity class to Braak.stage
# Idents(seu) <- "Braak.stage"
# # downsample the data
# seu <- subset(x = seu, downsample = 100)
```
## Basic Seurat Preprocessing
Run standard Seurat preprocessing steps: (if neccecary)
```{r preprocessing}
# view the umap using seurat
DimPlot(seu, reduction = "umap", group.by = "Braak.stage")
# seu <- NormalizeData(seu)
# seu <- FindVariableFeatures(seu, selection.method = "vst", nfeatures = 2000)
# seu <- ScaleData(seu)
# seu <- RunPCA(seu)
```
## Convert Seurat object to SingleCellExperiment
```{r convert_to_sce}
sce <- SingleCellExperiment(assays = list(expdata = seu@assays$RNA$counts))
colData(sce) <- DataFrame([email protected])
reducedDims(sce)$UMAP <- seu@[email protected]
```
## Trajectory Inference with Slingshot
```{r slingshot}
sce <- slingshot(sce,
clusterLabels = "Braak.stage",
#start.clus = "2",
#end.clus = "1",
reducedDim = "UMAP")
#Rename the Pseudotime column to work with GeneSwitches
colData(sce)$Pseudotime <- sce$slingPseudotime_1
# plot the trajectory
library("RColorBrewer")
# Generate colors
colors <- colorRampPalette(brewer.pal(11, "Spectral")[-6])(100)
plotcol <- colors[cut(sce$slingPseudotime_1, breaks = 100)]
# Plot the data
plot(reducedDims(sce)$UMAP, col = plotcol, pch = 16, asp = 1)
lines(SlingshotDataSet(sce), lwd = 2, col = "black")
```
## Identify switching genes with GeneSwitches
```{r geneSwitches}
# Identify a cutoff for binarization, by plotting a histogram of gene expression
hist(as.numeric(as.matrix(assays(sce)$expdata)),
freq = FALSE,
breaks = 1000,
main = "Histogram of expression values",
xlab = "Expression value",
ylab = "Density",
xlim = c(0, 6),
ylim = c(0, 1))
# vertical line at 1
abline(v = 0.5, col = "red")
# using the cutoff of 0.5, binarize the expression data
sce <- binarize_exp(sce,
fix_cutoff = TRUE,
binarize_cutoff = 0.5,
ncores = 1)
# Find the switching point of each gene in the sce
sce <- find_switch_logistic_fastglm(sce,
downsample = FALSE,
show_warning = FALSE)
# visualise the switching genes
switching_genes <- filter_switchgenes(sce,
allgenes = TRUE,
r2cutoff = 0)
# Plot the timeline using plot_timeline_ggplot
plot_timeline_ggplot(switching_genes,
timedata = colData(sce)$Pseudotime,
txtsize = 3)
# save as rds
saveRDS(sce, "/mainfs/ddnb/hack_days/mar25/data/TMPsce.rds")
saveRDS(switching_genes, "/mainfs/ddnb/hack_days/mar25/data/TMPswitching_genes.rds")
# load from rds
sce <- readRDS("/mainfs/ddnb/hack_days/mar25/data/TMPsce.rds")
switching_genes <- readRDS("/mainfs/ddnb/hack_days/mar25/data/TMPswitching_genes.rds")
```
## Filter switching genes further
```{r filter_switching_genes}
#run PPR:precison
precision <- precision(sce, plot = FALSE)
# zoom in
precision <- precision(sce, n_sg_range = seq(50, 170, 10), plot = FALSE)
#
precision <- precision(sce, n_sg_range = seq(80, 120, 2), plot = FALSE)
# visualise the switching genes
switching_genes <- filter_switchgenes(sce,
allgenes = TRUE,
r2cutoff = 0,
topnum = 92)
```
## Save the trajectory
```{r save_trajectory}
# the information we need for PPR can be contained in a single table, save as csv
write.csv(switching_genes, "/mainfs/ddnb/hack_days/mar25/traj/microgliaCSF1R.csv")
#
switching_genes <- read.csv("/mainfs/ddnb/hack_days/mar25/traj/microgliaCSF1R.csv")
```
## Make a proxy sample.
```{r load_data}
# extract 50 samples from sce.
```{r}
# order the cell in sce by their pseudotime
sce <- sce[, order(colData(sce)$Pseudotime)]
# extract 50 cells
sce_proxy <- sce[, 50:100]
```
## run PPR
```{r run_ppr}
# First reduce the sample data to only include the switching genes.
sample_sce <- reduce_counts_matrix(sce_proxy, switching_genes)
# binarize the expression data of the samples
sample_sce <- binarize_exp(sample_sce,
fix_cutoff = TRUE,
binarize_cutoff = 0.5,
ncores = 1)
ppr <- predict_position(sample_sce, switching_genes)
## Plotting the predicted position of each sample:
#plot the predicted position of each sample on the reference trajectory.
# show the predicted position of the first sample
# include the position of cells in the reference data, by a given label.
ppr_plot() +
sample_prediction(ppr, label = "Sample 1", col = "red")
#A simpler plot of your results can be generated with the function ppr_vioplot().
ppr_vioplot(ppr, sce, ident = "Braak.stage")
```
# Reference Idents will need to be included in switching_genes.