-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path.Rhistory
136 lines (136 loc) · 4.7 KB
/
.Rhistory
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
knitr::opts_chunk$set(echo = TRUE)
library(tximport) # Importing quant_sf files
library(GenomicFeatures) #Useful genomics tools
library(BiocManager)
library(rhdf5) #To make an hdf5 files
library(Rtsne)
library(DESeq2) # For normalizing
library(scran) # normalization based on cell clusters
library(SingleCellExperiment) #For holding single-cell RNA_seq data
library(biomaRt) # Switching names from ensemble ID to gene symbol
library(Seurat)
library(ggplot2)
library(cowplot) # For violin plots
library(tximport) # Importing quant_sf files
library(GenomicFeatures) #Useful genomics tools
library(BiocManager)
library(rhdf5) #To make an hdf5 files
library(Rtsne)
library(DESeq2) # For normalizing
library(scran) # normalization based on cell clusters
library(SingleCellExperiment) #For holding single-cell RNA_seq data
library(biomaRt) # Switching names from ensemble ID to gene symbol
library(Seurat)
library(ggplot2)
library(cowplot) # For violin plots
# Define the base directory
base_dir <- "C://Users/jonan/Documents/1Work/scWork/Beta Cell Study/Data/LargeScale/quant_files/upenn1/"
# List all quant.sf files.
# `pattern` is a regular expression matched against file names, so the dot is
# escaped and the name is anchored at both ends; a bare "quant.sf" would also
# accept names such as "quant_sf" or "quant.sf.bak".
quant_files <- list.files(
  base_dir,
  pattern = "^quant\\.sf$",
  full.names = TRUE,
  recursive = TRUE
)
# Peek at the first few paths. R vectors are 1-indexed: `quant_files[0]`
# yields an empty vector, not the first element.
head(quant_files, n = 3)
# Search the help system for regmatches()
??regmatches
# Prototype the path regex on the first file.
# regexec() locates the full match and each capture group; regmatches() then
# extracts the matched text as a list with one character vector per input.
# The dot in "quant.sf" is escaped and the pattern is anchored with `$` so only
# a path ending in a file literally named "quant.sf" matches.
a <- regmatches(
  quant_files[1],
  regexec("([^/]+)_scRNA_(\\d+)_quant/quant\\.sf$", quant_files[1])
)
a
a[1]       # `[` keeps the list wrapper
a[[1]]     # `[[` extracts the character vector of matches
a[[1]][1]  # full match
a[[1]][2]  # first capture group
a[[1]][3]  # second capture group
# R has no dtype()/type(); typeof() and class() report the storage type and
# class, and str() shows the nested structure.
typeof(a)
class(a)
str(a)
# Take the first three paths as a small test vector.
# Subsetting is vectorised, so no loop or pre-declared empty vector is needed;
# seq_len(3) is 1, 2, 3. (range() returns the minimum and maximum of its
# arguments, not a sequence.)
testing <- quant_files[seq_len(3)]
# regexec()/regmatches() are vectorised over their input: the result is a list
# with one character vector per path.
a <- regmatches(
  testing,
  regexec("([^/]+)_scRNA_(\\d+)_quant/quant\\.sf$", testing)
)
str(a)
str(a[1])
str(a[[1]])
# Extract patient names and cell identifiers
#
# filepath: path to a single quant file whose tail looks like
#   <patient>_scRNA_<digits>_quant/quant.sf
# Returns a character vector c(filepath, patient_name, cell_id); the last two
# entries are NA when the path does not match the expected layout.
extract_info <- function(filepath) {
  # The dot is escaped and the pattern anchored with `$` so that only a file
  # literally named "quant.sf" matches (not "quant_sf", "quant.sf.bak", ...).
  pattern <- "([^/]+)_scRNA_(\\d+)_quant/quant\\.sf$"
  # hit is c(full_match, patient, cell), or character(0) when nothing matched,
  # in which case indexing past the end yields NA.
  hit <- regmatches(filepath, regexec(pattern, filepath))[[1]]
  patient_name <- hit[2]
  cell_id <- hit[3]
  c(filepath, patient_name, cell_id)
}
# Extract patient names and cell identifiers
#
# filepath: path to a single quant file whose tail looks like
#   <patient>_scRNA_<digits>_quant/quant.sf
# Returns a character vector c(filepath, patient_name, cell_id); the last two
# entries are NA when the path does not match the expected layout.
extract_info <- function(filepath) {
  # The dot is escaped and the pattern anchored with `$` so that only a file
  # literally named "quant.sf" matches (not "quant_sf", "quant.sf.bak", ...).
  pattern <- "([^/]+)_scRNA_(\\d+)_quant/quant\\.sf$"
  # hit is c(full_match, patient, cell), or character(0) when nothing matched,
  # in which case indexing past the end yields NA.
  hit <- regmatches(filepath, regexec(pattern, filepath))[[1]]
  patient_name <- hit[2]
  cell_id <- hit[3]
  c(filepath, patient_name, cell_id)
}
# Build one metadata row per quant file.
# Fail early with a clear message: with no input files the rbind below
# returns NULL and the later colnames<- call fails obscurely.
stopifnot(length(quant_files) > 0)
# lapply() is the right tool here: quant_files is a plain character vector,
# whereas apply() needs an array and a MARGIN argument.
metadata_list <- lapply(quant_files, extract_info)
View(metadata_list)
# R has no dtype()/type(); typeof() and str() describe the object instead.
typeof(metadata_list)
str(metadata_list)
metadata_list[[1]]
# dplyr must be attached before `%>%` is used
library(dplyr)
# Stack the per-file character vectors as rows of a matrix, then convert it
# to a data frame.
metadata_df <- do.call(rbind, metadata_list) %>% as.data.frame()
colnames(metadata_df) <- c("quant_file", "patient_name", "cell_id")
View(metadata_df)
# Grabbing files address
files <- metadata_df$quant_file
# Reading in the tx2gene dataframe (read once; repeating the call only re-does
# the same file I/O).
# NOTE(review): tximport presumably expects transcript IDs in the first column
# and gene IDs in the second - confirm against the mapping file.
tx2gene <- read.csv("/Documents and Settings/jonan/Documents/Genomes/Homo-Sapiens/GRCh38/tx2gene-Mappings")
# Import the quant files using tximport
txi <- tximport(files, type = "salmon", tx2gene = tx2gene)
# Show only the top-level structure; printing `txi` itself writes every
# matrix in full to the console.
str(txi, max.level = 1)
#DELETE: temporary 300-cell subset used to try the import on a smaller input
# head() takes the first 300 ROWS and truncates when fewer exist.
# `metadata_df[1:300]` would select columns, and `[1:300]` on a vector pads
# with NA past the end.
metadata_df_short <- head(metadata_df, n = 300)
# Take the paths from the subset itself so files and metadata stay aligned
# row for row.
files_short <- metadata_df_short$quant_file
View(metadata_df_short)
#Reading in the tx2gene dataframe
tx2gene <- read.csv("/Documents and Settings/jonan/Documents/Genomes/Homo-Sapiens/GRCh38/tx2gene-Mappings")
# Import the quant files using tximport
txi_short <- tximport(files_short, type = "salmon", tx2gene = tx2gene)
# Assign cell IDs from metadata as column names of the count matrix
# NOTE(review): cell_id is only the numeric part of the directory name; if the
# subset spans more than one patient these may repeat - confirm uniqueness.
colnames(txi_short$counts) <- metadata_df_short$cell_id
# Attach the metadata (patient names, cell IDs, paths) to the tximport result
txi_short$meta <- metadata_df_short
str(txi_short)