.Rhistory

knitr::opts_chunk$set(echo = TRUE)
library(tximport) # Importing quant_sf files
library(GenomicFeatures) #Useful genomics tools
library(BiocManager)
library(rhdf5) #To make an hdf5 files
library(Rtsne)
library(DESeq2) # For normalizing
library(scran) # normalization based on cell clusters
library(SingleCellExperiment) #For holding single-cell RNA_seq data
library(biomaRt) # Switching names from ensemble ID to gene symbol
library(Seurat)
library(ggplot2)
library(cowplot) # For violin plots
library(tximport) # Importing quant_sf files
library(GenomicFeatures) #Useful genomics tools
library(BiocManager)
library(rhdf5) #To make an hdf5 files
library(Rtsne)
library(DESeq2) # For normalizing
library(scran) # normalization based on cell clusters
library(SingleCellExperiment) #For holding single-cell RNA_seq data
library(biomaRt) # Switching names from ensemble ID to gene symbol
library(Seurat)
library(ggplot2)
library(cowplot) # For violin plots
# Locate the Salmon output and prototype the path-parsing regex ----
# Cleaned-up record of the interactive exploration: failed console attempts
# (unparseable indexing, Python-style dtype()/type()/arange()/[] calls, a
# reference to an undefined `filepath`) are removed so the block parses and
# runs top to bottom. The objects it leaves behind (base_dir, quant_files,
# testing, a) have the same final values as before.

# Define the base directory
base_dir <- "C://Users/jonan/Documents/1Work/scWork/Beta Cell Study/Data/LargeScale/quant_files/upenn1/"
# List all quant.sf files
quant_files <- list.files(base_dir, pattern = "quant.sf", full.names = TRUE, recursive = TRUE)

# Peek at the first few paths. R vectors are 1-based: quant_files[0] is just
# an empty character vector, so it is not inspected here.
quant_files[1]
quant_files[2]
quant_files[3]

# Prototype the regex on a single path (see ?regmatches / ?regexec).
# The directory name is expected to look like <patient>_scRNA_<digits>_quant;
# the "." in "quant.sf" is escaped so it only matches a literal dot.
a <- regmatches(quant_files[1], regexec("([^/]+)_scRNA_(\\d+)_quant/quant\\.sf", quant_files[1]))
# regmatches() on a regexec() result gives a list with one character vector
# per input string: element 1 is the full match, 2 and 3 the capture groups.
str(a)
a[1]        # still a list (length 1)
a[[1]]      # the character vector itself
a[[1]][1]   # full match
a[[1]][2]   # first capture group
a[[1]][3]   # second capture group

# range() returns c(min, max) of its arguments; seq() is what generates an
# integer sequence.
seq(1, 3)

# Same regex applied to the first three paths at once (vectorised subsetting
# instead of growing `testing` element by element in a loop).
testing <- quant_files[seq_len(3)]
a <- regmatches(testing, regexec("([^/]+)_scRNA_(\\d+)_quant/quant\\.sf", testing))
str(a)
str(a[1])
str(a[[1]])
# Extract patient names and cell identifiers
#' Parse the patient name and cell ID out of one quant.sf path
#'
#' The path is expected to contain a directory component of the form
#' `<patient>_scRNA_<digits>_quant` immediately followed by `/quant.sf`.
#'
#' @param filepath A single file path (character string).
#' @return An unnamed character vector of length 3:
#'   `c(filepath, patient_name, cell_id)`. When the path does not match the
#'   expected layout, the last two elements are `NA`.
extract_info <- function(filepath) {
  # The dot is escaped so that only a literal "quant.sf" matches; an
  # unescaped "." would accept any character in that position.
  hit <- regmatches(
    filepath,
    regexec("([^/]+)_scRNA_(\\d+)_quant/quant\\.sf", filepath)
  )[[1]]
  # hit[1] is the full match; hit[2] and hit[3] are the two capture groups.
  # Indexing an empty `hit` (no match) yields NA for both fields.
  c(filepath, hit[2], hit[3])
}
# Build the per-cell metadata table and import the Salmon quantifications ----
# Cleaned-up record of the interactive session: the failed attempts
# (apply() on a plain vector, dtype()/type(), lowercase view(), Python-style
# [:, 1:300] slicing, selecting 300 columns from a 3-column data frame) and
# the repeated re-reads of tx2gene are removed so the block runs top to bottom.

library(dplyr) # attaches %>%; has to come before the first use of the pipe

# Fail early with a clear message instead of a confusing colnames<- error.
stopifnot(length(quant_files) > 0)

# One character vector c(quant_file, patient_name, cell_id) per input path
metadata_list <- lapply(quant_files, extract_info)
View(metadata_list)
str(metadata_list)
metadata_list[[1]]

# Stack the per-file vectors into a 3-column data frame
metadata_df <- do.call(rbind, metadata_list) %>% as.data.frame()
colnames(metadata_df) <- c("quant_file", "patient_name", "cell_id")
View(metadata_df)

# Grabbing files address
files <- metadata_df$quant_file

# Reading in the tx2gene dataframe
tx2gene <- read.csv("/Documents and Settings/jonan/Documents/Genomes/Homo-Sapiens/GRCh38/tx2gene-Mappings")

# Import the quant files using tximport
txi <- tximport(files, type = "salmon", tx2gene = tx2gene)
txi

# DELETE: temporary subset of the first 300 cells for quick testing.
# The file list is taken from the subsetted metadata so that both always have
# the same length and order, even when fewer than 300 files were found
# (files[1:300] would pad with NA in that case while head() would not).
metadata_df_short <- head(metadata_df, n = 300)
files_short <- metadata_df_short$quant_file
metadata_df_short
View(metadata_df_short)

# Import the quant files using tximport
txi_short <- tximport(files_short, type = "salmon", tx2gene = tx2gene)

# Assign cell IDs from metadata as column names of the count matrix
# NOTE(review): only $counts is renamed here; other per-sample matrices in the
# tximport result keep their original column names -- confirm that is intended.
# NOTE(review): cell_id alone may not be unique across patients -- verify.
colnames(txi_short$counts) <- metadata_df_short$cell_id

# Attach the metadata table (file path, patient name, cell ID) to the result
txi_short$meta <- metadata_df_short
txi_short
str(txi_short)