.Rhistory

cat.dist = 0.07,
cat.fontfamily = "serif",
rotation.degree = 270,
margin = 0.2)
class(dea %>% filter(logFC< -1) %>% pull(ENTREZ_GENE_ID))
class(deb %>% filter(logFC< -1) %>% pull(ENTREZ_GENE_ID))
class(mdsc %>% filter(logFC< 0) %>% pull(ENTREZID)) %>% as.character())
v1=venn.diagram(list(dea %>% filter(logFC< -1) %>% pull(ENTREZ_GENE_ID),
v1=venn.diagram(list(dea %>% filter(logFC< -1) %>% pull(ENTREZ_GENE_ID),
deb %>% filter(logFC< -1) %>% pull(ENTREZ_GENE_ID),
mdsc %>% filter(logFC< 0) %>% pull(ENTREZID) %>% as.character()),
filename = NULL,
col = "transparent",
fill = c("cornflowerblue", "green", "yellow"),
alpha = 0.50,
cex = 1.5,
fontfamily = "serif",
fontface = "bold",
cat.col = c("darkblue", "darkgreen", "orange"),
cat.cex = 1.5,
cat.pos = 0,
cat.dist = 0.07,
cat.fontfamily = "serif",
rotation.degree = 270,
margin = 0.2)
list(dea %>% filter(logFC>1) %>% pull(`Gene Symbol`),
deb %>% filter(logFC>1) %>% pull(`Gene Symbol`),
mdsc %>% filter(logFC>0) %>% pull(SYMBOL) %>% as.character())
class(dea %>% filter(logFC>1) %>% pull(`Gene Symbol`))
class(deb %>% filter(logFC>1) %>% pull(`Gene Symbol`))
class(mdsc %>% filter(logFC>0) %>% pull(SYMBOL) %>% as.character())
v1=venn.diagram(list(dea %>% filter(logFC>1) %>% pull(`Gene Symbol`),
deb %>% filter(logFC>1) %>% pull(`Gene Symbol`),
mdsc %>% filter(logFC>0) %>% pull(SYMBOL) %>% as.character()),
filename = NULL,
col = "transparent",
fill = c("cornflowerblue", "green", "yellow"),
alpha = 0.50,
cex = 1.5,
fontfamily = "serif",
fontface = "bold",
cat.col = c("darkblue", "darkgreen", "orange"),
cat.cex = 1.5,
cat.pos = 0,
cat.dist = 0.07,
cat.fontfamily = "serif",
rotation.degree = 270,
margin = 0.2)
head(mdsc %>% filter(logFC>0) %>% pull(SYMBOL) %>% as.character())
v1=venn.diagram(list(data1=dea %>% filter(logFC>1) %>% pull(`Gene Symbol`),
data2=deb %>% filter(logFC>1) %>% pull(`Gene Symbol`),
data3=mdsc %>% filter(logFC>0) %>% pull(SYMBOL) %>% as.character()),
filename = NULL,
col = "transparent",
fill = c("cornflowerblue", "green", "yellow"),
alpha = 0.50,
cex = 1.5,
fontfamily = "serif",
fontface = "bold",
cat.col = c("darkblue", "darkgreen", "orange"),
cat.cex = 1.5,
cat.pos = 0,
cat.dist = 0.07,
cat.fontfamily = "serif",
rotation.degree = 270,
margin = 0.2)
dev.off()
grid.draw(v1)
Reduce(intersect, list(dea %>% filter(logFC>1) %>% pull(`Gene Symbol`),
deb %>% filter(logFC>1) %>% pull(`Gene Symbol`),
mdsc %>% filter(logFC>0) %>% pull(SYMBOL)))
Reduce(intersect, list(dea %>% filter(logFC< -1) %>% pull(`Gene Symbol`),
deb %>% filter(logFC< -1) %>% pull(`Gene Symbol`),
mdsc %>% filter(logFC< 0) %>% pull(SYMBOL)))
Reduce(intersect, list(dea %>% filter(logFC>1) %>% pull(`Gene Symbol`),
deb %>% filter(logFC>1) %>% pull(`Gene Symbol`),
mdsc %>% filter(logFC>1) %>% pull(SYMBOL)))
Reduce(intersect, list(dea %>% filter(logFC< -1) %>% pull(`Gene Symbol`),
deb %>% filter(logFC< -1) %>% pull(`Gene Symbol`),
mdsc %>% filter(logFC< -1) %>% pull(SYMBOL)))
Reduce(intersect, list(dea %>% filter(logFC>1) %>% pull(`Gene Symbol`),
deb %>% filter(logFC>1) %>% pull(`Gene Symbol`),
mdsc %>% filter(logFC>0) %>% pull(SYMBOL)))
Reduce(intersect, list(dea %>% filter(logFC< -1) %>% pull(`Gene Symbol`),
deb %>% filter(logFC< -1) %>% pull(`Gene Symbol`),
mdsc %>% filter(logFC< 0) %>% pull(SYMBOL)))
v2=venn.diagram(list(data1=dea %>% filter(logFC< -1) %>% pull(`Gene Symbol`),
data2=deb %>% filter(logFC< -1) %>% pull(`Gene Symbol`),
data3=mdsc %>% filter(logFC<0) %>% pull(SYMBOL) %>% as.character()),
filename = NULL,
col = "transparent",
fill = c("cornflowerblue", "green", "yellow"),
alpha = 0.50,
cex = 1.5,
fontfamily = "serif",
fontface = "bold",
cat.col = c("darkblue", "darkgreen", "orange"),
cat.cex = 1.5,
cat.pos = 0,
cat.dist = 0.07,
cat.fontfamily = "serif",
rotation.degree = 270,
margin = 0.2)
dev.off()
grid.draw(v2)
16.98-3.5+13.99-3+39.99+49.79+47.92+35.21+11.29+4.99+11.29+17.89-2.5+11.29+9.79
16.98-3.5+13.99-3+39.99+49.79+47.92+35.21+11.29+4.99+11.29+17.89-2.5+11.29+9.79+9.49+24.99
16.98-3.5+13.99-3+39.99+49.79+11.99/2+38.73/3+47.92/2+35.21+11.29+4.99+11.29+17.89-2.5+11.29+9.79+9.49+24.99+9.99/2
install.packages("textmineR")
library(textmineR)
# load nih_sample data set from textmineR
data(nih_sample)
# create a document term matrix
dtm <- CreateDtm(doc_vec = nih_sample$ABSTRACT_TEXT, # character vector of documents
doc_names = nih_sample$APPLICATION_ID, # document names
ngram_window = c(1, 2), # minimum and maximum n-gram length
stopword_vec = c(stopwords::stopwords("en"), # stopwords from tm
stopwords::stopwords(source = "smart")), # this is the default value
lower = TRUE, # lowercase - this is the default value
remove_punctuation = TRUE, # punctuation - this is the default
remove_numbers = TRUE, # numbers - this is the default
verbose = FALSE, # Turn off status bar for this demo
cpus = 2) # default is all available cpus on the system
# construct the matrix of term counts to get the IDF vector
tf_mat <- TermDocFreq(dtm)
tfidf <- t(dtm[ , tf_mat$term ]) * tf_mat$idf
tfidf <- t(tfidf)
tfidf
csim <- tfidf / sqrt(rowSums(tfidf * tfidf))
csim <- csim %*% t(csim)
csim
?csim
class(csim)
?dgCMatrix
??dgCMatrix
devtools::install_github("IOR-Bioinformatics/PCSF", repos=BiocInstaller::biocinstallRepos(),
dependencies=TRUE, type="source", force=TRUE)
library(survival)
?matlines
fdata <- flchain[flchain$futime >=7,]
fdata$age2 <- cut(fdata$age, c(0,54, 59,64, 69,74,79, 89, 110),
labels = c(paste(c(50,55,60,65,70,75,80),
c(54,59,64,69,74,79,89), sep='-'), "90+"))
fdata$group <- factor(1+ 1*(fdata$flc.grp >7) + 1*(fdata$flc.grp >9),
levels=1:3,
labels=c("FLC < 3.38", "3.38 - 4.71", "FLC > 4.71"))
sfit1 <- survfit(Surv(futime, death) ~ group, fdata)
plot(sfit1, mark.time=F, col=c(1,2,4), lty=1, lwd=2,
xscale=365.25, xlab="Years from Sample",
ylab="Survival")
text(c(11.1, 10.5, 7.5)*365.25, c(.88, .57, .4),
c("FLC < 3.38", "3.38 - 4.71", "FLC > 4.71"), col=c(1,2,4))
allfit <- survfit(Surv(futime, death) ~ group +
age2 + sex, fdata)
temp <- summary(allfit)$table
temp[1:6, c(1,4)]
colnames(temp)
xtime <- seq(0, 14, length=57)*365.25
smat <- matrix(0, nrow=57, ncol=3)
serr <- smat
pi <- with(fdata, table(age2, sex))/nrow(fdata)
# Direct-adjusted (marginal) survival curves.
# For each of the 3 FLC groups, the 16 stratum-specific curves held in
# `allfit` (16 consecutive curves per group) are averaged using the weights
# in `pi`; the result is accumulated column-wise into `smat`.
# `serr` accumulates the variance of that weighted average; the caller takes
# the square root afterwards.
for (i in 1:3) {
  temp <- allfit[1:16 + (i - 1) * 16]  # curves for group i
  for (j in 1:16) {
    stemp <- summary(temp[j], times = xtime, extend = TRUE)
    smat[, i] <- smat[, i] + pi[j] * stemp$surv
    # The weight must follow the stratum index j (the original used the
    # group index i). For a weighted sum of separately estimated curves with
    # fixed weights, Var(sum_j w_j * S_j) = sum_j w_j^2 * Var(S_j), hence the
    # squared weight.
    serr[, i] <- serr[, i] + pi[j]^2 * stemp$std.err^2
  }
}
serr <- sqrt(serr)
plot(sfit1, lty=2, col=c(1,2,4), xscale=365.25,
xlab="Years from sample", ylab="Survival")
matlines(xtime, smat, type='l', lwd=2, col=c(1,2,4),lty=1)
survdiff(Surv(futime, death) ~ group + strata(age2, sex), fdata)
survdiff(Surv(futime, death) ~ group + age2+sex, fdata)
cfit4a <- coxph(Surv(futime, death) ~ age + sex + strata(group),
data=fdata)
surv4a <- survfit(cfit4a)
plot(surv4a, col=c(1,2,4), mark.time=F, xscale=365.25,
xlab="Years post sample", ylab="Survival")
tab4a <- with(fdata, table(age, sex))
tab4a
uage <- as.numeric(dimnames(tab4a)[[1]])
tdata <- data.frame(age = uage[row(tab4a)],
sex = c("F","M")[col(tab4a)],
count= c(tab4a))
uage
tdata3 <- tdata[rep(1:nrow(tdata), 3),]
tdata3$group <- factor(rep(1:3, each=nrow(tdata)),
labels=levels(fdata$group))
head(tdata3)
sfit4a <- survexp(~group, data=tdata3, weight = count,
ratetable=cfit4a)
plot(sfit4a, mark.time=F, col=c(1,2,4), lty=1, lwd=2,
xscale=365.25, xlab="Years from Sample",
ylab="Survival")
lines(sfit3, mark.time=F, col=c(1,2,4), lty=2, lwd=1,
xscale=365.25)
tfit <- survfit(cfit4a, newdata=tdata, se.fit=FALSE)
curves <- vector('list', 3)
twt <- c(tab4a)/sum(tab4a)
for (i in 1:3) {
temp <- tfit[i,]
curves[[i]] <- list(time=temp$time, surv= c(temp$surv %*% twt))
}
library(BiocManager)
BiocManager::install(c("ATACseqQC", "ChIPpeakAnno", "MotifDb", "GenomicAlignments",
"BSgenome.Hsapiens.UCSC.hg19", "TxDb.Hsapiens.UCSC.hg19.knownGene",
"phastCons100way.UCSC.hg19"))
install.packages("statnet")
install.packages("tergm")
install.packages("statnet")
25.5+5
30.5*1.3
30.5*1.3*5
(38+5)*1.3*5
37.5+4.5
42*1.3*5
37.5+4.5
42*1.3*5
25+4.5
29.8*1.3*5
47.7*0.93
library(ATACseqQC)
## input the bamFile from the ATACseqQC package
bamfile <- system.file("extdata", "GL1.bam",
package="ATACseqQC", mustWork=TRUE)
bamfile.labels <- gsub(".bam", "", basename(bamfile))
source(system.file("extdata", "IGVSnapshot.R", package = "ATACseqQC"))
estimateLibComplexity(readsDupFreq(bamfile))
fragSize <- fragSizeDist(bamfile, bamfile.labels)
possibleTag <- combn(LETTERS, 2)
possibleTag <- c(paste0(possibleTag[1, ], possibleTag[2, ]),
paste0(possibleTag[2, ], possibleTag[1, ]))
library(Rsamtools)
bamTop100 <- scanBam(BamFile(bamfile, yieldSize = 100),
param = ScanBamParam(tag=possibleTag))[[1]]$tag
tags <- names(bamTop100)[lengths(bamTop100)>0]
tags
outPath <- "splited"
dir.create(outPath)
## shift the coordinates of 5'ends of alignments in the bam file
library(BSgenome.Hsapiens.UCSC.hg19)
seqlev <- "chr1" ## subsample data for quick run
which <- as(seqinfo(Hsapiens)[seqlev], "GRanges")
gal <- readBamFile(bamfile, tag=tags, which=which, asMates=TRUE, bigFile=TRUE)
shiftedBamfile <- file.path(outPath, "shifted.bam")
gal1 <- shiftGAlignmentsList(gal, outbam=shiftedBamfile)
?shiftGAlignmentsList
sessionInfo()
library(BiocManager)
install.packages("BiocManager")
library(BiocManager)
BiocManager::install(c("ATACseqQC"))
sessionInfo()
37.5+4.5
42*1.3*5
22+4.5
26.5*1.3*5
22+4.5
26.5*1.3*5
26.5*5
180/132
?BiocManager
library(tidyverse)
dep <- read_delim("../Desktop/gene_effect_corrected.csv")
dep <- read_delim("../Desktop/gene_effect_corrected.csv", delim = "\t")
dep <- read_delim("../Desktop/gene_effect_corrected.csv", delim = ",")
dep2 <- read_delim("../Desktop/Achilles_gene_effect.csv", delim = ",")
dep2[1,] %>% hist()
dep2[1,]
dep2[1,-1] %>% hist
dep2[1,-1] %>% class
dep2[1,-1] %>% as.vector() %>% hed
dep2[1,-1] %>% as.vector() %>% head
dep2[1,-1] %>% as.matrix() %>% head
dep3 <- t(dep2)
dep3[1:5,1:5]
dep2[1:5,1:5]
dep2 <- dep2[,-1]
dep2[1:5,1:5]
?read_delim
dep2 <- read_csv("../Desktop/Achilles_gene_effect.csv")
rns <- dep2$X1
dep2 <- dep2[,-1]
rownames(dep2) <- rns
dep2[1:5,1:5]
dep2 <- dep2[,-1] %>% data.frame()
dep2 <- read_csv("../Desktop/Achilles_gene_effect.csv")
rns <- dep2$X1
dep2 <- dep2[,-1] %>% data.frame()
rownames(dep2) <- rns
dep2[1:5,1:5]
dep3 <- t(dep2)
dep3$`ACH-000880` %>% head
colnames(dep3)[1:5]
dep3$ACH-000880 %>% head
which(colnames(dep3)=="ACH-000880")
dep3[,416] %>% hist()
dep3[,416] %>% order() %>% head(n=10)
?order
dep3[,416] %>% order(decreasing = T) %>% head(n=10)
rownames(dep3)[dep3[,416] %>% order(decreasing = T) %>% head(n=10)]
dep3[dep3[,416] %>% order(decreasing = T) %>% head(n=10), 416]
dep3["GMDS..2762.", 416]
dep3[dep3[,416] %>% order(decreasing = F) %>% head(n=10), 416]
BiocManager::install("UNDO")
library(UNDO)
data(NumericalMixMCF7HS27)
X <- NumericalMixMCF7HS27
data(NumericalMixingMatrix)
A <- NumericalMixingMatrix
dim(X)
x
X
exprs(X)
data(PureMCF7HS27)
S <- exprs(PureMCF7HS27)
head(S)
?two_source_deconv
two_source_deconv(X)
library("genomation", lib.loc="C:/Program Files/R/R-3.5.0/library")
library(genomationData)
genomationDataPath = system.file("extdata", package = "genomationData")
sampleInfo = read.table(file.path(genomationDataPath, "SamplesInfo.txt"), header = TRUE,
sep = "\t", stringsAsFactors = FALSE)
peak.files = list.files(genomationDataPath, full.names = TRUE, pattern = "broadPeak")
names(peak.files) = sampleInfo$sampleName[match(basename(peak.files), sampleInfo$fileName)]
ctcf.peaks = readBroadPeak(peak.files["Ctcf"])
BiocManager::install("genomationData")
library(genomationData)
genomationDataPath = system.file("extdata", package = "genomationData")
sampleInfo = read.table(file.path(genomationDataPath, "SamplesInfo.txt"), header = TRUE,
sep = "\t", stringsAsFactors = FALSE)
peak.files = list.files(genomationDataPath, full.names = TRUE, pattern = "broadPeak")
names(peak.files) = sampleInfo$sampleName[match(basename(peak.files), sampleInfo$fileName)]
ctcf.peaks = readBroadPeak(peak.files["Ctcf"])
ctcf.peaks
data(cpgi)
cpgi
peak.annot = annotateWithFeature(ctcf.peaks, cpgi, intersect.chr = TRUE)
peak.annot
?annotateWithFeature
178/2
11*5*1.3
1100/567000
library(shiny); runApp('~/dotplot.R')
runApp('~/dotplot.R')
runApp('~/dotplot.R')
?fluidRow
?tableOutput
?verbatimTextOutput
if (interactive()) {
shinyApp(
ui = basicPage(
textInput("txt", "Enter the text to display below:"),
verbatimTextOutput("default"),
verbatimTextOutput("placeholder", placeholder = TRUE)
),
server = function(input, output) {
output$default <- renderText({ input$txt })
output$placeholder <- renderText({ input$txt })
}
)
}
runApp('~/dotplot.R')
?h3
runApp('~/dotplot.R')
runApp('~/dotplot.R')
runApp('~/dotplot.R')
runApp('~/dotplot.R')
runApp('~/dotplot.R')
?switch
?switch
?reactiveTable
?reactiveFileReader
library(DT)
runApp('~/dotplot.R')
?renderDataTable
runApp('~/dotplot.R')
runApp('~/dotplot.R')
runApp('~/data analysis/christy/stomach_pdl1/ui.R')
runApp('~/data analysis/SLE/ui.R')
?read.csv
library(tidyverse)
?read_csv
runApp('~/dotplot.R')
runApp('~/dotplot.R')
runApp('~/dotplot.R')
?tabPanel
?withCallingHandlers
withCallingHandlers({ warning("A"); 1+2 }, warning = function(w) {})
bitr("TET2", fromType = "SYMBOL", toType = "ENTREZID", OrgDb = org.Hs.eg.db)
runApp('~/dotplot.R')
runApp('~/dotplot.R')
runApp('~/dotplot.R')
runApp('~/dotplot.R')
runApp('~/dotplot.R')
runApp('~/dotplot.R')
runApp('~/dotplot.R')
runApp('~/dotplot.R')
?reactive
runApp('~/dotplot.R')
runApp('~/dotplot.R')
runApp('~/dotplot.R')
runApp('~/dotplot.R')
?enrichKEGG
runApp('~/dotplot.R')
runApp('~/dotplot.R')
runApp('~/dotplot.R')
runApp('~/dotplot.R')
runApp('~/dotplot.R')
?dotplot
?dotplot
runApp('~/dotplot.R')
runApp('~/dotplot.R')
?br
runApp('~/dotplot.R')
}
shinyApp(
ui = fluidPage(
selectInput("state", "Choose a state:",
list(`East Coast` = list("NY", "NJ", "CT"),
`West Coast` = list("WA", "OR", "CA"),
`Midwest` = list("MN", "WI", "IA"))
),
textOutput("result")
),
server = function(input, output) {
output$result <- renderText({
paste("You chose", input$state)
})
}
)
?sliderInput
?renderDataTable
?formatRound
runApp('~/dotplot.R')
?enrichKEGG
runApp('~/dotplot.R')
?dotplot
?cnetplot
?dotplot
runApp('~/dotplot.R')
runApp('~/dotplot.R')
?barplot
?barplot
?clusterProfiler
runApp('~/dotplot.R')
?dotplot
runApp('~/dotplot.R')
?selectInput
runApp('~/dotplot.R')
runApp('~/dotplot.R')
setwd("data analysis/LSC/dotplot_shinyapp/")
runApp('~/data analysis/LSC/dotplot_shinyapp/dotplot.R')
runApp('~/data analysis/LSC/dotplot_shinyapp/dotplot.R')
runApp('~/data analysis/LSC/dotplot_shinyapp/dotplot.R')
BiocManager::repositories()
runApp('~/data analysis/LSC/dotplot_shinyapp/dotplot.R')
runApp('~/data analysis/LSC/dotplot_shinyapp/dotplot.R')
runApp('~/data analysis/LSC/dotplot_shinyapp/dotplot.R')
library(shiny)
?runUrl
?runGitHub
#' Download a TCGA cohort and assemble expression and survival data
#'
#' Queries GDC through TCGAbiolinks for gene-expression quantification of
#' `cohort`, downloads and prepares it, drops samples with missing `gender`,
#' and returns an edgeR `DGEList` plus a survival data frame.
#'
#' @param cohort Project identifier passed as the first argument to
#'   `GDCquery()`.
#' @param datatype Type of data to download. Only RNA-seq is implemented;
#'   accepted spellings (case-insensitive) are `"RNAseq"` and `"RNA-seq"`.
#' @param workflow_type Value forwarded to `GDCquery(workflow.type = )`.
#'   Defaults to the previously hard-coded `"HTSeq - Counts"`.
#'   NOTE(review): newer GDC data releases are believed to expose
#'   `"STAR - Counts"` instead; confirm against the installed TCGAbiolinks.
#' @return A list with two elements:
#'   * `x`: `DGEList` built from the count matrix of all samples with a
#'     non-missing `gender`, with the row annotation as `genes`.
#'   * `surv`: data frame with columns `clinical2.barcode`,
#'     `clinical2.shortLetterCode`, `os.status` (logical, `TRUE` = dead),
#'     `os.time` and `tumor.stage` (factor), excluding samples whose
#'     `shortLetterCode` is `"NT"`.
data_download <- function(cohort, datatype = "RNAseq",
                          workflow_type = "HTSeq - Counts") {
  # Fail fast on unsupported data types. The original comparison
  # `== "rnaseq" | "rna-seq"` is an error in R (`|` on a character operand)
  # and, had it evaluated to FALSE, would have left `query` undefined.
  datatype_key <- tolower(datatype)
  if (length(datatype_key) != 1L ||
        !datatype_key %in% c("rnaseq", "rna-seq")) {
    stop("Unsupported datatype: only \"RNAseq\" / \"RNA-seq\" is implemented.",
         call. = FALSE)
  }

  # Kept as library() calls (rather than `pkg::`) so the packages remain
  # attached after the call, exactly as before.
  library(TCGAbiolinks)
  library(SummarizedExperiment)
  library(edgeR)

  # download TCGA data
  # View(getGDCprojects())
  query <- GDCquery(cohort,
                    data.category = "Transcriptome Profiling",
                    data.type = "Gene Expression Quantification",
                    workflow.type = workflow_type)
  GDCdownload(query, method = "api", files.per.chunk = 10)
  data <- GDCprepare(query)

  ## get expression matrix and clinical data frame
  exp <- assay(data)
  clinical <- colData(data)
  gene <- rowData(data)

  # examine clinical information and remove incomplete ones
  clinical2 <- clinical[!is.na(clinical$gender), ]
  exp2 <- exp[, match(as.character(clinical2$barcode), colnames(exp)),
              drop = FALSE]

  # Overall survival: follow-up time for patients still alive, otherwise
  # time to death. Vectorised; status is compared case-insensitively.
  os.status <- as.character(clinical2$vital_status)
  os.time <- ifelse(tolower(os.status) == "alive",
                    clinical2$days_to_last_follow_up,
                    clinical2$days_to_death)

  ## convert tumor stage to numeric
  tumor.stage <- as.character(clinical2$tumor_stage)
  if (length(tumor.stage) != nrow(clinical2)) {
    # Column absent in the prepared data: keep the output shape stable
    # instead of failing later inside data.frame().
    warning("Column 'tumor_stage' not found; tumor.stage set to NA.",
            call. = FALSE)
    tumor.stage <- rep(NA_character_, nrow(clinical2))
  }
  # Order matters: longer numerals must be replaced before shorter ones.
  tumor.stage <- gsub(".*iv.*", "4", tumor.stage, perl = TRUE)
  tumor.stage <- gsub(".*iii.*", "3", tumor.stage, perl = TRUE)
  tumor.stage <- gsub(".*ii.*", "2", tumor.stage, perl = TRUE)
  tumor.stage <- gsub(".*i.*", "1", tumor.stage, perl = TRUE)
  tumor.stage <- gsub("not reported", "0", tumor.stage, perl = TRUE)
  tumor.stage <- factor(tumor.stage)

  # Count data (all retained samples, including normals) as a DGEList.
  x <- DGEList(counts = exp2, genes = data.frame(gene))

  # Column names are spelled out to match what
  # data.frame(clinical2$barcode, clinical2$shortLetterCode, ...) produced.
  survdata <- data.frame(clinical2.barcode = clinical2$barcode,
                         clinical2.shortLetterCode = clinical2$shortLetterCode,
                         os.status = os.status,
                         os.time = os.time,
                         tumor.stage = tumor.stage)

  # Drop solid-tissue-normal samples. Base subsetting replaces the original
  # `%>% filter()`, which relied on dplyr being attached by the caller; rows
  # with a missing code are dropped, as filter() would do.
  code <- survdata$clinical2.shortLetterCode
  survdata <- survdata[!is.na(code) & code != "NT", , drop = FALSE]
  rownames(survdata) <- NULL

  survdata$os.status <- tolower(as.character(survdata$os.status)) == "dead"

  list(x = x, surv = survdata)
}