-
Notifications
You must be signed in to change notification settings - Fork 1
/
helpers.R
119 lines (103 loc) · 4.53 KB
/
helpers.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
library(RColorBrewer)
library(plyr)
library(FNN)
library(fgsea)
library(umap)
# ---- Shared palette ----
cols <- brewer.pal(11, 'Spectral')

# ---- Experiment annotations ----
# Master annotation table plus the sorted, de-duplicated values of its
# drug / cell_line / tissue columns.
ALL_ANNOT <- get_app_data_dropbox('All_expt_annotations.rds')
ALL_DRUGS <- sort(unique(ALL_ANNOT$drug))
ALL_CELLS <- sort(unique(ALL_ANNOT$cell_line))
ALL_TISSUES <- sort(unique(ALL_ANNOT$tissue))

# ---- Barplot inputs ----
# Barplot data and the original (fixed) pathway ordering.
BARPLOT_DATA <- get_app_data_dropbox('All_nes_q05.rds')
FIXED_BARPLOT_PATHWAYS <- get_app_data_dropbox('fixed_barplot_pathways.rds')

# ---- Gene set collections ----
# Display labels for each collection, named by the sorted unique values of
# BARPLOT_DATA$Collection. The pairing is purely positional.
# NOTE(review): assumes the sorted collection keys line up one-to-one with the
# label order written here - confirm against the data.
GMTS <- setNames(
  c('MSigDB C1: positional gene sets',
    'MSigDB C2: curated pathways (BioCarta)',
    'MSigDB C2: curated pathways (perturbations)',
    'MSigDB C2: curated pathways (KEGG)',
    'MSigDB C2: curated pathways (Reactome)',
    'MSigDB C3: motifs (microRNA targets)',
    'MSigDB C3: motifs (TF targets)',
    'MSigDB C4: computational (cancer gene neighborhoods)',
    'MSigDB C4: computational (cancer modules)',
    'MSigDB C5: GO gene sets (biological process)',
    'MSigDB C5: GO gene sets (cellular component)',
    'MSigDB C5: GO gene sets (molecular function)',
    'MSigDB C6: oncogenic signatures',
    'DSigDB D1: approved drugs',
    'MSigDB: Hallmarks'),
  sort(unique(BARPLOT_DATA$Collection))
)

# Sorted file listings of the 'gene_sets' and 'NES_UMAPs' folders, each labelled
# with 'All collections' followed by the collection display labels.
# NOTE(review): this is positional too - presumably the sorted listing puts the
# all-collections file first; verify against the folder contents.
GMT_PATHS <- setNames(
  sort(list_app_data_dropbox('gene_sets', full.names = TRUE)),
  c('All collections', as.character(GMTS))
)

all_umap <- get_app_data_dropbox('NES_UMAPs/allCollections_UMAP.rds')

UMAP_PATHS <- setNames(
  sort(list_app_data_dropbox('NES_UMAPs', full.names = TRUE)),
  c('All collections', as.character(GMTS))
)

# ---- Colors for tissue UMAP ----
# 11 + 10 + 5 palette colours plus 'yellow' (27 in total), named by every
# tissue followed by 'UserInput'.
# NOTE(review): the counts only line up if there are 26 tissues - confirm.
tissue_cols <- setNames(
  c(brewer.pal(11, 'Spectral'),
    brewer.pal(10, 'Paired'),
    brewer.pal(5, 'Set2'),
    'yellow'),
  c(ALL_TISSUES, 'UserInput')
)

# ---- Colors for GSEA Preranked vs InContext plot ----
quadrant_cols <- setNames(
  c('limegreen', 'goldenrod', 'indianred1', 'grey'),
  c('Both sig', 'Preranked only', 'InContext only', 'Not sig')
)

# ---- Data set -> platform mapping ----
# Names are the data set labels; values are the corresponding platform names.
dataset_platforms <- setNames(
  c('HGU133 Plus 2.0',
    'HGU133A',
    'HGU133A 2.0'),
  c('GEO 442 (HGU133 Plus 2.0)',
    'CMap Build 01 (HGU133A)',
    'NCI-60 (HGU133A 2.0)')
)
# NES and QVals
#gsea_nes = read.csv('data/Original_GSEAPreranked_hallmarks_NES.csv',
# stringsAsFactors = F)
#gsea_qval = read.csv('data/Original_GSEAPreranked_hallmarks_qvals_ALL.csv',
# stringsAsFactors = F)
#incontext_nes = read.csv('data/Modified_GSEAPreranked_hallmarks_NES.csv',
# stringsAsFactors = F)
#incontext_qval = read.csv('data/Modified_GSEAPreranked_hallmarks_qvals_ALL.csv',
# stringsAsFactors = F)
makeValidRnk <- function(filepath){
  # Read and validate a user-supplied ranked list (.rnk).
  #
  # The file is read as header-less, tab-separated text with exactly two
  # columns, which are named GeneSymbol and logFC.
  #
  # Args:
  #   filepath: path of the file to read, or NULL when nothing was selected.
  #
  # Returns:
  #   A list with two elements:
  #     err - '' on success, otherwise a message describing the problem.
  #     df  - on success, the data.frame sorted by decreasing logFC with
  #           rows containing NA removed; NULL otherwise.
  #   The list is returned visibly on every path.
  if (is.null(filepath)){
    return(list(err = '<- Please select a .rnk file to upload',
                df = NULL))
  }

  # read.table() raises an error for empty files or a column count other than
  # two; report any such failure through `err` like other format problems
  # instead of letting it propagate to the caller.
  dat <- tryCatch(
    read.table(filepath, stringsAsFactors = FALSE, sep = '\t',
               col.names = c('GeneSymbol', 'logFC')),
    error = function(e) NULL
  )

  if (is.null(dat) || !is.character(dat$GeneSymbol) || !is.numeric(dat$logFC)){
    return(list(err = 'Input rnk does not match the format requirements',
                df = NULL))
  }

  list(err = '',
       df = na.omit(dat[order(dat$logFC, decreasing = TRUE), ]))
}
prepUploadedData <- function(rnk, bg){
  # Combine the user's ranked list with the background rank matrix, restricted
  # to the genes they share, and re-rank each column within that gene subset.
  #
  # Args:
  #   rnk: user's validated 2-column ranked list (GeneSymbol, logFC)
  #   bg:  a list with 'rank_indices' (genes x expts) and 'annot'
  #        (expts x metadata); only 'rank_indices' is used here
  #
  # Returns:
  #   A matrix with one row per shared gene (sorted by gene name) and one column
  #   per distinct ranking; the first column, 'UserInput', holds the user's
  #   ranks. Always a matrix, even if only one column survives de-duplication.
  #
  # Stops with an error when fewer than two genes are shared.
  # NOTE: duplicated gene symbols in `rnk` make the row-name assignment below
  # fail, because data.frame row names must be unique.

  # Subset the user's data and bg data to genes in common
  row.names(rnk) <- rnk$GeneSymbol
  genes_in_common <- sort(intersect(rnk$GeneSymbol,
                                    row.names(bg$rank_indices)))
  if (length(genes_in_common) < 2L){
    stop('Fewer than two genes are shared between the uploaded rnk and the background set',
         call. = FALSE)
  }
  subset_rnk <- rnk[genes_in_common, , drop = FALSE]
  subset_rnk <- subset_rnk[order(subset_rnk$logFC, decreasing = TRUE), ,
                           drop = FALSE]
  # drop = FALSE keeps a single-experiment background as a one-column matrix
  subset_bg <- bg$rank_indices[genes_in_common, , drop = FALSE]

  # Rank user's genes: position of each shared gene in the logFC-sorted list
  # (1 = highest logFC). match() does this in one vectorised pass.
  user_ranks <- match(genes_in_common, row.names(subset_rnk))
  names(user_ranks) <- genes_in_common

  # Rerank bg genes within the shared subset; order(order(x)) gives ranks
  # 1..n with ties broken by position.
  bg_ranks <- apply(subset_bg, 2, function(x){
    order(order(x))
  })

  # Remove duplicates
  # (Putting user input first here guarantees it stays,
  # even if it's the same as a rnk list in the bg set)
  mat_for_umap <- cbind(UserInput = user_ranks, bg_ranks)
  mat_for_umap[, !duplicated(t(mat_for_umap)), drop = FALSE]
}