-
Notifications
You must be signed in to change notification settings - Fork 0
/
fusions_ided.Rmd
306 lines (261 loc) · 13.4 KB
/
fusions_ided.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
---
title: "Gene overexpression Fusions identified"
output: html_notebook
---
#list of packages used
```{r}
library(dplyr)
library(readr)
```
#list of fusions identified by outliers
```{r}
# Fusion-positive rows that were also flagged as expression outliers.
# The result stays grouped by gene and Cancer; because select() always keeps
# grouping variables, the Cancer column is carried along with the three
# requested columns.
outlier_fusions <- all_genes %>%
  filter(fusion == "Fusion") %>%
  filter(is_outlier == TRUE) %>%
  group_by(gene, Cancer) %>%
  select(rail_id, gene, Project_ID)
outlier_fusions
write_csv(outlier_fusions, "outlier_fusions.csv")
```
#list of fusions identified by overexpression (95th percentile)
```{r}
# Fusion-positive rows flagged as overexpressed (above_95 == TRUE).
# Fix: the original pipeline ended with a stray "]" after select(), which is a
# parse error and prevented the whole chunk from running.
# The result stays grouped by gene and Cancer; select() keeps grouping
# variables, so the Cancer column is carried along with the requested columns.
above_95percent <- all_genes %>%
  filter(fusion == "Fusion", above_95 == TRUE) %>%
  group_by(gene, Cancer) %>%
  select(rail_id, gene, Project_ID)
above_95percent
write_csv(above_95percent, "above_95percent.csv")
```
#list of fusions identified by both methods
```{r}
# Fusion-positive rows flagged by both detection methods: the outlier call
# and the above_95 call.
ided_by_both <- all_genes %>%
  filter(fusion == "Fusion") %>%
  filter(is_outlier == TRUE, above_95 == TRUE) %>%
  select(rail_id, gene, Project_ID)
ided_by_both
write_csv(ided_by_both, 'ided_by_both.csv')
```
##false positives (only cancer types with fusions)
#overexpression
```{r}
# Project_ID values scanned for each gene. Per the section heading these are
# the cancer types in which a fusion of that gene occurs. Entries are listed in
# the order the per-gene tables used to be row-bound (alk, arhgap26, erg, etv1,
# maml3, ntrk3, rara, ret, tacc3, tfe3) so tied rows keep the same relative
# order after sorting.
fusion_projects <- list(
  alk = c("BLCA", "KIRP", "LUAD", "READ", "SARC", "SKCM", "THCA"),
  arhgap26 = c("CESC", "HNSC", "LUAD", "LUSC", "SARC", "STAD"),
  erg = c("CESC", "LGG", "PAAD", "PCPG", "PRAD"),
  etv1 = c("LGG", "PRAD"),
  maml3 = c("BRCA", "LIHC", "PCPG"),
  ntrk3 = c(
    "BRCA", "CESC", "COAD", "HNSC", "LGG", "PAAD", "SKCM", "THCA"
  ),
  rara = c(
    "BRCA", "CESC", "GBM", "LAML", "OV", "PCPG", "READ", "STAD"
  ),
  ret = c("BRCA", "COAD", "LAML", "LUAD", "OV", "THCA"),
  tacc3 = c(
    "BLCA", "BRCA", "CESC", "ESCA", "GBM", "HNSC", "KIRP", "LAML", "LGG",
    "LIHC", "LUAD", "LUSC", "STAD"
  ),
  tfe3 = c("KIRC", "KIRP", "UCEC")
)

# Per-gene tables (defined outside this notebook), keyed like fusion_projects.
gene_tables <- list(
  alk = alk_combined,
  arhgap26 = arhgap26_combined,
  erg = erg_combined,
  etv1 = etv1_combined,
  maml3 = maml3_combined,
  ntrk3 = ntrk3_combined,
  rara = rara_combined,
  ret = ret_combined,
  tacc3 = tacc3_combined,
  tfe3 = tfe3_combined
)

# Collect gene-specific false positives for every gene in fusion_projects.
#
# For each gene the table is ungrouped, restricted to has_cancer == "Cancer"
# rows whose Project_ID is listed for that gene, then to rows that satisfy the
# supplied call conditions but are not labelled "Fusion".
#
# Args:
#   ...: filter() conditions defining a positive call, e.g. above_95 == TRUE.
# Returns:
#   One table with columns rail_id, gene, Project_ID, sorted by gene and then
#   Project_ID.
#
# Note: this replaces thirty copy-pasted pipelines. The scratch tables t, al,
# ar, er, et, m, n, ra, re and tf are no longer created, so base::t and
# dplyr::n are not shadowed by data frames any more.
specific_false_positives <- function(...) {
  per_gene <- vector("list", length(fusion_projects))
  for (i in seq_along(fusion_projects)) {
    gene_key <- names(fusion_projects)[[i]]
    fusion_project_ids <- fusion_projects[[gene_key]]
    per_gene[[i]] <- gene_tables[[gene_key]] %>%
      ungroup() %>%
      filter(has_cancer == "Cancer") %>%
      # %in% drops NA Project_ID rows exactly like the chained == / | test did.
      filter(Project_ID %in% fusion_project_ids) %>%
      filter(..., fusion != "Fusion") %>%
      select(rail_id, gene, Project_ID)
  }
  # rbind() + order() are kept (rather than bind_rows()/arrange()) so column
  # coercion and string collation match the previous output exactly.
  combined <- do.call(rbind, per_gene)
  combined[order(combined$gene, combined$Project_ID), ]
}

fpr_specific <- specific_false_positives(above_95 == TRUE)
fpr_specific
write_csv(fpr_specific, "overexp_fpr_specific.csv")
```
#outliers
```{r}
# Uses specific_false_positives() defined in the overexpression chunk above.
fpr_specific_outlier <- specific_false_positives(is_outlier == TRUE)
fpr_specific_outlier
write_csv(fpr_specific_outlier, "fpr_specific_outlier.csv")
```
#shared
```{r}
# Rows flagged by both calls; uses specific_false_positives() defined in the
# overexpression chunk above.
fpr_specific_shared <- specific_false_positives(
  above_95 == TRUE,
  is_outlier == TRUE
)
fpr_specific_shared
write_csv(fpr_specific_shared, "fpr_specific_shared.csv")
```
#total false positives (including all cancers)
```{r}
# False positives over every row of all_genes (no per-gene project
# restriction): rows that were called but are not labelled "Fusion".

# Called by the outlier method only.
outlier_false_pos <- all_genes %>%
  filter(fusion != "Fusion", is_outlier == TRUE) %>%
  select(rail_id, gene, Project_ID)

# Called by the above_95 method only.
above95_false_pos <- all_genes %>%
  filter(fusion != "Fusion", above_95 == TRUE) %>%
  select(rail_id, gene, Project_ID)

# Called by both methods.
shared_false_pos <- all_genes %>%
  filter(fusion != "Fusion", is_outlier == TRUE, above_95 == TRUE) %>%
  select(rail_id, gene, Project_ID)

write_csv(outlier_false_pos, 'outlier_false_pos.csv')
write_csv(above95_false_pos, 'above95_false_pos.csv')
write_csv(shared_false_pos, 'shared_false_pos.csv')
```
```{r}
# Exploratory cross-checks of the above_95 calls.

# TACC3 rows outside GTEx that were called overexpressed.
tacc3_combined %>%
  filter(has_cancer != "GTEx", above_95 == TRUE)

# Distinct cancer samples with an above_95 call that are not labelled "Fusion".
all_genes %>%
  filter(has_cancer == "Cancer", above_95 == TRUE, fusion != "Fusion") %>%
  select(rail_id, gene, Project_ID) %>%
  distinct(rail_id)

# Same count derived from the saved false-positive table.
# Fix: above95_false_pos is built with select(rail_id, gene, Project_ID), so a
# direct filter(has_cancer == "Cancer") fails with "object 'has_cancer' not
# found" (unless all_genes happens to be grouped by has_cancer). The cancer
# restriction is now applied by matching back to all_genes.
# NOTE(review): assumes (rail_id, gene, Project_ID) identifies a row of
# all_genes well enough for this match - confirm.
above95_false_pos %>%
  semi_join(
    filter(all_genes, has_cancer == "Cancer"),
    by = c("rail_id", "gene", "Project_ID")
  ) %>%
  distinct(rail_id)
```
```{r}
# Number of distinct non-fusion cancer samples (rail_id) per Project_ID for
# TACC3, limited to the Project_ID values listed below.
tacc3_combined %>%
  filter(has_cancer == "Cancer", fusion != "Fusion") %>%
  filter(
    Project_ID %in% c(
      "BLCA", "BRCA", "CESC", "ESCA", "GBM", "HNSC", "KIRP", "LAML", "LGG",
      "LIHC", "LUAD", "LUSC", "STAD"
    )
  ) %>%
  ungroup() %>%
  group_by(Project_ID) %>%
  distinct(rail_id) %>%
  summarise(n())
```
```{r}
# Total number of non-fusion, non-GTEx TACC3 rows in the Project_ID values
# listed below.
tacc3_combined %>%
  filter(has_cancer != "GTEx", fusion != "Fusion") %>%
  filter(
    Project_ID %in% c(
      "BLCA", "BRCA", "CESC", "ESCA", "GBM", "HNSC", "KIRP", "LAML", "LGG",
      "LIHC", "LUAD", "LUSC", "STAD"
    )
  ) %>%
  ungroup() %>%
  summarise(n())
```
```{r}
# Distinct TACC3 cancer samples with an above_95 call, across all projects.
tacc3_combined %>%
  filter(has_cancer == "Cancer", above_95 == TRUE) %>%
  distinct(rail_id)

# TACC3 cancer rows with an above_95 call, limited to the Project_ID values
# listed below (fusion and non-fusion rows alike).
tacc3_combined %>%
  ungroup() %>%
  filter(has_cancer == "Cancer") %>%
  filter(
    Project_ID %in% c(
      "BLCA", "BRCA", "CESC", "ESCA", "GBM", "HNSC", "KIRP", "LAML", "LGG",
      "LIHC", "LUAD", "LUSC", "STAD"
    )
  ) %>%
  filter(above_95 == TRUE)
```