-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDMR_cluster.Rmd
89 lines (65 loc) · 1.97 KB
/
DMR_cluster.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
---
title: "Candidate DMR cluster"
author: "Chengzhou Wu"
date: '2023'
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
## DMR cluster
```{r warning=FALSE,message=FALSE}
library(IlluminaHumanMethylation450kanno.ilmn12.hg19)
library(gbdmr)
```
## Function
Load the functiotions in the R package gbdmr
```{r}
source("src_samebeta_ver2.R")
```
```{r}
# load your data and the data is a matrix where columns are samples and rows correspond to the sites
load('beta.Rdata')
beta <- as.matrix(beta)
cor.threshold=0.5 # the threshold to define the adjacent correlation
```
## get the cluster information
```{r}
#construct an annotation data frame
data(list="IlluminaHumanMethylation450kanno.ilmn12.hg19")
data(Locations)
data(Other)
annotation <- cbind(as.data.frame(Locations), as.data.frame(Other))
# add the annotation to the stats object
common <- intersect(rownames(beta), rownames(annotation))
length(common)
dim(beta)
annotation <- annotation[match(common, rownames(annotation)),]
beta <- beta[match(common, rownames(beta)),]
t.beta <- t(beta)
chr=annotation$chr
pos=annotation$pos
Indexes <- split(1:length(chr),chr)
clusterIDs=rep(NA,length(chr))
LAST=0
for (i in seq(along=Indexes)){
Index <- Indexes[[i]]
temp <- pos[Index]
Index <- Index[order(temp)]
x=colCors(t.beta[,Index[-length(Index)]],t.beta[,Index[-1]])
y <- as.numeric(x <= cor.threshold)
z <- cumsum(c(1, y))
clusterIDs[Index] <- z + LAST
LAST <- max(z) + LAST
}
#clusterIDs # which cluster the cpg belongs
sum(table(clusterIDs))
table(table(clusterIDs))
# Data
blocksize <- c(1, 2, 3, 4, 5, 6, 7, 12)
counts <- c(743, 54, 26, 9, 2, 1, 1, 1)
# Create a data frame
df <- data.frame(value = rep(blocksize, counts))
# Plot histogram
hist(df$value, breaks = seq(0.5, max(df$value) + 0.5, by = 1), col = "skyblue", main = "Histogram Plot", xlab = "Size", ylab = "Frequency")
```