DMR_cluster.Rmd

---
title: "Candidate DMR cluster"
author: "Chengzhou Wu"
date: '2023'
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

## DMR cluster



```{r warning=FALSE,message=FALSE}
library(IlluminaHumanMethylation450kanno.ilmn12.hg19)
library(gbdmr)
```

## Function 

Load the functiotions in the R package gbdmr 

```{r}
source("src_samebeta_ver2.R")
```

```{r}
# load your data and the data is a matrix where columns are samples and rows correspond to the sites
load('beta.Rdata')
beta <- as.matrix(beta)  
cor.threshold=0.5 # the threshold to define the adjacent correlation 


```

## get the cluster information 

```{r}


#construct an annotation data frame
data(list="IlluminaHumanMethylation450kanno.ilmn12.hg19")
data(Locations)
data(Other)

annotation <- cbind(as.data.frame(Locations), as.data.frame(Other))

# add the annotation to the stats object 
common <- intersect(rownames(beta), rownames(annotation))
length(common)
dim(beta)
annotation <- annotation[match(common, rownames(annotation)),]
beta <- beta[match(common, rownames(beta)),]
t.beta <- t(beta)

chr=annotation$chr
pos=annotation$pos
Indexes <- split(1:length(chr),chr)
clusterIDs=rep(NA,length(chr))
LAST=0
for (i in seq(along=Indexes)){
  Index <- Indexes[[i]]
  temp <- pos[Index]
  Index <- Index[order(temp)]
  x=colCors(t.beta[,Index[-length(Index)]],t.beta[,Index[-1]])
  y <- as.numeric(x <= cor.threshold)
  z <- cumsum(c(1, y))
  clusterIDs[Index] <- z + LAST
  LAST <- max(z) + LAST
}
#clusterIDs # which cluster the cpg belongs
sum(table(clusterIDs))
table(table(clusterIDs))


# Data
blocksize <- c(1, 2, 3, 4, 5, 6, 7, 12)
counts <- c(743, 54, 26, 9, 2, 1, 1, 1)

# Create a data frame
df <- data.frame(value = rep(blocksize, counts))

# Plot histogram
hist(df$value, breaks = seq(0.5, max(df$value) + 0.5, by = 1), col = "skyblue", main = "Histogram Plot", xlab = "Size", ylab = "Frequency")

```