-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathread-length-distribution.R
43 lines (33 loc) · 1.69 KB
/
read-length-distribution.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# Please specify your working directory using setwd
setwd("/home/brefrachoni/genomics")
# Load packages
library(ggplot2)
library(dplyr)
library(cowplot)
# Import the read-length distribution table
read_length_df <- read.csv("length.csv")
# Organize the imported read-length table
# You can replace the level arguments for your platform, species, or strains
read_length_df$platform <- as.factor(read_length_df$platform)
read_length_df$platform <- factor(read_length_df$platform,level = c("PacBio_CLR","PacBio_HiFi","ONT"))
# Calculate the average read-lengths for each platform
summary_df <- plyr::ddply(read_length_df, "platform", summarise, grp.mean=mean(length))
# Draw a read-length distribution plot for all reads
total.length.plot <- ggplot(read_length_df, aes(x=length, fill=platform, color=platform)) +
geom_histogram(binwidth=100, alpha=0.5, position="dodge") +
geom_vline(data=summary_df, aes(xintercept=grp.mean, color=platform), linetype="dashed", linewidth=0.2) +
labs(x = "Read length (bp)", y = "Count") +
theme_bw()
# Draw a read-length distribution plot for reads ≤ 20 kb in length
kb.length.plot <- ggplot(read_length_df, aes(x=length, fill=platform, color=platform)) +
geom_histogram(binwidth=50, alpha=0.5, position="dodge") +
geom_vline(data=summary_df, aes(xintercept=grp.mean, color=platform), linetype="dashed", linewidth=0.2) +
scale_x_continuous(limit = c(0,20000)) +
labs(x = "Read length (bp)", y = "Count") +
theme_bw()
# Merge both the read-length distribution plots
plot <- plot_grid(total.length.plot, kb.length.plot, ncol = 1)
# Save the figure using the file name, “read.length.pdf”
pdf("read.length.pdf",width=6,height=8,paper='special')
print(plot)
dev.off()