-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLat_SIMPER.Rmd
122 lines (110 loc) · 3.91 KB
/
Lat_SIMPER.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
---
title: "RBW SIMPER"
---
```{r SIMPER: Transects}
library("labdsv")
library("vegan")
library("ggplot2")
library("dplyr")
library("tidyverse")
library("tidyr")
library("clustsig")
```
```{r load data}
setwd("~/Dropbox/UTS/PhD/Projects/Chapter1_Vibrio_latitude_study/Data/16S/Data")
sample_data <- read_csv('SMD.rarefied.csv')
asv_rarefy30k_long <- read_csv("asv_rarefy30k_long.csv")
```
```{bash}
qsub -I -q c3b -l ncpus=11,mem=375GB,walltime=72:00:00
qsub -I -l ncpus=11,mem=375GB,walltime=120:00:00
cd /shared/c3/projects/Nathan.Williams.12034652/Vibrio_Latitude_Project/Vibrio_Latitude_16S_Rerun/fastq_and_analysis/SIMPER
module load devel/R-current;
R
```
```{r Arrange Data on HPC}
library("labdsv")
library("vegan")
library("ggplot2")
library("dplyr")
library("tidyverse")
library("tidyr")
library("clustsig")
setwd("~/Dropbox/UTS/PhD/Projects/Chapter1_Vibrio_latitude_study/Data/16S/Data")
sample_data <- read_csv('SMD.csv')
asv_rarefy30k_long <- read_csv("asv_rarefy30k_long.csv")
Abundance <- asv_rarefy30k_long %>% select(FGID, SampleID, Abundance_Vegan_Rarefied) %>% distinct()
Abundance.spread <- Abundance %>% spread(key='SampleID', value='Abundance_Vegan_Rarefied', fill=0)
rownames(sample_data) <- sample_data$SampleID
L.matrix.simper <- as.matrix(Abundance.spread[,-c(1)])
rownames(L.matrix.simper) <- Abundance.spread$FGID
L.matrix.simper.t <- t(L.matrix.simper)
L.matrix.nz <- L.matrix.simper.t[,colSums(L.matrix.simper.t)!=0]
```
```{r Run SIMPER}
simper.L <- simper(L.matrix.nz, group = sample_data$mycl450, permutations = 999);
saveRDS(simper.L, 'simper.L.RDS')
```
```{r SIMPER: load in simper data}
setwd("~/Dropbox/UTS/PhD/Projects/Chapter1_Vibrio_latitude_study/Data/16S/Data")
simper <- readRDS("simper.L.RDS")
summary(simper, ordered = TRUE, digits = 3)
```
```{r}
summary(simper, ordered = TRUE, digits = 3)
simper_pretty <- simper.pretty(rb.matrix.nz, sample_data, c('Location'), perc_cutoff=0.5, low_cutoff = '0.01', low_val=0.01, 'sigASVcontrib')
```
```{r}
simper.pretty = function(x, metrics, interesting, perc_cutoff, low_cutoff, low_val, output_name){
library(vegan)
#handling otu tables for taxa levels
save=list(0)
if(grepl("Otu", colnames(x)[1])!=TRUE){
#converts output from A.Neumann Taxonomy script
save=list(58)
x=as.data.frame(t(x))
orig_names=colnames(x)
new_names=list()
l=1
for(n in colnames(x)){
ifelse((l<10), (colnames(x)[l]=c(paste0('Otu000',c(l)))), (colnames(x)[l]=c(paste0('Otu00',c(l)))))
new_names=append(new_names, colnames(x)[l])
l=l+1
}
orig_names=as.data.frame(orig_names, row.names = as.character(new_names))
}
#running simper
for(variables in interesting){
test_1=with(metrics, simper(x, metrics[[variables]]))
#parsing through simper output, saving desired info to table
for(name in names(test_1)){
testmx=matrix(ncol=length(interesting))
testmx=cbind(test_1[[name]]$ord,test_1[[name]]$cusum)
sorted=testmx[order(testmx[,1]),]
sorted=cbind(sorted,test_1[[name]]$species)
sorted=sorted[order(sorted[,2]),]
t=matrix(sorted[sorted[,2]<=perc_cutoff,],ncol=3)
i=nrow(t)
#converting percents to percent of whole
while(i>1){
t[i,2]=as.character(as.numeric(t[i,2])-as.numeric(t[i-1,2]))
i=i-1
}
t[,1]=name
write.table(t,file=paste(output_name,'_simper.csv',sep=""), append=TRUE, sep=",", col.names = FALSE)
}}
y=read.table(paste(output_name,'_simper.csv',sep=""), header=FALSE,sep=",",fill = TRUE,row.names = NULL)
file.remove(paste(output_name,'_simper.csv',sep = ""))
y=y[-c(1)]
colnames(y) = c("Comparison", "SIMPER", "OTU")
#removing results lower than low cutoff
if(low_cutoff=='y'){
y=y[!(as.numeric(as.character(y$SIMPER))<low_val),]
}
#prevents changing of colnames if OTU table
if(58 %in% save){
y$OTU=orig_names[match(y$OTU, rownames(orig_names)),1]
}
write.csv(y,file=paste(output_name,'_clean_simper.csv', sep=''))
}
```