-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path03_foldseek_distances_network.Rmd
42 lines (26 loc) · 1.1 KB
/
03_foldseek_distances_network.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
---
title: "03_domainome_structural_network"
author: "Toni Beltran"
date: "28/02/2024"
output: html_document
---
```{r load data}
library(data.table)

# Build Gephi-importable edge and node lists from an all-vs-all Foldseek table.
#
# Inputs  (under base_dir):
#   analysis_files/pdb_files/foldseek_easy_allvsall
#   analysis_files/PFAM_ID_to_SCOP_class.tsv
# Outputs (under base_dir):
#   output_files/fseek_distances_edgelist.csv  (Source, Target, Type, Weight)
#   output_files/fseek_distances_nodelist.csv  (Id, scop_class)

# Root directory of the project files. Every path below is built from it with
# file.path(), so the session's working directory is never changed.
base_dir <- "/path/to/your/files"
stopifnot(dir.exists(base_dir))

input_dir <- file.path(base_dir, "analysis_files")
output_dir <- file.path(base_dir, "output_files")
# Make sure the destination exists; a no-op when it is already there.
dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)

# ---- Edges ----
# Columns are addressed by fread's default names (V1, V2, ..., V6), i.e. the
# table is expected to be headerless.
foldseek_distances <- fread(
  file.path(input_dir, "pdb_files", "foldseek_easy_allvsall")
)

# V6 is used as the edge weight and 1 - V6 as a distance.
# NOTE(review): presumably V6 is a similarity score in [0, 1] -- confirm
# against the Foldseek output format used to generate the file.
foldseek_distances[, Distance := 1 - V6]

# Rename only the columns that are used, so additional Foldseek output columns
# (if any) do not break the renaming.
setnames(
  foldseek_distances,
  old = c("V1", "V2", "V6"),
  new = c("Source", "Target", "Weight")
)
foldseek_distances[, Type := "Undirected"]

# ---- Nodes ----
# Take ids from both ends of every edge so that an id that only ever shows up
# as a Target still gets a row in the node list.
nodes <- data.table(
  Id = unique(c(foldseek_distances$Source, foldseek_distances$Target))
)

# The PFAM id is the second "_"-separated field of the node id (NA when the id
# has fewer than two fields). vapply keeps the result a character vector even
# for empty input.
nodes[, PFAM_ID := vapply(
  strsplit(Id, "_", fixed = TRUE),
  function(fields) fields[2L],
  character(1)
)]

# paint by SCOP class
# merge() defaults to an inner join: nodes whose PFAM_ID is missing from the
# mapping table are dropped from the node list.
pfam_to_scop_class <- fread(file.path(input_dir, "PFAM_ID_to_SCOP_class.tsv"))
nodes <- merge(nodes, pfam_to_scop_class, by = "PFAM_ID")

# ---- Write node and edge lists to load into Gephi ----
# Self-hits (Source == Target) are excluded from the edge list.
write.table(
  foldseek_distances[Source != Target, c("Source", "Target", "Type", "Weight")],
  file = file.path(output_dir, "fseek_distances_edgelist.csv"),
  sep = ",",
  row.names = FALSE
)
write.table(
  nodes[, c("Id", "scop_class")],
  file = file.path(output_dir, "fseek_distances_nodelist.csv"),
  sep = ",",
  row.names = FALSE
)
```