Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
fischuu authored Mar 10, 2023
1 parent 63653f2 commit e800263
Showing 1 changed file with 38 additions and 0 deletions.
38 changes: 38 additions & 0 deletions R/prepare_kegg_hierachy.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Build a lookup table of the KEGG pathway hierarchy for one organism code.
#
# The KEGG "pathway maps" page for `organism` is downloaded and scanned line
# by line. Lines containing "<b>" start a top-level section; inside a section
# the nesting depth is tracked by counting "<ul>" / "</ul>" lines.
#
# Result: character matrix `output` with one row per pathway entry and columns
#   1. top-level section (the "<b>" line with the <b>/</b> tags removed)
#   2. sub-section (trimmed text line found at list depth 2)
#   3. "<pathway id>_<pathway name>"
#   4. pathway id (the part of column 3 before the first "_")

# Load the data
organism <- "ko"
pwMap <- readLines(paste0(
  "https://www.kegg.jp/kegg-bin/show_organism?menu_type=pathway_maps&org=",
  organism
))

# Get the positions of interest (lines that open a top-level section)
lvl_one <- grep("<b>", pwMap, fixed = TRUE)

# Preallocate for the worst case (every line is a pathway); trimmed below.
output <- matrix(NA_character_, ncol = 3, nrow = length(pwMap))
current_row <- 1

# A section spans from one "<b>" line to the next, so the lines after the last
# "<b>" line are never parsed.
# NOTE(review): confirm the page ends with a trailing "<b>" line; otherwise
# the final top-level section is silently dropped.
# seq_len() keeps the loop empty when fewer than two "<b>" lines exist
# (1:0 would count backwards).
for (i in seq_len(max(length(lvl_one) - 1, 0))) {
  col_one <- pwMap[lvl_one[i]]
  col_one <- gsub("<b>", "", col_one, fixed = TRUE)
  col_one <- gsub("</b>", "", col_one, fixed = TRUE)

  curLevel <- 1
  # Reset so a sub-section name never leaks in from the previous section.
  col_two <- NA_character_

  # Lines strictly between this "<b>" line and the next one; empty when the
  # two "<b>" lines are adjacent.
  section_lines <- lvl_one[i] + seq_len(lvl_one[i + 1] - lvl_one[i] - 1)

  for (j in section_lines) {
    cur_line <- pwMap[j]

    if (grepl("<ul>", cur_line, fixed = TRUE)) {
      curLevel <- curLevel + 1
    } else if (grepl("</ul>", cur_line, fixed = TRUE)) {
      curLevel <- curLevel - 1
    } else if (curLevel == 2) {
      # Depth 2 holds the sub-section heading; ignore blank lines so they
      # cannot overwrite a heading that was already read.
      heading <- trimws(cur_line)
      if (nzchar(heading)) {
        col_two <- heading
      }
    } else if (curLevel == 3) {
      # Depth 3 holds the pathway links.
      col_three <- gsub('&nbsp;&nbsp;<a href=\"/pathway/', "", cur_line,
                        fixed = TRUE)

      # Keep everything from the FIRST occurrence of the organism code.
      # Splitting on the code instead would truncate names that happen to
      # contain it (e.g. "Leukocyte ..." for organism "ko").
      code_pos <- regexpr(organism, col_three, fixed = TRUE)
      if (code_pos == -1) {
        next  # not a pathway entry for this organism
      }
      col_three <- substring(col_three, code_pos)

      col_three <- gsub('\">', "_", col_three, fixed = TRUE)
      col_three <- gsub("</a><br>", "", col_three, fixed = TRUE)

      output[current_row, ] <- c(col_one, col_two, col_three)
      current_row <- current_row + 1
    }
  }
}

# `current_row` points at the next free slot, so only the rows before it are
# filled. drop = FALSE keeps a matrix even for zero or one result rows.
output <- output[seq_len(current_row - 1), , drop = FALSE]

# Append the bare pathway id (text before the first "_") as a fourth column.
output <- cbind(output, sub("_.*$", "", output[, 3]))
output

0 comments on commit e800263

Please sign in to comment.