# prepare_kegg_hierachy.R
#
# Provenance: added as R/prepare_kegg_hierachy.R in commit
# e800263fa80356afaa2fee2fc368abac53145433 ("Add files via upload",
# Daniel Fischer, 2023-03-10).
#
# Purpose: download the KEGG pathway-map overview page for one organism and
# flatten its nested listing into a character matrix with four unnamed
# columns:
#   1. top-level section heading
#   2. sub-category
#   3. "<id>_<pathway name>"
#   4. pathway id (the part of column 3 in front of the first "_")
#
# NOTE(review): the copy of this script that was reviewed had been passed
# through an HTML renderer, so every HTML tag literal inside the patterns was
# missing (e.g. `grep("", pwMap)`), and the statements that raise the nesting
# level and initialise the pathway entry were lost. The literals used below
# ("<b>", "<ul>", "</ul>", "&nbsp;&nbsp;", generic tag removal) are
# reconstructed from the remaining logic -- TODO confirm against the live page.

# Remove every HTML tag (e.g. <a href=...>, </a>, <br>) from a character
# vector, keeping the text between the tags.
.kegg_strip_tags <- function(x) {
  gsub("<[^>]+>", "", x)
}

# Parse the lines of a KEGG pathway-map overview page.
#
# @param pw_map Character vector, one element per line of the HTML page.
# @param include_last_section If FALSE (default, matches the historical
#   behaviour of this script) the text following the last section heading is
#   ignored, because each heading is only paired with the next heading. If
#   TRUE the last section runs to the end of `pw_map`.
#   NOTE(review): whether the last heading on the page is a real section or a
#   footer cannot be told from the script alone -- verify before switching.
# @return Character matrix with one row per pathway and four columns
#   (section, sub-category, "<id>_<name>", id). Zero rows if nothing matched.
parse_kegg_hierarchy <- function(pw_map, include_last_section = FALSE) {
  stopifnot(is.character(pw_map))

  # Section headings are assumed to be the lines carrying a <b> tag.
  bounds <- grep("<b>", pw_map, fixed = TRUE)
  if (include_last_section && length(bounds) > 0L) {
    bounds <- c(bounds, length(pw_map) + 1L)
  }

  # At most one result row per input line, so this never has to grow.
  rows <- matrix(NA_character_, nrow = length(pw_map), ncol = 3L)
  n_rows <- 0L

  # seq_len() keeps the loop empty for zero or one heading instead of
  # counting backwards the way 1:(n - 1) does.
  for (i in seq_len(max(0L, length(bounds) - 1L))) {
    section <- trimws(gsub("</?b>", "", pw_map[bounds[i]]))
    sub_category <- NA_character_
    level <- 1L

    # Lines strictly between this heading and the next one; empty when the
    # two headings are adjacent.
    n_body <- max(0L, bounds[i + 1L] - bounds[i] - 1L)
    body_idx <- seq(from = bounds[i] + 1L, length.out = n_body)

    for (j in body_idx) {
      line <- pw_map[j]
      if (!nzchar(trimws(line))) {
        next
      }

      if (grepl("<ul>", line, fixed = TRUE)) {
        # Opening list tag: one level deeper.
        level <- level + 1L
      } else if (grepl("</ul>", line, fixed = TRUE)) {
        # Closing list tag: one level up.
        level <- level - 1L
      } else if (level == 2L) {
        sub_category <- trimws(line)
      } else if (level == 3L) {
        # The id and the name are separated by two non-breaking spaces,
        # either as entities or as literal U+00A0 characters.
        entry <- gsub("(&nbsp;|\u00a0){2}", "_", line)
        entry <- trimws(.kegg_strip_tags(entry))
        n_rows <- n_rows + 1L
        rows[n_rows, ] <- c(section, sub_category, entry)
      }
    }
  }

  # Keep exactly the filled rows; drop = FALSE keeps a matrix for 0 or 1 row.
  rows <- rows[seq_len(n_rows), , drop = FALSE]
  ids <- vapply(
    strsplit(rows[, 3L], "_", fixed = TRUE),
    function(parts) parts[1L],
    character(1)
  )
  # deparse.level = 0 keeps the fourth column unnamed, like the first three.
  cbind(rows, ids, deparse.level = 0)
}

# Load the data
organism <- "ko"
pwMap <- readLines(paste0(
  "https://www.kegg.jp/kegg-bin/show_organism",
  "?menu_type=pathway_maps&org=",
  organism
))

# Flatten the page into the hierarchy table and show it
output <- parse_kegg_hierarchy(pwMap)
output