v1.2.0

G3viz · Sep 3, 2024 · 8d55903 · 8d55903
1 parent 2fa553b
commit 8d55903
Show file tree

Hide file tree

Showing 5 changed files with 129 additions and 131 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -39,7 +39,9 @@ biocViews:
 Imports:
     jsonlite,
     stringr,
-    cBioPortalData,
+    httr2,
+    AnnotationDbi,
+    org.Hs.eg.db,
     htmlwidgets
 Suggests:
     shiny (>= 1.0.0),

diff --git a/NAMESPACE b/NAMESPACE
@@ -13,14 +13,16 @@ export(parseProteinChange)
 export(readMAF)
 export(renderG3Lollipop)
 export(uniprot2pfam)
-importFrom(cBioPortalData,cBioPortal)
-importFrom(cBioPortalData,getDataByGenes)
-importFrom(cBioPortalData,molecularProfiles)
-importFrom(cBioPortalData,sampleLists)
-importFrom(cBioPortalData,samplesInSampleLists)
+importFrom(AnnotationDbi,mapIds)
 importFrom(htmlwidgets,shinyRenderWidget)
 importFrom(htmlwidgets,shinyWidgetOutput)
+importFrom(httr2,req_perform)
+importFrom(httr2,request)
+importFrom(httr2,resp_body_string)
+importFrom(httr2,resp_status)
+importFrom(jsonlite,fromJSON)
 importFrom(jsonlite,toJSON)
+importFrom(org.Hs.eg.db,org.Hs.eg.db)
 importFrom(stringr,str_extract_all)
 importFrom(utils,capture.output)
 importFrom(utils,data)

diff --git a/R/getMutationsFromCbioportal.R b/R/getMutationsFromCbioportal.R
@@ -12,22 +12,11 @@
 #' @examples
 #' \donttest{
 #' # Usage:
-#' # cBioPortalData has officially replaced the defunct cgdsr.
-#' # Search online for cgdsrMigration.html if interested.
-#' library(cBioPortalData)
-#' cbio <- suppressWarnings(cBioPortal(hostname = "www.cbioportal.org", protocol = "https", api. = "/api/v2/api-docs")
-#'
-#' # list all studies of cBioPortal
-#' all.studies <- getStudies(cbio, buildReport = FALSE)
-#'
-#' # First, select a cancer study that contains mutation data set ("caner_study_id")
-#' # then, query genomic mutation data using a HGNC gene symbol,
-#' # for example
-#' mutation.dat <- getMutationsFromCbioportal("msk_impact_2017", "TP53")
-#' mutation.dat <- getMutationsFromCbioportal("all_stjude_2016", "TP53")
 #' }
-#' @importFrom cBioPortalData cBioPortal molecularProfiles sampleLists getDataByGenes
-#'             samplesInSampleLists
+#' @importFrom httr2 request req_perform resp_status resp_body_string
+#' @importFrom org.Hs.eg.db org.Hs.eg.db
+#' @importFrom AnnotationDbi mapIds
+#' @importFrom jsonlite fromJSON
 #' @importFrom utils write.table
 #'
 #' @return a data frame with columns
@@ -52,104 +41,123 @@ getMutationsFromCbioportal <- function(study.id,
                                        mutation.type.to.class.df = NA){
 
   # ========================
-  # server
-  # cbio <- cBioPortal()
-  # cbio <- suppressWarnings({
-  #   cBioPortal(
-  #    hostname = "www.cbioportal.org",
-  #     protocol = "https",
-  #     api. = "/api/v2/api-docs"
-  #   )
-  # })
-
-  cbio <- tryCatch({
-    cBioPortal(
-      hostname = "www.cbioportal.org",
-      protocol = "https",
-      api. = "/api/v2/api-docs"
-    )
-  }, warning = function(w){
-    # message(w)
-  }, error = function(e){
-    stop("Connection error: can not connect to cBioPortal API")
-  }, finally = {
-    #
-  })
+  # cbioportal server
+  base.url = "https://www.cbioportal.org/api/"
 
   # ========================
-  # get study information
-  genetic.profiles <- molecularProfiles(cbio, studyId = study.id)
-  message("Found study ", study.id)
+  # library(httr2)
+  # library(AnnotationDbi)
+  # library(org.Hs.eg.db)
 
-  # ========================
-  # check if mutation information is available in the study
-  profile.col <- "molecularProfileId"
-  mutation.idx <- grep(pattern = 'mutations$', x = genetic.profiles$molecularProfileId, fixed = FALSE)
-  if(is.integer(mutation.idx) && length(mutation.idx) == 0L){
-    stop("Can not find mutation information in ", study.id, " study")
-  }
-  mutation.profile <- genetic.profiles$molecularProfileId[mutation.idx]
-  message("Found mutation data set ", mutation.profile)
+  # study.id = "msk_impact_2017"
+  # gene.symbol = "TP53"
 
   # ========================
-  case.list.details <- sampleLists(cbio, study.id)
-
-  mutation.case.list.id <- case.list.details$sampleListId
-
-  mutation.case.list.all <- mutation.case.list.id[grep(pattern = '_sequenced$',x = mutation.case.list.id)]
-  num.case <- length(samplesInSampleLists(cbio,mutation.case.list.id)[[mutation.case.list.all]])
-  message(num.case, " cases in this study")
-
-  ### Download mutation data on certain gene from study
-  df <- getDataByGenes(
-        cbio,
-        studyId = study.id,
-        genes = gene.symbol,
-        by = "hugoGeneSymbol",
-        molecularProfileIds = mutation.profile
-      )[[1]]
-
-  extended.mutation.df <- cbind(rep(gene.symbol,nrow(df)),df)
-  colnames(extended.mutation.df) <- c("gene_symbol",colnames(df))
-  # =========================
-  # parse mutation data columns
-  required.colnames <- c("gene_symbol", "proteinChange", "sampleId", "mutationType",
-                         "chr", "startPosition", "endPosition",
-                         "referenceAllele", "variantAllele")
-
-  mapped.colnames <- c("Hugo_Symbol", "Protein_Change", "Sample_ID", "Mutation_Type",
-                       "Chromosome", "Start_Position", "End_Position",
-                       "Reference_Allele", "Variant_Allele")
-
-  # check if any columns are missing
-  missing.columns <- required.colnames[!required.colnames %in% colnames(extended.mutation.df)]
-  if(length(missing.columns) > 0){
-    stop("Some columns are missing: ", paste(missing.columns, collapse =", "))
-  }
-
-  # rename headers according to cbioportal MutationMapper
-  # url: http://www.cbioportal.org/mutation_mapper.jsp
-  mutation.df <- extended.mutation.df[, required.colnames]
-  colnames(mutation.df) <- mapped.colnames
-
-
-  # =============================
-  # map from mutation type to mutation class
-  mutation.df[, "Mutation_Class"] <- mapMutationTypeToMutationClass(mutation.df[, "Mutation_Type"],
-                                                                      mutation.type.to.class.df)
-
-  # =============================
-  # parse amino acid position
-  mutation.df[, "AA_Position"] <- parseProteinChange(mutation.df[, "Protein_Change"],
-                                                  mutation.df[, "Mutation_Class"])
-
-  mutation.df <- mutation.df[order(mutation.df[, "AA_Position"],
-                                   mutation.df[, "Protein_Change"], decreasing = FALSE), ]
-
-  if(!is.na(output.file)){
-    message("Write mutation data to ", output.file)
-    write.table(mutation.df, file = output.file, sep = "\t", quote = FALSE, col.names = TRUE, row.names = FALSE)
-  }
-
-  mutation.df
+  tryCatch({
+    # step 1:
+    # get entrez gene id
+    entrez.id <- suppressMessages(mapIds(
+      org.Hs.eg.db,
+      keys = gene.symbol,
+      column = "ENTREZID",
+      keytype = "SYMBOL",
+      multiVals = "first"
+    ))
+
+    if (!is.na(entrez.id)) {
+      message(paste0("The Entrez Gene ID for ", gene.symbol, " is: ", entrez.id))
+    } else {
+      stop(paste0("[Error] No Entrez Gene ID found for ", gene.symbol))
+    }
+
+    entrez.id = as.character(entrez.id)
+
+    # step2:
+    # check if mutation information is available in the study
+    response <- request(paste0(base.url, "studies/", study.id, "/molecular-profiles")) |>
+      req_perform()
+    status_code <- resp_status(response)
+
+    if(status_code != 200){
+      stop("Can not find Mutation data for this study: ", study.id)
+    }
+
+    res_dataset_df <- response |>
+      resp_body_string() |>
+      fromJSON()
+
+    if(!"MAF" %in% res_dataset_df$datatype){
+      stop("Failed to retrieve data from cBioPortal. Status_cod = ", status_code)
+    }
+
+    # check if mutation dataset exists for this study
+    maf_col_idx = which(res_dataset_df$datatype == "MAF")
+    maf_study_name = res_dataset_df[maf_col_idx, "molecularProfileId"]
+    message("Found mutation dataset for ", study.id, ": ", maf_study_name)
+
+    all.sample.name = paste0(study.id, "_all")
+
+    # get mutation data
+    mutation_cmd = paste0(
+      base.url, "molecular-profiles/", maf_study_name, "/mutations?sampleListId=",
+      all.sample.name, "&entrezGeneId=", entrez.id)
+
+    response2 <- request(mutation_cmd) |> httr2::req_perform()
+    status_code2 <- resp_status(response2)
+
+    if(status_code2 != 200){
+      stop("[Error] can not query mutation data from cBioportal API for the study: ", study.id)
+    }
+
+    # ---------------------------
+    mutation.df <- response2 |>
+      resp_body_string() |>
+      fromJSON()
+
+    mutation.df$geneSymbol <- gene.symbol
+    required.colnames <- c("geneSymbol", "proteinChange", "sampleId", "mutationType",
+                           "chr", "proteinPosStart", "proteinPosEnd",
+                           "referenceAllele", "variantAllele")
+
+    mapped.colnames <- c("Hugo_Symbol", "Protein_Change", "Sample_ID", "Mutation_Type",
+                         "Chromosome", "Start_Position", "End_Position",
+                         "Reference_Allele", "Variant_Allele")
+
+    # check if any columns are missing
+    if(!all(required.colnames %in% colnames(mutation.df))){
+      missing.columns <- all(required.colnames %in% colnames(mutation.df))
+      stop("[Error] Some columns are missing: ", paste(missing.columns, collapse =", "))
+    }
+
+    # rename headers according to cbioportal MutationMapper
+    # url: http://www.cbioportal.org/mutation_mapper.jsp
+    mutation.df <- mutation.df[, required.colnames]
+    colnames(mutation.df) <- mapped.colnames
+
+    # =============================
+    # map from mutation type to mutation class
+    mutation.df[, "Mutation_Class"] <- mapMutationTypeToMutationClass(
+      mutation.df[, "Mutation_Type"],
+      mutation.type.to.class.df)
+
+    # =============================
+    # parse amino acid position
+    mutation.df[, "AA_Position"] <- parseProteinChange(mutation.df[, "Protein_Change"],
+                                                       mutation.df[, "Mutation_Class"])
+
+    mutation.df <- mutation.df[order(mutation.df[, "AA_Position"],
+                                     mutation.df[, "Protein_Change"], decreasing = FALSE), ]
+
+    if(!is.na(output.file)){
+      message("Write mutation data to ", output.file)
+      write.table(mutation.df, file = output.file, sep = "\t", quote = FALSE, col.names = TRUE, row.names = FALSE)
+    }
+
+    return(mutation.df)
+  }, warning = function(w){
+    stop("[Warning] ", w)
+  }, error = function(e){
+    stop("[Error] ", e)
+  }, finally = {
+  })
 }
diff --git a/man/getMutationsFromCbioportal.Rd b/man/getMutationsFromCbioportal.Rd
diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd
@@ -122,8 +122,6 @@ g3Lollipop(mutation.dat,
 [↥ back to top](#top)
 
 
-
-
 ## <a name="ex2"></a>Example 2: visualize genetic mutation data from `CSV` or `TSV` file
 
 In this example, we read genetic mutation data from `CSV` or `TSV` files, and visualize it using some
@@ -255,6 +253,7 @@ g3Lollipop(mutation.dat,
 #### Note:
 - Internet access is required to download data from [cBioPortal](http://www.cbioportal.org/).  This may take more than 10 seconds, or sometimes it may fail.
 - To check what studies are available on cBioPortal
+- The `cBioPortalData` and `cBioPortal` R packages have been unstable recently, so we now query mutation data directly from the `cBioPortal` API. This behavior may change in a later version.
 ```r
 
 # list all studies of cBioPortal