Skip to content

Commit

Permalink
v1.2.0
Browse files Browse the repository at this point in the history
  • Loading branch information
phoeguo committed Sep 3, 2024
1 parent 2fa553b commit 8d55903
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 131 deletions.
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ biocViews:
Imports:
jsonlite,
stringr,
cBioPortalData,
httr2,
AnnotationDbi,
org.Hs.eg.db,
htmlwidgets
Suggests:
shiny (>= 1.0.0),
Expand Down
12 changes: 7 additions & 5 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,16 @@ export(parseProteinChange)
export(readMAF)
export(renderG3Lollipop)
export(uniprot2pfam)
importFrom(cBioPortalData,cBioPortal)
importFrom(cBioPortalData,getDataByGenes)
importFrom(cBioPortalData,molecularProfiles)
importFrom(cBioPortalData,sampleLists)
importFrom(cBioPortalData,samplesInSampleLists)
importFrom(AnnotationDbi,mapIds)
importFrom(htmlwidgets,shinyRenderWidget)
importFrom(htmlwidgets,shinyWidgetOutput)
importFrom(httr2,req_perform)
importFrom(httr2,request)
importFrom(httr2,resp_body_string)
importFrom(httr2,resp_status)
importFrom(jsonlite,fromJSON)
importFrom(jsonlite,toJSON)
importFrom(org.Hs.eg.db,org.Hs.eg.db)
importFrom(stringr,str_extract_all)
importFrom(utils,capture.output)
importFrom(utils,data)
Expand Down
228 changes: 118 additions & 110 deletions R/getMutationsFromCbioportal.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,11 @@
#' @examples
#' \donttest{
#' # Usage:
#' # cBioPortalData has officially replaced the defunct cgdsr.
#' # Search online for cgdsrMigration.html if interested.
#' library(cBioPortalData)
#' cbio <- suppressWarnings(cBioPortal(hostname = "www.cbioportal.org", protocol = "https", api. = "/api/v2/api-docs"))
#'
#' # list all studies of cBioPortal
#' all.studies <- getStudies(cbio, buildReport = FALSE)
#'
#' # First, select a cancer study that contains a mutation data set ("cancer_study_id")
#' # then, query genomic mutation data using a HGNC gene symbol,
#' # for example
#' mutation.dat <- getMutationsFromCbioportal("msk_impact_2017", "TP53")
#' mutation.dat <- getMutationsFromCbioportal("all_stjude_2016", "TP53")
#' }
#' @importFrom cBioPortalData cBioPortal molecularProfiles sampleLists getDataByGenes
#' samplesInSampleLists
#' @importFrom httr2 request req_perform resp_status resp_body_string
#' @importFrom org.Hs.eg.db org.Hs.eg.db
#' @importFrom AnnotationDbi mapIds
#' @importFrom jsonlite fromJSON
#' @importFrom utils write.table
#'
#' @return a data frame with columns
Expand All @@ -52,104 +41,123 @@ getMutationsFromCbioportal <- function(study.id,
mutation.type.to.class.df = NA){

# ========================
# server
# cbio <- cBioPortal()
# cbio <- suppressWarnings({
# cBioPortal(
# hostname = "www.cbioportal.org",
# protocol = "https",
# api. = "/api/v2/api-docs"
# )
# })

cbio <- tryCatch({
cBioPortal(
hostname = "www.cbioportal.org",
protocol = "https",
api. = "/api/v2/api-docs"
)
}, warning = function(w){
# message(w)
}, error = function(e){
stop("Connection error: can not connect to cBioPortal API")
}, finally = {
#
})
# cbioportal server
base.url = "https://www.cbioportal.org/api/"

# ========================
# get study information
genetic.profiles <- molecularProfiles(cbio, studyId = study.id)
message("Found study ", study.id)
# library(httr2)
# library(AnnotationDbi)
# library(org.Hs.eg.db)

# ========================
# check if mutation information is available in the study
profile.col <- "molecularProfileId"
mutation.idx <- grep(pattern = 'mutations$', x = genetic.profiles$molecularProfileId, fixed = FALSE)
if(is.integer(mutation.idx) && length(mutation.idx) == 0L){
stop("Can not find mutation information in ", study.id, " study")
}
mutation.profile <- genetic.profiles$molecularProfileId[mutation.idx]
message("Found mutation data set ", mutation.profile)
# study.id = "msk_impact_2017"
# gene.symbol = "TP53"

# ========================
case.list.details <- sampleLists(cbio, study.id)

mutation.case.list.id <- case.list.details$sampleListId

mutation.case.list.all <- mutation.case.list.id[grep(pattern = '_sequenced$',x = mutation.case.list.id)]
num.case <- length(samplesInSampleLists(cbio,mutation.case.list.id)[[mutation.case.list.all]])
message(num.case, " cases in this study")

### Download mutation data on certain gene from study
df <- getDataByGenes(
cbio,
studyId = study.id,
genes = gene.symbol,
by = "hugoGeneSymbol",
molecularProfileIds = mutation.profile
)[[1]]

extended.mutation.df <- cbind(rep(gene.symbol,nrow(df)),df)
colnames(extended.mutation.df) <- c("gene_symbol",colnames(df))
# =========================
# parse mutation data columns
required.colnames <- c("gene_symbol", "proteinChange", "sampleId", "mutationType",
"chr", "startPosition", "endPosition",
"referenceAllele", "variantAllele")

mapped.colnames <- c("Hugo_Symbol", "Protein_Change", "Sample_ID", "Mutation_Type",
"Chromosome", "Start_Position", "End_Position",
"Reference_Allele", "Variant_Allele")

# check if any columns are missing
missing.columns <- required.colnames[!required.colnames %in% colnames(extended.mutation.df)]
if(length(missing.columns) > 0){
stop("Some columns are missing: ", paste(missing.columns, collapse =", "))
}

# rename headers according to cbioportal MutationMapper
# url: http://www.cbioportal.org/mutation_mapper.jsp
mutation.df <- extended.mutation.df[, required.colnames]
colnames(mutation.df) <- mapped.colnames


# =============================
# map from mutation type to mutation class
mutation.df[, "Mutation_Class"] <- mapMutationTypeToMutationClass(mutation.df[, "Mutation_Type"],
mutation.type.to.class.df)

# =============================
# parse amino acid position
mutation.df[, "AA_Position"] <- parseProteinChange(mutation.df[, "Protein_Change"],
mutation.df[, "Mutation_Class"])

mutation.df <- mutation.df[order(mutation.df[, "AA_Position"],
mutation.df[, "Protein_Change"], decreasing = FALSE), ]

if(!is.na(output.file)){
message("Write mutation data to ", output.file)
write.table(mutation.df, file = output.file, sep = "\t", quote = FALSE, col.names = TRUE, row.names = FALSE)
}

mutation.df
# Query the cBioPortal REST API for the mutations of `gene.symbol` in
# `study.id`, reshape them into MutationMapper-style columns, optionally write
# them to `output.file`, and yield the resulting data frame.
# Any warning or error raised inside the block is re-signalled as one error
# (see the handlers at the bottom).
tryCatch({
  # step 1:
  # translate the gene symbol into the Entrez Gene ID used by the API query
  entrez.id <- suppressMessages(mapIds(
    org.Hs.eg.db,
    keys = gene.symbol,
    column = "ENTREZID",
    keytype = "SYMBOL",
    multiVals = "first"
  ))

  # guard the length first so that `if` always receives a scalar condition
  if (length(entrez.id) != 1L || is.na(entrez.id)) {
    stop("No Entrez Gene ID found for ", gene.symbol)
  }
  entrez.id <- as.character(entrez.id)
  message("The Entrez Gene ID for ", gene.symbol, " is: ", entrez.id)

  # step 2:
  # list the molecular profiles of the study
  # req_perform() turns HTTP 4xx/5xx into R errors by default; disable that so
  # the explicit status checks below are actually reached.
  keep.http.errors <- function(resp) FALSE
  response <- request(paste0(base.url, "studies/", study.id, "/molecular-profiles")) |>
    httr2::req_error(is_error = keep.http.errors) |>
    req_perform()
  status_code <- resp_status(response)

  if (status_code != 200) {
    stop("Failed to retrieve data from cBioPortal. Status_code = ", status_code)
  }

  res_dataset_df <- response |>
    resp_body_string() |>
    fromJSON()

  # check if mutation dataset exists for this study
  maf_profile_ids <- res_dataset_df[res_dataset_df$datatype %in% "MAF", "molecularProfileId"]
  if (length(maf_profile_ids) == 0L) {
    stop("Can not find Mutation data for this study: ", study.id)
  }

  # A study may expose several MAF profiles; the request below needs exactly
  # one. Prefer the id ending in "_mutations", otherwise take the first.
  # NOTE(review): confirm this preference is the desired one for such studies.
  preferred_ids <- grep("_mutations$", maf_profile_ids, value = TRUE)
  maf_study_name <- if (length(preferred_ids) > 0L) preferred_ids[1L] else maf_profile_ids[1L]
  message("Found mutation dataset for ", study.id, ": ", maf_study_name)

  # sample list id built as "<study.id>_all"
  all.sample.name <- paste0(study.id, "_all")

  # get mutation data
  mutation_cmd <- paste0(
    base.url, "molecular-profiles/", maf_study_name, "/mutations?sampleListId=",
    all.sample.name, "&entrezGeneId=", entrez.id)

  response2 <- request(mutation_cmd) |>
    httr2::req_error(is_error = keep.http.errors) |>
    req_perform()
  status_code2 <- resp_status(response2)

  if (status_code2 != 200) {
    stop("can not query mutation data from cBioportal API for the study: ", study.id)
  }

  # ---------------------------
  mutation.df <- response2 |>
    resp_body_string() |>
    fromJSON()

  # an empty JSON array is not parsed into a data frame; report it explicitly
  # instead of failing later with a misleading "columns are missing" error
  if (!is.data.frame(mutation.df) || nrow(mutation.df) == 0L) {
    stop("No mutation data found for ", gene.symbol, " in the study: ", study.id)
  }

  mutation.df$geneSymbol <- gene.symbol

  # NOTE(review): Start_Position/End_Position are filled from the protein
  # positions (proteinPosStart/proteinPosEnd); confirm that genomic
  # startPosition/endPosition are not what is intended here.
  required.colnames <- c("geneSymbol", "proteinChange", "sampleId", "mutationType",
                         "chr", "proteinPosStart", "proteinPosEnd",
                         "referenceAllele", "variantAllele")

  mapped.colnames <- c("Hugo_Symbol", "Protein_Change", "Sample_ID", "Mutation_Type",
                       "Chromosome", "Start_Position", "End_Position",
                       "Reference_Allele", "Variant_Allele")

  # check if any columns are missing, and name them in the error
  missing.columns <- setdiff(required.colnames, colnames(mutation.df))
  if (length(missing.columns) > 0L) {
    stop("Some columns are missing: ", paste(missing.columns, collapse = ", "))
  }

  # rename headers according to cbioportal MutationMapper
  # url: http://www.cbioportal.org/mutation_mapper.jsp
  mutation.df <- mutation.df[, required.colnames]
  colnames(mutation.df) <- mapped.colnames

  # =============================
  # map from mutation type to mutation class
  mutation.df[, "Mutation_Class"] <- mapMutationTypeToMutationClass(
    mutation.df[, "Mutation_Type"],
    mutation.type.to.class.df)

  # =============================
  # parse amino acid position
  mutation.df[, "AA_Position"] <- parseProteinChange(mutation.df[, "Protein_Change"],
                                                     mutation.df[, "Mutation_Class"])

  mutation.df <- mutation.df[order(mutation.df[, "AA_Position"],
                                   mutation.df[, "Protein_Change"], decreasing = FALSE), ]

  if (!is.na(output.file)) {
    message("Write mutation data to ", output.file)
    write.table(mutation.df, file = output.file, sep = "\t", quote = FALSE,
                col.names = TRUE, row.names = FALSE)
  }

  mutation.df
}, warning = function(w) {
  # as before, a warning aborts the query; report only its message text
  stop("[Warning] ", conditionMessage(w), call. = FALSE)
}, error = function(e) {
  # single "[Error] " prefix; conditionMessage() avoids pasting the call object
  stop("[Error] ", conditionMessage(e), call. = FALSE)
})
}
13 changes: 0 additions & 13 deletions man/getMutationsFromCbioportal.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions vignettes/introduction.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,6 @@ g3Lollipop(mutation.dat,
[↥ back to top](#top)




## <a name="ex2"></a>Example 2: visualize genetic mutation data from `CSV` or `TSV` file

In this example, we read genetic mutation data from `CSV` or `TSV` files, and visualize it using some
Expand Down Expand Up @@ -255,6 +253,7 @@ g3Lollipop(mutation.dat,
#### Note:
- Internet access is required to download data from [cBioPortal](http://www.cbioportal.org/). This may take more than 10 seconds, or sometimes it may fail.
- To check what studies are available on cBioPortal
- The `cBioPortalData` and `cBioPortal` R packages have not been stable recently. Therefore, we query the mutation data from `cBioPortal` directly through its web API. This behavior may change in a later version.
```r

# list all studies of cBioPortal
Expand Down

0 comments on commit 8d55903

Please sign in to comment.