diff --git a/.Rbuildignore b/.Rbuildignore index db471f8..ede4d94 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -17,3 +17,4 @@ ^\.DS_Store$ ^examples$ ^R/proteinDomain.R$ +^LICENSE\.md$ diff --git a/.travis.yml b/.travis.yml index 4431fe4..3c36034 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,7 +3,7 @@ sudo: false cache: packages install: -- R -e 'install.packages(c("jsonlite", "cgdsr", "stringr", "htmlwidgets", "knitr", "rmarkdown", "kableExtra", "shiny"))' +- R -e 'install.packages(c("jsonlite", "cBioPortalData", "stringr", "htmlwidgets", "knitr", "rmarkdown", "kableExtra", "shiny"))' script: - R CMD build . diff --git a/DESCRIPTION b/DESCRIPTION index 0e936d3..0b130b5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,22 +1,24 @@ Package: g3viz Type: Package Title: Interactively Visualize Genetic Mutation Data using a Lollipop-Diagram -Version: 1.1.3 +Version: 1.1.5 Authors@R: c( - person("Xin", "Guo", email = "g3viz.group@gmail.com", role = c("aut", "cre")) + person("Xin", "Guo", email = "g3viz.group@gmail.com", role = c("aut","cre")), + person("Rener", "Zhang", email = "g3viz.group@gmail.com", role = c("ctb")) ) Maintainer: Xin Guo -Description: R interface for 'g3-lollipop' JavaScript library. - Visualize genetic mutation data using an interactive lollipop diagram in RStudio or your browser. +Description: Interface for 'g3-lollipop' JavaScript library. + Visualize genetic mutation data using an interactive lollipop diagram in RStudio or your browser. 
License: MIT + file LICENSE Encoding: UTF-8 LazyData: true Depends: - R (>= 3.0.0) + R (>= 3.5.0) +biocViews: Imports: jsonlite, - cgdsr, stringr, + cBioPortalData, htmlwidgets Suggests: shiny (>= 1.0.0), @@ -25,5 +27,5 @@ Suggests: kableExtra URL: https://github.com/G3viz/g3viz BugReports: https://github.com/G3viz/g3viz/issues -RoxygenNote: 7.1.1 +RoxygenNote: 7.2.0 VignetteBuilder: knitr diff --git a/LICENSE b/LICENSE index 132f277..ae5b1d8 100644 --- a/LICENSE +++ b/LICENSE @@ -1,2 +1,2 @@ -YEAR: 2018-2019 +YEAR: 2022 COPYRIGHT HOLDER: Xin Guo diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..6a44811 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,21 @@ +# MIT License + +Copyright (c) 2022 g3viz authors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/NAMESPACE b/NAMESPACE index a6e9f34..90b26f3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,10 +13,11 @@ export(parseProteinChange) export(readMAF) export(renderG3Lollipop) export(uniprot2pfam) -importFrom(cgdsr,CGDS) -importFrom(cgdsr,getCaseLists) -importFrom(cgdsr,getGeneticProfiles) -importFrom(cgdsr,getMutationData) +importFrom(cBioPortalData,cBioPortal) +importFrom(cBioPortalData,getDataByGenes) +importFrom(cBioPortalData,molecularProfiles) +importFrom(cBioPortalData,sampleLists) +importFrom(cBioPortalData,samplesInSampleLists) importFrom(htmlwidgets,shinyRenderWidget) importFrom(htmlwidgets,shinyWidgetOutput) importFrom(jsonlite,toJSON) diff --git a/NEWS.md b/NEWS.md index 182d1a3..d10842c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +## Version 1.1.5 (2022/06/30) + - Updated `hgnc2pfam.df.rda` data + - Remove cgdsr dependencies; add cBioPortalData as the substitution + - Updated getMutationsFromCbioportal.R ([issue 16](https://github.com/G3viz/g3viz/issues/16)). + ## Version 1.1.3 (2020/08/24) - Updated Pfam version 33.1 (date: 2020-08-24) - Updated UniProt (date: 2020-08-24) diff --git a/R/g3Lollipop.R b/R/g3Lollipop.R index 3b34a49..5cc0897 100644 --- a/R/g3Lollipop.R +++ b/R/g3Lollipop.R @@ -29,6 +29,8 @@ #' two built-in styles, \emph{blue} or \emph{gray}. Default \code{NA}, #' indicating browser default. #' @param output.filename Specify output file name. +#' @return lollipop diagram for the given mutation data. The chart is interactive +#' within either Shiny applications or Rmd documents under the bindings. #' #' @examples #' @@ -170,6 +172,8 @@ g3Lollipop <- function(mutation.dat, #' @param quoted Is \code{expr} a quoted expression (with \code{quote()})? This #' is useful if you want to save an expression in a variable. #' +#' @return No value returned. It is the binding which enables interactive functions +#' within Shiny applications and Rmd documents. 
#' @name g3Lollipop-shiny #' #' @importFrom htmlwidgets shinyWidgetOutput diff --git a/R/getMutationsFromCbioportal.R b/R/getMutationsFromCbioportal.R index 55cccbd..edae1d7 100644 --- a/R/getMutationsFromCbioportal.R +++ b/R/getMutationsFromCbioportal.R @@ -9,23 +9,16 @@ #' @param mutation.type.to.class.df mapping table from mutation type to class. #' See \code{\link{mapMutationTypeToMutationClass}} for details. Default #' \code{NA}, which indicates to use default mappings. -#' @param cgds.url the URL for the public CGDS server (Cancer Genomic Data -#' Server). Default is \url{http://www.cbioportal.org/}. Check -#' \emph{cgdsr} R-package for details. -#' @param test.cgds if test CGDS connection. Default is \code{FALSE} #' @examples #' \dontrun{ #' # Usage: -#' # Connection to CGDS (Cange Genomic Data Server). Internet access required. -#' # Note: this may need more than 10 seconds, and sometimes it may fail. -#' library(cgdsr) -#' cgds <- CGDS("http://www.cbioportal.org/") -#' -#' # test if connection is OK (warning: sometimes it may fail) -#' test(cgds) +#' # cBioPortalData has officially replaced the defunct cgdsr. +#' # Search online for cgdsrMigration.html if interested. 
+#' library(cBioPortalData) +#' cbio <- cBioPortal() #' #' # list all studies of cBioPortal -#' all.studies <- getCancerStudies(cgds) +#' all.studies <- getStudies(cbio, buildReport = FALSE) #' #' # First, select a cancer study that contains mutation data set ("caner_study_id") #' # then, query genomic mutation data using a HGNC gene symbol, @@ -33,13 +26,14 @@ #' mutation.dat <- getMutationsFromCbioportal("msk_impact_2017", "TP53") #' mutation.dat <- getMutationsFromCbioportal("all_stjude_2016", "TP53") #' } -#' @importFrom cgdsr CGDS getGeneticProfiles getCaseLists getMutationData +#' @importFrom cBioPortalData cBioPortal molecularProfiles sampleLists getDataByGenes +#' samplesInSampleLists #' @importFrom utils write.table #' #' @return a data frame with columns #' \describe{ #' \item{Hugo_Symbol}{Hugo gene symbol} -#' \item{Protein_Change}{Protein change information (cBioprotal uses \emph{HGVSp} format)} +#' \item{Protein_Change}{Protein change information (cBioportal uses \emph{HGVSp} format)} #' \item{Sample_ID}{Sample ID} #' \item{Mutation_Type}{mutation type, aka, variant classification.} #' \item{Chromosome}{chromosome} @@ -55,52 +49,53 @@ getMutationsFromCbioportal <- function(study.id, gene.symbol, output.file = NA, - mutation.type.to.class.df = NA, - cgds.url = "http://www.cbioportal.org/", - test.cgds = FALSE){ - # ============================= - # define mutation columns - aa.pos.col <- "AA_Position" - mutation.class.col <- "Mutation_Class" + mutation.type.to.class.df = NA){ # ======================== - # cgds server - cgds <- cgdsr::CGDS(cgds.url) - - # ======================== - # test cgds - if(test.cgds){ - cgdsr::test(cgds) - } + # server + cbio <- cBioPortal() # ======================== # get study information - genetic.profiles <- getGeneticProfiles(cgds, study.id) + genetic.profiles <- molecularProfiles(cbio,studyId = study.id) message("Found study ", study.id) # ======================== - # check if mutation informaiton is available in the 
study - profile.col <- "genetic_profile_id" - mutation.idx <- grep(pattern = 'mutations$', x = genetic.profiles[, profile.col], fixed = FALSE) + # check if mutation information is available in the study + profile.col <- "molecularProfileId" + mutation.idx <- grep(pattern = 'mutations$', x = genetic.profiles$molecularProfileId, fixed = FALSE) if(is.integer(mutation.idx) && length(mutation.idx) == 0L){ stop("Can not find mutation information in ", study.id, " study") } - mutation.profile <- genetic.profiles[mutation.idx, profile.col] + mutation.profile <- genetic.profiles$molecularProfileId[mutation.idx] message("Found mutation data set ", mutation.profile) # ======================== - # get case list - case.list.details <- getCaseLists(cgds, study.id)[mutation.idx, ] - mutation.case.list.id <- case.list.details$case_list_id - num.case <- length(strsplit(case.list.details$case_ids, " ")[[1]]) + + case.list.details <- sampleLists(cbio, study.id) + + mutation.case.list.id <- case.list.details$sampleListId + + mutation.case.list.all <- mutation.case.list.id[grep(pattern = '_sequenced$',x = mutation.case.list.id)] + num.case <- length(samplesInSampleLists(cbio,mutation.case.list.id)[[mutation.case.list.all]]) message(num.case, " cases in this study") - extended.mutation.df <- getMutationData(cgds, mutation.case.list.id, mutation.profile, gene.symbol) + ### Download mutation data on certain gene from study + df <- getDataByGenes( + cbio, + studyId = study.id, + genes = gene.symbol, + by = "hugoGeneSymbol", + molecularProfileIds = mutation.profile + )[[1]] + + extended.mutation.df <- cbind(rep(gene.symbol,nrow(df)),df) + colnames(extended.mutation.df) <- c("gene_symbol",colnames(df)) # ========================= # parse mutation data columns - required.colnames <- c("gene_symbol", "amino_acid_change", "case_id", "mutation_type", - "chr", "start_position", "end_position", - "reference_allele", "variant_allele") + required.colnames <- c("gene_symbol", "proteinChange", 
"sampleId", "mutationType", + "chr", "startPosition", "endPosition", + "referenceAllele", "variantAllele") mapped.colnames <- c("Hugo_Symbol", "Protein_Change", "Sample_ID", "Mutation_Type", "Chromosome", "Start_Position", "End_Position", @@ -117,17 +112,18 @@ getMutationsFromCbioportal <- function(study.id, mutation.df <- extended.mutation.df[, required.colnames] colnames(mutation.df) <- mapped.colnames + # ============================= # map from mutation type to mutation class - mutation.df[, mutation.class.col] <- mapMutationTypeToMutationClass(mutation.df[, "Mutation_Type"], + mutation.df[, "Mutation_Class"] <- mapMutationTypeToMutationClass(mutation.df[, "Mutation_Type"], mutation.type.to.class.df) # ============================= # parse amino acid position - mutation.df[, aa.pos.col] <- parseProteinChange(mutation.df[, "Protein_Change"], - mutation.df[, mutation.class.col]) + mutation.df[, "AA_Position"] <- parseProteinChange(mutation.df[, "Protein_Change"], + mutation.df[, "Mutation_Class"]) - mutation.df <- mutation.df[order(mutation.df[, aa.pos.col], + mutation.df <- mutation.df[order(mutation.df[, "AA_Position"], mutation.df[, "Protein_Change"], decreasing = FALSE), ] if(!is.na(output.file)){ diff --git a/R/parseProteinChange.R b/R/parseProteinChange.R index 9d0e2ee..49007dc 100644 --- a/R/parseProteinChange.R +++ b/R/parseProteinChange.R @@ -27,7 +27,7 @@ parseProteinChange <- function(protein.change.vec, mutation.class.vec) { if(!(is.na(d.mc) || d.mc == "Other" || d.mc == "")){ # extract the first numeric value - aa.pos.vec[idx] = as.numeric(str_extract_all(d.pc, "[0-9]+")[[1]])[1] + aa.pos.vec[idx] <- as.numeric(str_extract_all(d.pc, "[0-9]+")[[1]])[1] } # cat(d.pc, " ==> ", aa.pos.vec[idx], "\n") } diff --git a/R/uniprot2pfam.R b/R/uniprot2pfam.R index be8db2a..f3da441 100644 --- a/R/uniprot2pfam.R +++ b/R/uniprot2pfam.R @@ -1,6 +1,6 @@ #' From UniProt ID to Pfam-A domain composition #' -#' @description Map from UniProt ID to Pfam-A domain 
compostion. +#' @description Map from UniProt ID to Pfam-A domain composition. #' @param uniprot.id UniProt ID #' @return a data frame with columns #' \itemize{ diff --git a/R/zzz.R b/R/zzz.R index 2d9e970..3ac5cf7 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -2,7 +2,7 @@ packageStartupMessage( "g3viz: visualizing gene/genome/gentics data for fun. - Pfam (v33.1) - - UniProt (date: 2020/08/24)\n + - UniProt (date: 2022/07/01)\n Any questions, please send emails to or post on GitHub ." ) } diff --git a/README.md b/README.md index 7c33e8d..27c2126 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # G3viz: an R package to interactively visualize genetic mutation data using a lollipop-diagram - **Date: 2022-08-19** + **Date: 2022-06-30** [![Build Status](https://travis-ci.org/G3viz/g3viz.svg?branch=master)](https://travis-ci.org/G3viz/g3viz) [![CRAN_version](http://www.r-pkg.org/badges/version/g3viz)](https://cran.r-project.org/package=g3viz) @@ -48,12 +48,7 @@ Guo, X., *et al*. (2020). G3viz: an R package to interactively visualize genetic ## What's new -1.1.3 - - [x] Updated Pfam version 33.1 (date: 2020-08-24) - - [x] Updated UniProt (date: 2020-08-24) - - [x] add "prepare_data" folder to instruct how to create `hgnc2pfam.df.rda` data - -1.1.5 +1.1.5 - [x] Updated getMutationsFromCbioportal (date: 2022-06-23) - [x] Remove Dependency cgdsr; Add Dependency cBioPortalData (date: 2022-06-23) - [x] Updated `hgnc2pfam.df.rda` (date: 2022-06-24) diff --git a/build/vignette.rds b/build/vignette.rds new file mode 100644 index 0000000..a31a4fe Binary files /dev/null and b/build/vignette.rds differ diff --git a/data/hgnc2pfam.df.rda b/data/hgnc2pfam.df.rda index b9ed2bf..f69435f 100644 Binary files a/data/hgnc2pfam.df.rda and b/data/hgnc2pfam.df.rda differ diff --git a/man/g3Lollipop-shiny.Rd b/man/g3Lollipop-shiny.Rd index c8c7412..0e378f3 100644 --- a/man/g3Lollipop-shiny.Rd +++ b/man/g3Lollipop-shiny.Rd @@ -24,6 +24,10 @@ string and have \code{'px'} appended.} \item{quoted}{Is 
\code{expr} a quoted expression (with \code{quote()})? This is useful if you want to save an expression in a variable.} } +\value{ +No value returned. It is the binding which enables interactive functions + within Shiny applications and Rmd documents. +} \description{ Output and render functions for using g3viz lollipop diagram within Shiny applications and interactive Rmd documents. diff --git a/man/g3Lollipop.Rd b/man/g3Lollipop.Rd index cb16f75..959652f 100644 --- a/man/g3Lollipop.Rd +++ b/man/g3Lollipop.Rd @@ -59,6 +59,10 @@ indicating browser default.} \item{output.filename}{Specify output file name.} } +\value{ +lollipop diagram for the given mutation data. The chart is interactive + within either Shiny applications or Rmd documents under the bindings. +} \description{ Render g3lollipop diagram for the given mutation data } diff --git a/man/getMutationsFromCbioportal.Rd b/man/getMutationsFromCbioportal.Rd index c93b3b7..8192102 100644 --- a/man/getMutationsFromCbioportal.Rd +++ b/man/getMutationsFromCbioportal.Rd @@ -8,9 +8,7 @@ getMutationsFromCbioportal( study.id, gene.symbol, output.file = NA, - mutation.type.to.class.df = NA, - cgds.url = "http://www.cbioportal.org/", - test.cgds = FALSE + mutation.type.to.class.df = NA ) } \arguments{ @@ -24,18 +22,12 @@ Default is \code{NA}.} \item{mutation.type.to.class.df}{mapping table from mutation type to class. See \code{\link{mapMutationTypeToMutationClass}} for details. Default \code{NA}, which indicates to use default mappings.} - -\item{cgds.url}{the URL for the public CGDS server (Cancer Genomic Data -Server). Default is \url{http://www.cbioportal.org/}. Check -\emph{cgdsr} R-package for details.} - -\item{test.cgds}{if test CGDS connection. 
Default is \code{FALSE}} } \value{ a data frame with columns \describe{ \item{Hugo_Symbol}{Hugo gene symbol} - \item{Protein_Change}{Protein change information (cBioprotal uses \emph{HGVSp} format)} + \item{Protein_Change}{Protein change information (cBioportal uses \emph{HGVSp} format)} \item{Sample_ID}{Sample ID} \item{Mutation_Type}{mutation type, aka, variant classification.} \item{Chromosome}{chromosome} @@ -55,16 +47,13 @@ Retrieve and parse mutation data from cBioPortal by the given \examples{ \dontrun{ # Usage: -# Connection to CGDS (Cange Genomic Data Server). Internet access required. -# Note: this may need more than 10 seconds, and sometimes it may fail. -library(cgdsr) -cgds <- CGDS("http://www.cbioportal.org/") - -# test if connection is OK (warning: sometimes it may fail) -test(cgds) +# cBioPortalData has officially replaced the defunct cgdsr. +# Search online for cgdsrMigration.html if interested. +library(cBioPortalData) +cbio <- cBioPortal() # list all studies of cBioPortal -all.studies <- getCancerStudies(cgds) +all.studies <- getStudies(cbio, buildReport = FALSE) # First, select a cancer study that contains mutation data set ("caner_study_id") # then, query genomic mutation data using a HGNC gene symbol, diff --git a/man/uniprot2pfam.Rd b/man/uniprot2pfam.Rd index ce2c364..033a69b 100644 --- a/man/uniprot2pfam.Rd +++ b/man/uniprot2pfam.Rd @@ -22,7 +22,7 @@ a data frame with columns } } \description{ -Map from UniProt ID to Pfam-A domain compostion. +Map from UniProt ID to Pfam-A domain composition. 
} \examples{ uniprot2pfam("Q5VWM5") # PRAMEF9; PRAMEF15 diff --git a/prepare_data/generate_hgnc2pfam.R b/prepare_data/generate_hgnc2pfam.R index 12a1533..2d263d6 100644 --- a/prepare_data/generate_hgnc2pfam.R +++ b/prepare_data/generate_hgnc2pfam.R @@ -59,7 +59,7 @@ for(idx in 1:nrow(uniprot.to.parse.df)){ message("Download Pfam data from Pfam website ...") pfam_url <- "ftp://ftp.ebi.ac.uk/pub/databases/Pfam/current_release/proteomes/9606.tsv.gz" pfam_fn <- "9606.tsv.gz" -download.file(pfam_url, pfam_fn) +#download.file(pfam_url, pfam_fn) # -------------------------- message("Read Pfam information ...") @@ -88,5 +88,5 @@ hgnc2pfam.df <- hgnc2pfam.df[, c("symbol", "uniprot", "length", "start", "end", "hmm.acc", "hmm.name", "type")] # create Rdata, move this to "data" directory -save(hgnc2pfam.df, file="hgnc2pfam.df.rda", compress = "xz") +#save(hgnc2pfam.df, file="hgnc2pfam.df.rda", compress = "xz") diff --git a/prepare_data/hgnc2pfam.df.rda b/prepare_data/hgnc2pfam.df.rda index b9ed2bf..f69435f 100644 Binary files a/prepare_data/hgnc2pfam.df.rda and b/prepare_data/hgnc2pfam.df.rda differ diff --git a/vignettes/chart_themes.Rmd b/vignettes/chart_themes.Rmd index 41043c9..ddf9049 100644 --- a/vignettes/chart_themes.Rmd +++ b/vignettes/chart_themes.Rmd @@ -22,7 +22,7 @@ vignette: > ```{r, message=FALSE, include = FALSE} # install package library(g3viz) -library(cgdsr) +library(cBioPortalData) library(knitr) ``` diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd index 22cbac8..d5db291 100644 --- a/vignettes/introduction.Rmd +++ b/vignettes/introduction.Rmd @@ -2,7 +2,7 @@ title: 'G3viz: an R package to interactively visualize genetic mutation data using a lollipop-diagram' author: "Xin Guo " -date: '2019-04-06' +date: '2022-06-29' output: html_document: df_print: kable @@ -256,16 +256,11 @@ g3Lollipop(mutation.dat, - Internet access is required to download data from [cBioPortal](http://www.cbioportal.org/). 
This may take more than 10 seconds, or sometimes it may fail. - To check what studies are available on cBioPortal ```r -# Connect to CGDS (cancer Genomics Data Server) -cgds <- cgdsr::CGDS("http://www.cbioportal.org/") -# Test if connection is OK -cgdsr::test(cgds) +# list all studies of cBioPortal +all.studies <- cBioPortalData::getStudies(cBioPortalData::cBioPortal(), buildReport = FALSE) -# To list all studies -all.studies <- cgdsr::getCancerStudies(cgds) - -# Pick up a cancer study with mutation data +# Pick up a cancer study (studyId) with mutation data (gene symbol) mutation.dat <- g3viz::getMutationsFromCbioportal("all_stjude_2016", "TP53") ```