diff --git a/DESCRIPTION b/DESCRIPTION index 9d64ffe..923ccc1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,9 +1,10 @@ Package: g3viz Type: Package -Title: visulize MAF genomic annotation data +Title: Visualize Genomic Mutation Data Using an Interactive Lollipop Diagram Version: 0.1.0 Author: person("Xin", "Guo", email = "phoeguo@gmail.com", role=c("aut", "cre")) -Description: a warpper of g3viz javascript library to generate interactive "lollipop-style" diagram to visualize mutation data +Maintainer: Xin Guo +Description: R interface for g3viz Javascript library. Using an interactive `lollipop-diagram` to visualize genomic mutation data. License: MIT Encoding: UTF-8 LazyData: true @@ -11,10 +12,11 @@ Depends: R (>= 3.0.2) Imports: jsonlite, - cgdsr + cgdsr, + stringr, + htmlwidgets Suggests: - shiny (>= 1.0.0), - httpuv (>= 1.4.0) -URL: https://github.com/G3js/g3viz -BugReports: https://github.com/G3js/g3viz/issues + shiny (>= 1.0.0) +URL: https://github.com/G3js/lollipopR +BugReports: https://github.com/G3js/lollipopR/issues RoxygenNote: 6.1.0 diff --git a/NAMESPACE b/NAMESPACE index 8961820..b3d3c13 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,6 +14,14 @@ export(parseProteinChange) export(readMAF) export(renderG3Lollipop) export(uniprot2pfam) -import(cgdsr) -import(htmlwidgets) -import(jsonlite) +importFrom(cgdsr,CGDS) +importFrom(cgdsr,getCaseLists) +importFrom(cgdsr,getGeneticProfiles) +importFrom(cgdsr,getMutationData) +importFrom(htmlwidgets,shinyRenderWidget) +importFrom(htmlwidgets,shinyWidgetOutput) +importFrom(jsonlite,toJSON) +importFrom(stringr,str_extract_all) +importFrom(utils,capture.output) +importFrom(utils,read.table) +importFrom(utils,write.table) diff --git a/R/formatCamelCase.R b/R/formatCamelCase.R index d3b4df8..888ecd0 100644 --- a/R/formatCamelCase.R +++ b/R/formatCamelCase.R @@ -1,4 +1,4 @@ -#' format a give string to camel case (e.g., \emph{Abc_Efg}) +#' format a give string to camel case (e.g., from \emph{abc_efg} to \emph{Abc_Efg}) 
#' @param x input string #' @param sep separator. Default \emph{_}. #' diff --git a/R/g3Lollipop.R b/R/g3Lollipop.R index ae4e261..8019c79 100644 --- a/R/g3Lollipop.R +++ b/R/g3Lollipop.R @@ -14,8 +14,7 @@ #' otherwise, use specified. Default \code{NA}. #' @param plot.options options of lollipop plot in list format #' -#' @import htmlwidgets -#' @import jsonlite +#' @importFrom jsonlite toJSON #' #' @export g3Lollipop <- function(mutation.dat, @@ -59,7 +58,7 @@ g3Lollipop <- function(mutation.dat, # get mutation data for the given gene snv.data.df <- mutation.dat[mutation.dat[, gene.symbol.col] == gene.symbol & !is.na(mutation.dat[, aa.pos.col]), ] - snv.data.json <- jsonlite::toJSON(snv.data.df, pretty = FALSE, auto_unbox = TRUE) + snv.data.json <- toJSON(snv.data.df, pretty = FALSE, auto_unbox = TRUE) # get protein domain information domain.data.json <- hgnc2pfam(gene.symbol, uniprot.id) @@ -71,10 +70,10 @@ g3Lollipop <- function(mutation.dat, factor = factor.col ) - snv.data.format.json <- jsonlite::toJSON(snv.data.format, pretty = FALSE, auto_unbox = TRUE) + snv.data.format.json <- toJSON(snv.data.format, pretty = FALSE, auto_unbox = TRUE) # domain data format - domain.data.format = list( + domain.data.format <- list( length = "length", domainType = "pfam", details = list( @@ -83,9 +82,9 @@ g3Lollipop <- function(mutation.dat, name = "hmm.name" ) ) - domain.data.format.json = jsonlite::toJSON(domain.data.format, pretty = FALSE, auto_unbox = TRUE) + domain.data.format.json <- toJSON(domain.data.format, pretty = FALSE, auto_unbox = TRUE) - plot.options.json <- jsonlite::toJSON(plot.options, pretty = FALSE, auto_unbox = TRUE) + plot.options.json <- toJSON(plot.options, pretty = FALSE, auto_unbox = TRUE) x <- list( domainData = domain.data.json, @@ -120,14 +119,17 @@ g3Lollipop <- function(mutation.dat, #' #' @name g3Lollipop-shiny #' +#' @importFrom htmlwidgets shinyWidgetOutput +#' #' @export g3LollipopOutput <- function(outputId, width = '100%', height = 
'400px'){ - htmlwidgets::shinyWidgetOutput(outputId, 'g3Lollipop', width, height, package = 'g3viz') + shinyWidgetOutput(outputId, 'g3Lollipop', width, height, package = 'g3viz') } #' @rdname g3Lollipop-shiny +#' @importFrom htmlwidgets shinyRenderWidget #' @export renderG3Lollipop <- function(expr, env = parent.frame(), quoted = FALSE) { if (!quoted) { expr <- substitute(expr) } # force quoted - htmlwidgets::shinyRenderWidget(expr, g3LollipopOutput, env, quoted = TRUE) + shinyRenderWidget(expr, g3LollipopOutput, env, quoted = TRUE) } diff --git a/R/g3Lollipop.options.R b/R/g3Lollipop.options.R index 08dc60d..2d8d7b3 100644 --- a/R/g3Lollipop.options.R +++ b/R/g3Lollipop.options.R @@ -2,7 +2,7 @@ #' #' @param chart.width chart width. Default 800. #' @param chart.type \emph{pie} or \emph{circle}. Default \emph{pie}. -#' @param chart.margin chart margin in \emph{list}. Default \code{list(left = 40, right = 20, top = 15, bottom = 25)}. +#' @param chart.margin specify chart margin in _list_ format. Default \code{list(left = 40, right = 20, top = 15, bottom = 25)}. #' @param chart.background chart background. Default \emph{transparent}. #' @param transition.time chart animation transition time in millisecond. Default 600. 
#' @@ -56,58 +56,58 @@ #' #' @return a list with g3Lollipop plot options #' @export -g3Lollipop.options <- function( chart.width = 800, - chart.type = "pie", - chart.margin = list(left = 40, right = 20, top = 15, bottom = 25), - chart.background = "transparent", - transition.time = 600, - # axis - y.axis.label = "mutations", - axis.label.font = "normal 12px Arial", - axis.label.color = "#4f4f4f", - axis.label.alignment = "middle", - axis.label.dy = "-2em", - # legend - legend.margin = list(left = 10, right = 0, top = 5, bottom = 5), - legend.interactive = TRUE, - legend.title = NA, - # lollipop track - lollipop.track.height = 420, - lollipop.track.background = "rgb(244,244,244)", - # pop size - lollipop.pop.min.size = 2, - lollipop.pop.max.size = 12, - lollipop.pop.info.limit = 8, - lollipop.pop.info.color = "#EEE", - lollipop.line.color = "rgb(42,42,42)", - lollipop.line.width = 0.5, - lollipop.circle.color = "wheat", - lollipop.circle.width = 0.5, - lollipop.label.ratio = 1.4, - lollipop.label.min.font.size = 10, - lollipop.color.scheme = "accent", - # title text - title.text = "", - title.font = "normal 16px Arial", - title.color = "#424242", - title.alignment = "middle", - title.dy = "0.35em", - # annotation track - anno.height = 30, - anno.margin = list(top = 4, bottom = 0), - anno.background = "transparent", - anno.bar.fill = "#e5e3e1", - anno.bar.margin = list(top = 2, bottom = 2), - # domain - domain.color.scheme = "category10", - domain.margin = list(top = 0, bottom = 0), - domain.text.font = "normal 11px Arial", - domain.text.color = "#f2f2f2", - # others - legend = TRUE, - tooltip = TRUE, - brush = TRUE, - zoom = TRUE +g3Lollipop.options <- function(chart.width = 800, + chart.type = "pie", + chart.margin = list(left = 40, right = 20, top = 15, bottom = 25), + chart.background = "transparent", + transition.time = 600, + # axis + y.axis.label = "mutations", + axis.label.font = "normal 12px Arial", + axis.label.color = "#4f4f4f", + axis.label.alignment = 
"middle", + axis.label.dy = "-2em", + # legend + legend.margin = list(left = 10, right = 0, top = 5, bottom = 5), + legend.interactive = TRUE, + legend.title = NA, + # lollipop track + lollipop.track.height = 420, + lollipop.track.background = "rgb(244,244,244)", + # pop size + lollipop.pop.min.size = 2, + lollipop.pop.max.size = 12, + lollipop.pop.info.limit = 8, + lollipop.pop.info.color = "#EEE", + lollipop.line.color = "rgb(42,42,42)", + lollipop.line.width = 0.5, + lollipop.circle.color = "wheat", + lollipop.circle.width = 0.5, + lollipop.label.ratio = 1.4, + lollipop.label.min.font.size = 10, + lollipop.color.scheme = "accent", + # title text + title.text = "", + title.font = "normal 16px Arial", + title.color = "#424242", + title.alignment = "middle", + title.dy = "0.35em", + # annotation track + anno.height = 30, + anno.margin = list(top = 4, bottom = 0), + anno.background = "transparent", + anno.bar.fill = "#e5e3e1", + anno.bar.margin = list(top = 2, bottom = 2), + # domain + domain.color.scheme = "category10", + domain.margin = list(top = 0, bottom = 0), + domain.text.font = "normal 11px Arial", + domain.text.color = "#f2f2f2", + # others + legend = TRUE, + tooltip = TRUE, + brush = TRUE, + zoom = TRUE ){ # plot settings plot.options <- list(chartWidth = chart.width, diff --git a/R/getDefaultMutationMappingTable.R b/R/getDefaultMutationMappingTable.R index 938c6d9..950924e 100644 --- a/R/getDefaultMutationMappingTable.R +++ b/R/getDefaultMutationMappingTable.R @@ -2,8 +2,8 @@ #' #' @description Return the default mapping table between mutation type to mutation class #' @details -#' Mutation type or variant classificaiton, generally with column named \emph{Variant_Classficiation} or -#' \emph{Mutation_Type} in MAF file, can be classificed as follows +#' Mutation type or variant classification, generally with column named \emph{Variant_Classficiation} or +#' \emph{Mutation_Type} in MAF file, can be classified as follows #' \enumerate{ #' \item Missense #' 
\itemize{ @@ -27,7 +27,7 @@ #' See \url{https://en.wikipedia.org/wiki/Nonsense_mutation}. #' \item \emph{Nonstop_Mutation} --- variant removes stop codon. #' \item \emph{Splice_Site} --- the variant is within two bases of a splice site. -#' \item \emph{Splice_Region} --- the variant is within splice reguion. +#' \item \emph{Splice_Region} --- the variant is within splice region. #' } #' \item Other #' \itemize{ @@ -38,14 +38,14 @@ #' \item \emph{Fusion} --- gene fusion #' \item \emph{IGR} --- intergenic region. Does not overlap any transcript. #' \item \emph{Intron} --- variant lies between exons within the bounds of the chosen transcript. -#' \item \emph{Translation_Start_Site} --- varaint in translation start site. +#' \item \emph{Translation_Start_Site} --- variant in translation start site. #' \item \emph{De_novo_Start_InFrame} --- New start codon is created by the given variant using the chosen transcript. #' However, it is in frame relative to the coded protein. #' \item \emph{De_novo_Start_OutOfFrame} --- New start codon is created by the given variant using the chosen transcript. #' However, it is out of frame relative to the coded protein. #' \item \emph{Start_Codon_SNP} --- point mutation that overlaps the start codon. #' \item \emph{Start_Codon_Ins} --- insertion that overlaps the start codon. -#' \item \emph{Start_Codon_Del} --- seletion that overlaps the start codon. +#' \item \emph{Start_Codon_Del} --- deletion that overlaps the start codon. #' \item \emph{RNA} --- variant lies on one of the RNA transcripts. #' \item \emph{lincRNA} --- variant lies on one of the lincRNAs. #' \item \emph{Unknown} --- Unknown diff --git a/R/getMutationsFromCbioportal.R b/R/getMutationsFromCbioportal.R index f40babc..1ac34e7 100644 --- a/R/getMutationsFromCbioportal.R +++ b/R/getMutationsFromCbioportal.R @@ -9,7 +9,7 @@ #' \code{\link{getDefaultMutationMappingTable}} for details. #' Default \code{NA}, indicating to use \code{\link{getDefaultMutationMappingTable}}. 
#' @examples -#' # list all cbioportal studies +#' # list all studies of cBioPortal #' library(cgdsr) #' cgds <- CGDS("http://www.cbioportal.org/public-portal/") #' all.studies <- getCancerStudies(cgds) @@ -18,7 +18,8 @@ #' # pick a primary HGNC gene symbol to query #' mutation.dat <- getMutationsFromCbioportal("msk_impact_2017", "TP53") #' mutation.dat <- getMutationsFromCbioportal("all_stjude_2016", "TP53") -#' @import cgdsr +#' @importFrom cgdsr CGDS getGeneticProfiles getCaseLists getMutationData +#' @importFrom utils write.table #' #' @return a data frame with columns #' \itemize{ @@ -31,8 +32,8 @@ #' \item \emph{End_Position} --- end position #' \item \emph{Reference_Allele} --- reference allele #' \item \emph{Variant_Allele} --- variant allele -#' \item \emph{Mutation_Class} --- mutation class (e.g., Truncating/Misense/Inframe/Other) -#' \item \emph{AA_Position} --- amino-acid postion of the variant; if the variant is not in protein-conding region, \code{NA} +#' \item \emph{Mutation_Class} --- mutation class (e.g., Truncating/Missense/Inframe/Other) +#' \item \emph{AA_Position} --- amino-acid position of the variant; if the variant is not in protein-coding region, \code{NA} #' } #' @export getMutationsFromCbioportal <- function(study.id, @@ -46,11 +47,11 @@ getMutationsFromCbioportal <- function(study.id, # ======================== # cgds server - cgds <- cgdsr::CGDS("http://www.cbioportal.org/public-portal/") + cgds <- CGDS("http://www.cbioportal.org/public-portal/") # ======================== # get study information - genetic.profiles <- cgdsr::getGeneticProfiles(cgds, study.id) + genetic.profiles <- getGeneticProfiles(cgds, study.id) message("Found study ", study.id) # ======================== @@ -65,12 +66,12 @@ getMutationsFromCbioportal <- function(study.id, # ======================== # get case list - case.list.details <- cgdsr::getCaseLists(cgds, study.id)[mutation.idx, ] + case.list.details <- getCaseLists(cgds, study.id)[mutation.idx, ] 
mutation.case.list.id <- case.list.details$case_list_id num.case <- length(strsplit(case.list.details$case_ids, " ")[[1]]) message(num.case, " cases in this study") - extended.mutation.df <- cgdsr::getMutationData(cgds, mutation.case.list.id, mutation.profile, gene.symbol) + extended.mutation.df <- getMutationData(cgds, mutation.case.list.id, mutation.profile, gene.symbol) # ========================= # parse mutation data columns required.colnames <- c("gene_symbol", "amino_acid_change", "case_id", "mutation_type", @@ -107,7 +108,7 @@ getMutationsFromCbioportal <- function(study.id, if(!is.na(output.file)){ message("Write mutation data to ", output.file) - write.table(mutation.df, file = output.file, sep = "\t", quote = FALSE, col.name = TRUE, row.name = FALSE) + write.table(mutation.df, file = output.file, sep = "\t", quote = FALSE, col.names = TRUE, row.names = FALSE) } mutation.df diff --git a/R/hgnc2pfam.R b/R/hgnc2pfam.R index 9f23862..78514e1 100644 --- a/R/hgnc2pfam.R +++ b/R/hgnc2pfam.R @@ -1,7 +1,7 @@ #' Map from Hugo symbol to Pfam domains #' -#' @description Mapping from Hugo symbol to Pfam-A domain compostion. -#' If the given Hugo symbol has multple UniProt ID mappings, +#' @description Mapping from Hugo symbol to Pfam-A domain composition. +#' If the given Hugo symbol has multiple UniProt ID mappings, #' and \code{guess == TRUE}, #' the longest UniProt protein is selected. Return is either a list of a JSON. 
#' @examples @@ -9,23 +9,26 @@ #' hgnc2pfam("TP53") #' hgnc2pfam("TP53", output.format = "json") #' hgnc2pfam("TP53", output.format = "list") -#' hgnc2pfam("TP53", output.format = "json", uniprot.id = "P84996") # OK +#' hgnc2pfam("TP53", output.format = "json", uniprot.id = "P04637") # OK #' -#' # for gene mapping to multiple uniprot enties +#' # for gene mapping to multiple UniProt entries #' hgnc2pfam("GNAS", guess = TRUE) #' hgnc2pfam("GNAS", guess = FALSE) #' hgnc2pfam("GNAS", output.format = "list") #' hgnc2pfam("GNAS", output.format = "list", uniprot.id = "P84996") -#' hgnc2pfam("GNAS", output.format = "list", uniprot.id = "P84997") # not exists, returns FALSE +#' # hgnc2pfam("GNAS", output.format = "list", uniprot.id = "P84997") # not mapped to GNAS, stops with an error #' @param hgnc.symbol primary Hugo symbol #' @param output.format output format: JSON or list #' @param uniprot.id UniProt ID, in case that gene symbol maps to multiple UniProt entries. #' @param guess if the given Hugo symbol links to multiple UniProt IDs, #' choose the longest one (\code{guess == TRUE}); #' otherwise \code{NA} (\code{guess == FALSE}). Default \code{TRUE}. -#' @return A list or a Json with attributes: -#' \emph{symbol}, \emph{uniprot}, \emph{length}, and a list of \emph{pfam} entries, including +#' @return A list or a JSON with attributes: +#' \emph{symbol}, \emph{uniprot}, \emph{length}, and a list of \emph{Pfam} entries, including #' \emph{hmm.acc}, \emph{hmm.name}, \emph{start}, \emph{end}, and \emph{type}. 
+#' +#' @importFrom utils capture.output +#' @importFrom jsonlite toJSON #' @export hgnc2pfam <- function(hgnc.symbol, guess = TRUE, @@ -42,8 +45,8 @@ hgnc2pfam <- function(hgnc.symbol, stop("Output.format should be either json or list.") } - # get uniprot ids - uniprot.df = hgnc2uniprot(hgnc.symbol) + # get UniProt ids + uniprot.df <- hgnc2uniprot(hgnc.symbol) if(nrow(uniprot.df) > 1){ msg <- paste(capture.output(print.data.frame(uniprot.df, row.names = FALSE)), collapse = "\n") @@ -53,7 +56,7 @@ hgnc2pfam <- function(hgnc.symbol, if(!is.na(uniprot.id)){ if(uniprot.id %in% uniprot.df$uniprot){ message("Choose ", uniprot.id) - uniprot.df = subset(uniprot.df, uniprot == uniprot.id) + uniprot.df <- subset(uniprot.df, uniprot == uniprot.id) } else { stop(uniprot.id, " is not mapped to ", hgnc.symbol) } @@ -76,8 +79,8 @@ hgnc2pfam <- function(hgnc.symbol, } if(is.data.frame(pfam.df)){ - pfam.df = pfam.df[, c("hmm.acc", "hmm.name", "start", "end", "type")] - pfam.df = pfam.df[!is.na(pfam.df[,"hmm.acc"]), ] + pfam.df <- pfam.df[, c("hmm.acc", "hmm.name", "start", "end", "type")] + pfam.df <- pfam.df[!is.na(pfam.df[,"hmm.acc"]), ] } # remove empty entries @@ -85,7 +88,7 @@ hgnc2pfam <- function(hgnc.symbol, output.list$pfam <- pfam.df if(output.format == "json"){ - jsonlite::toJSON(output.list, pretty = FALSE, auto_unbox = TRUE) + toJSON(output.list, pretty = FALSE, auto_unbox = TRUE) } else { output.list } diff --git a/R/hgnc2uniprot.R b/R/hgnc2uniprot.R index 3885233..0353b95 100644 --- a/R/hgnc2uniprot.R +++ b/R/hgnc2uniprot.R @@ -25,11 +25,11 @@ hgnc2uniprot <- function(hgnc.symbol){ stop("Missing hgnc.symbol: need to specify a HUGO symbol.") } - hgnc2pfam.file = system.file('data', 'hgnc2pfam.RDS', package = 'g3viz') + hgnc2pfam.file <- system.file('data', 'hgnc2pfam.RDS', package = 'g3viz') #hgnc2pfam.file = "data/hgnc2pfam.RDS" - hgnc2pfam.df = readRDS(file = hgnc2pfam.file) + hgnc2pfam.df <- readRDS(file = hgnc2pfam.file) - uniprot.df = unique(subset(hgnc2pfam.df, 
symbol == hgnc.symbol)[, c("symbol", "uniprot", "length")]) + uniprot.df <- unique(subset(hgnc2pfam.df, symbol == hgnc.symbol, select = c("symbol", "uniprot", "length"))) uniprot.df } diff --git a/R/mapMutationTypeToMutationClass.R b/R/mapMutationTypeToMutationClass.R index 99d4435..8c68eac 100644 --- a/R/mapMutationTypeToMutationClass.R +++ b/R/mapMutationTypeToMutationClass.R @@ -4,7 +4,7 @@ #' for more details. #' #' @param mutation.type.vec a vector of mutation type information -#' @param mutation.type.to.class.df A mapping table from mutation tytpe (header \emph{Mutation_Type}) +#' @param mutation.type.to.class.df A mapping table from mutation type (header \emph{Mutation_Type}) #' to mutation class (header \emph{Mutation_Class}). #' Default \code{NA} indicates to use default mapping table; see \code{\link{getDefaultMutationMappingTable}}. #' @return a vector of mapped mutation class information diff --git a/R/parseProteinChange.R b/R/parseProteinChange.R index 0e831c6..7a45d58 100644 --- a/R/parseProteinChange.R +++ b/R/parseProteinChange.R @@ -8,6 +8,7 @@ #' @param protein.change.vec a vector of strings with protein change information, usually in HGVSp_short format. #' @param mutation.class.vec a vector of strings with mutation class (or so-called variant classification) information. 
#' +#' @importFrom stringr str_extract_all #' @export parseProteinChange <- function(protein.change.vec, mutation.class.vec) { if(length(protein.change.vec) != length(mutation.class.vec)){ @@ -26,7 +27,7 @@ parseProteinChange <- function(protein.change.vec, mutation.class.vec) { if(!(is.na(d.mc) || d.mc == "Other" || d.mc == "")){ # extract the first numeric value - aa.pos.vec[idx] = as.numeric(stringr::str_extract_all(d.pc, "[0-9]+")[[1]])[1] + aa.pos.vec[idx] = as.numeric(str_extract_all(d.pc, "[0-9]+")[[1]])[1] } # cat(d.pc, " ==> ", aa.pos.vec[idx], "\n") } diff --git a/R/readMAF.R b/R/readMAF.R index 2339d53..82d666c 100644 --- a/R/readMAF.R +++ b/R/readMAF.R @@ -3,7 +3,7 @@ #' @description Read MAF file. #' For MAF format specification, check \url{https://docs.gdc.cancer.gov/Data/File_Formats/MAF_Format/}. #' -#' @param maf.file MAF file name. Gunzipped input file allowed, with ".gz" file extension. +#' @param maf.file MAF file name. Gzipped input file allowed, with ".gz" file extension. #' @param gene.symbol.col Column name of Hugo gene symbols (e.g., TP53). Default \emph{Hugo_Symbol}. #' @param variant.class.col Column name of variant class information #' (e.g., \emph{Missense_Mutation}, \emph{Nonsense_Mutation}). Default is a list of \emph{Variant_Classification} and \emph{Mutation_Class}. @@ -19,8 +19,10 @@ #' @param sep separator of columns. Default \code{sep = "\\t"}. #' @param ... additional parameters pass to \code{\link[utils]{read.table}}. #' -#' @return a data frame containg representation of the mutation data in the given MAF file., -#' with optional columns of parsed \emph{Mutation_Class} and \emph{Protein_Position}. +#' @importFrom utils write.table read.table +#' +#' @return a data frame containing MAF information, +#' plus optional columns of the parsed \emph{Mutation_Class} and \emph{Protein_Position}. 
#' #' @export readMAF <- function(maf.file, diff --git a/R/uniprot2pfam.R b/R/uniprot2pfam.R index 2bae4f0..c3fbf34 100644 --- a/R/uniprot2pfam.R +++ b/R/uniprot2pfam.R @@ -8,7 +8,7 @@ #' \item \emph{length} --- protein length #' \item \emph{hmm.acc} --- accession number of Pfam HMM model, e.g., PF08563 #' \item \emph{hmm.name} --- Pfam name, e.g., P53_TAD -#' \item \emph{start} --- Pfam domain start postion +#' \item \emph{start} --- Pfam domain start position #' \item \emph{end} --- Pfam domain end position #' \item \emph{type} --- Pfam type, including domain/motif/family #' } @@ -20,11 +20,14 @@ uniprot2pfam <- function(uniprot.id){ stop("Missing uniprot.id: need to specify a UniPort ID (HUMAN)") } - hgnc2pfam.file = system.file('data', 'hgnc2pfam.RDS', package = 'g3viz') + hgnc2pfam.file <- system.file('data', 'hgnc2pfam.RDS', package = 'g3viz') #hgnc2pfam.file = "data/hgnc2pfam.RDS" - hgnc2pfam.df = readRDS(file = hgnc2pfam.file) + hgnc2pfam.df <- readRDS(file = hgnc2pfam.file) - uniprot.df = unique(subset(hgnc2pfam.df, uniprot == uniprot.id)[, c("uniprot", "length", "hmm.acc", "hmm.name", "start", "end", "type")]) + uniprot.df <- unique( + subset(hgnc2pfam.df, + uniprot == uniprot.id, + select = c("uniprot", "length", "hmm.acc", "hmm.name", "start", "end", "type"))) # sort by domain position uniprot.df <- uniprot.df[with(uniprot.df, order(start, end)),] diff --git a/README.md b/README.md index d65f3c7..a9a7a63 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ Easily and effectively visualizing genomic data can help researchers to better understand their data. G3Viz is an [R](https://www.r-project.org/) package, which aims to provide a suite of easy-to-use visualization tools to enable users to interactively visualize genomic data in a web browser, without having to know any HTML5/JavaScript technologies. 
-## Installation +## Install ```r # Install devtools install.package("devtools") @@ -16,9 +16,9 @@ devtools::install_github("g3js/g3viz") ``` -## g3Lollipop examples +## Examples -### Example 1 +### Example 1: visualize mutation data from [cBioPortal](http://www.cbioportal.org/) Retrieve genomic mutation data of [msk\_impact\_2017](https://www.ncbi.nlm.nih.gov/pubmed/28481359) study for the gene _TP53_ from [cBioPortal](http://www.cbioportal.org/). @@ -34,12 +34,12 @@ g3Lollipop(mutation.dat, gene.symbol = "TP53") > > [Live example](https://bl.ocks.org/phoeguo/raw/583a12e04c6b9d7ca1825cdbdc62f531/) > -> +> > -### Example 2 +### Example 2: visualize mutation data from [MAF](https://docs.gdc.cancer.gov/Data/File_Formats/MAF_Format/) -Load data from local [MAF](https://docs.gdc.cancer.gov/Data/File_Formats/MAF_Format/) file, classified the mutation data by detailed _Variant\_Classification_ information (i.e., _Frame\_Shift\_Del_, _Split\_Site_). In this example, the MAF data was downloaded directly from [TCGA-BRCA](https://portal.gdc.cancer.gov/projects/TCGA-BRCA) project GDC Data Portal. +Load data from a [MAF](https://docs.gdc.cancer.gov/Data/File_Formats/MAF_Format/) file and classify the mutation data by detailed _Variant\_Classification_ information (e.g., _Frame\_Shift\_Del_, _Splice\_Site_). In this example, the MAF data was downloaded directly from the [TCGA-BRCA](https://portal.gdc.cancer.gov/projects/TCGA-BRCA) project on the GDC Data Portal. ```r library(g3viz) @@ -61,10 +61,10 @@ g3Lollipop(mutation.dat, > > [Live example](https://bl.ocks.org/phoeguo/raw/302a0ff5729f6aa773c33d4bfd3061c4/) > -> +> > -### Example 3 +### Example 3: visualize mutation data in _CSV_ or _TSV_ format Load user-defined file in _CSV_ or _TSV_ format. 
@@ -75,14 +75,10 @@ library(g3viz) mutation.csv <- system.file("extdata", "ccle.csv", package = "g3viz") # customized column names -gene.symbol.colname <- "Hugo_Symbol" -variant.class.colname <- "Variant_Classification" -protein.change.colname <- "amino_acid_change" - mutation.dat <- readMAF(mutation.csv, - gene.symbol.col = gene.symbol.colname, - variant.class.col = variant.class.colname, - protein.change.col = protein.change.colname, + gene.symbol.col = "Hugo_Symbol", + variant.class.col = "Variant_Classification", + protein.change.col = "amino_acid_change", sep = ",") # separator of csv file # plot options: try to mimic MutationMapper (http://www.cbioportal.org/mutation_mapper.jsp) @@ -114,19 +110,62 @@ g3Lollipop(mutation.dat, > > [Live example](https://bl.ocks.org/phoeguo/raw/60f804c6683de30650e36ee912304754/) > -> +> > -## +## Usage + +1. Read data +Genomic mutation data (_e.g._, [aggregated somatic mutations](https://docs.gdc.cancer.gov/Encyclopedia/pages/Aggregated_Somatic_Mutation/)) can be loaded from +* [MAF](https://docs.gdc.cancer.gov/Data/File_Formats/MAF_Format/) file, for example, +```r +maf.file <- system.file("extdata", "TCGA.BRCA.varscan.somatic.maf.gz", package = "g3viz") +mutation.dat <- readMAF(maf.file) +``` +* directly from [cBioPortal](http://www.cbioportal.org/) (internet access required), for example, +```r +# get mutation data of msk_impact_2017 study from cBioPortal +mutation.dat <- getMutationsFromCbioportal("msk_impact_2017", "TP53") +``` +* _CSV_ or _TSV_ file +```r +# load and read data +mutation.csv <- system.file("extdata", "ccle.csv", package = "g3viz") +mutation.dat <- readMAF(mutation.csv, + gene.symbol.col = "Hugo_Symbol", + variant.class.col = "Variant_Classification", + protein.change.col = "amino_acid_change", + sep = ",") # separator of csv file +``` +2. Set chart options + +Chart options can be specified using `g3Lollipop.options()` function (_e.g._, `g3Lollipop.options(chart.type = "circle", lollipop.track.background = "transparent")`). 
Use `?g3viz::g3Lollipop.options` to check these options. +These options are listed in the following table. + +Option name | Description +----------------------- | -------------------------------------- +**Chart options** | +----------------------- | -------------------------------------- +chart.width | chart width in pixel. Default `800`. +chart.type | pop type, _pie_ or _circle_. Default `pie`. +chart.margin | specify chart margin in _list_ format. Default `list(left = 40, right = 20, top = 15, bottom = 25)`. +chart.background | chart background. Default `transparent`. +transition.time | chart animation transition time in millisecond. Default `600`. +y.axis.label | Y-axis label text. Default `mutations`. +axis.label.font | css font style shorthand (font-style font-variant font-weight font-size/line-height font-family). Default `normal 12px Arial`. +axis.label.color | axis label text color. Default `#4f4f4f`. +axis.label.alignment | axis label text alignment (start/end/middle). Default `middle` +axis.label.dy | text adjustment of axis label text. Default `-2em`. 
+----------------------- | -------------------------------------- diff --git a/g3viz.Rproj b/g3viz.Rproj index 2176bb8..f8f7f8e 100644 --- a/g3viz.Rproj +++ b/g3viz.Rproj @@ -18,4 +18,4 @@ StripTrailingWhitespace: Yes BuildType: Package PackageUseDevtools: Yes PackageInstallArgs: --no-multiarch --with-keep.source -PackageRoxygenize: namespace +PackageRoxygenize: rd,collate,namespace diff --git a/inst/demo/CCLE_APC.png b/inst/extdata/CCLE_APC.png similarity index 100% rename from inst/demo/CCLE_APC.png rename to inst/extdata/CCLE_APC.png diff --git a/inst/demo/MSK_IMPACT_2017_TP53.png b/inst/extdata/MSK_IMPACT_2017_TP53.png similarity index 100% rename from inst/demo/MSK_IMPACT_2017_TP53.png rename to inst/extdata/MSK_IMPACT_2017_TP53.png diff --git a/inst/demo/TCGA_BRCA_PIK3CA.png b/inst/extdata/TCGA_BRCA_PIK3CA.png similarity index 100% rename from inst/demo/TCGA_BRCA_PIK3CA.png rename to inst/extdata/TCGA_BRCA_PIK3CA.png diff --git a/man/formatCamelCase.Rd b/man/formatCamelCase.Rd index f9a3aaf..5f6396c 100644 --- a/man/formatCamelCase.Rd +++ b/man/formatCamelCase.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/formatCamelCase.R \name{formatCamelCase} \alias{formatCamelCase} -\title{format a give string to camel case (e.g., \emph{Abc_Efg})} +\title{format a give string to camel case (e.g., from \emph{abc_efg} to \emph{Abc_Efg})} \usage{ formatCamelCase(x, sep = "_") } @@ -15,5 +15,5 @@ formatCamelCase(x, sep = "_") the string in camel format } \description{ -format a give string to camel case (e.g., \emph{Abc_Efg}) +format a give string to camel case (e.g., from \emph{abc_efg} to \emph{Abc_Efg}) } diff --git a/man/getDefaultMutationMappingTable.Rd b/man/getDefaultMutationMappingTable.Rd index 48fcfab..bc04330 100644 --- a/man/getDefaultMutationMappingTable.Rd +++ b/man/getDefaultMutationMappingTable.Rd @@ -13,8 +13,8 @@ A data frame with headers of \emph{Mutation_Type}, \emph{Mutation_Class}, \emph{ Return the default mapping table between mutation type to 
mutation class } \details{ -Mutation type or variant classificaiton, generally with column named \emph{Variant_Classficiation} or -\emph{Mutation_Type} in MAF file, can be classificed as follows +Mutation type or variant classification, generally with column named \emph{Variant_Classficiation} or +\emph{Mutation_Type} in MAF file, can be classified as follows \enumerate{ \item Missense \itemize{ @@ -38,7 +38,7 @@ Mutation type or variant classificaiton, generally with column named \emph{Varia See \url{https://en.wikipedia.org/wiki/Nonsense_mutation}. \item \emph{Nonstop_Mutation} --- variant removes stop codon. \item \emph{Splice_Site} --- the variant is within two bases of a splice site. -\item \emph{Splice_Region} --- the variant is within splice reguion. +\item \emph{Splice_Region} --- the variant is within splice region. } \item Other \itemize{ @@ -49,14 +49,14 @@ Mutation type or variant classificaiton, generally with column named \emph{Varia \item \emph{Fusion} --- gene fusion \item \emph{IGR} --- intergenic region. Does not overlap any transcript. \item \emph{Intron} --- variant lies between exons within the bounds of the chosen transcript. -\item \emph{Translation_Start_Site} --- varaint in translation start site. +\item \emph{Translation_Start_Site} --- variant in translation start site. \item \emph{De_novo_Start_InFrame} --- New start codon is created by the given variant using the chosen transcript. However, it is in frame relative to the coded protein. \item \emph{De_novo_Start_OutOfFrame} --- New start codon is created by the given variant using the chosen transcript. However, it is out of frame relative to the coded protein. \item \emph{Start_Codon_SNP} --- point mutation that overlaps the start codon. \item \emph{Start_Codon_Ins} --- insertion that overlaps the start codon. -\item \emph{Start_Codon_Del} --- seletion that overlaps the start codon. +\item \emph{Start_Codon_Del} --- deletion that overlaps the start codon. 
\item \emph{RNA} --- variant lies on one of the RNA transcripts. \item \emph{lincRNA} --- variant lies on one of the lincRNAs. \item \emph{Unknown} --- Unknown diff --git a/man/getMutationsFromCbioportal.Rd b/man/getMutationsFromCbioportal.Rd index 898ee15..166e960 100644 --- a/man/getMutationsFromCbioportal.Rd +++ b/man/getMutationsFromCbioportal.Rd @@ -30,8 +30,8 @@ a data frame with columns \item \emph{End_Position} --- end position \item \emph{Reference_Allele} --- reference allele \item \emph{Variant_Allele} --- variant allele -\item \emph{Mutation_Class} --- mutation class (e.g., Truncating/Misense/Inframe/Other) -\item \emph{AA_Position} --- amino-acid postion of the variant; if the variant is not in protein-conding region, \code{NA} +\item \emph{Mutation_Class} --- mutation class (e.g., Truncating/Missense/Inframe/Other) +\item \emph{AA_Position} --- amino-acid position of the variant; if the variant is not in protein-coding region, \code{NA} } } \description{ @@ -39,7 +39,7 @@ Retrieve and parse mutation data from cBioPortal by the given cBioPortal cancer study ID and the gene symbol. } \examples{ -# list all cbioportal studies +# list all studies of cBioPortal library(cgdsr) cgds <- CGDS("http://www.cbioportal.org/public-portal/") all.studies <- getCancerStudies(cgds) diff --git a/man/hgnc2pfam.Rd b/man/hgnc2pfam.Rd index 7220f67..9f7f34b 100644 --- a/man/hgnc2pfam.Rd +++ b/man/hgnc2pfam.Rd @@ -19,13 +19,13 @@ otherwise \code{NA} (\code{guess == FALSE}). Default \code{TRUE}.} \item{output.format}{output format: JSON or list} } \value{ -A list or a Json with attributes: - \emph{symbol}, \emph{uniprot}, \emph{length}, and a list of \emph{pfam} entries, including +A list or a JSON with attributes: + \emph{symbol}, \emph{uniprot}, \emph{length}, and a list of \emph{Pfam} entries, including \emph{hmm.acc}, \emph{hmm.name}, \emph{start}, \emph{end}, and \emph{type}. } \description{ -Mapping from Hugo symbol to Pfam-A domain compostion. 
-If the given Hugo symbol has multple UniProt ID mappings, +Mapping from Hugo symbol to Pfam-A domain composition. +If the given Hugo symbol has multiple UniProt ID mappings, and \code{guess == TRUE}, the longest UniProt protein is selected. Return is either a list of a JSON. } @@ -34,12 +34,12 @@ the longest UniProt protein is selected. Return is either a list of a JSON. hgnc2pfam("TP53") hgnc2pfam("TP53", output.format = "json") hgnc2pfam("TP53", output.format = "list") -hgnc2pfam("TP53", output.format = "json", uniprot.id = "P84996") # OK +hgnc2pfam("TP53", output.format = "json", uniprot.id = "P04637") # OK -# for gene mapping to multiple uniprot enties +# for gene mapping to multiple UniProt entries hgnc2pfam("GNAS", guess = TRUE) hgnc2pfam("GNAS", guess = FALSE) hgnc2pfam("GNAS", output.format = "list") hgnc2pfam("GNAS", output.format = "list", uniprot.id = "P84996") -hgnc2pfam("GNAS", output.format = "list", uniprot.id = "P84997") # not exists, returns FALSE +# hgnc2pfam("GNAS", output.format = "list", uniprot.id = "P84997") # not exists, returns FALSE } diff --git a/man/mapMutationTypeToMutationClass.Rd b/man/mapMutationTypeToMutationClass.Rd index 03339cd..c074e27 100644 --- a/man/mapMutationTypeToMutationClass.Rd +++ b/man/mapMutationTypeToMutationClass.Rd @@ -10,7 +10,7 @@ mapMutationTypeToMutationClass(mutation.type.vec, \arguments{ \item{mutation.type.vec}{a vector of mutation type information} -\item{mutation.type.to.class.df}{A mapping table from mutation tytpe (header \emph{Mutation_Type}) +\item{mutation.type.to.class.df}{A mapping table from mutation type (header \emph{Mutation_Type}) to mutation class (header \emph{Mutation_Class}).
Default \code{NA} indicates to use default mapping table; see \code{\link{getDefaultMutationMappingTable}}.} } diff --git a/man/readMAF.Rd b/man/readMAF.Rd index d97e348..08b03ec 100644 --- a/man/readMAF.Rd +++ b/man/readMAF.Rd @@ -12,7 +12,7 @@ readMAF(maf.file, gene.symbol.col = "Hugo_Symbol", mutation.type.to.class.df = NA, sep = "\\t", ...) } \arguments{ -\item{maf.file}{MAF file name. Gunzipped input file allowed, with ".gz" file extension.} +\item{maf.file}{MAF file name. Gzipped input file allowed, with ".gz" file extension.} \item{gene.symbol.col}{Column name of Hugo gene symbols (e.g., TP53). Default \emph{Hugo_Symbol}.} @@ -39,8 +39,8 @@ Default \code{NA}, indicating to use \code{\link{getDefaultMutationMappingTable} \item{...}{additional parameters pass to \code{\link[utils]{read.table}}.} } \value{ -a data frame containg representation of the mutation data in the given MAF file., - with optional columns of parsed \emph{Mutation_Class} and \emph{Protein_Position}. +a data frame containing MAF information, + plus optional columns of the parsed \emph{Mutation_Class} and \emph{Protein_Position}. } \description{ Read MAF file. diff --git a/man/uniprot2pfam.Rd b/man/uniprot2pfam.Rd index 8b96e54..cea90f8 100644 --- a/man/uniprot2pfam.Rd +++ b/man/uniprot2pfam.Rd @@ -16,7 +16,7 @@ a data frame with columns \item \emph{length} --- protein length \item \emph{hmm.acc} --- accession number of Pfam HMM model, e.g., PF08563 \item \emph{hmm.name} --- Pfam name, e.g., P53_TAD -\item \emph{start} --- Pfam domain start postion +\item \emph{start} --- Pfam domain start position \item \emph{end} --- Pfam domain end position \item \emph{type} --- Pfam type, including domain/motif/family } diff --git a/test/test_data.R b/test/test_data.R deleted file mode 100644 index e69de29..0000000