diff --git a/.Rbuildignore b/.Rbuildignore index ede4d94..c5c3035 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -18,3 +18,4 @@ ^examples$ ^R/proteinDomain.R$ ^LICENSE\.md$ +^vignettes/ diff --git a/docs/chart_themes.html b/docs/chart_themes.html new file mode 100644 index 0000000..e9dd53f --- /dev/null +++ b/docs/chart_themes.html @@ -0,0 +1,2838 @@ + + + + +
+ + + + + + + + + + +The g3viz
package contains 8 ready-to-use chart schemes:
+default, blue, simple, cbioportal,
+nature, nature2, ggplot2, and
+dark.
chart.options = g3Lollipop.theme(
+ theme.name = "default",
+ title.text = "default theme title",
+ y.axis.label = "y-label",
+ legend.title = "legend-title")
+
+g3Lollipop(mutation.dat,
+ plot.options = chart.options,
+ btn.style = "blue",
+ gene.symbol = "TP53")
+#> Factor is set to Mutation_Class
Intuitively and effectively visualizing genetic mutation data can
+help researchers to better understand genomic data and validate
+findings. G3viz
is an R package which provides an
+easy-to-use lollipop-diagram tool. It enables users to interactively
+visualize the detailed translational effects of genetic mutations in RStudio
+or a web browser, without having to know any HTML5/JavaScript
+technologies.
The features of g3viz
include
g3viz
Install from R repository
+ +or install development version from github
+ + +MAF
fileMutation Annotation Format (MAF)
+is a commonly-used tab-delimited text file for storing aggregated
+mutation information. It can be generated from a VCF
+file using tools like vcf2maf. Translational
+effects of variant alleles in MAF
files are usually in the
+column named Variant_Classification
or
+Mutation_Type
(e.g., Frame_Shift_Del
,
+Splice_Site
). In this example, the somatic mutation data of
+the TCGA-BRCA study was originally downloaded from the GDC Data
+Portal.
# System file
+maf.file <- system.file("extdata", "TCGA.BRCA.varscan.somatic.maf.gz", package = "g3viz")
+
+# ============================================
+# Read in MAF file
+# In addition to reading the data in, the g3viz::readMAF function will
+# 1. parse "Mutation_Class" information from the "Variant_Classification"
+# column (also named "Mutation_Type" in some files)
+# 2. parse "AA_position" (amino-acid position) from the "HGVSp_Short" column
+# (also named "amino_acid_change" in some files) (e.g., p.Q136P)
+# ============================================
+mutation.dat <- readMAF(maf.file)
# ============================================
+# Chart 1
+# "default" chart theme
+# ============================================
+chart.options <- g3Lollipop.theme(theme.name = "default",
+ title.text = "PIK3CA gene (default theme)")
+
+g3Lollipop(mutation.dat,
+ gene.symbol = "PIK3CA",
+ plot.options = chart.options,
+ output.filename = "default_theme")
+#> Factor is set to Mutation_Class
+#> legend title is set to Mutation_Class
CSV
or TSV
fileIn this example, we read genetic mutation data from CSV
+or TSV
files, and visualize it using some customized chart options. Note that this is equivalent to the
+dark chart theme.
# load data
+mutation.csv <- system.file("extdata", "ccle.csv", package = "g3viz")
+
+# ============================================
+# read in data
+# "gene.symbol.col" : column of gene symbol
+# "variant.class.col" : column of variant class
+# "protein.change.col" : column of protein change
+# ============================================
+mutation.dat <- readMAF(mutation.csv,
+ gene.symbol.col = "Hugo_Symbol",
+ variant.class.col = "Variant_Classification",
+ protein.change.col = "amino_acid_change",
+ sep = ",") # column-separator of csv file
+
+# set up chart options
+plot.options <- g3Lollipop.options(
+ # Chart settings
+ chart.width = 600,
+ chart.type = "pie",
+ chart.margin = list(left = 30, right = 20, top = 20, bottom = 30),
+ chart.background = "#d3d3d3",
+ transition.time = 300,
+ # Lollipop track settings
+ lollipop.track.height = 200,
+ lollipop.track.background = "#d3d3d3",
+ lollipop.pop.min.size = 1,
+ lollipop.pop.max.size = 8,
+ lollipop.pop.info.limit = 5.5,
+ lollipop.pop.info.dy = "0.24em",
+ lollipop.pop.info.color = "white",
+ lollipop.line.color = "#a9A9A9",
+ lollipop.line.width = 3,
+ lollipop.circle.color = "#ffdead",
+ lollipop.circle.width = 0.4,
+ lollipop.label.ratio = 2,
+ lollipop.label.min.font.size = 12,
+ lollipop.color.scheme = "dark2",
+ highlight.text.angle = 60,
+ # Domain annotation track settings
+ anno.height = 16,
+ anno.margin = list(top = 0, bottom = 0),
+ anno.background = "#d3d3d3",
+ anno.bar.fill = "#a9a9a9",
+ anno.bar.margin = list(top = 4, bottom = 4),
+ domain.color.scheme = "pie5",
+ domain.margin = list(top = 2, bottom = 2),
+ domain.text.color = "white",
+ domain.text.font = "italic 8px Serif",
+ # Y-axis label
+ y.axis.label = "# of TP53 gene mutations",
+ axis.label.color = "#303030",
+ axis.label.alignment = "end",
+ axis.label.font = "italic 12px Serif",
+ axis.label.dy = "-1.5em",
+ y.axis.line.color = "#303030",
+ y.axis.line.width = 0.5,
+ y.axis.line.style = "line",
+ y.max.range.ratio = 1.1,
+ # Chart title settings
+ title.color = "#303030",
+ title.text = "TP53 gene (customized chart options)",
+ title.font = "bold 12px monospace",
+ title.alignment = "start",
+ # Chart legend settings
+ legend = TRUE,
+ legend.margin = list(left=20, right = 0, top = 10, bottom = 5),
+ legend.interactive = TRUE,
+ legend.title = "Variant classification",
+ # Brush selection tool
+ brush = TRUE,
+ brush.selection.background = "#F8F8FF",
+ brush.selection.opacity = 0.3,
+ brush.border.color = "#a9a9a9",
+ brush.border.width = 1,
+ brush.handler.color = "#303030",
+ # tooltip and zoom
+ tooltip = TRUE,
+ zoom = TRUE
+)
+
+g3Lollipop(mutation.dat,
+ gene.symbol = "TP53",
+ protein.change.col = "amino_acid_change",
+ btn.style = "blue", # blue-style chart download buttons
+ plot.options = plot.options,
+ output.filename = "customized_plot")
+#> Factor is set to Mutation_Class
cBioPortal
cBioPortal provides downloads
+for many cancer genomics data sets. g3viz
has a convenient
+way to retrieve data directly from this portal.
In this example, we first retrieve genetic mutation data of
+TP53
gene for the msk_impact_2017
+study, and then visualize the data using the built-in
+cbioportal
theme, to mimic cBioPortal’s mutation_mapper.
# Retrieve mutation data of "msk_impact_2017" from cBioPortal
+mutation.dat <- getMutationsFromCbioportal("msk_impact_2017", "TP53")
+#> The Entrez Gene ID for TP53 is: 7157
+#> Found mutation dataset for msk_impact_2017: msk_impact_2017_mutations
+
+# "cbioportal" chart theme
+plot.options <- g3Lollipop.theme(theme.name = "cbioportal",
+ title.text = "TP53 gene (cbioportal theme)",
+ y.axis.label = "# of TP53 Mutations")
+
+g3Lollipop(mutation.dat,
+ gene.symbol = "TP53",
+ btn.style = "gray", # gray-style chart download buttons
+ plot.options = plot.options,
+ output.filename = "cbioportal_theme")
+#> Factor is set to Mutation_Class
+#> legend title is set to Mutation_Class
cBioPortalData
or cBioPortal
R packages
+have not been stable recently. Therefore, we query the mutation data from
+cBioPortal
directly using its API. This feature may change in
+later versions. In g3viz
, annotated mutation data can be loaded in three
+ways
from CSV
or TSV
files, as in Example 2.
from cBioPortal +(internet access required), as in Example 3.
In addition to reading mutation data, readMAF
or
+getMutationsFromCbioportal
functions also map mutation type
+to mutation class and generate a Mutation_Class
column by
+default. Mutation type is usually in the column of
+Variant_Classification
or Mutation_Type
. The
+default mapping table is,
+Mutation_Type + | ++Mutation_Class + | ++Short_Name + | +
---|---|---|
+Inframe + | +||
+In_Frame_Del + | ++Inframe + | ++IF del + | +
+In_Frame_Ins + | ++Inframe + | ++IF ins + | +
+Silent + | ++Inframe + | ++Silent + | +
+Targeted_Region + | ++Inframe + | ++IF + | +
+Missense + | +||
+Missense_Mutation + | ++Missense + | ++Missense + | +
+Truncating + | +||
+Frame_Shift + | ++Truncating + | ++FS + | +
+Frame_Shift_Del + | ++Truncating + | ++FS del + | +
+Frame_Shift_Ins + | ++Truncating + | ++FS ins + | +
+Nonsense_Mutation + | ++Truncating + | ++Nonsense + | +
+Nonstop_Mutation + | ++Truncating + | ++Nonstop + | +
+Splice_Region + | ++Truncating + | ++Splice + | +
+Splice_Site + | ++Truncating + | ++Splice + | +
+Other + | +||
+3’Flank + | ++Other + | ++3’Flank + | +
+3’UTR + | ++Other + | ++3’UTR + | +
+5’Flank + | ++Other + | ++5’Flank + | +
+5’UTR + | ++Other + | ++5’UTR + | +
+De_novo_Start_InFrame + | ++Other + | ++de_novo_start_inframe + | +
+De_novo_Start_OutOfFrame + | ++Other + | ++de_novo_start_outofframe + | +
+Fusion + | ++Other + | ++Fusion + | +
+IGR + | ++Other + | ++IGR + | +
+Intron + | ++Other + | ++Intron + | +
+lincRNA + | ++Other + | ++lincRNA + | +
+RNA + | ++Other + | ++RNA + | +
+Start_Codon_Del + | ++Other + | ++Nonstart + | +
+Start_Codon_Ins + | ++Other + | ++start_codon_ins + | +
+Start_Codon_SNP + | ++Other + | ++Nonstart + | +
+Translation_Start_Site + | ++Other + | ++TSS + | +
+Unknown + | ++Other + | ++Unknown + | +
Given a HUGO gene symbol,
+users can either use hgnc2pfam
function to retrieve Pfam protein domain information first
+or use all-in-one g3Lollipop
function to directly create a
+lollipop-diagram. If the given gene has multiple isoforms,
+hgnc2pfam
returns all UniProt entries, and users can
+specify one using the corresponding UniProt
entry. If
+attribute guess
is TRUE
, the Pfam domain
+information of the longest UniProt entry is returned.
# Example 1: TP53 has a single UniProt entry
+hgnc2pfam("TP53", output.format = "list")
+#> $symbol
+#> [1] "TP53"
+#>
+#> $uniprot
+#> [1] "P04637"
+#>
+#> $length
+#> [1] 393
+#>
+#> $pfam
+#> hmm.acc hmm.name start end type
+#> 5350 PF08563 P53_TAD 6 30 Motif
+#> 5349 PF18521 TAD2 35 59 Motif
+#> 5351 PF00870 P53 99 289 Domain
+#> 5348 PF07710 P53_tetramer 319 358 Motif
+
+# Example 2: GNAS has multiple UniProt entries
+# `guess = TRUE`: the Pfam domain information of the longest
+# UniProt protein is returned
+hgnc2pfam("GNAS", guess = TRUE)
+#> GNAS maps to multiple UniProt entries:
+#> symbol uniprot length
+#> GNAS O95467 245
+#> GNAS P63092 394
+#> GNAS P84996 626
+#> GNAS Q5JWF2 1037
+#> Warning in hgnc2pfam("GNAS", guess = TRUE): Pick: Q5JWF2
+#> {"symbol":"GNAS","uniprot":"Q5JWF2","length":1037,"pfam":[{"hmm.acc":"PF00503","hmm.name":"G-alpha","start":663,"end":1026,"type":"Domain"}]}
The g3viz
package contains 8 ready-to-use chart schemes:
+default, blue, simple, cbioportal,
+nature, nature2, ggplot2, and dark.
+Check this tutorial for examples and
+usage.
Figure 1 demonstrates all color
+schemes that g3viz
supports for lollipop-pops and Pfam
+domains.
Chart options can be specified using
+g3Lollipop.options()
function (see example
+2). Here is the full list of chart options,
+Option + | ++Description + | +
---|---|
+Chart settings + | +|
+chart.width + | +
+chart width in px. Default 800 .
+ |
+
+chart.type + | +
+pop type, pie or circle . Default
+pie .
+ |
+
+chart.margin + | +
+specify chart margin in list format. Default
+list(left = 40, right = 20, top = 15, bottom = 25) .
+ |
+
+chart.background + | +
+chart background. Default transparent .
+ |
+
+transition.time + | +
+chart animation transition time in millisecond. Default
+600 .
+ |
+
+Lollipop track settings + | +|
+lollipop.track.height + | +
+height of lollipop track. Default 420 .
+ |
+
+lollipop.track.background + | +
+background of lollipop track. Default rgb(244,244,244) .
+ |
+
+lollipop.pop.min.size + | +
+lollipop pop minimal size in px. Default 2 .
+ |
+
+lollipop.pop.max.size + | +
+lollipop pop maximal size in px. Default 12 .
+ |
+
+lollipop.pop.info.limit + | +
+threshold of lollipop pop size to show count information in middle of
+pop. Default 8 .
+ |
+
+lollipop.pop.info.color + | +
+lollipop pop information text color. Default #EEE .
+ |
+
+lollipop.pop.info.dy + | +
+y-axis direction text adjustment of lollipop pop information. Default
+-0.35em .
+ |
+
+lollipop.line.color + | +
+lollipop line color. Default rgb(42,42,42) .
+ |
+
+lollipop.line.width + | +
+lollipop line width. Default 0.5 .
+ |
+
+lollipop.circle.color + | +
+lollipop circle border color. Default wheat .
+ |
+
+lollipop.circle.width + | +
+lollipop circle border width. Default 0.5 .
+ |
+
+lollipop.label.ratio + | +
+lollipop click-out label font size to circle size ratio. Default
+1.4 .
+ |
+
+lollipop.label.min.font.size + | +
+lollipop click-out label minimal font size. Default 10 .
+ |
+
+lollipop.color.scheme + | +
+color scheme to fill lollipop pops. Default accent . Check
+color schemes for details.
+ |
+
+highlight.text.angle + | +
+the rotation angle of on-click highlight text in degree. Default
+90 .
+ |
+
+Domain annotation track settings + | +|
+anno.height + | +
+height of protein structure annotation track. Default 30 .
+ |
+
+anno.margin + | +
+margin of protein structure annotation track. Default
+list(top = 4, bottom = 0) .
+ |
+
+anno.background + | +
+background of protein structure annotation track. Default
+transparent .
+ |
+
+anno.bar.fill + | +
+background of protein bar in protein structure annotation track. Default
+#E5E3E1 .
+ |
+
+anno.bar.margin + | +
+margin of protein bar in protein structure annotation track. Default
+list(top = 2, bottom = 2) .
+ |
+
+domain.color.scheme + | +
+color scheme of protein domains. Default category10 . Check
+color schemes for details.
+ |
+
+domain.margin + | +
+margin of protein domains. Default
+list(top = 0, bottom = 0) .
+ |
+
+domain.text.font + | +
+domain label text font in shorthand format. Default
+normal 11px Arial .
+ |
+
+domain.text.color + | +
+domain label text color. Default #F2F2F2 .
+ |
+
+Y-axis settings + | +|
+y.axis.label + | +
+Y-axis label text. Default # of mutations .
+ |
+
+axis.label.font + | +
+css font style shorthand (font-style font-variant font-weight
+font-size/line-height font-family). Default
+normal 12px Arial .
+ |
+
+axis.label.color + | +
+axis label text color. Default #4f4f4f .
+ |
+
+axis.label.alignment + | +
+axis label text alignment (start/end/middle). Default
+middle
+ |
+
+axis.label.dy + | +
+text adjustment of axis label text. Default -2em .
+ |
+
+y.axis.line.color + | +
+color of y-axis in-chart lines (ticks). Default #c4c8ca .
+ |
+
+y.axis.line.style + | +
+style of y-axis in-chart lines (ticks), dash or
+line . Default dash .
+ |
+
+y.axis.line.width + | +
+width of y-axis in-chart lines (ticks). Default 1 .
+ |
+
+y.max.range.ratio + | +
+ratio of y-axis range to data value range. Default 1.1 .
+ |
+
+Chart title settings + | +|
+title.text + | ++title of chart. Default "". + | +
+title.font + | +
+font of chart title. Default normal 16px Arial .
+ |
+
+title.color + | +
+color of chart title. Default #424242 .
+ |
+
+title.alignment + | +
+text alignment of chart title (start/middle/end). Default
+middle .
+ |
+
+title.dy + | +
+text adjustment of chart title. Default 0.35em .
+ |
+
+Chart legend settings + | +|
+legend + | +
+if show legend. Default TRUE .
+ |
+
+legend.margin + | +
+legend margin in list format. Default
+list(left = 10, right = 0, top = 5, bottom = 5) .
+ |
+
+legend.interactive + | +
+legend interactive mode. Default TRUE .
+ |
+
+legend.title + | +
+legend title. If NA , use factor name as
+factor.col . Default is NA .
+ |
+
+Brush selection tool settings + | +|
+brush + | +
+if show brush. Default TRUE .
+ |
+
+brush.selection.background + | +
+background color of selection brush. Default #666 .
+ |
+
+brush.selection.opacity + | +
+background opacity of selection brush. Default 0.2 .
+ |
+
+brush.border.color + | +
+border color of selection brush. Default #969696 .
+ |
+
+brush.handler.color + | +
+color of left and right handlers of selection brush. Default
+#333 .
+ |
+
+brush.border.width + | +
+border width of selection brush. Default 1 .
+ |
+
+Tooltip and zoom tools + | +|
+tooltip + | +
+if show tooltip. Default TRUE .
+ |
+
+zoom + | +
+if enable zoom feature. Default TRUE .
+ |
+
g3Lollipop
also renders two buttons over the
+lollipop-diagram, allowing users to save the resulting chart as a PNG or
+vector-based SVG file. To save the chart programmatically as HTML, you can
+use htmlwidgets::saveWidget
function.
sessionInfo()
+#> R version 4.2.1 (2022-06-23)
+#> Platform: x86_64-apple-darwin17.0 (64-bit)
+#> Running under: macOS 14.6.1
+#>
+#> Matrix products: default
+#> LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
+#>
+#> locale:
+#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
+#>
+#> attached base packages:
+#> [1] stats4 stats graphics grDevices utils datasets methods base
+#>
+#> other attached packages:
+#> [1] cBioPortalData_2.8.2 MultiAssayExperiment_1.22.0 SummarizedExperiment_1.26.1 Biobase_2.56.0
+#> [5] GenomicRanges_1.48.0 GenomeInfoDb_1.32.4 IRanges_2.30.1 S4Vectors_0.34.0
+#> [9] BiocGenerics_0.42.0 MatrixGenerics_1.8.1 matrixStats_1.3.0 AnVIL_1.8.7
+#> [13] dplyr_1.1.4 kableExtra_1.4.0 knitr_1.48 rmarkdown_2.28
+#> [17] g3viz_1.2.0
+#>
+#> loaded via a namespace (and not attached):
+#> [1] bitops_1.0-8 bit64_4.0.5 progress_1.2.3 filelock_1.0.3 httr_1.4.7
+#> [6] GenomicDataCommons_1.20.3 tools_4.2.1 bslib_0.8.0 utf8_1.2.4 R6_2.5.1
+#> [11] DBI_1.2.3 colorspace_2.1-1 prettyunits_1.2.0 TCGAutils_1.16.1 tidyselect_1.2.1
+#> [16] bit_4.0.5 curl_5.2.1 compiler_4.2.1 rvest_1.0.4 httr2_1.0.3
+#> [21] cli_3.6.3 formatR_1.14 xml2_1.3.6 DelayedArray_0.22.0 rtracklayer_1.56.1
+#> [26] sass_0.4.9 scales_1.3.0 readr_2.1.5 rappdirs_0.3.3 rapiclient_0.1.6
+#> [31] RCircos_1.2.2 Rsamtools_2.12.0 systemfonts_1.1.0 stringr_1.5.1 digest_0.6.37
+#> [36] svglite_2.1.3 XVector_0.36.0 pkgconfig_2.0.3 htmltools_0.5.8.1 dbplyr_2.5.0
+#> [41] fastmap_1.2.0 limma_3.52.4 highr_0.11 htmlwidgets_1.6.4 rlang_1.1.2
+#> [46] rstudioapi_0.16.0 RSQLite_2.3.7 BiocIO_1.6.0 jquerylib_0.1.4 generics_0.1.3
+#> [51] jsonlite_1.8.8 BiocParallel_1.30.4 RCurl_1.98-1.13 magrittr_2.0.3 GenomeInfoDbData_1.2.8
+#> [56] futile.logger_1.4.3 Matrix_1.5-4.1 Rcpp_1.0.13 munsell_0.5.1 fansi_1.0.6
+#> [61] lifecycle_1.0.4 stringi_1.8.4 yaml_2.3.10 RaggedExperiment_1.20.1 RJSONIO_1.3-1.9
+#> [66] zlibbioc_1.42.0 org.Hs.eg.db_3.15.0 BiocFileCache_2.4.0 grid_4.2.1 blob_1.2.4
+#> [71] parallel_4.2.1 crayon_1.5.3 lattice_0.22-6 Biostrings_2.64.1 splines_4.2.1
+#> [76] GenomicFeatures_1.48.4 hms_1.1.3 KEGGREST_1.36.3 pillar_1.9.0 rjson_0.2.21
+#> [81] codetools_0.2-20 biomaRt_2.52.0 futile.options_1.0.1 XML_3.99-0.16 glue_1.7.0
+#> [86] evaluate_0.24.0 lambda.r_1.2.4 data.table_1.15.4 tzdb_0.4.0 png_0.1-8
+#> [91] vctrs_0.6.5 purrr_1.0.2 tidyr_1.3.1 cachem_1.1.0 xfun_0.47
+#> [96] restfulr_0.0.15 survival_3.7-0 viridisLite_0.4.2 tibble_3.2.1 RTCGAToolbox_2.26.1
+#> [101] GenomicAlignments_1.32.1 AnnotationDbi_1.58.0 memoise_2.0.1