From 26f3a391dd0e0ef3686db394909faf66e94a0f83 Mon Sep 17 00:00:00 2001 From: "Documenter.jl" Date: Tue, 11 Jun 2024 22:55:44 +0000 Subject: [PATCH] build based on 13c9e5b --- dev/.documenter-siteinfo.json | 2 +- dev/api/index.html | 22 +++++++++++----------- dev/examples/genes/index.html | 4 ++-- dev/examples/gtf/index.html | 4 ++-- dev/examples/gwas/index.html | 4 ++-- dev/examples/isoforms/index.html | 4 ++-- dev/examples/locus/index.html | 4 ++-- dev/examples/loops/index.html | 4 ++-- dev/examples/peaks/index.html | 4 ++-- dev/examples/summary/index.html | 4 ++-- dev/examples/twas/index.html | 4 ++-- dev/index.html | 2 +- dev/objects.inv | Bin 836 -> 833 bytes 13 files changed, 31 insertions(+), 31 deletions(-) diff --git a/dev/.documenter-siteinfo.json b/dev/.documenter-siteinfo.json index e31a813..75a8ad9 100644 --- a/dev/.documenter-siteinfo.json +++ b/dev/.documenter-siteinfo.json @@ -1 +1 @@ -{"documenter":{"julia_version":"1.10.4","generation_timestamp":"2024-06-05T00:28:33","documenter_version":"1.4.1"}} \ No newline at end of file +{"documenter":{"julia_version":"1.10.4","generation_timestamp":"2024-06-11T22:55:39","documenter_version":"1.4.1"}} \ No newline at end of file diff --git a/dev/api/index.html b/dev/api/index.html index a27734c..aa5507f 100644 --- a/dev/api/index.html +++ b/dev/api/index.html @@ -1,16 +1,16 @@ -API · GeneticsMakie.jl

API

GeneticsMakie.findclosestgeneMethod
findclosestgene(chr::AbstractString, bp::Real, gencode::DataFrame; start::Bool, proteincoding::Bool)
-findclosestgene(df::DataFrame, gencode::DataFrame; start::Bool, proteincoding::Bool)

Find the closest gene(s) to a genomic coordinate or a list of genomic coordinates using gencode.

Optionally, the closest gene can be defined from the gene start site using start, and only protein coding genes can be considered using proteincoding. The default start and proteincoding are false.

source
GeneticsMakie.findgeneMethod
findgene(gene::AbstractString, gencode::DataFrame)

Find chromosome, gene start, and gene stop sites for the gene of interest.

source
GeneticsMakie.findgwaslociMethod
findgwasloci(gwas::DataFrame; p::Real)
-findgwasloci(gwas::Vector{DataFrame}; p::Real)

Find genome-wide significant loci for gwas that are separated from each other by at least 1 Mb.

Alternatively, find genome-wide significant loci across multiple gwas that are all separated by at least 1 Mb. p determines the genome-wide significance threshold, which is 5e-8 by default.

source
GeneticsMakie.labelgenomeMethod
labelgenome(g::GridPosition, chromosome::AbstractString, range1::Real, range2::Real)

Label g with a given chromosome and genomic range between range1 and range2.

source
GeneticsMakie.mungesumstats!Method
mungesumstats!(gwas::DataFrame)
-mungesumstats!(gwas::Vector{DataFrame})

Munge gwas by harmonizing the names of columns, their types, and P values, among others.

source
GeneticsMakie.parsegtf!Method
parsegtf!(gencode::DataFrame)

Parse gencode by extracting gene_id, gene_name, gene_type, transcript_id, transcript_support_level information from the info column.

source
GeneticsMakie.plotgenes!Method
plotgenes!(ax::Axis, chromosome::AbstractString, range1::Real, range2::Real, gencode::DataFrame; kwargs)
+API · GeneticsMakie

API

GeneticsMakie.findclosestgeneMethod
findclosestgene(chr::AbstractString, bp::Real, gencode::DataFrame; start::Bool, proteincoding::Bool)
+findclosestgene(df::DataFrame, gencode::DataFrame; start::Bool, proteincoding::Bool)

Find the closest gene(s) to a genomic coordinate or a list of genomic coordinates using gencode.

Optionally, the closest gene can be defined from the gene start site using start, and only protein-coding genes can be considered using proteincoding. Both start and proteincoding default to false.

source
GeneticsMakie.findgeneMethod
findgene(gene::AbstractString, gencode::DataFrame)

Find chromosome, gene start, and gene stop sites for the gene of interest.

source
GeneticsMakie.findgwaslociMethod
findgwasloci(gwas::DataFrame; p::Real)
+findgwasloci(gwas::Vector{DataFrame}; p::Real)

Find genome-wide significant loci for gwas that are separated from each other by at least 1 Mb.

Alternatively, find genome-wide significant loci across multiple gwas that are all separated by at least 1 Mb. p determines the genome-wide significance threshold, which is 5e-8 by default.

source
GeneticsMakie.labelgenomeMethod
labelgenome(g::GridPosition, chromosome::AbstractString, range1::Real, range2::Real)

Label g with a given chromosome and genomic range between range1 and range2.

source
GeneticsMakie.mungesumstats!Method
mungesumstats!(gwas::DataFrame)
+mungesumstats!(gwas::Vector{DataFrame})

Munge gwas by harmonizing the names of columns, their types, and P values, among others.

source
GeneticsMakie.parsegtf!Method
parsegtf!(gencode::DataFrame)

Parse gencode by extracting gene_id, gene_name, gene_type, transcript_id, transcript_support_level information from the info column.

source
GeneticsMakie.plotgenes!Method
plotgenes!(ax::Axis, chromosome::AbstractString, range1::Real, range2::Real, gencode::DataFrame; kwargs)
 plotgenes!(ax::Axis, chromosome::AbstractString, bp::Real, gencode::DataFrame; kwargs)
-plotgenes!(ax::Axis, gene::AbstractString, gencode::DataFrame; kwargs)

Plot collapsed gene bodies for genes within a given chromosome and genomic range between range1 and range2.

Alternatively, plot within a given chromosome and a certain window around a genomic coordinate bp or plot within a certain window around gene.

Arguments

  • height::Real = 0.25: the height of exons.
  • genecolor = :royalblue: the color of genes.
  • textcolor = :black: the color of gene labels.
  • window::Real = 1e6: the window around bp or gene.
source
GeneticsMakie.plotgenes!Method
plotgenes!(ax::Axis, chromosome::AbstractString, range1::Real, range2::Real, highlight::Tuple{AbstractVector, AbstractVector}, gencode::DataFrame; height::Real)
+plotgenes!(ax::Axis, gene::AbstractString, gencode::DataFrame; kwargs)

Plot collapsed gene bodies for genes within a given chromosome and genomic range between range1 and range2.

Alternatively, plot within a given chromosome and a certain window around a genomic coordinate bp or plot within a certain window around gene.

Arguments

  • height::Real = 0.25: the height of exons.
  • genecolor = :royalblue: the color of genes.
  • textcolor = :black: the color of gene labels.
  • window::Real = 1e6: the window around bp or gene.
source
GeneticsMakie.plotgenes!Method
plotgenes!(ax::Axis, chromosome::AbstractString, range1::Real, range2::Real, highlight::Tuple{AbstractVector, AbstractVector}, gencode::DataFrame; height::Real)
 plotgenes!(ax::Axis, chromosome::AbstractString, bp::Real, highlight::Tuple{AbstractVector, AbstractVector}, gencode::DataFrame; window::Real, height::Real)
-plotgenes!(ax::Axis, gene::AbstractString, highlight::Tuple{AbstractVector, AbstractVector}, gencode::DataFrame; window::Real, height::Real)

Plot gene bodies with a vector of genes highlighted by a vector of colors via highlight.

source
GeneticsMakie.plotgwas!Method
plotgwas!(ax::Axis, gwas::DataFrame; ymax::Real, p::Real, sigline::Bool, sigcolor::Bool, build = 37)

Plot gwas results as a Manhattan plot.

Arguments

  • ymax::Real: the maximum value for y axis.
  • p::Real = 5e-8: the genome-wide significance threshold.
  • linecolor = :red2: the color of genome-wide significance line, which can be turned off by setting to nothing.
  • scattercolor = "#4DB069": the color of genome-wide significant variants, which can be turned off by setting to nothing.
  • chromcolors = ["#0D0D66", "#7592C8"]: the colors of even and odd chromosomes.
  • build::Int = 37: the human genome build.
source
GeneticsMakie.plotisoforms!Method
plotisoforms!(ax::Axis, gene::AbstractString, gencode::DataFrame; kwargs)

Plot each isoform of a given gene on a separate row.

Arguments

  • orderby::Union{Nothing, AbstractVector{<:AbstractString}} = nothing: the order of isoforms.
  • highlight::Union{Nothing, Tuple{AbstractVector, AbstractVector}} = nothing: isoforms to be highlighted and their colors.
  • height::Real = 0.25: the height of exons.
  • isoformcolor = :royalblue: the color of isoforms.
  • textcolor = :black: the color of isoform labels.
  • text::Union{Bool, Symbol} = :top: the position of isoform labels.
source
GeneticsMakie.plotldMethod
plotld(LD::AbstractMatrix; kwargs)
-plotld!(ax::Axis, LD::AbstractMatrix; kwargs)

Heatmap of symmetric correlation matrix LD with the diagonal elements on the x-axis.

Keyword arguments

  • threshold : threshold below which values are ignored (default to 1/9)
  • colormap : colormap of values (default to cgrad(:Blues_9, 9, categorical = true))
  • colorrange : start and end points of colormap (default to (0, 1))
  • strokewidth : width of outline around heatmap boxes (default to 0)
source
GeneticsMakie.plotlocus!Method
plotlocus!(ax::Axis, chromosome::AbstractString, range1::Real, range2::Real, gwas::DataFrame; kwargs)
+plotgenes!(ax::Axis, gene::AbstractString, highlight::Tuple{AbstractVector, AbstractVector}, gencode::DataFrame; window::Real, height::Real)

Plot gene bodies with a vector of genes highlighted by a vector of colors via highlight.

source
GeneticsMakie.plotgwas!Method
plotgwas!(ax::Axis, gwas::DataFrame; ymax::Real, p::Real, sigline::Bool, sigcolor::Bool, build = 37)

Plot gwas results as a Manhattan plot.

Arguments

  • ymax::Real: the maximum value for y axis.
  • p::Real = 5e-8: the genome-wide significance threshold.
  • linecolor = :red2: the color of genome-wide significance line, which can be turned off by setting to nothing.
  • scattercolor = "#4DB069": the color of genome-wide significant variants, which can be turned off by setting to nothing.
  • chromcolors = ["#0D0D66", "#7592C8"]: the colors of even and odd chromosomes.
  • build::Int = 37: the human genome build.
source
GeneticsMakie.plotisoforms!Method
plotisoforms!(ax::Axis, gene::AbstractString, gencode::DataFrame; kwargs)

Plot each isoform of a given gene on a separate row.

Arguments

  • orderby::Union{Nothing, AbstractVector{<:AbstractString}} = nothing: the order of isoforms.
  • highlight::Union{Nothing, Tuple{AbstractVector, AbstractVector}} = nothing: isoforms to be highlighted and their colors.
  • height::Real = 0.25: the height of exons.
  • isoformcolor = :royalblue: the color of isoforms.
  • textcolor = :black: the color of isoform labels.
  • text::Union{Bool, Symbol} = :top: the position of isoform labels.
source
GeneticsMakie.plotldMethod
plotld(LD::AbstractMatrix; kwargs)
+plotld!(ax::Axis, LD::AbstractMatrix; kwargs)

Heatmap of symmetric correlation matrix LD with the diagonal elements on the x-axis.

Keyword arguments

  • threshold : threshold below which values are ignored (default to 1/9)
  • colormap : colormap of values (default to cgrad(:Blues_9, 9, categorical = true))
  • colorrange : start and end points of colormap (default to (0, 1))
  • strokewidth : width of outline around heatmap boxes (default to 0)
source
GeneticsMakie.plotlocus!Method
plotlocus!(ax::Axis, chromosome::AbstractString, range1::Real, range2::Real, gwas::DataFrame; kwargs)
 plotlocus!(ax::Axis, chromosome::AbstractString, bp::Real, gwas::DataFrame; kwargs)
-plotlocus!(ax::Axis, gene::AbstractString, gwas::DataFrame, gencode::DataFrame; kwargs)

Plot gwas results within a given chromosome and genomic range between range1 and range2.

Alternatively, plot within a given chromosome and a certain window around a genomic coordinate bp or plot within a certain window around gene.

Arguments

  • ld::Union{Nothing, SnpData, Tuple{SnpData, Union{AbstractString, Tuple{AbstractString, Int}}}} = nothing: the reference panel for which LD is calculated.
  • ymax::Real: the maximum value for y axis.
  • window::Real = 1e6: the window around bp or gene.
source
GeneticsMakie.plotloops!Method
plotloops!(ax::Axis, chromosome::AbstractString, range1::Real, range2::Real, loopdf::DataFrame; kwargs)
+plotlocus!(ax::Axis, gene::AbstractString, gwas::DataFrame, gencode::DataFrame; kwargs)

Plot gwas results within a given chromosome and genomic range between range1 and range2.

Alternatively, plot within a given chromosome and a certain window around a genomic coordinate bp or plot within a certain window around gene.

Arguments

  • ld::Union{Nothing, SnpData, Tuple{SnpData, Union{AbstractString, Tuple{AbstractString, Int}}}} = nothing: the reference panel for which LD is calculated.
  • ymax::Real: the maximum value for y axis.
  • window::Real = 1e6: the window around bp or gene.
source
GeneticsMakie.plotloops!Method
plotloops!(ax::Axis, chromosome::AbstractString, range1::Real, range2::Real, loopdf::DataFrame; kwargs)
 plotloops!(ax::Axis, chromosome::AbstractString, bp::Real, loopdf::DataFrame; kwargs)
-plotloops!(ax::Axis, gene::AbstractString, loopdf::DataFrame, gencode::DataFrame; kwargs)

Plot loops present in loopdf within a given chromosome and genomic range between range1 and range2.

Alternatively, plot within a given chromosome and a certain window around a genomic coordinate bp or plot within a certain window around gene.

Arguments

  • ymax::Real = 102: the maximum value for y axis.
  • linewidth = 0.25: the line width of the loops' arcs.
  • colorarc = "#9658B2": the color of loops' arcs.
  • colorend = ("#FFBB00", 0.5): the color of loops' ends.
  • resolution = 1000: plot resolution points along x-axis within the given range.
source
GeneticsMakie.plotqq!Method
plotqq!(ax::Axis, gwas::DataFrame; kwargs)
-plotqq!(ax::Axis, P::AbstractVector; kwargs)

Plot QQ plot of P values where the expected distribution is the uniform distribution.

Keyword arguments include xstep::Real and ystep::Real for x and y axes ticks step sizes.

source
GeneticsMakie.plotrgMethod
plotrg(r::AbstractMatrix; kwargs)
-plotrg!(ax::Axis, r::AbstractMatrix; kwargs)

Correlation plot of matrix r.

Keyword arguments

  • circle : whether to draw cicles instead of rectangles (default to true)
  • diagonal : whether to visualize diagonal elements (default to false)
  • colormap : colormap of values (default to :RdBu_10)
  • colorrange : start and end points of colormap (default to (-1, 1))
  • strokewidth : width of outline around surrounding boxes (default to 0.5)
source
+plotloops!(ax::Axis, gene::AbstractString, loopdf::DataFrame, gencode::DataFrame; kwargs)

Plot loops present in loopdf within a given chromosome and genomic range between range1 and range2.

Alternatively, plot within a given chromosome and a certain window around a genomic coordinate bp or plot within a certain window around gene.

Arguments

  • ymax::Real = 102: the maximum value for y axis.
  • linewidth = 0.25: the line width of the loops' arcs.
  • colorarc = "#9658B2": the color of loops' arcs.
  • colorend = ("#FFBB00", 0.5): the color of loops' ends.
  • resolution = 1000: plot resolution points along x-axis within the given range.
source
GeneticsMakie.plotqq!Method
plotqq!(ax::Axis, gwas::DataFrame; kwargs)
+plotqq!(ax::Axis, P::AbstractVector; kwargs)

Plot QQ plot of P values where the expected distribution is the uniform distribution.

Keyword arguments include xstep::Real and ystep::Real for x and y axes ticks step sizes.

source
GeneticsMakie.plotrgMethod
plotrg(r::AbstractMatrix; kwargs)
+plotrg!(ax::Axis, r::AbstractMatrix; kwargs)

Correlation plot of matrix r.

Keyword arguments

  • circle : whether to draw circles instead of rectangles (default to true)
  • diagonal : whether to visualize diagonal elements (default to false)
  • colormap : colormap of values (default to :RdBu_10)
  • colorrange : start and end points of colormap (default to (-1, 1))
  • strokewidth : width of outline around surrounding boxes (default to 0.5)
source
diff --git a/dev/examples/genes/index.html b/dev/examples/genes/index.html index caa9bd8..3591ad1 100644 --- a/dev/examples/genes/index.html +++ b/dev/examples/genes/index.html @@ -1,5 +1,5 @@ -Plotting genes · GeneticsMakie.jl

Plotting genes

After Parsing GENCODE, we can start plotting gene bodies. GeneticsMakie.jl is transparent in that it shows all genes within a genomic window.

using Pkg
+Plotting genes · GeneticsMakie

Plotting genes

After Parsing GENCODE, we can start plotting gene bodies. GeneticsMakie.jl is transparent in that it shows all genes within a genomic window.

using Pkg
 Pkg.add(["GeneticsMakie", "CairoMakie", "DataFrames", "Arrow"])
 
 using GeneticsMakie, CairoMakie, DataFrames, Arrow
@@ -60,4 +60,4 @@
 resize_to_layout!(f)
 vlines!(ax, start, color = (:gold, 0.5), linewidth = 0.5)
 vlines!(ax, stop, color = (:gold, 0.5), linewidth = 0.5)
-f

Then we can save the figure as below.

save("figs/$(gene)-gene.png", f, px_per_unit = 4)
+f

Then we can save the figure as below.

save("figs/$(gene)-gene.png", f, px_per_unit = 4)
diff --git a/dev/examples/gtf/index.html b/dev/examples/gtf/index.html index fa5a3b1..6f1fe56 100644 --- a/dev/examples/gtf/index.html +++ b/dev/examples/gtf/index.html @@ -1,5 +1,5 @@ -Parsing GENCODE · GeneticsMakie.jl

Parsing GENCODE

Install the relevant packages in the usual way.

using Pkg
+Parsing GENCODE · GeneticsMakie

Parsing GENCODE

Install the relevant packages in the usual way.

using Pkg
 Pkg.add(["GeneticsMakie", "CSV", "DataFrames", "Arrow"])

To plot genes and isoforms, we need a transcriptome annotation. We can use the latest GENCODE annotation for the human genome (GRCh37), where we download the comprehensive gene annotation file in GTF format. We recommend having at least 16 GB RAM available for loading GENCODE annotation.

using GeneticsMakie, CSV, DataFrames, Arrow, Downloads
 isdir("data") || mkdir("data")
 url = "https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_39/GRCh37_mapping/gencode.v39lift37.annotation.gtf.gz"
@@ -9,4 +9,4 @@
 h = ["seqnames", "source", "feature", "start", "end", "score", "strand", "phase", "info"]
 gencode = CSV.read("data/gencode/$(file)", DataFrame; delim = "\t", comment = "#", header = h)
Human genome build

The latest human genome assembly is GRCh38, but we use an annotation with coordinates from the older version (GRCh37), because a lot of the GWAS results are shared in GRCh37 genomic coordinates. Make sure to use the matching human genome build when visualizing your results.

The ninth column of a GTF file contains rich information about features, so we can parse this column.

GeneticsMakie.parsegtf!(gencode)
Chromosome names

Chromosome names are munged to not contain “chr” prefix, and their type is String, since there could be non-numerical chromosome names, such as sex chromosomes and mitochondrial genome.

To reduce memory intake, we can also subset gencode to most commonly used columns in downstream analyses.

select!(gencode, :seqnames, :feature, :start, :end, :strand, :gene_id, :gene_name, :gene_type, :transcript_id)

To further reduce memory intake, we can instead store and load GENCODE annotation as an Arrow file.

Arrow.write("data/gencode/$(splitext(file)[1]).arrow", gencode)
 gencode = Arrow.Table("data/gencode/$(splitext(file)[1]).arrow")|> DataFrame

Other transcriptome annotations, such as one from RefSeq, can be used for plotting functions as long as they contain the above columns with the right column names.

Once gencode is ready, we can look up where a gene is on the human genome.

GeneticsMakie.findgene("RBFOX1", gencode)
-GeneticsMakie.findgene("ENSG00000078328", gencode)
Gene names

Make sure to use the correct gene name in case the gene cannot be found. The latest gene names can be looked up in databases such as GeneCards.

+GeneticsMakie.findgene("ENSG00000078328", gencode)
Gene names

Make sure to use the correct gene name in case the gene cannot be found. The latest gene names can be looked up in databases such as GeneCards.

diff --git a/dev/examples/gwas/index.html b/dev/examples/gwas/index.html index f94c8ca..219540b 100644 --- a/dev/examples/gwas/index.html +++ b/dev/examples/gwas/index.html @@ -1,5 +1,5 @@ -Plotting GWAS · GeneticsMakie.jl

Plotting GWAS

After Munging summary statistics, we can use GeneticsMakie.plotgwas! to draw Manhattan plots.

using Pkg
+Plotting GWAS · GeneticsMakie

Plotting GWAS

After Munging summary statistics, we can use GeneticsMakie.plotgwas! to draw Manhattan plots.

using Pkg
 Pkg.add(["GeneticsMakie", "CairoMakie", "DataFrames", "Arrow"])
using GeneticsMakie, CairoMakie, DataFrames, Arrow
 dfs = DataFrame[]
 for key in ["height", "weight"]
@@ -59,4 +59,4 @@
 rowgap!(f.layout, 1, 0)
 rowgap!(f.layout, 2, 5)
 resize_to_layout!(f)
-f

+f

diff --git a/dev/examples/isoforms/index.html b/dev/examples/isoforms/index.html index 227d37d..c294147 100644 --- a/dev/examples/isoforms/index.html +++ b/dev/examples/isoforms/index.html @@ -1,5 +1,5 @@ -Plotting isoforms · GeneticsMakie.jl

Plotting isoforms

After Parsing GENCODE, it is possible to plot isoform bodies. We can focus on NRXN1 gene as our initial example. GeneticsMakie.plotisoforms! returns genomic coordinates for the gene of interest so that an appropriate label can be passed onto GeneticsMakie.labelgenome. NRXN1 gene has many isoforms as we see below, and even more isoforms are likely to be discovered in the future. For this reason, plotting isoforms of multiple genes is not available.

using Pkg
+Plotting isoforms · GeneticsMakie

Plotting isoforms

After Parsing GENCODE, it is possible to plot isoform bodies. We can focus on the NRXN1 gene as our initial example. GeneticsMakie.plotisoforms! returns genomic coordinates for the gene of interest so that an appropriate label can be passed onto GeneticsMakie.labelgenome. The NRXN1 gene has many isoforms as we see below, and even more isoforms are likely to be discovered in the future. For this reason, plotting isoforms of multiple genes is not available.

using Pkg
 Pkg.add(["GeneticsMakie", "CairoMakie", "DataFrames", "Arrow"])
 
 using GeneticsMakie, CairoMakie, DataFrames, Arrow
@@ -40,4 +40,4 @@
 GeneticsMakie.labelgenome(f[1, 1, Bottom()], chr, range1, range2)
 rowsize!(f.layout, 1, rs)
 resize_to_layout!(f)
-f

+f

diff --git a/dev/examples/locus/index.html b/dev/examples/locus/index.html index 9b7092b..e4e3661 100644 --- a/dev/examples/locus/index.html +++ b/dev/examples/locus/index.html @@ -1,5 +1,5 @@ -Plotting LocusZoom · GeneticsMakie.jl

Plotting LocusZooom

After Parsing GENCODE and Munging summary statistics, we can now put the pieces together to draw the backbone of a LocusZoom plot. We focus on ACAN locus as an example, which reaches strong genome-wide significance in GWAS for height. By default, GeneticsMakie.plotlocus! returns a straightforward scatter plot.

using Pkg
+Plotting LocusZoom · GeneticsMakie

Plotting LocusZoom

After Parsing GENCODE and Munging summary statistics, we can now put the pieces together to draw the backbone of a LocusZoom plot. We focus on ACAN locus as an example, which reaches strong genome-wide significance in GWAS for height. By default, GeneticsMakie.plotlocus! returns a straightforward scatter plot.

using Pkg
 Pkg.add(["GeneticsMakie", "CairoMakie", "DataFrames", "Arrow", "SnpArrays"])
using GeneticsMakie, CairoMakie, DataFrames, Arrow, SnpArrays, Downloads
 dfs = DataFrame[]
 for key in ["height", "weight"]
@@ -86,4 +86,4 @@
     vlines!(axs[i], stop, color = (:gold, 0.5), linewidth = 0.5)
 end
 resize_to_layout!(f)
-f

By using Makie.jl's layout tools, it becomes easy to draw additional tracks. For example, in a separate track, the variants could be colored or could have varying sizes depending on their minor allele frequency. In another example, the variants could be colored based on their inclusion in a credible set post-fine-mapping.

Plotting the intersection of SNPs, not the union

GeneticsMakie.plotlocus! plots only the variants that are present in the reference panel, when the ld keyword argument is specified. Although SNPs that are missing in the reference panel could be plotted differently (e.g. with varying transparency and shape), GeneticsMakie.jl is designed to visualize 100s of phenotypes simultaneously in which case such discrepancy is hard to tell and is confusing. Hence, for more direct comparison of loci across phenotypes, only the variants that are found in the reference panel are shown.

Extremely small P values

There are several GWAS loci that harbor extremely small P values, in which cases the P values will be clamped to the smallest floating point number. Such cases are going to be more common in phenotypes that are reaching saturation in terms of GWAS discovery (e.g. height). In those cases, it is more commonplace to observe allelic heterogneity, and it might be more appropriate to plot alternative measures of strength of association (e.g. Z scores).

Patterns of LD

Oftentimes, chunks of LD blocks hug a single or multiple gene boundaries.

Covering the entire genome

Visualizing 1,500 genomic regions with 2 Mb window will more or less cover the entire human genome. Note that empirically speaking, the probability of an arbitrary 2 Mb window harboring at least one genome-wide significant hit across multiple phenotypes is higher than not harboring any significant association.

Phenome-scale LocusZoom

To visualize 100s of phenotypes simultaneously, summary statistics or other relevant genomic annotations should be converted to memory friendly Arrow.jl or Parquet.jl files.

+f

By using Makie.jl's layout tools, it becomes easy to draw additional tracks. For example, in a separate track, the variants could be colored or could have varying sizes depending on their minor allele frequency. In another example, the variants could be colored based on their inclusion in a credible set post-fine-mapping.

Plotting the intersection of SNPs, not the union

GeneticsMakie.plotlocus! plots only the variants that are present in the reference panel, when the ld keyword argument is specified. Although SNPs that are missing in the reference panel could be plotted differently (e.g. with varying transparency and shape), GeneticsMakie.jl is designed to visualize 100s of phenotypes simultaneously in which case such discrepancy is hard to tell and is confusing. Hence, for more direct comparison of loci across phenotypes, only the variants that are found in the reference panel are shown.

Extremely small P values

There are several GWAS loci that harbor extremely small P values, in which cases the P values will be clamped to the smallest floating point number. Such cases are going to be more common in phenotypes that are reaching saturation in terms of GWAS discovery (e.g. height). In those cases, it is more commonplace to observe allelic heterogeneity, and it might be more appropriate to plot alternative measures of strength of association (e.g. Z scores).

Patterns of LD

Oftentimes, chunks of LD blocks hug a single or multiple gene boundaries.

Covering the entire genome

Visualizing 1,500 genomic regions with 2 Mb window will more or less cover the entire human genome. Note that empirically speaking, the probability of an arbitrary 2 Mb window harboring at least one genome-wide significant hit across multiple phenotypes is higher than not harboring any significant association.

Phenome-scale LocusZoom

To visualize 100s of phenotypes simultaneously, summary statistics or other relevant genomic annotations should be converted to memory friendly Arrow.jl or Parquet.jl files.

diff --git a/dev/examples/loops/index.html b/dev/examples/loops/index.html index e766d4f..990edbb 100644 --- a/dev/examples/loops/index.html +++ b/dev/examples/loops/index.html @@ -1,5 +1,5 @@ -Plotting loops · GeneticsMakie.jl

Plotting loops

Additionally, we can visualize long-range DNA interactions captured by ChIA-PET and Hi-C sequencing. A new track with these DNA interactions can be added upon the GENCODE annotations we have processed from Parsing GENCODE. As an example, we focus on the MYC locus in mammary epithelial cells, which has long-range enhancers that are implicated in various epithelial cancers (Spitz et al. 2016). In order to visualize loops within this locus, we will download long-range chromatin interactions provided by the ENCODE project.

using Pkg
+Plotting loops · GeneticsMakie

Plotting loops

Additionally, we can visualize long-range DNA interactions captured by ChIA-PET and Hi-C sequencing. A new track with these DNA interactions can be added upon the GENCODE annotations we have processed from Parsing GENCODE. As an example, we focus on the MYC locus in mammary epithelial cells, which has long-range enhancers that are implicated in various epithelial cancers (Spitz et al. 2016). In order to visualize loops within this locus, we will download long-range chromatin interactions provided by the ENCODE project.

using Pkg
 Pkg.add(["GeneticsMakie", "CairoMakie", "CSV", "DataFrames", "Arrow"])
using GeneticsMakie, CairoMakie, CSV, DataFrames, Arrow, Downloads
 isdir("data/loops") || mkdir("data/loops")
 url = "https://www.encodeproject.org/files/ENCFF730CMY/@@download/ENCFF730CMY.bedpe.gz"
@@ -112,4 +112,4 @@
     vlines!(axs[i], stop, color = (:gold, 0.5), linewidth = 0.5)
 end
 resize_to_layout!(f)
-f

As with the LocusZoom plots, by using Makie.jl's layout tools, it becomes easy to draw additional tracks. For example, in a separate track, we can include chromatin interactions present in other samples. In another example, we can include interactions found through other sequencing methods.

+f

As with the LocusZoom plots, by using Makie.jl's layout tools, it becomes easy to draw additional tracks. For example, in a separate track, we can include chromatin interactions present in other samples. In another example, we can include interactions found through other sequencing methods.

diff --git a/dev/examples/peaks/index.html b/dev/examples/peaks/index.html index ef7f951..c0d8c57 100644 --- a/dev/examples/peaks/index.html +++ b/dev/examples/peaks/index.html @@ -1,5 +1,5 @@ -Plotting peaks · GeneticsMakie.jl

Plotting peaks

Epigenetic sequencing is another data modality of interest that we can visualize with Makie.jl and GeneticsMakie.jl. ChIP-seq and ATAC-seq map epigenetic modifications throughout the genome by identifying DNA-protein interaction sites and open chromatin sites respectively, and although they capture different aspects of the epigenome, their resulting data share the same form and function (i.e. BAM files to capture reads, WIG files to capture coverage and signal, BED files to capture peaks). Consequently, visualizing both types of data will follow the same workflow.

We will start with the annotation from Parsing GENCODE. Accompanying ChIP-seq data will be downloaded from the ENCODE project.

using Pkg
+Plotting peaks · GeneticsMakie

Plotting peaks

Epigenetic sequencing is another data modality of interest that we can visualize with Makie.jl and GeneticsMakie.jl. ChIP-seq and ATAC-seq map epigenetic modifications throughout the genome by identifying DNA-protein interaction sites and open chromatin sites respectively, and although they capture different aspects of the epigenome, their resulting data share the same form and function (i.e. BAM files to capture reads, WIG files to capture coverage and signal, BED files to capture peaks). Consequently, visualizing both types of data will follow the same workflow.

We will start with the annotation from Parsing GENCODE. Accompanying ChIP-seq data will be downloaded from the ENCODE project.

using Pkg
 Pkg.add(["GeneticsMakie", "CairoMakie", "Makie", "CSV", "DataFrames", "Arrow", "BigWig"])
using GeneticsMakie, CairoMakie, Makie, CSV, DataFrames, Arrow, Downloads, BigWig
 url = "https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_39/GRCh37_mapping/gencode.v39lift37.annotation.gtf.gz"
 gencode = Arrow.Table("data/gencode/$(splitext(basename(url))[1]).arrow")|> DataFrame
chipseq = Dict(
@@ -195,4 +195,4 @@
     resize_to_layout!(f)
     f
     save("ss4.png",f, px_per_unit = 4)
-end

+end

diff --git a/dev/examples/summary/index.html b/dev/examples/summary/index.html index 9079e91..7c7cd63 100644 --- a/dev/examples/summary/index.html +++ b/dev/examples/summary/index.html @@ -1,5 +1,5 @@ -Munging summary statistics · GeneticsMakie.jl

Munging summary statistics

GWAS summary statistics come in a variety of shapes and flavors, so harmonizing them is crucial in making our lives easier when trying to visualize their results. We can take a peak at GWAS results for height and weight, the two classic anthropometric traits.

using Pkg
+Munging summary statistics · GeneticsMakie

Munging summary statistics

GWAS summary statistics come in a variety of shapes and flavors, so harmonizing them is crucial in making our lives easier when trying to visualize their results. We can take a peek at GWAS results for height and weight, the two classic anthropometric traits.

using Pkg
 Pkg.add(["GeneticsMakie", "CSV", "DataFrames", "Arrow"])
using GeneticsMakie, CSV, DataFrames, Arrow, Downloads
 gwas = Dict(
     "height" => "https://portals.broadinstitute.org/collaboration/giant/images/6/63/Meta-analysis_Wood_et_al%2BUKBiobank_2018.txt.gz",
@@ -20,4 +20,4 @@
 GeneticsMakie.findclosestgene(loci, gencode; start = true) # closest gene from gene start site
 GeneticsMakie.findclosestgene(loci, gencode; proteincoding = true) # closest "protein-coding" gene

To reduce memory intake, we can store and load GWAS summary statistics as Arrow files.

for (i, key) in enumerate(keys(gwas))
     Arrow.write("data/gwas/$(key).arrow", dfs[i])
-end
+end
diff --git a/dev/examples/twas/index.html b/dev/examples/twas/index.html index 1bd72b7..682a840 100644 --- a/dev/examples/twas/index.html +++ b/dev/examples/twas/index.html @@ -1,5 +1,5 @@ -Plotting TWAS · GeneticsMakie.jl

Plotting TWAS

One can also visualize gene-level association results using GeneticsMakie.plotgwas!. Here we focus on the results of meta analysis of gene-level burden test from case and control studies and gene-level Poisson test from family trio studies for schizophrenia.

using Pkg
+Plotting TWAS · GeneticsMakie

Plotting TWAS

One can also visualize gene-level association results using GeneticsMakie.plotgwas!. Here we focus on the results of a meta-analysis of the gene-level burden test from case and control studies and the gene-level Poisson test from family trio studies for schizophrenia.

using Pkg
 Pkg.add(["GeneticsMakie", "CairoMakie", "CSV", "DataFrames", "Arrow"])
using GeneticsMakie, CairoMakie, CSV, DataFrames, Arrow, Downloads
 url = "https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_39/GRCh37_mapping/gencode.v39lift37.annotation.gtf.gz"
 gencode = Arrow.Table("data/gencode/$(splitext(basename(url))[1]).arrow")|> DataFrame
@@ -23,4 +23,4 @@
 Label(f[1, 1, Top()], text = "SCZ (2022): SCHEMA", fontsize = 8)
 rowsize!(f.layout, 1, 50)
 resize_to_layout!(f)
-f

+f

diff --git a/dev/index.html b/dev/index.html index c54641b..fbc168f 100644 --- a/dev/index.html +++ b/dev/index.html @@ -1,2 +1,2 @@ -Home · GeneticsMakie.jl

GeneticsMakie

The goal of GeneticsMakie.jl is to permit seamless data visualization and exploratory data analysis of the human genome within the larger Julia data science and OpenMendel ecosystems. The package provides convenient wrapper functions for wrangling genetic association results and plotting them using Makie.jl. Every component of a figure can be easily customized and extended, and the package generates high-quality, publication-ready figures.

"mhc"

Getting started

Please peruse the documentations of Makie.jl, CSV.jl, DataFrames.jl, and SnpArrays.jl. Familiarity with these packages will allow visualization of most types of genetic and genomic data. Makie.jl's default layout tools are particularly useful for plotting different genetic and genomic data modalities as separate layers.

An usage case

If you have run a genome-wide association study (GWAS) at the variant-level, and you would like to eyeball genome-wide significant loci across hundreds of phenotypes, then you are in the right place.

+Home · GeneticsMakie

GeneticsMakie

The goal of GeneticsMakie.jl is to permit seamless data visualization and exploratory data analysis of the human genome within the larger Julia data science and OpenMendel ecosystems. The package provides convenient wrapper functions for wrangling genetic association results and plotting them using Makie.jl. Every component of a figure can be easily customized and extended, and the package generates high-quality, publication-ready figures.

"mhc"

Getting started

Please peruse the documentation of Makie.jl, CSV.jl, DataFrames.jl, and SnpArrays.jl. Familiarity with these packages will allow visualization of most types of genetic and genomic data. Makie.jl's default layout tools are particularly useful for plotting different genetic and genomic data modalities as separate layers.

A use case

If you have run a genome-wide association study (GWAS) at the variant-level, and you would like to eyeball genome-wide significant loci across hundreds of phenotypes, then you are in the right place.

diff --git a/dev/objects.inv b/dev/objects.inv index f1df3164d6ce58719b430e3c640439727e167c4d..242193d05ef6f35a76f111c5ba871ec1423a9336 100644 GIT binary patch delta 10 RcmX@Yc93m?=|&44W&jon15W?| delta 14 VcmX@ec7$z$DYITy&PG!nW&kBg1XBP2