Skip to content

Commit

Permalink
add new chapters
Browse files Browse the repository at this point in the history
  • Loading branch information
berntpopp committed Oct 11, 2023
1 parent babcf37 commit f42d5bc
Show file tree
Hide file tree
Showing 6 changed files with 132 additions and 48 deletions.
Binary file not shown.
68 changes: 32 additions & 36 deletions edit_docs/01-analyses-tables.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,22 @@ This table shows the merged results of all analyses files as a wide table with s

```{r, echo=FALSE}
merge_analyses_sources_reformat <- merge_analyses_sources %>%
mutate(hgnc_id = paste0('<a href="https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/HGNC:',
mutate(gene = paste0('<a href="https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/HGNC:',
hgnc_id,
'" target="_blank" >',
hgnc_id,
'</a>')) %>%
mutate(approved_symbol = paste0('<i>',
'" target="_blank" ><i>',
approved_symbol,
'</i>'))
'</i></a>'),
, .before = approved_symbol) %>%
select(-hgnc_id, -approved_symbol)
datatable(merge_analyses_sources_reformat,
filter = 'top', # this argument positions the filtering input at the top of the table
escape = FALSE, # this argument renders the links as HTML
extensions = 'Buttons', # this argument adds an extension for download buttons
options = list(
dom = 'Blfrtip',
scrollX = '400px',
scroller = TRUE,
buttons = c('copy',
'csv',
'excel',
Expand All @@ -40,14 +41,13 @@ This table shows results of the first analysis searching kidney disease associat

```{r, echo=FALSE}
PanelApp_genes_reformat <- PanelApp_genes %>%
mutate(hgnc_id = paste0('<a href="https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/HGNC:',
hgnc_id,
'" target="_blank" >',
mutate(gene = paste0('<a href="https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/HGNC:',
hgnc_id,
'</a>')) %>%
mutate(approved_symbol = paste0('<i>',
'" target="_blank" ><i>',
approved_symbol,
'</i>'))
'</i></a>'),
, .before = approved_symbol) %>%
select(-hgnc_id, -approved_symbol, -gene_name_reported)
datatable(PanelApp_genes_reformat,
filter = 'top', # this argument positions the filtering input at the top of the table
Expand All @@ -61,14 +61,13 @@ This table shows results of the second analysis searching kidney disease associa

```{r, echo=FALSE}
Literature_genes_reformat <- Literature_genes %>%
mutate(hgnc_id = paste0('<a href="https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/HGNC:',
hgnc_id,
'" target="_blank" >',
mutate(gene = paste0('<a href="https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/HGNC:',
hgnc_id,
'</a>')) %>%
mutate(approved_symbol = paste0('<i>',
'" target="_blank" ><i>',
approved_symbol,
'</i>'))
'</i></a>'),
, .before = approved_symbol) %>%
select(-hgnc_id, -approved_symbol, -gene_name_reported)
datatable(Literature_genes_reformat,
filter = 'top', # this argument positions the filtering input at the top of the table
Expand All @@ -82,14 +81,13 @@ This table shows results of the third analysis searching kidney disease associat

```{r, echo=FALSE}
DiagnosticPanels_genes_reformat <- DiagnosticPanels_genes %>%
mutate(hgnc_id = paste0('<a href="https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/HGNC:',
mutate(gene = paste0('<a href="https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/HGNC:',
hgnc_id,
'" target="_blank" >',
hgnc_id,
'</a>')) %>%
mutate(approved_symbol = paste0('<i>',
'" target="_blank" ><i>',
approved_symbol,
'</i>'))
'</i></a>'),
, .before = approved_symbol) %>%
select(-hgnc_id, -approved_symbol, -gene_name_reported)
datatable(DiagnosticPanels_genes_reformat,
filter = 'top', # this argument positions the filtering input at the top of the table
Expand All @@ -103,14 +101,13 @@ This table shows results of the fourth analysis searching kidney disease associa

```{r, echo=FALSE}
HPO_genes_reformat <- HPO_genes %>%
mutate(hgnc_id = paste0('<a href="https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/HGNC:',
hgnc_id,
'" target="_blank" >',
mutate(gene = paste0('<a href="https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/HGNC:',
hgnc_id,
'</a>')) %>%
mutate(approved_symbol = paste0('<i>',
'" target="_blank" ><i>',
approved_symbol,
'</i>'))
'</i></a>'),
, .before = approved_symbol) %>%
select(-hgnc_id, -approved_symbol, -gene_name_reported)
datatable(HPO_genes_reformat,
filter = 'top', # this argument positions the filtering input at the top of the table
Expand All @@ -124,14 +121,13 @@ This table shows results of the fifth analysis searching kidney disease associat

```{r, echo=FALSE}
PubTator_genes_reformat <- PubTator_genes %>%
mutate(hgnc_id = paste0('<a href="https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/HGNC:',
hgnc_id,
'" target="_blank" >',
mutate(gene = paste0('<a href="https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/HGNC:',
hgnc_id,
'</a>')) %>%
mutate(approved_symbol = paste0('<i>',
'" target="_blank" ><i>',
approved_symbol,
'</i>'))
'</i></a>'),
, .before = approved_symbol) %>%
select(-hgnc_id, -approved_symbol, -gene_name_reported)
datatable(PubTator_genes_reformat,
filter = 'top', # this argument positions the filtering input at the top of the table
Expand Down
33 changes: 33 additions & 0 deletions edit_docs/03-high-evidence-curation.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Curation of high evidence genes {#manual-curation}

## Table of high evidence genes

This table shows the annotated high evidence genes.

```{r, echo=FALSE, warning=FALSE, out.width = "100%"}
# Build the display version of the high evidence table: the separate hgnc_id
# and approved_symbol columns are replaced by a single `gene` column that
# holds the italicised symbol wrapped in a link to the HGNC symbol report.
# NOTE(review): the URL already contains the "HGNC:" prefix, so hgnc_id is
# presumably the bare numeric id -- confirm against the annotated CSV.
high_evidence_annotated_table_reformat <- high_evidence_annotated_table %>%
  mutate(
    gene = paste0(
      '<a href="https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/HGNC:',
      hgnc_id,
      '" target="_blank" ><i>',
      approved_symbol,
      '</i></a>'
    ),
    # insert the link column at the position of approved_symbol, which is
    # dropped in the next step
    .before = approved_symbol
  ) %>%
  select(-hgnc_id, -approved_symbol)

# Render the reformatted table as an interactive DT widget
datatable(
  high_evidence_annotated_table_reformat,
  filter = 'top', # this argument positions the filtering input at the top of the table
  escape = FALSE, # this argument renders the links as HTML
  extensions = 'Buttons', # this argument adds an extension for download buttons
  options = list(
    dom = 'Blfrtip',
    scrollX = '400px',
    scroller = TRUE,
    buttons = c(
      'copy',
      'csv',
      'excel',
      'pdf'
    ), # here we add the download buttons for different formats
    lengthMenu = list(
      c(10, 30, 50, -1),
      c('10', '30', '50', 'All')
    ) # here we change the selection options
  )
)
```
43 changes: 43 additions & 0 deletions edit_docs/04-additional-analyses.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Additional analyses {#additional-analyses}

## Diagnostic panels content overlap

> The bar plot below shows how the gene content of the diagnostic panels overlaps.
We extracted the screened genes from ten common diagnostic panels that can be ordered for kidney disease analysis.
The plot shows how many genes are covered by a given number of these panels.

```{r, echo=FALSE, warning=FALSE, out.width = "100%"}
# Number of genes per source_count value
# NOTE(review): source_count presumably is the number of panels that list a
# gene (the x axis is labelled "Count of panels") -- confirm upstream.
count_data <- DiagnosticPanels_genes %>%
  group_by(source_count) %>%
  summarise(n = n())

# Total number of genes over all source_count values; used for the
# percentages and in the plot title
total_genes <- sum(count_data$n)

# Add, in a single step, each bar's share of the total in percent and its
# fill colour: red for source_count 1, green for source_count 10, gray for
# everything else
count_data <- count_data %>%
  mutate(
    percentage = (n / total_genes) * 100,
    fill_color = case_when(
      source_count == 1 ~ "Red",
      source_count == 10 ~ "Green",
      TRUE ~ "Gray"
    )
  )

# Bar chart of gene counts per source_count, labelled with percentages
diagnostic_panels_overlap_plot <- ggplot(
  count_data,
  aes(x = source_count, y = n, fill = fill_color)
) +
  geom_col(show.legend = FALSE) +
  geom_text(
    aes(label = paste0(round(percentage, 1), "%")),
    vjust = -0.5
  ) +
  # one axis break per integer step between the smallest and largest count
  scale_x_continuous(
    breaks = seq(
      min(count_data$source_count),
      max(count_data$source_count),
      by = 1
    )
  ) +
  # fill_color already holds colour names, so use the values as-is
  scale_fill_identity() +
  labs(
    x = "Count of panels",
    y = "Number of Genes",
    title = paste0(
      "Number of genes in 10 clinical diagnostic panels for kidney disease (n = ",
      total_genes,
      ")"
    )
  ) +
  theme_minimal()

# Turn the static ggplot into an interactive plotly widget
ggplotly(diagnostic_panels_overlap_plot)
```
2 changes: 2 additions & 0 deletions edit_docs/_bookdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ rmd_files:
- "index.Rmd"
- "01-analyses-tables.Rmd"
- "02-analyses-plots.Rmd"
- "03-high-evidence-curation.Rmd"
- "04-additional-analyses.Rmd"
- "references.Rmd"
delete_merged_file: true
language:
Expand Down
34 changes: 22 additions & 12 deletions edit_docs/index.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,14 @@ url: https://halbritter-lab.github.io/kidney-genetics/
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
## Load libraries
library(readr) # needed to import CSV result files
library(tidyverse) # needed to transform the tables
library(DT) # needed to generate nice data tables in html
library(plotly) # needed for upset plots
library(knitr) # needed for draw.io files
library(knitrdrawio) # needed for draw.io files
library(config) # needed to load the config file
library(readr) # needed to import CSV result files
library(tidyverse) # needed to transform the tables
library(DT) # needed to generate nice data tables in html
library(plotly) # needed for upset plots
library(knitr) # needed for draw.io files
library(knitrdrawio) # needed for draw.io files
library(config) # needed to load the config file
library(ggplot2) # needed for plots
```

<!-- here we load the config file -->
Expand Down Expand Up @@ -57,20 +58,29 @@ source("../analyses/functions/string-functions.R", local = TRUE)
<!-- here we load the result csv file and compute numbers to update the diagrams -->
```{r, echo=FALSE, message=FALSE, warning=FALSE}
## load the CSV data
## load and filter (newest) the files automatically
## load and filter the (newest) files automatically
# the merge_analyses_sources table
merge_analyses_sources <- read_csv(get_newest_file("A_MergeAnalysesSources", "../analyses/A_MergeAnalysesSources/results"))
# the other analyses tables
PanelApp_genes <- read_csv(get_newest_file("01_PanelApp_genes", "../analyses/01_PanelApp/results"))
Literature_genes <- read_csv(get_newest_file("02_Literature_genes", "../analyses/02_Literature/results"))
DiagnosticPanels_genes <- read_csv(get_newest_file("03_DiagnosticPanels_genes", "../analyses/03_DiagnosticPanels/results"))
HPO_genes <- read_csv(get_newest_file("04_HPO_genes", "../analyses/04_HPO/results"))
PubTator_genes <- read_csv(get_newest_file("05_PubTator_genes", "../analyses/05_PubTator/results"))
# the high_evidence_annotated_table
high_evidence_annotated_table <- read_csv(get_newest_file("C_high_evidence_annotated_csv_table", "../analyses/C_AnnotateMergedTable/results"))
# compute numbers for the diagrams
all_genes <- merge_analyses_sources %>%
nrow()
high_evidence_genes <- merge_analyses_sources %>%
filter(evidence_count > 2) %>%
filter(evidence_count > 2)
high_evidence_genes_count <- high_evidence_genes %>%
nrow()
clingen_genes <- "XXX"
Expand All @@ -79,7 +89,7 @@ manualscoring_genes <- "YYY"
replace_strings("static/img/figures/raw/curation_flow_diagram_raw.drawio",
"static/img/figures/updated/curation_flow_diagram_current.drawio",
c("ALL_GENES", "HIGHEVIDENCE_GENES", "CLINGEN_GENES", "MANUALSCORING_GENES"),
c(all_genes, high_evidence_genes, clingen_genes, manualscoring_genes))
c(all_genes, high_evidence_genes_count, clingen_genes, manualscoring_genes))
# compute per source numbers with placeholders
summary_counts <- merge_analyses_sources %>%
Expand Down Expand Up @@ -185,8 +195,8 @@ To automatically group the genes, we will present the results of phenotypic and
<!-- TODO: Provide clustering and grouping results with numbers and proportions -->

The number of genes extracted from the five analyzed sources of information is as follows: (1) `r format_number(summary_counts$count_01_PanelApp)`, (2) `r format_number(summary_counts$count_02_Literature)`, (3) `r format_number(summary_counts$count_03_DiagnosticPanels)`, (4) `r format_number(summary_counts$count_04_HPO)`, and (5) `r format_number(summary_counts$count_05_PubTator)`.
Notably, **`r format_number(high_evidence_genes)`** genes (`r percent(high_evidence_genes / all_genes, digits = 1)`) of the **total `r format_number(all_genes)`** genes are present in three or more of the analyzed information sources, thus meeting our evidence criteria, indicating high confidence and their potential for diagnostic use.
Of these high evidence genes, **`r format_number(genes_at_least_one_diagnostic_panel)`** (`r percent(genes_at_least_one_diagnostic_panel / high_evidence_genes, digits = 1)`) are present in at least one, and **`r format_number(genes_all_diagnostic_panels)`** (`r percent(genes_all_diagnostic_panels / high_evidence_genes, digits = 1)`) are present in all 10 comprehensive diagnostic laboratory panels.
Notably, **`r format_number(high_evidence_genes_count)`** genes (`r percent(high_evidence_genes_count / all_genes, digits = 1)`) of the **total `r format_number(all_genes)`** genes are present in three or more of the analyzed information sources, thus meeting our evidence criteria, indicating high confidence and their potential for diagnostic use.
Of these high evidence genes, **`r format_number(genes_at_least_one_diagnostic_panel)`** (`r percent(genes_at_least_one_diagnostic_panel / high_evidence_genes_count, digits = 1)`) are present in at least one, and **`r format_number(genes_all_diagnostic_panels)`** (`r percent(genes_all_diagnostic_panels / high_evidence_genes_count, digits = 1)`) are present in all 10 comprehensive diagnostic laboratory panels.

To ensure currency, Kidney-Genetics will be updated regularly and automatically at XXX week intervals. We will also provide phenotypic and functional clustering results to facilitate gene grouping.
<!-- TODO: Provide information about the planned update framework -->
Expand Down

0 comments on commit f42d5bc

Please sign in to comment.