Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add missing orpha features #63

Merged
merged 2 commits into from
Jun 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file.

## Unreleased

## [0.10.1]

### Refactor

- Add some missing methods for Orpha diseases


## [0.10.0]

### Feature
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "hpo"
version = "0.10.0"
version = "0.10.1"
edition = "2021"
authors = ["Jonas Marcello <[email protected]>"]
description = "Human Phenotype Ontology Similarity"
Expand Down
6 changes: 3 additions & 3 deletions examples/bench_disease_enrichment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use std::path::Path;

use rayon::prelude::*;

use hpo::stats::hypergeom::disease_enrichment;
use hpo::stats::hypergeom::omim_disease_enrichment;
use hpo::Ontology;

const GENES: [&str; 6] = ["GBA1", "NPC1", "EZH2", "DMD", "MUC7", "ARID1B"];
Expand Down Expand Up @@ -34,7 +34,7 @@ fn multi_threaded_enrichment(ontology: &Ontology) -> usize {
.unwrap_or_else(|| panic!("{gene} does not exist"))
.to_hpo_set(ontology);
total
+ disease_enrichment(ontology, &set)
+ omim_disease_enrichment(ontology, &set)
.iter()
.fold(0, |count, enrichment| {
if enrichment.pvalue() < 0.0000005 && enrichment.enrichment() > 100.0 {
Expand All @@ -55,7 +55,7 @@ fn single_threaded_enrichment(ontology: &Ontology) -> usize {
.unwrap_or_else(|| panic!("{gene} does not exist"))
.to_hpo_set(ontology);
total
+ disease_enrichment(ontology, &set)
+ omim_disease_enrichment(ontology, &set)
.iter()
.fold(0, |count, enrichment| {
if enrichment.pvalue() < 0.0000005 && enrichment.enrichment() > 100.0 {
Expand Down
2 changes: 1 addition & 1 deletion examples/enrichment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ fn gene_enrichments(ontology: &Ontology, hposet: &HpoSet, output_len: usize) {

/// Prints diseases that are enriched in the hposet
fn disease_enrichments(ontology: &Ontology, hposet: &HpoSet, output_len: usize) {
let mut disease_enrichments = hpo::stats::hypergeom::disease_enrichment(ontology, hposet);
let mut disease_enrichments = hpo::stats::hypergeom::omim_disease_enrichment(ontology, hposet);

disease_enrichments.sort_by(|a, b| {
a.pvalue()
Expand Down
36 changes: 35 additions & 1 deletion src/set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
use std::collections::HashMap;

use crate::annotations::Genes;
use crate::annotations::OmimDiseases;
use crate::annotations::{OmimDiseases, OrphaDiseases};
use crate::similarity::GroupSimilarity;
use crate::similarity::Similarity;
use crate::similarity::SimilarityCombiner;
Expand Down Expand Up @@ -516,6 +516,40 @@ impl<'a> HpoSet<'a> {
.fold(OmimDiseases::default(), |acc, element| &acc | element)
}

/// Returns all [`crate::annotations::OrphaDiseaseId`]s that are associated to the set
///
/// # Panics
///
/// When an `HpoTermId` of the set is not part of the Ontology
///
/// # Examples
///
/// ```
/// use hpo::{Ontology, HpoSet};
/// use hpo::term::HpoGroup;
///
/// let ontology = Ontology::from_binary("tests/example.hpo").unwrap();
///
/// let mut hpos = HpoGroup::new();
/// hpos.insert(12639u32);
/// hpos.insert(818u32);
/// let set = HpoSet::new(&ontology, hpos);
/// for d in set.orpha_disease_ids() {println!("{}", d);};
/// // `Papilloma of choroid plexus (ORPHA:2807)` is linked to `HP:0012639`
/// assert!(set.orpha_disease_ids().contains(&2807u32.into()));
/// ```
pub fn orpha_disease_ids(&self) -> OrphaDiseases {
self.group
.iter()
.map(|term_id| {
self.ontology
.get(term_id)
.expect("HpoTermId must be in Ontology")
.orpha_diseases()
})
.fold(OrphaDiseases::default(), |acc, diseases| &acc | diseases)
}

/// Returns the counts of all categories in the set
///
/// # Examples
Expand Down
19 changes: 16 additions & 3 deletions src/stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
use std::collections::HashMap;
use std::marker::PhantomData;

use crate::annotations::{AnnotationId, Disease, GeneId, OmimDiseaseId};
use crate::annotations::{AnnotationId, Disease, GeneId, OmimDiseaseId, OrphaDiseaseId};
use crate::HpoTerm;

pub mod hypergeom;
Expand All @@ -25,7 +25,7 @@ pub use linkage::Linkage;
/// The fold enrichment and p-value for an enriched Gene or Disease
///
/// [`Enrichment`] is returned from statistics enrichment methods, such as
/// [`hypergeom::gene_enrichment`] and [`hypergeom::disease_enrichment`].
/// [`hypergeom::gene_enrichment`], [`hypergeom::omim_disease_enrichment`] and [`hypergeom::orpha_disease_enrichment`].
///
#[derive(Debug)]
pub struct Enrichment<T> {
Expand Down Expand Up @@ -143,7 +143,7 @@ impl<'a> SampleSet<GeneId> {

impl<'a> SampleSet<OmimDiseaseId> {
/// Constructs a new `SampleSet` with disease counts from an iterator of [`HpoTerm`]s
pub fn disease<I: IntoIterator<Item = HpoTerm<'a>>>(terms: I) -> Self {
pub fn omim_disease<I: IntoIterator<Item = HpoTerm<'a>>>(terms: I) -> Self {
let term2omimid = |term: HpoTerm<'a>| term.omim_diseases().map(|d| d.id().as_u32());
let (size, counts) = calculate_counts(terms, term2omimid);
Self {
Expand All @@ -154,6 +154,19 @@ impl<'a> SampleSet<OmimDiseaseId> {
}
}

impl<'a> SampleSet<OrphaDiseaseId> {
/// Constructs a new `SampleSet` with disease counts from an iterator of [`HpoTerm`]s
pub fn orpha_disease<I: IntoIterator<Item = HpoTerm<'a>>>(terms: I) -> Self {
let term2omimid = |term: HpoTerm<'a>| term.orpha_diseases().map(|d| d.id().as_u32());
let (size, counts) = calculate_counts(terms, term2omimid);
Self {
size,
counts,
phantom: PhantomData,
}
}
}

impl<T: AnnotationId> SampleSet<T> {
/// Returns the total number of [`HpoTerm`]s in the [`SampleSet`]
fn len(&self) -> u64 {
Expand Down
2 changes: 1 addition & 1 deletion src/stats/hypergeom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,5 +65,5 @@
mod disease;
mod gene;
mod statrs;
pub use disease::disease_enrichment;
pub use disease::{omim_disease_enrichment, orpha_disease_enrichment};
pub use gene::gene_enrichment;
142 changes: 88 additions & 54 deletions src/stats/hypergeom/disease.rs
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
use tracing::debug;

use crate::annotations::OmimDiseaseId;
use crate::annotations::{AnnotationId, OmimDiseaseId, OrphaDiseaseId};
use crate::stats::hypergeom::statrs::Hypergeometric;
use crate::stats::{f64_from_u64, Enrichment, SampleSet};
use crate::HpoTerm;

/// Calculates the hypergeometric enrichment of diseases within the `set` compared to the `background`
/// Calculates the hypergeometric enrichment of OMIM diseases within the `set` compared to the `background`
///
/// # Examples
///
/// ```
/// use hpo::Ontology;
/// use hpo::{HpoSet, term::HpoGroup};
/// use hpo::stats::hypergeom::disease_enrichment;
/// use hpo::stats::hypergeom::omim_disease_enrichment;
///
/// let ontology = Ontology::from_binary("tests/example.hpo").unwrap();
///
/// let gene = ontology.gene_by_name("KRAS").unwrap();
/// let gene_hpo_set = gene.to_hpo_set(&ontology);
///
/// let mut enrichments = disease_enrichment(&ontology, &gene_hpo_set);
/// let mut enrichments = omim_disease_enrichment(&ontology, &gene_hpo_set);
///
/// // the results are not sorted by default
/// enrichments.sort_by(|a, b| {
Expand All @@ -28,60 +28,94 @@ use crate::HpoTerm;
/// assert!(enrichments.first().unwrap().pvalue() < enrichments.last().unwrap().pvalue());
/// assert!(enrichments.first().unwrap().enrichment() > enrichments.last().unwrap().enrichment());
/// ```
pub fn disease_enrichment<'a, T, U>(background: T, set: U) -> Vec<Enrichment<OmimDiseaseId>>
pub fn omim_disease_enrichment<'a, T, U>(background: T, set: U) -> Vec<Enrichment<OmimDiseaseId>>
where
T: IntoIterator<Item = HpoTerm<'a>>,
U: IntoIterator<Item = HpoTerm<'a>>,
{
fn inner_disease_enrichment(
background: &SampleSet<OmimDiseaseId>,
sample_set: &SampleSet<OmimDiseaseId>,
) -> Vec<Enrichment<OmimDiseaseId>> {
let mut res = Vec::new();
for (disease, observed_successes) in sample_set {
if observed_successes == 0 {
debug!("Skipping {}", disease);
continue;
}
let successes = background
.get(&disease)
.expect("disease must be present in background set");
let hyper = Hypergeometric::new(
// Total number of HPOTerms in the Ontology
// ==> population
background.len(),
// Number of terms in the Ontology that are associated to the disease
// ==> successes
*successes,
// Number of terms in the set
// ==> draws
sample_set.len(),
)
.expect("the set must not be larger than the background");
// subtracting 1, because we want to test including observed_successes
// e.g. "7 or more", but sf by default calculates "more than 7"
let pvalue = hyper.sf(observed_successes - 1);
let enrichment = (f64_from_u64(observed_successes) / f64_from_u64(sample_set.len()))
/ (f64_from_u64(*successes) / f64_from_u64(background.len()));
res.push(Enrichment::annotation(
disease,
pvalue,
observed_successes,
enrichment,
));
debug!(
"Disease:{}\tPopulation: {}, Successes: {}, Draws: {}, Observed: {}",
disease,
background.len(),
successes,
sample_set.len(),
observed_successes
);
}
res
}
let background = SampleSet::omim_disease(background);
let sample_set = SampleSet::omim_disease(set);
inner_disease_enrichment(&background, &sample_set)
}

let background = SampleSet::disease(background);
let sample_set = SampleSet::disease(set);
/// Calculates the hypergeometric enrichment of ORPHA diseases within the `set` compared to the `background`
///
/// # Examples
///
/// ```
/// use hpo::Ontology;
/// use hpo::{HpoSet, term::HpoGroup};
/// use hpo::stats::hypergeom::orpha_disease_enrichment;
///
/// let ontology = Ontology::from_binary("tests/example.hpo").unwrap();
///
/// let gene = ontology.gene_by_name("KRAS").unwrap();
/// let gene_hpo_set = gene.to_hpo_set(&ontology);
///
/// let mut enrichments = orpha_disease_enrichment(&ontology, &gene_hpo_set);
///
/// // the results are not sorted by default
/// enrichments.sort_by(|a, b| {
/// a.pvalue().partial_cmp(&b.pvalue()).unwrap()
/// });
/// assert!(enrichments.first().unwrap().pvalue() < enrichments.last().unwrap().pvalue());
/// assert!(enrichments.first().unwrap().enrichment() > enrichments.last().unwrap().enrichment());
/// ```
pub fn orpha_disease_enrichment<'a, T, U>(background: T, set: U) -> Vec<Enrichment<OrphaDiseaseId>>
where
T: IntoIterator<Item = HpoTerm<'a>>,
U: IntoIterator<Item = HpoTerm<'a>>,
{
let background = SampleSet::orpha_disease(background);
let sample_set = SampleSet::orpha_disease(set);
inner_disease_enrichment(&background, &sample_set)
}

#[inline]
fn inner_disease_enrichment<ID: AnnotationId>(
background: &SampleSet<ID>,
sample_set: &SampleSet<ID>,
) -> Vec<Enrichment<ID>> {
let mut res = Vec::new();
for (disease, observed_successes) in sample_set {
if observed_successes == 0 {
debug!("Skipping {}", disease);
continue;
}
let successes = background
.get(&disease)
.expect("disease must be present in background set");
let hyper = Hypergeometric::new(
// Total number of HPOTerms in the Ontology
// ==> population
background.len(),
// Number of terms in the Ontology that are associated to the disease
// ==> successes
*successes,
// Number of terms in the set
// ==> draws
sample_set.len(),
)
.expect("the set must not be larger than the background");
// subtracting 1, because we want to test including observed_successes
// e.g. "7 or more", but sf by default calculates "more than 7"
let pvalue = hyper.sf(observed_successes - 1);
let enrichment = (f64_from_u64(observed_successes) / f64_from_u64(sample_set.len()))
/ (f64_from_u64(*successes) / f64_from_u64(background.len()));
res.push(Enrichment::annotation(
disease,
pvalue,
observed_successes,
enrichment,
));
debug!(
"Disease:{}\tPopulation: {}, Successes: {}, Draws: {}, Observed: {}",
disease,
background.len(),
successes,
sample_set.len(),
observed_successes
);
}
res
}
Loading