Skip to content

Commit

Permalink
Merge pull request #61 from anergictcell/feature/orphanet
Browse files Browse the repository at this point in the history
Feature/orphanet
  • Loading branch information
anergictcell authored Jun 10, 2024
2 parents 8c83766 + 8df1093 commit d831c08
Show file tree
Hide file tree
Showing 37 changed files with 1,473 additions and 429 deletions.
15 changes: 15 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,21 @@

All notable changes to this project will be documented in this file.

## Unreleased

### Feature

- Add Orphanet diseases (`OrphaDisease`) to Ontology
- Filter gene and disease annotations in subontology based on association with phenotypes
- Add binary version 3
- Add new example ontology

### Refactor

- [**breaking**] Add `Disease` trait that is needed to work with `OmimDisease` and `OrphaDisease`
- Update example ontology
- Update unit- and doctests to align with updated example ontology

## [0.9.0] - 2024-03-27

### Bugfix
Expand Down
14 changes: 12 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,10 @@ The main structs used in `hpo` are:
- [`HpoSet`](https://docs.rs/hpo/latest/hpo/struct.HpoSet.html) is a collection of `HpoTerm`s, like a patient's clinical information.
- [`Gene`](https://docs.rs/hpo/latest/hpo/annotations/struct.Gene.html) represents a single gene, including information about associated `HpoTerm`s.
- [`OmimDisease`](https://docs.rs/hpo/latest/hpo/annotations/struct.OmimDisease.html) represents a single OMIM disease, including information about associated `HpoTerm`s.
- [`OrphaDisease`](https://docs.rs/hpo/latest/hpo/annotations/struct.OrphaDisease.html) represents a single ORPHA disease, including information about associated `HpoTerm`s.

The most relevant modules are:
- [`annotations`](https://docs.rs/hpo/latest/hpo/annotations/index.html) contains the `Gene` and `OmimDisease` structs, and some related important types.
- [`annotations`](https://docs.rs/hpo/latest/hpo/annotations/index.html) contains the `Gene`, `OmimDisease` and `OrphaDisease` structs, and some related important types.
- [`similarity`](https://docs.rs/hpo/latest/hpo/similarity/index.html) contains structs and helper functions for similarity comparisons for `HpoTerm` and `HpoSet`.
- [`stats`](https://docs.rs/hpo/latest/hpo/stats/index.html) contains functions to calculate the hypergeometric enrichment score of genes or diseases.

Expand Down Expand Up @@ -67,7 +68,7 @@ Finally, load the data using [`Ontology::from_binary`]:
### Ontology
```rust
use hpo::{Ontology, HpoTermId};
use hpo::annotations::{GeneId, OmimDiseaseId};
use hpo::annotations::{GeneId, OmimDiseaseId, OrphaDiseaseId};

fn example() {
let ontology = Ontology::from_standard("/path/to/master-data/").unwrap();
Expand All @@ -87,6 +88,11 @@ fn example() {
// do something with disease
}

// iterate orpha diseases
for disease in ontology.orpha_diseases() {
// do something with disease
}

// get a single HPO term using HPO ID
let hpo_id = HpoTermId::try_from("HP:0000123").unwrap();
let term = ontology.hpo(hpo_id);
Expand All @@ -98,6 +104,10 @@ fn example() {
let disease_id = OmimDiseaseId::from(12345u32);
let disease = ontology.omim_disease(&disease_id);

// get a single Orpha disease
let disease_id = OrphaDiseaseId::from(12345u32);
let disease = ontology.orpha_disease(&disease_id);

// get a single Gene
let hgnc_id = GeneId::from(12345u32);
let gene = ontology.gene(&hgnc_id);
Expand Down
1 change: 1 addition & 0 deletions examples/bench_compare_gene_to_diseases.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::path::Path;

use rayon::prelude::*;

use hpo::annotations::Disease;
use hpo::similarity::GroupSimilarity;
use hpo::{HpoSet, Ontology};

Expand Down
15 changes: 14 additions & 1 deletion examples/compare_ontologies.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
//! This is helpful for checking correctness of the parser modules
//! or to see changes after a new HPO release
use hpo::annotations::Disease;
use hpo::comparison::Comparison;
use hpo::{HpoTermId, Ontology};
use std::{path::Path, process};
Expand Down Expand Up @@ -52,6 +53,14 @@ fn overview(diffs: &Comparison) {
for disease in diffs.removed_omim_diseases() {
println!("Removed\t{}\t{}", disease.id(), disease.name());
}

for disease in diffs.added_orpha_diseases() {
println!("Added\t{}\t{}", disease.id(), disease.name());
}

for disease in diffs.removed_orpha_diseases() {
println!("Removed\t{}\t{}", disease.id(), disease.name());
}
}

/// Prints info about Term-specific changes
Expand Down Expand Up @@ -141,7 +150,11 @@ fn changed_genes(diffs: &Comparison) {
/// Prints info about Disease-specific changes
fn changed_diseases(diffs: &Comparison) {
println!("#Disease Delta\tID\tOld Name:New Name\tn Terms Old\tn Terms New\tAdded Terms\tRemoved Terms");
for disease in diffs.changed_omim_diseases() {
for disease in diffs
.changed_omim_diseases()
.iter()
.chain(diffs.changed_orpha_diseases().iter())
{
print!("Delta\t{}", disease.id());
if let Some(names) = disease.changed_name() {
print!("\t{}:{}", names.0, names.1);
Expand Down
2 changes: 1 addition & 1 deletion examples/dendogram.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::env::Args;
use std::path::Path;

use hpo::annotations::OmimDisease;
use hpo::annotations::{Disease, OmimDisease};
use rayon::prelude::*;

use hpo::similarity::GroupSimilarity;
Expand Down
34 changes: 33 additions & 1 deletion examples/disease_similarity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::io::Write;
use std::{env::Args, time::SystemTime};

use hpo::{
annotations::{OmimDisease, OmimDiseaseId},
annotations::{Disease, OmimDisease, OmimDiseaseId},
similarity::{GraphIc, GroupSimilarity, StandardCombiner},
term::HpoGroup,
HpoSet, HpoTermId, Ontology,
Expand Down Expand Up @@ -120,6 +120,29 @@ fn cross_compare_diseases(
}
}

/// Prints a tab-separated similarity matrix of Orpha diseases (rows)
/// against all OMIM diseases (columns)
///
/// The header line lists the names of all OMIM diseases. Each following
/// line starts with the name of an Orpha disease, followed by its
/// similarity score to every OMIM disease.
///
/// Only the first 100 Orpha diseases yielded by the ontology are compared.
/// Rows are calculated in parallel, so their order in the output is
/// not guaranteed.
fn compare_omim_to_orpha(ontology: &Ontology, sim: &GroupSimilarity<GraphIc, StandardCombiner>) {
    // Collect column names and the corresponding HpoSets in a single pass
    // so that both vectors share the same order
    let (omim_names, omim_sets): (Vec<&str>, Vec<HpoSet>) = ontology
        .omim_diseases()
        .map(|disease| (disease.name(), disease.to_hpo_set(ontology)))
        .unzip();

    println!("Orpha\\Omim\t{}", omim_names.join("\t"));

    ontology
        .orpha_diseases()
        .take(100)
        .par_bridge()
        .for_each(|orpha| {
            let orpha_set = orpha.to_hpo_set(ontology);
            // Start the row with the disease name and
            // append one tab-prefixed score per OMIM disease
            let row = omim_sets
                .iter()
                .fold(orpha.name().to_string(), |mut line, omim_set| {
                    line.push('\t');
                    line.push_str(&sim.calculate(&orpha_set, omim_set).to_string());
                    line
                });
            println!("{row}");
        })
}

fn main() {
let ontology = Ontology::from_binary("tests/ontology.hpo").unwrap();
let combiner = StandardCombiner::FunSimAvg;
Expand All @@ -135,6 +158,12 @@ fn main() {
if let Ok(num) = arg.parse::<usize>() {
// integer provided, using disease x disease comparisons
cross_compare_diseases(&ontology, &sim, num);
} else if arg == "orpha" {
let sim = GroupSimilarity::new(
StandardCombiner::FunSimAvg,
GraphIc::new(hpo::term::InformationContentKind::Gene),
);
compare_omim_to_orpha(&ontology, &sim);
} else {
// List of HPO terms provided
compare_custom_set_to_diseases(&ontology, &sim, arg);
Expand All @@ -153,6 +182,9 @@ fn main() {
- Cross compare N diseases\n\
disease_similarity <NUMBER OF COMPARISONS>\n\
disease_similarity 20
- Compare all OMIM to all ORPHA diseases\n\
disease_similarity orpha\n\
disease_similarity orpha
");
}
}
Expand Down
6 changes: 5 additions & 1 deletion examples/enrichment.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
use std::{env::Args, process};

use hpo::{annotations::OmimDiseaseId, term::HpoGroup, HpoResult, HpoSet, HpoTermId, Ontology};
use hpo::{
annotations::{Disease, OmimDiseaseId},
term::HpoGroup,
HpoResult, HpoSet, HpoTermId, Ontology,
};

/// Tries to parse an HpoTermId from a string `HP:0007768` or a `u32`
fn id_from_freetext(value: &str) -> HpoResult<HpoTermId> {
Expand Down
153 changes: 153 additions & 0 deletions examples/gene_similarity.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
use rayon::prelude::*;
use std::io;
use std::io::Write;
use std::{env::Args, time::SystemTime};

use hpo::{
annotations::{Gene, GeneId},
similarity::{GraphIc, GroupSimilarity, StandardCombiner},
term::HpoGroup,
HpoSet, HpoTermId, Ontology,
};

/// Calculates the similarity score of two genes
///
/// The two genes are specified by their gene symbol via CLI arguments.
/// The resulting score is printed to stdout.
///
/// # Panics
///
/// Panics if `args` does not contain two arguments after the program
/// name or if one of the symbols cannot be found in the `Ontology`.
fn compare_two_genes(
    ontology: &Ontology,
    sim: &GroupSimilarity<GraphIc, StandardCombiner>,
    mut args: Args,
) {
    // `nth(1)` skips the first argument (the program name) and
    // yields the first symbol, `next()` then yields the second one
    let symbol_a = args.nth(1).expect("The first gene symbol is missing");
    let symbol_b = args.next().expect("The second gene symbol is missing");

    let gene_a = ontology
        .gene_by_name(&symbol_a)
        .expect("The first gene is not part of the Ontology");
    let gene_b = ontology
        .gene_by_name(&symbol_b)
        .expect("The second gene is not part of the Ontology");

    let set_a = gene_a.to_hpo_set(ontology);
    let set_b = gene_b.to_hpo_set(ontology);

    let res = sim.calculate(&set_a, &set_b);
    println!("Similarity is {res}");
}

/// Calculates the similarity score of a custom HPO-Set
/// to all genes of the ontology
///
/// The HPO-Set is specified as a comma separated list of HPO-Term-IDs.
///
/// Prints the number of comparisons and the elapsed time, followed by
/// one tab-separated line per gene (gene id, gene name, similarity score),
/// sorted by descending similarity score.
///
/// # Panics
///
/// Panics if an element of `terms` cannot be converted to an `HpoTermId`
/// or if writing to stdout fails.
fn compare_custom_set_to_genes(
    ontology: &Ontology,
    sim: &GroupSimilarity<GraphIc, StandardCombiner>,
    terms: String,
) {
    let mut group = HpoGroup::default();
    for term in terms.split(',') {
        group.insert(HpoTermId::try_from(term).expect("Invalid HpoTermId"));
    }
    let set_a = HpoSet::new(ontology, group);

    let start = SystemTime::now();
    let mut results: Vec<(&Gene, f32)> = ontology
        .genes()
        .par_bridge()
        .map(|gene| {
            let res = sim.calculate(&set_a, &gene.to_hpo_set(ontology));
            (gene, res)
        })
        .collect();
    // The system clock is not monotonic: if it was set back in the meantime
    // report a duration of 0 instead of panicking
    let duration = start.elapsed().unwrap_or_default();

    println!(
        "Number of comparisons: {} in {} sec",
        results.len(),
        duration.as_secs()
    );
    // Highest score first. `total_cmp` defines an order for every `f32`,
    // so a NaN score cannot cause a panic while sorting
    results.sort_by(|a, b| b.1.total_cmp(&a.1));

    let mut stdout = io::stdout().lock();
    for (gene, score) in results {
        writeln!(stdout, "{}\t{}\t{}", gene.id(), gene.name(), score).unwrap();
    }
}

/// Calculate the pairwise similarity of all genes to the first
/// `num` genes of the ontology
///
/// Prints the number of comparisons and the elapsed time, followed by
/// one tab-separated line per comparison (gene id A, gene id B,
/// similarity score).
///
/// # Panics
///
/// Panics if writing to stdout fails.
fn cross_compare_genes(
    ontology: &Ontology,
    sim: &GroupSimilarity<GraphIc, StandardCombiner>,
    num: usize,
) {
    let start = SystemTime::now();
    let results: Vec<(GeneId, GeneId, f32)> = ontology
        .genes()
        .par_bridge()
        .flat_map(|gene_a| {
            // The set of `gene_a` is the same for every comparison partner,
            // so it is built once instead of once per `gene_b`
            let set_a = gene_a.to_hpo_set(ontology);
            ontology
                .genes()
                .take(num)
                .map(|gene_b| {
                    let res = sim.calculate(&set_a, &gene_b.to_hpo_set(ontology));
                    (*gene_a.id(), *gene_b.id(), res)
                })
                .collect::<Vec<(GeneId, GeneId, f32)>>()
        })
        .collect();
    // The system clock is not monotonic: if it was set back in the meantime
    // report a duration of 0 instead of panicking
    let duration = start.elapsed().unwrap_or_default();

    println!(
        "Number of comparisons: {} in {} sec",
        results.len(),
        duration.as_secs()
    );

    let mut stdout = io::stdout().lock();
    for (id_a, id_b, score) in results {
        writeln!(stdout, "{id_a}\t{id_b}\t{score}").unwrap();
    }
}

/// Entry point of the `gene_similarity` example
///
/// Loads the ontology from `tests/example.hpo` and selects the mode
/// based on the CLI arguments:
///
/// - two arguments: compare two genes, specified by gene symbol
/// - one argument that parses as an integer: cross compare genes
/// - any other single argument: compare a custom HPO-Set to all genes
/// - otherwise: print usage information
fn main() {
    let ontology = Ontology::from_binary("tests/example.hpo").unwrap();
    let sim = GroupSimilarity::default();

    let mut args = std::env::args();

    // `args` includes the program name, so `len()` is the number
    // of user supplied arguments + 1
    match args.len() {
        3 => compare_two_genes(&ontology, &sim, args),
        2 => {
            let arg = args.nth(1).unwrap();
            if let Ok(num) = arg.parse::<usize>() {
                // integer provided, using gene x gene comparisons
                cross_compare_genes(&ontology, &sim, num);
            } else {
                // List of HPO terms provided
                compare_custom_set_to_genes(&ontology, &sim, arg);
            }
        }
        _ => {
            println!("Calculate similarities of genes\n\n");
            println!("\
                There are 3 different options:\n\
                - Compare 2 genes\n\
                gene_similarity <Gene symbol> <Gene symbol>\n\
                gene_similarity EZH2 GBA1\n\n\
                - Compare all genes to a custom HPO-Set\n\
                gene_similarity <HPO-TERM-IDs>\n\
                gene_similarity HP:0000750,HP:0000752,HP:0001249,HP:0007018,HP:0010818,HP:0011463\n\n\
                - Cross compare N genes\n\
                gene_similarity <NUMBER OF COMPARISONS>\n\
                gene_similarity 20\n\
            ");
        }
    }
}
7 changes: 4 additions & 3 deletions examples/search_by_name.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
//! Searches OMIM diseases by name and prints the first and all matches
use hpo::annotations::Disease;
use hpo::Ontology;

fn main() {
let ontology = Ontology::from_binary("tests/ontology.hpo").unwrap();
let cystinosis = ontology.omim_disease_by_name("Cystinosis").unwrap();
println!("first match: {:?}", cystinosis.name());
for result in ontology.omim_diseases_by_name("Cystinosis") {
let congenital = ontology.omim_disease_by_name("congenital").unwrap();
println!("first match: {:?}", congenital.name());
for result in ontology.omim_diseases_by_name("congenital") {
println!("{:?}", result.name());
}
}
Loading

0 comments on commit d831c08

Please sign in to comment.