Skip to content

Commit

Permalink
parse target ontology from mappings header, add nextflow tests
Browse files Browse the repository at this point in the history
  • Loading branch information
apriltuesday committed Dec 13, 2023
1 parent 62c96df commit e4cbf9a
Show file tree
Hide file tree
Showing 18 changed files with 8,083 additions and 12 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,10 @@ jobs:
run: python -m pytest --cov=cmat --cov-append tests -k integration

- name: End-to-end test of evidence string generation pipeline
run: bash tests/output_generation/test_pipeline.sh
run: bash tests/pipelines/test_annotation_pipeline.sh

- name: End-to-end test of curation pipelines
run: bash tests/pipelines/test_curation_pipelines.sh

- name: Upload the coverage data to Coveralls
env:
Expand Down
2 changes: 1 addition & 1 deletion bin/trait_mapping/create_table_for_manual_curation.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def get_trait_status(uri, ontology):
try:
previous_comments = pd.read_csv(args.previous_comments, sep='\t', header=None)
previous_comments = dict(zip(previous_comments[0], previous_comments[1]))
except pd.errors.EmptyDataError:
except (FileNotFoundError, pd.errors.EmptyDataError):
previous_comments = {}

# Process all mappings which require manual curation
Expand Down
13 changes: 8 additions & 5 deletions pipelines/export_curation_spreadsheet.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

nextflow.enable.dsl=2

include { getTargetOntology } from './utils.nf'


def helpMessage() {
log.info"""
Expand Down Expand Up @@ -37,10 +39,11 @@ codeRoot = "${projectDir}/.."
workflow {
exportTable()
combineManualAndAutomated(exportTable.out.finishedMappings)
getTargetOntology(params.mappings)
stripMappingsHeader()
mergeWithLatestMappings(combineManualAndAutomated.out.newMappings, stripMappingsHeader.out.previousMappings)
checkDuplicates(mergeWithLatestMappings.out.newMappings)
addDateToHeader(checkDuplicates.out.duplicatesOk, mergeWithLatestMappings.out.newMappings)
addMappingsHeader(checkDuplicates.out.duplicatesOk, mergeWithLatestMappings.out.newMappings, getTargetOntology.out.targetOntology)
if (params.with_feedback) {
createEfoTable(exportTable.out.importTerms)
generateZoomaFeedback(mergeWithLatestMappings.out.newMappings)
Expand Down Expand Up @@ -81,7 +84,6 @@ process exportTable {

script:
"""
# TODO keep target ontology from header
grep -v "^#" ${params.mappings} > previous_mappings.tsv
"""
}
Expand Down Expand Up @@ -196,10 +198,9 @@ process checkDuplicates {
}

/*
* Add generated date to header of final mappings file.
* Add generated date and target ontology to header of final mappings file.
*/
// TODO add target ontology to header
process addDateToHeader {
process addMappingsHeader {
publishDir "${curationRoot}",
overwrite: true,
mode: "copy",
Expand All @@ -208,13 +209,15 @@ process addDateToHeader {
input:
val duplicatesOk
path newMappings
val targetOntology

output:
path "trait_names_to_ontology_mappings.tsv", emit: finalMappings

script:
"""
printf '#generated-date=%(%Y-%m-%d)T\n' > trait_names_to_ontology_mappings.tsv
printf '#ontology=${targetOntology}\n' >> trait_names_to_ontology_mappings.tsv
cat ${newMappings} >> trait_names_to_ontology_mappings.tsv
"""
}
Expand Down
8 changes: 4 additions & 4 deletions pipelines/generate_curation_spreadsheet.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

nextflow.enable.dsl=2

include { getTargetOntology } from './utils.nf'


def helpMessage() {
log.info"""
Expand Down Expand Up @@ -44,12 +46,10 @@ workflow {
} else {
clinvarXml = downloadClinvar()
}

// TODO get target ontology from mappings file
targetOntology = ...
getTargetOntology(params.mappings)
parseTraits(clinvarXml)
splitTraits(parseTraits.out.parsedTraits)
processTraits(splitTraits.out.traitChunk.flatten(), targetOntology)
processTraits(splitTraits.out.traitChunk.flatten(), getTargetOntology.out.targetOntology)
collectAutomatedMappings(processTraits.out.automatedTraits.collect())
collectCurationTraits(processTraits.out.traitsForCuration.collect())
createCurationTable(collectCurationTraits.out.curationTraits)
Expand Down
16 changes: 16 additions & 0 deletions pipelines/utils.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/*
 * Extract target ontology from mappings file header. Defaults to EFO if missing.
 *
 * Input:  mappingsFile - mappings file whose header may contain a line of the
 *                        form "#ontology=<NAME>".
 * Output: targetOntology - value following "#ontology=" on the first such line,
 *                          or "EFO" when no such line exists or its value is empty.
 */
process getTargetOntology {
    input:
    path mappingsFile

    output:
    env ONTOLOGY, emit: targetOntology

    script:
    """
    # A single sed call prints the value of the first matching header line and
    # then quits. Unlike a grep-and-sed pipeline it exits 0 when nothing matches,
    # so the default below still applies under errexit/pipefail shell options,
    # and a repeated header line cannot produce a multi-line value.
    ONTOLOGY=\$(sed -n '/^#ontology=/{s/^#ontology=//p;q;}' "${mappingsFile}")
    # Fall back to EFO when the header is absent or empty.
    ONTOLOGY=\${ONTOLOGY:-EFO}
    """
}
638 changes: 638 additions & 0 deletions tests/pipelines/resources/expected/automated_trait_mappings.tsv

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions tests/pipelines/resources/expected/curator_comments.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
gc1/gc2 polymorphism Test comment
18 changes: 18 additions & 0 deletions tests/pipelines/resources/expected/google_sheets_table.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
nephronophthisis 2 NT expansion http://purl.obolibrary.org/obo/MONDO_0019005|nephronophthisis|NOT_SPECIFIED|previously-used|EFO_CURRENT http://purl.obolibrary.org/obo/MONDO_0019005|nephronophthisis|GOOD|eva-clinvar|EFO_CURRENT http://purl.obolibrary.org/obo/MONDO_0019005|nephronophthisis|GOOD|eva-clinvar|EFO_CURRENT
gc1/gc2 polymorphism 1 NT expansion http://purl.obolibrary.org/obo/MONDO_0010739|Taqi polymorphism|MEDIUM|http://purl.obolibrary.org/obo/mondo.owl|NOT_CONTAINED http://www.orpha.net/ORDO/Orphanet_118803|solute carrier family 25 member 22|MEDIUM|https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_4.3.owl|NOT_CONTAINED http://purl.obolibrary.org/obo/MONDO_0009826|PA polymorphism of alpha-2-globulin|MEDIUM|http://purl.obolibrary.org/obo/mondo.owl|NOT_CONTAINED http://www.orpha.net/ORDO/Orphanet_122340|guanylate cyclase 2D, retinal|MEDIUM|https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_4.3.owl|NOT_CONTAINED
inherited immunodeficiency diseases 2 http://identifiers.org/medgen/C5197805|Inherited Immunodeficiency Diseases|HIGH|clinvar-xrefs|NOT_CONTAINED http://identifiers.org/medgen/C5197805|Inherited Immunodeficiency Diseases|HIGH|clinvar-xrefs|NOT_CONTAINED http://identifiers.org/mesh/D000081207|Inherited Immunodeficiency Diseases|HIGH|clinvar-xrefs|NOT_CONTAINED
peroxisome biogenesis disorder 5a (zellweger) 2 http://www.orpha.net/ORDO/Orphanet_912|Zellweger syndrome|NOT_SPECIFIED|previously-used|EFO_CURRENT http://purl.obolibrary.org/obo/MONDO_0013932|peroxisome biogenesis disorder 5A (Zellweger)|GOOD|eva-clinvar|EFO_CURRENT http://purl.obolibrary.org/obo/MONDO_0013932|peroxisome biogenesis disorder 5A (Zellweger)|GOOD|eva-clinvar|EFO_CURRENT http://www.orpha.net/ORDO/Orphanet_912|Zellweger syndrome|GOOD|cttv|EFO_CURRENT http://purl.obolibrary.org/obo/MONDO_0013932|peroxisome biogenesis disorder 5A (Zellweger)|GOOD|clinvar-xrefs|EFO_CURRENT http://www.orpha.net/ORDO/Orphanet_912|Zellweger syndrome|GOOD|clinvar-xrefs|EFO_CURRENT https://www.omim.org/entry/614866|Peroxisome biogenesis disorder 5A (Zellweger)|GOOD|clinvar-xrefs|NOT_CONTAINED http://identifiers.org/medgen/C3553940|Peroxisome biogenesis disorder 5A (Zellweger)|GOOD|clinvar-xrefs|NOT_CONTAINED
frontometaphyseal dysplasia 2 http://purl.obolibrary.org/obo/MONDO_0015942|frontometaphyseal dysplasia|NOT_SPECIFIED|previously-used|EFO_CURRENT http://purl.obolibrary.org/obo/MONDO_0015942|frontometaphyseal dysplasia|GOOD|eva-clinvar|EFO_CURRENT http://purl.obolibrary.org/obo/MONDO_0015942|frontometaphyseal dysplasia|GOOD|eva-clinvar|EFO_CURRENT
chitotriosidase deficiency 1 http://purl.obolibrary.org/obo/MONDO_0013586|Chitotriosidase deficiency|NOT_SPECIFIED|previously-used|EFO_OBSOLETE
congenital muscular dystrophy, alpha-dystroglycan related 1 http://identifiers.org/medgen/CN239202|Congenital Muscular Dystrophy, alpha-dystroglycan related|HIGH|clinvar-xrefs|NOT_CONTAINED http://identifiers.org/medgen/CN239202|Congenital Muscular Dystrophy, alpha-dystroglycan related|HIGH|clinvar-xrefs|NOT_CONTAINED
corneal dystrophy, recessive 1 http://identifiers.org/medgen/CN239343|Corneal Dystrophy, Recessive|HIGH|clinvar-xrefs|NOT_CONTAINED http://identifiers.org/medgen/CN239343|Corneal Dystrophy, Recessive|HIGH|clinvar-xrefs|NOT_CONTAINED
peroxisome biogenesis disorder 10a (zellweger) 1 http://purl.obolibrary.org/obo/MONDO_0013948|peroxisome biogenesis disorder 10A (Zellweger)|GOOD|eva-clinvar|EFO_CURRENT http://purl.obolibrary.org/obo/MONDO_0013948|peroxisome biogenesis disorder 10A (Zellweger)|GOOD|eva-clinvar|EFO_CURRENT http://purl.obolibrary.org/obo/MONDO_0013948|peroxisome biogenesis disorder 10A (Zellweger)|GOOD|clinvar-xrefs|EFO_CURRENT http://www.orpha.net/ORDO/Orphanet_912|Zellweger syndrome|GOOD|clinvar-xrefs|EFO_CURRENT http://identifiers.org/medgen/C3553999|Peroxisome biogenesis disorder 10A (Zellweger)|GOOD|clinvar-xrefs|NOT_CONTAINED https://www.omim.org/entry/614882|Peroxisome biogenesis disorder 10A (Zellweger)|GOOD|clinvar-xrefs|NOT_CONTAINED
severe myoclonic epilepsy in infancy 1 http://www.orpha.net/ORDO/Orphanet_33069|Dravet syndrome|NOT_SPECIFIED|previously-used|EFO_OBSOLETE http://purl.obolibrary.org/obo/MONDO_0014960|encephalopathy, progressive, early-onset, with brain edema and/or leukoencephalopathy|NOT_SPECIFIED|replacement|NOT_CONTAINED http://www.ebi.ac.uk/efo/EFO_1001900|myoclonic epilepsy|2|Orphanet:33069|EFO_CURRENT http://purl.obolibrary.org/obo/HP_0002123|Generalized myoclonic seizure|2|Orphanet:33069|EFO_CURRENT http://purl.obolibrary.org/obo/MONDO_0100079|developmental and epileptic encephalopathy, 6|2|Orphanet:33069|EFO_CURRENT http://purl.obolibrary.org/obo/HP_0011170|Generalized myoclonic-atonic seizure|2|Orphanet:33069|EFO_CURRENT http://purl.obolibrary.org/obo/MONDO_0100135|Dravet syndrome|2|Orphanet:33069|EFO_CURRENT http://purl.obolibrary.org/obo/MONDO_0014328|developmental and epileptic encephalopathy, 19|2|Orphanet:33069|NOT_CONTAINED http://purl.obolibrary.org/obo/MONDO_0012812|developmental and epileptic encephalopathy, 4|2|Orphanet:33069|NOT_CONTAINED http://purl.obolibrary.org/obo/MONDO_0100062|developmental and epileptic encephalopathy|3|Orphanet:33069|EFO_CURRENT http://purl.obolibrary.org/obo/MONDO_0016022|early myoclonic encephalopathy|3|Orphanet:33069|EFO_CURRENT
isolated nonsyndromic congenital heart disease 1 http://identifiers.org/medgen/CN239319|Isolated Nonsyndromic Congenital Heart Disease|HIGH|clinvar-xrefs|NOT_CONTAINED http://identifiers.org/medgen/CN239319|Isolated Nonsyndromic Congenital Heart Disease|HIGH|clinvar-xrefs|NOT_CONTAINED
hypogonadotropic hypogonadism 3 with or without anosmia 1 http://purl.obolibrary.org/obo/MONDO_0009482|hypogonadotropic hypogonadism 3 with or without anosmia|GOOD|clinvar-xrefs|EFO_CURRENT http://purl.obolibrary.org/obo/MONDO_0009482|hypogonadotropic hypogonadism 3 with or without anosmia|GOOD|clinvar-xrefs|EFO_CURRENT http://identifiers.org/medgen/C3550478|Hypogonadotropic hypogonadism 3 with or without anosmia|GOOD|clinvar-xrefs|NOT_CONTAINED https://www.omim.org/entry/244200|Hypogonadotropic hypogonadism 3 with or without anosmia|GOOD|clinvar-xrefs|NOT_CONTAINED
elfn1-related condition 1 http://purl.obolibrary.org/obo/MONDO_0045054|cancer-related condition|MEDIUM|http://purl.obolibrary.org/obo/mondo.owl|EFO_CURRENT http://purl.obolibrary.org/obo/MONDO_0021074|precancerous condition|MEDIUM|http://purl.obolibrary.org/obo/mondo.owl|EFO_CURRENT http://purl.obolibrary.org/obo/HP_0025256|Ameliorated by heat|MEDIUM|http://purl.obolibrary.org/obo/hp/hp-international.owl|NOT_CONTAINED http://purl.obolibrary.org/obo/HP_0032522|Ameliorated by immunosuppresion|MEDIUM|http://purl.obolibrary.org/obo/hp/hp-international.owl|NOT_CONTAINED http://www.orpha.net/ORDO/Orphanet_568065|EPHB4-related lymphatic-related hydrops fetalis|MEDIUM|https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_4.3.owl|NOT_CONTAINED http://www.orpha.net/ORDO/Orphanet_471012|RAS related|MEDIUM|https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_4.3.owl|NOT_CONTAINED
chédiak-higashi syndrome 1 http://www.orpha.net/ORDO/Orphanet_167|Chédiak-Higashi syndrome|NOT_SPECIFIED|previously-used|EFO_CURRENT
2-aminoadipic 2-oxoadipic aciduria 1 http://purl.obolibrary.org/obo/MONDO_0008774|2-aminoadipic 2-oxoadipic aciduria|NOT_SPECIFIED|previously-used|EFO_CURRENT http://purl.obolibrary.org/obo/MONDO_0008774|2-aminoadipic 2-oxoadipic aciduria|GOOD|eva-clinvar|EFO_CURRENT http://purl.obolibrary.org/obo/MONDO_0008774|2-aminoadipic 2-oxoadipic aciduria|GOOD|eva-clinvar|EFO_CURRENT http://purl.obolibrary.org/obo/MONDO_0008774|2-aminoadipic 2-oxoadipic aciduria|GOOD|clinvar-xrefs|EFO_CURRENT https://www.omim.org/entry/204750|2-aminoadipic 2-oxoadipic aciduria|GOOD|clinvar-xrefs|NOT_CONTAINED http://identifiers.org/medgen/C1859817|2-aminoadipic 2-oxoadipic aciduria|GOOD|clinvar-xrefs|NOT_CONTAINED
tp63-related spectrum disorders 1 http://identifiers.org/medgen/CN239305|TP63-Related Spectrum Disorders|HIGH|clinvar-xrefs|NOT_CONTAINED http://identifiers.org/medgen/CN239305|TP63-Related Spectrum Disorders|HIGH|clinvar-xrefs|NOT_CONTAINED
autosomal dominant kcnq1-related disease 1 http://purl.obolibrary.org/obo/MONDO_0018832|HTRA1-related autosomal dominant cerebral small vessel disease|MEDIUM|http://purl.obolibrary.org/obo/mondo.owl|EFO_CURRENT http://purl.obolibrary.org/obo/MONDO_0000426|autosomal dominant disease|MEDIUM|http://purl.obolibrary.org/obo/mondo.owl|EFO_CURRENT http://www.orpha.net/ORDO/Orphanet_482077|HTRA1-related autosomal dominant cerebral small vessel disease|MEDIUM|https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_4.3.owl|NOT_CONTAINED http://www.orpha.net/ORDO/Orphanet_497757|MME-related autosomal dominant Charcot Marie Tooth disease type 2|MEDIUM|https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_4.3.owl|NOT_CONTAINED http://purl.obolibrary.org/obo/HP_0000006|Autosomal dominant inheritance|MEDIUM|http://purl.obolibrary.org/obo/hp/hp-international.owl|NOT_CONTAINED http://purl.obolibrary.org/obo/HP_0003743|Genetic anticipation|MEDIUM|http://purl.obolibrary.org/obo/hp/hp-international.owl|NOT_CONTAINED
simvastatin response - toxicity 1 http://www.orpha.net/ORDO/Orphanet_240913|OBSOLETE: Simvastatin toxicity|MEDIUM|https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_4.3.owl|NOT_CONTAINED http://www.orpha.net/ORDO/Orphanet_529831|Letrozole toxicity|MEDIUM|https://www.orphadata.com/data/ontologies/ordo/last_version/ORDO_en_4.3.owl|NOT_CONTAINED http://purl.obolibrary.org/obo/MONDO_0027653|abacavir toxicity|MEDIUM|http://purl.obolibrary.org/obo/mondo.owl|NOT_CONTAINED http://purl.obolibrary.org/obo/MONDO_0023176|formaldehyde poisoning|MEDIUM|http://purl.obolibrary.org/obo/mondo.owl|NOT_CONTAINED
Loading

0 comments on commit e4cbf9a

Please sign in to comment.