Skip to content

Commit

Permalink
CVX and MeSH mappings (#147)
Browse files Browse the repository at this point in the history
CPT mappings to MeSH were problematic since CPT names its things without
the word "vaccine", so it will require a more custom script to make good
predictions.

I did several curations and left 48 for later.
  • Loading branch information
cthoyt authored Aug 23, 2023
1 parent 79a98f9 commit 340a87d
Show file tree
Hide file tree
Showing 5 changed files with 141 additions and 0 deletions.
42 changes: 42 additions & 0 deletions scripts/generate_vaccine_mappings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""Generate vaccine mappings."""

import click
from pyobo.sources.cpt import iter_terms

from biomappings import PredictionTuple
from biomappings.gilda_utils import append_gilda_predictions, get_grounder
from biomappings.resources import append_prediction_tuples
from biomappings.utils import get_script_url


def _iter_suffixed_predictions(grounder, provenance):
    """Yield one prediction per grounding hit for each term label plus " vaccine".

    Terms come from ``pyobo.sources.cpt.iter_terms``. Every label of a term
    (its name followed by the names of its synonyms) is suffixed with
    ``" vaccine"`` before being passed to ``grounder.ground``.

    :param grounder: Object whose ``ground(text)`` yields scored matches
        exposing ``term.db``, ``term.id`` and ``term.entry_name``.
    :param provenance: Value stored in the ``source`` field of every
        yielded prediction.
    """
    for term in iter_terms():
        # The primary name goes first, then each synonym's name.
        labels = [term.name]
        labels.extend(synonym.name for synonym in term.synonyms)
        for label in labels:
            for hit in grounder.ground(label + " vaccine"):
                matched = hit.term
                yield PredictionTuple(
                    source_prefix=term.prefix,
                    source_id=term.identifier,
                    source_name=term.name,
                    relation="skos:exactMatch",
                    target_prefix=matched.db,
                    target_identifier=matched.id,
                    target_name=matched.entry_name,
                    type="semapv:LexicalMatching",
                    # Every hit gets the same fixed confidence.
                    confidence=0.9,
                    source=provenance,
                )


@click.command()
def main():
    """Generate vaccine mappings."""
    # This script's URL is recorded as the provenance of every prediction.
    script_url = get_script_url(__file__)

    # Plain lexical predictions: CVX against MeSH/CPT/VO, then CPT against MeSH/VO.
    append_gilda_predictions("cvx", ["mesh", "cpt", "vo"], provenance=script_url)
    append_gilda_predictions("cpt", ["mesh", "vo"], provenance=script_url)

    # Second pass over the CPT terms with " vaccine" appended to each label.
    vaccine_grounder = get_grounder(["mesh", "vo"], versions=["2023", None])
    suffixed_predictions = list(
        _iter_suffixed_predictions(vaccine_grounder, script_url)
    )
    append_prediction_tuples(suffixed_predictions)


# Invoke the click command only when this file is executed as a script.
if __name__ == "__main__":
    main()
9 changes: 9 additions & 0 deletions src/biomappings/resources/incorrect.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,15 @@ clo 0037261 3T3-derived cell skos:exactMatch mesh D016475 3T3 Cells semapv:Manua
clo 0037287 COR123 cell skos:exactMatch efo 0002142 CORL23 semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:UnspecifiedMatching clo 0.8
clo 0051004 RCB0256 cell skos:exactMatch cellosaurus CVCL_1075 BALL-1 semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:UnspecifiedMatching clo 0.8
clo 0051005 RCB1882 cell skos:exactMatch cellosaurus CVCL_1075 BALL-1 semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:UnspecifiedMatching clo 0.8
cpt 90389 TIG skos:exactMatch vo 0010969 Tig semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/e55031/scripts/generate_vaccine_mappings.py 0.762
cpt 90581 anthrax skos:exactMatch mesh D000881 Anthrax semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/8d4bd2/scripts/generate_vaccine_mappings.py 0.762
cpt 90585 BCG skos:exactMatch mesh D009163 Mycobacterium bovis semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/8d4bd2/scripts/generate_vaccine_mappings.py 0.556
cpt 90665 Lyme disease skos:exactMatch mesh D008193 Lyme Disease semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/8d4bd2/scripts/generate_vaccine_mappings.py 0.772
cpt 90704 mumps skos:exactMatch mesh D009107 Mumps semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/8d4bd2/scripts/generate_vaccine_mappings.py 0.762
cpt 90705 measles skos:exactMatch mesh D008457 Measles semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/8d4bd2/scripts/generate_vaccine_mappings.py 0.762
cpt 90706 rubella skos:exactMatch mesh D012409 Rubella semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/8d4bd2/scripts/generate_vaccine_mappings.py 0.762
cpt 90716 varicella skos:exactMatch mesh D002644 Chickenpox semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/8d4bd2/scripts/generate_vaccine_mappings.py 0.54
cpt 90727 plague skos:exactMatch mesh D010930 Plague semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/8d4bd2/scripts/generate_vaccine_mappings.py 0.762
doid DOID:0001816 angiosarcoma skos:exactMatch mesh D006394 Hemangiosarcoma semapv:ManualMappingCuration orcid:0000-0003-4423-4370
doid DOID:0001816 angiosarcoma skos:exactMatch umls C0018923 Hemangiosarcoma semapv:ManualMappingCuration orcid:0000-0003-4423-4370
doid DOID:0001816 angiosarcoma skos:exactMatch umls C0278592 Adult Angiosarcoma semapv:ManualMappingCuration orcid:0000-0003-4423-4370
Expand Down
41 changes: 41 additions & 0 deletions src/biomappings/resources/mappings.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -3059,6 +3059,47 @@ clo 0037230 Ishikawa 3-H-12 cell skos:exactMatch cellosaurus CVCL_D199 Ishikawa
clo 0037291 MDAMB231 cell skos:exactMatch mesh D000092302 MDA-MB-231 Cells semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/746fde/scripts/generate_clo_mesh_mappings.py 0.549
clo 0037300 BALL-1 cell skos:exactMatch cellosaurus CVCL_1075 BALL-1 semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:UnspecifiedMatching clo 0.8
clo 0037339 tissue donor skos:exactMatch mesh D014019 Tissue Donors semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/746fde/scripts/generate_clo_mesh_mappings.py 0.54
cpt 90287 botulinum antitoxin skos:exactMatch mesh D001904 Botulinum Antitoxin semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/8d4bd2/scripts/generate_vaccine_mappings.py 0.762
cpt 90287 botulinum antitoxin skos:exactMatch vo 0006001 botulinum antitoxin semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/e55031/scripts/generate_vaccine_mappings.py 0.778
cpt 90291 CMVIG skos:exactMatch mesh C045781 cytomegalovirus-specific hyperimmune globulin semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/8d4bd2/scripts/generate_vaccine_mappings.py 0.556
cpt 90296 diphtheria antitoxin skos:exactMatch mesh D004166 Diphtheria Antitoxin semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/8d4bd2/scripts/generate_vaccine_mappings.py 0.762
cpt 90296 diphtheria antitoxin skos:exactMatch vo 0006005 diphtheria antitoxin semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/e55031/scripts/generate_vaccine_mappings.py 0.778
cpt 90371 HBIG skos:exactMatch mesh C045213 hepatitis B hyperimmune globulin semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/8d4bd2/scripts/generate_vaccine_mappings.py 0.556
cpt 90396 VZIG skos:exactMatch mesh C030799 varicella-zoster immune globulin semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/8d4bd2/scripts/generate_vaccine_mappings.py 0.556
cpt 90581 anthrax skos:exactMatch mesh D022122 Anthrax Vaccines semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/6301b7/scripts/generate_vaccine_mappings.py 0.9
cpt 90585 BCG skos:exactMatch mesh D001500 BCG Vaccine semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/6301b7/scripts/generate_vaccine_mappings.py 0.9
cpt 90585 BCG skos:exactMatch vo 0000771 BCG vaccine semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/e55031/scripts/generate_vaccine_mappings.py 0.9
cpt 90665 Lyme disease skos:exactMatch mesh D022123 Lyme Disease Vaccines semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/6301b7/scripts/generate_vaccine_mappings.py 0.9
cpt 90665 Lyme disease skos:exactMatch vo 0000764 Lyme disease vaccine semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/e55031/scripts/generate_vaccine_mappings.py 0.9
cpt 90685 Influenza, injectable,quadrivalent, preservative free, pediatric skos:exactMatch vo 0005462 Influenza, injectable,quadrivalent, preservative free, pediatric semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/e55031/scripts/generate_vaccine_mappings.py 0.778
cpt 90700 DTaP skos:exactMatch mesh D022681 Diphtheria-Tetanus-acellular Pertussis Vaccines semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/6301b7/scripts/generate_vaccine_mappings.py 0.9
cpt 90701 DTP skos:exactMatch mesh D015721 Diphtheria-Tetanus-Pertussis Vaccine semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/6301b7/scripts/generate_vaccine_mappings.py 0.9
cpt 90704 mumps skos:exactMatch mesh D009108 Mumps Vaccine semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/6301b7/scripts/generate_vaccine_mappings.py 0.9
cpt 90705 measles skos:exactMatch mesh D008458 Measles Vaccine semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/6301b7/scripts/generate_vaccine_mappings.py 0.9
cpt 90706 rubella skos:exactMatch mesh D012411 Rubella Vaccine semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/6301b7/scripts/generate_vaccine_mappings.py 0.9
cpt 90707 MMR skos:exactMatch mesh D022542 Measles-Mumps-Rubella Vaccine semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/6301b7/scripts/generate_vaccine_mappings.py 0.9
cpt 90710 MMRV skos:exactMatch mesh C050102 measles, mumps, rubella, varicella vaccine semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/6301b7/scripts/generate_vaccine_mappings.py 0.9
cpt 90715 Tdap skos:exactMatch vo 0003180 TDAP semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/e55031/scripts/generate_vaccine_mappings.py 0.762
cpt 90716 varicella skos:exactMatch mesh D019433 Chickenpox Vaccine semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/6301b7/scripts/generate_vaccine_mappings.py 0.9
cpt 90720 DTP-Hib skos:exactMatch mesh C080881 diphtheria-tetanus-pertussis-haemophilus b conjugate vaccine semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/6301b7/scripts/generate_vaccine_mappings.py 0.9
cpt 90727 plague skos:exactMatch mesh D010931 Plague Vaccine semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/6301b7/scripts/generate_vaccine_mappings.py 0.9
cpt 90728 BCG skos:exactMatch mesh D001500 BCG Vaccine semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/6301b7/scripts/generate_vaccine_mappings.py 0.9
cpt 90728 BCG skos:exactMatch vo 0000771 BCG vaccine semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/e55031/scripts/generate_vaccine_mappings.py 0.9
cpt 90756 Influenza, injectable, MDCK, quadrivalent, preservative skos:exactMatch vo 0005463 Influenza, injectable, MDCK, quadrivalent, preservative semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/e55031/scripts/generate_vaccine_mappings.py 0.778
cvx 10 poliovirus vaccine, inactivated skos:exactMatch mesh D011054 Poliovirus Vaccine, Inactivated semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/8d4bd2/scripts/generate_vaccine_mappings.py 0.725
cvx 103 meningococcal C conjugate vaccine skos:exactMatch mesh C410218 serogroup C meningococcal conjugate vaccine semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/8d4bd2/scripts/generate_vaccine_mappings.py 0.556
cvx 11 pertussis vaccine skos:exactMatch mesh D010567 Pertussis Vaccine semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/8d4bd2/scripts/generate_vaccine_mappings.py 0.762
cvx 12 diphtheria antitoxin skos:exactMatch mesh D004166 Diphtheria Antitoxin semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/8d4bd2/scripts/generate_vaccine_mappings.py 0.762
cvx 19 Bacillus Calmette-Guerin vaccine skos:exactMatch mesh D001500 BCG Vaccine semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/8d4bd2/scripts/generate_vaccine_mappings.py 0.54
cvx 23 plague vaccine skos:exactMatch mesh D010931 Plague Vaccine semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/8d4bd2/scripts/generate_vaccine_mappings.py 0.762
cvx 24 anthrax vaccine skos:exactMatch mesh D022122 Anthrax Vaccines semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/8d4bd2/scripts/generate_vaccine_mappings.py 0.54
cvx 27 botulinum antitoxin skos:exactMatch mesh D001904 Botulinum Antitoxin semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/8d4bd2/scripts/generate_vaccine_mappings.py 0.762
cvx 37 yellow fever vaccine skos:exactMatch mesh D022341 Yellow Fever Vaccine semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/8d4bd2/scripts/generate_vaccine_mappings.py 0.762
cvx 64 leishmaniasis vaccine skos:exactMatch mesh D054332 Leishmaniasis Vaccines semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/8d4bd2/scripts/generate_vaccine_mappings.py 0.54
cvx 66 Lyme disease vaccine skos:exactMatch mesh D022123 Lyme Disease Vaccines semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/8d4bd2/scripts/generate_vaccine_mappings.py 0.549
cvx 67 malaria vaccine skos:exactMatch mesh D017780 Malaria Vaccines semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/8d4bd2/scripts/generate_vaccine_mappings.py 0.54
cvx 801 AS03 Adjuvant skos:exactMatch mesh C550253 AS03 adjuvant semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/8d4bd2/scripts/generate_vaccine_mappings.py 0.772
cvx 87 immune globulin, intravenous skos:exactMatch mesh D016756 Immunoglobulins, Intravenous semapv:ManualMappingCuration orcid:0000-0003-4423-4370 semapv:LexicalMatching https://github.com/biomappings/biomappings/blob/8d4bd2/scripts/generate_vaccine_mappings.py 0.502
doid DOID:0040002 aspirin allergy skos:exactMatch umls C0004058 Allergy to aspirin semapv:ManualMappingCuration orcid:0000-0003-4423-4370
doid DOID:0040004 amoxicillin allergy skos:exactMatch umls C0571417 Allergy to amoxicillin semapv:ManualMappingCuration orcid:0000-0003-4423-4370
doid DOID:0040005 ceftriaxone allergy skos:exactMatch umls C0571463 Allergy to ceftriaxone semapv:ManualMappingCuration orcid:0000-0003-4423-4370
Expand Down
Loading

0 comments on commit 340a87d

Please sign in to comment.