From 3404deca3a9f0907ca67def0cbee51a40f67c130 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Mon, 9 Oct 2023 13:47:00 -0400 Subject: [PATCH 1/8] fix: update grouper API --- README.md | 115 +++++++++++++++++++++++++++++ server/lib/genome/groupers/base.rb | 2 +- 2 files changed, 116 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4097589a..2238c111 100644 --- a/README.md +++ b/README.md @@ -87,6 +87,121 @@ rails s Navigate to `localhost:3000/api/graphiql` in your browser. If the example query provided runs successfully, then you're all set. +### Data loading + +To perform a data load from scratch, first run the `reset` task to provide a clean, seeded DB: + +```shell +rake db:reset +``` + +Most DGIdb data comes from static files, typically called `claims.tsv`. The data loader classes expect `server/lib/data/` to contain the following files: + +``` +lib/data +├── bader_lab +│ └── claims.tsv +├── cancer_commons +│ └── claims.tsv +├── caris_molecular_intelligence +│ └── claims.tsv +├── cgi +│ └── claims.tsv +├── chembl +│ └── chembl.db +├── clearity_foundation_biomarkers +│ └── claims.tsv +├── clearity_foundation_clinical_trial +│ └── claims.tsv +├── cosmic +│ └── claims.csv +├── dgene +│ └── claims.tsv +├── drugbank +│ └── claims.xml +├── dtc +│ └── claims.csv +├── ensembl +│ └── claims.tsv +├── entrez +│ └── claims.tsv +├── fda +│ └── claims.tsv +├── foundation_one_genes +│ └── claims.tsv +├── go +│ └── targets.tsv +├── guide_to_pharmacology +│ ├── interactions.csv +│ └── targets_and_families.csv +├── hingorani_casas +│ ├── aag1166_Table S1.xlsx +│ └── claims.tsv +├── hopkins_groom +│ └── claims.tsv +├── human_protein_atlas +│ └── claims.tsv +├── idg +│ ├── claims.json +│ └── claims.tsv +├── msk_impact +│ └── claims.tsv +├── my_cancer_genome +│ └── claims.tsv +├── my_cancer_genome_clinical_trial +│ └── claims.tsv +├── nci +│ ├── claims.tsv +│ └── claims.xml +├── oncokb +│ ├── drug_claim.csv +│ ├── gene_claim.csv +│ ├── gene_claim_aliases.csv 
+│ ├── interaction_claim.csv +│ ├── interaction_claim_attributes.csv +│ └── interaction_claim_links.csv +├── oncomine +│ └── claims.tsv +├── pharmgkb +│ └── claims.tsv +├── russ_lampel +│ └── claims.tsv +├── talc +│ └── claims.tsv +├── tdg_clinical_trial +│ └── claims.tsv +├── tempus +│ └── claims.tsv +├── tend +│ └── claims.tsv +└── ttd + └── claims.csv +``` + +First, load claims: + +```shell +rake dgidb:import:all +``` + +Then, run grouping. By default, the groupers will expect a normalizer service to be running locally on port 8000; use the `THERAPY_URL_BASE` and `GENE_URL_BASE` environment variables to specify alternate hosts: + +```shell +export THERAPY_URL_BASE=http://localhost:7999 # no trailing backslash +rake dgidb:group:drugs +export GENE_URL_BASE=http://localhost:7998 # no trailing backslash +rake dgidb:group:genes +rake dgidb:group:interactions +``` + +Finally, normalize remaining metadata: + +```shell +rake dgidb:normalize:drug_approval_types +rake dgidb:normalize:drug_types +rake dgidb:normalize:populate_source_counters +``` + ### Client setup Navigate to the [/client directory](/client): diff --git a/server/lib/genome/groupers/base.rb b/server/lib/genome/groupers/base.rb index 58c73ad9..1b44b6fa 100644 --- a/server/lib/genome/groupers/base.rb +++ b/server/lib/genome/groupers/base.rb @@ -27,7 +27,7 @@ def fetch_json_response(url) def fetch_source_meta url = URI("#{@normalizer_url_root}search?q=") body = fetch_json_response(url) - body['source_matches'].reduce({}) { |map, source| map.update(source['source'] => source['source_meta_']) } + body['source_matches'].transform_values { |value| value['source_meta_'] } end # Normalize claim terms From a44eb060414a0a0538a9749985c58727b07b2119 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Mon, 11 Dec 2023 10:06:30 -0500 Subject: [PATCH 2/8] iterating --- server/lib/genome/groupers/base.rb | 8 +-- server/lib/genome/groupers/drug_grouper.rb | 9 ++-- server/lib/genome/groupers/gene_grouper.rb | 58 
++++++++++++---------- 3 files changed, 41 insertions(+), 34 deletions(-) diff --git a/server/lib/genome/groupers/base.rb b/server/lib/genome/groupers/base.rb index 1b44b6fa..55af7900 100644 --- a/server/lib/genome/groupers/base.rb +++ b/server/lib/genome/groupers/base.rb @@ -25,7 +25,7 @@ def fetch_json_response(url) end def fetch_source_meta - url = URI("#{@normalizer_url_root}search?q=") + url = URI("#{@normalizer_host}search?q=") body = fetch_json_response(url) body['source_matches'].transform_values { |value| value['source_meta_'] } end @@ -60,7 +60,7 @@ def normalize_claim(primary_term, claim_aliases) response = retrieve_normalizer_response(claim_alias.alias) match_type = response['match_type'] if !response.nil? && match_type > 0 - concept_id = response[@descriptor_name][@id_name] + concept_id = response[@descriptor_name]['id'][15..] if !claim_responses.key?(concept_id) claim_responses[concept_id] = response end @@ -103,7 +103,7 @@ def retrieve_extension(descriptor, type, default = nil) end def retrieve_normalizer_response(term) - body = fetch_json_response("#{@normalizer_url_root}normalize?q=#{CGI.escape(term)}") + body = fetch_json_response("#{@normalizer_host}normalize?q=#{CGI.escape(term)}") @term_to_match_dict[term.upcase] = get_concept_id(body) unless term == '' || body.nil? 
body @@ -114,7 +114,7 @@ def key_non_nil_match(term) end def retrieve_normalizer_data(term) - body = fetch_json_response("#{@normalizer_url_root}normalize_unmerged?q=#{CGI.escape(term)}") + body = fetch_json_response("#{@normalizer_host}normalize_unmerged?q=#{CGI.escape(term)}") body['source_matches'] end end diff --git a/server/lib/genome/groupers/drug_grouper.rb b/server/lib/genome/groupers/drug_grouper.rb index c9c20260..ee803b92 100644 --- a/server/lib/genome/groupers/drug_grouper.rb +++ b/server/lib/genome/groupers/drug_grouper.rb @@ -4,8 +4,11 @@ class DrugGrouper < Genome::Groupers::Base attr_reader :term_to_match_dict def initialize - url_base = ENV['THERAPY_URL_BASE'] || 'http://localhost:8000' - @normalizer_url_root = "#{url_base}/therapy/" + url_base = ENV['THERAPY_HOSTNAME'] || 'http://localhost:8000' + if !url_base.ends_with? "/" + url_base += "/" + end + @normalizer_host = "#{url_base}/therapy/" @term_to_match_dict = {} @@ -50,7 +53,7 @@ def run(source_id = nil) end def set_response_structure - url = URI("#{@normalizer_url_root}search?q=") + url = URI("#{@normalizer_host}search?q=") body = fetch_json_response(url) version = body['service_meta_']['version'] if version < '0.4.0' diff --git a/server/lib/genome/groupers/gene_grouper.rb b/server/lib/genome/groupers/gene_grouper.rb index ae33ae87..284b77ae 100644 --- a/server/lib/genome/groupers/gene_grouper.rb +++ b/server/lib/genome/groupers/gene_grouper.rb @@ -4,8 +4,12 @@ class GeneGrouper < Genome::Groupers::Base attr_reader :term_to_match_dict def initialize - url_base = ENV['GENE_URL_BASE'] || 'http://localhost:8000' - @normalizer_url_root = "#{url_base}/gene/" + url_base = ENV['GENE_HOSTNAME'] || 'http://localhost:8000' + if !url_base.ends_with? 
"/" + url_base += "/" + end + @normalizer_host = "#{url_base}gene/" + @descriptor_name = 'gene' @term_to_match_dict = {} @sources = {} @@ -33,7 +37,7 @@ def run(source_id = nil) puts "Grouping #{claims.length} ungrouped gene claims from #{source_name}" end - set_response_structure + # set_response_structure create_sources pbar = ProgressBar.create(title: 'Grouping genes', total: claims.size, format: "%t: %p%% %a |%B|") @@ -44,8 +48,8 @@ def run(source_id = nil) if normalized_gene.is_a? String normalized_id = normalized_gene else - normalized_id = normalized_gene[@descriptor_name][@id_name] - create_new_gene normalized_gene[@descriptor_name] if Gene.find_by(concept_id: normalized_id).nil? + normalized_id = normalized_gene['normalize_id'] + create_new_gene normalized_gene['gene'] if Gene.find_by(concept_id: normalized_id).nil? end add_claim_to_gene(gene_claim, normalized_id) @@ -53,19 +57,19 @@ def run(source_id = nil) end end - def set_response_structure - @descriptor_name = 'gene_descriptor' - - url = URI("#{@normalizer_url_root}search?q=") - body = fetch_json_response(url) - version = body['service_meta_']['version'] - if version < '0.2.0' - @id_name = 'gene_id' - else - @id_name = 'gene' - end - end - + # def set_response_structure + # @descriptor_name = 'gene_descriptor' + # + # url = URI("#{@normalizer_host}search?q=") + # body = fetch_json_response(url) + # version = body['service_meta_']['version'] + # if version < '0.2.0' + # @id_name = 'gene_id' + # else + # @id_name = 'gene' + # end + # end + # def create_sources gene_source_type = SourceType.find_by(type: 'gene') @@ -131,7 +135,7 @@ def create_sources end def get_concept_id(response) - response[@descriptor_name][@id_name] unless response['match_type'].zero? + response['normalized_id'] unless response['match_type'].zero? 
end def create_gene_claim(record, source) @@ -209,7 +213,7 @@ def add_grouper_claim_attribute(claim, record) end def add_grouper_data(gene, descriptor) - gene_data = retrieve_normalizer_data(descriptor[@id_name]) + gene_data = retrieve_normalizer_data(descriptor['id'][15..]) gene_data.each do |source_name, source_data| source = @sources[source_name.to_sym] @@ -223,19 +227,19 @@ def add_grouper_data(gene, descriptor) end end - def create_new_gene(descriptor) - name = if descriptor.fetch('label').blank? - descriptor[@id_name] + def create_new_gene(gene_response) + name = if gene_response.fetch('label').blank? + gene_response['id'][15..] else - descriptor['label'] + gene_response['label'] end gene = Gene.where( - concept_id: descriptor[@id_name], + concept_id: gene_response['id'][15..], name: name, - long_name: retrieve_extension(descriptor, 'approved_name') + long_name: retrieve_extension(gene_response, 'approved_name') ).first_or_create - add_grouper_data(gene, descriptor) + add_grouper_data(gene, gene_response) end def add_claim_attributes(claim, gene) From 2a43b9f9aa4cb72c49e5f84dd99fd68a1fb201bc Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Mon, 11 Dec 2023 14:17:06 -0500 Subject: [PATCH 3/8] more updates --- server/lib/genome/groupers/base.rb | 6 +++- server/lib/genome/groupers/drug_grouper.rb | 22 ++------------- server/lib/genome/groupers/gene_grouper.rb | 33 ++++++---------------- 3 files changed, 16 insertions(+), 45 deletions(-) diff --git a/server/lib/genome/groupers/base.rb b/server/lib/genome/groupers/base.rb index 55af7900..31d15395 100644 --- a/server/lib/genome/groupers/base.rb +++ b/server/lib/genome/groupers/base.rb @@ -60,7 +60,7 @@ def normalize_claim(primary_term, claim_aliases) response = retrieve_normalizer_response(claim_alias.alias) match_type = response['match_type'] if !response.nil? && match_type > 0 - concept_id = response[@descriptor_name]['id'][15..] 
+ concept_id = response['normalized_id'] if !claim_responses.key?(concept_id) claim_responses[concept_id] = response end @@ -93,6 +93,10 @@ def normalize_claim(primary_term, claim_aliases) response end + def get_concept_id(response) + response['normalized_id'] unless response['match_type'].zero? + end + def retrieve_extension(descriptor, type, default = nil) unless descriptor.fetch('extensions').blank? descriptor['extensions'].each do |extension| diff --git a/server/lib/genome/groupers/drug_grouper.rb b/server/lib/genome/groupers/drug_grouper.rb index ee803b92..48853755 100644 --- a/server/lib/genome/groupers/drug_grouper.rb +++ b/server/lib/genome/groupers/drug_grouper.rb @@ -8,7 +8,8 @@ def initialize if !url_base.ends_with? "/" url_base += "/" end - @normalizer_host = "#{url_base}/therapy/" + @normalizer_host = "#{url_base}therapy/" + @descriptor_name = 'therapy' @term_to_match_dict = {} @@ -52,19 +53,6 @@ def run(source_id = nil) end end - def set_response_structure - url = URI("#{@normalizer_host}search?q=") - body = fetch_json_response(url) - version = body['service_meta_']['version'] - if version < '0.4.0' - @descriptor_name = 'therapy_descriptor' - @id_name = 'therapy_id' - else - @descriptor_name = 'therapeutic_descriptor' - @id_name = 'therapeutic' - end - end - def create_sources drug_source_type = SourceType.find_by(type: 'drug') @@ -168,10 +156,6 @@ def create_sources } end - def get_concept_id(response) - response[@descriptor_name][@id_name] unless response['match_type'].zero? 
- end - def produce_concept_id_nomenclature(concept_id) case concept_id when /rxcui:/ @@ -293,7 +277,7 @@ def add_grouper_claim_aliases(claim, record) end def add_grouper_data(drug, descriptor) - drug_data = retrieve_normalizer_data(descriptor[@id_name]) + gene_data = retrieve_normalizer_data(descriptor['id'][15..]) drug_data.each do |source_name, source_data| next if %w[DrugBank ChEMBL GuideToPHARMACOLOGY].include?(source_name) diff --git a/server/lib/genome/groupers/gene_grouper.rb b/server/lib/genome/groupers/gene_grouper.rb index 284b77ae..ded9a00c 100644 --- a/server/lib/genome/groupers/gene_grouper.rb +++ b/server/lib/genome/groupers/gene_grouper.rb @@ -48,8 +48,8 @@ def run(source_id = nil) if normalized_gene.is_a? String normalized_id = normalized_gene else - normalized_id = normalized_gene['normalize_id'] - create_new_gene normalized_gene['gene'] if Gene.find_by(concept_id: normalized_id).nil? + normalized_id = normalized_gene['normalized_id'] + create_new_gene(normalized_gene['gene'], normalized_id) if Gene.find_by(concept_id: normalized_id).nil? end add_claim_to_gene(gene_claim, normalized_id) @@ -57,19 +57,6 @@ def run(source_id = nil) end end - # def set_response_structure - # @descriptor_name = 'gene_descriptor' - # - # url = URI("#{@normalizer_host}search?q=") - # body = fetch_json_response(url) - # version = body['service_meta_']['version'] - # if version < '0.2.0' - # @id_name = 'gene_id' - # else - # @id_name = 'gene' - # end - # end - # def create_sources gene_source_type = SourceType.find_by(type: 'gene') @@ -134,10 +121,6 @@ def create_sources } end - def get_concept_id(response) - response['normalized_id'] unless response['match_type'].zero? 
- end - def create_gene_claim(record, source) GeneClaim.where( name: record['symbol'], @@ -212,8 +195,8 @@ def add_grouper_claim_attribute(claim, record) ) end - def add_grouper_data(gene, descriptor) - gene_data = retrieve_normalizer_data(descriptor['id'][15..]) + def add_grouper_data(gene, descriptor, normalized_id) + gene_data = retrieve_normalizer_data(normalized_id) gene_data.each do |source_name, source_data| source = @sources[source_name.to_sym] @@ -227,19 +210,19 @@ def add_grouper_data(gene, descriptor) end end - def create_new_gene(gene_response) + def create_new_gene(gene_response, normalized_id) name = if gene_response.fetch('label').blank? - gene_response['id'][15..] + normalized_id else gene_response['label'] end gene = Gene.where( - concept_id: gene_response['id'][15..], + concept_id: normalized_id, name: name, long_name: retrieve_extension(gene_response, 'approved_name') ).first_or_create - add_grouper_data(gene, gene_response) + add_grouper_data(gene, gene_response, normalized_id) end def add_claim_attributes(claim, gene) From cef1e5e5a5a74eb34731e24c6a00455f952a0e68 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Thu, 11 Apr 2024 20:57:34 -0400 Subject: [PATCH 4/8] Update env vars --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 2238c111..1f63c8a3 100644 --- a/README.md +++ b/README.md @@ -184,12 +184,12 @@ First, load claims: rake dgidb:import:all ``` -Then, run grouping. By default, the groupers will expect a normalizer service to be running locally on port 8000; use the `THERAPY_URL_BASE` and `GENE_URL_BASE` environment variables to specify alternate hosts: +Then, run grouping. 
By default, the groupers will expect a normalizer service to be running locally on port 8000; use the `THERAPY_HOSTNAME` and `GENE_HOSTNAME` environment variables to specify alternate hosts: ```shell -export THERAPY_URL_BASE=http://localhost:7999 # no trailing backslash +export THERAPY_HOSTNAME=http://localhost:7999 # trailing slash is optional rake dgidb:group:drugs -export GENE_URL_BASE=http://localhost:7998 # no trailing backslash +export GENE_HOSTNAME=http://localhost:7998 # trailing slash is optional rake dgidb:group:genes rake dgidb:group:interactions ``` From 13ace132eba186763a5bc4162efafd9dc7dd8a34 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Thu, 11 Apr 2024 20:57:59 -0400 Subject: [PATCH 5/8] Update citations --- .../lib/genome/importers/file_importers/drugbank.rb | 12 ++++++------ .../lib/genome/importers/file_importers/pharmgkb.rb | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/server/lib/genome/importers/file_importers/drugbank.rb b/server/lib/genome/importers/file_importers/drugbank.rb index 7547848a..a200c0c3 100644 --- a/server/lib/genome/importers/file_importers/drugbank.rb +++ b/server/lib/genome/importers/file_importers/drugbank.rb @@ -26,12 +26,12 @@ def create_new_source { base_url: 'https://go.drugbank.com/drugs', site_url: 'https://go.drugbank.com/', - citation: 'Wishart DS, Feunang YD, Guo AC, Lo EJ, Marcu A, Grant JR, Sajed T, Johnson D, Li C, Sayeeda Z, Assempour N, Iynkkaran I, Liu Y, Maciejewski A, Gale N, Wilson A, Chin L, Cummings R, Le D, Pon A, Knox C, Wilson M. DrugBank 5.0: a major update to the DrugBank database for 2018. Nucleic Acids Res. 2018 Jan 4;46(D1):D1074-D1082. doi: 10.1093/nar/gkx1037. PMID: 29126136; PMCID: PMC5753335.', - citation_short: 'Wishart DS, et al. DrugBank 5.0: a major update to the DrugBank database for 2018. Nucleic Acids Res. 
2018 Jan 4;46(D1):D1074-D1082.', - pmid: '29126136', - pmcid: 'PMC5753335', - doi: '10.1093/nar/gkx1037', - source_db_version: '5.1.10', + citation: 'Knox C, Wilson M, Klinger CM, Franklin M, Oler E, Wilson A, Pon A, Cox J, Chin NEL, Strawbridge SA, Garcia-Patino M, Kruger R, Sivakumaran A, Sanford S, Doshi R, Khetarpal N, Fatokun O, Doucet D, Zubkowski A, Rayat DY, Jackson H, Harford K, Anjum A, Zakir M, Wang F, Tian S, Lee B, Liigand J, Peters H, Wang RQR, Nguyen T, So D, Sharp M, da Silva R, Gabriel C, Scantlebury J, Jasinski M, Ackerman D, Jewison T, Sajed T, Gautam V, Wishart DS. DrugBank 6.0: the DrugBank Knowledgebase for 2024. Nucleic Acids Res. 2024 Jan 5;52(D1):D1265-D1275. doi: 10.1093/nar/gkad976. PMID: 37953279; PMCID: PMC10767804.', + citation_short: 'Knox C, et al. DrugBank 6.0: the DrugBank Knowledgebase for 2024. Nucleic Acids Res. 2024 Jan 5;52(D1):D1265-D1275.', + pmid: '37953279', + pmcid: 'PMC10767804', + doi: '10.1093/nar/gkad976', + source_db_version: '5.1.12', source_db_name: 'DrugBank', full_name: 'DrugBank - Open Data Drug & Drug Target Database', license: License::CUSTOM_NON_COMMERCIAL, diff --git a/server/lib/genome/importers/file_importers/pharmgkb.rb b/server/lib/genome/importers/file_importers/pharmgkb.rb index 338855ad..88dbfdec 100644 --- a/server/lib/genome/importers/file_importers/pharmgkb.rb +++ b/server/lib/genome/importers/file_importers/pharmgkb.rb @@ -23,7 +23,7 @@ def create_new_source pmid: '34216021', pmcid: 'PMC8457105', doi: '10.1002/cpt.2350', - source_db_version: '2020-08-18', # using static file, see issue #420 + source_db_version: '2024-04-05', # using static file, see issue #420 source_db_name: source_db_name, full_name: 'PharmGKB - The Pharmacogenomics Knowledgebase', license: License::CC_BY_SA_4_0, From c021f38c333bf8fe4f7717cf6c0d50212a4ee9bd Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Thu, 11 Apr 2024 21:06:31 -0400 Subject: [PATCH 6/8] Update groupers --- server/lib/genome/groupers/drug_grouper.rb | 23 
+++++++++++----------- server/lib/genome/groupers/gene_grouper.rb | 9 ++++----- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/server/lib/genome/groupers/drug_grouper.rb b/server/lib/genome/groupers/drug_grouper.rb index 48853755..be02c332 100644 --- a/server/lib/genome/groupers/drug_grouper.rb +++ b/server/lib/genome/groupers/drug_grouper.rb @@ -9,7 +9,6 @@ def initialize url_base += "/" end @normalizer_host = "#{url_base}therapy/" - @descriptor_name = 'therapy' @term_to_match_dict = {} @@ -32,7 +31,7 @@ def run(source_id = nil) puts "Grouping #{claims.length} ungrouped drug claims from #{source_name}" end - set_response_structure + # set_response_structure create_sources @@ -44,8 +43,8 @@ def run(source_id = nil) if normalized_drug.is_a? String normalized_id = normalized_drug else - normalized_id = normalized_drug[@descriptor_name][@id_name] - create_new_drug(normalized_drug[@descriptor_name]) if Drug.find_by(concept_id: normalized_id).nil? + normalized_id = normalized_drug['normalized_id'] + create_new_drug(normalized_drug['therapeutic_agent'], normalized_id) if Drug.find_by(concept_id: normalized_id).nil? end add_claim_to_drug(drug_claim, normalized_id) @@ -276,8 +275,8 @@ def add_grouper_claim_aliases(claim, record) end end - def add_grouper_data(drug, descriptor) - gene_data = retrieve_normalizer_data(descriptor['id'][15..]) + def add_grouper_data(drug, drug_response, concept_id) + drug_data = retrieve_normalizer_data(concept_id) drug_data.each do |source_name, source_data| next if %w[DrugBank ChEMBL GuideToPHARMACOLOGY].include?(source_name) @@ -293,15 +292,15 @@ def add_grouper_data(drug, descriptor) end end - def create_new_drug(descriptor) - name = if descriptor.fetch('label').blank? - descriptor[@id_name] + def create_new_drug(drug_response, concept_id) + name = if drug_response['label'].nil? || drug_response['label'].blank? 
+ concept_id else - descriptor['label'] + drug_response['label'] end - drug = Drug.where(concept_id: descriptor[@id_name], name: name.upcase).first_or_create + drug = Drug.where(concept_id: concept_id, name: name.upcase).first_or_create - add_grouper_data(drug, descriptor) + add_grouper_data(drug, drug_response, concept_id) end def find_drug_attribute(drug_claim_attribute) diff --git a/server/lib/genome/groupers/gene_grouper.rb b/server/lib/genome/groupers/gene_grouper.rb index ded9a00c..31b581a9 100644 --- a/server/lib/genome/groupers/gene_grouper.rb +++ b/server/lib/genome/groupers/gene_grouper.rb @@ -9,7 +9,6 @@ def initialize url_base += "/" end @normalizer_host = "#{url_base}gene/" - @descriptor_name = 'gene' @term_to_match_dict = {} @sources = {} @@ -81,8 +80,8 @@ def create_sources source_db_version: source_meta['HGNC']['version'], base_url: 'https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/', site_url: 'https://www.genenames.org', - citation: 'Tweedie S, Braschi B, Gray K, Jones TEM, Seal RL, Yates B, Bruford EA. Genenames.org: the HGNC and VGNC resources in 2021. Nucleic Acids Res. 2021 Jan 8;49(D1):D939-D946. doi: 10.1093/nar/gkaa980. PMID: 33152070; PMCID: PMC7779007.', - citation_short: 'Tweedie S, et al. Genenames.org: the HGNC and VGNC resources in 2021. Nucleic Acids Res. 2021 Jan 8;49(D1):D939-D946.', + citation: 'Seal RL, Braschi B, Gray K, Jones TEM, Tweedie S, Haim-Vilmovsky L, Bruford EA. Genenames.org: the HGNC resources in 2023. Nucleic Acids Res. 2023 Jan 6;51(D1):D1003-D1009. doi: 10.1093/nar/gkac888. PMID: 36243972; PMCID: PMC9825485.', + citation_short: 'Seal RL, et al. Genenames.org: the HGNC resources in 2023. Nucleic Acids Res. 
2023 Jan 6;51(D1):D1003-D1009.', pmid: '33152070', pmcid: 'PMC7779007', doi: '10.1093/nar/gkaa980', @@ -96,8 +95,8 @@ def create_sources source_db_version: source_meta['Ensembl']['version'], base_url: 'https://ensembl.org/Homo_sapiens/Gene/Summary?g=', site_url: 'https://ensembl.org', - citation: 'Cunningham F, Allen JE, Allen J, Alvarez-Jarreta J, Amode MR, Armean IM, Austine-Orimoloye O, Azov AG, Barnes I, Bennett R, Berry A, Bhai J, Bignell A, Billis K, Boddu S, Brooks L, Charkhchi M, Cummins C, Da Rin Fioretto L, Davidson C, Dodiya K, Donaldson S, El Houdaigui B, El Naboulsi T, Fatima R, Giron CG, Genez T, Martinez JG, Guijarro-Clarke C, Gymer A, Hardy M, Hollis Z, Hourlier T, Hunt T, Juettemann T, Kaikala V, Kay M, Lavidas I, Le T, Lemos D, Marugán JC, Mohanan S, Mushtaq A, Naven M, Ogeh DN, Parker A, Parton A, Perry M, Piližota I, Prosovetskaia I, Sakthivel MP, Salam AIA, Schmitt BM, Schuilenburg H, Sheppard D, Pérez-Silva JG, Stark W, Steed E, Sutinen K, Sukumaran R, Sumathipala D, Suner MM, Szpak M, Thormann A, Tricomi FF, Urbina-Gómez D, Veidenberg A, Walsh TA, Walts B, Willhoft N, Winterbottom A, Wass E, Chakiachvili M, Flint B, Frankish A, Giorgetti S, Haggerty L, Hunt SE, IIsley GR, Loveland JE, Martin FJ, Moore B, Mudge JM, Muffato M, Perry E, Ruffier M, Tate J, Thybert D, Trevanion SJ, Dyer S, Harrison PW, Howe KL, Yates AD, Zerbino DR, Flicek P. Ensembl 2022. Nucleic Acids Res. 2022 Jan 7;50(D1):D988-D995. doi: 10.1093/nar/gkab1049. PMID: 34791404; PMCID: PMC8728283.', - citation_short: 'Cunningham F, et al. Ensembl 2022. Nucleic Acids Res. 
2022 Jan 7;50(D1):D988-D995.', + citation: 'Harrison PW, Amode MR, Austine-Orimoloye O, Azov AG, Barba M, Barnes I, Becker A, Bennett R, Berry A, Bhai J, Bhurji SK, Boddu S, Branco Lins PR, Brooks L, Ramaraju SB, Campbell LI, Martinez MC, Charkhchi M, Chougule K, Cockburn A, Davidson C, De Silva NH, Dodiya K, Donaldson S, El Houdaigui B, Naboulsi TE, Fatima R, Giron CG, Genez T, Grigoriadis D, Ghattaoraya GS, Martinez JG, Gurbich TA, Hardy M, Hollis Z, Hourlier T, Hunt T, Kay M, Kaykala V, Le T, Lemos D, Lodha D, Marques-Coelho D, Maslen G, Merino GA, Mirabueno LP, Mushtaq A, Hossain SN, Ogeh DN, Sakthivel MP, Parker A, Perry M, Piližota I, Poppleton D, Prosovetskaia I, Raj S, Pérez-Silva JG, Salam AIA, Saraf S, Saraiva-Agostinho N, Sheppard D, Sinha S, Sipos B, Sitnik V, Stark W, Steed E, Suner MM, Surapaneni L, Sutinen K, Tricomi FF, Urbina-Gómez D, Veidenberg A, Walsh TA, Ware D, Wass E, Willhoft NL, Allen J, Alvarez-Jarreta J, Chakiachvili M, Flint B, Giorgetti S, Haggerty L, Ilsley GR, Keatley J, Loveland JE, Moore B, Mudge JM, Naamati G, Tate J, Trevanion SJ, Winterbottom A, Frankish A, Hunt SE, Cunningham F, Dyer S, Finn RD, Martin FJ, Yates AD. Ensembl 2024. Nucleic Acids Res. 2024 Jan 5;52(D1):D891-D899. doi: 10.1093/nar/gkad1049. PMID: 37953337; PMCID: PMC10767893.', + citation_short: 'Harrison PW, et al. Ensembl 2024. Nucleic Acids Res. 
2024 Jan 5;52(D1):D891-D899.', pmid: '34791404', pmcid: 'PMC8728283', doi: '10.1093/nar/gkab1049', From f020e81c8a252639a0bfcf46f30f8cb26b6cd1fe Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Thu, 11 Apr 2024 21:08:16 -0400 Subject: [PATCH 7/8] update file dir --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 1f63c8a3..ffb4383d 100644 --- a/README.md +++ b/README.md @@ -135,7 +135,6 @@ lib/data │ ├── interactions.csv │ └── targets_and_families.csv ├── hingorani_casas -│ ├── aag1166_Table S1.xlsx │ └── claims.tsv ├── hopkins_groom │ └── claims.tsv From 8df4a56e7fd28b3a7e4fc0171981a37bce89e904 Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Thu, 11 Apr 2024 21:10:07 -0400 Subject: [PATCH 8/8] cleanup --- server/lib/genome/groupers/drug_grouper.rb | 1 - server/lib/genome/groupers/gene_grouper.rb | 1 - 2 files changed, 2 deletions(-) diff --git a/server/lib/genome/groupers/drug_grouper.rb b/server/lib/genome/groupers/drug_grouper.rb index be02c332..3acd2b66 100644 --- a/server/lib/genome/groupers/drug_grouper.rb +++ b/server/lib/genome/groupers/drug_grouper.rb @@ -31,7 +31,6 @@ def run(source_id = nil) puts "Grouping #{claims.length} ungrouped drug claims from #{source_name}" end - # set_response_structure create_sources diff --git a/server/lib/genome/groupers/gene_grouper.rb b/server/lib/genome/groupers/gene_grouper.rb index 31b581a9..ea4b35ea 100644 --- a/server/lib/genome/groupers/gene_grouper.rb +++ b/server/lib/genome/groupers/gene_grouper.rb @@ -36,7 +36,6 @@ def run(source_id = nil) puts "Grouping #{claims.length} ungrouped gene claims from #{source_name}" end - # set_response_structure create_sources pbar = ProgressBar.create(title: 'Grouping genes', total: claims.size, format: "%t: %p%% %a |%B|")