Skip to content

Commit

Permalink
[#805] Export Metadata Subfield Funding References
Browse files Browse the repository at this point in the history
Add a script to convert a CSV of funding data into a Ruby hash stored in a
text file. Use a function to read the file and extract the funding data.
closes #805
  • Loading branch information
Meowcenary committed Mar 1, 2021
1 parent d480fa8 commit 8b3346d
Show file tree
Hide file tree
Showing 5 changed files with 167 additions and 17 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"ns0646000"=>[{:funder=>{:name=>"National Library of Medicine (NLM)", :identifier=>"0060t0j89", :scheme=>"ror"}, :award=>{:title=>"", :number=>"F37 LM009568 ", :identifier=>"", :scheme=>""}}], "n870zq819"=>[{:funder=>{:name=>"National Library of Medicine (NLM)", :identifier=>"0060t0j89", :scheme=>"ror"}, :award=>{:title=>"", :number=>"F37 LM009568 ", :identifier=>"", :scheme=>""}}], "9z902z89q"=>[{:funder=>{:name=>"National Library of Medicine (NLM)", :identifier=>"0060t0j89", :scheme=>"ror"}, :award=>{:title=>"", :number=>"K01LM011973-01", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"National Center for Advancing Translational Sciences (NCATS)", :identifier=>"04pw6fb54", :scheme=>"ror"}, :award=>{:title=>"", :number=>"UL1RR025741", :identifier=>"", :scheme=>""}}], "0z708w45r"=>[{:funder=>{:name=>"Swiss National Science Foundation", :identifier=>"00yjd3n13", :scheme=>"ror"}, :award=>{:title=>"", :number=>"3200B0-108261", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"Mach-Gaensslen Foundation", :identifier=>"0009ezc57", :scheme=>"ror"}, :award=>{:title=>"", :number=>"", :identifier=>"", :scheme=>""}}], "hx11xf327"=>[{:funder=>{:name=>"Otho S. A. Sprague Memorial Institute", :identifier=>"00c2b9157", :scheme=>"ror"}, :award=>{:title=>"", :number=>"HE 07057", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"Otho S. A. Sprague Memorial Institute", :identifier=>"00c2b9157", :scheme=>"ror"}, :award=>{:title=>"", :number=>"HE 10573", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"Ernest S. 
Bazley Trust", :identifier=>"", :scheme=>"ror"}, :award=>{:title=>"", :number=>"", :identifier=>"", :scheme=>""}}], "8p58pc98c"=>[{:funder=>{:name=>"National Library of Medicine (NLM)", :identifier=>"0060t0j89", :scheme=>"ror"}, :award=>{:title=>"", :number=>"", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"Oak Ridge Institute for Science and Education", :identifier=>"", :scheme=>"ror"}, :award=>{:title=>"", :number=>"", :identifier=>"", :scheme=>""}}], "c0acc95c-7f32-48de-83a4-c830bec4bb29"=>[{:funder=>{:name=>"National Center for Chronic Disease Prevention and Health Promotion", :identifier=>"021rths28", :scheme=>"ror"}, :award=>{:title=>"", :number=>"DP006255", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"National Institute on Drug Abuse", :identifier=>"00fq5cm18", :scheme=>"ror"}, :award=>{:title=>"", :number=>"DA027828", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"Department of Health and Human Services", :identifier=>"00b9jhh55", :scheme=>"ror"}, :award=>{:title=>"", :number=>"SM60563-40", :identifier=>"", :scheme=>""}}], "4de23834-0309-48f9-9b9b-05dbaa6940ec"=>[{:funder=>{:name=>"National Institute on Drug Abuse (NIDA)", :identifier=>"00fq5cm18", :scheme=>"ror"}, :award=>{:title=>"", :number=>"2P30DA027828-07", :identifier=>"", :scheme=>""}}], "3e5cf487-2383-4d1f-b697-ed40a8b79670"=>[{:funder=>{:name=>"National Library of Medicine (NLM)", :identifier=>"0060t0j89", :scheme=>"ror"}, :award=>{:title=>"", :number=>"K01LM011973-01", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"National Center for Advancing Translational Sciences (NCATS)", :identifier=>"04pw6fb54", :scheme=>"ror"}, :award=>{:title=>"", :number=>"UL1TR001422", :identifier=>"", :scheme=>""}}], "2315fdf3-f8ad-461f-8151-0daa68ebfd2c"=>[{:funder=>{:name=>"National Center for Advancing Translational Sciences (NCATS)", :identifier=>"04pw6fb54", :scheme=>"ror"}, :award=>{:title=>"", :number=>"UL1TR001422", :identifier=>"", :scheme=>""}}], 
"1bff6ec0-af92-48dd-a8e2-d17e4ac4f1ec"=>[{:funder=>{:name=>"Penn Center for AIDS Research (CFAR)", :identifier=>"047939x15", :scheme=>"ror"}, :award=>{:title=>"", :number=>"", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"National Institute of Allergy and Infectious Diseases (NIAID)", :identifier=>"043z4tv69", :scheme=>"ror"}, :award=>{:title=>"", :number=>"", :identifier=>"", :scheme=>""}}], "37a5967d-8e25-47d1-92d3-9861755ccdc8"=>[{:funder=>{:name=>"National Center for Advancing Translational Sciences (NCATS)", :identifier=>"04pw6fb54", :scheme=>"ror"}, :award=>{:title=>"", :number=>"U24TR002306", :identifier=>"", :scheme=>""}}], "6f032b75-efdb-4ff9-8154-7443b2cac1e0"=>[{:funder=>{:name=>"National Center for Advancing Translational Sciences (NCATS)", :identifier=>"04pw6fb54", :scheme=>"ror"}, :award=>{:title=>"", :number=>"UL1TR001422", :identifier=>"", :scheme=>""}}], "d282192b-5d42-4f97-853d-72530ae15ba8"=>[{:funder=>{:name=>"National Center for Advancing Translational Sciences (NCATS)", :identifier=>"04pw6fb54", :scheme=>"ror"}, :award=>{:title=>"", :number=>"UL1TR001422", :identifier=>"", :scheme=>""}}], "1aaa14ed-50b0-4c3c-a249-d088b8424a1d"=>[{:funder=>{:name=>"Eunice Kennedy Shriver National Institute of Child Health and Human Development (NICHD)", :identifier=>"04byxyr05", :scheme=>"ror"}, :award=>{:title=>"", :number=>"P50HD076188", :identifier=>"", :scheme=>""}}], "7f794595-230e-48a0-aee1-d92f567d09a7"=>[{:funder=>{:name=>"Eunice Kennedy Shriver National Institute of Child Health and Human Development (NICHD)", :identifier=>"04byxyr05", :scheme=>"ror"}, :award=>{:title=>"", :number=>"P50HD076188", :identifier=>"", :scheme=>""}}], "28201f5d-8f20-4d1c-add3-409ca6f061a5"=>[{:funder=>{:name=>"Eunice Kennedy Shriver National Institute of Child Health and Human Development (NICHD)", :identifier=>"04byxyr05", :scheme=>"ror"}, :award=>{:title=>"", :number=>"P50HD076188", :identifier=>"", :scheme=>""}}], 
"50abe4ff-81d9-40ab-bafd-24d68b1d9c40"=>[{:funder=>{:name=>"Eunice Kennedy Shriver National Institute of Child Health and Human Development (NICHD)", :identifier=>"04byxyr05", :scheme=>"ror"}, :award=>{:title=>"", :number=>"P50HD076188", :identifier=>"", :scheme=>""}}], "1b68450e-37e2-4dbe-8630-3ffc812c3040"=>[{:funder=>{:name=>"Eunice Kennedy Shriver National Institute of Child Health and Human Development (NICHD)", :identifier=>"04byxyr05", :scheme=>"ror"}, :award=>{:title=>"", :number=>"P50HD076188", :identifier=>"", :scheme=>""}}], "11306426-f23a-4db9-bb68-b49f43362d41"=>[{:funder=>{:name=>"Eunice Kennedy Shriver National Institute of Child Health and Human Development (NICHD)", :identifier=>"04byxyr05", :scheme=>"ror"}, :award=>{:title=>"", :number=>"P50HD076188", :identifier=>"", :scheme=>""}}], "10014f57-93aa-4fe1-bc0b-7444813e9b3a"=>[{:funder=>{:name=>"Eunice Kennedy Shriver National Institute of Child Health and Human Development (NICHD)", :identifier=>"04byxyr05", :scheme=>"ror"}, :award=>{:title=>"", :number=>"P50HD076188", :identifier=>"", :scheme=>""}}], "09c20041-3882-4d6d-8b74-5469e33612e6"=>[{:funder=>{:name=>"National Center for Advancing Translational Sciences (NCATS)", :identifier=>"04pw6fb54", :scheme=>"ror"}, :award=>{:title=>"", :number=>"U24TR002306", :identifier=>"", :scheme=>""}}], "c6f785cf-f992-44ea-9905-ab4316181d91"=>[{:funder=>{:name=>"National Center for Advancing Translational Sciences (NCATS)", :identifier=>"04pw6fb54", :scheme=>"ror"}, :award=>{:title=>"", :number=>"UL1TR001422", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"National Cancer Institute (NCI)", :identifier=>"040gcmg81", :scheme=>"ror"}, :award=>{:title=>"", :number=>"U54CA202995", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"National Cancer Institute (NCI)", :identifier=>"040gcmg81", :scheme=>"ror"}, :award=>{:title=>"", :number=>"U54CA202997", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"National Cancer Institute (NCI)", :identifier=>"040gcmg81", 
:scheme=>"ror"}, :award=>{:title=>"", :number=>"U54CA203000", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"National Library of Medicine (NLM)", :identifier=>"0060t0j89", :scheme=>"ror"}, :award=>{:title=>"", :number=>"U24TR002306", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"National Center for Advancing Translational Sciences (NCATS)", :identifier=>"04pw6fb54", :scheme=>"ror"}, :award=>{:title=>"", :number=>"U24TR002306", :identifier=>"", :scheme=>""}}], "fb4aa42b-ffe8-4548-b6cf-2fa6e32a80ac"=>[{:funder=>{:name=>"National Center for Advancing Translational Sciences (NCATS)", :identifier=>"04pw6fb54", :scheme=>"ror"}, :award=>{:title=>"", :number=>"U24TR002306", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"National Center for Advancing Translational Sciences (NCATS)", :identifier=>"04pw6fb54", :scheme=>"ror"}, :award=>{:title=>"", :number=>"UL1TR001422", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"National Cancer Institute (NCI)", :identifier=>"040gcmg81", :scheme=>"ror"}, :award=>{:title=>"", :number=>"U54CA202995", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"National Cancer Institute (NCI)", :identifier=>"040gcmg81", :scheme=>"ror"}, :award=>{:title=>"", :number=>"U54CA202997 ", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"National Cancer Institute (NCI)", :identifier=>"040gcmg81", :scheme=>"ror"}, :award=>{:title=>"", :number=>"U54CA203000", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"National Institute of Arthritis and Musculoskeletal and Skin Diseases (NIAMS)", :identifier=>"006zn3t30", :scheme=>"ror"}, :award=>{:title=>"", :number=>"P30AR072579 ", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"National Library of Medicine (NLM)", :identifier=>"0060t0j89", :scheme=>"ror"}, :award=>{:title=>"", :number=>"G08LM012688", :identifier=>"", :scheme=>""}}], "f9070cb2-95e5-4d5d-83f6-d68b50d4e343"=>[{:funder=>{:name=>"National Center for Advancing Translational Sciences (NCATS)", :identifier=>"04pw6fb54", 
:scheme=>"ror"}, :award=>{:title=>"", :number=>"U24TR002306", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"National Center for Advancing Translational Sciences (NCATS)", :identifier=>"04pw6fb54", :scheme=>"ror"}, :award=>{:title=>"", :number=>"UL1TR001422", :identifier=>"", :scheme=>""}}], "370b309f-c33b-4936-996b-3717ed0f6074"=>[{:funder=>{:name=>"National Center for Advancing Translational Sciences (NCATS)", :identifier=>"04pw6fb54", :scheme=>"ror"}, :award=>{:title=>"", :number=>"U24TR002306", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"National Center for Advancing Translational Sciences (NCATS)", :identifier=>"04pw6fb54", :scheme=>"ror"}, :award=>{:title=>"", :number=>" UL1TR001422", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"National Cancer Institute (NCI)", :identifier=>"040gcmg81", :scheme=>"ror"}, :award=>{:title=>"", :number=>" U54CA202995", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"National Cancer Institute (NCI)", :identifier=>"040gcmg81", :scheme=>"ror"}, :award=>{:title=>"", :number=>"U54CA202997", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"National Cancer Institute (NCI)", :identifier=>"040gcmg81", :scheme=>"ror"}, :award=>{:title=>"", :number=>"U54CA203000", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"National Institute of Arthritis and Musculoskeletal and Skin Diseases (NIAMS)", :identifier=>"006zn3t30", :scheme=>"ror"}, :award=>{:title=>"", :number=>"P30AR072579 ", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"National Library of Medicine (NLM)", :identifier=>"0060t0j89", :scheme=>"ror"}, :award=>{:title=>"", :number=>"G08LM012688", :identifier=>"", :scheme=>""}}], "691d9bf4-5be8-479f-bb74-eff2d98bfa9e"=>[{:funder=>{:name=>"National Center for Advancing Translational Sciences (NCATS)", :identifier=>"04pw6fb54", :scheme=>"ror"}, :award=>{:title=>"", :number=>"U24TR002306", :identifier=>"", :scheme=>""}}], "2d858062-bd0a-4b26-bf24-3f1637e7aa94"=>[{:funder=>{:name=>"National Center for Advancing 
Translational Sciences (NCATS)", :identifier=>"04pw6fb54", :scheme=>"ror"}, :award=>{:title=>"", :number=>"UL1TR001", :identifier=>"", :scheme=>""}}], "a1419467-3a15-4e85-b2f4-1ac0c057c60f"=>[{:funder=>{:name=>"Fogarty International Center", :identifier=>"02xey9a22", :scheme=>"ror"}, :award=>{:title=>"", :number=>"D43TW009575", :identifier=>"", :scheme=>""}}], nil=>[{:funder=>{:name=>"National Cancer Institute (NCI)", :identifier=>"040gcmg81", :scheme=>"ror"}, :award=>{:title=>"", :number=>"U54CA221205", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"American Association for the Study of Liver Diseases", :identifier=>"005tjs461", :scheme=>"ror"}, :award=>{:title=>"", :number=>"", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"National Institute of Diabetes and Digestive and Kidney Diseases (NIDDK)", :identifier=>"00adh9b73", :scheme=>"ror"}, :award=>{:title=>"", :number=>"R01DK110024", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"National Institute on Alcohol Abuse and Alcoholism (NIAAA)", :identifier=>"02jzrsm59", :scheme=>"ror"}, :award=>{:title=>"", :number=>"R01AA027179", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"", :identifier=>"", :scheme=>"ror"}, :award=>{:title=>"", :number=>"", :identifier=>"", :scheme=>""}}], "72802492-bdd9-4bdb-80a9-85e63cdc09e4"=>[{:funder=>{:name=>"National Center for Advancing Translational Sciences (NCATS)", :identifier=>"04pw6fb54", :scheme=>"ror"}, :award=>{:title=>"", :number=>"U24TR002306", :identifier=>"", :scheme=>""}}], "78254bed-92a7-4708-bcff-16383bab9ce3"=>[{:funder=>{:name=>"", :identifier=>"", :scheme=>"ror"}, :award=>{:title=>"", :number=>"", :identifier=>"", :scheme=>""}}], "c423cf8b-8cbb-4407-9b3e-dd30bd86769a"=>[{:funder=>{:name=>"", :identifier=>"", :scheme=>"ror"}, :award=>{:title=>"", :number=>"", :identifier=>"", :scheme=>""}}], "d43f1bbc-9099-45fa-83e2-79d1301a0aeb"=>[{:funder=>{:name=>"", :identifier=>"", :scheme=>"ror"}, :award=>{:title=>"", :number=>"", :identifier=>"", :scheme=>""}}], 
"ae10b9d6-f1d1-43bc-a5bc-2b6e024d263b"=>[{:funder=>{:name=>"", :identifier=>"", :scheme=>"ror"}, :award=>{:title=>"", :number=>"", :identifier=>"", :scheme=>""}}], "b876a078-d5da-4f9f-b8e4-cc1cd75489a5"=>[{:funder=>{:name=>"", :identifier=>"", :scheme=>"ror"}, :award=>{:title=>"", :number=>"", :identifier=>"", :scheme=>""}}], "d593a1e5-bc29-4db0-94d2-3ec5471ba11f"=>[{:funder=>{:name=>"National Heart Lung and Blood Institute (NHLBI)", :identifier=>"012pb6c26", :scheme=>"ror"}, :award=>{:title=>"", :number=>"R01HL115828", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"National Heart Lung and Blood Institute (NHLBI)", :identifier=>"012pb6c26", :scheme=>"ror"}, :award=>{:title=>"", :number=>"R01HL133504", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"National Heart Lung and Blood Institute (NHLBI)", :identifier=>"012pb6c26", :scheme=>"ror"}, :award=>{:title=>"", :number=>"K25HL119608", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"National Heart Lung and Blood Institute (NHLBI)", :identifier=>"012pb6c26", :scheme=>"ror"}, :award=>{:title=>"", :number=>"F30HL145995", :identifier=>"", :scheme=>""}}], "45a68a08-9aa6-4e13-8f7e-fd20f9d87010"=>[{:funder=>{:name=>"National Institute of Diabetes and Digestive and Kidney Diseases (NIDDK)", :identifier=>"00adh9b73", :scheme=>"ror"}, :award=>{:title=>"", :number=>"", :identifier=>"", :scheme=>""}}], "b93105d8-0142-4ddb-baa0-64fea8b981b2"=>[{:funder=>{:name=>"National Institute on Aging", :identifier=>"049v75w11", :scheme=>"ror"}, :award=>{:title=>"", :number=>" U2CAG057441", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"The Environmental Influences on Child Health Outcomes (ECHO)", :identifier=>"", :scheme=>"ror"}, :award=>{:title=>"", :number=>"", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"Office of the Director, National Institutes of Health (OD)", :identifier=>"00fj8a872", :scheme=>"ror"}, :award=>{:title=>"", :number=>"U24OD023319", :identifier=>"", :scheme=>""}}, {:funder=>{:name=>"", 
:identifier=>"", :scheme=>"ror"}, :award=>{:title=>"", :number=>"", :identifier=>"", :scheme=>""}}]}
8 changes: 7 additions & 1 deletion app/models/invenio_rdm_record_converter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class InvenioRdmRecordConverter < Sufia::Export::Converter
def initialize(generic_file=nil)
return unless generic_file
@@header_lookup ||= HeaderLookup.new
@@funding_data ||= eval(File.read('app/models/concerns/galtersufia/generic_file/funding_data.txt'))

@record = record_for_export(generic_file)
@file = filename_and_content_path(generic_file)
Expand Down Expand Up @@ -114,7 +115,8 @@ def invenio_metadata(gf)
"subjects": SUBJECT_SCHEMES.map{ |subject_type| subjects_for_scheme(gf.send(subject_type), subject_type) }.flatten,
"dates": gf.date_created.map{ |date| {"date": date, "type": "other", "description": "When the item was originally created."} },
"formats": gf.mime_type,
"locations": gf.based_near.present? ? gf.based_near.shift.split("', ").map{ |location| {place: location.gsub("'", "")} } : {}
"locations": gf.based_near.present? ? gf.based_near.shift.split("', ").map{ |location| {place: location.gsub("'", "")} } : {},
"funding": funding(gf.id)
}
end

Expand Down Expand Up @@ -200,4 +202,8 @@ def additional_descriptions(descriptions)
return nil if additional_descriptions_size < 0
descriptions.last(additional_descriptions_size).map{ |add_desc| {"description": add_desc, "type": "other", "lang": "eng"} }
end

# Looks up the funding references stored for a file in the class-level
# @@funding_data table.
#
# @param file_id key to look up in @@funding_data
# @return the value stored under +file_id+, or an empty Hash when the lookup
#   yields nothing (nil/false)
def funding(file_id)
  references = @@funding_data[file_id]
  return references if references

  {}
end
end
45 changes: 45 additions & 0 deletions funding_csv_to_hash.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
require 'csv'

# One-off conversion script: reads prod_funding_sources.csv (columns, in order:
# funder name, ROR identifier, award number, file id), skips the header row and
# writes a Ruby hash literal mapping each file id to an array of funding
# references.
#
# NOTE(review): InvenioRdmRecordConverter#initialize loads the output file with
# `eval`, so the file must remain a valid Ruby literal and must only ever be
# generated from trusted input.

# Normalizes a CSV cell: nil becomes "" and surrounding whitespace is removed,
# so padded spreadsheet values such as "F37 LM009568 " are stored trimmed.
def clean_cell(value)
  value.to_s.strip
end

# Builds a single funding reference (funder + award) from already-cleaned
# values. Award title, identifier and scheme are always blank.
def funding_reference(name, identifier, award_number)
  {
    funder: {
      name: name,
      identifier: identifier,
      scheme: "ror"
    },
    award: {
      title: "",
      number: award_number,
      identifier: "",
      scheme: ""
    }
  }
end

# drop(1) discards the header row.
funding_rows = CSV.read("prod_funding_sources.csv").drop(1)

formatted_funding_data = funding_rows.each_with_object({}) do |row, hash|
  # Index explicitly so short or blank rows yield "" instead of nil.
  name, identifier, award_number, file_id = Array.new(4) { |column| clean_cell(row[column]) }

  # A row without a file id can never be matched to a record; previously such
  # rows were all collected under a nil key in the output.
  next if file_id.empty?

  (hash[file_id] ||= []) << funding_reference(name, identifier, award_number)
end

# Written via #inspect on purpose: the consumer evaluates the file as Ruby.
File.write("app/models/concerns/galtersufia/generic_file/funding_data.txt", formatted_funding_data.inspect)
Loading

0 comments on commit 8b3346d

Please sign in to comment.