From 524e1bf8bc4f42aab5322960ee875ca7e8ec186b Mon Sep 17 00:00:00 2001 From: mgithub46 <76885622+mgithub46@users.noreply.github.com> Date: Sat, 2 Jan 2021 16:45:01 -0500 Subject: [PATCH 1/7] Update lib.py Adding COVID Vaccination Data --- fetcher/lib.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/fetcher/lib.py b/fetcher/lib.py index a2a0418..742119a 100644 --- a/fetcher/lib.py +++ b/fetcher/lib.py @@ -3,6 +3,8 @@ import typing import hydra import pandas as pd +import os +import urllib.request from fetcher.utils import Fields from fetcher.source_utils import fetch_source, process_source_responses @@ -13,6 +15,8 @@ TS = 'TIMESTAMP' STATE = Fields.STATE.name +site_url = "https://raw.githubusercontent.com/govex/COVID-19/master/data_tables/vaccine_data/raw_data" \ + "/vaccine_data_us_state_timeline.csv " class Fetcher: def __init__(self, cfg): @@ -169,6 +173,14 @@ def save_df_to_db(db_config, df): engine = create_engine(engine_conf) df.to_sql(db_config.table, engine, if_exists='append', chunksize=200, method='multi') +def get_covid_vaccination_data(outputdir): + """Download COVID Vaccination data in CSV format + """ + if os.path.exists(outputdir + '/' + 'covid_vaccination_US.csv'): + os.remove(outputdir + '/' + 'covid_vaccination_US.csv') + + urllib.request.urlretrieve(site_url, os.path.join(outputdir, 'covid_vaccination_US.csv')) + @hydra.main(config_path='..', config_name="config") def main(cfg): @@ -184,5 +196,8 @@ def main(cfg): df = build_dataframe(results, cfg.state, cfg.dataset, cfg.output_date_format, cfg.output) print(df) + # get vaccination data + get_covid_vaccination_data(cfg.outputs) + if 'db' in cfg.dataset and cfg.dataset.db.store: save_df_to_db(cfg.dataset.db, df) From 7b660971a9b96dd355c27572340e0755bbe0a55f Mon Sep 17 00:00:00 2001 From: mgithub46 <76885622+mgithub46@users.noreply.github.com> Date: Sat, 2 Jan 2021 16:46:20 -0500 Subject: [PATCH 2/7] Update run.sh Adding COVID Vaccination data --- bin/run.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/bin/run.sh b/bin/run.sh index 6a8b3b3..29e9087 100755 --- a/bin/run.sh +++ b/bin/run.sh @@ -18,3 +18,4 @@ fi # run fetch conda run -n c19-data python get_my_data.py dataset=states conda run -n c19-data python tools/push_to_spreadsheet.py push.spreadsheet_id=1brHKBhqiXkkLyiDTDfaBK-tms-R4KVkI-NPFfFZwqYk push.sheet_id=0 push.file=outputs/states.csv creds.type=service creds.key_filepath=../creds/credentials.json +conda run -n c19-data python tools/push_to_spreadsheet.py push.spreadsheet_id=1brHKBhqiXkkLyiDTDfaBK-tms-R4KVkI-NPFfFZwqYk push.sheet_id=289169575 push.file=outputs/covid_vaccination_US.csv creds.type=service creds.key_filepath=../creds/credentials.json From 39ce00ce44445ad3693a56bd876d387bd0e00566 Mon Sep 17 00:00:00 2001 From: mgithub46 <76885622+mgithub46@users.noreply.github.com> Date: Sat, 2 Jan 2021 16:47:20 -0500 Subject: [PATCH 3/7] Update config.yaml Adding COVID Vaccination data --- config.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/config.yaml b/config.yaml index bfabd40..29006b7 100644 --- a/config.yaml +++ b/config.yaml @@ -28,6 +28,7 @@ dataset: output: ${dataset.name} data_root: ${hydra:runtime.cwd}/dataset/${dataset.name} output_date_format: "%Y%m%d" +outputs: ${hydra:runtime.cwd}/outputs # fetch a single state (or a list) state: ['AK', 'AL', 'AR', 'AS', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA', 'GU', From b2c23930df3557035f702784e49fcaf6b6862a8b Mon Sep 17 00:00:00 2001 From: mgithub46 <76885622+mgithub46@users.noreply.github.com> Date: Sat, 2 Jan 2021 17:03:42 -0500 Subject: [PATCH 4/7] Update config.yaml Added extra line as per build verification --- config.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/config.yaml b/config.yaml index 29006b7..264aaec 100644 --- a/config.yaml +++ b/config.yaml @@ -30,6 +30,7 @@ data_root: ${hydra:runtime.cwd}/dataset/${dataset.name} output_date_format: "%Y%m%d" outputs: ${hydra:runtime.cwd}/outputs + # fetch a single state (or a list) state: ['AK', 'AL', 'AR', 'AS', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA', 'GU', 'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME', 'MI', From 49b9eb80a496b6592e3c80a91507bc2cccc038cc Mon Sep 17 00:00:00 2001 From: mgithub46 <76885622+mgithub46@users.noreply.github.com> Date: Sat, 2 Jan 2021 17:12:32 -0500 Subject: [PATCH 5/7] Update lib.py --- fetcher/lib.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fetcher/lib.py b/fetcher/lib.py index 742119a..4066bb3 100644 --- a/fetcher/lib.py +++ b/fetcher/lib.py @@ -10,7 +10,6 @@ from fetcher.source_utils import fetch_source, process_source_responses from fetcher.sources import build_sources - # Indices TS = 'TIMESTAMP' STATE = Fields.STATE.name @@ -18,6 +17,7 @@ site_url = "https://raw.githubusercontent.com/govex/COVID-19/master/data_tables/vaccine_data/raw_data" \ "/vaccine_data_us_state_timeline.csv " + class Fetcher: def __init__(self, cfg): '''Initialize source information''' @@ -173,6 +173,7 @@ def save_df_to_db(db_config, df): engine = create_engine(engine_conf) df.to_sql(db_config.table, engine, if_exists='append', chunksize=200, method='multi') + def get_covid_vaccination_data(outputdir): """Download COVID Vaccination data in CSV format """ @@ -198,6 +199,6 @@ def main(cfg): # get vaccination data get_covid_vaccination_data(cfg.outputs) - + if 'db' in cfg.dataset and cfg.dataset.db.store: save_df_to_db(cfg.dataset.db, df) From 97ffa4d0ba83f86885c7f28dc5c37e622d1d58c0 Mon Sep 17 00:00:00 2001 From: mgithub46 <76885622+mgithub46@users.noreply.github.com> Date: Sat, 2 Jan 2021 17:18:33 -0500 Subject: [PATCH 6/7] Update lib.py --- fetcher/lib.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fetcher/lib.py b/fetcher/lib.py index 4066bb3..783b7b0 100644 --- a/fetcher/lib.py +++ b/fetcher/lib.py @@ -196,8 +196,7 @@ def main(cfg): # This stores the CSV with the requsted fields in order df = build_dataframe(results, cfg.state, cfg.dataset, cfg.output_date_format, cfg.output) print(df) - - # get vaccination data + get_covid_vaccination_data(cfg.outputs) if 'db' in cfg.dataset and cfg.dataset.db.store: From bd081f1b99f77a421205e5190f42d5f2ac3b6627 Mon Sep 17 00:00:00 2001 From: mgithub46 <76885622+mgithub46@users.noreply.github.com> Date: Sat, 2 Jan 2021 17:27:51 -0500 Subject: [PATCH 7/7] Update lib.py --- fetcher/lib.py | 1 - 1 file changed, 1 deletion(-) diff --git a/fetcher/lib.py b/fetcher/lib.py index 783b7b0..6d3be9c 100644 --- a/fetcher/lib.py +++ b/fetcher/lib.py @@ -196,7 +196,6 @@ def main(cfg): # This stores the CSV with the requsted fields in order df = build_dataframe(results, cfg.state, cfg.dataset, cfg.output_date_format, cfg.output) print(df) - get_covid_vaccination_data(cfg.outputs) if 'db' in cfg.dataset and cfg.dataset.db.store: