diff --git a/bin/run.sh b/bin/run.sh
index 6a8b3b3..29e9087 100755
--- a/bin/run.sh
+++ b/bin/run.sh
@@ -18,3 +18,4 @@ fi
 # run fetch
 conda run -n c19-data python get_my_data.py dataset=states
 conda run -n c19-data python tools/push_to_spreadsheet.py push.spreadsheet_id=1brHKBhqiXkkLyiDTDfaBK-tms-R4KVkI-NPFfFZwqYk push.sheet_id=0 push.file=outputs/states.csv creds.type=service creds.key_filepath=../creds/credentials.json
+conda run -n c19-data python tools/push_to_spreadsheet.py push.spreadsheet_id=1brHKBhqiXkkLyiDTDfaBK-tms-R4KVkI-NPFfFZwqYk push.sheet_id=289169575 push.file=outputs/covid_vaccination_US.csv creds.type=service creds.key_filepath=../creds/credentials.json
diff --git a/config.yaml b/config.yaml
index bfabd40..264aaec 100644
--- a/config.yaml
+++ b/config.yaml
@@ -28,6 +28,8 @@ dataset:
 output: ${dataset.name}
 data_root: ${hydra:runtime.cwd}/dataset/${dataset.name}
 output_date_format: "%Y%m%d"
+outputs: ${hydra:runtime.cwd}/outputs
+
 
 # fetch a single state (or a list)
 state: ['AK', 'AL', 'AR', 'AS', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA', 'GU',
diff --git a/fetcher/lib.py b/fetcher/lib.py
index a2a0418..6d3be9c 100644
--- a/fetcher/lib.py
+++ b/fetcher/lib.py
@@ -3,16 +3,20 @@ import typing
 
 import hydra
 import pandas as pd
+import os
+import urllib.request
 
 from fetcher.utils import Fields
 from fetcher.source_utils import fetch_source, process_source_responses
 from fetcher.sources import build_sources
 
-
 # Indices
 TS = 'TIMESTAMP'
 STATE = Fields.STATE.name
 
+site_url = "https://raw.githubusercontent.com/govex/COVID-19/master/data_tables/vaccine_data/raw_data" \
+           "/vaccine_data_us_state_timeline.csv"
+
 
 class Fetcher:
     def __init__(self, cfg):
@@ -170,6 +174,24 @@ def save_df_to_db(db_config, df):
     df.to_sql(db_config.table, engine, if_exists='append', chunksize=200, method='multi')
 
 
+def get_covid_vaccination_data(outputdir):
+    """Download the US COVID vaccination timeline CSV into ``outputdir``.
+
+    Any previous copy is deleted first, and the download is written to a
+    ``.part`` file that is renamed to ``covid_vaccination_US.csv`` only once
+    it has completed, so a failed run never leaves a stale or truncated CSV
+    under the final name. Download errors propagate to the caller.
+    """
+    target = os.path.join(outputdir, 'covid_vaccination_US.csv')
+    partial = target + '.part'
+    for leftover in (target, partial):
+        if os.path.exists(leftover):
+            os.remove(leftover)
+
+    urllib.request.urlretrieve(site_url, partial)
+    os.replace(partial, target)
+
+
 @hydra.main(config_path='..', config_name="config")
 def main(cfg):
     print(cfg.dataset.pretty())
@@ -183,6 +205,7 @@ def main(cfg):
     # This stores the CSV with the requsted fields in order
     df = build_dataframe(results, cfg.state, cfg.dataset, cfg.output_date_format, cfg.output)
     print(df)
+    get_covid_vaccination_data(cfg.outputs)
 
     if 'db' in cfg.dataset and cfg.dataset.db.store:
         save_df_to_db(cfg.dataset.db, df)