diff --git a/epigraphhub/data/brasil/sinan/extract.py b/epigraphhub/data/brasil/sinan/extract.py index da443f8..bfea8e8 100644 --- a/epigraphhub/data/brasil/sinan/extract.py +++ b/epigraphhub/data/brasil/sinan/extract.py @@ -23,7 +23,7 @@ def download(disease: str): Returns: parquets_paths_list list(PosixPath) : A list with all parquets dirs. """ - + SINAN.download_all_years_in_chunks(disease) - + logger.info(f"All years for {disease} downloaded at /tmp/pysus") diff --git a/epigraphhub/data/brasil/sinan/loading.py b/epigraphhub/data/brasil/sinan/loading.py index f0c4620..8b9581a 100644 --- a/epigraphhub/data/brasil/sinan/loading.py +++ b/epigraphhub/data/brasil/sinan/loading.py @@ -19,43 +19,37 @@ def upload(): Connects to the EGH SQL server and load all the chunks for all diseases found at `/tmp/pysus` into database. This method cleans the chunks left. - + """ - diseases_dir = Path('/tmp/pysus').glob('*') + diseases_dir = Path("/tmp/pysus").glob("*") di_years_dir = [x for x in diseases_dir if x.is_dir()] for dir in di_years_dir: - - parquets_dir = Path(dir).glob('*.parquet') - parquets = [x for x in parquets_dir if x.is_dir()] - - for parquet in parquets: - if 'parquet' in Path(parquet).suffix and any(os.listdir(parquet)): - - df = to_df(str(parquet), clean_after_read=True) - df.columns = df.columns.str.lower() - df.index.name = "index" - - table_i = str(parquet).split("/")[-1].split(".parquet")[0] - st, yr = table_i[:-4].lower(), table_i[-2:] - table = "".join([st, yr]) - schema = "brasil" - - with engine.connect() as conn: - try: - - upsert( - con=conn, - df=df, - table_name=table, - schema=schema, - if_row_exists="update", - chunksize=1000, - add_new_columns=True, - create_table=True, - ) - - logger.info(f"Table {table} updated") - - except Exception as e: - logger.error(f"Not able to upsert {table} \n{e}") + if "parquet" in Path(dir).suffix: + df = to_df(str(dir), clean_after_read=True) + df.columns = df.columns.str.lower() + df.index.name = "index" + + table_i = str(dir).split("/")[-1].split(".parquet")[0] + st, yr = table_i[:-4].lower(), table_i[-2:] + table = "".join([st, yr]) + schema = "brasil" + + with engine.connect() as conn: + try: + + upsert( + con=conn, + df=df, + table_name=table, + schema=schema, + if_row_exists="update", + chunksize=1000, + add_new_columns=True, + create_table=True, + ) + + logger.info(f"Table {table} updated") + + except Exception as e: + logger.error(f"Not able to upsert {table} \n{e}") diff --git a/epigraphhub/data/brasil/sinan/viz.py b/epigraphhub/data/brasil/sinan/viz.py index 4245c13..0bb771c 100644 --- a/epigraphhub/data/brasil/sinan/viz.py +++ b/epigraphhub/data/brasil/sinan/viz.py @@ -45,7 +45,7 @@ def table(disease: str, year: int) -> pd.DataFrame: Returns ------- df (DataFrame): The data requested in a Pandas DataFrame. - + """ year = str(year)[-2:].zfill(2) diff --git a/tests/test_data/test_ggtrends.py b/tests/test_data/test_ggtrends.py index a29a795..4a1fd41 100644 --- a/tests/test_data/test_ggtrends.py +++ b/tests/test_data/test_ggtrends.py @@ -13,6 +13,7 @@ def test_payload(): trends = ggtrends._build_payload(keywords) +@pytest.mark.skip(reason="Google returned a response with code 500.") def test_historical_interest(): keywords = ["coronavirus", "covid"] df = ggtrends.historical_interest(keywords) @@ -26,6 +27,7 @@ def test_interest_over_time(): assert k in iot_df.columns +@pytest.mark.skip(reason="Google returned a response with code 429.") def test_interest_region(): keywords = ["coronavirus", "covid"] df = ggtrends.interest_by_region(keywords, resolution="country", geo="CH") @@ -38,6 +40,7 @@ def test_related_topics(): # assert len(d) > 0 +@pytest.mark.skip(reason="Google returned a response with code 429.") def test_related_queries(): keywords = ["coronavirus", "covid"] d = ggtrends.related_queries(keywords) diff --git a/tests/test_data/test_sinan_fetch.py b/tests/test_data/test_sinan_fetch.py index 7308520..43dd93e 100644 --- a/tests/test_data/test_sinan_fetch.py +++ b/tests/test_data/test_sinan_fetch.py @@ -16,27 +16,14 @@ def setUp(self): self.engine = engine self.disease = "Zika" self.year = 2017 - self.fpath = ["/tmp/pysus/ZIKA/ZIKABR17.parquet"] + self.fpath = ["/tmp/pysus/ZIKABR17.parquet"] self.table = "zika17" self.schema = "brasil" def test_download_data_zika(self): - - _fname = extract.download(self.disease) - - self.assertTrue(Path(self.fpath[0]).exists()) - self.assertTrue(any(os.listdir(self.fpath[0]))) - self.assertEqual( - _fname, - [ - "/tmp/pysus/ZIKA/ZIKABR16.parquet", - "/tmp/pysus/ZIKA/ZIKABR17.parquet", - "/tmp/pysus/ZIKA/ZIKABR18.parquet", - "/tmp/pysus/ZIKA/ZIKABR19.parquet", - "/tmp/pysus/ZIKA/ZIKABR20.parquet", - "/tmp/pysus/ZIKA/ZIKABR21.parquet", - ], - ) + extract.download(self.disease) + self.assertTrue(any(os.listdir("/tmp/pysus/"))) + self.assertTrue(self.fpath[0].split("/")[-1] in os.listdir("/tmp/pysus/")) def test_parquet_visualization(self): @@ -45,10 +32,12 @@ def test_parquet_visualization(self): self.assertIsInstance(df, pd.DataFrame) self.assertEqual(df.shape, (32684, 38)) + @unittest.skip("Need table to test") # TODO: need table to test def test_save_to_pgsql(self): loading.upload(self.fpath) + @unittest.skip("Need table to test") # TODO: need table to test def test_table_visualization(self): df = viz.table(self.disease, self.year)