From eae9999d0febfb78eb4560410e146c79aae3497a Mon Sep 17 00:00:00 2001
From: Isaiah Akorita
Date: Mon, 27 May 2024 20:16:57 +0100
Subject: [PATCH] added check for new data updates

Before saving, read the CSV written by the previous run (if there is
one) and compare it with the freshly downloaded sheet. When the new
frame is non-empty and equal to the existing one, log "No new data to
update." and exit with status 0 instead of saving and pushing again.

The worksheet is now selected by its title through
Spreadsheet.worksheet(range_name). Spreadsheet.get_worksheet() expects
a zero-based index, so handing it the 'data' title would fail.
---
NOTE(review): DataFrame.equals() also compares dtypes; data that went
through a CSV round-trip may compare unequal to the freshly downloaded
frame even when the values match -- confirm against real data.
NOTE(review): exit() is provided by the site module; sys.exit() is the
portable spelling if `sys` is available in this file.

 update_data.py | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/update_data.py b/update_data.py
index d9d5bbd..91109d5 100644
--- a/update_data.py
+++ b/update_data.py
@@ -1,5 +1,6 @@
 import gspread
 import logging
+import os
 import pandas as pd
 import requests
 import subprocess
@@ -8,6 +9,16 @@
 # Set up logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
+
+def read_existing_data(file_path):
+    """Reads existing data from a CSV file to compare with newly fetched data."""
+    if os.path.exists(file_path):
+        logging.info("Existing data file found. Reading data...")
+        return pd.read_csv(file_path)
+    else:
+        logging.info("No existing data file found. Assuming first run.")
+        return pd.DataFrame()
+
 def download_sheet(sheet_id, range_name):
     """Downloads data from Google Sheets and returns a DataFrame."""
     try:
@@ -24,7 +35,7 @@ def download_sheet(sheet_id, range_name):
         sheet = client.open_by_key(sheet_id)
 
         # Get the sheet by name
-        worksheet = sheet.get_worksheet(0)
+        worksheet = sheet.worksheet(range_name)
 
         # Get all records of the data
         data = worksheet.get_all_records()
@@ -72,8 +83,14 @@ def git_commit_push():
         RANGE_NAME = 'data'
         FILE_PATH = 'omoku_data.csv'
 
-        df = download_sheet(SHEET_ID, RANGE_NAME)
-        save_to_csv(df, FILE_PATH)
+        new_data = download_sheet(SHEET_ID, RANGE_NAME)
+        existing_data = read_existing_data(FILE_PATH)
+
+        if not new_data.empty and new_data.equals(existing_data):
+            logging.info("No new data to update.")
+            exit(0)
+
+        save_to_csv(new_data, FILE_PATH)
         git_commit_push()
     except Exception as e:
         logging.critical(f"An unexpected error occurred: {e}")