Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Daily report #68

Open
icaoberg opened this issue Feb 1, 2025 · 0 comments
Open

Daily report #68

icaoberg opened this issue Feb 1, 2025 · 0 comments
Assignees
Labels
question Further information is requested

Comments

@icaoberg
Copy link
Collaborator

icaoberg commented Feb 1, 2025

This is the call, @shirey. It is nothing but a wrapper around

def daily() -> pd.DataFrame:
    """Return today's dataset data-status report as a DataFrame.

    The report is cached on disk as ``daily-report/<YYYYMMDD>.tsv`` (relative
    to the current working directory).  If today's file already exists it is
    read back and returned; otherwise the report is downloaded from the
    HuBMAP ingest API ``datasets/data-status`` endpoint, written to the cache
    and returned.

    After a successful download the ``today.tsv`` symlink in
    ``/hive/hubmap/bdbags/reports`` is re-pointed at
    ``/hive/hubmap/bdbags/reports/<YYYYMMDD>.tsv`` when that directory exists.

    Returns:
        The report as a DataFrame.  An empty DataFrame is returned when the
        request fails or the response does not carry a ``data`` key; in that
        case nothing is cached and the symlink is left untouched.
    """
    now = datetime.now()
    stamp = now.strftime("%Y%m%d")
    report_output_directory = "daily-report"
    report_output_filename = f"{report_output_directory}/{stamp}.tsv"

    if Path(report_output_filename).exists():
        try:
            return pd.read_csv(report_output_filename, sep="\t")
        except pd.errors.EmptyDataError:
            # A cache file with no columns cannot be parsed; fall through and
            # download a fresh copy instead of crashing.
            print(f"Cached report {report_output_filename} is empty; downloading again.")

    url = "https://ingest.api.hubmapconsortium.org/datasets/data-status"
    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url, timeout=120)
        response.raise_for_status()
        json_data = response.json()

        # The payload must be an object with a 'data' key; anything else
        # (list, null, ...) is reported the same way as a missing key.
        if not isinstance(json_data, dict) or "data" not in json_data:
            raise KeyError("'data' key not found in the JSON response")
        df = pd.DataFrame(json_data["data"])
    except (ValueError, KeyError) as e:
        # Malformed JSON or an unexpected payload shape.
        print(f"Error loading data: {e}")
        return pd.DataFrame()
    except requests.RequestException as e:
        # Connection problems, timeouts and non-2xx responses.
        print(f"Request failed: {e}")
        return pd.DataFrame()

    # Caching is best effort: a failure to write must not lose the data that
    # was just downloaded.
    try:
        Path(report_output_directory).mkdir(parents=True, exist_ok=True)
        df.to_csv(report_output_filename, sep="\t", index=False)
    except (OSError, ValueError) as e:
        print(f"Unable to save dataframe to {report_output_filename}.")
        print(f"Reason: {e}")

    hive_directory = Path("/hive/hubmap/bdbags/reports")
    if hive_directory.is_dir():
        # NOTE(review): nothing in this function writes the backup file, so
        # the link dangles until something else places it there - confirm
        # that a separate job copies the report.
        report_output_backup_file = hive_directory / f"{stamp}.tsv"
        symlink = hive_directory / "today.tsv"
        try:
            # exists() follows symlinks and is False for a dangling link, so
            # is_symlink() is needed to replace a stale link as well.
            if symlink.is_symlink() or symlink.exists():
                symlink.unlink()
            symlink.symlink_to(report_output_backup_file)
        except OSError as e:
            print(f"Unable to update symlink {symlink}: {e}")

    return df
@icaoberg icaoberg added the question Further information is requested label Feb 1, 2025
@icaoberg icaoberg self-assigned this Feb 1, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
question Further information is requested
Projects
None yet
Development

No branches or pull requests

1 participant