From 7ae108f30fa5aad40a4b34cdb954f346649bf756 Mon Sep 17 00:00:00 2001
From: ishita9 <36771676+ishita9@users.noreply.github.com>
Date: Wed, 2 Oct 2024 11:45:23 -0600
Subject: [PATCH] Feature internal 46 logging (#400)

* Adding logging framework for STIGS
* Made changes to the test yaml files for agg_stat
* Syntax edit
* Needed indentation
* Syntax edits to agg_stat
* Changed import statement
* Changed import statement
* Needed a line of code to be added
* Added logging statements and modified tests accordingly
* Syntax edit to logger variable as a function argument
* Syntax edits
* Syntax edits based on pytest outputs
* Changes to safe_log function
* Positional argument changes for logger
* Syntax edits
* Edit to a function call
* Added logger to test_event_equalize
* Changed placement of logger argument
* Edits to placement of logger argument
* Logging added to utils and metcalcpy by using safe_log in util
* Syntax edits
* Syntax edits wrt ValueError
* Syntax edits to NameError
* Minor syntax edits
* Minor syntax edits
* Minor edits with safe_log
* Minor edits to safe_log in agg_eclv
* Minor edit to event_equalize in utils
* Syntax edits to safe_log
* Syntax edits
* Added missing bracket
* New syntax edits
* Edits to Aplin
* Final edit to agg_stat
* Edits based on SonarQube findings
* adding new logging directions
* first page of google doc added
* fixing indenting and code block to python
* Removed extra debug statements
* Update actions/upload-artifact@v2 to be actions/upload-artifact@v4
* adding second page of documentation
* adding spacing for better presentation
* Trying to fix Enumerated list ends without a blank line warning
* fixing typo

---------

Co-authored-by: Ishita Srivastava
Co-authored-by: Lisa Goodrich
Co-authored-by: lisagoodrich <33230218+lisagoodrich@users.noreply.github.com>
Co-authored-by: Julie Prestopnik
---
 .github/workflows/documentation.yaml | 2 +-
 .vscode/settings.json | 4 +
 docs/Users_Guide/index.rst | 1 +
 docs/Users_Guide/logging.rst | 112 ++
 metcalcpy/agg_eclv.py | 210 ++--
 metcalcpy/agg_stat.py | 1108 ++++++++++++------
 metcalcpy/agg_stat_bootstrap.py | 94 +-
 metcalcpy/agg_stat_eqz.py | 58 +-
 metcalcpy/agg_stat_event_equalize.py | 49 +-
 metcalcpy/bootstrap.py | 147 ++-
 metcalcpy/calc_difficulty_index.py | 45 +-
 metcalcpy/event_equalize.py | 14 +-
 metcalcpy/event_equalize_against_values.py | 12 +-
 metcalcpy/logging_config.py | 90 ++
 metcalcpy/piecewise_linear.py | 8 +-
 metcalcpy/scorecard.py | 222 ++--
 metcalcpy/sum_stat.py | 63 +-
 metcalcpy/util/correlation.py | 224 +++-
 metcalcpy/util/ctc_statistics.py | 509 ++++++--
 metcalcpy/util/eclv_statistics.py | 13 +-
 metcalcpy/util/ecnt_statistics.py | 355 ++++--
 metcalcpy/util/grad_statistics.py | 88 +-
 metcalcpy/util/mcts_statistics.py | 24 +-
 metcalcpy/util/met_stats.py | 27 +-
 metcalcpy/util/mode_2d_arearat_statistics.py | 533 +++++++--
 metcalcpy/util/mode_2d_ratio_statistics.py | 539 +++++++--
 metcalcpy/util/mode_3d_ratio_statistics.py | 513 ++++++--
 metcalcpy/util/mode_3d_volrat_statistics.py | 654 ++++++++---
 metcalcpy/util/mode_arearat_statistics.py | 272 +++--
 metcalcpy/util/mode_ratio_statistics.py | 320 ++---
 metcalcpy/util/nbrcnt_statistics.py | 97 +-
 metcalcpy/util/nbrctc_statistics.py | 165 ++-
 metcalcpy/util/pstd_statistics.py | 340 ++++--
 metcalcpy/util/read_env_vars_in_config.py | 10 +-
 metcalcpy/util/read_file.py | 91 +-
 metcalcpy/util/rps_statistics.py | 67 +-
 metcalcpy/util/safe_log.py | 22 +
 metcalcpy/util/sal1l2_statistics.py | 72 +-
 metcalcpy/util/sl1l2_statistics.py | 357 ++++--
 metcalcpy/util/ssvar_statistics.py | 208 +++-
 metcalcpy/util/tost_paired.py | 154 +--
 metcalcpy/util/utils.py | 104 +-
 metcalcpy/util/val1l2_statistics.py | 80 +-
 metcalcpy/util/vcnt_statistics.py | 369 ++++--
 metcalcpy/util/vl1l2_statistics.py | 210 +++-
 metcalcpy/util/write_mpr.py | 7 +-
 metcalcpy/validate_mv_python.py | 12 +-
 metcalcpy/vertical_interp.py | 1 +
 test/ecnt_agg_stat.yaml | 3 +
 test/logs/log_agg_eclv.txt | 96 ++
 test/rrfs_ecnt_config_agg_stat.yaml | 3 +
 test/test_agg_eclv.py | 5 +-
 test/test_agg_ratio.py | 5 +-
 test/test_scorecard.py | 6 +-
 test/val1l2_agg_stat.yaml | 3 +
 test/vcnt_agg_stat.yaml | 3 +
 test/vl1l2_agg_stat_met_v12.yaml | 3 +
 57 files changed, 6643 insertions(+), 2160 deletions(-)
 create mode 100644 .vscode/settings.json
 create mode 100644 docs/Users_Guide/logging.rst
 create mode 100644 metcalcpy/logging_config.py
 create mode 100644 metcalcpy/util/safe_log.py
 create mode 100644 test/logs/log_agg_eclv.txt

diff --git a/.github/workflows/documentation.yaml b/.github/workflows/documentation.yaml
index 3840f8b7..469f1dc0 100644
--- a/.github/workflows/documentation.yaml
+++ b/.github/workflows/documentation.yaml
@@ -33,7 +33,7 @@ jobs:
       with:
         name: documentation
         path: artifact/documentation
-    - uses: actions/upload-artifact@v2
+    - uses: actions/upload-artifact@v4
       if: failure()
       with:
        name: documentation_warnings.log
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 00000000..03adc8d2
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,4 @@
+{
+    "IDX.aI.enableInlineCompletion": true,
+    "IDX.aI.enableCodebaseIndexing": true
+}
\ No newline at end of file
diff --git a/docs/Users_Guide/index.rst b/docs/Users_Guide/index.rst
index 545d6b37..cf5e69ba 100644
--- a/docs/Users_Guide/index.rst
+++ b/docs/Users_Guide/index.rst
@@ -63,6 +63,7 @@ National Center for Atmospheric Research (NCAR) is sponsored by NSF.
    :numbered: 4

    installation
+   logging
    vertical_interpolation
    difficulty_index
    aggregation
diff --git a/docs/Users_Guide/logging.rst b/docs/Users_Guide/logging.rst
new file mode 100644
index 00000000..e11a512f
--- /dev/null
+++ b/docs/Users_Guide/logging.rst
@@ -0,0 +1,112 @@
*************
Logging Guide
*************


This guide provides a comprehensive overview of the newly integrated logging capabilities
within METcalcpy. These enhancements are designed to provide users with valuable insights
into the application's execution, aiding in tasks such as debugging, performance monitoring,
and understanding the operational flow of the program.


What's New
==========

Centralized Logging Configuration (**logging_config.py**):
----------------------------------------------------------

A new script, **logging_config.py**, has been introduced to centralize the management of logging
configurations. This approach ensures consistency and simplifies the maintenance of logging
settings across all modules within METcalcpy.


* Key Feature: :code:`setup_logging` function

  * The :code:`setup_logging` function is the core of **logging_config.py**. It initializes
    and configures the logger instance based on parameters specified in a YAML configuration
    file. This function reads logging settings such as :code:`log_dir`,
    :code:`log_filename`, and :code:`log_level` from the YAML file and sets
    up Python's logging module accordingly; a sketch of this pattern is shown below.
  * By isolating the logging configuration in this script, it becomes easier to
    manage and update logging behavior without altering the core logic of other modules.
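A minimal sketch of what :code:`setup_logging` can look like follows. It is illustrative
rather than a copy of the shipped implementation: the YAML keys (:code:`log_dir`,
:code:`log_filename`, :code:`log_level`) are the ones documented in this guide, while the
logger name, default values, and message format used here are assumptions.

.. code-block:: py

    import logging
    import os

    def setup_logging(in_params):
        """Create and configure a logger from YAML-derived parameters (sketch)."""
        log_dir = in_params.get('log_dir', '.')
        log_filename = in_params.get('log_filename', 'metcalcpy.log')
        log_level = str(in_params.get('log_level', 'WARNING')).upper()

        os.makedirs(log_dir, exist_ok=True)
        logger = logging.getLogger('metcalcpy')  # assumed logger name
        logger.setLevel(getattr(logging, log_level, logging.WARNING))

        # Avoid stacking duplicate handlers if setup_logging is called again
        if not logger.handlers:
            handler = logging.FileHandler(os.path.join(log_dir, log_filename))
            handler.setFormatter(logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
            logger.addHandler(handler)
        return logger

The modules changed in this patch also route every message through the small helper added
in **metcalcpy/util/safe_log.py**. Its exact contents are not reproduced in this excerpt,
but every call site uses the shape :code:`safe_log(logger, level, message)`, so a plausible
minimal form, assuming its purpose is to make logging a no-op when no logger was
configured, is:

.. code-block:: py

    def safe_log(logger, log_level, message):
        """Log 'message' at 'log_level' (e.g. 'debug', 'info') when a logger
        exists; do nothing when logger is None (sketch, not the shipped code)."""
        if logger is not None:
            getattr(logger, log_level)(message)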

Example Integration in **agg_stat.py**:

.. code-block:: py

    from metcalcpy.logging_config import setup_logging

    class AggStat:
        def __init__(self, in_params):
            self.logger = setup_logging(in_params)
            # Other initialization code...

In this example, when an :code:`AggStat` object is instantiated, it invokes the
:code:`setup_logging` function, passing in the :code:`in_params` dictionary,
which contains logging configurations from a YAML file such as
**val1l2_agg_stat.yaml**. This ensures the logger is configured according to
the user's settings.

YAML-Driven Configuration
-------------------------

METcalcpy now allows users to customize logging behavior directly within
their YAML configuration files, eliminating the need for hardcoding
logging settings in Python scripts.

**Key Parameters in YAML Configuration:**

:code:`log_dir`: Specifies the directory where log files are stored.

:code:`log_filename`: Defines the name of the log file.

:code:`log_level`: Determines the verbosity of the log output.
Available levels are :code:`DEBUG`, :code:`INFO`, :code:`WARNING`, and :code:`ERROR`.

By setting the appropriate log level in your YAML configuration
file (e.g., :code:`log_level: WARNING`), you can control the verbosity of the
log output, ensuring that only the necessary information is recorded.

METcalcpy supports the following log levels:

  1. **DEBUG:**

     * **Purpose:** Captures detailed information for diagnosing issues.
     * **Use Case:** Ideal during development or troubleshooting to see all
       the internal workings of the application.

  2. **INFO:**

     * **Purpose:** Records general information about the application's execution.
     * **Use Case:** Suitable for tracking the progress and key events
       in the application's workflow without overwhelming detail.

  3. **WARNING:**

     * **Purpose:** Logs potential issues that are not immediately critical but
       could lead to problems.
     * **Use Case:** Useful for highlighting areas that may require attention
       but don't stop the application from running.

  4. **ERROR:**

     * **Purpose:** Captures serious issues that prevent parts of the
       application from functioning correctly.
     * **Use Case:** Necessary for logging events that require immediate
       attention and could cause the application to fail or produce incorrect results.

Informative Log Formatting
--------------------------

Log messages in METcalcpy are consistently formatted to include detailed information,
improving readability and facilitating easier analysis of log data.

diff --git a/metcalcpy/agg_eclv.py b/metcalcpy/agg_eclv.py
index eed39ebd..fc1c9800 100644
--- a/metcalcpy/agg_eclv.py
+++ b/metcalcpy/agg_eclv.py
@@ -37,17 +37,20 @@
 import argparse
 import yaml
 import pandas as pd
-
+import logging
+import signal
+import numpy as np
 from metcalcpy.bootstrap import bootstrap_and_value, BootstrapResults
 from metcalcpy.event_equalize import event_equalize
 from metcalcpy.util.utils import PRECISION, is_string_strictly_float
 from metcalcpy.util.eclv_statistics import *
 from metcalcpy.util.utils import is_string_integer, parse_bool
+from metcalcpy.logging_config import setup_logging
+from metcalcpy.util.safe_log import safe_log

 __author__ = 'Tatiana Burek'

-
 class AggEclv:
     """A class that performs aggregation statistic logic for ECLV data type on input data frame.
All parameters including data description and location is in the parameters dictionary @@ -67,39 +70,49 @@ class AggEclv: HEADER = ['thresh_i', 'x_pnt_i', 'y_pnt_i', 'stat_btcl', 'stat_btcu', 'nstats'] def __init__(self, in_params): - """Initialises the class by saving input parameters and reading data from file + """ + Initializes the class by saving input parameters and reading data from file. - Args: - in_params - input parameters as a dictionary - Raises: EmptyDataError or ValueError when the input DataFrame is empty - or doesn't have data + Args: + in_params (dict): Input parameters as a dictionary. + Raises: + pd.errors.EmptyDataError: When the input DataFrame is empty. + KeyError: When an expected key is missing in parameters. """ + + self.logger = setup_logging(in_params) + logger = self.logger + safe_log(logger, "debug", "Initializing AggEclv with parameters.") + self.statistic = None self.current_thresh = None self.params = in_params self.steps = np.arange(self.params['cl_step'], 1, self.params['cl_step']) self.column_names = np.array(self.LINE_TYPE_COLUMNS[self.params['line_type']]) + self.add_base_rate = self.params.get('add_base_rate', 0) - if 'add_base_rate' in self.params.keys(): - self.add_base_rate = self.params['add_base_rate'] - else: - self.add_base_rate = 0 - if self.add_base_rate != 0 or self.add_base_rate != 1: + if self.add_base_rate not in [0, 1]: self.add_base_rate = 0 + safe_log(logger, "warning", f"add_base_rate parameter was invalid. Reset to 0. Received value: {self.params.get('add_base_rate')}") + safe_log(logger, "debug", f"Parameters set: Steps: {self.steps}, Column Names: {self.column_names}, Add Base Rate: {self.add_base_rate}") + try: - self.input_data = pd.read_csv( - self.params['agg_stat_input'], - header=[0], - sep='\t' - ) - except pd.errors.EmptyDataError: + self.input_data = pd.read_csv(self.params['agg_stat_input'], header=0, sep='\t') + safe_log(logger, "info", f"Successfully loaded data from {self.params['agg_stat_input']}") + except pd.errors.EmptyDataError as e: + safe_log(logger, "error", "Input data file is empty, raising EmptyDataError.") + raise + except KeyError as e: + safe_log(logger, "error", f"Parameter with key {str(e)} is missing, raising KeyError.") raise - except KeyError as er: - print(f'ERROR: parameter with key {er} is missing') + except Exception as e: + safe_log(logger, "error", f"Unexpected error occurred during data loading: {str(e)}") raise + self.group_to_value = {} + safe_log(logger, "debug", "AggEclv initialized successfully.") def _calc_stats(self, values): """Calculate the statistic of values for each bootstrap sample @@ -113,23 +126,43 @@ def _calc_stats(self, values): an error """ + logger = self.logger + safe_log(logger, "debug", "Starting to calculate statistics for given values.") + if values is None: + safe_log(logger, "error", "Received None as input for values which is not expected.") + raise ValueError("Input values cannot be None.") - if values is not None and values.ndim == 2: + if values.ndim == 2: # The single value case - stat_values = [ - calculate_eclv(values, self.column_names, self.current_thresh, self.params['line_type'], self.steps, - self.add_base_rate)] - - elif values is not None and values.ndim == 3: - # bootstrapped case + safe_log(logger, "debug", "Processing single value case for statistical calculation.") + try: + stat_values = [ + calculate_eclv(values, self.column_names, self.current_thresh, self.params['line_type'], self.steps, + self.add_base_rate, logger=logger) + ] + safe_log(logger, 
"info", "Statistics calculated successfully for single value case.") + except Exception as e: + safe_log(logger, "error", f"Failed to calculate statistics for single value case: {str(e)}") + raise + + elif values.ndim == 3: + # Bootstrapped case + safe_log(logger, "debug", "Processing bootstrapped case for statistical calculation.") stat_values = [] - for row in values: - stat_value = [ - calculate_eclv(row, self.column_names, self.current_thresh, self.params['line_type'], self.steps)] - stat_values.append(stat_value) - + try: + for row in values: + stat_value = [ + calculate_eclv(row, self.column_names, self.current_thresh, self.params['line_type'], self.steps, logger=logger) + ] + stat_values.append(stat_value) + safe_log(logger, "info", "Statistics calculated successfully for all bootstrap samples.") + except Exception as e: + safe_log(logger, "error", f"Failed to calculate statistics for bootstrapped case: {str(e)}") + raise else: - raise KeyError("can't calculate statistic") + safe_log(logger, "error", f"Invalid dimension {values.ndim} for values, expected 2 or 3.") + raise KeyError(f"Invalid data dimensions {values.ndim}; expected 2D or 3D array.") + return stat_values def _get_bootstrapped_stats(self, series_data, thresholds): @@ -140,37 +173,33 @@ def _get_bootstrapped_stats(self, series_data, thresholds): BootstrapDistributionResults object """ + logger = self.logger + safe_log(logger, "debug", "Starting the calculation of bootstrapped statistics.") # if the data frame is empty - do nothing and return an empty object if series_data.empty: - return BootstrapResults(lower_bound=None, - value=None, - upper_bound=None) - + safe_log(logger, "warning", "Received an empty DataFrame, returning empty results.") + return BootstrapResults(lower_bound=None, value=None, upper_bound=None) + data = series_data[self.column_names].to_numpy() boot_stat_thresh = {} for ind, thresh in enumerate(thresholds): self.current_thresh = thresh + safe_log(logger, "debug", f"Processing threshold {thresh}.") if self.params['num_iterations'] == 1: - # don't need bootstrapping and CI calculation - - # calculate the statistic and exit + safe_log(logger, "info", "Single iteration mode: no bootstrapping required.") stat_val = self._calc_stats(data)[0] - - results = BootstrapResults(lower_bound=None, - value=stat_val, - upper_bound=None) - + results = BootstrapResults(lower_bound=None, value=stat_val, upper_bound=None) + safe_log(logger, "debug", f"Statistics calculated for threshold {thresh} without bootstrapping.") else: - # need bootstrapping and CI calculation in addition to statistic try: block_length = 1 - # to use circular block bootstrap or not - is_cbb = True - if 'circular_block_bootstrap' in self.params.keys(): + if 'circular_block_bootstrap' in self.params: is_cbb = parse_bool(self.params['circular_block_bootstrap']) + if is_cbb: + block_length = int(math.sqrt(len(data))) + safe_log(logger, "debug", f"Using circular block bootstrap with block length {block_length}.") - if is_cbb: - block_length = int(math.sqrt(len(data))) results = bootstrap_and_value( data, stat_func=self._calc_stats, @@ -179,12 +208,14 @@ def _get_bootstrapped_stats(self, series_data, thresholds): ci_method=self.params['method'], save_data=False, block_length=block_length, - eclv=True - ) - + eclv=True, + logger=logger + ) + safe_log(logger, "info", f"Bootstrapped statistics calculated for threshold {thresh}.") except KeyError as err: + safe_log(logger, "error", f"Failed to calculate bootstrapped statistics due to missing key: {err}") 
results = BootstrapResults(None, None, None) - print(err) + boot_stat_thresh[ind] = results return boot_stat_thresh @@ -197,13 +228,19 @@ def _init_out_frame(self, fields, row_number): Returns: pandas data frame """ - result = pd.DataFrame() + logger = self.logger + safe_log(logger, "debug", f"Initializing output frame with fields: {fields} and {row_number} rows.") + result = pd.DataFrame(index=range(row_number)) # fill series variables and values for field in fields: if field == 'nstats': - result[field] = [0] * row_number + result[field] = 0 # Initialize 'nstats' with 0s + safe_log(logger, "debug", f"Field '{field}' initialized with zeros across {row_number} rows.") else: - result[field] = [None] * row_number + result[field] = None # Initialize other fields with None + safe_log(logger, "debug", f"Field '{field}' initialized with None across {row_number} rows.") + + safe_log(logger, "info", f"Output DataFrame initialized successfully with fields: {fields}.") return result def _proceed_with_axis(self): @@ -213,24 +250,31 @@ def _proceed_with_axis(self): pandas dataframe with calculated stat values and CI """ + logger = self.logger + safe_log(logger, "debug", "Starting calculation of stat values for the requested Y axis.") + if self.input_data.empty: + safe_log(logger, "warning", "Input data frame is empty. Exiting calculation.") return pd.DataFrame() - series_val = self.params['series_val_1'] if len(series_val) > 0: current_header = list(series_val.keys()) current_header.extend(self.HEADER) + safe_log(logger, "debug", f"Headers set with series values: {current_header}") else: current_header = self.HEADER.copy() + safe_log(logger, "debug", "No series values provided; using default headers.") all_points = list(itertools.product(*series_val.values())) + safe_log(logger, "info", f"Generated all combinations for points to be processed: {len(all_points)} combinations.") out_frame = self._init_out_frame(current_header, 0) - + safe_log(logger, "debug", "Initialized output DataFrame for storing results.") # for each point for point in all_points: out_frame_local = self._init_out_frame(current_header, len(self.steps) + self.add_base_rate) - # filter point data + safe_log(logger, "debug", f"Processing point: {point}") + # filter point data all_filters = [] for field_ind, field in enumerate(series_val.keys()): filter_value = point[field_ind] @@ -244,18 +288,16 @@ def _proceed_with_axis(self): if field in self.input_data.keys(): all_filters.append((self.input_data[field].isin(filter_list))) - # use numpy to select the rows where any record evaluates to True mask = np.array(all_filters).all(axis=0) point_data = self.input_data.loc[mask] - # calculate bootstrap results if 'thresh_i' in point_data.columns: - thresholds = point_data['thresh_i'].unique().tolist() - thresholds.sort() + thresholds = sorted(point_data['thresh_i'].unique().tolist()) else: thresholds = [0] bootstrap_results = self._get_bootstrapped_stats(point_data, thresholds) + safe_log(logger, "debug", f"Bootstrap results obtained for point {point}") for thresh_ind, thresh in enumerate(thresholds): out_frame_local['thresh_i'] = [thresh] * (len(self.steps) + self.add_base_rate) @@ -268,7 +310,10 @@ def _proceed_with_axis(self): frames = [out_frame, out_frame_local] out_frame = pd.concat(frames) - out_frame.reset_index(inplace=True, drop=True) + safe_log(logger, "info", f"Completed processing for point {point}") + + out_frame.reset_index(drop=True, inplace=True) + safe_log(logger, "info", "All data processed successfully. 
Returning compiled DataFrame.")
         return out_frame

     def calculate_stats_and_ci(self):
         """Calculates aggregated statistics and confidence intervals

             Writes output data to the file
         """
-
+        logger = self.logger
+        safe_log(logger, "debug", "Starting calculation of statistics and confidence intervals.")
+
         # set random seed if present
         if self.params['random_seed'] is not None and self.params['random_seed'] != 'None':
             np.random.seed(self.params['random_seed'])
+            safe_log(logger, "info", f"Random seed set to {self.params['random_seed']}.")

-        is_event_equal = parse_bool(self.params['event_equal'])
         # perform EE if needed
-        if is_event_equal:
+        if parse_bool(self.params.get('event_equal', False)):
+            safe_log(logger, "info", "Event equalization enabled.")
             fix_vals_permuted_list = []
             for key in self.params['fixed_vars_vals_input']:
                 vals_permuted = list(itertools.product(*self.params['fixed_vars_vals_input'][key].values()))
                 vals_permuted_list = [item for sublist in vals_permuted for item in sublist]
                 fix_vals_permuted_list.append(vals_permuted_list)

             fix_vals_keys = list(self.params['fixed_vars_vals_input'].keys())
-            is_equalize_by_indep = parse_bool(self.params['equalize_by_indep'])
+            is_equalize_by_indep = parse_bool(self.params.get('equalize_by_indep', False))
             self.input_data = event_equalize(self.input_data, 'stat_name', self.params['series_val_1'],
                                              fix_vals_keys,
-                                             fix_vals_permuted_list, is_equalize_by_indep, False)
+                                             fix_vals_permuted_list, is_equalize_by_indep, False, logger=logger)
+            safe_log(logger, "debug", "Event equalization completed.")

+        # Process data to calculate statistics
         out_frame = self._proceed_with_axis()
+        safe_log(logger, "info", "Statistics and confidence intervals calculation completed.")
+
+        # Determine file writing mode based on configuration
         header = True
         mode = 'w'
-
-        if 'append_to_file' in self.params.keys() and self.params['append_to_file'] == 'True':
+        if parse_bool(self.params.get('append_to_file', False)):
             header = False
             mode = 'a'
+            safe_log(logger, "debug", "Appending to existing file.")

-        export_csv = out_frame.to_csv(self.params['agg_stat_output'],
-                                      index=None, header=header, mode=mode,
-                                      sep="\t", na_rep="NA", float_format='%.' + str(PRECISION) + 'f')
+        # Write output data to file
+        try:
+            out_frame.to_csv(self.params['agg_stat_output'], index=None, header=header, mode=mode,
+                             sep="\t", na_rep="NA", float_format='%.'
+ str(PRECISION) + 'f') + safe_log(logger, "info", f"Data successfully written to {self.params['agg_stat_output']} in mode {mode}.") + except Exception as e: + safe_log(logger, "error", f"Failed to write data to file: {str(e)}") if __name__ == "__main__": diff --git a/metcalcpy/agg_stat.py b/metcalcpy/agg_stat.py index c7d70ef4..86e7a183 100644 --- a/metcalcpy/agg_stat.py +++ b/metcalcpy/agg_stat.py @@ -62,6 +62,9 @@ OPERATION_TO_SIGN, perfect_score_adjustment, perform_event_equalization, \ aggregate_field_values, sort_data, DerivedCurveComponent, is_string_strictly_float +from metcalcpy.logging_config import setup_logging +from metcalcpy.util.safe_log import safe_log + __author__ = 'Tatiana Burek' @@ -86,7 +89,9 @@ def __init__(self, in_params): Raises: EmptyDataError or ValueError when the input DataFrame is empty or doesn't have data """ - + self.logger = setup_logging(in_params) + logger = self.logger + safe_log(logger, "debug", "Initializing AggStat with parameters") self.statistic = None self.derived_name_to_values = {} self.params = in_params @@ -97,19 +102,23 @@ def __init__(self, in_params): header=[0], sep='\t' ) - + safe_log(logger, "info", f"Successfully loaded data from {self.params['agg_stat_input']}") cols = self.input_data.columns.to_list() # Convert all col headers to lower case lc_cols = [lc_cols.lower() for lc_cols in cols] self.column_names = np.array(lc_cols) self.input_data.columns = lc_cols - - except pandas.errors.EmptyDataError: + except pd.errors.EmptyDataError as e: + safe_log(logger, "error", "Input data file is empty, raising EmptyDataError.") + raise + except KeyError as e: + safe_log(logger, "error", f"Parameter with key {str(e)} is missing, raising KeyError.") raise - except KeyError as er: - print(f'ERROR: parameter with key {er} is missing') + except Exception as e: + safe_log(logger, "error", f"Unexpected error occurred during data loading: {str(e)}") raise self.group_to_value = {} + safe_log(logger, "debug", "AggStat initialized successfully.") EXEMPTED_VARS = ['SSVAR_Spread', 'SSVAR_RMSE'] STATISTIC_TO_FIELDS = { @@ -267,29 +276,52 @@ def _calc_stats(self, values): an error """ + logger = self.logger func_name = f'calculate_{self.statistic}' + try: + stat_function = globals()[func_name] + except KeyError: + safe_log(logger, "error", f"Statistical function {func_name} not found in globals.") + raise KeyError(f"Function {func_name} not defined.") # some functions have an extra 3rd parameter that represents # if some data preliminary data aggregation was done # if this parameter is present we need to add it + num_parameters = len(signature(globals()[func_name]).parameters) + safe_log(logger, "debug", f"Function {func_name} expects {num_parameters} parameters.") + + if values is None: + safe_log(logger, "error", "Input values array is None.") + raise ValueError("Input values cannot be None.") if values is not None and values.ndim == 2: - + safe_log(logger, "debug", "Processing single value case for statistical calculation.") # The single value case - if num_parameters == 2: - stat_values = [globals()[func_name](values, self.column_names)] - else: - stat_values = [globals()[func_name](values, self.column_names, True)] - elif values is not None and values.ndim == 3: - # bootstrapped case - stat_values = [] - for row in values: + try: if num_parameters == 2: - stat_value = [globals()[func_name](row, self.column_names)] + stat_values = [stat_function(values, self.column_names)] else: - stat_value = [globals()[func_name](row, self.column_names, True)] + 
stat_values = [stat_function(values, self.column_names, True)]
-                stat_values.append(stat_value)
+            except Exception as e:
+                safe_log(logger, "error", f"Failed to calculate statistics: {e}")
+                raise
+
+        elif values is not None and values.ndim == 3:
+            # bootstrapped case
+            safe_log(logger, "debug", "Processing bootstrapped case for statistical calculation.")
+            stat_values = []
+            try:
+                for row in values:
+                    if num_parameters == 2:
+                        stat_value = [stat_function(row, self.column_names)]
+                    else:
+                        stat_value = [stat_function(row, self.column_names, True)]
+                    stat_values.append(stat_value)
+                safe_log(logger, "info", "Statistics calculated successfully for all bootstrap samples.")
+            except Exception as e:
+                safe_log(logger, "error", f"Failed during bootstrap calculations: {e}")
+                raise

         # pool = mp.Pool(mp.cpu_count())
         # stat_values = pool.map(partial(globals()['calculate_{}'.format(stat)],
         # columns_names=self.column_names), [row for row in values])
         # pool.close()
         # pool.join()

         else:
+            safe_log(logger, "error", f"Invalid data dimensions {values.ndim}; expected 2D or 3D array.")
             raise KeyError("can't calculate statistic")
         return stat_values
@@ -314,6 +347,16 @@
             a list of calculated derived statistics
         """
+        logger = self.logger
+        safe_log(logger, "debug", "Starting calculation of derived statistics.")
+
+        if values_both_arrays is None:
+            safe_log(logger, "error", "Input values array is None.")
+            raise ValueError("Input values cannot be None.")
+
+        if values_both_arrays.ndim not in [2, 3]:
+            safe_log(logger, "error", f"Invalid data dimensions {values_both_arrays.ndim}; expected 2D or 3D array.")
+            raise KeyError("Invalid data dimensions")

         if values_both_arrays is not None and values_both_arrays.ndim == 2:
             # The single value case
@@ -331,11 +374,12 @@
             stat_2 = values_2[0, stat_column_index].lower()
             func_name_1 = f'calculate_{stat_1}'
             func_name_2 = f'calculate_{stat_2}'
-        except ValueError:
+        except ValueError as e:
             func_name_1 = f'calculate_{self.statistic}'
             func_name_2 = f'calculate_{self.statistic}'
+            safe_log(logger, "warning", f"Could not read statistic names from the data ({e}); falling back to '{self.statistic}'.")

         # some functions have an extra 3rd parameter that represents
         # if some data preliminary data aggregation was done
@@ -345,24 +389,33 @@
         # calculate stat for the 1st array
-        if num_parameters_1 == 2:
-            stat_values_1 = [globals()[func_name_1](values_1, self.column_names)]
-        else:
-            stat_values_1 = [globals()[func_name_1](values_1, self.column_names, True)]
+        try:
+            if num_parameters_1 == 2:
+                stat_values_1 = [globals()[func_name_1](values_1, self.column_names, logger=logger)]
+            else:
+                stat_values_1 = [globals()[func_name_1](values_1, self.column_names, True, logger=logger)]

-        # calculate stat for the 2nd array
-        if num_parameters_2 == 2:
-            stat_values_2 = [globals()[func_name_2](values_2, self.column_names)]
-        else:
-            stat_values_2 = [globals()[func_name_2](values_2, self.column_names, True)]
+            # calculate stat for the 2nd array
+            if num_parameters_2 == 2:
+                stat_values_2 = [globals()[func_name_2](values_2, self.column_names, logger=logger)]
+            else:
+                stat_values_2 = [globals()[func_name_2](values_2, self.column_names, True, logger=logger)]
+        except Exception as e:
+            safe_log(logger, "error", f"Error calculating statistics: {e}")
+            raise

         # calculate derived stat
-        stat_values = calc_derived_curve_value(
-            stat_values_1,
-            stat_values_2,
-            values_both_arrays[0, -1])
-        if not isinstance(stat_values, list):
-            stat_values = [stat_values]
+        try:
+            stat_values = calc_derived_curve_value(
+                stat_values_1,
+                stat_values_2,
+                values_both_arrays[0, -1])
+            if not isinstance(stat_values, list):
+                stat_values = [stat_values]
+        except Exception as e:
+            safe_log(logger, "error", f"Error calculating derived statistics: {e}")
+            raise
+
     elif values_both_arrays is not None and values_both_arrays.ndim == 3:
         # bootstrapped case
         stat_values = []
@@ -385,6 +438,8 @@
-            except ValueError:
+            except ValueError as e:
                 func_name_1 = f'calculate_{self.statistic}'
                 func_name_2 = f'calculate_{self.statistic}'
+                safe_log(logger, "warning", f"Could not read statistic names from the data ({e}); falling back to '{self.statistic}'.")

             # some functions have an extra 3rd parameter that represents
             # if some data preliminary data aggregation was done
             num_parameters_2 = len(signature(globals()[func_name_2]).parameters)

             # calculate stat for the 1st array
-            if num_parameters_1 == 2:
-                stat_values_1 = [globals()[func_name_1](values_1, self.column_names)]
-            else:
-                stat_values_1 = [globals()[func_name_1](values_1, self.column_names, True)]
+            try:
+                if num_parameters_1 == 2:
+                    stat_values_1 = [globals()[func_name_1](values_1, self.column_names, logger=logger)]
+                else:
+                    stat_values_1 = [globals()[func_name_1](values_1, self.column_names, True, logger=logger)]

-            # calculate stat for the 2nd array
-            if num_parameters_2 == 2:
-                stat_values_2 = [globals()[func_name_2](values_2, self.column_names)]
-            else:
-                stat_values_2 = [globals()[func_name_2](values_2, self.column_names, True)]
+                # calculate stat for the 2nd array
+                if num_parameters_2 == 2:
+                    stat_values_2 = [globals()[func_name_2](values_2, self.column_names, logger=logger)]
+                else:
+                    stat_values_2 = [globals()[func_name_2](values_2, self.column_names, True, logger=logger)]
+            except Exception as e:
+                safe_log(logger, "error", f"Error calculating statistics: {e}")
+                raise

             # calculate derived stat
-            stat_value = calc_derived_curve_value(
-                stat_values_1,
-                stat_values_2,
-                row[0, -1])
-            if not isinstance(stat_value, list):
-                stat_value = [stat_value]
-            stat_values.append(stat_value)
+            try:
+                stat_value = calc_derived_curve_value(
+                    stat_values_1,
+                    stat_values_2,
+                    row[0, -1])
+                if not isinstance(stat_value, list):
+                    stat_value = [stat_value]
+                stat_values.append(stat_value)
+                safe_log(logger, "info", "Derived statistics calculated successfully.")
+            except Exception as e:
+                safe_log(logger, "error", f"Error calculating derived statistics: {e}")
+                raise

         # pool = mp.Pool(mp.cpu_count())
         # stat_values = pool.map(partial(globals()['calculate_{}'.format(stat)],
@@ -431,10 +495,31 @@ def _prepare_sl1l2_data(self, data_for_prepare):

         Args:
             data_for_prepare: a 2d numpy array of values we want to calculate the statistic on
         """
+        logger = self.logger
+        safe_log(logger, "debug", "Starting preparation of sl1l2 data.")
+
+        if data_for_prepare is None:
+            safe_log(logger, "error", "Input data for preparation is None.")
+            raise ValueError("Input data cannot be None.")
+
         if self.statistic in self.STATISTIC_TO_FIELDS.keys():
-            for column in self.STATISTIC_TO_FIELDS[self.statistic]:
-                data_for_prepare[column] \
-                    = data_for_prepare[column].values * data_for_prepare['total'].values
+            try:
+                for column in self.STATISTIC_TO_FIELDS[self.statistic]:
+                    if column in data_for_prepare.columns and 'total' in data_for_prepare.columns:
+
data_for_prepare[column] \ + = data_for_prepare[column].values * data_for_prepare['total'].values + safe_log(logger, "debug", f"Data for column '{column}' multiplied by 'total'.") + else: + safe_log(logger, "warning", f"Column '{column}' or 'total' not found in the DataFrame.") + except Exception as e: + safe_log(logger, "error", f"Failed to prepare data for statistic calculation: {e}") + raise + else: + error_message = f"Statistic '{self.statistic}' is not recognized or lacks associated fields." + safe_log(logger, "error", error_message) + raise KeyError(error_message) + + safe_log(logger, "info", "sl1l2 data preparation completed successfully.") def _prepare_sal1l2_data(self, data_for_prepare): """Prepares sal1l2 data. @@ -443,11 +528,32 @@ def _prepare_sal1l2_data(self, data_for_prepare): Args: data_for_prepare: a 2d numpy array of values we want to calculate the statistic on """ - if self.statistic in self.STATISTIC_TO_FIELDS.keys(): + logger = self.logger + safe_log(logger, "debug", f"Starting preparation of sal1l2 data for statistic '{self.statistic}'.") + + if data_for_prepare is None: + safe_log(logger, "error", "Input data for preparation is None.") + raise ValueError("Input data cannot be None.") + + if self.statistic not in self.STATISTIC_TO_FIELDS: + error_message = f"Statistic '{self.statistic}' is not recognized or lacks associated fields." + safe_log(logger, "error", error_message) + raise KeyError(error_message) + + try: for column in self.STATISTIC_TO_FIELDS[self.statistic]: - data_for_prepare[column] \ - = data_for_prepare[column].values * data_for_prepare['total'].values + if column in data_for_prepare.columns and 'total' in data_for_prepare.columns: + data_for_prepare[column] = data_for_prepare[column] * data_for_prepare['total'] + safe_log(logger, "debug", f"Column '{column}' successfully multiplied by 'total'.") + else: + missing_columns = [col for col in [column, 'total'] if col not in data_for_prepare.columns] + safe_log(logger, "warning", f"Missing columns {missing_columns} in DataFrame. Multiplication skipped.") + except Exception as e: + safe_log(logger, "error", f"Failed to prepare data for statistic calculation: {e}") + raise + safe_log(logger, "info", "sal1l2 data preparation completed successfully.") + def _prepare_grad_data(self, data_for_prepare): """Prepares grad data. Multiplies needed for the statistic calculation columns to the 'total'value @@ -455,10 +561,31 @@ def _prepare_grad_data(self, data_for_prepare): Args: data_for_prepare: a 2d numpy array of values we want to calculate the statistic on """ - if self.statistic in self.STATISTIC_TO_FIELDS.keys(): + logger = self.logger + safe_log(logger, "debug", f"Starting preparation of grad data for statistic '{self.statistic}'.") + + if data_for_prepare is None: + safe_log(logger, "error", "Input data for preparation is None.") + raise ValueError("Input data cannot be None.") + + if self.statistic not in self.STATISTIC_TO_FIELDS: + error_message = f"Statistic '{self.statistic}' is not recognized or lacks associated fields." 
+ safe_log(logger, "error", error_message) + raise KeyError(error_message) + + try: for column in self.STATISTIC_TO_FIELDS[self.statistic]: - data_for_prepare[column] \ - = data_for_prepare[column].values * data_for_prepare['total'].values + if column in data_for_prepare.columns and 'total' in data_for_prepare.columns: + data_for_prepare[column] = data_for_prepare[column] * data_for_prepare['total'] + safe_log(logger, "debug", f"Column '{column}' successfully multiplied by 'total'.") + else: + missing_columns = [col for col in [column, 'total'] if col not in data_for_prepare.columns] + safe_log(logger, "warning", f"Missing columns {missing_columns} in DataFrame. Multiplication skipped.") + except Exception as e: + safe_log(logger, "error", f"Failed to prepare data for statistic calculation: {e}") + raise + + safe_log(logger, "info", "Grad data preparation completed successfully.") def _prepare_vl1l2_data(self, data_for_prepare): """Prepares vl1l2 data. @@ -468,19 +595,44 @@ def _prepare_vl1l2_data(self, data_for_prepare): Args: data_for_prepare: a 2d numpy array of values we want to calculate the statistic on """ + logger = self.logger + safe_log(logger, "debug", "Starting preparation of vl1l2 data.") + + if data_for_prepare is None: + safe_log(logger, "error", "Input data for preparation is None.") + raise ValueError("Input data cannot be None.") + # Determine the MET version for this data. If MET v12.0 or above, use the 'total_dir' column rather than # the 'total' column. - met_version = get_met_version(data_for_prepare) - major = int(met_version.major) + try: + met_version = get_met_version(data_for_prepare, logger=logger) + major = int(met_version.major) + safe_log(logger, "debug", f"Detected MET version: {major}") + except Exception as e: + safe_log(logger, "error", f"Failed to determine MET version from data: {e}") + raise + + if self.statistic not in self.STATISTIC_TO_FIELDS: + error_message = f"Statistic '{self.statistic}' is not recognized or lacks associated fields." + safe_log(logger, "error", error_message) + raise KeyError(error_message) if self.statistic in self.STATISTIC_TO_FIELDS.keys(): - for column in self.STATISTIC_TO_FIELDS[self.statistic]: - if major >= int(12): - data_for_prepare[column] \ - = data_for_prepare[column].values * data_for_prepare['total_dir'].values - else: - data_for_prepare[column] \ + try: + for column in self.STATISTIC_TO_FIELDS[self.statistic]: + if major >= int(12): + data_for_prepare[column] \ + = data_for_prepare[column].values * data_for_prepare['total_dir'].values + safe_log(logger, "debug", f"Column '{column}' successfully multiplied by 'total_dir'.") + else: + data_for_prepare[column] \ = data_for_prepare[column].values * data_for_prepare['total'].values + safe_log(logger, "debug", f"Column '{column}' successfully multiplied by 'total'.") + except Exception as e: + safe_log(logger, "error", f"Error during data preparation: {e}") + raise + + safe_log(logger, "info", "vl1l2 data preparation completed successfully.") def _prepare_val1l2_data(self, data_for_prepare): """Prepares val1l2 data. @@ -492,17 +644,43 @@ def _prepare_val1l2_data(self, data_for_prepare): """ # Determine the MET version for this data. If MET v12.0 or above, use the 'total_dir' column rather than # the 'total' column. 
- met_version = get_met_version(data_for_prepare) - major = int(met_version.major) + logger = self.logger + safe_log(logger, "debug", "Starting preparation of val1l2 data.") + + if data_for_prepare is None: + safe_log(logger, "error", "Input data for preparation is None.") + raise ValueError("Input data cannot be None.") + + try: + met_version = get_met_version(data_for_prepare, logger=logger) + major = int(met_version.major) + safe_log(logger, "debug", f"Detected MET version: {major}") + except Exception as e: + safe_log(logger, "error", f"Failed to determine MET version from data: {e}") + raise + + if self.statistic not in self.STATISTIC_TO_FIELDS: + error_message = f"Statistic '{self.statistic}' is not recognized or lacks associated fields." + safe_log(logger, "error", error_message) + raise KeyError(error_message) if self.statistic in self.STATISTIC_TO_FIELDS.keys(): - for column in self.STATISTIC_TO_FIELDS[self.statistic]: - if major >= int(12): - data_for_prepare[column] \ - = data_for_prepare[column].values * data_for_prepare['total_dir'].values - else: - data_for_prepare[column] \ - = data_for_prepare[column].values * data_for_prepare['total'].values + try: + for column in self.STATISTIC_TO_FIELDS[self.statistic]: + if major >= int(12): + data_for_prepare[column] \ + = data_for_prepare[column].values * data_for_prepare['total_dir'].values + safe_log(logger, "debug", f"Column '{column}' successfully multiplied by 'total_dir'.") + else: + data_for_prepare[column] \ + = data_for_prepare[column].values * data_for_prepare['total'].values + safe_log(logger, "debug", f"Column '{column}' successfully multiplied by 'total'.") + except Exception as e: + safe_log(logger, "error", f"Error during data preparation: {e}") + raise + + safe_log(logger, "info", "val1l2 data preparation completed successfully.") + def _prepare_vcnt_data(self, data_for_prepare): """Prepares vcnt data. Multiplies needed for the statistic calculation columns to the 'total_dir' value @@ -510,19 +688,43 @@ def _prepare_vcnt_data(self, data_for_prepare): Args: data_for_prepare: a 2d numpy array of values we want to calculate the statistic on """ + logger = self.logger + safe_log(logger, "debug", "Starting preparation of vcnt data.") # Determine the MET version for this data. If MET v12.0 or above, use the 'total_dir' column rather than # the 'total' column. - met_version = get_met_version(data_for_prepare) - major = int(met_version.major) + if data_for_prepare is None: + safe_log(logger, "error", "Input data for preparation is None.") + raise ValueError("Input data cannot be None.") + + try: + met_version = get_met_version(data_for_prepare, logger=logger) + major = int(met_version.major) + safe_log(logger, "debug", f"Detected MET version: {major}") + except Exception as e: + safe_log(logger, "error", f"Failed to determine MET version from data: {e}") + raise + + if self.statistic not in self.STATISTIC_TO_FIELDS: + error_message = f"Statistic '{self.statistic}' is not recognized or lacks associated fields." 
+ safe_log(logger, "error", error_message) + raise KeyError(error_message) if self.statistic in self.STATISTIC_TO_FIELDS.keys(): - for column in self.STATISTIC_TO_FIELDS[self.statistic]: - if major >= int(12): - data_for_prepare[column] \ - = data_for_prepare[column].values * data_for_prepare['total_dir'].values - else: - data_for_prepare[column] \ - = data_for_prepare[column].values * data_for_prepare['total'].values + try: + for column in self.STATISTIC_TO_FIELDS[self.statistic]: + if major >= int(12): + data_for_prepare[column] \ + = data_for_prepare[column].values * data_for_prepare['total_dir'].values + safe_log(logger, "debug", f"Column '{column}' successfully multiplied by 'total_dir'.") + else: + data_for_prepare[column] \ + = data_for_prepare[column].values * data_for_prepare['total'].values + safe_log(logger, "debug", f"Column '{column}' successfully multiplied by 'total'.") + except Exception as e: + safe_log(logger, "error", f"Error during data preparation: {e}") + raise + + safe_log(logger, "info", "vcnt data preparation completed successfully.") def _prepare_ecnt_data(self, data_for_prepare): """Prepares ecnt data. @@ -531,43 +733,92 @@ def _prepare_ecnt_data(self, data_for_prepare): Args: data_for_prepare: a 2d numpy array of values we want to calculate the statistic on """ - mse = data_for_prepare['rmse'].values * data_for_prepare['rmse'].values - mse_oerr = data_for_prepare['rmse_oerr'].values * data_for_prepare['rmse_oerr'].values - # crps_climo = data_for_prepare['crps'].values * data_for_prepare['crps'].values - - variance = data_for_prepare['spread'].values * data_for_prepare['spread'].values - variance_oerr = data_for_prepare['spread_oerr'].values * data_for_prepare['spread_oerr'].values - variance_plus_oerr = data_for_prepare['spread_oerr'].values * data_for_prepare['spread_oerr'].values + logger = self.logger + safe_log(logger, "debug", "Starting preparation of ECNT data.") - data_for_prepare['mse'] = mse * data_for_prepare['total'].values - data_for_prepare['mse_oerr'] = mse_oerr * data_for_prepare['total'].values - # data_for_prepare['crps_climo'] = crps_climo * data_for_prepare['total'].values + if data_for_prepare is None: + safe_log(logger, "error", "Input data for preparation is None.") + raise ValueError("Input data cannot be None.") - data_for_prepare['variance'] = variance * data_for_prepare['total'].values - data_for_prepare['variance_oerr'] = variance_oerr * data_for_prepare['total'].values - data_for_prepare['variance_plus_oerr'] = variance_plus_oerr * data_for_prepare['total'].values + try: + mse = data_for_prepare['rmse'].values * data_for_prepare['rmse'].values + mse_oerr = data_for_prepare['rmse_oerr'].values * data_for_prepare['rmse_oerr'].values + # crps_climo = data_for_prepare['crps'].values * data_for_prepare['crps'].values + + variance = data_for_prepare['spread'].values * data_for_prepare['spread'].values + variance_oerr = data_for_prepare['spread_oerr'].values * data_for_prepare['spread_oerr'].values + variance_plus_oerr = data_for_prepare['spread_oerr'].values * data_for_prepare['spread_oerr'].values + + data_for_prepare['mse'] = mse * data_for_prepare['total'].values + data_for_prepare['mse_oerr'] = mse_oerr * data_for_prepare['total'].values + # data_for_prepare['crps_climo'] = crps_climo * data_for_prepare['total'].values + + data_for_prepare['variance'] = variance * data_for_prepare['total'].values + data_for_prepare['variance_oerr'] = variance_oerr * data_for_prepare['total'].values + data_for_prepare['variance_plus_oerr'] = 
variance_plus_oerr * data_for_prepare['total'].values
+            safe_log(logger, "debug", "Basic statistical calculations completed.")
+
+            self.column_names = data_for_prepare.columns.values
+
+            if self.statistic in self.STATISTIC_TO_FIELDS.keys():
+                for column in self.STATISTIC_TO_FIELDS[self.statistic]:
+                    if column == 'me_ge_obs':
+                        data_for_prepare[column] \
+                            = data_for_prepare[column].values * data_for_prepare['n_ge_obs'].values
+                        safe_log(logger, "debug", f"Column '{column}' successfully multiplied by 'n_ge_obs'.")
+                    elif column == 'me_lt_obs':
+                        data_for_prepare[column] \
+                            = data_for_prepare[column].values * data_for_prepare['n_lt_obs'].values
+                        safe_log(logger, "debug", f"Column '{column}' successfully multiplied by 'n_lt_obs'.")
+                    else:
+                        data_for_prepare[column] \
+                            = data_for_prepare[column].values * data_for_prepare['total'].values
+                        safe_log(logger, "debug", f"Column '{column}' successfully multiplied by 'total'.")
+            else:
+                safe_log(logger, "warning", f"Statistic '{self.statistic}' does not have associated fields for ECNT preparation.")
+            safe_log(logger, "info", "ECNT data preparation completed successfully.")

-        self.column_names = data_for_prepare.columns.values
-        if self.statistic in self.STATISTIC_TO_FIELDS.keys():
-            for column in self.STATISTIC_TO_FIELDS[self.statistic]:
-                if column == 'me_ge_obs':
-                    data_for_prepare[column] \
-                        = data_for_prepare[column].values * data_for_prepare['n_ge_obs'].values
-                elif column == 'me_lt_obs':
-                    data_for_prepare[column] \
-                        = data_for_prepare[column].values * data_for_prepare['n_lt_obs'].values
-                else:
-                    data_for_prepare[column] \
-                        = data_for_prepare[column].values * data_for_prepare['total'].values
+        except KeyError as e:
+            safe_log(logger, "error", f"Key error during data preparation: {e}")
+            raise
+        except Exception as e:
+            safe_log(logger, "error", f"Unexpected error during data preparation: {e}")
+            raise

     def _prepare_rps_data(self, data_for_prepare):
-        total = data_for_prepare['total'].values
-        d_rps_climo = data_for_prepare['rps'].values / (1 - data_for_prepare['rpss'].values)
-        data_for_prepare['rps_climo'] = d_rps_climo * total
-        data_for_prepare['rps'] = data_for_prepare['rps'].values * total
-        data_for_prepare['rps_comp'] = data_for_prepare['rps_comp'].values * total
-        self.column_names = data_for_prepare.columns.values
+        """Prepares rps data.
+ Multiplies needed for the statistic calculation columns to the 'total' value + + Args: + data_for_prepare: a 2d numpy array of values we want to calculate the statistic on + """ + logger = self.logger + safe_log(logger, "debug", "Starting preparation of RPS data.") + + + if data_for_prepare is None: + safe_log(logger, "error", "Input data for preparation is None.") + raise ValueError("Input data cannot be None.") + + try: + total = data_for_prepare['total'].values + d_rps_climo = data_for_prepare['rps'].values / (1 - data_for_prepare['rpss'].values) + data_for_prepare['rps_climo'] = d_rps_climo * total + safe_log(logger, "debug", f"Column 'rps_climo' successfully calculated.") + data_for_prepare['rps'] = data_for_prepare['rps'].values * total + safe_log(logger, "debug", f"Column 'rps' successfully multiplied by 'total'.") + data_for_prepare['rps_comp'] = data_for_prepare['rps_comp'].values * total + safe_log(logger, "debug", f"Column 'rps_comp' successfully multiplied by 'total'.") + self.column_names = data_for_prepare.columns.values + except KeyError as e: + safe_log(logger, "error", f"Key error during data preparation: {e}") + raise + except Exception as e: + safe_log(logger, "error", f"Unexpected error during data preparation: {e}") + raise def _prepare_ssvar_data(self, data_for_prepare): """Prepares ssvar data. @@ -576,14 +827,28 @@ def _prepare_ssvar_data(self, data_for_prepare): Args: data_for_prepare: a 2d numpy array of values we want to calculate the statistic on """ + logger = self.logger + safe_log(logger, "debug", f"Starting preparation of ssvar data for statistic '{self.statistic}'.") + + if data_for_prepare is None: + safe_log(logger, "error", "Input data for preparation is None.") + raise ValueError("Input data cannot be None.") # rename bin_n column to total data_for_prepare.rename(columns={"total": "total_orig", "bin_n": "total"}, inplace=True) self.column_names = data_for_prepare.columns.values + if self.statistic not in self.STATISTIC_TO_FIELDS: + error_message = f"Statistic '{self.statistic}' is not recognized or lacks associated fields." + safe_log(logger, "error", error_message) + raise KeyError(error_message) + for column in self.STATISTIC_TO_FIELDS[self.statistic]: data_for_prepare[column] \ = data_for_prepare[column].values * data_for_prepare['total'].values + safe_log(logger, "debug", f"Column '{column}' successfully multiplied by 'total'.") + + safe_log(logger, "info", "ssvar data preparation completed successfully.") def _prepare_nbr_cnt_data(self, data_for_prepare): """Prepares nbrcnt data. @@ -592,6 +857,12 @@ def _prepare_nbr_cnt_data(self, data_for_prepare): Args: data_for_prepare: a 2d numpy array of values we want to calculate the statistic on """ + logger = self.logger + safe_log(logger, "debug", f"Starting preparation of nbrcnt data for statistic '{self.statistic}'.") + + if data_for_prepare is None: + safe_log(logger, "error", "Input data for preparation is None.") + raise ValueError("Input data cannot be None.") total = data_for_prepare['total'].values fbs = total * data_for_prepare['fbs'].values @@ -602,6 +873,8 @@ def _prepare_nbr_cnt_data(self, data_for_prepare): data_for_prepare['fss'] = fss_den data_for_prepare['f_rate'] = f_rate + safe_log(logger, "info", "nbrcnt data preparation completed successfully.") + def _prepare_pct_data(self, data_for_prepare): """Prepares pct data. 
Multiplies needed for the statistic calculation columns to the 'total' value

         Args:
             data_for_prepare: a 2d numpy array of values we want to calculate the statistic on
         """
+
     def _prepare_mctc_data(self, data_for_prepare):
         """Prepares mctc data.
             Nothing needs to be done

         Args:
             data_for_prepare: a 2d numpy array of values we want to calculate the statistic on
         """
+        logger = self.logger
+        safe_log(logger, "debug", "Starting preparation of MCTC data.")
+
+        if data_for_prepare is None:
+            safe_log(logger, "error", "Input data for preparation is None.")
+            raise ValueError("Input data cannot be None.")
+
         if 'ec_value' in data_for_prepare.columns:
             if not (data_for_prepare['ec_value'] == data_for_prepare['ec_value'][0]).all():
                 raise ValueError('EC_VALUE is NOT constant across MCTC lines')
+
     def _prepare_ctc_data(self, data_for_prepare):
         """Prepares CTC data.
             Checks if all values from ec_value column are the same and if not - throws an error

         Args:
             data_for_prepare: a 2d numpy array of values we want to calculate the statistic on
         """
+        logger = self.logger
+        safe_log(logger, "debug", "Starting preparation of CTC data.")
+
+        if data_for_prepare is None:
+            safe_log(logger, "error", "Input data for preparation is None.")
+            raise ValueError("Input data cannot be None.")

         if 'ec_value' in data_for_prepare.columns:
             if not (data_for_prepare['ec_value'] == data_for_prepare['ec_value'][0]).all():
@@ -641,6 +929,12 @@ def _prepare_cts_data(self, data_for_prepare):

         Args:
             data_for_prepare: a 2d numpy array of values we want to calculate the statistic on
         """
+        logger = self.logger
+        safe_log(logger, "debug", "Starting preparation of CTS data.")
+
+        if data_for_prepare is None:
+            safe_log(logger, "error", "Input data for preparation is None.")
+            raise ValueError("Input data cannot be None.")

         if 'ec_value' in data_for_prepare.columns:
             if not (data_for_prepare['ec_value'] == data_for_prepare['ec_value'][0]).all():
@@ -655,19 +949,20 @@ def _prepare_nbr_ctc_data(self, data_for_prepare):
         """

     def _get_bootstrapped_stats_for_derived(self, series, distributions, axis="1"):
-        """ Calculates aggregation derived statistic value and CI intervals if needed for input data
-            Args:
-                series: array of length = 3 where
+        """
+        Calculates aggregation derived statistic value and CI intervals if needed for input data.
+
+        Args:
+            series: array of length = 3 where
                 1st element - derived series title,
-                    ex. 'DIFF(ENS001v3.6.1_d01 DPT FBAR-ENS001v3.6.1_d02 DPT FBAR)'
-                others - additional values like indy val and statistic
-                distributions - dictionary of the series title
-                    to it's BootstrapDistributionResult object
-
-            Returns:
-                BootstrapDistributionResults object
-
+                ex. 'DIFF(ENS001v3.6.1_d01 DPT FBAR-ENS001v3.6.1_d02 DPT FBAR)',
+                others - additional values like indy val and statistic.
+            distributions: dictionary of the series title to its BootstrapDistributionResult object.
+
+        Returns:
+            BootstrapDistributionResults object.
""" + logger = self.logger + safe_log(logger, "debug", "Starting bootstrapped statistics calculation for derived series.") # get derived name derived_name = '' @@ -676,32 +971,41 @@ def _get_bootstrapped_stats_for_derived(self, series, distributions, axis="1"): if point_component.startswith((operation + '(', operation + ' (')): derived_name = point_component break - # find all components for the 1st and 2nd series - derived_curve_component = self.derived_name_to_values[derived_name] - permute_for_first_series = derived_curve_component.first_component.copy() - for series_comp in series[1:]: - if series_comp not in permute_for_first_series: - permute_for_first_series.append(series_comp) - - # replace first_series components group names to values - for i, perm in enumerate(permute_for_first_series): - if perm in self.group_to_value: - permute_for_first_series[i] = self.group_to_value[perm] - - permute_for_second_series = derived_curve_component.second_component.copy() - for series_comp in series[1:]: - if series_comp not in permute_for_second_series: - permute_for_second_series.append(series_comp) - - # replace second_series components group names to values - for i, perm in enumerate(permute_for_second_series): - if perm in self.group_to_value: - permute_for_second_series[i] = self.group_to_value[perm] - - ds_1 = None - ds_2 = None - - # for each component find its BootstrapDistributionResult object + safe_log(logger, "debug", f"Derived name identified: {derived_name}") + + try: + # find all components for the 1st and 2nd series + derived_curve_component = self.derived_name_to_values[derived_name] + permute_for_first_series = derived_curve_component.first_component.copy() + for series_comp in series[1:]: + if series_comp not in permute_for_first_series: + permute_for_first_series.append(series_comp) + + # replace first_series components group names to values + for i, perm in enumerate(permute_for_first_series): + if perm in self.group_to_value: + permute_for_first_series[i] = self.group_to_value[perm] + + permute_for_second_series = derived_curve_component.second_component.copy() + for series_comp in series[1:]: + if series_comp not in permute_for_second_series: + permute_for_second_series.append(series_comp) + + # replace second_series components group names to values + for i, perm in enumerate(permute_for_second_series): + if perm in self.group_to_value: + permute_for_second_series[i] = self.group_to_value[perm] + + except KeyError as err: + safe_log(logger, "error", f"Error during derived component lookup: {err}") + return BootstrapResults(None, None, None) + + safe_log(logger, "debug", f"First series components: {permute_for_first_series}") + safe_log(logger, "debug", f"Second series components: {permute_for_second_series}") + + ds_1 = ds_2 = None + + # for each component find its BootstrapDistributionResult object for series_to_distrib_key in distributions.keys(): if all(elem in permute_for_first_series for elem in series_to_distrib_key): ds_1 = distributions[series_to_distrib_key] @@ -710,47 +1014,37 @@ def _get_bootstrapped_stats_for_derived(self, series, distributions, axis="1"): if ds_1 is not None and ds_2 is not None: break - # if BootstrapDistributionResult object doesn't exist - # or the original series data size is 0 return an empty object - if ds_1.values is None or ds_2.values is None \ - or ds_1.values.size == 0 or ds_2.values.size == 0: - return BootstrapResults(lower_bound=None, - value=None, - upper_bound=None) - # calculate the number of values in the group if the series 
has a group
-        # it is needed for the validation
-        num_diff_vals_first = 0
-        num_diff_vals_second = 0
-        for val in permute_for_first_series:
-            size = len(val.split(GROUP_SEPARATOR))
-            if size > 1:
-                num_diff_vals_first = num_diff_vals_first + size
-        for val in permute_for_second_series:
-            size = len(val.split(GROUP_SEPARATOR))
-            if size > 1:
-                num_diff_vals_second = num_diff_vals_second + size
-        if num_diff_vals_first == 0:
-            num_diff_vals_first = 1
-        if num_diff_vals_second == 0:
-            num_diff_vals_second = 1
+        if ds_1 is None or ds_2 is None:
+            safe_log(logger, "warning", "Could not find BootstrapDistributionResult objects for one or both series.")
+            return BootstrapResults(lower_bound=None, value=None, upper_bound=None)
+
+        # if BootstrapDistributionResult object doesn't exist or the original series data size is 0, return empty object
+        if ds_1.values is None or ds_2.values is None or ds_1.values.size == 0 or ds_2.values.size == 0:
+            safe_log(logger, "warning", "One or both series have no values. Returning empty BootstrapResults object.")
+            return BootstrapResults(lower_bound=None, value=None, upper_bound=None)
+
+        # calculate the number of values in the group if the series has a group, needed for validation
+        num_diff_vals_first = sum(len(val.split(GROUP_SEPARATOR)) for val in permute_for_first_series if len(val.split(GROUP_SEPARATOR)) > 1)
+        num_diff_vals_second = sum(len(val.split(GROUP_SEPARATOR)) for val in permute_for_second_series if len(val.split(GROUP_SEPARATOR)) > 1)
+        num_diff_vals_first = max(num_diff_vals_first, 1)
+        num_diff_vals_second = max(num_diff_vals_second, 1)

         # validate data
         if derived_curve_component.derived_operation != 'SINGLE':
+            safe_log(logger, "debug", "Validating series for derived operation.")
             self._validate_series_cases_for_derived_operation(ds_1.values, axis, num_diff_vals_first)
             self._validate_series_cases_for_derived_operation(ds_2.values, axis, num_diff_vals_second)

+        # handle bootstrapping
         if self.params['num_iterations'] == 1 or derived_curve_component.derived_operation == 'ETB':
-            # don't need bootstrapping and CI calculation
-
-            # calculate the derived statistic and exit
-
+            safe_log(logger, "debug", "No bootstrapping required; calculating derived statistic.")
             if derived_curve_component.derived_operation == 'ETB':
                 index_array = np.where(self.column_names == 'stat_value')[0]
                 func_name = f'calculate_{self.statistic}'
                 for row in ds_1.values:
-                    stat = [globals()[func_name](row[np.newaxis, ...], self.column_names)]
+                    stat = [globals()[func_name](row[np.newaxis, ...], self.column_names, logger=logger)]
                     row[index_array] = stat
                 for row in ds_2.values:
-                    stat = [globals()[func_name](row[np.newaxis, ...], self.column_names)]
+                    stat = [globals()[func_name](row[np.newaxis, ...], self.column_names, logger=logger)]
                     row[index_array] = stat

                 ds_1_value = ds_1.values[:, index_array].flatten().tolist()
@@ -759,38 +1053,19 @@ def _get_bootstrapped_stats_for_derived(self, series, distributions, axis="1"):
                 ds_1_value = [ds_1.value]
                 ds_2_value = [ds_2.value]

-            stat_val = calc_derived_curve_value(
-                ds_1_value,
-                ds_2_value,
-                derived_curve_component.derived_operation)
+            stat_val = calc_derived_curve_value(ds_1_value, ds_2_value, derived_curve_component.derived_operation)
             if stat_val is not None:
-                results = BootstrapResults(lower_bound=None,
-                                           value=round_half_up(stat_val[0], 5),
-                                           upper_bound=None)
+                results = BootstrapResults(lower_bound=None, value=round_half_up(stat_val[0], 5), upper_bound=None)
             else:
-                results = BootstrapResults(lower_bound=None,
-                                           value=None,
-                                           upper_bound=None)
+                results = BootstrapResults(lower_bound=None, value=None,
upper_bound=None) results.set_distributions([results.value]) else: - # need bootstrapping and CI calculation in addition to the derived statistic - - # construct joined array with data for series 1 and 2 and operation + safe_log(logger, "debug", "Performing bootstrapping with CI calculation.") operation = np.full((len(ds_1.values), 1), derived_curve_component.derived_operation) - values_both_arrays = np.concatenate((ds_1.values, ds_2.values), axis=1) - values_both_arrays = np.concatenate((values_both_arrays, operation), axis=1) + values_both_arrays = np.concatenate((ds_1.values, ds_2.values, operation), axis=1) try: - # calculate a block length for the circular temporal block bootstrap if needed - block_length = 1 - - # to use circular block bootstrap or not - is_cbb = True - if 'circular_block_bootstrap' in self.params.keys(): - is_cbb = parse_bool(self.params['circular_block_bootstrap']) - - if is_cbb: - block_length = int(math.sqrt(len(values_both_arrays))) + block_length = int(math.sqrt(len(values_both_arrays))) if 'circular_block_bootstrap' in self.params and parse_bool(self.params['circular_block_bootstrap']) else 1 results = bootstrap_and_value( values_both_arrays, stat_func=self._calc_stats_derived, @@ -799,24 +1074,26 @@ def _get_bootstrapped_stats_for_derived(self, series, distributions, axis="1"): ci_method=self.params['method'], alpha=self.params['alpha'], save_data=False, - save_distributions=derived_curve_component.derived_operation == 'DIFF_SIG', - block_length=block_length) + save_distributions=(derived_curve_component.derived_operation == 'DIFF_SIG'), + block_length=block_length, + logger=logger + ) except KeyError as err: - results = BootstrapResults(None, None, None) - print(err) + safe_log(logger, "error", f"Error during bootstrapping: {err}") + return BootstrapResults(None, None, None) + # Post-processing for DIFF_SIG if derived_curve_component.derived_operation == 'DIFF_SIG': - # remove None values in distributions + safe_log(logger, "debug", "Processing DIFF_SIG operation for derived statistics.") distributions = [i for i in results.distributions if i is not None] - diff_sig = None if distributions and results.value is not None: distribution_mean = np.mean(distributions) distribution_under_h0 = distributions - distribution_mean pval = np.mean(np.absolute(distribution_under_h0) <= np.absolute(results.value)) - diff_sig = perfect_score_adjustment(ds_1.value, ds_2.value, self.statistic, pval) - - results.value = diff_sig + diff_sig = perfect_score_adjustment(ds_1.value, ds_2.value, self.statistic, pval, logger=logger) + results.value = diff_sig + safe_log(logger, "info", "Completed derived statistics calculation.") return results def _get_bootstrapped_stats(self, series_data, axis="1"): @@ -827,51 +1104,61 @@ def _get_bootstrapped_stats(self, series_data, axis="1"): BootstrapDistributionResults object """ + logger = self.logger + safe_log(logger, "debug", "Starting bootstrapped statistics calculation.") - # if the data frame is empty - do nothing and return an empty object + # Check if the data frame is empty if series_data.empty: - return BootstrapResults(lower_bound=None, - value=None, - upper_bound=None) - # check if derived series are present + safe_log(logger, "warning", "Input series data is empty. 
Returning empty BootstrapResults.")
+            return BootstrapResults(lower_bound=None, value=None, upper_bound=None)
+
+        # Check for derived series
         has_derived_series = False
-        if self.params['derived_series_' + axis]:
+        if self.params.get('derived_series_' + axis):
             has_derived_series = True
+            safe_log(logger, "debug", f"Derived series found for axis '{axis}'.")

-        # sort data by dates
+        # Sort data by dates and reset index
+        safe_log(logger, "debug", "Sorting series data.")
         series_data = sort_data(series_data)
         series_data.reset_index(inplace=True, drop=True)
+        safe_log(logger, "debug", "Data sorting completed and index reset.")

-        if 'line_type' in self.params.keys() and self.params['line_type'] is not None and self.params['line_type'] != 'None':
-            # find the function that prepares data and execute it
+        # Prepare data for specific line type if present
+        if self.params.get('line_type') and self.params['line_type'] != 'None':
             func = getattr(self, f"_prepare_{self.params['line_type']}_data")
             func(series_data)
+
-        # input data has to be in numpy format for bootstrapping
+        # Convert data to numpy format for bootstrapping
+        safe_log(logger, "debug", "Converting series data to numpy format.")
         data = series_data.to_numpy()

+        # Perform calculation without bootstrapping if only one iteration
         if self.params['num_iterations'] == 1:
-            # don't need bootstrapping and CI calculation
-
-            # calculate the statistic and exit
-            stat_val = self._calc_stats(data)[0]
-            results = BootstrapResults(lower_bound=None,
-                                       value=stat_val,
-                                       upper_bound=None)
-            # save original data only if we need it in the future
-            # for derived series calculation
-            if has_derived_series:
-                results.set_original_values(data)
+            safe_log(logger, "debug", "No bootstrapping needed (num_iterations = 1). Calculating statistics.")
+            try:
+                stat_val = self._calc_stats(data)[0]
+                results = BootstrapResults(lower_bound=None, value=stat_val, upper_bound=None)
+
+                # Save original data for derived series if needed
+                if has_derived_series:
+                    safe_log(logger, "debug", "Saving original data for derived series.")
+                    results.set_original_values(data)
+            except Exception as e:
+                safe_log(logger, "error", f"Error during statistic calculation: {e}")
+                raise
         else:
-            # need bootstrapping and CI calculation in addition to statistic
+            # Bootstrapping required with CI calculation
+            safe_log(logger, "debug", f"Bootstrapping with {self.params['num_iterations']} iterations.")
             try:
                 block_length = 1
-                # to use circular block bootstrap or not
-                is_cbb = True
-                if 'circular_block_bootstrap' in self.params.keys():
-                    is_cbb = parse_bool(self.params['circular_block_bootstrap'])
-
+                # Determine whether to use circular block bootstrap
+                is_cbb = True
+                if 'circular_block_bootstrap' in self.params:
+                    is_cbb = parse_bool(self.params['circular_block_bootstrap'])
                 if is_cbb:
                     block_length = int(math.sqrt(len(data)))
+
+                # Perform bootstrapping and CI calculation
                 results = bootstrap_and_value(
                     data,
                     stat_func=self._calc_stats,
@@ -879,11 +1166,19 @@ def _get_bootstrapped_stats(self, series_data, axis="1"):
                     num_threads=self.params['num_threads'],
                     ci_method=self.params['method'],
                     save_data=has_derived_series,
-                    block_length=block_length)
+                    block_length=block_length,
+                    logger=logger
+                )
+                safe_log(logger, "info", "Bootstrapping and CI calculation completed.")
             except KeyError as err:
+                safe_log(logger, "error", f"KeyError during bootstrapping: {err}")
                 results = BootstrapResults(None, None, None)
-                print(err)
+            except Exception as e:
+                safe_log(logger, "error", f"Unexpected error during bootstrapping: {e}")
+                raise
+
+        safe_log(logger, "debug", "Bootstrapped statistics
calculation completed.") return results def _validate_series_cases_for_derived_operation(self, series_data, axis="1", num_diff_vals=1): @@ -901,41 +1196,54 @@ def _validate_series_cases_for_derived_operation(self, series_data, axis="1", nu Returns: This method raises an error if this criteria is False """ - # find indexes of columns of interests - fcst_lead_index = np.where(self.column_names == 'fcst_lead')[0][0] - stat_name_index = np.where(self.column_names == 'stat_name')[0][0] - if "fcst_valid_beg" in self.column_names: - fcst_valid_ind = np.where(self.column_names == 'fcst_valid_beg')[0][0] - elif "fcst_valid" in self.column_names: - fcst_valid_ind = np.where(self.column_names == 'fcst_valid')[0][0] - elif "fcst_init_beg" in self.column_names: - fcst_valid_ind = \ - np.where(self.column_names == 'fcst_init_beg')[0][0] - else: - fcst_valid_ind = \ - np.where(self.column_names == 'fcst_init')[0][0] + logger = self.logger + safe_log(logger, "debug", "Starting validation of series for derived operation.") + safe_log(logger, "debug", f"Axis: {axis}, num_diff_vals: {num_diff_vals}") + try: - # filter columns of interest + # Find indexes of columns of interest + fcst_lead_index = np.where(self.column_names == 'fcst_lead')[0][0] + stat_name_index = np.where(self.column_names == 'stat_name')[0][0] + safe_log(logger, "debug", f"fcst_lead_index: {fcst_lead_index}, stat_name_index: {stat_name_index}") + + if "fcst_valid_beg" in self.column_names: + fcst_valid_ind = np.where(self.column_names == 'fcst_valid_beg')[0][0] + elif "fcst_valid" in self.column_names: + fcst_valid_ind = np.where(self.column_names == 'fcst_valid')[0][0] + elif "fcst_init_beg" in self.column_names: + fcst_valid_ind = np.where(self.column_names == 'fcst_init_beg')[0][0] + else: + fcst_valid_ind = np.where(self.column_names == 'fcst_init')[0][0] + + safe_log(logger, "debug", f"fcst_valid_ind: {fcst_valid_ind}") + + # Filter columns of interest date_lead_stat = series_data[:, [fcst_valid_ind, fcst_lead_index, stat_name_index]] - # find the number of unique combinations + # Find the number of unique combinations unique_date_size = len(set(map(tuple, date_lead_stat))) + safe_log(logger, "debug", f"Unique date-lead-stat combinations found: {unique_date_size}") + except TypeError as err: - print(err) + safe_log(logger, "error", f"Error during filtering columns: {err}") unique_date_size = [] - # identify rows with unique combinations - ind = np.lexsort( - (series_data[:, stat_name_index], - series_data[:, fcst_lead_index], series_data[:, fcst_valid_ind])) - series_data = series_data[ind, :] - - # the length of the frame with unique combinations should be the same - # as the number of unique combinations calculated before + # Identify rows with unique combinations + try: + ind = np.lexsort( + (series_data[:, stat_name_index], + series_data[:, fcst_lead_index], series_data[:, fcst_valid_ind])) + series_data = series_data[ind, :] + safe_log(logger, "debug", f"Series data sorted by valid index, lead, and stat name.") + except Exception as e: + safe_log(logger, "error", f"Error during sorting series data: {e}") + raise - if len(series_data) / num_diff_vals != unique_date_size \ - and self.params['list_stat_' + axis] not in self.EXEMPTED_VARS: - raise NameError("Derived curve can't be calculated." 
- " Multiple values for one valid date/fcst_lead") + # Validate if the number of unique combinations matches the data length + if len(series_data) / num_diff_vals != unique_date_size and self.params['list_stat_' + axis] not in self.EXEMPTED_VARS: + safe_log(logger, "error", "Validation failed. Derived curve can't be calculated due to multiple values for one valid date/fcst_lead.") + raise NameError("Derived curve can't be calculated. Multiple values for one valid date/fcst_lead") + + safe_log(logger, "info", "Series validation for derived operation completed successfully.") def _init_out_frame(self, series_fields, series): """ Initialises the output frame and add series values to each row @@ -945,18 +1253,35 @@ def _init_out_frame(self, series_fields, series): Returns: pandas data frame """ + logger = self.logger + safe_log(logger, "debug", "Initializing output frame.") + + # Create an empty DataFrame result = pd.DataFrame() + + # Determine the number of rows to be added based on the length of the series row_number = len(series) - # fill series variables and values - for field_ind, field in enumerate(series_fields): - result[field] = [row[field_ind] for row in series] - - # fill the stats and CI values placeholders with None - result['fcst_var'] = [None] * row_number - result['stat_value'] = [None] * row_number - result['stat_btcl'] = [None] * row_number - result['stat_btcu'] = [None] * row_number - result['nstats'] = [None] * row_number + safe_log(logger, "debug", f"Number of rows to initialize: {row_number}") + + try: + # Fill the series variables and values for each field + for field_ind, field in enumerate(series_fields): + safe_log(logger, "debug", f"Filling field '{field}' with values from series.") + result[field] = [row[field_ind] for row in series] + + # Fill the statistical and CI value placeholders with None + safe_log(logger, "debug", "Filling placeholder columns with None values.") + result['fcst_var'] = [None] * row_number + result['stat_value'] = [None] * row_number + result['stat_btcl'] = [None] * row_number + result['stat_btcu'] = [None] * row_number + result['nstats'] = [None] * row_number + + safe_log(logger, "info", "Output frame initialization completed successfully.") + except Exception as e: + safe_log(logger, "error", f"Error during output frame initialization: {e}") + raise + return result def _get_derived_points(self, series_val, indy_vals, axis="1"): @@ -968,53 +1293,80 @@ def _get_derived_points(self, series_val, indy_vals, axis="1"): Returns: a list of all possible values for the each derived points """ + logger = self.logger + safe_log(logger, "debug", f"Starting derived points calculation for axis 'axis'.") - # for each derived series result = [] + + # Loop through each derived series for the specified axis for derived_serie in self.params['derived_series_' + axis]: - # series 1 components + safe_log(logger, "debug", f"Processing derived series: {derived_serie}") + + # Series 1 components ds_1 = derived_serie[0].split(' ') + safe_log(logger, "debug", f"Series 1 components: {ds_1}") - # series 2 components + # Series 2 components ds_2 = derived_serie[1].split(' ') - # find a variable of the operation by comparing values in each derived series component + safe_log(logger, "debug", f"Series 2 components: {ds_2}") + + # Find the variable of the operation by comparing values in each derived series component series_var_vals = () for ind, name in enumerate(ds_1): if name != ds_2[ind]: series_var_vals = (name, ds_2[ind]) + safe_log(logger, "debug", f"Identified 
differing components at index {ind}: {series_var_vals}") break + # Default to the last key in series_val if no matching variable is found series_var = list(series_val.keys())[-1] if len(series_var_vals) > 0: for var in series_val.keys(): if all(elem in series_val[var] for elem in series_var_vals): series_var = var + safe_log(logger, "debug", f"Identified series variable: {series_var}") break + # Create a copy of series_val and modify it for the derived values derived_val = series_val.copy() derived_val[series_var] = None + # Filter values based on intersections with ds_1 for var in series_val.keys(): - if derived_val[var] is not None \ - and intersection(derived_val[var], ds_1) \ - == intersection(derived_val[var], ds_1): + if derived_val[var] is not None and intersection(derived_val[var], ds_1) == intersection(derived_val[var], ds_1): derived_val[var] = intersection(derived_val[var], ds_1) + safe_log(logger, "debug", f"Updated '{var}' in derived values: {derived_val[var]}") + # Generate the derived curve name derived_curve_name = get_derived_curve_name(derived_serie) derived_val[series_var] = [derived_curve_name] + safe_log(logger, "debug", f"Derived curve name: {derived_curve_name}") + + # If there are independent values, assign them to the appropriate variable if len(indy_vals) > 0: derived_val[self.params['indy_var']] = indy_vals + safe_log(logger, "debug", f"Assigned independent values: {indy_vals}") - self.derived_name_to_values[derived_curve_name] \ - = DerivedCurveComponent(ds_1, ds_2, derived_serie[-1]) + # Store the derived series components in a DerivedCurveComponent + self.derived_name_to_values[derived_curve_name] = DerivedCurveComponent(ds_1, ds_2, derived_serie[-1]) + + # Set the stat_name field if ds_1[-1] == ds_2[-1]: derived_val['stat_name'] = [ds_1[-1]] else: derived_val['stat_name'] = [ds_1[-1] + "," + ds_2[-1]] - result.append(list(itertools.product(*derived_val.values()))) - - return [y for x in result for y in x] + safe_log(logger, "debug", f"Set 'stat_name' for derived values: {derived_val['stat_name']}") + # Create all possible combinations of the derived values + result.append(list(itertools.product(*derived_val.values()))) + safe_log(logger, "debug", f"Derived values appended to result: {derived_val}") + + # Flatten the result list and return it + flattened_result = [y for x in result for y in x] + safe_log(logger, "info", f"Derived points calculation completed. Total derived points: {len(flattened_result)}") + + return flattened_result + def _proceed_with_axis(self, axis="1"): """Calculates stat values for the requested Y axis @@ -1024,49 +1376,66 @@ def _proceed_with_axis(self, axis="1"): pandas dataframe with calculated stat values and CI """ + logger = self.logger + safe_log(logger, "debug", f"Starting to calculate stats for axis: {axis}") + if not self.input_data.empty: - # replace thresh_i values for reliability plot + safe_log(logger, "debug", "Input data is not empty. 
Proceeding with calculation.")
+
+            # Handle indy_vals for reliability plot if applicable
             indy_vals = self.params['indy_vals']
             if self.params['indy_var'] == 'thresh_i' and self.params['line_type'] == 'pct':
+                safe_log(logger, "debug", "Replacing thresh_i values for reliability plot.")
                 indy_vals_int = self.input_data['thresh_i'].tolist()
                 indy_vals_int.sort()
                 indy_vals_int = np.unique(indy_vals_int).tolist()
                 indy_vals = list(map(str, indy_vals_int))

-            # identify all possible points values by adding series values, indy values
-            # and statistics and then permute them
+            # Identify all possible points by adding series values, indy values, and statistics
             series_val = self.params['series_val_' + axis]
             all_fields_values = series_val.copy()
             if indy_vals:
                 all_fields_values[self.params['indy_var']] = indy_vals
             all_fields_values['stat_name'] = self.params['list_stat_' + axis]
             all_points = list(itertools.product(*all_fields_values.values()))
+            safe_log(logger, "debug", f"Total points identified: {len(all_points)}")

+            # Add derived points if present
             if self.params['derived_series_' + axis]:
-                # identifies and add all possible derived points values
-                all_points.extend(self._get_derived_points(series_val, indy_vals, axis))
+                safe_log(logger, "debug", "Identifying and adding derived points.")
+                derived_points = self._get_derived_points(series_val, indy_vals, axis)
+                all_points.extend(derived_points)
+                safe_log(logger, "debug", f"Total derived points added: {len(derived_points)}")

-            # init the template for output frame
+            # Initialize the output frame
             out_frame = self._init_out_frame(all_fields_values.keys(), all_points)
+            safe_log(logger, "debug", "Initialized output frame.")

             point_to_distrib = {}

-            # for each point
+            # Process each point
             for point_ind, point in enumerate(all_points):
-                # get statistic. Use reversed because it is more likely that the stat is in the end
+                safe_log(logger, "debug", f"Processing point {point_ind + 1}/{len(all_points)}: {point}")
+
+                # Determine the statistic for the point
                 for component in reversed(point):
                     if component in set(self.params['list_stat_' + axis]):
                         self.statistic = component.lower()
                         break
+                safe_log(logger, "debug", f"Statistic identified: {self.statistic}")
+
                 is_derived = is_derived_point(point)
+
                 if not is_derived:
-                    # filter point data
+                    safe_log(logger, "debug", f"Processing regular point: {point}")
+
+                    # Filtering point data
                     all_filters = []
                     all_filters_pct = []
-                    filters_wihtout_indy = []
+                    filters_without_indy = []
                     indy_val = None
+
                     for field_ind, field in enumerate(all_fields_values.keys()):
                         filter_value = point[field_ind]
                         if GROUP_SEPARATOR in filter_value:
                             filter_list = re.findall(DATE_TIME_REGEX, filter_value)
@@ -1076,89 +1445,89 @@ def _proceed_with_axis(self, axis="1"):
                             filter_list = filter_value.split(';')
                         else:
                             filter_list = [filter_value]
+
+                        # Convert values to appropriate types
                         for i, filter_val in enumerate(filter_list):
                             if is_string_integer(filter_val):
                                 filter_list[i] = int(filter_val)
                             elif is_string_strictly_float(filter_val):
                                 filter_list[i] = float(filter_val)
+
+                        # Apply filters
                         if field in self.input_data.keys():
-                            if field != self.params['indy_var']:  #
-                                filters_wihtout_indy.
\ - append((self.input_data[field].isin(filter_list))) + if field != self.params['indy_var']: + filters_without_indy.append((self.input_data[field].isin(filter_list))) else: indy_val = filter_value - all_filters.append(self.input_data[field].isin(filter_list)) + if field in series_val.keys(): all_filters_pct.append((self.input_data[field].isin(filter_list))) - # add fcst var + # Apply forecast variable filters fcst_var = None if len(self.params['fcst_var_val_' + axis]) > 0: fcst_var = list(self.params['fcst_var_val_' + axis].keys())[0] if 'fcst_var' in self.input_data.columns: all_filters.append((self.input_data['fcst_var'].isin([fcst_var]))) - # use numpy to select the rows where any record evaluates to True + # Use numpy to apply filters and select rows mask = np.array(all_filters).all(axis=0) point_data = self.input_data.loc[mask] + # Handle percentage line types if self.params['line_type'] == 'pct': + safe_log(logger, "debug", "Processing percentage line type.") if all_filters_pct: mask_pct = np.array(all_filters_pct).all(axis=0) point_data_pct = self.input_data.loc[mask_pct] else: point_data_pct = self.input_data - # collect all columns that starts with oy_i and on_i + + # Calculate additional metrics for percentage line type filter_oy_i = [col for col in point_data_pct if col.startswith('oy_i')] filter_on_i = [col for col in point_data_pct if col.startswith('on_i')] - # calculate oy_total oy_total = point_data_pct[filter_oy_i].values.sum() - - # calculate T sum_n_i_orig_T = point_data_pct[filter_on_i].values.sum() + oy_total - - # calculate o_bar o_bar = oy_total / sum_n_i_orig_T - point_data.insert(len(point_data.columns), 'T', sum_n_i_orig_T) point_data.insert(len(point_data.columns), 'oy_total', oy_total) point_data.insert(len(point_data.columns), 'o_bar', o_bar) - # aggregate point data + # Aggregate point data if necessary series_var_val = self.params['series_val_' + axis] if any(';' in series_val for series_val in series_var_val): point_data = aggregate_field_values(series_var_val, point_data, self.params['line_type']) elif indy_val and ';' in indy_val: - # if aggregated value in indy val - add it to series values add aggregate series_indy_var_val = series_var_val series_indy_var_val[self.params['indy_var']] = [indy_val] - point_data = aggregate_field_values(series_indy_var_val, point_data, - self.params['line_type']) - # calculate bootstrap results + point_data = aggregate_field_values(series_indy_var_val, point_data, self.params['line_type']) + + # Calculate bootstrap results for the point bootstrap_results = self._get_bootstrapped_stats(point_data, axis) - # save bootstrap results point_to_distrib[point] = bootstrap_results n_stats = len(point_data) + safe_log(logger, "debug", f"Bootstrap results calculated for point {point_ind + 1}") else: - # calculate bootstrap results for the derived point - bootstrap_results = self._get_bootstrapped_stats_for_derived( - point, - point_to_distrib, - axis) + # Process derived points + safe_log(logger, "debug", f"Processing derived point: {point}") + bootstrap_results = self._get_bootstrapped_stats_for_derived(point, point_to_distrib, axis) n_stats = 0 - # save results to the output data frame + # Save results to the output data frame out_frame.loc[point_ind, 'fcst_var'] = fcst_var out_frame.loc[point_ind, 'stat_value'] = bootstrap_results.value out_frame.loc[point_ind, 'stat_btcl'] = bootstrap_results.lower_bound out_frame.loc[point_ind, 'stat_btcu'] = bootstrap_results.upper_bound out_frame.loc[point_ind, 'nstats'] = n_stats - + 
safe_log(logger, "debug", f"Results saved for point {point_ind + 1}") else: + safe_log(logger, "warning", "Input data is empty. Returning an empty DataFrame.") out_frame = pd.DataFrame() + + safe_log(logger, "info", "Completed stat calculations for axis '%s'") return out_frame def calculate_stats_and_ci(self): @@ -1167,62 +1536,85 @@ def calculate_stats_and_ci(self): Writes output data to the file """ + logger = self.logger + safe_log(logger, "info", "Starting calculation of statistics and confidence intervals.") - # set random seed if present + # Set random seed if present if self.params['random_seed'] is not None and self.params['random_seed'] != 'None': np.random.seed(self.params['random_seed']) + safe_log(logger, "debug", f"Random seed set to: {self.params['random_seed']}") + # Parse event equalization flag is_event_equal = parse_bool(self.params['event_equal']) + safe_log(logger, "debug", f"Event equalization flag set to: {is_event_equal}") + # Handle line type 'pct' by appending specific columns if self.params['line_type'] == 'pct': - self.column_names = np.append(self.column_names, 'T') - self.column_names = np.append(self.column_names, 'oy_total') - self.column_names = np.append(self.column_names, 'o_bar') + safe_log(logger, "debug", "Adding additional columns for 'pct' line type.") + self.column_names = np.append(self.column_names, ['T', 'oy_total', 'o_bar']) - # perform grouping + # Perform grouping for series_val_1 + safe_log(logger, "debug", "Starting grouping for series_val_1.") series_val = self.params['series_val_1'] group_to_value_index = 1 if series_val: for key in series_val.keys(): for val in series_val[key]: if GROUP_SEPARATOR in val: - new_name = 'Group_y1_' + str(group_to_value_index) + new_name = f'Group_y1_{group_to_value_index}' self.group_to_value[new_name] = val - group_to_value_index = group_to_value_index + 1 + group_to_value_index += 1 + safe_log(logger, "debug", f"Group created: {new_name} -> {val}") + # Perform grouping for series_val_2 + safe_log(logger, "debug", "Starting grouping for series_val_2.") series_val = self.params['series_val_2'] if series_val: group_to_value_index = 1 - if series_val: - for key in series_val.keys(): - for val in series_val[key]: - if GROUP_SEPARATOR in val: - new_name = 'Group_y2_' + str(group_to_value_index) - self.group_to_value[new_name] = val - group_to_value_index = group_to_value_index + 1 - - # perform EE if needed + for key in series_val.keys(): + for val in series_val[key]: + if GROUP_SEPARATOR in val: + new_name = f'Group_y2_{group_to_value_index}' + self.group_to_value[new_name] = val + group_to_value_index += 1 + safe_log(logger, "debug", f"Group created: {new_name} -> {val}") + + # Perform event equalization if required if is_event_equal: - self.input_data = perform_event_equalization(self.params, self.input_data) + safe_log(logger, "debug", "Performing event equalization.") + self.input_data = perform_event_equalization(self.params, self.input_data, logger=logger) + safe_log(logger, "info", "Event equalization completed.") - # get results for axis1 + # Calculate statistics for axis 1 + safe_log(logger, "info", "Calculating statistics for axis 1.") out_frame = self._proceed_with_axis("1") + safe_log(logger, "debug", f"Axis 1 results shape: {out_frame.shape}") - # get results for axis2 if needed + # Calculate statistics for axis 2 if needed if self.params['series_val_2']: - axis_2_frame= self._proceed_with_axis("2") + safe_log(logger, "info", "Calculating statistics for axis 2.") + axis_2_frame = 
diff --git a/metcalcpy/agg_stat_bootstrap.py b/metcalcpy/agg_stat_bootstrap.py
index 10ffaeb6..cc6104ec 100644
--- a/metcalcpy/agg_stat_bootstrap.py
+++ b/metcalcpy/agg_stat_bootstrap.py
@@ -49,7 +49,8 @@
 from metcalcpy.util.mode_3d_volrat_statistics import *
 from metcalcpy.util.mode_3d_ratio_statistics import *
 from metcalcpy.util.utils import is_string_integer, parse_bool, sort_data, is_string_strictly_float
-
+from metcalcpy.logging_config import setup_logging
+from metcalcpy.util.safe_log import safe_log

 class AggStatBootstrap:
     """A class that performs aggregation statistic logic fot MODE and MTD ratio statistics on input data frame.
@@ -68,7 +69,9 @@ class AggStatBootstrap:
         Args:
             in_params - input parameters as a dictionary
         """
-
+        self.logger = setup_logging(in_params)
+        logger = self.logger
+        safe_log(logger, "debug", "Initializing AggStatBootstrap with parameters.")
         self.statistic = None
         self.derived_name_to_values = {}
         self.params = in_params
@@ -90,40 +93,54 @@ def _init_out_frame(self, series_fields, series):
            Returns:
                pandas data frame
        """
+        logger = self.logger
+        safe_log(logger, "debug", "Initializing output data frame.")
         result = pd.DataFrame()
         row_number = len(series)
+        safe_log(logger, "debug", f"Number of rows to initialize: {row_number}")
         # fill series variables and values
         for field_ind, field in enumerate(series_fields):
             result[field] = [row[field_ind] for row in series]
-
+            safe_log(logger, "debug", f"Field '{field}' initialized with {len(result[field])} entries.")
         # fill the stats and CI values placeholders with None
         result['fcst_var'] = [None] * row_number
         result['stat_value'] = [None] * row_number
         result['stat_btcl'] = [None] * row_number
         result['stat_btcu'] = [None] * row_number
         result['nstats'] = [None] * row_number
+
+        safe_log(logger, "debug", "Stats and confidence interval placeholders added.")
+        safe_log(logger, "debug", f"DataFrame initialized with columns: {result.columns.tolist()}")
+
         return result
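For reference, the frame this initializer returns has one row per permuted point: the series fields come first, followed by the placeholder columns filled in later. A hypothetical two-field example:

    # Illustrative shape only; 'model' and 'fcst_lead' are hypothetical
    # series fields, not names taken from this module.
    #   model  fcst_lead  stat_name  fcst_var  stat_value  stat_btcl  stat_btcu  nstats
    #   GFS    120000     FBAR       None      None        None       None       None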
     def _proceed_with_axis(self, axis="1"):
+
+        logger = self.logger
+        safe_log(logger, "info", f"Proceeding with axis: {axis}")
         if not self.input_data.empty:
             # identify all possible points values by adding series values, indy values
             # and statistics and then permute them
+            safe_log(logger, "debug", "Input data is not empty. Proceeding with calculations.")
             indy_vals = self.params['indy_vals']
             series_val = self.params['series_val_' + axis]
             all_fields_values = series_val.copy()
             all_fields_values[self.params['indy_var']] = indy_vals
             all_fields_values['stat_name'] = self.params['list_stat_' + axis]
             all_points = list(itertools.product(*all_fields_values.values()))
+            safe_log(logger, "debug", f"All points generated: {len(all_points)} points created for axis {axis}.")
             fcst_var = None
             if len(self.params['fcst_var_val_' + axis]) > 0 and 'fcst_var' in self.input_data.columns:
                 fcst_var = list(self.params['fcst_var_val_' + axis].keys())[0]
-
+                safe_log(logger, "debug", f"Forecast variable identified: {fcst_var}")
             cases = []
             out_frame = self._init_out_frame(all_fields_values.keys(), all_points)
+            safe_log(logger, "debug", f"Output DataFrame initialized with {len(out_frame)} rows.")
             point_to_distrib = {}

             # run the bootstrap flow for each independent variable value
             for indy_val in indy_vals:
+                safe_log(logger, "debug", f"Processing independent value: {indy_val}")
                 # extract the records for the current indy value
                 if is_string_integer(indy_val):
                     filtered_by_indy_data = \
@@ -138,6 +155,7 @@ def _proceed_with_axis(self, axis="1"):
                 all_fields_values = series_val.copy()

                 all_points = list(itertools.product(*all_fields_values.values()))
+                safe_log(logger, "debug", f"Number of points for independent value '{indy_val}': {len(all_points)}.")

                 for point in all_points:
                     all_filters = []
@@ -164,6 +182,7 @@ def _proceed_with_axis(self, axis="1"):
                     # use numpy to select the rows where any record evaluates to True
                     mask = np.array(all_filters).all(axis=0)
                     point_data = filtered_by_indy_data.loc[mask]
+                    safe_log(logger, "debug", f"Point data filtered for point {point}. Number of records: {len(point_data)}")

                     # build a list of cases to sample
                     fcst_valid = point_data.loc[:, 'fcst_valid'].astype(str)
@@ -174,6 +193,7 @@ def _proceed_with_axis(self, axis="1"):
                 # calculate bootstrap for cases
                 for stat_upper in self.params['list_stat_' + axis]:
                     self.statistic = stat_upper.lower()
+                    safe_log(logger, "debug", f"Calculating bootstrap for statistic: {self.statistic}")
                     for point in all_points:
                         all_filters = []
                         out_frame_filter = []
@@ -198,6 +218,7 @@ def _proceed_with_axis(self, axis="1"):
                         mask_out_frame = np.array(out_frame_filter).all(axis=0)
                         point_data = filtered_by_indy_data.loc[mask]
                         bootstrap_results = self._get_bootstrapped_stats(point_data, cases)
+                        safe_log(logger, "debug", f"Bootstrap results calculated for point {point}: {bootstrap_results.value}")
                         # save bootstrap results
                         point_to_distrib[point] = bootstrap_results
                         n_stats = len(point_data)
@@ -214,19 +235,32 @@ def _proceed_with_axis(self, axis="1"):
                         out_frame.loc[index, 'stat_btcl'] = bootstrap_results.lower_bound
                         out_frame.loc[index, 'stat_btcu'] = bootstrap_results.upper_bound
                         out_frame.loc[index, 'nstats'] = n_stats
+                        safe_log(logger, "debug", f"Results saved to output DataFrame at index {index} for point {point}.")
         else:
             out_frame = pd.DataFrame()
+            safe_log(logger, "warning", "Input data is empty. Returning an empty DataFrame.")
+
+        safe_log(logger, "info", f"Completed processing for axis: {axis}")
         return out_frame
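The MODE/MTD flow above resamples whole cases rather than individual rows. A hypothetical illustration of the case key format, which calculate_values below builds from the fcst_valid and independent-variable columns; the values shown are made up:

    # Each record is tagged "<fcst_valid>#<indy_val>"; the bootstrap then
    # draws whole case keys. Values below are hypothetical.
    fcst_valid = "2020-01-01 12:00:00"
    indy_val = "120000"
    case_key = fcst_valid + '#' + indy_val   # -> "2020-01-01 12:00:00#120000"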
     def _get_bootstrapped_stats(self, series_data, cases):
+        logger = self.logger
+        safe_log(logger, "info", "Starting bootstrapping process.")
+
+        safe_log(logger, "debug", "Sorting series data.")
         self.series_data = sort_data(series_data)
+        safe_log(logger, "debug", f"Data sorted. Number of rows: {len(self.series_data)}")
         if self.params['num_iterations'] == 1:
+            safe_log(logger, "info", "Only one iteration specified. Skipping bootstrapping.")
             stat_val = self._calc_stats(cases)[0]
+            safe_log(logger, "debug", f"Statistic calculated: {stat_val}")
             results = BootstrapResults(lower_bound=None,
                                        value=stat_val,
                                        upper_bound=None)
+            safe_log(logger, "info", "Statistic calculated without bootstrapping.")
         else:
-            # need bootstrapping and CI calculation in addition to statistic
+            # need bootstrapping and CI calculation in addition to the statistic
+            safe_log(logger, "info", "Performing bootstrapping and confidence interval calculation.")
             try:
                 results = bootstrap_and_value_mode(
                     self.series_data,
@@ -234,11 +268,16 @@ def _get_bootstrapped_stats(self, series_data, cases):
                     stat_func=self._calc_stats,
                     num_iterations=self.params['num_iterations'],
                     num_threads=self.params['num_threads'],
-                    ci_method=self.params['method'])
-
+                    ci_method=self.params['method'],
+                    logger=logger
+                )
+                safe_log(logger, "debug", "Bootstrapping completed successfully.")
             except KeyError as err:
+                safe_log(logger, "error", f"Error during bootstrapping: {err}")
                 results = BootstrapResults(None, None, None)
+                safe_log(logger, "info", "Returning empty BootstrapResults due to error.")
-                print(err)
+        safe_log(logger, "info", "Bootstrapping process completed.")
         return results

     def _calc_stats(self, cases):
@@ -253,23 +292,44 @@ def _calc_stats(self, cases):
            an error
        """
+        logger = self.logger
         func_name = f'calculate_{self.statistic}'
+        safe_log(logger, "info", f"Starting statistic calculation using function: {func_name}")
         if cases is not None and cases.ndim == 2:
             # The single value case
+            safe_log(logger, "debug", "Processing single-value case.")
             # build a data frame with the sampled data
             data_cases = np.asarray(self.series_data['case'])
             flat_cases = cases.flatten()
             values = self.series_data[np.in1d(data_cases, flat_cases)].to_numpy()
-            stat_values = [globals()[func_name](values, self.column_names)]
+            safe_log(logger, "debug", f"Number of values selected for single case: {len(values)}")
+            # Calculate the statistic for the single-value case
+            try:
+                stat_value = globals()[func_name](values, self.column_names, logger=logger)
+                stat_values = [stat_value]
+                safe_log(logger, "info", f"Statistic calculated for single-value case: {stat_value}")
+            except Exception as e:
+                safe_log(logger, "error", f"Error calculating statistic for single-value case: {e}")
+                raise
+
         elif cases is not None and cases.ndim == 3:
             # bootstrapped case
             stat_values = []
             for row in cases:
                 values_ind = self.series_data['case'].isin(row.flatten())
                 values = self.series_data[values_ind]
-                stat_values.append([globals()[func_name](values, self.column_names)])
+                safe_log(logger, "debug", f"Number of values selected for bootstrap iteration: {len(values)}")
+                # Calculate the statistic for each bootstrap iteration
+                try:
+                    stat_value = globals()[func_name](values, self.column_names, logger=logger)
+                    stat_values.append([stat_value])
+                    safe_log(logger, "info", f"Statistic calculated for bootstrap iteration: {stat_value}")
+                except Exception as e:
+                    safe_log(logger, "error", f"Error calculating statistic for bootstrap iteration: {e}")
+                    raise
         else:
+            safe_log(logger, "error", "Invalid input for cases. Cannot calculate statistic.")
             raise KeyError("can't calculate statistic")
         return stat_values
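_calc_stats resolves each statistic function by name at call time. A sketch of that dispatch pattern, kept separate from the module's actual code; the statistic name and helper below are illustrative only:

    # Sketch of name-based dispatch; 'fbar' and dispatch_statistic are
    # illustrative, not identifiers from this module.
    def dispatch_statistic(statistic, values, column_names, logger=None):
        func_name = f'calculate_{statistic}'          # e.g. 'calculate_fbar'
        func = globals().get(func_name)
        if func is None:
            raise KeyError(f"can't calculate statistic: {statistic}")
        return func(values, column_names, logger=logger)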
@@ -277,34 +337,48 @@ def calculate_values(self):
         """ Performs EE if needed followed by aggregation statistic logic
             Writes output data to the file
         """
+        logger = self.logger
+        safe_log(logger, "info", "Starting calculation of values.")
         if not self.input_data.empty:
+            safe_log(logger, "debug", "Input data is not empty. Proceeding with calculations.")
             if self.params['random_seed'] is not None and self.params['random_seed'] != 'None':
+                safe_log(logger, "debug", f"Random seed set to: {self.params['random_seed']}")
                 np.random.seed(self.params['random_seed'])

             # perform EE if needed
             is_event_equal = parse_bool(self.params['event_equal'])
             if is_event_equal:
+                safe_log(logger, "info", "Event equalization required. Performing event equalization.")
                 self._perform_event_equalization()
+                safe_log(logger, "debug", "Event equalization completed.")

             # build the case information for each record
+            safe_log(logger, "debug", "Building case information for each record.")
             fcst_valid = self.input_data.loc[:, 'fcst_valid'].astype(str)
             indy_var = self.input_data.loc[:, self.params['indy_var']].astype(str)
             self.input_data['case'] = fcst_valid + '#' + indy_var
+            safe_log(logger, "debug", "Case information added to the input data.")

             # get results for axis1
+            safe_log(logger, "info", "Calculating results for axis 1.")
             out_frame = self._proceed_with_axis("1")
             if self.params['series_val_2']:
+                safe_log(logger, "info", "Series values for axis 2 detected. Calculating results for axis 2.")
                 out_frame = pd.concat([out_frame, self._proceed_with_axis("2")])
+                safe_log(logger, "debug", "Results for axis 2 calculated and combined with axis 1.")
         else:
+            safe_log(logger, "warning", "Input data is empty. Returning an empty DataFrame.")
             out_frame = pd.DataFrame()

         header = True
         mode = 'w'
-
+        safe_log(logger, "info", f"Exporting results to {self.params['agg_stat_output']}")
         export_csv = out_frame.to_csv(self.params['agg_stat_output'],
                                       index=None, header=header, mode=mode,
                                       sep="\t", na_rep="NA")
+        safe_log(logger, "info", "Results successfully exported to CSV.")
+

     def _perform_event_equalization(self):
         """ Performs event equalisation on input data
diff --git a/metcalcpy/agg_stat_eqz.py b/metcalcpy/agg_stat_eqz.py
index 0102389e..2ecd8694 100644
--- a/metcalcpy/agg_stat_eqz.py
+++ b/metcalcpy/agg_stat_eqz.py
@@ -41,7 +41,8 @@
 from metcalcpy import GROUP_SEPARATOR
 from metcalcpy.event_equalize_against_values import event_equalize_against_values
 from metcalcpy.util.utils import parse_bool
-
+from metcalcpy.logging_config import setup_logging
+from metcalcpy.util.safe_log import safe_log

 class AggStatEventEqz:
     """A class that performs event equalisation logic on input data
@@ -58,6 +59,9 @@ class AggStatEventEqz:
     """

     def __init__(self, in_params):
+        self.logger = setup_logging(in_params)
+        logger = self.logger
+        safe_log(logger, "debug", "Initializing AggStatEventEqz with parameters.")
         self.params = in_params

         self.input_data = pd.read_csv(
@@ -72,33 +76,50 @@ def calculate_values(self):
         """Performs event equalisation if needed and saves equalized data to the file.
""" + logger = self.logger is_event_equal = parse_bool(self.params['event_equal']) + safe_log(logger, "debug", f"Event equalization flag is set to: {is_event_equal}") # check if EE is needed if not self.input_data.empty and is_event_equal: - # read previously calculated cases - prev_cases = pd.read_csv( - self.params['agg_stat_input_ee'], - header=[0], - sep='\t' - ) + safe_log(logger, "info", "Event equalization is required. Starting EE process.") + try: + safe_log(logger, "debug", f"Reading previous cases from: {self.params['agg_stat_input_ee']}") + prev_cases = pd.read_csv( + self.params['agg_stat_input_ee'], + header=[0], + sep='\t' + ) + safe_log(logger, "debug", f"Successfully read previous cases. Number of records: {len(prev_cases)}") + except FileNotFoundError as e: + safe_log(logger, "error", f"File not found: {self.params['agg_stat_input_ee']}") + raise + except Exception as e: + safe_log(logger, "error", f"Error reading previous cases: {self.params['agg_stat_input_ee']}") + raise # perform for axis 1 + safe_log(logger, "info", "Performing event equalization for axis 1.") output_ee_data = self.perform_ee_on_axis(prev_cases, '1') + safe_log(logger, "debug", "Event equalization for axis 1 completed.") # perform for axis 2 if self.params['series_val_2']: + safe_log(logger, "info", "Series values for axis 2 detected. Performing event equalization for axis 2.") output_ee_data = pd.concat([output_ee_data, self.perform_ee_on_axis(prev_cases, '2')]) + safe_log(logger, "debug", "Event equalization for axis 2 completed.") else: output_ee_data = self.input_data if self.input_data.empty: - logging.info( + safe_log(logger, "warning", 'Event equalisation was not performed because the input data is empty.' ) output_ee_data.to_csv(self.params['agg_stat_output'], index=None, header=True, mode='w', sep="\t", na_rep="NA") + output_file = self.params['agg_stat_output'] + safe_log(logger, "info", f"Data successfully saved to {output_file}.") def perform_ee_on_axis(self, prev_cases, axis='1'): """Performs event equalisation against previously calculated cases for the selected axis @@ -106,17 +127,24 @@ def perform_ee_on_axis(self, prev_cases, axis='1'): A data frame that contains equalized records """ warnings.filterwarnings('error') + logger = self.logger + safe_log(logger, "debug", f"Performing event equalization for axis {axis}.") output_ee_data = pd.DataFrame() for fcst_var, fcst_var_stats in self.params['fcst_var_val_' + axis].items(): - for series_var, series_var_vals in self.params['series_val_' + axis].items(): + safe_log(logger, "debug", f"Processing forecast variable: {fcst_var}") + for series_var, series_var_vals in self.params['series_val_' + axis].items(): + safe_log(logger, "debug", f"Processing series variable: {series_var}") + # remove group separator from series values series_var_vals_no_group = [] for val in series_var_vals: split_val = val.split(GROUP_SEPARATOR) series_var_vals_no_group.extend(split_val) + safe_log(logger, "debug", f"Series variable values (no group): {series_var_vals_no_group}") # filter input data based on fcst_var, statistic and all series variables values + safe_log(logger, "debug", "Filtering input data for event equalization.") series_data_for_ee = self.input_data[ (self.input_data['fcst_var'] == fcst_var) & (self.input_data[series_var].isin(series_var_vals_no_group)) @@ -127,20 +155,28 @@ def perform_ee_on_axis(self, prev_cases, axis='1'): (prev_cases['fcst_var'] == fcst_var) & (prev_cases[series_var].isin(series_var_vals_no_group)) ] + safe_log(logger, 
"debug", f"Number of records after filtering input data: {len(series_data_for_ee)}") # get unique cases from filtered previous cases - + safe_log(logger, "debug", "Filtering previous cases for event equalization.") series_data_for_prev_cases_unique = series_data_for_prev_cases['equalize'].unique() + safe_log(logger, "debug", f"Unique previous cases for event equalization: {len(series_data_for_prev_cases_unique)}") # perform ee + safe_log(logger, "info", "Performing event equalization against previous cases.") series_data_after_ee = event_equalize_against_values( series_data_for_ee, - series_data_for_prev_cases_unique) + series_data_for_prev_cases_unique, logger=logger) + safe_log(logger, "debug", f"Number of records after event equalization: {len(series_data_after_ee)}") # append EE data to result if output_ee_data.empty: output_ee_data = series_data_after_ee + safe_log(logger, "debug", "Initialized output data with first set of event equalized data.") else: output_ee_data = pd.concat([output_ee_data, series_data_after_ee]) + safe_log(logger, "debug", "Appended event equalized data to the output.") + + safe_log(logger, "info", f"Event equalization for axis {axis} completed. Total records: {len(output_ee_data)}") return output_ee_data diff --git a/metcalcpy/agg_stat_event_equalize.py b/metcalcpy/agg_stat_event_equalize.py index 9a5d49fc..5507d608 100644 --- a/metcalcpy/agg_stat_event_equalize.py +++ b/metcalcpy/agg_stat_event_equalize.py @@ -41,6 +41,8 @@ from metcalcpy import GROUP_SEPARATOR from metcalcpy.event_equalize import event_equalize +from metcalcpy.logging_config import setup_logging +from metcalcpy.util.safe_log import safe_log class AggStatEventEqualize: """A class that performs event equalisation logic on input data @@ -56,6 +58,9 @@ class AggStatEventEqualize: """ def __init__(self, in_params): + self.logger = setup_logging(in_params) + logger = self.logger + safe_log(logger, "debug", "Initializing AggStatEventEqualize with parameters.") self.params = in_params self.input_data = pd.read_csv( @@ -70,84 +75,112 @@ def __init__(self, in_params): def calculate_values(self): """Performs event equalisation if needed and saves equalized data to the file. """ - if not self.input_data.empty: + logger = self.logger + safe_log(logger, "info", "Event equalization is required. Starting EE process.") + if not self.input_data.empty: + safe_log(logger, "info", "Input data is available. Proceeding with event equalization.") # list all fixed variables if 'fixed_vars_vals_input' in self.params: + safe_log(logger, "info", "Fixed variables detected. Preparing permutations of fixed values.") fix_vals_permuted_list = [] for key in self.params['fixed_vars_vals_input']: vals_permuted = list(itertools.product(*self.params['fixed_vars_vals_input'][key].values())) fix_vals_permuted_list.append(vals_permuted) + safe_log(logger, "debug", f"Fixed values for {key}: {vals_permuted}") fix_vals_permuted = [item for sublist in fix_vals_permuted_list for item in sublist] - + safe_log(logger, "debug", f"All fixed value permutations: {fix_vals_permuted}") else: fix_vals_permuted = [] - + safe_log(logger, "info", "No fixed variables provided for event equalization.") # perform EE for each forecast variable on y1 axis + safe_log(logger, "info", "Running event equalization on axis 1.") output_ee_data = self.run_ee_on_axis(fix_vals_permuted, '1') + safe_log(logger, "debug", f"Event equalization for axis 1 completed. 
Number of records: {len(output_ee_data)}") # if the second Y axis is present - run event equalizer on Y1 # and then run event equalizer on Y1 and Y2 equalized data if self.params['series_val_2']: + safe_log(logger, "info", "Series values for axis 2 detected. Running event equalization on axis 2.") output_ee_data_2 = self.run_ee_on_axis(fix_vals_permuted, '2') - + safe_log(logger, "debug", f"Event equalization for axis 2 completed. Number of records: {len(output_ee_data_2)}") output_ee_data = output_ee_data.drop('equalize', axis=1) output_ee_data_2 = output_ee_data_2.drop('equalize', axis=1) + safe_log(logger, "info", "Concatenating event equalized data from axis 1 and axis 2.") warnings.simplefilter(action='error', category=FutureWarning) all_ee_records = pd.concat([output_ee_data, output_ee_data_2]).reindex() all_series_vars = {} for key in self.params['series_val_2']: all_series_vars[key] = np.unique(self.params['series_val_2'][key] + self.params['series_val_2'][key]) + safe_log(logger, "debug", f"Series variable values for {key}: {all_series_vars[key]}") + safe_log(logger, "info", "Running event equalization on all data.") output_ee_data = event_equalize(all_ee_records, self.params['indy_var'], all_series_vars, list(self.params['fixed_vars_vals_input'].keys()), fix_vals_permuted, True, - False) + False, logger=logger) + safe_log(logger, "debug", f"Final event equalization completed. Number of records: {len(output_ee_data)}") else: output_ee_data = pd.DataFrame() + safe_log(logger, "warning", "Input data is empty. No event equalization will be performed.") + header = True mode = 'w' + output_file = self.params['agg_stat_output'] + output_ee_data.to_csv(self.params['agg_stat_output'], index=None, header=header, mode=mode, sep="\t", na_rep="NA") + safe_log(logger, "info", f"Data successfully saved to {output_file}.") def run_ee_on_axis(self, fix_vals_permuted, axis='1'): """Performs event equalisation against previously calculated cases for the selected axis Returns: A data frame that contains equalized records """ + logger = self.logger + safe_log(logger, "debug", f"Running event equalization for axis {axis}.") output_ee_data = pd.DataFrame() for series_var, series_var_vals in self.params['series_val_' + axis].items(): + safe_log(logger, "debug", f"Processing series variable: {series_var}") # ungroup series value series_var_vals_no_group = [] for val in series_var_vals: split_val = val.split(GROUP_SEPARATOR) series_var_vals_no_group.extend(split_val) - + safe_log(logger, "debug", f"Ungrouped series variable values: {series_var_vals_no_group}") # filter input data based on fcst_var, statistic and all series variables values + safe_log(logger, "debug", "Filtering input data for event equalization.") series_data_for_ee = self.input_data[ self.input_data[series_var].isin(series_var_vals_no_group) ] + safe_log(logger, "debug", f"Number of records after filtering: {len(series_data_for_ee)}") + # perform EE on filtered data + safe_log(logger, "info", f"Performing event equalization on {series_var}.") series_data_after_ee = \ event_equalize(series_data_for_ee, self.params['indy_var'], self.params['series_val_' + axis], list(self.params['fixed_vars_vals_input'].keys()), - fix_vals_permuted, True, False) + fix_vals_permuted, True, False, logger=logger) + safe_log(logger, "debug", f"Number of records after event equalization for {series_var}: {len(series_data_after_ee)}") # append EE data to result if output_ee_data.empty: output_ee_data = series_data_after_ee + safe_log(logger, "debug", 
"Initialized output data with the first set of event equalized data.") else: warnings.simplefilter(action="error", category=FutureWarning) output_ee_data = pd.concat([output_ee_data, series_data_after_ee]) - + safe_log(logger, "debug", "Appended event equalized data to the output.") + + safe_log(logger, "info", f"Event equalization for axis {axis} completed. Total records: {len(output_ee_data)}") return output_ee_data diff --git a/metcalcpy/bootstrap.py b/metcalcpy/bootstrap.py index f105165e..812ebc93 100644 --- a/metcalcpy/bootstrap.py +++ b/metcalcpy/bootstrap.py @@ -16,7 +16,8 @@ from collections.abc import Iterable import multiprocessing as _multiprocessing import scipy.sparse as _sparse - +from metcalcpy.logging_config import setup_logging +from metcalcpy.util.safe_log import safe_log __author__ = 'Tatiana Burek' __version__ = '0.1.0' @@ -102,7 +103,7 @@ def set_distributions(self, distributions): def bootstrap_and_value(values, stat_func, alpha=0.05, num_iterations=1000, iteration_batch_size=None, num_threads=1, ci_method='perc', - save_data=True, save_distributions=False, block_length: int = 1, eclv: bool = False): + save_data=True, save_distributions=False, block_length: int = 1, eclv: bool = False, logger=None): """Returns bootstrap estimate. Can do the independent and identically distributed (IID) or Circular Block Bootstrap (CBB) methods depending on the block_length Args: @@ -155,13 +156,13 @@ def do_division(distr): stat_func_lists, num_iterations, iteration_batch_size, - num_threads, block_length) + num_threads, block_length, logger=logger) bootstrap_dist = do_division(*distributions) if eclv: - result = _get_confidence_interval_and_value_eclv(bootstrap_dist, stat_val, alpha, ci_method) + result = _get_confidence_interval_and_value_eclv(bootstrap_dist, stat_val, alpha, ci_method, logger=logger) else: - result = _get_confidence_interval_and_value(bootstrap_dist, stat_val, alpha, ci_method) + result = _get_confidence_interval_and_value(bootstrap_dist, stat_val, alpha, ci_method,logger=logger) if save_data: result.set_original_values(values) if save_distributions: @@ -170,7 +171,7 @@ def do_division(distr): def _bootstrap_distribution_cbb(values_lists, stat_func_lists, - num_iterations, iteration_batch_size, num_threads, block_length=1): + num_iterations, iteration_batch_size, num_threads, block_length=1, logger=None): '''Returns the simulated bootstrap distribution. The idea is to sample the same indexes in a bootstrap re-sample across all arrays passed into values_lists. @@ -205,14 +206,16 @@ def _bootstrap_distribution_cbb(values_lists, stat_func_lists, the bootsrapped values. 
''' - _validate_arrays(values_lists) + safe_log(logger, "info", "Starting circular block bootstrap distribution process.") + safe_log(logger, "debug","Validating input arrays.") + _validate_arrays(values_lists, logger=logger) if iteration_batch_size is None: iteration_batch_size = num_iterations num_iterations = int(num_iterations) iteration_batch_size = int(iteration_batch_size) - + safe_log(logger, "debug",f"Iteration batch size set to {iteration_batch_size}.") num_threads = int(num_threads) if num_threads == -1: @@ -220,30 +223,32 @@ def _bootstrap_distribution_cbb(values_lists, stat_func_lists, if num_threads <= 1: results = _bootstrap_sim_cbb(values_lists, stat_func_lists, - num_iterations, iteration_batch_size, None, block_length) + num_iterations, iteration_batch_size, None, block_length, logger=logger) else: pool = _multiprocessing.Pool(num_threads) iter_per_job = _np.ceil(num_iterations * 1.0 / num_threads) + safe_log(logger, "debug",f"Iterations per thread: {iter_per_job}.") results = [] for seed in _np.random.randint(0, 2 ** 32 - 1, num_threads): + safe_log(logger, "debug",f"Starting thread with seed {seed}.") r = pool.apply_async(_bootstrap_sim_cbb, (values_lists, stat_func_lists, iter_per_job, iteration_batch_size, seed, block_length)) results.append(r) - + safe_log(logger, "debug","Collecting results from all threads.") results = _np.hstack([res.get() for res in results]) pool.close() - + return results def bootstrap_and_value_mode(values, cases, stat_func, alpha=0.05, num_iterations=1000, iteration_batch_size=None, num_threads=1, ci_method='perc', - save_data=True, save_distributions=False, block_length=1): + save_data=True, save_distributions=False, block_length=1, logger=None): """Returns bootstrap estimate. Args: values: numpy array of values to bootstrap @@ -288,26 +293,38 @@ def bootstrap_and_value_mode(values, cases, stat_func, alpha=0.05, def do_division(distr): return distr - data_cases = _np.asarray(values['case']) - flat_cases = cases.flatten() - values_current = values[_np.in1d(data_cases, flat_cases)].to_numpy() - stat_val = stat_func(values_current)[0] - distributions = _bootstrap_distribution_cbb(values_lists, - stat_func_lists, - num_iterations, - iteration_batch_size, - num_threads, block_length) - - bootstrap_dist = do_division(*distributions) - result = _get_confidence_interval_and_value(bootstrap_dist, stat_val, alpha, ci_method) - if save_data: - result.set_original_values(values) - if save_distributions: - result.set_distributions(bootstrap_dist.flatten('F')) + try: + data_cases = _np.asarray(values['case']) + flat_cases = cases.flatten() + values_current = values[_np.in1d(data_cases, flat_cases)].to_numpy() + safe_log(logger, "debug",f"Selected {len(values_current)} cases for calculation.") + stat_val = stat_func(values_current)[0] + safe_log(logger, "info", f"Calculated statistic value: {stat_val}") + distributions = _bootstrap_distribution_cbb(values_lists, + stat_func_lists, + num_iterations, + iteration_batch_size, + num_threads, block_length, logger=logger) + safe_log(logger, "debug",f"Bootstrap distributions: {distributions}") + + bootstrap_dist = do_division(*distributions) + safe_log(logger, "debug",f"Result after division operation: {bootstrap_dist}") + result = _get_confidence_interval_and_value(bootstrap_dist, stat_val, alpha, ci_method, logger=logger) + safe_log(logger, "info", f"Confidence intervals calculated: {result.lower_bound}, {result.upper_bound}") + + if save_data: + safe_log(logger, "debug","Saving original values to the result.") 
+ result.set_original_values(values) + if save_distributions: + safe_log(logger, "debug","Saving bootstrap distributions to the result.") + result.set_distributions(bootstrap_dist.flatten('F')) + except Exception as e: + safe_log(logger, "error", f"An error occurred during the bootstrap and calculation process: {e}") + raise return result -def _get_confidence_interval_and_value(bootstrap_dist, stat_val, alpha, ci_method): +def _get_confidence_interval_and_value(bootstrap_dist, stat_val, alpha, ci_method, logger=None): """Get the bootstrap confidence interval for a given distribution. Args: bootstrap_dist: numpy array of bootstrap results from @@ -323,16 +340,24 @@ def _get_confidence_interval_and_value(bootstrap_dist, stat_val, alpha, ci_metho # TODO Only percentile method for the confident intervals is implemented if stat_val is None: + safe_log(logger, "warning", "Statistic value is None. Setting confidence interval method to 'None'.") ci_method = "None" if ci_method == 'pivotal': - low = 2 * stat_val - _np.percentile(bootstrap_dist, 100 * (1 - alpha / 2.)) + safe_log(logger, "info", "Using pivotal method to calculate confidence intervals.") + try: + low = 2 * stat_val - _np.percentile(bootstrap_dist, 100 * (1 - alpha / 2.)) + high = 2 * stat_val - _np.percentile(bootstrap_dist, 100 * (alpha / 2.)) + except Exception as e: + safe_log(logger, "error", f"An error occurred during the calculation of confidence intervals: {e}") + raise val = stat_val - high = 2 * stat_val - _np.percentile(bootstrap_dist, 100 * (alpha / 2.)) elif ci_method == 'perc': + safe_log(logger, "info", "Using percentile method to calculate confidence intervals.") # check if All values of bootstrap_dist are equal and if YES - # display a warning and do not calculate CIs - like boot.ci in R if _all_the_same(bootstrap_dist): + safe_log(logger, "warning", f"All values of the bootstrap distribution are equal to {bootstrap_dist[0]}. Cannot calculate confidence intervals.") print(f'All values of t are equal to {bootstrap_dist[0]}. Cannot calculate confidence intervals') low = None high = None @@ -340,17 +365,20 @@ def _get_confidence_interval_and_value(bootstrap_dist, stat_val, alpha, ci_metho bd = bootstrap_dist[bootstrap_dist != _np.array([None])] low = _np.percentile(bd, 100 * (alpha / 2.), method='linear') high = _np.percentile(bd, 100 * (1 - alpha / 2.), method='linear') + safe_log(logger, "debug",f"Percentile method results: low={low}, stat_val={stat_val}, high={high}") val = stat_val else: + safe_log(logger, "warning", "No valid confidence interval method selected.") low = None val = None high = None + safe_log(logger, "info", f"Finished confidence interval calculation: low={low}, value={val}, high={high}") return BootstrapResults(lower_bound=low, value=val, upper_bound=high) -def _get_confidence_interval_and_value_eclv(bootstrap_dist, stat_val, alpha, ci_method): +def _get_confidence_interval_and_value_eclv(bootstrap_dist, stat_val, alpha, ci_method, logger=None): """Get the bootstrap confidence interval for a given distribution for the Economic Cost Loss Relative Value Args: bootstrap_dist: numpy array of bootstrap results from @@ -364,13 +392,14 @@ def _get_confidence_interval_and_value_eclv(bootstrap_dist, stat_val, alpha, ci_ """ # TODO Only percentile method for the confident intervals is implemented - if stat_val is None: + safe_log(logger, "warning", "Statistic value is None. 
Skipping confidence interval calculation.") val = None low = None high = None else: + safe_log(logger, "debug","Preparing bootstrap distribution for confidence interval calculation.") bd = bootstrap_dist[bootstrap_dist != _np.array([None])] all_values = [] for dist_member in bd: @@ -381,6 +410,8 @@ def _get_confidence_interval_and_value_eclv(bootstrap_dist, stat_val, alpha, ci_ none_in_values = len(stat_val['V']) != sum(x is not None for x in stat_val['V']) stat_btcl = [None] * steps_len stat_btcu = [None] * steps_len + safe_log(logger, "debug",f"Calculated steps length: {steps_len}.") + safe_log(logger, "debug",f"Checking if values contain None: {none_in_values}.") for ind in range(steps_len): low = None @@ -389,9 +420,12 @@ def _get_confidence_interval_and_value_eclv(bootstrap_dist, stat_val, alpha, ci_ if ci_method == 'pivotal': low = 2 * stat_val - _np.percentile(column, 100 * (1 - alpha / 2.)) high = 2 * stat_val - _np.percentile(column, 100 * (alpha / 2.)) + safe_log(logger, "debug",f"Pivotal method result for step {ind}: low={low}, high={high}.") elif ci_method == 'perc': + safe_log(logger, "info", "Using percentile method for confidence interval calculation.") if _all_the_same(column): + safe_log(logger, "warning", f"All values of column at step {ind} are equal to {column[0]}. Skipping confidence interval calculation.") print(f'All values of t are equal to {column[0]}. Cannot calculate confidence intervals') low = None high = None @@ -406,6 +440,7 @@ def _get_confidence_interval_and_value_eclv(bootstrap_dist, stat_val, alpha, ci_ low = stat_btcl high = stat_btcu + safe_log(logger, "info", f"Finished confidence interval calculation: value={val}, lower_bounds={low}, upper_bounds={high}.") return BootstrapResults(lower_bound=low, value=val, upper_bound=high) @@ -420,8 +455,7 @@ def flatten(lis): yield item -def _bootstrap_sim_cbb(values_lists, stat_func_lists, num_iterations, - iteration_batch_size, seed, block_length=1): +def _bootstrap_sim_cbb(values_lists, stat_func_lists, num_iterations, iteration_batch_size, seed, block_length=1, logger=None): """Returns simulated bootstrap distribution. 
Can do the independent and identically distributed (IID) or Circular Block Bootstrap (CBB) methods depending on the block_length Args: @@ -448,25 +482,32 @@ def _bootstrap_sim_cbb(values_lists, stat_func_lists, num_iterations, """ if seed is not None: + safe_log(logger, "debug",f"Setting random seed: {seed}") _np.random.seed(seed) num_iterations = int(num_iterations) iteration_batch_size = int(iteration_batch_size) - + safe_log(logger, "info", f"Number of iterations: {num_iterations}, iteration batch size: {iteration_batch_size}, block length: {block_length}.") results = [[] for _ in values_lists] for rng in range(0, num_iterations, iteration_batch_size): max_rng = min(iteration_batch_size, num_iterations - rng) - - values_sims = _generate_distributions_cbb(values_lists, max_rng, block_length) - + safe_log(logger, "debug",f"Running bootstrap iteration batch from {rng} to {rng + max_rng}.") + try: + values_sims = _generate_distributions_cbb(values_lists, max_rng, block_length, logger=logger) + safe_log(logger, "debug",f"Generated {max_rng} simulated distributions.") + except Exception as e: + safe_log(logger, "error", f"Error generating distributions in bootstrap: {e}") + raise + for i, values_sim, stat_func in zip(range(len(values_sims)), values_sims, stat_func_lists): + safe_log(logger, "debug",f"Calculating statistic for distribution set {i}.") results[i].extend(stat_func(values_sim)) - + safe_log(logger, "info", "Completed bootstrap simulation.") return _np.array(results) -def _generate_distributions_cbb(values_lists, num_iterations, block_length=1): +def _generate_distributions_cbb(values_lists, num_iterations, block_length=1, logger=None): values_shape = values_lists[0].shape[0] ids = _np.random.choice( values_shape, @@ -479,6 +520,7 @@ def apply_cbb(row): """ Applyes Circular Block Bootstrap (CBB) method to each row :param row: """ + counter = 0 init_val = row[0] for ind, val in enumerate(row): @@ -500,9 +542,11 @@ def apply_cbb(row): if block_length > 1: # uss CBB + safe_log(logger, "info", f"Applying Circular Block Bootstrap (CBB) with block length: {block_length}") ids = _np.apply_along_axis(apply_cbb, axis=1, arr=ids) results = [values[ids] for values in values_lists] + safe_log(logger, "info", "CBB applied to all rows.") return results @@ -525,29 +569,36 @@ def _all_the_same(elements): return result -def _validate_arrays(values_lists): +def _validate_arrays(values_lists, logger=None): t = values_lists[0] t_type = type(t) + safe_log(logger, "debug",f"Validating arrays. First array type: {t_type}, shape: {t.shape}") if not isinstance(t, _sparse.csr_matrix) and not isinstance(t, _np.ndarray): - raise ValueError(('The arrays must either be of type ' 'scipy.sparse.csr_matrix or numpy.array')) + safe_log(logger, "error", "Arrays must either be of type scipy.sparse.csr_matrix or numpy.array.") + raise ValueError('The arrays must either be of type ' 'scipy.sparse.csr_matrix or numpy.array') - for _, values in enumerate(values_lists[1:]): + for index, values in enumerate(values_lists[1:]): if not isinstance(values, t_type): + safe_log(logger, "error", f"Array at index {index} is not of the same type as the first array.") raise ValueError('The arrays must all be of the same type') if t.shape != values.shape: + safe_log(logger, "error", f"Array at index {index} has a different shape: {values.shape}.
Expected: {t.shape}.") raise ValueError('The arrays must all be of the same shape') if isinstance(t, _sparse.csr_matrix): if values.shape[0] > 1: - raise ValueError(('The sparse matrix must have shape 1 row X N' - ' columns')) + safe_log(logger, "error", "Sparse matrix must have shape 1 row X N columns.") + raise ValueError('The sparse matrix must have shape 1 row X N' + ' columns') if isinstance(t, _sparse.csr_matrix): if _needs_sparse_unification(values_lists): - raise ValueError(('The non-zero entries in the sparse arrays' - ' must be aligned')) + safe_log(logger, "error", "Non-zero entries in the sparse arrays are not aligned.") + raise ValueError('The non-zero entries in the sparse arrays' + ' must be aligned') + safe_log(logger, "info", "Array validation completed successfully.") def _needs_sparse_unification(values_lists): diff --git a/metcalcpy/calc_difficulty_index.py b/metcalcpy/calc_difficulty_index.py index 5e6de792..54050c8a 100644 --- a/metcalcpy/calc_difficulty_index.py +++ b/metcalcpy/calc_difficulty_index.py @@ -21,6 +21,7 @@ import numpy as np from metcalcpy.piecewise_linear import PiecewiseLinear as plin +from metcalcpy.util.safe_log import safe_log __author__ = 'Bill Campbell (NRL) and Lindsay Blank (NCAR)' __version__ = '0.1.0' @@ -31,7 +32,8 @@ # Only allow 2D fields for now FIELD_DIM = 2 -def _input_check(sigmaij, muij, threshold, fieldijn, sigma_over_mu_ref, under_factor): + +def _input_check(sigmaij, muij, threshold, fieldijn, sigma_over_mu_ref, under_factor, logger=None): """ Check for valid input to _difficulty_index. @@ -55,22 +57,33 @@ def _input_check(sigmaij, muij, threshold, fieldijn, sigma_over_mu_ref, under_fa None. """ + safe_log(logger, "debug",f"Checking input parameters: sigmaij shape: {np.shape(sigmaij)}, " + f"muij type: {type(muij)}, threshold: {threshold}, " + f"fieldijn shape: {np.shape(fieldijn)}, sigma_over_mu_ref: {sigma_over_mu_ref}, " + f"under_factor: {under_factor}") assert isinstance(threshold, (int, float, np.int32, np.float32)) + safe_log(logger, "debug", f"Threshold type is valid: {type(threshold)}") assert np.ndim(sigmaij) == FIELD_DIM + safe_log(logger, "debug", f"sigmaij is {FIELD_DIM}D as expected.") assert np.all(sigmaij) >= EPS fieldshape = np.shape(fieldijn) # muij is a scalar or 2D array if isinstance(muij, np.ndarray): # If muij is an array, it must have the same shape as sigmaij assert np.shape(muij) == np.shape(sigmaij) + safe_log(logger, "debug", "muij is a valid array and matches the shape of sigmaij.") + else: + safe_log(logger, "debug", "muij is a scalar.") assert sigma_over_mu_ref >= EPS + safe_log(logger, "debug", "sigma_over_mu_ref is valid.") assert np.shape(sigmaij) == tuple(np.squeeze(fieldshape[0:-1])) + safe_log(logger, "debug", "sigmaij and fieldijn shapes are compatible.") assert isinstance(under_factor, (int, float, np.int32, np.float32)) assert 0.0 <= under_factor <= 1.0 + safe_log(logger, "debug", "under_factor is valid and within range.") - -def _difficulty_index(sigmaij, muij, threshold, fieldijn, Aplin, sigma_over_mu_ref=EPS, under_factor=0.5): +def _difficulty_index(sigmaij, muij, threshold, fieldijn, Aplin, sigma_over_mu_ref=EPS, under_factor=0.5, logger=None): """ Calculates version 6.1 of forecast difficulty index. The threshold terms all penalize equal (or slightly unequal) spread. 
@@ -101,38 +114,42 @@ def _difficulty_index(sigmaij, muij, threshold, fieldijn, Aplin, sigma_over_mu_r """ # Check for valid input - _input_check(sigmaij, muij, threshold, fieldijn, sigma_over_mu_ref, under_factor) - + _input_check(sigmaij, muij, threshold, fieldijn, sigma_over_mu_ref, under_factor, logger=logger) + safe_log(logger, "debug", "Input check passed successfully.") # Variance term in range 0 to 1 + safe_log(logger, "debug", "Calculating variance term.") sigma_over_mu = sigmaij / muij sigma_over_mu_max = np.nanmax(sigma_over_mu) + safe_log(logger, "debug", f"sigma_over_mu_max: {sigma_over_mu_max}") # Force reference value to be greater than current max of sigmaij / muij sigma_over_mu_ref = np.nanmax([sigma_over_mu_ref, sigma_over_mu_max]) variance_term = sigma_over_mu / sigma_over_mu_ref + safe_log(logger, "debug", f"variance_term calculated, max variance_term: {np.nanmax(variance_term)}") # Depends on under_factor. under_threshold_count =\ np.ma.masked_greater_equal(fieldijn, threshold).count(axis=-1) nmembers = np.shape(fieldijn)[-1] under_prob = under_threshold_count / nmembers - + safe_log(logger, "debug", f"under_threshold_count: {under_threshold_count}, under_prob: {under_prob}") # Threshold term in range 0 to 1 threshold_term = 1.0 - np.abs(1.0 - under_factor - under_prob) + safe_log(logger, "debug", f"threshold_term: {threshold_term}") # Linear taper factor taper_term = Aplin.values(muij) - + safe_log(logger, "debug", f"taper_term: {taper_term}") # Difficulty index is the average of the two terms # multiplied by a linear taper factor dij = 0.5 * taper_term * (variance_term + threshold_term) - + safe_log(logger, "info", f"Difficulty index calculation complete. Max dij: {np.nanmax(dij)}, Min dij: {np.nanmin(dij)}") return dij def forecast_difficulty(sigmaij, muij, threshold, fieldijn, - Aplin, sigma_over_mu_ref=EPS): + Aplin, sigma_over_mu_ref=EPS, logger=None): """ Calls private function _difficulty_index, to calculate version (v6.1) of forecast difficulty index. @@ -160,7 +177,10 @@ def forecast_difficulty(sigmaij, muij, threshold, fieldijn, Larger (> 0.5) means more difficult. """ + safe_log(logger, "debug", f"sigmaij shape: {sigmaij.shape}, muij: {'scalar' if isinstance(muij, float) else muij.shape}, " + f"threshold: {threshold}, fieldijn shape: {fieldijn.shape}, sigma_over_mu_ref: {sigma_over_mu_ref}") if Aplin is None: + safe_log(logger, "info", "No Aplin provided. 
Creating default Aplin object (version 6.1).") # Default to envelope version 6.1 xunits="feet" A6_1_name = "A6_1" @@ -171,9 +191,12 @@ def forecast_difficulty(sigmaij, muij, threshold, fieldijn, Aplin =\ plin(A6_1_xlist, A6_1_ylist, xunits=xunits, right=A6_1_right, left=A6_1_left, - name=A6_1_name) + name=A6_1_name) + safe_log(logger, "debug", "Default Aplin object created.") + safe_log(logger, "debug", "Calling _difficulty_index function.") dij = _difficulty_index(sigmaij, muij, threshold, fieldijn, - Aplin, sigma_over_mu_ref) + Aplin, sigma_over_mu_ref, logger=logger) + safe_log(logger, "info", "Forecast difficulty index calculation completed.") return dij if __name__ == "__main__": diff --git a/metcalcpy/event_equalize.py b/metcalcpy/event_equalize.py index dfb172b6..ebe3d933 100644 --- a/metcalcpy/event_equalize.py +++ b/metcalcpy/event_equalize.py @@ -21,10 +21,11 @@ import pandas as pd from metcalcpy import GROUP_SEPARATOR, DATE_TIME_REGEX +from metcalcpy.logging_config import setup_logging +from metcalcpy.util.safe_log import safe_log -def event_equalize(series_data, indy_var, series_var_vals, fix_vars, - fix_vals_permuted, equalize_by_indep, multi): +def event_equalize(series_data, indy_var, series_var_vals, fix_vars, fix_vals_permuted, equalize_by_indep, multi, logger=None): """Performs event equalisation. event_equalize assumes that the input series_data contains data indexed by fcst_valid_beg, @@ -49,6 +50,7 @@ def event_equalize(series_data, indy_var, series_var_vals, fix_vars, A data frame that contains equalized records """ pd.options.mode.chained_assignment = None + safe_log(logger, "info", "Starting event equalization.") column_names = list(series_data) exception_columns = ["", "fcst_valid_beg", 'fcst_lead', 'fcst_valid', 'fcst_init', 'fcst_init_beg', 'VALID', 'LEAD'] if isinstance(fix_vars, str): @@ -62,11 +64,14 @@ def event_equalize(series_data, indy_var, series_var_vals, fix_vars, series_data['equalize'] = series_data.loc[:, 'fcst_valid'].astype(str) + ' ' \ + series_data.loc[:, 'fcst_lead'].astype(str) + safe_log(logger, "debug", "Equalization column added to the data frame.") + # add independent variable if needed if equalize_by_indep and indy_var not in exception_columns: series_data['equalize'] = series_data.loc[:, 'equalize'].astype(str) + " " \ + series_data.loc[:, indy_var].astype(str) + safe_log(logger, "debug", "Independent variable appended to the equalization column.") vars_for_ee = dict() @@ -83,6 +88,7 @@ def event_equalize(series_data, indy_var, series_var_vals, fix_vars, actual_vals = series_val.split(GROUP_SEPARATOR) series_vals_no_groups.extend(actual_vals) vars_for_ee[series_var] = series_vals_no_groups + safe_log(logger, "debug", f"Series variables added for equalization: {vars_for_ee}") # add fixed variables if present if fix_vars: @@ -92,6 +98,7 @@ def event_equalize(series_data, indy_var, series_var_vals, fix_vars, if isinstance(vals, str): vals = [vals] vars_for_ee[fix_var] = vals + safe_log(logger, "debug", f"Fixed variables added for equalization: {vars_for_ee}") # create a list of permutations representing the all variables for vars_for_ee_permuted = list(itertools.product(*vars_for_ee.values())) @@ -116,6 +123,7 @@ def event_equalize(series_data, indy_var, series_var_vals, fix_vars, # if the list contains repetitive values, show a warning if multi is False and len(permutation_data['equalize']) \ != len(set(permutation_data['equalize'])): + safe_log(logger, "warning", f"Non-unique events detected for permutation {permutation}.") print( f"WARNING:
eventEqualize() detected non-unique events for {permutation}" f" using [fcst_valid_beg,fcst_lead)]") @@ -143,6 +151,7 @@ def event_equalize(series_data, indy_var, series_var_vals, fix_vars, discarded_cases = pd.concat([discarded_cases, permutation_cases_not_in_common_cases]) # report the discarded records for discarded_case in discarded_cases: + safe_log(logger, "warning", f"Discarding series member with case {discarded_case} for {permutation}") print(f"WARNING: discarding series member with case {discarded_case}" f" for {permutation}") @@ -156,6 +165,7 @@ def event_equalize(series_data, indy_var, series_var_vals, fix_vars, series_data_ee = series_data[equalization_cases_ind] if len(series_data_ee) != len(series_data): + safe_log(logger, "warning", f"Event equalization removed {len(series_data) - len(series_data_ee)} rows.") print(f"WARNING: event equalization removed {len(series_data) - len(series_data_ee)} rows") return series_data_ee diff --git a/metcalcpy/event_equalize_against_values.py b/metcalcpy/event_equalize_against_values.py index 6681ffae..06209c11 100644 --- a/metcalcpy/event_equalize_against_values.py +++ b/metcalcpy/event_equalize_against_values.py @@ -13,12 +13,12 @@ """ import pandas as pd - +from metcalcpy.util.safe_log import safe_log __author__ = 'Tatiana Burek' __version__ = '0.1.0' - -def event_equalize_against_values(series_data, input_unique_cases): + +def event_equalize_against_values(series_data, input_unique_cases, logger=None): """Performs event equalisation. event_equalize_against_values assumes that the input series_data contains data @@ -38,6 +38,8 @@ def event_equalize_against_values(series_data, input_unique_cases): warning_remove = "WARNING: event equalization removed {} rows" + safe_log(logger, "info", "Starting event equalization.") + column_names = list(series_data) if 'fcst_valid' in column_names: @@ -48,6 +50,7 @@ def event_equalize_against_values(series_data, input_unique_cases): + ' ' + series_data['fcst_lead'].astype(str)) else: + safe_log(logger, "warning", "WARNING: eventEqualize() did not run due to lack of valid time field.") print("WARNING: eventEqualize() did not run due to lack of valid time field") return pd.DataFrame() @@ -55,13 +58,16 @@ def event_equalize_against_values(series_data, input_unique_cases): data_for_unique_cases = series_data[(series_data['equalize'].isin(input_unique_cases))] n_row_cases = len(data_for_unique_cases) if n_row_cases == 0: + safe_log(logger, "warning", "WARNING: discarding all members. No matching cases found.") print(" WARNING: discarding all members") return pd.DataFrame() n_row_ = len(series_data) if n_row_cases != n_row_: + safe_log(logger, "warning", warning_remove.format(n_row_ - n_row_cases)) print(warning_remove.format(n_row_ - n_row_cases)) # remove 'equalize' column data_for_unique_cases = data_for_unique_cases.drop(['equalize'], axis=1) + safe_log(logger, "info", "Event equalization completed successfully.") return data_for_unique_cases diff --git a/metcalcpy/logging_config.py b/metcalcpy/logging_config.py new file mode 100644 index 00000000..ce712ca6 --- /dev/null +++ b/metcalcpy/logging_config.py @@ -0,0 +1,90 @@ +import logging +import os +import sys +import getpass +import signal +import time + +class UserIDFormatter(logging.Formatter): + """ + Custom formatter to add user_id in place of the logger name. 
+ """ + def __init__(self, user_id, fmt=None, datefmt=None): + super().__init__(fmt, datefmt) + self.user_id = user_id + + def format(self, record): + # Override the 'name' attribute with user_id + record.name = self.user_id + return super().format(record) + +def handle_signals(signum, frame): + """ + Handle signals to perform clean shutdown or other custom actions. + """ + logger = logging.getLogger() + logger.warning(f'Received signal {signal.strsignal(signum)}. Shutting down.') + sys.exit(0) + +def setup_logging(config_params): + """ + Set up logging based on the configuration from a YAML file. + + Args: + config_params (dict): The dictionary containing logging configuration (log directory, filename, level). + + Returns: + logger (logging.Logger): Configured logger. + """ + # Get user ID and command line + user_id = getpass.getuser() + command_line = " ".join(sys.argv) + # Create log directory if it doesn't exist, using the path from the config + log_dir = config_params.get('log_dir') # No default here, expect it from YAML + if not log_dir: + log_dir = './logs' # Set default only if not provided + if not os.path.exists(log_dir): + os.makedirs(log_dir) + + # Set log filename, incorporating the log directory path from the config + log_filename = config_params.get('log_filename') # No default here, expect it from YAML + if not log_filename: + #log_filename = 'application.log' # Set default only if not provided + return None + log_file = os.path.join(log_dir, log_filename) + + # Set log level from YAML or use default; convert to appropriate logging level + log_level = config_params.get('log_level') # No default here, expect it from YAML + if not log_level: + log_level = 'WARNING' # Set default only if not provided + log_level = log_level.upper() + + + # Create a custom formatter that uses UTC for date and includes user_id instead of logger name + # Add ' UTC' to the format string for the time + formatter = UserIDFormatter( + user_id=user_id, + fmt='%(asctime)s UTC - %(name)s - %(levelname)s - %(message)s', + datefmt='%Y-%m-%d %H:%M:%S' + ) + + # Set up logging to write to a file + file_handler = logging.FileHandler(log_file) + file_handler.setLevel(getattr(logging, log_level, logging.INFO)) + file_handler.setFormatter(formatter) + + logger = logging.getLogger() + logger.setLevel(getattr(logging, log_level, logging.INFO)) + logger.addHandler(file_handler) + + # Set logger to use UTC time + logging.Formatter.converter = time.gmtime + + # Register signal handlers for graceful shutdown + signal.signal(signal.SIGINT, handle_signals) + signal.signal(signal.SIGTERM, handle_signals) + + logger.info(f"User: {user_id} has started the script with command: {command_line}") + + return logger + diff --git a/metcalcpy/piecewise_linear.py b/metcalcpy/piecewise_linear.py index 5fd31bee..a2d709e5 100644 --- a/metcalcpy/piecewise_linear.py +++ b/metcalcpy/piecewise_linear.py @@ -15,6 +15,7 @@ """ import numpy as np +from metcalcpy.util.safe_log import safe_log __author__ = 'Bill Campbell (NRL)' __version__ = '0.1.0' @@ -36,15 +37,20 @@ class PiecewiseLinear(): """ def __init__(self, x_domain, y_range, xunits='feet', - left=np.nan, right=np.nan, name=""): + left=np.nan, right=np.nan, name="", logger=None): + + self.logger = logger len_x = len(x_domain) if len_x < 2: + safe_log(logger, "error", f'X_domain (in {xunits})') raise IncompatibleLengths('Length of xdomain must be at least 2.') if np.any(np.diff(x_domain)) < 0: + safe_log(logger, "error", "X_domain (in {}) is {}".format(xunits, x_domain)) 
print("X_domain (in {}) is {}".format(xunits, x_domain)) raise UnsortedArray('Xdomain must be sorted in ascending order.') len_y = len(y_range) if len_x != len_y: + safe_log(logger, "error", "X_domain and Y_range must have the same length.") raise IncompatibleLengths('X_domain and Y_range must have same ' + 'length.\n Use left and right to set ' + 'value for points outside the x_domain\n') diff --git a/metcalcpy/scorecard.py b/metcalcpy/scorecard.py index f128f552..b22421ac 100644 --- a/metcalcpy/scorecard.py +++ b/metcalcpy/scorecard.py @@ -34,7 +34,6 @@ from typing import Union import pandas as pd import yaml -import logging import argparse import sys import itertools @@ -51,6 +50,8 @@ from metcalcpy.util.utils import intersection, get_derived_curve_name, \ is_derived_point, is_string_integer, OPERATION_TO_SIGN, calc_derived_curve_value, \ perfect_score_adjustment, sort_data, PRECISION, DerivedCurveComponent, is_string_strictly_float +from metcalcpy.logging_config import setup_logging +from metcalcpy.util.safe_log import safe_log COLUMNS_TO_REMOVE = ['equalize', 'stat_ncl', 'stat_ncu', 'stat_bcl', 'stat_bcu', 'fcst_valid_beg', 'fcst_init_beg'] @@ -78,6 +79,9 @@ def __init__(self, in_params): or doesn't have data """ + self.logger = setup_logging(in_params) + logger = self.logger + safe_log(logger, "debug", "Initializing Scorecard with parameters.") self.params = in_params self.derived_name_to_values = {} # import pandas @@ -89,7 +93,7 @@ def __init__(self, in_params): ) if self.input_data.empty: - logging.warning('The input data is empty. The empty output file was created') + safe_log(logger, "warning", 'The input data is empty. The empty output file was created') export_csv = self.input_data.to_csv(self.params['sum_stat_output'], index=None, header=True, mode="w", sep="\t") @@ -99,7 +103,7 @@ def __init__(self, in_params): except pd.errors.EmptyDataError: raise except KeyError as ex: - logging.error('Parameter with key %s is missing', ex) + safe_log(logger, "error", f'Parameter with key {ex} is missing') raise self.group_to_value = {} @@ -111,10 +115,14 @@ def calculate_scorecard_data(self): Saves the data into the file """ + logger = self.logger + safe_log(logger, "debug", "Calculating Scorecard data.") # identify all possible points values by adding series values, indy values # and statistics and then permute them if self.input_data.empty: + safe_log(logger, "warning", "Input data is empty. 
Returning an empty DataFrame.") return pd.DataFrame() + safe_log(logger, "debug", "Permuting all possible point values for series and independent variables.") series_val = self.params['series_val_1'] all_fields_values = series_val.copy() @@ -123,7 +131,10 @@ def calculate_scorecard_data(self): if indy_vals: all_fields_values[self.params['indy_var']] = indy_vals all_points = list(itertools.product(*all_fields_values.values())) + safe_log(logger, "debug", f"Generated {len(all_points)} points for calculation.") + if self.params['derived_series_1']: + safe_log(logger, "debug", "Adding derived points for calculation.") # identifies and add all possible derived points values all_points.extend(self._get_derived_points(series_val, indy_vals)) @@ -131,6 +142,7 @@ def calculate_scorecard_data(self): derived_frame = None # for each point for point_ind, point in enumerate(all_points): + safe_log(logger, "debug", f"Processing point {point_ind + 1} / {len(all_points)}: {point}") is_derived = is_derived_point(point) if not is_derived: # only get the data for each point - no calculations needed @@ -176,6 +188,7 @@ def calculate_scorecard_data(self): # print the result to file if derived_frame is not None: + safe_log(logger, "debug", f"Writing results to {self.params['sum_stat_output']}.") header = True mode = 'w' if 'append_to_file' in self.params.keys() and self.params['append_to_file'] == 'True': @@ -184,6 +197,9 @@ def calculate_scorecard_data(self): export_csv = derived_frame.to_csv(self.params['sum_stat_output'], index=None, header=header, mode=mode, sep="\t", na_rep="NA", float_format='%.' + str(PRECISION) + 'f') + safe_log(logger, "debug", "Results successfully written to file.") + else: + safe_log(logger, "warning", "No results to write.") def _get_stats_for_derived(self, series, series_to_data) -> Union[DataFrame, None]: """ Calculates derived statistic value for input data @@ -198,17 +214,22 @@ def _get_stats_for_derived(self, series, series_to_data) -> Union[DataFrame, Non DataFrame containing 1 row with the resulting data or None """ - + logger = self.logger + safe_log(logger, "debug", f"Calculating derived statistic for series {series}.") # get derived name derived_name = '' for operation in OPERATION_TO_SIGN: for point_component in series: if point_component.startswith((operation + '(', operation + ' (')): derived_name = point_component + safe_log(logger, "debug", f"Derived name found: {derived_name}") break # find all components for the 1st and 2nd series derived_curve_component = self.derived_name_to_values[derived_name] + if not derived_curve_component: + safe_log(logger, "error", f"No derived curve component found for {derived_name}") + return None permute_for_first_series = derived_curve_component.first_component.copy() for series_comp in series[1:]: if series_comp not in permute_for_first_series: @@ -229,6 +250,9 @@ def _get_stats_for_derived(self, series, series_to_data) -> Union[DataFrame, Non if perm in self.group_to_value: permute_for_second_series[i] = self.group_to_value[perm] + safe_log(logger, "debug", f"Permuted components for first series: {permute_for_first_series}") + safe_log(logger, "debug", f"Permuted components for second series: {permute_for_second_series}") + ds_1 = None ds_2 = None @@ -243,6 +267,7 @@ def _get_stats_for_derived(self, series, series_to_data) -> Union[DataFrame, Non if ds_1.values is None or ds_2.values is None \ or ds_1.values.size == 0 or ds_2.values.size == 0: + safe_log(logger, "error", "One or both series are missing data. 
Unable to calculate derived statistic.") return None # validate data @@ -266,9 +291,11 @@ def _get_stats_for_derived(self, series, series_to_data) -> Union[DataFrame, Non # find the number of unique combinations unique_date_size = len(set(map(tuple, date_lead_stat))) except TypeError as err: + safe_log(logger, "error", f"Error during validation: {err}") print(err) unique_date_size = [] if unique_date_size != len(ds_1.values): + safe_log(logger, "error", "Derived curve can't be calculated due to multiple values for one valid date/fcst_lead.") raise NameError("Derived curve can't be calculated." " Multiple values for one valid date/fcst_lead") @@ -282,12 +309,14 @@ def _get_stats_for_derived(self, series, series_to_data) -> Union[DataFrame, Non # calculate derived statistic based on the operation and stat_flag derived_stat = None if derived_curve_component.derived_operation == 'DIFF_SIG': + safe_log(logger, "debug", "Calculating DIFF_SIG.") if self.params['stat_flag'] == 'EMC': derived_stat = self._calculate_diff_sig_emc(stat_values_1, stat_values_2) else: derived_stat = self._calculate_diff_sig_ncar(stat_values_1, stat_values_2) elif derived_curve_component.derived_operation == 'DIFF': + safe_log(logger, "debug", "Calculating DIFF.") # perform the derived operation derived_stat_list = calc_derived_curve_value( stat_values_1, @@ -296,6 +325,7 @@ def _get_stats_for_derived(self, series, series_to_data) -> Union[DataFrame, Non derived_stat = statistics.mean(derived_stat_list) elif derived_curve_component.derived_operation == 'SINGLE': + safe_log(logger, "debug", "Calculating SINGLE.") derived_stat = statistics.mean(stat_values_1) # create dataframe from teh 1st row of the original data @@ -321,6 +351,7 @@ def _get_stats_for_derived(self, series, series_to_data) -> Union[DataFrame, Non # set model df.at[0, 'model'] = derived_name + safe_log(logger, "debug", f"Derived statistic for {derived_name} calculated successfully.") return df def _calculate_diff_sig_ncar(self, ds_1_values, ds_2_values) -> float: @@ -332,17 +363,33 @@ def _calculate_diff_sig_ncar(self, ds_1_values, ds_2_values) -> float: Returns: the difference significance """ - # perform the derived operation - derived_stat_list = calc_derived_curve_value(ds_1_values, ds_2_values, 'DIFF_SIG') - avg = statistics.mean(derived_stat_list) - sdv = statistics.stdev(derived_stat_list) - total = len(derived_stat_list) - t = avg / (sdv / np.sqrt(total)) - p_val = 1 - 2 * pt(abs(t), total - 1, lower_tail=False) - derived_stat = perfect_score_adjustment(statistics.mean(ds_1_values), - statistics.mean(ds_2_values), - self.params['list_stat_1'][0], - p_val) + logger = self.logger + safe_log(logger, "debug", "Calculating DIFF_SIG.") + + # Check if input arrays are not empty + if not ds_1_values or not ds_2_values: + safe_log(logger, "error", "Input arrays for DIFF_SIG calculation are empty.") + raise ValueError("Input arrays for DIFF_SIG calculation must not be empty.") + + try: + safe_log(logger, "debug", f"Calculating derived statistics between ds_1: {ds_1_values} and ds_2: {ds_2_values}.") + # perform the derived operation + derived_stat_list = calc_derived_curve_value(ds_1_values, ds_2_values, 'DIFF_SIG') + avg = statistics.mean(derived_stat_list) + sdv = statistics.stdev(derived_stat_list) + total = len(derived_stat_list) + safe_log(logger, "debug", f"Derived statistics - Avg: {avg}, Stdev: {sdv}, Total: {total}.") + t = avg / (sdv / np.sqrt(total)) + p_val = 1 - 2 * pt(abs(t), total - 1, lower_tail=False) + safe_log(logger, "debug", 
f"T-statistic: {t}, p-value: {p_val}.") + derived_stat = perfect_score_adjustment(statistics.mean(ds_1_values), + statistics.mean(ds_2_values), + self.params['list_stat_1'][0], + p_val, logger=logger) + safe_log(logger, "debug", f"DIFF_SIG calculation completed successfully. Result: {derived_stat}") + except Exception as err: + safe_log(logger, "error", f"Error during DIFF_SIG calculation: {err}") + raise err return derived_stat def _calculate_diff_sig_emc(self, ds_1_values, ds_2_values) -> float: @@ -354,67 +401,80 @@ def _calculate_diff_sig_emc(self, ds_1_values, ds_2_values) -> float: Returns: the difference significance """ - derived_stat = None - values_1 = np.array(ds_1_values) - values_2 = np.array(ds_2_values) - val2_minus_val1 = np.subtract(values_2, values_1) - acdm = sum(val2_minus_val1) / self.params['ndays'] - acdm_list = [acdm] * len(values_1) - std = math.sqrt(sum(np.subtract(val2_minus_val1, acdm_list) * np.subtract(val2_minus_val1, acdm_list)) / - self.params['ndays']) - nsz = len(ds_1_values) - intvl = round(1.960 * std / math.sqrt(nsz - 1), 6) - mean1 = round(statistics.mean(values_1), 6) - mean2 = round(statistics.mean(values_2), 6) - if self.params['list_stat_1'][0].startswith('ME') or self.params['list_stat_1'][0].startswith('BIAS'): - ds = (abs(mean2 - mean1)) / intvl - sss = abs(mean2) - abs(mean1) - if sss is not None and sss < 0: - ds = (-1) * ds - elif self.params['list_stat_1'][0].startswith('RMSE') \ - or self.params['list_stat_1'][0].startswith('RMSVE'): - ds = (mean2 - mean1) / intvl - else: - ds = (mean1 - mean2) / intvl - if ds is not None: - ds = round(ds, 3) - if self.params['ndays'] >= 80: - alpha1 = 1.960 # 95% confidence level - alpha2 = 2.576 # 99% confidence level - alpha3 = 3.291 # 99.9% confidence level - elif self.params['ndays'] >= 40 and self.params['ndays'] < 80: - alpha1 = 2.0 # 95% confidence level - alpha2 = 2.66 # 99% confidence level - alpha3 = 3.46 # 99.9% confidence level - elif self.params['ndays'] >= 20 and self.params['ndays'] < 40: - alpha1 = 2.042 # 95% confidence level - alpha2 = 2.75 # 99% confidence level - alpha3 = 3.646 # 99.9% confidence level - elif self.params['ndays'] < 20: - alpha1 = 2.228 # 95% confidence level - alpha2 = 3.169 # 99% confidence level - alpha3 = 4.587 # 99.9% confidence level - ds95 = ds - ds99 = ds * alpha1 / alpha2 - ds99 = round(ds99, 3) - ds999 = ds * alpha1 / alpha3; - ds999 = round(ds999, 3) - if ds999 >= 1: - derived_stat = 1 - elif ds999 < 1 and ds99 >= 1: - derived_stat = 0.99 - elif ds99 < 1 and ds95 >= 1: - derived_stat = 0.95 - elif ds95 > -1 and ds95 < 1: - derived_stat = 0 - elif ds95 <= -1 and ds99 > -1: - derived_stat = -0.95 - elif ds99 <= -1 and ds999 > -1: - derived_stat = -0.99 - elif ds999 <= -1 and ds999 > -100.0: - derived_stat = -1 - elif ds999 < -100.0: - derived_stat = -1 + logger = self.logger + safe_log(logger, "debug", "Starting DIFF_SIG EMC calculation.") + try: + # perform the derived operation + derived_stat = None + values_1 = np.array(ds_1_values) + values_2 = np.array(ds_2_values) + val2_minus_val1 = np.subtract(values_2, values_1) + safe_log(logger, "debug", f"Values 1: {values_1}, Values 2: {values_2}, Difference: {val2_minus_val1}") + acdm = sum(val2_minus_val1) / self.params['ndays'] + acdm_list = [acdm] * len(values_1) + std = math.sqrt(sum(np.subtract(val2_minus_val1, acdm_list) * np.subtract(val2_minus_val1, acdm_list)) / + self.params['ndays']) + safe_log(logger, "debug", f"ACDM: {acdm}, Standard Deviation: {std}") + nsz = len(ds_1_values) + intvl = round(1.960 * 
std / math.sqrt(nsz - 1), 6) + mean1 = round(statistics.mean(values_1), 6) + mean2 = round(statistics.mean(values_2), 6) + safe_log(logger, "debug", f"Mean 1: {mean1}, Mean 2: {mean2}, Interval: {intvl}") + if self.params['list_stat_1'][0].startswith('ME') or self.params['list_stat_1'][0].startswith('BIAS'): + ds = (abs(mean2 - mean1)) / intvl + sss = abs(mean2) - abs(mean1) + if sss is not None and sss < 0: + ds = (-1) * ds + elif self.params['list_stat_1'][0].startswith('RMSE') \ + or self.params['list_stat_1'][0].startswith('RMSVE'): + ds = (mean2 - mean1) / intvl + else: + ds = (mean1 - mean2) / intvl + if ds is not None: + ds = round(ds, 3) + safe_log(logger, "debug", f"DS value before significance thresholding: {ds}") + if self.params['ndays'] >= 80: + alpha1 = 1.960 # 95% confidence level + alpha2 = 2.576 # 99% confidence level + alpha3 = 3.291 # 99.9% confidence level + elif self.params['ndays'] >= 40 and self.params['ndays'] < 80: + alpha1 = 2.0 # 95% confidence level + alpha2 = 2.66 # 99% confidence level + alpha3 = 3.46 # 99.9% confidence level + elif self.params['ndays'] >= 20 and self.params['ndays'] < 40: + alpha1 = 2.042 # 95% confidence level + alpha2 = 2.75 # 99% confidence level + alpha3 = 3.646 # 99.9% confidence level + elif self.params['ndays'] < 20: + alpha1 = 2.228 # 95% confidence level + alpha2 = 3.169 # 99% confidence level + alpha3 = 4.587 # 99.9% confidence level + ds95 = ds + ds99 = ds * alpha1 / alpha2 + ds99 = round(ds99, 3) + ds999 = ds * alpha1 / alpha3; + ds999 = round(ds999, 3) + safe_log(logger, "debug", f"DS95: {ds95}, DS99: {ds99}, DS999: {ds999}") + if ds999 >= 1: + derived_stat = 1 + elif ds999 < 1 and ds99 >= 1: + derived_stat = 0.99 + elif ds99 < 1 and ds95 >= 1: + derived_stat = 0.95 + elif ds95 > -1 and ds95 < 1: + derived_stat = 0 + elif ds95 <= -1 and ds99 > -1: + derived_stat = -0.95 + elif ds99 <= -1 and ds999 > -1: + derived_stat = -0.99 + elif ds999 <= -1 and ds999 > -100.0: + derived_stat = -1 + elif ds999 < -100.0: + derived_stat = -1 + except Exception as err: + safe_log(logger, "error", f"Error during EMC calculation: {err}") + raise err + safe_log(logger, "debug", f"EMC DIFF_SIG calculation completed. 
Derived Statistic: {derived_stat}") return derived_stat def _get_derived_points(self, series_val, indy_vals): @@ -426,11 +486,13 @@ def _get_derived_points(self, series_val, indy_vals): Returns: a list of all possible values for each derived points """ - + logger = self.logger + safe_log(logger, "debug", "Starting to calculate derived points.") # for each derived series result = [] for derived_serie in self.params['derived_series_1']: # series 1 components + safe_log(logger, "debug", f"Processing derived series: {derived_serie}") ds_1 = derived_serie[0].split(' ') # series 2 components @@ -440,6 +502,7 @@ def _get_derived_points(self, series_val, indy_vals): for ind, name in enumerate(ds_1): if name != ds_2[ind]: series_var_vals = (name, ds_2[ind]) + safe_log(logger, "debug", f"Identified series variable values: {series_var_vals}") break series_var = list(series_val.keys())[-1] @@ -447,6 +510,7 @@ def _get_derived_points(self, series_val, indy_vals): for var in series_val.keys(): if all(elem in series_val[var] for elem in series_var_vals): series_var = var + safe_log(logger, "debug", f"Matched series variable: {series_var}") break derived_val = series_val.copy() @@ -459,6 +523,7 @@ def _get_derived_points(self, series_val, indy_vals): derived_val[var] = intersection(derived_val[var], ds_1) derived_curve_name = get_derived_curve_name(derived_serie) + safe_log(logger, "debug", f"Derived curve name: {derived_curve_name}") derived_val[series_var] = [derived_curve_name] if len(indy_vals) > 0: derived_val[self.params['indy_var']] = indy_vals @@ -470,7 +535,8 @@ def _get_derived_points(self, series_val, indy_vals): else: derived_val['stat_name'] = [ds_1[-1] + "," + ds_2[-1]] result.append(list(itertools.product(*derived_val.values()))) - + + safe_log(logger, "debug", f"Total derived points calculated: {len(result)}") return [y for x in result for y in x] diff --git a/metcalcpy/sum_stat.py b/metcalcpy/sum_stat.py index 1b7030db..91974682 100644 --- a/metcalcpy/sum_stat.py +++ b/metcalcpy/sum_stat.py @@ -56,7 +56,8 @@ from metcalcpy.util.utils import is_string_integer, parse_bool, \ aggregate_field_values, perform_event_equalization, is_string_strictly_float - +from metcalcpy.logging_config import setup_logging +from metcalcpy.util.safe_log import safe_log class SumStat: """A class that performs event equalisation if needed and statistics calculation @@ -80,7 +81,9 @@ def __init__(self, in_params): Raises: EmptyDataError or ValueError when the input DataFrame is empty or doesn't have data """ - + self.logger = setup_logging(in_params) + logger = self.logger + safe_log(logger, "debug", "Initializing Scorecard with parameters.") self.params = in_params # import pandas try: @@ -91,7 +94,7 @@ def __init__(self, in_params): ) if self.input_data.empty: - logging.warning('The input data is empty. The empty output file was created') + safe_log(logger, "warning", 'The input data is empty. The empty output file was created') export_csv = self.input_data.to_csv(self.params['sum_stat_output'], index=None, header=True, mode="w", sep="\t") @@ -99,9 +102,10 @@ def __init__(self, in_params): self.column_names = self.input_data.columns.values except pd.errors.EmptyDataError: + safe_log(logger, "error", 'The input data is empty. 
The empty output file was created') raise except KeyError as ex: - logging.error('Parameter with key %s is missing', ex) + safe_log(logger, "error", f'Parameter with key {ex} is missing') raise STATISTIC_TO_FIELDS = {'ctc': {"total", "fy_oy", "fy_on", "fn_oy", "fn_on"}, @@ -123,6 +127,8 @@ def calculate_stats(self): """ Calculates summary statistics for each series point Writes output data to the file """ + logger =self.logger + safe_log(logger, "debug", "Calculating summary statistics.") fields = [] try: fields = self.STATISTIC_TO_FIELDS[self.params['line_type']] @@ -140,7 +146,7 @@ def calculate_stats(self): # perform EE if needed is_event_equal = parse_bool(self.params['event_equal']) if is_event_equal: - self.input_data = perform_event_equalization(self.params, self.input_data) + self.input_data = perform_event_equalization(self.params, self.input_data, logger=logger) if not self.input_data.empty: # perform aggregation on a special field - needed for scorecard @@ -154,11 +160,10 @@ def calculate_stats(self): # self.process_row, num_of_processes=mp.cpu_count()) print("--- %s seconds ---" % (time.time() - start_time)) else: - logging.warning('Event equalisation removed all data. ' - 'The empty output file is created') + safe_log(logger, "warning", 'Event equalisation removed all data. The empty output file is created') except KeyError as ex: - logging.error('Parameter with key %s is missing. The empty output file is created', ex) + safe_log(logger, "error", f'Parameter with key {ex} is missing. The empty output file is created') # remove the original fields to save the space for column in fields: @@ -185,28 +190,34 @@ def aggregate_special_fields(self, axis='1'): axis - y1 or y1 axis """ warnings.filterwarnings('error') + logger = self.logger + safe_log(logger, "debug", "Aggregating special fields.") # check if indy_vals have a field that need to be aggregated - the field with ';' has_agg_indy_field = any(any(GROUP_SEPARATOR in i for i in item) for item in self.params['indy_vals']) - + safe_log(logger, "debug", f"Independent variable field with ';' detected: {has_agg_indy_field}") # look if there is a field that need to be aggregated first - the field with ';' series_var_val = self.params['series_val_' + axis] has_agg_series_field = any(any(GROUP_SEPARATOR in i for i in item) for item in series_var_val) - + safe_log(logger, "debug", f"Series variable field with ';' detected: {has_agg_series_field}") if series_var_val and (has_agg_indy_field or has_agg_series_field): # the special field was detected - + safe_log(logger, "info", "Special fields detected for aggregation. Starting aggregation process.") all_points = list(itertools.product(*series_var_val.values())) aggregated_values = pd.DataFrame() series_vars = list(series_var_val.keys()) + safe_log(logger, "debug", f"All points for aggregation: {all_points}") + for indy_val in self.params['indy_vals']: # filter the input frame by each indy value if indy_val is None: filtered_by_indy = self.input_data + safe_log(logger, "debug", f"Using all input data for indy_val: {indy_val}") else: # filter by value or split the value and filter by multiple values filtered_by_indy = self.input_data[ self.input_data[self.params['indy_var']].isin(indy_val.split(';'))] + safe_log(logger, "debug", f"Filtered data for indy_val {indy_val}. 
Rows remaining: {len(filtered_by_indy)}") for point in all_points: point_data = filtered_by_indy @@ -217,6 +228,8 @@ def aggregate_special_fields(self, axis='1'): actual_series_vals = point[index].split(';') else: actual_series_vals = point[index].split(GROUP_SEPARATOR) + safe_log(logger, "debug", f"Actual series values for {series_var}: {actual_series_vals}") + for ind, val in enumerate(actual_series_vals): if is_string_integer(val): actual_series_vals[ind] = int(val) @@ -224,9 +237,12 @@ def aggregate_special_fields(self, axis='1'): actual_series_vals[ind] = float(val) point_data = \ point_data[point_data[series_vars[index]].isin(actual_series_vals)] + safe_log(logger, "debug", f"Filtered point data for series_var {series_var}. Rows remaining: {len(point_data)}") + # aggregate point data if any(';' in series_val for series_val in point): + safe_log(logger, "debug", "Aggregating data based on series values.") point_data = aggregate_field_values(series_var_val, point_data, self.params['line_type']) @@ -241,12 +257,16 @@ def aggregate_special_fields(self, axis='1'): aggregated_values = pd.concat([aggregated_values, point_data]) self.input_data = aggregated_values self.input_data.reset_index(inplace=True, drop=True) + safe_log(logger, "info", f"Aggregation completed. Rows after aggregation: {len(self.input_data)}") + else: + safe_log(logger, "debug", "No special fields detected for aggregation.") + def process_rows(self): """For each row in the data frame finds the row statistic name, calculates it's value and stores this value in the corresponding column """ - + logger = self.logger for index, row in self.input_data.iterrows(): # statistic name stat = row['stat_name'].lower() @@ -287,12 +307,19 @@ def calculate_statistic(values, columns_names, stat_name, aggregation=False): Raises: an error """ - func_name = f'calculate_{stat_name}' - num_parameters = len(signature(globals()[func_name]).parameters) - if num_parameters == 2: - stat = globals()[func_name](values, columns_names) - else: - stat = globals()[func_name](values, columns_names, aggregation) + logger = self.logger + safe_log(logger, "debug", f"Calculating statistic '{stat_name}' with aggregation: {aggregation}.") + try: + func_name = f'calculate_{stat_name}' + num_parameters = len(signature(globals()[func_name]).parameters) + if num_parameters == 2: + stat = globals()[func_name](values, columns_names, logger=logger) + else: + stat = globals()[func_name](values, columns_names, aggregation, logger=logger) + safe_log(logger, "info", f"Successfully calculated statistic '{stat_name}'.") + except Exception as e: + safe_log(logger, "error", f"An error occurred while calculating statistic '{stat_name}': {e}") + raise return stat diff --git a/metcalcpy/util/correlation.py b/metcalcpy/util/correlation.py index 5f002b79..bda9a627 100644 --- a/metcalcpy/util/correlation.py +++ b/metcalcpy/util/correlation.py @@ -26,9 +26,9 @@ from scipy.integrate import quad from scipy import stats from scipy.special import gamma, betaln, hyp2f1 +from metcalcpy.util.safe_log import safe_log - -def corr(x, y, tail='two-sided', method='pearson', **kwargs): +def corr(x, y, tail='two-sided', method='pearson', logger=None, **kwargs): """(Robust) correlation between two variables. This method was the patrt of pingouin package and was moved from it to METcalcpy @@ -217,12 +217,16 @@ def corr(x, y, tail='two-sided', method='pearson', **kwargs): y = np.asarray(y) assert x.ndim == y.ndim == 1, 'x and y must be 1D array.' 
assert x.size == y.size, 'x and y must have the same length.' + safe_log(logger, "debug", "Validated that x and y are 1D arrays and have the same length.") _msg = 'tail must be "two-sided" or "one-sided".' assert tail in ['two-sided', 'one-sided'], _msg # Remove rows with missing values - x, y = remove_na(x, y, paired=True) + x, y = remove_na(x, y, paired=True, logger=logger) nx = x.size + safe_log(logger, "debug", f"Removed missing values. Size of x and y is now {nx}.") + + safe_log(logger, "debug", f"Computing correlation using method '{method}'.") # Compute correlation coefficient if method == 'pearson': @@ -237,10 +241,14 @@ def corr(x, y, tail='two-sided', method='pearson', **kwargs): r, pval = percbend(x, y, **kwargs) elif method == 'shepherd': r, pval, outliers = shepherd(x, y, **kwargs) + safe_log(logger, "debug", "Identified outliers in Shepherd's method.") else: raise ValueError(f'Method "{method}" not recognized.') + safe_log(logger, "debug", f"Correlation coefficient (r): {r}, p-value: {pval}.") + if np.isnan(r): + safe_log(logger, "warning", "Correlation computation failed. Returning a DataFrame of NaNs.") # Correlation failed -- new in version v0.3.4, instead of raising an # error we just return a dataframe full of NaN (except sample size). # This avoid sudden stop in pingouin.pairwise_corr. @@ -251,10 +259,11 @@ def corr(x, y, tail='two-sided', method='pearson', **kwargs): # Compute r2 and adj_r2 r2 = r ** 2 adj_r2 = 1 - (((1 - r2) * (nx - 1)) / (nx - 3)) + safe_log(logger, "debug", f"Computed r2: {r2}, adjusted r2: {adj_r2}.") # Compute the parametric 95% confidence interval and power - ci = compute_esci(stat=r, nx=nx, ny=nx, eftype='r', decimals=6) - pr = power_corr(r=r, n=nx, power=None, alpha=0.05, tail=tail) + ci = compute_esci(stat=r, nx=nx, ny=nx, eftype='r', decimals=6, logger=logger) + pr = power_corr(r=r, n=nx, power=None, alpha=0.05, tail=tail, logger=logger) # Create dictionary stats_dict = \ @@ -269,7 +278,8 @@ def corr(x, y, tail='two-sided', method='pearson', **kwargs): # Compute the BF10 for Pearson correlation only if method == 'pearson': - stats_dict['BF10'] = bayesfactor_pearson(r, nx, tail=tail) + stats_dict['BF10'] = bayesfactor_pearson(r, nx, tail=tail, logger=logger) + safe_log(logger, "debug", f"Computed Bayes Factor (BF10): {stats_dict['BF10']}.") # Convert to DataFrame stats_df = pd.DataFrame.from_records(stats_dict, index=[method]) @@ -278,10 +288,11 @@ def corr(x, y, tail='two-sided', method='pearson', **kwargs): col_keep = ['n', 'outliers', 'r', 'CI95%', 'r2', 'adj_r2', 'p-val', 'BF10', 'power'] col_order = [k for k in col_keep if k in stats_df.keys().tolist()] - return _postprocess_dataframe(stats_df)[col_order] + safe_log(logger, "debug", "Finished correlation calculations.") + return _postprocess_dataframe(stats_df, logger=logger)[col_order] -def acf(x: Union[list, np.array], acf_type: str = 'correlation', lag_max: Union[int, None] = None) \ +def acf(x: Union[list, np.array], acf_type: str = 'correlation', lag_max: Union[int, None] = None, logger=None) \ -> Union[list, None]: """ The function acf computes estimates of the autocovariance or autocorrelation function. 
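The logger threaded through corr and the other statistics above is the one returned by metcalcpy.logging_config.setup_logging. A usage sketch follows; the YAML file name is illustrative, but the keys log_dir, log_filename and log_level are the ones setup_logging reads, and it returns None when log_filename is absent, which the logger=None defaults and safe_log tolerate:

    import numpy as np
    import yaml

    from metcalcpy.logging_config import setup_logging
    from metcalcpy.util.correlation import corr

    # 'logging_params.yaml' is a hypothetical config file; it should define
    # log_dir, log_filename and log_level as read by setup_logging().
    with open('logging_params.yaml') as config_file:
        params = yaml.safe_load(config_file)

    logger = setup_logging(params)  # None when no log_filename is configured

    x = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
    y = np.array([1.1, 2.3, 2.9, 4.2, 4.8])
    # Every call that accepts logger= now records its progress in the log file.
    stats = corr(x, y, method='pearson', logger=logger)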
@@ -302,33 +313,46 @@ def acf(x: Union[list, np.array], acf_type: str = 'correlation', lag_max: Union[ """ # validate acf type if acf_type not in ['covariance', 'correlation']: + safe_log(logger, "error", f"Incorrect acf_type provided: {acf_type}") print('ERROR incorrect acf_type') - return None + return + safe_log(logger, "debug", f"acf_type is set to: {acf_type}") if x is None or len(x) == 0: + safe_log(logger, "warning", "Input array x is None or empty. Returning None.") return None + safe_log(logger, "debug", f"Input array size: {len(x)}") acf_result = [] size = np.size(x) # calculate mean of the array excluding None - mean_val = mean(remove_none(x)) + mean_val = mean(remove_none(x, logger=logger)) + safe_log(logger, "debug", f"Mean of the array (excluding None): {mean_val}") # calculate lag if not provided if lag_max is None: lag_max = math.floor(10 * (math.log10(size) - math.log10(1))) + safe_log(logger, "debug", f"lag_max not provided, calculated lag_max: {lag_max}") lag_max = int(min(lag_max, size - 1)) + safe_log(logger, "debug", f"Final lag_max after adjustment: {lag_max}") + + cov_0 = autocovariance(x, size, 0, mean_val, logger=logger) + safe_log(logger, "debug", f"Autocovariance at lag 0: {cov_0}") - cov_0 = autocovariance(x, size, 0, mean_val) for i in range(lag_max+1): - cov = autocovariance(x, size, i, mean_val) + cov = autocovariance(x, size, i, mean_val, logger=logger) if acf_type == 'covariance': acf_result.append(cov) + safe_log(logger, "debug", f"Covariance at lag {i}: {cov}") elif acf_type == 'correlation': acf_result.append(cov / cov_0) + safe_log(logger, "debug", f"Correlation at lag {i}: {cov / cov_0}") + + safe_log(logger, "info", "Autocorrelation function calculation completed.") return acf_result -def autocovariance(x, list_size, n_lag, mean_val): +def autocovariance(x, list_size, n_lag, mean_val, logger=None): """ The function that computes autocovariance for the first n_lag elements of the list :param x: numeric array @@ -337,6 +361,8 @@ def autocovariance(x, list_size, n_lag, mean_val): :param mean_val: mean value of the array :return: autocovariance """ + safe_log(logger, "debug", f"Calculating autocovariance with list_size={list_size}, n_lag={n_lag}, mean_val={mean_val}") + total_autocov = 0 count_autocov = 0 for i in np.arange(0, list_size - n_lag): @@ -346,7 +372,7 @@ def autocovariance(x, list_size, n_lag, mean_val): return (1 / (count_autocov + n_lag)) * total_autocov -def bicor(x, y, c=9): +def bicor(x, y, c=9, logger=None): """ Biweight midcorrelation. @@ -379,23 +405,31 @@ def bicor(x, y, c=9): 46(11). https://www.ncbi.nlm.nih.gov/pubmed/23050260 """ + safe_log(logger, "debug", f"Starting bicor computation with c={c}") # Calculate median nx = x.size x_median = np.median(x) y_median = np.median(y) + safe_log(logger, "debug", f"Calculated medians: x_median={x_median}, y_median={y_median}") # Raw median absolute deviation x_mad = np.median(np.abs(x - x_median)) y_mad = np.median(np.abs(y - y_median)) + safe_log(logger, "debug", f"Calculated MAD: x_mad={x_mad}, y_mad={y_mad}") + if x_mad == 0 or y_mad == 0: # From Langfelder and Horvath 2012: # "Strictly speaking, a call to bicor in R should return a missing # value if mad(x) = 0 or mad(y) = 0." This avoids division by zero. 
+        safe_log(logger, "warning", "MAD of x or y is zero, returning (np.nan, np.nan)")
         return np.nan, np.nan

     # Calculate weights
     u = (x - x_median) / (c * x_mad)
     v = (y - y_median) / (c * y_mad)
     w_x = (1 - u ** 2) ** 2 * ((1 - np.abs(u)) > 0)
     w_y = (1 - v ** 2) ** 2 * ((1 - np.abs(v)) > 0)
+
+    safe_log(logger, "debug", "Calculated weights for x and y.")
+
     # Normalize x and y by weights
     x_norm = (x - x_median) * w_x
     y_norm = (y - y_median) * w_y
@@ -404,10 +438,13 @@ def bicor(x, y, c=9):
     r = (x_norm * y_norm).sum() / denom
     tval = r * np.sqrt((nx - 2) / (1 - r ** 2))
     pval = 2 * t.sf(abs(tval), nx - 2)
+
+    safe_log(logger, "info", f"Correlation coefficient r={r}, p-value={pval}")
+
     return r, pval


-def percbend(x, y, beta=.2):
+def percbend(x, y, beta=.2, logger=None):
     """
     Percentage bend correlation (Wilcox 1994).

@@ -439,16 +476,22 @@ def percbend(x, y, beta=.2):
         Toolbox. Frontiers in Psychology. 2012;3:606. doi:10.3389/fpsyg.2012.00606.
     """
+    safe_log(logger, "debug", f"Starting percbend computation with beta={beta}")
     X = np.column_stack((x, y))
     nx = X.shape[0]
     M = np.tile(np.median(X, axis=0), nx).reshape(X.shape)
     W = np.sort(np.abs(X - M), axis=0)
     m = int((1 - beta) * nx)
     omega = W[m - 1, :]
+
+    safe_log(logger, "debug", f"Calculated omega: {omega}")
+
     P = (X - M) / omega
     P[np.isinf(P)] = 0
     P[np.isnan(P)] = 0
+    safe_log(logger, "debug", "Removed infinite and NaN values from P")
+
     # Loop over columns
     a = np.zeros((2, nx))
     for c in [0, 1]:
@@ -460,20 +503,23 @@ def percbend(x, y, beta=.2):
         s[np.where(psi > 1)[0]] = 0
         pbos = (np.sum(s) + omega[c] * (i2 - i1)) / (s.size - i1 - i2)
         a[c] = (X[:, c] - pbos) / omega[c]
+        safe_log(logger, "debug", f"Processed column {c} of X, calculated pbos={pbos}")

     # Bend
     a[a <= -1] = -1
     a[a >= 1] = 1
+    safe_log(logger, "debug", "Applied bending to a")

     # Get r, tval and pval
     a, b = a
     r = (a * b).sum() / np.sqrt((a ** 2).sum() * (b ** 2).sum())
     tval = r * np.sqrt((nx - 2) / (1 - r ** 2))
     pval = 2 * t.sf(abs(tval), nx - 2)
+    safe_log(logger, "info", f"Calculated percentage bend correlation r={r}, pval={pval}")
     return r, pval


-def shepherd(x, y, n_boot=200):
+def shepherd(x, y, n_boot=200, logger=None):
     """
     Shepherd's Pi correlation, equivalent to Spearman's rho after outliers
     removal.

@@ -501,20 +547,27 @@ def shepherd(x, y, n_boot=200):
         Pi is Spearman's Rho after outlier removal.
     """
+    safe_log(logger, "info", "Starting Shepherd's Pi correlation calculation.")
+
     X = np.column_stack((x, y))
+    safe_log(logger, "debug", f"Combined x and y into array X: {X}")

     # Bootstrapping on Mahalanobis distance
     m = bsmahal(X, X, n_boot)
+    safe_log(logger, "debug", f"Mahalanobis distances (bootstrap): {m}")

     # Determine outliers
     outliers = (m >= 6)
+    safe_log(logger, "info", f"Identified outliers: {outliers}")

     # Compute correlation
     r, pval = spearmanr(x[~outliers], y[~outliers])
+    safe_log(logger, "debug", f"Spearman's rho (without outliers): r={r}, p-value={pval}")

     # (optional) double the p-value to achieve a nominal false alarm rate
     # pval *= 2
     # pval = 1 if pval > 1 else pval
+    safe_log(logger, "info", "Completed Shepherd's Pi correlation calculation.")
     return r, pval, outliers


-def bsmahal(a, b, n_boot=200):
+def bsmahal(a, b, n_boot=200, logger=None):
     """
     Bootstraps Mahalanobis distances for Shepherd's pi correlation.

@@ -533,24 +586,33 @@ def bsmahal(a, b, n_boot=200):
         Mahalanobis distance for each row in a, averaged across all the
         bootstrap resamples.
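+
+    Example (illustrative only; skipped as a doctest because the resampling is random):
+        >>> import numpy as np
+        >>> rng = np.random.default_rng(0)
+        >>> X = rng.normal(size=(30, 2))
+        >>> md = bsmahal(X, X, n_boot=50)  # doctest: +SKIP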
""" + safe_log(logger, "info", "Starting Mahalanobis distance bootstrapping.") n, m = b.shape + safe_log(logger, "debug", f"Input matrix b has shape: {b.shape}") + MD = np.zeros((n, n_boot)) nr = np.arange(n) xB = np.random.choice(nr, size=(n_boot, n), replace=True) + safe_log(logger, "debug", f"Bootstrap indices generated: {xB.shape}") + # Bootstrap the MD for i in np.arange(n_boot): + safe_log(logger, "debug", f"Processing bootstrap sample {i+1}/{n_boot}.") s1 = b[xB[i, :], 0] s2 = b[xB[i, :], 1] X = np.column_stack((s1, s2)) mu = X.mean(0) + safe_log(logger, "debug", f"Mean of bootstrap sample {i+1}: {mu}") _, R = np.linalg.qr(X - mu) sol = np.linalg.solve(R.T, (a - mu).T) MD[:, i] = np.sum(sol ** 2, 0) * (n - 1) # Average across all bootstraps + safe_log(logger, "info", "Completed Mahalanobis distance bootstrapping.") + safe_log(logger, "debug", f"Mahalanobis distances averaged across bootstraps: {MD_mean}") return MD.mean(1) -def bayesfactor_pearson(r, n, tail='two-sided', method='ly', kappa=1.): +def bayesfactor_pearson(r, n, tail='two-sided', method='ly', kappa=1., logger=None): """ Bayes Factor of a Pearson correlation. @@ -647,22 +709,29 @@ def bayesfactor_pearson(r, n, tail='two-sided', method='ly', kappa=1.): """ + safe_log(logger, "info", f"Starting Bayes factor calculation with method: {method}, tail: {tail}, r: {r}, n: {n}") assert method.lower() in ['ly', 'wetzels'], 'Method not recognized.' + safe_log(logger, "debug", f"Method {method} is recognized.") assert tail.lower() in ['two-sided', 'one-sided', 'greater', 'less', 'g', 'l', 'positive', 'negative', 'pos', 'neg'] + safe_log(logger, "debug", f"Tail {tail} is recognized.") # Wrong input if not np.isfinite(r) or n < 2: + safe_log(logger, "warning", "Invalid input: r is not finite or sample size n is too small.") return np.nan assert -1 <= r <= 1, 'r must be between -1 and 1.' + safe_log(logger, "debug", "r is within the valid range (-1 to 1).") if tail.lower() != 'two-sided' and method.lower() == 'wetzels': warnings.warn("One-sided Bayes Factor are not supported by the " "Wetzels's method. Switching to method='ly'.") method = 'ly' + safe_log(logger, "info", "Switching method to 'ly' due to one-sided test.") if method.lower() == 'wetzels': # Wetzels & Wagenmakers, 2012. Integral solving + safe_log(logger, "debug", "Using Wetzels method for Bayes factor calculation.") def fun(g, r, n): return math.exp(((n - 2) / 2) * math.log(1 + g) + (-(n - 1) / 2) @@ -671,8 +740,10 @@ def fun(g, r, n): integr = quad(fun, 0, np.inf, args=(r, n))[0] bf10 = np.sqrt((n / 2)) / gamma(1 / 2) * integr + safe_log(logger, "debug", f"Bayes factor (Wetzels) calculated: {bf10}") else: + safe_log(logger, "debug", "Using Ly method for Bayes factor calculation.") # Ly et al, 2016. Analytical solution. k = kappa lbeta = betaln(1 / k, 1 / k) @@ -681,12 +752,13 @@ def fun(g, r, n): bf10 = math.exp((1 - 2 / k) * math.log(2) + 0.5 * math.log(math.pi) - lbeta + math.lgamma((n + 2 / k - 1) / 2) - math.lgamma((n + 2 / k) / 2) + log_hyperterm) + safe_log(logger, "debug", f"Bayes factor (Ly) calculated: {bf10}") return bf10 def compute_esci(stat=None, nx=None, ny=None, paired=False, eftype='cohen', - confidence=.95, decimals=2): + confidence=.95, decimals=2, logger=None): """Parametric confidence intervals around a Cohen d or a correlation coefficient. 
@@ -798,6 +870,7 @@ def compute_esci(stat=None, nx=None, ny=None, paired=False, eftype='cohen', >>> print(round(stat, 4), ci) 0.1538 [-0.737 1.045] """ + safe_log(logger, "debug", f"Validating input: eftype={eftype}, stat={stat}, nx={nx}, confidence={confidence}") assert eftype.lower() in ['r', 'pearson', 'spearman', 'cohen', 'd', 'g', 'hedges'] assert stat is not None and nx is not None @@ -805,12 +878,15 @@ def compute_esci(stat=None, nx=None, ny=None, paired=False, eftype='cohen', assert 0 < confidence < 1, 'confidence must be between 0 and 1.' if eftype.lower() in ['r', 'pearson', 'spearman']: + safe_log(logger, "info", f"Computing confidence interval for correlation with nx={nx}") z = np.arctanh(stat) # R-to-z transform se = 1 / np.sqrt(nx - 3) crit = np.abs(norm.ppf((1 - confidence) / 2)) ci_z = np.array([z - crit * se, z + crit * se]) ci = np.tanh(ci_z) # Transform back to r + safe_log(logger, "info", f"Confidence interval computed: {ci}") else: + safe_log(logger, "info", f"Computing confidence interval for effect size: {eftype}") # Cohen d. Results are different than JASP which uses a non-central T # distribution. See github.com/jasp-stats/jasp-issues/issues/525 if ny == 1 or paired: @@ -830,10 +906,12 @@ def compute_esci(stat=None, nx=None, ny=None, paired=False, eftype='cohen', dof = nx + ny - 2 crit = np.abs(t.ppf((1 - confidence) / 2, dof)) ci = np.array([stat - crit * se, stat + crit * se]) + safe_log(logger, "info", f"Confidence interval computed: {ci}") + return np.round(ci, decimals) -def power_corr(r=None, n=None, power=None, alpha=0.05, tail='two-sided'): +def power_corr(r=None, n=None, power=None, alpha=0.05, tail='two-sided', logger=None): """ Evaluate power, sample size, correlation coefficient or significance level of a correlation test. @@ -892,6 +970,7 @@ def power_corr(r=None, n=None, power=None, alpha=0.05, tail='two-sided'): ... alpha=None)) alpha: 0.1377 """ + safe_log(logger, "debug", "Checking the number of None arguments for r, n, power, and alpha.") # Check the number of arguments that are None n_none = sum([v is None for v in [r, n, power, alpha]]) if n_none != 1: @@ -899,20 +978,26 @@ def power_corr(r=None, n=None, power=None, alpha=0.05, tail='two-sided'): # Safety checks if r is not None: + safe_log(logger, "debug", f"Validating r: {r}.") assert -1 <= r <= 1 r = abs(r) if alpha is not None: + safe_log(logger, "debug", f"Validating alpha: {alpha}.") assert 0 < alpha <= 1 if power is not None: + safe_log(logger, "debug", f"Validating power: {power}.") assert 0 < power <= 1 if n is not None: + safe_log(logger, "debug", f"Validating sample size n: {n}.") if n <= 4: + safe_log(logger, "warning", "Sample size is too small to estimate power (n <= 4). Returning NaN.") warnings.warn("Sample size is too small to estimate power " "(n <= 4). 
Returning NaN.") return np.nan # Define main function if tail == 'two-sided': + safe_log(logger, "debug", "Defining power function for two-sided test.") def func(r, n, power, alpha): dof = n - 2 @@ -925,7 +1010,7 @@ def func(r, n, power, alpha): return power else: - + safe_log(logger, "debug", "Defining power function for one-sided test.") def func(r, n, power, alpha): dof = n - 2 ttt = stats.t.ppf(1 - alpha, dof) @@ -938,43 +1023,52 @@ def func(r, n, power, alpha): # Evaluate missing variable if power is None and n is not None and r is not None: # Compute achieved power given r, n and alpha + safe_log(logger, "info", "Calculating achieved power given r, n, and alpha.") return func(r, n, power=None, alpha=alpha) elif n is None and power is not None and r is not None: + safe_log(logger, "info", "Calculating required sample size given r, power, and alpha.") # Compute required sample size given r, power and alpha def _eval_n(n, r, power, alpha): return func(r, n, power, alpha) - power try: - return brenth(_eval_n, 4 + 1e-10, 1e+09, args=(r, power, alpha)) - except ValueError: # pragma: no cover + result = brenth(_eval_n, 4 + 1e-10, 1e+09, args=(r, power, alpha)) + safe_log(logger, "info", f"Calculated sample size: {result}") + return result + except ValueError as e: # pragma: no cover + safe_log(logger, "error", f"Error calculating sample size: {e}") return np.nan elif r is None and power is not None and n is not None: # Compute achieved r given sample size, power and alpha level - + safe_log(logger, "info", "Calculating achieved r given sample size, power, and alpha.") def _eval_r(r, n, power, alpha): return func(r, n, power, alpha) - power try: - return brenth(_eval_r, 1e-10, 1 - 1e-10, args=(n, power, alpha)) + result = brenth(_eval_r, 1e-10, 1 - 1e-10, args=(n, power, alpha)) + safe_log(logger, "info", f"Calculated correlation coefficient r: {result}") + return result except ValueError: # pragma: no cover return np.nan else: # Compute achieved alpha (significance) level given r, n and power - + safe_log(logger, "info", "Calculating achieved alpha given r, n, and power.") def _eval_alpha(alpha, r, n, power): return func(r, n, power, alpha) - power - try: - return brenth(_eval_alpha, 1e-10, 1 - 1e-10, args=(r, n, power)) - except ValueError: # pragma: no cover + result = brenth(_eval_alpha, 1e-10, 1 - 1e-10, args=(r, n, power)) + safe_log(logger, "info", f"Calculated alpha level: {result}") + return result + except ValueError as e: # pragma: no cover + safe_log(logger, "error", f"Error calculating alpha level: {e}") return np.nan -def _postprocess_dataframe(df): +def _postprocess_dataframe(df, logger=None): """Apply some post-processing to an ouput dataframe (e.g. rounding). 
Whether and how rounding is applied is governed by options specified in @@ -1008,47 +1102,63 @@ def _postprocess_dataframe(df): Dataframe with post-processing applied """ df = df.copy() + safe_log(logger, "info", "Starting the rounding process for the DataFrame.") for row, col in it.product(df.index, df.columns): - round_option = _get_round_setting_for(row, col) + round_option = _get_round_setting_for(row, col, logger=logger) if round_option is None: + safe_log(logger, "debug", f"Skipping rounding for row {row}, column {col} as no round option is provided.") continue if callable(round_option): newval = round_option(df.at[row, col]) + safe_log(logger, "debug", f"Applying callable rounding for row {row}, column {col}: {newval}") # ensure that dtype changes are processed df[col] = df[col].astype(type(newval)) df.at[row, col] = newval continue if isinstance(df.at[row, col], bool): + safe_log(logger, "debug", f"Skipping rounding for boolean value at row {row}, column {col}.") # No rounding if value is a boolean continue is_number = isinstance(df.at[row, col], numbers.Number) is_array = isinstance(df.at[row, col], np.ndarray) if not any([is_number, is_array]): + safe_log(logger, "debug", f"Skipping non-numeric or non-array value at row {row}, column {col}.") # No rounding if value is not a Number or an array continue if is_array: is_float_array = issubclass(df.at[row, col].dtype.type, np.floating) if not is_float_array: + safe_log(logger, "debug", f"Skipping rounding for non-float array at row {row}, column {col}.") # No rounding if value is not a float array continue df.at[row, col] = np.round(df.at[row, col], decimals=round_option) + safe_log(logger, "info", f"Rounded value at row {row}, column {col} to {round_option} decimal places.") + + safe_log(logger, "info", "Completed the rounding process for the DataFrame.") return df -def _format_bf(bf, precision=3, trim='0'): +def _format_bf(bf, precision=3, trim='0', logger=None): """Format BF10 to floating point or scientific notation. """ + safe_log(logger, "info", f"Formatting Bayes Factor with value: {bf}, precision: {precision}, trim: {trim}") if type(bf) == str: + safe_log(logger, "debug", "Bayes Factor is already a string, returning as is.") return bf if bf >= 1e4 or bf <= 1e-4: out = np.format_float_scientific(bf, precision=precision, trim=trim) + safe_log(logger, "debug", f"Bayes Factor formatted in scientific notation: {out}") else: out = np.format_float_positional(bf, precision=precision, trim=trim) + safe_log(logger, "debug", f"Bayes Factor formatted in floating-point notation: {out}") + + safe_log(logger, "info", f"Formatted Bayes Factor: {out}") return out -def _get_round_setting_for(row, col): +def _get_round_setting_for(row, col, logger=None): + safe_log(logger, "info", f"Retrieving rounding setting for row: {row}, column: {col}") options = { 'round': None, 'round.column.CI95%': 2, @@ -1058,14 +1168,19 @@ def _get_round_setting_for(row, col): 'round.cell.[{}]x[{}]'.format(row, col), 'round.column.{}'.format(col), 'round.row.{}'.format(row)) for key in keys_to_check: + safe_log(logger, "debug", f"Checking for rounding option with key: {key}") try: + rounding_option = options[key] + safe_log(logger, "debug", f"Rounding option found: {rounding_option} for key: {key}") return options[key] except KeyError: + safe_log(logger, "debug", f"No rounding option found for key: {key}") pass + safe_log(logger, "info", f"No specific rounding option found. 
Using default: {options['round']}") return options['round'] -def remove_na(x, y=None, paired=False, axis='rows'): +def remove_na(x, y=None, paired=False, axis='rows', logger=None): """Remove missing values along a given axis in one or more (paired) numpy arrays. @@ -1107,26 +1222,34 @@ def remove_na(x, y=None, paired=False, axis='rows'): >>> y = np.array([[6, np.nan], [3, 2], [2, 2]]) >>> x_no_nan, y_no_nan = remove_na(x, y, paired=False) """ + safe_log(logger, "debug", f"remove_na called with axis={axis}, paired={paired}.") + safe_log(logger, "debug", f"Initial x shape: {np.shape(x)}, Initial y shape: {np.shape(y) if y is not None else 'None'}.") # Safety checks x = np.asarray(x) assert x.size > 1, 'x must have more than one element.' assert axis in ['rows', 'columns'], 'axis must be rows or columns.' if y is None: - return _remove_na_single(x, axis=axis) + safe_log(logger, "debug", "Removing NA from x only.") + return _remove_na_single(x, axis=axis, logger=logger) elif isinstance(y, (int, float, str)): - return _remove_na_single(x, axis=axis), y + safe_log(logger, "debug", f"y is a scalar: {y}. Removing NA from x only.") + return _remove_na_single(x, axis=axis, logger=logger), y else: # y is list, np.array, pd.Series y = np.asarray(y) # Make sure that we just pass-through if y have only 1 element if y.size == 1: - return _remove_na_single(x, axis=axis), y + safe_log(logger, "debug", "y has only one element. Passing y through.") + return _remove_na_single(x, axis=axis, logger=logger), y if x.ndim != y.ndim or paired is False: + safe_log(logger, "debug", "x and y do not have the same dimension or paired is False. Removing NA separately.") # x and y do not have the same dimension - x_no_nan = _remove_na_single(x, axis=axis) - y_no_nan = _remove_na_single(y, axis=axis) + x_no_nan = _remove_na_single(x, axis=axis, logger=logger) + y_no_nan = _remove_na_single(y, axis=axis, logger=logger) return x_no_nan, y_no_nan + safe_log(logger, "debug", "x and y are paired with the same dimensions. Removing NA from both.") + # At this point, we assume that x and y are paired and have same dimensions if x.ndim == 1: # 1D arrays @@ -1140,6 +1263,7 @@ def remove_na(x, y=None, paired=False, axis='rows'): # Check if missing values are present if ~x_mask.all() or ~y_mask.all(): + safe_log(logger, "info", "Missing values found in x or y. Removing missing values.") ax = 0 if axis == 'rows' else 1 ax = 0 if x.ndim == 1 else ax both = np.logical_and(x_mask, y_mask) @@ -1148,31 +1272,43 @@ def remove_na(x, y=None, paired=False, axis='rows'): return x, y -def _remove_na_single(x, axis='rows'): +def _remove_na_single(x, axis='rows', logger=None): """Remove NaN in a single np.ndarray numpy array. This is an internal Pingouin function. """ + safe_log(logger, "debug", f"Starting _remove_na_single with axis={axis}.") + safe_log(logger, "debug", f"Initial shape of x: {np.shape(x)}") + if x.ndim == 1: # 1D arrays x_mask = ~np.isnan(x) + safe_log(logger, "debug", "Array is 1D. Generated mask for NaN values.") else: # 2D arrays ax = 1 if axis == 'rows' else 0 x_mask = ~np.any(np.isnan(x), axis=ax) + safe_log(logger, "debug", "Array is 2D. Generated mask for NaN values along the specified axis.") # Check if missing values are present if ~x_mask.all(): + safe_log(logger, "info", "Missing values found. 
Removing missing values.")
         ax = 0 if axis == 'rows' else 1
         ax = 0 if x.ndim == 1 else ax
         x = x.compress(x_mask, axis=ax)
+        safe_log(logger, "debug", f"Shape of x after removing NaN values: {np.shape(x)}")
     return x


-def remove_none(x):
+def remove_none(x, logger=None):
     """ Remove missing (None, nan) values from the list
         :param x: numeric list
         :return: a list without None or empty array
     """
+    safe_log(logger, "debug", f"Starting remove_none with input list: {x}")
+
     if x is None:
+        safe_log(logger, "info", "Input list is None, returning an empty list.")
         return []
-
-    return [elem for elem in x if elem is not None and not pd.isna(elem)]
+    filtered_list = [elem for elem in x if elem is not None and not pd.isna(elem)]
+
+    safe_log(logger, "debug", f"Filtered list (without None/nan): {filtered_list}")
+    return filtered_list
diff --git a/metcalcpy/util/ctc_statistics.py b/metcalcpy/util/ctc_statistics.py
index 7a7e3c04..da289b1e 100644
--- a/metcalcpy/util/ctc_statistics.py
+++ b/metcalcpy/util/ctc_statistics.py
@@ -20,12 +20,13 @@
 from scipy.special import lambertw
 from metcalcpy.util.utils import round_half_up, column_data_by_name, \
     sum_column_data_by_name, PRECISION
+from metcalcpy.util.safe_log import safe_log

 __author__ = 'Tatiana Burek'
 __version__ = '0.1.0'


-def calculate_baser(input_data, columns_names):
+def calculate_baser(input_data, columns_names, logger=None):
     """Performs calculation of BASER - Base rate, aka Observed relative frequency

         Args:
@@ -38,15 +39,28 @@ def calculate_baser(input_data, columns_names):
             calculated BASER as float
             or None if some data values are missing or invalid
     """
+    safe_log(logger, "debug", "Starting calculate_baser function.")
+    safe_log(logger, "debug", f"Input columns: {columns_names}")
+
     warnings.filterwarnings('error')
     try:
-        result = (sum(input_data[:, np.where(columns_names == 'fy_oy')[0][0]])
-                  + sum(input_data[:, np.where(columns_names == 'fn_oy')[0][0]])) \
-                 / sum(input_data[:, np.where(columns_names == 'total')[0][0]])
+        fy_oy_index = np.where(columns_names == 'fy_oy')[0][0]
+        fn_oy_index = np.where(columns_names == 'fn_oy')[0][0]
+        total_index = np.where(columns_names == 'total')[0][0]
+
+        safe_log(logger, "debug", f"Indexes found - fy_oy: {fy_oy_index}, fn_oy: {fn_oy_index}, total: {total_index}")
+
+        # Perform the BASER calculation
+        result = (sum(input_data[:, fy_oy_index]) + sum(input_data[:, fn_oy_index])) / sum(input_data[:, total_index])
         result = round_half_up(result, PRECISION)
-    except (TypeError, ZeroDivisionError, Warning, ValueError):
+
+        safe_log(logger, "info", f"BASER calculation successful: {result}")
+
+    except (TypeError, ZeroDivisionError, Warning, ValueError) as e:
+        safe_log(logger, "error", f"Error in BASER calculation: {e}")
         result = None
+
     warnings.filterwarnings('ignore')
     return result

@@ -76,7 +90,7 @@
     return result


-def calculate_fbias(input_data, columns_names):
+def calculate_fbias(input_data, columns_names, logger=None):
     """Performs calculation of FBIAS - Bias, aka Frequency bias

         Args:
@@ -99,13 +113,15 @@
               + sum_column_data_by_name(input_data, columns_names, 'fy_on')
         result = oyn / oy
         result = round_half_up(result, PRECISION)
+        safe_log(logger, "info", f"FBIAS calculation successful: {result}")
-    except (TypeError, ZeroDivisionError, Warning, ValueError):
+    except (TypeError, ZeroDivisionError, Warning, ValueError) as e:
+        safe_log(logger, "error", f"Error in FBIAS calculation: {e}")
         result = None
     warnings.filterwarnings('ignore')
     return result


-def calculate_fmean(input_data, columns_names):
+def calculate_fmean(input_data, columns_names, logger=None):
     """Performs calculation of FMEAN - Forecast mean

         Args:
@@ -119,21 +135,29 @@ def calculate_fmean(input_data, columns_names):
             or None if some data values are missing or invalid
     """
     warnings.filterwarnings('error')
+    safe_log(logger, "debug", "Starting calculate_fmean function.")
+    safe_log(logger, "debug", f"Input columns: {columns_names}")
+
     try:
         total = sum_column_data_by_name(input_data, columns_names, 'total')
+        safe_log(logger, "debug", f"Total value: {total}")
         if total == 0:
+            safe_log(logger, "warning", "Total value is 0, returning None.")
             return None
         oyn = sum_column_data_by_name(input_data, columns_names, 'fy_oy') \
               + sum_column_data_by_name(input_data, columns_names, 'fy_on')
+        safe_log(logger, "debug", f"Summed values (fy_oy + fy_on): {oyn}")
         result = oyn / total
         result = round_half_up(result, PRECISION)
+        safe_log(logger, "info", f"FMEAN calculation successful: {result}")
-    except (TypeError, ZeroDivisionError, Warning, ValueError):
+    except (TypeError, ZeroDivisionError, Warning, ValueError) as e:
+        safe_log(logger, "error", f"Error in FMEAN calculation: {e}")
         result = None
     warnings.filterwarnings('ignore')
     return result


-def calculate_pody(input_data, columns_names):
+def calculate_pody(input_data, columns_names, logger=None):
     """Performs calculation of PODY - Probability of Detecting Yes

         Args:
@@ -146,21 +170,28 @@ def calculate_pody(input_data, columns_names):
             calculated PODY as float
             or None if some data values are missing or invalid
     """
+    safe_log(logger, "debug", "Starting calculate_pody function.")
+    safe_log(logger, "debug", f"Input columns: {columns_names}")
     warnings.filterwarnings('error')
     try:
         fy_oy = sum_column_data_by_name(input_data, columns_names, 'fy_oy')
+        safe_log(logger, "debug", f"fy_oy value: {fy_oy}")
         fn_oy = sum_column_data_by_name(input_data, columns_names, 'fn_oy')
+        safe_log(logger, "debug", f"fn_oy value: {fn_oy}")
         oy = fy_oy + fn_oy
+        safe_log(logger, "debug", f"Total oy value (fy_oy + fn_oy): {oy}")
         result = fy_oy / oy
         result = round_half_up(result, PRECISION)
-    except (TypeError, ZeroDivisionError, Warning, ValueError):
+        safe_log(logger, "info", f"PODY calculation successful: {result}")
+    except (TypeError, ZeroDivisionError, Warning, ValueError) as e:
+        safe_log(logger, "error", f"Error in PODY calculation: {e}")
         result = None
     warnings.filterwarnings('ignore')
     return result


-def calculate_pofd(input_data, columns_names):
+def calculate_pofd(input_data, columns_names, logger=None):
     """Performs calculation of POFD - Probability of false detection

         Args:
@@ -173,19 +204,28 @@ def calculate_pofd(input_data, columns_names):
             calculated POFD as float
             or None if some data values are missing or invalid
     """
+    safe_log(logger, "debug", "Starting calculate_pofd function.")
+    safe_log(logger, "debug", f"Input columns: {columns_names}")
     warnings.filterwarnings('error')
     try:
         fy_on = sum_column_data_by_name(input_data, columns_names, 'fy_on')
+        safe_log(logger, "debug", f"fy_on value: {fy_on}")
+        fn_on = sum_column_data_by_name(input_data, columns_names, 'fn_on')
+        safe_log(logger, "debug", f"fn_on value: {fn_on}")
-        oy = fy_on + sum_column_data_by_name(input_data, columns_names, 'fn_on')
+        oy = fy_on + fn_on
+        safe_log(logger, "debug", f"Total oy value (fy_on + fn_on): {oy}")
         result = fy_on / oy
         result = round_half_up(result, PRECISION)
+        safe_log(logger, "info", f"POFD calculation successful: {result}")
-    except (TypeError, ZeroDivisionError, Warning, ValueError):
+    except (TypeError, ZeroDivisionError, Warning, ValueError) as e:
+        safe_log(logger, "error", f"Error in POFD calculation: {e}")
         result = None
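+    # For reference, the MET contingency-table (CTC) column names used by
+    # this function and the rest of this module are:
+    #   fy_oy - forecast yes, observation yes (hits)
+    #   fy_on - forecast yes, observation no  (false alarms)
+    #   fn_oy - forecast no,  observation yes (misses)
+    #   fn_on - forecast no,  observation no  (correct negatives)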
warnings.filterwarnings('ignore')
     return result


-def calculate_ctc_roc(data, ascending):
+def calculate_ctc_roc(data, ascending, logger=None):
     """ Creates a data frame to hold the aggregated contingency table and ROC data
        Args:
            :param data: pandas data frame with ctc data and column names:
@@ -205,35 +245,44 @@ def calculate_ctc_roc(data, ascending):
            - pody
            - pofd
     """
+    safe_log(logger, "debug", "Starting calculate_ctc_roc function.")
+    safe_log(logger, "debug", f"Sorting data with ascending={ascending}.")
     # create a data frame to hold the aggregated contingency table and ROC data
-    sorted_data = sort_by_thresh(data, ascending=ascending)
+    sorted_data = sort_by_thresh(data, ascending=ascending, logger=logger)
+    safe_log(logger, "debug", f"Data sorted. Number of rows: {len(sorted_data)}")
     list_thresh = np.sort(np.unique(sorted_data['fcst_thresh'].to_numpy()))
+    safe_log(logger, "debug", f"Unique thresholds identified: {list_thresh}")

     # If descending order was requested for sorting the input dataframe,
     # reverse the order of the list_thresh to
     # maintain results in descending order.
     if not ascending:
         list_thresh = list_thresh[::-1]
-
+        safe_log(logger, "debug", "Reversed the order of list_thresh due to descending sort.")
     df_roc = pd.DataFrame(
         {'thresh': list_thresh,
          'pody': None,
          'pofd': None})
+    safe_log(logger, "debug", "Initialized ROC DataFrame.")

     index = 0
     for thresh in list_thresh:
+        safe_log(logger, "debug", f"Processing threshold: {thresh}")
         # create a subset of the sorted_data that contains only the rows of the unique
         # threshold values
         subset_data = sorted_data[sorted_data['fcst_thresh'] == thresh]
+        safe_log(logger, "debug", f"Subset data for threshold {thresh} has {len(subset_data)} rows.")
         data_np = subset_data.to_numpy()
         columns = subset_data.columns.values
-        pody = calculate_pody(data_np, columns)
-        pofd = calculate_pofd(data_np, columns)
+        pody = calculate_pody(data_np, columns, logger=logger)
+        pofd = calculate_pofd(data_np, columns, logger=logger)
         df_roc.loc[index] = [thresh, pody, pofd]
+        safe_log(logger, "info", f"ROC values for threshold {thresh}: PODY={pody}, POFD={pofd}")
         index += 1

+    safe_log(logger, "debug", "Finished calculating ROC DataFrame.")
     return df_roc


-def calculate_podn(input_data, columns_names):
+def calculate_podn(input_data, columns_names, logger=None):
     """Performs calculation of PODN - Probability of Detecting No

         Args:
@@ -247,18 +296,28 @@ def calculate_podn(input_data, columns_names):
             or None if some data values are missing or invalid
     """
     warnings.filterwarnings('error')
+
+    safe_log(logger, "debug", "Starting calculation of PODN.")
+
     try:
         fn_on = sum_column_data_by_name(input_data, columns_names, 'fn_on')
+        safe_log(logger, "debug", f"Sum of fn_on: {fn_on}")
+
+        fy_on = sum_column_data_by_name(input_data, columns_names, 'fy_on')
+        safe_log(logger, "debug", f"Sum of fy_on: {fy_on}")
         oy = sum_column_data_by_name(input_data, columns_names, 'fy_on') + fn_on
+        safe_log(logger, "debug", f"Sum of observations (oy): {oy}")
         result = fn_on / oy
         result = round_half_up(result, PRECISION)
-    except (TypeError, ZeroDivisionError, Warning, ValueError):
+        safe_log(logger, "debug", f"Calculated PODN before rounding: {result}")
+    except (TypeError, ZeroDivisionError, Warning, ValueError) as e:
         result = None
+        safe_log(logger, "error", f"Error in calculating PODN: {str(e)}")
+
     warnings.filterwarnings('ignore')
     return result


-def calculate_far(input_data, columns_names):
+def calculate_far(input_data, columns_names, logger=None):
     """Performs calculation of FAR - false
alarms Args: @@ -272,18 +331,25 @@ def calculate_far(input_data, columns_names): or None if some data values are missing or invalid """ warnings.filterwarnings('error') + safe_log(logger, "debug", "Starting calculation of FAR.") try: fy_on = sum_column_data_by_name(input_data, columns_names, 'fy_on') + safe_log(logger, "debug", f"Sum of fy_on: {fy_on}") + fy_oy = sum_column_data_by_name(input_data, columns_names, 'fy_oy') + safe_log(logger, "debug", f"Sum of fy_oy: {fy_oy}") oy = sum_column_data_by_name(input_data, columns_names, 'fy_oy') + fy_on + safe_log(logger, "debug", f"Sum of observations (oy): {oy}") result = fy_on / oy result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: result = None + safe_log(logger, "error", f"Error in calculating FAR: {str(e)}") + warnings.filterwarnings('ignore') return result -def calculate_csi(input_data, columns_names): +def calculate_csi(input_data, columns_names, logger=None): """Performs calculation of CSI - Critical success index, aka Threat score Args: @@ -297,20 +363,30 @@ def calculate_csi(input_data, columns_names): or None if some data values are missing or invalid """ warnings.filterwarnings('error') + safe_log(logger, "debug", "Starting calculation of CSI.") try: fy_oy = sum_column_data_by_name(input_data, columns_names, 'fy_oy') + safe_log(logger, "debug", f"Sum of fy_oy: {fy_oy}") + fy_on = sum_column_data_by_name(input_data, columns_names, 'fy_on') + safe_log(logger, "debug", f"Sum of fy_on: {fy_on}") + fn_oy = sum_column_data_by_name(input_data, columns_names, 'fn_oy') + safe_log(logger, "debug", f"Sum of fn_oy: {fn_oy}") oy = fy_oy \ + sum_column_data_by_name(input_data, columns_names, 'fy_on') \ + sum_column_data_by_name(input_data, columns_names, 'fn_oy') + safe_log(logger, "debug", f"Total sum (oy): {oy}") result = fy_oy / oy result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", f"Final CSI result: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: result = None + safe_log(logger, "error", f"Error in calculating CSI: {str(e)}") + warnings.filterwarnings('ignore') return result -def calculate_gss(input_data, columns_names): +def calculate_gss(input_data, columns_names, logger=None): """Performs calculation of GSS = Gilbert skill score, aka Equitable threat score Args: @@ -324,23 +400,35 @@ def calculate_gss(input_data, columns_names): or None if some data values are missing or invalid """ warnings.filterwarnings('error') + safe_log(logger, "debug", "Starting calculation of GSS.") + try: total = sum_column_data_by_name(input_data, columns_names, 'total') + safe_log(logger, "debug", f"Sum of total: {total}") if total == 0: + safe_log(logger, "warning", "Total is 0, returning None.") return None fy_oy = sum_column_data_by_name(input_data, columns_names, 'fy_oy') + safe_log(logger, "debug", f"Sum of fy_oy: {fy_oy}") fy_on = sum_column_data_by_name(input_data, columns_names, 'fy_on') + safe_log(logger, "debug", f"Sum of fy_on: {fy_on}") fn_oy = sum_column_data_by_name(input_data, columns_names, 'fn_oy') + safe_log(logger, "debug", f"Sum of fn_oy: {fn_oy}") dbl_c = ((fy_oy + fy_on) / total) * (fy_oy + fn_oy) + safe_log(logger, "debug", f"Calculated dbl_c: {dbl_c}") gss = ((fy_oy - dbl_c) / (fy_oy + fy_on + fn_oy - dbl_c)) gss = round_half_up(gss, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + 
safe_log(logger, "info", f"Final GSS result: {gss}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: gss = None + safe_log(logger, "error", f"Error in calculating GSS: {str(e)}") + gss = None + warnings.filterwarnings('ignore') return gss -def calculate_hk(input_data, columns_names): +def calculate_hk(input_data, columns_names, logger=None): """Performs calculation of HK - Hanssen Kuipers Discriminant Args: @@ -354,21 +442,29 @@ def calculate_hk(input_data, columns_names): or None if some data values are missing or invalid """ warnings.filterwarnings('error') + safe_log(logger, "debug", "Starting calculation of HK.") try: - pody = calculate_pody(input_data, columns_names) - pofd = calculate_pofd(input_data, columns_names) + pody = calculate_pody(input_data, columns_names, logger=logger) + safe_log(logger, "debug", f"Calculated PODY: {pody}") + pofd = calculate_pofd(input_data, columns_names, logger=logger) + safe_log(logger, "debug", f"Calculated POFD: {pofd}") if pody is None or pofd is None: + safe_log(logger, "warning", "PODY or POFD is None, returning None.") result = None else: result = pody - pofd + safe_log(logger, "debug", f"HK before rounding: {result}") result = round_half_up(result, PRECISION) - except (TypeError, Warning): + safe_log(logger, "info", f"Final HK result: {result}") + except (TypeError, Warning) as e: result = None + safe_log(logger, "error", f"Error in calculating HK: {str(e)}") + warnings.filterwarnings('ignore') return result -def calculate_hss(input_data, columns_names): +def calculate_hss(input_data, columns_names, logger=None): """Performs calculation of HSS - Heidke skill score Args: @@ -382,24 +478,35 @@ def calculate_hss(input_data, columns_names): or None if some data values are missing or invalid """ warnings.filterwarnings('error') + safe_log(logger, "debug", "Starting calculation of HSS.") try: total = sum_column_data_by_name(input_data, columns_names, 'total') + safe_log(logger, "debug", f"Total: {total}") if total == 0: + safe_log(logger, "warning", "Total is zero, returning None.") return None fy_oy = sum_column_data_by_name(input_data, columns_names, 'fy_oy') fy_on = sum_column_data_by_name(input_data, columns_names, 'fy_on') fn_oy = sum_column_data_by_name(input_data, columns_names, 'fn_oy') fn_on = sum_column_data_by_name(input_data, columns_names, 'fn_on') + + safe_log(logger, "debug", f"FY_OY: {fy_oy}, FY_ON: {fy_on}, FN_OY: {fn_oy}, FN_ON: {fn_on}") + dbl_c = ((fy_oy + fy_on) / total) * (fy_oy + fn_oy) + ((fn_oy + fn_on) / total) * (fy_on + fn_on) + hss = ((fy_oy + fn_on - dbl_c)/ (total - dbl_c)) hss = round_half_up(hss, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", f"Final HSS result: {hss}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: hss = None + safe_log(logger, "error", f"Error in calculating HSS: {str(e)}") + warnings.filterwarnings('ignore') return hss -def calculate_odds(input_data, columns_names): +def calculate_odds(input_data, columns_names, logger=None): """Performs calculation of ODDS - Odds Ratio Args: @@ -413,23 +520,29 @@ def calculate_odds(input_data, columns_names): or None if some data values are missing or invalid """ warnings.filterwarnings('error') + safe_log(logger, "debug", "Starting calculation of ODDS.") + try: - pody = calculate_pody(input_data, columns_names) - pofd = calculate_pofd(input_data, columns_names) + pody = calculate_pody(input_data, columns_names, logger=logger) + pofd = calculate_pofd(input_data, 
columns_names, logger=logger)
+        safe_log(logger, "debug", f"PODY: {pody}, POFD: {pofd}")

         if pody is None or pofd is None:
+            safe_log(logger, "warning", "PODY or POFD is None, returning None.")
             result = None
         else:
             result = (pody * (1 - pofd)) / (pofd * (1 - pody))
             result = round_half_up(result, PRECISION)
+            safe_log(logger, "info", f"Final ODDS result: {result}")
-    except (TypeError, ZeroDivisionError, Warning, ValueError):
+    except (TypeError, ZeroDivisionError, Warning, ValueError) as e:
         result = None
+        safe_log(logger, "error", f"Error in calculating ODDS: {str(e)}")
+
     warnings.filterwarnings('ignore')
     return result


-def calculate_lodds(input_data, columns_names):
+def calculate_lodds(input_data, columns_names, logger=None):
     """Performs calculation of LODDS - Log Odds Ratio

         Args:
@@ -443,22 +556,32 @@ def calculate_lodds(input_data, columns_names):
             or None if some data values are missing or invalid
     """
     warnings.filterwarnings('error')
+    safe_log(logger, "debug", "Starting calculation of LODDS.")
+
     try:
         fy_oy = sum_column_data_by_name(input_data, columns_names, 'fy_oy')
         fy_on = sum_column_data_by_name(input_data, columns_names, 'fy_on')
         fn_oy = sum_column_data_by_name(input_data, columns_names, 'fn_oy')
         fn_on = sum_column_data_by_name(input_data, columns_names, 'fn_on')
+
+        safe_log(logger, "debug", f"FY_OY: {fy_oy}, FY_ON: {fy_on}, FN_OY: {fn_oy}, FN_ON: {fn_on}")
+
         if fy_oy is None or fy_on is None or fn_oy is None or fn_on is None:
+            safe_log(logger, "warning", "One or more input values are None, returning None.")
             return None
         v = np.log(fy_oy) + np.log(fn_on) - np.log(fy_on) - np.log(fn_oy)
         v = round_half_up(v, PRECISION)
-    except (TypeError, Warning):
+        safe_log(logger, "info", f"Final LODDS result: {v}")
+
+    except (TypeError, Warning) as e:
         v = None
+        safe_log(logger, "error", f"Error in calculating LODDS: {str(e)}")
+
     warnings.filterwarnings('ignore')
     return v


-def calculate_bagss(input_data, columns_names):
+def calculate_bagss(input_data, columns_names, logger=None):
     """Performs calculation of BAGSS - Bias-Corrected Gilbert Skill Score

         Args:
@@ -472,28 +595,37 @@ def calculate_bagss(input_data, columns_names):
             or None if some data values are missing or invalid
     """
     warnings.filterwarnings('error')
+    safe_log(logger, "debug", "Starting calculation of BAGSS.")
     try:
         fy_oy = sum_column_data_by_name(input_data, columns_names, 'fy_oy')
         fn_oy = sum_column_data_by_name(input_data, columns_names, 'fn_oy')
         total = sum_column_data_by_name(input_data, columns_names, 'total')
         fy_on = sum_column_data_by_name(input_data, columns_names, 'fy_on')
+        safe_log(logger, "debug", f"FY_OY: {fy_oy}, FN_OY: {fn_oy}, FY_ON: {fy_on}, TOTAL: {total}")
+
         if fy_oy is None or fn_oy is None or fy_on is None or total is None:
+            safe_log(logger, "warning", "One or more input values are None, returning None.")
             return None
         if fy_oy == 0 or fn_oy == 0 or total == 0:
+            safe_log(logger, "warning", "One or more input values are zero, returning None.")
             return None
         dbl_o = fy_oy + fn_oy
         dbl_lf = np.log(dbl_o / fn_oy)
         lambert = lambertw(dbl_o / fy_on * dbl_lf).real
         dbl_ha = dbl_o - (fy_on / dbl_lf) * lambert
         result = (dbl_ha - (dbl_o ** 2 / total)) / (2 * dbl_o - dbl_ha - (dbl_o ** 2 / total))
+
         result = round_half_up(result, PRECISION)
-    except (TypeError, ZeroDivisionError, Warning, ValueError):
+        safe_log(logger, "info", f"Final BAGSS result: {result}")
+    except (TypeError, ZeroDivisionError, Warning, ValueError) as e:
         result = None
+        safe_log(logger, "error", f"Error in calculating BAGSS: {str(e)}")
+
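+    # Note: dbl_ha above is the bias-adjusted hit count. The Lambert W step
+    # implements the bias adjustment of Brill and Mesinger (2009), which
+    # estimates the hits an unbiased forecast would have produced before
+    # the GSS formula is applied.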
warnings.filterwarnings('ignore') return result -def calculate_eclv(input_data, columns_names): +def calculate_eclv(input_data, columns_names, logger=None): """Performs calculation of ECLV - Economic Cost/Loss Value Implements R version that returns an array instead of the single value IT WILL NOT WORK - NEED TO CONSULT WITH STATISTICIAN @@ -510,25 +642,36 @@ def calculate_eclv(input_data, columns_names): or None if some data values are missing or invalid """ warnings.filterwarnings('error') + safe_log(logger, "debug", "Starting calculation of ECLV.") + try: cl_step = 0.05 cl_pts = np.arange(start=cl_step, stop=1, step=cl_step) + safe_log(logger, "debug", f"CL points: {cl_pts}") + fy_oy = sum_column_data_by_name(input_data, columns_names, 'fy_oy') fy_on = sum_column_data_by_name(input_data, columns_names, 'fy_on') fn_oy = sum_column_data_by_name(input_data, columns_names, 'fn_oy') fn_on = sum_column_data_by_name(input_data, columns_names, 'fn_on') - eclv = calculate_economic_value(np.array([fy_oy, fy_on, fn_oy, fn_on]), cl_pts) + + safe_log(logger, "debug", f"FY_OY: {fy_oy}, FY_ON: {fy_on}, FN_OY: {fn_oy}, FN_ON: {fn_on}") + + eclv = calculate_economic_value(np.array([fy_oy, fy_on, fn_oy, fn_on]), cl_pts, logger=logger) common_cases_ind = pd.Series(eclv['cl']).isin(cl_pts) v = eclv['V'][common_cases_ind] v = round_half_up(v, PRECISION) - except (TypeError, Warning): + safe_log(logger, "info", f"Calculated ECLV: {v}") + + except (TypeError, Warning) as e: v = None + safe_log(logger, "error", f"Error in calculating ECLV: {str(e)}") + warnings.filterwarnings('ignore') return v def calculate_economic_value(values, cost_lost_ratio=np.arange(start=0.05, stop=0.95, step=0.05), - add_base_rate: bool = False) -> Union[dict, None]: + add_base_rate: bool = False, logger=None) -> Union[dict, None]: """Calculates the economic value of a forecast based on a cost/loss ratio. 
Similar to R script function 'value' from the 'verification' package @@ -552,6 +695,8 @@ def calculate_economic_value(values, cost_lost_ratio=np.arange(start=0.05, stop= or None if some data values are missing or invalid """ warnings.filterwarnings('error') + safe_log(logger, "debug", "Starting calculation of economic value.") + try: if len(values) == 4: n = sum(values) @@ -559,10 +704,14 @@ def calculate_economic_value(values, cost_lost_ratio=np.arange(start=0.05, stop= h = values[0] / (values[0] + values[2]) s = (values[0] + values[2]) / n + safe_log(logger, "debug", f"Values: n={n}, F={f}, H={h}, s={s}") + if add_base_rate is True: cl_local = np.append(cost_lost_ratio, s) + safe_log(logger, "debug", f"Base rate added to cost/loss ratio: {cl_local}") else: cl_local = np.copy(cost_lost_ratio) + safe_log(logger, "debug", f"Cost/loss ratio: {cl_local}") cl_local.sort() v_1 = (1 - f) - s / (1 - s) * (1 - cl_local) / cl_local * (1 - h) @@ -582,15 +731,19 @@ def calculate_economic_value(values, cost_lost_ratio=np.arange(start=0.05, stop= 'cl': cl_local, 's': round_half_up(s, PRECISION), 'n': n} + safe_log(logger, "info", f"Economic value calculated successfully: vmax={result['vmax']}") else: result = None - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "warning", "Invalid input values; calculation returned None.") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: result = None + safe_log(logger, "error", f"Error in calculating economic value: {str(e)}") + warnings.filterwarnings('ignore') return result -def calculate_ctc_total(input_data, columns_names): +def calculate_ctc_total(input_data, columns_names, logger=None): """Calculates the Total number of matched pairs for Contingency Table Counts Args: @@ -603,11 +756,20 @@ def calculate_ctc_total(input_data, columns_names): calculated Total number of matched pairs as float or None if some data values are missing or invalid """ - total = sum_column_data_by_name(input_data, columns_names, 'total') - return round_half_up(total, PRECISION) + safe_log(logger, "debug", "Starting calculation of CTC Total.") + try: + total = sum_column_data_by_name(input_data, columns_names, 'total') + result = round_half_up(total, PRECISION) + safe_log(logger, "info", f"Calculated CTC Total: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + result = None + safe_log(logger, "error", f"Error in calculating CTC Total: {str(e)}") + + return result -def calculate_cts_total(input_data, columns_names): + +def calculate_cts_total(input_data, columns_names, logger=None): """Calculates the Total number of matched pairs for Contingency Table Statistics Args: @@ -620,11 +782,20 @@ def calculate_cts_total(input_data, columns_names): calculated Total number of matched pairs as float or None if some data values are missing or invalid """ - total = sum_column_data_by_name(input_data, columns_names, 'total') - return round_half_up(total, PRECISION) + safe_log(logger, "debug", "Starting calculation of CTS Total.") + + try: + total = sum_column_data_by_name(input_data, columns_names, 'total') + result = round_half_up(total, PRECISION) + safe_log(logger, "info", f"Calculated CTS Total: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + result = None + safe_log(logger, "error", f"Error in calculating CTS Total: {str(e)}") + + return result -def calculate_ctc_fn_on(input_data, columns_names): +def calculate_ctc_fn_on(input_data, columns_names, logger=None): """Calculates the Number 
of forecast no and observation no for Contingency Table Statistics Args: @@ -637,11 +808,20 @@ def calculate_ctc_fn_on(input_data, columns_names): calculated Number of forecast no and observation no as float or None if some data values are missing or invalid """ - fn_on = sum_column_data_by_name(input_data, columns_names, 'fn_on') - return round_half_up(fn_on, PRECISION) + safe_log(logger, "debug", "Starting calculation of CTC FN_ON.") + + try: + fn_on = sum_column_data_by_name(input_data, columns_names, 'fn_on') + result = round_half_up(fn_on, PRECISION) + safe_log(logger, "info", f"Calculated CTC FN_ON: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + result = None + safe_log(logger, "error", f"Error in calculating CTC FN_ON: {str(e)}") + + return result -def calculate_ctc_fn_oy(input_data, columns_names): +def calculate_ctc_fn_oy(input_data, columns_names, logger=None): """Calculates the Number of forecast no and observation yes for Contingency Table Statistics Args: @@ -654,11 +834,20 @@ def calculate_ctc_fn_oy(input_data, columns_names): calculated Number of forecast no and observation yes as float or None if some data values are missing or invalid """ - fn_oy = sum_column_data_by_name(input_data, columns_names, 'fn_oy') - return round_half_up(fn_oy, PRECISION) + safe_log(logger, "debug", "Starting calculation of CTC FN_OY.") + try: + fn_oy = sum_column_data_by_name(input_data, columns_names, 'fn_oy') + result = round_half_up(fn_oy, PRECISION) + safe_log(logger, "info", f"Calculated CTC FN_OY: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + result = None + safe_log(logger, "error", f"Error in calculating CTC FN_OY: {str(e)}") + + return result -def calculate_ctc_fy_on(input_data, columns_names): + +def calculate_ctc_fy_on(input_data, columns_names, logger=None): """Calculates the Number of forecast yes and observation no for Contingency Table Statistics Args: @@ -671,11 +860,20 @@ def calculate_ctc_fy_on(input_data, columns_names): calculated Number of forecast yes and observation no as float or None if some data values are missing or invalid """ - fy_on = sum_column_data_by_name(input_data, columns_names, 'fy_on') - return round_half_up(fy_on, PRECISION) + safe_log(logger, "debug", "Starting calculation of CTC FY_ON.") + try: + fy_on = sum_column_data_by_name(input_data, columns_names, 'fy_on') + result = round_half_up(fy_on, PRECISION) + safe_log(logger, "info", f"Calculated CTC FY_ON: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + result = None + safe_log(logger, "error", f"Error in calculating CTC FY_ON: {str(e)}") + + return result -def calculate_ctc_fy_oy(input_data, columns_names): + +def calculate_ctc_fy_oy(input_data, columns_names, logger=None): """Calculates the Number of forecast yes and observation yes for Contingency Table Statistics Args: @@ -688,11 +886,20 @@ def calculate_ctc_fy_oy(input_data, columns_names): calculated Number of forecast yes and observation yes as float or None if some data values are missing or invalid """ - fy_oy = sum_column_data_by_name(input_data, columns_names, 'fy_oy') - return round_half_up(fy_oy, PRECISION) + safe_log(logger, "debug", "Starting calculation of CTC FY_OY.") + + try: + fy_oy = sum_column_data_by_name(input_data, columns_names, 'fy_oy') + result = round_half_up(fy_oy, PRECISION) + safe_log(logger, "info", f"Calculated CTC FY_OY: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + result = None + 
safe_log(logger, "error", f"Error in calculating CTC FY_OY: {str(e)}") + + return result -def calculate_ctc_oy(input_data, columns_names): +def calculate_ctc_oy(input_data, columns_names, logger=None): """Calculates the Total Number of forecast yes and observation yes plus Number of forecast no and observation yes for Contingency Table Statistics @@ -706,12 +913,21 @@ def calculate_ctc_oy(input_data, columns_names): calculated OY as float or None if some data values are missing or invalid """ - fy_oy = sum_column_data_by_name(input_data, columns_names, 'fy_oy') - fn_oy = sum_column_data_by_name(input_data, columns_names, 'fn_oy') - return round_half_up(fy_oy + fn_oy, PRECISION) + safe_log(logger, "debug", "Starting calculation of CTC OY.") + try: + fy_oy = sum_column_data_by_name(input_data, columns_names, 'fy_oy') + fn_oy = sum_column_data_by_name(input_data, columns_names, 'fn_oy') + result = round_half_up(fy_oy + fn_oy, PRECISION) + safe_log(logger, "info", f"Calculated CTC OY: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + result = None + safe_log(logger, "error", f"Error in calculating CTC OY: {str(e)}") -def calculate_ctc_on(input_data, columns_names): + return result + + +def calculate_ctc_on(input_data, columns_names, logger=None): """Calculates the Total Number of forecast yes and observation no plus Number of forecast no and observation no for Contingency Table Statistics @@ -725,12 +941,13 @@ def calculate_ctc_on(input_data, columns_names): calculated ON as float or None if some data values are missing or invalid """ + safe_log(logger, "debug", "Starting calculation of CTC ON.") fy_on = sum_column_data_by_name(input_data, columns_names, 'fy_on') fn_on = sum_column_data_by_name(input_data, columns_names, 'fn_on') return round_half_up(fy_on + fn_on, PRECISION) -def calculate_ctc_fy(input_data, columns_names): +def calculate_ctc_fy(input_data, columns_names, logger=None): """Calculates the Total Number of forecast yes and observation no plus Number of forecast yes and observation yes for Contingency Table Statistics @@ -744,12 +961,23 @@ def calculate_ctc_fy(input_data, columns_names): calculated FY as float or None if some data values are missing or invalid """ - fy_on = sum_column_data_by_name(input_data, columns_names, 'fy_on') - fy_oy = sum_column_data_by_name(input_data, columns_names, 'fy_oy') - return round_half_up(fy_on + fy_oy, PRECISION) + try: + fy_on = sum_column_data_by_name(input_data, columns_names, 'fy_on') + fn_on = sum_column_data_by_name(input_data, columns_names, 'fn_on') + result = round_half_up(fy_on + fn_on, PRECISION) + + # Logging the successful calculation + safe_log(logger, "info", f"Calculated CTC ON: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + # Logging the error if an exception occurs + safe_log(logger, "error", f"Error in calculating CTC ON: {str(e)}") + result = None + + return result -def calculate_ctc_fn(input_data, columns_names): +def calculate_ctc_fn(input_data, columns_names, logger=None): """Calculates the Total Number of forecast no and observation no plus Number of forecast no and observation yes for Contingency Table Statistics @@ -763,40 +991,71 @@ def calculate_ctc_fn(input_data, columns_names): calculated FN as float or None if some data values are missing or invalid """ - fn_on = sum_column_data_by_name(input_data, columns_names, 'fn_on') - fn_oy = sum_column_data_by_name(input_data, columns_names, 'fn_oy') - return round_half_up(fn_on + fn_oy, PRECISION) + 
safe_log(logger, "debug", "Starting calculation of CTC FN.") + try: + fn_on = sum_column_data_by_name(input_data, columns_names, 'fn_on') + fn_oy = sum_column_data_by_name(input_data, columns_names, 'fn_oy') + result = round_half_up(fn_on + fn_oy, PRECISION) + + # Logging the successful calculation + safe_log(logger, "info", f"Calculated CTC FN: {result}") -def pod_yes(input_data, columns_names): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + # Logging the error if an exception occurs + safe_log(logger, "error", f"Error in calculating CTC FN: {str(e)}") + result = None + + return result + + +def pod_yes(input_data, columns_names, logger=None): warnings.filterwarnings('error') + safe_log(logger, "debug", "Starting calculation of POD (yes).") + try: fy_oy = sum_column_data_by_name(input_data, columns_names, 'fy_oy') num = fy_oy den = fy_oy + sum_column_data_by_name(input_data, columns_names, 'fn_oy') result = num / den result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + + # Log the successful calculation + safe_log(logger, "info", f"Calculated POD (yes): {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + # Log the error + safe_log(logger, "error", f"Error in calculating POD (yes): {str(e)}") result = None + warnings.filterwarnings('ignore') return result -def pod_no(input_data, columns_names): +def pod_no(input_data, columns_names, logger=None): warnings.filterwarnings('error') + safe_log(logger, "debug", "Starting calculation of POD (no).") + try: fn_on = sum_column_data_by_name(input_data, columns_names, 'fn_on') num = fn_on den = fn_on + sum_column_data_by_name(input_data, columns_names, 'fy_on') result = num / den result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + + # Log the successful calculation + safe_log(logger, "info", f"Calculated POD (no): {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + # Log the error + safe_log(logger, "error", f"Error in calculating POD (no): {str(e)}") result = None + warnings.filterwarnings('ignore') return result -def calculate_odds1(input_data, columns_names): +def calculate_odds1(input_data, columns_names, logger=None): """Performs calculation of ODDS - Odds Ratio Args: @@ -810,21 +1069,33 @@ def calculate_odds1(input_data, columns_names): or None if some data values are missing or invalid """ warnings.filterwarnings('error') + safe_log(logger, "debug", "Starting calculation of ODDS.") + try: - py = pod_yes(input_data, columns_names) - pn = calculate_pofd(input_data, columns_names) + py = pod_yes(input_data, columns_names, logger=logger) + pn = calculate_pofd(input_data, columns_names, logger=logger) + + # Log the intermediate values + safe_log(logger, "debug", f"POD (yes): {py}, POFD: {pn}") num = py / (1 - py) den = pn / (1 - pn) result = num / den result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + + # Log the successful calculation + safe_log(logger, "info", f"Calculated ODDS: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + # Log the error + safe_log(logger, "error", f"Error in calculating ODDS: {str(e)}") result = None + warnings.filterwarnings('ignore') return result -def calculate_orss(input_data, columns_names): +def calculate_orss(input_data, columns_names, logger=None): """Performs calculation of ORSS - Odds Ratio Skill Score Args: @@ -838,23 +1109,33 @@ def 
calculate_orss(input_data, columns_names): or None if some data values are missing or invalid """ warnings.filterwarnings('error') + + safe_log(logger, "debug", "Starting calculation of ORSS.") + try: fy_oy = sum_column_data_by_name(input_data, columns_names, 'fy_oy') fn_on = sum_column_data_by_name(input_data, columns_names, 'fn_on') fy_on = sum_column_data_by_name(input_data, columns_names, 'fy_on') fn_oy = sum_column_data_by_name(input_data, columns_names, 'fn_oy') + safe_log(logger, "debug", f"FY_OY: {fy_oy}, FN_ON: {fn_on}, FY_ON: {fy_on}, FN_OY: {fn_oy}") + num = fy_oy * fn_on - fy_on * fn_oy den = fy_oy * fn_on + fy_on * fn_oy result = num / den result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", f"Calculated ORSS: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + # Log the error + safe_log(logger, "error", f"Error in calculating ORSS: {str(e)}") result = None + warnings.filterwarnings('ignore') return result -def calculate_sedi(input_data, columns_names): +def calculate_sedi(input_data, columns_names, logger=None): """Performs calculation of SEDI - Symmetric Extremal Depenency Index Args: @@ -868,24 +1149,35 @@ def calculate_sedi(input_data, columns_names): or None if some data values are missing or invalid """ warnings.filterwarnings('error') + safe_log(logger, "debug", "Starting calculation of SEDI.") + try: fn_on = sum_column_data_by_name(input_data, columns_names, 'fn_on') fy_on = sum_column_data_by_name(input_data, columns_names, 'fy_on') + safe_log(logger, "debug", f"FN_ON: {fn_on}, FY_ON: {fy_on}") + f = fy_on / (fy_on + fn_on) - h = pod_yes(input_data, columns_names) + h = pod_yes(input_data, columns_names, logger=logger) + + safe_log(logger, "debug", f"F (false alarm rate): {f}, H (hit rate): {h}") + num = math.log(f) - math.log(h) - math.log(1 - f) + math.log(1 - h) den = math.log(f) + math.log(h) + math.log(1 - f) + math.log(1 - h) result = num / den result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", f"Calculated SEDI: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error in calculating SEDI: {str(e)}") result = None + warnings.filterwarnings('ignore') return result -def calculate_seds(input_data, columns_names): +def calculate_seds(input_data, columns_names, logger=None): """Performs calculation of SEDS - Symmetric Extreme Dependency Score Args: @@ -899,24 +1191,31 @@ def calculate_seds(input_data, columns_names): or None if some data values are missing or invalid """ warnings.filterwarnings('error') + safe_log(logger, "debug", "Starting calculation of SEDS.") + try: fy_oy = sum_column_data_by_name(input_data, columns_names, 'fy_oy') fy_on = sum_column_data_by_name(input_data, columns_names, 'fy_on') fn_oy = sum_column_data_by_name(input_data, columns_names, 'fn_oy') total = sum_column_data_by_name(input_data, columns_names, 'total') + safe_log(logger, "debug", f"FY_OY: {fy_oy}, FY_ON: {fy_on}, FN_OY: {fn_oy}, Total: {total}") + num = math.log((fy_oy + fy_on) / total) + math.log((fy_oy + fn_oy) / total) den = math.log(fy_oy / total) result = num / den - 1.0 result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", f"Calculated SEDS: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error in 
calculating SEDS: {str(e)}") result = None + warnings.filterwarnings('ignore') return result -def calculate_edi(input_data, columns_names): +def calculate_edi(input_data, columns_names, logger=None): """Performs calculation of EDI - Extreme Dependency Index Args: @@ -930,24 +1229,32 @@ def calculate_edi(input_data, columns_names): or None if some data values are missing or invalid """ warnings.filterwarnings('error') + safe_log(logger, "debug", "Starting calculation of EDI.") + try: fn_on = sum_column_data_by_name(input_data, columns_names, 'fn_on') fy_on = sum_column_data_by_name(input_data, columns_names, 'fy_on') total = sum_column_data_by_name(input_data, columns_names, 'total') + safe_log(logger, "debug", f"FN_ON: {fn_on}, FY_ON: {fy_on}, Total: {total}") f = fy_on / (fy_on + fn_on) - h = pod_yes(input_data, columns_names) + h = pod_yes(input_data, columns_names, logger=logger) num = math.log(f) - math.log(h) den = math.log(f) + math.log(h) result = num / den result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", f"Calculated EDI: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + # Log the error + safe_log(logger, "error", f"Error in calculating EDI: {str(e)}") result = None + warnings.filterwarnings('ignore') return result -def calculate_eds(input_data, columns_names): +def calculate_eds(input_data, columns_names, logger=None): """Performs calculation of EDS - Extreme Dependency Score Args: @@ -961,24 +1268,31 @@ def calculate_eds(input_data, columns_names): or None if some data values are missing or invalid """ warnings.filterwarnings('error') + safe_log(logger, "debug", "Starting calculation of EDS.") + try: fy_oy = sum_column_data_by_name(input_data, columns_names, 'fy_oy') fn_oy = sum_column_data_by_name(input_data, columns_names, 'fn_oy') total = sum_column_data_by_name(input_data, columns_names, 'total') + safe_log(logger, "debug", f"FY_OY: {fy_oy}, FN_OY: {fn_oy}, Total: {total}") num = math.log((fy_oy + fn_oy) / total) den = math.log(fy_oy / total) result = 2.0 * num / den - 1.0 result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", f"Calculated EDS: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error in calculating EDS: {str(e)}") result = None + warnings.filterwarnings('ignore') return result def sort_by_thresh(input_dataframe: pd.DataFrame, sort_column_name: str = 'fcst_thresh', - ascending: bool = True) -> pd.DataFrame: + ascending: bool = True, logger=None) -> pd.DataFrame: """ Sorts the input pandas dataframe by threshold values in the specified column that have format "operator value", ie >=1. This is done by first separating @@ -1023,6 +1337,7 @@ def sort_by_thresh(input_dataframe: pd.DataFrame, sort_column_name: str = 'fcst_ # If the df_input dataframe is empty (most likely as a result of event equalization), # return the df_input data frame. if input_dataframe.empty: + safe_log(logger, "warning", "Input dataframe is empty. Returning original dataframe.") return input_dataframe for thresh in requested_thresh: @@ -1119,7 +1434,9 @@ def sort_by_thresh(input_dataframe: pd.DataFrame, sort_column_name: str = 'fcst_ # sort with ignore_index=True because we don't need to keep the original index values. We # want the rows to be newly indexed to reflect the reordering. 
Use inplace=False because # we don't want to modify the input dataframe's order, we want a new dataframe. + safe_log(logger, "debug", f"Sorting by columns: {sort_by_cols} in {'ascending' if ascending else 'descending'} order.") sorted_dataframe = input_dataframe.sort_values(by=sort_by_cols, inplace=False, ascending=ascending, ignore_index=True) + safe_log(logger, "info", "Dataframe sorted successfully.") - return sorted_dataframe + return sorted_dataframe \ No newline at end of file diff --git a/metcalcpy/util/eclv_statistics.py b/metcalcpy/util/eclv_statistics.py index bd113e7c..df041b01 100644 --- a/metcalcpy/util/eclv_statistics.py +++ b/metcalcpy/util/eclv_statistics.py @@ -17,12 +17,12 @@ from metcalcpy.util.ctc_statistics import calculate_economic_value from metcalcpy.util.utils import sum_column_data_by_name - +from metcalcpy.util.safe_log import safe_log __author__ = 'Tatiana Burek' def calculate_eclv(input_data: np.array, columns_names: np.array, - thresh: Union[float, None], line_type: str, cl_pts: list, add_base_rate: int = 0) \ + thresh: Union[float, None], line_type: str, cl_pts: list, add_base_rate: int = 0, logger=None) \ -> Union[dict, None]: """Performs calculation of ECLV - The Economic Cost Loss Value @@ -52,11 +52,14 @@ def calculate_eclv(input_data: np.array, columns_names: np.array, """ warnings.filterwarnings('error') + # some validation if line_type != 'ctc' and line_type != 'pct': + safe_log(logger, "error", f"Incorrect line type {line_type} for calculating ECLV.") print(f'ERROR: incorrect line type {line_type} for calculating ECLV ') return None if line_type == 'pct' and thresh is None: + safe_log(logger, "error", "Threshold is required for line type 'pct' in calculating ECLV.") print(f'ERROR: provide thresh for calculating ECLV ') return None @@ -77,9 +80,11 @@ def calculate_eclv(input_data: np.array, columns_names: np.array, n10 = sum_column_data_by_name(input_data, columns_names, 'fy_on') n01 = sum_column_data_by_name(input_data, columns_names, 'fn_oy') n00 = sum_column_data_by_name(input_data, columns_names, 'fn_on') - + safe_log(logger, "debug", f"n11: {n11}, n10: {n10}, n01: {n01}, n00: {n00}") result = calculate_economic_value(np.array([n11, n10, n01, n00]), cl_pts, add_base_rate == 1) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", "ECLV calculation completed successfully.") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"ECLV calculation failed due to an error: {e}") result = None warnings.filterwarnings('ignore') return result diff --git a/metcalcpy/util/ecnt_statistics.py b/metcalcpy/util/ecnt_statistics.py index 10e9e7bc..0b931272 100644 --- a/metcalcpy/util/ecnt_statistics.py +++ b/metcalcpy/util/ecnt_statistics.py @@ -16,8 +16,9 @@ import math from metcalcpy.util.utils import round_half_up, sum_column_data_by_name, PRECISION, get_total_values +from metcalcpy.util.safe_log import safe_log -def calculate_ecnt_crps(input_data, columns_names, aggregation=False): +def calculate_ecnt_crps(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of ECNT_CRPS - The Continuous Ranked Probability Score Args: @@ -31,11 +32,17 @@ def calculate_ecnt_crps(input_data, columns_names, aggregation=False): calculated ECNT_CRPS as float or None if some data values are missing or invalid """ - - return weighted_average(input_data, columns_names, 'crps', aggregation) + safe_log(logger, "debug", "Starting ECNT_CRPS calculation.") + try: + result = 
weighted_average(input_data, columns_names, 'crps', aggregation, logger=logger) + safe_log(logger, "info", "ECNT_CRPS calculation completed successfully.") + except Exception as e: + safe_log(logger, "error", f"ECNT_CRPS calculation failed due to an error: {e}") + result = None + return result -def calculate_ecnt_crpscl(input_data, columns_names, aggregation=False): +def calculate_ecnt_crpscl(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of ECNT_CRPSCL - Climatological Continuous Ranked Probability Score (normal distribution) @@ -50,10 +57,17 @@ def calculate_ecnt_crpscl(input_data, columns_names, aggregation=False): calculated ECNT_CRPSCL as float or None if some data values are missing or invalid """ - return weighted_average(input_data, columns_names, 'crpscl', aggregation) + safe_log(logger, "debug", "Starting ECNT_CRPSCL calculation.") + try: + result = weighted_average(input_data, columns_names, 'crpscl', aggregation, logger=logger) + safe_log(logger, "info", "ECNT_CRPSCL calculation completed successfully.") + except Exception as e: + safe_log(logger, "error", f"ECNT_CRPSCL calculation failed due to an error: {e}") + result = None + return result -def calculate_ecnt_crpss(input_data, columns_names, aggregation=False): +def calculate_ecnt_crpss(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of ECNT_CRPSS - The Continuous Ranked Probability Skill Score (normal distribution) @@ -69,18 +83,21 @@ def calculate_ecnt_crpss(input_data, columns_names, aggregation=False): or None if some data values are missing or invalid """ warnings.filterwarnings('error') + safe_log(logger, "debug", "Starting ECNT_CRPSS calculation.") try: total = get_total_values(input_data, columns_names, aggregation) crpscl = sum_column_data_by_name(input_data, columns_names, 'crpscl') / total crps = sum_column_data_by_name(input_data, columns_names, 'crps') / total crpss = 1 - crps / crpscl result = round_half_up(crpss, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", "ECNT_CRPSS calculation completed successfully.") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"ECNT_CRPSS calculation failed due to an error: {e}") result = None warnings.filterwarnings('ignore') return result -def calculate_ecnt_crps_emp(input_data, columns_names, aggregation=False): +def calculate_ecnt_crps_emp(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of ECNT_CRPS_EMP - The Continuous Ranked Probability Score (empirical distribution) @@ -96,10 +113,16 @@ def calculate_ecnt_crps_emp(input_data, columns_names, aggregation=False): or None if some data values are missing or invalid """ - return weighted_average(input_data, columns_names, 'crps_emp', aggregation) - + safe_log(logger, "debug", "Starting ECNT_CRPS_EMP calculation.") + try: + result = weighted_average(input_data, columns_names, 'crps_emp', aggregation, logger=logger) + safe_log(logger, "info", "ECNT_CRPS_EMP calculation completed successfully.") + except Exception as e: + safe_log(logger, "error", f"ECNT_CRPS_EMP calculation failed due to an error: {e}") + result = None + return result -def calculate_ecnt_crps_emp_fair(input_data, columns_names, aggregation=False): +def calculate_ecnt_crps_emp_fair(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of ECNT_CRPS_EMP_FAIR - The Continuous Ranked Probability Score (empirical distribution) adjusted 
by the mean absolute difference of the ensemble members @@ -115,10 +138,17 @@ def calculate_ecnt_crps_emp_fair(input_data, columns_names, aggregation=False): or None if some data values are missing or invalid """ - return weighted_average(input_data, columns_names, 'crps_emp_fair', aggregation) + safe_log(logger, "debug", "Starting ECNT_CRPS_EMP_FAIR calculation.") + try: + result = weighted_average(input_data, columns_names, 'crps_emp_fair', aggregation, logger=logger) + safe_log(logger, "info", "ECNT_CRPS_EMP_FAIR calculation completed successfully.") + except Exception as e: + safe_log(logger, "error", f"ECNT_CRPS_EMP_FAIR calculation failed due to an error: {e}") + result = None + return result -def calculate_ecnt_spread_md(input_data, columns_names, aggregation=False): +def calculate_ecnt_spread_md(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of SPREAD_MD - The pairwise Mean Absolute Difference of the unperturbed ensemble members @@ -133,10 +163,18 @@ def calculate_ecnt_spread_md(input_data, columns_names, aggregation=False): calculated SPREAD_MD as float or None if some data values are missing or invalid """ - return weighted_average(input_data, columns_names, 'spread_md', aggregation) + + safe_log(logger, "debug", "Starting SPREAD_MD calculation.") + try: + result = weighted_average(input_data, columns_names, 'spread_md', aggregation, logger=logger) + safe_log(logger, "info", "SPREAD_MD calculation completed successfully.") + except Exception as e: + safe_log(logger, "error", f"SPREAD_MD calculation failed due to an error: {e}") + result = None + return result -def weighted_average(input_data, columns_names, column_name, aggregation=False): +def weighted_average(input_data, columns_names, column_name, aggregation=False, logger=None): """ Performs aggregation over multiple cases using a weighted average approach, where the weight is defined by the number of matched pairs in the TOTAL column @@ -148,18 +186,21 @@ def weighted_average(input_data, columns_names, column_name, aggregation=False): :param aggregation: if the aggregation on fields was performed :return: aggregated column values or None if some data values are missing or invalid """ + + safe_log(logger, "debug", f"Starting weighted average calculation for column: {column_name}.") warnings.filterwarnings('error') try: total = get_total_values(input_data, columns_names, aggregation) statistic = sum_column_data_by_name(input_data, columns_names, column_name) / total result = round_half_up(statistic, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", f"Weighted average calculation for column {column_name} completed successfully.") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Weighted average calculation failed for column {column_name} due to error: {e}") result = None warnings.filterwarnings('ignore') return result - -def calculate_ecnt_crpscl_emp(input_data, columns_names, aggregation=False): +def calculate_ecnt_crpscl_emp(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of ECNT_CRPSCL_EMP - Climatological Continuous Ranked Probability Score (empirical distribution) @@ -175,9 +216,18 @@ def calculate_ecnt_crpscl_emp(input_data, columns_names, aggregation=False): or None if some data values are missing or invalid """ - return weighted_average(input_data, columns_names, 'crpscl_emp', aggregation) + safe_log(logger, "debug", "Starting ECNT_CRPSCL_EMP 
calculation.") + + result = weighted_average(input_data, columns_names, 'crpscl_emp', aggregation, logger=logger) + + if result is not None: + safe_log(logger, "info", "ECNT_CRPSCL_EMP calculation completed successfully.") + else: + safe_log(logger, "error", "ECNT_CRPSCL_EMP calculation failed.") + + return result -def calculate_ecnt_crpss_emp(input_data, columns_names, aggregation=False): +def calculate_ecnt_crpss_emp(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of ECNT_CRPSS_EMP - The Continuous Ranked Probability Skill Score (empirical distribution) @@ -192,6 +242,9 @@ def calculate_ecnt_crpss_emp(input_data, columns_names, aggregation=False): calculated ECNT_CRPSS_EMP as float or None if some data values are missing or invalid """ + + safe_log(logger, "debug", "Starting ECNT_CRPSS_EMP calculation.") + warnings.filterwarnings('error') try: total = get_total_values(input_data, columns_names, aggregation) @@ -199,13 +252,16 @@ def calculate_ecnt_crpss_emp(input_data, columns_names, aggregation=False): crpscl_emp = sum_column_data_by_name(input_data, columns_names, 'crpscl_emp') / total crpss_emp = 1 - crps_emp/crpscl_emp result = round_half_up(crpss_emp, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", "ECNT_CRPSS_EMP calculation completed successfully.") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"ECNT_CRPSS_EMP calculation failed: {str(e)}") result = None - warnings.filterwarnings('ignore') - return result + finally: + warnings.filterwarnings('ignore') + return result -def calculate_ecnt_ign(input_data, columns_names, aggregation=False): +def calculate_ecnt_ign(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of ECNT_IGN - The Ignorance Score Args: @@ -220,10 +276,19 @@ def calculate_ecnt_ign(input_data, columns_names, aggregation=False): or None if some data values are missing or invalid """ - return weighted_average(input_data, columns_names, 'ign', aggregation) + safe_log(logger, "debug", "Starting ECNT_IGN calculation.") + + try: + result = weighted_average(input_data, columns_names, 'ign', aggregation, logger=logger) + safe_log(logger, "info", "ECNT_IGN calculation completed successfully.") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"ECNT_IGN calculation failed: {str(e)}") + result = None + + return result -def calculate_ecnt_me(input_data, columns_names, aggregation=False): +def calculate_ecnt_me(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of ECNT_ME - The Mean Error of the ensemble mean (unperturbed or supplied) @@ -239,9 +304,18 @@ def calculate_ecnt_me(input_data, columns_names, aggregation=False): or None if some data values are missing or invalid """ - return weighted_average(input_data, columns_names, 'me', aggregation) + safe_log(logger, "debug", "Starting ECNT_ME calculation.") -def calculate_ecnt_mae(input_data, columns_names, aggregation=False): + try: + result = weighted_average(input_data, columns_names, 'me', aggregation, logger=logger) + safe_log(logger, "info", "ECNT_ME calculation completed successfully.") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"ECNT_ME calculation failed: {str(e)}") + result = None + + return result + +def calculate_ecnt_mae(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of ECNT_MAE - The 
Mean Absolute Error of the ensemble mean (unperturbed or supplied) @@ -257,9 +331,18 @@ def calculate_ecnt_mae(input_data, columns_names, aggregation=False): or None if some data values are missing or invalid """ - return weighted_average(input_data, columns_names, 'mae', aggregation) + safe_log(logger, "debug", "Starting ECNT_MAE calculation.") + + try: + result = weighted_average(input_data, columns_names, 'mae', aggregation, logger=logger) + safe_log(logger, "info", "ECNT_MAE calculation completed successfully.") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"ECNT_MAE calculation failed: {str(e)}") + result = None + + return result -def calculate_ecnt_mae_oerr(input_data, columns_names, aggregation=False): +def calculate_ecnt_mae_oerr(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of MAE_OERR - The Mean Absolute Error of the PERTURBED ensemble mean (e.g. with Observation Error) @@ -275,10 +358,19 @@ def calculate_ecnt_mae_oerr(input_data, columns_names, aggregation=False): or None if some data values are missing or invalid """ - return weighted_average(input_data, columns_names, 'mae_oerr', aggregation) + safe_log(logger, "debug", "Starting MAE_OERR calculation.") + + try: + result = weighted_average(input_data, columns_names, 'mae_oerr', aggregation, logger=logger) + safe_log(logger, "info", "MAE_OERR calculation completed successfully.") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"MAE_OERR calculation failed: {str(e)}") + result = None + + return result -def calculate_ecnt_rmse(input_data, columns_names, aggregation=False): +def calculate_ecnt_rmse(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of ECNT_RMSE - The Root Mean Square Error of the ensemble mean (unperturbed or supplied) @@ -294,18 +386,24 @@ def calculate_ecnt_rmse(input_data, columns_names, aggregation=False): or None if some data values are missing or invalid """ + safe_log(logger, "debug", "Starting RMSE calculation.") + warnings.filterwarnings('error') try: - wa = weighted_average(input_data, columns_names, 'mse', aggregation) + wa = weighted_average(input_data, columns_names, 'mse', aggregation, logger=logger) rmse = math.sqrt(wa) result = round_half_up(rmse, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", "RMSE calculation completed successfully.") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"RMSE calculation failed: {str(e)}") result = None - warnings.filterwarnings('ignore') + finally: + warnings.filterwarnings('ignore') + return result -def calculate_ecnt_spread(input_data, columns_names, aggregation=False): +def calculate_ecnt_spread(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of ECNT_SPREAD - The mean of the spread (standard deviation) of the unperturbed ensemble member values at each observation location @@ -321,18 +419,24 @@ def calculate_ecnt_spread(input_data, columns_names, aggregation=False): or None if some data values are missing or invalid """ + safe_log(logger, "debug", "Starting ECNT_SPREAD calculation.") + warnings.filterwarnings('error') try: - wa = weighted_average(input_data, columns_names, 'variance', aggregation) + wa = weighted_average(input_data, columns_names, 'variance', aggregation, logger=logger) spread = math.sqrt(wa) result = round_half_up(spread, PRECISION) - except 
(TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", "ECNT_SPREAD calculation completed successfully.") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"ECNT_SPREAD calculation failed: {str(e)}") result = None - warnings.filterwarnings('ignore') + finally: + warnings.filterwarnings('ignore') + return result -def calculate_ecnt_me_oerr(input_data, columns_names, aggregation=False): +def calculate_ecnt_me_oerr(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of ECNT_ME_OERR - The Mean Error of the PERTURBED ensemble mean (e.g. with Observation Error) Args: @@ -347,10 +451,22 @@ def calculate_ecnt_me_oerr(input_data, columns_names, aggregation=False): or None if some data values are missing or invalid """ - return weighted_average(input_data, columns_names, 'me_oerr', aggregation) + safe_log(logger, "debug", "Starting ECNT_ME_OERR calculation.") + + warnings.filterwarnings('error') + try: + result = weighted_average(input_data, columns_names, 'me_oerr', aggregation, logger=logger) + safe_log(logger, "info", "ECNT_ME_OERR calculation completed successfully.") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"ECNT_ME_OERR calculation failed: {str(e)}") + result = None + finally: + warnings.filterwarnings('ignore') + + return result -def calculate_ecnt_rmse_oerr(input_data, columns_names, aggregation=False): +def calculate_ecnt_rmse_oerr(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of ECNT_RMSE_OERR - The Root Mean Square Error of the PERTURBED ensemble mean (e.g.with Observation Error) Args: @@ -365,18 +481,24 @@ def calculate_ecnt_rmse_oerr(input_data, columns_names, aggregation=False): or None if some data values are missing or invalid """ + safe_log(logger, "debug", "Starting ECNT_RMSE_OERR calculation.") + warnings.filterwarnings('error') try: - wa = weighted_average(input_data, columns_names, 'mse_oerr', aggregation) + wa = weighted_average(input_data, columns_names, 'mse_oerr', aggregation, logger=logger) mse_oerr = math.sqrt(wa) result = round_half_up(mse_oerr, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", "ECNT_RMSE_OERR calculation completed successfully.") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"ECNT_RMSE_OERR calculation failed: {str(e)}") result = None - warnings.filterwarnings('ignore') + finally: + warnings.filterwarnings('ignore') + return result -def calculate_ecnt_spread_oerr(input_data, columns_names, aggregation=False): +def calculate_ecnt_spread_oerr(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of ECNT_SPREAD_OERR - The mean of the spread (standard deviation) of the PERTURBED ensemble member values (e.g. 
with Observation Error ) at each observation location @@ -392,18 +514,24 @@ def calculate_ecnt_spread_oerr(input_data, columns_names, aggregation=False): calculated ECNT_SPREAD_OERR as float or None if some data values are missing or invalid """ + safe_log(logger, "debug", "Starting ECNT_SPREAD_OERR calculation.") + warnings.filterwarnings('error') try: - wa = weighted_average(input_data, columns_names, 'variance_oerr', aggregation) + wa = weighted_average(input_data, columns_names, 'variance_oerr', aggregation, logger=logger) spread_oerr = math.sqrt(wa) result = round_half_up(spread_oerr, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", "ECNT_SPREAD_OERR calculation completed successfully.") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"ECNT_SPREAD_OERR calculation failed: {str(e)}") result = None - warnings.filterwarnings('ignore') + finally: + warnings.filterwarnings('ignore') + return result -def calculate_ecnt_spread_plus_oerr(input_data, columns_names, aggregation=False): +def calculate_ecnt_spread_plus_oerr(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of SPREAD_PLUS_OERR - The square root of the sum of unperturbed ensemble variance and the observation error variance Args: @@ -417,17 +545,23 @@ def calculate_ecnt_spread_plus_oerr(input_data, columns_names, aggregation=False calculated SPREAD_PLUS_OERR as float or None if some data values are missing or invalid """ + safe_log(logger, "debug", "Starting SPREAD_PLUS_OERR calculation.") + warnings.filterwarnings('error') try: - wa = weighted_average(input_data, columns_names, 'variance_plus_oerr', aggregation) + wa = weighted_average(input_data, columns_names, 'variance_plus_oerr', aggregation, logger=logger) spread_plus_oerr = math.sqrt(wa) result = round_half_up(spread_plus_oerr, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", "SPREAD_PLUS_OERR calculation completed successfully.") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"SPREAD_PLUS_OERR calculation failed: {str(e)}") result = None - warnings.filterwarnings('ignore') + finally: + warnings.filterwarnings('ignore') + return result -def calculate_ecnt_n_ge_obs(input_data, columns_names, aggregation=False): +def calculate_ecnt_n_ge_obs(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of N_GE_OBS - The number of ensemble values greater than or equal to their observations Args: @@ -441,16 +575,22 @@ def calculate_ecnt_n_ge_obs(input_data, columns_names, aggregation=False): calculated N_GE_OBS as float or None if some data values are missing or invalid """ + safe_log(logger, "debug", "Starting N_GE_OBS calculation.") + warnings.filterwarnings('error') try: n_ge_obs = sum_column_data_by_name(input_data, columns_names, 'n_ge_obs') result = round_half_up(n_ge_obs, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", "N_GE_OBS calculation completed successfully.") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"N_GE_OBS calculation failed: {str(e)}") result = None - warnings.filterwarnings('ignore') + finally: + warnings.filterwarnings('ignore') + return result -def calculate_ecnt_n_lt_obs(input_data, columns_names, aggregation=False): +def calculate_ecnt_n_lt_obs(input_data, columns_names, aggregation=False, logger=None): 
"""Performs calculation of N_LT_OBS - The number of ensemble values less than their observations Args: @@ -464,16 +604,23 @@ def calculate_ecnt_n_lt_obs(input_data, columns_names, aggregation=False): calculated N_LT_OBS as float or None if some data values are missing or invalid """ + + safe_log(logger, "debug", "Starting N_LT_OBS calculation.") + warnings.filterwarnings('error') try: n_lt_obs = sum_column_data_by_name(input_data, columns_names, 'n_lt_obs') result = round_half_up(n_lt_obs, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", "N_LT_OBS calculation completed successfully.") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"N_LT_OBS calculation failed: {str(e)}") result = None - warnings.filterwarnings('ignore') + finally: + warnings.filterwarnings('ignore') + return result -def calculate_ecnt_me_ge_obs(input_data, columns_names, aggregation=False): +def calculate_ecnt_me_ge_obs(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of ME_GE_OBS - The Mean Error of the ensemble values greater than or equal to their observations Args: @@ -487,17 +634,23 @@ def calculate_ecnt_me_ge_obs(input_data, columns_names, aggregation=False): calculated ME_GE_OBS as float or None if some data values are missing or invalid """ + safe_log(logger, "debug", "Starting ME_GE_OBS calculation.") + warnings.filterwarnings('error') try: n_ge_obs = sum_column_data_by_name(input_data, columns_names, 'n_ge_obs') - me_ge_obs = sum_column_data_by_name(input_data, columns_names, 'me_ge_obs')/n_ge_obs + me_ge_obs = sum_column_data_by_name(input_data, columns_names, 'me_ge_obs') / n_ge_obs result = round_half_up(me_ge_obs, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", "ME_GE_OBS calculation completed successfully.") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"ME_GE_OBS calculation failed: {str(e)}") result = None - warnings.filterwarnings('ignore') + finally: + warnings.filterwarnings('ignore') + return result -def calculate_ecnt_me_lt_obs(input_data, columns_names, aggregation=False): +def calculate_ecnt_me_lt_obs(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of ME_GE_OBS - The Mean Error of the ensemble values greater than or equal to their observations Args: @@ -511,17 +664,21 @@ def calculate_ecnt_me_lt_obs(input_data, columns_names, aggregation=False): calculated ME_GE_OBS as float or None if some data values are missing or invalid """ + safe_log(logger, "debug", "Starting ME_LT_OBS calculation.") + warnings.filterwarnings('error') try: n_lt_obs = sum_column_data_by_name(input_data, columns_names, 'n_lt_obs') - me_lt_obs = sum_column_data_by_name(input_data, columns_names, 'me_lt_obs')/n_lt_obs + me_lt_obs = sum_column_data_by_name(input_data, columns_names, 'me_lt_obs') / n_lt_obs result = round_half_up(me_lt_obs, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", "ME_LT_OBS calculation completed successfully.") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"ME_LT_OBS calculation failed: {str(e)}") result = None - warnings.filterwarnings('ignore') - return result + finally: + warnings.filterwarnings('ignore') -def calculate_ecnt_bias_ratio(input_data, columns_names, aggregation=False): +def calculate_ecnt_bias_ratio(input_data, 
columns_names, aggregation=False, logger=None): """Performs calculation of BIAS_RATIO - The Bias Ratio Args: input_data: 2-dimensional numpy array with data for the calculation @@ -534,19 +691,28 @@ def calculate_ecnt_bias_ratio(input_data, columns_names, aggregation=False): calculated BIAS_RATIO as float or None if some data values are missing or invalid """ + safe_log(logger, "debug", "Starting BIAS_RATIO calculation.") + warnings.filterwarnings('error') try: - me_ge_obs = calculate_ecnt_me_ge_obs(input_data, columns_names) - me_lt_obs = calculate_ecnt_me_lt_obs(input_data, columns_names) - bias_ratio = me_ge_obs/abs(me_lt_obs) + me_ge_obs = calculate_ecnt_me_ge_obs(input_data, columns_names, logger=logger) + me_lt_obs = calculate_ecnt_me_lt_obs(input_data, columns_names, logger=logger) + if me_lt_obs == 0: + raise ZeroDivisionError("Division by zero encountered in BIAS_RATIO calculation.") + + bias_ratio = me_ge_obs / abs(me_lt_obs) result = round_half_up(bias_ratio, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", "BIAS_RATIO calculation completed successfully.") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"BIAS_RATIO calculation failed: {str(e)}") result = None - warnings.filterwarnings('ignore') + finally: + warnings.filterwarnings('ignore') + return result -def calculate_ecnt_total(input_data, columns_names): +def calculate_ecnt_total(input_data, columns_names, logger=None): """Performs calculation of Total number of matched pairs for Ensemble Continuous Statistics Args: input_data: 2-dimensional numpy array with data for the calculation @@ -558,10 +724,19 @@ def calculate_ecnt_total(input_data, columns_names): calculated Total number of matched pairs as float or None if some data values are missing or invalid """ - total = sum_column_data_by_name(input_data, columns_names, 'total') - return round_half_up(total, PRECISION) + safe_log(logger, "debug", "Starting Total number of matched pairs calculation.") -def calculate_ecnt_ign_conv_oerr(input_data, columns_names, aggregation=False): + try: + total = sum_column_data_by_name(input_data, columns_names, 'total') + result = round_half_up(total, PRECISION) + safe_log(logger, "info", "Total number of matched pairs calculation completed successfully.") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Total number of matched pairs calculation failed: {str(e)}") + result = None + + return result + +def calculate_ecnt_ign_conv_oerr(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of IGN_CONV_OERR - The error-convolved logarithmic scoring rule (ignorance score) @@ -577,10 +752,19 @@ def calculate_ecnt_ign_conv_oerr(input_data, columns_names, aggregation=False): or None if some data values are missing or invalid """ - return weighted_average(input_data, np.array(columns_names), 'ign_conv_oerr', aggregation) + safe_log(logger, "debug", "Starting IGN_CONV_OERR calculation.") + try: + result = weighted_average(input_data, np.array(columns_names), 'ign_conv_oerr', aggregation, logger=logger) + safe_log(logger, "info", "IGN_CONV_OERR calculation completed successfully.") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"IGN_CONV_OERR calculation failed: {str(e)}") + result = None -def calculate_ecnt_ign_corr_oerr(input_data, columns_names, aggregation=False): + return result + + +def 
calculate_ecnt_ign_corr_oerr(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of IGN_CORR_OERR - The error-corrected logarithmic scoring rule (ignorance score) @@ -596,6 +780,15 @@ def calculate_ecnt_ign_corr_oerr(input_data, columns_names, aggregation=False): or None if some data values are missing or invalid """ - return weighted_average(input_data, np.array(columns_names), 'ign_corr_oerr', aggregation) + safe_log(logger, "debug", "Starting calculation of IGN_CORR_OERR.") + + try: + result = weighted_average(input_data, np.array(columns_names), 'ign_corr_oerr', aggregation, logger=logger) + safe_log(logger, "debug", f"Calculated IGN_CORR_OERR: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error calculating IGN_CORR_OERR: {str(e)}") + result = None + + return result diff --git a/metcalcpy/util/grad_statistics.py b/metcalcpy/util/grad_statistics.py index bebd38fe..9e0810b6 100644 --- a/metcalcpy/util/grad_statistics.py +++ b/metcalcpy/util/grad_statistics.py @@ -13,12 +13,13 @@ """ import warnings from metcalcpy.util.utils import round_half_up, sum_column_data_by_name, PRECISION, get_total_values +from metcalcpy.util.safe_log import safe_log __author__ = 'Tatiana Burek' __version__ = '0.1.0' -def calculate_fgbar(input_data, columns_names, aggregation=False): +def calculate_fgbar(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of FGBAR - Mean of absolute value of forecast gradients Args: @@ -33,17 +34,23 @@ def calculate_fgbar(input_data, columns_names, aggregation=False): or None if some of the data values are missing or invalid """ warnings.filterwarnings('error') + + safe_log(logger, "debug", "Starting calculation of FGBAR.") + try: total = get_total_values(input_data, columns_names, aggregation) result = sum_column_data_by_name(input_data, columns_names, 'fgbar') / total result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "debug", f"Calculated FGBAR: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error calculating FGBAR: {str(e)}") result = None + warnings.filterwarnings('ignore') return result -def calculate_ogbar(input_data, columns_names, aggregation=False): +def calculate_ogbar(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of OGBAR - Mean of absolute value of observed gradients Args: @@ -58,17 +65,23 @@ def calculate_ogbar(input_data, columns_names, aggregation=False): or None if some of the data values are missing or invalid """ warnings.filterwarnings('error') + + safe_log(logger, "debug", "Starting calculation of OGBAR.") + try: total = get_total_values(input_data, columns_names, aggregation) result = sum_column_data_by_name(input_data, columns_names, 'ogbar') / total result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "debug", f"Calculated OGBAR: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error calculating OGBAR: {str(e)}") result = None + warnings.filterwarnings('ignore') return result -def calculate_mgbar(input_data, columns_names, aggregation=False): +def calculate_mgbar(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of MGBAR - Mean of maximum of absolute values of forecast and observed gradients @@ -84,17 
+97,23 @@ def calculate_mgbar(input_data, columns_names, aggregation=False): or None if some of the data values are missing or invalid """ warnings.filterwarnings('error') + + safe_log(logger, "debug", "Starting calculation of MGBAR.") + try: total = get_total_values(input_data, columns_names, aggregation) result = sum_column_data_by_name(input_data, columns_names, 'mgbar') / total result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "debug", f"Calculated MGBAR: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error calculating MGBAR: {str(e)}") result = None + warnings.filterwarnings('ignore') return result -def calculate_egbar(input_data, columns_names, aggregation=False): +def calculate_egbar(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of EGBAR - Mean of absolute value of forecast minus observed gradients Args: @@ -109,17 +128,23 @@ def calculate_egbar(input_data, columns_names, aggregation=False): or None if some of the data values are missing or invalid """ warnings.filterwarnings('error') + + safe_log(logger, "debug", "Starting calculation of EGBAR.") + try: total = get_total_values(input_data, columns_names, aggregation) result = sum_column_data_by_name(input_data, columns_names, 'egbar') / total result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "debug", f"Calculated EGBAR: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error calculating EGBAR: {str(e)}") result = None + warnings.filterwarnings('ignore') return result -def calculate_s1(input_data, columns_names, aggregation=False): +def calculate_s1(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of S1 - S1 score Args: @@ -134,19 +159,25 @@ def calculate_s1(input_data, columns_names, aggregation=False): or None if some of the data values are missing or invalid """ warnings.filterwarnings('error') + + safe_log(logger, "debug", "Starting calculation of S1 score.") + try: total = get_total_values(input_data, columns_names, aggregation) egbar = sum_column_data_by_name(input_data, columns_names, 'egbar') / total mgbar = sum_column_data_by_name(input_data, columns_names, 'mgbar') / total result = 100 * egbar / mgbar result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "debug", f"Calculated S1 score: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error calculating S1 score: {str(e)}") result = None + warnings.filterwarnings('ignore') return result -def calculate_s1_og(input_data, columns_names, aggregation=False): +def calculate_s1_og(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of S1_OG - S1 score with respect to observed gradient Args: @@ -160,20 +191,29 @@ def calculate_s1_og(input_data, columns_names, aggregation=False): calculated S1_OG as float or None if some of the data values are missing or invalid """ + warnings.filterwarnings('error') try: total = get_total_values(input_data, columns_names, aggregation) + safe_log(logger, "debug", f"Total calculated: {total}") + egbar = sum_column_data_by_name(input_data, columns_names, 'egbar') / total + safe_log(logger, "debug", f"EG Bar calculated: {egbar}") + ogbar = 
sum_column_data_by_name(input_data, columns_names, 'ogbar') / total + safe_log(logger, "debug", f"OG Bar calculated: {ogbar}") + result = 100 * egbar / ogbar result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "debug", f"Result calculated: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error encountered: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_fgog_ratio(input_data, columns_names, aggregation=False): +def calculate_fgog_ratio(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of FGOG_RATIO - Ratio of forecast and observed gradients Args: @@ -190,17 +230,25 @@ def calculate_fgog_ratio(input_data, columns_names, aggregation=False): warnings.filterwarnings('error') try: total = get_total_values(input_data, columns_names, aggregation) + safe_log(logger, "debug", f"Total calculated: {total}") + fgbar = sum_column_data_by_name(input_data, columns_names, 'fgbar') / total + safe_log(logger, "debug", f"FG Bar calculated: {fgbar}") + ogbar = sum_column_data_by_name(input_data, columns_names, 'ogbar') / total + safe_log(logger, "debug", f"OG Bar calculated: {ogbar}") + result = 100 * fgbar / ogbar result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "debug", f"FGOG Ratio calculated: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error encountered: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_grad_total(input_data, columns_names): +def calculate_grad_total(input_data, columns_names, logger=None): """Performs calculation of Total number of matched pairs for Gradient partial sums Args: input_data: 2-dimensional numpy array with data for the calculation @@ -212,5 +260,11 @@ def calculate_grad_total(input_data, columns_names): calculated Total number of matched pairs as float or None if some of the data values are missing or invalid """ - total = sum_column_data_by_name(input_data, columns_names, 'total') - return round_half_up(total, PRECISION) + try: + total = sum_column_data_by_name(input_data, columns_names, 'total') + safe_log(logger, "debug", f"Total number of matched pairs calculated: {total}") + result = round_half_up(total, PRECISION) + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error encountered during calculation: {str(e)}") + result = None + return result \ No newline at end of file diff --git a/metcalcpy/util/mcts_statistics.py b/metcalcpy/util/mcts_statistics.py index 1919b1c9..f21ca9ef 100644 --- a/metcalcpy/util/mcts_statistics.py +++ b/metcalcpy/util/mcts_statistics.py @@ -14,11 +14,12 @@ import warnings import numpy as np from metcalcpy.util.utils import round_half_up, sum_column_data_by_name, PRECISION +from metcalcpy.util.safe_log import safe_log __author__ = 'Tatiana Burek' -def calculate_mcts_hss_ec(input_data, columns_names): +def calculate_mcts_hss_ec(input_data, columns_names, logger=None): """Performs calculation of HSS_EC - a skill score based on Accuracy, Args: @@ -38,29 +39,38 @@ def calculate_mcts_hss_ec(input_data, columns_names): n_cat = row[np.where(columns_names == 'n_cat')[0][0]] ec_value = row[np.where(columns_names == 'ec_value')[0][0]] - # aggregate all fi_oj in one row + safe_log(logger, "debug", f"Number of categories (n_cat): 
{n_cat}") + safe_log(logger, "debug", f"Expected correct (ec_value): {ec_value}") + + # Aggregate all fi_oj in one row for index in range(n_cat * n_cat): column_name = 'fi_oj_' + str(index) row[np.where(columns_names == column_name)[0][0]] = \ sum_column_data_by_name(input_data, columns_names, column_name) - # init contingency table + # Initialize contingency table cont_table = [[0] * n_cat for _ in range(n_cat)] - # fill contingency table + # Fill contingency table for index in range(n_cat * n_cat): i_value = row[np.where(columns_names == 'i_value_' + str(index))[0][0]] j_value = row[np.where(columns_names == 'j_value_' + str(index))[0][0]] fi_oj = row[np.where(columns_names == 'fi_oj_' + str(index))[0][0]] cont_table[i_value - 1][j_value - 1] = fi_oj - # calculate the sum of the counts on the diagonal and - # the sum of the counts across the whole MCTC table + safe_log(logger, "debug", f"Contingency table: {cont_table}") + + # Calculate the sum of the counts on the diagonal and the sum of the counts across the whole MCTC table diag_count = sum([cont_table[i][j] for i in range(n_cat) for j in range(n_cat) if i == j]) sum_all = sum(sum(cont_table, [])) result = (diag_count - (ec_value * sum_all)) / (sum_all - (ec_value * sum_all)) result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + + safe_log(logger, "debug", f"Calculated HSS_EC: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error encountered during calculation: {str(e)}") result = None + warnings.filterwarnings('ignore') return result diff --git a/metcalcpy/util/met_stats.py b/metcalcpy/util/met_stats.py index 33770dca..ce9e4ed6 100644 --- a/metcalcpy/util/met_stats.py +++ b/metcalcpy/util/met_stats.py @@ -13,12 +13,13 @@ """ import math import numpy as np +from metcalcpy.util.safe_log import safe_log __author__ = 'Tatiana Burek' __version__ = '0.1.0' -def get_column_index_by_name(columns, column_name): +def get_column_index_by_name(columns, column_name, logger=None): """Finds the index of the specified column in the array Args: @@ -30,12 +31,17 @@ def get_column_index_by_name(columns, column_name): or None if the column name does not exist in the array """ index_array = np.where(columns == column_name)[0] + if index_array.size == 0: + safe_log(logger, "warning", f"Column '{column_name}' not found in the array.") return None - return index_array[0] + + column_index = index_array[0] + safe_log(logger, "debug", f"Column '{column_name}' found at index {column_index}.") + return column_index -def calc_direction(u_comp, v_comp): +def calc_direction(u_comp, v_comp, logger=None): """ Calculated the direction of the wind from it's u and v components in degrees Args: u_comp: u wind component @@ -45,16 +51,20 @@ def calc_direction(u_comp, v_comp): direction of the wind in degrees or None if one of the components is less then tolerance """ tolerance = 1e-5 + if abs(u_comp) < tolerance and abs(v_comp) < tolerance: + safe_log(logger, "warning", "Both u and v components are below tolerance, returning None.") return None - + direction = np.arctan2(u_comp, v_comp) - # convert to [0,360] + # Convert to [0, 360] direction = direction - 360 * math.floor(direction / 360) + safe_log(logger, "debug", f"Calculated wind direction: {direction} degrees.") + return direction -def calc_speed(u_comp, v_comp): +def calc_speed(u_comp, v_comp, logger=None): """ Calculated the speed of the wind from it's u and v components Args: u_comp: u wind component 
@@ -65,6 +75,9 @@ def calc_speed(u_comp, v_comp): """ try: result = np.sqrt(u_comp * u_comp + v_comp * v_comp) - except (TypeError, Warning): + safe_log(logger, "debug", f"Calculated wind speed: {result}.") + except (TypeError, Warning) as e: result = None + safe_log(logger, "warning", f"Failed to calculate wind speed: {str(e)}.") + return result diff --git a/metcalcpy/util/mode_2d_arearat_statistics.py b/metcalcpy/util/mode_2d_arearat_statistics.py index 8fe366e3..888215a2 100644 --- a/metcalcpy/util/mode_2d_arearat_statistics.py +++ b/metcalcpy/util/mode_2d_arearat_statistics.py @@ -13,12 +13,13 @@ """ from metcalcpy.util.mode_arearat_statistics import * from metcalcpy.util.utils import column_data_by_name_value, TWO_D_DATA_FILTER +from metcalcpy.util.safe_log import safe_log __author__ = 'Tatiana Burek' __version__ = '0.1.0' -def calculate_2d_arearat_fsa_asa(input_data, columns_names): +def calculate_2d_arearat_fsa_asa(input_data, columns_names, logger=None): """Performs calculation of Area-weighted % of 2d simple objects that are forecast Args: @@ -31,11 +32,21 @@ def calculate_2d_arearat_fsa_asa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_arearat_fsa_asa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating area-weighted statistic using filtered data.") + result = calculate_arearat_fsa_asa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 2D area-weighted statistic: {str(e)}.") + return None -def calculate_2d_arearat_osa_asa(input_data, columns_names): +def calculate_2d_arearat_osa_asa(input_data, columns_names, logger=None): """Performs calculation of Area-weighted % of simple objects that are observation Args: @@ -48,11 +59,21 @@ def calculate_2d_arearat_osa_asa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_arearat_osa_asa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating area-weighted statistic using filtered data.") + result = calculate_arearat_osa_asa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 2D area-weighted statistic: {str(e)}.") + return None -def calculate_2d_arearat_asm_asa(input_data, columns_names): +def calculate_2d_arearat_asm_asa(input_data, columns_names, logger=None): """Performs calculation of Area-weighted % of 2d simple objects that are matched Args: @@ -65,11 +86,21 @@ def calculate_2d_arearat_asm_asa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_arearat_asm_asa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating area-weighted % of matched objects using filtered data.") + result = calculate_arearat_asm_asa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 2D area-weighted % of matched objects: {str(e)}.") + return None -def calculate_2d_arearat_asu_asa(input_data, columns_names): +def calculate_2d_arearat_asu_asa(input_data, columns_names, logger=None): """Performs calculation of Area-weighted % of 2d simple objects that are unmatched Args: @@ -82,11 +113,21 @@ def calculate_2d_arearat_asu_asa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_arearat_asu_asa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating area-weighted % of unmatched objects using filtered data.") + result = calculate_arearat_asu_asa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 2D area-weighted % of unmatched objects: {str(e)}.") + return None -def calculate_2d_arearat_fsm_fsa(input_data, columns_names): +def calculate_2d_arearat_fsm_fsa(input_data, columns_names, logger=None): """Performs calculation of Area-weighted % of 2d simple forecast objects that are matched Args: @@ -99,11 +140,21 @@ def calculate_2d_arearat_fsm_fsa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_arearat_fsm_fsa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating area-weighted % of matched forecast objects using filtered data.") + result = calculate_arearat_fsm_fsa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 2D area-weighted % of matched forecast objects: {str(e)}.") + return None -def calculate_2d_arearat_fsu_fsa(input_data, columns_names): +def calculate_2d_arearat_fsu_fsa(input_data, columns_names, logger=None): """Performs calculation of Area-weighted % of 2d simple forecast objects that are unmatched Args: @@ -116,11 +167,21 @@ def calculate_2d_arearat_fsu_fsa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_arearat_fsu_fsa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating area-weighted % of unmatched forecast objects using filtered data.") + result = calculate_arearat_fsu_fsa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 2D area-weighted % of unmatched forecast objects: {str(e)}.") + return None -def calculate_2d_arearat_osm_osa(input_data, columns_names): +def calculate_2d_arearat_osm_osa(input_data, columns_names, logger=None): """Performs calculation of Area-weighted % of 2d simple observation objects that are matched Args: @@ -133,11 +194,21 @@ def calculate_2d_arearat_osm_osa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_arearat_osm_osa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating area-weighted % of matched observation objects using filtered data.") + result = calculate_arearat_osm_osa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 2D area-weighted % of matched observation objects: {str(e)}.") + return None -def calculate_2d_arearat_osu_osa(input_data, columns_names): +def calculate_2d_arearat_osu_osa(input_data, columns_names, logger=None): """Performs calculation of Area-weighted % of 2d simple observation objects that are unmatched Args: @@ -150,11 +221,21 @@ def calculate_2d_arearat_osu_osa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_arearat_osu_osa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating area-weighted % of unmatched observation objects using filtered data.") + result = calculate_arearat_osu_osa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 2D area-weighted % of unmatched observation objects: {str(e)}.") + return None -def calculate_2d_arearat_fsm_asm(input_data, columns_names): +def calculate_2d_arearat_fsm_asm(input_data, columns_names, logger=None): """Performs calculation of Area-weighted % of 2d simple matched objects that are forecasts Args: @@ -167,11 +248,21 @@ def calculate_2d_arearat_fsm_asm(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_arearat_fsm_asm(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating area-weighted % of matched forecast objects using filtered data.") + result = calculate_arearat_fsm_asm(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 2D area-weighted % of matched forecast objects: {str(e)}.") + return None -def calculate_2d_arearat_osm_asm(input_data, columns_names): +def calculate_2d_arearat_osm_asm(input_data, columns_names, logger=None): """Performs calculation of Area-weighted % of 2d simple matched objects that are observations Args: @@ -184,11 +275,21 @@ def calculate_2d_arearat_osm_asm(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_arearat_osm_asm(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating area-weighted % of matched observation objects using filtered data.") + result = calculate_arearat_osm_asm(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 2D area-weighted % of matched observation objects: {str(e)}.") + return None -def calculate_2d_arearat_osu_asu(input_data, columns_names): +def calculate_2d_arearat_osu_asu(input_data, columns_names, logger=None): """Performs calculation of Area-weighted % of s2d imple unmatched objects that are observation Args: input_data: 2-dimensional numpy array with data for the calculation @@ -200,11 +301,21 @@ def calculate_2d_arearat_osu_asu(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_arearat_osu_asu(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating area-weighted % of unmatched observation objects using filtered data.") + result = calculate_arearat_osu_asu(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 2D area-weighted % of unmatched observation objects: {str(e)}.") + return None -def calculate_2d_arearat_fsa_aaa(input_data, columns_names): +def calculate_2d_arearat_fsa_aaa(input_data, columns_names, logger=None): """Performs calculation of Area-weighted ? 
Args: @@ -217,11 +328,21 @@ def calculate_2d_arearat_fsa_aaa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_arearat_fsa_aaa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating area-weighted % of forecast objects against all area using filtered data.") + result = calculate_arearat_fsa_aaa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 2D area-weighted % of forecast objects against all area: {str(e)}.") + return None -def calculate_2d_arearat_osa_aaa(input_data, columns_names): +def calculate_2d_arearat_osa_aaa(input_data, columns_names, logger=None): """Performs calculation of Area-weighted ? Args: @@ -234,11 +355,21 @@ def calculate_2d_arearat_osa_aaa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_arearat_osa_aaa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating area-weighted % of observation objects against all area using filtered data.") + result = calculate_arearat_osa_aaa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 2D area-weighted % of observation objects against all area: {str(e)}.") + return None -def calculate_2d_arearat_fsa_faa(input_data, columns_names): +def calculate_2d_arearat_fsa_faa(input_data, columns_names, logger=None): """Performs calculation of Area-weighted % of all 2d forecast objects that are simple Args: @@ -251,11 +382,21 @@ def calculate_2d_arearat_fsa_faa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_arearat_fsa_faa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating area-weighted % of all 2d forecast objects that are simple.") + result = calculate_arearat_fsa_faa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 2D area-weighted % of all 2d forecast objects that are simple: {str(e)}.") + return None -def calculate_2d_arearat_fca_faa(input_data, columns_names): +def calculate_2d_arearat_fca_faa(input_data, columns_names, logger=None): """Performs calculation of Area-weighted % of all 2d forecast objects that are cluster Args: @@ -268,11 +409,21 @@ def calculate_2d_arearat_fca_faa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_arearat_fca_faa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating area-weighted % of all 2d forecast objects that are cluster.") + result = calculate_arearat_fca_faa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 2D area-weighted % of all 2d forecast objects that are cluster: {str(e)}.") + return None -def calculate_2d_arearat_osa_oaa(input_data, columns_names): +def calculate_2d_arearat_osa_oaa(input_data, columns_names, logger=None): """Performs calculation of Area-weighted % of all 2d observation objects that are simple Args: @@ -285,11 +436,21 @@ def calculate_2d_arearat_osa_oaa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_arearat_osa_oaa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating area-weighted % of all 2d observation objects that are simple.") + result = calculate_arearat_osa_oaa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 2D area-weighted % of all 2d observation objects that are simple: {str(e)}.") + return None -def calculate_2d_arearat_oca_oaa(input_data, columns_names): +def calculate_2d_arearat_oca_oaa(input_data, columns_names, logger=None): """Performs calculation of Area-weighted % of all 2d observation objects that are cluster Args: @@ -302,11 +463,21 @@ def calculate_2d_arearat_oca_oaa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_arearat_oca_oaa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating area-weighted % of all 2d observation objects that are cluster.") + result = calculate_arearat_oca_oaa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 2D area-weighted % of all 2d observation objects that are cluster: {str(e)}.") + return None -def calculate_2d_arearat_fca_aca(input_data, columns_names): +def calculate_2d_arearat_fca_aca(input_data, columns_names, logger=None): """Performs calculation of Area-weighted % of 2d cluster objects that are forecast Args: @@ -319,11 +490,21 @@ def calculate_2d_arearat_fca_aca(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_arearat_fca_aca(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating area-weighted % of 2d cluster objects that are forecast.") + result = calculate_arearat_fca_aca(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 2D area-weighted % of 2d cluster objects that are forecast: {str(e)}.") + return None -def calculate_2d_arearat_oca_aca(input_data, columns_names): +def calculate_2d_arearat_oca_aca(input_data, columns_names, logger=None): """Performs calculation of Area-weighted % of 2d cluster objects that are observation Args: @@ -336,11 +517,21 @@ def calculate_2d_arearat_oca_aca(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_arearat_oca_aca(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating area-weighted % of 2d cluster objects that are observation.") + result = calculate_arearat_oca_aca(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 2D area-weighted % of 2d cluster objects that are observation: {str(e)}.") + return None -def calculate_2d_arearat_fsa_osa(input_data, columns_names): +def calculate_2d_arearat_fsa_osa(input_data, columns_names, logger=None): """Performs calculation of Area Ratio of 2d simple forecasts to 2d simple observations [frequency bias] @@ -354,11 +545,21 @@ def calculate_2d_arearat_fsa_osa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_arearat_fsa_osa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating Area Ratio of 2d simple forecasts to 2d simple observations.") + result = calculate_arearat_fsa_osa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Area Ratio of 2d simple forecasts to 2d simple observations: {str(e)}.") + return None -def calculate_2d_arearat_osa_fsa(input_data, columns_names): +def calculate_2d_arearat_osa_fsa(input_data, columns_names, logger=None): """Performs calculation of Area Ratio of 2d simple observations to 2d simple forecasts [1 / frequency bias] @@ -372,11 +573,21 @@ def calculate_2d_arearat_osa_fsa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_arearat_osa_fsa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating Area Ratio of 2d simple observations to 2d simple forecasts.") + result = calculate_arearat_osa_fsa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Area Ratio of 2d simple observations to 2d simple forecasts: {str(e)}.") + return None -def calculate_2d_arearat_aca_asa(input_data, columns_names): +def calculate_2d_arearat_aca_asa(input_data, columns_names, logger=None): """Performs calculation of Area Ratio of 2d cluster objects to 2d simple objects Args: @@ -389,11 +600,21 @@ def calculate_2d_arearat_aca_asa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_arearat_aca_asa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating Area Ratio of 2d cluster objects to 2d simple objects.") + result = calculate_arearat_aca_asa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. 
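The [frequency bias] annotations above carry the usual interpretation: the FSA/OSA area ratio exceeds 1 when simple forecast objects cover more total area than simple observation objects, and OSA/FSA is its reciprocal. A toy check with made-up areas (not values from this patch):

    fsa_total_area = 120.0  # hypothetical summed area of simple forecast objects
    osa_total_area = 100.0  # hypothetical summed area of simple observation objects
    frequency_bias = fsa_total_area / osa_total_area  # 1.2 -> forecast over-covers
    inverse_bias = osa_total_area / fsa_total_area    # reciprocal, 1 / 1.2
    assert abs(frequency_bias * inverse_bias - 1.0) < 1e-12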
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Area Ratio of 2d cluster objects to 2d simple objects: {str(e)}.") + return None -def calculate_2d_arearat_asa_aca(input_data, columns_names): +def calculate_2d_arearat_asa_aca(input_data, columns_names, logger=None): """Performs calculation of Area Ratio of 2d simple objects to 2d cluster objects Args: @@ -406,11 +627,20 @@ def calculate_2d_arearat_asa_aca(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_arearat_asa_aca(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating Area Ratio of 2d simple objects to 2d cluster objects.") + result = calculate_arearat_asa_aca(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Area Ratio of 2d simple objects to 2d cluster objects: {str(e)}.") + return None - -def calculate_2d_arearat_fca_fsa(input_data, columns_names): +def calculate_2d_arearat_fca_fsa(input_data, columns_names, logger=None): """Performs calculation of Area Ratio of 2d cluster forecast objects to 2d simple forecast objects @@ -424,11 +654,21 @@ def calculate_2d_arearat_fca_fsa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_arearat_fca_fsa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating Area Ratio of 2d cluster forecast objects to 2d simple forecast objects.") + result = calculate_arearat_fca_fsa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Area Ratio of 2d cluster forecast objects to 2d simple forecast objects: {str(e)}.") + return None -def calculate_2d_arearat_fsa_fca(input_data, columns_names): +def calculate_2d_arearat_fsa_fca(input_data, columns_names, logger=None): """Performs calculation of Area Ratio of 2d simple forecast objects to 2d cluster forecast objects @@ -442,11 +682,22 @@ def calculate_2d_arearat_fsa_fca(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_arearat_fsa_fca(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating Area Ratio of 2d simple forecast objects to 2d cluster forecast objects.") + result = calculate_arearat_fsa_fca(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Area Ratio of 2d simple forecast objects to 2d cluster forecast objects: {str(e)}.") + return None + -def calculate_2d_arearat_oca_osa(input_data, columns_names): +def calculate_2d_arearat_oca_osa(input_data, columns_names, logger=None): """Performs calculation of Area Ratio of 2d cluster observation objects to 2d simple observation objects @@ -460,11 +711,20 @@ def calculate_2d_arearat_oca_osa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_arearat_oca_osa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating Area Ratio of 2d cluster observation objects to 2d simple observation objects.") + result = calculate_arearat_oca_osa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Area Ratio of 2d cluster observation objects to 2d simple observation objects: {str(e)}.") + return None - -def calculate_2d_arearat_osa_oca(input_data, columns_names): +def calculate_2d_arearat_osa_oca(input_data, columns_names, logger=None): """Performs calculation of Area Ratio of 2d simple observation objects to 2d cluster observation objects @@ -478,11 +738,21 @@ def calculate_2d_arearat_osa_oca(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_arearat_osa_oca(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating Area Ratio of 2d simple observation objects to 2d cluster observation objects.") + result = calculate_arearat_osa_oca(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Area Ratio of 2d simple observation objects to 2d cluster observation objects: {str(e)}.") + return None -def calculate_2d_objahits(input_data, columns_names): +def calculate_2d_objahits(input_data, columns_names, logger=None): """Performs calculation of Area 2d Hits =/2 Args: @@ -495,11 +765,21 @@ def calculate_2d_objahits(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_objahits(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating Area 2d Hits.") + result = calculate_objahits(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Area 2d Hits: {str(e)}.") + return None -def calculate_2d_objamisses(input_data, columns_names): +def calculate_2d_objamisses(input_data, columns_names, logger=None): """Performs calculation of Area 2d Misses = OSU Args: @@ -512,11 +792,21 @@ def calculate_2d_objamisses(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_objamisses(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating Area 2d Misses (OSU).") + result = calculate_objamisses(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Area 2d Misses (OSU): {str(e)}.") + return None -def calculate_2d_objafas(input_data, columns_names): +def calculate_2d_objafas(input_data, columns_names, logger=None): """Performs calculation of Area 2d False Alarms = FSU Args: @@ -529,11 +819,21 @@ def calculate_2d_objafas(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_objafas(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating Area 2d False Alarms (FSU).") + result = calculate_objafas(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Area 2d False Alarms (FSU): {str(e)}.") + return None -def calculate_2d_objacsi(input_data, columns_names): +def calculate_2d_objacsi(input_data, columns_names, logger=None): """Performs calculation of Area 2d critical success index CSI = hits //2 + OSU + FSU] Args: @@ -546,11 +846,21 @@ def calculate_2d_objacsi(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_objacsi(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating Area 2d Critical Success Index (CSI).") + result = calculate_objacsi(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Area 2d Critical Success Index (CSI): {str(e)}.") + return None -def calculate_2d_objapody(input_data, columns_names): +def calculate_2d_objapody(input_data, columns_names, logger=None): """Performs calculation of Area 2d prob of detecting yes PODY = hits //2 + OSU] Args: @@ -563,11 +873,20 @@ def calculate_2d_objapody(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_objapody(filtered_data, columns_names) - + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating Area 2d Probability of Detecting Yes (PODY).") + result = calculate_objapody(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Area 2d Probability of Detecting Yes (PODY): {str(e)}.") + return None -def calculate_2d_objafar(input_data, columns_names): +def calculate_2d_objafar(input_data, columns_names, logger=None): """Performs calculation of Area 2d FAR = false alarms //2 + FSU] Args: @@ -580,5 +899,15 @@ def calculate_2d_objafar(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_objafar(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating Area 2d False Alarm Ratio (FAR).") + result = calculate_objafar(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Area 2d False Alarm Ratio (FAR): {str(e)}.") + return None diff --git a/metcalcpy/util/mode_2d_ratio_statistics.py b/metcalcpy/util/mode_2d_ratio_statistics.py index 55a26668..b7ffb599 100644 --- a/metcalcpy/util/mode_2d_ratio_statistics.py +++ b/metcalcpy/util/mode_2d_ratio_statistics.py @@ -13,12 +13,13 @@ """ from metcalcpy.util.mode_ratio_statistics import * from metcalcpy.util.utils import column_data_by_name_value, TWO_D_DATA_FILTER +from metcalcpy.util.safe_log import safe_log __author__ = 'Tatiana Burek' __version__ = '0.1.0' -def calculate_2d_ratio_fsa_asa(input_data, columns_names): +def calculate_2d_ratio_fsa_asa(input_data, columns_names, logger=None): """Performs calculation % of 2d simple objects that are forecast Args: @@ -32,11 +33,21 @@ def calculate_2d_ratio_fsa_asa(input_data, columns_names): or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_ratio_fsa_asa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating percentage of 2D simple objects that are forecast.") + result = calculate_ratio_fsa_asa(filtered_data, columns_names, logger=logger) + + safe_log(logger, "debug", f"Calculation complete. 
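Since each converted function now takes an optional trailing logger argument, any object exposing the standard debug/warning methods works; a logging.Logger from Python's logging module is sufficient. A usage sketch with placeholder inputs (the array contents and column names below are illustrative, not real MODE output):

    import logging

    import numpy as np

    from metcalcpy.util.mode_2d_arearat_statistics import calculate_2d_arearat_fsa_asa

    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger("metcalcpy.example")

    input_data = np.empty((0, 2))                      # placeholder MODE rows
    columns_names = np.array(['object_type', 'area'])  # placeholder column names

    # Depending on the inputs, the wrapper either returns the statistic or logs
    # a warning and returns None; omitting logger restores the old silent behavior.
    result = calculate_2d_arearat_fsa_asa(input_data, columns_names, logger=logger)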
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate percentage of 2D simple objects that are forecast: {str(e)}.") + return None -def calculate_2d_ratio_osa_asa(input_data, columns_names): +def calculate_2d_ratio_osa_asa(input_data, columns_names, logger=None): """Performs calculation of % of 2d simple objects that are observation Args: @@ -49,11 +60,21 @@ def calculate_2d_ratio_osa_asa(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_ratio_osa_asa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating percentage of 2D simple objects that are observation.") + result = calculate_ratio_osa_asa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate percentage of 2D simple objects that are observation: {str(e)}.") + return None -def calculate_2d_ratio_asm_asa(input_data, columns_names): +def calculate_2d_ratio_asm_asa(input_data, columns_names, logger=None): """Performs calculation of % of 2d simple objects that are matched Args: @@ -66,11 +87,21 @@ def calculate_2d_ratio_asm_asa(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_ratio_asm_asa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating percentage of 2D simple objects that are matched.") + result = calculate_ratio_asm_asa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate percentage of 2D simple objects that are matched: {str(e)}.") + return None -def calculate_2d_ratio_asu_asa(input_data, columns_names): +def calculate_2d_ratio_asu_asa(input_data, columns_names, logger=None): """Performs calculation of % of 2d simple objects that are unmatched Args: @@ -83,11 +114,20 @@ def calculate_2d_ratio_asu_asa(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_ratio_asu_asa(filtered_data, columns_names) - + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating percentage of 2D simple objects that are unmatched.") + result = calculate_ratio_asu_asa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate percentage of 2D simple objects that are unmatched: {str(e)}.") + return None -def calculate_2d_ratio_fsm_fsa(input_data, columns_names): +def calculate_2d_ratio_fsm_fsa(input_data, columns_names, logger=None): """Performs calculation of % of 2d simple forecast objects that are matched Args: @@ -100,11 +140,21 @@ def calculate_2d_ratio_fsm_fsa(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_ratio_fsm_fsa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + safe_log(logger, "debug", "Calculating percentage of 2D simple forecast objects that are matched.") + result = calculate_ratio_fsm_fsa(filtered_data, columns_names) -def calculate_2d_ratio_fsu_fsa(input_data, columns_names): + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate percentage of 2D simple forecast objects that are matched: {str(e)}.") + return None + + +def calculate_2d_ratio_fsu_fsa(input_data, columns_names, logger=None): """Performs calculation of % of 2d simple forecast objects that are unmatched Args: @@ -117,11 +167,21 @@ def calculate_2d_ratio_fsu_fsa(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_ratio_fsu_fsa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + safe_log(logger, "debug", "Calculating percentage of 2D simple forecast objects that are unmatched.") + result = calculate_ratio_fsu_fsa(filtered_data, columns_names) -def calculate_2d_ratio_osm_osa(input_data, columns_names): + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate percentage of 2D simple forecast objects that are unmatched: {str(e)}.") + return None + + +def calculate_2d_ratio_osm_osa(input_data, columns_names, logger=None): """Performs calculation of % of 2d simple simple observation objects that are matched Args: @@ -134,11 +194,20 @@ def calculate_2d_ratio_osm_osa(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_ratio_osm_osa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + safe_log(logger, "debug", "Calculating percentage of 2D simple observation objects that are matched.") + result = calculate_ratio_osm_osa(filtered_data, columns_names) -def calculate_2d_ratio_osu_osa(input_data, columns_names): + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate percentage of 2D simple observation objects that are matched: {str(e)}.") + return None + +def calculate_2d_ratio_osu_osa(input_data, columns_names, logger=None): """Performs calculation of % of 2d simple simple observation objects that are unmatched Args: @@ -151,11 +220,21 @@ def calculate_2d_ratio_osu_osa(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_ratio_osu_osa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating percentage of 2D simple observation objects that are unmatched.") + result = calculate_ratio_osu_osa(filtered_data, columns_names) + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate percentage of 2D simple observation objects that are unmatched: {str(e)}.") + return None -def calculate_2d_ratio_fsm_asm(input_data, columns_names): + +def calculate_2d_ratio_fsm_asm(input_data, columns_names, logger=None): """Performs calculation of % of 2d simple matched objects that are forecasts Args: @@ -168,11 +247,22 @@ def calculate_2d_ratio_fsm_asm(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_ratio_fsm_asm(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating percentage of 2D simple matched objects that are forecasts.") + result = calculate_ratio_fsm_asm(filtered_data, columns_names) + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate percentage of 2D simple matched objects that are forecasts: {str(e)}.") + return None -def calculate_2d_ratio_osm_asm(input_data, columns_names): + + +def calculate_2d_ratio_osm_asm(input_data, columns_names, logger=None): """Performs calculation of % of 2d simple matched objects that are observations Args: @@ -185,11 +275,21 @@ def calculate_2d_ratio_osm_asm(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_ratio_osm_asm(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating percentage of 2D simple matched objects that are observations.") + result = calculate_ratio_osm_asm(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate percentage of 2D simple matched objects that are observations: {str(e)}.") + return None -def calculate_2d_ratio_fsu_asu(input_data, columns_names): +def calculate_2d_ratio_fsu_asu(input_data, columns_names, logger=None): """Performs calculation of % of 2d simple unmatched objects that are forecast Args: @@ -202,11 +302,21 @@ def calculate_2d_ratio_fsu_asu(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_ratio_fsu_asu(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating percentage of 2D simple unmatched objects that are forecasts.") + result = calculate_ratio_fsu_asu(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate percentage of 2D simple unmatched objects that are forecasts: {str(e)}.") + return None -def calculate_2d_ratio_osu_asu(input_data, columns_names): +def calculate_2d_ratio_osu_asu(input_data, columns_names, logger=None): """Performs calculation of % of 2d simple unmatched objects that are observation Args: @@ -219,11 +329,21 @@ def calculate_2d_ratio_osu_asu(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_ratio_osu_asu(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating percentage of 2D simple unmatched objects that are observations.") + result = calculate_ratio_osu_asu(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate percentage of 2D simple unmatched objects that are observations: {str(e)}.") + return None -def calculate_2d_ratio_fsa_aaa(input_data, columns_names): +def calculate_2d_ratio_fsa_aaa(input_data, columns_names, logger=None): """Performs calculation of ? Args: @@ -236,11 +356,21 @@ def calculate_2d_ratio_fsa_aaa(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_ratio_fsa_aaa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating the 2D ratio for FSA against AAA.") + result = calculate_ratio_fsa_aaa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate the 2D ratio for FSA against AAA: {str(e)}.") + return None -def calculate_2d_ratio_osa_aaa(input_data, columns_names): +def calculate_2d_ratio_osa_aaa(input_data, columns_names, logger=None): """Performs calculation of ? Args: @@ -253,11 +383,21 @@ def calculate_2d_ratio_osa_aaa(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_ratio_osa_aaa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating the ratio of osa to aaa.") + result = calculate_ratio_osa_aaa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate the ratio of osa to aaa: {str(e)}.") + return None -def calculate_2d_ratio_fsa_faa(input_data, columns_names): +def calculate_2d_ratio_fsa_faa(input_data, columns_names, logger=None): """Performs calculation of % of all 2d forecast objects that are simple Args: @@ -270,11 +410,22 @@ def calculate_2d_ratio_fsa_faa(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_ratio_fsa_faa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating the ratio of FSA (Forecast Simple Area) to FAA (All Forecast Area).") + result = calculate_ratio_fsa_faa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. 
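The filter-compute-log wrapper added to each function in these two modules is identical up to the message text. A possible consolidation (a sketch of an alternative, not what this patch does) is to express the pattern once as a decorator over the underlying mode_ratio/mode_arearat statistics:

    import functools

    from metcalcpy.util.safe_log import safe_log
    from metcalcpy.util.utils import column_data_by_name_value, TWO_D_DATA_FILTER


    def logged_2d_statistic(stat_function):
        """Wrap a statistic with the filter/log/except pattern used in this patch."""
        @functools.wraps(stat_function)
        def wrapper(input_data, columns_names, logger=None):
            try:
                safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.")
                filtered = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER)
                result = stat_function(filtered, columns_names)
                safe_log(logger, "debug", f"Calculation complete. Result: {result}.")
                return result
            except (TypeError, ZeroDivisionError, Warning, ValueError) as e:
                safe_log(logger, "warning", f"{stat_function.__name__} failed: {str(e)}.")
                return None
        return wrapper

Each public function would then reduce to one line, e.g. calculate_2d_ratio_oca_aca = logged_2d_statistic(calculate_ratio_oca_aca), which also rules out copy-paste mismatches between a function's name, its log messages, and the statistic it calls.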
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + + safe_log(logger, "warning", f"Failed to calculate the ratio of FSA to FAA: {str(e)}.") + return None -def calculate_2d_ratio_fca_faa(input_data, columns_names): +def calculate_2d_ratio_fca_faa(input_data, columns_names, logger=None): """Performs calculation of % of all 2d forecast objects that are cluster Args: @@ -287,11 +438,21 @@ def calculate_2d_ratio_fca_faa(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_ratio_fca_faa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) safe_log(logger, "debug", "Calculating the ratio of FCA (Forecast Cluster Area) to FAA (All Forecast Area).") + result = calculate_ratio_fca_faa(filtered_data, columns_names) -def calculate_2d_ratio_osa_oaa(input_data, columns_names): safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate the ratio of FCA to FAA: {str(e)}.") + return None + + +def calculate_2d_ratio_osa_oaa(input_data, columns_names, logger=None): """Performs calculation of % of all 2d observation objects that are simple Args: @@ -304,11 +465,23 @@ def calculate_2d_ratio_osa_oaa(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_ratio_osa_oaa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) safe_log(logger, "debug", "Calculating the ratio of OSA to OAA.") + result = calculate_ratio_osa_oaa(filtered_data, columns_names) safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate the ratio of OSA to OAA: {str(e)}.") + return None + finally: + warnings.filterwarnings('ignore') + + +def calculate_2d_ratio_oca_oaa(input_data, columns_names, logger=None): """Performs calculation of % of all 2d observation objects that are cluster Args: @@ -321,11 +494,21 @@ def calculate_2d_ratio_oca_oaa(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_ratio_oca_oaa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating the ratio of OCA (Observation Cluster Area) to OAA (All Observation Area).") + result = calculate_ratio_oca_oaa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate the ratio of OCA to OAA: {str(e)}.") + return None -def calculate_2d_ratio_fca_aca(input_data, columns_names): +def calculate_2d_ratio_fca_aca(input_data, columns_names, logger=None): """Performs calculation of % of 2d cluster objects that are forecast Args: @@ -342,7 +525,7 @@ def calculate_2d_ratio_fca_aca(input_data, columns_names): return calculate_ratio_fca_aca(filtered_data, columns_names) -def calculate_2d_ratio_oca_aca(input_data, columns_names): +def calculate_2d_ratio_oca_aca(input_data, columns_names, logger=None): """Performs calculation of % of 2d cluster objects that are observation Args: @@ -355,11 +538,21 @@ def calculate_2d_ratio_oca_aca(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_ratio_oca_aca(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating the ratio of OCA to ACA.") + result = calculate_ratio_oca_aca(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate the ratio of OCA to ACA: {str(e)}.") + return None -def calculate_2d_ratio_fsa_osa(input_data, columns_names): +def calculate_2d_ratio_fsa_osa(input_data, columns_names, logger=None): """Performs calculation of Ratio of 2d simple forecasts to 2d simple observations [frequency bias] Args: @@ -373,11 +566,21 @@ def calculate_2d_ratio_fsa_osa(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_ratio_fsa_osa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating the ratio of FSA to OSA (frequency bias).") + result = calculate_ratio_fsa_osa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate the ratio of FSA to OSA: {str(e)}.") + return None -def calculate_2d_ratio_osa_fsa(input_data, columns_names): +def calculate_2d_ratio_osa_fsa(input_data, columns_names, logger=None): """Performs calculation of Ratio of 2d simple observations to 2d simple forecasts [1 / frequency bias] @@ -391,11 +594,23 @@ def calculate_2d_ratio_osa_fsa(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_ratio_osa_fsa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating the ratio of OSA to FSA (1 / frequency bias).") + result = calculate_ratio_osa_fsa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate the ratio of OSA to FSA: {str(e)}.") + return None + finally: + warnings.filterwarnings('ignore') -def calculate_2d_ratio_aca_asa(input_data, columns_names): +def calculate_2d_ratio_aca_asa(input_data, columns_names, logger=None): """Performs calculation of Ratio of 2d cluster objects to 2d simple objects Args: @@ -408,11 +623,21 @@ def calculate_2d_ratio_aca_asa(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_ratio_aca_asa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating the ratio of ACA to ASA.") + result = calculate_ratio_aca_asa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate the ratio of ACA to ASA: {str(e)}.") + return None -def calculate_2d_ratio_asa_aca(input_data, columns_names): +def calculate_2d_ratio_asa_aca(input_data, columns_names, logger=None): """Performs calculation of Ratio of 2d simple objects to 2d cluster objects Args: @@ -425,11 +650,21 @@ def calculate_2d_ratio_asa_aca(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_ratio_asa_aca(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating the ratio of ASA to ACA.") + result = calculate_ratio_asa_aca(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate the ratio of ASA to ACA: {str(e)}.") + return None -def calculate_2d_ratio_fca_fsa(input_data, columns_names): +def calculate_2d_ratio_fca_fsa(input_data, columns_names, logger=None): """Performs calculation of Ratio of 2d cluster forecast objects to 2d simple forecast objects Args: @@ -442,11 +677,21 @@ def calculate_2d_ratio_fca_fsa(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_ratio_fca_fsa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating the ratio of FCA to FSA.") + result = calculate_ratio_fca_fsa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate the ratio of FCA to FSA: {str(e)}.") + return None -def calculate_2d_ratio_fsa_fca(input_data, columns_names): +def calculate_2d_ratio_fsa_fca(input_data, columns_names, logger=None): """Performs calculation of Ratio of 2d simple forecast objects to 2d cluster forecast objects Args: @@ -459,11 +704,21 @@ def calculate_2d_ratio_fsa_fca(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_ratio_fsa_fca(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating the ratio of FSA to FCA.") + result = calculate_ratio_fsa_fca(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate the ratio of FSA to FCA: {str(e)}.") + return None -def calculate_2d_ratio_oca_osa(input_data, columns_names): +def calculate_2d_ratio_oca_osa(input_data, columns_names, logger=None): """Performs calculation of Ratio of 2d cluster observation objects to 2d simple observation objects @@ -477,11 +732,21 @@ def calculate_2d_ratio_oca_osa(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_ratio_oca_osa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating the ratio of OCA to OSA.") + result = calculate_ratio_oca_osa(filtered_data, columns_names) + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate the ratio of OCA to OSA: {str(e)}.") + return None -def calculate_2d_ratio_osa_oca(input_data, columns_names): + +def calculate_2d_ratio_osa_oca(input_data, columns_names, logger=None): """Performs calculation of Ratio of 2d simple observation objects to 2d cluster observation objects @@ -495,11 +760,21 @@ def calculate_2d_ratio_osa_oca(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_ratio_osa_oca(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating the ratio of OSA to OCA.") + result = calculate_ratio_osa_oca(filtered_data, columns_names) + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate the ratio of OSA to OCA: {str(e)}.") + return None -def calculate_2d_objhits(input_data, columns_names): + +def calculate_2d_objhits(input_data, columns_names, logger=None): """Performs calculation of 2d Hits =/2 Args: @@ -512,11 +787,22 @@ def calculate_2d_objhits(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_objhits(filtered_data, columns_names) + + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating 2d object hits.") + result = calculate_objhits(filtered_data, columns_names) + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 2d object hits: {str(e)}.") + return None -def calculate_2d_objmisses(input_data, columns_names): + +def calculate_2d_objmisses(input_data, columns_names, logger=None): """Performs calculation of 2d Misses = OSU Args: @@ -529,11 +815,22 @@ def calculate_2d_objmisses(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_objmisses(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating 2d object misses.") + result = calculate_objmisses(filtered_data, columns_names) -def calculate_2d_objfas(input_data, columns_names): + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 2d object misses: {str(e)}.") + return None + + +def calculate_2d_objfas(input_data, columns_names, logger=None): """Performs calculation of 2d False Alarms = FSU Args: @@ -546,11 +843,21 @@ def calculate_2d_objfas(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_objfas(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + safe_log(logger, "debug", "Calculating 2d object false alarms.") + result = calculate_objfas(filtered_data, columns_names) -def calculate_2d_objcsi(input_data, columns_names): + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 2d object false alarms: {str(e)}.") + return None + + +def calculate_2d_objcsi(input_data, columns_names, logger=None): """Performs calculation of 2d CSI = hits //2 + OSU + FSU] Args: @@ -563,11 +870,21 @@ def calculate_2d_objcsi(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_objcsi(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + safe_log(logger, "debug", "Calculating 2d object CSI.") + result = calculate_objcsi(filtered_data, columns_names) -def calculate_2d_objpody(input_data, columns_names): + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 2d object CSI: {str(e)}.") + return None + + +def calculate_2d_objpody(input_data, columns_names, logger=None): """Performs calculation of 2d Probability of Detecting Yes PODY = hits //2 + OSU] Args: @@ -580,11 +897,21 @@ def calculate_2d_objpody(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_objpody(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + safe_log(logger, "debug", "Calculating 2d object PODY.") + result = calculate_objpody(filtered_data, columns_names) -def calculate_2d_objfar(input_data, columns_names): + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 2d object PODY: {str(e)}.") + return None + + +def calculate_2d_objfar(input_data, columns_names, logger=None): """Performs calculation of False alarm ratio FAR = false alarms //2 + FSU] Args: @@ -597,5 +924,15 @@ def calculate_2d_objfar(input_data, columns_names): calculated BASER as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) - return calculate_objfar(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on TWO_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, TWO_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating 2d object FAR.") + result = calculate_objfar(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 2d object FAR: {str(e)}.") + return None \ No newline at end of file diff --git a/metcalcpy/util/mode_3d_ratio_statistics.py b/metcalcpy/util/mode_3d_ratio_statistics.py index 85356513..22804441 100644 --- a/metcalcpy/util/mode_3d_ratio_statistics.py +++ b/metcalcpy/util/mode_3d_ratio_statistics.py @@ -10,9 +10,9 @@ from metcalcpy.util.mode_ratio_statistics import * from metcalcpy.util.utils import column_data_by_name_value, THREE_D_DATA_FILTER +from metcalcpy.util.safe_log import safe_log - -def calculate_3d_ratio_fsa_asa(input_data, columns_names): +def calculate_3d_ratio_fsa_asa(input_data, columns_names, logger=None): """Performs calculation of % of 3d simple objects that are forecast Args: input_data: 2-dimensional numpy array with data for the calculation @@ -24,11 +24,21 @@ def calculate_3d_ratio_fsa_asa(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_ratio_fsa_asa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating 3d ratio FSA/ASA.") + result = calculate_ratio_fsa_asa(filtered_data, columns_names) + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3d ratio FSA/ASA: {str(e)}.") + return None -def calculate_3d_ratio_osa_asa(input_data, columns_names): + +def calculate_3d_ratio_osa_asa(input_data, columns_names, logger=None): """Performs calculation of % of 3d simple objects that are observation Args: @@ -41,11 +51,21 @@ def calculate_3d_ratio_osa_asa(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_ratio_osa_asa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating 3d ratio OSA/ASA.") + result = calculate_ratio_osa_asa(filtered_data, columns_names) + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3d ratio OSA/ASA: {str(e)}.") + return None -def calculate_3d_ratio_asm_asa(input_data, columns_names): + +def calculate_3d_ratio_asm_asa(input_data, columns_names, logger=None): """Performs calculation of % of 3d simple objects that are matched Args: @@ -58,11 +78,21 @@ def calculate_3d_ratio_asm_asa(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_ratio_asm_asa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating 3d ratio ASM/ASA.") + result = calculate_ratio_asm_asa(filtered_data, columns_names) + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3d ratio ASM/ASA: {str(e)}.") + return None -def calculate_3d_ratio_asu_asa(input_data, columns_names): + +def calculate_3d_ratio_asu_asa(input_data, columns_names, logger=None): """Performs calculation of % of 3d simple objects that are unmatched Args: @@ -75,11 +105,21 @@ def calculate_3d_ratio_asu_asa(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_ratio_asu_asa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating 3d ratio ASU/ASA.") + result = calculate_ratio_asu_asa(filtered_data, columns_names) + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3d ratio ASU/ASA: {str(e)}.") + return None -def calculate_3d_ratio_fsm_fsa(input_data, columns_names): + +def calculate_3d_ratio_fsm_fsa(input_data, columns_names, logger=None): """Performs calculation of % of 3d simple forecast objects that are matched Args: @@ -92,11 +132,21 @@ def calculate_3d_ratio_fsm_fsa(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_ratio_fsm_fsa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating 3d ratio FSM/FSA.") + result = calculate_ratio_fsm_fsa(filtered_data, columns_names) + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3d ratio FSM/FSA: {str(e)}.") + return None -def calculate_3d_ratio_fsu_fsa(input_data, columns_names): + +def calculate_3d_ratio_fsu_fsa(input_data, columns_names, logger=None): """Performs calculation of % of 3d simple forecast objects that are unmatched Args: @@ -109,11 +159,21 @@ def calculate_3d_ratio_fsu_fsa(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_ratio_fsu_fsa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating 3d ratio FSU/FSA.") + result = calculate_ratio_fsu_fsa(filtered_data, columns_names) + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3d ratio FSU/FSA: {str(e)}.") + return None -def calculate_3d_ratio_osm_osa(input_data, columns_names): + +def calculate_3d_ratio_osm_osa(input_data, columns_names, logger=None): """Performs calculation of % of 3d simple simple observation objects that are matched Args: @@ -126,11 +186,20 @@ def calculate_3d_ratio_osm_osa(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_ratio_osm_osa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating 3d ratio OSM/OSA.") + result = calculate_ratio_osm_osa(filtered_data, columns_names) + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3d ratio OSM/OSA: {str(e)}.") + return None -def calculate_3d_ratio_osu_osa(input_data, columns_names): +def calculate_3d_ratio_osu_osa(input_data, columns_names, logger=None): """Performs calculation of % of 3d simple simple observation objects that are unmatched Args: @@ -143,11 +212,21 @@ def calculate_3d_ratio_osu_osa(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_ratio_osu_osa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating 3d ratio OSU/OSA.") + result = calculate_ratio_osu_osa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3d ratio OSU/OSA: {str(e)}.") + return None -def calculate_3d_ratio_fsm_asm(input_data, columns_names): +def calculate_3d_ratio_fsm_asm(input_data, columns_names, logger=None): """Performs calculation of % of 3d simple matched objects that are forecasts Args: @@ -160,11 +239,21 @@ def calculate_3d_ratio_fsm_asm(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_ratio_fsm_asm(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + safe_log(logger, "debug", "Calculating 3d ratio FSM/ASM.") + result = calculate_ratio_fsm_asm(filtered_data, columns_names) -def calculate_3d_ratio_osm_asm(input_data, columns_names): + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3d ratio FSM/ASM: {str(e)}.") + return None + + +def calculate_3d_ratio_osm_asm(input_data, columns_names, logger=None): """Performs calculation of % of 3d simple matched objects that are observations Args: @@ -177,11 +266,21 @@ def calculate_3d_ratio_osm_asm(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_ratio_osm_asm(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating 3d ratio OSM/ASM.") + result = calculate_ratio_osm_asm(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3d ratio OSM/ASM: {str(e)}.") + return None -def calculate_3d_ratio_fsu_asu(input_data, columns_names): +def calculate_3d_ratio_fsu_asu(input_data, columns_names, logger=None): """Performs calculation of % of 3d simple unmatched objects that are forecast Args: @@ -194,11 +293,21 @@ def calculate_3d_ratio_fsu_asu(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_ratio_fsu_asu(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + safe_log(logger, "debug", "Calculating 3d ratio FSU/ASU.") + result = calculate_ratio_fsu_asu(filtered_data, columns_names) -def calculate_3d_ratio_osu_asu(input_data, columns_names): + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3d ratio FSU/ASU: {str(e)}.") + return None + + +def calculate_3d_ratio_osu_asu(input_data, columns_names, logger=None): """Performs calculation of % of 3d simple unmatched objects that are observation Args: @@ -211,11 +320,21 @@ def calculate_3d_ratio_osu_asu(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_ratio_osu_asu(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating 3d ratio OSU/ASU.") + result = calculate_ratio_osu_asu(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3d ratio OSU/ASU: {str(e)}.") + return None -def calculate_3d_ratio_fsa_aaa(input_data, columns_names): +def calculate_3d_ratio_fsa_aaa(input_data, columns_names, logger=None): """Performs calculation of ? Args: @@ -228,11 +347,21 @@ def calculate_3d_ratio_fsa_aaa(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_ratio_fsa_aaa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + safe_log(logger, "debug", "Calculating 3D ratio FSA/AAA.") + result = calculate_ratio_fsa_aaa(filtered_data, columns_names) -def calculate_3d_ratio_osa_aaa(input_data, columns_names): + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3D ratio FSA/AAA: {str(e)}.") + return None + + +def calculate_3d_ratio_osa_aaa(input_data, columns_names, logger=None): """Performs calculation of ? Args: @@ -245,11 +374,21 @@ def calculate_3d_ratio_osa_aaa(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_ratio_osa_aaa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating 3D ratio OSA/AAA.") + result = calculate_ratio_osa_aaa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3D ratio OSA/AAA: {str(e)}.") + return None -def calculate_3d_ratio_fsa_faa(input_data, columns_names): +def calculate_3d_ratio_fsa_faa(input_data, columns_names, logger=None): """Performs calculation of % of all 3d forecast objects that are simple Args: @@ -262,11 +401,21 @@ def calculate_3d_ratio_fsa_faa(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_ratio_fsa_faa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + safe_log(logger, "debug", "Calculating 3D ratio FSA/FAA.") + result = calculate_ratio_fsa_faa(filtered_data, columns_names) -def calculate_3d_ratio_fca_faa(input_data, columns_names): + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3D ratio FSA/FAA: {str(e)}.") + return None + + +def calculate_3d_ratio_fca_faa(input_data, columns_names, logger=None): """Performs calculation of % of all 3d forecast objects that are cluster Args: @@ -279,11 +428,21 @@ def calculate_3d_ratio_fca_faa(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_ratio_fca_faa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating 3D ratio FCA/FAA.") + result = calculate_ratio_fca_faa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3D ratio FCA/FAA: {str(e)}.") + return None -def calculate_3d_ratio_osa_oaa(input_data, columns_names): +def calculate_3d_ratio_osa_oaa(input_data, columns_names, logger=None): """Performs calculation of % of all 3d observation objects that are simple Args: @@ -296,11 +455,21 @@ def calculate_3d_ratio_osa_oaa(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_ratio_osa_oaa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + safe_log(logger, "debug", "Calculating 3D ratio OSA/OAA.") + result = calculate_ratio_osa_oaa(filtered_data, columns_names) -def calculate_3d_ratio_oca_oaa(input_data, columns_names): + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3D ratio OSA/OAA: {str(e)}.") + return None + + +def calculate_3d_ratio_oca_oaa(input_data, columns_names, logger=None): """Performs calculation of % of all 3d observation objects that are cluster Args: @@ -313,11 +482,21 @@ def calculate_3d_ratio_oca_oaa(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_ratio_oca_oaa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating 3D ratio OCA/OAA.") + result = calculate_ratio_oca_oaa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3D ratio OCA/OAA: {str(e)}.") + return None -def calculate_3d_ratio_fca_aca(input_data, columns_names): +def calculate_3d_ratio_fca_aca(input_data, columns_names, logger=None): """Performs calculation of % of 3d cluster objects that are forecast Args: @@ -330,11 +509,21 @@ def calculate_3d_ratio_fca_aca(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_ratio_fca_aca(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + safe_log(logger, "debug", "Calculating 3D ratio FCA/ACA.") + result = calculate_ratio_fca_aca(filtered_data, columns_names) -def calculate_3d_ratio_fsa_osa(input_data, columns_names): + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3D ratio FCA/ACA: {str(e)}.") + return None + + +def calculate_3d_ratio_fsa_osa(input_data, columns_names, logger=None): """Performs calculation of Ratio of simple 3d forecasts to simple 3d observations [frequency bias] Args: @@ -347,11 +536,21 @@ def calculate_3d_ratio_fsa_osa(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_ratio_fsa_osa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating 3D ratio FSA/OSA (frequency bias).") + result = calculate_ratio_fsa_osa(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3D ratio FSA/OSA: {str(e)}.") + return None -def calculate_3d_ratio_osa_fsa(input_data, columns_names): +def calculate_3d_ratio_osa_fsa(input_data, columns_names, logger=None): """Performs calculation of Ratio of simple 3d observations to simple 3d forecasts [1 / frequency bias] Args: @@ -364,11 +563,21 @@ def calculate_3d_ratio_osa_fsa(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_ratio_osa_fsa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + safe_log(logger, "debug", "Calculating 3D ratio OSA/FSA [1 / frequency bias].") + result = calculate_ratio_osa_fsa(filtered_data, columns_names) -def calculate_3d_ratio_asa_aca(input_data, columns_names): + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3D ratio OSA/FSA: {str(e)}.") + return None + + +def calculate_3d_ratio_asa_aca(input_data, columns_names, logger=None): """Performs calculation of Ratio of simple 3d objects to 3d cluster objects Args: @@ -381,11 +590,21 @@ def calculate_3d_ratio_asa_aca(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_ratio_asa_aca(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating 3D ratio ASA/ACA (simple to cluster objects).") + result = calculate_ratio_asa_aca(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3D ratio ASA/ACA: {str(e)}.") + return None -def calculate_3d_ratio_fca_fsa(input_data, columns_names): +def calculate_3d_ratio_fca_fsa(input_data, columns_names, logger=None): """Performs calculation of Ratio of 3d cluster forecast objects to 3d simple forecast objects' Args: @@ -398,11 +617,21 @@ def calculate_3d_ratio_fca_fsa(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_ratio_fca_fsa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + safe_log(logger, "debug", "Calculating 3D ratio FCA/FSA (cluster to simple forecast objects).") + result = calculate_ratio_fca_fsa(filtered_data, columns_names) -def calculate_3d_ratio_fsa_fca(input_data, columns_names): + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3D ratio FCA/FSA: {str(e)}.") + return None + + +def calculate_3d_ratio_fsa_fca(input_data, columns_names, logger=None): """Performs calculation of Ratio of simple 3d forecast objects to cluster 3d forecast objects Args: @@ -415,11 +644,21 @@ def calculate_3d_ratio_fsa_fca(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_ratio_fsa_fca(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating 3D ratio FSA/FCA (simple to cluster forecast objects).") + result = calculate_ratio_fsa_fca(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3D ratio FSA/FCA: {str(e)}.") + return None -def calculate_3d_ratio_oca_osa(input_data, columns_names): +def calculate_3d_ratio_oca_osa(input_data, columns_names, logger=None): """Performs calculation of Ratio of cluster 3d observation objects to simple 3d observation objects Args: @@ -432,11 +671,21 @@ def calculate_3d_ratio_oca_osa(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_ratio_oca_osa(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + safe_log(logger, "debug", "Calculating 3D ratio OCA/OSA (cluster to simple observation objects).") + result = calculate_ratio_oca_osa(filtered_data, columns_names) -def calculate_3d_ratio_osa_oca(input_data, columns_names): + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3D ratio OCA/OSA: {str(e)}.") + return None + + +def calculate_3d_ratio_osa_oca(input_data, columns_names, logger=None): """Performs calculation of Ratio of simple 3d observation objects to cluster 3d observation objects Args: @@ -449,11 +698,21 @@ def calculate_3d_ratio_osa_oca(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_ratio_osa_oca(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating 3D ratio OSA/OCA (simple to cluster observation objects).") + result = calculate_ratio_osa_oca(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3D ratio OSA/OCA: {str(e)}.") + return None -def calculate_3d_objhits(input_data, columns_names): +def calculate_3d_objhits(input_data, columns_names, logger=None): """Performs calculation of Hits =/2 Args: @@ -466,11 +725,21 @@ def calculate_3d_objhits(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_objhits(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + safe_log(logger, "debug", "Calculating 3D object hits.") + result = calculate_objhits(filtered_data, columns_names) -def calculate_3d_objmisses(input_data, columns_names): + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3D object hits: {str(e)}.") + return None + + +def calculate_3d_objmisses(input_data, columns_names, logger=None): """Performs calculation of Misses = OSU Args: @@ -483,11 +752,21 @@ def calculate_3d_objmisses(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_objmisses(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating 3D object misses.") + result = calculate_objmisses(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3D object misses: {str(e)}.") + return None -def calculate_3d_objfas(input_data, columns_names): +def calculate_3d_objfas(input_data, columns_names, logger=None): """Performs calculation of False Alarms = FSU Args: @@ -500,11 +779,21 @@ def calculate_3d_objfas(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_objfas(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + safe_log(logger, "debug", "Calculating 3D object false alarms.") + result = calculate_objfas(filtered_data, columns_names) -def calculate_3d_objcsi(input_data, columns_names): + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3D object false alarms: {str(e)}.") + return None + + +def calculate_3d_objcsi(input_data, columns_names, logger=None): """Performs calculation of critical success index CSI = hits //2 + OSU + FSU] Args: @@ -517,11 +806,21 @@ def calculate_3d_objcsi(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_objcsi(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating 3D object critical success index (CSI).") + result = calculate_objcsi(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3D object critical success index (CSI): {str(e)}.") + return None -def calculate_3d_objpody(input_data, columns_names): +def calculate_3d_objpody(input_data, columns_names, logger=None): """Performs calculation of Probability of Detecting Yes PODY = hits //2 + OSU] Args: @@ -534,11 +833,21 @@ def calculate_3d_objpody(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_objpody(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + safe_log(logger, "debug", "Calculating 3D object Probability of Detecting Yes (PODY).") + result = calculate_objpody(filtered_data, columns_names) -def calculate_3d_objfar(input_data, columns_names): + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3D object Probability of Detecting Yes (PODY): {str(e)}.") + return None + + +def calculate_3d_objfar(input_data, columns_names, logger=None): """Performs calculation of False alarm ratio FAR = false alarms //2 + FSU] Args: @@ -551,5 +860,15 @@ def calculate_3d_objfar(input_data, columns_names): calculated statistic as float or None if some of the data values are missing or invalid """ - filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) - return calculate_objfar(filtered_data, columns_names) + try: + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating 3D object False Alarm Ratio (FAR).") + result = calculate_objfar(filtered_data, columns_names) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate 3D object False Alarm Ratio (FAR): {str(e)}.") + return None diff --git a/metcalcpy/util/mode_3d_volrat_statistics.py b/metcalcpy/util/mode_3d_volrat_statistics.py index 1f660357..0ca9fb79 100644 --- a/metcalcpy/util/mode_3d_volrat_statistics.py +++ b/metcalcpy/util/mode_3d_volrat_statistics.py @@ -13,12 +13,13 @@ """ from metcalcpy.util.mode_arearat_statistics import * from metcalcpy.util.utils import column_data_by_name_value, THREE_D_DATA_FILTER +from metcalcpy.util.safe_log import safe_log __author__ = 'Tatiana Burek' __version__ = '0.1.0' -def calculate_3d_volrat_fsa_asa(input_data, columns_names): +def calculate_3d_volrat_fsa_asa(input_data, columns_names, logger=None): """Performs calculation of Volume-weighted % of 3d simple objects that are forecast Args: @@ -31,12 +32,24 @@ def calculate_3d_volrat_fsa_asa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - columns_names_new = rename_column(columns_names) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_arearat_fsa_asa(filtered_data, columns_names_new) + try: + safe_log(logger, "debug", "Renaming columns for 3D Volume Ratio calculation.") + columns_names_new = rename_column(columns_names, logger=logger) + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) -def calculate_3d_volrat_osa_asa(input_data, columns_names): + safe_log(logger, "debug", "Calculating Volume-weighted % of 3D simple objects that are forecast.") + result = calculate_arearat_fsa_asa(filtered_data, columns_names_new) + + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Volume-weighted % of 3D simple objects that are forecast: {str(e)}.") + return None + + +def calculate_3d_volrat_osa_asa(input_data, columns_names, logger=None): """Performs calculation of Volume-weighted % of 3d simple objects that are observation Args: @@ -49,12 +62,24 @@ def calculate_3d_volrat_osa_asa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - columns_names_new = rename_column(columns_names) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_arearat_osa_asa(filtered_data, columns_names_new) + try: + safe_log(logger, "debug", "Renaming columns for 3D Volume Ratio calculation.") + columns_names_new = rename_column(columns_names, logger=logger) + + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating Volume-weighted % of 3D simple objects that are observation.") + result = calculate_arearat_osa_asa(filtered_data, columns_names_new) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Volume-weighted % of 3D simple objects that are observation: {str(e)}.") + return None -def calculate_3d_volrat_asm_asa(input_data, columns_names): +def calculate_3d_volrat_asm_asa(input_data, columns_names, logger=None): """Performs calculation of Volume-weighted % of 3d simple objects that are matched Args: @@ -67,13 +92,24 @@ def calculate_3d_volrat_asm_asa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ + try: + safe_log(logger, "debug", "Renaming columns for 3D Volume Ratio ASM/ASA calculation.") + columns_names_new = rename_column(columns_names, logger=logger) - columns_names_new = rename_column(columns_names) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_arearat_asm_asa(filtered_data, columns_names_new) + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) + safe_log(logger, "debug", "Calculating Volume-weighted % of 3D simple objects that are matched.") + result = calculate_arearat_asm_asa(filtered_data, columns_names_new) -def calculate_3d_volrat_asu_asa(input_data, columns_names): + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Volume-weighted % of 3D simple objects that are matched: {str(e)}.") + return None + + +def calculate_3d_volrat_asu_asa(input_data, columns_names, logger=None): """Performs calculation of Volume-weighted % of 3d simple objects that are unmatched Args: @@ -86,12 +122,24 @@ def calculate_3d_volrat_asu_asa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - columns_names_new = rename_column(columns_names) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_arearat_asu_asa(filtered_data, columns_names_new) + try: + safe_log(logger, "debug", "Renaming columns for 3D Volume Ratio ASU/ASA calculation.") + columns_names_new = rename_column(columns_names, logger=logger) + + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating Volume-weighted % of 3D simple objects that are unmatched.") + result = calculate_arearat_asu_asa(filtered_data, columns_names_new) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Volume-weighted % of 3D simple objects that are unmatched: {str(e)}.") + return None -def calculate_3d_volrat_fsm_fsa(input_data, columns_names): +def calculate_3d_volrat_fsm_fsa(input_data, columns_names, logger=None): """Performs calculation of Volume-weighted % of 3d simple forecast objects that are matched Args: @@ -104,12 +152,24 @@ def calculate_3d_volrat_fsm_fsa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - columns_names_new = rename_column(columns_names) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_arearat_fsm_fsa(filtered_data, columns_names_new) + try: + safe_log(logger, "debug", "Renaming columns for 3D Volume Ratio FSM/FSA calculation.") + columns_names_new = rename_column(columns_names, logger=logger) + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) -def calculate_3d_volrat_fsu_fsa(input_data, columns_names): + safe_log(logger, "debug", "Calculating Volume-weighted % of 3D simple forecast objects that are matched.") + result = calculate_arearat_fsm_fsa(filtered_data, columns_names_new) + + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Volume-weighted % of 3D simple forecast objects that are matched: {str(e)}.") + return None + + +def calculate_3d_volrat_fsu_fsa(input_data, columns_names, logger=None): """Performs calculation of Volume-weighted % of 3d simple forecast objects that are unmatched Args: @@ -122,12 +182,24 @@ def calculate_3d_volrat_fsu_fsa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - columns_names_new = rename_column(columns_names) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_arearat_fsu_fsa(filtered_data, columns_names_new) + try: + safe_log(logger, "debug", "Renaming columns for 3D Volume Ratio FSU/FSA calculation.") + columns_names_new = rename_column(columns_names, logger=logger) + + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating Volume-weighted % of 3D simple forecast objects that are unmatched.") + result = calculate_arearat_fsu_fsa(filtered_data, columns_names_new) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Volume-weighted % of 3D simple forecast objects that are unmatched: {str(e)}.") + return None -def calculate_3d_volrat_osm_osa(input_data, columns_names): +def calculate_3d_volrat_osm_osa(input_data, columns_names, logger=None): """Performs calculation of Volume-weighted % of 3d simple observation objects that are matched Args: @@ -140,12 +212,24 @@ def calculate_3d_volrat_osm_osa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - columns_names_new = rename_column(columns_names) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_arearat_osm_osa(filtered_data, columns_names_new) + try: + safe_log(logger, "debug", "Renaming columns for 3D Volume Ratio OSM/OSA calculation.") + columns_names_new = rename_column(column_names, logger=logger) + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) -def calculate_3d_volrat_osu_osa(input_data, columns_names): + safe_log(logger, "debug", "Calculating Volume-weighted % of 3D simple observation objects that are matched.") + result = calculate_arearat_osm_osa(filtered_data, columns_names_new) + + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Volume-weighted % of 3D simple observation objects that are matched: {str(e)}.") + return None + + +def calculate_3d_volrat_osu_osa(input_data, columns_names, logger=None): """Performs calculation of Volume-weighted % of 3d simple observation objects that are unmatched Args: @@ -158,12 +242,23 @@ def calculate_3d_volrat_osu_osa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - columns_names_new = rename_column(columns_names) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_arearat_osu_osa(filtered_data, columns_names_new) + try: + safe_log(logger, "debug", "Renaming columns for 3D Volume Ratio OSU/OSA calculation.") + columns_names_new = rename_column(column_names, logger=logger) + + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating Volume-weighted % of 3D simple observation objects that are unmatched.") + result = calculate_arearat_osu_osa(filtered_data, columns_names_new) + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Volume-weighted % of 3D simple observation objects that are unmatched: {str(e)}.") + return None -def calculate_3d_volrat_fsm_asm(input_data, columns_names): +def calculate_3d_volrat_fsm_asm(input_data, columns_names, logger=None): """Performs calculation of Volume-weighted % of 3d simple matched objects that are forecasts Args: @@ -176,13 +271,24 @@ def calculate_3d_volrat_fsm_asm(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - columns_names_new = rename_column(columns_names) + try: + safe_log(logger, "debug", "Renaming columns for 3D Volume Ratio FSM/ASM calculation.") + columns_names_new = rename_column(column_names, logger=logger) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_arearat_fsm_asm(filtered_data, columns_names_new) + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) + safe_log(logger, "debug", "Calculating Volume-weighted % of 3D simple matched objects that are forecasts.") + result = calculate_arearat_fsm_asm(filtered_data, columns_names_new) -def calculate_3d_volrat_osm_asm(input_data, columns_names): + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Volume-weighted % of 3D simple matched objects that are forecasts: {str(e)}.") + return None + + +def calculate_3d_volrat_osm_asm(input_data, columns_names, logger=None): """Performs calculation of Volume-weighted % of 3d simple matched objects that are observations Args: @@ -195,12 +301,24 @@ def calculate_3d_volrat_osm_asm(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - columns_names_new = rename_column(columns_names) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_arearat_osm_asm(filtered_data, columns_names_new) + try: + safe_log(logger, "debug", "Renaming columns for 3D Volume Ratio OSM/ASM calculation.") + columns_names_new = rename_column(column_names, logger=logger) + + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) + safe_log(logger, "debug", "Calculating Volume-weighted % of 3D simple matched objects that are observations.") + result = calculate_arearat_osm_asm(filtered_data, columns_names_new) -def calculate_3d_volrat_osu_asu(input_data, columns_names): + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Volume-weighted % of 3D simple matched objects that are observations: {str(e)}.") + return None + + +def calculate_3d_volrat_osu_asu(input_data, columns_names, logger=None): """Performs calculation of Volume-weighted % of 3d simple unmatched objects that are observation Args: @@ -213,12 +331,24 @@ def calculate_3d_volrat_osu_asu(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - columns_names_new = rename_column(columns_names) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_arearat_osu_asu(filtered_data, columns_names_new) + try: + safe_log(logger, "debug", "Renaming columns for 3D Volume Ratio OSU/ASU calculation.") + columns_names_new = rename_column(column_names, logger=logger) + + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) + safe_log(logger, "debug", "Calculating Volume-weighted % of 3D simple unmatched objects that are observations.") + result = calculate_arearat_osu_asu(filtered_data, columns_names_new) -def calculate_3d_volrat_fsa_aaa(input_data, columns_names): + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Volume-weighted % of 3D simple unmatched objects that are observations: {str(e)}.") + return None + + +def calculate_3d_volrat_fsa_aaa(input_data, columns_names, logger=None): """Performs calculation of Volume-weighted ? 
+ + +def calculate_3d_volrat_fsa_aaa(input_data, columns_names, logger=None): """Performs calculation of Volume-weighted ? 
Args: @@ -231,12 +361,24 @@ def calculate_3d_volrat_fsa_aaa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - columns_names_new = rename_column(columns_names) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_arearat_fsa_aaa(filtered_data, columns_names_new) + try: + safe_log(logger, "debug", "Renaming columns for 3D Volume Ratio FSA/AAA calculation.") + columns_names_new = rename_column(columns_names, logger=logger) + + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating Volume-weighted statistic.") + result = calculate_arearat_fsa_aaa(filtered_data, columns_names_new) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Volume-weighted statistic: {str(e)}.") + return None -def calculate_3d_volrat_osa_aaa(input_data, columns_names): +def calculate_3d_volrat_osa_aaa(input_data, columns_names, logger=None): """Performs calculation of Volume-weighted ? Args: @@ -249,12 +391,24 @@ def calculate_3d_volrat_osa_aaa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - columns_names_new = rename_column(columns_names) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_arearat_osa_aaa(filtered_data, columns_names_new) + try: + safe_log(logger, "debug", "Renaming columns for 3D Volume Ratio OSA/AAA calculation.") + columns_names_new = rename_column(columns_names, logger=logger) safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) -def calculate_3d_volrat_fsa_faa(input_data, columns_names): + safe_log(logger, "debug", "Calculating Volume-weighted statistic.") + result = calculate_arearat_osa_aaa(filtered_data, columns_names_new) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Volume-weighted statistic: {str(e)}.") + return None + + +def calculate_3d_volrat_fsa_faa(input_data, columns_names, logger=None): """Performs calculation of Volume-weighted % of all 3d forecast objects that are simple Args: @@ -268,12 +422,24 @@ def calculate_3d_volrat_fsa_faa(input_data, columns_names): or None if some of the data values are missing or invalid """ - columns_names_new = rename_column(columns_names) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_arearat_fsa_faa(filtered_data, columns_names_new) + try: + safe_log(logger, "debug", "Renaming columns for 3D Volume Ratio FSA/FAA calculation.") + columns_names_new = rename_column(columns_names, logger=logger) + + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating Volume-weighted statistic.") + result = calculate_arearat_fsa_faa(filtered_data, columns_names_new) + + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Volume-weighted statistic: {str(e)}.") + return None -def calculate_3d_volrat_fca_faa(input_data, columns_names): +def calculate_3d_volrat_fca_faa(input_data, columns_names, logger=None): """Performs calculation of Volume-weighted % of all 3d forecast objects that are cluster Args: @@ -286,12 +452,24 @@ def calculate_3d_volrat_fca_faa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - columns_names_new = rename_column(columns_names) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_arearat_fca_faa(filtered_data, columns_names_new) + try: + safe_log(logger, "debug", "Renaming columns for 3D Volume Ratio FCA/FAA calculation.") + columns_names_new = rename_column(columns_names, logger=logger) safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) -def calculate_3d_volrat_osa_oaa(input_data, columns_names): + safe_log(logger, "debug", "Calculating Volume-weighted statistic.") + result = calculate_arearat_fca_faa(filtered_data, columns_names_new) + + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Volume-weighted statistic: {str(e)}.") + return None + + +def calculate_3d_volrat_osa_oaa(input_data, columns_names, logger=None): """Performs calculation of Volume-weighted % of all 3d observation objects that are simple Args: @@ -304,12 +482,24 @@ def calculate_3d_volrat_osa_oaa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - columns_names_new = rename_column(columns_names) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_arearat_osa_oaa(filtered_data, columns_names_new) + try: + safe_log(logger, "debug", "Renaming columns for 3D Volume Ratio OSA/OAA calculation.") + columns_names_new = rename_column(columns_names, logger=logger) + + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating Volume-weighted statistic.") + result = calculate_arearat_osa_oaa(filtered_data, columns_names_new) + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Volume-weighted statistic: {str(e)}.") + return None -def calculate_3d_volrat_oca_oaa(input_data, columns_names): + +def calculate_3d_volrat_oca_oaa(input_data, columns_names, logger=None): """Performs calculation of Volume-weighted % of all 3d observation objects that are cluster Args: @@ -322,12 +512,24 @@ def calculate_3d_volrat_oca_oaa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - columns_names_new = rename_column(columns_names) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_arearat_oca_oaa(filtered_data, columns_names_new) + try: + safe_log(logger, "debug", "Renaming columns for 3D Volume Ratio OCA/OAA calculation.") + columns_names_new = rename_column(columns_names, logger=logger) + + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating Volume-weighted statistic for 3D cluster observation objects.") + result = calculate_arearat_oca_oaa(filtered_data, columns_names_new) + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Volume-weighted statistic: {str(e)}.") + return None
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Volume-weighted statistic: {str(e)}.") + return None -def calculate_3d_volrat_oca_aca(input_data, columns_names): + +def calculate_3d_volrat_oca_aca(input_data, columns_names, logger=None): """Performs calculation of Volume-weighted % of 3d cluster objects that are observation Args: @@ -358,12 +572,24 @@ def calculate_3d_volrat_oca_aca(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - columns_names_new = rename_column(columns_names) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_arearat_oca_aca(filtered_data, columns_names_new) + try: + safe_log(logger, "debug", "Renaming columns for 3D Volume Ratio OCA/ACA calculation.") + columns_names_new = rename_column(column_names, logger=logger) + + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating Volume-weighted statistic for 3D cluster observation objects.") + result = calculate_arearat_oca_aca(filtered_data, columns_names_new) + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Volume-weighted statistic: {str(e)}.") + return None -def calculate_3d_volrat_fsa_osa(input_data, columns_names): + +def calculate_3d_volrat_fsa_osa(input_data, columns_names, logger=None): """Performs calculation of Volume Ratio of 3d simple forecasts to 3d simple observations [frequency bias] @@ -377,13 +603,24 @@ def calculate_3d_volrat_fsa_osa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - columns_names_new = rename_column(columns_names) + try: + safe_log(logger, "debug", "Renaming columns for 3D Volume Ratio FSA/OSA calculation.") + columns_names_new = rename_column(column_names, logger=logger) + + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_arearat_fsa_osa(filtered_data, columns_names_new) + safe_log(logger, "debug", "Calculating Volume Ratio of 3D simple forecasts to 3D simple observations.") + result = calculate_arearat_fsa_osa(filtered_data, columns_names_new) + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Volume Ratio: {str(e)}.") + return None -def calculate_3d_volrat_osa_fsa(input_data, columns_names): + +def calculate_3d_volrat_osa_fsa(input_data, columns_names, logger=None): """Performs calculation of Volume Ratio of 3d simple observations to 3d simple forecasts [1 / frequency bias] @@ -397,12 +634,24 @@ def calculate_3d_volrat_osa_fsa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - columns_names_new = rename_column(columns_names) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_arearat_osa_fsa(filtered_data, columns_names_new) + try: + safe_log(logger, "debug", "Renaming columns for 3D Volume Ratio OSA/FSA calculation.") + columns_names_new = rename_column(column_names, logger=logger) + + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating Volume Ratio of 3D simple observations to 3D simple forecasts.") + result = calculate_arearat_osa_fsa(filtered_data, columns_names_new) + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Volume Ratio: {str(e)}.") + return None -def calculate_3d_volrat_aca_asa(input_data, columns_names): + +def calculate_3d_volrat_aca_asa(input_data, columns_names, logger=None): """Performs calculation of Volume Ratio of 3d cluster objects to 3d simple objects Args: @@ -415,12 +664,24 @@ def calculate_3d_volrat_aca_asa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - columns_names_new = rename_column(columns_names) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_arearat_aca_asa(filtered_data, columns_names_new) + try: + safe_log(logger, "debug", "Renaming columns for 3D Volume Ratio ACA/ASA calculation.") + columns_names_new = rename_column(column_names, logger=logger) + + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating Volume Ratio of 3D cluster objects to 3D simple objects.") + result = calculate_arearat_aca_asa(filtered_data, columns_names_new) + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Volume Ratio ACA/ASA: {str(e)}.") + return None -def calculate_3d_volrat_asa_aca(input_data, columns_names): + +def calculate_3d_volrat_asa_aca(input_data, columns_names, logger=None): """Performs calculation of Volume Ratio of 3d simple objects to 3d cluster objects Args: @@ -433,12 +694,24 @@ def calculate_3d_volrat_asa_aca(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - columns_names_new = rename_column(columns_names) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_arearat_asa_aca(filtered_data, columns_names_new) + try: + safe_log(logger, "debug", "Renaming columns for 3D Volume Ratio ASA/ACA calculation.") + columns_names_new = rename_column(column_names, logger=logger) + + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating Volume Ratio of 3D simple objects to 3D cluster objects.") + result = calculate_arearat_asa_aca(filtered_data, columns_names_new) + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Volume Ratio ASA/ACA: {str(e)}.") + return None -def calculate_3d_volrat_fca_fsa(input_data, columns_names): + +def calculate_3d_volrat_fca_fsa(input_data, columns_names, logger=None): """Performs calculation of Volume Ratio of 3d cluster forecast objects to 3d simple forecast objects @@ -452,12 +725,24 @@ def calculate_3d_volrat_fca_fsa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - columns_names_new = rename_column(columns_names) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_arearat_fca_fsa(filtered_data, columns_names_new) + try: + safe_log(logger, "debug", "Renaming columns for 3D Volume Ratio FCA/FSA calculation.") + columns_names_new = rename_column(column_names, logger=logger) + + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) + safe_log(logger, "debug", "Calculating Volume Ratio of 3D cluster forecast objects to 3D simple forecast objects.") + result = calculate_arearat_fca_fsa(filtered_data, columns_names_new) -def calculate_3d_volrat_fsa_fca(input_data, columns_names): + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Volume Ratio FCA/FSA: {str(e)}.") + return None + + +def calculate_3d_volrat_fsa_fca(input_data, columns_names, logger=None): """Performs calculation of Volume Ratio of 3d simple forecast objects to 3d cluster forecast objects @@ -471,13 +756,24 @@ def calculate_3d_volrat_fsa_fca(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - columns_names_new = rename_column(columns_names) + try: + safe_log(logger, "debug", "Renaming columns for 3D Volume Ratio FSA/FCA calculation.") + columns_names_new = rename_column(column_names, logger=logger) + + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_arearat_fsa_fca(filtered_data, columns_names_new) + safe_log(logger, "debug", "Calculating Volume Ratio of 3D simple forecast objects to 3D cluster forecast objects.") + result = calculate_arearat_fsa_fca(filtered_data, columns_names_new) + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Volume Ratio FSA/FCA: {str(e)}.") + return None -def calculate_3d_volrat_oca_osa(input_data, columns_names): + +def calculate_3d_volrat_oca_osa(input_data, columns_names, logger=None): """Performs calculation of Volume Ratio of 3d cluster observation objects to 3d simple observation objects @@ -491,12 +787,24 @@ def calculate_3d_volrat_oca_osa(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - columns_names_new = rename_column(columns_names) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_arearat_oca_osa(filtered_data, columns_names_new) + try: + safe_log(logger, "debug", "Renaming columns for 3D Volume Ratio OCA/OSA calculation.") + columns_names_new = rename_column(column_names, logger=logger) + + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating Volume Ratio of 3D cluster observation objects to 3D simple observation objects.") + result = calculate_arearat_oca_osa(filtered_data, columns_names_new) + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Volume Ratio OCA/OSA: {str(e)}.") + return None -def calculate_3d_volrat_osa_oca(input_data, columns_names): + +def calculate_3d_volrat_osa_oca(input_data, columns_names, logger=None): """Performs calculation of Volume Ratio of 3d simple observation objects to 3d cluster observation objects @@ -510,12 +818,24 @@ def calculate_3d_volrat_osa_oca(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - columns_names_new = rename_column(columns_names) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_arearat_osa_oca(filtered_data, columns_names_new) + try: + safe_log(logger, "debug", "Renaming columns for 3D Volume Ratio OSA/OCA calculation.") + columns_names_new = rename_column(column_names, logger=logger) + + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating Volume Ratio of 3D simple observation objects to 3D cluster observation objects.") + result = calculate_arearat_osa_oca(filtered_data, columns_names_new) + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Volume Ratio OSA/OCA: {str(e)}.") + return None -def calculate_3d_objvhits(input_data, columns_names): + +def calculate_3d_objvhits(input_data, columns_names, logger=None): """Performs calculation of Volume 3d Hits =/2 Args: @@ -528,13 +848,24 @@ def calculate_3d_objvhits(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - columns_names_new = rename_column(columns_names) + try: + safe_log(logger, "debug", "Renaming columns for 3D Volume Hits calculation.") + columns_names_new = rename_column(column_names, logger=logger) + + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_objahits(filtered_data, columns_names_new) + safe_log(logger, "debug", "Calculating Volume 3D Hits.") + result = calculate_objahits(filtered_data, columns_names_new) + safe_log(logger, "debug", f"Calculation complete. 
Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Volume 3D Hits: {str(e)}.") + return None -def calculate_3d_objvmisses(input_data, columns_names): + +def calculate_3d_objvmisses(input_data, columns_names, logger=None): """Performs calculation of Volume 3d Misses = OSU Args: @@ -547,12 +878,24 @@ def calculate_3d_objvmisses(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - columns_names_new = rename_column(columns_names) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_objamisses(filtered_data, columns_names_new) + try: + safe_log(logger, "debug", "Renaming columns for 3D Volume Misses calculation.") + columns_names_new = rename_column(column_names, logger=logger) + + safe_log(logger, "debug", "Filtering data based on THREE_D_DATA_FILTER.") + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) + + safe_log(logger, "debug", "Calculating Volume 3D Misses.") + result = calculate_objamisses(filtered_data, columns_names_new) + safe_log(logger, "debug", f"Calculation complete. Result: {result}.") + return result + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Failed to calculate Volume 3D Misses: {str(e)}.") + return None -def calculate_3d_objvfas(input_data, columns_names): + +def calculate_3d_objvfas(input_data, columns_names, logger=None): """Performs calculation of Volume 3d False Alarms = FSU Args: @@ -565,12 +908,17 @@ def calculate_3d_objvfas(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - columns_names_new = rename_column(columns_names) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_objafas(filtered_data, columns_names_new) + try: + columns_names_new = rename_column(column_names, logger=logger) + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) + result = calculate_objafas(filtered_data, columns_names_new) + except Exception as e: + safe_log(logger, "error", f"Failed to calculate 3d objvfas: {str(e)}") + result = None + return result -def calculate_3d_objvcsi(input_data, columns_names): +def calculate_3d_objvcsi(input_data, columns_names, logger=None): """Performs calculation of Volume 3d critical success index CSI = hits //2 + OSU + FSU] Args: @@ -583,12 +931,25 @@ def calculate_3d_objvcsi(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - columns_names_new = rename_column(columns_names) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_objacsi(filtered_data, columns_names_new) + try: + safe_log(logger, "debug", "Starting the renaming of columns.") + columns_names_new = rename_column(column_names, logger=logger) + safe_log(logger, "debug", f"Renamed columns: {columns_names_new}") + + safe_log(logger, "debug", "Filtering data based on the new column names.") + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) + safe_log(logger, "debug", f"Filtered data: {filtered_data}") + safe_log(logger, "debug", "Starting calculation of objvcsi.") + result = calculate_objacsi(filtered_data, columns_names_new) + safe_log(logger, "debug", 
f"Calculation result: {result}") + except Exception as e: + safe_log(logger, "error", f"Failed to calculate 3d objvcsi: {str(e)}") + result = None + return result -def calculate_3d_objvpody(input_data, columns_names): + +def calculate_3d_objvpody(input_data, columns_names, logger=None): """Performs calculation of Volume 3d prob of detecting yes PODY = hits //2 + OSU] Args: @@ -601,12 +962,25 @@ def calculate_3d_objvpody(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - columns_names_new = rename_column(columns_names) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_objapody(filtered_data, columns_names_new) + try: + safe_log(logger, "debug", "Starting the renaming of columns.") + columns_names_new = rename_column(column_names, logger=logger) + safe_log(logger, "debug", f"Renamed columns: {columns_names_new}") + + safe_log(logger, "debug", "Filtering data based on the new column names.") + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) + safe_log(logger, "debug", f"Filtered data: {filtered_data}") + + safe_log(logger, "debug", "Starting calculation of objvpody.") + result = calculate_objapody(filtered_data, columns_names_new) + safe_log(logger, "debug", f"Calculation result: {result}") + except Exception as e: + safe_log(logger, "error", f"Failed to calculate 3d objvpody: {str(e)}") + result = None + return result -def calculate_3d_objvfar(input_data, columns_names): +def calculate_3d_objvfar(input_data, columns_names, logger=None): """Performs calculation of Volume 3d FAR = false alarms //2 + FSU] Args: @@ -619,12 +993,25 @@ def calculate_3d_objvfar(input_data, columns_names): calculated statistic or None if some of the data values are missing or invalid """ - columns_names_new = rename_column(columns_names) - filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) - return calculate_objafar(filtered_data, columns_names_new) + try: + safe_log(logger, "debug", "Starting the renaming of columns.") + columns_names_new = rename_column(column_names, logger=logger) + safe_log(logger, "debug", f"Renamed columns: {columns_names_new}") + + safe_log(logger, "debug", "Filtering data based on the new column names.") + filtered_data = column_data_by_name_value(input_data, columns_names_new, THREE_D_DATA_FILTER) + safe_log(logger, "debug", f"Filtered data: {filtered_data}") + + safe_log(logger, "debug", "Starting calculation of objvfar.") + result = calculate_objafar(filtered_data, columns_names_new) + safe_log(logger, "debug", f"Calculation result: {result}") + except Exception as e: + safe_log(logger, "error", f"Failed to calculate 3d objvfar: {str(e)}") + result = None + return result -def rename_column(columns_names): +def rename_column(columns_names, logger=None): """Change the column name array element from 'volume' to 'area' Args: @@ -636,6 +1023,7 @@ def rename_column(columns_names): columns_names_new = [] for index, name in enumerate(columns_names): if name == 'volume': + safe_log(logger, "debug", f"Renaming column '{name}' to 'area' at index {index}.") columns_names_new.insert(index, 'area') else: columns_names_new.insert(index, name) diff --git a/metcalcpy/util/mode_arearat_statistics.py b/metcalcpy/util/mode_arearat_statistics.py index 18d453cb..5118e1d9 100644 --- a/metcalcpy/util/mode_arearat_statistics.py +++ b/metcalcpy/util/mode_arearat_statistics.py @@ -14,12 +14,13 @@ import 
warnings from metcalcpy.util.utils import round_half_up, PRECISION, \ column_data_by_name_value, sum_column_data_by_name +from metcalcpy.util.safe_log import safe_log __author__ = 'Tatiana Burek' __version__ = '0.1.0' -def calculate_arearat_fsa_asa(input_data, columns_names): +def calculate_arearat_fsa_asa(input_data, columns_names, logger=None): """Performs calculation of Area-weighted % of simple objects that are forecast Args: @@ -36,19 +37,19 @@ def calculate_arearat_fsa_asa(input_data, columns_names): nominator_filter = {'fcst_flag': 1, 'simple_flag': 1} denominator_filter = {'simple_flag': 1} - try: + try: nominator_data = column_data_by_name_value(input_data, columns_names, nominator_filter) denominator_data = column_data_by_name_value(input_data, columns_names, denominator_filter) nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator = sum_column_data_by_name(denominator_data, columns_names, 'area') - result = round_half_up(nominator / (denominator), PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + result = round_half_up(nominator / denominator, PRECISION) + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation: {str(e)}.") result = None - warnings.filterwarnings('ignore') return result -def calculate_arearat_osa_asa(input_data, columns_names): +def calculate_arearat_osa_asa(input_data, columns_names, logger=None): """Performs calculation of Area-weighted % of simple objects that are observation Args: @@ -70,14 +71,14 @@ def calculate_arearat_osa_asa(input_data, columns_names): denominator_data = column_data_by_name_value(input_data, columns_names, denominator_filter) nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator = sum_column_data_by_name(denominator_data, columns_names, 'area') - result = round_half_up(nominator / (denominator), PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + result = round_half_up(nominator / denominator, PRECISION) + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation: {str(e)}.") result = None - warnings.filterwarnings('ignore') return result -def calculate_arearat_asm_asa(input_data, columns_names): +def calculate_arearat_asm_asa(input_data, columns_names, logger=None): """Performs calculation of Area-weighted % of simple objects that are matched Args: @@ -93,20 +94,19 @@ def calculate_arearat_asm_asa(input_data, columns_names): warnings.filterwarnings('error') nominator_filter = {'simple_flag': 1, 'matched_flag': 1} denominator_filter = {'simple_flag': 1} - try: nominator_data = column_data_by_name_value(input_data, columns_names, nominator_filter) denominator_data = column_data_by_name_value(input_data, columns_names, denominator_filter) nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator = sum_column_data_by_name(denominator_data, columns_names, 'area') - result = round_half_up(nominator / (denominator), PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + result = round_half_up(nominator / denominator, PRECISION) + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation: {str(e)}.") result = None - warnings.filterwarnings('ignore') return result -def calculate_arearat_asu_asa(input_data, columns_names): +def calculate_arearat_asu_asa(input_data, columns_names, logger=None): 
"""Performs calculation of Area-weighted % of simple objects that are unmatched Args: @@ -122,20 +122,19 @@ def calculate_arearat_asu_asa(input_data, columns_names): warnings.filterwarnings('error') nominator_filter = {'simple_flag': 1, 'matched_flag': 0} denominator_filter = {'simple_flag': 1} - try: nominator_data = column_data_by_name_value(input_data, columns_names, nominator_filter) denominator_data = column_data_by_name_value(input_data, columns_names, denominator_filter) nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator = sum_column_data_by_name(denominator_data, columns_names, 'area') - result = round_half_up(nominator / (denominator), PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + result = round_half_up(nominator / denominator, PRECISION) + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation: {str(e)}.") result = None - warnings.filterwarnings('ignore') return result -def calculate_arearat_fsm_fsa(input_data, columns_names): +def calculate_arearat_fsm_fsa(input_data, columns_names, logger=None): """Performs calculation of Area-weighted % of simple forecast objects that are matched Args: @@ -151,20 +150,19 @@ def calculate_arearat_fsm_fsa(input_data, columns_names): warnings.filterwarnings('error') nominator_filter = {'fcst_flag': 1, 'simple_flag': 1, 'matched_flag': 1} denominator_filter = {'fcst_flag': 1, 'simple_flag': 1} - try: nominator_data = column_data_by_name_value(input_data, columns_names, nominator_filter) denominator_data = column_data_by_name_value(input_data, columns_names, denominator_filter) nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator = sum_column_data_by_name(denominator_data, columns_names, 'area') - result = round_half_up(nominator / (denominator), PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + result = round_half_up(nominator / denominator, PRECISION) + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation: {str(e)}.") result = None - warnings.filterwarnings('ignore') return result -def calculate_arearat_fsu_fsa(input_data, columns_names): +def calculate_arearat_fsu_fsa(input_data, columns_names, logger=None): """Performs calculation of Area-weighted % of simple forecast objects that are unmatched Args: @@ -180,20 +178,19 @@ def calculate_arearat_fsu_fsa(input_data, columns_names): warnings.filterwarnings('error') nominator_filter = {'fcst_flag': 1, 'simple_flag': 1, 'matched_flag': 0} denominator_filter = {'fcst_flag': 1, 'simple_flag': 1} - try: nominator_data = column_data_by_name_value(input_data, columns_names, nominator_filter) denominator_data = column_data_by_name_value(input_data, columns_names, denominator_filter) nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator = sum_column_data_by_name(denominator_data, columns_names, 'area') - result = round_half_up(nominator / (denominator), PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + result = round_half_up(nominator / denominator, PRECISION) + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation: {str(e)}.") result = None - warnings.filterwarnings('ignore') return result -def calculate_arearat_osm_osa(input_data, columns_names): +def calculate_arearat_osm_osa(input_data, columns_names, logger=None): """Performs 
calculation of Area-weighted % of simple simple observation objects that are matched Args: @@ -209,20 +206,20 @@ def calculate_arearat_osm_osa(input_data, columns_names): warnings.filterwarnings('error') nominator_filter = {'fcst_flag': 0, 'simple_flag': 1, 'matched_flag': 1} denominator_filter = {'fcst_flag': 0, 'simple_flag': 1} - try: nominator_data = column_data_by_name_value(input_data, columns_names, nominator_filter) denominator_data = column_data_by_name_value(input_data, columns_names, denominator_filter) nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator = sum_column_data_by_name(denominator_data, columns_names, 'area') - result = round_half_up(nominator / (denominator), PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + result = round_half_up(nominator / denominator, PRECISION) + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation: {str(e)}.") result = None warnings.filterwarnings('ignore') return result -def calculate_arearat_osu_osa(input_data, columns_names): +def calculate_arearat_osu_osa(input_data, columns_names, logger=None): """Performs calculation of Area-weighted % of simple simple observation objects that are unmatched @@ -239,20 +236,20 @@ def calculate_arearat_osu_osa(input_data, columns_names): warnings.filterwarnings('error') nominator_filter = {'fcst_flag': 0, 'simple_flag': 1, 'matched_flag': 0} denominator_filter = {'fcst_flag': 0, 'simple_flag': 1} - try: nominator_data = column_data_by_name_value(input_data, columns_names, nominator_filter) denominator_data = column_data_by_name_value(input_data, columns_names, denominator_filter) nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator = sum_column_data_by_name(denominator_data, columns_names, 'area') - result = round_half_up(nominator / (denominator), PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + result = round_half_up(nominator / denominator, PRECISION) + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation: {str(e)}.") result = None warnings.filterwarnings('ignore') return result -def calculate_arearat_fsm_asm(input_data, columns_names): +def calculate_arearat_fsm_asm(input_data, columns_names, logger=None): """Performs calculation of Area-weighted % of simple matched objects that are forecasts Args: @@ -268,20 +265,20 @@ def calculate_arearat_fsm_asm(input_data, columns_names): warnings.filterwarnings('error') nominator_filter = {'fcst_flag': 1, 'simple_flag': 1, 'matched_flag': 0} denominator_filter = {'fcst_flag': 1, 'simple_flag': 1} - try: nominator_data = column_data_by_name_value(input_data, columns_names, nominator_filter) denominator_data = column_data_by_name_value(input_data, columns_names, denominator_filter) nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator = sum_column_data_by_name(denominator_data, columns_names, 'area') - result = round_half_up(nominator / (denominator), PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + result = round_half_up(nominator / denominator, PRECISION) + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation: {str(e)}.") result = None warnings.filterwarnings('ignore') return result -def calculate_arearat_osm_asm(input_data, columns_names): +def calculate_arearat_osm_asm(input_data, 
columns_names, logger=None): """Performs calculation of Area-weighted % of simple matched objects that are observations Args: @@ -297,20 +294,20 @@ def calculate_arearat_osm_asm(input_data, columns_names): warnings.filterwarnings('error') nominator_filter = {'fcst_flag': 0, 'simple_flag': 1, 'matched_flag': 1} denominator_filter = {'simple_flag': 1, 'matched_flag': 1} - - try: + try: nominator_data = column_data_by_name_value(input_data, columns_names, nominator_filter) denominator_data = column_data_by_name_value(input_data, columns_names, denominator_filter) nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator = sum_column_data_by_name(denominator_data, columns_names, 'area') - result = round_half_up(nominator / (denominator), PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + result = round_half_up(nominator / denominator, PRECISION) + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation: {str(e)}.") result = None warnings.filterwarnings('ignore') return result -def calculate_arearat_osu_asu(input_data, columns_names): +def calculate_arearat_osu_asu(input_data, columns_names, logger=None): """Performs calculation of Area-weighted % of simple unmatched objects that are observation Args: @@ -332,14 +329,16 @@ def calculate_arearat_osu_asu(input_data, columns_names): denominator_data = column_data_by_name_value(input_data, columns_names, denominator_filter) nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator = sum_column_data_by_name(denominator_data, columns_names, 'area') - result = round_half_up(nominator / (denominator), PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + result = round_half_up(nominator / denominator, PRECISION) + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation: {str(e)}.") result = None + warnings.filterwarnings('ignore') return result -def calculate_arearat_fsa_aaa(input_data, columns_names): +def calculate_arearat_fsa_aaa(input_data, columns_names, logger=None): """Performs calculation of Area-weighted ? Args: @@ -359,14 +358,15 @@ def calculate_arearat_fsa_aaa(input_data, columns_names): nominator_data = column_data_by_name_value(input_data, columns_names, nominator_filter) nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator = sum_column_data_by_name(input_data, columns_names, 'area') - result = round_half_up(nominator / (denominator), PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + result = round_half_up(nominator / denominator, PRECISION) + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation: {str(e)}.") result = None warnings.filterwarnings('ignore') return result -def calculate_arearat_osa_aaa(input_data, columns_names): +def calculate_arearat_osa_aaa(input_data, columns_names, logger=None): """Performs calculation of Area-weighted ? 
Args: @@ -381,19 +381,19 @@ def calculate_arearat_osa_aaa(input_data, columns_names): """ warnings.filterwarnings('error') nominator_filter = {'fcst_flag': 0, 'simple_flag': 1} - try: nominator_data = column_data_by_name_value(input_data, columns_names, nominator_filter) nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator = sum_column_data_by_name(input_data, columns_names, 'area') - result = round_half_up(nominator / (denominator), PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + result = round_half_up(nominator / denominator, PRECISION) + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation: {str(e)}.") result = None warnings.filterwarnings('ignore') return result -def calculate_arearat_fsa_faa(input_data, columns_names): +def calculate_arearat_fsa_faa(input_data, columns_names, logger=None): """Performs calculation of Area-weighted % of all forecast objects that are simple Args: @@ -409,20 +409,20 @@ def calculate_arearat_fsa_faa(input_data, columns_names): warnings.filterwarnings('error') nominator_filter = {'fcst_flag': 1, 'simple_flag': 1} denominator_filter = {'fcst_flag': 1} - try: nominator_data = column_data_by_name_value(input_data, columns_names, nominator_filter) nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator_data = column_data_by_name_value(input_data, columns_names, denominator_filter) denominator = sum_column_data_by_name(denominator_data, columns_names, 'area') result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation: {str(e)}.") result = None warnings.filterwarnings('ignore') return result -def calculate_arearat_fca_faa(input_data, columns_names): +def calculate_arearat_fca_faa(input_data, columns_names, logger=None): """Performs calculation of Area-weighted % of all forecast objects that are cluster Args: @@ -438,20 +438,20 @@ def calculate_arearat_fca_faa(input_data, columns_names): warnings.filterwarnings('error') nominator_filter = {'fcst_flag': 1, 'simple_flag': 0} denominator_filter = {'fcst_flag': 1} - try: nominator_data = column_data_by_name_value(input_data, columns_names, nominator_filter) nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator_data = column_data_by_name_value(input_data, columns_names, denominator_filter) denominator = sum_column_data_by_name(denominator_data, columns_names, 'area') result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation: {str(e)}.") result = None warnings.filterwarnings('ignore') return result -def calculate_arearat_osa_oaa(input_data, columns_names): +def calculate_arearat_osa_oaa(input_data, columns_names, logger=None): """Performs calculation of Area-weighted % of all observation objects that are simple Args: @@ -467,20 +467,20 @@ def calculate_arearat_osa_oaa(input_data, columns_names): warnings.filterwarnings('error') nominator_filter = {'fcst_flag': 0, 'simple_flag': 1} denominator_filter = {'fcst_flag': 0} - try: nominator_data = column_data_by_name_value(input_data, columns_names, nominator_filter) nominator = sum_column_data_by_name(nominator_data, 
columns_names, 'area') denominator_data = column_data_by_name_value(input_data, columns_names, denominator_filter) denominator = sum_column_data_by_name(denominator_data, columns_names, 'area') result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation: {str(e)}.") result = None warnings.filterwarnings('ignore') return result -def calculate_arearat_oca_oaa(input_data, columns_names): +def calculate_arearat_oca_oaa(input_data, columns_names, logger=None): """Performs calculation of Area-weighted % of all observation objects that are cluster Args: @@ -496,20 +496,20 @@ def calculate_arearat_oca_oaa(input_data, columns_names): warnings.filterwarnings('error') nominator_filter = {'fcst_flag': 0, 'simple_flag': 0} denominator_filter = {'fcst_flag': 0} - try: nominator_data = column_data_by_name_value(input_data, columns_names, nominator_filter) nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator_data = column_data_by_name_value(input_data, columns_names, denominator_filter) denominator = sum_column_data_by_name(denominator_data, columns_names, 'area') result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation: {str(e)}.") result = None warnings.filterwarnings('ignore') return result -def calculate_arearat_fca_aca(input_data, columns_names): +def calculate_arearat_fca_aca(input_data, columns_names, logger=None): """Performs calculation of Area-weighted % of cluster objects that are forecast Args: @@ -525,20 +525,20 @@ def calculate_arearat_fca_aca(input_data, columns_names): warnings.filterwarnings('error') nominator_filter = {'fcst_flag': 1, 'simple_flag': 0} denominator_filter = {'simple_flag': 0} - try: nominator_data = column_data_by_name_value(input_data, columns_names, nominator_filter) nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator_data = column_data_by_name_value(input_data, columns_names, denominator_filter) denominator = sum_column_data_by_name(denominator_data, columns_names, 'area') result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation: {str(e)}.") result = None warnings.filterwarnings('ignore') return result -def calculate_arearat_oca_aca(input_data, columns_names): +def calculate_arearat_oca_aca(input_data, columns_names, logger=None): """Performs calculation of Area-weighted % of cluster objects that are observation Args: @@ -554,20 +554,20 @@ def calculate_arearat_oca_aca(input_data, columns_names): warnings.filterwarnings('error') nominator_filter = {'fcst_flag': 0, 'simple_flag': 0} denominator_filter = {'simple_flag': 0} - try: nominator_data = column_data_by_name_value(input_data, columns_names, nominator_filter) nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator_data = column_data_by_name_value(input_data, columns_names, denominator_filter) denominator = sum_column_data_by_name(denominator_data, columns_names, 'area') result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, 
Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation: {str(e)}.") result = None warnings.filterwarnings('ignore') return result -def calculate_arearat_fsa_osa(input_data, columns_names): +def calculate_arearat_fsa_osa(input_data, columns_names, logger=None): """Performs calculation of Area Ratio of simple forecasts to simple observations [frequency bias] @@ -584,20 +584,20 @@ def calculate_arearat_fsa_osa(input_data, columns_names): warnings.filterwarnings('error') nominator_filter = {'fcst_flag': 1, 'simple_flag': 1} denominator_filter = {'fcst_flag': 0, 'simple_flag': 1} - try: nominator_data = column_data_by_name_value(input_data, columns_names, nominator_filter) - nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator_data = column_data_by_name_value(input_data, columns_names, denominator_filter) + nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator = sum_column_data_by_name(denominator_data, columns_names, 'area') result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_arearat_osa_fsa(input_data, columns_names): +def calculate_arearat_osa_fsa(input_data, columns_names, logger=None): """Performs calculation of Area Ratio of simple observations to simple forecasts [1 / frequency bias] @@ -614,20 +614,20 @@ def calculate_arearat_osa_fsa(input_data, columns_names): warnings.filterwarnings('error') nominator_filter = {'fcst_flag': 0, 'simple_flag': 1} denominator_filter = {'fcst_flag': 1, 'simple_flag': 1} - try: nominator_data = column_data_by_name_value(input_data, columns_names, nominator_filter) - nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator_data = column_data_by_name_value(input_data, columns_names, denominator_filter) + nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator = sum_column_data_by_name(denominator_data, columns_names, 'area') result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_arearat_aca_asa(input_data, columns_names): +def calculate_arearat_aca_asa(input_data, columns_names, logger=None): """Performs calculation of Area Ratio of cluster objects to simple objects Args: @@ -643,20 +643,20 @@ def calculate_arearat_aca_asa(input_data, columns_names): warnings.filterwarnings('error') nominator_filter = {'simple_flag': 0} denominator_filter = {'simple_flag': 1} - try: nominator_data = column_data_by_name_value(input_data, columns_names, nominator_filter) - nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator_data = column_data_by_name_value(input_data, columns_names, denominator_filter) + nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator = sum_column_data_by_name(denominator_data, columns_names, 'area') result = round_half_up(nominator / denominator, PRECISION) - except
(TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_arearat_asa_aca(input_data, columns_names): +def calculate_arearat_asa_aca(input_data, columns_names, logger=None): """Performs calculation of Area Ratio of simple objects to cluster objects Args: @@ -672,20 +672,20 @@ def calculate_arearat_asa_aca(input_data, columns_names): warnings.filterwarnings('error') nominator_filter = {'simple_flag': 1} denominator_filter = {'simple_flag': 0} - try: nominator_data = column_data_by_name_value(input_data, columns_names, nominator_filter) - nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator_data = column_data_by_name_value(input_data, columns_names, denominator_filter) + nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator = sum_column_data_by_name(denominator_data, columns_names, 'area') result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_arearat_fca_fsa(input_data, columns_names): +def calculate_arearat_fca_fsa(input_data, columns_names, logger=None): """Performs calculation of Area Ratio of cluster forecast objects to simple forecast objects Args: @@ -701,20 +701,20 @@ def calculate_arearat_fca_fsa(input_data, columns_names): warnings.filterwarnings('error') nominator_filter = {'fcst_flag': 1, 'simple_flag': 0} denominator_filter = {'fcst_flag': 1, 'simple_flag': 1} - try: nominator_data = column_data_by_name_value(input_data, columns_names, nominator_filter) - nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator_data = column_data_by_name_value(input_data, columns_names, denominator_filter) + nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator = sum_column_data_by_name(denominator_data, columns_names, 'area') result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_arearat_fsa_fca(input_data, columns_names): +def calculate_arearat_fsa_fca(input_data, columns_names, logger=None): """Performs calculation of Area Ratio of simple forecast objects to cluster forecast objects Args: @@ -730,20 +730,20 @@ def calculate_arearat_fsa_fca(input_data, columns_names): warnings.filterwarnings('error') nominator_filter = {'fcst_flag': 1, 'simple_flag': 1} denominator_filter = {'fcst_flag': 1, 'simple_flag': 0} - try: nominator_data = column_data_by_name_value(input_data, columns_names, nominator_filter) - nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator_data = column_data_by_name_value(input_data, columns_names, denominator_filter) + nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator = sum_column_data_by_name(denominator_data, columns_names, 'area') result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, 
ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_arearat_oca_osa(input_data, columns_names): +def calculate_arearat_oca_osa(input_data, columns_names, logger=None): """Performs calculation of Area Ratio of cluster observation objects to simple observation objects @@ -760,20 +760,20 @@ def calculate_arearat_oca_osa(input_data, columns_names): warnings.filterwarnings('error') nominator_filter = {'fcst_flag': 0, 'simple_flag': 0} denominator_filter = {'fcst_flag': 0, 'simple_flag': 1} - try: nominator_data = column_data_by_name_value(input_data, columns_names, nominator_filter) - nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator_data = column_data_by_name_value(input_data, columns_names, denominator_filter) + nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator = sum_column_data_by_name(denominator_data, columns_names, 'area') result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_arearat_osa_oca(input_data, columns_names): +def calculate_arearat_osa_oca(input_data, columns_names, logger=None): """Performs calculation of Area Ratio of simple observation objects to cluster observation objects @@ -790,20 +790,20 @@ def calculate_arearat_osa_oca(input_data, columns_names): warnings.filterwarnings('error') nominator_filter = {'fcst_flag': 0, 'simple_flag': 1} denominator_filter = {'fcst_flag': 0, 'simple_flag': 0} - try: nominator_data = column_data_by_name_value(input_data, columns_names, nominator_filter) - nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator_data = column_data_by_name_value(input_data, columns_names, denominator_filter) + nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator = sum_column_data_by_name(denominator_data, columns_names, 'area') result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_objahits(input_data, columns_names): +def calculate_objahits(input_data, columns_names, logger=None): """Performs calculation of Area Hits = (area of matched simple objects)/2 Args: @@ -818,19 +818,19 @@ def calculate_objahits(input_data, columns_names): """ warnings.filterwarnings('error') nominator_filter = {'simple_flag': 1, 'matched_flag': 1} - try: nominator_data = column_data_by_name_value(input_data, columns_names, nominator_filter) nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') denominator = 2 result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_objamisses(input_data, columns_names): +def calculate_objamisses(input_data, columns_names, logger=None): """Performs calculation of Area Misses = OSU Args: @@ -845,18 +845,18 @@ def
calculate_objamisses(input_data, columns_names): """ warnings.filterwarnings('error') nominator_filter = {'fcst_flag': 0, 'simple_flag': 1, 'matched_flag': 0} - try: nominator_data = column_data_by_name_value(input_data, columns_names, nominator_filter) nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') result = round_half_up(nominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_objafas(input_data, columns_names): +def calculate_objafas(input_data, columns_names, logger=None): """Performs calculation of Area False Alarms = FSU Args: @@ -871,18 +871,18 @@ def calculate_objafas(input_data, columns_names): """ warnings.filterwarnings('error') nominator_filter = {'fcst_flag': 1, 'simple_flag': 1, 'matched_flag': 0} - try: nominator_data = column_data_by_name_value(input_data, columns_names, nominator_filter) nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') result = round_half_up(nominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_objacsi(input_data, columns_names): +def calculate_objacsi(input_data, columns_names, logger=None): """Performs calculation of Area critical success index CSI = hits / [hits + OSU + FSU] Args: @@ -899,26 +899,22 @@ def calculate_objacsi(input_data, columns_names): nominator_filter = {'simple_flag': 1, 'matched_flag': 1} denominator_filter_1 = {'simple_flag': 1, 'matched_flag': 1} denominator_filter_2 = {'simple_flag': 1, 'matched_flag': 0} - try: nominator_data = column_data_by_name_value(input_data, columns_names, nominator_filter) nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') / 2 - - denominator_1_data = \ - column_data_by_name_value(input_data, columns_names, denominator_filter_1) + denominator_1_data = column_data_by_name_value(input_data, columns_names, denominator_filter_1) denominator_1 = sum_column_data_by_name(denominator_1_data, columns_names, 'area') / 2 - - denominator_2_data = \ - column_data_by_name_value(input_data, columns_names, denominator_filter_2) + denominator_2_data = column_data_by_name_value(input_data, columns_names, denominator_filter_2) denominator_2 = sum_column_data_by_name(denominator_2_data, columns_names, 'area') result = round_half_up(nominator / (denominator_1 + denominator_2), PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_objapody(input_data, columns_names): +def calculate_objapody(input_data, columns_names, logger=None): """Performs calculation of Area prob of detecting yes PODY = hits / [hits + OSU] Args: @@ -939,22 +935,19 @@ def calculate_objapody(input_data, columns_names): try: nominator_data = column_data_by_name_value(input_data, columns_names, nominator_filter) nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') - - denominator_1_data = \ - column_data_by_name_value(input_data, columns_names, denominator_filter_1) + denominator_1_data =
column_data_by_name_value(input_data, columns_names, denominator_filter_1) denominator_1 = sum_column_data_by_name(denominator_1_data, columns_names, 'area') - - denominator_2_data = \ - column_data_by_name_value(input_data, columns_names, denominator_filter_2) + denominator_2_data = column_data_by_name_value(input_data, columns_names, denominator_filter_2) denominator_2 = sum_column_data_by_name(denominator_2_data, columns_names, 'area') result = round_half_up(nominator / (denominator_1 + 2 * denominator_2), PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_objafar(input_data, columns_names): +def calculate_objafar(input_data, columns_names, logger=None): """Performs calculation of Area FAR = false alarms / [hits + FSU] Args: @@ -975,16 +968,13 @@ def calculate_objafar(input_data, columns_names): try: nominator_data = column_data_by_name_value(input_data, columns_names, nominator_filter) nominator = sum_column_data_by_name(nominator_data, columns_names, 'area') - - denominator_1_data = \ - column_data_by_name_value(input_data, columns_names, denominator_filter_1) + denominator_1_data = column_data_by_name_value(input_data, columns_names, denominator_filter_1) denominator_1 = sum_column_data_by_name(denominator_1_data, columns_names, 'area') - - denominator_2_data = \ - column_data_by_name_value(input_data, columns_names, denominator_filter_2) + denominator_2_data = column_data_by_name_value(input_data, columns_names, denominator_filter_2) denominator_2 = sum_column_data_by_name(denominator_2_data, columns_names, 'area') result = round_half_up(nominator / (denominator_1 + 2 * denominator_2), PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') - return result + return result \ No newline at end of file diff --git a/metcalcpy/util/mode_ratio_statistics.py b/metcalcpy/util/mode_ratio_statistics.py index a61d2f28..57b3d813 100644 --- a/metcalcpy/util/mode_ratio_statistics.py +++ b/metcalcpy/util/mode_ratio_statistics.py @@ -13,12 +13,13 @@ """ import warnings from metcalcpy.util.utils import round_half_up, PRECISION, nrow_column_data_by_name_value +from metcalcpy.util.safe_log import safe_log __author__ = 'Tatiana Burek' __version__ = '0.1.0' -def calculate_ratio_asm_asa(input_data, columns_names): +def calculate_ratio_asm_asa(input_data, columns_names, logger=None): """Performs calculation of % of simple objects that are matched Args: @@ -35,16 +36,17 @@ def calculate_ratio_asm_asa(input_data, columns_names): nominator_filter = {'simple_flag': 1, 'matched_flag': 1} denominator_filter = {'simple_flag': 1} try: - nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) + nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return
result -def calculate_ratio_fsa_asa(input_data, columns_names): +def calculate_ratio_fsa_asa(input_data, columns_names, logger=None): """Performs calculation of % of simple objects that are forecast Args: @@ -61,16 +63,17 @@ def calculate_ratio_fsa_asa(input_data, columns_names): nominator_filter = {'fcst_flag': 1, 'simple_flag': 1} denominator_filter = {'simple_flag': 1} try: - nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) - denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) + nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) + denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_ratio_osa_asa(input_data, columns_names): +def calculate_ratio_osa_asa(input_data, columns_names, logger=None): """Performs calculation of % of simple objects that are observation Args: @@ -86,17 +89,18 @@ def calculate_ratio_osa_asa(input_data, columns_names): warnings.filterwarnings('error') nominator_filter = {'fcst_flag': 0, 'simple_flag': 1} denominator_filter = {'simple_flag': 1} - try: - nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) + try: + nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_ratio_asu_asa(input_data, columns_names): +def calculate_ratio_asu_asa(input_data, columns_names, logger=None): """Performs calculation of % of simple objects that are unmatched Args: @@ -116,13 +120,14 @@ def calculate_ratio_asu_asa(input_data, columns_names): nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_ratio_fsm_fsa(input_data, columns_names): +def calculate_ratio_fsm_fsa(input_data, columns_names, logger=None): """Performs calculation of % of simple forecast objects that are matched Args: @@ -142,13 +147,14 @@ def calculate_ratio_fsm_fsa(input_data, columns_names): nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") 
result = None warnings.filterwarnings('ignore') return result -def calculate_ratio_fsu_fsa(input_data, columns_names): +def calculate_ratio_fsu_fsa(input_data, columns_names, logger=None): """Performs calculation of % of simple forecast objects that are unmatched Args: @@ -166,15 +172,16 @@ def calculate_ratio_fsu_fsa(input_data, columns_names): denominator_filter = {'fcst_flag': 1, 'simple_flag': 1} try: nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) - denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) + denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_ratio_osm_osa(input_data, columns_names): +def calculate_ratio_osm_osa(input_data, columns_names, logger=None): """Performs calculation of % of simple observation objects that are matched Args: @@ -191,16 +198,18 @@ def calculate_ratio_osm_osa(input_data, columns_names): nominator_filter = {'fcst_flag': 0, 'simple_flag': 1, 'matched_flag': 1} denominator_filter = {'fcst_flag': 0, 'simple_flag': 1} try: - nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) + nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_ratio_osu_osa(input_data, columns_names): +def calculate_ratio_osu_osa(input_data, columns_names, logger=None): """Performs calculation of % of simple observation objects that are unmatched Args: @@ -218,15 +227,16 @@ def calculate_ratio_osu_osa(input_data, columns_names): denominator_filter = {'fcst_flag': 0, 'simple_flag': 1} try: nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) - denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) + denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_ratio_fsm_asm(input_data, columns_names): +def calculate_ratio_fsm_asm(input_data, columns_names, logger=None): """Performs calculation of % of simple matched objects that are forecasts Args: @@ -244,15 +254,16 @@ def calculate_ratio_fsm_asm(input_data, columns_names): denominator_filter = {'matched_flag': 1, 'simple_flag': 1} try: nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) - denominator = nrow_column_data_by_name_value(input_data,
columns_names, denominator_filter) result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_ratio_osm_asm(input_data, columns_names): +def calculate_ratio_osm_asm(input_data, columns_names, logger=None): """Performs calculation of % of simple matched objects that are observations Args: @@ -268,17 +279,18 @@ def calculate_ratio_osm_asm(input_data, columns_names): warnings.filterwarnings('error') nominator_filter = {'fcst_flag': 0, 'simple_flag': 1, 'matched_flag': 1} denominator_filter = {'matched_flag': 1, 'simple_flag': 1} - try: - nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) - denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) + try: + nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) + denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_ratio_fsu_asu(input_data, columns_names): +def calculate_ratio_fsu_asu(input_data, columns_names, logger=None): """Performs calculation of % of simple unmatched objects that are forecast Args: @@ -294,17 +306,18 @@ def calculate_ratio_fsu_asu(input_data, columns_names): warnings.filterwarnings('error') nominator_filter = {'fcst_flag': 1, 'simple_flag': 1, 'matched_flag': 0} denominator_filter = {'matched_flag': 0, 'simple_flag': 1} - try: - nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) - denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) + try: + nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) + denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_ratio_osu_asu(input_data, columns_names): +def calculate_ratio_osu_asu(input_data, columns_names, logger=None): """Performs calculation of % of simple unmatched objects that are observation Args: @@ -320,17 +333,18 @@ def calculate_ratio_osu_asu(input_data, columns_names): warnings.filterwarnings('error') nominator_filter = {'fcst_flag': 0, 'simple_flag': 1, 'matched_flag': 0} denominator_filter = {'matched_flag': 0, 'simple_flag': 1} - try: - nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) - denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) + try: + nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) + denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) result = round_half_up(nominator / denominator, PRECISION) - except
(TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_ratio_fsa_aaa(input_data, columns_names): +def calculate_ratio_fsa_aaa(input_data, columns_names, logger=None): """Performs calculation of % of all objects that are simple forecasts Args: @@ -348,16 +362,17 @@ def calculate_ratio_fsa_aaa(input_data, columns_names): nominator_filter = {'fcst_flag': 1, 'simple_flag': 1} denominator_filter = {} try: - nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) - denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) - result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) + denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) + result = round_half_up(nominator / denominator, PRECISION) + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_ratio_osa_aaa(input_data, columns_names): +def calculate_ratio_osa_aaa(input_data, columns_names, logger=None): """Performs calculation of % of all objects that are simple observations Args: @@ -374,17 +389,18 @@ def calculate_ratio_osa_aaa(input_data, columns_names): # Note: the denominator is the count of all object ids nominator_filter = {'fcst_flag': 0, 'simple_flag': 1} denominator_filter = {} - try: - nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) - denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) - result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + try: + nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) + denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) + result = round_half_up(nominator / denominator, PRECISION) + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def
calculate_ratio_fca_faa(input_data, columns_names): +def calculate_ratio_fca_faa(input_data, columns_names, logger=None): """Performs calculation of % of all forecast objects that are cluster Args: @@ -427,16 +444,17 @@ def calculate_ratio_fca_faa(input_data, columns_names): nominator_filter = {'fcst_flag': 1, 'simple_flag': 0} denominator_filter = {'fcst_flag': 1} try: - nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) - denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) + nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) + denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_ratio_osa_oaa(input_data, columns_names): +def calculate_ratio_osa_oaa(input_data, columns_names, logger=None): """Performs calculation of % of all observation objects that are simple Args: @@ -453,16 +471,17 @@ def calculate_ratio_osa_oaa(input_data, columns_names): nominator_filter = {'fcst_flag': 0, 'simple_flag': 1} denominator_filter = {'fcst_flag': 0} try: - nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) - denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) + nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) + denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_ratio_oca_oaa(input_data, columns_names): +def calculate_ratio_oca_oaa(input_data, columns_names, logger=None): """Performs calculation of % of all observation objects that are cluster Args: @@ -478,17 +497,18 @@ def calculate_ratio_oca_oaa(input_data, columns_names): warnings.filterwarnings('error') nominator_filter = {'fcst_flag': 0, 'simple_flag': 0} denominator_filter = {'fcst_flag': 0} - try: - nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) - denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) + try: + nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) + denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_ratio_fca_aca(input_data, columns_names): +def calculate_ratio_fca_aca(input_data, columns_names, logger=None): """Performs calculation of % of cluster objects that are forecast Args: @@ -504,17 +524,18 @@ def calculate_ratio_fca_aca(input_data, columns_names): warnings.filterwarnings('error') nominator_filter = 
{'fcst_flag': 1, 'simple_flag': 0} denominator_filter = {'simple_flag': 0} - try: - nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) - denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) + try: + nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) + denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_ratio_oca_aca(input_data, columns_names): +def calculate_ratio_oca_aca(input_data, columns_names, logger=None): """Performs calculation of % of cluster objects that are observation Args: @@ -530,17 +551,18 @@ def calculate_ratio_oca_aca(input_data, columns_names): warnings.filterwarnings('error') nominator_filter = {'fcst_flag': 0, 'simple_flag': 0} denominator_filter = {'simple_flag': 0} - try: - nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) - denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) + try: + nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) + denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_ratio_fsa_osa(input_data, columns_names): +def calculate_ratio_fsa_osa(input_data, columns_names, logger=None): """Performs calculation of Ratio of simple forecasts to simple observations [frequency bias] Args: @@ -558,15 +580,16 @@ def calculate_ratio_fsa_osa(input_data, columns_names): denominator_filter = {'fcst_flag': 0, 'simple_flag': 1} try: nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) - denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) + denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_ratio_osa_fsa(input_data, columns_names): +def calculate_ratio_osa_fsa(input_data, columns_names, logger=None): """Performs calculation of Ratio of simple observations to simple forecasts [1 / frequency bias] Args: @@ -582,17 +605,18 @@ def calculate_ratio_osa_fsa(input_data, columns_names): warnings.filterwarnings('error') nominator_filter = {'fcst_flag': 0, 'simple_flag': 1} denominator_filter = {'fcst_flag': 1, 'simple_flag': 1} - try: - nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) - denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) - result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, 
ZeroDivisionError, Warning, ValueError): + try: + nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) + denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) + result = round_half_up(nominator / denominator, PRECISION) + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_ratio_aca_asa(input_data, columns_names): +def calculate_ratio_aca_asa(input_data, columns_names, logger=None): """Performs calculation of Ratio of cluster objects to simple objects Args: @@ -608,17 +632,18 @@ def calculate_ratio_aca_asa(input_data, columns_names): warnings.filterwarnings('error') nominator_filter = {'simple_flag': 0} denominator_filter = {'simple_flag': 1} - try: - nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) - denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) + try: + nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) + denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_ratio_asa_aca(input_data, columns_names): +def calculate_ratio_asa_aca(input_data, columns_names, logger=None): """Performs calculation of Ratio of simple objects to cluster objects Args: @@ -635,16 +660,17 @@ def calculate_ratio_asa_aca(input_data, columns_names): nominator_filter = {'simple_flag': 1} denominator_filter = {'simple_flag': 0} try: - nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) - denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) + nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) + denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_ratio_fca_fsa(input_data, columns_names): +def calculate_ratio_fca_fsa(input_data, columns_names, logger=None): """Performs calculation of Ratio of cluster forecast objects to simple forecast objects Args: @@ -661,16 +687,17 @@ def calculate_ratio_fca_fsa(input_data, columns_names): nominator_filter = {'fcst_flag': 1, 'simple_flag': 0} denominator_filter = {'fcst_flag': 1, 'simple_flag': 1} try: - nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) + nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") 
result = None warnings.filterwarnings('ignore') return result -def calculate_ratio_fsa_fca(input_data, columns_names): +def calculate_ratio_fsa_fca(input_data, columns_names, logger=None): """Performs calculation of Ratio of simple forecast objects to cluster forecast objects Args: @@ -690,13 +717,14 @@ def calculate_ratio_fsa_fca(input_data, columns_names): nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_ratio_oca_osa(input_data, columns_names): +def calculate_ratio_oca_osa(input_data, columns_names, logger=None): """Performs calculation of Ratio of cluster observation objects to simple observation objects Args: @@ -713,16 +741,17 @@ def calculate_ratio_oca_osa(input_data, columns_names): nominator_filter = {'fcst_flag': 0, 'simple_flag': 0} denominator_filter = {'fcst_flag': 0, 'simple_flag': 1} try: - nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) - denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) + nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) + denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_ratio_osa_oca(input_data, columns_names): +def calculate_ratio_osa_oca(input_data, columns_names, logger=None): """Performs calculation of Ratio of simple observation objects to cluster observation objects Args: @@ -742,13 +771,14 @@ def calculate_ratio_osa_oca(input_data, columns_names): nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) denominator = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter) result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_objhits(input_data, columns_names): +def calculate_objhits(input_data, columns_names, logger=None): """Performs calculation of Hits = (number of matched simple objects)/2 Args: @@ -763,18 +793,18 @@ def calculate_objhits(input_data, columns_names): """ warnings.filterwarnings('error') nominator_filter = {'simple_flag': 1, 'matched_flag': 1} - - try: + try: nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) denominator = 2 result = round_half_up(nominator / denominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_objmisses(input_data,
columns_names): +def calculate_objmisses(input_data, columns_names, logger=None): """Performs calculation of Misses = OSU Args: @@ -789,17 +819,17 @@ def calculate_objmisses(input_data, columns_names): """ warnings.filterwarnings('error') nominator_filter = {'fcst_flag': 0, 'simple_flag': 1, 'matched_flag': 0} - try: nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) result = round_half_up(nominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_objfas(input_data, columns_names): +def calculate_objfas(input_data, columns_names, logger=None): """Performs calculation of False Alarms = FSU Args: @@ -814,17 +844,17 @@ def calculate_objfas(input_data, columns_names): """ warnings.filterwarnings('error') nominator_filter = {'fcst_flag': 1, 'simple_flag': 1, 'matched_flag': 0} - - try: - nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) + try: + nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) result = round_half_up(nominator, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_objcsi(input_data, columns_names): +def calculate_objcsi(input_data, columns_names, logger=None): """Performs calculation of CSI = hits / [hits + OSU + FSU] Args: @@ -841,21 +871,19 @@ def calculate_objcsi(input_data, columns_names): nominator_filter = {'simple_flag': 1, 'matched_flag': 1} denominator_filter_1 = {'simple_flag': 1, 'matched_flag': 1} denominator_filter_2 = {'simple_flag': 1, 'matched_flag': 0} - - try: - nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) / 2 - denominator_1 = \ - nrow_column_data_by_name_value(input_data, columns_names, denominator_filter_1) / 2 - denominator_2 = \ - nrow_column_data_by_name_value(input_data, columns_names, denominator_filter_2) - result = round_half_up(nominator / (denominator_1 + denominator_2), PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + try: + nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) / 2 + denominator_1 = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter_1) / 2 + denominator_2 = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter_2) + result = round_half_up(nominator / (denominator_1 + denominator_2), PRECISION) + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_objpody(input_data, columns_names): +def calculate_objpody(input_data, columns_names, logger=None): """Performs calculation of Probability of Detecting Yes PODY = hits / [hits + OSU] Args: @@ -872,21 +900,19 @@ def calculate_objpody(input_data, columns_names): nominator_filter = {'simple_flag': 1, 'matched_flag': 1} denominator_filter_1 = {'simple_flag': 1, 'matched_flag': 1} denominator_filter_2 = {'fcst_flag': 1, 'simple_flag': 1, 'matched_flag': 0} - try: - nominator = nrow_column_data_by_name_value(input_data, columns_names,
nominator_filter) - denominator_1 = \ - nrow_column_data_by_name_value(input_data, columns_names, denominator_filter_1) - denominator_2 = \ - nrow_column_data_by_name_value(input_data, columns_names, denominator_filter_2) + nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) + denominator_1 = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter_1) + denominator_2 = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter_2) result = round_half_up(nominator / (denominator_1 + 2 * denominator_2), PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_objfar(input_data, columns_names): +def calculate_objfar(input_data, columns_names, logger=None): """Performs calculation of False alarm ratio FAR = false alarms / [hits + FSU] Args: @@ -903,15 +929,13 @@ def calculate_objfar(input_data, columns_names): nominator_filter = {'fcst_flag': 1, 'simple_flag': 1, 'matched_flag': 0} denominator_filter_1 = {'fcst_flag': 1, 'simple_flag': 1, 'matched_flag': 0} denominator_filter_2 = {'simple_flag': 1, 'matched_flag': 1} - try: nominator = nrow_column_data_by_name_value(input_data, columns_names, nominator_filter) - denominator_1 = \ - nrow_column_data_by_name_value(input_data, columns_names, denominator_filter_1) - denominator_2 = \ - nrow_column_data_by_name_value(input_data, columns_names, denominator_filter_2) + denominator_1 = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter_1) + denominator_2 = nrow_column_data_by_name_value(input_data, columns_names, denominator_filter_2) result = round_half_up(nominator / (denominator_1 + denominator_2 / 2), PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred: {str(e)}") result = None warnings.filterwarnings('ignore') - return result + return result \ No newline at end of file diff --git a/metcalcpy/util/nbrcnt_statistics.py b/metcalcpy/util/nbrcnt_statistics.py index e2dd1a0f..623a9f17 100644 --- a/metcalcpy/util/nbrcnt_statistics.py +++ b/metcalcpy/util/nbrcnt_statistics.py @@ -14,12 +14,13 @@ import warnings from metcalcpy.util.utils import round_half_up, sum_column_data_by_name, PRECISION, get_total_values +from metcalcpy.util.safe_log import safe_log __author__ = 'Tatiana Burek' __version__ = '0.1.0' -def calculate_nbr_fbs(input_data, columns_names, aggregation=False): +def calculate_nbr_fbs(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of NBR_FBS - Fractions Brier Score Args: @@ -35,16 +36,23 @@ def calculate_nbr_fbs(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Calculating total values from input data.") total = get_total_values(input_data, columns_names, aggregation) + + safe_log(logger, "debug", "Calculating Fractions Brier Score (FBS).") fbs = sum_column_data_by_name(input_data, columns_names, 'fbs') / total + + safe_log(logger, "debug", "Rounding the FBS to the defined precision.") result = round_half_up(fbs, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning",
f"Exception occurred during NBR_FBS calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_nbr_fss(input_data, columns_names, aggregation=False): +def calculate_nbr_fss(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of NBR_FSS - Fractions Skill Score Args: @@ -60,18 +68,29 @@ def calculate_nbr_fss(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Calculating total values from input data.") total = get_total_values(input_data, columns_names, aggregation) + + safe_log(logger, "debug", "Calculating denominator for FSS.") fss_den = sum_column_data_by_name(input_data, columns_names, 'fss') / total + + safe_log(logger, "debug", "Calculating Fractions Brier Score (FBS).") fbs = sum_column_data_by_name(input_data, columns_names, 'fbs') / total + + safe_log(logger, "debug", "Calculating Fractions Skill Score (FSS).") fss = 1.0 - fbs / fss_den + + safe_log(logger, "debug", "Rounding the FSS to the defined precision.") result = round_half_up(fss, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during NBR_FSS calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_nbr_afss(input_data, columns_names, aggregation=False): +def calculate_nbr_afss(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of NBR_AFSS - Asymptotic Fractions Skill Score Args: @@ -87,21 +106,35 @@ def calculate_nbr_afss(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Calculating total values from input data.") total = get_total_values(input_data, columns_names, aggregation) + + safe_log(logger, "debug", "Calculating forecast rate (f_rate).") f_rate = sum_column_data_by_name(input_data, columns_names, 'f_rate') / total + + safe_log(logger, "debug", "Calculating observation rate (o_rate).") o_rate = sum_column_data_by_name(input_data, columns_names, 'o_rate') / total - + + safe_log(logger, "debug", "Calculating numerator for AFSS.") afss_num = 2.0 * f_rate * o_rate + + safe_log(logger, "debug", "Calculating denominator for AFSS.") afss_den = f_rate * f_rate + o_rate * o_rate + + safe_log(logger, "debug", "Calculating Asymptotic Fractions Skill Score (AFSS).") afss = afss_num / afss_den + + safe_log(logger, "debug", "Rounding the AFSS to the defined precision.") result = round_half_up(afss, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during NBR_AFSS calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_nbr_ufss(input_data, columns_names, aggregation=False): +def calculate_nbr_ufss(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of NBR_UFSS - Uniform Fractions Skill Score Args: @@ -117,17 +150,26 @@ def calculate_nbr_ufss(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Calculating total values from input data.") total = get_total_values(input_data, columns_names, aggregation) + + safe_log(logger, "debug", "Calculating observation rate (o_rate).") o_rate = sum_column_data_by_name(input_data, columns_names, 
'o_rate') / total + + safe_log(logger, "debug", "Calculating Uniform Fractions Skill Score (UFSS).") ufss = 0.5 + o_rate / 2.0 + + safe_log(logger, "debug", "Rounding the UFSS to the defined precision.") result = round_half_up(ufss, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during NBR_UFSS calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_nbr_f_rate(input_data, columns_names, aggregation=False): +def calculate_nbr_f_rate(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of NBR_F_RATE - Forecast event frequency Args: @@ -143,16 +185,23 @@ def calculate_nbr_f_rate(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Calculating total values from input data.") total = get_total_values(input_data, columns_names, aggregation) + + safe_log(logger, "debug", "Calculating forecast event frequency (f_rate).") f_rate = sum_column_data_by_name(input_data, columns_names, 'f_rate') / total + + safe_log(logger, "debug", "Rounding the F_RATE to the defined precision.") result = round_half_up(f_rate, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during NBR_F_RATE calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_nbr_o_rate(input_data, columns_names, aggregation=False): +def calculate_nbr_o_rate(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of NBR_O_RATE - Observed event frequency Args: @@ -168,16 +217,23 @@ def calculate_nbr_o_rate(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Calculating total values from input data.") total = get_total_values(input_data, columns_names, aggregation) + + safe_log(logger, "debug", "Calculating observed event frequency (o_rate).") o_rate = sum_column_data_by_name(input_data, columns_names, 'o_rate') / total + + safe_log(logger, "debug", "Rounding the O_RATE to the defined precision.") result = round_half_up(o_rate, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during NBR_O_RATE calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_nbr_cnt_total(input_data, columns_names): +def calculate_nbr_cnt_total(input_data, columns_names, logger=None): """Performs calculation of Total number of matched pairs for Neighborhood Continuous Statistics Args: input_data: 2-dimensional numpy array with data for the calculation @@ -189,5 +245,14 @@ def calculate_nbr_cnt_total(input_data, columns_names): calculated Total number of matched pairs as float or None if some of the data values are missing or invalid """ - total = sum_column_data_by_name(input_data, columns_names, 'total') - return round_half_up(total, PRECISION) + try: + safe_log(logger, "debug", "Starting calculation of total matched pairs.") + total = sum_column_data_by_name(input_data, columns_names, 'total') + + safe_log(logger, "debug", f"Total matched pairs before rounding: {total}") + result = round_half_up(total, PRECISION) + + 
safe_log(logger, "debug", f"Total matched pairs after rounding: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during total matched pairs calculation: {str(e)}") + result = None + + return result diff --git a/metcalcpy/util/nbrctc_statistics.py b/metcalcpy/util/nbrctc_statistics.py index eb9e3ba0..14b7e2a6 100644 --- a/metcalcpy/util/nbrctc_statistics.py +++ b/metcalcpy/util/nbrctc_statistics.py @@ -16,12 +16,13 @@ calculate_fmean, calculate_pody, calculate_pofd, calculate_podn, calculate_far, calculate_csi, \ calculate_gss, calculate_hk, calculate_hss, calculate_odds from metcalcpy.util.utils import round_half_up, sum_column_data_by_name, PRECISION +from metcalcpy.util.safe_log import safe_log __author__ = 'Tatiana Burek' __version__ = '0.1.0' -def calculate_nbr_baser(input_data, columns_names): +def calculate_nbr_baser(input_data, columns_names, logger=None): """Performs calculation of NBR_BASER - Base rate Args: @@ -35,10 +36,18 @@ def calculate_nbr_baser(input_data, columns_names): or None if some of the data values are missing or invalid """ - return calculate_baser(input_data, columns_names) + try: + safe_log(logger, "debug", "Starting calculation of NBR_BASER.") + result = calculate_baser(input_data, columns_names, logger=logger) + safe_log(logger, "debug", f"Calculated NBR_BASER: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during NBR_BASER calculation: {str(e)}") + result = None + return result -def calculate_nbr_acc(input_data, columns_names): + +def calculate_nbr_acc(input_data, columns_names, logger=None): """Performs calculation of NBR_ACC - Accuracy Args: @@ -52,10 +61,18 @@ def calculate_nbr_acc(input_data, columns_names): or None if some of the data values are missing or invalid """ - return calculate_acc(input_data, columns_names) + try: + safe_log(logger, "debug", "Starting calculation of NBR_ACC.") + result = calculate_acc(input_data, columns_names, logger=logger) + safe_log(logger, "debug", f"Calculated NBR_ACC: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during NBR_ACC calculation: {str(e)}") + result = None + + return result -def calculate_nbr_fbias(input_data, columns_names): +def calculate_nbr_fbias(input_data, columns_names, logger=None): """Performs calculation of NBR_FBIAS - Frequency Bias Args: @@ -69,10 +86,18 @@ def calculate_nbr_fbias(input_data, columns_names): or None if some of the data values are missing or invalid """ - return calculate_fbias(input_data, columns_names) + try: + safe_log(logger, "debug", "Starting calculation of NBR_FBIAS.") + result = calculate_fbias(input_data, columns_names, logger=logger) + safe_log(logger, "debug", f"Calculated NBR_FBIAS: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during NBR_FBIAS calculation: {str(e)}") + result = None + + return result -def calculate_nbr_fmean(input_data, columns_names): +def calculate_nbr_fmean(input_data, columns_names, logger=None): """Performs calculation of NBR_FMEAN - Forecast mean Args: @@ -86,10 +111,17 @@ def calculate_nbr_fmean(input_data, columns_names): or None if some of the data values are missing or invalid """ - return calculate_fmean(input_data, columns_names) + try: + safe_log(logger, "debug", "Starting calculation of NBR_FMEAN.") + result = calculate_fmean(input_data,
columns_names, logger=logger) + safe_log(logger, "debug", f"Calculated NBR_FMEAN: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during NBR_FMEAN calculation: {str(e)}") + result = None + return result -def calculate_nbr_pody(input_data, columns_names): +def calculate_nbr_pody(input_data, columns_names, logger=None): """Performs calculation of NBR_PODY - Probability of detecting yes Args: @@ -103,10 +135,17 @@ def calculate_nbr_pody(input_data, columns_names): or None if some of the data values are missing or invalid """ - return calculate_pody(input_data, columns_names) + try: + safe_log(logger, "debug", "Starting calculation of NBR_PODY.") + result = calculate_pody(input_data, columns_names, logger=logger) + safe_log(logger, "debug", f"Calculated NBR_PODY: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during NBR_PODY calculation: {str(e)}") + result = None + return result -def calculate_nbr_pofd(input_data, columns_names): +def calculate_nbr_pofd(input_data, columns_names, logger=None): """Performs calculation of NBR_POFD - Probability of false detection Args: @@ -120,10 +159,18 @@ def calculate_nbr_pofd(input_data, columns_names): or None if some of the data values are missing or invalid """ - return calculate_pofd(input_data, columns_names) + try: + safe_log(logger, "debug", "Starting calculation of NBR_POFD.") + result = calculate_pofd(input_data, columns_names, logger=logger) + safe_log(logger, "debug", f"Calculated NBR_POFD: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during NBR_POFD calculation: {str(e)}") + result = None + return result -def calculate_nbr_podn(input_data, columns_names): + +def calculate_nbr_podn(input_data, columns_names, logger=None): """Performs calculation of NBR_PODN - Probability of detecting no Args: @@ -137,10 +184,18 @@ def calculate_nbr_podn(input_data, columns_names): or None if some of the data values are missing or invalid """ - return calculate_podn(input_data, columns_names) + try: + safe_log(logger, "debug", "Starting calculation of NBR_PODN.") + result = calculate_podn(input_data, columns_names, logger=logger) + safe_log(logger, "debug", f"Calculated NBR_PODN: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during NBR_PODN calculation: {str(e)}") + result = None + + return result -def calculate_nbr_far(input_data, columns_names): +def calculate_nbr_far(input_data, columns_names, logger=None): """Performs calculation of NBR_FAR - False alarm ratio Args: @@ -154,10 +209,16 @@ def calculate_nbr_far(input_data, columns_names): or None if some of the data values are missing or invalid """ - return calculate_far(input_data, columns_names) + try: + safe_log(logger, "debug", "Starting calculation of NBR_FAR.") + result = calculate_far(input_data, columns_names, logger=logger) + safe_log(logger, "debug", f"Calculated NBR_FAR: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during NBR_FAR calculation: {str(e)}") + result = None + + return result -def calculate_nbr_csi(input_data, columns_names): +def calculate_nbr_csi(input_data, columns_names, logger=None): """Performs calculation of NBR_CSI - Critical Success Index Args: @@ -171,10 +232,18 @@ def
calculate_nbr_csi(input_data, columns_names): or None if some of the data values are missing or invalid """ - return calculate_csi(input_data, columns_names) + try: + safe_log(logger, "debug", "Starting calculation of NBR_CSI.") + result = calculate_csi(input_data, columns_names, logger=logger) + safe_log(logger, "debug", f"Calculated NBR_CSI: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during NBR_CSI calculation: {str(e)}") + result = None + return result -def calculate_nbr_gss(input_data, columns_names): + +def calculate_nbr_gss(input_data, columns_names, logger=None): """Performs calculation of NBR_GSS - Gilbert Skill Score Args: @@ -188,10 +257,18 @@ def calculate_nbr_gss(input_data, columns_names): or None if some of the data values are missing or invalid """ - return calculate_gss(input_data, columns_names) + try: + safe_log(logger, "debug", "Starting calculation of NBR_GSS.") + result = calculate_gss(input_data, columns_names, logger=logger) + safe_log(logger, "debug", f"Calculated NBR_GSS: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during NBR_GSS calculation: {str(e)}") + result = None + + return result -def calculate_nbr_hk(input_data, columns_names): +def calculate_nbr_hk(input_data, columns_names, logger=None): """Performs calculation of NBR_HK - Hanssen-Kuipers Discriminant Args: @@ -205,10 +282,18 @@ def calculate_nbr_hk(input_data, columns_names): or None if some of the data values are missing or invalid """ - return calculate_hk(input_data, columns_names) + try: + safe_log(logger, "debug", "Starting calculation of NBR_HK.") + result = calculate_hk(input_data, columns_names, logger=logger) + safe_log(logger, "debug", f"Calculated NBR_HK: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during NBR_HK calculation: {str(e)}") + result = None + + return result -def calculate_nbr_hss(input_data, columns_names): +def calculate_nbr_hss(input_data, columns_names, logger=None): """Performs calculation of NBR_HSS - Heidke Skill Score Args: @@ -222,10 +307,18 @@ def calculate_nbr_hss(input_data, columns_names): or None if some of the data values are missing or invalid """ - return calculate_hss(input_data, columns_names) + try: + safe_log(logger, "debug", "Starting calculation of NBR_HSS.") + result = calculate_hss(input_data, columns_names, logger=logger) + safe_log(logger, "debug", f"Calculated NBR_HSS: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during NBR_HSS calculation: {str(e)}") + result = None + return result -def calculate_nbr_odds(input_data, columns_names): + +def calculate_nbr_odds(input_data, columns_names, logger=None): """Performs calculation of NBR_ODDS - Odds Ratio Args: @@ -239,10 +332,18 @@ def calculate_nbr_odds(input_data, columns_names): or None if some of the data values are missing or invalid """ - return calculate_odds(input_data, columns_names) + try: + safe_log(logger, "debug", "Starting calculation of NBR_ODDS.") + result = calculate_odds(input_data, columns_names, logger=logger) + safe_log(logger, "debug", f"Calculated NBR_ODDS: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during NBR_ODDS calculation: {str(e)}") + result = None + + return result
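Every NBR_* wrapper above repeats the same shape: log the start at debug level, delegate to the underlying contingency-table statistic, log the result, and trap the usual numeric exceptions so the function degrades to None instead of raising. The sketch below factors that shape into a single helper purely for illustration; the helper name is hypothetical and is not part of this patch, which keeps the explicit per-function form.

.. code-block:: python

    import logging
    from typing import Callable, Optional

    from metcalcpy.util.safe_log import safe_log

    def _wrap_nbr_stat(name: str, func: Callable, input_data, columns_names,
                       logger: Optional[logging.Logger] = None):
        """Delegate to func, logging start/result and degrading to None on error."""
        try:
            safe_log(logger, "debug", f"Starting calculation of {name}.")
            result = func(input_data, columns_names, logger=logger)
            safe_log(logger, "debug", f"Calculated {name}: {result}")
        except (TypeError, ZeroDivisionError, Warning, ValueError) as e:
            safe_log(logger, "warning",
                     f"Exception occurred during {name} calculation: {str(e)}")
            result = None
        return result

    # Equivalent to calculate_nbr_csi(input_data, columns_names, logger=logger):
    # result = _wrap_nbr_stat("NBR_CSI", calculate_csi, input_data, columns_names, logger)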
-def calculate_nbr_ctc_total(input_data, columns_names): +def calculate_nbr_ctc_total(input_data, columns_names, logger=None): """Performs calculation of Total number of matched pairs for Neighborhood Contingency Table Statistics Args: @@ -255,5 +356,11 @@ def calculate_nbr_ctc_total(input_data, columns_names): calculated Total number of matched pairs as float or None if some of the data values are missing or invalid """ - total = sum_column_data_by_name(input_data, columns_names, 'total') - return round_half_up(total, PRECISION) + try: + safe_log(logger, "debug", "Starting calculation of Total number of matched pairs for Neighborhood Contingency Table Statistics.") + total = sum_column_data_by_name(input_data, columns_names, 'total') + result = round_half_up(total, PRECISION) + safe_log(logger, "debug", f"Calculated total: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during calculation of total: {str(e)}") + result = None + + return result \ No newline at end of file diff --git a/metcalcpy/util/pstd_statistics.py b/metcalcpy/util/pstd_statistics.py index 6a890d7a..e100b8cf 100644 --- a/metcalcpy/util/pstd_statistics.py +++ b/metcalcpy/util/pstd_statistics.py @@ -17,12 +17,13 @@ from metcalcpy.util.met_stats import get_column_index_by_name from metcalcpy.util.utils import round_half_up, sum_column_data_by_name, PRECISION +from metcalcpy.util.safe_log import safe_log __author__ = 'Tatiana Burek' __version__ = '2.0.1' -def calculate_pstd_brier(input_data, columns_names): +def calculate_pstd_brier(input_data, columns_names, logger=None): """Performs calculation of PSTD_BRIER - Brier Score Args: @@ -37,23 +38,29 @@ def calculate_pstd_brier(input_data, columns_names): """ warnings.filterwarnings('error') try: - df_pct_perm = _calc_common_stats(columns_names, input_data) + df_pct_perm = _calc_common_stats(columns_names, input_data, logger=logger) t_table = df_pct_perm['n_i'].sum() o_bar_table = df_pct_perm['oy_i'].sum() / t_table - o_bar = input_data[0, get_column_index_by_name(columns_names, 'o_bar')] - reliability = calc_reliability(t_table, df_pct_perm) - resolution = calc_resolution(t_table, df_pct_perm, o_bar) - uncertainty = calc_uncertainty(o_bar_table) + o_bar = input_data[0, get_column_index_by_name(columns_names, 'o_bar', logger=logger)] + reliability = calc_reliability(t_table, df_pct_perm, logger=logger) + resolution = calc_resolution(t_table, df_pct_perm, o_bar, logger=logger) + uncertainty = calc_uncertainty(o_bar_table, logger=logger) + + safe_log(logger, "debug", f"reliability: {reliability}, resolution: {resolution}, uncertainty: {uncertainty}") brier = reliability - resolution + uncertainty result = round_half_up(brier, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError, AttributeError): + + safe_log(logger, "debug", f"Calculated Brier Score: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError, AttributeError) as e: + safe_log(logger, "warning", f"Exception occurred during Brier Score calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_pstd_bss_smpl(input_data, columns_names): +def calculate_pstd_bss_smpl(input_data, columns_names, logger=None): """Performs calculation of BSS_SMPL - Brier Skill Score relative to sample climatology Args: @@ -68,24 +75,30 @@ def calculate_pstd_bss_smpl(input_data, columns_names): """ warnings.filterwarnings('error') try: - df_pct_perm = _calc_common_stats(columns_names, input_data) +
df_pct_perm = _calc_common_stats(columns_names, input_data, logger=logger) t_table = df_pct_perm['n_i'].sum() o_bar_table = df_pct_perm['oy_i'].sum() / t_table - o_bar = input_data[0, get_column_index_by_name(columns_names, 'o_bar')] - reliability = calc_reliability(t_table, df_pct_perm) - resolution = calc_resolution(t_table, df_pct_perm, o_bar) - uncertainty = calc_uncertainty(o_bar_table) + o_bar = input_data[0, get_column_index_by_name(columns_names, 'o_bar', logger=logger)] + + reliability = calc_reliability(t_table, df_pct_perm, logger=logger) + resolution = calc_resolution(t_table, df_pct_perm, o_bar, logger=logger) + uncertainty = calc_uncertainty(o_bar_table, logger=logger) + + safe_log(logger, "debug", f"reliability: {reliability}, resolution: {resolution}, uncertainty: {uncertainty}") bss_smpl = (resolution - reliability) / uncertainty result = round_half_up(bss_smpl, PRECISION) + + safe_log(logger, "debug", f"Calculated Brier Skill Score (BSS_SMPL): {result}") - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during BSS_SMPL calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_pstd_baser(input_data, columns_names): +def calculate_pstd_baser(input_data, columns_names, logger=None): """Performs calculation of BASER - The Base Rate Args: input_data: 2-dimensional numpy array with data for the calculation @@ -99,19 +112,19 @@ def calculate_pstd_baser(input_data, columns_names): """ warnings.filterwarnings('error') try: - - df_pct_perm = _calc_common_stats(columns_names, input_data) + df_pct_perm = _calc_common_stats(columns_names, input_data, logger=logger) t_table = df_pct_perm['n_i'].sum() baser = df_pct_perm['oy_i'].sum() / t_table result = round_half_up(baser, PRECISION) - - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "debug", f"Calculated BASER: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during BASER calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_pstd_reliability(input_data, columns_names): +def calculate_pstd_reliability(input_data, columns_names, logger=None): """Performs calculation of RELIABILITY - Reliability Args: @@ -126,19 +139,20 @@ def calculate_pstd_reliability(input_data, columns_names): """ warnings.filterwarnings('error') try: - df_pct_perm = _calc_common_stats(columns_names, input_data) + df_pct_perm = _calc_common_stats(columns_names, input_data, logger=logger) t_table = df_pct_perm['n_i'].sum() - - reliability = calc_reliability(t_table, df_pct_perm) + reliability = calc_reliability(t_table, df_pct_perm, logger=logger) result = round_half_up(reliability, PRECISION) + safe_log(logger, "debug", f"Calculated RELIABILITY: {result}") - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during RELIABILITY calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_pstd_resolution(input_data, columns_names): +def calculate_pstd_resolution(input_data, columns_names, logger=None): """Performs calculation of RESOLUTION - Resolution Args: @@ -153,18 +167,20 @@ def calculate_pstd_resolution(input_data, columns_names): """ warnings.filterwarnings('error') try: - 
df_pct_perm = _calc_common_stats(columns_names, input_data) - o_bar = input_data[0, get_column_index_by_name(columns_names, 'o_bar')] + df_pct_perm = _calc_common_stats(columns_names, input_data, logger=logger) + o_bar = input_data[0, get_column_index_by_name(columns_names, 'o_bar', logger=logger)] t_table = df_pct_perm['n_i'].sum() - resolution = calc_resolution(t_table, df_pct_perm, o_bar) + resolution = calc_resolution(t_table, df_pct_perm, o_bar, logger=logger) result = round_half_up(resolution, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "debug", f"Calculated RESOLUTION: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during RESOLUTION calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_pstd_uncertainty(input_data, columns_names): +def calculate_pstd_uncertainty(input_data, columns_names, logger=None): """Performs calculation of UNCERTAINTY - Uncertainty Args: @@ -179,20 +195,21 @@ def calculate_pstd_uncertainty(input_data, columns_names): """ warnings.filterwarnings('error') try: - df_pct_perm = _calc_common_stats(columns_names, input_data) + df_pct_perm = _calc_common_stats(columns_names, input_data, logger=logger) t_table = df_pct_perm['n_i'].sum() o_bar_table = df_pct_perm['oy_i'].sum() / t_table - - uncertainty = calc_uncertainty(o_bar_table) + uncertainty = calc_uncertainty(o_bar_table, logger=logger) result = round_half_up(uncertainty, PRECISION) + safe_log(logger, "debug", f"Calculated UNCERTAINTY: {result}") - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during UNCERTAINTY calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_pstd_calibration(input_data, columns_names): +def calculate_pstd_calibration(input_data, columns_names, logger=None): """Performs calculation of calibration Args: @@ -207,18 +224,20 @@ def calculate_pstd_calibration(input_data, columns_names): """ warnings.filterwarnings('error') try: - oy_i = sum(input_data[:, get_column_index_by_name(columns_names, 'oy_i')]) - n_i = calculate_pstd_ni(input_data, columns_names) + oy_i = sum(input_data[:, get_column_index_by_name(columns_names, 'oy_i', logger=logger)]) + n_i = calculate_pstd_ni(input_data, columns_names, logger=logger) calibration = oy_i / n_i result = round_half_up(calibration, PRECISION) + safe_log(logger, "debug", f"Calculated calibration: {result}") - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during calibration calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_pstd_ni(input_data, columns_names): +def calculate_pstd_ni(input_data, columns_names, logger=None): """Performs calculation of ni - the total event count (ni = oy_i + on_i) Args: @@ -233,18 +252,20 @@ def calculate_pstd_ni(input_data, columns_names): """ warnings.filterwarnings('error') try: - oy_i = sum(input_data[:, get_column_index_by_name(columns_names, 'oy_i')]) - on_i = sum(input_data[:, get_column_index_by_name(columns_names, 'on_i')]) + oy_i = sum(input_data[:, get_column_index_by_name(columns_names, 'oy_i', logger=logger)]) + on_i = sum(input_data[:, get_column_index_by_name(columns_names, 'on_i', logger=logger)]) n_i = oy_i +
on_i result = round_half_up(n_i, PRECISION) + safe_log(logger, "debug", f"Calculated ni: {result}") - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during ni calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_pstd_roc_auc(input_data, columns_names): +def calculate_pstd_roc_auc(input_data, columns_names, logger=None): """Performs calculation of ROC_AUC - Area under the receiver operating characteristic curve Args: @@ -259,40 +280,56 @@ def calculate_pstd_roc_auc(input_data, columns_names): """ warnings.filterwarnings('error') - df_pct_perm = _calc_common_stats(columns_names, input_data) - roc = _calc_pct_roc(df_pct_perm) - # add 1st and last rows - final_roc = pd.DataFrame( - {'thresh': 0, 'n11': 0, 'n10': 0, 'n01': 0, 'n00': 0, 'pody': 1, 'pofd': 1}, - index=[0]) - final_roc = pd.concat([final_roc, roc]) - final_roc = pd.concat([final_roc, - pd.DataFrame( - {'thresh': 0, 'n11': 0, 'n10': 0, 'n01': 0, 'n00': 0, 'pody': 0, 'pofd': 0}, - index=[0]) ]) - - final_roc.reset_index(inplace=True, drop=True) - roc_auc = 0 - for index, row in final_roc.iterrows(): - if index != 0: - roc_auc = roc_auc + 0.5 * (final_roc.iloc[index - 1]['pody'] + row.pody) \ - * (final_roc.iloc[index - 1]['pofd'] - row.pofd) - - result = round_half_up(roc_auc, PRECISION) + try: + df_pct_perm = _calc_common_stats(columns_names, input_data, logger=logger) + roc = _calc_pct_roc(df_pct_perm, logger=logger) + + # Add first and last rows + final_roc = pd.DataFrame( + {'thresh': 0, 'n11': 0, 'n10': 0, 'n01': 0, 'n00': 0, 'pody': 1, 'pofd': 1}, + index=[0]) + final_roc = pd.concat([final_roc, roc]) + final_roc = pd.concat([final_roc, + pd.DataFrame( + {'thresh': 0, 'n11': 0, 'n10': 0, 'n01': 0, 'n00': 0, 'pody': 0, 'pofd': 0}, + index=[0]) ]) + + final_roc.reset_index(inplace=True, drop=True) + safe_log(logger, "debug", "Prepared final ROC curve with added boundary points.") + + roc_auc = 0 + for index, row in final_roc.iterrows(): + if index != 0: + increment = 0.5 * (final_roc.iloc[index - 1]['pody'] + row.pody) \ + * (final_roc.iloc[index - 1]['pofd'] - row.pofd) + roc_auc += increment + safe_log(logger, "debug", f"Step {index}: Added area increment {increment} to ROC_AUC, current value: {roc_auc}.") + + result = round_half_up(roc_auc, PRECISION) + safe_log(logger, "debug", f"Final calculated ROC_AUC: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError, AttributeError) as e: + safe_log(logger, "warning", f"Exception occurred during ROC_AUC calculation: {str(e)}") + result = None return result -def calc_uncertainty(o_bar_table): +def calc_uncertainty(o_bar_table, logger=None): """Performs calculation of uncertainty Args: o_bar_table Returns: uncertainty """ - uncertainty = o_bar_table * (1 - o_bar_table) - return uncertainty + try: + uncertainty = o_bar_table * (1 - o_bar_table) + safe_log(logger, "debug", f"Calculated uncertainty: {uncertainty}") + return uncertainty + except (TypeError, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during uncertainty calculation: {str(e)}") + return None -def calc_resolution(t_table, df_pct_perm, o_bar): +def calc_resolution(t_table, df_pct_perm, o_bar, logger=None): """Performs calculation of resolution Args: t_table df_pct_perm @@ -300,13 +337,17 @@ def calc_resolution(t_table, df_pct_perm, o_bar): Returns: resolution """ - resolution = sum([row.n_i * (row.o_bar_i - 
o_bar) * (row.o_bar_i - o_bar) - for index, row in df_pct_perm.iterrows()]) \ - / t_table - return resolution + try: + resolution = sum([row.n_i * (row.o_bar_i - o_bar) ** 2 + for index, row in df_pct_perm.iterrows()]) / t_table + safe_log(logger, "debug", f"Calculated resolution: {resolution}") + return resolution + except (TypeError, ZeroDivisionError, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during resolution calculation: {str(e)}") + return None -def calc_reliability(t_table, df_pct_perm): +def calc_reliability(t_table, df_pct_perm, logger=None): """Performs calculation of reliability Args: t_table df_pct_perm @@ -314,13 +355,17 @@ def calc_reliability(t_table, df_pct_perm): Returns: reliability """ - reliability = sum([row.n_i * (row.thresh_i - row.o_bar_i) * (row.thresh_i - row.o_bar_i) - for index, row in df_pct_perm.iterrows()]) \ - / t_table - return reliability + try: + reliability = sum([row.n_i * (row.thresh_i - row.o_bar_i) ** 2 + for index, row in df_pct_perm.iterrows()]) / t_table + safe_log(logger, "debug", f"Calculated reliability: {reliability}") + return reliability + except (TypeError, ZeroDivisionError, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during reliability calculation: {str(e)}") + return None -def _calc_common_stats(columns_names, input_data): +def _calc_common_stats(columns_names, input_data, logger=None): """ Creates a data frame to hold the aggregated contingency table and ROC data Args: input_data: 2-dimensional numpy array with data for the calculation @@ -336,34 +381,50 @@ def _calc_common_stats(columns_names, input_data): - n_i - o_bar_i """ - pct_perm = {'thresh_i': [], 'oy_i': [], 'on_i': []} - for column in columns_names: - index = get_column_index_by_name(columns_names, column) - - if "oy_i" in column: - sum_val = sum_column_data_by_name(input_data, columns_names, column) - pct_perm['oy_i'].append(sum_val) - elif "on_i" in column: - sum_val = sum_column_data_by_name(input_data, columns_names, column) - pct_perm['on_i'].append(sum_val) - elif 'thresh_i' in column: - pct_perm['thresh_i'].append(input_data[0, index]) - # calculate vectors and constants to use below - df_pct_perm = pd.DataFrame(pct_perm) - df_pct_perm.reset_index(inplace=True, drop=True) - - n_i = [row.oy_i + row.on_i for index, row in df_pct_perm.iterrows()] - df_pct_perm['n_i'] = n_i - - # use only records with n_i != 0 - df_pct_perm = df_pct_perm[df_pct_perm['n_i'] != 0] - o_bar_i = [row.oy_i / row.n_i for index, row in df_pct_perm.iterrows()] - calibration_i = [row.oy_i / row.n_i for index, row in df_pct_perm.iterrows()] - df_pct_perm['o_bar_i'] = o_bar_i - return df_pct_perm - - -def _calc_pct_roc(data): + try: + pct_perm = {'thresh_i': [], 'oy_i': [], 'on_i': []} + + for column in columns_names: + index = get_column_index_by_name(columns_names, column, logger=logger) + + if "oy_i" in column: + sum_val = sum_column_data_by_name(input_data, columns_names, column) + pct_perm['oy_i'].append(sum_val) + safe_log(logger, "debug", f"Aggregated oy_i for {column}: {sum_val}") + + elif "on_i" in column: + sum_val = sum_column_data_by_name(input_data, columns_names, column) + pct_perm['on_i'].append(sum_val) + safe_log(logger, "debug", f"Aggregated on_i for {column}: {sum_val}") + + elif 'thresh_i' in column: + thresh_val = input_data[0, index] + pct_perm['thresh_i'].append(thresh_val) + safe_log(logger, "debug", f"Threshold value for {column}: {thresh_val}") + + # Create DataFrame + df_pct_perm = pd.DataFrame(pct_perm) + 
df_pct_perm.reset_index(inplace=True, drop=True) + + # Calculate n_i and o_bar_i + df_pct_perm['n_i'] = [row.oy_i + row.on_i for index, row in df_pct_perm.iterrows()] + safe_log(logger, "debug", "Calculated n_i for all rows.") + + # Filter out rows where n_i is 0 + df_pct_perm = df_pct_perm[df_pct_perm['n_i'] != 0] + safe_log(logger, "debug", "Filtered out rows with n_i = 0.") + + df_pct_perm['o_bar_i'] = [row.oy_i / row.n_i for index, row in df_pct_perm.iterrows()] + safe_log(logger, "debug", "Calculated o_bar_i for all rows.") + + return df_pct_perm + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during common stats calculation: {str(e)}") + return None + + +def _calc_pct_roc(data, logger=None): """ Creates a data frame to hold the aggregated contingency table and ROC data Args: data: pandas data frame with pstd data and column names: @@ -384,33 +445,46 @@ def _calc_pct_roc(data): - pofd """ # create a data frame to hold the aggregated contingency table and ROC data - list_thresh = np.unique(np.sort(data['thresh_i'].to_numpy())) - df_roc = pd.DataFrame( - {'thresh': list_thresh, 'n11': None, 'n10': None, - 'n01': None, 'n00': None, 'pody': None, 'pofd': None}) + try: + safe_log(logger, "debug", "Starting calculation of ROC data.") + + # Create a DataFrame to hold the aggregated contingency table and ROC data + list_thresh = np.unique(np.sort(data['thresh_i'].to_numpy())) + safe_log(logger, "debug", f"Thresholds identified: {list_thresh}") + + df_roc = pd.DataFrame( + {'thresh': list_thresh, 'n11': None, 'n10': None, + 'n01': None, 'n00': None, 'pody': None, 'pofd': None}) - # build the ROC contingency data table - for thresh in list_thresh: - is_bigger = data['thresh_i'] > thresh - # use df_roc.loc rather than df_roc.at in pandas versions above 1.2.3 - df_roc.loc[df_roc.index[df_roc["thresh"] == thresh], 'n11'] = sum(data[is_bigger]['oy_i']) - df_roc.loc[df_roc.index[df_roc["thresh"] == thresh], 'n10'] = sum(data[is_bigger]['on_i']) + # Build the ROC contingency data table + for thresh in list_thresh: + safe_log(logger, "debug", f"Processing threshold: {thresh}") - is_less = data['thresh_i'] <= thresh - # use df_roc.loc rather than df_roc.at in pandas versions above 1.2.3 - df_roc.loc[df_roc.index[df_roc["thresh"] == thresh], 'n01'] = sum(data[is_less]['oy_i']) - df_roc.loc[df_roc.index[df_roc["thresh"] == thresh], 'n00'] = sum(data[is_less]['on_i']) + is_bigger = data['thresh_i'] > thresh + df_roc.loc[df_roc.index[df_roc["thresh"] == thresh], 'n11'] = sum(data[is_bigger]['oy_i']) + df_roc.loc[df_roc.index[df_roc["thresh"] == thresh], 'n10'] = sum(data[is_bigger]['on_i']) + safe_log(logger, "debug", f"Calculated n11 and n10 for threshold {thresh}.") - df_roc.reset_index(inplace=True, drop=True) + is_less = data['thresh_i'] <= thresh + df_roc.loc[df_roc.index[df_roc["thresh"] == thresh], 'n01'] = sum(data[is_less]['oy_i']) + df_roc.loc[df_roc.index[df_roc["thresh"] == thresh], 'n00'] = sum(data[is_less]['on_i']) + safe_log(logger, "debug", f"Calculated n01 and n00 for threshold {thresh}.") - # generate the pody and pofd scores from the contingency tables - df_roc['pody'] = [row.n11 / (row.n11 + row.n01) for index, row in df_roc.iterrows()] - df_roc['pofd'] = [row.n10 / (row.n10 + row.n00) for index, row in df_roc.iterrows()] + df_roc.reset_index(inplace=True, drop=True) - return df_roc + # Generate the pody and pofd scores from the contingency tables + df_roc['pody'] = [row.n11 / (row.n11 + row.n01) for index, row in 
df_roc.iterrows()] + df_roc['pofd'] = [row.n10 / (row.n10 + row.n00) for index, row in df_roc.iterrows()] + safe_log(logger, "debug", "Calculated pody and pofd scores.") + return df_roc + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during ROC calculation: {str(e)}") + return None -def calculate_pct_total(input_data, columns_names): + +def calculate_pct_total(input_data, columns_names, logger=None): """Performs calculation of Total number of matched pairs for Contingency Table Counts for Probabilistic forecasts Args: @@ -423,5 +497,17 @@ def calculate_pct_total(input_data, columns_names): calculated Total number of matched pairs as float or None if some of the data values are missing or invalid """ - total = sum_column_data_by_name(input_data, columns_names, 'total') - return round_half_up(total, PRECISION) + try: + + # Summing up the 'total' column data + total = sum_column_data_by_name(input_data, columns_names, 'total') + safe_log(logger, "debug", f"Total matched pairs calculated: {total}") + + result = round_half_up(total, PRECISION) + safe_log(logger, "debug", f"Result after rounding: {result}") + + return result + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during total matched pairs calculation: {str(e)}") + return None \ No newline at end of file diff --git a/metcalcpy/util/read_env_vars_in_config.py b/metcalcpy/util/read_env_vars_in_config.py index c25c0f2c..e22ebaf1 100644 --- a/metcalcpy/util/read_env_vars_in_config.py +++ b/metcalcpy/util/read_env_vars_in_config.py @@ -11,9 +11,9 @@ import os import re import yaml +from metcalcpy.util.safe_log import safe_log - -def parse_config(path=None, data=None, tag='!ENV'): +def parse_config(path=None, data=None, tag='!ENV', logger=None): """ Load a yaml configuration file and resolve any environment variables The environment variables must have !ENV before them and be in this format @@ -48,6 +48,7 @@ def constructor_env_variables(loader, node): variable """ value = loader.construct_scalar(node) + safe_log(logger, "debug", f"Processing value: {value}") match = pattern.findall(value) # to find all env variables in line if match: full_value = value @@ -55,15 +56,18 @@ def constructor_env_variables(loader, node): full_value = full_value.replace( f'${{{g}}}', os.environ.get(g, g) ) + safe_log(logger, "debug", f"Replaced {g} with {full_value}") return full_value return value loader.add_constructor(tag, constructor_env_variables) if path: + safe_log(logger, "debug", f"Loading YAML configuration from path: {path}") with open(path) as conf_data: return yaml.load(conf_data, Loader=loader) elif data: + safe_log(logger, "debug", "Loading YAML configuration from provided data stream.") return yaml.load(data, Loader=loader) else: raise ValueError('Either a path or data should be defined as input') diff --git a/metcalcpy/util/read_file.py b/metcalcpy/util/read_file.py index 370c9068..491e2ee6 100644 --- a/metcalcpy/util/read_file.py +++ b/metcalcpy/util/read_file.py @@ -27,10 +27,12 @@ from METdatadb import METdbLoad as dbload from METdatadb.METdbLoad.ush import read_data_files from METdatadb.METdbLoad.ush.read_load_xml import XmlLoadFile +import pandas as pd +from metcalcpy.util.safe_log import safe_log class ReadMETOutput: - def __init__(self): + def __init__(self, logger=None): """ Creates an output reader with a
list of files to read """ self.flags = {} @@ -49,6 +50,7 @@ def __init__(self): self.flags['drop_indexes'] = False self.flags['apply_indexes'] = False self.flags['load_xml'] = True + self.logger = logger def readYAMLConfig(self,configFile): """ Returns a file or list of files @@ -59,14 +61,30 @@ def readYAMLConfig(self,configFile): Returns: returns a list containing a single or multiple file names including path """ - # Retrieve the contents of a YAML custom config file to over-ride - # or augment settings defined by the default config file. - #Use a configure file parser that handles environment variables - files_dict = parse_config(configFile) + logger = self.logger + try: + # Retrieve the contents of a YAML custom config file to override + # or augment settings defined by the default config file. + # Use a config file parser that handles environment variables. + files_dict = parse_config(configFile, logger=logger) - #parse_config returns a dictionary, read_data_files wants a list - files = files_dict['files'] - return files + if files_dict is None: + safe_log(logger, "error", "Failed to parse the YAML configuration. 'files_dict' is None.") + return [] + + # parse_config returns a dictionary, read_data_files expects a list + files = files_dict.get('files', []) + + if not files: + safe_log(logger, "warning", "No 'files' entry found in the YAML configuration.") + else: + safe_log(logger, "debug", f"Files retrieved from YAML configuration: {files}") + + return files + + except Exception as e: + safe_log(logger, "error", f"An error occurred while reading the YAML configuration: {str(e)}") + return [] def readXMLConfig(self,configFile): """ Returns a file or list of files @@ -75,15 +93,26 @@ def readXMLConfig(self,configFile): Returns: returns a list containing a single or multiple file names including path """ - - # Retrieve the contents of an XML custom config file to over-ride - # or augment settings defined by the default config file. - # Uses XmlLoadFile from METdatadb - - XML_LOADFILE = XmlLoadFile(configFile) - XML_LOADFILE.read_xml() - - return XML_LOADFILE.load_files + logger = self.logger + safe_log(logger, "debug", f"Attempting to read XML configuration from file: {configFile}") + + try: + # Retrieve the contents of an XML custom config file to override + # or augment settings defined by the default config file. + # Uses XmlLoadFile from METdatadb.
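+            # read_xml() populates the load_files attribute with the list of files to load, so load_files is read below as an attribute rather than called.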
+ + XML_LOADFILE = XmlLoadFile(configFile) + + safe_log(logger, "debug", "Reading XML file.") + XML_LOADFILE.read_xml() + + files = XML_LOADFILE.load_files + safe_log(logger, "debug", f"Files retrieved from XML configuration: {files}") + + return files + + except Exception as e: + safe_log(logger, "error", f"An error occurred while reading the XML configuration: {str(e)}") + return [] def readData(self,files_from_config): """ @@ -92,19 +121,29 @@ def readData(self,files_from_config): Returns: a pandas DataFrame containing MET output contents """ + logger = self.logger + safe_log(logger, "debug", f"Starting to read data from the files: {files_from_config}") + + try: + # Initialize the ReadDataFiles class instance + file_data = read_data_files.ReadDataFiles() - file_data = read_data_files.ReadDataFiles() + # Read in the data files, with options specified by XML flags + # Set load_flags and line_types empty so that everything is read + line_types = [] + safe_log(logger, "debug", f"Reading data with flags: {self.flags} and line_types: {line_types}") + + file_data.read_data(self.flags, files_from_config, line_types) - # read in the data files, with options specified by XML flags - #set load_flags and line_types empty so that everything is read - line_types = [] - file_data.read_data(self.flags, - files_from_config, - line_types) + # Retrieve the data as a pandas DataFrame + df = file_data.stat_data + safe_log(logger, "debug", f"Data reading completed. DataFrame shape: {df.shape}") + return df - df = file_data.stat_data - return df + except Exception as e: + safe_log(logger, "error", f"An error occurred while reading data: {str(e)}") + return pd.DataFrame() # Return an empty DataFrame in case of error def main(): """ diff --git a/metcalcpy/util/rps_statistics.py b/metcalcpy/util/rps_statistics.py index 64e23703..5ffaf2b0 100644 --- a/metcalcpy/util/rps_statistics.py +++ b/metcalcpy/util/rps_statistics.py @@ -14,12 +14,13 @@ import warnings from metcalcpy.util.utils import round_half_up, sum_column_data_by_name, PRECISION, get_total_values +from metcalcpy.util.safe_log import safe_log __author__ = 'Tatiana Burek' __version__ = '0.1.0' -def calculate_rps(input_data, columns_names, aggregation=False): +def calculate_rps(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of RPS - Ranked Probability Score Args: @@ -35,16 +36,26 @@ def calculate_rps(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting RPS calculation") + + # Retrieve the total number of values, considering aggregation if specified total = get_total_values(input_data, columns_names, aggregation) + safe_log(logger, "debug", f"Total values calculated: {total}") + + # Calculate the Ranked Probability Score rps = sum_column_data_by_name(input_data, columns_names, 'rps') / total result = round_half_up(rps, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + + safe_log(logger, "debug", f"RPS calculation completed successfully.
Result: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during RPS calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_rps_comp(input_data, columns_names, aggregation=False): +def calculate_rps_comp(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of RPS_COMP - Complement of the Ranked Probability Score It is computed simply as RPS_COMP = 1 - RPS @@ -61,16 +72,26 @@ def calculate_rps_comp(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting RPS_COMP calculation") + + # Retrieve the total number of values, considering aggregation if specified total = get_total_values(input_data, columns_names, aggregation) + safe_log(logger, "debug", f"Total values calculated: {total}") + + # Calculate the Complement of the Ranked Probability Score (RPS_COMP) rps_comp = sum_column_data_by_name(input_data, columns_names, 'rps_comp') / total result = round_half_up(rps_comp, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + + safe_log(logger, "debug", f"RPS_COMP calculation completed successfully. Result: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during RPS_COMP calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_rpss(input_data, columns_names, aggregation=False): +def calculate_rpss(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of RPSS - Ranked Probability Skill Score Args: @@ -86,18 +107,33 @@ def calculate_rpss(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting RPSS calculation") + + # Retrieve the total number of values, considering aggregation if specified total = get_total_values(input_data, columns_names, aggregation) + safe_log(logger, "debug", f"Total values calculated: {total}") + + # Calculate the Ranked Probability Score (RPS) rps = sum_column_data_by_name(input_data, columns_names, 'rps') / total + safe_log(logger, "debug", f"RPS calculated: {rps}") + + # Calculate the climatological Ranked Probability Score (RPS_CLIMO) rps_climo = sum_column_data_by_name(input_data, columns_names, 'rps_climo') / total + safe_log(logger, "debug", f"RPS_CLIMO calculated: {rps_climo}") + + # Calculate the Ranked Probability Skill Score (RPSS) rpss = 1 - rps / rps_climo result = round_half_up(rpss, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + + safe_log(logger, "debug", f"RPSS calculation completed successfully.
Result: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during RPSS calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_rps_total(input_data, columns_names): +def calculate_rps_total(input_data, columns_names, logger=None): """Performs calculation of Total number of matched pairs for Ranked Probability Score Statistics Args: @@ -110,5 +146,16 @@ def calculate_rps_total(input_data, columns_names): calculated Total number of matched pairs as float or None if some of the data values are missing or invalid """ - total = sum_column_data_by_name(input_data, columns_names, 'total') - return round_half_up(total, PRECISION) + try: + safe_log(logger, "debug", "Starting calculation of total matched pairs for RPS statistics") + + # Calculate the total number of matched pairs + total = sum_column_data_by_name(input_data, columns_names, 'total') + result = round_half_up(total, PRECISION) + + safe_log(logger, "debug", f"Total matched pairs calculated successfully. Result: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during RPS total calculation: {str(e)}") + result = None + + return result \ No newline at end of file diff --git a/metcalcpy/util/safe_log.py b/metcalcpy/util/safe_log.py new file mode 100644 index 00000000..6251aa30 --- /dev/null +++ b/metcalcpy/util/safe_log.py @@ -0,0 +1,22 @@ +# ============================* + # ** Copyright UCAR (c) 2020 + # ** University Corporation for Atmospheric Research (UCAR) + # ** National Center for Atmospheric Research (NCAR) + # ** Research Applications Lab (RAL) + # ** P.O.Box 3000, Boulder, Colorado, 80307-3000, USA + # ============================* + + +def safe_log(logger, log_level, message): + """ + Safely logs a message using the provided logger and log level. + + Args: + logger (logging.Logger): The logger object. If None, the message will not be logged. + log_level (str): The logging level to use (e.g., "info", "debug"). + message (str): The message to log.
+ """ + if logger: + log_method = getattr(logger, log_level, None) + if callable(log_method): + log_method(message) \ No newline at end of file diff --git a/metcalcpy/util/sal1l2_statistics.py b/metcalcpy/util/sal1l2_statistics.py index 6a7913d1..51c140ea 100644 --- a/metcalcpy/util/sal1l2_statistics.py +++ b/metcalcpy/util/sal1l2_statistics.py @@ -14,12 +14,13 @@ import warnings import numpy as np from metcalcpy.util.utils import round_half_up, sum_column_data_by_name, PRECISION, get_total_values +from metcalcpy.util.safe_log import safe_log __author__ = 'Tatiana Burek' __version__ = '0.1.0' -def calculate_anom_corr(input_data, columns_names, aggregation=False): +def calculate_anom_corr(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of ANOM_CORR - The Anomaly Correlation including normal confidence limits @@ -36,28 +37,42 @@ def calculate_anom_corr(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting calculation of ANOM_CORR") + total = get_total_values(input_data, columns_names, aggregation) ffbar = sum_column_data_by_name(input_data, columns_names, 'ffabar') / total fbar = sum_column_data_by_name(input_data, columns_names, 'fabar') / total oobar = sum_column_data_by_name(input_data, columns_names, 'ooabar') / total obar = sum_column_data_by_name(input_data, columns_names, 'oabar') / total fobar = sum_column_data_by_name(input_data, columns_names, 'foabar') / total + + safe_log(logger, "debug", "Intermediate values calculated successfully") + v = (total ** 2 * ffbar - total ** 2 * fbar ** 2) \ * (total ** 2 * oobar - total ** 2 * obar ** 2) + if v <= 0: + safe_log(logger, "warning", "Variance calculation resulted in a non-positive value") return None + anom_corr = (total ** 2 * fobar - total ** 2 * fbar * obar) / np.sqrt(v) + if anom_corr > 1: + safe_log(logger, "warning", f"Anomaly correlation exceeded 1: {anom_corr}") anom_corr = None else: anom_corr = round_half_up(anom_corr, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + + safe_log(logger, "debug", f"ANOM_CORR calculated successfully: {anom_corr}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during ANOM_CORR calculation: {str(e)}") anom_corr = None warnings.filterwarnings('ignore') return anom_corr -def calculate_anom_corr_raw(input_data, columns_names, aggregation=False): +def calculate_anom_corr_raw(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of ANOM_CORR_RAW - The Uncentered Anomaly Correlation including normal confidence limits @@ -74,27 +89,42 @@ def calculate_anom_corr_raw(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting calculation of ANOM_CORR_RAW") + total = get_total_values(input_data, columns_names, aggregation) ffbar = sum_column_data_by_name(input_data, columns_names, 'ffabar') / total oobar = sum_column_data_by_name(input_data, columns_names, 'ooabar') / total fobar = sum_column_data_by_name(input_data, columns_names, 'foabar') / total + + safe_log(logger, "debug", "Intermediate values calculated successfully") + v = ffbar * oobar + if v < 0: + safe_log(logger, "warning", "Variance calculation resulted in a negative value") return None + anom_corr_raw = fobar / np.sqrt(v) + if anom_corr_raw > 1: + safe_log(logger, "warning", f"Anomaly correlation raw exceeded 1: {anom_corr_raw}") anom_corr_raw 
= 1 elif anom_corr_raw < -1: + safe_log(logger, "warning", f"Anomaly correlation raw below -1: {anom_corr_raw}") anom_corr_raw = -1 anom_corr_raw = round_half_up(anom_corr_raw, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + + safe_log(logger, "debug", f"ANOM_CORR_RAW calculated successfully: {anom_corr_raw}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during ANOM_CORR_RAW calculation: {str(e)}") anom_corr_raw = None warnings.filterwarnings('ignore') return anom_corr_raw -def calculate_rmsfa(input_data, columns_names, aggregation=False): +def calculate_rmsfa(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of RMSFA - Root mean squared forecast anomaly (f-c) including normal confidence limits @@ -111,20 +141,30 @@ def calculate_rmsfa(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting calculation of RMSFA") + total = get_total_values(input_data, columns_names, aggregation) ffbar = sum_column_data_by_name(input_data, columns_names, 'ffabar') / total - if ffbar is None or ffbar < 0: + + if ffbar is None: + safe_log(logger, "warning", "FFBAR is None, cannot calculate RMSFA") + result = None + elif ffbar < 0: + safe_log(logger, "warning", f"FFBAR is negative: {ffbar}, cannot calculate RMSFA") result = None else: result = np.sqrt(ffbar) result = round_half_up(result, PRECISION) - except (TypeError, Warning): + safe_log(logger, "debug", f"RMSFA calculated successfully: {result}") + + except (TypeError, Warning) as e: + safe_log(logger, "warning", f"Exception occurred during RMSFA calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_rmsoa(input_data, columns_names, aggregation=False): +def calculate_rmsoa(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of RMSOA - Root mean squared observation anomaly (o-c) including normal confidence limits @@ -141,14 +181,24 @@ def calculate_rmsoa(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting calculation of RMSOA") + total = get_total_values(input_data, columns_names, aggregation) oobar = sum_column_data_by_name(input_data, columns_names, 'ooabar') / total - if oobar is None or oobar < 0: + + if oobar is None: + safe_log(logger, "warning", "OOBAR is None, cannot calculate RMSOA") + result = None + elif oobar < 0: + safe_log(logger, "warning", f"OOBAR is negative: {oobar}, cannot calculate RMSOA") result = None else: result = np.sqrt(oobar) result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "debug", f"RMSOA calculated successfully: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during RMSOA calculation: {str(e)}") result = None warnings.filterwarnings('ignore') - return result + return result \ No newline at end of file diff --git a/metcalcpy/util/sl1l2_statistics.py b/metcalcpy/util/sl1l2_statistics.py index 12508b45..b2832a94 100644 --- a/metcalcpy/util/sl1l2_statistics.py +++ b/metcalcpy/util/sl1l2_statistics.py @@ -14,12 +14,13 @@ import warnings import numpy as np from metcalcpy.util.utils import round_half_up, sum_column_data_by_name, PRECISION, get_total_values +from metcalcpy.util.safe_log import safe_log __author__ = 'Tatiana 
Burek' __version__ = '0.1.0' -def calculate_fbar(input_data, columns_names, aggregation=False): +def calculate_fbar(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of FBAR - Forecast mean Args: @@ -35,16 +36,25 @@ def calculate_fbar(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting calculation of FBAR") + total = get_total_values(input_data, columns_names, aggregation) result = sum_column_data_by_name(input_data, columns_names, 'fbar') / total - result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + + if result is None: + safe_log(logger, "warning", "FBAR calculation resulted in None") + else: + result = round_half_up(result, PRECISION) + safe_log(logger, "debug", f"FBAR calculated successfully: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during FBAR calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_obar(input_data, columns_names, aggregation=False): +def calculate_obar(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of OBAR - Observation Mean Args: @@ -60,16 +70,25 @@ def calculate_obar(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting calculation of OBAR") + total = get_total_values(input_data, columns_names, aggregation) result = sum_column_data_by_name(input_data, columns_names, 'obar') / total - result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + + if result is None: + safe_log(logger, "warning", "OBAR calculation resulted in None") + else: + result = round_half_up(result, PRECISION) + safe_log(logger, "debug", f"OBAR calculated successfully: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during OBAR calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_fstdev(input_data, columns_names, aggregation=False): +def calculate_fstdev(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of FSTDEV - Forecast standard deviation Args: @@ -85,21 +104,33 @@ def calculate_fstdev(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting calculation of FSTDEV") + total1 = 1 total = sum_column_data_by_name(input_data, columns_names, 'total') if aggregation: total1 = total + + safe_log(logger, "debug", f"Total: {total}, Total1: {total1}, Aggregation: {aggregation}") + fbar = sum_column_data_by_name(input_data, columns_names, 'fbar') / total1 ffbar = sum_column_data_by_name(input_data, columns_names, 'ffbar') / total1 - result = calculate_stddev(fbar * total, ffbar * total, total) + + safe_log(logger, "debug", f"Fbar: {fbar}, FFbar: {ffbar}") + + result = calculate_stddev(fbar * total, ffbar * total, total, logger=logger) result = round_half_up(result, PRECISION) - except (TypeError, Warning): + + safe_log(logger, "debug", f"FSTDEV calculated successfully: {result}") + + except (TypeError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during FSTDEV calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_ostdev(input_data, columns_names, 
aggregation=False): +def calculate_ostdev(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of OSTDEV - Observation Standard Deviation Args: @@ -115,21 +146,36 @@ def calculate_ostdev(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting calculation of OSTDEV") + total1 = 1 total = sum_column_data_by_name(input_data, columns_names, 'total') + safe_log(logger, "debug", f"Total sum calculated: {total}") + if aggregation: total1 = total + safe_log(logger, "debug", f"Total1 value for aggregation: {total1}") + obar = sum_column_data_by_name(input_data, columns_names, 'obar') / total1 + safe_log(logger, "debug", f"Obar calculated: {obar}") + oobar = sum_column_data_by_name(input_data, columns_names, 'oobar') / total1 - result = calculate_stddev(obar * total, oobar * total, total) + safe_log(logger, "debug", f"Oobar calculated: {oobar}") + + result = calculate_stddev(obar * total, oobar * total, total, logger=logger) + safe_log(logger, "debug", f"Standard deviation calculated: {result}") + result = round_half_up(result, PRECISION) - except (TypeError, Warning): + safe_log(logger, "debug", f"Final rounded OSTDEV result: {result}") + + except (TypeError, Warning, ZeroDivisionError, ValueError) as e: + safe_log(logger, "error", f"Error occurred during OSTDEV calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_fobar(input_data, columns_names, aggregation=False): +def calculate_fobar(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of FOBAR - Average product of forecast and observation Args: @@ -145,16 +191,25 @@ def calculate_fobar(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting calculation of FOBAR") + total = get_total_values(input_data, columns_names, aggregation) + safe_log(logger, "debug", f"Total sum calculated: {total}") + result = sum_column_data_by_name(input_data, columns_names, 'fobar') / total + safe_log(logger, "debug", f"Intermediate FOBAR result: {result}") + result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "debug", f"Final rounded FOBAR result: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error occurred during FOBAR calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_ffbar(input_data, columns_names, aggregation=False): +def calculate_ffbar(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of FFBAR - Average of forecast squared Args: @@ -170,16 +225,25 @@ def calculate_ffbar(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting calculation of FFBAR") + total = get_total_values(input_data, columns_names, aggregation) + safe_log(logger, "debug", f"Total sum calculated: {total}") + result = sum_column_data_by_name(input_data, columns_names, 'ffbar') / total + safe_log(logger, "debug", f"Intermediate FFBAR result: {result}") + result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "debug", f"Final rounded FFBAR result: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error occurred during FFBAR calculation: 
{str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_oobar(input_data, columns_names, aggregation=False): +def calculate_oobar(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of OOBAR - Average of observation squared Args: @@ -195,16 +259,25 @@ def calculate_oobar(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting calculation of OOBAR") + total = get_total_values(input_data, columns_names, aggregation) + safe_log(logger, "debug", f"Total sum calculated: {total}") + result = sum_column_data_by_name(input_data, columns_names, 'oobar') / total + safe_log(logger, "debug", f"Intermediate OOBAR result: {result}") + result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "debug", f"Final rounded OOBAR result: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error occurred during OOBAR calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_mae(input_data, columns_names, aggregation=False): +def calculate_mae(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of MAE - Mean absolute error Args: @@ -220,16 +293,25 @@ def calculate_mae(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting calculation of MAE") + total = get_total_values(input_data, columns_names, aggregation) + safe_log(logger, "debug", f"Total sum calculated: {total}") + result = sum_column_data_by_name(input_data, columns_names, 'mae') / total + safe_log(logger, "debug", f"Intermediate MAE result: {result}") + result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "debug", f"Final rounded MAE result: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error occurred during MAE calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_mbias(input_data, columns_names, aggregation=False): +def calculate_mbias(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of MBIAS - Multiplicative Bias Args: @@ -245,21 +327,33 @@ def calculate_mbias(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting calculation of MBIAS") + total = get_total_values(input_data, columns_names, aggregation) + safe_log(logger, "debug", f"Total sum calculated: {total}") + obar = sum_column_data_by_name(input_data, columns_names, 'obar') / total + safe_log(logger, "debug", f"Calculated OBAR (Observation Mean): {obar}") + if obar == 0: + safe_log(logger, "warning", "OBAR is zero, returning None for MBIAS") result = None else: fbar = sum_column_data_by_name(input_data, columns_names, 'fbar') / total + safe_log(logger, "debug", f"Calculated FBAR (Forecast Mean): {fbar}") + result = fbar / obar result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "debug", f"Final rounded MBIAS result: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error occurred during MBIAS calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def 
calculate_pr_corr(input_data, columns_names, aggregation=False): +def calculate_pr_corr(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of PR_CORR - Pearson correlation coefficient including normal confidence limits @@ -276,29 +370,49 @@ def calculate_pr_corr(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting calculation of PR_CORR") + total1 = 1 total = sum_column_data_by_name(input_data, columns_names, 'total') + safe_log(logger, "debug", f"Total sum calculated: {total}") + if aggregation: total1 = total + safe_log(logger, "debug", "Aggregation is enabled, setting total1 to total") + ffbar = sum_column_data_by_name(input_data, columns_names, 'ffbar') / total1 fbar = sum_column_data_by_name(input_data, columns_names, 'fbar') / total1 oobar = sum_column_data_by_name(input_data, columns_names, 'oobar') / total1 obar = sum_column_data_by_name(input_data, columns_names, 'obar') / total1 fobar = sum_column_data_by_name(input_data, columns_names, 'fobar') / total1 + + safe_log(logger, "debug", f"Calculated values - FFBar: {ffbar}, FBar: {fbar}, OOBAR: {oobar}, OBAR: {obar}, FOBAR: {fobar}") + v = (total ** 2 * ffbar - total ** 2 * fbar ** 2) \ * (total ** 2 * oobar - total ** 2 * obar ** 2) - pr_corr = (total ** 2 * fobar - total ** 2 * fbar * obar) / np.sqrt(v) - if v <= 0 or pr_corr > 1: + + if v <= 0: + safe_log(logger, "warning", "Calculation of variance 'v' resulted in a non-positive value, returning None for PR_CORR") pr_corr = None else: - pr_corr = round_half_up(pr_corr, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + pr_corr = (total ** 2 * fobar - total ** 2 * fbar * obar) / np.sqrt(v) + safe_log(logger, "debug", f"Calculated PR_CORR before rounding: {pr_corr}") + + if pr_corr > 1: + safe_log(logger, "warning", "PR_CORR value exceeds 1, setting PR_CORR to None") + pr_corr = None + else: + pr_corr = round_half_up(pr_corr, PRECISION) + safe_log(logger, "debug", f"Final rounded PR_CORR result: {pr_corr}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error occurred during PR_CORR calculation: {str(e)}") pr_corr = None warnings.filterwarnings('ignore') return pr_corr -def calculate_fe(input_data, columns_names, aggregation=False): +def calculate_fe(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of FE - Fractional error Args: @@ -314,18 +428,30 @@ def calculate_fe(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting FE calculation") total = get_total_values(input_data, columns_names, aggregation) + safe_log(logger, "debug", f"Total values calculated: {total}") + fbar = sum_column_data_by_name(input_data, columns_names, 'fbar') / total + safe_log(logger, "debug", f"fbar calculated: {fbar}") + obar = sum_column_data_by_name(input_data, columns_names, 'obar') / total + safe_log(logger, "debug", f"obar calculated: {obar}") + result = (fbar - obar) / fbar + safe_log(logger, "debug", f"Fractional error calculated: {result}") + result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "debug", f"Final result after rounding: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during FE calculation: {str(e)}") result = None warnings.filterwarnings('ignore') 
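# --- Illustrative sketch (not part of the patch): a quick check that the
# --- partial-sum form of PR_CORR above reduces to the textbook Pearson
# --- correlation. All names and values here are hypothetical.
import numpy as np
f = np.array([1.0, 2.0, 3.0, 4.0, 5.0])   # hypothetical forecasts
o = np.array([1.1, 1.9, 3.2, 3.8, 5.1])   # hypothetical observations
n = f.size
fbar, obar = f.mean(), o.mean()           # partial sums stored as per-pair means
ffbar, oobar, fobar = (f * f).mean(), (o * o).mean(), (f * o).mean()
v = (n ** 2 * ffbar - n ** 2 * fbar ** 2) * (n ** 2 * oobar - n ** 2 * obar ** 2)
pr_corr = (n ** 2 * fobar - n ** 2 * fbar * obar) / np.sqrt(v)
assert np.isclose(pr_corr, np.corrcoef(f, o)[0, 1])  # matches Pearson r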
return result -def calculate_me(input_data, columns_names, aggregation=False): +def calculate_me(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of ME - Mean error, aka Additive bias Args: @@ -341,18 +467,30 @@ def calculate_me(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting ME calculation") total = get_total_values(input_data, columns_names, aggregation) + safe_log(logger, "debug", f"Total values calculated: {total}") + fbar = sum_column_data_by_name(input_data, columns_names, 'fbar') / total + safe_log(logger, "debug", f"fbar calculated: {fbar}") + obar = sum_column_data_by_name(input_data, columns_names, 'obar') / total + safe_log(logger, "debug", f"obar calculated: {obar}") + result = fbar - obar + safe_log(logger, "debug", f"Mean error (ME) calculated: {result}") + result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "debug", f"Final result after rounding: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during ME calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_me2(input_data, columns_names, aggregation=False): +def calculate_me2(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of ME2 - The square of the mean error (bias) Args: @@ -368,16 +506,27 @@ def calculate_me2(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: - me = calculate_me(input_data, columns_names, aggregation) - result = me ** 2 - result = round_half_up(result, PRECISION) - except (TypeError, Warning): + safe_log(logger, "debug", "Starting ME2 calculation") + me = calculate_me(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"ME calculated: {me}") + + if me is not None: + result = me ** 2 + safe_log(logger, "debug", f"ME squared (ME2) calculated: {result}") + + result = round_half_up(result, PRECISION) + safe_log(logger, "debug", f"Final ME2 result after rounding: {result}") + else: + safe_log(logger, "warning", "ME calculation returned None, so ME2 cannot be calculated") + result = None + except (TypeError, Warning) as e: + safe_log(logger, "warning", f"Exception occurred during ME2 calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_mse(input_data, columns_names, aggregation=False): +def calculate_mse(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of MSE - Mean squared error Args: @@ -393,19 +542,31 @@ def calculate_mse(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting MSE calculation") + total = get_total_values(input_data, columns_names, aggregation) + safe_log(logger, "debug", f"Total values calculated: {total}") + ffbar = sum_column_data_by_name(input_data, columns_names, 'ffbar') / total oobar = sum_column_data_by_name(input_data, columns_names, 'oobar') / total fobar = sum_column_data_by_name(input_data, columns_names, 'fobar') / total + + safe_log(logger, "debug", f"FFBAR: {ffbar}, OOBAR: {oobar}, FOBAR: {fobar}") + result = ffbar + oobar - 2 * fobar + safe_log(logger, "debug", f"Calculated MSE before rounding: {result}") + result = round_half_up(result, PRECISION) - except (TypeError, Warning): + safe_log(logger, "debug", f"Final 
MSE result after rounding: {result}") + + except (TypeError, Warning) as e: + safe_log(logger, "warning", f"Exception occurred during MSE calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_msess(input_data, columns_names, aggregation=False): +def calculate_msess(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of MSESS - The mean squared error skill score Args: @@ -421,17 +582,24 @@ """ warnings.filterwarnings('error') try: - ostdev = calculate_ostdev(input_data, columns_names, aggregation) - mse = calculate_mse(input_data, columns_names, aggregation) + safe_log(logger, "debug", "Starting MSESS calculation") + + ostdev = calculate_ostdev(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"Calculated OSTDEV: {ostdev}") + + mse = calculate_mse(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"Calculated MSE: {mse}") result = 1.0 - mse / ostdev ** 2 result = round_half_up(result, PRECISION) + safe_log(logger, "debug", f"Final MSESS result after rounding: {result}") - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred during MSESS calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_rmse(input_data, columns_names, aggregation=False): +def calculate_rmse(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of RMSE - Root-mean squared error Args: @@ -447,15 +615,21 @@ """ warnings.filterwarnings('error') try: - result = np.sqrt(calculate_mse(input_data, columns_names, aggregation)) + safe_log(logger, "debug", "Starting RMSE calculation") + + mse = calculate_mse(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"Calculated MSE: {mse}") + result = np.sqrt(mse) result = round_half_up(result, PRECISION) - except (TypeError, Warning): + safe_log(logger, "debug", f"Final RMSE result after rounding: {result}") + except (TypeError, Warning) as e: + safe_log(logger, "warning", f"Exception occurred during RMSE calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_si(input_data, columns_names, aggregation=False): +def calculate_si(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of SI - Scatter Index Args: @@ -471,17 +645,24 @@ """ warnings.filterwarnings('error') try: - rmse = calculate_rmse(input_data, columns_names, aggregation) - obar = calculate_obar(input_data, columns_names, aggregation) + safe_log(logger, "debug", "Starting SI calculation") + + rmse = calculate_rmse(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"Calculated RMSE: {rmse}") + + obar = calculate_obar(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"Calculated OBAR: {obar}") result = rmse / obar result = round_half_up(result, PRECISION) - except (TypeError, Warning): + safe_log(logger, "debug", f"Final SI result after rounding: {result}") + except (TypeError, Warning, ZeroDivisionError) as e: + safe_log(logger, "warning", f"Exception occurred during SI calculation: {str(e)}") result = None
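# --- Illustrative sketch (not part of the patch): the partial-sum identity
# --- behind calculate_mse/calculate_rmse above,
# --- E[(f - o)^2] = E[f^2] + E[o^2] - 2*E[f*o]. Values are hypothetical.
import numpy as np
f = np.array([0.5, 1.5, 2.5])
o = np.array([1.0, 1.0, 2.0])
ffbar, oobar, fobar = (f * f).mean(), (o * o).mean(), (f * o).mean()
mse = ffbar + oobar - 2 * fobar
assert np.isclose(np.sqrt(mse), np.sqrt(((f - o) ** 2).mean()))  # RMSE two ways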
warnings.filterwarnings('ignore') return result -def calculate_estdev(input_data, columns_names, aggregation=False): +def calculate_estdev(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of ESTDEV - Standard deviation of the error Args: @@ -497,18 +678,27 @@ def calculate_estdev(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting ESTDEV calculation") + total = sum_column_data_by_name(input_data, columns_names, 'total') - me = calculate_me(input_data, columns_names) - mse = calculate_mse(input_data, columns_names) - result = calculate_stddev(me * total, mse * total, total) + safe_log(logger, "debug", f"Calculated total: {total}") + + me = calculate_me(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"Calculated ME: {me}") + + mse = calculate_mse(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"Calculated MSE: {mse}") + result = calculate_stddev(me * total, mse * total, total, logger=logger) result = round_half_up(result, PRECISION) - except (TypeError, Warning): + safe_log(logger, "debug", f"Final ESTDEV result after rounding: {result}") + except (TypeError, Warning, ZeroDivisionError) as e: + safe_log(logger, "warning", f"Exception occurred during ESTDEV calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_bcmse(input_data, columns_names, aggregation=False): +def calculate_bcmse(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of BCMSE - Bias-corrected mean squared error Args: @@ -524,20 +714,27 @@ def calculate_bcmse(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting BCMSE calculation") + + mse = calculate_mse(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"Calculated MSE: {mse}") - mse = calculate_mse(input_data, columns_names, aggregation) - me = calculate_me(input_data, columns_names, aggregation) + me = calculate_me(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"Calculated ME: {me}") result = mse - me ** 2 result = round_half_up(result, PRECISION) + safe_log(logger, "debug", f"Final BCMSE result after rounding: {result}") if result < 0: + safe_log(logger, "debug", "BCMSE result is negative, setting to 0.") return 0. 
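# --- Illustrative sketch (not part of the patch): calculate_bcmse above
# --- evaluates MSE - ME^2, which is the population variance of the error and
# --- so is non-negative up to floating-point error; that is why the code
# --- above clamps negative results to 0. Error values here are hypothetical.
import numpy as np
e = np.array([0.2, -0.1, 0.4, 0.0])       # hypothetical forecast-minus-obs errors
bcmse = (e ** 2).mean() - e.mean() ** 2
assert np.isclose(bcmse, e.var())          # numpy var() is the population variance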
- except (TypeError, Warning): + except (TypeError, Warning, ZeroDivisionError) as e: + safe_log(logger, "warning", f"Exception occurred during BCMSE calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_bcrmse(input_data, columns_names, aggregation=False): +def calculate_bcrmse(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of BCRMSE - Bias-corrected root mean square error Args: @@ -553,15 +750,17 @@ def calculate_bcrmse(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: - result = np.sqrt(calculate_bcmse(input_data, columns_names, aggregation)) + result = np.sqrt(calculate_bcmse(input_data, columns_names, aggregation, logger=logger)) result = round_half_up(result, PRECISION) - except (TypeError, Warning): + safe_log(logger, "debug", f"Final BCRMSE result after rounding: {result}") + except (TypeError, Warning, ZeroDivisionError) as e: + safe_log(logger, "warning", f"Exception occurred during BCRMSE calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_stddev(sum_total, sum_sq, n): +def calculate_stddev(sum_total, sum_sq, n, logger=None): """Performs calculation of STDDEV - Standard deviation Args: @@ -575,15 +774,23 @@ def calculate_stddev(sum_total, sum_sq, n): or None if some of the data values are missing or invalid """ if n < 1: + safe_log(logger, "warning", f"Invalid number of observations: {n}") return None + + safe_log(logger, "debug", f"Calculating variance with sum_total: {sum_total}, sum_sq: {sum_sq}, n: {n}") v = (sum_sq - sum_total * sum_total / n) / (n - 1) + if v < 0: + safe_log(logger, "warning", f"Calculated variance is negative: {v}") return None - return np.sqrt(v) + stddev = np.sqrt(v) + safe_log(logger, "debug", f"Calculated standard deviation: {stddev}") + + return stddev -def calculate_sl1l2_total(input_data, columns_names): +def calculate_sl1l2_total(input_data, columns_names, logger=None): """Performs calculation of Total number of matched pairs for Scalar Partial Sums Args: @@ -596,11 +803,18 @@ def calculate_sl1l2_total(input_data, columns_names): calculated Total number of matched pairs as float or None if some of the data values are missing or invalid """ - total = sum_column_data_by_name(input_data, columns_names, 'total') - return round_half_up(total, PRECISION) + try: + safe_log(logger, "debug", "Calculating total number of matched pairs for Scalar Partial Sums") + total = sum_column_data_by_name(input_data, columns_names, 'total') + result = round_half_up(total, PRECISION) + safe_log(logger, "debug", f"Total number of matched pairs calculated: {result}") + return result + except Exception as e: + safe_log(logger, "warning", f"Exception occurred while calculating total number of matched pairs: {str(e)}") + return None -def calculate_sal1l2_total(input_data, columns_names): +def calculate_sal1l2_total(input_data, columns_names, logger=None): """Performs calculation of Total number of matched pairs for Scalar Anomaly Partial Sums Args: @@ -613,5 +827,12 @@ def calculate_sal1l2_total(input_data, columns_names): calculated Total number of matched pairs as float or None if some of the data values are missing or invalid """ - total = sum_column_data_by_name(input_data, columns_names, 'total') - return round_half_up(total, PRECISION) + try: + safe_log(logger, "debug", "Calculating total number of matched pairs for Scalar Anomaly Partial Sums") + total = sum_column_data_by_name(input_data, 
columns_names, 'total') + result = round_half_up(total, PRECISION) + safe_log(logger, "debug", f"Total number of matched pairs calculated: {result}") + return result + except Exception as e: + safe_log(logger, "warning", f"Exception occurred while calculating total number of matched pairs: {str(e)}") + return None \ No newline at end of file diff --git a/metcalcpy/util/ssvar_statistics.py b/metcalcpy/util/ssvar_statistics.py index 56cc068d..299a0ce3 100644 --- a/metcalcpy/util/ssvar_statistics.py +++ b/metcalcpy/util/ssvar_statistics.py @@ -21,12 +21,13 @@ calculate_bcmse, calculate_bcrmse, calculate_rmse, \ calculate_me2, calculate_msess from metcalcpy.util.utils import round_half_up, sum_column_data_by_name, PRECISION, get_total_values +from metcalcpy.util.safe_log import safe_log __author__ = 'Tatiana Burek' __version__ = '0.1.0' -def calculate_ssvar_fbar(input_data, columns_names, aggregation=False): +def calculate_ssvar_fbar(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of SSVAR_FBAR - Average forecast value Args: @@ -40,10 +41,17 @@ def calculate_ssvar_fbar(input_data, columns_names, aggregation=False): calculated SSVAR_FBAR as float or None if some of the data values are missing or invalid """ - return calculate_fbar(input_data, columns_names, aggregation) + try: + safe_log(logger, "debug", "Starting calculation of SSVAR_FBAR - Average forecast value") + result = calculate_fbar(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"SSVAR_FBAR calculated: {result}") + return result + except Exception as e: + safe_log(logger, "warning", f"Exception occurred while calculating SSVAR_FBAR: {str(e)}") + return None -def calculate_ssvar_fstdev(input_data, columns_names, aggregation=False): +def calculate_ssvar_fstdev(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of SSVAR_FSTDEV - Standard deviation of the error Args: @@ -57,10 +65,17 @@ def calculate_ssvar_fstdev(input_data, columns_names, aggregation=False): calculated SSVAR_FSTDEV as float or None if some of the data values are missing or invalid """ - return calculate_fstdev(input_data, columns_names, aggregation) + try: + safe_log(logger, "debug", "Starting calculation of SSVAR_FSTDEV - Standard deviation of the forecast") + result = calculate_fstdev(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"SSVAR_FSTDEV calculated: {result}") + return result + except Exception as e: + safe_log(logger, "warning", f"Exception occurred while calculating SSVAR_FSTDEV: {str(e)}") + return None -def calculate_ssvar_obar(input_data, columns_names, aggregation=False): +def calculate_ssvar_obar(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of SSVAR_OBAR - Average observed value Args: @@ -74,10 +89,17 @@ def calculate_ssvar_obar(input_data, columns_names, aggregation=False): calculated SSVAR_OBAR as float or None if some of the data values are missing or invalid """ - return calculate_obar(input_data, columns_names, aggregation) + try: + safe_log(logger, "debug", "Starting calculation of SSVAR_OBAR - Average observed value") + result = calculate_obar(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"SSVAR_OBAR calculated: {result}") + return result + except Exception as e: + safe_log(logger, "warning", f"Exception occurred while calculating SSVAR_OBAR: {str(e)}") + return None -def calculate_ssvar_ostdev(input_data, columns_names, 
aggregation=False): +def calculate_ssvar_ostdev(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of SSVAR_OSTDEV - Standard deviation of the error Args: @@ -91,10 +113,17 @@ def calculate_ssvar_ostdev(input_data, columns_names, aggregation=False): calculated SSVAR_OSTDEV as float or None if some of the data values are missing or invalid """ - return calculate_ostdev(input_data, columns_names, aggregation) + try: + safe_log(logger, "debug", "Starting calculation of SSVAR_OSTDEV - Standard deviation of the error") + result = calculate_ostdev(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"SSVAR_OSTDEV calculated: {result}") + return result + except Exception as e: + safe_log(logger, "warning", f"Exception occurred while calculating SSVAR_OSTDEV: {str(e)}") + return None -def calculate_ssvar_pr_corr(input_data, columns_names, aggregation=False): +def calculate_ssvar_pr_corr(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of SSVAR_PR_CORR - Pearson correlation coefficient Args: @@ -108,10 +137,17 @@ def calculate_ssvar_pr_corr(input_data, columns_names, aggregation=False): calculated SSVAR_PR_CORR as float or None if some of the data values are missing or invalid """ - return calculate_pr_corr(input_data, columns_names, aggregation) + try: + safe_log(logger, "debug", "Starting calculation of SSVAR_PR_CORR - Pearson correlation coefficient") + result = calculate_pr_corr(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"SSVAR_PR_CORR calculated: {result}") + return result + except Exception as e: + safe_log(logger, "warning", f"Exception occurred while calculating SSVAR_PR_CORR: {str(e)}") + return None -def calculate_ssvar_me(input_data, columns_names, aggregation=False): +def calculate_ssvar_me(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of SSVAR_ME - Mean error Args: @@ -125,10 +161,16 @@ def calculate_ssvar_me(input_data, columns_names, aggregation=False): calculated SSVAR_ME as float or None if some of the data values are missing or invalid """ - return calculate_me(input_data, columns_names, aggregation) - - -def calculate_ssvar_estdev(input_data, columns_names, aggregation=False): + try: + safe_log(logger, "debug", "Starting calculation of SSVAR_ME - Mean error") + result = calculate_me(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"SSVAR_ME calculated: {result}") + return result + except Exception as e: + safe_log(logger, "warning", f"Exception occurred while calculating SSVAR_ME: {str(e)}") + return None + +def calculate_ssvar_estdev(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of SSVAR_ESTDEV - Standard deviation of the error Args: @@ -142,10 +184,17 @@ def calculate_ssvar_estdev(input_data, columns_names, aggregation=False): calculated SSVAR_ESTDEV as float or None if some of the data values are missing or invalid """ - return calculate_estdev(input_data, columns_names, aggregation) + try: + safe_log(logger, "debug", "Starting calculation of SSVAR_ESTDEV - Standard deviation of the error") + result = calculate_estdev(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"SSVAR_ESTDEV calculated: {result}") + return result + except Exception as e: + safe_log(logger, "warning", f"Exception occurred while calculating SSVAR_ESTDEV: {str(e)}") + return None -def calculate_ssvar_mse(input_data, 
columns_names, aggregation=False): +def calculate_ssvar_mse(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of SSVAR_MSE - Mean squared error Args: @@ -158,10 +207,17 @@ calculated SSVAR_MSE as float or None if some of the data values are missing or invalid """ - return calculate_mse(input_data, columns_names, aggregation) + try: + safe_log(logger, "debug", "Starting calculation of SSVAR_MSE - Mean squared error") + result = calculate_mse(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"SSVAR_MSE calculated: {result}") + return result + except Exception as e: + safe_log(logger, "warning", f"Exception occurred while calculating SSVAR_MSE: {str(e)}") + return None -def calculate_ssvar_bcmse(input_data, columns_names, aggregation=False): +def calculate_ssvar_bcmse(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of SSVAR_BCMSE - Bias corrected root mean squared error Args: @@ -175,10 +231,17 @@ calculated SSVAR_BCMSE as float or None if some of the data values are missing or invalid """ - return calculate_bcmse(input_data, columns_names, aggregation) + try: + safe_log(logger, "debug", "Starting calculation of SSVAR_BCMSE - Bias corrected mean squared error") + result = calculate_bcmse(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"SSVAR_BCMSE calculated: {result}") + return result + except Exception as e: + safe_log(logger, "warning", f"Exception occurred while calculating SSVAR_BCMSE: {str(e)}") + return None -def calculate_ssvar_bcrmse(input_data, columns_names, aggregation=False): +def calculate_ssvar_bcrmse(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of SSVAR_BCRMSE - Bias corrected root mean squared error Args: @@ -192,10 +255,17 @@ calculated SSVAR_BCRMSE as float or None if some of the data values are missing or invalid """ - return calculate_bcrmse(input_data, columns_names, aggregation) + try: + safe_log(logger, "debug", "Starting calculation of SSVAR_BCRMSE - Bias corrected root mean squared error") + result = calculate_bcrmse(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"SSVAR_BCRMSE calculated: {result}") + return result + except Exception as e: + safe_log(logger, "warning", f"Exception occurred while calculating SSVAR_BCRMSE: {str(e)}") + return None -def calculate_ssvar_rmse(input_data, columns_names, aggregation=False): +def calculate_ssvar_rmse(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of SSVAR_RMSE - Root mean squared error Args: @@ -209,10 +279,17 @@ calculated SSVAR_RMSE as float or None if some of the data values are missing or invalid """ - return calculate_rmse(input_data, columns_names, aggregation) + try: + safe_log(logger, "debug", "Starting calculation of SSVAR_RMSE - Root mean squared error") + result = calculate_rmse(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"SSVAR_RMSE calculated: {result}") + return result + except Exception as e: + safe_log(logger, "warning", f"Exception occurred while calculating SSVAR_RMSE: {str(e)}") + return None -def 
calculate_ssvar_anom_corr(input_data, columns_names, aggregation=False): +def calculate_ssvar_anom_corr(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of SSVAR_ANOM_CORR - Args: @@ -226,17 +303,29 @@ def calculate_ssvar_anom_corr(input_data, columns_names, aggregation=False): calculated SSVAR_ANOM_CORR as float or None if some of the data values are missing or invalid """ - # change the names to comply with sal1l2 names - sal1l2_columns_names = np.copy(columns_names) - sal1l2_columns_names[sal1l2_columns_names == 'ffbar'] = 'ffabar' - sal1l2_columns_names[sal1l2_columns_names == 'fbar'] = 'fabar' - sal1l2_columns_names[sal1l2_columns_names == 'oobar'] = 'ooabar' - sal1l2_columns_names[sal1l2_columns_names == 'obar'] = 'oabar' - sal1l2_columns_names[sal1l2_columns_names == 'fobar'] = 'foabar' - return calculate_anom_corr(input_data, sal1l2_columns_names, aggregation) + try: + safe_log(logger, "debug", "Starting calculation of SSVAR_ANOM_CORR") + + # change the names to comply with sal1l2 names + sal1l2_columns_names = np.copy(columns_names) + sal1l2_columns_names[sal1l2_columns_names == 'ffbar'] = 'ffabar' + sal1l2_columns_names[sal1l2_columns_names == 'fbar'] = 'fabar' + sal1l2_columns_names[sal1l2_columns_names == 'oobar'] = 'ooabar' + sal1l2_columns_names[sal1l2_columns_names == 'obar'] = 'oabar' + sal1l2_columns_names[sal1l2_columns_names == 'fobar'] = 'foabar' + + safe_log(logger, "debug", f"Column names adjusted for sal1l2: {sal1l2_columns_names}") + + result = calculate_anom_corr(input_data, sal1l2_columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"SSVAR_ANOM_CORR calculated: {result}") + + return result + except Exception as e: + safe_log(logger, "warning", f"Exception occurred while calculating SSVAR_ANOM_CORR: {str(e)}") + return None -def calculate_ssvar_me2(input_data, columns_names, aggregation=False): +def calculate_ssvar_me2(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of SSVAR_ME2 - Args: @@ -250,10 +339,19 @@ def calculate_ssvar_me2(input_data, columns_names, aggregation=False): calculated SSVAR_ME2 as float or None if some of the data values are missing or invalid """ - return calculate_me2(input_data, columns_names, aggregation) + try: + safe_log(logger, "debug", "Starting calculation of SSVAR_ME2") + + result = calculate_me2(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"SSVAR_ME2 calculated: {result}") + return result + except Exception as e: + safe_log(logger, "warning", f"Exception occurred while calculating SSVAR_ME2: {str(e)}") + return None -def calculate_ssvar_msess(input_data, columns_names, aggregation=False): + +def calculate_ssvar_msess(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of SSVAR_MSESS - Args: @@ -268,10 +366,19 @@ def calculate_ssvar_msess(input_data, columns_names, aggregation=False): or None if some of the data values are missing or invalid """ - return calculate_msess(input_data, columns_names, aggregation) + try: + safe_log(logger, "debug", "Starting calculation of SSVAR_MSESS") + + result = calculate_msess(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"SSVAR_MSESS calculated: {result}") + + return result + except Exception as e: + safe_log(logger, "warning", f"Exception occurred while calculating SSVAR_MSESS: {str(e)}") + return None -def calculate_ssvar_spread(input_data, columns_names, aggregation=False): +def 
calculate_ssvar_spread(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of SSVAR_SPREAD - Args: @@ -287,17 +394,23 @@ def calculate_ssvar_spread(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting calculation of SSVAR_SPREAD") + total = get_total_values(input_data, columns_names, aggregation) var_mean = sum_column_data_by_name(input_data, columns_names, 'var_mean') / total result = np.sqrt(var_mean) result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + + safe_log(logger, "debug", f"SSVAR_SPREAD calculated: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred while calculating SSVAR_SPREAD: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_ssvar_total(input_data, columns_names): +def calculate_ssvar_total(input_data, columns_names, logger=None): """Performs calculation of Total number of matched pairs for Spread/Skill Variance Args: @@ -310,5 +423,16 @@ def calculate_ssvar_total(input_data, columns_names): calculated Total number of matched pairs as float or None if some of the data values are missing or invalid """ - total = sum_column_data_by_name(input_data, columns_names, 'total_orig') - return round_half_up(total, PRECISION) + try: + safe_log(logger, "debug", "Starting calculation of SSVAR_TOTAL") + + total = sum_column_data_by_name(input_data, columns_names, 'total_orig') + result = round_half_up(total, PRECISION) + + safe_log(logger, "debug", f"SSVAR_TOTAL calculated: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "warning", f"Exception occurred while calculating SSVAR_TOTAL: {str(e)}") + result = None + + return result diff --git a/metcalcpy/util/tost_paired.py b/metcalcpy/util/tost_paired.py index 5de53e2f..fedd22fc 100644 --- a/metcalcpy/util/tost_paired.py +++ b/metcalcpy/util/tost_paired.py @@ -17,6 +17,7 @@ import math from metcalcpy.util.utils import round_half_up, PRECISION +from metcalcpy.util.safe_log import safe_log CODE_TO_OUTCOME_TO_MESSAGE = { 'diff_eqv': 'statistically different from zero and statistically equivalent to zero', @@ -26,7 +27,7 @@ } -def pt(q, df, ncp=0, lower_tail=True): +def pt(q, df, ncp=0, lower_tail=True, logger=None): """ Calculates the cumulative of the t-distribution @@ -46,10 +47,11 @@ def pt(q, df, ncp=0, lower_tail=True): result = nct.cdf(x=q, df=df, nc=ncp, loc=0, scale=1) if lower_tail is False: result = 1 - result + safe_log(logger, "debug", f"Adjusted result for upper tail: {result}") return result -def qt(p, df, ncp=0): +def qt(p, df, ncp=0, logger=None): """ Calculates the quantile function of the t-distribution @@ -65,10 +67,11 @@ def qt(p, df, ncp=0): result = t.ppf(q=p, df=df, loc=0, scale=1) else: result = nct.ppf(q=p, df=df, nc=ncp, loc=0, scale=1) + safe_log(logger, "debug", f"Calculated non-central t-distribution quantile: {result}") return result -def tost_paired(n, m1, m2, sd1, sd2, r12, low_eqbound_dz, high_eqbound_dz, alpha=None): +def tost_paired(n, m1, m2, sd1, sd2, r12, low_eqbound_dz, high_eqbound_dz, alpha=None, logger=None): """ TOST function for a dependent t-test (Cohen's dz). 
Based on Rscript function TOSTpaired @@ -100,86 +103,89 @@ def tost_paired(n, m1, m2, sd1, sd2, r12, low_eqbound_dz, high_eqbound_dz, alpha """ if not alpha: alpha = 0.05 + + safe_log(logger, "debug", f"Starting TOST paired analysis with alpha={alpha}, n={n}, m1={m1}, m2={m2}, " + f"sd1={sd1}, sd2={sd2}, r12={r12}, low_eqbound_dz={low_eqbound_dz}, " + f"high_eqbound_dz={high_eqbound_dz}") + if low_eqbound_dz >= high_eqbound_dz: - print( - 'WARNING: The lower bound is equal to or larger than the upper bound.' - ' Check the plot and output to see if the bounds are specified as you intended.') + safe_log(logger, "warning", 'WARNING: The lower bound is equal to or larger than the upper bound. ' + 'Check the plot and output to see if the bounds are specified as you intended.') if n < 2: - print("The sample size should be larger than 1.") + safe_log(logger, "error", "The sample size should be larger than 1.") sys.exit() if 1 <= alpha or alpha <= 0: - print("The alpha level should be a positive value between 0 and 1.") + safe_log(logger, "error", "The alpha level should be a positive value between 0 and 1.") sys.exit() + if sd1 <= 0 or sd2 <= 0: - print("The standard deviation should be a positive value.") + safe_log(logger, "error", "The standard deviation should be a positive value.") sys.exit() + if 1 < r12 or r12 < -1: - print("The correlation should be a value between -1 and 1.") + safe_log(logger, "error", "The correlation should be a value between -1 and 1.") sys.exit() - sdif = math.sqrt(sd1 * sd1 + sd2 * sd2 - 2 * r12 * sd1 * sd2) - low_eqbound = low_eqbound_dz * sdif - high_eqbound = high_eqbound_dz * sdif - se = sdif / math.sqrt(n) - t = (m1 - m2) / se - degree_f = n - 1 - - pttest = 2 * pt(abs(t), degree_f, lower_tail=False) - - t1 = ((m1 - m2) - (low_eqbound_dz * sdif)) / se - p1 = pt(t1, degree_f, lower_tail=False) - t2 = ((m1 - m2) - (high_eqbound_dz * sdif)) / se - p2 = pt(t2, degree_f, lower_tail=True) - - ll90 = ((m1 - m2) - qt(1 - alpha, degree_f) * se) - ul90 = ((m1 - m2) + qt(1 - alpha, degree_f) * se) - ptost = max(p1, p2) - - dif = (m1 - m2) - ll95 = ((m1 - m2) - qt(1 - (alpha / 2), degree_f) * se) - ul95 = ((m1 - m2) + qt(1 - (alpha / 2), degree_f) * se) - xlim_l = min(ll90, low_eqbound) - max(ul90 - ll90, high_eqbound - low_eqbound) / 10 - xlim_u = max(ul90, high_eqbound) + max(ul90 - ll90, high_eqbound - low_eqbound) / 10 - - if pttest <= alpha and ptost <= alpha: - combined_outcome = 'diff_eqv' - - if pttest < alpha and ptost > alpha: - combined_outcome = 'diff_no_eqv' - - if pttest > alpha and ptost <= alpha: - combined_outcome = 'no_diff_eqv' - - if pttest > alpha and ptost > alpha: - combined_outcome = 'no_diff_no_eqv' - - if pttest < alpha: - test_outcome = 'significant' - else: - test_outcome = 'non-significant' - - if ptost < alpha: - tost_outcome = 'significant' - else: - tost_outcome = 'non-significant' - - return { - 'dif': round_half_up(dif, PRECISION), - 't': (round_half_up(t1, PRECISION), round_half_up(t2, PRECISION)), - 'p': (round_half_up(p1, PRECISION), round_half_up(p2, PRECISION)), - 'degrees_of_freedom': round_half_up(degree_f, PRECISION), - 'ci_tost': (round_half_up(ll90, PRECISION), round_half_up(ul90, PRECISION) ), - 'ci_ttest': (round_half_up(ll95, PRECISION), round_half_up(ul95, PRECISION)), - 'eqbound': (round_half_up(low_eqbound, PRECISION), round_half_up(high_eqbound, PRECISION)), - 'xlim': (round_half_up(xlim_l, PRECISION), round_half_up(xlim_u, PRECISION)), - 'combined_outcome': combined_outcome, - 'test_outcome': test_outcome, - 
'tost_outcome': tost_outcome - } - - - - - + try: + sdif = math.sqrt(sd1 * sd1 + sd2 * sd2 - 2 * r12 * sd1 * sd2) + low_eqbound = low_eqbound_dz * sdif + high_eqbound = high_eqbound_dz * sdif + se = sdif / math.sqrt(n) + t = (m1 - m2) / se + degree_f = n - 1 + + pttest = 2 * pt(abs(t), degree_f, lower_tail=False, logger=logger) + + t1 = ((m1 - m2) - (low_eqbound_dz * sdif)) / se + p1 = pt(t1, degree_f, lower_tail=False, logger=logger) + t2 = ((m1 - m2) - (high_eqbound_dz * sdif)) / se + p2 = pt(t2, degree_f, lower_tail=True, logger=logger) + + ll90 = ((m1 - m2) - qt(1 - alpha, degree_f, logger=logger) * se) + ul90 = ((m1 - m2) + qt(1 - alpha, degree_f, logger=logger) * se) + ptost = max(p1, p2) + + dif = (m1 - m2) + ll95 = ((m1 - m2) - qt(1 - (alpha / 2), degree_f, logger=logger) * se) + ul95 = ((m1 - m2) + qt(1 - (alpha / 2), degree_f, logger=logger) * se) + xlim_l = min(ll90, low_eqbound) - max(ul90 - ll90, high_eqbound - low_eqbound) / 10 + xlim_u = max(ul90, high_eqbound) + max(ul90 - ll90, high_eqbound - low_eqbound) / 10 + + combined_outcome = '' + if pttest <= alpha and ptost <= alpha: + combined_outcome = 'diff_eqv' + + if pttest < alpha and ptost > alpha: + combined_outcome = 'diff_no_eqv' + + if pttest > alpha and ptost <= alpha: + combined_outcome = 'no_diff_eqv' + + if pttest > alpha and ptost > alpha: + combined_outcome = 'no_diff_no_eqv' + + test_outcome = 'significant' if pttest < alpha else 'non-significant' + tost_outcome = 'significant' if ptost < alpha else 'non-significant' + + safe_log(logger, "info", f"TOST Paired analysis completed. Test Outcome: {test_outcome}, " + f"TOST Outcome: {tost_outcome}, Combined Outcome: {combined_outcome}") + + return { + 'dif': round_half_up(dif, PRECISION), + 't': (round_half_up(t1, PRECISION), round_half_up(t2, PRECISION)), + 'p': (round_half_up(p1, PRECISION), round_half_up(p2, PRECISION)), + 'degrees_of_freedom': round_half_up(degree_f, PRECISION), + 'ci_tost': (round_half_up(ll90, PRECISION), round_half_up(ul90, PRECISION)), + 'ci_ttest': (round_half_up(ll95, PRECISION), round_half_up(ul95, PRECISION)), + 'eqbound': (round_half_up(low_eqbound, PRECISION), round_half_up(high_eqbound, PRECISION)), + 'xlim': (round_half_up(xlim_l, PRECISION), round_half_up(xlim_u, PRECISION)), + 'combined_outcome': combined_outcome, + 'test_outcome': test_outcome, + 'tost_outcome': tost_outcome + } + + except Exception as e: + safe_log(logger, "error", f"Exception occurred during TOST Paired analysis: {str(e)}") + return None \ No newline at end of file diff --git a/metcalcpy/util/utils.py b/metcalcpy/util/utils.py index c0791c0c..9a0ff8ff 100644 --- a/metcalcpy/util/utils.py +++ b/metcalcpy/util/utils.py @@ -32,7 +32,7 @@ from metcalcpy import GROUP_SEPARATOR, DATE_TIME_REGEX from metcalcpy.event_equalize import event_equalize from metcalcpy.util.wald_wolfowitz_runs_test import runs_test - +from metcalcpy.util.safe_log import safe_log OPERATION_TO_SIGN = { 'DIFF': '-', @@ -62,11 +62,11 @@ class DerivedCurveComponent: """ Holds components and the operation for a derived series """ - def __init__(self, first_component, second_component, derived_operation): + def __init__(self, first_component, second_component, derived_operation, logger=None): self.first_component = first_component self.second_component = second_component self.derived_operation = derived_operation - + self.logger = logger def represents_int(possible_int): """Checks if the value is integer. 
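# --- Illustrative sketch (not part of the patch): the guard pattern that the
# --- safe_log helper imported above provides for every call in this
# --- changeset. The real metcalcpy/util/safe_log.py may differ in detail.
import logging

def safe_log(logger, level, message):
    # Forward to the logger only when one was supplied; stay a no-op
    # otherwise, so library code never requires logging to be configured.
    if logger is not None:
        getattr(logger, level)(message)

log = logging.getLogger("metcalcpy")
safe_log(log, "debug", "reaches the logger when one is configured")
safe_log(None, "debug", "silently ignored")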
@@ -136,7 +136,7 @@ def get_derived_curve_name(list_of_names): return f"{operation}({list_of_names[0]}{OPERATION_TO_SIGN[operation]}{list_of_names[1]})" -def calc_derived_curve_value(val1, val2, operation): +def calc_derived_curve_value(val1, val2, operation, logger=None): """Performs the operation with two numpy arrays. Operations can be 'DIFF' - difference between elements of array 1 and 2 @@ -155,9 +155,11 @@ def calc_derived_curve_value(val1, val2, operation): or None if one of arrays is None or one of the elements is None or arrays have different size """ + safe_log(logger, "debug", f"Starting operation {operation} with val1: {val1} and val2: {val2}") if val1 is None or val2 is None or None in val1 \ or None in val2 or len(val1) != len(val2): + safe_log(logger, "warning", "Input values are invalid: None detected or lengths mismatch.") return None result_val = None @@ -181,6 +183,7 @@ def calc_derived_curve_value(val1, val2, operation): corr_val, -0.001, 0.001 ) + safe_log(logger, "debug", f"Operation {operation} completed. Result: {result_val}") return result_val @@ -276,7 +279,6 @@ def sum_column_data_by_name(input_data, columns, column_name, rm_none=True): calculated SUM as float or None if all of the data values are non """ - data_array = column_data_by_name(input_data, columns, column_name) if data_array is None or np.isnan(data_array).all(): @@ -377,7 +379,7 @@ def nrow_column_data_by_name_value(input_data, columns, filters): return input_data_filtered.shape[0] -def perfect_score_adjustment(mean_stats_1, mean_stats_2, statistic, pval): +def perfect_score_adjustment(mean_stats_1, mean_stats_2, statistic, pval, logger=None): """ Adjusts the perfect score depending on the statistic Args: @@ -422,17 +424,21 @@ def perfect_score_adjustment(mean_stats_1, mean_stats_2, statistic, pval): if statistic.upper() in na_perf_score_stats: result = None + safe_log(logger, "debug", f"Statistic {statistic} falls under NA perfect score stats. Returning None.") elif statistic.upper() in zero_perf_score_stats \ and abs(mean_stats_1) > abs(mean_stats_2): result = pval * -1 + safe_log(logger, "debug", f"Statistic {statistic} falls under zero perfect score stats. Adjusted p-value: {result}") elif statistic.upper() in one_perf_score_stats \ and abs(mean_stats_1 - 1) > abs(mean_stats_2 - 1): result = pval * -1 + safe_log(logger, "debug", f"Statistic {statistic} falls under one perfect score stats. Adjusted p-value: {result}") else: - print( - f"WARNING: statistic {statistic} doesn't belong to any of the perfect score groups. Returning unprocessed p-value") + safe_log(logger, "warning", + f"Statistic {statistic} doesn't belong to any of the perfect score groups. Returning unprocessed p-value.") result = pval + safe_log(logger, "debug", f"Final adjusted p-value for statistic {statistic}: {result}") return result @@ -482,7 +488,7 @@ def get_total_dir_values(input_data, columns_names, aggregation): total = sum_column_data_by_name(input_data, columns_names, 'total_dir') return total -def aggregate_field_values(series_var_val, input_data_frame, line_type): +def aggregate_field_values(series_var_val, input_data_frame, line_type, logger=None): """Finds and aggregates statistics for fields with values containing ';'. Aggregation happens by valid and lead times These fields are coming from the scorecard and looks like this: vx_mask : ['EAST;NMT']. 
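# --- Illustrative sketch (not part of the patch): the scorecard-style value
# --- that aggregate_field_values (whose body follows) detects and splits.
# --- The dictionary here is hypothetical.
series_var_val = {'vx_mask': ['EAST;NMT']}
for series_vals in series_var_val.values():
    for series_val in series_vals:
        if ';' in series_val:             # found an aggregated field
            print(series_val.split(';'))  # -> ['EAST', 'NMT']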
@@ -504,9 +510,11 @@ def aggregate_field_values(series_var_val, input_data_frame, line_type): unique_lead = input_data_frame.fcst_lead.unique() for series_var, series_vals in series_var_val.items(): + safe_log(logger, "info", f"Processing series variable: {series_var}") for series_val in series_vals: if ';' in series_val: # found the aggregated field + safe_log(logger, "debug", f"Found aggregated field: {series_val}") single_values = series_val.split(';') # for each valid @@ -527,7 +535,7 @@ def aggregate_field_values(series_var_val, input_data_frame, line_type): input_data_frame = input_data_frame.drop(index=rows_indexes) aggregated_result = calc_series_sums(rows_for_agg, line_type) - + safe_log(logger, "debug", f"Aggregated result for {series_var}: {aggregated_result}") # record the result as a first row in the old selection for field in input_data_frame.columns: if field in aggregated_result.columns.values: @@ -566,6 +574,7 @@ def aggregate_field_values(series_var_val, input_data_frame, line_type): aggregated_result = calc_series_sums(rows_for_agg, line_type) + safe_log(logger, "debug", f"Aggregated result for fcst_lead: {aggregated_result}") # record the result as a first row in the old selection for field in input_data_frame.columns: if field in aggregated_result.columns.values: @@ -587,11 +596,11 @@ def aggregate_field_values(series_var_val, input_data_frame, line_type): # add it to the result input_data_frame = pd.concat([input_data_frame,(rows_for_agg.iloc[:1])]) - + safe_log(logger, "debug", "Completed aggregation of field values.") return input_data_frame -def calc_series_sums(input_df, line_type): +def calc_series_sums(input_df, line_type, logger=None): """ Aggregates column values of the input data frame. Aggregation depends on the line type. 
Following line types are currently supported : ctc, sl1l2, sal1l2, vl1l2, val1l2, grad, nbrcnt, ecnt, rps @@ -602,13 +611,16 @@ def calc_series_sums(input_df, line_type): Returns: Pandas DataFrame with aggregated values """ + + safe_log(logger, "debug", f"Starting aggregation for line type: {line_type}") # create an array from the dataframe sums_data_frame = pd.DataFrame() # calculate aggregated total value and add it to the result total = sum_column_data_by_name(input_df.to_numpy(), input_df.columns, 'total') sums_data_frame['total'] = [total] - + safe_log(logger, "debug", f"Total calculated: {total}") + # proceed for the line type if line_type in ('ctc', 'nbrctc'): column_names = ['fy_oy', 'fy_on', 'fn_oy', 'fn_on'] @@ -621,6 +633,7 @@ def calc_series_sums(input_df, line_type): for column in column_names: sums_data_frame[column] = [np.nansum(input_df[column] * input_df.total.astype(float)) / total] + safe_log(logger, "debug", f"Aggregated {column} for line type {line_type}") elif line_type == 'sal1l2': sums_data_frame['fbar'] = [np.nansum(input_df['fabar'] * input_df.total.astype(float)) @@ -633,6 +646,7 @@ def calc_series_sums(input_df, line_type): / total] sums_data_frame['oobar'] = [np.nansum(input_df['ooabar'] * input_df.total.astype(float)) / total] + safe_log(logger, "debug", f"Aggregated all columns for line type sal1l2") elif line_type == 'vl1l2': column_names = ['ufbar', 'vfbar', 'uobar', 'vobar', 'uvfobar', @@ -640,18 +654,21 @@ def calc_series_sums(input_df, line_type): for column in column_names: sums_data_frame[column] = [np.nansum(input_df[column] * input_df.total.astype(float)) / total] + safe_log(logger, "debug", f"Aggregated {column} for line type {line_type}") elif line_type == 'val1l2': column_names = ['ufabar', 'vfabar', 'uoabar', 'voabar', 'uvfoabar', 'uvffabar', 'uvooabar'] for column in column_names: sums_data_frame[column] = [np.nansum(input_df[column] * input_df.total.astype(float)) / total] + safe_log(logger, "debug", f"Aggregated {column} for line type {line_type}") elif line_type == 'grad': column_names = ['fgbar', 'ogbar', 'mgbar', 'egbar'] for column in column_names: sums_data_frame[column] = [np.nansum(input_df[column] * input_df.total.astype(float)) / total] + safe_log(logger, "debug", f"Aggregated {column} for line type {line_type}") elif line_type == 'nbrcnt': dbl_fbs = np.nansum(input_df['fbs'] * input_df.total.astype(float)) / total @@ -671,6 +688,7 @@ def calc_series_sums(input_df, line_type): sums_data_frame['ufss'] = [dbl_u_fss] sums_data_frame['f_rate'] = [dbl_f_rate] sums_data_frame['o_rate'] = [dbl_o_rate] + safe_log(logger, "debug", f"Aggregated all columns for line type nbrcnt") elif line_type == 'ecnt': mse = input_df['rmse'] * input_df['rmse'] @@ -684,6 +702,8 @@ def calc_series_sums(input_df, line_type): for column in column_names: sums_data_frame[column] = [np.nansum(input_df[column] * input_df.total.astype(float)) / total] + safe_log(logger, "debug", f"Aggregated {column} for line type ecnt") + elif line_type == 'rps': d_rps_climo = input_df['rps'] / (1 - input_df['rpss']) sums_data_frame['rps'] = [np.nansum(input_df["rps"] * input_df.total.astype(float)) @@ -692,11 +712,12 @@ def calc_series_sums(input_df, line_type): / total] sums_data_frame['rps_climo'] = [np.nansum(d_rps_climo * input_df.total.astype(float)) / total] + safe_log(logger, "debug", f"Aggregated all columns for line type rps") return sums_data_frame -def equalize_axis_data(fix_vals_keys, fix_vals_permuted, params, input_data, axis='1'): +def 
equalize_axis_data(fix_vals_keys, fix_vals_permuted, params, input_data, axis='1', logger=None): """ Performs event equalisation on the specified axis on input data. Args: fix_vals_permuted - fixed values @@ -709,6 +730,8 @@ def equalize_axis_data(fix_vals_keys, fix_vals_permuted, params, input_data, axi # for each statistic for the specified axis + safe_log(logger, "debug", f"Starting event equalization for axis: {axis}") + if 'fcst_var_val_' + axis in params: fcst_var_val = params['fcst_var_val_' + axis] if fcst_var_val is None: @@ -724,6 +747,7 @@ def equalize_axis_data(fix_vals_keys, fix_vals_permuted, params, input_data, axi # requested statistics but instead should do it only ones to avoid data multiplication if 'stat_name' not in input_data.keys(): fcst_var_stats_current = [fcst_var_stats[0]] + safe_log(logger, "debug", f"Processing forecast variable: {fcst_var} with stats: {fcst_var_stats_current}") for fcst_var_stat in fcst_var_stats_current: # for each series for the specified axis @@ -749,7 +773,7 @@ def equalize_axis_data(fix_vals_keys, fix_vals_permuted, params, input_data, axi series_data_for_ee = series_data_for_ee[series_data_for_ee['fcst_var'] == fcst_var] if 'stat_name' in input_data.keys(): series_data_for_ee = series_data_for_ee[series_data_for_ee["stat_name"] == fcst_var_stat] - + safe_log(logger, "debug", f"Filtered data for forecast variable: {fcst_var} and statistic: {fcst_var_stat}") # perform EE on filtered data # for SSVAR line_type use equalization of multiple events series_data_after_ee = \ @@ -757,7 +781,7 @@ def equalize_axis_data(fix_vals_keys, fix_vals_permuted, params, input_data, axi params['series_val_' + axis], fix_vals_keys, fix_vals_permuted, True, - params['line_type'] == "ssvar") + params['line_type'] == "ssvar", logger=logger) # append EE data to result if output_ee_data.empty: @@ -765,6 +789,7 @@ def equalize_axis_data(fix_vals_keys, fix_vals_permuted, params, input_data, axi else: warnings.simplefilter(action="error", category=FutureWarning) output_ee_data = pd.concat([output_ee_data, series_data_after_ee]) + safe_log(logger, "debug", f"Appended equalized data for forecast variable: {fcst_var}") try: output_ee_data_valid = output_ee_data.drop('equalize', axis=1) @@ -772,19 +797,21 @@ def equalize_axis_data(fix_vals_keys, fix_vals_permuted, params, input_data, axi # It is possible to produce an empty data frame after applying event equalization. Print an informational # message before returning the data frame. if output_ee_data_valid.empty: + safe_log(logger, "warning", "Event equalization produced no results. Data frame is empty.") print(f"\nINFO: Event equalization has produced no results. Data frame is empty.") return output_ee_data_valid - except (KeyError, AttributeError): + except (KeyError, AttributeError) as e: # Two possible exceptions are raised when the data frame is empty *and* is missing the 'equalize' column # following event equalization. Return the empty dataframe # without dropping the 'equalize' column, and print an informational message. print(f"\nINFO: No resulting data after performing event equalization of axis", axis) + safe_log(logger, "warning", f"No resulting data after performing event equalization on axis {axis}: {e}") return output_ee_data -def perform_event_equalization(params, input_data): +def perform_event_equalization(params, input_data, logger=None): """ Performs event equalisation on input data. 
If there are 2 axes: perform EE on each and then on both Args: @@ -794,10 +821,12 @@ DataFrame with equalised data """ + safe_log(logger, "debug", "Starting event equalization process.") # list all fixed variables fix_vals_permuted_list = [] fix_vals_keys = [] if 'fixed_vars_vals_input' in params: + safe_log(logger, "debug", "Processing fixed variables for equalization.") for key in params['fixed_vars_vals_input']: if type(params['fixed_vars_vals_input'][key]) is dict: list_for_permut = params['fixed_vars_vals_input'][key].values() @@ -809,7 +838,8 @@ fix_vals_keys = list(params['fixed_vars_vals_input'].keys()) - # perform EE for each forecast variable on the axis 1 + # perform EE for each forecast variable on axis 1 + safe_log(logger, "debug", "Performing event equalization on axis 1.") output_ee_data = \ equalize_axis_data(fix_vals_keys, fix_vals_permuted_list, params, input_data, axis='1') @@ -828,15 +858,17 @@ for key in all_series: all_series[key] = list(set(all_series[key])) + safe_log(logger, "debug", "Performing combined event equalization on Y1 and Y2.") # run event equalizer on Y1 and Y2 output_ee_data = event_equalize(all_ee_records, params['indy_var'], all_series, fix_vals_keys, fix_vals_permuted_list, True, - params['line_type'] == "ssvar") + params['line_type'] == "ssvar", logger=logger) output_ee_data = output_ee_data.drop('equalize', axis=1) + safe_log(logger, "debug", "Event equalization process completed.") return output_ee_data @@ -1175,7 +1207,7 @@ def qt(p, df, ncp=0): def tost_paired(n: int, m1: float, m2: float, sd1: float, sd2: float, r12: float, low_eqbound_dz: float, - high_eqbound_dz: float, alpha: float = None) -> dict: + high_eqbound_dz: float, alpha: float = None, logger=None) -> dict: """ TOST function for a dependent t-test (Cohen's dz). Based on Rscript function TOSTpaired @@ -1208,21 +1240,27 @@ if not alpha: alpha = 0.05 if low_eqbound_dz >= high_eqbound_dz: + safe_log(logger, "warning", + "The lower bound is equal to or larger than the upper bound. Check the bounds specification.") print( 'WARNING: The lower bound is equal to or larger than the upper bound.' 
' Check the plot and output to see if the bounds are specified as you intended.') if n < 2: + safe_log(logger, "error", "The sample size should be larger than 1.") print("The sample size should be larger than 1.") sys.exit() if 1 <= alpha or alpha <= 0: + safe_log(logger, "error", "The alpha level should be a positive value between 0 and 1.") print("The alpha level should be a positive value between 0 and 1.") sys.exit() if sd1 <= 0 or sd2 <= 0: + safe_log(logger, "error", "The standard deviation should be a positive value.") print("The standard deviation should be a positive value.") sys.exit() if 1 < r12 or r12 < -1: + safe_log(logger, "error", "The correlation should be a value between -1 and 1.") print("The correlation should be a value between -1 and 1.") sys.exit() @@ -1241,6 +1279,7 @@ def tost_paired(n: int, m1: float, m2: float, sd1: float, sd2: float, r12: float p2 = pt(t2, degree_f, lower_tail=True) ptost = max(p1, p2) else: + safe_log(logger, "warning", "Standard error is zero; cannot compute TOST statistics.") pttest = None t1 = None p1 = None @@ -1289,7 +1328,7 @@ def tost_paired(n: int, m1: float, m2: float, sd1: float, sd2: float, r12: float t = (None, None) p = (None, None) - return { + result = { 'dif': round_half_up(dif, PRECISION), 't': t, 'p': p, @@ -1302,9 +1341,11 @@ def tost_paired(n: int, m1: float, m2: float, sd1: float, sd2: float, r12: float 'test_outcome': test_outcome, 'tost_outcome': tost_outcome } + safe_log(logger, "debug", f"TOST paired calculation completed. Result: {result}") + return result -def calculate_mtd_revision_stats(series_data: DataFrame, lag_max: Union[int, None] = None) -> dict: +def calculate_mtd_revision_stats(series_data: DataFrame, lag_max: Union[int, None] = None, logger=None) -> dict: """ Calculates Mode-TD revision stats :param series_data - DataFrame with columns 'stat_value' and 'revision_id' @@ -1319,14 +1360,18 @@ def calculate_mtd_revision_stats(series_data: DataFrame, lag_max: Union[int, Non auto_cor_p - p-value of autocorrelation auto_cor_r - estimated autocorrelation for lag_max """ + safe_log(logger, "debug", "Starting calculation of Mode-TD revision stats.") + result = { 'ww_run': None, 'auto_cor_p': None, 'auto_cor_r': None } if len(series_data) == 0: + safe_log(logger, "warning", "Input series_data is empty. Returning default result.") return result if not {'stat_value', 'revision_id'}.issubset(series_data.columns): + safe_log(logger, "error", "DataFrame doesn't have correct columns. 
Expected 'stat_value' and 'revision_id'.") print("DataFrame doesn't have correct columns") return result @@ -1353,13 +1398,16 @@ def func(a): acf_value = acf(data_for_stats, 'correlation', lag_max) if acf_value is not None: result['auto_cor_r'] = round(acf_value[-1], 2) + safe_log(logger, "debug", f"Calculated autocorrelation: {result['auto_cor_r']}") # qnorm((1 + 0.05)/2) = 0.06270678 result['auto_cor_p'] = round(0.06270678 / math.sqrt(np.size(data_for_stats)), 2) + safe_log(logger, "debug", f"Calculated p-value for autocorrelation: {result['auto_cor_p']}") p_value = runs_test(data_for_stats, 'left.sided', 'median')['p_value'] if p_value is not None: result['ww_run'] = round(p_value, 2) + safe_log(logger, "debug", f"Calculated Wald-Wolfowitz runs test p-value: {result['ww_run']}") return result @@ -1419,7 +1467,7 @@ def autocor_coef(data: list) -> Union[None, float]: return sx * sy / (sx - (n - 1) * sxx) + sxy / (sxx - sx * sx / (n - 1)) -def get_met_version(input_data:Union[pd.DataFrame, np.array], column_names:list=None) -> str: +def get_met_version(input_data:Union[pd.DataFrame, np.array], column_names:list=None, logger=None) -> str: """ Determines the version of MET for this data Args: @@ -1432,8 +1480,10 @@ def get_met_version(input_data:Union[pd.DataFrame, np.array], column_names:list= version: a dataclass containing the major, minor, and bugfix values of the version """ + safe_log(logger, "debug", "Starting to determine MET version.") if isinstance(input_data, np.ndarray): if column_names is None: + safe_log(logger, "error", "numpy array input requires a list of column names.") raise ValueError("numpy array input requires a list of column names.") else: lc_column_names = [cur_col.lower() for cur_col in column_names] @@ -1444,12 +1494,15 @@ def get_met_version(input_data:Union[pd.DataFrame, np.array], column_names:list= cols = df.columns.to_list() lc_cols = [cur_col.lower() for cur_col in cols] df.columns = lc_cols + safe_log(logger, "debug", "Converted DataFrame column names to lowercase.") else: + safe_log(logger, "error", "Input data must be either a numpy array or pandas dataframe.") raise ValueError("input data must be either a numpy array or pandas dataframe") # Get the version from the data (the first row) versions = df['version'].to_list() full_version = versions[0] + safe_log(logger, "debug", f"Extracted full version string: {full_version}") # Use an immutable (frozen=True) dataclass to hold the major, # minor, and bugfix values that make up the version number. 
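# ---------------------------------------------------------------------------
# Editor's sketch (illustrative only, not part of this patch): every call site
# in these hunks follows the pattern safe_log(logger, level, message). The
# helper itself lives in the new metcalcpy/util/safe_log.py, whose body is not
# shown here; a minimal implementation consistent with that calling convention
# could look like the following, though the real module may differ.
import logging


def safe_log(logger, log_level, message):
    """Log message at log_level when a logger is supplied; otherwise do nothing.

    Keeping the None check in one place lets library functions accept
    logger=None without guarding every logging statement individually.
    """
    if logger is not None:
        log_method = getattr(logger, log_level, None)
        if callable(log_method):
            log_method(message)


# Usage: the same module code works with or without a configured logger.
example_logger = logging.getLogger("metcalcpy")
safe_log(example_logger, "debug", "emitted when a logger is configured")
safe_log(None, "debug", "silently skipped when no logger is supplied")
# ---------------------------------------------------------------------------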
@@ -1469,7 +1522,8 @@ def get_met_version(input_data:Union[pd.DataFrame, np.array], column_names:list= bugfix = 0 version = Version(major, minor, bugfix) - + safe_log(logger, "debug", f"Created version dataclass: {version}") + return version diff --git a/metcalcpy/util/val1l2_statistics.py b/metcalcpy/util/val1l2_statistics.py index ce1dcbaf..f718489c 100644 --- a/metcalcpy/util/val1l2_statistics.py +++ b/metcalcpy/util/val1l2_statistics.py @@ -16,12 +16,13 @@ from metcalcpy.util.utils import round_half_up, sum_column_data_by_name, PRECISION, get_total_values, get_met_version, \ get_total_dir_values +from metcalcpy.util.safe_log import safe_log __author__ = 'Tatiana Burek' __version__ = '0.1.0' -def calculate_val1l2_anom_corr(input_data, columns_names, aggregation=False): +def calculate_val1l2_anom_corr(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of VAL1L2_ANOM_CORR - Args: @@ -37,7 +38,11 @@ def calculate_val1l2_anom_corr(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting VAL1L2_ANOM_CORR calculation.") + total = get_total_values(input_data, columns_names, aggregation) + safe_log(logger, "debug", f"Total values calculated: {total}") + ufabar = sum_column_data_by_name(input_data, columns_names, 'ufabar') / total vfabar = sum_column_data_by_name(input_data, columns_names, 'vfabar') / total uoabar = sum_column_data_by_name(input_data, columns_names, 'uoabar') / total @@ -45,15 +50,19 @@ def calculate_val1l2_anom_corr(input_data, columns_names, aggregation=False): uvfoabar = sum_column_data_by_name(input_data, columns_names, 'uvfoabar') / total uvffabar = sum_column_data_by_name(input_data, columns_names, 'uvffabar') / total uvooabar = sum_column_data_by_name(input_data, columns_names, 'uvooabar') / total - result = calc_wind_corr(ufabar, vfabar, uoabar, voabar, uvfoabar, uvffabar, uvooabar) + safe_log(logger, "debug", f"Summed values: ufabar={ufabar}, vfabar={vfabar}, uoabar={uoabar}, " + f"voabar={voabar}, uvfoabar={uvfoabar}, uvffabar={uvffabar}, uvooabar={uvooabar}") + result = calc_wind_corr(ufabar, vfabar, uoabar, voabar, uvfoabar, uvffabar, uvooabar, logger=logger) result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "debug", f"Final VAL1L2_ANOM_CORR result: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during VAL1L2_ANOM_CORR calculation: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calc_wind_corr(uf, vf, uo, vo, uvfo, uvff, uvoo): +def calc_wind_corr(uf, vf, uo, vo, uvfo, uvff, uvoo, logger=None): """Calculates wind correlation Args: uf - Mean(uf-uc) @@ -71,12 +80,13 @@ def calc_wind_corr(uf, vf, uo, vo, uvfo, uvff, uvoo): try: corr = (uvfo - uf * uo - vf * vo) / (np.sqrt(uvff - uf * uf - vf * vf) * np.sqrt(uvoo - uo * uo - vo * vo)) - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during wind correlation calculation: {str(e)}") corr = None return corr -def calculate_val1l2_total(input_data, columns_names): +def calculate_val1l2_total(input_data, columns_names, logger=None): """Performs calculation of Total number of matched pairs for Vector Anomaly Partial Sums Args: @@ -89,10 +99,23 @@ def calculate_val1l2_total(input_data, columns_names): calculated Total number of matched pairs as 
float or None if some of the data values are missing or invalid """ - total = sum_column_data_by_name(input_data, columns_names, 'total') - return round_half_up(total, PRECISION) + try: + safe_log(logger, "debug", "Starting calculation of VAL1L2 total number of matched pairs.") + + total = sum_column_data_by_name(input_data, columns_names, 'total') + safe_log(logger, "debug", f"Total value before rounding: {total}") -def calculate_val1l2_total_dir(input_data, columns_names): + result = round_half_up(total, PRECISION) + safe_log(logger, "debug", f"Rounded total number of matched pairs: {result}") + + return result + + except Exception as e: + safe_log(logger, "error", f"Error during calculation of VAL1L2 total number of matched pairs: {str(e)}") + return None + + +def calculate_val1l2_total_dir(input_data, columns_names, logger=None): """Performs calculation of Total number of matched pairs for well-defined forecast and observation wind directions (TOTAL_DIR column) Args: @@ -105,12 +128,24 @@ def calculate_val1l2_total_dir(input_data, columns_names): calculated Total number of matched pairs as float or None if some of the data values are missing or invalid """ - total = sum_column_data_by_name(input_data, columns_names, 'total_dir') - return round_half_up(total, PRECISION) + try: + safe_log(logger, "debug", "Starting calculation of VAL1L2 total number of matched pairs for wind directions.") + total = sum_column_data_by_name(input_data, columns_names, 'total_dir') + safe_log(logger, "debug", f"Total_DIR value before rounding: {total}") + result = round_half_up(total, PRECISION) + safe_log(logger, "debug", f"Rounded total number of matched pairs for wind directions: {result}") -def calculate_val1l2_dira_me(input_data, columns_names, aggregation=False): + return result + + except Exception as e: + safe_log(logger, "error", f"Error during calculation of VAL1L2 total number of matched pairs for wind directions: {str(e)}") + return None + + + +def calculate_val1l2_dira_me(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of DIRA_ME Args: input_data: 2-dimensional numpy array with data for the calculation @@ -121,18 +156,24 @@ def calculate_val1l2_dira_me(input_data, columns_names, aggregation=False): dira_me """ try: + safe_log(logger, "debug", "Starting calculation of DIRA_ME.") + total = get_total_dir_values(input_data, np.array(columns_names), aggregation) + safe_log(logger, "debug", f"Total direction values for DIRA_ME calculation: {total}") + result = sum_column_data_by_name(input_data, np.array(columns_names), 'dira_me') / total result = round_half_up(result, PRECISION) + safe_log(logger, "debug", f"Rounded DIRA_ME value: {result}") - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation of DIRA_ME: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_val1l2_dira_mae(input_data, columns_names, aggregation=False): +def calculate_val1l2_dira_mae(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of DIRA_MAE Args: input_data: 2-dimensional numpy array with data for the calculation @@ -143,17 +184,24 @@ def calculate_val1l2_dira_mae(input_data, columns_names, aggregation=False): dira_mae statistic """ try: + safe_log(logger, "debug", "Starting calculation of DIRA_MAE.") + total = get_total_dir_values(input_data, np.array(columns_names), aggregation) + safe_log(logger, 
"debug", f"Total direction values for DIRA_MAE calculation: {total}") + result = sum_column_data_by_name(input_data, np.array(columns_names), 'dira_mae') / total result = round_half_up(result, PRECISION) + safe_log(logger, "debug", f"Rounded DIRA_MAE value: {result}") - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation of DIRA_MAE: {str(e)}") result = None + warnings.filterwarnings('ignore') return result -def calculate_val1l2_dira_mse(input_data, columns_names, aggregation=False): +def calculate_val1l2_dira_mse(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of DIRA_MSE Args: input_data: 2-dimensional numpy array with data for the calculation diff --git a/metcalcpy/util/vcnt_statistics.py b/metcalcpy/util/vcnt_statistics.py index 2b2cdc21..6a35aeda 100644 --- a/metcalcpy/util/vcnt_statistics.py +++ b/metcalcpy/util/vcnt_statistics.py @@ -16,6 +16,7 @@ from metcalcpy.util.met_stats import calc_direction, calc_speed from metcalcpy.util.utils import round_half_up, sum_column_data_by_name, PRECISION, get_total_values, \ get_total_dir_values +from metcalcpy.util.safe_log import safe_log __author__ = 'Tatiana Burek' __version__ = '0.1.0' @@ -23,7 +24,7 @@ from metcalcpy.util.vl1l2_statistics import calculate_vl1l2_fvar, calculate_vl1l2_ovar -def calculate_vcnt_fbar(input_data, columns_names, aggregation=False): +def calculate_vcnt_fbar(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of VCNT_FBAR - Mean value of forecast wind speed Args: @@ -39,16 +40,24 @@ def calculate_vcnt_fbar(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting calculation of VCNT_FBAR.") + total = get_total_values(input_data, columns_names, aggregation) + safe_log(logger, "debug", f"Total values for VCNT_FBAR calculation: {total}") + result = sum_column_data_by_name(input_data, columns_names, 'f_speed_bar') / total + result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "debug", f"Rounded VCNT_FBAR value: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation of VCNT_FBAR: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_vcnt_obar(input_data, columns_names, aggregation=False): +def calculate_vcnt_obar(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of VCNT_OBAR - Mean value of observed wind speed Args: @@ -64,16 +73,24 @@ def calculate_vcnt_obar(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting calculation of VCNT_OBAR.") + total = get_total_dir_values(input_data, columns_names, aggregation) + safe_log(logger, "debug", f"Total values for VCNT_OBAR calculation: {total}") + result = sum_column_data_by_name(input_data, columns_names, 'o_speed_bar') / total + result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "debug", f"Rounded VCNT_OBAR value: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation of VCNT_OBAR: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def 
calculate_vcnt_fs_rms(input_data, columns_names, aggregation=False): +def calculate_vcnt_fs_rms(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of VCNT_FS_RMS - Root mean square forecast wind speed Args: @@ -89,17 +106,28 @@ def calculate_vcnt_fs_rms(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting calculation of VCNT_FS_RMS.") + total = get_total_values(input_data, columns_names, aggregation) + safe_log(logger, "debug", f"Total values for VCNT_FS_RMS calculation: {total}") + uvffbar = sum_column_data_by_name(input_data, columns_names, 'uvffbar') / total + safe_log(logger, "debug", f"Calculated uvffbar value: {uvffbar}") + result = np.sqrt(uvffbar) + result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "debug", f"Rounded VCNT_FS_RMS value: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation of VCNT_FS_RMS: {str(e)}") result = None + warnings.filterwarnings('ignore') return result -def calculate_vcnt_os_rms(input_data, columns_names, aggregation=False): +def calculate_vcnt_os_rms(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of VCNT_OS_RMS - Root mean square observed wind speed Args: @@ -115,17 +143,27 @@ def calculate_vcnt_os_rms(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting calculation of VCNT_OS_RMS.") + total = get_total_values(input_data, columns_names, aggregation) + safe_log(logger, "debug", f"Total values for VCNT_OS_RMS calculation: {total}") + uvoobar = sum_column_data_by_name(input_data, columns_names, 'uvoobar') / total + safe_log(logger, "debug", f"Calculated uvoobar value: {uvoobar}") + result = np.sqrt(uvoobar) + result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "debug", f"Rounded VCNT_OS_RMS value: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation of VCNT_OS_RMS: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_vcnt_msve(input_data, columns_names, aggregation=False): +def calculate_vcnt_msve(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of VCNT_MSVE - Mean squared length of the vector difference between the forecast and observed winds @@ -142,22 +180,35 @@ def calculate_vcnt_msve(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting calculation of VCNT_MSVE.") + total = get_total_values(input_data, columns_names, aggregation) + safe_log(logger, "debug", f"Total values for VCNT_MSVE calculation: {total}") + uvffbar = sum_column_data_by_name(input_data, columns_names, 'uvffbar') / total uvfobar = sum_column_data_by_name(input_data, columns_names, 'uvfobar') / total uvoobar = sum_column_data_by_name(input_data, columns_names, 'uvoobar') / total + + safe_log(logger, "debug", f"Calculated uvffbar: {uvffbar}, uvfobar: {uvfobar}, uvoobar: {uvoobar}") + mse = uvffbar - 2 * uvfobar + uvoobar + safe_log(logger, "debug", f"Calculated MSE value: {mse}") + if mse < 0: + safe_log(logger, "warning", "MSE value is negative, setting result to None.") result = None else: result = round_half_up(mse, PRECISION) - 
except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "debug", f"Rounded VCNT_MSVE value: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation of VCNT_MSVE: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_vcnt_rmsve(input_data, columns_names, aggregation=False): +def calculate_vcnt_rmsve(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of VCNT_RMSVE - Square root of Mean squared length of the vector Args: input_data: 2-dimensional numpy array with data for the calculation @@ -172,16 +223,23 @@ def calculate_vcnt_rmsve(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: - msve = calculate_vcnt_msve(input_data, columns_names, aggregation) + safe_log(logger, "debug", "Starting calculation of VCNT_RMSVE.") + + msve = calculate_vcnt_msve(input_data, columns_names, aggregation, logger=logger) + + safe_log(logger, "debug", f"Calculated MSVE value: {msve}") result = np.sqrt(msve) result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "debug", f"Rounded VCNT_RMSVE value: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation of VCNT_RMSVE: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_vcnt_fstdev(input_data, columns_names, aggregation=False): +def calculate_vcnt_fstdev(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of VCNT_FSTDEV - Standard deviation of the forecast wind speed Args: input_data: 2-dimensional numpy array with data for the calculation @@ -196,15 +254,23 @@ def calculate_vcnt_fstdev(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: - result = np.sqrt(calculate_vl1l2_fvar(input_data, columns_names, aggregation)) + safe_log(logger, "debug", "Starting calculation of VCNT_FSTDEV.") + + fvar = calculate_vl1l2_fvar(input_data, columns_names, aggregation, logger=logger) + + safe_log(logger, "debug", f"Calculated FVAR value: {fvar}") + result = np.sqrt(fvar) result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "debug", f"Rounded VCNT_FSTDEV value: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation of VCNT_FSTDEV: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_vcnt_ostdev(input_data, columns_names, aggregation=False): +def calculate_vcnt_ostdev(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of VCNT_OSTDEV - Standard deviation of the observed wind speed Args: input_data: 2-dimensional numpy array with data for the calculation @@ -219,15 +285,23 @@ def calculate_vcnt_ostdev(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: - result = np.sqrt(calculate_vl1l2_ovar(input_data, columns_names, aggregation)) + safe_log(logger, "debug", "Starting calculation of VCNT_OSTDEV.") + + ovar = calculate_vl1l2_ovar(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"Calculated OVAR value: {ovar}") + result = np.sqrt(ovar) result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + 
safe_log(logger, "debug", f"Rounded VCNT_OSTDEV value: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation of VCNT_OSTDEV: {str(e)}") result = None + warnings.filterwarnings('ignore') return result -def calculate_vcnt_fdir(input_data, columns_names, aggregation=False): +def calculate_vcnt_fdir(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of VCNT_FDIR - Direction of the average forecast wind vector Args: input_data: 2-dimensional numpy array with data for the calculation @@ -242,18 +316,26 @@ def calculate_vcnt_fdir(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting calculation of VCNT_FDIR.") + total = get_total_values(input_data, columns_names, aggregation) + ufbar = sum_column_data_by_name(input_data, columns_names, 'ufbar') / total vfbar = sum_column_data_by_name(input_data, columns_names, 'vfbar') / total - fdir = calc_direction(-ufbar, -vfbar) + safe_log(logger, "debug", f"Calculated UFBar: {ufbar}, VFBar: {vfbar}") + + fdir = calc_direction(-ufbar, -vfbar, logger=logger) result = round_half_up(fdir, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "debug", f"Calculated VCNT_FDIR: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation of VCNT_FDIR: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_vcnt_odir(input_data, columns_names, aggregation=False): +def calculate_vcnt_odir(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of VCNT_ODIR - Direction of the average observed wind vector Args: input_data: 2-dimensional numpy array with data for the calculation @@ -268,18 +350,26 @@ def calculate_vcnt_odir(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting calculation of VCNT_ODIR.") + total = get_total_values(input_data, columns_names, aggregation) + uobar = sum_column_data_by_name(input_data, columns_names, 'uobar') / total vobar = sum_column_data_by_name(input_data, columns_names, 'vobar') / total - odir = calc_direction(-uobar, -vobar) + safe_log(logger, "debug", f"Calculated UOBar: {uobar}, VOBar: {vobar}") + + odir = calc_direction(-uobar, -vobar, logger=logger) result = round_half_up(odir, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): - result = None + safe_log(logger, "debug", f"Calculated VCNT_ODIR: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation of VCNT_ODIR: {str(e)}") + result = Non warnings.filterwarnings('ignore') return result -def calculate_vcnt_fbar_speed(input_data, columns_names, aggregation=False): +def calculate_vcnt_fbar_speed(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of VCNT_FBAR_SPEED - Length (speed) of the average forecast wind vector Args: input_data: 2-dimensional numpy array with data for the calculation @@ -294,18 +384,22 @@ def calculate_vcnt_fbar_speed(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting calculation of VCNT_FBAR_SPEED.") total = get_total_values(input_data, columns_names, aggregation) ufbar = sum_column_data_by_name(input_data, columns_names, 'ufbar') / 
total vfbar = sum_column_data_by_name(input_data, columns_names, 'vfbar') / total - fspd = calc_speed(ufbar, vfbar) + safe_log(logger, "debug", f"Calculated UFBar: {ufbar}, VFBar: {vfbar}") + fspd = calc_speed(ufbar, vfbar, logger=logger) result = round_half_up(fspd, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "debug", f"Calculated VCNT_FBAR_SPEED: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation of VCNT_FBAR_SPEED: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_vcnt_obar_speed(input_data, columns_names, aggregation=False): +def calculate_vcnt_obar_speed(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of VCNT_OBAR_SPEED - Length (speed) of the average observed wind vector Args: input_data: 2-dimensional numpy array with data for the calculation @@ -320,18 +414,24 @@ def calculate_vcnt_obar_speed(input_data, columns_names, aggregation=False): """ warnings.filterwarnings('error') try: + safe_log(logger, "debug", "Starting calculation of VCNT_OBAR_SPEED.") total = get_total_values(input_data, columns_names, aggregation) uobar = sum_column_data_by_name(input_data, columns_names, 'uobar') / total vobar = sum_column_data_by_name(input_data, columns_names, 'vobar') / total - fspd = calc_speed(uobar, vobar) + safe_log(logger, "debug", f"Calculated UOBar: {uobar}, VOBar: {vobar}") + + fspd = calc_speed(uobar, vobar, logger=logger) result = round_half_up(fspd, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "debug", f"Calculated VCNT_OBAR_SPEED: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation of VCNT_OBAR_SPEED: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def calculate_vcnt_vdiff_speed(input_data, columns_names, aggregation=False): +def calculate_vcnt_vdiff_speed(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of VCNT_VDIFF_SPEED - Length (speed) of the vector difference between the average forecast and average observed wind vectors @@ -347,21 +447,25 @@ def calculate_vcnt_vdiff_speed(input_data, columns_names, aggregation=False): or None if some of the data values are missing or invalid """ warnings.filterwarnings('error') + safe_log(logger, "debug", "Starting calculation of VCNT_VDIFF_SPEED.") try: total = get_total_values(input_data, columns_names, aggregation) ufbar = sum_column_data_by_name(input_data, columns_names, 'ufbar') / total uobar = sum_column_data_by_name(input_data, columns_names, 'uobar') / total vfbar = sum_column_data_by_name(input_data, columns_names, 'vfbar') / total vobar = sum_column_data_by_name(input_data, columns_names, 'vobar') / total - vdiff_spd = calc_speed(ufbar - uobar, vfbar - vobar) + safe_log(logger, "debug", f"Calculated UFBAR: {ufbar}, UOBAR: {uobar}, VFBAR: {vfbar}, VOBAR: {vobar}") + vdiff_spd = calc_speed(ufbar - uobar, vfbar - vobar, logger=logger) result = round_half_up(vdiff_spd, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "debug", f"Calculated VCNT_VDIFF_SPEED: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error during calculation of VCNT_VDIFF_SPEED: {str(e)}") result = None warnings.filterwarnings('ignore') return result -def
calculate_vcnt_vdiff_dir(input_data, columns_names, aggregation=False): +def calculate_vcnt_vdiff_dir(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of VCNT_VDIFF_DIR - Direction of the vector difference between the average forecast and average observed wind vectors @@ -376,22 +480,39 @@ def calculate_vcnt_vdiff_dir(input_data, columns_names, aggregation=False): calculated VCNT_VDIFF_DIR as float or None if some of the data values are missing or invalid """ + safe_log(logger, "info", "Starting calculation of VCNT_VDIFF_DIR.") warnings.filterwarnings('error') + try: total = get_total_values(input_data, columns_names, aggregation) + safe_log(logger, "debug", f"Total value calculated: {total}") + ufbar = sum_column_data_by_name(input_data, columns_names, 'ufbar') / total + safe_log(logger, "debug", f"ufbar: {ufbar}") + uobar = sum_column_data_by_name(input_data, columns_names, 'uobar') / total + safe_log(logger, "debug", f"uobar: {uobar}") + vfbar = sum_column_data_by_name(input_data, columns_names, 'vfbar') / total + safe_log(logger, "debug", f"vfbar: {vfbar}") + vobar = sum_column_data_by_name(input_data, columns_names, 'vobar') / total - vdiff_dir = calc_direction(-(ufbar - uobar), -(vfbar - vobar)) + safe_log(logger, "debug", f"vobar: {vobar}") + + vdiff_dir = calc_direction(-(ufbar - uobar), -(vfbar - vobar), logger=logger) + safe_log(logger, "debug", f"Calculated direction: {vdiff_dir}") + result = round_half_up(vdiff_dir, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", f"Result rounded to precision {PRECISION}: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error occurred during calculation: {e}") result = None + safe_log(logger, "info", "Finished calculation of VCNT_VDIFF_DIR.") warnings.filterwarnings('ignore') return result -def calculate_vcnt_speed_err(input_data, columns_names, aggregation=False): +def calculate_vcnt_speed_err(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of VCNT_SPEED_ERR - Difference between the length of the average forecast wind vector and the average observed wind vector (in the sense F - O) @@ -407,18 +528,29 @@ def calculate_vcnt_speed_err(input_data, columns_names, aggregation=False): calculated VCNT_SPEED_ERR as float or None if some of the data values are missing or invalid """ + safe_log(logger, "info", "Starting calculation of VCNT_SPEED_ERR.") warnings.filterwarnings('error') try: - speed_bias = calculate_vcnt_fbar_speed(input_data, columns_names, aggregation) \ - - calculate_vcnt_obar_speed(input_data, columns_names, aggregation) + fbar_speed = calculate_vcnt_fbar_speed(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"Calculated fbar_speed: {fbar_speed}") + + obar_speed = calculate_vcnt_obar_speed(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"Calculated obar_speed: {obar_speed}") + + speed_bias = fbar_speed - obar_speed + safe_log(logger, "debug", f"Calculated speed_bias (F - O): {speed_bias}") + + result = round_half_up(speed_bias, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", f"Result rounded to precision {PRECISION}: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error occurred during calculation: {e}") result = None warnings.filterwarnings('ignore') + safe_log(logger, "info", "Finished
calculation of VCNT_SPEED_ERR.") return result -def calculate_vcnt_speed_abserr(input_data, columns_names, aggregation=False): +def calculate_vcnt_speed_abserr(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of VCNT_SPEED_ABSERR - Absolute value of diference between the length of the average forecast wind vector and the average observed wind vector (in the sense F - O) @@ -434,17 +566,25 @@ def calculate_vcnt_speed_abserr(input_data, columns_names, aggregation=False): calculated VCNT_SPEED_ABSERR as float or None if some of the data values are missing or invalid """ + safe_log(logger, "info", "Starting calculation of VCNT_SPEED_ABSERR.") warnings.filterwarnings('error') try: - spd_abserr = abs(calculate_vcnt_speed_err(input_data, columns_names, aggregation)) + speed_err = calculate_vcnt_speed_err(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"Calculated speed_err: {speed_err}") + + spd_abserr = abs(speed_err) + safe_log(logger, "debug", f"Calculated absolute speed error: {spd_abserr}") + result = round_half_up(spd_abserr, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", f"Result rounded to precision {PRECISION}: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error occurred during calculation: {e}") result = None warnings.filterwarnings('ignore') + safe_log(logger, "info", "Finished calculation of VCNT_SPEED_ABSERR.") return result - -def calculate_vcnt_dir_err(input_data, columns_names, aggregation=False): +def calculate_vcnt_dir_err(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of VCNT_DIR_ERR - Signed angle between the directions of the average forecast and observed wind vectors. 
Positive if the forecast wind vector is counter clockwise from the observed wind vector @@ -460,34 +600,56 @@ def calculate_vcnt_dir_err(input_data, columns_names, aggregation=False): calculated VCNT_DIR_ERR as float or None if some of the data values are missing or invalid """ + safe_log(logger, "info", "Starting calculation of VCNT_DIR_ERR.") warnings.filterwarnings('error') try: - f_len = calculate_vcnt_fbar_speed(input_data, columns_names, aggregation) + f_len = calculate_vcnt_fbar_speed(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"Calculated f_len: {f_len}") + total = get_total_values(input_data, columns_names, aggregation) + safe_log(logger, "debug", f"Total value calculated: {total}") + ufbar = sum_column_data_by_name(input_data, columns_names, 'ufbar') / total + safe_log(logger, "debug", f"ufbar: {ufbar}") + vfbar = sum_column_data_by_name(input_data, columns_names, 'vfbar') / total + safe_log(logger, "debug", f"vfbar: {vfbar}") + uf = ufbar / f_len vf = vfbar / f_len + safe_log(logger, "debug", f"Normalized forecast wind vector components: uf={uf}, vf={vf}") + + o_len = calculate_vcnt_obar_speed(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"Calculated o_len: {o_len}") - o_len = calculate_vcnt_obar_speed(input_data, columns_names) uobar = sum_column_data_by_name(input_data, columns_names, 'uobar') / total + safe_log(logger, "debug", f"uobar: {uobar}") + vobar = sum_column_data_by_name(input_data, columns_names, 'vobar') / total + safe_log(logger, "debug", f"vobar: {vobar}") + uo = uobar / o_len vo = vobar / o_len + safe_log(logger, "debug", f"Normalized observed wind vector components: uo={uo}, vo={vo}") a = vf * uo - uf * vo b = uf * uo + vf * vo + safe_log(logger, "debug", f"Components a={a}, b={b} for direction calculation") - dir_err = calc_direction(a, b) + dir_err = calc_direction(a, b, logger=logger) + safe_log(logger, "debug", f"Calculated direction error: {dir_err}") result = round_half_up(dir_err, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", f"Result rounded to precision {PRECISION}: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error occurred during calculation: {e}") result = None warnings.filterwarnings('ignore') + safe_log(logger, "info", "Finished calculation of VCNT_DIR_ERR.") return result -def calculate_vcnt_dir_abser(input_data, columns_names, aggregation=False): +def calculate_vcnt_dir_abser(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of VCNT_DIR_ABSERR - Absolute value of signed angle between the directions of the average forecast and observed wind vectors.
Positive if the forecast wind vector @@ -504,112 +666,179 @@ def calculate_vcnt_dir_abser(input_data, columns_names, aggregation=False): calculated VCNT_DIR_ABSERR as float or None if some of the data values are missing or invalid """ + safe_log(logger, "info", "Starting calculation of VCNT_DIR_ABSERR.") warnings.filterwarnings('error') try: - ang_btw = abs(calculate_vcnt_dir_err(input_data, columns_names, aggregation)) + dir_err = calculate_vcnt_dir_err(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"Calculated dir_err: {dir_err}") + + ang_btw = abs(dir_err) + safe_log(logger, "debug", f"Calculated absolute direction error: {ang_btw}") + result = round_half_up(ang_btw, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", f"Result rounded to precision {PRECISION}: {result}") + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error occurred during calculation: {e}") result = None warnings.filterwarnings('ignore') + safe_log(logger, "info", "Finished calculation of VCNT_DIR_ABSERR.") return result -def calculate_vcnt_anom_corr(input_data, columns_names, aggregation=False): +def calculate_vcnt_anom_corr(input_data, columns_names, aggregation=False, logger=None): + safe_log(logger, "info", "Starting calculation of VCNT_ANOM_CORR.") warnings.filterwarnings('error') try: total = get_total_values(input_data, columns_names, aggregation) # n + safe_log(logger, "debug", f"Total value calculated: {total}") + fa_speed_bar = sum_column_data_by_name(input_data, columns_names, 'fa_speed_bar') # f + safe_log(logger, "debug", f"Calculated fa_speed_bar: {fa_speed_bar}") + oa_speed_bar = sum_column_data_by_name(input_data, columns_names, 'oa_speed_bar') # o + safe_log(logger, "debug", f"Calculated oa_speed_bar: {oa_speed_bar}") + uvffabar = sum_column_data_by_name(input_data, columns_names, 'uvffabar') # ff + safe_log(logger, "debug", f"Calculated uvffabar: {uvffabar}") + uvfoabar = sum_column_data_by_name(input_data, columns_names, 'uvfoabar') # fo + safe_log(logger, "debug", f"Calculated uvfoabar: {uvfoabar}") + uvooabar = sum_column_data_by_name(input_data, columns_names, 'uvooabar') # oo + safe_log(logger, "debug", f"Calculated uvooabar: {uvooabar}") v = (total * uvffabar - fa_speed_bar * fa_speed_bar) * (total * uvooabar - oa_speed_bar * oa_speed_bar) + safe_log(logger, "debug", f"Intermediate variable v calculated: {v}") + result = ((total * uvfoabar) - (fa_speed_bar * oa_speed_bar)) / np.sqrt(v) + safe_log(logger, "debug", f"Raw anomaly correlation calculated: {result}") # Check the computed range if result > 1: result = 1.0 elif result < -1: result = -1.0 + safe_log(logger, "info", f"Final anomaly correlation result: {result}") - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error occurred during calculation: {e}") result = None warnings.filterwarnings('ignore') + safe_log(logger, "info", "Finished calculation of VCNT_ANOM_CORR.") return result -def calculate_vcnt_anom_corr_uncntr(input_data, columns_names): +def calculate_vcnt_anom_corr_uncntr(input_data, columns_names, logger=None): + safe_log(logger, "info", "Starting calculation of VCNT_ANOM_CORR_UNCNTR.") warnings.filterwarnings('error') try: uvffabar = sum_column_data_by_name(input_data, columns_names, 'uvffabar') # ff + safe_log(logger, "debug", f"Calculated uvffabar: {uvffabar}") + uvooabar = 
sum_column_data_by_name(input_data, columns_names, 'uvooabar') # oo + safe_log(logger, "debug", f"Calculated uvooabar: {uvooabar}") + uvfoabar = sum_column_data_by_name(input_data, columns_names, 'uvfoabar') # fo + safe_log(logger, "debug", f"Calculated uvfoabar: {uvfoabar}") v = uvffabar * uvooabar + safe_log(logger, "debug", f"Intermediate variable v calculated: {v}") + result = uvfoabar / np.sqrt(v) + safe_log(logger, "debug", f"Raw uncentered anomaly correlation calculated: {result}") # Check the computed range if result > 1: result = 1.0 elif result < -1: result = -1.0 + safe_log(logger, "info", f"Final uncentered anomaly correlation result: {result}") - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error occurred during calculation: {e}") result = None warnings.filterwarnings('ignore') + safe_log(logger, "info", "Finished calculation of VCNT_ANOM_CORR_UNCNTR.") return result -def calculate_vcnt_dir_me(input_data, columns_names, aggregation=False): +def calculate_vcnt_dir_me(input_data, columns_names, aggregation=False, logger=None): + safe_log(logger, "info", "Starting calculation of VCNT_DIR_ME.") warnings.filterwarnings('error') try: total = get_total_dir_values(input_data, np.array(columns_names), aggregation) - result = sum_column_data_by_name(input_data,np.array(columns_names), 'dir_me') / total + safe_log(logger, "debug", f"Total direction values calculated: {total}") + + result = sum_column_data_by_name(input_data, np.array(columns_names), 'dir_me') / total + safe_log(logger, "debug", f"Raw mean error of direction calculated: {result}") result = round_half_up(result, PRECISION) + safe_log(logger, "info", f"Result rounded to precision {PRECISION}: {result}") - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error occurred during calculation: {e}") result = None warnings.filterwarnings('ignore') + safe_log(logger, "info", "Finished calculation of VCNT_DIR_ME.") return result -def calculate_vcnt_dir_mae(input_data, columns_names, aggregation=False): +def calculate_vcnt_dir_mae(input_data, columns_names, aggregation=False, logger=None): + safe_log(logger, "info", "Starting calculation of VCNT_DIR_MAE.") warnings.filterwarnings('error') try: total = get_total_dir_values(input_data, np.array(columns_names), aggregation) + safe_log(logger, "debug", f"Total direction values calculated: {total}") + result = sum_column_data_by_name(input_data, np.array(columns_names), 'dir_mae') / total + safe_log(logger, "debug", f"Raw mean absolute error of direction calculated: {result}") result = round_half_up(result, PRECISION) + safe_log(logger, "info", f"Result rounded to precision {PRECISION}: {result}") - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error occurred during calculation: {e}") result = None warnings.filterwarnings('ignore') + safe_log(logger, "info", "Finished calculation of VCNT_DIR_MAE.") return result - -def calculate_vcnt_dir_mse(input_data, columns_names, aggregation=False): +def calculate_vcnt_dir_mse(input_data, columns_names, aggregation=False, logger=None): + safe_log(logger, "info", "Starting calculation of VCNT_DIR_MSE.") warnings.filterwarnings('error') try: total = get_total_dir_values(input_data, np.array(columns_names), aggregation) + 
safe_log(logger, "debug", f"Total direction values calculated: {total}") + result = sum_column_data_by_name(input_data, np.array(columns_names), 'dir_mse') / total + safe_log(logger, "debug", f"Raw mean squared error of direction calculated: {result}") result = round_half_up(result, PRECISION) + safe_log(logger, "info", f"Result rounded to precision {PRECISION}: {result}") - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error occurred during calculation: {e}") result = None warnings.filterwarnings('ignore') + safe_log(logger, "info", "Finished calculation of VCNT_DIR_MSE.") return result -def calculate_vcnt_dir_rmse(input_data, columns_names, aggregation=False): +def calculate_vcnt_dir_rmse(input_data, columns_names, aggregation=False, logger=None): + safe_log(logger, "info", "Starting calculation of VCNT_DIR_RMSE.") warnings.filterwarnings('error') try: - result = np.sqrt(calculate_vcnt_dir_mse(input_data, np.array(columns_names), aggregation)) + mse_result = calculate_vcnt_dir_mse(input_data, np.array(columns_names), aggregation, logger=logger) + safe_log(logger, "debug", f"Calculated mean squared error: {mse_result}") + + result = np.sqrt(mse_result) + safe_log(logger, "debug", f"Calculated root mean squared error: {result}") result = round_half_up(result, PRECISION) + safe_log(logger, "info", f"Result rounded to precision {PRECISION}: {result}") - except (TypeError, ZeroDivisionError, Warning, ValueError): + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error occurred during calculation: {e}") result = None warnings.filterwarnings('ignore') - return result + safe_log(logger, "info", "Finished calculation of VCNT_DIR_RMSE.") + return result \ No newline at end of file diff --git a/metcalcpy/util/vl1l2_statistics.py b/metcalcpy/util/vl1l2_statistics.py index 1cac77ba..e7a07c97 100644 --- a/metcalcpy/util/vl1l2_statistics.py +++ b/metcalcpy/util/vl1l2_statistics.py @@ -16,12 +16,13 @@ from metcalcpy.util.met_stats import calc_speed from metcalcpy.util.utils import round_half_up, sum_column_data_by_name, PRECISION, get_total_values, \ get_total_dir_values +from metcalcpy.util.safe_log import safe_log __author__ = 'Tatiana Burek' __version__ = '0.1.0' -def calculate_vl1l2_bias(input_data, columns_names, aggregation=False): +def calculate_vl1l2_bias(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of VL1L2_BIAS - Args: @@ -35,20 +36,32 @@ def calculate_vl1l2_bias(input_data, columns_names, aggregation=False): calculated VL1L2_BIAS as float or None if some of the data values are missing or invalid """ + safe_log(logger, "info", "Starting calculation of VL1L2_BIAS.") warnings.filterwarnings('error') try: total = get_total_values(input_data, columns_names, aggregation) + safe_log(logger, "debug", f"Total value calculated: {total}") + uvffbar = sum_column_data_by_name(input_data, columns_names, 'uvffbar') / total + safe_log(logger, "debug", f"Calculated uvffbar: {uvffbar}") + uvoobar = sum_column_data_by_name(input_data, columns_names, 'uvoobar') / total + safe_log(logger, "debug", f"Calculated uvoobar: {uvoobar}") + bias = np.sqrt(uvffbar) - np.sqrt(uvoobar) + safe_log(logger, "debug", f"Calculated bias: {bias}") + result = round_half_up(bias, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", f"Result rounded to precision {PRECISION}: {result}") + + 
except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error occurred during calculation: {e}") result = None warnings.filterwarnings('ignore') + safe_log(logger, "info", "Finished calculation of VL1L2_BIAS.") return result - -def calculate_vl1l2_fvar(input_data, columns_names, aggregation=False): +def calculate_vl1l2_fvar(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of VL1L2_FVAR - Args: @@ -62,20 +75,33 @@ def calculate_vl1l2_fvar(input_data, columns_names, aggregation=False): calculated VL1L2_FVAR as float or None if some of the data values are missing or invalid """ + safe_log(logger, "info", "Starting calculation of VL1L2_FVAR.") warnings.filterwarnings('error') try: total = get_total_values(input_data, columns_names, aggregation) + safe_log(logger, "debug", f"Total value calculated: {total}") + uvffbar = sum_column_data_by_name(input_data, columns_names, 'uvffbar') / total + safe_log(logger, "debug", f"Calculated uvffbar: {uvffbar}") + f_speed_bar = sum_column_data_by_name(input_data, columns_names, 'f_speed_bar') / total + safe_log(logger, "debug", f"Calculated f_speed_bar: {f_speed_bar}") + result = uvffbar - f_speed_bar * f_speed_bar + safe_log(logger, "debug", f"Calculated forecast variance: {result}") + result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", f"Result rounded to precision {PRECISION}: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error occurred during calculation: {e}") result = None warnings.filterwarnings('ignore') + safe_log(logger, "info", "Finished calculation of VL1L2_FVAR.") return result -def calculate_vl1l2_ovar(input_data, columns_names, aggregation=False): +def calculate_vl1l2_ovar(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of VL1L2_OVAR - Args: @@ -89,20 +115,33 @@ def calculate_vl1l2_ovar(input_data, columns_names, aggregation=False): calculated VL1L2_OVAR as float or None if some of the data values are missing or invalid """ + safe_log(logger, "info", "Starting calculation of VL1L2_OVAR.") warnings.filterwarnings('error') try: total = get_total_values(input_data, columns_names, aggregation) + safe_log(logger, "debug", f"Total value calculated: {total}") + uvoobar = sum_column_data_by_name(input_data, columns_names, 'uvoobar') / total + safe_log(logger, "debug", f"Calculated uvoobar: {uvoobar}") + o_speed_bar = sum_column_data_by_name(input_data, columns_names, 'o_speed_bar') / total + safe_log(logger, "debug", f"Calculated o_speed_bar: {o_speed_bar}") + result = uvoobar - o_speed_bar * o_speed_bar + safe_log(logger, "debug", f"Calculated observed variance: {result}") + result = round_half_up(result, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", f"Result rounded to precision {PRECISION}: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error occurred during calculation: {e}") result = None warnings.filterwarnings('ignore') + safe_log(logger, "info", "Finished calculation of VL1L2_OVAR.") return result -def calculate_vl1l2_fspd(input_data, columns_names, aggregation=False): +def calculate_vl1l2_fspd(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of VL1L2_FSPD - Args: @@ -116,20 +155,33 @@ def calculate_vl1l2_fspd(input_data, columns_names, 
aggregation=False): calculated VL1L2_FSPD as float or None if some of the data values are missing or invalid """ + safe_log(logger, "info", "Starting calculation of VL1L2_FSPD.") warnings.filterwarnings('error') try: total = get_total_values(input_data, columns_names, aggregation) + safe_log(logger, "debug", f"Total value calculated: {total}") + ufbar = sum_column_data_by_name(input_data, columns_names, 'ufbar') / total + safe_log(logger, "debug", f"Calculated ufbar: {ufbar}") + vfbar = sum_column_data_by_name(input_data, columns_names, 'vfbar') / total - fspd = calc_speed(ufbar, vfbar) + safe_log(logger, "debug", f"Calculated vfbar: {vfbar}") + + fspd = calc_speed(ufbar, vfbar, logger=logger) + safe_log(logger, "debug", f"Calculated forecast speed: {fspd}") + result = round_half_up(fspd, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", f"Result rounded to precision {PRECISION}: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error occurred during calculation: {e}") result = None warnings.filterwarnings('ignore') + safe_log(logger, "info", "Finished calculation of VL1L2_FSPD.") return result -def calculate_vl1l2_ospd(input_data, columns_names, aggregation=False): +def calculate_vl1l2_ospd(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of VL1L2_OSPD - Args: @@ -143,20 +195,33 @@ def calculate_vl1l2_ospd(input_data, columns_names, aggregation=False): calculated VL1L2_OSPD as float or None if some of the data values are missing or invalid """ + safe_log(logger, "info", "Starting calculation of VL1L2_OSPD.") warnings.filterwarnings('error') try: total = get_total_values(input_data, columns_names, aggregation) + safe_log(logger, "debug", f"Total value calculated: {total}") + uobar = sum_column_data_by_name(input_data, columns_names, 'uobar') / total + safe_log(logger, "debug", f"Calculated uobar: {uobar}") + vobar = sum_column_data_by_name(input_data, columns_names, 'vobar') / total - ospd = calc_speed(uobar, vobar) + safe_log(logger, "debug", f"Calculated vobar: {vobar}") + + ospd = calc_speed(uobar, vobar, logger=logger) + safe_log(logger, "debug", f"Calculated observed speed: {ospd}") + result = round_half_up(ospd, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", f"Result rounded to precision {PRECISION}: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error occurred during calculation: {e}") result = None warnings.filterwarnings('ignore') + safe_log(logger, "info", "Finished calculation of VL1L2_OSPD.") return result -def calculate_vl1l2_speed_err(input_data, columns_names, aggregation=False): +def calculate_vl1l2_speed_err(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of VL1L2_SPEED_ERR - Args: @@ -170,18 +235,30 @@ def calculate_vl1l2_speed_err(input_data, columns_names, aggregation=False): calculated VL1L2_SPEED_ERR as float or None if some of the data values are missing or invalid """ + safe_log(logger, "info", "Starting calculation of VL1L2_SPEED_ERR.") warnings.filterwarnings('error') try: - speed_bias = calculate_vl1l2_fspd(input_data, columns_names, aggregation) \ - - calculate_vl1l2_ospd(input_data, columns_names, aggregation) + fspd = calculate_vl1l2_fspd(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"Calculated forecast speed (VL1L2_FSPD): 
{fspd}") + + ospd = calculate_vl1l2_ospd(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"Calculated observed speed (VL1L2_OSPD): {ospd}") + + speed_bias = fspd - ospd + safe_log(logger, "debug", f"Calculated speed bias (VL1L2_SPEED_ERR): {speed_bias}") + result = round_half_up(speed_bias, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", f"Result rounded to precision {PRECISION}: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error occurred during calculation: {e}") result = None warnings.filterwarnings('ignore') + safe_log(logger, "info", "Finished calculation of VL1L2_SPEED_ERR.") return result -def calculate_vl1l2_msve(input_data, columns_names, aggregation=False): +def calculate_vl1l2_msve(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of VL1L2_MSVE - Args: @@ -195,24 +272,40 @@ def calculate_vl1l2_msve(input_data, columns_names, aggregation=False): calculated VL1L2_MSVE as float or None if some of the data values are missing or invalid """ + safe_log(logger, "info", "Starting calculation of VL1L2_MSVE.") warnings.filterwarnings('error') try: total = get_total_values(input_data, columns_names, aggregation) + safe_log(logger, "debug", f"Total value calculated: {total}") + uvffbar = sum_column_data_by_name(input_data, columns_names, 'uvffbar') / total + safe_log(logger, "debug", f"Calculated uvffbar: {uvffbar}") + uvfobar = sum_column_data_by_name(input_data, columns_names, 'uvfobar') / total + safe_log(logger, "debug", f"Calculated uvfobar: {uvfobar}") + uvoobar = sum_column_data_by_name(input_data, columns_names, 'uvoobar') / total + safe_log(logger, "debug", f"Calculated uvoobar: {uvoobar}") + msve = uvffbar - 2.0 * uvfobar + uvoobar + safe_log(logger, "debug", f"Calculated mean squared vector error (MSVE): {msve}") + if msve < 0: + safe_log(logger, "warning", f"MSVE is negative, setting result to None.") result = None else: result = round_half_up(msve, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + safe_log(logger, "info", f"Result rounded to precision {PRECISION}: {result}") + + except (TypeError, ZeroDivisionError, Warning, ValueError) as e: + safe_log(logger, "error", f"Error occurred during calculation: {e}") result = None warnings.filterwarnings('ignore') + safe_log(logger, "info", "Finished calculation of VL1L2_MSVE.") return result -def calculate_vl1l2_rmsve(input_data, columns_names, aggregation=False): +def calculate_vl1l2_rmsve(input_data, columns_names, aggregation=False, logger=None): """Performs calculation of VL1L2_RMSVE - Args: input_data: 2-dimensional numpy array with data for the calculation @@ -224,17 +317,31 @@ def calculate_vl1l2_rmsve(input_data, columns_names, aggregation=False): calculated VL1L2_RMSVE as float or None if some of the data values are missing or invalid """ + safe_log(logger, "info", "Starting calculation of VL1L2_RMSVE.") warnings.filterwarnings('error') try: - rmsve = np.sqrt(calculate_vl1l2_msve(input_data, columns_names, aggregation)) - result = round_half_up(rmsve, PRECISION) - except (TypeError, ZeroDivisionError, Warning, ValueError): + msve = calculate_vl1l2_msve(input_data, columns_names, aggregation, logger=logger) + safe_log(logger, "debug", f"Calculated mean squared vector error (MSVE): {msve}") + + if msve is None: + safe_log(logger, "warning", "MSVE calculation returned None, RMSVE cannot be calculated.") + result = None 
+        else:
+            rmsve = np.sqrt(msve)
+            safe_log(logger, "debug", f"Calculated root mean squared vector error (RMSVE): {rmsve}")
+
+            result = round_half_up(rmsve, PRECISION)
+            safe_log(logger, "info", f"Result rounded to precision {PRECISION}: {result}")
+
+    except (TypeError, ZeroDivisionError, Warning, ValueError) as e:
+        safe_log(logger, "error", f"Error occurred during calculation: {e}")
         result = None
     warnings.filterwarnings('ignore')
+    safe_log(logger, "info", "Finished calculation of VL1L2_RMSVE.")
     return result
-def calculate_vl1l2_total(input_data, columns_names):
+def calculate_vl1l2_total(input_data, columns_names, logger=None):
     """Performs calculation of Total number of matched pairs for Vector Partial Sums
     Args:
@@ -247,11 +354,23 @@ def calculate_vl1l2_total(input_data, columns_names):
             calculated Total number of matched pairs as float or None if some of the data values are missing or invalid
     """
-    total = sum_column_data_by_name(input_data, columns_names, 'total')
-    return round_half_up(total, PRECISION)
+    safe_log(logger, "info", "Starting calculation of total number of matched pairs (VL1L2_TOTAL).")
+    try:
+        total = sum_column_data_by_name(input_data, columns_names, 'total')
+        safe_log(logger, "debug", f"Calculated total number of matched pairs: {total}")
+        result = round_half_up(total, PRECISION)
+        safe_log(logger, "info", f"Result rounded to precision {PRECISION}: {result}")
-def calculate_vl1l2_dir_me(input_data, columns_names, aggregation=False):
+    except (TypeError, ZeroDivisionError, Warning, ValueError) as e:
+        safe_log(logger, "error", f"Error occurred during calculation: {e}")
+        result = None
+
+    safe_log(logger, "info", "Finished calculation of VL1L2_TOTAL.")
+    return result
+
+
+def calculate_vl1l2_dir_me(input_data, columns_names, aggregation=False, logger=None):
     """Performs calculation of DIR_ME, which was added in MET v12.0
     Args: input_data: 2-dimensional numpy array with data for the calculation
@@ -261,19 +380,27 @@ def calculate_vl1l2_dir_me(input_data, columns_names, aggregation=False):
     Returns: dir_me """
+    safe_log(logger, "info", "Starting calculation of DIR_ME.")
+
     warnings.filterwarnings('error')
     try:
         total = get_total_dir_values(input_data, np.array(columns_names), aggregation)
+        safe_log(logger, "debug", f"Total direction values calculated: {total}")
+
         result = sum_column_data_by_name(input_data, np.array(columns_names), 'dir_me') / total
+        safe_log(logger, "debug", f"Raw DIR_ME calculated: {result}")
         result = round_half_up(result, PRECISION)
+        safe_log(logger, "info", f"Result rounded to precision {PRECISION}: {result}")
-    except (TypeError, ZeroDivisionError, Warning, ValueError):
+    except (TypeError, ZeroDivisionError, Warning, ValueError) as e:
+        safe_log(logger, "error", f"Error occurred during calculation: {e}")
         result = None
     warnings.filterwarnings('ignore')
+    safe_log(logger, "info", "Finished calculation of DIR_ME.")
     return result
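Note on the MSVE and RMSVE hunks above: the calculation relies on the vector partial-sums identity MSVE = uvffbar - 2*uvfobar + uvoobar, where uvffbar, uvfobar, and uvoobar are the normalized sums of the forecast, cross, and observed vector products, and RMSVE is simply its square root. A minimal sketch verifying the identity on made-up wind components (illustrative values, not project data):

.. code-block:: python

    import numpy as np

    # Hypothetical u/v wind components for four matched forecast/observation pairs.
    uf = np.array([1.0, 2.0, 0.5, 1.5]); vf = np.array([0.0, 1.0, 1.0, 2.0])
    uo = np.array([0.8, 2.5, 0.0, 1.0]); vo = np.array([0.2, 0.5, 1.5, 1.8])

    # VL1L2 partial sums, normalized by the number of matched pairs.
    uvffbar = np.mean(uf * uf + vf * vf)
    uvfobar = np.mean(uf * uo + vf * vo)
    uvoobar = np.mean(uo * uo + vo * vo)

    msve = uvffbar - 2.0 * uvfobar + uvoobar
    rmsve = np.sqrt(msve)

    # The partial-sums form equals the mean squared vector error computed directly.
    assert np.isclose(msve, np.mean((uf - uo) ** 2 + (vf - vo) ** 2))

This also shows why MSVE should never be negative except through numerical round-off, which is what the new warning branch guards against.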
-def calculate_vl1l2_dir_mae(input_data, columns_names, aggregation=False):
+def calculate_vl1l2_dir_mae(input_data, columns_names, aggregation=False, logger=None):
     """Performs calculation of DIR_MAE
     Args: input_data: 2-dimensional numpy array with data for the calculation
@@ -283,19 +410,27 @@ def calculate_vl1l2_dir_mae(input_data, columns_names, aggregation=False):
     Returns: dir_mae statistic """
+    safe_log(logger, "info", "Starting calculation of DIR_MAE.")
+
     warnings.filterwarnings('error')
     try:
         total = get_total_dir_values(input_data, np.array(columns_names), aggregation)
+        safe_log(logger, "debug", f"Total direction values calculated: {total}")
+
         result = sum_column_data_by_name(input_data, np.array(columns_names), 'dir_mae') / total
+        safe_log(logger, "debug", f"Raw DIR_MAE calculated: {result}")
         result = round_half_up(result, PRECISION)
+        safe_log(logger, "info", f"Result rounded to precision {PRECISION}: {result}")
-    except (TypeError, ZeroDivisionError, Warning, ValueError):
+    except (TypeError, ZeroDivisionError, Warning, ValueError) as e:
+        safe_log(logger, "error", f"Error occurred during calculation: {e}")
         result = None
     warnings.filterwarnings('ignore')
+    safe_log(logger, "info", "Finished calculation of DIR_MAE.")
     return result
-def calculate_vl1l2_dir_mse(input_data, columns_names, aggregation=False):
+def calculate_vl1l2_dir_mse(input_data, columns_names, aggregation=False, logger=None):
     """Performs calculation of DIR_MSE
     Args: input_data: 2-dimensional numpy array with data for the calculation
@@ -305,14 +440,21 @@ def calculate_vl1l2_dir_mse(input_data, columns_names, aggregation=False):
     Returns: dir_mse statistic """
+    safe_log(logger, "info", "Starting calculation of DIR_MSE.")
+
     warnings.filterwarnings('error')
     try:
         total = get_total_dir_values(input_data, np.array(columns_names), aggregation)
+        safe_log(logger, "debug", f"Total direction values calculated: {total}")
+
         result = sum_column_data_by_name(input_data, np.array(columns_names), 'dir_mse') / total
+        safe_log(logger, "debug", f"Raw DIR_MSE calculated: {result}")
         result = round_half_up(result, PRECISION)
+        safe_log(logger, "info", f"Result rounded to precision {PRECISION}: {result}")
-    except (TypeError, ZeroDivisionError, Warning, ValueError):
+    except (TypeError, ZeroDivisionError, Warning, ValueError) as e:
+        safe_log(logger, "error", f"Error occurred during calculation: {e}")
         result = None
     warnings.filterwarnings('ignore')
-    return result
-
+    safe_log(logger, "info", "Finished calculation of DIR_MSE.")
+    return result
\ No newline at end of file
diff --git a/metcalcpy/util/write_mpr.py b/metcalcpy/util/write_mpr.py
index f9df9dc4..c79e784c 100644
--- a/metcalcpy/util/write_mpr.py
+++ b/metcalcpy/util/write_mpr.py
@@ -9,7 +9,8 @@
 import numpy as np
+from metcalcpy.util.safe_log import safe_log

-def write_mpr_file(data_fcst,data_obs,lats_in,lons_in,fcst_lead,fcst_valid,obs_lead,obs_valid,mod_name,desc,fcst_var,fcst_unit,fcst_lev,obs_var,obs_unit,obs_lev,maskname,obsslev,outdir,outfile_prefix):
+def write_mpr_file(data_fcst,data_obs,lats_in,lons_in,fcst_lead,fcst_valid,obs_lead,obs_valid,mod_name,desc,fcst_var,fcst_unit,fcst_lev,obs_var,obs_unit,obs_lev,maskname,obsslev,outdir,outfile_prefix, logger=None):
     """
     Function to write an output mpr file given a 1d array of observation and forecast data
@@ -62,6 +63,7 @@ def write_mpr_file(data_fcst,data_obs,lats_in,lons_in,fcst_lead,fcst_valid,obs_l
     """
     Get the data length to create the INDEX and TOTAL variables in the MPR line
     """
+    safe_log(logger, "info", "Starting to write MPR file.")
     dlength = len(data_obs)
     index_num = np.arange(0,dlength,1)+1
@@ -93,6 +95,7 @@ def write_mpr_file(data_fcst,data_obs,lats_in,lons_in,fcst_lead,fcst_valid,obs_l
     """
     Create the output directory if it doesn't exist
     """
     if not os.path.exists(outdir):
+        safe_log(logger, "info", f"Creating output directory: {outdir}")
         os.makedirs(outdir)

     """
@@ -102,6 +105,8 @@ def write_mpr_file(data_fcst,data_obs,lats_in,lons_in,fcst_lead,fcst_valid,obs_l
     ft_stamp = fcst_lead[0]+'L_'+fcst_valid_str[0:8]+'_'+fcst_valid_str[9:15]+'V'
     full_outfile = os.path.join(outdir,outfile_prefix+'_'+ft_stamp+'.stat')

+    safe_log(logger, "info", f"Writing output MPR file: {full_outfile}")
+
     """
     Write the file
     """
@@ -120,3 +125,4 @@ def write_mpr_file(data_fcst,data_obs,lats_in,lons_in,fcst_lead,fcst_valid,obs_l
                 str(dlength),str(index_num[dpt]),'NA',lats_in[dpt],lons_in[dpt],obsslev[dpt],'NA',data_fcst[dpt],
                 data_obs[dpt],'NA','NA','NA','NA'))

+    safe_log(logger, "info", f"Successfully wrote MPR file: {full_outfile}")
\ No newline at end of file
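Note on the calling convention above: every function now accepts logger=None and routes all messages through safe_log, so the statistics remain fully usable when no logger has been configured. A minimal sketch of the guard this convention implies (the packaged metcalcpy/util/safe_log.py may differ in detail):

.. code-block:: python

    import logging
    from typing import Optional

    def safe_log(logger: Optional[logging.Logger], level: str, message: str) -> None:
        """Log `message` at the named level if a logger is provided; otherwise no-op."""
        if logger is None:
            return
        # Map the level name ("debug", "info", "warning", "error") to the logger method.
        log_method = getattr(logger, level.lower(), None)
        if callable(log_method):
            log_method(message)

Because the guard lives in one helper, none of the statistics modules need None checks of their own.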
{full_outfile}") + """ Write the file """ @@ -120,3 +124,4 @@ def write_mpr_file(data_fcst,data_obs,lats_in,lons_in,fcst_lead,fcst_valid,obs_l str(dlength),str(index_num[dpt]),'NA',lats_in[dpt],lons_in[dpt],obsslev[dpt],'NA',data_fcst[dpt], data_obs[dpt],'NA','NA','NA','NA')) + safe_log(logger, "info", f"Successfully wrote MPR file: {full_outfile}") \ No newline at end of file diff --git a/metcalcpy/validate_mv_python.py b/metcalcpy/validate_mv_python.py index 7e7eeaf9..ddcee6a1 100644 --- a/metcalcpy/validate_mv_python.py +++ b/metcalcpy/validate_mv_python.py @@ -40,7 +40,8 @@ import yaml from metcalcpy.compare_images import CompareImages - +from metcalcpy.logging_config import setup_logging +from metcalcpy.util.safe_log import safe_log def replace_name(old_name, postfix): """Adds postfix to the end of a file name but before the extension @@ -83,13 +84,17 @@ def main(params): """ # remove old output + logger = setup_logging(params) + safe_log(logger, "info", "Cleaning old outputs.") clean(params) # find XML files test_xml = get_test_xml(params) + safe_log(logger, "info", f"Found {len(test_xml)} XML files to process.") # rerun each XML with Python for file in test_xml: + safe_log(logger, "info", f'Checking {file}') print(f'\nChecking {file}') doc = xml.dom.minidom.parse(file) @@ -116,6 +121,7 @@ def main(params): new_xml_file = params['output_xml_dir'] + os.path.basename(replace_name(file, 'py')) with open(new_xml_file, "w") as xml_file: doc.writexml(xml_file) + safe_log(logger, "info", f"New XML saved: {new_xml_file}") # run METviewer with the new XML and wait till it is done process = subprocess.Popen([params['mv_home'] + '/bin/mv_batch.sh', new_xml_file], @@ -131,6 +137,7 @@ def main(params): if not os.path.exists(original_plot_path) \ and not os.path.exists(new_image_path): # if both images don't exist - success + safe_log(logger, "info", f'SUCCESS: For {plot_name} both images do not exist.') print(f'SUCCESS: For {plot_name} both images don\'t exist') # remove new XML os.remove(new_xml_file) @@ -145,6 +152,7 @@ def main(params): compare = CompareImages(original_plot_path, new_image_path) ssim = compare.get_mssim() if ssim == 1.0: + safe_log(logger, "info", f'SUCCESS: For {plot_name} images are identical.') print(f'SUCCESS: For {plot_name} images are identical') # remove new image os.remove(new_image_path) @@ -156,12 +164,14 @@ def main(params): delete_similar_files(params['output_data_dir'], plot_name) else: + safe_log(logger, "error", f'ERROR: For {plot_name} images are different.') print(f'ERROR: For {plot_name} images are different') # add more diagnostic images compare.save_thresh_image(params['output_plots_dir'] + replace_name(plot_name, 'thresh')) except KeyError as err: + safe_log(logger, "error", f'ERROR: For {plot_name} : {err}') print(f'ERROR: For {plot_name} : {err}') diff --git a/metcalcpy/vertical_interp.py b/metcalcpy/vertical_interp.py index 8cbba86d..0c8894ef 100644 --- a/metcalcpy/vertical_interp.py +++ b/metcalcpy/vertical_interp.py @@ -48,6 +48,7 @@ import numpy as np import xarray as xr # http://xarray.pydata.org/ import netCDF4 as nc +from metcalcpy.util.safe_log import safe_log """ Import Pint and MetPy modules diff --git a/test/ecnt_agg_stat.yaml b/test/ecnt_agg_stat.yaml index 4354cc53..91a097d9 100644 --- a/test/ecnt_agg_stat.yaml +++ b/test/ecnt_agg_stat.yaml @@ -27,3 +27,6 @@ series_val_1: model: - FCST series_val_2: {} +log_dir: !ENV "${TEST_DIR}/logs" +log_filename: log_agg_stat_ecnt.txt +log_level: WARNING diff --git a/test/logs/log_agg_eclv.txt 
b/test/logs/log_agg_eclv.txt new file mode 100644 index 00000000..17bc673f --- /dev/null +++ b/test/logs/log_agg_eclv.txt @@ -0,0 +1,96 @@ +2024-09-04 23:27:28 UTC - ishitas - INFO - User: ishitas has started the script with command: /home/ishitas/.local/lib/python3.10/site-packages/pytest/__main__.py -s --log-cli-level=INFO test_agg_eclv.py +2024-09-04 23:27:28 UTC - ishitas - INFO - Successfully loaded data from /d1/personal/ishitas/METcalcpy/test/data/agg_eclv_data.data +2024-09-04 23:27:28 UTC - ishitas - INFO - Random seed set to 1. +2024-09-04 23:27:28 UTC - ishitas - INFO - Generated all combinations for points to be processed: 2 combinations. +2024-09-04 23:27:28 UTC - ishitas - INFO - Output DataFrame initialized successfully with fields: ['model', 'fcst_lev', 'thresh_i', 'x_pnt_i', 'y_pnt_i', 'stat_btcl', 'stat_btcu', 'nstats']. +2024-09-04 23:27:28 UTC - ishitas - INFO - Output DataFrame initialized successfully with fields: ['model', 'fcst_lev', 'thresh_i', 'x_pnt_i', 'y_pnt_i', 'stat_btcl', 'stat_btcu', 'nstats']. +2024-09-04 23:27:28 UTC - ishitas - INFO - Statistics calculated successfully for single value case. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. 
+2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Bootstrapped statistics calculated for threshold 0. +2024-09-04 23:27:29 UTC - ishitas - INFO - Completed processing for point ('WRF', 'Z10') +2024-09-04 23:27:29 UTC - ishitas - INFO - Output DataFrame initialized successfully with fields: ['model', 'fcst_lev', 'thresh_i', 'x_pnt_i', 'y_pnt_i', 'stat_btcl', 'stat_btcu', 'nstats']. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for single value case. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. 
+2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics calculated successfully for all bootstrap samples. +2024-09-04 23:27:29 UTC - ishitas - INFO - Bootstrapped statistics calculated for threshold 0. +2024-09-04 23:27:29 UTC - ishitas - INFO - Completed processing for point ('WRF', 'P850-700') +2024-09-04 23:27:29 UTC - ishitas - INFO - All data processed successfully. Returning compiled DataFrame. +2024-09-04 23:27:29 UTC - ishitas - INFO - Statistics and confidence intervals calculation completed. 
+2024-09-04 23:27:29 UTC - ishitas - INFO - Data successfully written to /d1/personal/ishitas/METcalcpy/test/data/agg_eclv_data_output.data in mode w.
diff --git a/test/rrfs_ecnt_config_agg_stat.yaml b/test/rrfs_ecnt_config_agg_stat.yaml
index b659034a..a8843444 100644
--- a/test/rrfs_ecnt_config_agg_stat.yaml
+++ b/test/rrfs_ecnt_config_agg_stat.yaml
@@ -41,3 +41,6 @@ series_val_1:
   model:
     - RRFS_GEFS_GF.SPP.SPPT
 series_val_2: {}
+log_dir: !ENV "${TEST_DIR}/logs"
+log_filename: log_agg_stat_rrfs_ecnt.txt
+log_level: DEBUG
diff --git a/test/test_agg_eclv.py b/test/test_agg_eclv.py
index 339ab153..52b54c9e 100644
--- a/test/test_agg_eclv.py
+++ b/test/test_agg_eclv.py
@@ -43,7 +43,10 @@ def settings():
         'agg_stat1': ['ECLV'],
         'circular_block_bootstrap': True,
         'equalize_by_indep': 'True',
-        'cl_step': 0.05
+        'cl_step': 0.05,
+        'log_dir': f'{cwd}/logs/',
+        'log_filename': 'log_agg_eclv.txt',
+        'log_level': 'WARNING'
     }
     agg_stat = AggEclv(params)
     settings_dict = dict()
diff --git a/test/test_agg_ratio.py b/test/test_agg_ratio.py
index 05254db2..612bfb5c 100644
--- a/test/test_agg_ratio.py
+++ b/test/test_agg_ratio.py
@@ -67,7 +67,10 @@ def settings():
             'APCP_03': ['ECNT_RMSE','ECNT_SPREAD']
         },
         'list_stat_1':['ECNT_RMSE', 'ECNT_SPREAD'],
-        'list_stat_2':[]
+        'list_stat_2':[],
+        'log_dir': f'{cwd}/logs/',
+        'log_filename': 'log_agg_stat.txt',
+        'log_level': 'WARNING'
     }
     agg_stat = AggStat(params)
     settings_dict = dict()
diff --git a/test/test_scorecard.py b/test/test_scorecard.py
index e94ae094..627dc989 100644
--- a/test/test_scorecard.py
+++ b/test/test_scorecard.py
@@ -84,8 +84,10 @@ def settings():
                              'NoahMPv3.5.1_d01']},
         'stat_flag': 'NCAR',
         'sum_stat_input': f'{cwd}/data/scorecard.data',
-        'sum_stat_output': f'{cwd}/data/scorecard_output.data'
-
+        'sum_stat_output': f'{cwd}/data/scorecard_output.data',
+        'log_dir': f'{cwd}/logs/',
+        'log_filename': 'log_scorecard.txt',
+        'log_level': 'WARNING'
     }
     scorecard = Scorecard(params)
     settings_dict = dict()
diff --git a/test/val1l2_agg_stat.yaml b/test/val1l2_agg_stat.yaml
index 4fbcd881..8053d64d 100644
--- a/test/val1l2_agg_stat.yaml
+++ b/test/val1l2_agg_stat.yaml
@@ -41,3 +41,6 @@ series_val_1:
   model:
     - FCST
 series_val_2: {}
+log_dir: !ENV "${TEST_DIR}/logs"
+log_filename: log_agg_stat_val1l2.txt
+log_level: WARNING
\ No newline at end of file
diff --git a/test/vcnt_agg_stat.yaml b/test/vcnt_agg_stat.yaml
index aded8e42..78573d7a 100644
--- a/test/vcnt_agg_stat.yaml
+++ b/test/vcnt_agg_stat.yaml
@@ -43,3 +43,6 @@ series_val_1:
   model:
     - FCST
 series_val_2: {}
+log_dir: !ENV "${TEST_DIR}/logs"
+log_filename: log_agg_stat_vcnt.txt
+log_level: WARNING
diff --git a/test/vl1l2_agg_stat_met_v12.yaml b/test/vl1l2_agg_stat_met_v12.yaml
index c74458e1..3db4df7a 100644
--- a/test/vl1l2_agg_stat_met_v12.yaml
+++ b/test/vl1l2_agg_stat_met_v12.yaml
@@ -41,3 +41,6 @@ series_val_1:
   model:
     - FCST
 series_val_2: {}
+log_dir: !ENV "${TEST_DIR}/logs"
+log_filename: log_agg_stat_vl1l2.txt
+log_level: WARNING
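Note on the test configurations above: each one adds the same three keys, log_dir (set here through the !ENV tag, e.g. !ENV "${TEST_DIR}/logs"), log_filename, and log_level, which setup_logging reads to build the logger. A minimal sketch of loading such a config and wiring the logger, assuming !ENV simply expands environment variables (the project's actual YAML loader may resolve the tag differently):

.. code-block:: python

    import os
    import yaml

    from metcalcpy.logging_config import setup_logging
    from metcalcpy.util.safe_log import safe_log

    def env_constructor(loader, node):
        # Expand ${VAR} references in scalars tagged !ENV, e.g. "${TEST_DIR}/logs".
        return os.path.expandvars(loader.construct_scalar(node))

    yaml.SafeLoader.add_constructor('!ENV', env_constructor)

    with open('test/vl1l2_agg_stat_met_v12.yaml') as config_file:
        params = yaml.load(config_file, Loader=yaml.SafeLoader)

    logger = setup_logging(params)  # reads log_dir, log_filename, and log_level
    safe_log(logger, "info", "Logging configured from the YAML test config.")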