From 9d876c4787a52582f9f2edc34d38daf35dacaa28 Mon Sep 17 00:00:00 2001 From: John Halley Gotway Date: Thu, 12 Dec 2024 16:58:52 +0000 Subject: [PATCH] Per #3030, enhance Series-Analysis to aggregate the GRAD line type incrementally across multiple runs. However, this can only be done when requesting that 'ALL' GRAD columns be written. --- docs/Users_Guide/series-analysis.rst | 2 +- .../test_unit/config/SeriesAnalysisConfig | 14 + .../test_unit/xml/unit_series_analysis.xml | 5 + .../core/series_analysis/series_analysis.cc | 301 ++++++++++++++++-- 4 files changed, 295 insertions(+), 27 deletions(-) diff --git a/docs/Users_Guide/series-analysis.rst b/docs/Users_Guide/series-analysis.rst index 8c6c5f05b7..0be4477c90 100644 --- a/docs/Users_Guide/series-analysis.rst +++ b/docs/Users_Guide/series-analysis.rst @@ -188,4 +188,4 @@ The output_stats array controls the type of output that the Series-Analysis tool 13. GRAD for Gradient Statistics (See :numref:`table_GS_format_info_GRAD`) -.. note:: When the -input option is used, all partial sum and contingency table count columns are required to aggregate statistics across multiple runs. To facilitate this, the output_stats entries for the CTC, SL1L2, SAL1L2, and PCT line types can be set to "ALL" to indicate that all available columns for those line types should be written. +.. note:: When the -input option is used, all partial sum and contingency table count columns are required to aggregate statistics across multiple runs. To facilitate this, the output_stats entries for the CTC, SL1L2, SAL1L2, PCT, and GRAD line types can be set to "ALL" to indicate that all available columns for those line types should be written. diff --git a/internal/test_unit/config/SeriesAnalysisConfig b/internal/test_unit/config/SeriesAnalysisConfig index 047b454afc..72945b1266 100644 --- a/internal/test_unit/config/SeriesAnalysisConfig +++ b/internal/test_unit/config/SeriesAnalysisConfig @@ -109,6 +109,19 @@ mask = { poly = "${MASK_POLY}"; } +//////////////////////////////////////////////////////////////////////////////// + +// +// Gradient statistics +// May be set separately in each "obs.field" entry +// +gradient = { + dx = [ 1, 3 ]; + dy = [ 1, 3 ]; +} + +//////////////////////////////////////////////////////////////////////////////// + // // Number of grid points to be processed concurrently. Set smaller to use less // memory but increase the number of passes through the data. If set <= 0, all @@ -139,6 +152,7 @@ output_stats = { pstd = [ ${PSTD_STATS} ]; pjc = [ ${PJC_STATS} ]; prc = [ ${PRC_STATS} ]; + grad = [ ${GRAD_STATS} ]; } //////////////////////////////////////////////////////////////////////////////// diff --git a/internal/test_unit/xml/unit_series_analysis.xml b/internal/test_unit/xml/unit_series_analysis.xml index 96cda729dd..61075a94eb 100644 --- a/internal/test_unit/xml/unit_series_analysis.xml +++ b/internal/test_unit/xml/unit_series_analysis.xml @@ -40,6 +40,7 @@ PSTD_STATS PJC_STATS PRC_STATS + GRAD_STATS "ALL" \ -fcst &DATA_DIR_MODEL;/grib1/gfs_hmt/gfs_2012040900_F006.grib \ @@ -81,6 +82,7 @@ PSTD_STATS PJC_STATS PRC_STATS + GRAD_STATS "ALL" \ -fcst &DATA_DIR_MODEL;/grib1/gfs_hmt/gfs_2012040900_F030.grib \ @@ -131,6 +133,7 @@ PSTD_STATS "TOTAL", "ROC_AUC", "BRIER", "BRIER_NCL", "BRIER_NCU" PJC_STATS "CALIBRATION_1", "REFINEMENT_1" PRC_STATS "PODY_1", "POFD_1" + GRAD_STATS \ -fcst &OUTPUT_DIR;/series_analysis/input_fcst_file_list \ @@ -174,6 +177,7 @@ PSTD_STATS "TOTAL", "ROC_AUC", "BRIER", "BRIER_NCL", "BRIER_NCU" PJC_STATS "CALIBRATION_1", "REFINEMENT_1" PRC_STATS "PODY_1", "POFD_1" + GRAD_STATS \ -fcst &OUTPUT_DIR;/series_analysis/aggregate_fcst_file_list \ @@ -210,6 +214,7 @@ PSTD_STATS PJC_STATS PRC_STATS + GRAD_STATS \ -fcst &DATA_DIR_MODEL;/grib1/arw-fer-gep1/arw-fer-gep1_2012040900_F012.grib \ diff --git a/src/tools/core/series_analysis/series_analysis.cc b/src/tools/core/series_analysis/series_analysis.cc index e5c3a62fe8..30de509c8f 100644 --- a/src/tools/core/series_analysis/series_analysis.cc +++ b/src/tools/core/series_analysis/series_analysis.cc @@ -37,6 +37,7 @@ // 016 01/29/24 Halley Gotway MET #2801 Configure time difference warnings. // 017 07/05/24 Halley Gotway MET #2924 Support forecast climatology. // 018 07/26/24 Halley Gotway MET #1371 Aggregate previous output. +// 019 12/09/24 Halley Gotway MET #3030 Add GRAD output. // //////////////////////////////////////////////////////////////////////// @@ -95,6 +96,9 @@ static void do_continuous (int, const PairDataPoint *); static void do_partialsums (int, const PairDataPoint *); static void do_probabilistic (int, const PairDataPoint *); static void do_climo_brier (int, double, int, PCTInfo &); +static void do_gradient (int, int, int, + const PairDataPoint *, + const PairDataPoint *); static int read_aggr_total (int); static void read_aggr_ctc (int, const CTSInfo &, CTSInfo &); @@ -102,6 +106,7 @@ static void read_aggr_mctc (int, const MCTSInfo &, MCTSInfo &); static void read_aggr_sl1l2 (int, const SL1L2Info &, SL1L2Info &); static void read_aggr_sal1l2 (int, const SL1L2Info &, SL1L2Info &); static void read_aggr_pct (int, const PCTInfo &, PCTInfo &); +static void read_aggr_grad (int, const GRADInfo &, GRADInfo &); static void store_stat_categorical(int, STATLineType, const ConcatString &, @@ -118,12 +123,16 @@ static void store_stat_continuous(int, static void store_stat_probabilistic(int, STATLineType, const ConcatString &, const PCTInfo &); +static void store_stat_gradient(int, + STATLineType, const ConcatString &, + const GRADInfo &); static void store_stat_all_ctc (int, const CTSInfo &); static void store_stat_all_mctc (int, const MCTSInfo &); static void store_stat_all_sl1l2 (int, const SL1L2Info &); static void store_stat_all_sal1l2(int, const SL1L2Info &); static void store_stat_all_pct (int, const PCTInfo &); +static void store_stat_all_grad (int, const GRADInfo &); static ConcatString build_nc_var_name_categorical( STATLineType, const ConcatString &, @@ -140,6 +149,9 @@ static ConcatString build_nc_var_name_continuous( static ConcatString build_nc_var_name_probabilistic( STATLineType, const ConcatString &, const PCTInfo &, double); +static ConcatString build_nc_var_name_gradient( + STATLineType, const ConcatString &, + const GRADInfo &); static void setup_nc_file(const VarInfo *, const VarInfo *); static void add_stat_data(const ConcatString &, const ConcatString &, @@ -149,6 +161,7 @@ static void write_stat_data(); static void set_range(const unixtime &, unixtime &, unixtime &); static void set_range(const int &, int &, int &); +static void set_pair_dims(vector &, int, int); static void clean_up(); @@ -834,8 +847,6 @@ DataPlane read_aggr_data_plane(const ConcatString &var_name, //////////////////////////////////////////////////////////////////////// void process_scores() { - int x; - int y; int i_point = 0; VarInfo *fcst_info = (VarInfo *) nullptr; VarInfo *obs_info = (VarInfo *) nullptr; @@ -844,6 +855,13 @@ void process_scores() { vector pd_block; const char *method_name = "process_scores() "; + // X and Y gradient pairs + bool do_grad = (!conf_info.fcst_info[0]->is_prob() && + conf_info.get_n_grad() > 0 && + conf_info.output_stats[STATLineType::grad].n() > 0); + vector gx_pd_block; + vector gy_pd_block; + // Climatology mean and standard deviation DataPlane fcmn_dp, fcsd_dp; DataPlane ocmn_dp, ocsd_dp; @@ -876,22 +894,33 @@ void process_scores() { // Retrieve the data planes for the current series entry get_series_data(i_series, fcst_info, obs_info, fcst_dp, obs_dp); - // Initialize PairDataPoint vector, if needed - // block_size is defined in get_series_data() + // Set the pair dimensions if(pd_block.empty()) { - pd_block.resize(conf_info.block_size); - for(auto &pd : pd_block) pd.extend(n_series_pair); + set_pair_dims(pd_block, conf_info.block_size, n_series_pair); } + // Set the gradient pair dimensions + if(gx_pd_block.empty() || gy_pd_block.empty()) { + int n_grad_pd = conf_info.block_size * conf_info.get_n_grad(); + set_pair_dims(gx_pd_block, n_grad_pd, n_series_pair); + set_pair_dims(gy_pd_block, n_grad_pd, n_series_pair); + } + // Beginning of each data pass if(i_series == 0) { - // Re-initialize the PairDataPoint objects + // Re-initialize the pairs for(auto &pd : pd_block) { pd.erase(); pd.set_climo_cdf_info_ptr(&conf_info.cdf_info); } + // Re-initialize the gradient pairs + if(do_grad) { + for(auto &pd : gx_pd_block) pd.erase(); + for(auto &pd : gy_pd_block) pd.erase(); + } + // Starting grid point i_point = i_read*conf_info.block_size; @@ -949,10 +978,14 @@ void process_scores() { set_range(obs_dp.lead(), obs_lead_beg, obs_lead_end); // Store matched pairs for each grid point - for(int i=0; if_na.n() + + (aggr_file.empty() ? 0 : read_aggr_total(i_grid)); int n_series = n_series_pair + n_series_aggr; // Check for the required number of matched pairs if(n_valid / (double) n_series < conf_info.vld_data_thresh) { mlog << Debug(4) - << "[" << i+1 << " of " << conf_info.block_size + << "[" << i_block+1 << " of " << conf_info.block_size << "] Skipping point (" << x << ", " << y << ") with " << n_valid << " of " << n_series << " valid matched pairs.\n"; @@ -1001,7 +1092,7 @@ void process_scores() { } else { mlog << Debug(4) - << "[" << i+1 << " of " << conf_info.block_size + << "[" << i_block+1 << " of " << conf_info.block_size << "] Processing point (" << x << ", " << y << ") with " << n_valid << " of " << n_series << " valid matched pairs.\n"; } @@ -1011,27 +1102,27 @@ void process_scores() { (conf_info.output_stats[STATLineType::fho].n() + conf_info.output_stats[STATLineType::ctc].n() + conf_info.output_stats[STATLineType::cts].n()) > 0) { - do_categorical(i_point+i, &pd_block[i]); + do_categorical(i_grid, pd_ptr); } // Compute multi-category contingency table counts and statistics if(!conf_info.fcst_info[0]->is_prob() && (conf_info.output_stats[STATLineType::mctc].n() + conf_info.output_stats[STATLineType::mcts].n()) > 0) { - do_multicategory(i_point+i, &pd_block[i]); + do_multicategory(i_grid, pd_ptr); } // Compute continuous statistics if(!conf_info.fcst_info[0]->is_prob() && conf_info.output_stats[STATLineType::cnt].n() > 0) { - do_continuous(i_point+i, &pd_block[i]); + do_continuous(i_grid, pd_ptr); } // Compute partial sums if(!conf_info.fcst_info[0]->is_prob() && (conf_info.output_stats[STATLineType::sl1l2].n() + conf_info.output_stats[STATLineType::sal1l2].n()) > 0) { - do_partialsums(i_point+i, &pd_block[i]); + do_partialsums(i_grid, pd_ptr); } // Compute probabilistics counts and statistics @@ -1040,11 +1131,27 @@ void process_scores() { conf_info.output_stats[STATLineType::pstd].n() + conf_info.output_stats[STATLineType::pjc].n() + conf_info.output_stats[STATLineType::prc].n()) > 0) { - do_probabilistic(i_point+i, &pd_block[i]); + do_probabilistic(i_grid, pd_ptr); } - } // end for i + // Compute gradient statistics + if(do_grad) { + + // Loop over the gradient options + for(int i_grad=0; i_gradf_na, pd_gy->f_na, + pd_gx->o_na, pd_gy->o_na, pd_gx->wgt_na); + + // Aggregate current gradients with previous statistics + if(aggr_file.nonempty()) { + + // Aggregate gradients + GRADInfo aggr_grad; + read_aggr_grad(n, grad_info, aggr_grad); + grad_info += aggr_grad; + } + + // Add statistic value for each possible GRAD column + for(int j=0; j &pd_block, + int n_pd, int n_pair) { + + // Resize to the number of objects + pd_block.resize(n_pd); + + // Reserve space for the number of pairs + for(auto &pd : pd_block) pd.extend(n_pair); + + return; +} + + //////////////////////////////////////////////////////////////////////// void clean_up() {