From d819874d616f98a25187bfd3091073a2e6d5279e Mon Sep 17 00:00:00 2001 From: thunderhoser Date: Wed, 21 Oct 2020 19:19:46 -0600 Subject: [PATCH] Added input arg for echo classification. --- .../gg_utils/echo_classification.py | 53 ++++-- .../gg_utils/echo_classification_test.py | 13 +- .../scripts/run_echo_classification.py | 151 ++++++++++-------- 3 files changed, 134 insertions(+), 83 deletions(-) diff --git a/gewittergefahr/gg_utils/echo_classification.py b/gewittergefahr/gg_utils/echo_classification.py index 5f92c20f..d0bd5963 100644 --- a/gewittergefahr/gg_utils/echo_classification.py +++ b/gewittergefahr/gg_utils/echo_classification.py @@ -12,6 +12,7 @@ import os.path import numpy import netCDF4 +from scipy.ndimage import label as label_image from scipy.interpolate import RectBivariateSpline from scipy.ndimage.filters import median_filter, convolve from gewittergefahr.gg_io import netcdf_io @@ -49,6 +50,7 @@ HALVE_RESOLUTION_KEY = 'halve_resolution_for_peakedness' MIN_ECHO_TOP_KEY = 'min_echo_top_m_asl' ECHO_TOP_LEVEL_KEY = 'echo_top_level_dbz' +MIN_SIZE_KEY = 'min_size_pixels' MIN_COMPOSITE_REFL_CRITERION1_KEY = 'min_composite_refl_criterion1_dbz' MIN_COMPOSITE_REFL_CRITERION5_KEY = 'min_composite_refl_criterion5_dbz' MIN_COMPOSITE_REFL_AML_KEY = 'min_composite_refl_aml_dbz' @@ -59,6 +61,7 @@ HALVE_RESOLUTION_KEY: False, MIN_ECHO_TOP_KEY: 10000., ECHO_TOP_LEVEL_KEY: 25., + MIN_SIZE_KEY: 5, MIN_COMPOSITE_REFL_CRITERION1_KEY: 25., MIN_COMPOSITE_REFL_CRITERION5_KEY: 25., MIN_COMPOSITE_REFL_AML_KEY: 45. @@ -95,6 +98,9 @@ def _check_input_args(option_dict): option_dict[MIN_ECHO_TOP_KEY] )) option_dict[ECHO_TOP_LEVEL_KEY] = float(option_dict[ECHO_TOP_LEVEL_KEY]) + option_dict[MIN_SIZE_KEY] = int(numpy.round( + option_dict[MIN_SIZE_KEY] + )) option_dict[MIN_COMPOSITE_REFL_CRITERION5_KEY] = float( option_dict[MIN_COMPOSITE_REFL_CRITERION5_KEY] ) @@ -107,6 +113,7 @@ def _check_input_args(option_dict): error_checking.assert_is_boolean(option_dict[HALVE_RESOLUTION_KEY]) error_checking.assert_is_greater(option_dict[MIN_ECHO_TOP_KEY], 0) error_checking.assert_is_greater(option_dict[ECHO_TOP_LEVEL_KEY], 0.) + error_checking.assert_is_greater(option_dict[MIN_SIZE_KEY], 1) error_checking.assert_is_greater( option_dict[MIN_COMPOSITE_REFL_CRITERION5_KEY], 0. ) @@ -163,7 +170,7 @@ def _neigh_metres_to_rowcol(neigh_radius_metres, grid_metadata_dict): num_rows = 1 + 2 * int(numpy.ceil( neigh_radius_metres / y_spacing_metres )) - + mean_latitude_deg = ( numpy.max(grid_metadata_dict[LATITUDES_KEY]) - numpy.min(grid_metadata_dict[LATITUDES_KEY]) @@ -393,6 +400,8 @@ def _apply_convective_criterion1( numerator_matrix = numpy.sum( peakedness_matrix_dbz > peakedness_threshold_matrix_dbz, axis=-1 ) + + # TODO(thunderhoser): Should check for non-NaN, rather than non-zero. denominator_matrix = numpy.sum(this_reflectivity_matrix_dbz > 0, axis=-1) this_flag_matrix = numpy.logical_and( numerator_matrix == 0, denominator_matrix == 0 @@ -495,29 +504,32 @@ def _apply_convective_criterion3( ) -def _apply_convective_criterion4(convective_flag_matrix): +def _apply_convective_criterion4(convective_flag_matrix, min_size_pixels): """Applies criterion 4 for convective classification. - Criterion 4 states: if pixel (i, j) is marked convective but none of its - neighbours are marked convective, (i, j) is not actually convective. + Criterion 4 states: if pixel (i, j) is marked convective but is not part of + a connected region with size of >= K pixels, (i, j) is not actually + convective. :param convective_flag_matrix: M-by-N numpy array of Boolean flags (True if convective, False if not). + :param min_size_pixels: Minimum size of connected region. :return: convective_flag_matrix: Updated version of input. """ - weight_matrix = numpy.full((3, 3), 1.) - weight_matrix = weight_matrix / weight_matrix.size + region_id_matrix = label_image( + convective_flag_matrix.astype(int), structure=numpy.full((3, 3), 1.) + )[0] + num_regions = numpy.max(region_id_matrix) - average_matrix = convolve( - convective_flag_matrix.astype(float), weights=weight_matrix, - mode='constant', cval=0. - ) + for i in range(num_regions): + these_indices = numpy.where(region_id_matrix == i + 1) + if len(these_indices[0]) >= min_size_pixels: + continue - return numpy.logical_and( - convective_flag_matrix, - average_matrix > weight_matrix[0, 0] + TOLERANCE - ) + convective_flag_matrix[these_indices] = False + + return convective_flag_matrix def _apply_convective_criterion5( @@ -586,6 +598,8 @@ def find_convective_pixels(reflectivity_matrix_dbz, grid_metadata_dict, used for criterion 3. option_dict['echo_top_level_dbz'] Critical reflectivity (used to compute echo top for criterion 3). + option_dict['min_size_pixels']: Minimum connected-region size (for criterion + 4). option_dict['min_composite_refl_criterion1_dbz'] Minimum composite (column-max) reflectivity for criterion 1. This may be None. option_dict['min_composite_refl_criterion5_dbz'] Minimum composite @@ -611,6 +625,7 @@ def find_convective_pixels(reflectivity_matrix_dbz, grid_metadata_dict, halve_resolution_for_peakedness = option_dict[HALVE_RESOLUTION_KEY] min_echo_top_m_asl = option_dict[MIN_ECHO_TOP_KEY] echo_top_level_dbz = option_dict[ECHO_TOP_LEVEL_KEY] + min_size_pixels = option_dict[MIN_SIZE_KEY] min_composite_refl_criterion1_dbz = ( option_dict[MIN_COMPOSITE_REFL_CRITERION1_KEY] ) @@ -692,7 +707,8 @@ def find_convective_pixels(reflectivity_matrix_dbz, grid_metadata_dict, print('Applying criterion 4 for convective classification...') convective_flag_matrix = _apply_convective_criterion4( - convective_flag_matrix + convective_flag_matrix=convective_flag_matrix, + min_size_pixels=min_size_pixels ) print('Number of convective pixels = {0:d}'.format( @@ -793,6 +809,7 @@ def write_classifications(convective_flag_matrix, grid_metadata_dict, halve_resolution_for_peakedness = option_dict[HALVE_RESOLUTION_KEY] min_echo_top_m_asl = option_dict[MIN_ECHO_TOP_KEY] echo_top_level_dbz = option_dict[ECHO_TOP_LEVEL_KEY] + min_size_pixels = option_dict[MIN_SIZE_KEY] min_composite_refl_criterion1_dbz = ( option_dict[MIN_COMPOSITE_REFL_CRITERION1_KEY] ) @@ -818,6 +835,7 @@ def write_classifications(convective_flag_matrix, grid_metadata_dict, ) netcdf_dataset.setncattr(MIN_ECHO_TOP_KEY, min_echo_top_m_asl) netcdf_dataset.setncattr(ECHO_TOP_LEVEL_KEY, echo_top_level_dbz) + netcdf_dataset.setncattr(MIN_SIZE_KEY, min_size_pixels) netcdf_dataset.setncattr( MIN_COMPOSITE_REFL_CRITERION1_KEY, min_composite_refl_criterion1_dbz ) @@ -900,6 +918,11 @@ def read_classifications(netcdf_file_name): getattr(netcdf_dataset, MIN_COMPOSITE_REFL_AML_KEY) } + try: + option_dict[MIN_SIZE_KEY] = getattr(netcdf_dataset, MIN_SIZE_KEY) + except: + option_dict[MIN_SIZE_KEY] = 2 + if option_dict[MIN_COMPOSITE_REFL_CRITERION1_KEY] < 0: option_dict[MIN_COMPOSITE_REFL_CRITERION1_KEY] = None diff --git a/gewittergefahr/gg_utils/echo_classification_test.py b/gewittergefahr/gg_utils/echo_classification_test.py index b3ca2af7..a8542bc3 100644 --- a/gewittergefahr/gg_utils/echo_classification_test.py +++ b/gewittergefahr/gg_utils/echo_classification_test.py @@ -273,10 +273,12 @@ def test_apply_convective_criterion4_main(self): """ this_flag_matrix = echo_classifn._apply_convective_criterion4( - CRITERION3_FLAG_MATRIX) + convective_flag_matrix=CRITERION3_FLAG_MATRIX, min_size_pixels=2 + ) self.assertTrue(numpy.array_equal( - this_flag_matrix, CRITERION4_FLAG_MATRIX)) + this_flag_matrix, CRITERION4_FLAG_MATRIX + )) def test_apply_convective_criterion4_dummy(self): """Ensures correct output from _apply_convective_criterion4. @@ -286,10 +288,13 @@ def test_apply_convective_criterion4_dummy(self): """ this_flag_matrix = echo_classifn._apply_convective_criterion4( - DUMMY_CRITERION3_FLAG_MATRIX) + convective_flag_matrix=DUMMY_CRITERION3_FLAG_MATRIX, + min_size_pixels=2 + ) self.assertTrue(numpy.array_equal( - this_flag_matrix, DUMMY_CRITERION4_FLAG_MATRIX)) + this_flag_matrix, DUMMY_CRITERION4_FLAG_MATRIX + )) def test_apply_convective_criterion5(self): """Ensures correct output from _apply_convective_criterion5.""" diff --git a/gewittergefahr/scripts/run_echo_classification.py b/gewittergefahr/scripts/run_echo_classification.py index d73f41c7..c96f6466 100644 --- a/gewittergefahr/scripts/run_echo_classification.py +++ b/gewittergefahr/scripts/run_echo_classification.py @@ -27,119 +27,131 @@ MAX_PEAKEDNESS_HEIGHT_ARG_NAME = 'max_peakedness_height_m_asl' MIN_ECHO_TOP_ARG_NAME = 'min_echo_top_m_asl' ECHO_TOP_LEVEL_ARG_NAME = 'echo_top_level_dbz' +MIN_SIZE_ARG_NAME = 'min_size_pixels' MIN_COMPOSITE_REFL_CRITERION1_ARG_NAME = 'min_composite_refl_criterion1_dbz' MIN_COMPOSITE_REFL_CRITERION5_ARG_NAME = 'min_composite_refl_criterion5_dbz' MIN_COMPOSITE_REFL_AML_ARG_NAME = 'min_composite_refl_aml_dbz' RADAR_SOURCE_HELP_STRING = ( 'Source of radar data (must be accepted by ' - '`radar_utils.check_data_source`).') - + '`radar_utils.check_data_source`).' +) SPC_DATE_HELP_STRING = ( 'SPC date (format "yyyymmdd"). Echo classification will be done for all ' - 'time steps on this date.') - + 'time steps on this date.' +) TARRED_RADAR_DIR_HELP_STRING = ( '[used only if {0:s} = "{1:s}"] Name of top-level directory with tarred ' 'MYRORSS files. These files will be untarred before processing, to the ' 'directory `{2:s}`, and the untarred files will be deleted after ' 'processing.' -).format(RADAR_SOURCE_ARG_NAME, radar_utils.MYRORSS_SOURCE_ID, - RADAR_DIR_ARG_NAME) - +).format( + RADAR_SOURCE_ARG_NAME, radar_utils.MYRORSS_SOURCE_ID, RADAR_DIR_ARG_NAME +) RADAR_DIR_HELP_STRING = ( 'Name of top-level radar directory. Files therein will be found by either ' - '`myrorss_and_mrms_io.find_raw_file` or `gridrad_io.find_file`.') - + '`myrorss_and_mrms_io.find_raw_file` or `gridrad_io.find_file`.' +) OUTPUT_DIR_HELP_STRING = ( 'Name of top-level output directory. Echo classifications will be written ' 'by `echo_classification.write_classifications`, to locations therein ' - 'determined by `echo_classification.find_classification_file`.') - + 'determined by `echo_classification.find_classification_file`.' +) PEAKEDNESS_NEIGH_HELP_STRING = ( - 'Neighbourhood radius for peakedness calculations.') - + 'Neighbourhood radius for peakedness calculations.' +) MAX_PEAKEDNESS_HEIGHT_HELP_STRING = ( - 'Max height (metres above sea level) for peakedness calculations.') - + 'Max height (metres above sea level) for peakedness calculations.' +) MIN_ECHO_TOP_HELP_STRING = ( - 'Minimum echo top (metres above sea level), used for criterion 3.') - + 'Minimum echo top (metres above sea level), used for criterion 3.' +) ECHO_TOP_LEVEL_HELP_STRING = ( - 'Critical reflectivity (used to compute echo top for criterion 3).') - + 'Critical reflectivity (used to compute echo top for criterion 3).' +) +MIN_SIZE_HELP_STRING = 'Minimum connected-region size.' MIN_COMPOSITE_REFL_CRITERION1_HELP_STRING = ( 'Minimum composite (column-max) reflectivity for criterion 1. To exclude ' - 'this criterion, make the value negative.') - + 'this criterion, make the value negative.' +) MIN_COMPOSITE_REFL_CRITERION5_HELP_STRING = ( - 'Minimum composite reflectivity for criterion 5.') - + 'Minimum composite reflectivity for criterion 5.' +) MIN_COMPOSITE_REFL_AML_HELP_STRING = ( - 'Minimum composite reflectivity above melting level, used for criterion 2.') + 'Minimum composite reflectivity above melting level, used for criterion 2.' +) DEFAULT_TARRED_RADAR_DIR_NAME = '/condo/swatcommon/common/myrorss' INPUT_ARG_PARSER = argparse.ArgumentParser() INPUT_ARG_PARSER.add_argument( '--' + RADAR_SOURCE_ARG_NAME, type=str, required=True, - help=RADAR_SOURCE_HELP_STRING) - + help=RADAR_SOURCE_HELP_STRING +) INPUT_ARG_PARSER.add_argument( '--' + SPC_DATE_ARG_NAME, type=str, required=True, - help=SPC_DATE_HELP_STRING) - + help=SPC_DATE_HELP_STRING +) INPUT_ARG_PARSER.add_argument( '--' + TARRED_RADAR_DIR_ARG_NAME, type=str, required=False, - default=DEFAULT_TARRED_RADAR_DIR_NAME, help=TARRED_RADAR_DIR_HELP_STRING) - + default=DEFAULT_TARRED_RADAR_DIR_NAME, help=TARRED_RADAR_DIR_HELP_STRING +) INPUT_ARG_PARSER.add_argument( '--' + RADAR_DIR_ARG_NAME, type=str, required=True, - help=RADAR_DIR_HELP_STRING) - + help=RADAR_DIR_HELP_STRING +) INPUT_ARG_PARSER.add_argument( '--' + OUTPUT_DIR_ARG_NAME, type=str, required=True, - help=OUTPUT_DIR_HELP_STRING) - + help=OUTPUT_DIR_HELP_STRING +) INPUT_ARG_PARSER.add_argument( '--' + PEAKEDNESS_NEIGH_ARG_NAME, type=float, required=False, - default=echo_classifn.DEFAULT_OPTION_DICT[ - echo_classifn.PEAKEDNESS_NEIGH_KEY], - help=PEAKEDNESS_NEIGH_HELP_STRING) - + default= + echo_classifn.DEFAULT_OPTION_DICT[echo_classifn.PEAKEDNESS_NEIGH_KEY], + help=PEAKEDNESS_NEIGH_HELP_STRING +) INPUT_ARG_PARSER.add_argument( '--' + MAX_PEAKEDNESS_HEIGHT_ARG_NAME, type=float, required=False, - default=echo_classifn.DEFAULT_OPTION_DICT[ - echo_classifn.MAX_PEAKEDNESS_HEIGHT_KEY], - help=MAX_PEAKEDNESS_HEIGHT_HELP_STRING) - + default= + echo_classifn.DEFAULT_OPTION_DICT[echo_classifn.MAX_PEAKEDNESS_HEIGHT_KEY], + help=MAX_PEAKEDNESS_HEIGHT_HELP_STRING +) INPUT_ARG_PARSER.add_argument( '--' + MIN_ECHO_TOP_ARG_NAME, type=int, required=False, default=echo_classifn.DEFAULT_OPTION_DICT[echo_classifn.MIN_ECHO_TOP_KEY], - help=MIN_ECHO_TOP_HELP_STRING) - + help=MIN_ECHO_TOP_HELP_STRING +) INPUT_ARG_PARSER.add_argument( '--' + ECHO_TOP_LEVEL_ARG_NAME, type=float, required=False, default=echo_classifn.DEFAULT_OPTION_DICT[echo_classifn.ECHO_TOP_LEVEL_KEY], - help=ECHO_TOP_LEVEL_HELP_STRING) - + help=ECHO_TOP_LEVEL_HELP_STRING +) +INPUT_ARG_PARSER.add_argument( + '--' + MIN_SIZE_ARG_NAME, type=int, required=False, + default=echo_classifn.DEFAULT_OPTION_DICT[echo_classifn.MIN_SIZE_KEY], + help=MIN_SIZE_HELP_STRING +) INPUT_ARG_PARSER.add_argument( '--' + MIN_COMPOSITE_REFL_CRITERION1_ARG_NAME, type=float, required=False, default=echo_classifn.DEFAULT_OPTION_DICT[ - echo_classifn.MIN_COMPOSITE_REFL_CRITERION1_KEY], - help=MIN_COMPOSITE_REFL_CRITERION1_HELP_STRING) - + echo_classifn.MIN_COMPOSITE_REFL_CRITERION1_KEY + ], + help=MIN_COMPOSITE_REFL_CRITERION1_HELP_STRING +) INPUT_ARG_PARSER.add_argument( '--' + MIN_COMPOSITE_REFL_CRITERION5_ARG_NAME, type=float, required=False, default=echo_classifn.DEFAULT_OPTION_DICT[ - echo_classifn.MIN_COMPOSITE_REFL_CRITERION5_KEY], - help=MIN_COMPOSITE_REFL_CRITERION5_HELP_STRING) - + echo_classifn.MIN_COMPOSITE_REFL_CRITERION5_KEY + ], + help=MIN_COMPOSITE_REFL_CRITERION5_HELP_STRING +) INPUT_ARG_PARSER.add_argument( '--' + MIN_COMPOSITE_REFL_AML_ARG_NAME, type=float, required=False, default=echo_classifn.DEFAULT_OPTION_DICT[ - echo_classifn.MIN_COMPOSITE_REFL_AML_KEY], - help=MIN_COMPOSITE_REFL_AML_HELP_STRING) + echo_classifn.MIN_COMPOSITE_REFL_AML_KEY + ], + help=MIN_COMPOSITE_REFL_AML_HELP_STRING +) def _run_for_gridrad( @@ -211,7 +223,8 @@ def _run_for_gridrad( reflectivity_matrix_dbz=reflectivity_matrix_dbz, grid_metadata_dict=grid_metadata_dict, valid_time_unix_sec=valid_times_unix_sec[i], - option_dict=option_dict) + option_dict=option_dict + )[0] print('Number of convective pixels = {0:d}\n'.format( numpy.sum(convective_flag_matrix) @@ -388,7 +401,8 @@ def _run_for_myrorss( reflectivity_matrix_dbz=reflectivity_matrix_dbz, grid_metadata_dict=coarse_grid_metadata_dict, valid_time_unix_sec=valid_times_unix_sec[i], - option_dict=option_dict) + option_dict=option_dict + )[0] print('Number of convective pixels = {0:d}\n'.format( numpy.sum(convective_flag_matrix) @@ -434,8 +448,8 @@ def _run_for_myrorss( def _run(radar_source_name, spc_date_string, top_radar_dir_name_tarred, top_radar_dir_name, top_output_dir_name, peakedness_neigh_metres, max_peakedness_height_m_asl, min_echo_top_m_asl, echo_top_level_dbz, - min_composite_refl_criterion1_dbz, min_composite_refl_criterion5_dbz, - min_composite_refl_aml_dbz): + min_size_pixels, min_composite_refl_criterion1_dbz, + min_composite_refl_criterion5_dbz, min_composite_refl_aml_dbz): """Runs echo classification (convective vs. non-convective). This is effectively the main method. @@ -449,6 +463,7 @@ def _run(radar_source_name, spc_date_string, top_radar_dir_name_tarred, :param max_peakedness_height_m_asl: Same. :param min_echo_top_m_asl: Same. :param echo_top_level_dbz: Same. + :param min_size_pixels: Same. :param min_composite_refl_criterion1_dbz: Same. :param min_composite_refl_criterion5_dbz: Same. :param min_composite_refl_aml_dbz: Same. @@ -464,6 +479,7 @@ def _run(radar_source_name, spc_date_string, top_radar_dir_name_tarred, # radar_source_name != radar_utils.GRIDRAD_SOURCE_ID, echo_classifn.MIN_ECHO_TOP_KEY: min_echo_top_m_asl, echo_classifn.ECHO_TOP_LEVEL_KEY: echo_top_level_dbz, + echo_classifn.MIN_SIZE_KEY: min_size_pixels, echo_classifn.MIN_COMPOSITE_REFL_CRITERION1_KEY: min_composite_refl_criterion1_dbz, echo_classifn.MIN_COMPOSITE_REFL_CRITERION5_KEY: @@ -491,19 +507,26 @@ def _run(radar_source_name, spc_date_string, top_radar_dir_name_tarred, radar_source_name=getattr(INPUT_ARG_OBJECT, RADAR_SOURCE_ARG_NAME), spc_date_string=getattr(INPUT_ARG_OBJECT, SPC_DATE_ARG_NAME), top_radar_dir_name_tarred=getattr( - INPUT_ARG_OBJECT, TARRED_RADAR_DIR_ARG_NAME), + INPUT_ARG_OBJECT, TARRED_RADAR_DIR_ARG_NAME + ), top_radar_dir_name=getattr(INPUT_ARG_OBJECT, RADAR_DIR_ARG_NAME), top_output_dir_name=getattr(INPUT_ARG_OBJECT, OUTPUT_DIR_ARG_NAME), peakedness_neigh_metres=getattr( - INPUT_ARG_OBJECT, PEAKEDNESS_NEIGH_ARG_NAME), + INPUT_ARG_OBJECT, PEAKEDNESS_NEIGH_ARG_NAME + ), max_peakedness_height_m_asl=getattr( - INPUT_ARG_OBJECT, MAX_PEAKEDNESS_HEIGHT_ARG_NAME), + INPUT_ARG_OBJECT, MAX_PEAKEDNESS_HEIGHT_ARG_NAME + ), min_echo_top_m_asl=getattr(INPUT_ARG_OBJECT, MIN_ECHO_TOP_ARG_NAME), echo_top_level_dbz=getattr(INPUT_ARG_OBJECT, ECHO_TOP_LEVEL_ARG_NAME), + min_size_pixels=getattr(INPUT_ARG_OBJECT, MIN_SIZE_ARG_NAME), min_composite_refl_criterion1_dbz=getattr( - INPUT_ARG_OBJECT, MIN_COMPOSITE_REFL_CRITERION1_ARG_NAME), + INPUT_ARG_OBJECT, MIN_COMPOSITE_REFL_CRITERION1_ARG_NAME + ), min_composite_refl_criterion5_dbz=getattr( - INPUT_ARG_OBJECT, MIN_COMPOSITE_REFL_CRITERION5_ARG_NAME), + INPUT_ARG_OBJECT, MIN_COMPOSITE_REFL_CRITERION5_ARG_NAME + ), min_composite_refl_aml_dbz=getattr( - INPUT_ARG_OBJECT, MIN_COMPOSITE_REFL_AML_ARG_NAME) + INPUT_ARG_OBJECT, MIN_COMPOSITE_REFL_AML_ARG_NAME + ) )