From e6d46e529349f57343682ddfd2c55ecf29fe1b06 Mon Sep 17 00:00:00 2001 From: Dugal Harris Date: Fri, 16 Feb 2024 11:50:31 +0200 Subject: [PATCH 01/31] log file i of n --- homonim/cli.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/homonim/cli.py b/homonim/cli.py index b126578..05b83be 100644 --- a/homonim/cli.py +++ b/homonim/cli.py @@ -153,6 +153,8 @@ def _nodata_cb(ctx: click.Context, param: click.Option, value): return None else: # check value is a number and can be cast to output dtype + # TODO: there is a bug here if --nodata is specified before --dtype on the CLI (ctx.params['dtype'] does not + # exist) try: value = float(value.lower()) if not rio.dtypes.can_cast_dtype(value, ctx.params['dtype']): @@ -394,6 +396,7 @@ def fuse( overwrite: bool, cmp_file: pathlib.Path, cmp_bands: Tuple[int], build_ovw: bool, proc_crs: ProcCrs, conf: pathlib.Path, param_image: bool, force_match: bool, **kwargs ): + # TODO: don't strip Examples heading underline # @formatter:off """ Correct image(s) to surface reflectance. @@ -454,9 +457,9 @@ def fuse( # iterate over and correct source file(s) try: - for src_filename in src_file: + for src_i, src_filename in enumerate(src_file): out_path = pathlib.Path(out_dir) if out_dir is not None else src_filename.parent - logger.info(f'\nCorrecting {src_filename.name}') + logger.info(f'\nCorrecting {src_filename.name} ({src_i + 1} of {len(src_file)})') with RasterFuse( src_filename, ref_file, proc_crs=proc_crs, src_bands=src_bands, ref_bands=ref_bands, force=force_match, ) as raster_fuse: # yapf: disable @@ -562,8 +565,8 @@ def compare( else src_bands ) # yapf: disable # iterate over source files, comparing with reference - for src_filename, src_bands in zip(src_file, src_bands_list): - logger.info(f'\nComparing {src_filename.name}') + for src_i, (src_filename, src_bands) in enumerate(zip(src_file, src_bands_list)): + logger.info(f'\nComparing {src_filename.name} ({src_i + 1} of {len(src_file)})') start_time = timer() with RasterCompare( src_filename, ref_file, proc_crs=proc_crs, src_bands=src_bands, ref_bands=ref_bands, force=force_match, @@ -616,8 +619,8 @@ def stats(param_files: Tuple[pathlib.Path, ...], output: pathlib.Path): meta_dict = {} # process parameter file(s), storing results - for param_filename in param_files: - logger.info(f'\nProcessing {param_filename.name}') + for param_i, param_filename in enumerate(param_files): + logger.info(f'\nProcessing {param_filename.name} ({param_i + 1} of {len(param_files)})') with ParamStats(param_filename) as param_stats: stats_dict[str(param_filename)] = param_stats.stats() meta_dict[str(param_filename)] = param_stats.metadata From 532527267cb45f22389475985b97b374a03f5b80 Mon Sep 17 00:00:00 2001 From: Dugal Harris Date: Sat, 17 Feb 2024 17:09:18 +0200 Subject: [PATCH 02/31] add dytpe conversion test --- tests/test_fuse_api.py | 1 + tests/test_raster_array.py | 69 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) diff --git a/tests/test_fuse_api.py b/tests/test_fuse_api.py index 002d36b..74cb0c2 100644 --- a/tests/test_fuse_api.py +++ b/tests/test_fuse_api.py @@ -349,6 +349,7 @@ def test_tags(tmp_path: Path, ref_file_50cm_float): assert (yaml.safe_load(tags['FUSE_MAX_BLOCK_MEM']) == block_config['max_block_mem']) assert (yaml.safe_load(tags['FUSE_THREADS']) == block_config['threads']) + @pytest.mark.parametrize( 'src_file, ref_file, src_bands, ref_bands, force, exp_bands', [ ('file_rgb_50cm_float', 'file_rgb_100cm_float', None, None, False, (1, 2, 3)), diff --git a/tests/test_raster_array.py b/tests/test_raster_array.py index ec15cfd..75fda68 100644 --- a/tests/test_raster_array.py +++ b/tests/test_raster_array.py @@ -31,6 +31,7 @@ from homonim.errors import ImageProfileError, ImageFormatError from homonim.raster_array import RasterArray +from homonim import utils def test_read_only_properties(array_byte, profile_byte): @@ -267,3 +268,71 @@ def test_reprojection(ra_rgb_byte): assert ( reprj_ra.array[:, reprj_ra.mask].mean() == pytest.approx(ra_rgb_byte.array[:, ra_rgb_byte.mask].mean(), abs=.1) ) + + +@pytest.mark.parametrize('src_dtype, src_nodata, dst_dtype, dst_nodata', [ + ('float32', float('nan'), 'uint8', 1), + ('float32', float('nan'), 'int8', 1), + ('float32', float('nan'), 'uint16', 1), + ('float32', float('nan'), 'int16', 1), + ('float32', float('nan'), 'uint32', 1), + ('float32', float('nan'), 'int32', 1), + # ('float32', float('nan'), 'int64', 0), # overflow + ('float32', float('nan'), 'float32', float('nan')), + ('float32', float('nan'), 'float64', float('nan')), + ('float64', float('nan'), 'int32', 1), + # ('float64', float('nan'), 'int64', 1), # overflow + ('float64', float('nan'), 'float32', float('nan')), + ('float64', float('nan'), 'float64', float('nan')), + ('int64', 1, 'int32', 1), + ('int64', 1, 'int64', 1), + ('int64', 1, 'float32', float('nan')), + ('int64', 1, 'float64', float('nan')), + ('float32', float('nan'), 'float32', None), # nodata unchanged +]) +def test_convert_dtype(profile_100cm_float: dict, src_dtype: str, src_nodata: float, dst_dtype: str, dst_nodata: float): + """ Test dtype conversion with combinations covering rounding, clipping (with and w/o type promotion) and + re-masking. + """ + src_dtype_info = np.iinfo(src_dtype) if np.issubdtype(src_dtype, np.integer) else np.finfo(src_dtype) + dst_dtype_info = np.iinfo(dst_dtype) if np.issubdtype(dst_dtype, np.integer) else np.finfo(dst_dtype) + + # create array that spans the src_dtype range, includes decimals, excludes -1..1 (to allow nodata == +-1), + # and is padded with nodata + array = np.geomspace(2, src_dtype_info.max, 50, dtype=src_dtype).reshape(5, 10) + if src_dtype_info.min != 0: + array = np.concatenate((array, np.geomspace(-2, src_dtype_info.min, 50, dtype=src_dtype).reshape(5, 10))) + array = np.pad(array, (1, 1), constant_values=src_nodata) + src_ra = RasterArray( + array, crs=profile_100cm_float['crs'], transform=profile_100cm_float['transform'], nodata=src_nodata + ) + + # convert src_ra to dtype + test_ra = src_ra.copy() + test_ra.convert_dtype(dst_dtype, nodata=dst_nodata) + + # create rounded & clipped array in src_dtype to test against + test_array = array + if np.issubdtype(dst_dtype, np.integer): + test_array = np.clip(np.round(test_array), dst_dtype_info.min, dst_dtype_info.max) + elif np.issubdtype(src_dtype, np.floating): + # don't clip float but set out of range vals to +-inf (as np.astype does) + test_array[test_array < dst_dtype_info.min] = float('-inf') + test_array[test_array > dst_dtype_info.max] = float('inf') + assert np.any(test_array[src_ra.mask] % 1 != 0) # check contains decimals + + assert test_ra.dtype == dst_dtype + if dst_nodata: + assert utils.nan_equals(test_ra.nodata, dst_nodata) + assert np.any(test_ra.mask) + assert np.all(test_ra.mask == src_ra.mask) + # use approx test for case of (expected) precision loss e.g. float64->float32 or int64->float32 + assert test_ra.array[test_ra.mask] == pytest.approx(test_array[src_ra.mask], rel=1e-6) + + +def test_convert_dtype_error(ra_100cm_float: RasterArray): + """ Test dtype conversion raises an error when the nodata value cannot be cast to the conversion dtype. """ + test_ra = ra_100cm_float.copy() + with pytest.raises(ValueError) as ex: + test_ra.convert_dtype('uint8', nodata=float('nan')) + assert 'cast' in str(ex.value) From 79ee4324a54aca2db9ddb00e3625d08d2c3c4255 Mon Sep 17 00:00:00 2001 From: Dugal Harris Date: Sat, 17 Feb 2024 17:13:09 +0200 Subject: [PATCH 03/31] add convert_dtype to round/clip/convert dtype in-place --- homonim/cli.py | 1 + homonim/raster_array.py | 53 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/homonim/cli.py b/homonim/cli.py index 05b83be..fcbdcd1 100644 --- a/homonim/cli.py +++ b/homonim/cli.py @@ -155,6 +155,7 @@ def _nodata_cb(ctx: click.Context, param: click.Option, value): # check value is a number and can be cast to output dtype # TODO: there is a bug here if --nodata is specified before --dtype on the CLI (ctx.params['dtype'] does not # exist) + # TODO: add test for case where --nodata cannot be cast to --dtype try: value = float(value.lower()) if not rio.dtypes.can_cast_dtype(value, ctx.params['dtype']): diff --git a/homonim/raster_array.py b/homonim/raster_array.py index 1180bc8..9224307 100644 --- a/homonim/raster_array.py +++ b/homonim/raster_array.py @@ -64,7 +64,7 @@ def __init__( transform: rasterio.transform.Affine ``array`` geo-transform. nodata: optional - A number or nan, specifying the nodata value to use for masking the array. + A number or nan, specifying the nodata value for masking the array. window: rasterio.windows.Window, optional Optional window into the transform specifying the array region. """ @@ -297,6 +297,7 @@ def proj_profile(self) -> Dict: @property def mask(self) -> numpy.ndarray: """ 2D boolean mask corresponding to valid pixels in the array. """ + # TODO: cache this and delete if nodata is changed if self._nodata is None: return np.full(self._array.shape[-2:], True) mask = ~utils.nan_equals(self._array, self._nodata) @@ -465,6 +466,54 @@ def to_file(self, filename: Union[str, pathlib.Path], driver: str = 'GTiff', **k with rio.open(filename, 'w', driver=driver, **self.profile, **kwargs) as out_im: out_im.write(self._array, indexes=range(1, self.count + 1) if self.count > 1 else 1) + def convert_dtype(self, dtype: str, nodata: Union[float, None] = None): + """ + Convert RasterArray ``dtype`` in-place, rounding and clipping values when necessary. + + Parameters + ---------- + dtype: str + Data type to convert to. + nodata: float, optional + A number or nan specifiying the nodata value for masking the converted array. Should be castable to + ``dtype``. If not specified, the array is converted as is and the ``nodata`` property left unchanged. + """ + if nodata is not None and not rio.dtypes.can_cast_dtype(nodata, dtype): + raise ValueError(f"'nodata' value: {nodata} cannot be safely cast to '{dtype}'") + + # store nodata mask if rounding, clipping or casting *might* invalidate the current mask + mask = None + unsafe_cast = not np.can_cast(self.dtype, dtype, casting='safe') + if nodata is not None and unsafe_cast: + mask = self.mask + + # round if converting from float to integer dtype + if unsafe_cast and np.issubdtype(self.dtype, np.floating) and np.issubdtype(dtype, np.integer): + np.round(self._array, out=self._array) + + # clip if converting to integer dtype with smaller range than current dtype + if unsafe_cast and np.issubdtype(dtype, np.integer): + src_info = np.iinfo(self.dtype) if np.issubdtype(self.dtype, np.integer) else np.finfo(self.dtype) + dst_info = np.iinfo(dtype) + if src_info.min < dst_info.min or src_info.max > dst_info.max: + # If necessary and possible, promote source dtype to be able to represent destination dtype exactly. + # (This works around the (undocumented?) situation where even if a clipped array contains only values + # that can be represented as the destination dtype, there is still casting overflow when these values + # are near the dtype limits e.g. float32->int32 should be promoted to float64->int32. There is no way + # of preventing this situation for float*->int64.) + self._array = self._array.astype(np.promote_types(self.dtype, dtype), copy=False) + np.clip(self._array, dst_info.min, dst_info.max, out=self._array) + + # convert dtype + self._array = self._array.astype(dtype, copy=False, casting='unsafe') + + # set new nodata value + if mask is not None: + self._array[~mask] = nodata + self._nodata = nodata + elif nodata is not None: + self.nodata = nodata + def reproject( self, crs: Optional[CRS] = None, transform: Optional[Affine] = None, shape: Optional[Tuple[int, int]] = None, nodata: float = default_nodata, dtype: str = default_dtype, resampling: Resampling = Resampling.lanczos, @@ -498,7 +547,7 @@ def reproject( """ if transform is not None and shape is None: - raise ValueError('If `transform` is specified, `shape` must also be specified') + raise ValueError('If `transform` is specified, `shape` is required') if isinstance(resampling, str): resampling = Resampling[resampling] From c049a4a15d0d140a06f5cdd6b822f89505726566 Mon Sep 17 00:00:00 2001 From: Dugal Harris Date: Sat, 17 Feb 2024 17:15:11 +0200 Subject: [PATCH 04/31] round/clip/convert corrected block to raster dtype before writing --- homonim/fuse.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/homonim/fuse.py b/homonim/fuse.py index 7508494..0b3bead 100644 --- a/homonim/fuse.py +++ b/homonim/fuse.py @@ -292,7 +292,7 @@ def _out_files( def _process_block( self, block_pair: BlockPair, model: KernelModel, corr_im: DatasetWriter, - param_im: Optional[DatasetWriter] = None, + param_im: Optional[DatasetWriter] = None ): """ Thread-safe method to correct an image block to surface reflectance using the supplied correction ``model``. @@ -303,8 +303,10 @@ def _process_block( # fit and apply the sliding kernel models param_ra = model.fit(src_ra, ref_ra) corr_ra = model.apply(src_ra, param_ra) - # change the corrected nodata value so that is masked correctly for corr_im - corr_ra.nodata = corr_im.nodata + + # convert corrected dtype and change nodata value for corr_im + corr_ra.convert_dtype(corr_im.dtypes[0], nodata=corr_im.nodata) + with self._corr_lock: # write the corrected block corr_ra.to_rio_dataset(corr_im, indexes=block_pair.band_i + 1, window=block_pair.src_out_block) From 23182d9e34850f51669d79c1470f3e1b5a645980 Mon Sep 17 00:00:00 2001 From: Dugal Harris Date: Tue, 20 Feb 2024 12:31:45 +0200 Subject: [PATCH 05/31] - cache mask - change to_rio_dataset() to convert dtype internally & write internal mask when nodata==None --- homonim/raster_array.py | 125 +++++++++++++++++++++------------------- 1 file changed, 67 insertions(+), 58 deletions(-) diff --git a/homonim/raster_array.py b/homonim/raster_array.py index 9224307..048b439 100644 --- a/homonim/raster_array.py +++ b/homonim/raster_array.py @@ -90,7 +90,7 @@ def __init__( raise TypeError("`transform` must be an instance of rasterio.transform.Affine") self._nodata = nodata - self._nodata_mask = None + self._mask = None @classmethod def from_profile(cls, array: Optional[np.ndarray], profile: Dict, window: Optional[Window] = None) -> 'RasterArray': @@ -229,6 +229,7 @@ def array(self) -> numpy.ndarray: def array(self, value: numpy.ndarray): if np.all(value.shape[-2:] == self._array.shape[-2:]): self._array = value + self._mask = None else: raise ValueError("'value' and 'array' shapes must match") @@ -297,20 +298,24 @@ def proj_profile(self) -> Dict: @property def mask(self) -> numpy.ndarray: """ 2D boolean mask corresponding to valid pixels in the array. """ - # TODO: cache this and delete if nodata is changed - if self._nodata is None: - return np.full(self._array.shape[-2:], True) - mask = ~utils.nan_equals(self._array, self._nodata) - if self._array.ndim > 2: - mask = np.any(mask, axis=0) - return mask + if self._mask is None: + if self._nodata is None: + self._mask = np.full(self._array.shape[-2:], True) + else: + self._mask = ~utils.nan_equals(self._array, self._nodata) + if self._array.ndim > 2: + self._mask = np.any(self._mask, axis=0) + return self._mask @mask.setter def mask(self, value: numpy.ndarray): + # TODO: allow nodata=None with a mask or use numpy masked array if self._array.ndim == 2: self._array[~value] = self._nodata else: self._array[:, ~value] = self._nodata + # force re-calculation of mask (should not set it to value as there may be other pixels == nodata) + self._mask = None @property def mask_ra(self) -> 'RasterArray': @@ -332,6 +337,7 @@ def nodata(self, value: float): # if new nodata value is None, remove the current mask # if current nodata is None, there is no mask, so just set the new nodata value and return self._nodata = value + self._mask = None elif not (utils.nan_equals(value, self._nodata)): # if the new nodata value is different to the current nodata, # set the mask area in array to the new nodata value and return @@ -341,6 +347,49 @@ def nodata(self, value: float): else: self._array[nodata_mask] = value self._nodata = value + # force re-calculation of mask (there may be pixels other than ~self.mask == new nodata value) + self._mask = None + + def _convert_array_dtype(self, dtype: str, nodata: Union[float, None] = None) -> np.array: + """ Return the image array converted to ``dtype``, rounding and clipping when necessary. Passing ``nodata`` + will set nodata areas in the returned array to this value. + """ + if nodata is not None and not rio.dtypes.can_cast_dtype(nodata, dtype): + raise ValueError(f"'nodata' value: {nodata} cannot be safely cast to '{dtype}'") + + # create a copy of the array if nodata, rounding, clipping or casting might change it + unsafe_cast = not np.can_cast(self.dtype, dtype, casting='safe') + nodata_change = nodata is not None and not utils.nan_equals(nodata, self.nodata) + array = self._array + if nodata_change or unsafe_cast: + # If necessary and possible, promote source dtype to be able to represent destination dtype exactly. + # (This works around the (undocumented?) situation where even if a clipped array contains only values + # that can be represented as the destination dtype, there is still casting overflow when these values + # are near the dtype limits e.g. float32->int32 should be promoted to float64->int32. There is no way + # of preventing this situation for float*->int64.) + array = array.astype(np.promote_types(self.dtype, dtype), copy=True) + + # round if converting from float to integer dtype + if unsafe_cast and np.issubdtype(self.dtype, np.floating) and np.issubdtype(dtype, np.integer): + np.round(array, out=array) + + # clip if converting to integer dtype with smaller range than current dtype + if unsafe_cast and np.issubdtype(dtype, np.integer): + src_info = np.iinfo(self.dtype) if np.issubdtype(self.dtype, np.integer) else np.finfo(self.dtype) + dst_info = np.iinfo(dtype) + if src_info.min < dst_info.min or src_info.max > dst_info.max: + np.clip(array, dst_info.min, dst_info.max, out=array) + + # convert dtype + with warnings.catch_warnings(): + warnings.simplefilter('ignore', category=RuntimeWarning) + array = array.astype(dtype, copy=False, casting='unsafe') + + # set nodata value if it has changed, or may be invalid after rounding, clipping and casting + if nodata_change or (nodata is not None and unsafe_cast): + array[~self.mask] = nodata + + return array def copy(self) -> 'RasterArray': """ Create a deep copy of the RasterArray. """ @@ -384,7 +433,9 @@ def to_rio_dataset( """ Write the RasterArray into a rasterio dataset. - Note that typically, the dataset bounds would encompass the RasterArray bounds. + The RasterArray mask is written as an internal mask band when the dataset's nodata is None, otherwise no mask + is written, and the RasterArray array is written as is. Note that typically, dataset bounds would encompass + the RasterArray bounds. Parameters ---------- @@ -429,6 +480,7 @@ def to_rio_dataset( f'The length of indexes ({len(indexes)}) exceeds the number of bands in the ' f'RasterArray ({self.count})' ) + # TODO: warn if nodata doesn't match (switch logger.warning -> warnings.warn at same time) if window is None: # a window defining the region in the dataset corresponding to the RasterArray extents @@ -445,7 +497,12 @@ def to_rio_dataset( f'bounds of the RasterArray ({bounded_ra.bounds})' ) - rio_dataset.write(bounded_ra.array, window=window, indexes=indexes, **kwargs) + # convert data type and write to dataset + array = bounded_ra._convert_array_dtype(rio_dataset.dtypes[0], nodata=rio_dataset.nodata) + rio_dataset.write(array, window=window, indexes=indexes, **kwargs) + if rio_dataset.nodata is None and (1 in np.array(indexes)): + # write internal mask once (for first band) if nodata is None + rio_dataset.write_mask(bounded_ra.mask, window=window) def to_file(self, filename: Union[str, pathlib.Path], driver: str = 'GTiff', **kwargs): """ @@ -466,54 +523,6 @@ def to_file(self, filename: Union[str, pathlib.Path], driver: str = 'GTiff', **k with rio.open(filename, 'w', driver=driver, **self.profile, **kwargs) as out_im: out_im.write(self._array, indexes=range(1, self.count + 1) if self.count > 1 else 1) - def convert_dtype(self, dtype: str, nodata: Union[float, None] = None): - """ - Convert RasterArray ``dtype`` in-place, rounding and clipping values when necessary. - - Parameters - ---------- - dtype: str - Data type to convert to. - nodata: float, optional - A number or nan specifiying the nodata value for masking the converted array. Should be castable to - ``dtype``. If not specified, the array is converted as is and the ``nodata`` property left unchanged. - """ - if nodata is not None and not rio.dtypes.can_cast_dtype(nodata, dtype): - raise ValueError(f"'nodata' value: {nodata} cannot be safely cast to '{dtype}'") - - # store nodata mask if rounding, clipping or casting *might* invalidate the current mask - mask = None - unsafe_cast = not np.can_cast(self.dtype, dtype, casting='safe') - if nodata is not None and unsafe_cast: - mask = self.mask - - # round if converting from float to integer dtype - if unsafe_cast and np.issubdtype(self.dtype, np.floating) and np.issubdtype(dtype, np.integer): - np.round(self._array, out=self._array) - - # clip if converting to integer dtype with smaller range than current dtype - if unsafe_cast and np.issubdtype(dtype, np.integer): - src_info = np.iinfo(self.dtype) if np.issubdtype(self.dtype, np.integer) else np.finfo(self.dtype) - dst_info = np.iinfo(dtype) - if src_info.min < dst_info.min or src_info.max > dst_info.max: - # If necessary and possible, promote source dtype to be able to represent destination dtype exactly. - # (This works around the (undocumented?) situation where even if a clipped array contains only values - # that can be represented as the destination dtype, there is still casting overflow when these values - # are near the dtype limits e.g. float32->int32 should be promoted to float64->int32. There is no way - # of preventing this situation for float*->int64.) - self._array = self._array.astype(np.promote_types(self.dtype, dtype), copy=False) - np.clip(self._array, dst_info.min, dst_info.max, out=self._array) - - # convert dtype - self._array = self._array.astype(dtype, copy=False, casting='unsafe') - - # set new nodata value - if mask is not None: - self._array[~mask] = nodata - self._nodata = nodata - elif nodata is not None: - self.nodata = nodata - def reproject( self, crs: Optional[CRS] = None, transform: Optional[Affine] = None, shape: Optional[Tuple[int, int]] = None, nodata: float = default_nodata, dtype: str = default_dtype, resampling: Resampling = Resampling.lanczos, From d41c9ad4bb4675b416427a4460a67128d30303cb Mon Sep 17 00:00:00 2001 From: Dugal Harris Date: Tue, 20 Feb 2024 12:45:01 +0200 Subject: [PATCH 06/31] change to_file() to write internal mask when nodata==None --- homonim/raster_array.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/homonim/raster_array.py b/homonim/raster_array.py index 048b439..cd0e764 100644 --- a/homonim/raster_array.py +++ b/homonim/raster_array.py @@ -431,7 +431,7 @@ def to_rio_dataset( window: Optional[Window] = None, **kwargs ): """ - Write the RasterArray into a rasterio dataset. + Write the RasterArray into a rasterio dataset, converting data types with rounding and clipping when necessary. The RasterArray mask is written as an internal mask band when the dataset's nodata is None, otherwise no mask is written, and the RasterArray array is written as is. Note that typically, dataset bounds would encompass @@ -508,6 +508,9 @@ def to_file(self, filename: Union[str, pathlib.Path], driver: str = 'GTiff', **k """ Write the RasterArray to an image file. + The RasterArray mask is written as an internal mask band when :attr:`~RasterArray.nodata` is None, + otherwise the file's nodata property is set, and the RasterArray array is written as is. + Parameters ---------- filename: str, pathlib.Path @@ -521,7 +524,9 @@ def to_file(self, filename: Union[str, pathlib.Path], driver: str = 'GTiff', **k """ with rio.Env(GDAL_NUM_THREADS='ALL_CPUs', GTIFF_FORCE_RGBA=False): with rio.open(filename, 'w', driver=driver, **self.profile, **kwargs) as out_im: - out_im.write(self._array, indexes=range(1, self.count + 1) if self.count > 1 else 1) + out_im.write(array, indexes=range(1, self.count + 1) if self.count > 1 else 1) + if out_im.nodata is None: + out_im.write_mask(self.mask) def reproject( self, crs: Optional[CRS] = None, transform: Optional[Affine] = None, shape: Optional[Tuple[int, int]] = None, From 31e05c18e5815dc143a985f1ac1e538387d8b5f1 Mon Sep 17 00:00:00 2001 From: Dugal Harris Date: Tue, 20 Feb 2024 12:46:35 +0200 Subject: [PATCH 07/31] corrected block data type now converted in to_rio_dataset() --- homonim/fuse.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/homonim/fuse.py b/homonim/fuse.py index 0b3bead..d890fbb 100644 --- a/homonim/fuse.py +++ b/homonim/fuse.py @@ -130,7 +130,8 @@ def create_out_profile( dtype: str, optional Output image data type. One of: uint8|uint16|int16|uint32|int32|float32|float64. nodata: float, optional - Output image nodata value. + Output image nodata value. If set to None, an internal mask is written (recommended when + ``creation_options`` are configured for lossy, e.g. JPEG, compression). creation_options: dict, optional Driver specific creation options e.g. ``dict(compression='deflate')``. See the `GDAL docs `_ for available keys and values. @@ -304,10 +305,8 @@ def _process_block( param_ra = model.fit(src_ra, ref_ra) corr_ra = model.apply(src_ra, param_ra) - # convert corrected dtype and change nodata value for corr_im - corr_ra.convert_dtype(corr_im.dtypes[0], nodata=corr_im.nodata) - - with self._corr_lock: # write the corrected block + # write the corrected block + with self._corr_lock: corr_ra.to_rio_dataset(corr_im, indexes=block_pair.band_i + 1, window=block_pair.src_out_block) if param_im: From 54c9d207bd99e596a0222e70a69c1550b6fd7957 Mon Sep 17 00:00:00 2001 From: Dugal Harris Date: Tue, 20 Feb 2024 12:47:19 +0200 Subject: [PATCH 08/31] configure rasterio environment to write internal masks --- homonim/raster_pair.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/homonim/raster_pair.py b/homonim/raster_pair.py index c1c69c8..668550e 100644 --- a/homonim/raster_pair.py +++ b/homonim/raster_pair.py @@ -142,9 +142,10 @@ def validate_image_format(im: rasterio.DatasetReader): f'Could not read image {name}. JPEG compression with NBITS==12 is not supported, ' f'you probably need to recompress this image.' ) - if 'IMAGE_STRUCTURE' in im.tag_namespaces(1) and 'NBITS' in im.tags(1, 'IMAGE_STRUCTURE'): - if im.tags(1,'IMAGE_STRUCTURE')['NBITS'] == '12': - raise twelve_bit_jpeg_error + # TODO: only throw the exception if/when a gdal/rio exception is thrown. + # if 'IMAGE_STRUCTURE' in im.tag_namespaces(1) and 'NBITS' in im.tags(1, 'IMAGE_STRUCTURE'): + # if im.tags(1,'IMAGE_STRUCTURE')['NBITS'] == '12': + # raise twelve_bit_jpeg_error try: _ = im.read(1, window=im.block_window(1, 0, 0)) @@ -297,7 +298,6 @@ def open(self): self._ref_win = utils.expand_window_to_grid(self._ref_im.window(*self._src_im.bounds)) self._src_win = utils.expand_window_to_grid(self._src_im.window(*self._ref_im.window_bounds(self._ref_win))) - def close(self): """ Close the source and reference image datasets. """ self._src_im.close() @@ -306,7 +306,11 @@ def close(self): def __enter__(self): self._stack = ExitStack() # TODO: revert to GDAL_NUM_THREADS='ALL_CPUS' when https://github.com/rasterio/rasterio/issues/2847 is resolved - self._stack.enter_context(rio.Env(GDAL_NUM_THREADS=1, GTIFF_FORCE_RGBA=False)) + self._stack.enter_context( + rio.Env(GDAL_NUM_THREADS='ALL_CPUS', GTIFF_FORCE_RGBA=False, GDAL_TIFF_INTERNAL_MASK=True) + ) + # TODO: rio is logging warnings (e.g. with 12 bit jpeg) which don't get redirected. rather do logging as in + # orthority. self._stack.enter_context(logging_redirect_tqdm([logging.getLogger(__package__)])) self.open() return self From 989129cb48918a2a966339008a5ec3643bad648b Mon Sep 17 00:00:00 2001 From: Dugal Harris Date: Tue, 20 Feb 2024 12:49:09 +0200 Subject: [PATCH 09/31] add test_to_rio_dataset_nodata_none() and test mask cacheing in test_nodata_mask() --- tests/test_fuse_api.py | 4 ++- tests/test_raster_array.py | 64 +++++++++++++++++++++++++++----------- 2 files changed, 49 insertions(+), 19 deletions(-) diff --git a/tests/test_fuse_api.py b/tests/test_fuse_api.py index 74cb0c2..8ca6e94 100644 --- a/tests/test_fuse_api.py +++ b/tests/test_fuse_api.py @@ -124,7 +124,7 @@ def test_basic_fusion( ) ), dict( - driver='GTiff', dtype='uint8', nodata=0, + driver='GTiff', dtype='uint8', nodata=None, creation_options=dict( tiled=True, blockxsize=64, blockysize=64, compress='jpeg', interleave='pixel', photometric='ycbcr' ) @@ -139,8 +139,10 @@ def test_out_profile(file_rgb_100cm_float, tmp_path: Path, out_profile: Dict): with raster_fuse: raster_fuse.process(corr_filename, Model.gain_blk_offset, (3, 3), out_profile=out_profile) assert (corr_filename.exists()) + out_profile.update(**out_profile['creation_options']) out_profile.pop('creation_options') + with rio.open(file_rgb_100cm_float, 'r') as src_ds, rio.open(corr_filename, 'r') as fuse_ds: # test output image has been set with out_profile properties for k, v in out_profile.items(): diff --git a/tests/test_raster_array.py b/tests/test_raster_array.py index 75fda68..2ff30e5 100644 --- a/tests/test_raster_array.py +++ b/tests/test_raster_array.py @@ -24,7 +24,7 @@ import pytest import rasterio as rio from rasterio.crs import CRS -from rasterio.enums import Resampling +from rasterio.enums import Resampling, MaskFlags from rasterio.transform import Affine, from_bounds from rasterio.windows import Window from rasterio.warp import transform_bounds @@ -72,12 +72,19 @@ def test_nodata_mask(ra_byte): assert (ra_byte.mask_ra.array == mask).all() assert ra_byte.mask_ra.transform == ra_byte.transform + # test mask changed after setting altered masked array + array = ra_byte.array.copy() + array[np.divide(mask.shape, 2).astype('int')] = ra_byte.nodata + mask[np.divide(mask.shape, 2).astype('int')] = False + ra_byte.array = array + assert (ra_byte.mask == mask).all() + # test mask unchanged after setting stacked array ra_byte.array = np.stack((ra_byte.array, ra_byte.array), axis=0) assert (ra_byte.mask == mask).all() # test altering the mask - mask[np.divide(mask.shape, 2).astype('int')] = False + mask[(np.divide(mask.shape, 2) + 1).astype('int')] = False ra_byte.mask = mask assert (ra_byte.mask == mask).all() @@ -192,6 +199,23 @@ def test_to_rio_dataset(ra_byte, tmp_path: Path): assert (test_array == ra_byte.array).all() +def test_to_rio_dataset_nodata_none(ra_byte, tmp_path: Path): + """ Test writing raster array to dataset with nodata=None writes an internal mask. """ + ds_filename = tmp_path.joinpath('temp.tif') + profile = ra_byte.profile + profile.update(nodata=None) + with rio.open(ds_filename, 'w', driver='GTiff', **profile) as ds: + ra_byte.to_rio_dataset(ds) + + with rio.open(ds_filename, 'r') as ds: + assert ds.nodata is None + assert ds.mask_flag_enums[0] == [MaskFlags.per_dataset] + test_mask = ds.dataset_mask().astype('bool') + test_array = ds.read(indexes=1) + assert (test_mask == ra_byte.mask).all() + assert (test_array[test_mask] == ra_byte.array[ra_byte.mask]).all() + + def test_to_rio_dataset_crop(ra_rgb_byte, tmp_path: Path): """ Test writing a raster array to a dataset where the dataset & raster array sizes differ. """ ds_filename = tmp_path.joinpath('temp.tif') @@ -290,7 +314,7 @@ def test_reprojection(ra_rgb_byte): ('int64', 1, 'float64', float('nan')), ('float32', float('nan'), 'float32', None), # nodata unchanged ]) -def test_convert_dtype(profile_100cm_float: dict, src_dtype: str, src_nodata: float, dst_dtype: str, dst_nodata: float): +def test_convert_array_dtype(profile_100cm_float: dict, src_dtype: str, src_nodata: float, dst_dtype: str, dst_nodata: float): """ Test dtype conversion with combinations covering rounding, clipping (with and w/o type promotion) and re-masking. """ @@ -307,32 +331,36 @@ def test_convert_dtype(profile_100cm_float: dict, src_dtype: str, src_nodata: fl array, crs=profile_100cm_float['crs'], transform=profile_100cm_float['transform'], nodata=src_nodata ) - # convert src_ra to dtype - test_ra = src_ra.copy() - test_ra.convert_dtype(dst_dtype, nodata=dst_nodata) + # convert to dtype + src_copy_ra = src_ra.copy() + test_array = src_copy_ra._convert_array_dtype(dst_dtype, nodata=dst_nodata) + + # test converting did not change src_copy_ra + assert utils.nan_equals(src_copy_ra.array, src_ra.array).all() + assert (src_copy_ra.mask == src_ra.mask).all() # create rounded & clipped array in src_dtype to test against - test_array = array + ref_array = array if np.issubdtype(dst_dtype, np.integer): - test_array = np.clip(np.round(test_array), dst_dtype_info.min, dst_dtype_info.max) + ref_array = np.clip(np.round(ref_array), dst_dtype_info.min, dst_dtype_info.max) elif np.issubdtype(src_dtype, np.floating): # don't clip float but set out of range vals to +-inf (as np.astype does) - test_array[test_array < dst_dtype_info.min] = float('-inf') - test_array[test_array > dst_dtype_info.max] = float('inf') - assert np.any(test_array[src_ra.mask] % 1 != 0) # check contains decimals + ref_array[ref_array < dst_dtype_info.min] = float('-inf') + ref_array[ref_array > dst_dtype_info.max] = float('inf') + assert np.any(ref_array[src_ra.mask] % 1 != 0) # check contains decimals - assert test_ra.dtype == dst_dtype + assert test_array.dtype == dst_dtype if dst_nodata: - assert utils.nan_equals(test_ra.nodata, dst_nodata) - assert np.any(test_ra.mask) - assert np.all(test_ra.mask == src_ra.mask) + test_mask = ~utils.nan_equals(test_array, dst_nodata) + assert np.any(test_mask) + assert (test_mask == src_ra.mask).all() # use approx test for case of (expected) precision loss e.g. float64->float32 or int64->float32 - assert test_ra.array[test_ra.mask] == pytest.approx(test_array[src_ra.mask], rel=1e-6) + assert test_array[src_ra.mask] == pytest.approx(ref_array[src_ra.mask], rel=1e-6) -def test_convert_dtype_error(ra_100cm_float: RasterArray): +def test_convert_array_dtype_error(ra_100cm_float: RasterArray): """ Test dtype conversion raises an error when the nodata value cannot be cast to the conversion dtype. """ test_ra = ra_100cm_float.copy() with pytest.raises(ValueError) as ex: - test_ra.convert_dtype('uint8', nodata=float('nan')) + test_ra._convert_array_dtype('uint8', nodata=float('nan')) assert 'cast' in str(ex.value) From 5f52ced78190b6da2ee1cdfb71786cf1734207a7 Mon Sep 17 00:00:00 2001 From: Dugal Harris Date: Tue, 20 Feb 2024 13:03:42 +0200 Subject: [PATCH 10/31] fix array ref --- homonim/raster_array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/homonim/raster_array.py b/homonim/raster_array.py index cd0e764..1a1f316 100644 --- a/homonim/raster_array.py +++ b/homonim/raster_array.py @@ -524,7 +524,7 @@ def to_file(self, filename: Union[str, pathlib.Path], driver: str = 'GTiff', **k """ with rio.Env(GDAL_NUM_THREADS='ALL_CPUs', GTIFF_FORCE_RGBA=False): with rio.open(filename, 'w', driver=driver, **self.profile, **kwargs) as out_im: - out_im.write(array, indexes=range(1, self.count + 1) if self.count > 1 else 1) + out_im.write(self._array, indexes=range(1, self.count + 1) if self.count > 1 else 1) if out_im.nodata is None: out_im.write_mask(self.mask) From 72c382f98856e3477eecf2874aea4f2ee7307ac8 Mon Sep 17 00:00:00 2001 From: Dugal Harris Date: Tue, 20 Feb 2024 13:35:49 +0200 Subject: [PATCH 11/31] change test_out_profile() for when nodata==None --- tests/test_fuse_cli.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_fuse_cli.py b/tests/test_fuse_cli.py index ba4646f..951978b 100644 --- a/tests/test_fuse_cli.py +++ b/tests/test_fuse_cli.py @@ -27,6 +27,7 @@ from click.testing import CliRunner from rasterio.warp import Resampling from rasterio.vrt import WarpedVRT +from rasterio.enums import MaskFlags from homonim import utils from homonim.cli import cli @@ -342,6 +343,7 @@ def test_r2_inpaint_thresh_error(runner: CliRunner, basic_fuse_cli_params: FuseC ('GTiff', 'float64', float('nan')), ('GTiff', 'uint16', 65535), ('PNG', 'uint8', 0), + ('GTiff', 'uint8', None), ] ) # yapf: disable def test_out_profile(runner: CliRunner, basic_fuse_cli_params: FuseCliParams, driver: str, dtype: str, nodata: float): @@ -357,7 +359,8 @@ def test_out_profile(runner: CliRunner, basic_fuse_cli_params: FuseCliParams, dr with rio.open(corr_file, 'r') as out_ds: assert (out_ds.driver == driver) assert (out_ds.dtypes[0] == dtype) - assert (utils.nan_equals(out_ds.nodata, nodata)) + assert out_ds.nodata is None if nodata is None else (utils.nan_equals(out_ds.nodata, nodata)) + assert out_ds.mask_flag_enums[0] == [MaskFlags.per_dataset] if nodata is None else [MaskFlags.nodata] def test_out_driver_error(runner: CliRunner, basic_fuse_cli_params: FuseCliParams): From 0ca84a1de3d0cc768c57cc1a1d49a0fd2a4677f3 Mon Sep 17 00:00:00 2001 From: Dugal Harris Date: Tue, 20 Feb 2024 15:59:56 +0200 Subject: [PATCH 12/31] remove 12bit jpeg check --- homonim/raster_pair.py | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/homonim/raster_pair.py b/homonim/raster_pair.py index 668550e..472236c 100644 --- a/homonim/raster_pair.py +++ b/homonim/raster_pair.py @@ -136,26 +136,8 @@ def _validate_pair_format(src_im: rasterio.DatasetReader, ref_im: rasterio.Datas def validate_image_format(im: rasterio.DatasetReader): """ Validate an open rasterio dataset for use as a source or reference image. """ - # test for 12 bit jpegs - name = Path(im.name).name - twelve_bit_jpeg_error = errors.UnsupportedImageError( - f'Could not read image {name}. JPEG compression with NBITS==12 is not supported, ' - f'you probably need to recompress this image.' - ) - # TODO: only throw the exception if/when a gdal/rio exception is thrown. - # if 'IMAGE_STRUCTURE' in im.tag_namespaces(1) and 'NBITS' in im.tags(1, 'IMAGE_STRUCTURE'): - # if im.tags(1,'IMAGE_STRUCTURE')['NBITS'] == '12': - # raise twelve_bit_jpeg_error - - try: - _ = im.read(1, window=im.block_window(1, 0, 0)) - except Exception as ex: - if 'compress' in im.profile and im.profile['compress'] == 'jpeg': # assume it is a 12bit JPEG - raise twelve_bit_jpeg_error - else: - raise ex - # warn if there is no nodata or associated mask + name = Path(im.name).name is_masked = any([MaskFlags.all_valid not in im.mask_flag_enums[bi] for bi in range(im.count)]) if im.nodata is None and not is_masked: logger.warning( @@ -309,8 +291,6 @@ def __enter__(self): self._stack.enter_context( rio.Env(GDAL_NUM_THREADS='ALL_CPUS', GTIFF_FORCE_RGBA=False, GDAL_TIFF_INTERNAL_MASK=True) ) - # TODO: rio is logging warnings (e.g. with 12 bit jpeg) which don't get redirected. rather do logging as in - # orthority. self._stack.enter_context(logging_redirect_tqdm([logging.getLogger(__package__)])) self.open() return self From 789b4dfd6e05dfebadc9365c0787190e20e9a6d2 Mon Sep 17 00:00:00 2001 From: Dugal Harris Date: Wed, 21 Feb 2024 14:29:28 +0200 Subject: [PATCH 13/31] use trhead-safe np.errstate to suppress cast warning rather than warnings.catch_warnings --- homonim/raster_array.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/homonim/raster_array.py b/homonim/raster_array.py index 1a1f316..6dff89f 100644 --- a/homonim/raster_array.py +++ b/homonim/raster_array.py @@ -381,8 +381,7 @@ def _convert_array_dtype(self, dtype: str, nodata: Union[float, None] = None) -> np.clip(array, dst_info.min, dst_info.max, out=array) # convert dtype - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=RuntimeWarning) + with np.errstate(invalid='ignore'): # ignore numpy warning for cast of nodata=nan to dtype array = array.astype(dtype, copy=False, casting='unsafe') # set nodata value if it has changed, or may be invalid after rounding, clipping and casting @@ -499,6 +498,8 @@ def to_rio_dataset( # convert data type and write to dataset array = bounded_ra._convert_array_dtype(rio_dataset.dtypes[0], nodata=rio_dataset.nodata) + # TODO: rio is raising warnings when values in the write below are to a 12 bit jpeg and they overflow the 12bit + # bound. it only does this writing in a thread (?). rio_dataset.write(array, window=window, indexes=indexes, **kwargs) if rio_dataset.nodata is None and (1 in np.array(indexes)): # write internal mask once (for first band) if nodata is None @@ -576,7 +577,7 @@ def reproject( _dst_array = np.zeros(shape, dtype=dtype) # suppress NotGeoreferencedWarning which rasterio sometimes raises incorrectly - with warnings.catch_warnings(): + with warnings.catch_warnings(): # TODO: this is not thread-safe warnings.simplefilter('ignore', category=NotGeoreferencedWarning) _, _dst_transform = reproject( From 2207a6aa5003a9c512dc3e18d10ae0ef54ac6fae Mon Sep 17 00:00:00 2001 From: Dugal Harris Date: Wed, 21 Feb 2024 14:30:06 +0200 Subject: [PATCH 14/31] add null handler to package logger --- homonim/__init__.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/homonim/__init__.py b/homonim/__init__.py index e679d5f..a727d1d 100644 --- a/homonim/__init__.py +++ b/homonim/__init__.py @@ -18,6 +18,7 @@ """ import os import pathlib +import logging from homonim.compare import RasterCompare from homonim.enums import Model, ProcCrs @@ -25,6 +26,11 @@ from homonim.kernel_model import KernelModel from homonim.stats import ParamStats +# Add a NullHandler to the package logger to hide logs by default. Applications can then add +# their own handler(s). +log = logging.getLogger(__name__) +log.addHandler(logging.NullHandler()) + if '__file__' in globals(): root_path = pathlib.Path(__file__).absolute().parents[1] else: From a1e909cbad6857aebbc76171e07f7ddbf4d19e3f Mon Sep 17 00:00:00 2001 From: Dugal Harris Date: Wed, 21 Feb 2024 14:36:34 +0200 Subject: [PATCH 15/31] redirect homonim warnings to module logger --- homonim/cli.py | 63 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 46 insertions(+), 17 deletions(-) diff --git a/homonim/cli.py b/homonim/cli.py index fcbdcd1..5c576d3 100644 --- a/homonim/cli.py +++ b/homonim/cli.py @@ -21,7 +21,7 @@ import logging import pathlib import re -import sys +import warnings from timeit import default_timer as timer from typing import Tuple, Dict @@ -123,18 +123,43 @@ def _update_existing_keys(default_dict: Dict, **kwargs) -> Dict: def _configure_logging(verbosity: int): - """ Configure python logging level.""" - # adapted from rasterio https://github.com/rasterio/rasterio - log_level = max(10, 20 - 10 * verbosity) + """Configure python logging level.""" + # TODO: change logger.warning to warnings.warn where a client may want to respond to a warning. + # TODO: lose PlainInfoFormatter if not needed + + def showwarning(message, category, filename, lineno, file=None, line=None): + """Redirect orthority warnings to the source module's logger, otherwise show warning as + usual. + """ + # adapted from https://discuss.python.org/t/some-easy-and-pythonic-way-to-bind-warnings-to-loggers/14009/2 + package_root = pathlib.Path(__file__).parents[1] + file_path = pathlib.Path(filename) + try: + module_path = file_path.relative_to(package_root) + is_relative_to = True + except ValueError: + is_relative_to = False - # apply config to package logger, rather than root logger + if file is not None or not is_relative_to: + orig_show_warning(message, category, filename, lineno, file, line) + else: + module_name = module_path.with_suffix('').as_posix().replace('/', '.') + logger = logging.getLogger(module_name) + logger.warning(str(message)) + + # redirect orthority warnings to module logger + orig_show_warning = warnings.showwarning + warnings.showwarning = showwarning + + # Configure the package logger, leaving logs from dependencies on their defaults (e.g. for rasterio, they stay + # hidden). + # Adapted from rasterio: https://github.com/rasterio/rasterio/blob/main/rasterio/rio/main.py. + log_level = max(10, 20 - 10 * verbosity) pkg_logger = logging.getLogger(__package__) - formatter = PlainInfoFormatter() - handler = logging.StreamHandler(sys.stderr) - handler.setFormatter(formatter) + handler = logging.StreamHandler() + handler.setFormatter(PlainInfoFormatter()) pkg_logger.addHandler(handler) pkg_logger.setLevel(log_level) - logging.captureWarnings(True) def _threads_cb(ctx: click.Context, param: click.Option, value): @@ -270,7 +295,7 @@ def cli(verbose: int, quiet: int): # fuse command -@cloup.command(cls=FuseCommand) +@cli.command(cls=FuseCommand) # standard options @cloup.argument( 'src-file', nargs=-1, metavar='SOURCE...', type=click.Path(exists=False, dir_okay=False, path_type=pathlib.Path), @@ -380,8 +405,9 @@ def cli(verbose: int, quiet: int): click.option( '--nodata', 'nodata', type=click.STRING, callback=_nodata_cb, metavar='[NUMBER|null|nan]', default=RasterFuse.create_out_profile()['nodata'], show_default=True, - help=f'Output image nodata value. Valid for corrected images only, parameter images always use ' - f'{RasterArray.default_nodata}.' + help=f"Output image nodata value. Valid for corrected images only, parameter images always use " + f"{RasterArray.default_nodata}. If null, an internal mask is written (recommended for lossy " + f"compression)." ), click.option( '-co', '--creation-options', metavar='NAME=VALUE', multiple=True, default=(), callback=_creation_options_cb, @@ -498,11 +524,11 @@ def fuse( raise click.Abort() -cli.add_command(fuse) +# cli.add_command(fuse) # compare command -@cloup.command(cls=HomonimCommand) +@cli.command(cls=HomonimCommand) @cloup.argument( 'src-file', nargs=-1, metavar='IMAGE...', type=click.Path(exists=True, dir_okay=False, path_type=pathlib.Path), help='Path(s) to image(s) to compare with :option:`REFERENCE`.' @@ -598,10 +624,10 @@ def compare( raise click.Abort() -cli.add_command(compare) +# cli.add_command(compare) -@cloup.command(cls=HomonimCommand) +@cli.command(cls=HomonimCommand) @cloup.argument( 'param-files', nargs=-1, metavar='PARAM...', type=click.Path(exists=True, dir_okay=False, path_type=pathlib.Path), callback=_param_file_cb, help='Path(s) to parameter image(s).' @@ -644,6 +670,9 @@ def stats(param_files: Tuple[pathlib.Path, ...], output: pathlib.Path): raise click.Abort() -cli.add_command(stats) +# cli.add_command(stats) + +if __name__ == '__main__': + cli() ## From f69f9d284942a0d7e5345f29bd67b0192aebbaf4 Mon Sep 17 00:00:00 2001 From: Dugal Harris Date: Thu, 22 Feb 2024 12:11:35 +0200 Subject: [PATCH 16/31] doc updates for dtype rounding and clipping --- homonim/cli.py | 6 +++--- homonim/fuse.py | 5 +++-- homonim/raster_array.py | 9 ++++----- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/homonim/cli.py b/homonim/cli.py index 5c576d3..47ee22d 100644 --- a/homonim/cli.py +++ b/homonim/cli.py @@ -124,7 +124,7 @@ def _update_existing_keys(default_dict: Dict, **kwargs) -> Dict: def _configure_logging(verbosity: int): """Configure python logging level.""" - # TODO: change logger.warning to warnings.warn where a client may want to respond to a warning. + # TODO: change logger.warning to warnings.warn where a user may want to respond to a warning. # TODO: lose PlainInfoFormatter if not needed def showwarning(message, category, filename, lineno, file=None, line=None): @@ -399,8 +399,8 @@ def cli(verbose: int, quiet: int): click.option( '--dtype', type=click.Choice(list(rio.dtypes.dtype_fwd.values())[1:8], case_sensitive=False), default=RasterFuse.create_out_profile()['dtype'], show_default=True, - help=f'Output image data type. Valid for corrected images only, parameter images always use ' - f'{RasterArray.default_dtype}.' + help=f'Output image data type. If an integer type, values are rounded and clipped to its range. Valid for ' + f'corrected images only, parameter images always use {RasterArray.default_dtype}.' ), click.option( '--nodata', 'nodata', type=click.STRING, callback=_nodata_cb, metavar='[NUMBER|null|nan]', diff --git a/homonim/fuse.py b/homonim/fuse.py index d890fbb..9e361af 100644 --- a/homonim/fuse.py +++ b/homonim/fuse.py @@ -128,12 +128,13 @@ def create_out_profile( Output format driver. See the `GDAL docs `_ for available options. dtype: str, optional - Output image data type. One of: uint8|uint16|int16|uint32|int32|float32|float64. + Output image data type. One of: uint8|uint16|int16|uint32|int32|float32|float64. Data values are rounded + and clipped to integer ``dtype`` ranges. nodata: float, optional Output image nodata value. If set to None, an internal mask is written (recommended when ``creation_options`` are configured for lossy, e.g. JPEG, compression). creation_options: dict, optional - Driver specific creation options e.g. ``dict(compression='deflate')``. + Driver specific creation options e.g. ``dict(compress='deflate')``. See the `GDAL docs `_ for available keys and values. Returns diff --git a/homonim/raster_array.py b/homonim/raster_array.py index 6dff89f..fbfbb4e 100644 --- a/homonim/raster_array.py +++ b/homonim/raster_array.py @@ -351,8 +351,8 @@ def nodata(self, value: float): self._mask = None def _convert_array_dtype(self, dtype: str, nodata: Union[float, None] = None) -> np.array: - """ Return the image array converted to ``dtype``, rounding and clipping when necessary. Passing ``nodata`` - will set nodata areas in the returned array to this value. + """ Return the image array converted to ``dtype``, rounding and clipping when ``dtype`` is integer. Passing + ``nodata`` will set nodata areas in the returned array to this value. """ if nodata is not None and not rio.dtypes.can_cast_dtype(nodata, dtype): raise ValueError(f"'nodata' value: {nodata} cannot be safely cast to '{dtype}'") @@ -432,9 +432,8 @@ def to_rio_dataset( """ Write the RasterArray into a rasterio dataset, converting data types with rounding and clipping when necessary. - The RasterArray mask is written as an internal mask band when the dataset's nodata is None, otherwise no mask - is written, and the RasterArray array is written as is. Note that typically, dataset bounds would encompass - the RasterArray bounds. + The RasterArray mask is written as an internal mask band when the ``rio_dataset`` nodata is None, otherwise + no mask is written, and the array is written as is. Parameters ---------- From 3302fb9dd4b2286138f2899ad6b37e2ce1014309 Mon Sep 17 00:00:00 2001 From: Dugal Harris Date: Thu, 22 Feb 2024 12:12:32 +0200 Subject: [PATCH 17/31] add to output format examples & description --- docs/background.rst | 1 - docs/cli.rst | 33 ++++++++++++++++++++++++--------- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/docs/background.rst b/docs/background.rst index 0f8e875..2b0aedb 100644 --- a/docs/background.rst +++ b/docs/background.rst @@ -53,6 +53,5 @@ The minimum *kernel shape* is *model* dependent. For the two parameter :attr:`~ Kernel shape can be specified with the :option:`--kernel-shape ` option via the command line; or with the corresponding argument in the :meth:`homonim.RasterFuse.process` API. - .. |geedim| replace:: ``geedim`` .. _geedim: https://github.com/leftfield-geospatial/geedim diff --git a/docs/cli.rst b/docs/cli.rst index 10b5be4..fbca92f 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -1,8 +1,8 @@ Command line interface ----------------------- +====================== Getting started -^^^^^^^^^^^^^^^ +--------------- .. include:: ../README.rst :start-after: cli_start @@ -11,7 +11,7 @@ Getting started .. _cli_running_examples: Running examples -~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^ The examples that follow use the ``homonim`` test images. You can get these by downloading the repository directly: @@ -37,7 +37,7 @@ The commands that follow use relative paths, and should be run from *` and :option:`--ref-band ` options. @@ -122,19 +122,34 @@ Let's repeat the previous example to see how this would look. Here, we also spe Output file format -~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^ By default ``homonim`` writes output files as *GeoTIFF*\s with *DEFLATE* compression, *float32* data type and *nan* nodata value. These options are all configurable. -Here we create a corrected image in *JPEG* format, with *uint8* data type, *0* nodata value, and a *QUALITY* setting of *85*. +Here we create a *JPEG* compressed image in *GeoTIFF* format, with *uint8* data type: .. code:: shell - homonim fuse -od ./corrected --driver JPEG --dtype uint8 --nodata 0 -co QUALITY=85 ./source/ngi_rgb_byte_4.tif ./reference/sentinel2_b432_byte.tif + homonim fuse -od ./corrected --driver GTiff --dtype uint8 --nodata null -co COMPRESS=JPEG -co INTERLEAVE=PIXEL -co PHOTOMETRIC=YCBCR ./source/ngi_rgb_byte_4.tif ./reference/sentinel2_b432_byte.tif +Setting nodata to *null* forces the writing of an internal mask. This avoids lossy compression `transparency artifacts `__. JPEG compression is configured to sub-sample YCbCr colour space values with the ``-co INTERLEAVE=PIXEL -co PHOTOMETRIC=YCBCR`` creation options. + +Next, the corrected image is formatted as a 12 bit JPEG compressed GeoTIFF. A *null* nodata value is used again to write an internal mask, and the *uint16* data type gets truncated to 12 bits: + +.. code:: shell + + homonim fuse -od ./corrected --driver GTiff --dtype uint16 --nodata null -co COMPRESS=JPEG -co NBITS=12 ./source/ngi_rgb_byte_4.tif ./reference/sentinel2_b432_byte.tif + +The ``-co INTERLEAVE=PIXEL -co PHOTOMETRIC=YCBCR`` creation options could also be used with this example, to further compress the RGB image. + +.. note:: + + Support for 12 bit JPEG compression is `rasterio `__ build / package dependent. + +See the `GDAL docs `__ for available drivers and their parameters. Usage -^^^^^ +----- .. click:: homonim.cli:cli :prog: homonim From 8ace6b151678026b90605b626dabf5f7789fc393 Mon Sep 17 00:00:00 2001 From: Dugal Harris Date: Thu, 22 Feb 2024 12:49:30 +0200 Subject: [PATCH 18/31] add contributing page --- docs/contributing.rst | 8 ++++++++ docs/index.rst | 1 + 2 files changed, 9 insertions(+) create mode 100644 docs/contributing.rst diff --git a/docs/contributing.rst b/docs/contributing.rst new file mode 100644 index 0000000..dbcc6fb --- /dev/null +++ b/docs/contributing.rst @@ -0,0 +1,8 @@ +Contributing +============ + +Bug reports and feature requests are welcome, and can be made with the `github issue tracker `__. + +If ``homonim`` is useful to you, please consider `making a donation `__ to fund its development. + +While a lack of funding shouldn't prevent you from making feature requests, funded requests will be prioritised. \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst index 387a69f..04a8889 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -26,6 +26,7 @@ Contents api advanced tutorials + contributing * :ref:`genindex` From 1b07f1ee3c7ba099e6e5a635b63e5798947bc515 Mon Sep 17 00:00:00 2001 From: Dugal Harris Date: Thu, 22 Feb 2024 13:52:05 +0200 Subject: [PATCH 19/31] update codecov-action --- .github/workflows/run-unit-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-unit-tests.yml b/.github/workflows/run-unit-tests.yml index f5dcfe5..e2bcaf9 100644 --- a/.github/workflows/run-unit-tests.yml +++ b/.github/workflows/run-unit-tests.yml @@ -49,7 +49,7 @@ jobs: python -m pytest -n auto --cov=homonim --cov-report=term-missing --cov-report=xml:coverage.xml ./tests - name: Upload coverage - uses: codecov/codecov-action@v3 + uses: codecov/codecov-action@v4 with: fail_ci_if_error: false files: ./coverage.xml From a4d908972ed9f3719ed86431b3c29974f0d77eeb Mon Sep 17 00:00:00 2001 From: Dugal Harris Date: Thu, 22 Feb 2024 15:28:14 +0200 Subject: [PATCH 20/31] revert to GDAL_NUM_THREADS=1 --- homonim/raster_pair.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/homonim/raster_pair.py b/homonim/raster_pair.py index 472236c..e1fed03 100644 --- a/homonim/raster_pair.py +++ b/homonim/raster_pair.py @@ -289,7 +289,7 @@ def __enter__(self): self._stack = ExitStack() # TODO: revert to GDAL_NUM_THREADS='ALL_CPUS' when https://github.com/rasterio/rasterio/issues/2847 is resolved self._stack.enter_context( - rio.Env(GDAL_NUM_THREADS='ALL_CPUS', GTIFF_FORCE_RGBA=False, GDAL_TIFF_INTERNAL_MASK=True) + rio.Env(GDAL_NUM_THREADS=1, GTIFF_FORCE_RGBA=False, GDAL_TIFF_INTERNAL_MASK=True) ) self._stack.enter_context(logging_redirect_tqdm([logging.getLogger(__package__)])) self.open() From 5d22632a029f266e8f9de440a8437f6e39b3b481 Mon Sep 17 00:00:00 2001 From: Dugal Harris Date: Thu, 22 Feb 2024 15:39:17 +0200 Subject: [PATCH 21/31] switch to pyproject.toml --- pyproject.toml | 51 +++++++++++++++++++++++++++++++-- setup.py | 78 -------------------------------------------------- 2 files changed, 49 insertions(+), 80 deletions(-) delete mode 100644 setup.py diff --git a/pyproject.toml b/pyproject.toml index 1b68d94..247405a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,50 @@ +[project] +name = 'homonim' +description = 'Correct aerial and satellite imagery to surface reflectance.' +readme = 'README.rst' +requires-python = '>=3.8' +dependencies = [ + 'rasterio>=1.3.8', + 'opencv-python-headless>=4.5', + 'numpy>=1.19', + 'click>=8', + 'tqdm>=4.6', + 'pyyaml>=5.4', + 'cloup>=0.15', + 'tabulate>=0.8', +] +authors = [{name = 'Leftfield Geospatial'}] +license = {text = 'AGPL-3.0-or-later'} +keywords = [ + 'drone', 'aerial', 'satellite', 'image', 'surface reflectance', 'correction', 'harmonization', 'anisotropy', 'brdf', + 'atmospheric correction', +] +classifiers = [ + 'Programming Language :: Python :: 3', + 'License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)', + 'Operating System :: OS Independent', +] +dynamic = ['version'] + +[project.scripts] +homonim = 'homonim.cli:cli' + +[project.optional-dependencies] +test = ['pytest'] + +[project.urls] +Homepage = 'https://github.com/leftfield-geospatial/homonim' +Source = 'https://github.com/leftfield-geospatial/homonim' +Documentation = 'https://homonim.readthedocs.io' +Changelog = 'https://github.com/leftfield-geospatial/homonim/releases' +Issues = 'https://github.com/leftfield-geospatial/homonim/issues' + [build-system] -requires = ["setuptools>=42", "wheel"] -build-backend = "setuptools.build_meta" \ No newline at end of file +requires = ['setuptools'] +build-backend = 'setuptools.build_meta' + +[tool.setuptools] +packages = ['homonim'] + +[tool.setuptools.dynamic] +version = {attr = 'homonim.version.__version__'} diff --git a/setup.py b/setup.py deleted file mode 100644 index 838e745..0000000 --- a/setup.py +++ /dev/null @@ -1,78 +0,0 @@ -""" - Homonim: Correction of aerial and satellite imagery to surface reflectance - Copyright (C) 2021 Dugal Harris - Email: dugalh@gmail.com - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as - published by the Free Software Foundation, either version 3 of the - License, or any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see . -""" -import sys -from pathlib import Path - -from setuptools import setup, find_packages - -""" - Build and upload to testpypi: - conda install -c conda-forge build twine - python -m build - python -m twine upload --repository testpypi dist/* - - Install from testpypi: - python -m pip install --extra-index-url https://test.pypi.org/simple/ homonim - - Install local development version: - pip install -e . -""" - -this_directory = Path(__file__).parent -long_description = (this_directory / 'README.rst').read_text() -sys.path[0:0] = ['homonim'] -from version import __version__ - -setup( - name='homonim', - version=__version__, - description='Correct aerial and satellite imagery to surface reflectance.', - long_description=long_description, - long_description_content_type='text/x-rst', - author='Dugal Harris', - author_email='dugalh@gmail.com', - url='https://github.com/leftfield-geospatial/homonim', - license='AGPLv3', - packages=find_packages(include=['homonim']), - install_requires=[ - 'numpy>=1.19', - 'rasterio>=1.1', - 'opencv-python-headless>=4.5', - 'click>=8', - 'tqdm>=4.6', - 'pyyaml>=5.4', - 'cloup>=0.15', - 'tabulate>=0.8', - ], - python_requires='>=3.6', - classifiers=[ - 'Programming Language :: Python :: 3', - 'License :: OSI Approved :: GNU Affero General Public License v3', - 'Operating System :: OS Independent', - ], - keywords=[ - 'drone imagery', 'aerial imagery', 'satellite imagery', 'surface reflectance', 'correction', 'harmonization', - 'anisotropy', 'brdf', 'atmospheric correction', - ], - entry_points={'console_scripts': ['homonim=homonim.cli:cli']}, - project_urls={ - 'Documentation': 'https://homonim.readthedocs.io', - 'Source': 'https://github.com/leftfield-geospatial/homonim', - }, -) # yapf: disable From ab58b6dda769be3c181c2c3ba8a3cce423190cae Mon Sep 17 00:00:00 2001 From: Dugal Harris Date: Thu, 22 Feb 2024 15:39:40 +0200 Subject: [PATCH 22/31] update install instructions --- README.rst | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/README.rst b/README.rst index 4d65dc1..0a358f4 100644 --- a/README.rst +++ b/README.rst @@ -30,24 +30,21 @@ See the documentation site for more detail: https://homonim.readthedocs.io/. Installation ------------ -``homonim`` is available as a python 3 package, via ``pip`` and ``conda``. +``homonim`` is available as a python 3 package, via `pip `_ or `conda `_. -conda -~~~~~ - -Under Windows, using ``conda`` is the easiest way to resolve binary dependencies. The -`Miniconda `__ installation provides a minimal ``conda``. +pip +~~~ .. code:: shell - conda install -c conda-forge homonim + pip install homonim -pip -~~~ +conda +~~~~~ .. code:: shell - pip install homonim + conda install -c conda-forge homonim .. install_end From 5b474101aa5e61a48091840b8b8c41f48ffc9199 Mon Sep 17 00:00:00 2001 From: Dugal Harris Date: Thu, 22 Feb 2024 16:20:45 +0200 Subject: [PATCH 23/31] revert rasterio verion --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 247405a..c0717a4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ description = 'Correct aerial and satellite imagery to surface reflectance.' readme = 'README.rst' requires-python = '>=3.8' dependencies = [ - 'rasterio>=1.3.8', + 'rasterio>=1.1', 'opencv-python-headless>=4.5', 'numpy>=1.19', 'click>=8', From 0bbb447e27a09373e18e2e8e5a4f8aa1aef03906 Mon Sep 17 00:00:00 2001 From: Dugal Harris Date: Thu, 22 Feb 2024 17:05:03 +0200 Subject: [PATCH 24/31] - fix nodata callback (cast to dtype is checked by rasterio) - remove command docstring bottom separator & stripping --- homonim/cli.py | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/homonim/cli.py b/homonim/cli.py index 47ee22d..7d88588 100644 --- a/homonim/cli.py +++ b/homonim/cli.py @@ -70,7 +70,6 @@ def get_help(self, ctx: click.Context): '``(.*?)``': r'\g<1>', # convert from RST '``literal``' to 'literal' ':option:`(.*?)( <.*?>)?`': r'\g<1>', # convert ':option:`--name `' to '--name' ':option:`(.*?)`': r'\g<1>', # convert ':option:`--name`' to '--name' - '-{4,}': r'', # strip '----...' '`([^<]*) <([^>]*)>`_': r'\g<1>', # convert from RST cross-ref '` <>`_' to 'name' } # yapf: disable @@ -171,25 +170,17 @@ def _threads_cb(ctx: click.Context, param: click.Option, value): return threads -def _nodata_cb(ctx: click.Context, param: click.Option, value): +def _nodata_cb(ctx: click.Context, param: click.Option, value: str): """ click callback to convert --nodata value to None, nan or float. """ # adapted from rasterio https://github.com/rasterio/rasterio if value is None or value.lower() in ['null', 'nil', 'none', 'nada']: return None else: # check value is a number and can be cast to output dtype - # TODO: there is a bug here if --nodata is specified before --dtype on the CLI (ctx.params['dtype'] does not - # exist) - # TODO: add test for case where --nodata cannot be cast to --dtype try: value = float(value.lower()) - if not rio.dtypes.can_cast_dtype(value, ctx.params['dtype']): - raise click.BadParameter( - f'{value} cannot be cast to the output image data type {ctx.params["dtype"]}', param=param, - param_hint='nodata' - ) except (TypeError, ValueError): - raise click.BadParameter(f'{value} is not a number', param=param, param_hint='nodata') + raise click.BadParameter(f'{value} is not a number', param=param, param_hint='--nodata') return value @@ -423,7 +414,6 @@ def fuse( overwrite: bool, cmp_file: pathlib.Path, cmp_bands: Tuple[int], build_ovw: bool, proc_crs: ProcCrs, conf: pathlib.Path, param_image: bool, force_match: bool, **kwargs ): - # TODO: don't strip Examples heading underline # @formatter:off """ Correct image(s) to surface reflectance. @@ -465,16 +455,8 @@ def fuse( Correct bands 2 and 3 of `source.tif`, with bands 7 and 8 of `reference.tif`, using the default correction options:: homonim fuse -sb 2 -sb 3 -rb 7 -rb 8 source.tif reference.tif - - ---- """ # @formatter:on - - # try: - # kernel_shape = utils.validate_kernel_shape(kernel_shape, model=model) - # except Exception as ex: - # raise click.BadParameter(str(ex)) - # build configuration dictionaries for RasterFuse block_config = _update_existing_keys(RasterFuse.create_block_config(), **kwargs) model_config = _update_existing_keys(RasterFuse.create_model_config(), **kwargs) @@ -577,8 +559,6 @@ def compare( Compare `source.tif` and `corrected.tif` with `reference.tif`:: homonim compare source.tif corrected.tif reference.tif - - ---- """ # build configuration dictionary From 5b6bfabaa86858d8a7c2f0fa39892920433e1772 Mon Sep 17 00:00:00 2001 From: Dugal Harris Date: Thu, 22 Feb 2024 19:13:46 +0200 Subject: [PATCH 25/31] logger.warning -> warnings.warn --- homonim/errors.py | 16 ++++++++++++---- homonim/matched_pair.py | 23 +++++++++++++++-------- homonim/raster_array.py | 2 +- homonim/raster_pair.py | 31 ++++++++++++++++++++----------- homonim/utils.py | 8 ++++++-- homonim/version.py | 2 +- 6 files changed, 55 insertions(+), 27 deletions(-) diff --git a/homonim/errors.py b/homonim/errors.py index 3c44ed4..2998af2 100644 --- a/homonim/errors.py +++ b/homonim/errors.py @@ -46,9 +46,17 @@ class IoError(HomonimError): """ Raised when accessing unopened file(s). """ -class BandCountMismatchWarning(UserWarning): - """ Warn when the reference and source band counts don't match. """ +class HomonimWarning(RuntimeWarning): + """ Homonim runtime warning. """ -class NodataMaskWarning(UserWarning): - """ Warn when an image has no mask or nodata values. """ +class BandMatchWarning(HomonimWarning): + """ Warn about band matching issues. """ + + +class ImageFormatWarning(HomonimWarning): + """ Warn about image format issues. """ + + +class ConfigWarning(HomonimWarning): + """ Warn about configuration issues. """ diff --git a/homonim/matched_pair.py b/homonim/matched_pair.py index bb78463..f663080 100644 --- a/homonim/matched_pair.py +++ b/homonim/matched_pair.py @@ -18,7 +18,7 @@ """ import logging from pathlib import Path -from typing import List, Tuple, Union, Dict +from typing import Tuple, Union, Dict import warnings import numpy as np @@ -27,9 +27,11 @@ from tabulate import tabulate from homonim.raster_pair import RasterPairReader from homonim.enums import ProcCrs +from homonim.errors import BandMatchWarning logger = logging.getLogger(__name__) + class MatchedPairReader(RasterPairReader): _max_rel_wavelength_diff = .1 # maximum allowed relative distance between center wavelengths for matching @@ -116,7 +118,10 @@ def _get_band_info( # warn if some but not all bands have center wavelength metadata if (bands is not None) and len(refl_bands) and (not set(bands).issubset(refl_bands)): non_refl_bands = list(set(bands).difference(refl_bands)) - logger.warning(f'User specified {name} bands contain non-reflectance band index(es) {non_refl_bands}.') + warnings.warn( + f'User specified {name} bands contain non-reflectance band index(es) {non_refl_bands}.', + category=BandMatchWarning + ) log_prefix = f'Using {name} bands ' if (bands is not None) and (len(bands) > 0): @@ -154,15 +159,17 @@ def _get_band_info( center_wavelengths[bi - 1] = std_rgb_cws[im.colorinterp[bi - 1]] log_list.append(im.colorinterp[bi - 1].name) if len(log_list) > 0: - logger.warning( - f'Assigning standard {", ".join(log_list)} center wavelengths for {name}.' + warnings.warn( + f'Assigning standard {", ".join(log_list)} center wavelengths for {name}.', + category=BandMatchWarning ) # if the image has no valid RGB colorinterp's or center wavelengths if sum(np.isnan(center_wavelengths[non_alpha_bands - 1])) == 3: # assume RGB and assign center wavelengths in that order - logger.warning( - f'Assuming image is RGB, and assigning standard center wavelengths for {name}.' + warnings.warn( + f'Assuming image is RGB, and assigning standard center wavelengths for {name}.', + category=BandMatchWarning ) center_wavelengths[non_alpha_bands - 1] = list(std_rgb_cws.values()) @@ -230,7 +237,7 @@ def _match_pair_bands( if not self._force: raise ValueError(f'{ref_name} has fewer bands than {src_name}.') else: - logger.warning(f'{ref_name} has fewer bands than {src_name}.') + warnings.warn(f'{ref_name} has fewer bands than {src_name}.', category=BandMatchWarning) match_bands = np.array([np.nan] * len(src_bands)) # match src with ref bands based on center wavelength metadata, bypassing if force==True @@ -313,7 +320,7 @@ def greedy_match(dist: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: # if there are not N unmatched ref bands, truncate unmatched to just match what we can unmatched = np.where(unmatched)[0][:len(unmatch_ref_bands)] match_bands[unmatched] = unmatch_ref_bands - logger.warning( + logger.debug( f'Matching {src_name} band(s) {list(src_bands[unmatched])} in file order with {ref_name} ' f'band(s): {list(unmatch_ref_band_names)}.' ) diff --git a/homonim/raster_array.py b/homonim/raster_array.py index fbfbb4e..48418d3 100644 --- a/homonim/raster_array.py +++ b/homonim/raster_array.py @@ -478,7 +478,7 @@ def to_rio_dataset( f'The length of indexes ({len(indexes)}) exceeds the number of bands in the ' f'RasterArray ({self.count})' ) - # TODO: warn if nodata doesn't match (switch logger.warning -> warnings.warn at same time) + # TODO: warn if nodata doesn't match dataset if window is None: # a window defining the region in the dataset corresponding to the RasterArray extents diff --git a/homonim/raster_pair.py b/homonim/raster_pair.py index e1fed03..0b329bc 100644 --- a/homonim/raster_pair.py +++ b/homonim/raster_pair.py @@ -22,6 +22,7 @@ from contextlib import ExitStack from itertools import product from typing import Tuple, NamedTuple, Union, Iterable, List +import warnings import numpy as np import rasterio @@ -31,9 +32,11 @@ from rasterio.warp import Resampling from rasterio.windows import Window from tqdm.contrib.logging import logging_redirect_tqdm +from unicodedata import category from homonim import errors, utils from homonim.enums import ProcCrs +from homonim.errors import ImageFormatWarning, BandMatchWarning, ConfigWarning from homonim.raster_array import RasterArray logger = logging.getLogger(__name__) @@ -140,13 +143,16 @@ def validate_image_format(im: rasterio.DatasetReader): name = Path(im.name).name is_masked = any([MaskFlags.all_valid not in im.mask_flag_enums[bi] for bi in range(im.count)]) if im.nodata is None and not is_masked: - logger.warning( - f'{name} has no mask or nodata value, any invalid pixels should be masked before processing.' + warnings.warn( + f'{name} has no mask or nodata value, any invalid pixels should be masked before processing.', + category=ImageFormatWarning ) # warn if not standard north-up orientation if not utils.north_up(im): - logger.warning(f'{name} will be re-projected to a standard North-up orientation.') + warnings.warn( + f'{name} will be re-projected to a standard North-up orientation.', category=ImageFormatWarning + ) validate_image_format(src_im) validate_image_format(ref_im) @@ -154,8 +160,9 @@ def validate_image_format(im: rasterio.DatasetReader): if src_im.crs.to_proj4() != ref_im.crs.to_proj4(): src_name = Path(src_im.name).name ref_name = Path(ref_im.name).name - logger.warning( - f'Source and reference image will be re-projected to the same CRS: {src_name} and {ref_name}' + warnings.warn( + f'Source and reference image will be re-projected to the same CRS: {src_name} and {ref_name}', + category=ImageFormatWarning ) def _match_pair_bands( @@ -177,9 +184,9 @@ def _match_pair_bands( ) # warn if source and reference band counts don't match if len(src_bands) != len(ref_bands): - logger.warning( + warnings.warn( f'Source and reference image non-alpha band counts don`t match. Using the first {len(src_bands)} ' - f'non-alpha bands of reference.' + f'non-alpha bands of reference.', category=BandMatchWarning ) return src_bands, ref_bands @@ -210,8 +217,9 @@ def _resolve_proc_crs( # warn if the proc_crs value does not correspond to the lowest resolution of the source and # reference images rec_crs_str = ProcCrs.ref if src_pixel_smaller else ProcCrs.src - logger.warning( - f'proc_crs={rec_crs_str} is recommended when the source pixel size is {cmp_str} than the reference.' + warnings.warn( + f'proc_crs={rec_crs_str} is recommended when the source pixel size is {cmp_str} than the reference.', + category=ConfigWarning ) return proc_crs @@ -254,8 +262,9 @@ def _auto_block_shape(self, max_block_mem: float = np.inf) -> Tuple[int, int]: # warn if the block shape in the highest res image is less than a typical tile if np.any(block_shape / mem_scale < (256, 256)) and np.any(block_shape < (proc_win.height, proc_win.width)): - logger.warning( - f'The auto block shape is small: {block_shape}. Increase `max_block_mem` to improve processing times.' + warnings.warn( + f'The auto block shape is small: {block_shape}. Increase `max_block_mem` to improve processing times.', + category=ConfigWarning ) return tuple(block_shape) diff --git a/homonim/utils.py b/homonim/utils.py index 2188a5e..5f63895 100644 --- a/homonim/utils.py +++ b/homonim/utils.py @@ -19,6 +19,7 @@ import logging import pathlib +import warnings from multiprocessing import cpu_count from typing import Tuple, Dict, Union from contextlib import contextmanager @@ -31,7 +32,7 @@ from tabulate import TableFormat, Line, DataRow, tabulate from homonim.enums import Model, ProcCrs -from homonim.errors import ImageFormatError +from homonim.errors import ImageFormatError, ConfigWarning logger = logging.getLogger(__name__) tabulate.MIN_PADDING = 0 @@ -123,7 +124,10 @@ def validate_kernel_shape(kernel_shape: Tuple[int, int], model: Model = Model.ga if np.product(kernel_shape) < 2: raise ValueError('`kernel_shape` area should contain at least 2 elements for the gain-offset model.') elif np.product(kernel_shape) < 25: - logger.warning('A `kernel_shape` of at least 25 elements is recommended for the gain-offset model.') + warnings.warn( + 'A `kernel_shape` of at least 25 elements is recommended for the gain-offset model.', + category=ConfigWarning + ) if not np.all(kernel_shape >= 1): raise ValueError('`kernel_shape` must be a minimum of one in both dimensions.') return tuple(kernel_shape) diff --git a/homonim/version.py b/homonim/version.py index 73e3bb4..abeeedb 100644 --- a/homonim/version.py +++ b/homonim/version.py @@ -1 +1 @@ -__version__ = '0.3.2' +__version__ = '0.4.0' From 9802c139fb396d1297f240c947520d5ce892a3f6 Mon Sep 17 00:00:00 2001 From: Dugal Harris Date: Thu, 22 Feb 2024 19:14:45 +0200 Subject: [PATCH 26/31] change test_out_nodata_error for rasterio message --- tests/test_fuse_cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_fuse_cli.py b/tests/test_fuse_cli.py index 951978b..292222c 100644 --- a/tests/test_fuse_cli.py +++ b/tests/test_fuse_cli.py @@ -384,7 +384,7 @@ def test_out_nodata_error(runner: CliRunner, basic_fuse_cli_params: FuseCliParam cli_str = basic_fuse_cli_params.cli_str + f' --dtype uint8 --nodata nan' result = runner.invoke(cli, cli_str.split()) assert (result.exit_code != 0) - assert ('Invalid value' in result.output) + assert ('nodata' in result.output and 'data type' in result.output) def test_creation_options(runner: CliRunner, basic_fuse_cli_params: FuseCliParams): From 3d689789ce99afd720d490411d7210386267d241 Mon Sep 17 00:00:00 2001 From: Dugal Harris Date: Thu, 22 Feb 2024 20:59:05 +0200 Subject: [PATCH 27/31] add nodata warning and move NotGeoreferencedWarning suppression to __init__ --- homonim/raster_array.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/homonim/raster_array.py b/homonim/raster_array.py index 48418d3..021c50b 100644 --- a/homonim/raster_array.py +++ b/homonim/raster_array.py @@ -35,7 +35,7 @@ from rasterio.errors import NotGeoreferencedWarning from homonim import utils -from homonim.errors import ImageProfileError, ImageFormatError +from homonim.errors import ImageProfileError, ImageFormatError, ImageFormatWarning logger = logging.getLogger(__name__) @@ -478,7 +478,13 @@ def to_rio_dataset( f'The length of indexes ({len(indexes)}) exceeds the number of bands in the ' f'RasterArray ({self.count})' ) - # TODO: warn if nodata doesn't match dataset + if rio_dataset.nodata is not None and ( + self.nodata is None or not utils.nan_equals(self.nodata, rio_dataset.nodata) + ): + warnings.warn( + f"The dataset nodata: {rio_dataset.nodata} does not match the RasterArray nodata: {self.nodata}", + category=ImageFormatWarning + ) if window is None: # a window defining the region in the dataset corresponding to the RasterArray extents @@ -498,7 +504,7 @@ def to_rio_dataset( # convert data type and write to dataset array = bounded_ra._convert_array_dtype(rio_dataset.dtypes[0], nodata=rio_dataset.nodata) # TODO: rio is raising warnings when values in the write below are to a 12 bit jpeg and they overflow the 12bit - # bound. it only does this writing in a thread (?). + # bound. it only does this when writing in a thread (?). rio_dataset.write(array, window=window, indexes=indexes, **kwargs) if rio_dataset.nodata is None and (1 in np.array(indexes)): # write internal mask once (for first band) if nodata is None @@ -575,15 +581,11 @@ def reproject( else: _dst_array = np.zeros(shape, dtype=dtype) - # suppress NotGeoreferencedWarning which rasterio sometimes raises incorrectly - with warnings.catch_warnings(): # TODO: this is not thread-safe - warnings.simplefilter('ignore', category=NotGeoreferencedWarning) - - _, _dst_transform = reproject( - self._array, destination=_dst_array, src_crs=self._crs, src_transform=self._transform, - src_nodata=self._nodata, dst_crs=crs, dst_transform=transform, dst_nodata=nodata, - num_threads=multiprocessing.cpu_count(), resampling=resampling, **kwargs - ) + _, _dst_transform = reproject( + self._array, destination=_dst_array, src_crs=self._crs, src_transform=self._transform, + src_nodata=self._nodata, dst_crs=crs, dst_transform=transform, dst_nodata=nodata, + num_threads=multiprocessing.cpu_count(), resampling=resampling, **kwargs + ) return RasterArray(_dst_array, crs=crs, transform=_dst_transform, nodata=nodata) From ced600f29c132ac5119250aa6185044923a254b0 Mon Sep 17 00:00:00 2001 From: Dugal Harris Date: Thu, 22 Feb 2024 21:00:31 +0200 Subject: [PATCH 28/31] suppress NotGeoreferencedWarning globally for thread safety --- homonim/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/homonim/__init__.py b/homonim/__init__.py index a727d1d..f0723ff 100644 --- a/homonim/__init__.py +++ b/homonim/__init__.py @@ -19,13 +19,18 @@ import os import pathlib import logging +import warnings +from rasterio.errors import NotGeoreferencedWarning from homonim.compare import RasterCompare from homonim.enums import Model, ProcCrs from homonim.fuse import RasterFuse from homonim.kernel_model import KernelModel from homonim.stats import ParamStats +# suppress NotGeoreferencedWarning which rasterio can raise incorrectly +warnings.simplefilter('ignore', category=NotGeoreferencedWarning) + # Add a NullHandler to the package logger to hide logs by default. Applications can then add # their own handler(s). log = logging.getLogger(__name__) From 4aed31de8a92e85e6e9257ac385ecd811eead80f Mon Sep 17 00:00:00 2001 From: Dugal Harris Date: Thu, 22 Feb 2024 21:00:58 +0200 Subject: [PATCH 29/31] remove completed to dos --- homonim/cli.py | 1 - homonim/fuse.py | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/homonim/cli.py b/homonim/cli.py index 7d88588..77369e2 100644 --- a/homonim/cli.py +++ b/homonim/cli.py @@ -123,7 +123,6 @@ def _update_existing_keys(default_dict: Dict, **kwargs) -> Dict: def _configure_logging(verbosity: int): """Configure python logging level.""" - # TODO: change logger.warning to warnings.warn where a user may want to respond to a warning. # TODO: lose PlainInfoFormatter if not needed def showwarning(message, category, filename, lineno, file=None, line=None): diff --git a/homonim/fuse.py b/homonim/fuse.py index 9e361af..9f53b38 100644 --- a/homonim/fuse.py +++ b/homonim/fuse.py @@ -142,10 +142,9 @@ def create_out_profile( dict Output image profile. """ - # TODO: add compress_overview='auto' when gdal >=3.6 creation_options = creation_options or dict( tiled=True, blockxsize=512, blockysize=512, compress='deflate', interleave='band', photometric='minisblack', - bigtiff='if_safer', + bigtiff='if_safer' ) return dict(driver=driver, dtype=dtype, nodata=nodata, creation_options=creation_options) From 8429284eb260a453efc0b505b0245d579d93cda2 Mon Sep 17 00:00:00 2001 From: Dugal Harris Date: Thu, 22 Feb 2024 21:11:24 +0200 Subject: [PATCH 30/31] bump version --- .github/workflows/install-test-conda-forge.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/install-test-conda-forge.yml b/.github/workflows/install-test-conda-forge.yml index 32382db..883068e 100644 --- a/.github/workflows/install-test-conda-forge.yml +++ b/.github/workflows/install-test-conda-forge.yml @@ -32,7 +32,7 @@ jobs: - name: Install package run: | conda info - conda install homonim>=0.3.2 + conda install homonim>=0.4.0 conda list - name: Run CLI fusion test From 63d0954fba6ebea61774a10573bdd1fc987abc52 Mon Sep 17 00:00:00 2001 From: Dugal Harris Date: Thu, 22 Feb 2024 21:11:53 +0200 Subject: [PATCH 31/31] change opencv-python -> opencv-python-headless --- .github/workflows/run-integration-tests.yml | 2 +- .github/workflows/run-unit-tests.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-integration-tests.yml b/.github/workflows/run-integration-tests.yml index ccad6cd..dc8af58 100644 --- a/.github/workflows/run-integration-tests.yml +++ b/.github/workflows/run-integration-tests.yml @@ -30,7 +30,7 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install pytest pytest-xdist - python -m pip install rasterio opencv-python numpy click tqdm pyyaml cloup tabulate + python -m pip install rasterio opencv-python-headless numpy click tqdm pyyaml cloup tabulate - name: Test with pytest timeout-minutes: 5 diff --git a/.github/workflows/run-unit-tests.yml b/.github/workflows/run-unit-tests.yml index e2bcaf9..0fb2362 100644 --- a/.github/workflows/run-unit-tests.yml +++ b/.github/workflows/run-unit-tests.yml @@ -34,7 +34,7 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install flake8 pytest pytest-xdist pytest-cov - python -m pip install rasterio opencv-python numpy tabulate click tqdm pyyaml cloup + python -m pip install rasterio opencv-python-headless numpy tabulate click tqdm pyyaml cloup - name: Lint with flake8 run: |