Skip to content

Commit

Permalink
Merge pull request natcap#365 from davemfish/bugfix/363-strahler-d8
Browse files Browse the repository at this point in the history
Bugfix for nodata handling in `extract_strahler_streams_d8`
  • Loading branch information
phargogh authored Jan 30, 2024
2 parents c552799 + 6407a43 commit 26aa8fa
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 22 deletions.
4 changes: 4 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ Unreleased Changes
environment variables), the GDAL cache max is checked against the amount of
memory available on the compute node. If GDAL may exceed the available slurm
memory, a warning is issued or logged. https://github.com/natcap/pygeoprocessing/issues/361
* Fixed an issue in ``extract_strahler_streams_d8`` where a nodata pixel
could be mistakenly treated as a stream seed point, ultimately creating
a stream feature with no geometry.
https://github.com/natcap/pygeoprocessing/issues/361

2.4.2 (2023-10-24)
------------------
Expand Down
26 changes: 16 additions & 10 deletions src/pygeoprocessing/routing/routing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ cdef struct MFDFlowPixelType:
double sum_of_weights

# used when constructing geometric streams, the x/y coordinates represent
# a seed point to walk upstream from, the upstream_flow_dir indicates the
# a seed point to walk upstream from, the upstream_d8_dir indicates the
# d8 flow direction to walk and the source_id indicates the source stream it
# spawned from
cdef struct StreamConnectivityPoint:
Expand Down Expand Up @@ -3197,8 +3197,6 @@ def extract_strahler_streams_d8(
from the upstream to downstream component of this stream
segment.
* "outlet" (int): 1 if this segment is an outlet, 0 if not.
* "river_id": unique ID among all stream segments which are
hydrologically connected.
* "us_fa" (int): flow accumulation value at the upstream end of
the stream segment.
* "ds_fa" (int): flow accumulation value at the downstream end of
Expand Down Expand Up @@ -3291,6 +3289,9 @@ def extract_strahler_streams_d8(
cdef int flow_nodata = pygeoprocessing.get_raster_info(
flow_dir_d8_raster_path_band[0])['nodata'][
flow_dir_d8_raster_path_band[1]-1]
cdef double flow_accum_nodata = pygeoprocessing.get_raster_info(
flow_accum_raster_path_band[0])['nodata'][
flow_accum_raster_path_band[1]-1]

# D8 flow directions encoded as
# 321
Expand Down Expand Up @@ -3320,7 +3321,7 @@ def extract_strahler_streams_d8(
# this array is filled out as upstream directions are calculated and
# indexed by `upstream_count`
cdef int *upstream_dirs = [0, 0, 0, 0, 0, 0, 0, 0]
cdef unsigned long local_flow_accum
cdef double local_flow_accum
# used to determine if source is a drain and should be tracked
cdef int is_drain

Expand Down Expand Up @@ -3354,16 +3355,17 @@ def extract_strahler_streams_d8(
is_drain = 0
x_l = xoff + i
y_l = yoff + j
local_flow_accum = <long>flow_accum_managed_raster.get(
local_flow_accum = <double>flow_accum_managed_raster.get(
x_l, y_l)
if local_flow_accum < min_flow_accum_threshold:
if (local_flow_accum < min_flow_accum_threshold or
_is_close(local_flow_accum,
flow_accum_nodata, 1e-8, 1e-5)):
continue
# check to see if it's a drain
d_n = <int>flow_dir_managed_raster.get(x_l, y_l)
x_n = x_l + D8_XOFFSET[d_n]
y_n = y_l + D8_YOFFSET[d_n]

if (x_n < 0 or y_n < 0 or x_n >= n_cols or y_n >= n_cols or
if (x_n < 0 or y_n < 0 or x_n >= n_cols or y_n >= n_rows or
<int>flow_dir_managed_raster.get(
x_n, y_n) == flow_nodata):
is_drain = 1
Expand All @@ -3385,7 +3387,7 @@ def extract_strahler_streams_d8(
if d_n == flow_nodata:
continue
if (D8_REVERSE_DIRECTION[d] == d_n and
<long>flow_accum_managed_raster.get(
<double>flow_accum_managed_raster.get(
x_n, y_n) >= min_flow_accum_threshold):
upstream_dirs[upstream_count] = d
upstream_count += 1
Expand Down Expand Up @@ -3434,6 +3436,10 @@ def extract_strahler_streams_d8(
flow_accum_managed_raster, flow_dir_managed_raster, flow_nodata,
min_flow_accum_threshold, coord_to_stream_ids)
if payload is None:
LOGGER.debug(
f'no geometry for source point at '
f'{source_stream_point.xi}, {source_stream_point.yi} '
f'upstream direction: {source_stream_point.upstream_d8_dir}')
continue
x_u, y_u, ds_x_1, ds_y_1, upstream_id_list, stream_line = payload

Expand Down Expand Up @@ -4871,7 +4877,7 @@ cdef _calculate_stream_geometry(
def _delete_feature(
stream_feature, stream_layer, upstream_to_downstream_id,
downstream_to_upstream_ids):
"""Helper for Mahler extraction to delete all references to a stream.
"""Helper for Strahler extraction to delete all references to a stream.
Args:
stream_feature (ogr.Feature): feature to delete
Expand Down
29 changes: 17 additions & 12 deletions tests/test_routing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1131,19 +1131,26 @@ def test_flow_dir_mfd_plateau(self):
flow_dir_array[1:-1, 1: -1], flow_dir_nodata).any(),
'all flow directions should be defined')

def test_extract_straher_streams_watersheds_d8(self):
def test_extract_strahler_streams_watersheds_d8(self):
"""PGP.routing: test Strahler stream and subwatershed creation."""
# make a long canyon herringbone style DEM that will have a main
# central river and single pixel tributaries every other pixel to
# the west and east as one steps south the canyon
# the west and east as one steps north/south through the canyon
target_nodata = -1
n = 53
dem_array = numpy.zeros((n, 3))
dem_array[0, 1] = -0.5
# make notches every other row for both columns
dem_array[1::2, 0::2] = 1
# near the downstream end, set values in such a way that a nodata
# pixel would otherwise be treated as a stream seed point if nodata
# is not properly masked.
dem_array[1, 1] = -0.5 # a drain
dem_array[0, :] = 1 # two high points that drain into nodata pixel
dem_array[0, 1] = target_nodata

dem_path = os.path.join(self.workspace_dir, 'dem.tif')
pygeoprocessing.numpy_array_to_raster(
dem_array, -1, (1, -1), (0, 0), None, dem_path)
dem_array, target_nodata, (1, -1), (0, 0), None, dem_path)

filled_pits_path = os.path.join(self.workspace_dir, 'filled_pits.tif')
pygeoprocessing.routing.fill_pits(
Expand All @@ -1168,11 +1175,10 @@ def test_extract_straher_streams_watersheds_d8(self):
autotune_flow_accumulation=False,
min_flow_accum_threshold=1)

# every pixel is a stream
stream_vector = gdal.OpenEx(
no_autotune_stream_vector_path, gdal.OF_VECTOR)
stream_layer = stream_vector.GetLayer()
self.assertEqual(stream_layer.GetFeatureCount(), n*2+2)
self.assertEqual(stream_layer.GetFeatureCount(), n*2+1)
stream_layer = None
stream_vector = None

Expand All @@ -1186,11 +1192,10 @@ def test_extract_straher_streams_watersheds_d8(self):
autotune_flow_accumulation=True,
min_flow_accum_threshold=2)

# n-1 streams
stream_vector = gdal.OpenEx(
autotune_stream_vector_path, gdal.OF_VECTOR)
stream_layer = stream_vector.GetLayer()
self.assertEqual(stream_layer.GetFeatureCount(), n-2)
self.assertEqual(stream_layer.GetFeatureCount(), n-3)

# this gets just the single outlet feature
stream_layer.SetAttributeFilter(f'"outlet"=1')
Expand All @@ -1212,8 +1217,9 @@ def test_extract_straher_streams_watersheds_d8(self):
watershed_confluence_vector_path, gdal.OF_VECTOR)
watershed_layer = watershed_vector.GetLayer()
# there should be exactly an integer half number of watersheds as
# the length of the canyon
self.assertEqual(watershed_layer.GetFeatureCount(), n//2)
# the length of the canyon; -1 for the special configuration
# around the nodata pixel.
self.assertEqual(watershed_layer.GetFeatureCount(), n//2 - 1)
watershed_vector = None
watershed_layer = None

Expand All @@ -1226,8 +1232,7 @@ def test_extract_straher_streams_watersheds_d8(self):
watershed_vector = gdal.OpenEx(
watershed_confluence_vector_path, gdal.OF_VECTOR)
watershed_layer = watershed_vector.GetLayer()
# every stream should have a watershed
self.assertEqual(watershed_layer.GetFeatureCount(), n-2)
self.assertEqual(watershed_layer.GetFeatureCount(), n-4)
watershed_vector = None
watershed_layer = None

Expand Down

0 comments on commit 26aa8fa

Please sign in to comment.