    def coarsen(self, level: int):
        r"""Coarsen stream network to a minimum stream order level.

        Parameters
        ----------
        level : int
            Minimum stream order level, e.g. 3 to coarsen to 3rd order streams.

        Returns
        -------
        SurfaceWaterNetwork
            The returned :py:attr:`segments` attribute has a `traced_segnums`
            column, listing the segnums from the original network that were
            merged into each new segment. If the original network has
            catchment polygons, the result will aggregate the relevant
            catchment polygons, listed with a `catchment_segnums` column.

        Raises
        ------
        ValueError
            If no segments have a stream order of `level` or higher.

        Examples
        --------
        >>> import geopandas
        >>> import swn
        >>> from shapely import wkt
        >>> lines = geopandas.GeoSeries(list(wkt.loads('''\
        ... MULTILINESTRING(
        ...     (380 490, 370 420), (300 460, 370 420), (370 420, 420 330),
        ...     (190 250, 280 270), (225 180, 280 270), (280 270, 420 330),
        ...     (420 330, 584 250), (520 220, 584 250), (584 250, 710 160),
        ...     (740 270, 710 160), (735 350, 740 270), (880 320, 740 270),
        ...     (925 370, 880 320), (974 300, 880 320), (760 460, 735 350),
        ...     (650 430, 735 350), (710 160, 770 100), (700 90, 770 100),
        ...     (770 100, 820 40))''').geoms))
        >>> lines.index += 100
        >>> n = swn.SurfaceWaterNetwork.from_lines(lines)
        >>> n2 = n.coarsen(2)
        >>> n2.segments[["stream_order", "traced_segnums"]]
             stream_order traced_segnums
        102             2          [102]
        105             2          [105]
        108             3     [106, 108]
        109             3          [109]
        110             2          [110]
        111             2          [111]
        118             4     [116, 118]

        See Also
        --------
        aggregate : Aggregate segments (and catchments) to a coarser network of
            segnums.
        remove : Remove segments.
        """
        from shapely.ops import linemerge, unary_union

        # filter to a minimum stream order
        sel_level = self.segments.stream_order >= level
        if not sel_level.any():
            raise ValueError(f"no segments found with {level=} or higher")
        # downstream connectivity restricted to the selected segments
        sel_to_segnums = self.segments.loc[
            (self.segments["to_segnum"] != self.END_SEGNUM) & sel_level, "to_segnum"
        ]
        # invert to_segnum -> set of selected upstream segnums
        sel_from_segnums = (
            sel_to_segnums.to_frame(0)
            .reset_index()
            .groupby(0)
            .aggregate(set)
            .iloc[:, 0]
        ).astype(object)
        sel_from_segnums.index.name = self.segments.index.name
        sel_from_segnums.name = "from_segnums"
        # similar to "headwater": selected segnums that no selected segment
        # flows into
        sel_index = sel_level.index[sel_level]
        sel_start = sel_index[
            ~sel_index.isin(self.segments.loc[sel_level, "to_segnum"])
        ]

        # trace lines down from each start to outlets
        all_traced_segnums = set()
        traced_segnums_l = list()
        traced_segnums_d = dict()

        def trace_down(segnum):
            # Accumulate a run of segnums while walking downstream; a run is
            # cut (keyed by its last segnum) at an outlet or just upstream of
            # a confluence fed by more than one selected branch.
            traced_segnums_l.append(segnum)
            down_segnum = sel_to_segnums.get(segnum)
            if down_segnum is None or len(sel_from_segnums[down_segnum]) > 1:
                # find outlet or before confluence with more than 1 branch
                traced_segnums_d[segnum] = traced_segnums_l[:]
                traced_segnums_l.clear()
            # only descend once past each shared downstream path
            if down_segnum is not None and down_segnum not in all_traced_segnums:
                all_traced_segnums.add(segnum)
                trace_down(down_segnum)

        for segnum in sel_start:
            trace_down(segnum)

        # Convert traced_segnums_d to a Series
        traced_segnums = pd.Series(
            traced_segnums_d.values(),
            index=pd.Index(
                traced_segnums_d.keys(),
                dtype=self.segments.index.dtype,
                name=self.segments.index.name,
            ),
        )
        # Make index order similar to original
        if self.segments.index.is_monotonic_increasing:
            traced_segnums.sort_index(inplace=True)
        else:
            traced_segnums = traced_segnums.reindex(
                self.segments.index[self.segments.index.isin(traced_segnums.index)]
            )

        self.logger.debug(
            "traced down %d initial junctions to assemble %d traced segnums",
            len(sel_start),
            len(traced_segnums),
        )

        # Merge lines: each run of traced segnums becomes one geometry
        lines_l = []
        for segnums in traced_segnums.values:
            lines_l.append(linemerge(self.segments.geometry[segnums].to_list()))
        lines_gs = geopandas.GeoSeries(
            lines_l,
            index=traced_segnums.index,
            crs=self.segments.crs,
        )

        if self.catchments is None:
            catchments_gs = None
        else:
            # Negate selections: connectivity among the segments *below* the
            # requested level, used to gather their catchments upstream
            nsel_to_segnums = self.segments.loc[
                (self.segments["to_segnum"] != self.END_SEGNUM) & ~sel_level,
                "to_segnum",
            ]
            nsel_from_segnums = (
                nsel_to_segnums.to_frame(0)
                .reset_index()
                .groupby(0)
                .aggregate(set)
                .iloc[:, 0]
            ).astype(object)
            nsel_from_segnums.index.name = self.segments.index.name
            nsel_from_segnums.name = "from_segnums"

            def up_nsel_patch_segnums(segnum):
                # Yield segnum plus all unselected segnums draining into it
                yield segnum
                for up_segnum in nsel_from_segnums.get(segnum, []):
                    yield from up_nsel_patch_segnums(up_segnum)

            catchments_l = []
            catchment_segnums_l = []
            for segnums in traced_segnums.values:
                catchment_segnums = []
                for up_segnum in segnums:
                    catchment_segnums += list(up_nsel_patch_segnums(up_segnum))
                # dissolve contributing catchments into one polygon
                catchment_geom = unary_union(
                    self.catchments.geometry[catchment_segnums].to_list()
                )
                catchments_l.append(catchment_geom)
                catchment_segnums_l.append(catchment_segnums)
            catchments_gs = geopandas.GeoSeries(
                catchments_l, index=lines_gs.index, crs=self.catchments.crs
            )

        nc = SurfaceWaterNetwork.from_lines(lines_gs, catchments_gs)
        nc.segments["traced_segnums"] = traced_segnums
        if self.catchments is not None:
            nc.segments["catchment_segnums"] = catchment_segnums_l
        # from_lines() evaluates stream order starting at 1 for the new
        # network; shift so the minimum order equals `level` (see doctest)
        nc.segments["stream_order"] += level - 1
        return nc
def test_coarsen_coastal_swn_w_poly(coastal_swn_w_poly):
    """Coarsen a real-world network with catchment polygons at levels 1-4."""
    # level 1 gets back the same network, but with 'traced_segnums'
    # and 'catchment_segnums'
    nc1 = coastal_swn_w_poly.coarsen(1)
    assert len(nc1) == 304
    assert nc1.catchments is not None
    assert nc1.catchments.area.sum() == pytest.approx(165924652.6749345)
    pd.testing.assert_frame_equal(
        coastal_swn_w_poly.segments,
        nc1.segments.drop(columns=["traced_segnums", "catchment_segnums"]),
    )
    # at level 1, every segment traces only itself
    pd.testing.assert_series_equal(
        nc1.segments["traced_segnums"],
        nc1.segments.index.to_series().apply(lambda x: [x]),
        check_names=False,
    )
    pd.testing.assert_series_equal(
        nc1.segments["catchment_segnums"],
        nc1.segments.index.to_series().apply(lambda x: [x]),
        check_names=False,
    )

    # coarsen to level 2
    # NOTE(review): total area shrinks slightly from level 1 — presumably
    # catchments not contributing to the selected segments drop out; confirm
    nc2 = coastal_swn_w_poly.coarsen(2)
    assert len(nc2) == 66
    assert nc2.catchments.area.sum() == pytest.approx(165492135.76667663)
    assert set(nc2.segments.stream_order.unique()) == {2, 3, 4, 5}
    # validate one item
    item = nc2.segments.loc[3046539]
    assert item.geometry.length == pytest.approx(5670.908519171191)
    assert item.traced_segnums == [3046456, 3046455, 3046539]
    assert item.catchment_segnums == [
        3046456,
        3046604,
        3046605,
        3046455,
        3046409,
        3046539,
        3046542,
    ]

    # coarsen to level 3
    nc3 = coastal_swn_w_poly.coarsen(3)
    assert len(nc3) == 14
    assert nc3.catchments.area.sum() == pytest.approx(165491123.14988112)
    assert set(nc3.segments.stream_order.unique()) == {3, 4, 5}

    # coarsen to level 4 (same total area as level 3)
    nc4 = coastal_swn_w_poly.coarsen(4)
    assert len(nc4) == 4
    assert nc4.catchments.area.sum() == pytest.approx(165491123.14988112)
    assert set(nc4.segments.stream_order.unique()) == {4, 5}