diff --git a/doc/release_notes.rst b/doc/release_notes.rst index 90eb8fdf6..8b326777a 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -25,6 +25,8 @@ This part of documentation collects descriptive release notes to capture the mai * Update workflow to geopandas >= 1.0 `PR #1276 `__ +* Index hydro units by their location and drop use of alternative clustering option for hydro `PR #1331 `__ + **Minor Changes and bug-fixing** * Align structure of the components with consistency checks in the updated PyPSA version `PR #1315 `__ diff --git a/scripts/add_electricity.py b/scripts/add_electricity.py index 16cf2bc2d..abd427a50 100755 --- a/scripts/add_electricity.py +++ b/scripts/add_electricity.py @@ -243,7 +243,7 @@ def load_powerplants(ppl_fn): null_ppls = ppl[ppl.p_nom <= 0] if not null_ppls.empty: logger.warning(f"Drop powerplants with null capacity: {list(null_ppls.name)}.") - ppl = ppl.drop(null_ppls.index).reset_index(drop=True) + ppl = ppl.drop(null_ppls.index) return ppl @@ -472,6 +472,7 @@ def attach_hydro(n, costs, ppl): ppl = ( ppl.query('carrier == "hydro"') + .assign(ppl_id=lambda df: df.index) .reset_index(drop=True) .rename(index=lambda s: str(s) + " hydro") ) @@ -488,65 +489,36 @@ def attach_hydro(n, costs, ppl): ror = ppl.query('technology == "Run-Of-River"') phs = ppl.query('technology == "Pumped Storage"') hydro = ppl.query('technology == "Reservoir"') - if snakemake.params.alternative_clustering: - bus_id = ppl["region_id"] - else: - bus_id = ppl["bus"] inflow_idx = ror.index.union(hydro.index) if not inflow_idx.empty: with xr.open_dataarray(snakemake.input.profile_hydro) as inflow: - inflow_buses = bus_id[inflow_idx] - missing_plants = pd.Index(inflow_buses.unique()).difference( - inflow.indexes["plant"] - ) - # map power plants index (regions_onshore) into power plants ids (powerplants.csv) - # plants_to_keep correspond to "plant" index of regions_onshore - # plants_to_keep.index corresponds to bus_id of PyPSA network - plants_with_data = inflow_buses[inflow_buses.isin(inflow.indexes["plant"])] - plants_to_keep = plants_with_data.to_numpy() + found_plants = ppl.ppl_id[ppl.ppl_id.isin(inflow.indexes["plant"])] + missing_plants_idxs = ppl.index.difference(found_plants.index) # if missing time series are found, notify the user and exclude missing hydro plants - if not missing_plants.empty: + if not missing_plants_idxs.empty: # original total p_nom total_p_nom = ror.p_nom.sum() + hydro.p_nom.sum() - # update plants_with_data to ensure proper match between "plant" index and bus_id - plants_with_data = inflow_buses[inflow_buses.isin(plants_to_keep)] - network_buses_to_keep = plants_with_data.index - plants_to_keep = plants_with_data.to_numpy() - ror = ror.loc[ror.index.intersection(network_buses_to_keep)] - hydro = hydro.loc[hydro.index.intersection(network_buses_to_keep)] + ror = ror.loc[ror.index.intersection(found_plants.index)] + hydro = hydro.loc[hydro.index.intersection(found_plants.index)] # loss of p_nom loss_p_nom = ror.p_nom.sum() + hydro.p_nom.sum() - total_p_nom logger.warning( - f"'{snakemake.input.profile_hydro}' is missing inflow time-series for at least one bus: {', '.join(missing_plants)}." + f"'{snakemake.input.profile_hydro}' is missing inflow time-series for at least one bus: {', '.join(missing_plants_idxs)}." f"Corresponding hydro plants are dropped, corresponding to a total loss of {loss_p_nom:.2f}MW out of {total_p_nom:.2f}MW." ) # if there are any plants for which runoff data are available - if not plants_with_data.empty: - network_buses_to_keep = plants_with_data.index - plants_to_keep = plants_with_data.to_numpy() - - # hydro_inflow_factor is used to divide the inflow between the various units of each power plant - if not snakemake.params.alternative_clustering: - hydro_inflow_factor = hydro["p_nom"] / hydro.groupby("bus")[ - "p_nom" - ].transform("sum") - else: - hydro_inflow_factor = hydro["p_nom"] / hydro.groupby("region_id")[ - "p_nom" - ].transform("sum") - + if not found_plants.empty: inflow_t = ( - inflow.sel(plant=plants_to_keep) + inflow.sel(plant=found_plants.values) + .assign_coords(plant=found_plants.index) .rename({"plant": "name"}) - .assign_coords(name=network_buses_to_keep) .transpose("time", "name") .to_pandas() - * hydro_inflow_factor ) if "ror" in carriers and not ror.empty: diff --git a/scripts/build_renewable_profiles.py b/scripts/build_renewable_profiles.py index 49e4f1b5f..690340cd2 100644 --- a/scripts/build_renewable_profiles.py +++ b/scripts/build_renewable_profiles.py @@ -215,6 +215,7 @@ COPERNICUS_CRS = "EPSG:4326" GEBCO_CRS = "EPSG:4326" +PPL_CRS = "EPSG:4326" def check_cutout_match(cutout, geodf): @@ -356,9 +357,6 @@ def rescale_hydro(plants, runoff, normalize_using_yearly, normalization_year): logger.info("No bus has installed hydro plants, ignoring normalization.") return runoff - if snakemake.params.alternative_clustering: - plants = plants.set_index("shape_id") - years_statistics = normalize_using_yearly.index if isinstance(years_statistics, pd.DatetimeIndex): years_statistics = years_statistics.year @@ -488,7 +486,7 @@ def create_scaling_factor( if "snakemake" not in globals(): from _helpers import mock_snakemake - snakemake = mock_snakemake("build_renewable_profiles", technology="solar") + snakemake = mock_snakemake("build_renewable_profiles", technology="hydro") configure_logging(snakemake) pgb.streams.wrap_stderr() @@ -533,24 +531,6 @@ def create_scaling_factor( # the region should be restricted for non-hydro technologies, as the hydro potential is calculated across hydrobasins which may span beyond the region of the country cutout = filter_cutout_region(cutout, regions) - if snakemake.params.alternative_clustering: - regions = gpd.GeoDataFrame( - regions.reset_index() - .groupby("shape_id") - .agg( - { - "x": "mean", - "y": "mean", - "country": "first", - "geometry": "first", - "bus": "first", - } - ) - .reset_index() - .set_index("bus"), - crs=regions.crs, - ) - buses = regions.index func = getattr(cutout, resource.pop("method")) @@ -564,58 +544,33 @@ def create_scaling_factor( hydrobasins = gpd.read_file(hydrobasins_path) ppls = load_powerplants(snakemake.input.powerplants) - hydro_ppls = ppls[ppls.carrier == "hydro"] - - # commented out as rivers may span across multiple countries - # hydrobasins = hydrobasins[ - # [ - # any(country_shapes.geometry.intersects(geom)) - # for geom in hydrobasins.geometry - # ] - # ] # exclude hydrobasins shapes that do not intersect the countries of interest - - # select busbar whose location (p) belongs to at least one hydrobasin geometry - # if extendable option is true, all buses are included - # otherwise only where hydro powerplants are available are considered - if snakemake.params.alternative_clustering: - filter_bus_to_consider = regions.index.map( - lambda bus_id: config.get("extendable", False) - | (bus_id in hydro_ppls.region_id.values) - ) - ### TODO: quickfix. above case and the below case should by unified - if snakemake.params.alternative_clustering == False: - filter_bus_to_consider = regions.index.map( - lambda bus_id: config.get("extendable", False) - | (bus_id in hydro_ppls.bus.values) - ) - bus_to_consider = regions.index[filter_bus_to_consider] + all_hydro_ppls = ppls[ppls.carrier == "hydro"] + + # select hydro units within hydrobasins + hgdf = gpd.GeoDataFrame( + all_hydro_ppls, + index=all_hydro_ppls.index, + geometry=gpd.points_from_xy(all_hydro_ppls.lon, all_hydro_ppls.lat), + crs=PPL_CRS, + ).to_crs(hydrobasins.crs) + temp_gdf = gpd.sjoin(hgdf, hydrobasins, predicate="within", how="left") - # identify subset of buses within the hydrobasins - filter_bus_in_hydrobasins = regions[filter_bus_to_consider].apply( - lambda row: any(hydrobasins.geometry.contains(Point(row["x"], row["y"]))), - axis=1, + hydro_ppls = pd.DataFrame( + hgdf.loc[temp_gdf.index_right.dropna().index].drop(columns="geometry") ) - bus_in_hydrobasins = filter_bus_in_hydrobasins[filter_bus_in_hydrobasins].index bus_notin_hydrobasins = list( - set(bus_to_consider).difference(set(bus_in_hydrobasins)) + set(all_hydro_ppls.index).difference(set(hydro_ppls.index)) ) - resource["plants"] = regions.rename( - columns={"x": "lon", "y": "lat", "country": "countries"} - ).loc[bus_in_hydrobasins, ["lon", "lat", "countries", "shape_id"]] + resource["plants"] = hydro_ppls.rename(columns={"country": "countries"})[ + ["lon", "lat", "countries"] + ] - # TODO: these cases shall be fixed by restructuring the alternative clustering procedure - if snakemake.params.alternative_clustering == False: - resource["plants"]["installed_hydro"] = [ - True if (bus_id in hydro_ppls.bus.values) else False - for bus_id in resource["plants"].index - ] - else: - resource["plants"]["installed_hydro"] = [ - True if (bus_id in hydro_ppls.region_id.values) else False - for bus_id in resource["plants"].shape_id.values - ] + # TODO: possibly revise to account for non-existent hydro powerplants + resource["plants"]["installed_hydro"] = [ + True for bus_id in resource["plants"].index + ] # get normalization before executing runoff normalization = None @@ -631,8 +586,6 @@ def create_scaling_factor( else: # otherwise perform the calculations inflow = correction_factor * func(capacity_factor=True, **resource) - if snakemake.params.alternative_clustering: - inflow["plant"] = regions.shape_id.loc[inflow["plant"]].values if "clip_min_inflow" in config: inflow = inflow.where(inflow >= config["clip_min_inflow"], 0) @@ -647,14 +600,13 @@ def create_scaling_factor( get_hydro_capacities_annual_hydro_generation( path_hydro_capacities, countries, norm_year ) - * config.get("multiplier", 1.0) ) elif method == "eia": path_eia_stats = snakemake.input.eia_hydro_generation normalize_using_yearly = get_eia_annual_hydro_generation( path_eia_stats, countries - ) * config.get("multiplier", 1.0) + ) inflow = rescale_hydro( resource["plants"], inflow, normalize_using_yearly, norm_year @@ -669,8 +621,8 @@ def create_scaling_factor( # add zero values for out of hydrobasins elements if len(bus_notin_hydrobasins) > 0: - regions_notin = regions.loc[ - bus_notin_hydrobasins, ["x", "y", "country", "shape_id"] + regions_notin = all_hydro_ppls.loc[ + bus_notin_hydrobasins, ["lon", "lat", "country"] ] logger.warning( f"Buses {bus_notin_hydrobasins} are not contained into hydrobasins." @@ -684,11 +636,7 @@ def create_scaling_factor( ), dims=("plant", "time"), coords=dict( - plant=( - bus_notin_hydrobasins - if not snakemake.params.alternative_clustering - else regions_notin["shape_id"].values - ), + plant=bus_notin_hydrobasins, time=inflow.coords["time"], ), ) @@ -816,7 +764,7 @@ def create_scaling_factor( if snakemake.wildcards.technology.startswith("offwind"): logger.info("Calculate underwater fraction of connections.") - offshore_shape = gpd.read_file(paths["offshore_shapes"]).unary_union + offshore_shape = gpd.read_file(paths["offshore_shapes"]).union_all() underwater_fraction = [] for bus in buses: p = centre_of_mass.sel(bus=bus).data