Index hydro units by their location #1346

Open · wants to merge 2 commits into main
2 changes: 2 additions & 0 deletions doc/release_notes.rst
@@ -25,6 +25,8 @@ This part of documentation collects descriptive release notes to capture the mai
 
 * Update workflow to geopandas >= 1.0 `PR #1276 <https://github.com/pypsa-meets-earth/pypsa-earth/pull/1276>`__
 
+* Index hydro units by their location and drop use of alternative clustering option for hydro `PR #1331 <https://github.com/pypsa-meets-earth/pypsa-earth/pull/1331>`__
+
 **Minor Changes and bug-fixing**
 
 * Align structure of the components with consistency checks in the updated PyPSA version `PR #1315 <https://github.com/pypsa-meets-earth/pypsa-earth/pull/1315>`__
50 changes: 11 additions & 39 deletions scripts/add_electricity.py
@@ -243,7 +243,7 @@ def load_powerplants(ppl_fn):
     null_ppls = ppl[ppl.p_nom <= 0]
     if not null_ppls.empty:
         logger.warning(f"Drop powerplants with null capacity: {list(null_ppls.name)}.")
-        ppl = ppl.drop(null_ppls.index).reset_index(drop=True)
+        ppl = ppl.drop(null_ppls.index)
     return ppl
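Dropping the `reset_index(drop=True)` call is deliberate: the original `powerplants.csv` index now survives the filtering, and `attach_hydro` later stores it as `ppl_id` to match units against the `plant` index of the inflow time series. A minimal sketch with made-up data illustrates the difference:

```python
import pandas as pd

# Hypothetical data: after dropping the zero-capacity plant, the surviving
# rows keep their original labels (0 and 2) instead of being renumbered,
# so they can still be matched against per-plant inflow series later on.
ppl = pd.DataFrame({"name": ["A", "B", "C"], "p_nom": [100.0, 0.0, 50.0]})
null_ppls = ppl[ppl.p_nom <= 0]

ppl = ppl.drop(null_ppls.index)
print(ppl.index.tolist())  # [0, 2] -- with reset_index(drop=True) it would be [0, 1]
```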


@@ -472,6 +472,7 @@ def attach_hydro(n, costs, ppl):
 
     ppl = (
         ppl.query('carrier == "hydro"')
+        .assign(ppl_id=lambda df: df.index)
         .reset_index(drop=True)
         .rename(index=lambda s: str(s) + " hydro")
     )
@@ -488,65 +489,36 @@ def attach_hydro(n, costs, ppl):
     ror = ppl.query('technology == "Run-Of-River"')
     phs = ppl.query('technology == "Pumped Storage"')
     hydro = ppl.query('technology == "Reservoir"')
-    if snakemake.params.alternative_clustering:
-        bus_id = ppl["region_id"]
-    else:
-        bus_id = ppl["bus"]
 
     inflow_idx = ror.index.union(hydro.index)
     if not inflow_idx.empty:
         with xr.open_dataarray(snakemake.input.profile_hydro) as inflow:
-            inflow_buses = bus_id[inflow_idx]
-            missing_plants = pd.Index(inflow_buses.unique()).difference(
-                inflow.indexes["plant"]
-            )
-            # map power plants index (regions_onshore) into power plants ids (powerplants.csv)
-            # plants_to_keep correspond to "plant" index of regions_onshore
-            # plants_to_keep.index corresponds to bus_id of PyPSA network
-            plants_with_data = inflow_buses[inflow_buses.isin(inflow.indexes["plant"])]
-            plants_to_keep = plants_with_data.to_numpy()
+            found_plants = ppl.ppl_id[ppl.ppl_id.isin(inflow.indexes["plant"])]
+            missing_plants_idxs = ppl.index.difference(found_plants.index)
 
             # if missing time series are found, notify the user and exclude missing hydro plants
-            if not missing_plants.empty:
+            if not missing_plants_idxs.empty:
                 # original total p_nom
                 total_p_nom = ror.p_nom.sum() + hydro.p_nom.sum()
-                # update plants_with_data to ensure proper match between "plant" index and bus_id
-                plants_with_data = inflow_buses[inflow_buses.isin(plants_to_keep)]
-                network_buses_to_keep = plants_with_data.index
-                plants_to_keep = plants_with_data.to_numpy()
-
-                ror = ror.loc[ror.index.intersection(network_buses_to_keep)]
-                hydro = hydro.loc[hydro.index.intersection(network_buses_to_keep)]
+                ror = ror.loc[ror.index.intersection(found_plants.index)]
+                hydro = hydro.loc[hydro.index.intersection(found_plants.index)]
                 # loss of p_nom
                 loss_p_nom = ror.p_nom.sum() + hydro.p_nom.sum() - total_p_nom
 
                 logger.warning(
-                    f"'{snakemake.input.profile_hydro}' is missing inflow time-series for at least one bus: {', '.join(missing_plants)}."
+                    f"'{snakemake.input.profile_hydro}' is missing inflow time-series for at least one bus: {', '.join(missing_plants_idxs)}."
                    f"Corresponding hydro plants are dropped, corresponding to a total loss of {loss_p_nom:.2f}MW out of {total_p_nom:.2f}MW."
                )
 
-            # if there are any plants for which runoff data are available
-            if not plants_with_data.empty:
-                network_buses_to_keep = plants_with_data.index
-                plants_to_keep = plants_with_data.to_numpy()
-
             # hydro_inflow_factor is used to divide the inflow between the various units of each power plant
-            if not snakemake.params.alternative_clustering:
-                hydro_inflow_factor = hydro["p_nom"] / hydro.groupby("bus")[
-                    "p_nom"
-                ].transform("sum")
-            else:
-                hydro_inflow_factor = hydro["p_nom"] / hydro.groupby("region_id")[
-                    "p_nom"
-                ].transform("sum")
+            hydro_inflow_factor = hydro["p_nom"] / hydro.groupby("bus")[
+                "p_nom"
+            ].transform("sum")
 
+            if not found_plants.empty:
                 inflow_t = (
-                    inflow.sel(plant=plants_to_keep)
+                    inflow.sel(plant=found_plants.values)
+                    .assign_coords(plant=found_plants.index)
                     .rename({"plant": "name"})
-                    .assign_coords(name=network_buses_to_keep)
                     .transpose("time", "name")
                     .to_pandas()
                     * hydro_inflow_factor
                 )
 
     if "ror" in carriers and not ror.empty:
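Taken together, the new `attach_hydro` logic selects inflow series by powerplant id rather than by bus or region, then splits each bus's inflow across its units in proportion to `p_nom`. A minimal sketch with toy numbers (two units on one bus; the real data come from powerplants.csv and the hydro profile file) of how the selection and scaling compose:

```python
import pandas as pd
import xarray as xr

# Toy stand-ins for ppl and inflow; unit names and values are made up.
hydro = pd.DataFrame(
    {"bus": ["bus1", "bus1"], "p_nom": [300.0, 100.0], "ppl_id": [7, 8]},
    index=["7 hydro", "8 hydro"],
)
# each unit gets a share of its bus's inflow proportional to its p_nom
hydro_inflow_factor = hydro["p_nom"] / hydro.groupby("bus")["p_nom"].transform("sum")

inflow = xr.DataArray(
    [[1.0, 2.0], [3.0, 4.0]],
    dims=("plant", "time"),
    coords={"plant": [7, 8], "time": [0, 1]},
)
inflow_t = (
    inflow.sel(plant=hydro.ppl_id.values)  # pick series by powerplant id
    .assign_coords(plant=hydro.index)      # relabel to network unit names
    .rename({"plant": "name"})
    .transpose("time", "name")
    .to_pandas()
    * hydro_inflow_factor                  # shares: 0.75 and 0.25
)
print(inflow_t)
```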
106 changes: 27 additions & 79 deletions scripts/build_renewable_profiles.py
@@ -215,6 +215,7 @@
 
 COPERNICUS_CRS = "EPSG:4326"
 GEBCO_CRS = "EPSG:4326"
+PPL_CRS = "EPSG:4326"
 
 
 def check_cutout_match(cutout, geodf):
@@ -356,9 +357,6 @@ def rescale_hydro(plants, runoff, normalize_using_yearly, normalization_year):
         logger.info("No bus has installed hydro plants, ignoring normalization.")
         return runoff
 
-    if snakemake.params.alternative_clustering:
-        plants = plants.set_index("shape_id")
-
     years_statistics = normalize_using_yearly.index
     if isinstance(years_statistics, pd.DatetimeIndex):
         years_statistics = years_statistics.year
@@ -488,7 +486,7 @@ def create_scaling_factor(
     if "snakemake" not in globals():
         from _helpers import mock_snakemake
 
-        snakemake = mock_snakemake("build_renewable_profiles", technology="solar")
+        snakemake = mock_snakemake("build_renewable_profiles", technology="hydro")
     configure_logging(snakemake)
 
     pgb.streams.wrap_stderr()
@@ -533,24 +531,6 @@ def create_scaling_factor(
     # the region should be restricted for non-hydro technologies, as the hydro potential is calculated across hydrobasins which may span beyond the region of the country
     cutout = filter_cutout_region(cutout, regions)
 
-    if snakemake.params.alternative_clustering:
-        regions = gpd.GeoDataFrame(
-            regions.reset_index()
-            .groupby("shape_id")
-            .agg(
-                {
-                    "x": "mean",
-                    "y": "mean",
-                    "country": "first",
-                    "geometry": "first",
-                    "bus": "first",
-                }
-            )
-            .reset_index()
-            .set_index("bus"),
-            crs=regions.crs,
-        )
-
     buses = regions.index
 
     func = getattr(cutout, resource.pop("method"))
@@ -564,58 +544,33 @@
        hydrobasins = gpd.read_file(hydrobasins_path)
        ppls = load_powerplants(snakemake.input.powerplants)
 
-       hydro_ppls = ppls[ppls.carrier == "hydro"]
-
        # commented out as rivers may span across multiple countries
        # hydrobasins = hydrobasins[
        #     [
        #         any(country_shapes.geometry.intersects(geom))
        #         for geom in hydrobasins.geometry
        #     ]
        # ]  # exclude hydrobasins shapes that do not intersect the countries of interest
 
-       # select busbar whose location (p) belongs to at least one hydrobasin geometry
-       # if extendable option is true, all buses are included
-       # otherwise only where hydro powerplants are available are considered
-       if snakemake.params.alternative_clustering:
-           filter_bus_to_consider = regions.index.map(
-               lambda bus_id: config.get("extendable", False)
-               | (bus_id in hydro_ppls.region_id.values)
-           )
-       ### TODO: quickfix. above case and the below case should by unified
-       if snakemake.params.alternative_clustering == False:
-           filter_bus_to_consider = regions.index.map(
-               lambda bus_id: config.get("extendable", False)
-               | (bus_id in hydro_ppls.bus.values)
-           )
-       bus_to_consider = regions.index[filter_bus_to_consider]
+       all_hydro_ppls = ppls[ppls.carrier == "hydro"]
 
+       # select hydro units within hydrobasins
+       hgdf = gpd.GeoDataFrame(
+           all_hydro_ppls,
+           index=all_hydro_ppls.index,
+           geometry=gpd.points_from_xy(all_hydro_ppls.lon, all_hydro_ppls.lat),
+           crs=PPL_CRS,
+       ).to_crs(hydrobasins.crs)
+       temp_gdf = gpd.sjoin(hgdf, hydrobasins, predicate="within", how="left")
 
-       # identify subset of buses within the hydrobasins
-       filter_bus_in_hydrobasins = regions[filter_bus_to_consider].apply(
-           lambda row: any(hydrobasins.geometry.contains(Point(row["x"], row["y"]))),
-           axis=1,
-       )
-       bus_in_hydrobasins = filter_bus_in_hydrobasins[filter_bus_in_hydrobasins].index
+       hydro_ppls = pd.DataFrame(
+           hgdf.loc[temp_gdf.index_right.dropna().index].drop(columns="geometry")
+       )
 
        bus_notin_hydrobasins = list(
-           set(bus_to_consider).difference(set(bus_in_hydrobasins))
+           set(all_hydro_ppls.index).difference(set(hydro_ppls.index))
        )
 
-       resource["plants"] = regions.rename(
-           columns={"x": "lon", "y": "lat", "country": "countries"}
-       ).loc[bus_in_hydrobasins, ["lon", "lat", "countries", "shape_id"]]
+       resource["plants"] = hydro_ppls.rename(columns={"country": "countries"})[
+           ["lon", "lat", "countries"]
+       ]
 
-       # TODO: these cases shall be fixed by restructuring the alternative clustering procedure
-       if snakemake.params.alternative_clustering == False:
-           resource["plants"]["installed_hydro"] = [
-               True if (bus_id in hydro_ppls.bus.values) else False
-               for bus_id in resource["plants"].index
-           ]
-       else:
-           resource["plants"]["installed_hydro"] = [
-               True if (bus_id in hydro_ppls.region_id.values) else False
-               for bus_id in resource["plants"].shape_id.values
-           ]
+       # TODO: possibly revise to account for non-existent hydro powerplants
+       resource["plants"]["installed_hydro"] = [
+           True for bus_id in resource["plants"].index
+       ]

        # get normalization before executing runoff
        normalization = None
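The spatial join above is the core of the new location-based indexing: each hydro unit becomes a point geometry, is reprojected to the hydrobasins CRS, and is kept only if it falls within some basin polygon. A self-contained sketch with toy geometries (not the real hydrobasins data):

```python
import geopandas as gpd
from shapely.geometry import Point, box

# One toy "basin" polygon and two plants; in the workflow the polygons come
# from the hydrobasins file and the points from powerplants.csv lon/lat.
basins = gpd.GeoDataFrame(
    {"basin": ["b1"]}, geometry=[box(0, 0, 1, 1)], crs="EPSG:4326"
)
plants = gpd.GeoDataFrame(
    {"name": ["inside", "outside"]},
    geometry=[Point(0.5, 0.5), Point(2.0, 2.0)],
    crs="EPSG:4326",
)

# a left join keeps every plant; index_right is NaN for points in no basin
joined = gpd.sjoin(plants, basins, predicate="within", how="left")
in_basins = plants[joined.index_right.notna()]
not_in_basins = plants[joined.index_right.isna()]
print(list(in_basins.name), list(not_in_basins.name))  # ['inside'] ['outside']
```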
@@ -631,8 +586,6 @@
        else:
            # otherwise perform the calculations
            inflow = correction_factor * func(capacity_factor=True, **resource)
-           if snakemake.params.alternative_clustering:
-               inflow["plant"] = regions.shape_id.loc[inflow["plant"]].values
 
        if "clip_min_inflow" in config:
            inflow = inflow.where(inflow >= config["clip_min_inflow"], 0)
@@ -647,14 +600,13 @@
                get_hydro_capacities_annual_hydro_generation(
                    path_hydro_capacities, countries, norm_year
                )
+               * config.get("multiplier", 1.0)
            )
 
        elif method == "eia":
            path_eia_stats = snakemake.input.eia_hydro_generation
            normalize_using_yearly = get_eia_annual_hydro_generation(
                path_eia_stats, countries
-           ) * config.get("multiplier", 1.0)
+           )
 
        inflow = rescale_hydro(
            resource["plants"], inflow, normalize_using_yearly, norm_year
@@ -669,8 +621,8 @@

        # add zero values for out of hydrobasins elements
        if len(bus_notin_hydrobasins) > 0:
-           regions_notin = regions.loc[
-               bus_notin_hydrobasins, ["x", "y", "country", "shape_id"]
+           regions_notin = all_hydro_ppls.loc[
+               bus_notin_hydrobasins, ["lon", "lat", "country"]
            ]
            logger.warning(
                f"Buses {bus_notin_hydrobasins} are not contained into hydrobasins."
@@ -684,11 +636,7 @@
                ),
                dims=("plant", "time"),
                coords=dict(
-                   plant=(
-                       bus_notin_hydrobasins
-                       if not snakemake.params.alternative_clustering
-                       else regions_notin["shape_id"].values
-                   ),
+                   plant=bus_notin_hydrobasins,
                    time=inflow.coords["time"],
                ),
            )
@@ -816,7 +764,7 @@
 
     if snakemake.wildcards.technology.startswith("offwind"):
         logger.info("Calculate underwater fraction of connections.")
-        offshore_shape = gpd.read_file(paths["offshore_shapes"]).unary_union
+        offshore_shape = gpd.read_file(paths["offshore_shapes"]).union_all()
         underwater_fraction = []
         for bus in buses:
             p = centre_of_mass.sel(bus=bus).data
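This last hunk is a follow-up to the geopandas >= 1.0 migration noted in the release notes: the `unary_union` property is deprecated there in favour of the `union_all()` method. A quick sketch of the replacement call:

```python
import geopandas as gpd
from shapely.geometry import box

# Two touching squares merge into a single geometry spanning both.
shapes = gpd.GeoSeries([box(0, 0, 1, 1), box(1, 0, 2, 1)])
merged = shapes.union_all()  # replaces the deprecated shapes.unary_union
print(merged.bounds)  # (0.0, 0.0, 2.0, 1.0)
```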