From e3026f6f46e1442a98225641bf32323136e14aaa Mon Sep 17 00:00:00 2001 From: rakow Date: Thu, 5 Sep 2024 11:53:48 +0200 Subject: [PATCH] integrate survey data for workplace assignment --- Makefile | 4 +- .../AttributedActivityFacility.java | 5 +- .../facilities/CreateMATSimFacilities.java | 65 +++++++++++------- .../matsim/prepare/population/Attributes.java | 11 +-- .../population/CommuterAssignment.java | 47 +++++++++++-- .../population/CreateBerlinPopulation.java | 2 +- .../prepare/population/FacilityIndex.java | 67 ++++++++++++++++++- .../population/InitLocationChoice.java | 42 +++++++++--- src/main/python/extract_workplaces.py | 30 +++++++++ 9 files changed, 223 insertions(+), 50 deletions(-) create mode 100644 src/main/python/extract_workplaces.py diff --git a/Makefile b/Makefile index e98efdf5..f3b17649 100644 --- a/Makefile +++ b/Makefile @@ -134,9 +134,10 @@ $p/berlin-$V-counts-vmz.xml.gz: $p/berlin-$V-network.xml.gz --target-crs $(CRS)\ --counts-mapping input/counts_mapping.csv -$p/berlin-$V-facilities.xml.gz: $p/berlin-$V-network.xml.gz input/facilities.gpkg +$p/berlin-$V-facilities.xml.gz: $p/berlin-$V-network.xml.gz input/facilities.gpkg $(berlin)/input/shp/Planungsraum_EPSG_25833.shp $(sc) prepare facilities --network $< --shp $(word 2,$^)\ --facility-mapping input/facility_mapping.json\ + --zones-shp $(word 3,$^)\ --output $@ $p/berlin-only-$V-100pct.plans.xml.gz: input/PLR_2013_2020.csv $(berlin)/input/shp/Planungsraum_EPSG_25833.shp input/facilities.gpkg @@ -189,6 +190,7 @@ $p/berlin-initial-$V-25pct.plans.xml.gz: $p/berlin-activities-$V-25pct.plans.xml --network $(word 3,$^)\ --shp $(germany)/vg5000/vg5000_ebenen_0101/VG5000_GEM.shp\ --commuter $(germany)/regionalstatistik/commuter.csv\ + --berlin-commuter src/main/python/berlin-work-commuter.csv # For debugging and visualization $(sc) prepare downsample-population $@\ diff --git a/src/main/java/org/matsim/prepare/facilities/AttributedActivityFacility.java b/src/main/java/org/matsim/prepare/facilities/AttributedActivityFacility.java index 238dfa69..a51dad50 100644 --- a/src/main/java/org/matsim/prepare/facilities/AttributedActivityFacility.java +++ b/src/main/java/org/matsim/prepare/facilities/AttributedActivityFacility.java @@ -9,8 +9,7 @@ import java.util.Map; -import static org.matsim.prepare.population.Attributes.ATTRACTION_OTHER; -import static org.matsim.prepare.population.Attributes.ATTRACTION_WORK; +import static org.matsim.prepare.population.Attributes.*; /** * Wraps any {@link ActivityFacility} and adds cached attributes because the normal attributes are terrible slow. @@ -31,7 +30,7 @@ public AttributedActivityFacility(ActivityFacility facility) { this.workAttraction = (double) facility.getAttributes().getAttribute(ATTRACTION_WORK); this.otherAttraction = (double) facility.getAttributes().getAttribute(ATTRACTION_OTHER); this.location = (String) facility.getAttributes().getAttribute("location"); - this.zone = (String) facility.getAttributes().getAttribute("zone"); + this.zone = (String) facility.getAttributes().getAttribute(ZONE); } public double getWorkAttraction() { diff --git a/src/main/java/org/matsim/prepare/facilities/CreateMATSimFacilities.java b/src/main/java/org/matsim/prepare/facilities/CreateMATSimFacilities.java index 7d2ce10c..208a7c46 100644 --- a/src/main/java/org/matsim/prepare/facilities/CreateMATSimFacilities.java +++ b/src/main/java/org/matsim/prepare/facilities/CreateMATSimFacilities.java @@ -25,6 +25,7 @@ import org.matsim.core.utils.geometry.geotools.MGC; import org.matsim.facilities.*; import org.matsim.prepare.population.Attributes; +import org.matsim.run.OpenBerlinScenario; import picocli.CommandLine; import java.math.BigDecimal; @@ -35,19 +36,17 @@ import java.util.stream.Collectors; @CommandLine.Command( - name = "facilities", - description = "Creates MATSim facilities from shape-file and network" + name = "facilities", + description = "Creates MATSim facilities from shape-file and network" ) public class CreateMATSimFacilities implements MATSimAppCommand { - private static final Logger log = LogManager.getLogger(CreateMATSimFacilities.class); - /** * Filter link types that don't have a facility associated. */ public static final Set IGNORED_LINK_TYPES = Set.of("motorway", "trunk", - "motorway_link", "trunk_link", "secondary_link", "primary_link"); - + "motorway_link", "trunk_link", "secondary_link", "primary_link"); + private static final Logger log = LogManager.getLogger(CreateMATSimFacilities.class); @CommandLine.Option(names = "--network", required = true, description = "Path to car network") private Path network; @@ -57,6 +56,9 @@ public class CreateMATSimFacilities implements MATSimAppCommand { @CommandLine.Option(names = "--facility-mapping", description = "Path to facility napping json", required = true) private Path mappingPath; + @CommandLine.Option(names = "--zones-shp", description = "Path to shp file with zonal system", required = false) + private Path zonesPath; + @CommandLine.Mixin private ShpOptions shp; @@ -75,13 +77,24 @@ public static Id generateId(ActivityFacilities facilities, Spl byte[] bytes = new byte[3]; do { rnd.nextBytes(bytes); - id = Id.create( "f" + HexFormat.of().formatHex(bytes), ActivityFacility.class); + id = Id.create("f" + HexFormat.of().formatHex(bytes), ActivityFacility.class); } while (facilities.getFacilities().containsKey(id)); return id; } + private static double round(double f) { + return BigDecimal.valueOf(f).setScale(4, RoundingMode.HALF_UP).doubleValue(); + } + + private static boolean hasAttribute(SimpleFeature ft, String name) { + return ft.getAttribute(name) != null && + (Boolean.TRUE.equals(ft.getAttribute(name)) || "1".equals(ft.getAttribute(name)) || + (ft.getAttribute(name) instanceof Number number && number.intValue() > 0) + ); + } + @Override public Integer call() throws Exception { @@ -92,6 +105,9 @@ public Integer call() throws Exception { config = new ObjectMapper().readerFor(MappingConfig.class).readValue(mappingPath.toFile()); + ShpOptions zoneShp = zonesPath != null ? new ShpOptions(zonesPath, "EPSG:25833", null) : null; + ShpOptions.Index zones = zoneShp != null ? zoneShp.createIndex(OpenBerlinScenario.CRS, "SCHLUESSEL") : null; + Network completeNetwork = NetworkUtils.readNetwork(this.network.toString()); TransportModeNetworkFilter filter = new TransportModeNetworkFilter(completeNetwork); Network carOnlyNetwork = NetworkUtils.createNetwork(); @@ -120,8 +136,8 @@ public Integer call() throws Exception { double workUpper = work.getPercentile(75) + 1.5 * workIQR; double otherUpper = other.getPercentile(75) + 1.5 * otherIQR; - double workLower = Math.max(work.getPercentile(1), work.getPercentile(25) - 1.5 * workIQR); - double otherLower = Math.max(other.getPercentile(1), other.getPercentile(25) - 1.5 * otherIQR); + double workLower = Math.max(Math.max(work.getPercentile(1), 0.1), work.getPercentile(25) - 1.5 * workIQR); + double otherLower = Math.max(Math.max(other.getPercentile(1), 0.1), other.getPercentile(25) - 1.5 * otherIQR); log.info("Work IQR: {} Upper: {} Lower: {}", workIQR, workUpper, workLower); log.info("Other IQR: {} Upper: {} Lower: {}", otherIQR, otherUpper, otherLower); @@ -151,12 +167,20 @@ public Integer call() throws Exception { // Filter outliers from the attraction facility.getAttributes().putAttribute(Attributes.ATTRACTION_WORK, - round(Math.min(Math.max(h.attractionWork, workLower), workUpper)) + round(Math.min(Math.max(h.attractionWork, workLower), workUpper)) ); facility.getAttributes().putAttribute(Attributes.ATTRACTION_OTHER, round(Math.min(Math.max(h.attractionOther, otherLower), otherUpper)) ); + if (zones != null) { + String zone = zones.query(facility.getCoord()); + if (zone != null) { + facility.getAttributes().putAttribute(Attributes.LOR, Integer.parseInt(zone)); + facility.getAttributes().putAttribute(Attributes.ZONE, zone.substring(0, 2)); + } + } + facilities.addActivityFacility(facility); } @@ -168,10 +192,6 @@ public Integer call() throws Exception { return 0; } - private static double round(double f) { - return BigDecimal.valueOf(f).setScale(4, RoundingMode.HALF_UP).doubleValue(); - } - /** * Sample points and choose link with the nearest points. */ @@ -186,8 +206,8 @@ private Holder processFeature(SimpleFeature ft, Network network) { List> links = coords.stream().map(coord -> NetworkUtils.getNearestLinkExactly(network, coord).getId()).toList(); Map, Long> map = links.stream() - .filter(l -> !IGNORED_LINK_TYPES.contains(NetworkUtils.getType(network.getLinks().get(l)))) - .collect(Collectors.groupingBy(Function.identity(), Collectors.counting())); + .filter(l -> !IGNORED_LINK_TYPES.contains(NetworkUtils.getType(network.getLinks().get(l)))) + .collect(Collectors.groupingBy(Function.identity(), Collectors.counting())); // Everything could be filtered and map empty if (map.isEmpty()) @@ -203,7 +223,7 @@ private Holder processFeature(SimpleFeature ft, Network network) { double area = (double) ft.getAttribute("area"); List, Long>> counts = map.entrySet().stream().sorted(Map.Entry.comparingByValue()) - .toList(); + .toList(); // The "main" link of the facility Id link = counts.get(counts.size() - 1).getKey(); @@ -237,8 +257,8 @@ private List samplePoints(MultiPolygon geometry, int n) { for (int i = 0; i < max && result.size() < n; i++) { Coord coord = CoordUtils.round(new Coord( - bbox.getMinX() + (bbox.getMaxX() - bbox.getMinX()) * rnd.nextDouble(), - bbox.getMinY() + (bbox.getMaxY() - bbox.getMinY()) * rnd.nextDouble() + bbox.getMinX() + (bbox.getMaxX() - bbox.getMinX()) * rnd.nextDouble(), + bbox.getMinY() + (bbox.getMaxY() - bbox.getMinY()) * rnd.nextDouble() )); try { @@ -271,13 +291,6 @@ private Set activities(SimpleFeature ft) { return act; } - private static boolean hasAttribute(SimpleFeature ft, String name) { - return ft.getAttribute(name) != null && - (Boolean.TRUE.equals(ft.getAttribute(name)) || "1".equals(ft.getAttribute(name)) || - (ft.getAttribute(name) instanceof Number number && number.intValue() > 0) - ); - } - /** * Temporary data holder for facilities. */ diff --git a/src/main/java/org/matsim/prepare/population/Attributes.java b/src/main/java/org/matsim/prepare/population/Attributes.java index eaafa20f..43fc37b8 100644 --- a/src/main/java/org/matsim/prepare/population/Attributes.java +++ b/src/main/java/org/matsim/prepare/population/Attributes.java @@ -21,18 +21,21 @@ public final class Attributes { * Gemeinde code. */ public static final String GEM = "gem"; - public static final String ARS = "ars"; + /** + * Amtliche Regionalschlüssel (ARS). + */ + public static final String ARS = "ars"; /** - * LOR for Berlin. + * Lebensweltlich orientierte Räume (LOR) for Berlin. / Zonal system of Berlin. */ public static final String LOR = "lor"; /** - * District of Berlin. + * Zonal code, in Berlin this is the district number. */ - public static final String DISTRICT = "district"; + public static final String ZONE = "zone"; public static final String RegioStaR7 = "RegioStaR7"; diff --git a/src/main/java/org/matsim/prepare/population/CommuterAssignment.java b/src/main/java/org/matsim/prepare/population/CommuterAssignment.java index 7120222b..f7a17189 100644 --- a/src/main/java/org/matsim/prepare/population/CommuterAssignment.java +++ b/src/main/java/org/matsim/prepare/population/CommuterAssignment.java @@ -1,16 +1,20 @@ package org.matsim.prepare.population; +import it.unimi.dsi.fastutil.ints.Int2DoubleMap; +import it.unimi.dsi.fastutil.ints.Int2DoubleOpenHashMap; +import it.unimi.dsi.fastutil.ints.Int2ObjectMap; +import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap; import it.unimi.dsi.fastutil.longs.*; import org.apache.commons.csv.CSVFormat; import org.apache.commons.csv.CSVParser; import org.apache.commons.csv.CSVRecord; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.geotools.api.feature.simple.SimpleFeature; import org.locationtech.jts.geom.Geometry; import org.locationtech.jts.geom.Point; import org.matsim.application.options.CsvOptions; import org.matsim.facilities.ActivityFacility; -import org.geotools.api.feature.simple.SimpleFeature; import java.io.IOException; import java.io.UncheckedIOException; @@ -28,14 +32,19 @@ public class CommuterAssignment { private final Map zones; /** - * Outgoing commuter from ars to ars. + * Outgoing commuter from ars to ars. This is german wide with quite large zones. */ private final Long2ObjectMap commuter; + /** + * Maps home district to probabilities of commuting to other districts. + */ + private final Int2ObjectMap berlinCommuter; + private final CsvOptions csv = new CsvOptions(CSVFormat.Predefined.Default); private final double sample; - public CommuterAssignment(Long2ObjectMap zones, Path commuterPath, double sample) { + public CommuterAssignment(Long2ObjectMap zones, Path commuterPath, Path berlinCommuterPath, double sample) { this.sample = sample; // outgoing commuters @@ -62,9 +71,32 @@ public CommuterAssignment(Long2ObjectMap zones, Path commuterPath throw new UncheckedIOException(e); } + this.berlinCommuter = new Int2ObjectOpenHashMap<>(); + + try (CSVParser parser = csv.createParser(berlinCommuterPath)) { + + for (CSVRecord row : parser) { + int home = Integer.parseInt(row.get("home")); + int work = Integer.parseInt(row.get("work")); + + berlinCommuter.computeIfAbsent(home, k -> new Int2DoubleOpenHashMap()) + .put(work, Double.parseDouble(row.get("n"))); + } + + // Normalize probabilities + for (Int2ObjectMap.Entry kv : berlinCommuter.int2ObjectEntrySet()) { + Int2DoubleMap m = kv.getValue(); + double sum = m.values().doubleStream().sum(); + m.replaceAll((k, v) -> v / sum); + } + + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } - /** + /** * Select and return a commute target. * * @param f sampler producing target locations @@ -126,6 +158,13 @@ public ActivityFacility selectTarget(SplittableRandom rnd, long ars, double dist return null; } + /** + * Returns the weight of commuting from homeZone to targetZone. + */ + public double getZoneWeight(String homeZone, String targetZone) { + return berlinCommuter.get(Integer.parseInt(homeZone)).get(Integer.parseInt(targetZone)); + } + /** * Sample locations from specific zone. */ diff --git a/src/main/java/org/matsim/prepare/population/CreateBerlinPopulation.java b/src/main/java/org/matsim/prepare/population/CreateBerlinPopulation.java index f30e3d9a..f52561da 100644 --- a/src/main/java/org/matsim/prepare/population/CreateBerlinPopulation.java +++ b/src/main/java/org/matsim/prepare/population/CreateBerlinPopulation.java @@ -230,7 +230,7 @@ private void processLOR(CSVRecord row) throws ParseException { person.getAttributes().putAttribute(Attributes.GEM, 11000000); person.getAttributes().putAttribute(Attributes.ARS, 110000000000L); person.getAttributes().putAttribute(Attributes.LOR, Integer.parseInt(raumID)); - person.getAttributes().putAttribute(Attributes.DISTRICT, Integer.parseInt(raumID.substring(0, 2))); + person.getAttributes().putAttribute(Attributes.ZONE, raumID.substring(0, 2)); Plan plan = f.createPlan(); plan.addActivity(f.createActivityFromCoord("home", coord)); diff --git a/src/main/java/org/matsim/prepare/population/FacilityIndex.java b/src/main/java/org/matsim/prepare/population/FacilityIndex.java index f603085e..fa84eb17 100644 --- a/src/main/java/org/matsim/prepare/population/FacilityIndex.java +++ b/src/main/java/org/matsim/prepare/population/FacilityIndex.java @@ -135,14 +135,75 @@ public static ActivityFacility sampleByWeightWithRejection(List candidates, + Function classifier, + Function>, Double> groupWeight, + Function facilityWeight, SplittableRandom rnd) { + + if (candidates.isEmpty()) + return null; + + // Entries which produce a null key are discarded + Map> map = candidates.stream() + .filter(af -> classifier.apply(af) != null) + .collect(Collectors.groupingBy(classifier)); + + List>> grouped = map.entrySet().stream().toList(); + + double totalWeight = 0.0; + double[] weights = new double[grouped.size()]; + + for (int i = 0; i < grouped.size(); ++i) { + double w = groupWeight.apply(grouped.get(i)); + totalWeight += w; + weights[i] = totalWeight; + } + + // No weights, sample uniformly + if (totalWeight == 0.0) { + List list = grouped.get(rnd.nextInt(grouped.size())).getValue(); + return list.get(rnd.nextInt(list.size())); + } + + double r = rnd.nextDouble(0.0, totalWeight); + int idx = Arrays.binarySearch(weights, r); + if (idx < 0) { + idx = -idx - 1; + } + + // First get the samples group + List list = grouped.get(idx).getValue(); + + double totalGroupWeight = 0; + double[] groupWeights = new double[list.size()]; + + for (int i = 0; i < list.size(); i++) { + double w = facilityWeight.apply(list.get(i)); + totalGroupWeight += w; + groupWeights[i] = totalGroupWeight; + } + + int idx2 = Arrays.binarySearch(groupWeights, rnd.nextDouble(0, totalGroupWeight)); + if (idx2 < 0) { + idx2 = -idx2 - 1; + } + + // Sample random facility from the zone + return list.get(idx2); + } + /** * Groups candidates first using classifier. Then does a weighted sample on the groups and selects a random facility. * The groups are typical zones which might have certain OD relations. */ public static ActivityFacility sampleWithGrouping(List candidates, - Function classifier, - Function>, Double> groupWeight, - SplittableRandom rnd) { + Function classifier, + Function>, Double> groupWeight, + SplittableRandom rnd) { if (candidates.isEmpty()) return null; diff --git a/src/main/java/org/matsim/prepare/population/InitLocationChoice.java b/src/main/java/org/matsim/prepare/population/InitLocationChoice.java index e703ae0e..c5daac44 100644 --- a/src/main/java/org/matsim/prepare/population/InitLocationChoice.java +++ b/src/main/java/org/matsim/prepare/population/InitLocationChoice.java @@ -71,6 +71,9 @@ public class InitLocationChoice implements MATSimAppCommand, PersonAlgorithm { @CommandLine.Option(names = "--commuter", description = "Path to commuter.csv", required = true) private Path commuterPath; + @CommandLine.Option(names = "--berlin-commuter", description = "Home work commuter within Berlin", required = true) + private Path berlinCommuterPath; + @CommandLine.Option(names = "--facilities", description = "Path to facilities file", required = true) private Path facilityPath; @@ -152,7 +155,7 @@ public Integer call() throws Exception { log.info("Generating plan {} with seed {}", i, seed); - commuter = new CommuterAssignment(zones, commuterPath, sample); + commuter = new CommuterAssignment(zones, commuterPath, berlinCommuterPath, sample); Population population = PopulationUtils.readPopulation(input.toString()); @@ -196,6 +199,7 @@ public Integer call() throws Exception { public void run(Person person) { Coord homeCoord = Attributes.getHomeCoord(person); + long ars = (long) person.getAttributes().getAttribute(Attributes.ARS); // Reference persons are not assigned locations if (person.getAttributes().getAttribute(Attributes.REF_MODES) != null) { @@ -239,7 +243,7 @@ public void run(Person person) { if (location == null && type.equals("work")) { // sample work commute - location = sampleCommute(rnd, dist, lastCoord, (long) person.getAttributes().getAttribute(Attributes.ARS)); + location = sampleCommute(rnd, dist, lastCoord, (String) person.getAttributes().getAttribute(Attributes.ZONE), ars); } if (location == null && facilities.index.containsKey(type)) { @@ -307,7 +311,7 @@ private SplittableRandom initRandomNumberGenerator(Person person, long planNumbe /** * Sample work place by using commute and distance information. */ - private ActivityFacility sampleCommute(SplittableRandom rnd, double dist, Coord refCoord, long ars) { + private ActivityFacility sampleCommute(SplittableRandom rnd, double dist, Coord refCoord, String homeZone, long ars) { STRtree index = facilities.index.get("work"); @@ -318,13 +322,14 @@ private ActivityFacility sampleCommute(SplittableRandom rnd, double dist, Coord workPlace = commuter.selectTarget(rnd, ars, dist, MGC.coord2Point(refCoord), zone -> sampleZone(index, dist, refCoord, zone, rnd)); } + // Within Berlin, separate data for commute is used + if (workPlace == null && ars == 110000000000L && homeZone != null) { + workPlace = sampleBerlinWorkPlace(index, dist, refCoord, homeZone, rnd); + } + if (workPlace == null) { // Try selecting within same zone workPlace = sampleZone(index, dist, refCoord, (Geometry) zones.get(ars).getDefaultGeometry(), rnd); - - // TODO: if the home location and the work is in Berlin, - // we may use the berlin OD file for distribution of work trips within berlin - // TODO need to add zone shp and csv } return workPlace; @@ -347,7 +352,7 @@ private Coord sampleLink(SplittableRandom rnd, double dist, Coord origin) { } /** - * Only samples randomly from the zone, ignoring the distance. + * Samples randomly from the zone. */ private ActivityFacility sampleZone(STRtree index, double dist, Coord refCoord, Geometry zone, SplittableRandom rnd) { @@ -358,6 +363,27 @@ private ActivityFacility sampleZone(STRtree index, double dist, Coord refCoord, return FacilityIndex.sampleByWeightWithRejection(query, f -> zone.contains(MGC.coord2Point(f.getCoord())), AttributedActivityFacility::getWorkAttraction, rnd); } + /** + * Only samples randomly from the zone, ignoring the distance. + */ + private ActivityFacility sampleBerlinWorkPlace(STRtree index, double dist, Coord refCoord, String homeZone, SplittableRandom rnd) { + + List query = index.query(MGC.coord2Point(refCoord).buffer(dist * 1.2).getEnvelopeInternal()); + + query = query.stream() + .filter(f -> f.getZone() != null) + .filter(f -> checkDistanceBound(dist, refCoord, f.getCoord(), 1)) + .collect(Collectors.toList()); + + if (query.isEmpty()) + return null; + + int idx = FacilityIndex.sampleByWeight(query, + f -> f.getWorkAttraction() * commuter.getZoneWeight(homeZone, f.getZone()), rnd); + + return query.get(idx); + } + /** * General logic to filter coordinate within target distance. */ diff --git a/src/main/python/extract_workplaces.py b/src/main/python/extract_workplaces.py new file mode 100644 index 00000000..ed84c011 --- /dev/null +++ b/src/main/python/extract_workplaces.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import pandas as pd +import geopandas as gpd + +if __name__ == "__main__": + + # This script reads the IHK Berlin Gewerbedaten from github, aggregates and normalizes the number of workplaces to zones + + # Note that this data is not actually used in the model + # The IHK data registers company locations, but not the actual work places + # Large companies produce unrealistic high number of workplaces at one location, which is not the case in reality + + df = pd.read_csv("https://github.com/IHKBerlin/IHKBerlin_Gewerbedaten/blob/master/data/IHKBerlin_Gewerbedaten.csv?raw=true") + df = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.longitude, df.latitude), crs="EPSG:4326").to_crs("EPSG:25833") + + shp = gpd.read_file("../../../../public-svn/matsim/scenarios/countries/de/berlin/berlin-v6.4/input/shp/Planungsraum_EPSG_25833.shp").set_crs("EPSG:25833") + + df = gpd.sjoin(df, shp, op="within") + + df["employees_min"] = df.employees_range.str.extract(r"(\d+)").fillna(0).astype(int) + 1 + df["employees_max"] = df.employees_range.str.extract(r"- (\d+)").fillna(0).astype(int) + 1 + + aggr = df.groupby("SCHLUESSEL").agg(min=("employees_min", "sum"), max=("employees_max", "sum")).reset_index().rename(columns={"SCHLUESSEL": "location"}) + + aggr["employees"] = (aggr["min"] + aggr["max"]) / 2 + aggr["zone"] = aggr.location.str.slice(0, 2).astype(int) + + aggr.to_csv("workplaces.csv", columns=["location", "zone", "employees"], index=False)