From b63e5281123447e4ce8bc2532cabccad141eb764 Mon Sep 17 00:00:00 2001 From: rakow Date: Thu, 30 May 2024 21:28:55 +0200 Subject: [PATCH] created class to assign reference population, refactored person matcher --- .../prepare/RunOpenBerlinCalibration.java | 3 +- .../choices/AssignReferencePopulation.java | 64 ++++++++ .../prepare/choices/ComputeTripChoices.java | 3 +- .../matsim/prepare/choices/PlanBuilder.java | 56 +++++-- .../prepare/population/PersonMatcher.java | 144 ++++++++++++++++++ .../population/RunActivitySampling.java | 105 ++----------- src/main/python/extract_trips.py | 9 +- 7 files changed, 269 insertions(+), 115 deletions(-) create mode 100644 src/main/java/org/matsim/prepare/choices/AssignReferencePopulation.java create mode 100644 src/main/java/org/matsim/prepare/population/PersonMatcher.java diff --git a/src/main/java/org/matsim/prepare/RunOpenBerlinCalibration.java b/src/main/java/org/matsim/prepare/RunOpenBerlinCalibration.java index 804989cd..ea308718 100644 --- a/src/main/java/org/matsim/prepare/RunOpenBerlinCalibration.java +++ b/src/main/java/org/matsim/prepare/RunOpenBerlinCalibration.java @@ -44,6 +44,7 @@ import org.matsim.core.scoring.SumScoringFunction; import org.matsim.core.scoring.functions.*; import org.matsim.core.utils.geometry.CoordUtils; +import org.matsim.prepare.choices.AssignReferencePopulation; import org.matsim.prepare.choices.ComputePlanChoices; import org.matsim.prepare.choices.ComputeTripChoices; import org.matsim.prepare.counts.CreateCountsFromGeoPortalBerlin; @@ -85,7 +86,7 @@ CreateCountsFromGeoPortalBerlin.class, CreateCountsFromVMZOld.class, CreateCountsFromVMZ.class, ReprojectNetwork.class, RunActivitySampling.class, MergePlans.class, SplitActivityTypesDuration.class, CleanPopulation.class, CleanAttributes.class, GenerateSmallScaleCommercialTrafficDemand.class, CreateDataDistributionOfStructureData.class, - RunCountOptimization.class, SelectPlansFromIndex.class, + RunCountOptimization.class, SelectPlansFromIndex.class, AssignReferencePopulation.class, ExtractRelevantFreightTrips.class, CheckCarAvailability.class, FixSubtourModes.class, ComputeTripChoices.class, ComputePlanChoices.class, ApplyNetworkParams.class, SetCarAvailabilityByAge.class }) diff --git a/src/main/java/org/matsim/prepare/choices/AssignReferencePopulation.java b/src/main/java/org/matsim/prepare/choices/AssignReferencePopulation.java new file mode 100644 index 00000000..714b4779 --- /dev/null +++ b/src/main/java/org/matsim/prepare/choices/AssignReferencePopulation.java @@ -0,0 +1,64 @@ +package org.matsim.prepare.choices; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.matsim.api.core.v01.population.Population; +import org.matsim.application.MATSimAppCommand; +import org.matsim.application.options.ShpOptions; +import org.matsim.core.population.PopulationUtils; +import picocli.CommandLine; + +import java.nio.file.Files; +import java.nio.file.Path; + +@CommandLine.Command( + name = "assign-reference-population", + description = "Assigns persons from reference data to a population." +) +public class AssignReferencePopulation implements MATSimAppCommand { + + private static final Logger log = LogManager.getLogger(AssignReferencePopulation.class); + + + @CommandLine.Option(names = "--population", description = "Input population path.", required = true) + private String populationPath; + + @CommandLine.Option(names = "--persons", description = "Input persons from survey data, in matsim-python-tools format.", required = true) + private Path persons; + + @CommandLine.Option(names = "--trips", description = "Input trips from survey data, in matsim-python-tools format.", required = true) + private Path trips; + + @CommandLine.Option(names = "--facilities", description = "Shp file with facilities", required = true) + private Path facilities; + + @CommandLine.Option(names = "--output", description = "Output population path.", required = true) + private Path output; + + @CommandLine.Mixin + private ShpOptions shp; + + @Override + public Integer call() throws Exception { + + if (!shp.isDefined()) { + log.error("No shapefile defined. Please specify a shapefile for the zones using the --shp option."); + return 2; + } + + if (!Files.exists(trips)) { + log.error("Input trip file does not exist: {}", trips); + return 2; + } + + Population population = PopulationUtils.readPopulation(populationPath); + + PlanBuilder builder = new PlanBuilder(shp, new ShpOptions(facilities, null, null), population.getFactory()); + + builder.mergePlans(population, trips, persons); + + PopulationUtils.writePopulation(population, output.toString()); + + return 0; + } +} diff --git a/src/main/java/org/matsim/prepare/choices/ComputeTripChoices.java b/src/main/java/org/matsim/prepare/choices/ComputeTripChoices.java index b973e6d3..952019ce 100644 --- a/src/main/java/org/matsim/prepare/choices/ComputeTripChoices.java +++ b/src/main/java/org/matsim/prepare/choices/ComputeTripChoices.java @@ -76,11 +76,10 @@ public Integer call() throws Exception { } if (!Files.exists(input)) { - log.error("Input file does not exist: " + input); + log.error("Input file does not exist: {}", input); return 2; } - Config config = this.scenario.getConfig(); config.controller().setOutputDirectory("choice-output"); config.controller().setLastIteration(0); diff --git a/src/main/java/org/matsim/prepare/choices/PlanBuilder.java b/src/main/java/org/matsim/prepare/choices/PlanBuilder.java index a7b18429..420c38f9 100644 --- a/src/main/java/org/matsim/prepare/choices/PlanBuilder.java +++ b/src/main/java/org/matsim/prepare/choices/PlanBuilder.java @@ -6,22 +6,21 @@ import it.unimi.dsi.fastutil.objects.Object2LongMap; import it.unimi.dsi.fastutil.objects.Object2LongOpenHashMap; import me.tongfei.progressbar.ProgressBar; +import org.apache.commons.csv.CSVRecord; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.locationtech.jts.geom.Geometry; import org.matsim.api.core.v01.Coord; import org.matsim.api.core.v01.Id; import org.matsim.api.core.v01.Scenario; -import org.matsim.api.core.v01.population.Activity; -import org.matsim.api.core.v01.population.Person; -import org.matsim.api.core.v01.population.Plan; -import org.matsim.api.core.v01.population.PopulationFactory; +import org.matsim.api.core.v01.population.*; import org.matsim.application.options.ShpOptions; import org.matsim.application.prepare.population.SplitActivityTypesDuration; import org.matsim.core.population.PersonUtils; import org.matsim.core.population.PopulationUtils; import org.matsim.core.utils.geometry.CoordUtils; import org.matsim.prepare.population.InitLocationChoice; +import org.matsim.prepare.population.PersonMatcher; import org.matsim.vehicles.Vehicle; import org.matsim.vehicles.VehicleType; import org.matsim.vehicles.VehicleUtils; @@ -115,16 +114,38 @@ public static void addVehiclesToScenario(Scenario scenario) { /** * Create persons with plans from a table. */ - public List createPlans(Path input) { + public List createPlans(Path trips) { + return handleTrips(trips, null, this::createPerson); + } + + /** + * Reads reference trips from input and merges it into existing population. + * @see #createPlans(Path) + */ + public List mergePlans(Population population, Path trips, Path persons) { + + // TODO + + + return null; + } + - Table table = Table.read().csv(input.toFile()); + /** + * Helper function to iterate through trips data and process it. + */ + private List handleTrips(Path trips, Path persons, EntryHandler handler) { + + Table table = Table.read().csv(trips.toFile()); String currentPerson = null; int currentSeq = -1; + PersonMatcher matcher = new PersonMatcher("p_id", persons); + List result = new ArrayList<>(); - List trips = new ArrayList<>(); + List tripRows = new ArrayList<>(); try (ProgressBar pb = new ProgressBar("Reading trips", table.rowCount())) { @@ -136,23 +157,24 @@ public List createPlans(Path input) { int seq = row.getInt("seq"); if (!pId.equals(currentPerson) || seq != currentSeq) { - if (!trips.isEmpty()) { + if (!tripRows.isEmpty()) { // Filter person with too many trips - if (maxTripNumber <= 0 || trips.size() <= maxTripNumber) { - Person person = createPerson(pId, seq, trips); + if (maxTripNumber <= 0 || tripRows.size() <= maxTripNumber) { + Person person = handler.process(currentPerson, currentSeq, + matcher.getPerson(pId), tripRows); if (person != null) result.add(person); } - trips.clear(); + tripRows.clear(); } currentPerson = pId; currentSeq = seq; } - trips.add(row); + tripRows.add(row); pb.step(); } } @@ -163,7 +185,7 @@ public List createPlans(Path input) { /** * Create person from row data. */ - private Person createPerson(String id, int seq, List trips) { + private Person createPerson(String id, int seq, CSVRecord p, List trips) { Person person = f.createPerson(Id.createPersonId(id + "_" + seq)); @@ -322,4 +344,12 @@ private Set matchLocation(String location, String zone) { private record Location(String name, String zone) { } + + @FunctionalInterface + private interface EntryHandler { + + Person process(String pId, int seq, CSVRecord person, List trips); + + } + } diff --git a/src/main/java/org/matsim/prepare/population/PersonMatcher.java b/src/main/java/org/matsim/prepare/population/PersonMatcher.java new file mode 100644 index 00000000..a14355cf --- /dev/null +++ b/src/main/java/org/matsim/prepare/population/PersonMatcher.java @@ -0,0 +1,144 @@ +package org.matsim.prepare.population; + +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; +import org.apache.commons.csv.CSVRecord; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.matsim.api.core.v01.population.Person; +import org.matsim.application.options.CsvOptions; +import org.matsim.core.population.PersonUtils; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.file.Path; +import java.util.*; +import java.util.stream.IntStream; +import java.util.stream.Stream; + +/** + * This class is used to read and match persons from the reference data in csv format. + */ +public class PersonMatcher { + + private static final Logger log = LogManager.getLogger(PersonMatcher.class); + + private final String idxColumn; + + private final CsvOptions csv = new CsvOptions(CSVFormat.Predefined.Default); + private final Map> groups = new HashMap<>(); + private final Map persons = new HashMap<>(); + + public PersonMatcher(String idxColumn, Path personsPath) { + this.idxColumn = idxColumn; + + try (CSVParser parser = csv.createParser(personsPath)) { + buildSubgroups(parser); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + /** + * Match reference person to a person in the population. + * @return person id + */ + public String matchPerson(Person person, SplittableRandom rnd) { + + Key key = createKey(person); + + List subgroup = groups.get(key); + if (subgroup == null) { + log.error("No subgroup found for key {}", key); + throw new IllegalStateException("Invalid entry"); + } + + if (subgroup.size() < 30) { + log.warn("Group {} has low sample size: {}", key, subgroup.size()); + } + + return subgroup.get(rnd.nextInt(subgroup.size())); + } + + /** + * Return reference person with given index. + */ + public CSVRecord getPerson(String personId) { + return persons.get(personId); + } + + /** + * Create subpopulations for sampling. + */ + private void buildSubgroups(CSVParser csv) { + + int i = 0; + + for (CSVRecord r : csv) { + + String idx = r.get(idxColumn); + int regionType = Integer.parseInt(r.get("region_type")); + String gender = r.get("gender"); + String employment = r.get("employment"); + int age = Integer.parseInt(r.get("age")); + + Stream keys = createKey(gender, age, regionType, employment); + keys.forEach(key -> groups.computeIfAbsent(key, (k) -> new ArrayList<>()).add(idx)); + persons.put(idx, r); + i++; + } + + log.info("Read {} persons from csv.", i); + } + + private Stream createKey(String gender, int age, int regionType, String employment) { + if (age < 6) { + return IntStream.rangeClosed(0, 5).mapToObj(i -> new Key(null, i, regionType, null)); + } + if (age <= 10) { + return IntStream.rangeClosed(6, 10).mapToObj(i -> new Key(null, i, regionType, null)); + } + if (age < 18) { + return IntStream.rangeClosed(11, 18).mapToObj(i -> new Key(gender, i, regionType, null)); + } + + Boolean isEmployed = age > 65 ? null : !employment.equals("unemployed"); + int min = Math.max(18, age - 6); + int max = Math.min(65, age + 6); + + // larger groups for older people + if (age > 65) { + min = Math.max(66, age - 10); + max = Math.min(99, age + 10); + } + + return IntStream.rangeClosed(min, max).mapToObj(i -> new Key(gender, i, regionType, isEmployed)); + } + + private Key createKey(Person person) { + + Integer age = PersonUtils.getAge(person); + String gender = PersonUtils.getSex(person); + if (age <= 10) + gender = null; + + Boolean employed = PersonUtils.isEmployed(person); + if (age < 18 || age > 65) + employed = null; + + int regionType = (int) person.getAttributes().getAttribute(Attributes.RegioStaR7); + + // Region types have been reduced to 1 and 3 + if (regionType != 1) + regionType = 3; + + return new Key(gender, age, regionType, employed); + } + + /** + * Key used to match persons. + */ + public record Key(String gender, int age, int regionType, Boolean employed) { + } + +} diff --git a/src/main/java/org/matsim/prepare/population/RunActivitySampling.java b/src/main/java/org/matsim/prepare/population/RunActivitySampling.java index 245e17dd..682063dd 100644 --- a/src/main/java/org/matsim/prepare/population/RunActivitySampling.java +++ b/src/main/java/org/matsim/prepare/population/RunActivitySampling.java @@ -35,12 +35,12 @@ public class RunActivitySampling implements MATSimAppCommand, PersonAlgorithm { private static final Logger log = LogManager.getLogger(RunActivitySampling.class); private final CsvOptions csv = new CsvOptions(CSVFormat.Predefined.Default); - private final Map groups = new HashMap<>(); - private final Int2ObjectMap persons = new Int2ObjectOpenHashMap<>(); + /** * Maps person index to list of activities. */ - private final Int2ObjectMap> activities = new Int2ObjectOpenHashMap<>(); + private final Map> activities = new HashMap<>(); + @CommandLine.Option(names = "--input", description = "Path to input population", required = true) private Path input; @CommandLine.Option(names = "--output", description = "Output path for population", required = true) @@ -54,6 +54,7 @@ public class RunActivitySampling implements MATSimAppCommand, PersonAlgorithm { private ThreadLocal ctxs; private PopulationFactory factory; + private PersonMatcher matcher; public static void main(String[] args) { new RunActivitySampling().execute(args); @@ -64,9 +65,7 @@ public Integer call() throws Exception { Population population = PopulationUtils.readPopulation(input.toString()); - try (CSVParser parser = csv.createParser(personsPath)) { - buildSubgroups(parser); - } + matcher = new PersonMatcher("idx", personsPath); try (CSVParser parser = csv.createParser(activityPath)) { readActivities(parser); @@ -94,41 +93,17 @@ public Integer call() throws Exception { return 0; } - /** - * Create subpopulations for sampling. - */ - private void buildSubgroups(CSVParser csv) { - - int i = 0; - - for (CSVRecord r : csv) { - - int idx = Integer.parseInt(r.get("idx")); - int regionType = Integer.parseInt(r.get("region_type")); - String gender = r.get("gender"); - String employment = r.get("employment"); - int age = Integer.parseInt(r.get("age")); - - Stream keys = createKey(gender, age, regionType, employment); - keys.forEach(key -> groups.computeIfAbsent(key, (k) -> new IntArrayList()).add(idx)); - persons.put(idx, r); - i++; - } - - log.info("Read {} persons from csv.", i); - } - private void readActivities(CSVParser csv) { - int currentId = -1; + String currentId = null; List current = null; int i = 0; for (CSVRecord r : csv) { - int pId = Integer.parseInt(r.get("p_id")); + String pId = r.get("p_id"); - if (pId != currentId) { + if (!Objects.equals(pId, currentId)) { if (current != null) activities.put(currentId, current); @@ -147,69 +122,15 @@ private void readActivities(CSVParser csv) { log.info("Read {} activities for {} persons", i, activities.size()); } - private Stream createKey(String gender, int age, int regionType, String employment) { - if (age < 6) { - return IntStream.rangeClosed(0, 5).mapToObj(i -> new Key(null, i, regionType, null)); - } - if (age <= 10) { - return IntStream.rangeClosed(6, 10).mapToObj(i -> new Key(null, i, regionType, null)); - } - if (age < 18) { - return IntStream.rangeClosed(11, 18).mapToObj(i -> new Key(gender, i, regionType, null)); - } - Boolean isEmployed = age > 65 ? null : !employment.equals("unemployed"); - int min = Math.max(18, age - 6); - int max = Math.min(65, age + 6); - - // larger groups for older people - if (age > 65) { - min = Math.max(66, age - 10); - max = Math.min(99, age + 10); - } - - return IntStream.rangeClosed(min, max).mapToObj(i -> new Key(gender, i, regionType, isEmployed)); - } - - private Key createKey(Person person) { - - Integer age = PersonUtils.getAge(person); - String gender = PersonUtils.getSex(person); - if (age <= 10) - gender = null; - - Boolean employed = PersonUtils.isEmployed(person); - if (age < 18 || age > 65) - employed = null; - - int regionType = (int) person.getAttributes().getAttribute(Attributes.RegioStaR7); - - // Region types have been reduced to 1 and 3 - if (regionType != 1) - regionType = 3; - - return new Key(gender, age, regionType, employed); - } @Override public void run(Person person) { SplittableRandom rnd = ctxs.get().rnd; - Key key = createKey(person); - - IntList subgroup = groups.get(key); - if (subgroup == null) { - log.error("No subgroup found for key {}", key); - throw new IllegalStateException("Invalid entry"); - } - - if (subgroup.size() < 30) { - log.warn("Group {} has low sample size: {}", key, subgroup.size()); - } - - int idx = subgroup.getInt(rnd.nextInt(subgroup.size())); - CSVRecord row = persons.get(idx); + String idx = matcher.matchPerson(person, rnd); + CSVRecord row = matcher.getPerson(idx); PersonUtils.setCarAvail(person, row.get("car_avail").equals("True") ? "always" : "never"); PersonUtils.setLicence(person, row.get("driving_license").toLowerCase()); @@ -377,12 +298,6 @@ private Plan createPlan(Coord homeCoord, List activities, SplittableR return plan; } - /** - * Key used for sampling activities. - */ - private record Key(String gender, int age, int regionType, Boolean employed) { - } - private record Context(SplittableRandom rnd) { } diff --git a/src/main/python/extract_trips.py b/src/main/python/extract_trips.py index 6eefeb34..5ec0d123 100644 --- a/src/main/python/extract_trips.py +++ b/src/main/python/extract_trips.py @@ -36,13 +36,14 @@ df["seq"] = seq - df = df.drop(columns=["t_weight", "valid"]) - - df_p = df.merge(persons, left_on="p_id", right_index=True) - df["p_age"] = df_p["age"] + df = df.drop(columns=["valid"]) + # Norm weight to 3 as well + df.t_weight = df.t_weight / 3 df_hh = df.merge(hh, left_on="hh_id", right_index=True) df["hh_cars"] = df_hh["n_cars"] df = df.sort_values(["p_id", "seq", "n"]) df.to_csv("trips-scaled.csv", index=False) + + persons.to_csv("persons-unscaled.csv", index=False)