Skip to content

Commit

Permalink
Merge branch 'master' into facility-gen
Browse files Browse the repository at this point in the history
  • Loading branch information
rakow authored Jun 15, 2024
2 parents aa02af4 + b431704 commit 7429274
Show file tree
Hide file tree
Showing 8 changed files with 376 additions and 74 deletions.
1 change: 1 addition & 0 deletions matsim/calibration/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ def f(trial):

if os.name != 'nt':
cmd = cmd.split(" ")
cmd = [c for c in cmd if c != ""]

p = subprocess.Popen(cmd,
stdout=sys.stdout if debug else subprocess.DEVNULL,
Expand Down
39 changes: 16 additions & 23 deletions matsim/calibration/__main__.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,29 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import optuna
import traceback
from . import run_create_csv
from . import run_simulations

from . import study_as_df
def _add(subparsers, m):
""" Adds module to as subcommand"""
s1 = subparsers.add_parser(m.METADATA[0], help=m.METADATA[1])
m.setup(s1)
s1.set_defaults(func=m.main)

if __name__ == "__main__":
import argparse

parser = argparse.ArgumentParser(prog="matsim-calibration", description="Calibration CLI")
parser.add_argument('file', nargs=1, type=str, help="Path to input db")
parser.add_argument("--name", type=str, default="calib", help="Calibration name")
parser.add_argument("--output", default=None, help="Output path")
args = parser.parse_args()

study = optuna.load_study(
study_name=args.name,
storage="sqlite:///%s" % args.file[0],
)
parser = argparse.ArgumentParser(prog="matsim-calibration", description="MATSim calibration command line utility")

if not args.output:
args.output = args.file[0] + ".csv"
subparsers = parser.add_subparsers(title="Subcommands")

df = study_as_df(study)
df.to_csv(args.output, index=False)
_add(subparsers, run_create_csv)
_add(subparsers, run_simulations)

try:
from .plot import plot_study
plot_study(study)
args = parser.parse_args()

except ImportError:
print("Could not plot study.")
traceback.print_exc()
if not hasattr(args, 'func'):
parser.print_help()
else:
args.func(args)

36 changes: 36 additions & 0 deletions matsim/calibration/run_create_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import argparse
import traceback

METADATA = "create-csv", "Create plots and csv from calibration study."

def setup(parser: argparse.ArgumentParser):
parser.add_argument('file', nargs=1, type=str, help="Path to input db")
parser.add_argument("--name", type=str, default="calib", help="Calibration name")
parser.add_argument("--output", default=None, help="Output path")

def main(args):
    """Export a calibration study as csv and try to plot it.

    The study ``args.name`` is loaded from the sqlite database ``args.file[0]``.
    If no output path is given, the csv is written next to the database
    (database path plus ``.csv``). Plotting is optional: an ``ImportError``
    is reported on the console instead of aborting.
    """

    import optuna
    from . import study_as_df

    db_path = args.file[0]
    study = optuna.load_study(study_name=args.name, storage="sqlite:///%s" % db_path)

    if not args.output:
        args.output = db_path + ".csv"

    study_as_df(study).to_csv(args.output, index=False)

    try:
        from .plot import plot_study
        plot_study(study)
    except ImportError:
        print("Could not plot study.")
        traceback.print_exc()
177 changes: 177 additions & 0 deletions matsim/calibration/run_simulations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import argparse
import os
import subprocess
import sys
from os import makedirs
from time import sleep
from typing import Union, Callable

import pandas as pd
import numpy as np

METADATA = "run-simulations", "Utility to run multiple simulations at once."


def process_results(runs):
    """Process results of multiple simulations.

    Every sub-directory of ``runs`` is treated as one simulation run and its
    ``analysis/population/mode_choices.csv`` is read. The tables are joined on
    ``person``, ``n`` and ``true_mode``; overlapping columns of later runs get
    the run directory name as suffix. All columns starting with ``pred_mode``
    are interpreted as the predictions of one run each.

    The computed metrics are printed and written to ``results.csv`` in ``runs``.

    :param runs: directory containing one sub-directory per simulation run
    :raises ValueError: if no run results or no prediction columns are found
    """
    print("Processing results in %s" % runs)

    keys = ["person", "n", "true_mode"]

    dfs = None
    # Sorted iteration makes the choice of the (un-suffixed) base run reproducible
    for run in sorted(os.listdir(runs)):
        if not os.path.isdir(os.path.join(runs, run)):
            continue

        df = pd.read_csv(os.path.join(runs, run, "analysis", "population", "mode_choices.csv"))
        if dfs is None:
            dfs = df
        else:
            dfs = dfs.merge(df, on=keys, suffixes=("", "_%s" % run))

    if dfs is None or dfs.empty:
        raise ValueError("No mode choice results found in %s" % runs)

    pred_cols = [c for c in dfs.columns if c.startswith("pred_mode")]
    if not pred_cols:
        raise ValueError("No prediction columns (pred_mode*) found in results of %s" % runs)

    # Imported lazily, only once there is data to evaluate
    from sklearn.metrics import log_loss, accuracy_score
    from sklearn.preprocessing import LabelEncoder

    shares = dfs.groupby("true_mode").size() / len(dfs)
    # groupby sorts its keys, which matches the class order of the LabelEncoder
    modes = shares.index

    labels = LabelEncoder().fit(modes)
    y_true = labels.transform(dfs["true_mode"])

    # Null model: every sample is predicted with the observed mode shares
    y_null = np.tile(shares.to_numpy(), reps=(len(y_true), 1))

    # Predicted probability of a mode = fraction of runs that predicted it.
    # Column-wise comparison also works for column names that are no valid
    # python identifiers (which itertuples would rename).
    predictions = dfs[pred_cols]
    y_pred = np.column_stack([predictions.eq(m).mean(axis=1).to_numpy() for m in modes])

    # NOTE(review): division by 1000 presumably converts meters to km - confirm
    dists = dfs.euclidean_distance.to_numpy() / 1000

    weights = dfs.weight
    dist_weights = dfs.weight * dists

    def log_likelihood(probabilities, normalize):
        """Return the log likelihood weighted by sample weight and by distance."""
        return tuple(-log_loss(y_true, probabilities, sample_weight=w, normalize=normalize)
                     for w in (weights, dist_weights))

    accs = [accuracy_score(dfs.true_mode, dfs[col], sample_weight=weights) for col in pred_cols]
    accs_d = [accuracy_score(dfs.true_mode, dfs[col], sample_weight=dist_weights) for col in pred_cols]

    result = [
        ("Log likelihood", *log_likelihood(y_pred, False)),
        ("Log likelihood (normalized)", *log_likelihood(y_pred, True)),
        ("Mean Accuracy", np.mean(accs), np.mean(accs_d)),
        ("Log likelihood (null)", *log_likelihood(y_null, False)),
        ("Samples", len(dfs), dists.sum()),
        ("Runs", len(pred_cols), len(pred_cols))
    ]

    df = pd.DataFrame(result, columns=["Metric", "Value", "Distance weighted"]).set_index("Metric")
    print(df)

    df.to_csv(os.path.join(runs, "results.csv"), index=True)


def run(jar: Union[str, os.PathLike],
        config: Union[str, os.PathLike],
        args: Union[str, Callable] = "",
        jvm_args="",
        runs: int = 10,
        worker_id: int = 0,
        workers: int = 1,
        seed: int = 4711,
        overwrite: bool = False,
        custom_cli: Callable = None,
        debug: bool = False):
    """Run multiple simulations using different seeds at once. Simulations will be performed sequentially.
    For parallel execution, multiple workers must be started.

    Outputs are written to ``eval-runs/<run number>`` relative to the current working directory.
    After this worker has finished its runs, the results in ``eval-runs`` are processed.

    :param jar: path to executable jar file of the scenario
    :param config: path to config file to run
    :param args: arguments to pass to the simulation, or a callable returning them for a run number
    :param jvm_args: arguments to pass to the JVM
    :param runs: number of simulations to run
    :param worker_id: id of this process
    :param workers: total number of processes
    :param seed: starting seed
    :param overwrite: overwrite existing output directory
    :param custom_cli: use custom command line interface
    :param debug: if true, output will be printed to console
    :raises ValueError: if jar or config are not readable, or the worker id is out of range
    :raises RuntimeError: if a simulation returns a non-zero exit code
    """
    if not os.access(jar, os.R_OK):
        raise ValueError("Can not access JAR File: %s" % jar)

    if not os.access(config, os.R_OK):
        raise ValueError("Can not access config File: %s" % config)

    if worker_id >= workers:
        raise ValueError("Worker ID must be smaller than number of workers (starts at 0).")

    # A negative id never matches "i % workers" and would silently run nothing
    if worker_id < 0:
        raise ValueError("Worker ID must not be negative (starts at 0).")

    # exist_ok avoids a race when several workers are started at the same time
    makedirs("eval-runs", exist_ok=True)

    for i in range(runs):
        # Runs are distributed round-robin over all workers
        if i % workers != worker_id:
            continue

        run_dir = "eval-runs/%03d" % i

        if os.path.exists(run_dir) and not overwrite:
            print("Run %s already exists, skipping." % run_dir)
            continue

        run_args = args(i) if callable(args) else args

        # Similar custom cli interface as calibration
        if custom_cli:
            cmd = custom_cli(jvm_args, jar, config, run_dir, i, seed + i, run_args)
        else:
            cmd = "java %s -jar %s run --config %s --output %s --runId %03d --config:global.randomSeed=%d %s" \
                  % (jvm_args, jar, config, run_dir, i, seed + i, run_args)

        # Extra whitespaces will break argument parsing
        cmd = cmd.strip()

        print("Running cmd %s" % cmd)

        if os.name != 'nt':
            # Split on single spaces and drop empty tokens; quoted arguments
            # containing spaces are not supported by this simple splitting.
            cmd = [c for c in cmd.split(" ") if c != ""]

        p = subprocess.Popen(cmd,
                             stdout=sys.stdout if debug else subprocess.DEVNULL,
                             stderr=sys.stderr if debug else subprocess.DEVNULL)

        try:
            # No pipes are attached to the child, so blocking on wait() is safe
            p.wait()

            if p.returncode != 0:
                print("The scenario could not be run properly and returned with an error code.", file=sys.stderr)
                if not debug:
                    print("Set debug=True and check the output for any errors.", file=sys.stderr)
                    print("Alternatively run the cmd from the log above manually and check for errors.",
                          file=sys.stderr)

                raise RuntimeError("Process returned with error code: %s." % p.returncode)
        finally:
            # Make sure the child does not outlive us, e.g. on KeyboardInterrupt
            p.terminate()

    process_results("eval-runs")


def setup(parser: argparse.ArgumentParser):
parser.add_argument("--jar", type=str, required=True, help="Path to executable JAR file")
parser.add_argument("--config", type=str, required=True, help="Path to config file")
parser.add_argument("--args", type=str, default="", help="Arguments to pass to the simulation")
parser.add_argument("--jvm-args", type=str, default="", help="Arguments to pass to the JVM")
parser.add_argument("--runs", type=int, default=10, help="Number of simulations to run")
parser.add_argument("--worker-id", type=int, default=0, help="ID of this worker")
parser.add_argument("--workers", type=int, default=1, help="Total number of workers")
parser.add_argument("--seed", type=int, default=4711, help="Starting seed")
parser.add_argument("--overwrite", action="store_true", help="Overwrite existing output directories")
parser.add_argument("--debug", action="store_true", help="Print output to console")


def main(args):
    """Entry point of the subcommand: forward the parsed arguments to ``run``."""
    run(
        jar=args.jar,
        config=args.config,
        args=args.args,
        jvm_args=args.jvm_args,
        runs=args.runs,
        worker_id=args.worker_id,
        workers=args.workers,
        seed=args.seed,
        overwrite=args.overwrite,
        debug=args.debug,
    )
5 changes: 4 additions & 1 deletion matsim/scenariogen/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,15 +367,18 @@ class Trip:

@dataclass
class Activity:
    """ Activity information (including some leg information as well) """
    # Only one docstring is kept: a second string literal after the docstring
    # would be a dead expression statement and never end up in __doc__.

    # Required fields (no default value)
    a_id: str
    a_weight: float
    p_id: str
    n: int
    type: Purpose
    duration: int
    leg_dist: float
    leg_duration: float
    leg_mode: TripMode

    # Optional fields, pd.NA marks a missing value
    location: str = pd.NA
    zone: str = pd.NA


@dataclass
Expand Down
Loading

0 comments on commit 7429274

Please sign in to comment.