From 4c435cf4b12dc48ebcc3d05ca82c886883eb0962 Mon Sep 17 00:00:00 2001 From: rakow Date: Sat, 20 Apr 2024 17:18:19 +0200 Subject: [PATCH 01/11] visitations datatype --- matsim/scenariogen/data/__init__.py | 12 ++++- matsim/scenariogen/data/formats/netcheck.py | 52 +++++++++++++++++++++ matsim/scenariogen/facilities/__init__.py | 0 3 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 matsim/scenariogen/data/formats/netcheck.py create mode 100644 matsim/scenariogen/facilities/__init__.py diff --git a/matsim/scenariogen/data/__init__.py b/matsim/scenariogen/data/__init__.py index 0555f6e..32cefb1 100644 --- a/matsim/scenariogen/data/__init__.py +++ b/matsim/scenariogen/data/__init__.py @@ -4,7 +4,7 @@ __all__ = ["read_all", "ParkingPosition", "HouseholdType", "EconomicStatus", "Gender", "Employment", "Availability", "Purpose", "TripMode", "DistanceGroup", "DurationGroup", "SourceDestinationGroup", - "Household", "Person", "Trip", "Activity"] + "Household", "Person", "Trip", "Activity", "Visitations"] import os from dataclasses import dataclass @@ -119,6 +119,7 @@ def sort_idx(cls, series): v = list(cls) return series.map(v.index) + def _df_to_categorical(df, clazz): """ Convert columns to categorical types """ @@ -375,3 +376,12 @@ class Activity: leg_dist: float leg_duration: float leg_mode: TripMode + + +@dataclass +class Visitations: + """ Aggregated visitation information """ + location: str + n: int + purpose: Purpose = pd.NA + time: pd.Timestamp = pd.NA diff --git a/matsim/scenariogen/data/formats/netcheck.py b/matsim/scenariogen/data/formats/netcheck.py new file mode 100644 index 0000000..bd35b2f --- /dev/null +++ b/matsim/scenariogen/data/formats/netcheck.py @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- + +import os + +import pandas as pd + +from .. 
import * + + +def read_visitations(folder): + """ Read all visits from folder """ + + visits = [] + + for f in os.listdir(folder): + + if not f.endswith(".csv"): + continue + + print("Reading", f) + + t = pd.read_csv(os.path.join(folder, f), parse_dates=[0]) + t.timestamps = t.timestamps.str.split(",") + t["idx"] = t.index + + t = t.explode("timestamps") + t.timestamps = pd.to_datetime(t.timestamps, format="%H:%M:%S") + delta = (t.timestamps.dt.hour * 60 + t.timestamps.dt.minute) * 60 + t.timestamps.dt.second + delta = pd.to_timedelta(delta, unit="s") + + t["ts"] = t.day + delta + + t = t.groupby(["device_id", "osm_id", "idx"]).agg(start=("ts", "min"), end=("ts", "max"), + home=("distance_to_home", "mean"), + work=("distance_to_work", "mean")).reset_index() + + t["purpose"] = Purpose.OTHER + t.loc[t.home == 0, "purpose"] = Purpose.HOME + t.loc[t.work == 0, "purpose"] = Purpose.WORK + + visits.append(t) + + df = pd.concat(visits) + + aggr = df.groupby(["osm_id", "purpose"]).agg(count=("idx", "count")).reset_index() + + total = [] + + for v in aggr.itertuples(): + total.append(Visitations(v.osm_id, v.count, v.purpose)) + + return pd.DataFrame(total) \ No newline at end of file diff --git a/matsim/scenariogen/facilities/__init__.py b/matsim/scenariogen/facilities/__init__.py new file mode 100644 index 0000000..e69de29 From a5f0f8bccb9cb977b333f9a3e836b8fac0251896 Mon Sep 17 00:00:00 2001 From: rakow Date: Sun, 21 Apr 2024 17:34:01 +0200 Subject: [PATCH 02/11] new more re-usable ml code --- matsim/scenariogen/ml/__init__.py | 5 + matsim/scenariogen/ml/models.py | 371 +++++++++++++++++++++++++++ matsim/scenariogen/ml/models_feyn.py | 170 ++++++++++++ matsim/scenariogen/ml/train.py | 197 ++++++++++++++ 4 files changed, 743 insertions(+) create mode 100644 matsim/scenariogen/ml/__init__.py create mode 100644 matsim/scenariogen/ml/models.py create mode 100644 matsim/scenariogen/ml/models_feyn.py create mode 100644 matsim/scenariogen/ml/train.py diff --git a/matsim/scenariogen/ml/__init__.py b/matsim/scenariogen/ml/__init__.py new file mode 100644 index 0000000..fed5137 --- /dev/null +++ b/matsim/scenariogen/ml/__init__.py @@ -0,0 +1,5 @@ +# -*- coding: utf-8 -*- + +__all__ = ['MLRegressor'] + +from .train import MLRegressor diff --git a/matsim/scenariogen/ml/models.py b/matsim/scenariogen/ml/models.py new file mode 100644 index 0000000..903b6c5 --- /dev/null +++ b/matsim/scenariogen/ml/models.py @@ -0,0 +1,371 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import itertools +import re + +import sklearn +import sklearn.compose +import sklearn.ensemble +import sklearn.linear_model +import sklearn.svm + +try: + import lightning.regression + import lightning.classification +except ValueError as e: + print("Error during lightning import", e) + +import xgboost as xgb +import lightgbm as lgb + +from sympy.utilities.codegen import codegen + + +def powerset(iterable): + s = list(iterable) + return itertools.chain.from_iterable(itertools.combinations(s, r) for r in range(len(s) + 1)) + + +def create_regressor(trial, classifier_name, random_state): + """ Create regressor model from given classifier """ + + if classifier_name == 'mean': + model = sklearn.dummy.DummyRegressor() + + elif classifier_name == 'SVR': + kernel = trial.suggest_categorical('kernel', ['linear', 'poly', 'rbf', 'sigmoid']) + svc_c = trial.suggest_float('svc_r', 1e-10, 1e10, log=True) + model = sklearn.svm.SVR(C=svc_c, gamma='auto', kernel=kernel) + + elif classifier_name == 'QLatticeRegressor': + from .models_feyn import 
QLatticeRegressor + + model = QLatticeRegressor(trial.suggest_int('complexity', 3, 6, step=1), random_state=random_state) + + elif classifier_name == 'RandomForestRegressor': + rf_max_depth = trial.suggest_int('rf_max_depth', 2, 4) + rf_n = trial.suggest_int('rf_n', 5, 30, step=5) + min_samples_split = trial.suggest_int('min_samples_split', 2, 8, step=1) + b = trial.suggest_categorical('bootstrap', [True, False]) + + model = sklearn.ensemble.RandomForestRegressor(max_depth=rf_max_depth, n_estimators=rf_n, + oob_score=b, min_samples_split=min_samples_split, + random_state=random_state) + elif classifier_name == 'XGBRFRegressor': + + params = { + 'objective': 'reg:squarederror', + 'eval_metric': 'mae', + 'max_depth': trial.suggest_int('max_depth', low=2, high=4), + 'min_child_weight': trial.suggest_int('min_child_weight', low=1, high=10), + 'eta': trial.suggest_float('eta', 0.01, 1, log=True), + 'lambda': trial.suggest_float('lambda', 0.01, 1, log=True), + 'alpha': trial.suggest_float('alpha', 0.01, 1, log=True), + 'gamma': trial.suggest_float('gamma', 0.01, 1, log=True), + 'n_estimators': trial.suggest_int('rf_n', 5, 30, step=5), + 'subsample': 0.9, + 'colsample_bytree': 0.9, + 'colsample_bynode': 0.9, + 'random_state': random_state + } + + model = xgb.XGBRFRegressor(**params) + + elif classifier_name == 'XGBRegressor': + + params = { + 'objective': 'reg:squarederror', + 'eval_metric': 'mae', + 'max_depth': trial.suggest_int('max_depth', low=2, high=4), + 'min_child_weight': trial.suggest_int('min_child_weight', low=1, high=10), + 'eta': trial.suggest_float('eta', 0.01, 1, log=True), + 'lambda': trial.suggest_float('lambda', 0.01, 1, log=True), + 'alpha': trial.suggest_float('alpha', 0.01, 1, log=True), + 'gamma': trial.suggest_float('gamma', 0.01, 1, log=True), + 'n_estimators': trial.suggest_int('rf_n', 5, 30, step=5), + 'subsample': 0.9, + 'colsample_bytree': 0.9, + 'colsample_bynode': 0.9, + 'random_state': random_state + } + + model = xgb.XGBRegressor(**params) + + elif classifier_name == 'ExtraTreesRegressor': + + rf_n = trial.suggest_int('rf_n', 5, 30, step=5) + min_samples_split = trial.suggest_int('min_samples_split', 2, 8, step=1) + max_depth = trial.suggest_int('max_depth', 2, 4) + b = trial.suggest_categorical('bootstrap', [True, False]) + + model = sklearn.ensemble.ExtraTreesRegressor(n_estimators=rf_n, + min_samples_split=min_samples_split, + max_depth=max_depth, + bootstrap=b, oob_score=b, + random_state=random_state) + + elif classifier_name == 'LGBMRegressor': + + params = { + 'objective': 'regression', + 'num_leaves': trial.suggest_int('num_leaves', 2, 32, log=True), + 'n_estimators': trial.suggest_int('n_estimators', 5, 30, step=5), + # 'min_child_weight': trial.suggest_loguniform('min_child_weight', 0.0001, 1), + # 'min_child_samples': trial.suggest_int('min_child_samples', low=5, high=30, step=5), + 'subsample': 0.9, + 'subsample_freq': 10, + 'colsample_bytree': 0.9, + 'reg_alpha': trial.suggest_float('alpha', 1e-10, 1, log=True), + 'random_state': random_state + } + + model = lgb.LGBMRegressor(**params) + + elif classifier_name == "DecisionTreeRegressor": + + min_samples_split = trial.suggest_int('min_samples_split', 2, 8, step=1) + max_depth = trial.suggest_int('max_depth', 2, 4) + + model = sklearn.tree.DecisionTreeRegressor(max_depth=max_depth, min_samples_split=min_samples_split, + random_state=random_state) + + + elif classifier_name == "BaggingRegressor": + + reg = sklearn.linear_model.PassiveAggressiveRegressor(C=trial.suggest_float('C', 0.0001, 100, log=True)) 
+ model = sklearn.ensemble.BaggingRegressor(reg, + n_estimators=trial.suggest_int('rf_n', 5, 30, step=5), + max_samples=trial.suggest_float('alpha', 0.2, 1), + random_state=random_state) + + elif classifier_name == 'Ridge': + + model = sklearn.linear_model.Ridge(alpha=trial.suggest_float('alpha', 0.001, 1000, log=True), solver='saga', + random_state=random_state) + + elif classifier_name == 'Lasso': + + model = sklearn.linear_model.Lasso(alpha=trial.suggest_float('alpha', 0.001, 1000, log=True), + random_state=random_state) + + + elif classifier_name == 'ElasticNet': + + model = sklearn.linear_model.ElasticNet(alpha=trial.suggest_float('alpha', 0.01, 1, log=True), + l1_ratio=trial.suggest_float('ratio', 0, 1), + random_state=random_state) + elif classifier_name == 'SGDRegressor': + + model = sklearn.linear_model.SGDRegressor(alpha=trial.suggest_float('alpha', 0.0001, 1, log=True), + random_state=random_state) + + elif classifier_name == "LinearSVR": + + model = sklearn.svm.LinearSVR(C=trial.suggest_float('C', 0.001, 1000, log=True), dual=False, max_iter=2000, + loss="squared_epsilon_insensitive", random_state=random_state) + + elif classifier_name == 'PassiveAggressiveRegressor': + + model = sklearn.linear_model.PassiveAggressiveRegressor(C=trial.suggest_float('C', 0.0001, 100, log=True), + random_state=random_state) + + elif classifier_name == 'LogisticRegression': + + model = sklearn.linear_model.LogisticRegression(C=trial.suggest_float('C', 0.0001, 100, log=True), + solver='saga', random_state=random_state) + + elif classifier_name == 'AdaGradRegressor': + + model = lightning.regression.AdaGradRegressor(eta=trial.suggest_float('eta', 0.0001, 10, log=True), + alpha=trial.suggest_float('alpha', 0.0001, 10, log=True), + random_state=random_state) + + elif classifier_name == 'CDClassifier': + + model = lightning.regression.CDClassifier(C=trial.suggest_float('C', 0.0001, 10, log=True), + alpha=trial.suggest_float('alpha', 0.0001, 10, log=True), + random_state=random_state) + + elif classifier_name == 'FistaClassifier': + + model = lightning.regression.FistaClassifier(C=trial.suggest_float('C', 0.0001, 10, log=True), + alpha=trial.suggest_float('alpha', 0.0001, 10, log=True), + random_state=random_state) + + elif classifier_name == 'SDCAClassifier': + + model = lightning.regression.SDCAClassifier(alpha=trial.suggest_float('alpha', 0.0001, 10, log=True), + random_state=random_state) + + elif classifier_name == 'OneClassSVM': + + kernel = trial.suggest_categorical('kernel', ['linear', 'poly', 'rbf', 'sigmoid']) + gamma = trial.suggest_categorical('gamma', ['scale', 'auto']) + nu = trial.suggest_float('nu', 0, 1) + + model = sklearn.svm.OneClassSVM(kernel=kernel, gamma=gamma, nu=nu, random_state=random_state) + + elif classifier_name == 'KernelSVC': + + kernel = trial.suggest_categorical('kernel', ['linear', 'poly', 'rbf', 'sigmoid', 'cosine']) + model = lightning.regression.KernelSVC(alpha=trial.suggest_float('alpha', 0.0001, 10, log=True), + kernel=kernel, random_state=random_state) + + else: + raise Exception("Unknown regressor: " + classifier_name) + + return model + + +def sympy_to_c(model): + # classifier can also be used directly if only 2 classes + [(c_name, c_code), (h_name, c_header)] = codegen(("score", model.sympify()), "C89", "model", header=False, + empty=False) + c_code = re.sub(r"data([0-9]+)", r"data[\1]", c_code) + + return c_code + + +def model_to_java(name, package, model_and_error: tuple, scaler, df): + """ Convert to java source file """ + import m2cgen as m2c + + code = 
m2c.export_to_java(model_and_error[0], package, name) + + code, params = replace_params(code) + + imp = """import org.matsim.application.prepare.Predictor; +import it.unimi.dsi.fastutil.objects.Object2DoubleMap; +import it.unimi.dsi.fastutil.objects.Object2ObjectMap; + +/** +* Generated model, do not modify. +* Model: %s +* Error: %f +*/ +public final class""" % model_and_error + + code = code.replace("public class", imp) + code = code.replace(name, name + " implements Predictor") + + features = [f"data[{i}] = {s};\n" for i, s in enumerate(model_features(scaler, df))] + + idx = code.index("public static double score") + + pre = """ + public static %s INSTANCE = new %s(); + public static final double[] DEFAULT_PARAMS = %s; + + @Override + public double predict(Object2DoubleMap features, Object2ObjectMap categories) { + return predict(features, categories, DEFAULT_PARAMS); + } + + @Override + public double[] getData(Object2DoubleMap features, Object2ObjectMap categories) { + double[] data = new double[%d]; +""" % (name, name, str(params).replace("[", "{").replace("]", "}"), len(features)) + + for ft in features: + pre += "\t\t" + ft + + pre += """ + return data; + } + + @Override + public double predict(Object2DoubleMap features, Object2ObjectMap categories, double[] params) { + + double[] data = getData(features, categories); + for (int i = 0; i < data.length; i++) + if (Double.isNaN(data[i])) throw new IllegalArgumentException("Invalid data at index: " + i); + + return score(data, params); + } + """ + + code = code.replace("score(double[] input)", "score(double[] input, double[] params)") + + return f"{code[:idx]}{pre}{code[idx:]}" + + +def model_to_py(name, model_and_error, scaler, df): + import m2cgen as m2c + + code = m2c.export_to_python(model[0], function_name="score") + + code, params = replace_params(code) + + code = code.replace("input", "inputs") + code = code.replace("(inputs):", "(params, inputs):") + + features = "\t\t".join([f"data[{i}] = {s}\n" for i, s in enumerate(model_features(scaler, df))]) + features = features.replace(".getDouble(", ".get(") + + code = """def features(ft, data): +\t\t%s +params = %s +""" % (features, params) + code + + code += """ +def batch_loss(params, inputs, targets): + error = 0 + for x, y in zip(inputs, targets): + preds = score(params, x) + error += (preds - y) ** 2 + return error +""" + + return code + + +def replace_params(code): + """ Replaces and collects model parameters """ + + pattern = re.compile(r"(var\d+) = (-?\d+.\d+)") + + params = [] + new_code = '' + start = 0 + + for m in pattern.finditer(code): + end, newstart = m.span() + new_code += code[start:end] + rep = "%s = params[%d]" % (m.group(1), len(params)) + new_code += rep + start = newstart + + params.append(float(m.group(2))) + + new_code += code[start:] + + return new_code, params + + +def model_features(scaler, df): + for name, t, ids in scaler.transformers_: + + for i, idx in enumerate(ids): + + c = df.columns[idx] + + if name == "scale": + + t = scaler.named_transformers_[name] + + with_mean = t.get_params()["with_mean"] + + if with_mean: + yield f"(features.getDouble(\"{c}\") - {t.mean_[i]}) / {t.scale_[i]}" + else: + yield f"features.getDouble(\"{c}\") / {t.scale_[i]}" + + elif t == "passthrough": + yield f"features.getDouble(\"{c}\")" + + elif t == "drop": + continue + + else: + raise Exception("Unknown transformer: " + t) diff --git a/matsim/scenariogen/ml/models_feyn.py b/matsim/scenariogen/ml/models_feyn.py new file mode 100644 index 0000000..74df311 --- /dev/null +++ 
b/matsim/scenariogen/ml/models_feyn.py @@ -0,0 +1,170 @@ +# -*- coding: utf-8 -*- +"""These models depend on the feyn package which is not listed and only optional due to its licensing""" + +from time import time + +import numpy as np +import pandas as pd + +from sklearn.base import BaseEstimator, RegressorMixin, ClassifierMixin +from sklearn.metrics import mean_squared_error + +import feyn + +class QLatticeBase(BaseEstimator): + _kind = None + _loss = None + + @staticmethod + def as_df(X, y=None): + tf = pd.DataFrame(X, dtype=np.float64) + if y is not None: + tf["y"] = y + + tf.columns = tf.columns.map(lambda x: "data%s" % x if x != "y" else "y") + + return tf + + def __init__(self, max_complexity, n_epochs, criterion, progress, random_state) -> None: + super().__init__() + + self.ql = feyn.QLattice(random_state) + self.n_epochs = n_epochs + self.max_complexity = max_complexity + self.criterion = criterion + self.progress = progress + self.random_state = random_state + + def fit(self, X, y, sample_weight=None, test=None, val=None, f=None, ftest=None, fval=None, val_interval=10): + + tf = self.as_df(X, y) + + if test is not None: + test = self.as_df(*test) + + if val is not None: + val = self.as_df(*val) + + models = [] + m_count = 0 + + start = time() + for i in range(1, self.n_epochs + 1): + + new_sample = self.ql.sample_models( + input_names=tf.columns, + output_name='y', + kind=self._kind, + max_complexity=self.max_complexity + ) + + models += new_sample + m_count += len(new_sample) + + models = feyn.fit_models( + models=models, + data=tf, + sample_weights=sample_weight, + loss_function=self._loss, + criterion=self.criterion, + threads=8 + ) + + # Sort for val method for pruning + if test is not None: + if f is None: + f = lambda pred: mean_squared_error(tf["y"], pred) + + if ftest is None: + ftest = lambda pred: mean_squared_error(test["y"], pred) + + if fval is None: + fval = lambda pred: mean_squared_error(val["y"], pred) + + # Custom prune / sorting + if val is not None and ((i - 1) % val_interval == 0 or i == self.n_epochs): + models = list(sorted( + models, key=lambda m: ftest(m.predict(test)) + fval(m.predict(val)) + )) + else: + models = list(sorted( + models, key=lambda m: f(m.predict(tf)) + ftest(m.predict(test)) + )) + + models = feyn.prune_models(models) + + info = "" + if ftest is not None: + # Custom label + info += " Test: %.2f" % ftest(models[0].predict(test)) + + if val is not None and fval is not None: + # Custom label + info += " Val: %.2f" % fval(models[0].predict(val)) + + elapsed = time() - start + if len(models) > 0 and self.progress: + feyn.show_model( + models[0], + feyn.tools.get_progress_label(i, self.n_epochs, elapsed, m_count) + info, + update_display=True, + ) + + self.ql.update(models) + + self.best = [models[0]] + feyn.get_diverse_models(models[1:], n=5) + + return self + + def predict(self, X): + return self.best[0].predict(self.as_df(X)) + + def show(self): + feyn.show_model(self.best[0]) + + def sympify(self): + return self.best[0].sympify(signif=9, symbolic_lr=True, include_weights=True) + + def plot(self, X, y, Xval, yval): + + train = self.as_df(X, y) + test = self.as_df(Xval, yval) + + self.best[0].plot( + data=train, + compare_data=test + ) + + +class QLatticeClassifier(QLatticeBase, ClassifierMixin): + + def __init__(self, max_complexity=10, n_epochs=30, criterion="bic", progress=False, random_state=-1) -> None: + super().__init__(max_complexity, n_epochs, criterion, progress, random_state) + + self._kind = "classification" + self._loss = 
"binary_cross_entropy" + + def copy(self, n=0): + """ Make new model by copying the best n""" + + other = QLatticeClassifier(self.max_complexity, self.n_epochs, self.progress, self.random_state) + other.best = [self.best[n]] + + return other + + +class QLatticeRegressor(QLatticeBase, RegressorMixin): + + def __init__(self, max_complexity=10, n_epochs=30, criterion="bic", progress=False, random_state=-1) -> None: + super().__init__(max_complexity, n_epochs, criterion, progress, random_state) + + self._kind = "regression" + self._loss = "squared_error" + + def copy(self, n=0): + """ Make new model by copying the best n""" + + other = QLatticeRegressor(self.max_complexity, self.n_epochs, self.progress, self.random_state) + other.best = [self.best[n]] + + return other \ No newline at end of file diff --git a/matsim/scenariogen/ml/train.py b/matsim/scenariogen/ml/train.py new file mode 100644 index 0000000..b5347c4 --- /dev/null +++ b/matsim/scenariogen/ml/train.py @@ -0,0 +1,197 @@ +#!/usr/bin/env python + +import random +from os import makedirs +from os.path import join + +import optuna +import pandas as pd +import sklearn.ensemble +from sklearn.metrics import mean_absolute_error +from sklearn.model_selection import KFold +from tqdm.auto import tqdm + +from .models import create_regressor, model_to_java, model_to_py + +classifier = { + 'mean', + 'XGBRFRegressor', + 'XGBRegressor', + 'RandomForestRegressor', + 'ExtraTreesRegressor', + 'LGBMRegressor', + 'DecisionTreeRegressor', + 'PassiveAggressiveRegressor', + # More + # 'SVR', + # 'KernelSVC', + # 'QLatticeRegressor', + # 'LinearSVR', + # 'Ridge', + # 'SGDRegressor', + # 'LogisticRegression', + # 'AdaGradRegressor', + # 'CDRegressor', + # 'FistaRegressor', + # 'SDCARegressor', + # 'Lasso', + # 'ElasticNet' +} + + +class MLRegressor: + """ General class for machine learning regression models """ + + def __init__(self, n_trials=100, error="mae", fold=None): + self.n_trials = n_trials + self.fold = fold if fold else KFold(n_splits=5, shuffle=False) + self.error = mean_absolute_error + self.models = {} + self.df = None + self.exclude = None + self.target = None + self.pb: tqdm = None + self.best = None + # Currently trained model + self.model = None + + def get(self, idx): + tf = self.df if idx is None else self.df.iloc[idx] + return tf.drop(columns=self.exclude + [self.target]), tf[self.target].to_numpy() + + def best_model(self, models): + errors = [] + for m in models: + X, y = self.get(None) + X = self.scaler.transform(X) + + pred = m.predict(X) + err = self.error(y, pred) + + errors.append((m, err)) + + errors = sorted(errors, key=lambda m: m[1]) + + return errors[0] + + def callback(self, study: optuna.Study, trial: optuna.Trial): + """ Callback during training """ + if study.best_trial == trial: + self.best = self.model + self.pb.set_postfix({"error": trial.values[0], "iter": trial.number}) + + self.pb.update(1) + + def fit(self, df: pd.DataFrame, target: str, + exclude: list[str] = None, + normalize: list[str] = None): + """ Fit model to the data + + :param df: DataFrame with the data + :param target: Column name of target values contained in df + :param exclude: List of columns to exclude from the model + :param normalize: List of columns to normalize + """ + self.df = df + self.exclude = [] if exclude is None else exclude + self.target = target + + _scaler = sklearn.preprocessing.StandardScaler(with_mean=True) + + if normalize is None: + normalize = [] + + self.scaler = sklearn.compose.ColumnTransformer([ + ("scale", _scaler, 
[df.columns.get_loc(x) for x in normalize]) # column indices + ], + remainder="passthrough" + ) + + # Fit scaler, excluding target column + self.scaler.fit(self.get(None)[0]) + + def objective(classifier_name): + def _fn(trial): + r = random.Random(42) + + random_state = r.getrandbits(31) + + seq = iter(self.fold.split(df)) + + error = 0 + i = 0 + + candidates = [] + + for train, test in seq: + model = create_regressor(trial, classifier_name, random_state) + + candidates.append(model) + + X, y = self.get(train) + X = self.scaler.transform(X) + + model.fit(X, y) + + Xval, yval = self.get(test) + Xval = self.scaler.transform(Xval) + + pred = model.predict(Xval) + + error += self.error(yval, pred) + i += 1 + + self.model = self.best_model(candidates)[0] + + return error / i + + return _fn + + self.models = {} + + optuna.logging.set_verbosity(optuna.logging.WARNING) + + with tqdm(total=len(classifier), position=0, leave=True) as pbar: + for m in classifier: + pbar.set_description(f"Training model {m}") + + with tqdm(total=self.n_trials, desc="Iteration", position=1, leave=True) as self.pb: + study = optuna.create_study(sampler=optuna.samplers.TPESampler(seed=42), direction='minimize') + study.optimize(objective(m), n_trials=self.n_trials, callbacks=[self.callback], + show_progress_bar=False) + + self.models[m] = self.best + + pbar.update(1) + + self.best = self.best_model(self.models.values()) + + def write_java(self, folder, package_name, class_name): + """ Write trained models as java code""" + + if self.best is None: + raise ValueError("No model trained") + + output = join(folder, package_name.replace(".", "/")) + makedirs(output, exist_ok=True) + + with open(join(output, class_name + ".java"), "w") as f: + code = model_to_java(class_name, package_name, self.best, self.scaler, self.get(None)[0]) + f.write(code) + + def write_python(self, folder, name): + """ Write trained models as python code""" + + if self.best is None: + raise ValueError("No model trained") + + makedirs(folder, exist_ok=True) + + with open(join(folder, "__init__.py"), "w") as f: + f.write("") + + with open(join(folder, name + ".py"), "w") as f: + code = model_to_py(t, self.best, self.scaler, self.get(None)[0]) + f.write("# -*- coding: utf-8 -*-\n") + f.write("\"\"\"%s\nError: %f\"\"\"\n" % self.best) + f.write(code) From 09beb991f344426f23736984175b5a5d801b9964 Mon Sep 17 00:00:00 2001 From: rakow Date: Tue, 14 May 2024 21:56:37 +0200 Subject: [PATCH 03/11] model type for intersections --- matsim/scenariogen/ml/models.py | 8 +++++--- matsim/scenariogen/ml/train.py | 8 ++++++-- matsim/scenariogen/network/features.py | 7 ++++++- matsim/scenariogen/network/run_train_model.py | 6 ++++-- 4 files changed, 21 insertions(+), 8 deletions(-) diff --git a/matsim/scenariogen/ml/models.py b/matsim/scenariogen/ml/models.py index 903b6c5..1608efa 100644 --- a/matsim/scenariogen/ml/models.py +++ b/matsim/scenariogen/ml/models.py @@ -227,7 +227,7 @@ def sympy_to_c(model): return c_code -def model_to_java(name, package, model_and_error: tuple, scaler, df): +def model_to_java(name, package, model_and_error: tuple, scaler, bounds, df): """ Convert to java source file """ import m2cgen as m2c @@ -270,6 +270,8 @@ def model_to_java(name, package, model_and_error: tuple, scaler, df): for ft in features: pre += "\t\t" + ft + ret = "score(data, params)" if bounds is None else "Math.min(Math.max(score(data, params), %f), %f)" % bounds + pre += """ return data; } @@ -281,9 +283,9 @@ def model_to_java(name, package, model_and_error: tuple, scaler, 
df): for (int i = 0; i < data.length; i++) if (Double.isNaN(data[i])) throw new IllegalArgumentException("Invalid data at index: " + i); - return score(data, params); + return %s; } - """ + """ % ret code = code.replace("score(double[] input)", "score(double[] input, double[] params)") diff --git a/matsim/scenariogen/ml/train.py b/matsim/scenariogen/ml/train.py index b5347c4..a9e47ba 100644 --- a/matsim/scenariogen/ml/train.py +++ b/matsim/scenariogen/ml/train.py @@ -42,9 +42,10 @@ class MLRegressor: """ General class for machine learning regression models """ - def __init__(self, n_trials=100, error="mae", fold=None): + def __init__(self, n_trials=100, error="mae", fold=None, bounds=None): self.n_trials = n_trials self.fold = fold if fold else KFold(n_splits=5, shuffle=False) + self.bounds = bounds self.error = mean_absolute_error self.models = {} self.df = None @@ -176,7 +177,7 @@ def write_java(self, folder, package_name, class_name): makedirs(output, exist_ok=True) with open(join(output, class_name + ".java"), "w") as f: - code = model_to_java(class_name, package_name, self.best, self.scaler, self.get(None)[0]) + code = model_to_java(class_name, package_name, self.best, self.scaler, self.bounds, self.get(None)[0]) f.write(code) def write_python(self, folder, name): @@ -191,6 +192,9 @@ def write_python(self, folder, name): f.write("") with open(join(folder, name + ".py"), "w") as f: + + # TODO: bounds not implemented + code = model_to_py(t, self.best, self.scaler, self.get(None)[0]) f.write("# -*- coding: utf-8 -*-\n") f.write("\"\"\"%s\nError: %f\"\"\"\n" % self.best) diff --git a/matsim/scenariogen/network/features.py b/matsim/scenariogen/network/features.py index 219a59e..824db73 100644 --- a/matsim/scenariogen/network/features.py +++ b/matsim/scenariogen/network/features.py @@ -26,7 +26,8 @@ def build_datasets(network, inter, routes, model_type): aggr = df_i.groupby(["junction_type"]) df_i["norm_cap"] = df_i.capacity / df_i.num_lanes for g in aggr.groups: - result["capacity_" + str(g)] = prepare_dataframe(aggr.get_group(g), model_type, target="norm_cap") + # Use total capacity and not normed per lane + result["capacity_" + str(g)] = prepare_dataframe(aggr.get_group(g), model_type, target="capacity") return result @@ -65,6 +66,10 @@ def prepare_dataframe(df, model_type, target): "junction_inc_lanes", "priority_lower", "priority_equal", "priority_higher", "is_secondary_or_higher", "is_primary_or_higher", "is_motorway", "is_link"]] + elif model_type == "intersection": + df = df[["target", "speed", "num_lanes", "junction_inc_lanes", "num_conns", "num_response", "num_foes", + "is_secondary_or_higher", "num_left", "num_right", "num_straight", "dir_exclusive"]] + else: raise ValueError("Illegal model type:" + model_type) diff --git a/matsim/scenariogen/network/run_train_model.py b/matsim/scenariogen/network/run_train_model.py index 8c82824..989587e 100644 --- a/matsim/scenariogen/network/run_train_model.py +++ b/matsim/scenariogen/network/run_train_model.py @@ -25,7 +25,7 @@ def setup(parser: ArgumentParser): parser.add_argument("--input-intersections", type=str, nargs="+", help="Path to file with intersection results", required=True) parser.add_argument("--input-routes", type=str, nargs="+", help="Path to file with route results.", required=True) - parser.add_argument("--model-type", help="Type of model (features to use)", choices=["default", "extended"], + parser.add_argument("--model-type", help="Type of model (features to use)", choices=["default", "extended", "intersection"], 
default="default") @@ -49,8 +49,10 @@ def get(idx, t): df = get(None, t)[0] norm = ["length", "speed", "num_lanes"] - if args.model_type == "full": + if args.model_type == "extended": norm += ["num_foes", "junction_inc_lanes"] + elif args.model_type == "intersection": + norm += [] scaler[t] = sklearn.compose.ColumnTransformer([ ("scale", _scaler, [df.columns.get_loc(x) for x in norm]) # column indices From bbfd48e5011ed4e13cbf3f4c6cfce9b35d21ac38 Mon Sep 17 00:00:00 2001 From: rakow Date: Tue, 14 May 2024 22:06:52 +0200 Subject: [PATCH 04/11] update interssection data --- matsim/scenariogen/network/features.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/matsim/scenariogen/network/features.py b/matsim/scenariogen/network/features.py index 824db73..392e33c 100644 --- a/matsim/scenariogen/network/features.py +++ b/matsim/scenariogen/network/features.py @@ -113,7 +113,7 @@ def read_intersections(folder): # there could be exclusive lanes and the capacity to two edges completely additive # however if the capacity is shared one direction could use way more than physical possible - aggr = df.groupby("fromEdgeId").agg(capacity=("flow", "max")).reset_index() + aggr = df.groupby("fromEdgeId").agg(capacityMax=("flow", "max"), capacityMean=("flow", "mean")).reset_index() aggr.rename(columns={"fromEdgeId": "edgeId"}) data.append(aggr) From 6c88f346317f96d3a24a239db9ec3f2faf57f290 Mon Sep 17 00:00:00 2001 From: rakow Date: Wed, 15 May 2024 07:57:26 +0200 Subject: [PATCH 05/11] measure separate flow per lane --- .../scenariogen/network/run_intersections.py | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/matsim/scenariogen/network/run_intersections.py b/matsim/scenariogen/network/run_intersections.py index a688229..1a472cb 100644 --- a/matsim/scenariogen/network/run_intersections.py +++ b/matsim/scenariogen/network/run_intersections.py @@ -72,8 +72,9 @@ def writeDetectorFile(f_name, output, lanes): f.write(text) -def read_result(folder, **kwargs): - flow = 0 +def read_result(folder, junctionId, fromEdgeId, toEdgeId): + + res = [] for f in os.listdir(folder): if not f.endswith(".xml"): @@ -81,6 +82,7 @@ def read_result(folder, **kwargs): total = 0 end = 0 + lane = 0 for _, elem in ET.iterparse(join(folder, f), events=("end",), tag=('interval',), @@ -91,12 +93,21 @@ def read_result(folder, **kwargs): if begin < 60: continue - total += float(elem.attrib["nVehContrib"]) + total = float(elem.attrib["nVehContrib"]) + + flow = total * (3600 / (end - 60)) + + res.append({ + "junctionId": junctionId, + "fromEdgeId": fromEdgeId, + "toEdgeId": toEdgeId, + "lane": lane, + "flow": flow + }) - flow += total * (3600 / (end - 60)) + lane += 1 - kwargs["flow"] = flow - return kwargs + return res def run(args, nodes): @@ -170,7 +181,7 @@ def run(args, nodes): go(p_scenario, args) # Read output - res.append(read_result(folder, + res.extend(read_result(folder, junctionId=node._id, fromEdgeId=fromEdge._id, toEdgeId=toEdge._id)) From 1bef1afe5135f22aa938206f310321d2744ea4a6 Mon Sep 17 00:00:00 2001 From: rakow Date: Wed, 15 May 2024 08:19:54 +0200 Subject: [PATCH 06/11] measure separate flow per lane --- matsim/scenariogen/network/run_intersections.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/matsim/scenariogen/network/run_intersections.py b/matsim/scenariogen/network/run_intersections.py index 1a472cb..26ab5be 100644 --- a/matsim/scenariogen/network/run_intersections.py +++ b/matsim/scenariogen/network/run_intersections.py @@ -75,6 +75,7 
@@ def writeDetectorFile(f_name, output, lanes): def read_result(folder, junctionId, fromEdgeId, toEdgeId): res = [] + lane = 0 for f in os.listdir(folder): if not f.endswith(".xml"): @@ -82,7 +83,6 @@ def read_result(folder, junctionId, fromEdgeId, toEdgeId): total = 0 end = 0 - lane = 0 for _, elem in ET.iterparse(join(folder, f), events=("end",), tag=('interval',), @@ -93,10 +93,9 @@ def read_result(folder, junctionId, fromEdgeId, toEdgeId): if begin < 60: continue - total = float(elem.attrib["nVehContrib"]) + total += float(elem.attrib["nVehContrib"]) flow = total * (3600 / (end - 60)) - res.append({ "junctionId": junctionId, "fromEdgeId": fromEdgeId, From 51657ffba9cc8dccc204f1ff9738e063be310e87 Mon Sep 17 00:00:00 2001 From: rakow Date: Wed, 15 May 2024 10:54:59 +0200 Subject: [PATCH 07/11] update capacity calculation --- matsim/scenariogen/network/features.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/matsim/scenariogen/network/features.py b/matsim/scenariogen/network/features.py index 392e33c..be4db0f 100644 --- a/matsim/scenariogen/network/features.py +++ b/matsim/scenariogen/network/features.py @@ -111,11 +111,18 @@ def read_intersections(folder): print("Empty csv", f) continue - # there could be exclusive lanes and the capacity to two edges completely additive - # however if the capacity is shared one direction could use way more than physical possible - aggr = df.groupby("fromEdgeId").agg(capacityMax=("flow", "max"), capacityMean=("flow", "mean")).reset_index() + # Filter flows that are not relevant + df = df[df.flow > 50] + + # Capacities per lane + aggr = df.groupby(["fromEdgeId", "lane"]).agg(capacityMax=("flow", "max"), capacityMean=("flow", "mean")).reset_index() + + # Sum lane capacities + aggr = df.groupby("fromEdgeId").agg(capacityMax=("capacityMax", "sum"), capacityMean=("capacityMean", "sum")).reset_index() aggr.rename(columns={"fromEdgeId": "edgeId"}) + # There will be two capacities, one for the max which would assume that all vehicles takes the turn with the highest capacity + # Mean capacity is the average capacity of all possible turn directions per lane data.append(aggr) return pd.concat(data) From e167ea609761cf30aeb5801e0ebc0b0fa5db3b22 Mon Sep 17 00:00:00 2001 From: rakow Date: Wed, 15 May 2024 14:20:00 +0200 Subject: [PATCH 08/11] possibility to only train intersection model, updated to new predictor API --- matsim/scenariogen/network/features.py | 22 ++++++++++------ matsim/scenariogen/network/models.py | 25 +++++++++++-------- matsim/scenariogen/network/run_train_model.py | 4 +-- 3 files changed, 30 insertions(+), 21 deletions(-) diff --git a/matsim/scenariogen/network/features.py b/matsim/scenariogen/network/features.py index be4db0f..48db129 100644 --- a/matsim/scenariogen/network/features.py +++ b/matsim/scenariogen/network/features.py @@ -11,19 +11,25 @@ def build_datasets(network, inter, routes, model_type): ft = pd.concat([pd.read_csv(n) for n in network]) df_i = pd.concat([pd.merge(pd.read_csv(i), ft, left_on="fromEdgeId", right_on="linkId") for i in inter]) - df_r = pd.concat( - [pd.merge(pd.read_csv(r).drop(columns=["speed"]), ft, left_on="edgeId", right_on="linkId") for r in routes]) result = {} - aggr = df_r.groupby(["junction_type"]) - for g in aggr.groups: - if str(g) == "dead_end": - continue + if routes: + df_r = pd.concat( + [pd.merge(pd.read_csv(r).drop(columns=["speed"]), ft, left_on="edgeId", right_on="linkId") for r in routes]) + + aggr = df_r.groupby(["junction_type"]) + for g in aggr.groups: + if 
str(g) == "dead_end": + continue - result["speedRelative_" + str(g)] = prepare_dataframe(aggr.get_group(g), model_type, target="speedRelative") + result["speedRelative_" + str(g)] = prepare_dataframe(aggr.get_group(g), model_type, target="speedRelative") aggr = df_i.groupby(["junction_type"]) + + # Use a mix of max and mean capacities + df_i["capacity"] = (df_i.capacityMax + df_i.capacityMean) / 2 + df_i["norm_cap"] = df_i.capacity / df_i.num_lanes for g in aggr.groups: # Use total capacity and not normed per lane @@ -67,7 +73,7 @@ def prepare_dataframe(df, model_type, target): "is_secondary_or_higher", "is_primary_or_higher", "is_motorway", "is_link"]] elif model_type == "intersection": - df = df[["target", "speed", "num_lanes", "junction_inc_lanes", "num_conns", "num_response", "num_foes", + df = df[["target", "speed", "num_lanes", "num_to_links", "junction_inc_lanes", "num_conns", "num_response", "num_foes", "is_secondary_or_higher", "num_left", "num_right", "num_straight", "dir_exclusive"]] else: diff --git a/matsim/scenariogen/network/models.py b/matsim/scenariogen/network/models.py index fd2d9df..8965ad5 100644 --- a/matsim/scenariogen/network/models.py +++ b/matsim/scenariogen/network/models.py @@ -238,16 +238,19 @@ def model_to_java(name, package, model, scaler, df): code, params = replace_params(code) - imp = """import org.matsim.application.prepare.network.params.FeatureRegressor; + imp = """import org.matsim.application.prepare.Predictor; import it.unimi.dsi.fastutil.objects.Object2DoubleMap; +import it.unimi.dsi.fastutil.objects.Object2ObjectMap; + /** * Generated model, do not modify. +* Model: %s */ -public final class""" +public final class""" % model code = code.replace("public class", imp) - code = code.replace(name, name + " implements FeatureRegressor") + code = code.replace(name, name + " implements Predictor") features = [f"data[{i}] = {s};\n" for i, s in enumerate(model_features(scaler, df))] @@ -258,12 +261,12 @@ def model_to_java(name, package, model, scaler, df): public static final double[] DEFAULT_PARAMS = %s; @Override - public double predict(Object2DoubleMap ft) { - return predict(ft, DEFAULT_PARAMS); + public double predict(Object2DoubleMap features, Object2ObjectMap categories) { + return predict(features, categories, DEFAULT_PARAMS); } @Override - public double[] getData(Object2DoubleMap ft) { + public double[] getData(Object2DoubleMap features, Object2ObjectMap categories) { double[] data = new double[%d]; """ % (name, name, str(params).replace("[", "{").replace("]", "}"), len(features)) @@ -275,9 +278,9 @@ def model_to_java(name, package, model, scaler, df): } @Override - public double predict(Object2DoubleMap ft, double[] params) { + public double predict(Object2DoubleMap features, Object2ObjectMap categories, double[] params) { - double[] data = getData(ft); + double[] data = getData(features, categories); for (int i = 0; i < data.length; i++) if (Double.isNaN(data[i])) throw new IllegalArgumentException("Invalid data at index: " + i); @@ -357,12 +360,12 @@ def model_features(scaler, df): with_mean = t.get_params()["with_mean"] if with_mean: - yield f"(ft.getDouble(\"{c}\") - {t.mean_[i]}) / {t.scale_[i]}" + yield f"(features.getDouble(\"{c}\") - {t.mean_[i]}) / {t.scale_[i]}" else: - yield f"ft.getDouble(\"{c}\") / {t.scale_[i]}" + yield f"features.getDouble(\"{c}\") / {t.scale_[i]}" elif t == "passthrough": - yield f"ft.getDouble(\"{c}\")" + yield f"features.getDouble(\"{c}\")" elif t == "drop": continue diff --git 
a/matsim/scenariogen/network/run_train_model.py b/matsim/scenariogen/network/run_train_model.py index 989587e..14ca80a 100644 --- a/matsim/scenariogen/network/run_train_model.py +++ b/matsim/scenariogen/network/run_train_model.py @@ -24,7 +24,7 @@ def setup(parser: ArgumentParser): parser.add_argument("--network-features", type=str, nargs="+", help="Path to file with edge features", required=True) parser.add_argument("--input-intersections", type=str, nargs="+", help="Path to file with intersection results", required=True) - parser.add_argument("--input-routes", type=str, nargs="+", help="Path to file with route results.", required=True) + parser.add_argument("--input-routes", type=str, nargs="+", help="Path to file with route results.", required=False) parser.add_argument("--model-type", help="Type of model (features to use)", choices=["default", "extended", "intersection"], default="default") @@ -52,7 +52,7 @@ def get(idx, t): if args.model_type == "extended": norm += ["num_foes", "junction_inc_lanes"] elif args.model_type == "intersection": - norm += [] + norm = [] scaler[t] = sklearn.compose.ColumnTransformer([ ("scale", _scaler, [df.columns.get_loc(x) for x in norm]) # column indices From b6eecdfdc569053cc1f41c7b71998e98bdc75af8 Mon Sep 17 00:00:00 2001 From: rakow Date: Wed, 15 May 2024 22:15:58 +0200 Subject: [PATCH 09/11] update capacity target --- matsim/scenariogen/network/features.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/matsim/scenariogen/network/features.py b/matsim/scenariogen/network/features.py index 48db129..753af5d 100644 --- a/matsim/scenariogen/network/features.py +++ b/matsim/scenariogen/network/features.py @@ -28,12 +28,10 @@ def build_datasets(network, inter, routes, model_type): aggr = df_i.groupby(["junction_type"]) # Use a mix of max and mean capacities - df_i["capacity"] = (df_i.capacityMax + df_i.capacityMean) / 2 - + df_i["capacity"] = df_i.capacityMax df_i["norm_cap"] = df_i.capacity / df_i.num_lanes for g in aggr.groups: - # Use total capacity and not normed per lane - result["capacity_" + str(g)] = prepare_dataframe(aggr.get_group(g), model_type, target="capacity") + result["capacity_" + str(g)] = prepare_dataframe(aggr.get_group(g), model_type, target="norm_cap") return result From aa02af4f349dc67898e63c9fab14843934e749d5 Mon Sep 17 00:00:00 2001 From: rakow Date: Wed, 15 May 2024 22:52:28 +0200 Subject: [PATCH 10/11] update features --- matsim/scenariogen/network/features.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/matsim/scenariogen/network/features.py b/matsim/scenariogen/network/features.py index 753af5d..14b0b17 100644 --- a/matsim/scenariogen/network/features.py +++ b/matsim/scenariogen/network/features.py @@ -72,7 +72,7 @@ def prepare_dataframe(df, model_type, target): elif model_type == "intersection": df = df[["target", "speed", "num_lanes", "num_to_links", "junction_inc_lanes", "num_conns", "num_response", "num_foes", - "is_secondary_or_higher", "num_left", "num_right", "num_straight", "dir_exclusive"]] + "is_primary_or_higher", "is_secondary_or_higher", "num_left", "num_right", "num_straight"]] else: raise ValueError("Illegal model type:" + model_type) From 0f535aad8ca03e12361cb362ad3eea08213f3350 Mon Sep 17 00:00:00 2001 From: rakow Date: Sat, 15 Jun 2024 15:26:24 +0200 Subject: [PATCH 11/11] fix linter --- matsim/scenariogen/ml/models.py | 2 +- matsim/scenariogen/ml/train.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/matsim/scenariogen/ml/models.py 
b/matsim/scenariogen/ml/models.py index 1608efa..d081b33 100644 --- a/matsim/scenariogen/ml/models.py +++ b/matsim/scenariogen/ml/models.py @@ -295,7 +295,7 @@ def model_to_java(name, package, model_and_error: tuple, scaler, bounds, df): def model_to_py(name, model_and_error, scaler, df): import m2cgen as m2c - code = m2c.export_to_python(model[0], function_name="score") + code = m2c.export_to_python(model_and_error[0], function_name="score") code, params = replace_params(code) diff --git a/matsim/scenariogen/ml/train.py b/matsim/scenariogen/ml/train.py index a9e47ba..5f44d90 100644 --- a/matsim/scenariogen/ml/train.py +++ b/matsim/scenariogen/ml/train.py @@ -195,7 +195,7 @@ def write_python(self, folder, name): # TODO: bounds not implemented - code = model_to_py(t, self.best, self.scaler, self.get(None)[0]) + code = model_to_py(name, self.best, self.scaler, self.get(None)[0]) f.write("# -*- coding: utf-8 -*-\n") f.write("\"\"\"%s\nError: %f\"\"\"\n" % self.best) f.write(code)
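
The timestamp handling in read_visitations (patch 1) is compact; below is a self-contained sketch of the same transformation on a synthetic netcheck-style row (the column values are made up), showing how the comma-joined timestamps of one visit become an absolute start and end time:

    import pandas as pd

    # One made-up netcheck row: three pings at the same OSM location
    t = pd.DataFrame({
        "day": pd.to_datetime(["2024-04-20"]),
        "device_id": ["d1"],
        "osm_id": [42],
        "timestamps": ["08:15:00,08:40:00,09:05:00"],
    })

    # One row per timestamp, as in read_visitations
    t.timestamps = t.timestamps.str.split(",")
    t["idx"] = t.index
    t = t.explode("timestamps")

    # Time of day as a second offset added onto the date
    t.timestamps = pd.to_datetime(t.timestamps, format="%H:%M:%S")
    delta = (t.timestamps.dt.hour * 60 + t.timestamps.dt.minute) * 60 + t.timestamps.dt.second
    t["ts"] = t.day + pd.to_timedelta(delta, unit="s")

    # Each visit spans from the first to the last ping
    visit = t.groupby(["device_id", "osm_id", "idx"]).agg(start=("ts", "min"), end=("ts", "max"))
    print(visit)  # start 2024-04-20 08:15:00, end 2024-04-20 09:05:00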
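
A minimal usage sketch of the MLRegressor API introduced in patch 2, assuming the optional dependencies (optuna, xgboost, lightgbm) are installed; the DataFrame contents, package and class names are made up for illustration:

    import numpy as np
    import pandas as pd

    from matsim.scenariogen.ml import MLRegressor

    # Made-up training data: capacity as a noisy function of lanes and speed
    rng = np.random.default_rng(42)
    df = pd.DataFrame({
        "speed": rng.uniform(30, 100, 200),
        "num_lanes": rng.integers(1, 4, 200).astype(float),
        "edge_id": np.arange(200.0),  # identifier only, excluded below
    })
    df["capacity"] = 800 * df.num_lanes + 5 * df.speed + rng.normal(0, 50, 200)

    ml = MLRegressor(n_trials=10)
    ml.fit(df, target="capacity", exclude=["edge_id"], normalize=["speed"])

    # Best (model, error) pair over all classifier types
    model, error = ml.best
    print(model, error)

    # Writes gen/org/example/Capacity.java implementing the Predictor interface
    ml.write_java("gen", "org.example", "Capacity")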
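
To make the parameter lifting in replace_params concrete, here is the same regex transformation inlined into a standalone snippet; the input code string is a made-up stand-in for m2cgen output:

    import re

    # Same pattern as replace_params in ml/models.py
    pattern = re.compile(r"(var\d+) = (-?\d+.\d+)")

    code = "var0 = 3.14159\nvar1 = -0.5\nres = var0 * x + var1"

    # Numeric literals bound to varN names are collected into a params list,
    # so they can be re-estimated later without regenerating the model code
    params = []
    new_code = ""
    start = 0
    for m in pattern.finditer(code):
        end, newstart = m.span()
        new_code += code[start:end] + "%s = params[%d]" % (m.group(1), len(params))
        start = newstart
        params.append(float(m.group(2)))
    new_code += code[start:]

    print(new_code)  # var0 = params[0] / var1 = params[1] / res = var0 * x + var1
    print(params)    # [3.14159, -0.5]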
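
The Java writer clamps predictions via Math.min/Math.max when bounds are set (patch 3), while the Python writer still carries a TODO. A possible counterpart, assuming bounds stays a (lower, upper) tuple as in the Java code:

    def clamp(value, bounds):
        # bounds as a (lower, upper) tuple is an assumption here
        if bounds is None:
            return value
        lower, upper = bounds
        return min(max(value, lower), upper)

    # e.g. in the generated scorer: prediction = clamp(score(params, data), bounds)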
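
The capacity aggregation that patches 5 through 9 converge on can be checked in isolation; a small sketch with made-up flows, mirroring read_intersections: per-lane max and mean flow over all turn directions, then summed over the lanes of an edge.

    import pandas as pd

    # Made-up per-turn flows (veh/h) for one edge with two lanes
    df = pd.DataFrame({
        "fromEdgeId": ["e1", "e1", "e1", "e1"],
        "lane": [0, 0, 1, 1],
        "flow": [900.0, 700.0, 650.0, 600.0],
    })

    # Per-lane capacity over all turn directions
    lanes = df.groupby(["fromEdgeId", "lane"]).agg(
        capacityMax=("flow", "max"), capacityMean=("flow", "mean")).reset_index()

    # Edge capacity as the sum over its lanes
    edges = lanes.groupby("fromEdgeId").agg(
        capacityMax=("capacityMax", "sum"), capacityMean=("capacityMean", "sum")).reset_index()

    print(edges)  # capacityMax = 1550.0 (900 + 650), capacityMean = 1425.0 (800 + 625)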