From 9bf96d1dd4d53214c1c2155a738a5b795cf38424 Mon Sep 17 00:00:00 2001 From: Abdulrahman Semrie Date: Fri, 21 Feb 2020 16:09:22 +0300 Subject: [PATCH] Add target feature option for moses runner --- crossval/moses_cross_val.py | 6 +++--- crossval/moses_runner.py | 5 +++-- crossval/post_process.py | 5 +++-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/crossval/moses_cross_val.py b/crossval/moses_cross_val.py index d2f9f9e..8abfa2a 100644 --- a/crossval/moses_cross_val.py +++ b/crossval/moses_cross_val.py @@ -6,7 +6,7 @@ import pandas as pd from scipy import stats -from sklearn.model_selection import StratifiedShuffleSplit +from sklearn.model_selection import StratifiedShuffleSplit, ShuffleSplit from crossval.model_evaluator import ModelEvaluator from crossval.moses_runner import MosesRunner @@ -95,7 +95,7 @@ def split_dataset(self): x, y = df.values, df[self.session.target_feature].values splits, test_size = self.session.crossval_options["folds"], self.session.crossval_options["testSize"] - cv = StratifiedShuffleSplit(n_splits=splits, test_size=test_size) + cv = ShuffleSplit(n_splits=splits, test_size=test_size, random_state=42) return x, df.columns.values, cv.split(x, y) @@ -111,7 +111,7 @@ def run_seeds(self, seeds, i, file): output_file = tempfile.NamedTemporaryFile(mode="w+") moses_options = " ".join([self.session.moses_options, "--random-seed " + str(seed)]) - moses_runner = MosesRunner(file, output_file.name, moses_options, self.session.mnemonic) + moses_runner = MosesRunner(file, output_file.name, moses_options, self.session.mnemonic, self.session.target_feature) returncode, stdout, stderr = moses_runner.run_moses() if returncode != 0: diff --git a/crossval/moses_runner.py b/crossval/moses_runner.py index 24f8059..8be67f2 100644 --- a/crossval/moses_runner.py +++ b/crossval/moses_runner.py @@ -12,7 +12,7 @@ class MosesRunner: A class that handles running of the MOSES binary program """ - def __init__(self, input_file, output_file, moses_opts, session_id): + def __init__(self, input_file, output_file, moses_opts, session_id, target_feature="case"): """ :param input_file: The input file to run MOSES on :param output_file: The file to write MOSES program outputs to @@ -24,6 +24,7 @@ def __init__(self, input_file, output_file, moses_opts, session_id): if not "W1" in moses_opts: moses_opts += ' -W1' self.output_regex = re.compile(r"(-?\d+) (.+) \[(.+)\]") self.logger = get_logger(session_id) + self.target_feature = target_feature def run_moses(self): """ @@ -34,7 +35,7 @@ def run_moses(self): :returns stdin: the error output of the process, if any """ - cmd = ["moses", "-i", self.input, "-o", self.output] + cmd = ["moses", "-i", self.input, "-o", self.output, "--target-feature", self.target_feature] for opt in self.moses_options.split(): cmd.append(opt) diff --git a/crossval/post_process.py b/crossval/post_process.py index 3ed4ad9..7f7d479 100644 --- a/crossval/post_process.py +++ b/crossval/post_process.py @@ -1,10 +1,11 @@ __author__ = 'Abdulrahman Semrie' -import pymongo -from config import MONGODB_URI, DB_NAME, DATASET_DIR import os import pathlib + import pandas as pd + +from config import DATASET_DIR from crossval.filters import loader from models.objmodel import Score, MosesModel