Merge branch 'pickle_minimize_objective'
Martin Roelfs committed Oct 24, 2018
2 parents fd77839 + d8e5282 commit 41425fd
Showing 7 changed files with 279 additions and 12 deletions.
4 changes: 2 additions & 2 deletions symfit/core/fit.py
@@ -108,7 +108,7 @@ def __eq__(self, other):
            if var_1 != var_2:
                return False
            else:
-                if not self[var_1].expand() - other[var_2].expand() == 0:
+                if not self[var_1].expand() == other[var_2].expand():
                    return False
        else:
            return True
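
This fix works because `==` on SymPy expressions tests structural equality, and `expand()` normalizes equivalent expressions to the same tree. A minimal standalone illustration:

    import sympy as sp

    x = sp.Symbol('x')
    lhs, rhs = (x + 1)**2, x**2 + 2*x + 1
    assert lhs != rhs                    # structurally different trees
    assert lhs.expand() == rhs.expand()  # expansion normalizes both sides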
@@ -754,7 +754,7 @@ def _make_signature(self):
    def __reduce__(self):
        return (
            self.__class__,
-            (self.constraint_type(list(self.values())[0]), self.model)
+            (self.constraint_type(list(self.values())[0], 0), self.model)
        )

class TakesData(object):
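
The added `0` supplies the relation's right-hand side, so the constraint is rebuilt as `constraint_type(lhs, 0)`. For background, `__reduce__` hands pickle a `(callable, args)` pair that is replayed on load; a toy sketch of the protocol, using a hypothetical class rather than symfit's:

    import pickle

    class Toy(object):
        def __init__(self, lhs, rhs):
            self.lhs, self.rhs = lhs, rhs

        def __reduce__(self):
            # pickle stores this pair and calls callable(*args) when loading
            return (self.__class__, (self.lhs, self.rhs))

    toy = pickle.loads(pickle.dumps(Toy('a - b', 0)))
    assert (toy.lhs, toy.rhs) == ('a - b', 0)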
27 changes: 27 additions & 0 deletions symfit/core/fit_results.py
@@ -121,3 +121,30 @@ def covariance(self, param_1, param_2):
        param_1_number = self.model.params.index(param_1)
        param_2_number = self.model.params.index(param_2)
        return self.covariance_matrix[param_1_number, param_2_number]

+    @staticmethod
+    def _array_safe_dict_eq(one_dict, other_dict):
+        """
+        Dicts containing arrays are hard to compare. This function uses
+        numpy.allclose to compare arrays, and does normal comparison for all
+        other types.
+
+        :param one_dict:
+        :param other_dict:
+        :return: bool
+        """
+        for key in one_dict:
+            try:
+                assert one_dict[key] == other_dict[key]
+            except ValueError as err:
+                # When dealing with arrays, we need to use numpy for comparison
+                if isinstance(one_dict[key], dict):
+                    assert FitResults._array_safe_dict_eq(one_dict[key], other_dict[key])
+                else:
+                    assert np.allclose(one_dict[key], other_dict[key])
+            except AssertionError:
+                return False
+        else:
+            return True
+
+    def __eq__(self, other):
+        return FitResults._array_safe_dict_eq(self.__dict__, other.__dict__)
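
The helper is needed because `==` on dicts that contain NumPy arrays raises instead of returning a bool; falling back to `np.allclose` covers exactly that case:

    import numpy as np

    d1 = {'a': 1.0, 'cov': np.eye(2)}
    d2 = {'a': 1.0, 'cov': np.eye(2)}
    try:
        d1 == d2  # bool() of an elementwise array comparison is ambiguous
    except ValueError:
        pass
    assert np.allclose(d1['cov'], d2['cov'])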
29 changes: 25 additions & 4 deletions symfit/core/minimizers.py
@@ -32,6 +32,8 @@ def __init__(self, objective, parameters):
        self.parameters = parameters
        self._fixed_params = [p for p in parameters if p.fixed]
        self.objective = partial(objective, **{p.name: p.value for p in self._fixed_params})
+        # Mapping which we use to track the original, to be used upon pickling
+        self._pickle_kwargs = {'parameters': parameters, 'objective': objective}
        self.params = [p for p in parameters if not p.fixed]

    @abc.abstractmethod
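
Storing the unwrapped originals matters because the `wrapped_*` attributes are dropped and rebuilt on unpickling (see `__getstate__`/`__setstate__` below). Note that in Python 3 a `partial` of a module-level function pickles fine on its own, as this small check shows:

    import pickle
    from functools import partial

    def chi2(a, b):  # module level, so it pickles by reference
        return a**2 + b**2

    frozen = partial(chi2, b=3.0)  # a fixed parameter baked in
    thawed = pickle.loads(pickle.dumps(frozen))
    assert thawed(a=2.0) == frozen(a=2.0) == 13.0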
@@ -56,6 +58,13 @@ def initial_guesses(self):
    def initial_guesses(self, vals):
        self._initial_guesses = vals

+    def __getstate__(self):
+        return {key: value for key, value in self.__dict__.items()
+                if not key.startswith('wrapped_')}
+
+    def __setstate__(self, state):
+        self.__dict__.update(state)
+        self.__init__(**self._pickle_kwargs)
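
The pattern: `__getstate__` drops the derived `wrapped_*` attributes, and `__setstate__` rebuilds them by re-running `__init__` with the saved constructor arguments. A stripped-down sketch of the same idea with a hypothetical class:

    import pickle

    def objective(x):
        return (x - 2.0)**2

    class Rebuildable(object):
        def __init__(self, objective):
            self._pickle_kwargs = {'objective': objective}
            self.objective = objective
            # Derived attribute that cannot be pickled:
            self.wrapped_objective = lambda x: objective(x)

        def __getstate__(self):
            return {k: v for k, v in self.__dict__.items()
                    if not k.startswith('wrapped_')}

        def __setstate__(self, state):
            self.__dict__.update(state)
            self.__init__(**self._pickle_kwargs)

    clone = pickle.loads(pickle.dumps(Rebuildable(objective)))
    assert clone.wrapped_objective(3.0) == 1.0  # rebuilt by __setstate__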

class BoundedMinimizer(BaseMinimizer):
"""
Expand All @@ -73,6 +82,10 @@ class ConstrainedMinimizer(BaseMinimizer):
def __init__(self, *args, **kwargs):
constraints = kwargs.pop('constraints')
super(ConstrainedMinimizer, self).__init__(*args, **kwargs)
# Remember the vanilla constraints for pickling
self._pickle_kwargs['constraints'] = constraints
if constraints is None:
constraints = []
self.constraints = [
partial(constraint, **{p.name: p.value for p in self._fixed_params})
for constraint in constraints
@@ -84,11 +97,12 @@ class GradientMinimizer(BaseMinimizer):
    """
    @keywordonly(jacobian=None)
    def __init__(self, *args, **kwargs):
-        jacobian = kwargs.pop('jacobian')
+        self.jacobian = kwargs.pop('jacobian')
        super(GradientMinimizer, self).__init__(*args, **kwargs)
+        self._pickle_kwargs['jacobian'] = self.jacobian

-        if jacobian is not None:
-            jac_with_fixed_params = partial(jacobian, **{p.name: p.value for p in self._fixed_params})
+        if self.jacobian is not None:
+            jac_with_fixed_params = partial(self.jacobian, **{p.name: p.value for p in self._fixed_params})
            self.wrapped_jacobian = self.resize_jac(jac_with_fixed_params)
        else:
            self.jacobian = None
@@ -143,6 +157,7 @@ def __init__(self, *args, **kwargs):
        minimizers = kwargs.pop('minimizers')
        super(ChainedMinimizer, self).__init__(*args, **kwargs)
        self.minimizers = minimizers
+        self._pickle_kwargs['minimizers'] = self.minimizers
        self.__signature__ = self._make_signature()

    def execute(self, **minimizer_kwargs):
@@ -224,6 +239,10 @@ def _make_signature(self):
        )
        return inspect_sig.Signature(parameters=reversed(parameters))

+    def __getstate__(self):
+        state = super(ChainedMinimizer, self).__getstate__()
+        del state['__signature__']
+        return state
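
`ChainedMinimizer` additionally drops `__signature__` from the state, which is safe because `__setstate__` re-runs `__init__` and that regenerates the signature via `_make_signature()`. For reference, the signature is an ordinary `inspect.Signature` built from keyword-only parameters, roughly:

    import inspect

    params = [inspect.Parameter('BFGS', inspect.Parameter.KEYWORD_ONLY)]
    print(inspect.Signature(parameters=params))  # (*, BFGS)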

class ScipyMinimize(object):
"""
Expand Down Expand Up @@ -306,7 +325,7 @@ def _pack_output(self, ans):
covariance_matrix=None,
infodic=infodic,
mesg=ans.message,
ier=ans.nit if hasattr(ans, 'nit') else float('nan'),
ier=ans.nit if hasattr(ans, 'nit') else None,
objective_value=ans.fun,
)

@@ -327,6 +346,7 @@ def method_name(cls):
        """
        return cls.__name__

+
class ScipyGradientMinimize(ScipyMinimize, GradientMinimizer):
"""
Base class for :func:`scipy.optimize.minimize`'s gradient-minimizers.
Expand Down Expand Up @@ -374,6 +394,7 @@ def scipy_constraints(self, constraints):
cons = tuple(cons)
return cons


class BFGS(ScipyGradientMinimize):
"""
Wrapper around :func:`scipy.optimize.minimize`'s BFGS algorithm.
Expand Down
2 changes: 0 additions & 2 deletions tests/test_minimize.py
@@ -167,12 +167,10 @@ def test_basinhopping(self):
        x0 = [1.]
        np.random.seed(555)
        res = basinhopping(func, x0, minimizer_kwargs={"method": "BFGS"}, niter=200)
-        print(res)
        np.random.seed(555)
        x, = parameters('x')
        fit = BasinHopping(func, [x])
        fit_result = fit.execute(minimizer_kwargs={"method": "BFGS", 'jac': False}, niter=200)
-        print(fit_result)

        self.assertEqual(res.x, fit_result.value(x))
        self.assertEqual(res.fun, fit_result.objective_value)
173 changes: 169 additions & 4 deletions tests/test_minimizers.py
@@ -4,15 +4,46 @@
import warnings

import numpy as np
from scipy.optimize import minimize
+import pickle
+import multiprocessing as mp

from symfit import (
    Variable, Parameter, Eq, Ge, Le, Lt, Gt, Ne, parameters, ModelError, Fit,
-    Model, FitResults, variables
+    Model, FitResults, variables, CallableNumericalModel, Constraint
)
from symfit.core.objectives import MinimizeModel
-from symfit.core.minimizers import BFGS, Powell
+from symfit.core.minimizers import *
+from symfit.core.support import partial

+# Defined at the global level because local functions can't be pickled.
+def f(x, a, b):
+    return a * x + b
+
+def chi_squared(x, y, a, b, sum=True):
+    if sum:
+        return np.sum((y - f(x, a, b)) ** 2)
+    else:
+        return (y - f(x, a, b)) ** 2
+
+def worker(fit_obj):
+    return fit_obj.execute()
+
+def subclasses(base, leaves_only=True):
+    """
+    Recursively create a set of subclasses of ``base``.
+
+    :param base: Class to take the subclasses of.
+    :param leaves_only: If ``True``, return only the leaves of the subclass tree.
+    :return: (All leaves of) the subclass tree.
+    """
+    base_subs = set(base.__subclasses__())
+    if not base_subs or not leaves_only:
+        all_subs = {base}
+    else:
+        all_subs = set()
+    for sub in list(base_subs):
+        sub_subs = subclasses(sub, leaves_only=leaves_only)
+        all_subs.update(sub_subs)
+    return all_subs
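
A quick illustration of what this helper returns for a small class tree:

    class A(object): pass
    class B(A): pass
    class C(B): pass

    assert subclasses(A) == {C}                       # leaves only
    assert subclasses(A, leaves_only=False) == {A, B, C}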

class TestMinimize(unittest.TestCase):
    @classmethod
@@ -86,6 +117,140 @@ def test_powell(self):
        fit_result = fit.execute()
        self.assertAlmostEqual(fit_result.value(b), 1.0)

+    def test_pickle(self):
+        """
+        Test the picklability of the different minimizers.
+        """
+        # Create test data
+        xdata = np.linspace(0, 100, 2)  # Two points suffice for a straight line
+        a_vec = np.random.normal(15.0, scale=2.0, size=xdata.shape)
+        b_vec = np.random.normal(100, scale=2.0, size=xdata.shape)
+        ydata = a_vec * xdata + b_vec  # Points scattered around the line 15 * x + 100
+
+        # Normal symbolic fit
+        a = Parameter('a', value=0, min=0.0, max=1000)
+        b = Parameter('b', value=0, min=0.0, max=1000)
+
+        # Make a set of all ScipyMinimizers, and add a chained minimizer.
+        scipy_minimizers = subclasses(ScipyMinimize)
+        chained_minimizer = partial(ChainedMinimizer,
+                                    minimizers=[DifferentialEvolution, BFGS])
+        scipy_minimizers.add(chained_minimizer)
+        constrained_minimizers = subclasses(ScipyConstrainedMinimize)
+        # Test for all of them if they can be pickled.
+        for minimizer in scipy_minimizers:
+            if minimizer is MINPACK:
+                fit = minimizer(
+                    partial(chi_squared, x=xdata, y=ydata, sum=False),
+                    [a, b]
+                )
+            elif minimizer in constrained_minimizers:
+                # For constrained minimizers we also add a constraint, just to
+                # be sure constraints are treated well.
+                dummy_model = CallableNumericalModel({}, independent_vars=[], params=[a, b])
+                fit = minimizer(
+                    partial(chi_squared, x=xdata, y=ydata),
+                    [a, b],
+                    constraints=[Constraint(Ge(b, a), model=dummy_model)]
+                )
+            elif isinstance(minimizer, partial) and issubclass(minimizer.func, ChainedMinimizer):
+                init_minimizers = []
+                for sub_minimizer in minimizer.keywords['minimizers']:
+                    init_minimizers.append(sub_minimizer(
+                        partial(chi_squared, x=xdata, y=ydata),
+                        [a, b]
+                    ))
+                minimizer.keywords['minimizers'] = init_minimizers
+                fit = minimizer(partial(chi_squared, x=xdata, y=ydata), [a, b])
+            else:
+                fit = minimizer(partial(chi_squared, x=xdata, y=ydata), [a, b])
+
+            dump = pickle.dumps(fit)
+            pickled_fit = pickle.loads(dump)
+            problematic_attr = [
+                'objective', '_pickle_kwargs', 'wrapped_objective',
+                'constraints', 'wrapped_constraints',
+                'local_minimizer', 'minimizers'
+            ]
+
+            for key, value in fit.__dict__.items():
+                new_value = pickled_fit.__dict__[key]
+                try:
+                    self.assertEqual(value, new_value)
+                except AssertionError as err:
+                    if key in problematic_attr:
+                        # These attr are new instances, and therefore do not
+                        # pass an equality test. All we can do is see if they
+                        # are at least the same type.
+                        if isinstance(value, (list, tuple)):
+                            for val1, val2 in zip(value, new_value):
+                                self.assertTrue(isinstance(val1, val2.__class__))
+                                if key == 'constraints':
+                                    self.assertEqual(val1.func.constraint_type,
+                                                     val2.func.constraint_type)
+                                    self.assertEqual(
+                                        list(val1.func.model_dict.values())[0],
+                                        list(val2.func.model_dict.values())[0]
+                                    )
+                                    self.assertEqual(val1.func.independent_vars,
+                                                     val2.func.independent_vars)
+                                    self.assertEqual(val1.func.params,
+                                                     val2.func.params)
+                                    self.assertEqual(val1.func.__signature__,
+                                                     val2.func.__signature__)
+                                elif key == 'wrapped_constraints':
+                                    self.assertEqual(val1['type'],
+                                                     val2['type'])
+                                    self.assertEqual(set(val1.keys()),
+                                                     set(val2.keys()))
+                        elif key == '_pickle_kwargs':
+                            FitResults._array_safe_dict_eq(value, new_value)
+                        else:
+                            self.assertTrue(isinstance(new_value, value.__class__))
+                    else:
+                        raise err
+            self.assertEqual(set(fit.__dict__.keys()),
+                             set(pickled_fit.__dict__.keys()))
+
+            # Test if we converge to the same result.
+            np.random.seed(2)
+            res_before = fit.execute()
+            np.random.seed(2)
+            res_after = pickled_fit.execute()
+            self.assertEqual(res_before, res_after)

+    def test_multiprocessing(self):
+        """
+        To make sure pickling truly works, try multiprocessing. No news is
+        good news.
+        """
+        np.random.seed(2)
+        x = np.arange(100, dtype=float)
+        y = x + 0.25 * x * np.random.rand(100)
+        a_values = np.arange(3) + 1
+        np.random.shuffle(a_values)
+
+        def gen_fit_objs(x, y, a, minimizer):
+            for a_i in a:
+                a_par = Parameter('a', 5, min=0.0, max=20)
+                b_par = Parameter('b', 1, min=0.0, max=2)
+                x_var = Variable('x')
+                y_var = Variable('y')
+
+                model = CallableNumericalModel({y_var: f}, [x_var], [a_par, b_par])
+
+                fit = Fit(model, x, a_i * y + 1, minimizer=minimizer)
+                yield fit
+
+        minimizers = subclasses(ScipyMinimize)
+        chained_minimizer = (DifferentialEvolution, BFGS)
+        minimizers.add(chained_minimizer)
+
+        all_results = {}
+        pool = mp.Pool()
+        for minimizer in minimizers:
+            results = pool.map(worker, gen_fit_objs(x, y, a_values, minimizer))
+            all_results[minimizer] = [res.params['a'] for res in results]
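
`mp.Pool.map` pickles the worker function and every `Fit` object it sends to the child processes, which is what makes this an end-to-end pickling test. The same mechanism in isolation:

    import multiprocessing as mp

    def square(v):  # module level, so the pool can pickle it
        return v * v

    if __name__ == '__main__':
        pool = mp.Pool(2)
        assert pool.map(square, [1, 2, 3]) == [1, 4, 9]
        pool.close()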


if __name__ == '__main__':
2 changes: 2 additions & 0 deletions tests/test_model.py
@@ -230,6 +230,8 @@ def test_pickle(self):
        new_model.model_dict = model.model_dict
        new_model.dependent_vars = model.dependent_vars
        new_model.sigmas = model.sigmas
+        # Compare signatures
+        self.assertEqual(model.__signature__, new_model.__signature__)
        # Trigger the cached vars.
        model.vars
        new_model.vars
[One further changed file was not rendered on this page.]