
Commit

Steps taken towards #22, but more theoretical backing is needed. This new version does reduce the problem, and now defaults to curve_fit with absolute_sigma=True. Fixed #23.
tBuLi committed May 3, 2015
1 parent 2fd9c61 commit aab33f8
Showing 3 changed files with 160 additions and 39 deletions.
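
For context only (not part of the changed files): a minimal sketch of the scipy behaviour the commit message refers to. In scipy.optimize.curve_fit, absolute_sigma=True treats the supplied sigma as absolute one-standard-deviation errors on ydata, while the default absolute_sigma=False uses them only as relative weights and rescales the returned covariance by the reduced chi-square. The constant model and the random data below are made up for illustration.

import numpy as np
from scipy.optimize import curve_fit

def model(x, a):
    # constant model: y = a for every x
    return a * np.ones_like(x)

xdata = np.arange(100, dtype=float)
ydata = np.random.normal(loc=0.0, scale=1.0, size=xdata.shape)
sigma = np.ones_like(ydata)  # one measurement error per data point

# sigma taken at face value: pcov is not rescaled
popt_abs, pcov_abs = curve_fit(model, xdata, ydata, sigma=sigma, absolute_sigma=True)
# sigma used as relative weights only: pcov is rescaled by the reduced chi-square
popt_rel, pcov_rel = curve_fit(model, xdata, ydata, sigma=sigma, absolute_sigma=False)

print(np.sqrt(pcov_abs[0, 0]), np.sqrt(pcov_rel[0, 0]))
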
3 changes: 2 additions & 1 deletion MANIFEST
@@ -5,8 +5,9 @@ symfit\api.py
symfit\functions.py
symfit\core\__init__.py
symfit\core\argument.py
symfit\core\fit (Exemplaar met conflict van WinBook 2015-03-15).py
symfit\core\fit.py
symfit\core\leastsqbound.py
symfit\core\operators.py
symfit\core\support.py
symfit\tests\__init__.py
symfit\tests\tests.py
49 changes: 33 additions & 16 deletions symfit/core/fit.py
@@ -127,7 +127,7 @@ def __init__(self, params, popt, pcov, infodic, mesg, ier, ydata):
assert(type(ier) == int)
self.__iterations = ier
assert(type(ydata) == np.ndarray)
self.__ydata = ydata # Needed to calculate R^2
self.__ydata = ydata
self.__params = ParameterDict(params, popt, pcov)

def __str__(self):
@@ -173,15 +173,16 @@ def r_squared(self):
Getter for the r_squared property.
:return: Coefficient of determination, R^2.
"""
ss_err = (self.infodict['fvec'] ** 2).sum()
ss_tot = ((self.__ydata - self.__ydata.mean()) ** 2).sum()
ss_err = np.sum(self.infodict['fvec'] ** 2)
ss_tot = np.sum((self.__ydata - self.__ydata.mean()) ** 2)
return 1 - (ss_err / ss_tot)
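
For reference, the quantity returned above is the usual coefficient of determination, R^2 = 1 - SS_res / SS_tot, with SS_res = sum((y_i - f(x_i))^2) taken from the residual vector infodict['fvec'] and SS_tot = sum((y_i - mean(y))^2); R^2 = 1 means the model reproduces ydata exactly.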


class BaseFit(object):
__metaclass__ = abc.ABCMeta
__jac = None # private attribute for the jacobian
__fit_results = None # private attribute for the fit_results
sigma = 1

def __init__(self, model, *args, **kwargs):
"""
@@ -208,9 +209,8 @@ def eval_jacobian(self, p, func, x, y):
"""
residues = []
for jac in self.jacobian:
# raise Exception(inspect.getargspec(jac))
residue = jac(x, p)
# raise Exception('')
# Empirically, the /sigma works. Theoretical backing needed!
residue = jac(x, p)/self.sigma
# If only params in f, we must multiply with an array to preserve the shape of x
try:
len(residue)
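
One possible justification for the division by sigma (the commit message itself says the theoretical backing is still missing): with per-point errors sigma_i, leastsq should minimise the chi-square, i.e. the squared weighted residuals r_i = (y_i - f(x_i)) / sigma_i. Differentiating gives dr_i/dp = -(1/sigma_i) * df(x_i)/dp, so the Jacobian of the weighted residuals is the model Jacobian divided by sigma_i, up to the sign convention used for the residuals.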
@@ -302,7 +302,20 @@ def __init__(self, model, x, y, weights=None, sigma=None, *args, **kwargs):
# We use sigma to calculate the error function.
self.sigma = 1 / np.sqrt(weights)
elif sigma is not None:
self.sigma = sigma
try:
# sigma can be just a constant
len(sigma)
except TypeError:
# Make it an array with the same shape as ydata
sigma = np.ones_like(self.ydata) * sigma
else:
if y.shape != sigma.shape:
raise Exception('y and sigma must have the same shape.')
else:
# sigma is an array if this else is reached, so we flatten it.
sigma = sigma.reshape(-1) # flatten
finally:
self.sigma = np.array(sigma)
else:
self.sigma = 1
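
A minimal usage sketch of what this normalisation is meant to achieve, assuming the Fit(model, x, y, sigma=...) signature shown in this hunk; both calls below should end up with self.sigma as a flat array matching the y data. The toy model and data are made up for illustration.

import numpy as np
from symfit.api import Variable, Parameter, Fit

a = Parameter()
x = Variable()
model = a * x

xdata = np.linspace(0, 10, 25)
ydata = 3 * xdata

fit_scalar = Fit(model, xdata, ydata, sigma=0.1)                        # scalar, broadcast to ydata's shape
fit_array = Fit(model, xdata, ydata, sigma=0.1 * np.ones_like(ydata))   # per-point errors, flattened
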

@@ -311,12 +324,7 @@ def __init__(self, model, x, y, weights=None, sigma=None, *args, **kwargs):
self.sigma.shape
except AttributeError:
pass
else:
if y.shape != self.sigma.shape:
raise Exception('y and sigma must have the same shape.')
else:
# self.sigma is an array if this else is reached, so we flatten it.
self.sigma = self.sigma.reshape(-1) # flatten




@@ -373,17 +381,24 @@ def execute(self, *args, **kwargs):
**kwargs
)

s_sq = (infodic['fvec'] ** 2).sum() / (len(self.ydata) - len(popt))
# raise Exception(infodic['fvec'], self.ydata.shape, self.xdata.shape)
# s_sq = (infodic['fvec'] ** 2).sum() / (len(self.ydata) - len(popt))
# residual_sum_of_squares = ((self.scipy_func(self.xdata, popt) - self.ydata) ** 2).sum()
ss_res = np.sum((self.ydata - self.scipy_func(self.xdata, popt)) ** 2)
degrees_of_freedom = len(self.ydata) - len(popt)
s_sq = ss_res / degrees_of_freedom
pcov = cov_x * s_sq if cov_x is not None else None

# Update the residuals
infodic['fvec'] = self.ydata - self.scipy_func(self.xdata, popt)

self.__fit_results = FitResults(
params=self.params,
popt=popt,
pcov=pcov,
infodic=infodic,
mesg=mesg,
ier=ier,
ydata=self.ydata, # Needed to calculate R^2
ydata=self.ydata
)
return self.__fit_results
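
For reference, this is the standard least-squares covariance estimate: leastsq's cov_x approximates (J^T J)^-1 with J the Jacobian of the residuals at the optimum, and the parameter covariance is estimated as pcov ≈ s^2 * (J^T J)^-1, where s^2 = SS_res / (N - k) is the residual variance, N the number of data points and k the number of fitted parameters, i.e. the s_sq and degrees_of_freedom computed above.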

@@ -552,6 +567,8 @@ def execute(self, method='SLSQP', *args, **kwargs):
'nfev': ans.nfev,
}



self.__fit_results = FitResults(
params=self.params,
popt=ans.x,
147 changes: 125 additions & 22 deletions symfit/tests/tests.py
@@ -1,15 +1,16 @@
#!/usr/local/bin/python3
from __future__ import division, print_function
import unittest
import inspect
import sympy
from sympy import symbols
import numpy as np
from symfit.api import Variable, Parameter, Fit, FitResults, Maximize, Minimize, exp, Likelihood
from symfit.api import Variable, Parameter, Fit, FitResults, Maximize, Minimize, exp, Likelihood, ln, log
from symfit.functions import Gaussian, Exp
import scipy.stats
from scipy.optimize import curve_fit
from symfit.core.support import sympy_to_scipy, sympy_to_py
# import matplotlib.pyplot as plt
# import seaborn
import matplotlib.pyplot as plt
import seaborn

class TddInPythonExample(unittest.TestCase):
def test_gaussian(self):
@@ -94,9 +95,6 @@ def test_read_only_results(self):
self.assertAlmostEqual(fit_result.params.b, 2.0)
self.assertAlmostEqual(fit_result_2.params.b, 3.0)




def test_fitting(self):
xdata = np.linspace(1,10,10)
ydata = 3*xdata**2
@@ -115,8 +113,7 @@ def test_fitting(self):
result = fit.scipy_func(fit.xdata, [3, 2])
self.assertTrue(np.array_equal(result, ydata))

import inspect
args, varargs, keywords, defaults = inspect.getargspec(fit.scipy_func)
args, varargs, keywords, defaults = inspect.getargspec(func)

# self.assertEqual(args, ['x', 'a', 'b'])
fit_result = fit.execute()
@@ -427,22 +424,128 @@ def test_simple_sigma(self):
fit = Fit(t_model, y_data, t_data)#, sigma=sigma_t)
fit_result = fit.execute()

h_smooth = np.linspace(0,60,100)
t_smooth = t_model(y=h_smooth, **fit_result.params)
# h_smooth = np.linspace(0,60,100)
# t_smooth = t_model(y=h_smooth, **fit_result.params)

# Let's compare with the results from curve_fit, no weights
popt_noweights, pcov_noweights = curve_fit(lambda y, p: (2 * y / p)**0.5, y_data, t_data)

self.assertAlmostEqual(fit_result.params.g, popt_noweights[0])
self.assertAlmostEqual(fit_result.params.g_stdev, np.sqrt(pcov_noweights[0, 0]))

# Same sigma everywhere
fit = Fit(t_model, y_data, t_data, sigma=0.0031)
fit_result = fit.execute()
popt_sameweights, pcov_sameweights = curve_fit(lambda y, p: (2 * y / p)**0.5, y_data, t_data, sigma=len(t_data)*[0.1])

self.assertAlmostEqual(fit_result.params.g, popt_sameweights[0], 4)
self.assertAlmostEqual(fit_result.params.g_stdev, np.sqrt(pcov_sameweights[0, 0]), 4)
# Same weight everywhere should be the same as no weight.
self.assertAlmostEqual(fit_result.params.g, popt_noweights[0], 4)
self.assertAlmostEqual(fit_result.params.g_stdev, np.sqrt(pcov_noweights[0, 0]), 4)

# Different sigma for every point
fit = Fit(t_model, y_data, t_data, sigma=0.1*sigma_t)
fit_result = fit.execute()
popt, pcov = curve_fit(lambda y, p: (2 * y / p)**0.5, y_data, t_data, sigma=.1*sigma_t, absolute_sigma=True)
print(popt, np.sqrt(pcov[0, 0]))
print(fit_result.params.g, fit_result.params.g_stdev)
self.assertAlmostEqual(fit_result.params.g, popt[0])
self.assertAlmostEqual(fit_result.params.g_stdev, np.sqrt(pcov[0, 0]))

# self.assertAlmostEqual(fit_result.params.g, 9.095, 3)
# self.assertAlmostEqual(fit_result.params.g_stdev, 0.102, 3) # according to Mathematica

def test_error_advanced(self):
"""
Models an example from the Mathematica docs and tries to replicate it:
http://reference.wolfram.com/language/howto/FitModelsWithMeasurementErrors.html
"""
data = [
[0.9, 6.1, 9.5], [3.9, 6., 9.7], [0.3, 2.8, 6.6],
[1., 2.2, 5.9], [1.8, 2.4, 7.2], [9., 1.7, 7.],
[7.9, 8., 10.4], [4.9, 3.9, 9.], [2.3, 2.6, 7.4],
[4.7, 8.4, 10.]
]
x, y, z = zip(*data)
xy = np.vstack((x, y))
z = np.array(z)
errors = np.array([.4, .4, .2, .4, .1, .3, .1, .2, .2, .2])

# raise Exception(xy, z)
a = Parameter()
b = Parameter(0.9)
c = Parameter(5)
x = Variable()
y = Variable()
model = a * log(b * x + c * y)

fit = Fit(model, xy, z, sigma=errors)
fit_result = fit.execute()
print(fit_result)

popt, pcov, infodict, errmsg, ier = curve_fit(lambda x_vec, a, b, c: a * np.log(b * x_vec[0] + c * x_vec[1]), xy, z, sigma=100*errors, full_output=True)

# Same as curve_fit?
self.assertAlmostEqual(fit_result.params.a, popt[0], 4)
self.assertAlmostEqual(fit_result.params.b, popt[1], 4)
self.assertAlmostEqual(fit_result.params.c, popt[2], 4)
self.assertAlmostEqual(fit_result.params.a_stdev, np.sqrt(pcov[0,0]), 4)
self.assertAlmostEqual(fit_result.params.b_stdev, np.sqrt(pcov[1,1]), 4)
self.assertAlmostEqual(fit_result.params.c_stdev, np.sqrt(pcov[2,2]), 4)

# Same as Mathematica?
self.assertAlmostEqual(fit_result.params.a, 2.68807, 4)
self.assertAlmostEqual(fit_result.params.b, 0.941344, 4)
self.assertAlmostEqual(fit_result.params.c, 5.01541, 4)
self.assertAlmostEqual(fit_result.params.a_stdev, 0.189622, 4)
self.assertAlmostEqual(fit_result.params.b_stdev, 0.480592, 4)
self.assertAlmostEqual(fit_result.params.c_stdev, 1.1628, 4)

def test_error_analytical(self):
"""
Test using a case where the analytical answer is known.
Modeled after:
http://nbviewer.ipython.org/urls/gist.github.com/taldcroft/5014170/raw/31e29e235407e4913dc0ec403af7ed524372b612/curve_fit.ipynb
"""
N = 100
sigma = 100
xn = np.arange(N, dtype=np.float)
yn = np.zeros_like(xn)
yn = yn + np.random.normal(size=len(yn), scale=1)

# plt.plot(h_smooth, t_smooth)
# plt.errorbar(y_data, t_data, yerr=sigma_t, fmt='o')
# plt.xlabel('height (/m)')
# plt.ylabel('time in free fall (/s)')
# plt.xlim(0,60)
# plt.show()
self.assertAlmostEqual(fit_result.params.g, 9.0879601268490919)
self.assertAlmostEqual(fit_result.params.g_stdev, 0.15247139498208956)
a = Parameter(0.0)
model = a

fit = Fit(t_model, y_data, t_data, sigma=sigma_t)
fit = Fit(model, xn, yn, sigma=sigma)
fit_result = fit.execute()
self.assertAlmostEqual(fit_result.params.g, 9.0951297073387991)
self.assertAlmostEqual(fit_result.params.g_stdev, 2.0562987023203601)
print(fit_result)

fit_no_sigma = Fit(model, xn, yn)
fit_result_no_sigma = fit_no_sigma.execute()
print(fit_result_no_sigma)

popt, pcov = curve_fit(lambda x, a: a * np.ones_like(x), xn, yn, sigma=sigma, absolute_sigma=True)

print(popt[0], np.sqrt(np.diag(pcov))[0])
# symfit and curve_fit should be in agreement
self.assertAlmostEqual(fit_result.params.a, popt[0], 5)
self.assertAlmostEqual(fit_result.params.a_stdev, np.sqrt(np.diag(pcov))[0], 2)
# With or without sigma should also be in agreement?
self.assertAlmostEqual(fit_result.params.a, fit_result_no_sigma.params.a, 5)
self.assertAlmostEqual(fit_result.params.a_stdev, fit_result_no_sigma.params.a_stdev, 5)

# Analytical answer for mean of N(0,1):
mu = 0.0
sigma_mu = 1.0/N**0.5

print('{} ~ {}, {} ~ {}'.format(mu, fit_result.params.a, sigma_mu, fit_result.params.a_stdev))

plt.scatter(xn, yn)
plt.plot(xn, model(**fit_result.params) * np.ones_like(xn))
plt.plot(xn, popt[0] * np.ones_like(xn))
plt.show()
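
For reference, the analytical answer used above: for the constant model y = a fitted by least squares to N independent draws from N(0, 1), the estimate of a is the sample mean, a_hat = (1/N) * sum(y_i), and Var(a_hat) = sigma^2 / N, so the standard error is sigma / sqrt(N) = 1 / sqrt(100) = 0.1, the sigma_mu printed above.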


if __name__ == '__main__':
unittest.main()
