
Commit

Steps taken towards #22, but more theoretical backing is needed. This new version does reduce the problem, and now defaults to curve_fit with absolute_sigma=True. Fixed #23.
tBuLi committed May 3, 2015
1 parent 2fd9c61 commit aab33f8
Showing 3 changed files with 160 additions and 39 deletions.
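
For context only (not part of the changed files): a minimal sketch of the scipy behaviour the commit message refers to. In scipy.optimize.curve_fit, absolute_sigma=True treats the supplied sigma as absolute one-standard-deviation errors on ydata, while the default absolute_sigma=False uses them only as relative weights and rescales the returned covariance by the reduced chi-square. The constant model and the random data below are made up for illustration.

import numpy as np
from scipy.optimize import curve_fit

def model(x, a):
    # constant model: y = a for every x
    return a * np.ones_like(x)

xdata = np.arange(100, dtype=float)
ydata = np.random.normal(loc=0.0, scale=1.0, size=xdata.shape)
sigma = np.ones_like(ydata)  # one measurement error per data point

# sigma taken at face value: pcov is not rescaled
popt_abs, pcov_abs = curve_fit(model, xdata, ydata, sigma=sigma, absolute_sigma=True)
# sigma used as relative weights only: pcov is rescaled by the reduced chi-square
popt_rel, pcov_rel = curve_fit(model, xdata, ydata, sigma=sigma, absolute_sigma=False)

print(np.sqrt(pcov_abs[0, 0]), np.sqrt(pcov_rel[0, 0]))
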
3 changes: 2 additions & 1 deletion MANIFEST
@@ -5,8 +5,9 @@ symfit\api.py
symfit\functions.py
symfit\core\__init__.py
symfit\core\argument.py
symfit\core\fit (Exemplaar met conflict van WinBook 2015-03-15).py
symfit\core\fit.py
symfit\core\leastsqbound.py
symfit\core\operators.py
symfit\core\support.py
symfit\tests\__init__.py
symfit\tests\tests.py
49 changes: 33 additions & 16 deletions symfit/core/fit.py
@@ -127,7 +127,7 @@ def __init__(self, params, popt, pcov, infodic, mesg, ier, ydata):
assert(type(ier) == int)
self.__iterations = ier
assert(type(ydata) == np.ndarray)
self.__ydata = ydata # Needed to calculate R^2
self.__ydata = ydata
self.__params = ParameterDict(params, popt, pcov)

def __str__(self):
@@ -173,15 +173,16 @@ def r_squared(self):
Getter for the r_squared property.
:return: Coefficient of determination, R^2.
"""
ss_err = (self.infodict['fvec'] ** 2).sum()
ss_tot = ((self.__ydata - self.__ydata.mean()) ** 2).sum()
ss_err = np.sum(self.infodict['fvec'] ** 2)
ss_tot = np.sum((self.__ydata - self.__ydata.mean()) ** 2)
return 1 - (ss_err / ss_tot)
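
For reference, the quantity returned above is the usual coefficient of determination, R^2 = 1 - SS_res / SS_tot, with SS_res = sum((y_i - f(x_i))^2) taken from the residual vector infodict['fvec'] and SS_tot = sum((y_i - mean(y))^2); R^2 = 1 means the model reproduces ydata exactly.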


class BaseFit(object):
__metaclass__ = abc.ABCMeta
__jac = None # private attribute for the jacobian
__fit_results = None # private attribute for the fit_results
sigma = 1

def __init__(self, model, *args, **kwargs):
"""
@@ -208,9 +209,8 @@ def eval_jacobian(self, p, func, x, y):
"""
residues = []
for jac in self.jacobian:
# raise Exception(inspect.getargspec(jac))
residue = jac(x, p)
# raise Exception('')
# Empirically, the /sigma works. Theoretical backing needed!
residue = jac(x, p)/self.sigma
# If only params in f, we must multiply with an array to preserve the shape of x
try:
len(residue)
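
One possible justification for the division by sigma (the commit message itself says the theoretical backing is still missing): with per-point errors sigma_i, leastsq should minimise the chi-square, i.e. the squared weighted residuals r_i = (y_i - f(x_i)) / sigma_i. Differentiating gives dr_i/dp = -(1/sigma_i) * df(x_i)/dp, so the Jacobian of the weighted residuals is the model Jacobian divided by sigma_i, up to the sign convention used for the residuals.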
@@ -302,7 +302,20 @@ def __init__(self, model, x, y, weights=None, sigma=None, *args, **kwargs):
# We use sigma to calculate the error function.
self.sigma = 1 / np.sqrt(weights)
elif sigma is not None:
self.sigma = sigma
try:
# sigma can be just a constant
len(sigma)
except TypeError:
# Make it an array with the same shape as ydata
sigma = np.ones_like(self.ydata) * sigma
else:
if y.shape != sigma.shape:
raise Exception('y and sigma must have the same shape.')
else:
# sigma is an array if this else is reached, so we flatten it.
sigma = sigma.reshape(-1) # flatten
finally:
self.sigma = np.array(sigma)
else:
self.sigma = 1
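
A minimal usage sketch of what this normalisation is meant to achieve, assuming the Fit(model, x, y, sigma=...) signature shown in this hunk; both calls below should end up with self.sigma as a flat array matching the y data. The toy model and data are made up for illustration.

import numpy as np
from symfit.api import Variable, Parameter, Fit

a = Parameter()
x = Variable()
model = a * x

xdata = np.linspace(0, 10, 25)
ydata = 3 * xdata

fit_scalar = Fit(model, xdata, ydata, sigma=0.1)                        # scalar, broadcast to ydata's shape
fit_array = Fit(model, xdata, ydata, sigma=0.1 * np.ones_like(ydata))   # per-point errors, flattened
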

@@ -311,12 +324,7 @@ def __init__(self, model, x, y, weights=None, sigma=None, *args, **kwargs):
self.sigma.shape
except AttributeError:
pass
else:
if y.shape != self.sigma.shape:
raise Exception('y and sigma must have the same shape.')
else:
# self.sigma is an array if this else is reached, so we flatten it.
self.sigma = self.sigma.reshape(-1) # flatten




@@ -373,17 +381,24 @@ def execute(self, *args, **kwargs):
**kwargs
)

s_sq = (infodic['fvec'] ** 2).sum() / (len(self.ydata) - len(popt))
# raise Exception(infodic['fvec'], self.ydata.shape, self.xdata.shape)
# s_sq = (infodic['fvec'] ** 2).sum() / (len(self.ydata) - len(popt))
# residual_sum_of_squares = ((self.scipy_func(self.xdata, popt) - self.ydata) ** 2).sum()
ss_res = np.sum((self.ydata - self.scipy_func(self.xdata, popt)) ** 2)
degrees_of_freedom = len(self.ydata) - len(popt)
s_sq = ss_res / degrees_of_freedom
pcov = cov_x * s_sq if cov_x is not None else None

# Update the residuals
infodic['fvec'] = self.ydata - self.scipy_func(self.xdata, popt)

self.__fit_results = FitResults(
params=self.params,
popt=popt,
pcov=pcov,
infodic=infodic,
mesg=mesg,
ier=ier,
ydata=self.ydata, # Needed to calculate R^2
ydata=self.ydata
)
return self.__fit_results
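
For reference, this is the standard least-squares covariance estimate: leastsq's cov_x approximates (J^T J)^-1 with J the Jacobian of the residuals at the optimum, and the parameter covariance is estimated as pcov ≈ s^2 * (J^T J)^-1, where s^2 = SS_res / (N - k) is the residual variance, N the number of data points and k the number of fitted parameters, i.e. the s_sq and degrees_of_freedom computed above.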

@@ -552,6 +567,8 @@ def execute(self, method='SLSQP', *args, **kwargs):
'nfev': ans.nfev,
}



self.__fit_results = FitResults(
params=self.params,
popt=ans.x,
147 changes: 125 additions & 22 deletions symfit/tests/tests.py
@@ -1,15 +1,16 @@
#!/usr/local/bin/python3
from __future__ import division, print_function
import unittest
import inspect
import sympy
from sympy import symbols
import numpy as np
from symfit.api import Variable, Parameter, Fit, FitResults, Maximize, Minimize, exp, Likelihood
from symfit.api import Variable, Parameter, Fit, FitResults, Maximize, Minimize, exp, Likelihood, ln, log
from symfit.functions import Gaussian, Exp
import scipy.stats
from scipy.optimize import curve_fit
from symfit.core.support import sympy_to_scipy, sympy_to_py
# import matplotlib.pyplot as plt
# import seaborn
import matplotlib.pyplot as plt
import seaborn

class TddInPythonExample(unittest.TestCase):
def test_gaussian(self):
@@ -94,9 +95,6 @@ def test_read_only_results(self):
self.assertAlmostEqual(fit_result.params.b, 2.0)
self.assertAlmostEqual(fit_result_2.params.b, 3.0)




def test_fitting(self):
xdata = np.linspace(1,10,10)
ydata = 3*xdata**2
@@ -115,8 +113,7 @@ def test_fitting(self):
result = fit.scipy_func(fit.xdata, [3, 2])
self.assertTrue(np.array_equal(result, ydata))

import inspect
args, varargs, keywords, defaults = inspect.getargspec(fit.scipy_func)
args, varargs, keywords, defaults = inspect.getargspec(func)

# self.assertEqual(args, ['x', 'a', 'b'])
fit_result = fit.execute()
@@ -427,22 +424,128 @@ def test_simple_sigma(self):
fit = Fit(t_model, y_data, t_data)#, sigma=sigma_t)
fit_result = fit.execute()

h_smooth = np.linspace(0,60,100)
t_smooth = t_model(y=h_smooth, **fit_result.params)
# h_smooth = np.linspace(0,60,100)
# t_smooth = t_model(y=h_smooth, **fit_result.params)

# Let's compare with the results from curve_fit, no weights
popt_noweights, pcov_noweights = curve_fit(lambda y, p: (2 * y / p)**0.5, y_data, t_data)

self.assertAlmostEqual(fit_result.params.g, popt_noweights[0])
self.assertAlmostEqual(fit_result.params.g_stdev, np.sqrt(pcov_noweights[0, 0]))

# Same sigma everywhere
fit = Fit(t_model, y_data, t_data, sigma=0.0031)
fit_result = fit.execute()
popt_sameweights, pcov_sameweights = curve_fit(lambda y, p: (2 * y / p)**0.5, y_data, t_data, sigma=len(t_data)*[0.1])

self.assertAlmostEqual(fit_result.params.g, popt_sameweights[0], 4)
self.assertAlmostEqual(fit_result.params.g_stdev, np.sqrt(pcov_sameweights[0, 0]), 4)
# Same weight everywhere should be the same as no weight.
self.assertAlmostEqual(fit_result.params.g, popt_noweights[0], 4)
self.assertAlmostEqual(fit_result.params.g_stdev, np.sqrt(pcov_noweights[0, 0]), 4)

# Different sigma for every point
fit = Fit(t_model, y_data, t_data, sigma=0.1*sigma_t)
fit_result = fit.execute()
popt, pcov = curve_fit(lambda y, p: (2 * y / p)**0.5, y_data, t_data, sigma=.1*sigma_t, absolute_sigma=True)
print(popt, np.sqrt(pcov[0, 0]))
print(fit_result.params.g, fit_result.params.g_stdev)
self.assertAlmostEqual(fit_result.params.g, popt[0])
self.assertAlmostEqual(fit_result.params.g_stdev, np.sqrt(pcov[0, 0]))

# self.assertAlmostEqual(fit_result.params.g, 9.095, 3)
# self.assertAlmostEqual(fit_result.params.g_stdev, 0.102, 3) # according to Mathematica

def test_error_advanced(self):
"""
Models an example from the Mathematica docs and tries to replicate it:
http://reference.wolfram.com/language/howto/FitModelsWithMeasurementErrors.html
"""
data = [
[0.9, 6.1, 9.5], [3.9, 6., 9.7], [0.3, 2.8, 6.6],
[1., 2.2, 5.9], [1.8, 2.4, 7.2], [9., 1.7, 7.],
[7.9, 8., 10.4], [4.9, 3.9, 9.], [2.3, 2.6, 7.4],
[4.7, 8.4, 10.]
]
x, y, z = zip(*data)
xy = np.vstack((x, y))
z = np.array(z)
errors = np.array([.4, .4, .2, .4, .1, .3, .1, .2, .2, .2])

# raise Exception(xy, z)
a = Parameter()
b = Parameter(0.9)
c = Parameter(5)
x = Variable()
y = Variable()
model = a * log(b * x + c * y)

fit = Fit(model, xy, z, sigma=errors)
fit_result = fit.execute()
print(fit_result)

popt, pcov, infodict, errmsg, ier = curve_fit(lambda x_vec, a, b, c: a * np.log(b * x_vec[0] + c * x_vec[1]), xy, z, sigma=100*errors, full_output=True)

# Same as curve_fit?
self.assertAlmostEqual(fit_result.params.a, popt[0], 4)
self.assertAlmostEqual(fit_result.params.b, popt[1], 4)
self.assertAlmostEqual(fit_result.params.c, popt[2], 4)
self.assertAlmostEqual(fit_result.params.a_stdev, np.sqrt(pcov[0,0]), 4)
self.assertAlmostEqual(fit_result.params.b_stdev, np.sqrt(pcov[1,1]), 4)
self.assertAlmostEqual(fit_result.params.c_stdev, np.sqrt(pcov[2,2]), 4)

# Same as Mathematica?
self.assertAlmostEqual(fit_result.params.a, 2.68807, 4)
self.assertAlmostEqual(fit_result.params.b, 0.941344, 4)
self.assertAlmostEqual(fit_result.params.c, 5.01541, 4)
self.assertAlmostEqual(fit_result.params.a_stdev, 0.189622, 4)
self.assertAlmostEqual(fit_result.params.b_stdev, 0.480592, 4)
self.assertAlmostEqual(fit_result.params.c_stdev, 1.1628, 4)

def test_error_analytical(self):
"""
Test using a case where the analytical answer is known.
Modeled after:
http://nbviewer.ipython.org/urls/gist.github.com/taldcroft/5014170/raw/31e29e235407e4913dc0ec403af7ed524372b612/curve_fit.ipynb
"""
N = 100
sigma = 100
xn = np.arange(N, dtype=np.float)
yn = np.zeros_like(xn)
yn = yn + np.random.normal(size=len(yn), scale=1)

# plt.plot(h_smooth, t_smooth)
# plt.errorbar(y_data, t_data, yerr=sigma_t, fmt='o')
# plt.xlabel('height (/m)')
# plt.ylabel('time in free fall (/s)')
# plt.xlim(0,60)
# plt.show()
self.assertAlmostEqual(fit_result.params.g, 9.0879601268490919)
self.assertAlmostEqual(fit_result.params.g_stdev, 0.15247139498208956)
a = Parameter(0.0)
model = a

fit = Fit(t_model, y_data, t_data, sigma=sigma_t)
fit = Fit(model, xn, yn, sigma=sigma)
fit_result = fit.execute()
self.assertAlmostEqual(fit_result.params.g, 9.0951297073387991)
self.assertAlmostEqual(fit_result.params.g_stdev, 2.0562987023203601)
print(fit_result)

fit_no_sigma = Fit(model, xn, yn)
fit_result_no_sigma = fit_no_sigma.execute()
print(fit_result_no_sigma)

popt, pcov = curve_fit(lambda x, a: a * np.ones_like(x), xn, yn, sigma=sigma, absolute_sigma=True)

print(popt[0], np.sqrt(np.diag(pcov))[0])
# symfit and curve_fit should be in agreement
self.assertAlmostEqual(fit_result.params.a, popt[0], 5)
self.assertAlmostEqual(fit_result.params.a_stdev, np.sqrt(np.diag(pcov))[0], 2)
# With or without sigma should also be in agreement?
self.assertAlmostEqual(fit_result.params.a, fit_result_no_sigma.params.a, 5)
self.assertAlmostEqual(fit_result.params.a_stdev, fit_result_no_sigma.params.a_stdev, 5)

# Analytical answer for mean of N(0,1):
mu = 0.0
sigma_mu = 1.0/N**0.5

print('{} ~ {}, {} ~ {}'.format(mu, fit_result.params.a, sigma_mu, fit_result.params.a_stdev))

plt.scatter(xn, yn)
plt.plot(xn, model(**fit_result.params) * np.ones_like(xn))
plt.plot(xn, popt[0] * np.ones_like(xn))
plt.show()
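
For reference, the analytical answer used above: for the constant model y = a fitted by least squares to N independent draws from N(0, 1), the estimate of a is the sample mean, a_hat = (1/N) * sum(y_i), and Var(a_hat) = sigma^2 / N, so the standard error is sigma / sqrt(N) = 1 / sqrt(100) = 0.1, the sigma_mu printed above.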


if __name__ == '__main__':
unittest.main()
