From 498f71bfc4227e84710c27e305a539a9156291cc Mon Sep 17 00:00:00 2001
From: Michael Pilosov <40366263+mathematicalmichael@users.noreply.github.com>
Date: Wed, 3 Nov 2021 19:15:49 -0600
Subject: [PATCH] weights in kwargs (attempt) (#52)

* weights in kwargs (attempt)

* renaming and testing

* remove unused import

* kluge for tests

* clean up note

* add test for setting weights twice

* add property attributes to class for samples and features

* ensure weights from predicted kde are saved

* lints
---
 src/mud/base.py    | 25 +++++++++++++-
 src/mud/funs.py    | 29 ++++++++++++----
 tests/conftest.py  | 57 +++++++++++++++++++++++++++++-
 tests/test_base.py | 86 ++++++++++++++++++++++++++++++++++++++++++++++
 tests/test_funs.py |  3 ++
 5 files changed, 192 insertions(+), 8 deletions(-)
 create mode 100644 tests/test_base.py

diff --git a/src/mud/base.py b/src/mud/base.py
index 19ba405..ac24d43 100644
--- a/src/mud/base.py
+++ b/src/mud/base.py
@@ -38,6 +38,14 @@ def __init__(self, X, y, domain=None, weights=None):
         self._pr = None
         self._ob = None
 
+    @property
+    def _n_features(self):
+        return self.y.shape[1]
+
+    @property
+    def _n_samples(self):
+        return self.y.shape[0]
+
     def set_observed(self, distribution=dist.norm()):
         self._ob = distribution.pdf(self.y).prod(axis=1)
 
@@ -49,18 +57,25 @@ def set_initial(self, distribution=None):
                 distribution = dist.uniform(loc=mn, scale=mx - mn)
             else:
                 distribution = dist.norm()
+
         initial_dist = distribution
         self._in = initial_dist.pdf(self.X).prod(axis=1)
         self._up = None
         self._pr = None
 
     def set_predicted(self, distribution=None, **kwargs):
+        if "weights" not in kwargs:
+            kwargs["weights"] = self._weights
+        else:
+            self._weights = kwargs["weights"]
+
         if distribution is None:
             # Reweight kde of predicted by weights from previous iteration if present
-            distribution = gkde(self.y.T, **kwargs, weights=self._weights)
+            distribution = gkde(self.y.T, **kwargs)
             pred_pdf = distribution.pdf(self.y.T).T
         else:
             pred_pdf = distribution.pdf(self.y, **kwargs)
+
         self._pr = pred_pdf
         self._up = None
 
@@ -127,6 +142,14 @@ def __init__(self, X, y, domain=None):
         self._pr = None
         self._ll = None
 
+    @property
+    def _n_features(self):
+        return self.y.shape[1]
+
+    @property
+    def _n_samples(self):
+        return self.y.shape[0]
+
     def set_likelihood(self, distribution, log=False):
         if log:
             self._log = True
diff --git a/src/mud/funs.py b/src/mud/funs.py
index a1d765f..a2cd6a0 100644
--- a/src/mud/funs.py
+++ b/src/mud/funs.py
@@ -88,15 +88,32 @@ def run():
 ############################################################
 
 
-def wme(X, data, sd=None):
+def wme(predictions, data, sd=None):
+    """
+    Calculates Weighted Mean Error (WME) functional.
+
+    Parameters
+    ----------
+    predictions: numpy.ndarray of shape (n_samples, n_features)
+        Predicted values against which data is compared.
+    data: list or numpy.ndarray of shape (n_features, 1)
+        Collected (noisy) data
+    sd: float, optional
+        Standard deviation
+
+    Returns
+    -------
+    numpy.ndarray of shape (n_samples, 1)
+
+    """
     if sd is None:
         sd = np.std(data)
-    if X.ndim == 1:
-        X = X.reshape(1, -1)
-    num_evals = X.shape[0]
-    assert X.shape[1] == len(data)
+    if predictions.ndim == 1:
+        predictions = predictions.reshape(1, -1)
+    num_evals = predictions.shape[0]
+    assert predictions.shape[1] == len(data)
 
-    residuals = np.subtract(X, data)
+    residuals = np.subtract(predictions, data)
     weighted_residuals = np.divide(residuals, sd)
     assert weighted_residuals.shape[0] == num_evals
 
diff --git a/tests/conftest.py b/tests/conftest.py
index 4adcf86..83a2c97 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -7,4 +7,59 @@
     https://pytest.org/latest/plugins.html
 """
 
-# import pytest
+import pytest
+from mud.base import DensityProblem, BayesProblem
+from mud.funs import wme
+from scipy.stats import distributions as ds
+import numpy as np
+
+
+@pytest.fixture
+def identity_1D_50_wme():
+    X = np.random.rand(100, 1)
+    num_observations = 50
+    y_pred = np.repeat(X, num_observations, 1)
+    y_true = 0.5
+    noise = 0.05
+    y_observed = y_true * np.ones(num_observations) + noise * np.random.randn(
+        num_observations
+    )
+    Y = wme(y_pred, y_observed, sd=noise)
+    return (X, Y)
+
+
+@pytest.fixture
+def identity_problem_mud_1D(identity_1D_50_wme):
+    X, Y = identity_1D_50_wme
+    return DensityProblem(X, Y, np.array([[0, 1], [0, 1]]))
+
+
+@pytest.fixture
+def identity_problem_map_1D():
+    X = np.random.rand(1000, 1)
+    num_observations = 50
+    y_pred = np.repeat(X, num_observations, 1)
+    y_true = 0.5
+    noise = 0.05
+    y_observed = y_true * np.ones(num_observations) + noise * np.random.randn(
+        num_observations
+    )
+    B = BayesProblem(X, y_pred, np.array([[0, 1], [0, 1]]))
+    B.set_likelihood(ds.norm(loc=y_observed, scale=noise))
+    return B
+
+
+@pytest.fixture
+def identity_problem_mud_1D_equal_weights(identity_1D_50_wme):
+    X, Y = identity_1D_50_wme
+    weights = np.ones(X.shape[0])
+    return DensityProblem(X, Y, np.array([[0, 1], [0, 1]]), weights=weights)
+
+
+@pytest.fixture
+def identity_problem_mud_1D_bias_weights(identity_1D_50_wme):
+    X, Y = identity_1D_50_wme
+    weights = np.ones(X.shape[0])
+    weights[X[:, 0] < 0.2] = 0.1
+    weights[X[:, 0] > 0.8] = 0.1
+    return DensityProblem(X, Y, np.array([[0, 1], [0, 1]]), weights=weights)
diff --git a/tests/test_base.py b/tests/test_base.py
new file mode 100644
index 0000000..3a37408
--- /dev/null
+++ b/tests/test_base.py
@@ -0,0 +1,86 @@
+# -*- coding: utf-8 -*-
+
+import numpy as np
+
+__author__ = "Mathematical Michael"
+__copyright__ = "Mathematical Michael"
+__license__ = "mit"
+
+
+def test_identity_mud_problem_1D(identity_problem_mud_1D):
+    # Arrange
+    D = identity_problem_mud_1D
+
+    # Act
+    mud_point = D.estimate()
+    updated_density = D._up
+    ratio = D._r
+
+    # Assert
+    assert np.round(mud_point, 1) == 0.5
+    assert np.sum(updated_density) > 0
+    assert np.mean(ratio) > 0
+
+
+def test_we_can_set_weights_in_predicted(identity_problem_mud_1D_equal_weights):
+    """Mimicks existing usage in mud-examples"""
+    # Arrange
+    # weights were used for initialization
+    D = identity_problem_mud_1D_equal_weights
+    D.set_initial()  # domain has been set -> uniform as default
+    # want to make sure we can set weights on predicted
+    weights = np.random.rand(D._n_samples)
+    D.set_predicted(weights=weights)
+
+    # Act
+    mud_point = D.estimate()
+    updated_density = D._up
+    ratio = D._r
+
+    # Assert
+    # ensure weights were set correctly
+    assert np.linalg.norm(weights - D._weights) == 0
+    assert np.round(mud_point, 1) == 0.5
+    assert np.sum(updated_density) > 0
+    assert np.mean(ratio) > 0
+
+
+def test_identity_mud_1D_with_equal_weights(identity_problem_mud_1D_equal_weights):
+    # Arrange
+    D = identity_problem_mud_1D_equal_weights
+
+    # Act
+    mud_point = D.estimate()
+    updated_density = D._up
+    ratio = D._r
+
+    # Assert
+    assert np.round(mud_point, 1) == 0.5
+    assert np.sum(updated_density) > 0
+    assert np.mean(ratio) > 0
+
+
+def test_identity_mud_1D_with_biased_weights(identity_problem_mud_1D_bias_weights):
+    # Arrange
+    D = identity_problem_mud_1D_bias_weights
+
+    # Act
+    mud_point = D.estimate()
+    updated_density = D._up
+    ratio = D._r
+
+    # Assert
+    assert np.round(mud_point, 1) == 0.5
+    assert np.sum(updated_density) > 0
+    assert np.mean(ratio) > 0
+
+
+def test_identity_map_problem_1D(identity_problem_map_1D):
+    # Arrange
+    D = identity_problem_map_1D
+
+    # Act
+    map_point = D.estimate()
+
+    # Assert
+    assert np.round(map_point, 1) == 0.5
diff --git a/tests/test_funs.py b/tests/test_funs.py
index d1eecca..480909e 100644
--- a/tests/test_funs.py
+++ b/tests/test_funs.py
@@ -68,3 +68,6 @@ class TestWME_20(TestWME):
     def setUp(self):
         self.d = np.random.rand(20)
         self.A = np.tile(self.d, (100, 1))
+
+
+# TODO: test wme works with data of shape (n_features, 1), (1, n_features), and list