From 498f71bfc4227e84710c27e305a539a9156291cc Mon Sep 17 00:00:00 2001
From: Michael Pilosov <40366263+mathematicalmichael@users.noreply.github.com>
Date: Wed, 3 Nov 2021 19:15:49 -0600
Subject: [PATCH] weights in kwargs (attempt) (#52)

* weights in kwargs (attempt)

* renaming and testing

* remove unused import

* kluge for tests

* clean up note

* add test for setting weights twice

* add property attributes to class for samples and features

* ensure weights from predicted kde are saved

* lints
---
 src/mud/base.py    | 25 +++++++++++++-
 src/mud/funs.py    | 29 ++++++++++++----
 tests/conftest.py  | 57 +++++++++++++++++++++++++++++-
 tests/test_base.py | 86 ++++++++++++++++++++++++++++++++++++++++++++++
 tests/test_funs.py |  3 ++
 5 files changed, 192 insertions(+), 8 deletions(-)
 create mode 100644 tests/test_base.py

diff --git a/src/mud/base.py b/src/mud/base.py
index 19ba405..ac24d43 100644
--- a/src/mud/base.py
+++ b/src/mud/base.py
@@ -38,6 +38,14 @@ def __init__(self, X, y, domain=None, weights=None):
         self._pr = None
         self._ob = None
 
+    @property
+    def _n_features(self):
+        return self.y.shape[1]
+
+    @property
+    def _n_samples(self):
+        return self.y.shape[0]
+
     def set_observed(self, distribution=dist.norm()):
         self._ob = distribution.pdf(self.y).prod(axis=1)
 
@@ -49,18 +57,25 @@ def set_initial(self, distribution=None):
                 distribution = dist.uniform(loc=mn, scale=mx - mn)
             else:
                 distribution = dist.norm()
+
         initial_dist = distribution
         self._in = initial_dist.pdf(self.X).prod(axis=1)
         self._up = None
         self._pr = None
 
     def set_predicted(self, distribution=None, **kwargs):
+        # Store caller-supplied weights on the instance and pop them from
+        # kwargs so they are never forwarded to a user-supplied pdf().
+        if "weights" in kwargs:
+            self._weights = kwargs.pop("weights")
+
         if distribution is None:
             # Reweight kde of predicted by weights from previous iteration if present
-            distribution = gkde(self.y.T, **kwargs, weights=self._weights)
+            distribution = gkde(self.y.T, weights=self._weights, **kwargs)
             pred_pdf = distribution.pdf(self.y.T).T
         else:
             pred_pdf = distribution.pdf(self.y, **kwargs)
+
         self._pr = pred_pdf
         self._up = None
 
@@ -127,6 +142,14 @@ def __init__(self, X, y, domain=None):
         self._pr = None
         self._ll = None
 
+    @property
+    def _n_features(self):
+        return self.y.shape[1]
+
+    @property
+    def _n_samples(self):
+        return self.y.shape[0]
+
     def set_likelihood(self, distribution, log=False):
         if log:
             self._log = True
diff --git a/src/mud/funs.py b/src/mud/funs.py
index a1d765f..a2cd6a0 100644
--- a/src/mud/funs.py
+++ b/src/mud/funs.py
@@ -88,15 +88,32 @@ def run():
 ############################################################
 
 
-def wme(X, data, sd=None):
+def wme(predictions, data, sd=None):
+    """
+    Calculates Weighted Mean Error (WME) functional.
+
+    Parameters
+    ----------
+    predictions: numpy.ndarray of shape (n_samples, n_features)
+        Predicted values against which data is compared.
+    data: list or numpy.ndarray of shape (n_features,)
+        Collected (noisy) data
+    sd: float, optional
+        Standard deviation used to scale the residuals; defaults to ``np.std(data)``.
+
+    Returns
+    -------
+    numpy.ndarray of shape (n_samples, 1)
+
+    """
     if sd is None:
         sd = np.std(data)
-    if X.ndim == 1:
-        X = X.reshape(1, -1)
-    num_evals = X.shape[0]
-    assert X.shape[1] == len(data)
+    if predictions.ndim == 1:
+        predictions = predictions.reshape(1, -1)
+    num_evals = predictions.shape[0]
+    assert predictions.shape[1] == len(data)
 
-    residuals = np.subtract(X, data)
+    residuals = np.subtract(predictions, data)
     weighted_residuals = np.divide(residuals, sd)
     assert weighted_residuals.shape[0] == num_evals
 
diff --git a/tests/conftest.py b/tests/conftest.py
index 4adcf86..83a2c97 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -7,4 +7,59 @@
     https://pytest.org/latest/plugins.html
 """
 
-# import pytest
+import pytest
+from mud.base import DensityProblem, BayesProblem
+from mud.funs import wme
+from scipy.stats import distributions as ds
+import numpy as np
+
+
+@pytest.fixture
+def identity_1D_50_wme():
+    X = np.random.rand(100, 1)
+    num_observations = 50
+    y_pred = np.repeat(X, num_observations, 1)
+    y_true = 0.5
+    noise = 0.05
+    y_observed = y_true * np.ones(num_observations) + noise * np.random.randn(
+        num_observations
+    )
+    Y = wme(y_pred, y_observed, sd=noise)
+    return (X, Y)
+
+
+@pytest.fixture
+def identity_problem_mud_1D(identity_1D_50_wme):
+    X, Y = identity_1D_50_wme
+    return DensityProblem(X, Y, np.array([[0, 1], [0, 1]]))
+
+
+@pytest.fixture
+def identity_problem_map_1D():
+    X = np.random.rand(1000, 1)
+    num_observations = 50
+    y_pred = np.repeat(X, num_observations, 1)
+    y_true = 0.5
+    noise = 0.05
+    y_observed = y_true * np.ones(num_observations) + noise * np.random.randn(
+        num_observations
+    )
+    B = BayesProblem(X, y_pred, np.array([[0, 1], [0, 1]]))
+    B.set_likelihood(ds.norm(loc=y_observed, scale=noise))
+    return B
+
+
+@pytest.fixture
+def identity_problem_mud_1D_equal_weights(identity_1D_50_wme):
+    X, Y = identity_1D_50_wme
+    weights = np.ones(X.shape[0])
+    return DensityProblem(X, Y, np.array([[0, 1], [0, 1]]), weights=weights)
+
+
+@pytest.fixture
+def identity_problem_mud_1D_bias_weights(identity_1D_50_wme):
+    X, Y = identity_1D_50_wme
+    weights = np.ones(X.shape[0])
+    weights[X[:, 0] < 0.2] = 0.1
+    weights[X[:, 0] > 0.8] = 0.1
+    return DensityProblem(X, Y, np.array([[0, 1], [0, 1]]), weights=weights)
diff --git a/tests/test_base.py b/tests/test_base.py
new file mode 100644
index 0000000..3a37408
--- /dev/null
+++ b/tests/test_base.py
@@ -0,0 +1,86 @@
+# -*- coding: utf-8 -*-
+
+import numpy as np
+
+__author__ = "Mathematical Michael"
+__copyright__ = "Mathematical Michael"
+__license__ = "mit"
+
+
+def test_identity_mud_problem_1D(identity_problem_mud_1D):
+    # Arrange
+    D = identity_problem_mud_1D
+
+    # Act
+    mud_point = D.estimate()
+    updated_density = D._up
+    ratio = D._r
+
+    # Assert
+    assert np.round(mud_point, 1) == 0.5
+    assert np.sum(updated_density) > 0
+    assert np.mean(ratio) > 0
+
+
+def test_we_can_set_weights_in_predicted(identity_problem_mud_1D_equal_weights):
+    """Mimics existing usage in mud-examples"""
+    # Arrange
+    # weights were used for initialization
+    D = identity_problem_mud_1D_equal_weights
+    D.set_initial()  # domain has been set -> uniform as default
+    # want to make sure we can set weights on predicted
+    weights = np.random.rand(D._n_samples)
+    D.set_predicted(weights=weights)
+
+    # Act
+    mud_point = D.estimate()
+    updated_density = D._up
+    ratio = D._r
+
+    # Assert
+    # ensure weights were set correctly
+    assert np.linalg.norm(weights - D._weights) == 0
+    assert np.round(mud_point, 1) == 0.5
+    assert np.sum(updated_density) > 0
+    assert np.mean(ratio) > 0
+
+
+def test_identity_mud_1D_with_equal_weights(identity_problem_mud_1D_equal_weights):
+    # Arrange
+    D = identity_problem_mud_1D_equal_weights
+
+    # Act
+    mud_point = D.estimate()
+    updated_density = D._up
+    ratio = D._r
+
+    # Assert
+    assert np.round(mud_point, 1) == 0.5
+    assert np.sum(updated_density) > 0
+    assert np.mean(ratio) > 0
+
+
+def test_identity_mud_1D_with_biased_weights(identity_problem_mud_1D_bias_weights):
+    # Arrange
+    D = identity_problem_mud_1D_bias_weights
+
+    # Act
+    mud_point = D.estimate()
+    updated_density = D._up
+    ratio = D._r
+
+    # Assert
+    assert np.round(mud_point, 1) == 0.5
+    assert np.sum(updated_density) > 0
+    assert np.mean(ratio) > 0
+
+
+def test_identity_map_problem_1D(identity_problem_map_1D):
+    # Arrange
+    D = identity_problem_map_1D
+
+    # Act
+    map_point = D.estimate()
+
+    # Assert
+    assert np.round(map_point, 1) == 0.5
diff --git a/tests/test_funs.py b/tests/test_funs.py
index d1eecca..480909e 100644
--- a/tests/test_funs.py
+++ b/tests/test_funs.py
@@ -68,3 +68,6 @@ class TestWME_20(TestWME):
     def setUp(self):
         self.d = np.random.rand(20)
         self.A = np.tile(self.d, (100, 1))
+
+
+# TODO: test wme works with data of shape (n_features, 1), (1, n_features), and list