Skip to content

Commit

Permalink
weights in kwargs (attempt) (#52)
Browse files Browse the repository at this point in the history
* weights in kwargs (attempt)

* renaming and testing

* remove unused import

* kluge for tests

* clean up note

* add test for setting weights twice

* add property attributes to class for samples and features

* ensure weights from predicted kde are saved

* lints
  • Loading branch information
mathematicalmichael authored Nov 4, 2021
1 parent ce312d0 commit 498f71b
Show file tree
Hide file tree
Showing 5 changed files with 192 additions and 8 deletions.
25 changes: 24 additions & 1 deletion src/mud/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,14 @@ def __init__(self, X, y, domain=None, weights=None):
self._pr = None
self._ob = None

@property
def _n_features(self):
    """Return the size of the second axis of ``self.y``."""
    y_shape = self.y.shape
    return y_shape[1]

@property
def _n_samples(self):
    """Return the size of the first axis of ``self.y``."""
    y_shape = self.y.shape
    return y_shape[0]

def set_observed(self, distribution=dist.norm()):
    """
    Evaluate the observed density on ``self.y`` and cache it in ``self._ob``.

    Parameters
    ----------
    distribution : object with a ``pdf`` method, optional
        Defaults to ``dist.norm()`` (``dist`` is presumably
        ``scipy.stats.distributions`` -- confirm against the file's imports).

    The per-entry pdf values are multiplied together along axis 1.
    """
    pointwise_pdf = distribution.pdf(self.y)
    self._ob = pointwise_pdf.prod(axis=1)

Expand All @@ -49,18 +57,25 @@ def set_initial(self, distribution=None):
distribution = dist.uniform(loc=mn, scale=mx - mn)
else:
distribution = dist.norm()

initial_dist = distribution
self._in = initial_dist.pdf(self.X).prod(axis=1)
self._up = None
self._pr = None

def set_predicted(self, distribution=None, **kwargs):
    """
    Evaluate the predicted density on ``self.y`` and store it in ``self._pr``.

    Parameters
    ----------
    distribution : object with a ``pdf`` method, optional
        When omitted, a ``gkde`` is built from ``self.y.T`` (weighted by
        ``self._weights``) and evaluated on ``self.y.T``.
    **kwargs
        ``weights``, if given, replaces ``self._weights``. It is consumed
        here and is never forwarded to a user-supplied ``distribution``.
        Any remaining keywords go to ``gkde`` when no distribution is
        supplied, otherwise to ``distribution.pdf``.
    """
    # Remove ``weights`` from kwargs so that it reaches ``gkde`` exactly once
    # (passing it both explicitly and through **kwargs raises TypeError) and
    # does not leak into ``distribution.pdf``, which may not accept it.
    if "weights" in kwargs:
        self._weights = kwargs.pop("weights")

    if distribution is None:
        # Reweight kde of predicted by the stored weights, if any.
        distribution = gkde(self.y.T, weights=self._weights, **kwargs)
        pred_pdf = distribution.pdf(self.y.T).T
    else:
        pred_pdf = distribution.pdf(self.y, **kwargs)

    self._pr = pred_pdf
    # NOTE(review): presumably invalidates a previously computed updated
    # density so it is recomputed from the new predicted values -- confirm.
    self._up = None

Expand Down Expand Up @@ -127,6 +142,14 @@ def __init__(self, X, y, domain=None):
self._pr = None
self._ll = None

@property
def _n_features(self):
    """Return the size of the second axis of ``self.y``."""
    y_shape = self.y.shape
    return y_shape[1]

@property
def _n_samples(self):
    """Return the size of the first axis of ``self.y``."""
    y_shape = self.y.shape
    return y_shape[0]

def set_likelihood(self, distribution, log=False):
if log:
self._log = True
Expand Down
29 changes: 23 additions & 6 deletions src/mud/funs.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,15 +88,32 @@ def run():
############################################################


def wme(X, data, sd=None):
def wme(predictions, data, sd=None):
"""
Calculates Weighted Mean Error (WME) functional.
Parameters
----------
predictions: numpy.ndarray of shape (n_samples, n_features)
Predicted values against which data is compared.
data: list or numpy.ndarray of shape (n_features, 1)
Collected (noisy) data
sd: float, optional
Standard deviation
Returns
-------
numpy.ndarray of shape (n_samples, 1)
"""
if sd is None:
sd = np.std(data)
if X.ndim == 1:
X = X.reshape(1, -1)
num_evals = X.shape[0]
assert X.shape[1] == len(data)
if predictions.ndim == 1:
predictions = predictions.reshape(1, -1)
num_evals = predictions.shape[0]
assert predictions.shape[1] == len(data)

residuals = np.subtract(X, data)
residuals = np.subtract(predictions, data)
weighted_residuals = np.divide(residuals, sd)
assert weighted_residuals.shape[0] == num_evals

Expand Down
57 changes: 56 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,59 @@
https://pytest.org/latest/plugins.html
"""

# import pytest
import pytest
from mud.base import DensityProblem, BayesProblem
from mud.funs import wme
from scipy.stats import distributions as ds
import numpy as np


@pytest.fixture
def identity_1D_50_wme():
    """
    Build ``(X, Y)`` for a 1D identity map with 50 repeated noisy observations.

    ``X`` holds 100 uniform random samples in a single column; ``Y`` is the
    output of ``wme`` applied to the samples repeated 50 times along axis 1
    and to noisy observations centred on 0.5 with noise level 0.05.
    """
    n_samples, n_obs = 100, 50
    truth, sigma = 0.5, 0.05

    # Samples are drawn before the noise, so the RNG call order is unchanged.
    samples = np.random.rand(n_samples, 1)
    predictions = np.repeat(samples, n_obs, 1)
    observed = truth * np.ones(n_obs) + sigma * np.random.randn(n_obs)

    qoi = wme(predictions, observed, sd=sigma)
    return (samples, qoi)


@pytest.fixture
def identity_problem_mud_1D(identity_1D_50_wme):
    """Wrap the ``identity_1D_50_wme`` data in a ``DensityProblem``."""
    samples, qoi = identity_1D_50_wme
    domain = np.array([[0, 1], [0, 1]])
    return DensityProblem(samples, qoi, domain)


@pytest.fixture
def identity_problem_map_1D():
    """
    Build a ``BayesProblem`` for a 1D identity map with 50 noisy observations.

    Uses 1000 uniform random samples, repeats them 50 times along axis 1 as
    the predicted values, and sets a normal likelihood located at the noisy
    observations with scale equal to the noise level (0.05).
    """
    n_samples, n_obs = 1000, 50
    truth, sigma = 0.5, 0.05

    # Samples are drawn before the noise, so the RNG call order is unchanged.
    samples = np.random.rand(n_samples, 1)
    predictions = np.repeat(samples, n_obs, 1)
    observed = truth * np.ones(n_obs) + sigma * np.random.randn(n_obs)

    problem = BayesProblem(samples, predictions, np.array([[0, 1], [0, 1]]))
    likelihood = ds.norm(loc=observed, scale=sigma)
    problem.set_likelihood(likelihood)
    return problem


@pytest.fixture
def identity_problem_mud_1D_equal_weights(identity_1D_50_wme):
    """Build a ``DensityProblem`` on the identity data with all weights 1."""
    samples, qoi = identity_1D_50_wme
    n_samples = samples.shape[0]
    uniform_weights = np.ones(n_samples)
    domain = np.array([[0, 1], [0, 1]])
    return DensityProblem(samples, qoi, domain, weights=uniform_weights)


@pytest.fixture
def identity_problem_mud_1D_bias_weights(identity_1D_50_wme):
    """
    Build a ``DensityProblem`` on the identity data with non-uniform weights.

    Samples whose first column is below 0.2 or above 0.8 get weight 0.1;
    all other samples get weight 1.
    """
    samples, qoi = identity_1D_50_wme
    first_col = samples[:, 0]
    in_tails = (first_col < 0.2) | (first_col > 0.8)
    biased_weights = np.where(in_tails, 0.1, 1.0)
    domain = np.array([[0, 1], [0, 1]])
    return DensityProblem(samples, qoi, domain, weights=biased_weights)
86 changes: 86 additions & 0 deletions tests/test_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# -*- coding: utf-8 -*-

import numpy as np

__author__ = "Mathematical Michael"
__copyright__ = "Mathematical Michael"
__license__ = "mit"


def test_identity_mud_problem_1D(identity_problem_mud_1D):
    """Check the estimate and cached arrays of the unweighted 1D problem."""
    problem = identity_problem_mud_1D

    # ``_up`` and ``_r`` are read only after ``estimate()`` has been called.
    estimate = problem.estimate()
    updated = problem._up
    ratio = problem._r

    assert np.round(estimate, 1) == 0.5
    assert np.sum(updated) > 0
    assert np.mean(ratio) > 0


def test_we_can_set_weights_in_predicted(identity_problem_mud_1D_equal_weights):
    """Mimics existing usage in mud-examples: weights passed to set_predicted."""
    # The fixture was already initialised with weights.
    problem = identity_problem_mud_1D_equal_weights
    problem.set_initial()  # domain has been set -> uniform as default

    # Overwrite the weights through set_predicted.
    new_weights = np.random.rand(problem._n_samples)
    problem.set_predicted(weights=new_weights)

    # ``_up`` and ``_r`` are read only after ``estimate()`` has been called.
    estimate = problem.estimate()
    updated = problem._up
    ratio = problem._r

    # The weights stored on the problem must equal the ones just passed in.
    assert np.linalg.norm(new_weights - problem._weights) == 0
    assert np.round(estimate, 1) == 0.5
    assert np.sum(updated) > 0
    assert np.mean(ratio) > 0


def test_identity_mud_1D_with_equal_weights(identity_problem_mud_1D_equal_weights):
    """Check the estimate and cached arrays when all weights are equal."""
    problem = identity_problem_mud_1D_equal_weights

    # ``_up`` and ``_r`` are read only after ``estimate()`` has been called.
    estimate = problem.estimate()
    updated = problem._up
    ratio = problem._r

    assert np.round(estimate, 1) == 0.5
    assert np.sum(updated) > 0
    assert np.mean(ratio) > 0


def test_identity_mud_1D_with_biased_weights(identity_problem_mud_1D_bias_weights):
    """Check the estimate and cached arrays when weights are non-uniform."""
    problem = identity_problem_mud_1D_bias_weights

    # ``_up`` and ``_r`` are read only after ``estimate()`` has been called.
    estimate = problem.estimate()
    updated = problem._up
    ratio = problem._r

    assert np.round(estimate, 1) == 0.5
    assert np.sum(updated) > 0
    assert np.mean(ratio) > 0


def test_identity_map_problem_1D(identity_problem_map_1D):
    """Check that the estimate of the 1D Bayes problem rounds to 0.5."""
    problem = identity_problem_map_1D

    estimate = problem.estimate()

    assert np.round(estimate, 1) == 0.5
3 changes: 3 additions & 0 deletions tests/test_funs.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,6 @@ class TestWME_20(TestWME):
def setUp(self):
    """Store a random length-20 vector in ``self.d`` and 100 stacked copies in ``self.A``."""
    n_features, n_rows = 20, 100
    self.d = np.random.rand(n_features)
    self.A = np.tile(self.d, (n_rows, 1))


# TODO: test wme works with data of shape (n_features, 1), (1, n_features), and list

0 comments on commit 498f71b

Please sign in to comment.