Merge pull request #1023 from timothy-glover/merge_mcts_changes

Monte Carlo Tree Search Sensor Manager and Autonomous STE Examples
dstl · Jun 17, 2024 · f67aead
2 parents 9ba14e6 + 1a793fc
commit f67aead
Show file tree

Hide file tree

Showing 8 changed files with 1,766 additions and 90 deletions.
diff --git a/docs/examples/sensormanagement/Autonomous_Source_Term_Estimation.py b/docs/examples/sensormanagement/Autonomous_Source_Term_Estimation.py
diff --git a/docs/examples/sensormanagement/Monte_Carlo_Tree_Search_for_Autonomous_Source_Term_Estimation.py b/docs/examples/sensormanagement/Monte_Carlo_Tree_Search_for_Autonomous_Source_Term_Estimation.py
diff --git a/docs/source/stonesoup.sensormanager.rst b/docs/source/stonesoup.sensormanager.rst
@@ -12,6 +12,14 @@ Sensor Managers
     :inherited-members:
 
 
+Tree Search Managers
+--------------------
+
+.. automodule:: stonesoup.sensormanager.tree_search
+    :show-inheritance:
+    :inherited-members:
+
+
 Reward Functions
 ----------------
 .. automodule:: stonesoup.sensormanager.reward

diff --git a/stonesoup/models/measurement/gas.py b/stonesoup/models/measurement/gas.py
@@ -1,7 +1,7 @@
 from typing import Sequence, Union
 
-from math import sqrt
 import numpy as np
+from scipy.stats import norm
 from scipy.special import erf
 
 from ...base import Property
@@ -199,7 +199,7 @@ def logpdf(self, state1: State, state2: State, **kwargs) -> Union[float, np.ndar
         pred_meas = self.function(state2, **kwargs)
         if state1.state_vector[0] <= self.sensing_threshold:
             pdf = p_m + ((1-p_m) * 1/2 * (1+erf((self.sensing_threshold - pred_meas)
-                                                / (nd_sigma * sqrt(2)))))
+                                                / (nd_sigma * np.sqrt(2)))))
             likelihood = np.atleast_1d(np.log(pdf)).view(np.ndarray)
 
         else:
@@ -291,10 +291,10 @@ def rvs(self, state: Union[StateVector, StateVectors], num_samples: int = 1,
 
         random_state = random_state if random_state is not None else self.random_state
 
-        generator = np.random.RandomState(random_state)
-        noise = generator.normal(np.zeros(self.ndim_meas),
-                                 np.ravel(state*self.standard_deviation_percentage),
-                                 num_samples)
+        noise = norm.rvs(loc=np.zeros(self.ndim_meas),
+                         scale=np.ravel(state*self.standard_deviation_percentage),
+                         size=num_samples,
+                         random_state=random_state)
 
         noise = np.atleast_2d(noise)
 

diff --git a/stonesoup/sensormanager/reward.py b/stonesoup/sensormanager/reward.py
@@ -10,6 +10,7 @@
 from ..sensormanager.action import Actionable
 from ..types.detection import TrueDetection
 from ..base import Base, Property
+from ..predictor.base import Predictor
 from ..predictor.particle import ParticlePredictor
 from ..predictor.kalman import KalmanPredictor
 from ..updater.kalman import ExtendedKalmanUpdater
@@ -18,9 +19,11 @@
 from ..sensor.sensor import Sensor
 from ..sensormanager.action import Action
 from ..types.prediction import Prediction
+from ..updater.base import Updater
 from ..updater.particle import ParticleUpdater
 from ..resampler.particle import SystematicResampler
 from ..types.state import State
+from ..dataassociator.base import DataAssociator
 
 
 class RewardFunction(Base, ABC):
@@ -157,28 +160,32 @@ class ExpectedKLDivergence(RewardFunction):
     the action and resulting measurement from that action.
     """
 
-    predictor: ParticlePredictor = Property(default=None,
-                                            doc="Predictor used to predict the track to a "
-                                                "new state. This reward function is only "
-                                                "compatible with :class:`~.ParticlePredictor` "
-                                                "types.")
-    updater: ParticleUpdater = Property(default=None,
-                                        doc="Updater used to update the track to the new state. "
-                                            "This reward function is only compatible with "
-                                            ":class:`~.ParticleUpdater` types.")
-    method_sum: bool = Property(default=True, doc="Determines method of calculating reward."
-                                                  "Default calculates sum across all targets."
-                                                  "Otherwise calculates mean of all targets.")
+    predictor: Predictor = Property(default=None,
+                                    doc="Predictor used to predict the track to a "
+                                        "new state. This reward function is only "
+                                        "compatible with :class:`~.ParticlePredictor` "
+                                        "types.")
+    updater: Updater = Property(default=None,
+                                doc="Updater used to update the track to the new state. "
+                                    "This reward function is only compatible with "
+                                    ":class:`~.ParticleUpdater` types.")
+    method_sum: bool = Property(default=True,
+                                doc="Determines method of calculating reward."
+                                    "Default calculates sum across all targets."
+                                    "Otherwise calculates mean of all targets.")
+    data_associator: DataAssociator = Property(default=None,
+                                               doc="Data associator for associating "
+                                                   "detections to tracks when "
+                                                   "multiple sensors are managed.")
+
+    return_tracks: bool = Property(default=False,
+                                   doc="A flag for allowing the predicted track, "
+                                       "used to calculate the reward, to be "
+                                       "returned.")
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.KLD = KLDivergence()
-        if self.predictor is not None and not isinstance(self.predictor, ParticlePredictor):
-            raise NotImplementedError('Only ParticlePredictor types are currently compatible '
-                                      'with this reward function')
-        if self.updater is not None and not isinstance(self.updater, ParticleUpdater):
-            raise NotImplementedError('Only ParticleUpdater types are currently compatible '
-                                      'with this reward function')
 
     def __call__(self, config: Mapping[Sensor, Sequence[Action]], tracks: Set[Track],
                  metric_time: datetime.datetime, *args, **kwargs):
@@ -200,6 +207,10 @@ def __call__(self, config: Mapping[Sensor, Sequence[Action]], tracks: Set[Track]
         : float
             Kullback-Leibler divergence for given configuration
 
+        : Set[Track] (if defined)
+            Set of tracks that have been predicted and updated in reward
+            calculation if :attr:`return_tracks` is `True`
+
         """
 
         # Reward value
@@ -222,9 +233,8 @@ def __call__(self, config: Mapping[Sensor, Sequence[Action]], tracks: Set[Track]
         # Create dictionary of predictions for the tracks in the configuration
         predicted_tracks = set()
         for track in tracks:
-            predicted_track = Track()
+            predicted_track = copy.copy(track)
             if self.predictor:
-                predicted_track = copy.copy(track)
                 predicted_track.append(self.predictor.predict(track[-1],
                                                               timestamp=metric_time))
             else:
@@ -233,44 +243,61 @@ def __call__(self, config: Mapping[Sensor, Sequence[Action]], tracks: Set[Track]
 
             predicted_tracks.add(predicted_track)
 
-        for sensor in predicted_sensors:
-            # Assumes one detection per track
-
-            detections = self._generate_detections(predicted_tracks, sensor)
+        sensor_detections = self._generate_detections(predicted_tracks,
+                                                      predicted_sensors,
+                                                      timestamp=metric_time)
+        det_count = 0
+        for sensor, detections in sensor_detections.items():
 
             for predicted_track, detection_set in detections.items():
-
+                det_count += len(detection_set)
                 for n, detection in enumerate(detection_set):
 
-                    # if detection:
                     # Generate hypothesis based on prediction/previous update and detection
                     hypothesis = SingleHypothesis(predicted_track, detection)
 
                     # Do the update based on this hypothesis and store covariance matrix
                     update = self.updater.update(hypothesis)
 
-                    # else:
-                    #     update = copy.copy(predicted_track[-1])
-
                     kld += self.KLD(predicted_track[-1], update)
 
-            if self.method_sum is False and len(detections) != 0:
+                    if not isinstance(self, MultiUpdateExpectedKLDivergence):
+                        predicted_track.append(update)
+
+        if self.method_sum is False and det_count != 0:
 
-                kld /= len(detections)
+            kld /= det_count
 
         # Return value of configuration metric
-        return kld
+        if self.return_tracks:
+            return kld, predicted_tracks
+        else:
+            return kld
 
-    def _generate_detections(self, predicted_tracks, sensor):
+    def _generate_detections(self, predicted_tracks, sensors, timestamp=None):
 
-        detections = {}
-        for predicted_track in predicted_tracks:
-            track_detections = set()
-            track_detections.update(sensor.measure({State(predicted_track.mean)}, noise=True))
+        all_detections = {}
+
+        for sensor in sensors:
+            detections = {}
+            for predicted_track in predicted_tracks:
+                tmp_detection = sensor.measure({State(predicted_track.mean,
+                                                      timestamp=predicted_track.timestamp)},
+                                               noise=True)
+                detections.update({predicted_track: tmp_detection})
+
+            if self.data_associator:
+                tmp_hypotheses = self.data_associator.associate(
+                    predicted_tracks,
+                    {det for dets in detections.values() for det in dets},
+                    timestamp)
+                detections = {predicted_track: {hypothesis.measurement}
+                              for predicted_track, hypothesis in tmp_hypotheses.items()
+                              if hypothesis}
 
-            detections.update({predicted_track: track_detections})
+            all_detections.update({sensor: detections})
 
-        return detections
+        return all_detections
 
 
 class MultiUpdateExpectedKLDivergence(ExpectedKLDivergence):
@@ -286,6 +313,16 @@ class MultiUpdateExpectedKLDivergence(ExpectedKLDivergence):
     of them.
     """
 
+    predictor: ParticlePredictor = Property(default=None,
+                                            doc="Predictor used to predict the track to a "
+                                                "new state. This reward function is only "
+                                                "compatible with :class:`~.ParticlePredictor` "
+                                                "types.")
+    updater: ParticleUpdater = Property(default=None,
+                                        doc="Updater used to update the track to the new state. "
+                                            "This reward function is only compatible with "
+                                            ":class:`~.ParticleUpdater` types.")
+
     updates_per_track: int = Property(default=2,
                                       doc="Number of measurements to generate from each "
                                           "track prediction. This should be > 1.")
@@ -303,21 +340,23 @@ def __init__(self, *args, **kwargs):
             raise ValueError(f'updates_per_track = {self.updates_per_track}. This reward '
                              f'function only accepts >= 2')
 
-    def _generate_detections(self, predicted_tracks, sensor):
+    def _generate_detections(self, predicted_tracks, sensors, timestamp=None):
 
         detections = {}
-
+        all_detections = {}
         resampler = SystematicResampler()
 
-        for predicted_track in predicted_tracks:
-
-            measurement_sources = resampler.resample(predicted_track[-1],
-                                                     nparts=self.updates_per_track)
+        for sensor in sensors:
+            for predicted_track in predicted_tracks:
 
-            track_detections = set()
-            for state in measurement_sources.state_vector:
-                track_detections.update(sensor.measure({State(state)}, noise=True))
+                measurement_sources = resampler.resample(predicted_track[-1],
+                                                         nparts=self.updates_per_track)
+                tmp_detections = set()
+                for state in measurement_sources.state_vector:
+                    tmp_detections.update(sensor.measure({State(state, timestamp=timestamp)},
+                                                         noise=True))
 
-            detections.update({predicted_track: track_detections})
+                detections.update({predicted_track: tmp_detections})
+            all_detections.update({sensor: detections})
 
-        return detections
+        return all_detections