diff --git a/pyproject.toml b/pyproject.toml
index 4e79885..0468a15 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,7 +8,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry]
name = "pyelq-sdk"
-version = "1.0.9"
+version = "1.0.10"
description = "Package for detection, localization and quantification code."
authors = ["Bas van de Kerkhof", "Matthew Jones", "David Randell"]
homepage = "https://sede-open.github.io/pyELQ/"
@@ -23,7 +23,7 @@ packages = [{ include = "pyelq", from = "src" }]
python = ">=3.9, <3.12"
pandas = ">=2.1.4"
numpy = ">=1.26.2"
-plotly = ">=5.18.0"
+plotly = ">=5.18.0, <5.24"
scipy = ">=1.11.4"
pymap3d = ">=3.0.1"
geojson = ">=3.1.0"
@@ -49,7 +49,7 @@ addopts = "--cov=pyelq --cov-fail-under=90 --ignore-glob=*plot*"
testpaths = ["tests"]
[tool.coverage.report]
-omit = ["*plot*", "*/data_access/*", "*/plotting/*"]
+omit = ["*plot*", "*/data_access/*", "*/plotting/*", "*/post_processing/*"]
exclude_lines = [".*def.*plot.*", "from pyelq.plotting.plot import Plot"]
[tool.coverage.run]
diff --git a/src/pyelq/plotting/plot.py b/src/pyelq/plotting/plot.py
index 7deaaa3..99ac722 100644
--- a/src/pyelq/plotting/plot.py
+++ b/src/pyelq/plotting/plot.py
@@ -12,7 +12,7 @@
import warnings
from copy import deepcopy
from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Callable, Type, Union
+from typing import TYPE_CHECKING, Any, Callable, Type, Union
import numpy as np
import pandas as pd
@@ -20,16 +20,20 @@
import plotly.graph_objects as go
from geojson import Feature, FeatureCollection
from openmcmc.mcmc import MCMC
-from scipy.ndimage import label
from shapely import geometry
from pyelq.component.background import TemporalBackground
from pyelq.component.error_model import ErrorModel
from pyelq.component.offset import PerSensor
from pyelq.component.source_model import SlabAndSpike, SourceModel
-from pyelq.coordinate_system import ENU, LLA
+from pyelq.coordinate_system import LLA
from pyelq.dispersion_model.gaussian_plume import GaussianPlume
from pyelq.sensor.sensor import Sensor, SensorGroup
+from pyelq.support_functions.post_processing import (
+ calculate_rectangular_statistics,
+ create_lla_polygons_from_xy_points,
+ is_regularly_spaced,
+)
if TYPE_CHECKING:
from pyelq.model import ELQModel
@@ -127,7 +131,7 @@ def plot_quantiles_from_array(
return fig
-def create_trace_specifics(object_to_plot: Union[Type[SlabAndSpike], SourceModel, MCMC], **kwargs) -> dict:
+def create_trace_specifics(object_to_plot: Union[Type[SlabAndSpike], SourceModel, MCMC], **kwargs: Any) -> dict:
"""Specification of different traces of single variables.
Provides all details for plots where we want to plot a single variable as a line plot. Based on the object_to_plot
@@ -136,7 +140,7 @@ def create_trace_specifics(object_to_plot: Union[Type[SlabAndSpike], SourceModel
Args:
object_to_plot (Union[Type[SlabAndSpike], SourceModel, MCMC]): Object which we want to plot a single
variable from
- **kwargs (dict): Additional key word arguments, e.g. burn_in or dict_key, used in some specific plots but not
+ **kwargs (Any): Additional key word arguments, e.g. burn_in or dict_key, used in some specific plots but not
applicable to all.
Returns:
@@ -201,7 +205,7 @@ def create_trace_specifics(object_to_plot: Union[Type[SlabAndSpike], SourceModel
def create_plot_specifics(
- object_to_plot: Union[ErrorModel, PerSensor, MCMC], sensor_object: SensorGroup, plot_type: str = "", **kwargs
+ object_to_plot: Union[ErrorModel, PerSensor, MCMC], sensor_object: SensorGroup, plot_type: str = "", **kwargs: Any
) -> dict:
"""Specification of different traces where we want to plot a trace for each sensor.
@@ -216,7 +220,7 @@ def create_plot_specifics(
object_to_plot (Union[ErrorModel, PerSensor, MCMC]): Object which we want to plot a single variable from
sensor_object (SensorGroup): SensorGroup object associated with the object_to_plot
plot_type (str, optional): String specifying either a line or a box plot.
- **kwargs (dict): Additional key word arguments, e.g. burn_in or dict_key, used in some specific plots but not
+ **kwargs (Any): Additional key word arguments, e.g. burn_in or dict_key, used in some specific plots but not
applicable to all.
Returns:
@@ -310,7 +314,7 @@ def plot_single_scatter(
y_values: np.ndarray,
color: str,
name: str,
- **kwargs,
+ **kwargs: Any,
) -> go.Figure:
"""Plots a single scatter trace on the supplied figure object.
@@ -320,8 +324,8 @@ def plot_single_scatter(
y_values (np.ndarray): Numpy array containing the y-values to use in plotting.
color (str): RGB color string to use for this trace.
name (str): String name to show in the legend.
- **kwargs (dict): Additional key word arguments, e.g. burn_in, legend_group, show_legend, used in some specific plots
- but not applicable to all.
+ **kwargs (Any): Additional key word arguments, e.g. burn_in, legend_group, show_legend, used in some specific
+ plots but not applicable to all.
Returns:
fig (go.Figure): Plotly figure with the trace added to it.
@@ -377,7 +381,7 @@ def plot_single_box(fig: go.Figure, y_values: np.ndarray, color: str, name: str)
def plot_polygons_on_map(
- polygons: Union[np.ndarray, list], values: np.ndarray, opacity: float, map_color_scale: str, **kwargs
+ polygons: Union[np.ndarray, list], values: np.ndarray, opacity: float, map_color_scale: str, **kwargs: Any
) -> go.Choroplethmapbox:
"""Plot a set of polygons on a map.
@@ -387,8 +391,8 @@ def plot_polygons_on_map(
used in coloring the polygons on the map.
opacity (float): Float between 0 and 1 specifying the opacity of the polygon fill color.
map_color_scale (str): The string which defines which plotly color scale.
- **kwargs (dict): Additional key word arguments which can be passed on the go.Choroplethmapbox object (will override
- the default values as specified in this function)
+ **kwargs (Any): Additional key word arguments which can be passed on to the go.Choroplethmapbox object
+ (will override the default values as specified in this function)
Returns:
trace: go.Choroplethmapbox trace with the colored polygons which can be added to a go.Figure object.
@@ -577,7 +581,7 @@ def show_all(self, renderer="browser"):
for fig in self.figure_dict.values():
fig.show(renderer=renderer)
- def plot_single_trace(self, object_to_plot: Union[Type[SlabAndSpike], SourceModel, MCMC], **kwargs):
+ def plot_single_trace(self, object_to_plot: Union[Type[SlabAndSpike], SourceModel, MCMC], **kwargs: Any):
"""Plotting a trace of a single variable.
Depending on the object to plot it creates a figure which is stored in the figure_dict attribute.
@@ -585,8 +589,8 @@ def plot_single_trace(self, object_to_plot: Union[Type[SlabAndSpike], SourceMode
Args:
object_to_plot (Union[Type[SlabAndSpike], SourceModel, MCMC]): The object from which to plot a variable
- **kwargs (dict): Additional key word arguments, e.g. burn_in, legend_group, show_legend, dict_key, used in some
- specific plots but not applicable to all.
+ **kwargs (Any): Additional key word arguments, e.g. burn_in, legend_group, show_legend, dict_key, used in
+ some specific plots but not applicable to all.
"""
plot_specifics = create_trace_specifics(object_to_plot=object_to_plot, **kwargs)
@@ -632,7 +636,7 @@ def plot_trace_per_sensor(
object_to_plot: Union[ErrorModel, PerSensor, MCMC],
sensor_object: Union[SensorGroup, Sensor],
plot_type: str,
- **kwargs,
+ **kwargs: Any,
):
"""Plotting a trace of a single variable per sensor.
@@ -643,8 +647,8 @@ def plot_trace_per_sensor(
object_to_plot (Union[ErrorModel, PerSensor, MCMC]): The object which to plot a variable from
sensor_object (Union[SensorGroup, Sensor]): Sensor object associated with the object_to_plot
plot_type (str): String specifying a line or box plot.
- **kwargs (dict): Additional key word arguments, e.g. burn_in, legend_group, show_legend, dict_key, used in some
- specific plots but not applicable to all.
+ **kwargs (Any): Additional key word arguments, e.g. burn_in, legend_group, show_legend, dict_key, used in
+ some specific plots but not applicable to all.
"""
if isinstance(sensor_object, Sensor):
@@ -789,7 +793,7 @@ def plot_fitted_values_per_sensor(
self.figure_dict[dict_key] = fig
- def plot_emission_rate_estimates(self, source_model_object, y_axis_type="linear", **kwargs):
+ def plot_emission_rate_estimates(self, source_model_object, y_axis_type="linear", **kwargs: Any):
"""Plot the emission rate estimates source model object against MCMC iteration.
Based on the inputs it plots the results of the mcmc analysis, being the estimated emission rate values for
@@ -803,7 +807,7 @@ def plot_emission_rate_estimates(self, source_model_object, y_axis_type="linear"
Args:
source_model_object (SourceModel): Source model object which contains the estimated emission rate estimates.
y_axis_type (str, optional): String to indicate whether the y-axis should be linear or log scale.
- **kwargs (dict): Additional key word arguments, e.g. burn_in, dict_key, used in some specific plots but not
+ **kwargs (Any): Additional key word arguments, e.g. burn_in, dict_key, used in some specific plots but not
applicable to all.
"""
@@ -912,7 +916,7 @@ def create_empty_mapbox_figure(self, dict_key: str = "map_plot") -> None:
)
def plot_values_on_map(
- self, dict_key: str, coordinates: LLA, values: np.ndarray, aggregate_function: Callable = np.sum, **kwargs
+ self, dict_key: str, coordinates: LLA, values: np.ndarray, aggregate_function: Callable = np.sum, **kwargs: Any
):
"""Plot values on a map based on coordinates.
@@ -922,7 +926,7 @@ def plot_values_on_map(
values (np.ndarray): Numpy array of values consistent with coordinates to plot on the map
aggregate_function (Callable, optional): Function which to apply on the data in each hexagonal bin to
aggregate the data and visualise the result.
- **kwargs (dict): Additional keyword arguments for plotting behaviour (opacity, map_color_scale, num_hexagons,
+ **kwargs (Any): Additional keyword arguments for plotting behaviour (opacity, map_color_scale, num_hexagons,
show_positions)
"""
@@ -975,107 +979,50 @@ def plot_quantification_results_on_map(
burn_in: int = 0,
show_summary_results: bool = True,
):
- """Placeholder for the quantification plots."""
- nof_iterations = model_object.n_iter
+ """Function to create a map with the quantification results of the model object.
+
+ This function takes the ELQModel object and calculates the statistics for the quantification results. It then
+ populates the figure dictionary with three different maps showing the normalized count, median emission rate
+ and the inter-quartile range of the emission rate estimates.
+
+ Args:
+ model_object (ELQModel): ELQModel object containing the quantification results
+ bin_size_x (float, optional): Size of the bins in the x-direction. Defaults to 1.
+ bin_size_y (float, optional): Size of the bins in the y-direction. Defaults to 1.
+ normalized_count_limit (float, optional): Limit for the normalized count to show on the map.
+ Defaults to 0.005.
+ burn_in (int, optional): Number of burn-in iterations to discard before calculating the statistics.
+ Defaults to 0.
+ show_summary_results (bool, optional): Flag to show the summary results on the map. Defaults to True.
+
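+ Examples:
+ Illustrative sketch; `model` and `plot_object` are placeholders for a fitted ELQModel instance and a
+ Plot instance respectively, and the argument values are examples only:
+
+ plot_object.plot_quantification_results_on_map(model_object=model, burn_in=1000)
+ plot_object.figure_dict["count_map"].show()
+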
+ """
ref_latitude = model_object.components["source"].dispersion_model.source_map.location.ref_latitude
ref_longitude = model_object.components["source"].dispersion_model.source_map.location.ref_longitude
ref_altitude = model_object.components["source"].dispersion_model.source_map.location.ref_altitude
+
datetime_min_string = model_object.sensor_object.time.min().strftime("%d-%b-%Y, %H:%M:%S")
datetime_max_string = model_object.sensor_object.time.max().strftime("%d-%b-%Y, %H:%M:%S")
- all_source_locations = model_object.mcmc.store["z_src"]
- min_x = np.nanmin(all_source_locations[0, :, :])
- max_x = np.nanmax(all_source_locations[0, :, :])
- min_y = np.nanmin(all_source_locations[1, :, :])
- max_y = np.nanmax(all_source_locations[1, :, :])
-
- bin_min_x = np.floor(min_x - 0.1)
- bin_max_x = np.ceil(max_x + 0.1)
- bin_min_y = np.floor(min_y - 0.1)
- bin_max_y = np.ceil(max_y + 0.1)
- bin_min_iteration = burn_in + 0.5
- bin_max_iteration = nof_iterations + 0.5
-
- max_nof_sources = all_source_locations.shape[1]
-
- x_edges = np.arange(start=bin_min_x, stop=bin_max_x + bin_size_x, step=bin_size_x)
- y_edges = np.arange(start=bin_min_y, stop=bin_max_y + bin_size_y, step=bin_size_y)
- iteration_edges = np.arange(start=bin_min_iteration, stop=bin_max_iteration + bin_size_y, step=1)
-
- result_x_vals = all_source_locations[0, :, :].flatten()
- result_y_vals = all_source_locations[1, :, :].flatten()
- result_z_vals = all_source_locations[2, :, :].flatten()
- # 1-indexing for iterations effectively
- result_iteration_vals = np.array(range(nof_iterations)).reshape(1, -1) + 1
- result_iteration_vals = np.tile(result_iteration_vals, (max_nof_sources, 1)).flatten()
- results_estimates = model_object.mcmc.store["s"].flatten()
-
- result_weighted, _ = np.histogramdd(
- sample=np.array([result_x_vals, result_y_vals, result_iteration_vals]).T,
- bins=[x_edges, y_edges, iteration_edges],
- weights=results_estimates,
- density=False,
+ result_weighted, _, normalized_count, count_boolean, enu_points, summary_result = (
+ calculate_rectangular_statistics(
+ model_object=model_object,
+ bin_size_x=bin_size_x,
+ bin_size_y=bin_size_y,
+ burn_in=burn_in,
+ normalized_count_limit=normalized_count_limit,
+ )
)
- count_result, edges_result = np.histogramdd(
- sample=np.array([result_x_vals, result_y_vals, result_iteration_vals]).T,
- bins=[x_edges, y_edges, iteration_edges],
- density=False,
+ polygons = create_lla_polygons_from_xy_points(
+ points_array=enu_points,
+ ref_latitude=ref_latitude,
+ ref_longitude=ref_longitude,
+ ref_altitude=ref_altitude,
+ boolean_mask=count_boolean,
)
- enu_x = edges_result[0]
- enu_x = enu_x[:-1] + np.diff(enu_x) / 2
- enu_y = edges_result[1]
- enu_y = enu_y[:-1] + np.diff(enu_y) / 2
-
- enu_x, enu_y = np.meshgrid(enu_x, enu_y, indexing="ij")
-
- enu_object_full_grid = ENU(ref_latitude=ref_latitude, ref_longitude=ref_longitude, ref_altitude=ref_altitude)
- enu_object_full_grid.east = enu_x.flatten()
- enu_object_full_grid.north = enu_y.flatten()
- enu_object_full_grid.up = np.zeros_like(enu_object_full_grid.north)
- lla_object_full_grid = enu_object_full_grid.to_lla()
-
- _, gridsize_lat = is_regularly_spaced(lla_object_full_grid.latitude, tolerance=1e-6)
- _, gridsize_lon = is_regularly_spaced(lla_object_full_grid.longitude, tolerance=1e-6)
-
- overall_count = np.sum(count_result, axis=2)
- normalized_count = overall_count / (nof_iterations - burn_in)
-
- count_boolean = normalized_count >= normalized_count_limit
-
- enu_object = ENU(ref_latitude=ref_latitude, ref_longitude=ref_longitude, ref_altitude=ref_altitude)
- enu_object.east = enu_x[count_boolean].flatten()
- enu_object.north = enu_y[count_boolean].flatten()
- enu_object.up = np.zeros_like(enu_object.north)
- lla_object = enu_object.to_lla()
-
- polygons = [
- geometry.box(
- lla_object.longitude[idx] - gridsize_lon / 2,
- lla_object.latitude[idx] - gridsize_lat / 2,
- lla_object.longitude[idx] + gridsize_lon / 2,
- lla_object.latitude[idx] + gridsize_lat / 2,
- )
- for idx in range(lla_object.nof_observations)
- ]
-
if show_summary_results:
- summary_trace = self.create_summary_trace(
- result_iteration_vals=result_iteration_vals,
- burn_in=burn_in,
- result_x_vals=result_x_vals,
- result_y_vals=result_y_vals,
- result_z_vals=result_z_vals,
- results_estimates=results_estimates,
- count_boolean=count_boolean,
- x_edges=x_edges,
- y_edges=y_edges,
- nof_iterations=nof_iterations,
- ref_latitude=ref_latitude,
- ref_longitude=ref_longitude,
- ref_altitude=ref_altitude,
- )
+ summary_trace = self.create_summary_trace(summary_result=summary_result)
self.create_empty_mapbox_figure(dict_key="count_map")
trace = plot_polygons_on_map(
@@ -1199,118 +1146,20 @@ def plot_coverage(
@staticmethod
def create_summary_trace(
- result_x_vals: np.ndarray,
- result_y_vals: np.ndarray,
- result_z_vals: np.ndarray,
- results_estimates: np.ndarray,
- result_iteration_vals: np.ndarray,
- count_boolean: np.ndarray,
- x_edges: np.ndarray,
- y_edges: np.ndarray,
- nof_iterations: int,
- burn_in: int,
- ref_latitude: float,
- ref_longitude: float,
- ref_altitude: float,
+ summary_result: pd.DataFrame,
) -> go.Scattermapbox:
"""Helper function to create the summary information to plot on top of map type plots.
- We identify all blobs of estimates which appear close together on the map by looking at connected pixels in the
- count_boolean array. Next we find the summary statistics for all estimates in that blob like overall median and
- IQR estimate, mean location and the likelihood of that blob.
-
- When multiple sources are present in the same blob at the same iteration we first sum those emission rate
- estimates before taking the median.
-
- The summary statistics are also printed out on screen.
+ We use the summary result calculated through the support functions module to create a trace which contains
+ the summary information for each source location.
Args:
- result_x_vals (np.ndarray): X-coordinate of estimates, flattened array of (n_sources_max * nof_iterations,).
- result_y_vals (np.ndarray): Y-coordinate of estimates, flattened array of (n_sources_max * nof_iterations,).
- result_z_vals (np.ndarray): Z-coordinate of estimates, flattened array of (n_sources_max * nof_iterations,).
- results_estimates (np.ndarray): Emission rate estimates, flattened array of
- (n_sources_max * nof_iterations,).
- result_iteration_vals (np.ndarray): Iteration number corresponding each estimated value, flattened array
- of (n_sources_max * nof_iterations,).
- count_boolean (np.ndarray): Boolean array which indicates if likelihood of pixel is over threshold.
- x_edges (np.ndarray): Pixel edges x-coordinates.
- y_edges (np.ndarray): Pixel edges y-coordinates.
- nof_iterations (int): Number of iterations used in MCMC.
- burn_in (int): Burn-in used in MCMC.
- ref_latitude (float): Reference latitude in degrees of ENU coordinate system.
- ref_longitude (float): Reference longitude in degrees of ENU coordinate system.
- ref_altitude (float): Reference altitude in meters of ENU coordinate system.
+ summary_result (pd.DataFrame): DataFrame containing the summary information for each source location.
Returns:
summary_trace (go.Scattermapbox): Trace with summary information to plot on top of map type plots.
"""
- labeled_array, num_features = label(input=count_boolean, structure=np.ones((3, 3)))
-
- burn_in_bool = result_iteration_vals > burn_in
- nan_x_vals = np.isnan(result_x_vals)
- nan_y_vals = np.isnan(result_y_vals)
- nan_z_vals = np.isnan(result_z_vals)
- no_nan_idx = np.logical_not(np.logical_or(np.logical_or(nan_x_vals, nan_y_vals), nan_z_vals))
- no_nan_and_burn_in_bool = np.logical_and(no_nan_idx, burn_in_bool)
- result_x_vals_no_nan = result_x_vals[no_nan_and_burn_in_bool]
- result_y_vals_no_nan = result_y_vals[no_nan_and_burn_in_bool]
- result_z_vals_no_nan = result_z_vals[no_nan_and_burn_in_bool]
- results_estimates_no_nan = results_estimates[no_nan_and_burn_in_bool]
- result_iteration_vals_no_nan = result_iteration_vals[no_nan_and_burn_in_bool]
-
- x_idx = np.digitize(result_x_vals_no_nan, x_edges, right=False) - 1
- y_idx = np.digitize(result_y_vals_no_nan, y_edges, right=False) - 1
- bin_numbers = np.ravel_multi_index((x_idx, y_idx), labeled_array.shape)
-
- bin_numbers_per_label = [
- np.ravel_multi_index(np.nonzero(labeled_array == value), labeled_array.shape)
- for value in np.array(range(num_features)) + 1
- ]
-
- summary_result = pd.DataFrame()
- for label_idx, curr_bins in enumerate(bin_numbers_per_label):
- boolean_for_result = np.isin(bin_numbers, curr_bins)
- mean_x = np.mean(result_x_vals_no_nan[boolean_for_result])
- mean_y = np.mean(result_y_vals_no_nan[boolean_for_result])
- mean_z = np.mean(result_z_vals_no_nan[boolean_for_result])
-
- unique_iteration_vals, indices, counts = np.unique(
- result_iteration_vals_no_nan[boolean_for_result], return_inverse=True, return_counts=True
- )
- nof_iterations_present = unique_iteration_vals.size
- blob_likelihood = nof_iterations_present / (nof_iterations - burn_in)
- single_idx = np.argwhere(counts == 1)
- results_estimates_for_blob = results_estimates_no_nan[boolean_for_result]
- temp_estimate_result = results_estimates_for_blob[indices[single_idx.flatten()]]
- multiple_idx = np.argwhere(counts > 1)
- for single_idx in multiple_idx:
- temp_val = np.sum(results_estimates_for_blob[indices == single_idx])
- temp_estimate_result = np.append(temp_estimate_result, temp_val)
-
- median_estimate = np.median(temp_estimate_result)
- iqr_estimate = np.nanquantile(a=temp_estimate_result, q=0.75) - np.nanquantile(
- a=temp_estimate_result, q=0.25
- )
- lower_bound = np.nanquantile(a=temp_estimate_result, q=0.025)
- upper_bound = np.nanquantile(a=temp_estimate_result, q=0.975)
- enu_object = ENU(ref_latitude=ref_latitude, ref_longitude=ref_longitude, ref_altitude=ref_altitude)
- enu_object.east = mean_x
- enu_object.north = mean_y
- enu_object.up = mean_z
- lla_object = enu_object.to_lla()
-
- summary_result.loc[label_idx, "latitude"] = lla_object.latitude
- summary_result.loc[label_idx, "longitude"] = lla_object.longitude
- summary_result.loc[label_idx, "altitude"] = lla_object.altitude
- summary_result.loc[label_idx, "height"] = mean_z
- summary_result.loc[label_idx, "median_estimate"] = median_estimate
- summary_result.loc[label_idx, "quantile_025"] = lower_bound
- summary_result.loc[label_idx, "quantile_975"] = upper_bound
- summary_result.loc[label_idx, "iqr_estimate"] = iqr_estimate
- summary_result.loc[label_idx, "absolute_count_iterations"] = nof_iterations_present
- summary_result.loc[label_idx, "blob_likelihood"] = blob_likelihood
-
summary_text_values = [
f"Source ID: {value}
"
f"(Lon, Lat, Alt) ([deg], [deg], [m]):
"
@@ -1337,38 +1186,4 @@ def create_summary_trace(
hoverinfo="text",
)
- summary_result.index.name = "source_ID"
- summary_result = summary_result.astype({"absolute_count_iterations": "int"})
- print("Summary results:")
- print(summary_result.to_string(float_format=lambda x: "%.7f" % x))
-
return summary_trace
-
-
-def is_regularly_spaced(array: np.ndarray, tolerance: float = 0.01, return_delta: bool = True):
- """Determines whether an input array is regularly spaced, within some (absolute) tolerance.
-
- Gets the large differences (defined by tolerance) in the array, and sees whether all of them are within 5% of one
- another.
-
- Args:
- array (np.ndarray): Input array to be analysed.
- tolerance (float, optional): Absolute value above which the difference between values is considered significant.
- Defaults to 0.01.
- return_delta (bool, optional): Whether to return the value of the regular grid spacing. Defaults to True.
-
- Returns:
- (bool): Whether or not the grid is regularly spaced.
- (float): The value of the regular grid spacing.
-
- """
- unique_vals = np.unique(array)
- diff_unique_vals = np.diff(unique_vals)
- diff_big = diff_unique_vals[diff_unique_vals > tolerance]
-
- boolean = np.all([np.isclose(diff_big[i], diff_big[i + 1], rtol=0.05) for i in range(len(diff_big) - 1)])
-
- if return_delta:
- return boolean, np.mean(diff_big)
-
- return boolean, None
diff --git a/src/pyelq/support_functions/__init__.py b/src/pyelq/support_functions/__init__.py
index 043f324..60848ca 100644
--- a/src/pyelq/support_functions/__init__.py
+++ b/src/pyelq/support_functions/__init__.py
@@ -2,4 +2,4 @@
#
# SPDX-License-Identifier: Apache-2.0
"""Support Functions Module."""
-__all__ = ["spatio_temporal_interpolation"]
+__all__ = ["post_processing", "spatio_temporal_interpolation"]
diff --git a/src/pyelq/support_functions/post_processing.py b/src/pyelq/support_functions/post_processing.py
new file mode 100644
index 0000000..4267148
--- /dev/null
+++ b/src/pyelq/support_functions/post_processing.py
@@ -0,0 +1,343 @@
+# SPDX-FileCopyrightText: 2024 Shell Global Solutions International B.V. All Rights Reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# -*- coding: utf-8 -*-
+"""Post-processing module.
+
+Module containing some functions used in post-processing of the results.
+
+"""
+from typing import TYPE_CHECKING, Tuple, Union
+
+import numpy as np
+import pandas as pd
+from scipy.ndimage import label
+from shapely import geometry
+
+from pyelq.coordinate_system import ENU
+
+if TYPE_CHECKING:
+ from pyelq.model import ELQModel
+
+
+def is_regularly_spaced(array: np.ndarray, tolerance: float = 0.01, return_delta: bool = True):
+ """Determines whether an input array is regularly spaced, within some (absolute) tolerance.
+
+ Gets the large differences (defined by tolerance) in the array, and sees whether all of them are within 5% of one
+ another.
+
+ Args:
+ array (np.ndarray): Input array to be analysed.
+ tolerance (float, optional): Absolute value above which the difference between values is considered significant.
+ Defaults to 0.01.
+ return_delta (bool, optional): Whether to return the value of the regular grid spacing. Defaults to True.
+
+ Returns:
+ (bool): Whether the grid is regularly spaced.
+ (float): The value of the regular grid spacing.
+
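+ Examples:
+ A small self-contained sketch with a synthetic array (values are illustrative only):
+
+ >>> import numpy as np
+ >>> flag, delta = is_regularly_spaced(np.array([0.0, 1.0, 1.0, 2.0, 3.0]), tolerance=0.01)
+ >>> bool(flag), float(delta)
+ (True, 1.0)
+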
+ """
+ unique_vals = np.unique(array)
+ diff_unique_vals = np.diff(unique_vals)
+ diff_big = diff_unique_vals[diff_unique_vals > tolerance]
+
+ boolean = np.all([np.isclose(diff_big[i], diff_big[i + 1], rtol=0.05) for i in range(len(diff_big) - 1)])
+
+ if return_delta:
+ return boolean, np.mean(diff_big)
+
+ return boolean, None
+
+
+def calculate_rectangular_statistics(
+ model_object: "ELQModel",
+ bin_size_x: float = 1,
+ bin_size_y: float = 1,
+ burn_in: int = 0,
+ normalized_count_limit: float = 0.005,
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, list, pd.DataFrame]:
+ """Function which aggregates the pyELQ results into rectangular bins and outputs the related summary statistics.
+
+ The function creates a pixel grid (binning) in East-North coordinates based on the bin_size_x and bin_size_y
+ parameters. For each bin both a count and a weighted sum of the emission rate estimates are calculated. The
+ count is normalized by the number of MCMC iterations after burn-in and a boolean array is created which
+ indicates whether the normalized count is above the normalized_count_limit threshold. Connected pixels above
+ this threshold are considered to be a single blob/source and, per iteration, the emission rate estimates of
+ all pixels in a blob are summed. Summary statistics are then calculated for each blob: the median and IQR of
+ the emission rate estimates, the mean location of the blob and the likelihood of the blob.
+
+ Args:
+ model_object (ELQModel): ELQModel object containing the results of the MCMC run.
+ bin_size_x (float, optional): Size of the bins in the x-direction. Defaults to 1.
+ bin_size_y (float, optional): Size of the bins in the y-direction. Defaults to 1.
+ burn_in (int, optional): Number of burn-in iterations used in the MCMC. Defaults to 0.
+ normalized_count_limit (float, optional): Threshold on the normalized count above which a pixel is
+ considered part of a blob. Defaults to 0.005.
+
+ Returns:
+ result_weighted (np.ndarray): Weighted sum of the emission rate estimates in each bin.
+ overall_count (np.ndarray): Count of the number of estimates in each bin.
+ normalized_count (np.ndarray): Normalized count of the number of estimates in each bin.
+ count_boolean (np.ndarray): Boolean array which indicates if likelihood of pixel is over threshold.
+ edges_result (list): Bin edges of the pixels in the x- and y-directions.
+ summary_result (pd.DataFrame): Summary statistics for each blob of estimates.
+
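+ Examples:
+ Rough usage sketch; `model` is a placeholder for an ELQModel instance on which the MCMC has already been
+ run, and the argument values are illustrative only:
+
+ result_weighted, overall_count, normalized_count, count_boolean, edges, summary = (
+ calculate_rectangular_statistics(model_object=model, bin_size_x=1.0, bin_size_y=1.0, burn_in=1000)
+ )
+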
+ """
+ nof_iterations = model_object.n_iter
+ ref_latitude = model_object.components["source"].dispersion_model.source_map.location.ref_latitude
+ ref_longitude = model_object.components["source"].dispersion_model.source_map.location.ref_longitude
+ ref_altitude = model_object.components["source"].dispersion_model.source_map.location.ref_altitude
+
+ all_source_locations = model_object.mcmc.store["z_src"]
+ min_x = np.nanmin(all_source_locations[0, :, :])
+ max_x = np.nanmax(all_source_locations[0, :, :])
+ min_y = np.nanmin(all_source_locations[1, :, :])
+ max_y = np.nanmax(all_source_locations[1, :, :])
+
+ bin_min_x = np.floor(min_x - 0.1)
+ bin_max_x = np.ceil(max_x + 0.1)
+ bin_min_y = np.floor(min_y - 0.1)
+ bin_max_y = np.ceil(max_y + 0.1)
+ bin_min_iteration = burn_in + 0.5
+ bin_max_iteration = nof_iterations + 0.5
+
+ max_nof_sources = all_source_locations.shape[1]
+
+ x_edges = np.arange(start=bin_min_x, stop=bin_max_x + bin_size_x, step=bin_size_x)
+ y_edges = np.arange(start=bin_min_y, stop=bin_max_y + bin_size_y, step=bin_size_y)
+ iteration_edges = np.arange(start=bin_min_iteration, stop=bin_max_iteration + 1, step=1)  # one bin per iteration
+
+ result_x_vals = all_source_locations[0, :, :].flatten()
+ result_y_vals = all_source_locations[1, :, :].flatten()
+ result_z_vals = all_source_locations[2, :, :].flatten()
+
+ # 1-indexing for iterations effectively
+ result_iteration_vals = np.array(range(nof_iterations)).reshape(1, -1) + 1
+ result_iteration_vals = np.tile(result_iteration_vals, (max_nof_sources, 1)).flatten()
+ results_estimates = model_object.mcmc.store["s"].flatten()
+
+ result_weighted, _ = np.histogramdd(
+ sample=np.array([result_x_vals, result_y_vals, result_iteration_vals]).T,
+ bins=[x_edges, y_edges, iteration_edges],
+ weights=results_estimates,
+ density=False,
+ )
+
+ count_result, edges_result = np.histogramdd(
+ sample=np.array([result_x_vals, result_y_vals, result_iteration_vals]).T,
+ bins=[x_edges, y_edges, iteration_edges],
+ density=False,
+ )
+
+ overall_count = np.array(np.sum(count_result, axis=2))
+ normalized_count = overall_count / (nof_iterations - burn_in)
+ count_boolean = normalized_count >= normalized_count_limit
+
+ summary_result = create_aggregation(
+ result_iteration_vals=result_iteration_vals,
+ burn_in=burn_in,
+ result_x_vals=result_x_vals,
+ result_y_vals=result_y_vals,
+ result_z_vals=result_z_vals,
+ results_estimates=results_estimates,
+ count_boolean=count_boolean,
+ x_edges=x_edges,
+ y_edges=y_edges,
+ nof_iterations=nof_iterations,
+ ref_latitude=ref_latitude,
+ ref_longitude=ref_longitude,
+ ref_altitude=ref_altitude,
+ )
+
+ return result_weighted, overall_count, normalized_count, count_boolean, edges_result[:2], summary_result
+
+
+def create_lla_polygons_from_xy_points(
+ points_array: list[np.ndarray],
+ ref_latitude: float,
+ ref_longitude: float,
+ ref_altitude: float,
+ boolean_mask: Union[np.ndarray, None] = None,
+) -> list[geometry.Polygon]:
+ """Function to create polygons in LLA coordinates from a grid of points in ENU coordinates.
+
+ This function takes the East-North bin edges of a pixel grid. The edges are converted into pixel center points,
+ which are transformed to LLA coordinates and used to create one rectangular polygon per pixel in LLA
+ coordinates. A polygon is only created if the boolean mask for that pixel is True.
+
+ Args:
+ points_array (list[np.ndarray]): List of two arrays containing the East and North bin edges of the pixel grid.
+ ref_latitude (float): Reference latitude in degrees of ENU coordinate system.
+ ref_longitude (float): Reference longitude in degrees of ENU coordinate system.
+ ref_altitude (float): Reference altitude in meters of ENU coordinate system.
+ boolean_mask (np.ndarray, optional): Boolean mask to indicate which pixels to create polygons for.
+ Defaults to None which means all pixels are used.
+
+ Returns:
+ list[geometry.Polygon]: List of polygons in LLA coordinates
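+
+ Examples:
+ A small self-contained sketch using a tiny synthetic grid of bin edges (reference coordinates are
+ illustrative only):
+
+ >>> import numpy as np
+ >>> edges = [np.arange(0.0, 30.0, 10.0), np.arange(0.0, 30.0, 10.0)]
+ >>> polygons = create_lla_polygons_from_xy_points(
+ ... points_array=edges, ref_latitude=52.0, ref_longitude=4.0, ref_altitude=0.0,
+ ... boolean_mask=np.ones((2, 2), dtype=bool),
+ ... )
+ >>> len(polygons)
+ 4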
+ """
+ enu_x = points_array[0]
+ enu_x = enu_x[:-1] + np.diff(enu_x) / 2
+ enu_y = points_array[1]
+ enu_y = enu_y[:-1] + np.diff(enu_y) / 2
+
+ if boolean_mask is None:
+ # Default to keeping all pixels; the mask shape must match the grid of pixel centers.
+ boolean_mask = np.ones((enu_x.size, enu_y.size), dtype=bool)
+
+ enu_x, enu_y = np.meshgrid(enu_x, enu_y, indexing="ij")
+
+ enu_object_full_grid = ENU(ref_latitude=ref_latitude, ref_longitude=ref_longitude, ref_altitude=ref_altitude)
+ enu_object_full_grid.east = enu_x.flatten()
+ enu_object_full_grid.north = enu_y.flatten()
+ enu_object_full_grid.up = np.zeros_like(enu_object_full_grid.north)
+ lla_object_full_grid = enu_object_full_grid.to_lla()
+
+ _, gridsize_lat = is_regularly_spaced(lla_object_full_grid.latitude, tolerance=1e-6)
+ _, gridsize_lon = is_regularly_spaced(lla_object_full_grid.longitude, tolerance=1e-6)
+
+ polygons = [
+ geometry.box(
+ lla_object_full_grid.longitude[idx] - gridsize_lon / 2,
+ lla_object_full_grid.latitude[idx] - gridsize_lat / 2,
+ lla_object_full_grid.longitude[idx] + gridsize_lon / 2,
+ lla_object_full_grid.latitude[idx] + gridsize_lat / 2,
+ )
+ for idx in np.argwhere(boolean_mask.flatten()).flatten()
+ ]
+
+ return polygons
+
+
+def create_aggregation(
+ result_x_vals: np.ndarray,
+ result_y_vals: np.ndarray,
+ result_z_vals: np.ndarray,
+ results_estimates: np.ndarray,
+ result_iteration_vals: np.ndarray,
+ count_boolean: np.ndarray,
+ x_edges: np.ndarray,
+ y_edges: np.ndarray,
+ nof_iterations: int,
+ burn_in: int,
+ ref_latitude: float,
+ ref_longitude: float,
+ ref_altitude: float,
+) -> pd.DataFrame:
+ """Function to create the aggregated information for the blobs of estimates.
+
+ We identify all blobs of estimates which appear close together on the map by looking at connected pixels in the
+ count_boolean array. Next we find the summary statistics for all estimates in that blob like overall median and
+ IQR estimate, mean location and the likelihood of that blob.
+
+ When multiple sources are present in the same blob at the same iteration we first sum those emission rate
+ estimates before taking the median.
+
+ If no blobs are found, a dataframe containing a single row of NaN values is returned to avoid breaking the
+ plotting code which calls this function.
+
+ Args:
+ result_x_vals (np.ndarray): X-coordinate of estimates, flattened array of (n_sources_max * nof_iterations,).
+ result_y_vals (np.ndarray): Y-coordinate of estimates, flattened array of (n_sources_max * nof_iterations,).
+ result_z_vals (np.ndarray): Z-coordinate of estimates, flattened array of (n_sources_max * nof_iterations,).
+ results_estimates (np.ndarray): Emission rate estimates, flattened array of
+ (n_sources_max * nof_iterations,).
+ result_iteration_vals (np.ndarray): Iteration number corresponding to each estimated value, flattened array
+ of (n_sources_max * nof_iterations,).
+ count_boolean (np.ndarray): Boolean array which indicates if likelihood of pixel is over threshold.
+ x_edges (np.ndarray): Pixel edges x-coordinates.
+ y_edges (np.ndarray): Pixel edges y-coordinates.
+ nof_iterations (int): Number of iterations used in MCMC.
+ burn_in (int): Burn-in used in MCMC.
+ ref_latitude (float): Reference latitude in degrees of ENU coordinate system.
+ ref_longitude (float): Reference longitude in degrees of ENU coordinate system.
+ ref_altitude (float): Reference altitude in meters of ENU coordinate system.
+
+ Returns:
+ summary_result (pd.DataFrame): Summary statistics for each blob of estimates.
+
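+ Examples:
+ A small synthetic sketch with two sources over two iterations, all falling into the same pixel; it shows
+ that estimates from the same iteration are summed before the median is taken (all values illustrative):
+
+ >>> import numpy as np
+ >>> summary = create_aggregation(
+ ... result_x_vals=np.array([0.5, 0.5, 0.5, 0.5]),
+ ... result_y_vals=np.array([0.5, 0.5, 0.5, 0.5]),
+ ... result_z_vals=np.array([1.0, 1.0, 1.0, 1.0]),
+ ... results_estimates=np.array([2.0, 4.0, 3.0, 0.0]),
+ ... result_iteration_vals=np.array([1, 2, 1, 2]),
+ ... count_boolean=np.array([[True, False], [False, False]]),
+ ... x_edges=np.array([0.0, 1.0, 2.0]),
+ ... y_edges=np.array([0.0, 1.0, 2.0]),
+ ... nof_iterations=2,
+ ... burn_in=0,
+ ... ref_latitude=52.0,
+ ... ref_longitude=4.0,
+ ... ref_altitude=0.0,
+ ... )
+ >>> float(summary.loc[0, "median_estimate"])
+ 4.5
+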
+ """
+ labeled_array, num_features = label(input=count_boolean, structure=np.ones((3, 3)))
+
+ if num_features == 0:
+ summary_result = pd.DataFrame()
+ summary_result.index.name = "source_ID"
+ summary_result.loc[0, "latitude"] = np.nan
+ summary_result.loc[0, "longitude"] = np.nan
+ summary_result.loc[0, "altitude"] = np.nan
+ summary_result.loc[0, "height"] = np.nan
+ summary_result.loc[0, "median_estimate"] = np.nan
+ summary_result.loc[0, "quantile_025"] = np.nan
+ summary_result.loc[0, "quantile_975"] = np.nan
+ summary_result.loc[0, "iqr_estimate"] = np.nan
+ summary_result.loc[0, "absolute_count_iterations"] = np.nan
+ summary_result.loc[0, "blob_likelihood"] = np.nan
+
+ return summary_result
+
+ burn_in_bool = result_iteration_vals > burn_in
+ nan_x_vals = np.isnan(result_x_vals)
+ nan_y_vals = np.isnan(result_y_vals)
+ nan_z_vals = np.isnan(result_z_vals)
+ no_nan_idx = np.logical_not(np.logical_or(np.logical_or(nan_x_vals, nan_y_vals), nan_z_vals))
+ no_nan_and_burn_in_bool = np.logical_and(no_nan_idx, burn_in_bool)
+ result_x_vals_no_nan = result_x_vals[no_nan_and_burn_in_bool]
+ result_y_vals_no_nan = result_y_vals[no_nan_and_burn_in_bool]
+ result_z_vals_no_nan = result_z_vals[no_nan_and_burn_in_bool]
+ results_estimates_no_nan = results_estimates[no_nan_and_burn_in_bool]
+ result_iteration_vals_no_nan = result_iteration_vals[no_nan_and_burn_in_bool]
+
+ x_idx = np.digitize(result_x_vals_no_nan, x_edges, right=False) - 1
+ y_idx = np.digitize(result_y_vals_no_nan, y_edges, right=False) - 1
+ bin_numbers = np.ravel_multi_index((x_idx, y_idx), labeled_array.shape)
+
+ bin_numbers_per_label = [
+ np.ravel_multi_index(np.nonzero(labeled_array == value), labeled_array.shape)
+ for value in np.array(range(num_features)) + 1
+ ]
+
+ summary_result = pd.DataFrame()
+ summary_result.index.name = "source_ID"
+
+ for label_idx, curr_bins in enumerate(bin_numbers_per_label):
+ boolean_for_result = np.isin(bin_numbers, curr_bins)
+ mean_x = np.mean(result_x_vals_no_nan[boolean_for_result])
+ mean_y = np.mean(result_y_vals_no_nan[boolean_for_result])
+ mean_z = np.mean(result_z_vals_no_nan[boolean_for_result])
+
+ unique_iteration_vals, indices, counts = np.unique(
+ result_iteration_vals_no_nan[boolean_for_result], return_inverse=True, return_counts=True
+ )
+ nof_iterations_present = unique_iteration_vals.size
+ blob_likelihood = nof_iterations_present / (nof_iterations - burn_in)
+ single_idx = np.argwhere(counts == 1)
+ results_estimates_for_blob = results_estimates_no_nan[boolean_for_result]
+ temp_estimate_result = results_estimates_for_blob[indices[single_idx.flatten()]]
+ multiple_idx = np.argwhere(counts > 1)
+ for single_idx in multiple_idx:
+ temp_val = np.sum(results_estimates_for_blob[indices == single_idx])
+ temp_estimate_result = np.append(temp_estimate_result, temp_val)
+
+ median_estimate = np.median(temp_estimate_result)
+ iqr_estimate = np.nanquantile(a=temp_estimate_result, q=0.75) - np.nanquantile(a=temp_estimate_result, q=0.25)
+ lower_bound = np.nanquantile(a=temp_estimate_result, q=0.025)
+ upper_bound = np.nanquantile(a=temp_estimate_result, q=0.975)
+ enu_object = ENU(ref_latitude=ref_latitude, ref_longitude=ref_longitude, ref_altitude=ref_altitude)
+ enu_object.east = mean_x
+ enu_object.north = mean_y
+ enu_object.up = mean_z
+ lla_object = enu_object.to_lla()
+
+ summary_result.loc[label_idx, "latitude"] = lla_object.latitude
+ summary_result.loc[label_idx, "longitude"] = lla_object.longitude
+ summary_result.loc[label_idx, "altitude"] = lla_object.altitude
+ summary_result.loc[label_idx, "height"] = mean_z
+ summary_result.loc[label_idx, "median_estimate"] = median_estimate
+ summary_result.loc[label_idx, "quantile_025"] = lower_bound
+ summary_result.loc[label_idx, "quantile_975"] = upper_bound
+ summary_result.loc[label_idx, "iqr_estimate"] = iqr_estimate
+ summary_result.loc[label_idx, "absolute_count_iterations"] = nof_iterations_present
+ summary_result.loc[label_idx, "blob_likelihood"] = blob_likelihood
+
+ summary_result = summary_result.astype({"absolute_count_iterations": "int"})
+
+ return summary_result