From 9fc638093f41b2fa58a2fae054a20ce1a170801d Mon Sep 17 00:00:00 2001
From: pmc4 <117096890+pmc4@users.noreply.github.com>
Date: Mon, 18 Dec 2023 17:23:46 +0100
Subject: [PATCH] Add bin filling with zeros function.

Because
* It's useful when we want to digitize bins to do chi squared analysis.

Update to version 0.1.2-alpha.
---
 Project.toml            |  2 +-
 src/helper_functions.jl | 88 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 89 insertions(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index a5f635a..e01b49e 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "VectorPlotDigitizer"
 uuid = "c2bc8a40-e6d5-422b-ae2d-904269160af5"
 authors = ["pmc4 <117096890+pmc4@users.noreply.github.com> and contributors"]
-version = "0.1.1-alpha"
+version = "0.1.2-alpha"
 
 [deps]
 DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
diff --git a/src/helper_functions.jl b/src/helper_functions.jl
index c38b54b..5705ec9 100644
--- a/src/helper_functions.jl
+++ b/src/helper_functions.jl
@@ -44,3 +44,91 @@ function binedges_to_bin_midpoints(data; zero_threshold::Real = 0.0, atol::Real=
 
     return hcat(x,y)
 end
+
+
+"""
+    fill_zeros_bins(real_xs, xs, ys, atol; zero_threshold::Real = 0.0)
+
+Given some digitized binned data, where `xs` are the bins' x-axis midpoints and `ys` are
+the corresponding values on the y axis, this function fills with zeros the missing bins.
+
+The complete set of bins is given by `real_xs` and `atol` is the absolute tolerance to use
+when checking whether a given element of `xs` matches one of `real_xs`. Smaller values of
+`atol` will cause fewer values of `xs` to be found inside `real_xs`.
+
+If a value of `zero_threshold` different than `0` is given, then every bin with a
+y value `y < zero_threshold` will be considered as `0.0`.
+
+# Examples
+Given the following `real_bins` bins and the digitized `x_bins` and `y_bins`:
+```
+julia> real_bins = [0.5, 1.5, 2.5, 3.5]
+4-element Vector{Float64}:
+ 0.5
+ 1.5
+ 2.5
+ 3.5
+
+julia> x_bins = [0.5001, 2.499987]
+2-element Vector{Float64}:
+ 0.5001
+ 2.499987
+
+julia> y_bins = [34.7, 4.2]
+2-element Vector{Float64}:
+ 34.7
+  4.2
+```
+
+We can fill it with zeros by choosing an absolute tolerance of `atol = 1e-4`:
+```
+julia> fill_zeros_bins(real_bins, x_bins, y_bins, 1e-4)
+4-element Vector{Float64}:
+ 34.7
+  0.0
+  4.2
+  0.0
+```
+
+We can also set a zero threshold at `10.0` so every point of `y_bins` smaller than that will
+be zero:
+```
+julia> fill_zeros_bins(real_bins, x_bins, y_bins, 1e-4; zero_threshold = 10.0)
+4-element Vector{Float64}:
+ 34.7
+  0.0
+  0.0
+  0.0
+```
+
+# Notes
+A warning is given if the absolute tolerance is too small. This means that the function is
+filling with more zeros than needed and some `ys` values are not taken into account. This
+warning has nothing to do with whether we set a high `zero_threshold` value or not.
+"""
+function fill_zeros_bins(real_xs, xs, ys, atol; zero_threshold::Real = 0.0)
+    # Output array, one slot per bin in real_xs, initialised to zero via zero(real_xs)
+    real_ys = zero(real_xs)
+
+    # For each digitized xs[i], mark the entries of real_xs within atol of it (isapprox)
+    # and write ys[i] into every marked slot of real_ys; unmatched slots stay zero
+    for i in eachindex(xs)
+        indexes = isapprox.(real_xs, xs[i]; atol=atol)
+        real_ys[indexes] .= ys[i]
+    end
+
+    # Warn when the zero count differs from length(real_xs) - length(ys); extra zeros
+    # suggest atol is too small. NOTE(review): ys entries equal to 0 also count here.
+    number_of_zeros = count(x -> x == zero(x), real_ys)
+    if ! (number_of_zeros == length(real_xs) - length(ys))
+        warn_message = """Expected to write $(length(real_xs) - length(ys)) zeros, but $number_of_zeros were written.
+        Consider increasing the value of `atol` to fix this issue."""
+        @warn warn_message
+    end
+
+    # Zero out every entry strictly below zero_threshold (done after the check above)
+    zero_index = real_ys .< zero_threshold
+    real_ys[zero_index] .= 0.0
+
+    return real_ys
+end